diff --git a/config.json b/config.json index 71951be..28132cf 100755 --- a/config.json +++ b/config.json @@ -1 +1 @@ -{"TITLE": "pyShelf E-Book Server", "VERSION": "0.5.0", "BOOKPATH": "", "DB_HOST": "localhost", "DB_PORT": "5432", "DATABASE": "pyshelf", "USER": "pyshelf", "PASSWORD": "pyshelf", "BOOKSHELF": "data/shelf.json", "ALLOWED_HOSTS": "*", "hostname": "localhost", "webport": "8000", "wsgiport": "8001"} +{"TITLE": "pyShelf E-Book Server", "VERSION": "0.5.0", "BOOKPATH": "/home/raelon/Books", "DB_HOST": "localhost", "DB_PORT": "5432", "DATABASE": "pyshelf", "USER": "pyshelf", "PASSWORD": "pyshelf", "BOOKSHELF": "data/shelf.json", "ALLOWED_HOSTS": "*", "hostname": "localhost", "webport": "8000", "wsgiport": "8001"} diff --git a/importBooks b/importBooks index 244906d..272b643 100755 --- a/importBooks +++ b/importBooks @@ -1,4 +1,4 @@ -#!python +#!/usr/bin/env python import pathlib import sys diff --git a/installer b/installer index f6882b5..b8c112b 100755 --- a/installer +++ b/installer @@ -1,4 +1,4 @@ -#!python +#!/usr/bin/env python import json import os import pathlib diff --git a/pyproject.toml b/pyproject.toml index eaa5fde..6caf254 100755 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,4 +7,4 @@ use_parentheses = true # NOTE: the known_third_party setting is managed by # seed-isort-config and should not be modified directly. # Any changes made to this setting will be overwritten. 
-known_third_party = ["backend", "bs4", "django", "interface", "prompt_toolkit", "psycopg2", "pyfiglet", "requests"] +known_third_party = ["backend", "bs4", "django", "interface", "mobi", "prompt_toolkit", "psycopg2", "pyfiglet", "requests"] diff --git a/requirements.txt b/requirements.txt index 749a3dd..89974a9 100755 --- a/requirements.txt +++ b/requirements.txt @@ -17,3 +17,4 @@ psycopg2-binary prompt_toolkit psutil pyfiglet +mobi-python diff --git a/src/backend/lib/library.py b/src/backend/lib/library.py index 6585f6d..9b0686c 100755 --- a/src/backend/lib/library.py +++ b/src/backend/lib/library.py @@ -7,6 +7,8 @@ import zipfile from bs4 import BeautifulSoup +from mobi import Mobi + from .api_hooks import DuckDuckGo from .config import Config from .storage import Storage @@ -26,8 +28,7 @@ class Catalogue: self.html_regx = re.compile(r"\.html") self.root_dir = config.root self.book_folder = config.book_path - self.book_shelf = config.book_shelf - self._book_list_expanded = None + # self.book_shelf = config.book_shelf self.books = None self.db_pointer = config.catalogue_db self.config = config @@ -56,24 +57,30 @@ class Catalogue: :returns self._book_list_expanded: json string containing all book metadata """ - self.scan_folder() # Populate file list - regx = re.compile(r"\.epub") + self.scan_folder() # Populate file list + regx = re.compile(r"\.epub|\.mobi") try: self.books = list(filter(regx.search, filter(None, self.file_list))) except TypeError as e: print(e) - self._book_list_expanded = {} - with open(self.book_shelf, "w") as f: - for book in self.books: - self._book_list_expanded[book] = self.process_book(book) - json.dump(self._book_list_expanded, f) + """ + for book in self.books: + self._book_list_expanded[book] = self.process_by_filetype(book) return self._book_list_expanded + """ + + def process_by_filetype(self, book): + if book.endswith(".epub"): + epub = self.process_epub(book) + return self.extract_metadata_epub(epub) + elif book.endswith(".mobi"): 
+ return self.extract_metadata_mobi(book) @staticmethod - def process_book(book): + def process_epub(book): """Return dictionary of epub file contents""" - book = zipfile.ZipFile(book, "r") details = {} + book = zipfile.ZipFile(book, "r") with book as book_zip: details["files"] = [] details["path"] = book.filename @@ -86,7 +93,7 @@ class Catalogue: details["files"].append(match.string) return details - def extract_metadata(self, book): + def extract_metadata_epub(self, book): """ Return extracted metadata and cover picture book['path'] == Full path to ebook file @@ -94,7 +101,7 @@ class Catalogue: """ book_zip = zipfile.ZipFile(book["path"], "r") with book_zip as f: - content = self.extract_content(book_zip, book) + content = self.extract_content(f, book) soup = BeautifulSoup(content, "lxml") title = soup.find("dc:title") if title is None: @@ -105,13 +112,27 @@ class Catalogue: if author is not None: author = author.contents[0] try: - cover = self.extract_cover_image(book_zip, book) + cover = self.extract_cover_image(f, book) except IndexError: # cover = self.extract_cover_html(book_zip, book) cover = DuckDuckGo().image_result(title) book_details = [title, author, cover, book["path"]] return book_details + @staticmethod + def extract_metadata_mobi(book): + book = Mobi(book) + book.parse() + try: + cover_image = book.readImageRecord(0) + except KeyError: + cover_image = None + title = book.title().decode("utf-8") + author = book.author().decode( + "utf-8" + ) # TODO some files are still passing encoded data for author. + return [title, author, cover_image, book.f.name] + def extract_content(self, book_zip, book): """ Opens epub as zip file filters then stores as list any files matching opf_regx @@ -161,12 +182,12 @@ class Catalogue: Gets a list of new files via compare_shelf_current. Iterates over list and inserts new books into database. 
""" + # TODO Refactor metadata extraction into process_book call to more easily handle additional formats book_list = self.compare_shelf_current() db = Storage(self.config) for book in book_list: - book = self.process_book(book) - extracted = self.extract_metadata(book) - db.insert_book(extracted) + book = self.process_by_filetype(book) + db.insert_book(book) inserted = db.commit() if inserted is not True: print(inserted)