First push with mobi support

2026-04-28 01:59:35 -04:00 · 2020-05-31 01:20:52 -04:00
parent ac1a166ae5
commit 7ee9c57ed1
6 changed files with 43 additions and 21 deletions
--- a/src/backend/lib/library.py
+++ b/src/backend/lib/library.py
@@ -7,6 +7,8 @@ import zipfile

 from bs4 import BeautifulSoup

+from mobi import Mobi
+
 from .api_hooks import DuckDuckGo
 from .config import Config
 from .storage import Storage
@@ -26,8 +28,7 @@ class Catalogue:
        self.html_regx = re.compile(r"\.html")
        self.root_dir = config.root
        self.book_folder = config.book_path
-        self.book_shelf = config.book_shelf
-        self._book_list_expanded = None
+        # self.book_shelf = config.book_shelf
        self.books = None
        self.db_pointer = config.catalogue_db
        self.config = config
@@ -56,24 +57,30 @@ class Catalogue:

        :returns self._book_list_expanded: json string containing all book metadata
        """
-        self.scan_folder() # Populate file list
-        regx = re.compile(r"\.epub")
+        self.scan_folder()  # Populate file list
+        regx = re.compile(r"\.epub|\.mobi")
        try:
            self.books = list(filter(regx.search, filter(None, self.file_list)))
        except TypeError as e:
            print(e)
-        self._book_list_expanded = {}
-        with open(self.book_shelf, "w") as f:
-            for book in self.books:
-                self._book_list_expanded[book] = self.process_book(book)
-            json.dump(self._book_list_expanded, f)
+        """
+        for book in self.books:
+            self._book_list_expanded[book] = self.process_by_filetype(book)
        return self._book_list_expanded
+        """
+
+    def process_by_filetype(self, book):
+        if book.endswith(".epub"):
+            epub = self.process_epub(book)
+            return self.extract_metadata_epub(epub)
+        elif book.endswith(".mobi"):
+            return self.extract_metadata_mobi(book)

    @staticmethod
-    def process_book(book):
+    def process_epub(book):
        """Return dictionary of epub file contents"""
-        book = zipfile.ZipFile(book, "r")
        details = {}
+        book = zipfile.ZipFile(book, "r")
        with book as book_zip:
            details["files"] = []
            details["path"] = book.filename
@@ -86,7 +93,7 @@ class Catalogue:
                    details["files"].append(match.string)
        return details

-    def extract_metadata(self, book):
+    def extract_metadata_epub(self, book):
        """
        Return extracted metadata and cover picture
        book['path'] == Full path to ebook file
@@ -94,7 +101,7 @@ class Catalogue:
        """
        book_zip = zipfile.ZipFile(book["path"], "r")
        with book_zip as f:
-            content = self.extract_content(book_zip, book)
+            content = self.extract_content(f, book)
            soup = BeautifulSoup(content, "lxml")
            title = soup.find("dc:title")
            if title is None:
@@ -105,13 +112,27 @@ class Catalogue:
            if author is not None:
                author = author.contents[0]
            try:
-                cover = self.extract_cover_image(book_zip, book)
+                cover = self.extract_cover_image(f, book)
            except IndexError:
                # cover = self.extract_cover_html(book_zip, book)
                cover = DuckDuckGo().image_result(title)
            book_details = [title, author, cover, book["path"]]
        return book_details

+    @staticmethod
+    def extract_metadata_mobi(book):
+        book = Mobi(book)
+        book.parse()
+        try:
+            cover_image = book.readImageRecord(0)
+        except KeyError:
+            cover_image = None
+        title = book.title().decode("utf-8")
+        author = book.author().decode(
+            "utf-8"
+        )  # TODO some files are still passing encoded data for author.
+        return [title, author, cover_image, book.f.name]
+
    def extract_content(self, book_zip, book):
        """
        Opens epub as zip file filters then stores as list any files matching opf_regx
@@ -161,12 +182,12 @@ class Catalogue:
        Gets a list of new files via compare_shelf_current.
        Iterates over list and inserts new books into database.
        """
+        # TODO Refactor metadata extraction into process_book call to more easily handle additional formats
        book_list = self.compare_shelf_current()
        db = Storage(self.config)
        for book in book_list:
-            book = self.process_book(book)
-            extracted = self.extract_metadata(book)
-            db.insert_book(extracted)
+            book = self.process_by_filetype(book)
+            db.insert_book(book)
        inserted = db.commit()
        if inserted is not True:
            print(inserted)