First push with mobi support

2026-06-27 07:21:38 -04:00 · 2020-05-31 01:20:52 -04:00
parent ac1a166ae5
commit 7ee9c57ed1
6 changed files with 43 additions and 21 deletions
--- a/config.json
+++ b/config.json
@@ -1 +1 @@
-{"TITLE": "pyShelf E-Book Server", "VERSION": "0.5.0", "BOOKPATH": "", "DB_HOST": "localhost", "DB_PORT": "5432", "DATABASE": "pyshelf", "USER": "pyshelf", "PASSWORD": "pyshelf", "BOOKSHELF": "data/shelf.json", "ALLOWED_HOSTS": "*", "hostname": "localhost", "webport": "8000", "wsgiport": "8001"}
+{"TITLE": "pyShelf E-Book Server", "VERSION": "0.5.0", "BOOKPATH": "/home/raelon/Books", "DB_HOST": "localhost", "DB_PORT": "5432", "DATABASE": "pyshelf", "USER": "pyshelf", "PASSWORD": "pyshelf", "BOOKSHELF": "data/shelf.json", "ALLOWED_HOSTS": "*", "hostname": "localhost", "webport": "8000", "wsgiport": "8001"}
--- a/2
+++ b/2
@@ -1,4 +1,4 @@
-#!python
+#!/usr/bin/env python
 import pathlib
 import sys
--- a/2
+++ b/2
@@ -1,4 +1,4 @@
-#!python
+#!/usr/bin/env python
 import json
 import os
 import pathlib
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -7,4 +7,4 @@ use_parentheses = true
 # NOTE: the known_third_party setting is managed by
 # seed-isort-config and should not be modified directly.
 # Any changes made to this setting will be overwritten.
-known_third_party = ["backend", "bs4", "django", "interface", "prompt_toolkit", "psycopg2", "pyfiglet", "requests"]
+known_third_party = ["backend", "bs4", "django", "interface", "mobi", "prompt_toolkit", "psycopg2", "pyfiglet", "requests"]
--- a/requirements.txt
+++ b/requirements.txt
@@ -17,3 +17,4 @@ psycopg2-binary
 prompt_toolkit
 psutil
 pyfiglet
 mobi-python
--- a/src/backend/lib/library.py
+++ b/src/backend/lib/library.py
@@ -7,6 +7,8 @@ import zipfile
 from bs4 import BeautifulSoup
 from mobi import Mobi
 from .api_hooks import DuckDuckGo
 from .config import Config
 from .storage import Storage
@@ -26,8 +28,7 @@ class Catalogue:
        self.html_regx = re.compile(r"\.html")
        self.root_dir = config.root
        self.book_folder = config.book_path
-        self.book_shelf = config.book_shelf
+        # self.book_shelf = config.book_shelf
        self._book_list_expanded = None
        self.books = None
        self.db_pointer = config.catalogue_db
        self.config = config
@@ -57,23 +58,29 @@ class Catalogue:
        :returns self._book_list_expanded: json string containing all book metadata
        """
        self.scan_folder()  # Populate file list
-        regx = re.compile(r"\.epub")
+        regx = re.compile(r"\.epub|\.mobi")
        try:
            self.books = list(filter(regx.search, filter(None, self.file_list)))
        except TypeError as e:
            print(e)
-        self._book_list_expanded = {}
+        """
        with open(self.book_shelf, "w") as f:
        for book in self.books:
-                self._book_list_expanded[book] = self.process_book(book)
+            self._book_list_expanded[book] = self.process_by_filetype(book)
            json.dump(self._book_list_expanded, f)
        return self._book_list_expanded
        """
    def process_by_filetype(self, book):
        """
        Extract metadata from a book using the handler for its file suffix

        :param book: path of the book file as a string; only names ending in
            ".epub" or ".mobi" (exact, case-sensitive match) are handled
        :returns: whatever extract_metadata_epub / extract_metadata_mobi
            returns for the file, or None when the suffix is neither
        """
        if book.endswith(".mobi"):
            return self.extract_metadata_mobi(book)
        if not book.endswith(".epub"):
            # Unrecognised suffix: there is no handler to dispatch to.
            return None
        # epub takes two steps: build the contents dictionary, then read
        # the metadata out of it.
        return self.extract_metadata_epub(self.process_epub(book))
    @staticmethod
-    def process_book(book):
+    def process_epub(book):
        """Return dictionary of epub file contents"""
        book = zipfile.ZipFile(book, "r")
        details = {}
        book = zipfile.ZipFile(book, "r")
        with book as book_zip:
            details["files"] = []
            details["path"] = book.filename
@@ -86,7 +93,7 @@ class Catalogue:
                    details["files"].append(match.string)
        return details
-    def extract_metadata(self, book):
+    def extract_metadata_epub(self, book):
        """
        Return extracted metadata and cover picture
        book['path'] == Full path to ebook file
@@ -94,7 +101,7 @@ class Catalogue:
        """
        book_zip = zipfile.ZipFile(book["path"], "r")
        with book_zip as f:
-            content = self.extract_content(book_zip, book)
+            content = self.extract_content(f, book)
            soup = BeautifulSoup(content, "lxml")
            title = soup.find("dc:title")
            if title is None:
@@ -105,13 +112,27 @@ class Catalogue:
            if author is not None:
                author = author.contents[0]
            try:
-                cover = self.extract_cover_image(book_zip, book)
+                cover = self.extract_cover_image(f, book)
            except IndexError:
                # cover = self.extract_cover_html(book_zip, book)
                cover = DuckDuckGo().image_result(title)
            book_details = [title, author, cover, book["path"]]
        return book_details
    @staticmethod
    def extract_metadata_mobi(book):
        """
        Return extracted metadata and cover image record for a mobi file

        :param book: path to the .mobi file
        :returns: list of [title, author, cover_image, path]. title and
            author are decoded as utf-8; cover_image is the first image
            record, or None when reading it raises KeyError; path is the
            name of the file object the parser opened
        """
        mobi_file = Mobi(book)
        try:
            mobi_file.parse()
            try:
                cover_image = mobi_file.readImageRecord(0)
            except KeyError:
                cover_image = None
            title = mobi_file.title().decode("utf-8")
            # TODO some files are still passing encoded data for author.
            author = mobi_file.author().decode("utf-8")
            return [title, author, cover_image, mobi_file.f.name]
        finally:
            # The parser exposes its file object as .f and nothing else here
            # releases it; close it so scanning a large library does not
            # accumulate open file descriptors.
            mobi_file.f.close()
    def extract_content(self, book_zip, book):
        """
        Opens epub as zip file filters then stores as list any files matching opf_regx
@@ -161,12 +182,12 @@ class Catalogue:
        Gets a list of new files via compare_shelf_current.
        Iterates over list and inserts new books into database.
        """
        # TODO Refactor metadata extraction into process_book call to more easily handle additional formats
        book_list = self.compare_shelf_current()
        db = Storage(self.config)
        for book in book_list:
-            book = self.process_book(book)
+            book = self.process_by_filetype(book)
-            extracted = self.extract_metadata(book)
+            db.insert_book(book)
            db.insert_book(extracted)
        inserted = db.commit()
        if inserted is not True:
            print(inserted)
`@@ -1 +1 @@`
	`{"TITLE": "pyShelf E-Book Server", "VERSION": "0.5.0", "BOOKPATH": "", "DB_HOST": "localhost", "DB_PORT": "5432", "DATABASE": "pyshelf", "USER": "pyshelf", "PASSWORD": "pyshelf", "BOOKSHELF": "data/shelf.json", "ALLOWED_HOSTS": "*", "hostname": "localhost", "webport": "8000", "wsgiport": "8001"}`	`{"TITLE": "pyShelf E-Book Server", "VERSION": "0.5.0", "BOOKPATH": "/home/raelon/Books", "DB_HOST": "localhost", "DB_PORT": "5432", "DATABASE": "pyshelf", "USER": "pyshelf", "PASSWORD": "pyshelf", "BOOKSHELF": "data/shelf.json", "ALLOWED_HOSTS": "*", "hostname": "localhost", "webport": "8000", "wsgiport": "8001"}`