Refactored for better packaging

2026-04-28 01:59:35 -04:00 · 2019-11-20 00:49:40 -05:00
parent 98fd28dc3f
commit 37fe006da5
169 changed files with 28672 additions and 2 deletions
--- a/src/backend/lib/library.py
+++ b/src/backend/lib/library.py
@@ -0,0 +1,154 @@
+#!/usr/bin/python
+import json
+import os
+import re
+import zipfile
+
+from bs4 import BeautifulSoup
+from PIL import Image
+
+from .api_hooks import DuckDuckGo
+from .config import Config
+from .storage import Storage
+
+# config = Config()
+
+
+class Catalogue:
+    """Discover epub files on disk, decode their metadata and import them.
+
+    Typical flow: ``filter_books`` scans the book folder and writes the shelf
+    file, ``compare_shelf_current`` works out which books are not in storage
+    yet, and ``import_books`` extracts their metadata and stores them.
+    """
+
+    def __init__(self, config):
+        """Read folder and shelf locations from *config*.
+
+        :param config: object exposing ``root``, ``book_path`` and
+            ``book_shelf`` attributes.
+        """
+        self.file_list = []
+        self.opf_regx = re.compile(r"\.opf")
+        self.cover_regx = re.compile(r"\.jpg|\.jpeg|\.png|\.bmp|\.gif")
+        self.html_regx = re.compile(r"\.html")
+        self.root_dir = config.root
+        self.book_folder = config.book_path
+        self.book_shelf = config.book_shelf
+        self._book_list_expanded = None
+        self.books = None
+
+    @staticmethod
+    def _first_match(pattern, names):
+        """Return the first entry of *names* that *pattern* matches.
+
+        :raises IndexError: when nothing matches (the exception type callers
+            of the ``extract_*`` methods already handle).
+        """
+        for name in names:
+            if pattern.search(name):
+                return name
+        raise IndexError("no archive member matches %r" % pattern.pattern)
+
+    @staticmethod
+    def _tag_text(tag):
+        """Return the first child of *tag* as ``str``; ``None`` if absent/empty."""
+        if tag is None or not tag.contents:
+            return None
+        return str(tag.contents[0])
+
+    def scan_folder(self, _path=None):
+        """Recursively append every path below a folder to ``self.file_list``.
+
+        Entries of a sub-folder are appended before the sub-folder itself.
+
+        :param _path: folder to scan; defaults to ``root_dir/book_folder``
+            when that is a directory, otherwise to ``book_folder`` as given.
+        """
+        if _path is not None:
+            folder = _path
+        else:
+            candidate = os.path.join(self.root_dir, self.book_folder)
+            folder = candidate if os.path.isdir(candidate) else self.book_folder
+        for name in os.listdir(folder):
+            entry = os.path.abspath(os.path.join(folder, name))
+            if os.path.isdir(entry):
+                # Recurse for the side effect only: the call returns None and
+                # must not be stored in the path list.
+                self.scan_folder(entry)
+            self.file_list.append(entry)
+
+    def filter_books(self):
+        """Scan the book folder for epub files and write the shelf file.
+
+        Sets ``self.books`` to the epub paths found (duplicates removed, scan
+        order kept) and dumps ``{path: process_book(path)}`` as JSON to
+        ``self.book_shelf``.
+
+        :return: the dictionary that was written to the shelf file.
+        :raises zipfile.BadZipFile: if a ``.epub`` file is not a zip archive.
+        """
+        self.scan_folder()
+        # Anchor on the extension so "x.epub.txt" or folders merely containing
+        # ".epub" in their path are not mistaken for books.
+        epub_regx = re.compile(r"\.epub$", re.IGNORECASE)
+        # file_list grows on every scan; dict.fromkeys drops repeats in order.
+        self.books = list(
+            dict.fromkeys(
+                path
+                for path in self.file_list
+                if path and epub_regx.search(path) and os.path.isfile(path)
+            )
+        )
+        # Build everything first so a broken archive cannot leave behind a
+        # truncated shelf file.
+        self._book_list_expanded = {
+            book: self.process_book(book) for book in self.books
+        }
+        with open(self.book_shelf, "w") as f:
+            json.dump(self._book_list_expanded, f)
+        return self._book_list_expanded
+
+    @staticmethod
+    def process_book(book):
+        """Return a dictionary describing the interesting members of an epub.
+
+        :param book: path to the epub file.
+        :return: ``{"files": [...], "path": book}`` where ``files`` lists the
+            archive members whose name contains ``.opf`` or ``cover``.
+        """
+        wanted = re.compile(r"\.opf|cover")
+        with zipfile.ZipFile(book, "r") as book_zip:
+            return {
+                "files": [
+                    name for name in book_zip.namelist() if wanted.search(name)
+                ],
+                "path": book_zip.filename,
+            }
+
+    def extract_metadata(self, book):
+        """Return ``[title, author, cover, path]`` for one book.
+
+        :param book: dictionary from ``process_book``; ``book['path']`` is the
+            full path to the epub and ``book['files']`` its member list.
+        :return: list with the title (file name without extension when the
+            OPF has none), the author or ``None``, the cover and the path.
+            The cover is the image bytes from the archive; when the archive
+            lists no image, it is whatever ``DuckDuckGo().image_result``
+            returns for the title.
+        """
+        fallback_title = os.path.splitext(os.path.basename(book["path"]))[0]
+        title, author = fallback_title, None
+        with zipfile.ZipFile(book["path"], "r") as book_zip:
+            try:
+                opf = self.extract_content(book_zip, book)
+            except IndexError:
+                # No OPF listed: keep the file-name title and no author.
+                opf = None
+            if opf is not None:
+                with opf:
+                    soup = BeautifulSoup(opf, "lxml")
+                title = self._tag_text(soup.find("dc:title")) or fallback_title
+                author = self._tag_text(soup.find("dc:creator"))
+            try:
+                cover = self.extract_cover_image(book_zip, book)
+            except IndexError:
+                # No image among the listed members: look one up online.
+                cover = DuckDuckGo().image_result(title)
+        return [title, author, cover, book["path"]]
+
+    def extract_content(self, book_zip, book):
+        """Open the first ``.opf`` member of *book* and return the handle.
+
+        The caller is responsible for closing the returned file object.
+
+        :raises IndexError: if ``book['files']`` lists no OPF member.
+        """
+        return book_zip.open(self._first_match(self.opf_regx, book["files"]))
+
+    def extract_cover_html(self, book_zip, book):
+        """Open the first ``.html`` member of *book* and return the handle.
+
+        The caller is responsible for closing the returned file object.
+
+        :raises IndexError: if ``book['files']`` lists no HTML member.
+        """
+        return book_zip.open(self._first_match(self.html_regx, book["files"]))
+
+    def extract_cover_image(self, book_zip, book):
+        """Return the bytes of the first image member listed for *book*.
+
+        :return: the image bytes, or ``False`` when the listed member is not
+            present in *book_zip*.
+        :raises IndexError: if ``book['files']`` lists no image member.
+        """
+        name = self._first_match(self.cover_regx, book["files"])
+        try:
+            # Read by name directly; no intermediate handle is left open.
+            return book_zip.read(name)
+        except KeyError:
+            return False
+
+    def compare_shelf_current(self):
+        """Return the set of epub paths on disk that storage does not list.
+
+        Runs ``filter_books`` first when no scan has been done yet.
+        """
+        db = Storage()
+        try:
+            stored = db.book_paths_list()
+        finally:
+            db.close()
+        if self.books is None:
+            self.filter_books()
+        # Each stored row carries the book path in its first column.
+        known = {row[0] for row in stored}
+        return set(self.books) - known
+
+    def import_books(self, list=None):
+        """Extract metadata for every book missing from storage and insert it.
+
+        :param list: unused; kept (despite shadowing the builtin) so existing
+            callers that pass it keep working.
+        """
+        new_books = self.compare_shelf_current()
+        db = Storage()
+        try:
+            for path in new_books:
+                details = self.process_book(path)
+                db.insert_book(self.extract_metadata(details))
+            inserted = db.commit()
+            if inserted is not True:
+                # Report the failure. The former interactive prompt was
+                # dropped: its answer was ignored and it blocked (or raised
+                # EOFError) when run without a terminal.
+                print(inserted)
+        finally:
+            db.close()