Get cover image from epub, or DuckDuckGo

2026-06-27 07:21:38 -04:00 · 2019-10-04 17:15:42 -04:00
parent d7f0e2dab5
commit acaaa7b76e
1 changed files with 39 additions and 6 deletions
--- a/lib/library.py
+++ b/lib/library.py
@@ -6,6 +6,8 @@ import zipfile
 from PIL import Image
 from bs4 import BeautifulSoup
 from config import Config
 from api_hooks import DuckDuckGo
 config = Config()
@@ -14,6 +16,9 @@ class Catalogue:
    """Step One: filter_books"""
    def __init__(self):
        self.file_list = []
        self.opf_regx = re.compile(r'\.opf')
        self.cover_regx = re.compile(r'\.jpg|\.jpeg|\.png|\.bmp|\.gif')
        self.html_regx = re.compile(r'\.html')
        with open(config.book_shelf, 'r') as f:
            try:
                self.catalogue = json.load(f)
@@ -79,18 +84,46 @@ class Catalogue:
        book['files'] == list of files from self.process_book(book)
        """
        book_zip = zipfile.ZipFile(book['path'], 'r')
        opf_regx, cover_regx = re.compile(r'\.opf'), re.compile(r'\.jpg|\.jpeg|\.png|\.bmp|\.gif')
        with book_zip as f:
-            content = book_zip.open(list(filter(opf_regx.search, book['files']))[0])
+            content = self.extract_content(book_zip, book)
-            cover = book_zip.open(list(filter(cover_regx.search, book['files']))[0])
+            soup = BeautifulSoup(content, "lxml")
            # TODO Handle books that have no Cover Image
            ## TODO Handle books with html covers
            soup = BeautifulSoup(content, "xml")
            title = soup.find("dc:title")
            if title == None:
                title = book['path'].split('/')[-1].rsplit('.', 1)[0]
            author = soup.find("dc:creator")
            try: cover = self.extract_cover_image(book_zip, book)
            except IndexError:
                # cover = self.extract_cover_html(book_zip, book)
                cover = DuckDuckGo().image_result(title)
            book_details = [title.contents[0], author.contents[0], cover]
        return book_details
    def extract_content(self, book_zip, book):
        """Open the first archive member whose name matches self.opf_regx.

        book_zip -- object exposing open(name); the result of that call
                    is returned unchanged.
        book     -- mapping whose 'files' entry lists the member names.

        Raises IndexError when no name in book['files'] matches.
        """
        open_member = book_zip.open
        opf_names = [
            name for name in book['files']
            if self.opf_regx.search(name)
        ]
        # Index instead of next(): an empty match list must raise IndexError.
        return open_member(opf_names[0])
    def extract_cover_html(self, book_zip, book):
        """Open the first archive member whose name matches self.html_regx.

        book_zip -- object exposing open(name); the result of that call
                    is returned unchanged.
        book     -- mapping whose 'files' entry lists the member names.

        Raises IndexError when no name in book['files'] matches.
        """
        open_member = book_zip.open
        html_names = [
            name for name in book['files']
            if self.html_regx.search(name)
        ]
        # Index instead of next(): an empty match list must raise IndexError.
        return open_member(html_names[0])
    def extract_cover_image(self, book_zip, book):
        """Open the first archive member whose name matches self.cover_regx.

        book_zip -- object exposing open(name); the result of that call
                    is returned unchanged.
        book     -- mapping whose 'files' entry lists the member names.

        Raises IndexError when no name in book['files'] matches.
        """
        # TODO Handle books that have no Cover Image
        # TODO Handle books with html covers
        open_member = book_zip.open
        image_names = [
            name for name in book['files']
            if self.cover_regx.search(name)
        ]
        # Index instead of next(): an empty match list must raise IndexError.
        return open_member(image_names[0])
    def compare_shelf_current(self):
        try:
            self.books