mirror of
https://github.com/th3r00t/pyShelf.git
synced 2026-04-28 01:59:35 -04:00
Get cover image from epub, or DuckDUckGo
This commit is contained in:
@@ -6,6 +6,8 @@ import zipfile
|
||||
from PIL import Image
|
||||
from bs4 import BeautifulSoup
|
||||
from config import Config
|
||||
from api_hooks import DuckDuckGo
|
||||
|
||||
config = Config()
|
||||
|
||||
|
||||
@@ -14,6 +16,9 @@ class Catalogue:
|
||||
"""Step One: filter_books"""
|
||||
def __init__(self):
|
||||
self.file_list = []
|
||||
self.opf_regx = re.compile(r'\.opf')
|
||||
self.cover_regx = re.compile(r'\.jpg|\.jpeg|\.png|\.bmp|\.gif')
|
||||
self.html_regx = re.compile(r'\.html')
|
||||
with open(config.book_shelf, 'r') as f:
|
||||
try:
|
||||
self.catalogue = json.load(f)
|
||||
@@ -79,18 +84,46 @@ class Catalogue:
|
||||
book['files'] == list of files from self.process_book(book)
|
||||
"""
|
||||
book_zip = zipfile.ZipFile(book['path'], 'r')
|
||||
opf_regx, cover_regx = re.compile(r'\.opf'), re.compile(r'\.jpg|\.jpeg|\.png|\.bmp|\.gif')
|
||||
with book_zip as f:
|
||||
content = book_zip.open(list(filter(opf_regx.search, book['files']))[0])
|
||||
cover = book_zip.open(list(filter(cover_regx.search, book['files']))[0])
|
||||
# TODO Handle books that have no Cover Image
|
||||
## TODO Handle books with html covers
|
||||
soup = BeautifulSoup(content, "xml")
|
||||
content = self.extract_content(book_zip, book)
|
||||
soup = BeautifulSoup(content, "lxml")
|
||||
title = soup.find("dc:title")
|
||||
if title == None:
|
||||
title = book['path'].split('/')[-1].rsplit('.', 1)[0]
|
||||
author = soup.find("dc:creator")
|
||||
try: cover = self.extract_cover_image(book_zip, book)
|
||||
except IndexError:
|
||||
# cover = self.extract_cover_html(book_zip, book)
|
||||
cover = DuckDuckGo().image_result(title)
|
||||
book_details = [title.contents[0], author.contents[0], cover]
|
||||
return book_details
|
||||
|
||||
def extract_content(self, book_zip, book):
|
||||
content = book_zip.open(
|
||||
list(
|
||||
filter(self.opf_regx.search, book['files'])
|
||||
)[0]
|
||||
)
|
||||
return content
|
||||
|
||||
def extract_cover_html(self, book_zip, book):
|
||||
cover = book_zip.open(
|
||||
list(
|
||||
filter(self.html_regx.search, book['files'])
|
||||
)[0]
|
||||
)
|
||||
return cover
|
||||
|
||||
def extract_cover_image(self, book_zip, book):
|
||||
# TODO Handle books that have no Cover Image
|
||||
# TODO Handle books with html covers
|
||||
cover = book_zip.open(
|
||||
list(
|
||||
filter(self.cover_regx.search, book['files'])
|
||||
)[0]
|
||||
)
|
||||
return cover
|
||||
|
||||
def compare_shelf_current(self):
|
||||
try:
|
||||
self.books
|
||||
|
||||
Reference in New Issue
Block a user