mirror of
https://github.com/th3r00t/pyShelf.git
synced 2026-04-28 01:59:35 -04:00
Get cover image from epub, or DuckDUckGo
This commit is contained in:
@@ -6,6 +6,8 @@ import zipfile
|
|||||||
from PIL import Image
|
from PIL import Image
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from config import Config
|
from config import Config
|
||||||
|
from api_hooks import DuckDuckGo
|
||||||
|
|
||||||
config = Config()
|
config = Config()
|
||||||
|
|
||||||
|
|
||||||
@@ -14,6 +16,9 @@ class Catalogue:
|
|||||||
"""Step One: filter_books"""
|
"""Step One: filter_books"""
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.file_list = []
|
self.file_list = []
|
||||||
|
self.opf_regx = re.compile(r'\.opf')
|
||||||
|
self.cover_regx = re.compile(r'\.jpg|\.jpeg|\.png|\.bmp|\.gif')
|
||||||
|
self.html_regx = re.compile(r'\.html')
|
||||||
with open(config.book_shelf, 'r') as f:
|
with open(config.book_shelf, 'r') as f:
|
||||||
try:
|
try:
|
||||||
self.catalogue = json.load(f)
|
self.catalogue = json.load(f)
|
||||||
@@ -79,18 +84,46 @@ class Catalogue:
|
|||||||
book['files'] == list of files from self.process_book(book)
|
book['files'] == list of files from self.process_book(book)
|
||||||
"""
|
"""
|
||||||
book_zip = zipfile.ZipFile(book['path'], 'r')
|
book_zip = zipfile.ZipFile(book['path'], 'r')
|
||||||
opf_regx, cover_regx = re.compile(r'\.opf'), re.compile(r'\.jpg|\.jpeg|\.png|\.bmp|\.gif')
|
|
||||||
with book_zip as f:
|
with book_zip as f:
|
||||||
content = book_zip.open(list(filter(opf_regx.search, book['files']))[0])
|
content = self.extract_content(book_zip, book)
|
||||||
cover = book_zip.open(list(filter(cover_regx.search, book['files']))[0])
|
soup = BeautifulSoup(content, "lxml")
|
||||||
# TODO Handle books that have no Cover Image
|
|
||||||
## TODO Handle books with html covers
|
|
||||||
soup = BeautifulSoup(content, "xml")
|
|
||||||
title = soup.find("dc:title")
|
title = soup.find("dc:title")
|
||||||
|
if title == None:
|
||||||
|
title = book['path'].split('/')[-1].rsplit('.', 1)[0]
|
||||||
author = soup.find("dc:creator")
|
author = soup.find("dc:creator")
|
||||||
|
try: cover = self.extract_cover_image(book_zip, book)
|
||||||
|
except IndexError:
|
||||||
|
# cover = self.extract_cover_html(book_zip, book)
|
||||||
|
cover = DuckDuckGo().image_result(title)
|
||||||
book_details = [title.contents[0], author.contents[0], cover]
|
book_details = [title.contents[0], author.contents[0], cover]
|
||||||
return book_details
|
return book_details
|
||||||
|
|
||||||
|
def extract_content(self, book_zip, book):
|
||||||
|
content = book_zip.open(
|
||||||
|
list(
|
||||||
|
filter(self.opf_regx.search, book['files'])
|
||||||
|
)[0]
|
||||||
|
)
|
||||||
|
return content
|
||||||
|
|
||||||
|
def extract_cover_html(self, book_zip, book):
|
||||||
|
cover = book_zip.open(
|
||||||
|
list(
|
||||||
|
filter(self.html_regx.search, book['files'])
|
||||||
|
)[0]
|
||||||
|
)
|
||||||
|
return cover
|
||||||
|
|
||||||
|
def extract_cover_image(self, book_zip, book):
|
||||||
|
# TODO Handle books that have no Cover Image
|
||||||
|
# TODO Handle books with html covers
|
||||||
|
cover = book_zip.open(
|
||||||
|
list(
|
||||||
|
filter(self.cover_regx.search, book['files'])
|
||||||
|
)[0]
|
||||||
|
)
|
||||||
|
return cover
|
||||||
|
|
||||||
def compare_shelf_current(self):
|
def compare_shelf_current(self):
|
||||||
try:
|
try:
|
||||||
self.books
|
self.books
|
||||||
|
|||||||
Reference in New Issue
Block a user