mirror of
https://github.com/th3r00t/pyShelf.git
synced 2026-04-28 01:59:35 -04:00
First push with mobi support
This commit is contained in:
@@ -7,6 +7,8 @@ import zipfile
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from mobi import Mobi
|
||||
|
||||
from .api_hooks import DuckDuckGo
|
||||
from .config import Config
|
||||
from .storage import Storage
|
||||
@@ -26,8 +28,7 @@ class Catalogue:
|
||||
self.html_regx = re.compile(r"\.html")
|
||||
self.root_dir = config.root
|
||||
self.book_folder = config.book_path
|
||||
self.book_shelf = config.book_shelf
|
||||
self._book_list_expanded = None
|
||||
# self.book_shelf = config.book_shelf
|
||||
self.books = None
|
||||
self.db_pointer = config.catalogue_db
|
||||
self.config = config
|
||||
@@ -56,24 +57,30 @@ class Catalogue:
|
||||
|
||||
:returns self._book_list_expanded: json string containing all book metadata
|
||||
"""
|
||||
self.scan_folder() # Populate file list
|
||||
regx = re.compile(r"\.epub")
|
||||
self.scan_folder() # Populate file list
|
||||
regx = re.compile(r"\.epub|\.mobi")
|
||||
try:
|
||||
self.books = list(filter(regx.search, filter(None, self.file_list)))
|
||||
except TypeError as e:
|
||||
print(e)
|
||||
self._book_list_expanded = {}
|
||||
with open(self.book_shelf, "w") as f:
|
||||
for book in self.books:
|
||||
self._book_list_expanded[book] = self.process_book(book)
|
||||
json.dump(self._book_list_expanded, f)
|
||||
"""
|
||||
for book in self.books:
|
||||
self._book_list_expanded[book] = self.process_by_filetype(book)
|
||||
return self._book_list_expanded
|
||||
"""
|
||||
|
||||
def process_by_filetype(self, book):
|
||||
if book.endswith(".epub"):
|
||||
epub = self.process_epub(book)
|
||||
return self.extract_metadata_epub(epub)
|
||||
elif book.endswith(".mobi"):
|
||||
return self.extract_metadata_mobi(book)
|
||||
|
||||
@staticmethod
|
||||
def process_book(book):
|
||||
def process_epub(book):
|
||||
"""Return dictionary of epub file contents"""
|
||||
book = zipfile.ZipFile(book, "r")
|
||||
details = {}
|
||||
book = zipfile.ZipFile(book, "r")
|
||||
with book as book_zip:
|
||||
details["files"] = []
|
||||
details["path"] = book.filename
|
||||
@@ -86,7 +93,7 @@ class Catalogue:
|
||||
details["files"].append(match.string)
|
||||
return details
|
||||
|
||||
def extract_metadata(self, book):
|
||||
def extract_metadata_epub(self, book):
|
||||
"""
|
||||
Return extracted metadata and cover picture
|
||||
book['path'] == Full path to ebook file
|
||||
@@ -94,7 +101,7 @@ class Catalogue:
|
||||
"""
|
||||
book_zip = zipfile.ZipFile(book["path"], "r")
|
||||
with book_zip as f:
|
||||
content = self.extract_content(book_zip, book)
|
||||
content = self.extract_content(f, book)
|
||||
soup = BeautifulSoup(content, "lxml")
|
||||
title = soup.find("dc:title")
|
||||
if title is None:
|
||||
@@ -105,13 +112,27 @@ class Catalogue:
|
||||
if author is not None:
|
||||
author = author.contents[0]
|
||||
try:
|
||||
cover = self.extract_cover_image(book_zip, book)
|
||||
cover = self.extract_cover_image(f, book)
|
||||
except IndexError:
|
||||
# cover = self.extract_cover_html(book_zip, book)
|
||||
cover = DuckDuckGo().image_result(title)
|
||||
book_details = [title, author, cover, book["path"]]
|
||||
return book_details
|
||||
|
||||
@staticmethod
|
||||
def extract_metadata_mobi(book):
|
||||
book = Mobi(book)
|
||||
book.parse()
|
||||
try:
|
||||
cover_image = book.readImageRecord(0)
|
||||
except KeyError:
|
||||
cover_image = None
|
||||
title = book.title().decode("utf-8")
|
||||
author = book.author().decode(
|
||||
"utf-8"
|
||||
) # TODO some files are still passing encoded data for author.
|
||||
return [title, author, cover_image, book.f.name]
|
||||
|
||||
def extract_content(self, book_zip, book):
|
||||
"""
|
||||
Opens epub as zip file filters then stores as list any files matching opf_regx
|
||||
@@ -161,12 +182,12 @@ class Catalogue:
|
||||
Gets a list of new files via compare_shelf_current.
|
||||
Iterates over list and inserts new books into database.
|
||||
"""
|
||||
# TODO Refactor metadata extraction into process_book call to more easily handle additional formats
|
||||
book_list = self.compare_shelf_current()
|
||||
db = Storage(self.config)
|
||||
for book in book_list:
|
||||
book = self.process_book(book)
|
||||
extracted = self.extract_metadata(book)
|
||||
db.insert_book(extracted)
|
||||
book = self.process_by_filetype(book)
|
||||
db.insert_book(book)
|
||||
inserted = db.commit()
|
||||
if inserted is not True:
|
||||
print(inserted)
|
||||
|
||||
Reference in New Issue
Block a user