First push with mobi support

This commit is contained in:
Raelon Masters
2020-05-31 01:20:52 -04:00
parent ac1a166ae5
commit 7ee9c57ed1
6 changed files with 43 additions and 21 deletions

2
config.json vendored
View File

@@ -1 +1 @@
{"TITLE": "pyShelf E-Book Server", "VERSION": "0.5.0", "BOOKPATH": "", "DB_HOST": "localhost", "DB_PORT": "5432", "DATABASE": "pyshelf", "USER": "pyshelf", "PASSWORD": "pyshelf", "BOOKSHELF": "data/shelf.json", "ALLOWED_HOSTS": "*", "hostname": "localhost", "webport": "8000", "wsgiport": "8001"}
{"TITLE": "pyShelf E-Book Server", "VERSION": "0.5.0", "BOOKPATH": "/home/raelon/Books", "DB_HOST": "localhost", "DB_PORT": "5432", "DATABASE": "pyshelf", "USER": "pyshelf", "PASSWORD": "pyshelf", "BOOKSHELF": "data/shelf.json", "ALLOWED_HOSTS": "*", "hostname": "localhost", "webport": "8000", "wsgiport": "8001"}

2
importBooks vendored
View File

@@ -1,4 +1,4 @@
#!python
#!/usr/bin/env python
import pathlib
import sys

2
installer vendored
View File

@@ -1,4 +1,4 @@
#!python
#!/usr/bin/env python
import json
import os
import pathlib

2
pyproject.toml vendored
View File

@@ -7,4 +7,4 @@ use_parentheses = true
# NOTE: the known_third_party setting is managed by
# seed-isort-config and should not be modified directly.
# Any changes made to this setting will be overwritten.
known_third_party = ["backend", "bs4", "django", "interface", "prompt_toolkit", "psycopg2", "pyfiglet", "requests"]
known_third_party = ["backend", "bs4", "django", "interface", "mobi", "prompt_toolkit", "psycopg2", "pyfiglet", "requests"]

1
requirements.txt vendored
View File

@@ -17,3 +17,4 @@ psycopg2-binary
prompt_toolkit
psutil
pyfiglet
mobi-python

View File

@@ -7,6 +7,8 @@ import zipfile
from bs4 import BeautifulSoup
from mobi import Mobi
from .api_hooks import DuckDuckGo
from .config import Config
from .storage import Storage
@@ -26,8 +28,7 @@ class Catalogue:
self.html_regx = re.compile(r"\.html")
self.root_dir = config.root
self.book_folder = config.book_path
self.book_shelf = config.book_shelf
self._book_list_expanded = None
# self.book_shelf = config.book_shelf
self.books = None
self.db_pointer = config.catalogue_db
self.config = config
@@ -56,24 +57,30 @@ class Catalogue:
:returns self._book_list_expanded: json string containing all book metadata
"""
self.scan_folder() # Populate file list
regx = re.compile(r"\.epub")
self.scan_folder() # Populate file list
regx = re.compile(r"\.epub|\.mobi")
try:
self.books = list(filter(regx.search, filter(None, self.file_list)))
except TypeError as e:
print(e)
self._book_list_expanded = {}
with open(self.book_shelf, "w") as f:
for book in self.books:
self._book_list_expanded[book] = self.process_book(book)
json.dump(self._book_list_expanded, f)
"""
for book in self.books:
self._book_list_expanded[book] = self.process_by_filetype(book)
return self._book_list_expanded
"""
def process_by_filetype(self, book):
    """
    Route a book file to the metadata extractor that matches its extension

    :param book: path to the ebook file, as a string
    :returns: the metadata list built by extract_metadata_epub or
        extract_metadata_mobi, or None when the extension is neither
        .epub nor .mobi
    """
    # Compare on a lowercased copy so "Book.EPUB" is routed like "book.epub";
    # the original path is what gets handed to the extractors.
    lowered = book.lower()
    if lowered.endswith(".epub"):
        epub = self.process_epub(book)
        return self.extract_metadata_epub(epub)
    if lowered.endswith(".mobi"):
        return self.extract_metadata_mobi(book)
    # Unsupported file type: report "nothing extracted" explicitly.
    return None
@staticmethod
def process_book(book):
def process_epub(book):
"""Return dictionary of epub file contents"""
book = zipfile.ZipFile(book, "r")
details = {}
book = zipfile.ZipFile(book, "r")
with book as book_zip:
details["files"] = []
details["path"] = book.filename
@@ -86,7 +93,7 @@ class Catalogue:
details["files"].append(match.string)
return details
def extract_metadata(self, book):
def extract_metadata_epub(self, book):
"""
Return extracted metadata and cover picture
book['path'] == Full path to ebook file
@@ -94,7 +101,7 @@ class Catalogue:
"""
book_zip = zipfile.ZipFile(book["path"], "r")
with book_zip as f:
content = self.extract_content(book_zip, book)
content = self.extract_content(f, book)
soup = BeautifulSoup(content, "lxml")
title = soup.find("dc:title")
if title is None:
@@ -105,13 +112,27 @@ class Catalogue:
if author is not None:
author = author.contents[0]
try:
cover = self.extract_cover_image(book_zip, book)
cover = self.extract_cover_image(f, book)
except IndexError:
# cover = self.extract_cover_html(book_zip, book)
cover = DuckDuckGo().image_result(title)
book_details = [title, author, cover, book["path"]]
return book_details
@staticmethod
def extract_metadata_mobi(book):
    """
    Return extracted metadata and cover image data for a mobi file

    :param book: path to the .mobi file
    :returns: list of [title, author, cover_image, file name]; cover_image
        is None when reading image record 0 raises KeyError
    """
    # Keep the parser under its own name instead of rebinding the `book`
    # argument, so the input path stays available below.
    mobi_file = Mobi(book)
    try:
        mobi_file.parse()
        try:
            cover_image = mobi_file.readImageRecord(0)
        except KeyError:
            cover_image = None
        title = mobi_file.title().decode("utf-8")
        author = mobi_file.author().decode(
            "utf-8"
        )  # TODO some files are still passing encoded data for author.
        return [title, author, cover_image, mobi_file.f.name]
    finally:
        # The parser keeps its file object on `.f` and nothing here released
        # it, so every imported book left a handle open. Only close when we
        # were handed a path; a caller-supplied file object stays the
        # caller's to close.
        if isinstance(book, str):
            mobi_file.f.close()
def extract_content(self, book_zip, book):
"""
Opens epub as zip file filters then stores as list any files matching opf_regx
@@ -161,12 +182,12 @@ class Catalogue:
Gets a list of new files via compare_shelf_current.
Iterates over list and inserts new books into database.
"""
# TODO Refactor metadata extraction into process_book call to more easily handle additional formats
book_list = self.compare_shelf_current()
db = Storage(self.config)
for book in book_list:
book = self.process_book(book)
extracted = self.extract_metadata(book)
db.insert_book(extracted)
book = self.process_by_filetype(book)
db.insert_book(book)
inserted = db.commit()
if inserted is not True:
print(inserted)