First push with mobi support

This commit is contained in:
Raelon Masters
2020-05-31 01:20:52 -04:00
parent ac1a166ae5
commit 7ee9c57ed1
6 changed files with 43 additions and 21 deletions

2
config.json vendored
View File

@@ -1 +1 @@
{"TITLE": "pyShelf E-Book Server", "VERSION": "0.5.0", "BOOKPATH": "", "DB_HOST": "localhost", "DB_PORT": "5432", "DATABASE": "pyshelf", "USER": "pyshelf", "PASSWORD": "pyshelf", "BOOKSHELF": "data/shelf.json", "ALLOWED_HOSTS": "*", "hostname": "localhost", "webport": "8000", "wsgiport": "8001"} {"TITLE": "pyShelf E-Book Server", "VERSION": "0.5.0", "BOOKPATH": "/home/raelon/Books", "DB_HOST": "localhost", "DB_PORT": "5432", "DATABASE": "pyshelf", "USER": "pyshelf", "PASSWORD": "pyshelf", "BOOKSHELF": "data/shelf.json", "ALLOWED_HOSTS": "*", "hostname": "localhost", "webport": "8000", "wsgiport": "8001"}

2
importBooks vendored
View File

@@ -1,4 +1,4 @@
#!python #!/usr/bin/env python
import pathlib import pathlib
import sys import sys

2
installer vendored
View File

@@ -1,4 +1,4 @@
#!python #!/usr/bin/env python
import json import json
import os import os
import pathlib import pathlib

2
pyproject.toml vendored
View File

@@ -7,4 +7,4 @@ use_parentheses = true
# NOTE: the known_third_party setting is managed by # NOTE: the known_third_party setting is managed by
# seed-isort-config and should not be modified directly. # seed-isort-config and should not be modified directly.
# Any changes made to this setting will be overwritten. # Any changes made to this setting will be overwritten.
known_third_party = ["backend", "bs4", "django", "interface", "prompt_toolkit", "psycopg2", "pyfiglet", "requests"] known_third_party = ["backend", "bs4", "django", "interface", "mobi", "prompt_toolkit", "psycopg2", "pyfiglet", "requests"]

1
requirements.txt vendored
View File

@@ -17,3 +17,4 @@ psycopg2-binary
prompt_toolkit prompt_toolkit
psutil psutil
pyfiglet pyfiglet
mobi-python

View File

@@ -7,6 +7,8 @@ import zipfile
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from mobi import Mobi
from .api_hooks import DuckDuckGo from .api_hooks import DuckDuckGo
from .config import Config from .config import Config
from .storage import Storage from .storage import Storage
@@ -26,8 +28,7 @@ class Catalogue:
self.html_regx = re.compile(r"\.html") self.html_regx = re.compile(r"\.html")
self.root_dir = config.root self.root_dir = config.root
self.book_folder = config.book_path self.book_folder = config.book_path
self.book_shelf = config.book_shelf # self.book_shelf = config.book_shelf
self._book_list_expanded = None
self.books = None self.books = None
self.db_pointer = config.catalogue_db self.db_pointer = config.catalogue_db
self.config = config self.config = config
@@ -57,23 +58,29 @@ class Catalogue:
:returns self._book_list_expanded: json string containing all book metadata :returns self._book_list_expanded: json string containing all book metadata
""" """
self.scan_folder() # Populate file list self.scan_folder() # Populate file list
regx = re.compile(r"\.epub") regx = re.compile(r"\.epub|\.mobi")
try: try:
self.books = list(filter(regx.search, filter(None, self.file_list))) self.books = list(filter(regx.search, filter(None, self.file_list)))
except TypeError as e: except TypeError as e:
print(e) print(e)
self._book_list_expanded = {} """
with open(self.book_shelf, "w") as f:
for book in self.books: for book in self.books:
self._book_list_expanded[book] = self.process_book(book) self._book_list_expanded[book] = self.process_by_filetype(book)
json.dump(self._book_list_expanded, f)
return self._book_list_expanded return self._book_list_expanded
"""
def process_by_filetype(self, book):
    """
    Dispatch a book file to the metadata extractor matching its extension.

    :param book: path of the ebook file, as a string
    :returns: the metadata list produced by extract_metadata_epub or
        extract_metadata_mobi, or None when the extension is not supported
    """
    # Compare case-insensitively so "Book.EPUB" and "book.epub" are
    # handled alike; the original path is still what gets passed on.
    lowered = book.lower()
    if lowered.endswith(".epub"):
        return self.extract_metadata_epub(self.process_epub(book))
    if lowered.endswith(".mobi"):
        return self.extract_metadata_mobi(book)
    # The book list is built with a regex *search* for "\.epub|\.mobi",
    # which also matches names such as "a.epub.bak" or "x.mobile.txt".
    # Make the "nothing to extract" outcome explicit for those paths.
    return None
@staticmethod @staticmethod
def process_book(book): def process_epub(book):
"""Return dictionary of epub file contents""" """Return dictionary of epub file contents"""
book = zipfile.ZipFile(book, "r")
details = {} details = {}
book = zipfile.ZipFile(book, "r")
with book as book_zip: with book as book_zip:
details["files"] = [] details["files"] = []
details["path"] = book.filename details["path"] = book.filename
@@ -86,7 +93,7 @@ class Catalogue:
details["files"].append(match.string) details["files"].append(match.string)
return details return details
def extract_metadata(self, book): def extract_metadata_epub(self, book):
""" """
Return extracted metadata and cover picture Return extracted metadata and cover picture
book['path'] == Full path to ebook file book['path'] == Full path to ebook file
@@ -94,7 +101,7 @@ class Catalogue:
""" """
book_zip = zipfile.ZipFile(book["path"], "r") book_zip = zipfile.ZipFile(book["path"], "r")
with book_zip as f: with book_zip as f:
content = self.extract_content(book_zip, book) content = self.extract_content(f, book)
soup = BeautifulSoup(content, "lxml") soup = BeautifulSoup(content, "lxml")
title = soup.find("dc:title") title = soup.find("dc:title")
if title is None: if title is None:
@@ -105,13 +112,27 @@ class Catalogue:
if author is not None: if author is not None:
author = author.contents[0] author = author.contents[0]
try: try:
cover = self.extract_cover_image(book_zip, book) cover = self.extract_cover_image(f, book)
except IndexError: except IndexError:
# cover = self.extract_cover_html(book_zip, book) # cover = self.extract_cover_html(book_zip, book)
cover = DuckDuckGo().image_result(title) cover = DuckDuckGo().image_result(title)
book_details = [title, author, cover, book["path"]] book_details = [title, author, cover, book["path"]]
return book_details return book_details
@staticmethod
def extract_metadata_mobi(book):
    """
    Return extracted metadata and cover picture for a .mobi file.

    :param book: path to the .mobi file
    :returns: list of [title, author, cover_image, path]; author and
        cover_image are None when they cannot be read from the file
    """

    def _as_text(raw):
        # Decode leniently: one badly encoded field should not abort the
        # import of the whole book. Values that are not bytes pass through.
        if isinstance(raw, bytes):
            return raw.decode("utf-8", errors="replace")
        return raw

    mobi_book = Mobi(book)
    try:
        mobi_book.parse()
        try:
            cover_image = mobi_book.readImageRecord(0)
        except KeyError:
            cover_image = None
        title = _as_text(mobi_book.title())
        try:
            author = _as_text(mobi_book.author())
        except KeyError:
            # NOTE(review): author() appears to do a plain lookup of the
            # EXTH author record and may raise KeyError when the record is
            # absent -- confirm against the mobi-python sources.
            author = None
        path = mobi_book.f.name
    finally:
        # Mobi keeps its file object on ``.f`` and nothing else closes it;
        # release it so importing a large library does not leak handles.
        mobi_book.f.close()
    return [title, author, cover_image, path]
def extract_content(self, book_zip, book): def extract_content(self, book_zip, book):
""" """
Opens epub as zip file filters then stores as list any files matching opf_regx Opens epub as zip file filters then stores as list any files matching opf_regx
@@ -161,12 +182,12 @@ class Catalogue:
Gets a list of new files via compare_shelf_current. Gets a list of new files via compare_shelf_current.
Iterates over list and inserts new books into database. Iterates over list and inserts new books into database.
""" """
# TODO Refactor metadata extraction into process_book call to more easily handle additional formats
book_list = self.compare_shelf_current() book_list = self.compare_shelf_current()
db = Storage(self.config) db = Storage(self.config)
for book in book_list: for book in book_list:
book = self.process_book(book) book = self.process_by_filetype(book)
extracted = self.extract_metadata(book) db.insert_book(book)
db.insert_book(extracted)
inserted = db.commit() inserted = db.commit()
if inserted is not True: if inserted is not True:
print(inserted) print(inserted)