Merge pull request #42 from th3r00t/mobi

Now with mobi support!
This commit is contained in:
th3r00t
2020-06-01 09:12:44 -04:00
committed by GitHub
9 changed files with 50 additions and 27 deletions

2
config.json vendored
View File

@@ -1 +1 @@
{"TITLE": "pyShelf E-Book Server", "VERSION": "0.5.0", "BOOKPATH": "/srv/Books", "DB_HOST": "localhost", "DB_PORT": "5432", "DATABASE": "pyshelf", "USER": "pyshelf", "PASSWORD": "pyshelf", "BOOKSHELF": "data/shelf.json", "ALLOWED_HOSTS": "*", "hostname": "localhost", "webport": "8000", "wsgiport": "8001"} {"TITLE": "pyShelf E-Book Server", "VERSION": "0.5.0", "BOOKPATH": "/srv/Books", "DB_HOST": "localhost", "DB_PORT": "5432", "DATABASE": "pyshelf", "USER": "pyshelf", "PASSWORD": "pyshelf", "BOOKSHELF": "data/shelf.json", "ALLOWED_HOSTS": "*", "hostname": "localhost", "webport": "8000", "wsgiport": "8001"}

2
importBooks vendored
View File

@@ -1,4 +1,4 @@
#!python #!/usr/bin/env python
import pathlib import pathlib
import sys import sys

2
installer vendored
View File

@@ -1,4 +1,4 @@
#!python #!/usr/bin/env python
import json import json
import os import os
import pathlib import pathlib

2
makeCollections vendored
View File

@@ -1,4 +1,4 @@
#!/usr/bin/python #!/usr/bin/env python
import pathlib import pathlib
import sys import sys

2
pyproject.toml vendored
View File

@@ -7,4 +7,4 @@ use_parentheses = true
# NOTE: the known_third_party setting is managed by # NOTE: the known_third_party setting is managed by
# seed-isort-config and should not be modified directly. # seed-isort-config and should not be modified directly.
# Any changes made to this setting will be overwritten. # Any changes made to this setting will be overwritten.
known_third_party = ["backend", "bs4", "django", "interface", "prompt_toolkit", "psycopg2", "pyfiglet", "requests"] known_third_party = ["backend", "bs4", "django", "interface", "mobi", "prompt_toolkit", "psycopg2", "pyfiglet", "requests"]

3
requirements.txt vendored
View File

@@ -16,4 +16,5 @@ django-debug-toolbar
psycopg2-binary psycopg2-binary
prompt_toolkit prompt_toolkit
psutil psutil
pyfiglet pyfiglet
mobi-python

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python #!/usr/bin/env python
import json import json
import os import os
import pathlib import pathlib
@@ -7,6 +7,8 @@ import zipfile
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from mobi import Mobi
from .api_hooks import DuckDuckGo from .api_hooks import DuckDuckGo
from .config import Config from .config import Config
from .storage import Storage from .storage import Storage
@@ -26,8 +28,7 @@ class Catalogue:
self.html_regx = re.compile(r"\.html") self.html_regx = re.compile(r"\.html")
self.root_dir = config.root self.root_dir = config.root
self.book_folder = config.book_path self.book_folder = config.book_path
self.book_shelf = config.book_shelf # self.book_shelf = config.book_shelf
self._book_list_expanded = None
self.books = None self.books = None
self.db_pointer = config.catalogue_db self.db_pointer = config.catalogue_db
self.config = config self.config = config
@@ -56,24 +57,30 @@ class Catalogue:
:returns self._book_list_expanded: json string containing all book metadata :returns self._book_list_expanded: json string containing all book metadata
""" """
self.scan_folder() # Populate file list self.scan_folder() # Populate file list
regx = re.compile(r"\.epub") regx = re.compile(r"\.epub|\.mobi")
try: try:
self.books = list(filter(regx.search, filter(None, self.file_list))) self.books = list(filter(regx.search, filter(None, self.file_list)))
except TypeError as e: except TypeError as e:
print(e) print(e)
self._book_list_expanded = {} """
with open(self.book_shelf, "w") as f: for book in self.books:
for book in self.books: self._book_list_expanded[book] = self.process_by_filetype(book)
self._book_list_expanded[book] = self.process_book(book)
json.dump(self._book_list_expanded, f)
return self._book_list_expanded return self._book_list_expanded
"""
def process_by_filetype(self, book):
    """
    Extract metadata from a book using the handler for its file type.

    :param book: path to the book file, as a string
    :returns: whatever extract_metadata_epub / extract_metadata_mobi
        returns for the file, or None when the extension is neither
        .epub nor .mobi
    """
    # Compare case-insensitively so "Book.EPUB" is handled like "book.epub".
    lowered = book.lower()
    if lowered.endswith(".epub"):
        # epub needs two passes: index the archive, then read its metadata.
        return self.extract_metadata_epub(self.process_epub(book))
    if lowered.endswith(".mobi"):
        return self.extract_metadata_mobi(book)
    # Unsupported formats yield None explicitly rather than by fall-through.
    return None
@staticmethod @staticmethod
def process_book(book): def process_epub(book):
"""Return dictionary of epub file contents""" """Return dictionary of epub file contents"""
book = zipfile.ZipFile(book, "r")
details = {} details = {}
book = zipfile.ZipFile(book, "r")
with book as book_zip: with book as book_zip:
details["files"] = [] details["files"] = []
details["path"] = book.filename details["path"] = book.filename
@@ -86,7 +93,7 @@ class Catalogue:
details["files"].append(match.string) details["files"].append(match.string)
return details return details
def extract_metadata(self, book): def extract_metadata_epub(self, book):
""" """
Return extracted metadata and cover picture Return extracted metadata and cover picture
book['path'] == Full path to ebook file book['path'] == Full path to ebook file
@@ -94,7 +101,7 @@ class Catalogue:
""" """
book_zip = zipfile.ZipFile(book["path"], "r") book_zip = zipfile.ZipFile(book["path"], "r")
with book_zip as f: with book_zip as f:
content = self.extract_content(book_zip, book) content = self.extract_content(f, book)
soup = BeautifulSoup(content, "lxml") soup = BeautifulSoup(content, "lxml")
title = soup.find("dc:title") title = soup.find("dc:title")
if title is None: if title is None:
@@ -105,13 +112,27 @@ class Catalogue:
if author is not None: if author is not None:
author = author.contents[0] author = author.contents[0]
try: try:
cover = self.extract_cover_image(book_zip, book) cover = self.extract_cover_image(f, book)
except IndexError: except IndexError:
# cover = self.extract_cover_html(book_zip, book) # cover = self.extract_cover_html(book_zip, book)
cover = DuckDuckGo().image_result(title) cover = DuckDuckGo().image_result(title)
book_details = [title, author, cover, book["path"]] book_details = [title, author, cover, book["path"]]
return book_details return book_details
@staticmethod
def extract_metadata_mobi(book):
    """
    Return extracted metadata and cover picture for a mobi file.

    :param book: path to the .mobi file, handed directly to ``Mobi``
    :returns: list of [title, author, cover_image, path]; cover_image is
        None when image record 0 cannot be read
    """
    # Keep the parsed object in its own name so ``book`` stays the path.
    mobi_file = Mobi(book)
    mobi_file.parse()
    try:
        cover_image = mobi_file.readImageRecord(0)
    except KeyError:
        # No image record 0 available: report the book without a cover.
        cover_image = None
    title = mobi_file.title().decode("utf-8")
    author = mobi_file.author().decode("utf-8")
    # TODO some files are still passing encoded data for author.
    # NOTE(review): the handle behind mobi_file.f is not closed here;
    # confirm whether Mobi releases it on its own.
    return [title, author, cover_image, mobi_file.f.name]
def extract_content(self, book_zip, book): def extract_content(self, book_zip, book):
""" """
Opens epub as zip file, then filters and stores as a list any files matching opf_regx Opens epub as zip file, then filters and stores as a list any files matching opf_regx
@@ -161,12 +182,13 @@ class Catalogue:
Gets a list of new files via compare_shelf_current. Gets a list of new files via compare_shelf_current.
Iterates over list and inserts new books into database. Iterates over list and inserts new books into database.
""" """
# TODO Refactor metadata extraction into process_book \
# call to more easily handle additional formats
book_list = self.compare_shelf_current() book_list = self.compare_shelf_current()
db = Storage(self.config) db = Storage(self.config)
for book in book_list: for book in book_list:
book = self.process_book(book) book = self.process_by_filetype(book)
extracted = self.extract_metadata(book) db.insert_book(book)
db.insert_book(extracted)
inserted = db.commit() inserted = db.commit()
if inserted is not True: if inserted is not True:
print(inserted) print(inserted)

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python #!/usr/bin/env python
import os import os
import sys import sys
import time import time
@@ -16,7 +16,7 @@ def MakeCollections(root):
config = Config(root) # Get configuration settings config = Config(root) # Get configuration settings
# InitFiles(config.file_array) # Initialize file system # InitFiles(config.file_array) # Initialize file system
_storage = Storage(config) _storage = Storage(config)
_storage.make_collections() _storage.make_collections()
_t2 = time.time() _t2 = time.time()
scan_time = round(_t2 - _t1) scan_time = round(_t2 - _t1)
print("Collections Made.") print("Collections Made.")

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python #!/usr/bin/env python
import os import os
import sys import sys
import time import time