mirror of
https://github.com/th3r00t/pyShelf.git
synced 2026-04-28 01:59:35 -04:00
2
config.json
vendored
2
config.json
vendored
@@ -1 +1 @@
|
|||||||
{"TITLE": "pyShelf E-Book Server", "VERSION": "0.5.0", "BOOKPATH": "/srv/Books", "DB_HOST": "localhost", "DB_PORT": "5432", "DATABASE": "pyshelf", "USER": "pyshelf", "PASSWORD": "pyshelf", "BOOKSHELF": "data/shelf.json", "ALLOWED_HOSTS": "*", "hostname": "localhost", "webport": "8000", "wsgiport": "8001"}
|
{"TITLE": "pyShelf E-Book Server", "VERSION": "0.5.0", "BOOKPATH": "/srv/Books", "DB_HOST": "localhost", "DB_PORT": "5432", "DATABASE": "pyshelf", "USER": "pyshelf", "PASSWORD": "pyshelf", "BOOKSHELF": "data/shelf.json", "ALLOWED_HOSTS": "*", "hostname": "localhost", "webport": "8000", "wsgiport": "8001"}
|
||||||
2
importBooks
vendored
2
importBooks
vendored
@@ -1,4 +1,4 @@
|
|||||||
#!python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
import pathlib
|
import pathlib
|
||||||
import sys
|
import sys
|
||||||
|
|||||||
2
installer
vendored
2
installer
vendored
@@ -1,4 +1,4 @@
|
|||||||
#!python
|
#!/usr/bin/env python
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import pathlib
|
import pathlib
|
||||||
|
|||||||
2
makeCollections
vendored
2
makeCollections
vendored
@@ -1,4 +1,4 @@
|
|||||||
#!/usr/bin/python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
import pathlib
|
import pathlib
|
||||||
import sys
|
import sys
|
||||||
|
|||||||
2
pyproject.toml
vendored
2
pyproject.toml
vendored
@@ -7,4 +7,4 @@ use_parentheses = true
|
|||||||
# NOTE: the known_third_party setting is managed by
|
# NOTE: the known_third_party setting is managed by
|
||||||
# seed-isort-config and should not be modified directly.
|
# seed-isort-config and should not be modified directly.
|
||||||
# Any changes made to this setting will be overwritten.
|
# Any changes made to this setting will be overwritten.
|
||||||
known_third_party = ["backend", "bs4", "django", "interface", "prompt_toolkit", "psycopg2", "pyfiglet", "requests"]
|
known_third_party = ["backend", "bs4", "django", "interface", "mobi", "prompt_toolkit", "psycopg2", "pyfiglet", "requests"]
|
||||||
|
|||||||
3
requirements.txt
vendored
3
requirements.txt
vendored
@@ -16,4 +16,5 @@ django-debug-toolbar
|
|||||||
psycopg2-binary
|
psycopg2-binary
|
||||||
prompt_toolkit
|
prompt_toolkit
|
||||||
psutil
|
psutil
|
||||||
pyfiglet
|
pyfiglet
|
||||||
|
mobi-python
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
#!/usr/bin/python
|
#!/usr/bin/env python
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import pathlib
|
import pathlib
|
||||||
@@ -7,6 +7,8 @@ import zipfile
|
|||||||
|
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
from mobi import Mobi
|
||||||
|
|
||||||
from .api_hooks import DuckDuckGo
|
from .api_hooks import DuckDuckGo
|
||||||
from .config import Config
|
from .config import Config
|
||||||
from .storage import Storage
|
from .storage import Storage
|
||||||
@@ -26,8 +28,7 @@ class Catalogue:
|
|||||||
self.html_regx = re.compile(r"\.html")
|
self.html_regx = re.compile(r"\.html")
|
||||||
self.root_dir = config.root
|
self.root_dir = config.root
|
||||||
self.book_folder = config.book_path
|
self.book_folder = config.book_path
|
||||||
self.book_shelf = config.book_shelf
|
# self.book_shelf = config.book_shelf
|
||||||
self._book_list_expanded = None
|
|
||||||
self.books = None
|
self.books = None
|
||||||
self.db_pointer = config.catalogue_db
|
self.db_pointer = config.catalogue_db
|
||||||
self.config = config
|
self.config = config
|
||||||
@@ -56,24 +57,30 @@ class Catalogue:
|
|||||||
|
|
||||||
:returns self._book_list_expanded: json string containing all book metadata
|
:returns self._book_list_expanded: json string containing all book metadata
|
||||||
"""
|
"""
|
||||||
self.scan_folder() # Populate file list
|
self.scan_folder() # Populate file list
|
||||||
regx = re.compile(r"\.epub")
|
regx = re.compile(r"\.epub|\.mobi")
|
||||||
try:
|
try:
|
||||||
self.books = list(filter(regx.search, filter(None, self.file_list)))
|
self.books = list(filter(regx.search, filter(None, self.file_list)))
|
||||||
except TypeError as e:
|
except TypeError as e:
|
||||||
print(e)
|
print(e)
|
||||||
self._book_list_expanded = {}
|
"""
|
||||||
with open(self.book_shelf, "w") as f:
|
for book in self.books:
|
||||||
for book in self.books:
|
self._book_list_expanded[book] = self.process_by_filetype(book)
|
||||||
self._book_list_expanded[book] = self.process_book(book)
|
|
||||||
json.dump(self._book_list_expanded, f)
|
|
||||||
return self._book_list_expanded
|
return self._book_list_expanded
|
||||||
|
"""
|
||||||
|
|
||||||
|
def process_by_filetype(self, book):
|
||||||
|
if book.endswith(".epub"):
|
||||||
|
epub = self.process_epub(book)
|
||||||
|
return self.extract_metadata_epub(epub)
|
||||||
|
elif book.endswith(".mobi"):
|
||||||
|
return self.extract_metadata_mobi(book)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def process_book(book):
|
def process_epub(book):
|
||||||
"""Return dictionary of epub file contents"""
|
"""Return dictionary of epub file contents"""
|
||||||
book = zipfile.ZipFile(book, "r")
|
|
||||||
details = {}
|
details = {}
|
||||||
|
book = zipfile.ZipFile(book, "r")
|
||||||
with book as book_zip:
|
with book as book_zip:
|
||||||
details["files"] = []
|
details["files"] = []
|
||||||
details["path"] = book.filename
|
details["path"] = book.filename
|
||||||
@@ -86,7 +93,7 @@ class Catalogue:
|
|||||||
details["files"].append(match.string)
|
details["files"].append(match.string)
|
||||||
return details
|
return details
|
||||||
|
|
||||||
def extract_metadata(self, book):
|
def extract_metadata_epub(self, book):
|
||||||
"""
|
"""
|
||||||
Return extracted metadata and cover picture
|
Return extracted metadata and cover picture
|
||||||
book['path'] == Full path to ebook file
|
book['path'] == Full path to ebook file
|
||||||
@@ -94,7 +101,7 @@ class Catalogue:
|
|||||||
"""
|
"""
|
||||||
book_zip = zipfile.ZipFile(book["path"], "r")
|
book_zip = zipfile.ZipFile(book["path"], "r")
|
||||||
with book_zip as f:
|
with book_zip as f:
|
||||||
content = self.extract_content(book_zip, book)
|
content = self.extract_content(f, book)
|
||||||
soup = BeautifulSoup(content, "lxml")
|
soup = BeautifulSoup(content, "lxml")
|
||||||
title = soup.find("dc:title")
|
title = soup.find("dc:title")
|
||||||
if title is None:
|
if title is None:
|
||||||
@@ -105,13 +112,27 @@ class Catalogue:
|
|||||||
if author is not None:
|
if author is not None:
|
||||||
author = author.contents[0]
|
author = author.contents[0]
|
||||||
try:
|
try:
|
||||||
cover = self.extract_cover_image(book_zip, book)
|
cover = self.extract_cover_image(f, book)
|
||||||
except IndexError:
|
except IndexError:
|
||||||
# cover = self.extract_cover_html(book_zip, book)
|
# cover = self.extract_cover_html(book_zip, book)
|
||||||
cover = DuckDuckGo().image_result(title)
|
cover = DuckDuckGo().image_result(title)
|
||||||
book_details = [title, author, cover, book["path"]]
|
book_details = [title, author, cover, book["path"]]
|
||||||
return book_details
|
return book_details
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def extract_metadata_mobi(book):
|
||||||
|
book = Mobi(book)
|
||||||
|
book.parse()
|
||||||
|
try:
|
||||||
|
cover_image = book.readImageRecord(0)
|
||||||
|
except KeyError:
|
||||||
|
cover_image = None
|
||||||
|
title = book.title().decode("utf-8")
|
||||||
|
author = book.author().decode("utf-8")
|
||||||
|
breakpoint()
|
||||||
|
# TODO some files are still passing encoded data for author.
|
||||||
|
return [title, author, cover_image, book.f.name]
|
||||||
|
|
||||||
def extract_content(self, book_zip, book):
|
def extract_content(self, book_zip, book):
|
||||||
"""
|
"""
|
||||||
Opens epub as zip file filters then stores as list any files matching opf_regx
|
Opens epub as zip file filters then stores as list any files matching opf_regx
|
||||||
@@ -161,12 +182,13 @@ class Catalogue:
|
|||||||
Gets a list of new files via compare_shelf_current.
|
Gets a list of new files via compare_shelf_current.
|
||||||
Iterates over list and inserts new books into database.
|
Iterates over list and inserts new books into database.
|
||||||
"""
|
"""
|
||||||
|
# TODO Refactor metadata extraction into process_book \
|
||||||
|
# call to more easily handle additional formats
|
||||||
book_list = self.compare_shelf_current()
|
book_list = self.compare_shelf_current()
|
||||||
db = Storage(self.config)
|
db = Storage(self.config)
|
||||||
for book in book_list:
|
for book in book_list:
|
||||||
book = self.process_book(book)
|
book = self.process_by_filetype(book)
|
||||||
extracted = self.extract_metadata(book)
|
db.insert_book(book)
|
||||||
db.insert_book(extracted)
|
|
||||||
inserted = db.commit()
|
inserted = db.commit()
|
||||||
if inserted is not True:
|
if inserted is not True:
|
||||||
print(inserted)
|
print(inserted)
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
#!/usr/bin/python
|
#!/usr/bin/env python
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
@@ -16,7 +16,7 @@ def MakeCollections(root):
|
|||||||
config = Config(root) # Get configuration settings
|
config = Config(root) # Get configuration settings
|
||||||
# InitFiles(config.file_array) # Initialize file system
|
# InitFiles(config.file_array) # Initialize file system
|
||||||
_storage = Storage(config)
|
_storage = Storage(config)
|
||||||
_storage.make_collections()
|
_storage.make_collections()
|
||||||
_t2 = time.time()
|
_t2 = time.time()
|
||||||
scan_time = round(_t2 - _t1)
|
scan_time = round(_t2 - _t1)
|
||||||
print("Collections Made.")
|
print("Collections Made.")
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
#!/usr/bin/python
|
#!/usr/bin/env python
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
|
|||||||
Reference in New Issue
Block a user