Working on new book check system

This commit is contained in:
Mike
2019-10-16 09:12:56 -04:00
parent f821eb02cb
commit 545b55a486
9 changed files with 59 additions and 75 deletions

1
.#config.py Symbolic link
View File

@@ -0,0 +1 @@
raelon@golumnsec.33289:1571093667

Binary file not shown.

View File

@@ -1,7 +1,7 @@
class Config: class Config:
"""Main System Configuration""" """Main System Configuration"""
def __init__(self): def __init__(self):
self.book_path = "/home/raelon/Books/" self.book_path = "books/"
self.book_shelf = "data/shelf.json" self.book_shelf = "data/shelf.json"
self.catalogue_db = "data/catalogue.db" self.catalogue_db = "data/catalogue.db"
self.file_array = [ self.file_array = [

File diff suppressed because one or more lines are too long

View File

@@ -3,12 +3,13 @@ import json
import os import os
import re import re
import zipfile import zipfile
# import sys
# sys.path.insert(1, '../')
from lib.storage import Storage
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from PIL import Image
from api_hooks import DuckDuckGo
from config import Config from config import Config
from lib.api_hooks import DuckDuckGo from storage import Storage
config = Config() config = Config()
@@ -21,7 +22,6 @@ class Catalogue:
self.opf_regx = re.compile(r'\.opf') self.opf_regx = re.compile(r'\.opf')
self.cover_regx = re.compile(r'\.jpg|\.jpeg|\.png|\.bmp|\.gif') self.cover_regx = re.compile(r'\.jpg|\.jpeg|\.png|\.bmp|\.gif')
self.html_regx = re.compile(r'\.html') self.html_regx = re.compile(r'\.html')
self.scan_folder()
def scan_folder(self, folder=config.book_path): def scan_folder(self, folder=config.book_path):
for f in os.listdir(folder): for f in os.listdir(folder):
@@ -31,21 +31,15 @@ class Catalogue:
if _is_dir: if _is_dir:
self.file_list.append(self.scan_folder(_path)) self.file_list.append(self.scan_folder(_path))
self.file_list.append(_path) self.file_list.append(_path)
regx = re.compile(r"\.epub")
self.books = list(filter(regx.search, filter(None, self.file_list)))
def scan_book(self, book): def scan_book(self, book):
"""REMOVE ME?""" """REMOVE ME?"""
_epub = zipfile.ZipFile(book) _epub = zipfile.ZipFile(book)
with _epub as _epub_open: with _epub as _epub_open:
try: try: _epub_open.open('content.opf'); return True
_epub_open.open('content.opf') except Exception as e: print(e); return False
return True
except Exception as e:
print(e)
return False
def filter_books(self, ret=0): def filter_books(self):
""" """
Scan book folder recursively for epub files Scan book folder recursively for epub files
filter_books(0) -> Catalogue.books filter_books(0) -> Catalogue.books
@@ -53,12 +47,13 @@ class Catalogue:
:param ret: 0 -> create class property -> dump json :param ret: 0 -> create class property -> dump json
:param ret: 1 -> create & return class property :param ret: 1 -> create & return class property
""" """
self.scan_folder()
regx = re.compile(r"\.epub")
self.books = list(filter(regx.search, filter(None, self.file_list)))
_book_list_expanded = {} _book_list_expanded = {}
with open(config.book_shelf, 'w') as f: with open(config.book_shelf, 'w') as f:
for book in self.books: for book in self.books:
_book_list_expanded[book] = self.process_book(book) _book_list_expanded[book] = self.process_book(book)
if ret != 0: return _book_list_expanded
else:
json.dump(_book_list_expanded, f) json.dump(_book_list_expanded, f)
return _book_list_expanded return _book_list_expanded
@@ -92,12 +87,10 @@ class Catalogue:
title = soup.find("dc:title") title = soup.find("dc:title")
if title == None: if title == None:
title = book['path'].split('/')[-1].rsplit('.', 1)[0] title = book['path'].split('/')[-1].rsplit('.', 1)[0]
else: else: title = title.contents[0]
title = title.contents[0]
author = soup.find("dc:creator") author = soup.find("dc:creator")
if author != None: author = author.contents[0] if author != None: author = author.contents[0]
try: try: cover = self.extract_cover_image(book_zip, book)
cover = self.extract_cover_image(book_zip, book)
except IndexError: except IndexError:
# cover = self.extract_cover_html(book_zip, book) # cover = self.extract_cover_html(book_zip, book)
cover = DuckDuckGo().image_result(title) cover = DuckDuckGo().image_result(title)
@@ -106,32 +99,44 @@ class Catalogue:
def extract_content(self, book_zip, book): def extract_content(self, book_zip, book):
content = book_zip.open( content = book_zip.open(
list(filter(self.opf_regx.search, book['files']))[0]) list(
filter(self.opf_regx.search, book['files'])
)[0]
)
return content return content
def extract_cover_html(self, book_zip, book): def extract_cover_html(self, book_zip, book):
cover = book_zip.open( cover = book_zip.open(
list(filter(self.html_regx.search, book['files']))[0]) list(
filter(self.html_regx.search, book['files'])
)[0]
)
return cover return cover
def extract_cover_image(self, book_zip, book): def extract_cover_image(self, book_zip, book):
# TODO Handle books that have no Cover Image
# TODO Handle books with html covers
cover = book_zip.open( cover = book_zip.open(
list(filter(self.cover_regx.search, book['files']))[0]) list(
try: filter(self.cover_regx.search, book['files'])
cover = book_zip.read(cover.name) )[0]
return cover )
except KeyError: try: cover = book_zip.read(cover.name); return cover
return False except KeyError: return False
def new_files(self): def compare_shelf_current(self):
storage = Storage() stored_books = Storage()
stored_books = stored_books.book_paths_list()
try: try:
a = [] self.books
stored = storage.book_paths_list()
for i in stored: a.append(i[-1])
unique = set(self.books) - set(a)
return unique
except Exception: except Exception:
return False self.filter_books()
unique = set(self.books) - set(stored_books)
return unique
def import_books(self, list=None):
book_list = self.compare_shelf_current()
db = Storage()
for book in book_list:
book = self.process_book(book)
extracted = self.extract_metadata(book)
db.insert_book(extracted)
db.commit()

View File

@@ -22,22 +22,3 @@ class InitFiles:
os.mkdir(os.path.split(_pointer)[0]) os.mkdir(os.path.split(_pointer)[0])
f = open(_pointer, "w+") f = open(_pointer, "w+")
f.close() f.close()
class Epub:
"""All Epub file handling"""
def __init__(self):
global config
self.book_path = config.book_path
self.Catalogue = Catalogue()
def import_books(self, list=None):
if list is not None: book_list = list
else: book_list = self.Catalogue.filter_books()
for book in book_list:
extracted = self.Catalogue.extract_metadata(book_list[book])
Storage.insert_book(extracted)
Storage.commit()
def book_list(self):
pass

View File

@@ -1,19 +1,16 @@
#!/usr/bin/python #!/usr/bin/python
import sys import sys
sys.path.insert(1, 'lib/') sys.path.insert(1, 'lib/')
from pyShelf import InitFiles, Epub from pyShelf import InitFiles
from config import Config from config import Config
from library import Catalogue from library import Catalogue
config = Config() # Get configuration settings config = Config() # Get configuration settings
InitFiles(config.file_array) # Initialize file system InitFiles(config.file_array) # Initialize file system
Catalogue = Catalogue() # Open the Catalogue Catalogue = Catalogue() # Open the Catalogue
# This only needs to be run on first run, & when new books are added # new_books = Catalogue.new_files()
new_books = Catalogue.new_files() Catalogue.import_books() # Filter Your books
Epub().import_books(new_books) # Filter Your books
# TODO Implement file tracking system to avoid processing already tracked books
# TODO Figure out a system to get books page count # TODO Figure out a system to get books page count
# TODO Update testing
# TODO Update Documentation # TODO Update Documentation
# TODO Requirements.txt # TODO Requirements.txt
# TODO Test image storage # TODO Test image storage