Implemented the return of unique files for storage

This commit is contained in:
Raelon Masters
2019-10-14 17:52:16 -04:00
parent a1bb0aa101
commit f821eb02cb
15 changed files with 87 additions and 76 deletions

Binary file not shown.

View File

@@ -1,7 +1,7 @@
class Config: class Config:
"""Main System Configuration""" """Main System Configuration"""
def __init__(self): def __init__(self):
self.book_path = "books/" self.book_path = "/home/raelon/Books/"
self.book_shelf = "data/shelf.json" self.book_shelf = "data/shelf.json"
self.catalogue_db = "data/catalogue.db" self.catalogue_db = "data/catalogue.db"
self.file_array = [ self.file_array = [

File diff suppressed because one or more lines are too long

View File

@@ -1,7 +1,7 @@
#!/usr/bin/python #!/usr/bin/python
import sys import sys
import requests import requests
sys.path.insert(1, 'lib/') # sys.path.insert(1, 'lib/')
class DuckDuckGo: class DuckDuckGo:
@@ -18,7 +18,10 @@ class DuckDuckGo:
try: query = query.string try: query = query.string
except AttributeError: query = query except AttributeError: query = query
search_result = requests.get(self.url+query+_key) search_result = requests.get(self.url+query+_key)
if search_result.status_code == 200 and search_result.json()['Image'] != '': try: image_result = search_result.json()['Image']
except ValueError:
image_result = ''
if search_result.status_code == 200 and image_result != '':
image = requests.get(search_result.json()['Image'], stream=True) image = requests.get(search_result.json()['Image'], stream=True)
image.raw.decode_content = True image.raw.decode_content = True
return image.raw return image.raw

View File

@@ -3,7 +3,9 @@ import json
import os import os
import re import re
import zipfile import zipfile
from PIL import Image # import sys
# sys.path.insert(1, '../')
from lib.storage import Storage
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from config import Config from config import Config
from lib.api_hooks import DuckDuckGo from lib.api_hooks import DuckDuckGo
@@ -19,6 +21,7 @@ class Catalogue:
self.opf_regx = re.compile(r'\.opf') self.opf_regx = re.compile(r'\.opf')
self.cover_regx = re.compile(r'\.jpg|\.jpeg|\.png|\.bmp|\.gif') self.cover_regx = re.compile(r'\.jpg|\.jpeg|\.png|\.bmp|\.gif')
self.html_regx = re.compile(r'\.html') self.html_regx = re.compile(r'\.html')
self.scan_folder()
def scan_folder(self, folder=config.book_path): def scan_folder(self, folder=config.book_path):
for f in os.listdir(folder): for f in os.listdir(folder):
@@ -28,13 +31,19 @@ class Catalogue:
if _is_dir: if _is_dir:
self.file_list.append(self.scan_folder(_path)) self.file_list.append(self.scan_folder(_path))
self.file_list.append(_path) self.file_list.append(_path)
regx = re.compile(r"\.epub")
self.books = list(filter(regx.search, filter(None, self.file_list)))
def scan_book(self, book): def scan_book(self, book):
"""REMOVE ME?""" """REMOVE ME?"""
_epub = zipfile.ZipFile(book) _epub = zipfile.ZipFile(book)
with _epub as _epub_open: with _epub as _epub_open:
try: _epub_open.open('content.opf'); return True try:
except Exception as e: print(e); return False _epub_open.open('content.opf')
return True
except Exception as e:
print(e)
return False
def filter_books(self, ret=0): def filter_books(self, ret=0):
""" """
@@ -44,16 +53,12 @@ class Catalogue:
:param ret: 0 -> create class property -> dump json :param ret: 0 -> create class property -> dump json
:param ret: 1 -> create & return class property :param ret: 1 -> create & return class property
""" """
self.scan_folder()
regx = re.compile(r"\.epub")
self.books = list(filter(regx.search, filter(None, self.file_list)))
_book_list_expanded = {} _book_list_expanded = {}
with open(config.book_shelf, 'w') as f: with open(config.book_shelf, 'w') as f:
for book in self.books: for book in self.books:
_book_list_expanded[book] = self.process_book(book) _book_list_expanded[book] = self.process_book(book)
if ret != 0: return _book_list_expanded if ret != 0: return _book_list_expanded
else: else:
import ipdb; ipdb.set_trace()
json.dump(_book_list_expanded, f) json.dump(_book_list_expanded, f)
return _book_list_expanded return _book_list_expanded
@@ -87,10 +92,12 @@ class Catalogue:
title = soup.find("dc:title") title = soup.find("dc:title")
if title == None: if title == None:
title = book['path'].split('/')[-1].rsplit('.', 1)[0] title = book['path'].split('/')[-1].rsplit('.', 1)[0]
else: title = title.contents[0] else:
title = title.contents[0]
author = soup.find("dc:creator") author = soup.find("dc:creator")
if author != None: author = author.contents[0] if author != None: author = author.contents[0]
try: cover = self.extract_cover_image(book_zip, book) try:
cover = self.extract_cover_image(book_zip, book)
except IndexError: except IndexError:
# cover = self.extract_cover_html(book_zip, book) # cover = self.extract_cover_html(book_zip, book)
cover = DuckDuckGo().image_result(title) cover = DuckDuckGo().image_result(title)
@@ -99,35 +106,32 @@ class Catalogue:
def extract_content(self, book_zip, book): def extract_content(self, book_zip, book):
content = book_zip.open( content = book_zip.open(
list( list(filter(self.opf_regx.search, book['files']))[0])
filter(self.opf_regx.search, book['files'])
)[0]
)
return content return content
def extract_cover_html(self, book_zip, book): def extract_cover_html(self, book_zip, book):
cover = book_zip.open( cover = book_zip.open(
list( list(filter(self.html_regx.search, book['files']))[0])
filter(self.html_regx.search, book['files'])
)[0]
)
return cover return cover
def extract_cover_image(self, book_zip, book): def extract_cover_image(self, book_zip, book):
# TODO Handle books that have no Cover Image # TODO Handle books that have no Cover Image
# TODO Handle books with html covers # TODO Handle books with html covers
cover = book_zip.open( cover = book_zip.open(
list( list(filter(self.cover_regx.search, book['files']))[0])
filter(self.cover_regx.search, book['files'])
)[0]
)
try: cover = book_zip.read(cover.name); return cover
except KeyError: return False
def compare_shelf_current(self):
try: try:
self.books cover = book_zip.read(cover.name)
except Exception: return cover
self.filter_books(1) except KeyError:
unique = set(self.books) - set(self.catalogue) return False
def new_files(self):
storage = Storage()
try:
a = []
stored = storage.book_paths_list()
for i in stored: a.append(i[-1])
unique = set(self.books) - set(a)
return unique return unique
except Exception:
return False

View File

@@ -2,10 +2,12 @@
import os import os
import zipfile import zipfile
from config import Config from config import Config
from library import Catalogue from lib.library import Catalogue
from storage import Storage from lib.storage import Storage
config = Config() config = Config()
Storage = Storage() Storage = Storage()
class InitFiles: class InitFiles:
"""First run file creation operations""" """First run file creation operations"""
def __init__(self, file_array): def __init__(self, file_array):
@@ -29,8 +31,9 @@ class Epub:
self.book_path = config.book_path self.book_path = config.book_path
self.Catalogue = Catalogue() self.Catalogue = Catalogue()
def import_books(self): def import_books(self, list=None):
book_list = self.Catalogue.filter_books() if list is not None: book_list = list
else: book_list = self.Catalogue.filter_books()
for book in book_list: for book in book_list:
extracted = self.Catalogue.extract_metadata(book_list[book]) extracted = self.Catalogue.extract_metadata(book_list[book])
Storage.insert_book(extracted) Storage.insert_book(extracted)

View File

@@ -1,7 +1,7 @@
#!/usr/bin/python #!/usr/bin/python
import sys import sys
import sqlite3 import sqlite3
sys.path.insert(1,'../') # sys.path.insert(1, '../')
from config import Config from config import Config
db_pointer = Config().catalogue_db db_pointer = Config().catalogue_db
@@ -55,6 +55,13 @@ class Storage:
print(e) print(e)
return False return False
def book_paths_list(self):
q = '''SELECT file_name FROM books'''
x = self.cursor.execute(q)
try: x = x.fetchall()
except Exception: x = []
return x
def commit(self): def commit(self):
try: self.db.commit(); return True try: self.db.commit(); return True
except Exception as e: return False except Exception as e: return False

View File

@@ -1,6 +1,5 @@
#!/usr/bin/python #!/usr/bin/python
import sys import sys
from PIL import Image
sys.path.insert(1, 'lib/') sys.path.insert(1, 'lib/')
from pyShelf import InitFiles, Epub from pyShelf import InitFiles, Epub
from config import Config from config import Config
@@ -10,7 +9,8 @@ config = Config() # Get configuration settings
InitFiles(config.file_array) # Initialize file system InitFiles(config.file_array) # Initialize file system
Catalogue = Catalogue() # Open the Catalogue Catalogue = Catalogue() # Open the Catalogue
# This only needs to be run on first run, & when new books are added # This only needs to be run on first run, & when new books are added
Epub().import_books() # Filter Your books new_books = Catalogue.new_files()
Epub().import_books(new_books) # Filter Your books
# TODO Implement file tracking system to avoid processing already tracked books # TODO Implement file tracking system to avoid processing already tracked books
# TODO Figure out a system to get books page count # TODO Figure out a system to get books page count
# TODO Update testing # TODO Update testing

View File

@@ -1,23 +1,10 @@
appdirs==1.4.3 bs4
beautifulsoup4==4.8.0 certifi
bs4==0.0.1 lxml
certifi==2019.9.11 Pillow
chardet==3.0.4 requests
cssselect==1.1.0 soupsieve
fake-useragent==0.1.11 urllib3
idna==2.8 urwid
lxml==4.4.1 w3lib
parse==1.12.1 websockets
Pillow==6.2.0
pyee==6.0.0
Pygments==2.4.2
pyppeteer==0.0.25
pyquery==1.4.0
requests==2.22.0
six==1.12.0
soupsieve==1.9.4
tqdm==4.36.1
urllib3==1.25.6
urwid==2.0.1
w3lib==1.21.0
websockets==8.0.2

View File

@@ -0,0 +1,2 @@
import sys
sys.path.insert(1, '../lib/')

View File

@@ -1,6 +1,6 @@
import unittest import unittest
import sys import sys
sys.path.insert(1, '../') # sys.path.insert(1, '../')
from lib.library import Catalogue from lib.library import Catalogue
Catalogue = Catalogue() Catalogue = Catalogue()
@@ -12,5 +12,9 @@ class LibraryTest(unittest.TestCase):
def test_library_catalogue_filter_books(self): def test_library_catalogue_filter_books(self):
self.assertIsNotNone(Catalogue.filter_books()) self.assertIsNotNone(Catalogue.filter_books())
def test_library_catalogue_new_files(self):
self.assertIsNot(Catalogue.new_files(), False)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@@ -1,6 +1,6 @@
import unittest import unittest
import sys import sys
sys.path.insert(1, '../') # sys.path.insert(1, '../')
from lib.storage import Storage from lib.storage import Storage
storage = Storage() storage = Storage()

View File

@@ -2,7 +2,7 @@ import unittest
import os import os
import shutil import shutil
import sys import sys
sys.path.insert(1, '../') # sys.path.insert(1, '../')
from lib.pyShelf import InitFiles from lib.pyShelf import InitFiles
from lib.pyShelf import Epub from lib.pyShelf import Epub