From f821eb02cb638c83e9c3321db6b927067b3447c3 Mon Sep 17 00:00:00 2001 From: Raelon Masters Date: Mon, 14 Oct 2019 17:52:16 -0400 Subject: [PATCH] Implemented the return of unique files for storage --- __pycache__/config.cpython-37.pyc | Bin 623 -> 636 bytes config.py | 4 +- data/shelf.json | 1 + lib/api_hooks.py | 9 ++- lib/library.py | 68 +++++++++--------- lib/pyShelf.py | 13 ++-- lib/storage.py | 11 ++- main.py | 12 ++-- requirements.txt | 33 +++------ tests/__init__.py | 2 + tests/__pycache__/test_library.cpython-37.pyc | Bin 847 -> 1023 bytes tests/__pycache__/test_sysio.cpython-37.pyc | Bin 987 -> 951 bytes tests/test_library.py | 6 +- tests/test_storage.py | 2 +- tests/test_sysio.py | 2 +- 15 files changed, 87 insertions(+), 76 deletions(-) diff --git a/__pycache__/config.cpython-37.pyc b/__pycache__/config.cpython-37.pyc index c3590a7cb39a573c85d5f478bca6f2b6b7e9428a..fa48c0320dc87b23e42262ec783a9d4bfef4ecfd 100644 GIT binary patch delta 140 zcmaFQ@`r`jiI$DtBYoJsll+3^L5 zB^gm%U}kYfYEIg%$pMV|j4YEI7$=Id0gWhPK_yrxt1&67p$h`l+~Tmw%}*)KNws4H J@;(DG0{~&GBUk_c delta 127 zcmeyv@}7m)iI*QX>iK6U4T}7;@1lwd|CS^5rL7 create class property -> dump json :param ret: 1 -> create & return class property """ - self.scan_folder() - regx = re.compile(r"\.epub") - self.books = list(filter(regx.search, filter(None, self.file_list))) _book_list_expanded = {} with open(config.book_shelf, 'w') as f: for book in self.books: _book_list_expanded[book] = self.process_book(book) if ret != 0: return _book_list_expanded else: - import ipdb; ipdb.set_trace() json.dump(_book_list_expanded, f) return _book_list_expanded @@ -87,10 +92,12 @@ class Catalogue: title = soup.find("dc:title") if title == None: title = book['path'].split('/')[-1].rsplit('.', 1)[0] - else: title = title.contents[0] + else: + title = title.contents[0] author = soup.find("dc:creator") if author != None: author = author.contents[0] - try: cover = self.extract_cover_image(book_zip, book) + try: + cover = self.extract_cover_image(book_zip, book) except IndexError: # cover = self.extract_cover_html(book_zip, book) cover = DuckDuckGo().image_result(title) @@ -99,35 +106,32 @@ class Catalogue: def extract_content(self, book_zip, book): content = book_zip.open( - list( - filter(self.opf_regx.search, book['files']) - )[0] - ) + list(filter(self.opf_regx.search, book['files']))[0]) return content def extract_cover_html(self, book_zip, book): cover = book_zip.open( - list( - filter(self.html_regx.search, book['files']) - )[0] - ) + list(filter(self.html_regx.search, book['files']))[0]) return cover def extract_cover_image(self, book_zip, book): # TODO Handle books that have no Cover Image # TODO Handle books with html covers cover = book_zip.open( - list( - filter(self.cover_regx.search, book['files']) - )[0] - ) - try: cover = book_zip.read(cover.name); return cover - except KeyError: return False - - def compare_shelf_current(self): + list(filter(self.cover_regx.search, book['files']))[0]) try: - self.books + cover = book_zip.read(cover.name) + return cover + except KeyError: + return False + + def new_files(self): + storage = Storage() + try: + a = [] + stored = storage.book_paths_list() + for i in stored: a.append(i[-1]) + unique = set(self.books) - set(a) + return unique except Exception: - self.filter_books(1) - unique = set(self.books) - set(self.catalogue) - return unique + return False diff --git a/lib/pyShelf.py b/lib/pyShelf.py index d5da1cd..a5d7c2e 100755 --- a/lib/pyShelf.py +++ b/lib/pyShelf.py @@ -2,10 +2,12 @@ import os import zipfile from config import Config -from library import Catalogue -from storage import Storage +from lib.library import Catalogue +from lib.storage import Storage config = Config() Storage = Storage() + + class InitFiles: """First run file creation operations""" def __init__(self, file_array): @@ -18,7 +20,7 @@ class InitFiles: """Create the file""" if not os.path.isdir(os.path.split(_pointer)[0]): os.mkdir(os.path.split(_pointer)[0]) - f = open(_pointer, "w+") + f = open(_pointer, "w+") f.close() @@ -29,8 +31,9 @@ class Epub: self.book_path = config.book_path self.Catalogue = Catalogue() - def import_books(self): - book_list = self.Catalogue.filter_books() + def import_books(self, list=None): + if list is not None: book_list = list + else: book_list = self.Catalogue.filter_books() for book in book_list: extracted = self.Catalogue.extract_metadata(book_list[book]) Storage.insert_book(extracted) diff --git a/lib/storage.py b/lib/storage.py index a4b186e..57d0a25 100644 --- a/lib/storage.py +++ b/lib/storage.py @@ -1,7 +1,7 @@ #!/usr/bin/python import sys import sqlite3 -sys.path.insert(1,'../') +# sys.path.insert(1, '../') from config import Config db_pointer = Config().catalogue_db @@ -55,6 +55,13 @@ class Storage: print(e) return False + def book_paths_list(self): + q = '''SELECT file_name FROM books''' + x = self.cursor.execute(q) + try: x = x.fetchall() + except Exception: x = [] + return x + def commit(self): try: self.db.commit(); return True - except Exception as e: return False \ No newline at end of file + except Exception as e: return False diff --git a/main.py b/main.py index a665933..b5cdc69 100755 --- a/main.py +++ b/main.py @@ -1,19 +1,19 @@ #!/usr/bin/python import sys -from PIL import Image sys.path.insert(1, 'lib/') from pyShelf import InitFiles, Epub from config import Config from library import Catalogue -config = Config() # Get configuration settings -InitFiles(config.file_array) # Initialize file system -Catalogue = Catalogue() # Open the Catalogue +config = Config() # Get configuration settings +InitFiles(config.file_array) # Initialize file system +Catalogue = Catalogue() # Open the Catalogue # This only needs to be run on first run, & when new books are added -Epub().import_books() # Filter Your books +new_books = Catalogue.new_files() +Epub().import_books(new_books) # Filter Your books # TODO Implement file tracking system to avoid processing already tracked books # TODO Figure out a system to get books page count # TODO Update testing # TODO Update Documentation # TODO Requirements.txt -# TODO Test image storage \ No newline at end of file +# TODO Test image storage diff --git a/requirements.txt b/requirements.txt index 36bc418..770a533 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,23 +1,10 @@ -appdirs==1.4.3 -beautifulsoup4==4.8.0 -bs4==0.0.1 -certifi==2019.9.11 -chardet==3.0.4 -cssselect==1.1.0 -fake-useragent==0.1.11 -idna==2.8 -lxml==4.4.1 -parse==1.12.1 -Pillow==6.2.0 -pyee==6.0.0 -Pygments==2.4.2 -pyppeteer==0.0.25 -pyquery==1.4.0 -requests==2.22.0 -six==1.12.0 -soupsieve==1.9.4 -tqdm==4.36.1 -urllib3==1.25.6 -urwid==2.0.1 -w3lib==1.21.0 -websockets==8.0.2 \ No newline at end of file +bs4 +certifi +lxml +Pillow +requests +soupsieve +urllib3 +urwid +w3lib +websockets diff --git a/tests/__init__.py b/tests/__init__.py index e69de29..b8bb7c9 100755 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -0,0 +1,2 @@ +import sys +sys.path.insert(1, '../lib/') diff --git a/tests/__pycache__/test_library.cpython-37.pyc b/tests/__pycache__/test_library.cpython-37.pyc index 3a54b6d3827bb3e7a4e2d902e65c4e26e91ca42c..ac1de5e716636e67cf7eddb72698989a78ac038c 100644 GIT binary patch literal 1023 zcma)5&ubGw6n-_Ou|wHZ!$pH@kIagG9Wwe@Fse zy!rRc)sr4Qdh)$p)2xty1MlsdH}A*y&6_>i-Q59PzrKBW_mcs9r_E{wXdI%rV^kV6 z%OP#PEM>UoIiHGDFfifbF&gAD`!G^f~{WQKP zrzWsYs*VBu3UUQKe@oDxWcZk)+%ps46o<@KDismT#Z*$djy7h0Q>Z?g?hL# z`X4SdkDALq8V#2PUqYLYuoP5tNC)SWyfRjumF1O-nC;LB(P>l{E&piU)wV#YcIp5F zSBT@yh_~i$#(#hn*Z4JC^1l<|$;(LierqHRm0n>!5+<{a$Ac|A?(+6pB%b^SiMzxk z3cXaS$fiaqzoXQ&)U%xIfl?o4S>Bvji480b*Y-)R@42?N)I{t0p9eWf&@WN0SPI))wF3MyYbK>c&KL~Z2p?Y5CV;m6ZeTy%Ta=Q?4CL4tyx8v@`k zwkpglhHDE9OA<1*0~^pWVm5Og5py^iI}04RoHBP}i8GF>R*ke9E|5o;W`@56jdPO3 z!~J63huz|Nk7(5?Ui$}|TDnIS%3*!Pmi&FRo6ar=l diff --git a/tests/__pycache__/test_sysio.cpython-37.pyc b/tests/__pycache__/test_sysio.cpython-37.pyc index e54ebc184e9ec28040a8f2231c68a10c9aa89732..e7f67963bac4d2a86369d5dc907ce583b6dcbb6a 100644 GIT binary patch delta 331 zcmcc3zMY-diIEHtrx0s77i=()6GL!TQDuXjpbJB{KKyu6=IgSvZ zTIa;#RIpJ_MZzFK7LajBZUZ}m2dvyCH$SB`C)JJ-$SW2A5*&D}|Ydw1tA@gwB1lpW^I^TXR!x0E}OmV3Lg=2L$WC{OXF2TU>L zr>#sxqTzw#7$LAzrdmq8G9@&dwqJPE(OreNo|$O22$#%N!-x&rD;jXV0*8-&nq2Rf z@87_yOE`eA`hed7ZBT`u*6eMEn>eFq5Z+k+57-j@h}%#2FJ diff --git a/tests/test_library.py b/tests/test_library.py index 0df85af..8b4b03d 100755 --- a/tests/test_library.py +++ b/tests/test_library.py @@ -1,6 +1,6 @@ import unittest import sys -sys.path.insert(1, '../') +# sys.path.insert(1, '../') from lib.library import Catalogue Catalogue = Catalogue() @@ -12,5 +12,9 @@ class LibraryTest(unittest.TestCase): def test_library_catalogue_filter_books(self): self.assertIsNotNone(Catalogue.filter_books()) + def test_library_catalogue_new_files(self): + self.assertIsNot(Catalogue.new_files(), False) + + if __name__ == '__main__': unittest.main() diff --git a/tests/test_storage.py b/tests/test_storage.py index 5a14cb8..f3dabf5 100644 --- a/tests/test_storage.py +++ b/tests/test_storage.py @@ -1,6 +1,6 @@ import unittest import sys -sys.path.insert(1, '../') +# sys.path.insert(1, '../') from lib.storage import Storage storage = Storage() diff --git a/tests/test_sysio.py b/tests/test_sysio.py index 31ea441..8e50b36 100755 --- a/tests/test_sysio.py +++ b/tests/test_sysio.py @@ -2,7 +2,7 @@ import unittest import os import shutil import sys -sys.path.insert(1, '../') +# sys.path.insert(1, '../') from lib.pyShelf import InitFiles from lib.pyShelf import Epub