Implemented the return of unique files for storage

2026-06-27 07:21:38 -04:00 · 2019-10-14 17:52:16 -04:00
parent a1bb0aa101
commit f821eb02cb
15 changed files with 87 additions and 76 deletions
--- a/pycache/config.cpython-37.pyc
+++ b/pycache/config.cpython-37.pyc
--- a/config.py
+++ b/config.py
@@ -1,7 +1,7 @@
 class Config:
   """Main System Configuration"""
   def __init__(self):
-      self.book_path = "books/"
+      self.book_path = "/home/raelon/Books/"
      self.book_shelf = "data/shelf.json"
      self.catalogue_db = "data/catalogue.db"
      self.file_array = [
--- a/data/shelf.json
+++ b/data/shelf.json
--- a/lib/api_hooks.py
+++ b/lib/api_hooks.py
@@ -1,7 +1,7 @@
 #!/usr/bin/python
 import sys
 import requests
-sys.path.insert(1, 'lib/')
+# sys.path.insert(1, 'lib/')
 class DuckDuckGo:
@@ -18,7 +18,10 @@ class DuckDuckGo:
        try: query = query.string
        except AttributeError: query = query
        search_result = requests.get(self.url+query+_key)
-        if search_result.status_code == 200 and search_result.json()['Image'] != '':
+        try: image_result = search_result.json()['Image']
        except ValueError:
            image_result = ''
        if search_result.status_code == 200 and image_result != '':
            image = requests.get(search_result.json()['Image'], stream=True)
            image.raw.decode_content = True
            return image.raw
--- a/lib/library.py
+++ b/lib/library.py
@@ -3,7 +3,9 @@ import json
 import os
 import re
 import zipfile
-from PIL import Image
+# import sys
 # sys.path.insert(1, '../')
 from lib.storage import Storage
 from bs4 import BeautifulSoup
 from config import Config
 from lib.api_hooks import DuckDuckGo
@@ -19,22 +21,29 @@ class Catalogue:
        self.opf_regx = re.compile(r'\.opf')
        self.cover_regx = re.compile(r'\.jpg|\.jpeg|\.png|\.bmp|\.gif')
        self.html_regx = re.compile(r'\.html')
        self.scan_folder()
    def scan_folder(self, folder=config.book_path):
        for f in os.listdir(folder):
-            _path = os.path.abspath(folder+'/'+f)
+            _path = os.path.abspath(folder + '/' + f)
-            #_path = os.path.abspath('.')+'/'+folder+f+'/'
+            # _path = os.path.abspath('.')+'/'+folder+f+'/'
-            _is_dir = os.path.isdir(_path.strip()+'/')
+            _is_dir = os.path.isdir(_path.strip() + '/')
            if _is_dir:
                self.file_list.append(self.scan_folder(_path))
            self.file_list.append(_path)
        regx = re.compile(r"\.epub")
        self.books = list(filter(regx.search, filter(None, self.file_list)))
    def scan_book(self, book):
        """REMOVE ME?"""
        _epub = zipfile.ZipFile(book)
        with _epub as _epub_open:
-            try: _epub_open.open('content.opf'); return True
+            try:
-            except Exception as e: print(e); return False
+                _epub_open.open('content.opf')
                return True
            except Exception as e:
                print(e)
                return False
    def filter_books(self, ret=0):
        """
@@ -44,16 +53,12 @@ class Catalogue:
        :param ret: 0 -> create class property -> dump json
        :param ret: 1 -> create & return class property
        """
        self.scan_folder()
        regx = re.compile(r"\.epub")
        self.books = list(filter(regx.search, filter(None, self.file_list)))
        _book_list_expanded = {}
        with open(config.book_shelf, 'w') as f:
            for book in self.books:
                _book_list_expanded[book] = self.process_book(book)
            if ret != 0: return _book_list_expanded
            else:
                import ipdb; ipdb.set_trace()
                json.dump(_book_list_expanded, f)
                return _book_list_expanded
@@ -87,10 +92,12 @@ class Catalogue:
            title = soup.find("dc:title")
            if title == None:
                title = book['path'].split('/')[-1].rsplit('.', 1)[0]
-            else: title = title.contents[0]
+            else:
                title = title.contents[0]
            author = soup.find("dc:creator")
            if author != None: author = author.contents[0]
-            try: cover = self.extract_cover_image(book_zip, book)
+            try:
                cover = self.extract_cover_image(book_zip, book)
            except IndexError:
                # cover = self.extract_cover_html(book_zip, book)
                cover = DuckDuckGo().image_result(title)
@@ -99,35 +106,32 @@ class Catalogue:
    def extract_content(self, book_zip, book):
        content = book_zip.open(
-            list(
+            list(filter(self.opf_regx.search, book['files']))[0])
                filter(self.opf_regx.search, book['files'])
            )[0]
        )
        return content
    def extract_cover_html(self, book_zip, book):
        cover = book_zip.open(
-            list(
+            list(filter(self.html_regx.search, book['files']))[0])
                filter(self.html_regx.search, book['files'])
            )[0]
        )
        return cover
    def extract_cover_image(self, book_zip, book):
        # TODO Handle books that have no Cover Image
        # TODO Handle books with html covers
        cover = book_zip.open(
-            list(
+            list(filter(self.cover_regx.search, book['files']))[0])
                filter(self.cover_regx.search, book['files'])
            )[0]
        )
        try: cover = book_zip.read(cover.name); return cover
        except KeyError: return False
    def compare_shelf_current(self):
        try:
-            self.books
+            cover = book_zip.read(cover.name)
-        except Exception:
+            return cover
-            self.filter_books(1)
+        except KeyError:
-        unique = set(self.books) - set(self.catalogue)
+            return False
    def new_files(self):
        """
        Work out which scanned books are not yet recorded in storage.

        Collects the last element of every row returned by
        Storage.book_paths_list() and removes those values from self.books.

        :returns: set of entries in self.books that storage does not list,
                  or False when the comparison raises
        """
        storage = Storage()
        try:
            # Only the final element of each stored row is compared.
            known_paths = [row[-1] for row in storage.book_paths_list()]
            return set(self.books) - set(known_paths)
        except Exception:
            return False
--- a/lib/pyShelf.py
+++ b/lib/pyShelf.py
@@ -2,10 +2,12 @@
 import os
 import zipfile
 from config import Config
-from library import Catalogue
+from lib.library import Catalogue
-from storage import Storage
+from lib.storage import Storage
 config = Config()
 Storage = Storage()
 class InitFiles:
    """First run file creation operations"""
    def __init__(self, file_array):
@@ -29,8 +31,9 @@ class Epub:
        self.book_path = config.book_path
        self.Catalogue = Catalogue()
-    def import_books(self):
+    def import_books(self, list=None):
-        book_list = self.Catalogue.filter_books()
+        if list is not None: book_list = list
        else: book_list = self.Catalogue.filter_books()
        for book in book_list:
            extracted = self.Catalogue.extract_metadata(book_list[book])
            Storage.insert_book(extracted)
--- a/lib/storage.py
+++ b/lib/storage.py
@@ -1,7 +1,7 @@
 #!/usr/bin/python
 import sys
 import sqlite3
-sys.path.insert(1,'../')
+# sys.path.insert(1, '../')
 from config import Config
 db_pointer = Config().catalogue_db
@@ -55,6 +55,13 @@ class Storage:
            print(e)
            return False
    def book_paths_list(self):
        """
        Fetch the file_name column for every row of the books table.

        The query is executed outside the guarded section, so an error raised
        by execute() propagates to the caller.

        :returns: list of rows produced by the cursor, or an empty list when
                  fetching the rows raises
        """
        query = '''SELECT file_name FROM books'''
        result = self.cursor.execute(query)
        try:
            return result.fetchall()
        except Exception:
            return []
    def commit(self):
        """
        Commit the pending transaction on self.db.

        :returns: True when the commit succeeds, False if it raises
        """
        try:
            self.db.commit()
        except Exception:
            return False
        return True
--- a/main.py
+++ b/main.py
@@ -1,6 +1,5 @@
 #!/usr/bin/python
 import sys
 from PIL import Image
 sys.path.insert(1, 'lib/')
 from pyShelf import InitFiles, Epub
 from config import Config
@@ -10,7 +9,8 @@ config = Config() # Get configuration settings
 InitFiles(config.file_array)  # Initialize file system
 Catalogue = Catalogue()  # Open the Catalogue
 # This only needs to be run on first run, & when new books are added
-Epub().import_books() # Filter Your books
+new_books = Catalogue.new_files()
 Epub().import_books(new_books)  # Filter Your books
 # TODO Implement file tracking system to avoid processing already tracked books
 # TODO Figure out a system to get books page count
 # TODO Update testing
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,23 +1,10 @@
-appdirs==1.4.3
+bs4
-beautifulsoup4==4.8.0
+certifi
-bs4==0.0.1
+lxml
-certifi==2019.9.11
+Pillow
-chardet==3.0.4
+requests
-cssselect==1.1.0
+soupsieve
-fake-useragent==0.1.11
+urllib3
-idna==2.8
+urwid
-lxml==4.4.1
+w3lib
-parse==1.12.1
+websockets
 Pillow==6.2.0
 pyee==6.0.0
 Pygments==2.4.2
 pyppeteer==0.0.25
 pyquery==1.4.0
 requests==2.22.0
 six==1.12.0
 soupsieve==1.9.4
 tqdm==4.36.1
 urllib3==1.25.6
 urwid==2.0.1
 w3lib==1.21.0
 websockets==8.0.2
--- a/tests/init.py
+++ b/tests/init.py
@@ -0,0 +1,2 @@
 import sys
 sys.path.insert(1, '../lib/')
--- a/tests/pycache/test_library.cpython-37.pyc
+++ b/tests/pycache/test_library.cpython-37.pyc
--- a/tests/pycache/test_sysio.cpython-37.pyc
+++ b/tests/pycache/test_sysio.cpython-37.pyc
--- a/tests/test_library.py
+++ b/tests/test_library.py
@@ -1,6 +1,6 @@
 import unittest
 import sys
-sys.path.insert(1, '../')
+# sys.path.insert(1, '../')
 from lib.library import Catalogue
 Catalogue = Catalogue()
@@ -12,5 +12,9 @@ class LibraryTest(unittest.TestCase):
    def test_library_catalogue_filter_books(self):
        """filter_books() must return something other than None."""
        filtered = Catalogue.filter_books()
        self.assertIsNotNone(filtered)
    def test_library_catalogue_new_files(self):
        """new_files() must not hand back the False failure value."""
        unseen = Catalogue.new_files()
        self.assertIsNot(unseen, False)
 if __name__ == '__main__':
    unittest.main()
--- a/tests/test_storage.py
+++ b/tests/test_storage.py
@@ -1,6 +1,6 @@
 import unittest
 import sys
-sys.path.insert(1, '../')
+# sys.path.insert(1, '../')
 from lib.storage import Storage
 storage = Storage()
--- a/tests/test_sysio.py
+++ b/tests/test_sysio.py
@@ -2,7 +2,7 @@ import unittest
 import os
 import shutil
 import sys
-sys.path.insert(1, '../')
+# sys.path.insert(1, '../')
 from lib.pyShelf import InitFiles
 from lib.pyShelf import Epub
		`@@ -0,0 +1,2 @@`
							`import sys`
							`sys.path.insert(1, '../lib/')`