Implemented the return of unique files for storage

This commit is contained in:
Raelon Masters
2019-10-14 17:52:16 -04:00
parent a1bb0aa101
commit f821eb02cb
15 changed files with 87 additions and 76 deletions

Binary file not shown.

View File

@@ -1,7 +1,7 @@
class Config:
"""Main System Configuration"""
def __init__(self):
self.book_path = "books/"
self.book_path = "/home/raelon/Books/"
self.book_shelf = "data/shelf.json"
self.catalogue_db = "data/catalogue.db"
self.file_array = [
@@ -10,4 +10,4 @@ class Config:
self.catalogue_db,
"conf/settings.json"
]
self.auto_scan = True
self.auto_scan = True

File diff suppressed because one or more lines are too long

View File

@@ -1,7 +1,7 @@
#!/usr/bin/python
import sys
import requests
sys.path.insert(1, 'lib/')
# sys.path.insert(1, 'lib/')
class DuckDuckGo:
@@ -18,8 +18,11 @@ class DuckDuckGo:
try: query = query.string
except AttributeError: query = query
search_result = requests.get(self.url+query+_key)
if search_result.status_code == 200 and search_result.json()['Image'] != '':
try: image_result = search_result.json()['Image']
except ValueError:
image_result = ''
if search_result.status_code == 200 and image_result != '':
image = requests.get(search_result.json()['Image'], stream=True)
image.raw.decode_content = True
return image.raw
else: return False
else: return False

View File

@@ -3,7 +3,9 @@ import json
import os
import re
import zipfile
from PIL import Image
# import sys
# sys.path.insert(1, '../')
from lib.storage import Storage
from bs4 import BeautifulSoup
from config import Config
from lib.api_hooks import DuckDuckGo
@@ -19,22 +21,29 @@ class Catalogue:
self.opf_regx = re.compile(r'\.opf')
self.cover_regx = re.compile(r'\.jpg|\.jpeg|\.png|\.bmp|\.gif')
self.html_regx = re.compile(r'\.html')
self.scan_folder()
def scan_folder(self, folder=config.book_path):
for f in os.listdir(folder):
_path = os.path.abspath(folder+'/'+f)
#_path = os.path.abspath('.')+'/'+folder+f+'/'
_is_dir = os.path.isdir(_path.strip()+'/')
_path = os.path.abspath(folder + '/' + f)
# _path = os.path.abspath('.')+'/'+folder+f+'/'
_is_dir = os.path.isdir(_path.strip() + '/')
if _is_dir:
self.file_list.append(self.scan_folder(_path))
self.file_list.append(_path)
regx = re.compile(r"\.epub")
self.books = list(filter(regx.search, filter(None, self.file_list)))
def scan_book(self, book):
"""REMOVE ME?"""
_epub = zipfile.ZipFile(book)
with _epub as _epub_open:
try: _epub_open.open('content.opf'); return True
except Exception as e: print(e); return False
try:
_epub_open.open('content.opf')
return True
except Exception as e:
print(e)
return False
def filter_books(self, ret=0):
"""
@@ -44,16 +53,12 @@ class Catalogue:
:param ret: 0 -> create class property -> dump json
:param ret: 1 -> create & return class property
"""
self.scan_folder()
regx = re.compile(r"\.epub")
self.books = list(filter(regx.search, filter(None, self.file_list)))
_book_list_expanded = {}
with open(config.book_shelf, 'w') as f:
for book in self.books:
_book_list_expanded[book] = self.process_book(book)
if ret != 0: return _book_list_expanded
else:
import ipdb; ipdb.set_trace()
json.dump(_book_list_expanded, f)
return _book_list_expanded
@@ -87,10 +92,12 @@ class Catalogue:
title = soup.find("dc:title")
if title == None:
title = book['path'].split('/')[-1].rsplit('.', 1)[0]
else: title = title.contents[0]
else:
title = title.contents[0]
author = soup.find("dc:creator")
if author != None: author = author.contents[0]
try: cover = self.extract_cover_image(book_zip, book)
try:
cover = self.extract_cover_image(book_zip, book)
except IndexError:
# cover = self.extract_cover_html(book_zip, book)
cover = DuckDuckGo().image_result(title)
@@ -99,35 +106,32 @@ class Catalogue:
def extract_content(self, book_zip, book):
content = book_zip.open(
list(
filter(self.opf_regx.search, book['files'])
)[0]
)
list(filter(self.opf_regx.search, book['files']))[0])
return content
def extract_cover_html(self, book_zip, book):
cover = book_zip.open(
list(
filter(self.html_regx.search, book['files'])
)[0]
)
list(filter(self.html_regx.search, book['files']))[0])
return cover
def extract_cover_image(self, book_zip, book):
# TODO Handle books that have no Cover Image
# TODO Handle books with html covers
cover = book_zip.open(
list(
filter(self.cover_regx.search, book['files'])
)[0]
)
try: cover = book_zip.read(cover.name); return cover
except KeyError: return False
def compare_shelf_current(self):
list(filter(self.cover_regx.search, book['files']))[0])
try:
self.books
cover = book_zip.read(cover.name)
return cover
except KeyError:
return False
def new_files(self):
storage = Storage()
try:
a = []
stored = storage.book_paths_list()
for i in stored: a.append(i[-1])
unique = set(self.books) - set(a)
return unique
except Exception:
self.filter_books(1)
unique = set(self.books) - set(self.catalogue)
return unique
return False

View File

@@ -2,10 +2,12 @@
import os
import zipfile
from config import Config
from library import Catalogue
from storage import Storage
from lib.library import Catalogue
from lib.storage import Storage
config = Config()
Storage = Storage()
class InitFiles:
"""First run file creation operations"""
def __init__(self, file_array):
@@ -18,7 +20,7 @@ class InitFiles:
"""Create the file"""
if not os.path.isdir(os.path.split(_pointer)[0]):
os.mkdir(os.path.split(_pointer)[0])
f = open(_pointer, "w+")
f = open(_pointer, "w+")
f.close()
@@ -29,8 +31,9 @@ class Epub:
self.book_path = config.book_path
self.Catalogue = Catalogue()
def import_books(self):
book_list = self.Catalogue.filter_books()
def import_books(self, list=None):
if list is not None: book_list = list
else: book_list = self.Catalogue.filter_books()
for book in book_list:
extracted = self.Catalogue.extract_metadata(book_list[book])
Storage.insert_book(extracted)

View File

@@ -1,7 +1,7 @@
#!/usr/bin/python
import sys
import sqlite3
sys.path.insert(1,'../')
# sys.path.insert(1, '../')
from config import Config
db_pointer = Config().catalogue_db
@@ -55,6 +55,13 @@ class Storage:
print(e)
return False
def book_paths_list(self):
    """Return the ``file_name`` value of every row in the ``books`` table.

    The rows are returned exactly as ``fetchall()`` produces them, i.e. one
    sequence per row (a 1-tuple with the default sqlite3 row factory), so
    callers index into each row to get the path.

    :returns: list of rows; an empty list when the query cannot be run or
              its results cannot be read.
    """
    query = '''SELECT file_name FROM books'''
    try:
        # execute() is guarded together with fetchall(): it is the call
        # that actually fails when the table is missing or the database
        # is unusable, and it used to sit outside the try block.
        return self.cursor.execute(query).fetchall()
    except sqlite3.Error:
        # Only database errors mean "nothing stored"; anything else is a
        # programming error and should surface.
        return []
def commit(self):
try: self.db.commit(); return True
except Exception as e: return False
except Exception as e: return False

12
main.py
View File

@@ -1,19 +1,19 @@
#!/usr/bin/python
import sys
from PIL import Image
sys.path.insert(1, 'lib/')
from pyShelf import InitFiles, Epub
from config import Config
from library import Catalogue
config = Config() # Get configuration settings
InitFiles(config.file_array) # Initialize file system
Catalogue = Catalogue() # Open the Catalogue
config = Config() # Get configuration settings
InitFiles(config.file_array) # Initialize file system
Catalogue = Catalogue() # Open the Catalogue
# This only needs to be run on first run, & when new books are added
Epub().import_books() # Filter Your books
new_books = Catalogue.new_files()
Epub().import_books(new_books) # Filter Your books
# TODO Implement file tracking system to avoid processing already tracked books
# TODO Figure out a system to get books page count
# TODO Update testing
# TODO Update Documentation
# TODO Requirements.txt
# TODO Test image storage
# TODO Test image storage

View File

@@ -1,23 +1,10 @@
appdirs==1.4.3
beautifulsoup4==4.8.0
bs4==0.0.1
certifi==2019.9.11
chardet==3.0.4
cssselect==1.1.0
fake-useragent==0.1.11
idna==2.8
lxml==4.4.1
parse==1.12.1
Pillow==6.2.0
pyee==6.0.0
Pygments==2.4.2
pyppeteer==0.0.25
pyquery==1.4.0
requests==2.22.0
six==1.12.0
soupsieve==1.9.4
tqdm==4.36.1
urllib3==1.25.6
urwid==2.0.1
w3lib==1.21.0
websockets==8.0.2
bs4
certifi
lxml
Pillow
requests
soupsieve
urllib3
urwid
w3lib
websockets

View File

@@ -0,0 +1,2 @@
import sys
sys.path.insert(1, '../lib/')

View File

@@ -1,6 +1,6 @@
import unittest
import sys
sys.path.insert(1, '../')
# sys.path.insert(1, '../')
from lib.library import Catalogue
Catalogue = Catalogue()
@@ -12,5 +12,9 @@ class LibraryTest(unittest.TestCase):
def test_library_catalogue_filter_books(self):
self.assertIsNotNone(Catalogue.filter_books())
def test_library_catalogue_new_files(self):
self.assertIsNot(Catalogue.new_files(), False)
if __name__ == '__main__':
unittest.main()

View File

@@ -1,6 +1,6 @@
import unittest
import sys
sys.path.insert(1, '../')
# sys.path.insert(1, '../')
from lib.storage import Storage
storage = Storage()

View File

@@ -2,7 +2,7 @@ import unittest
import os
import shutil
import sys
sys.path.insert(1, '../')
# sys.path.insert(1, '../')
from lib.pyShelf import InitFiles
from lib.pyShelf import Epub