Merge pull request #6 from th3r00t/transfer

Changes include the ability to scan only books that are not already in the database, and the ability to import books that are not already stored.
This commit is contained in:
th3r00t
2019-10-31 14:45:17 -04:00
committed by GitHub
15 changed files with 101 additions and 97 deletions

Binary file not shown.

View File

@@ -10,4 +10,4 @@ class Config:
self.catalogue_db, self.catalogue_db,
"conf/settings.json" "conf/settings.json"
] ]
self.auto_scan = True self.auto_scan = True

1
lib/.#library.py Symbolic link
View File

@@ -0,0 +1 @@
raelon@golumnsec.6371:1572529288

View File

@@ -1,7 +1,7 @@
#!/usr/bin/python #!/usr/bin/python
import sys import sys
import requests import requests
sys.path.insert(1, 'lib/') # sys.path.insert(1, 'lib/')
class DuckDuckGo: class DuckDuckGo:
@@ -18,8 +18,11 @@ class DuckDuckGo:
try: query = query.string try: query = query.string
except AttributeError: query = query except AttributeError: query = query
search_result = requests.get(self.url+query+_key) search_result = requests.get(self.url+query+_key)
if search_result.status_code == 200 and search_result.json()['Image'] != '': try: image_result = search_result.json()['Image']
except ValueError:
image_result = ''
if search_result.status_code == 200 and image_result != '':
image = requests.get(search_result.json()['Image'], stream=True) image = requests.get(search_result.json()['Image'], stream=True)
image.raw.decode_content = True image.raw.decode_content = True
return image.raw return image.raw
else: return False else: return False

View File

@@ -3,10 +3,13 @@ import json
import os import os
import re import re
import zipfile import zipfile
from PIL import Image
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from PIL import Image
from config import Config from config import Config
from api_hooks import DuckDuckGo from lib.api_hooks import DuckDuckGo
from lib.storage import Storage
config = Config() config = Config()
@@ -14,19 +17,12 @@ config = Config()
class Catalogue: class Catalogue:
"""Decodes and stores book information""" """Decodes and stores book information"""
"""Step One: filter_books""" """Step One: filter_books"""
def __init__(self): def __init__(self):
self.file_list = [] self.file_list = []
self.opf_regx = re.compile(r'\.opf') self.opf_regx = re.compile(r'\.opf')
self.cover_regx = re.compile(r'\.jpg|\.jpeg|\.png|\.bmp|\.gif') self.cover_regx = re.compile(r'\.jpg|\.jpeg|\.png|\.bmp|\.gif')
self.html_regx = re.compile(r'\.html') self.html_regx = re.compile(r'\.html')
"""
with open(config.book_shelf, 'r') as f:
try:
self.catalogue = json.load(f)
self.current_files = self.scan_folder()
except Exception:
self.filter_books()
"""
def scan_folder(self, folder=config.book_path): def scan_folder(self, folder=config.book_path):
for f in os.listdir(folder): for f in os.listdir(folder):
@@ -46,11 +42,11 @@ class Catalogue:
def filter_books(self): def filter_books(self):
""" """
Scan book folder recursively for epub files Scan book folder recursively for epub files
filter_books(0) -> Catalogue.books filter_books(0) -> Catalogue.books
filter_books(1) -> self.books[] filter_books(1) -> self.books[]
:param ret: 0 -> create class property -> dump json :param ret: 0 -> create class property -> dump json
:param ret: 1 -> create & return class property :param ret: 1 -> create & return class property
""" """
self.scan_folder() self.scan_folder()
regx = re.compile(r"\.epub") regx = re.compile(r"\.epub")
@@ -119,8 +115,6 @@ class Catalogue:
return cover return cover
def extract_cover_image(self, book_zip, book): def extract_cover_image(self, book_zip, book):
# TODO Handle books that have no Cover Image
# TODO Handle books with html covers
cover = book_zip.open( cover = book_zip.open(
list( list(
filter(self.cover_regx.search, book['files']) filter(self.cover_regx.search, book['files'])
@@ -130,9 +124,28 @@ class Catalogue:
except KeyError: return False except KeyError: return False
def compare_shelf_current(self): def compare_shelf_current(self):
try: db = Storage()
self.books stored = db.book_paths_list()
except Exception: closed = db.close()
self.filter_books() try: self.books
unique = set(self.books) - set(self.catalogue) except Exception: self.filter_books()
return unique on_disk, in_storage = [], []
for _x in self.books: on_disk.append(_x)
for _y in stored: in_storage.append(_y[0])
a, b, = set(on_disk), set(in_storage)
c = set.difference(a, b)
return c
def import_books(self, list=None):
book_list = self.compare_shelf_current()
db = Storage()
for book in book_list:
book = self.process_book(book)
extracted = self.extract_metadata(book)
db.insert_book(extracted)
inserted = db.commit()
if inserted is not True:
print(inserted)
if input('Continue ? y/n') == 'y':
pass
db.close()

View File

@@ -2,10 +2,12 @@
import os import os
import zipfile import zipfile
from config import Config from config import Config
from library import Catalogue from lib.library import Catalogue
from storage import Storage from lib.storage import Storage
config = Config() config = Config()
Storage = Storage() Storage = Storage()
class InitFiles: class InitFiles:
"""First run file creation operations""" """First run file creation operations"""
def __init__(self, file_array): def __init__(self, file_array):
@@ -18,23 +20,5 @@ class InitFiles:
"""Create the file""" """Create the file"""
if not os.path.isdir(os.path.split(_pointer)[0]): if not os.path.isdir(os.path.split(_pointer)[0]):
os.mkdir(os.path.split(_pointer)[0]) os.mkdir(os.path.split(_pointer)[0])
f = open(_pointer, "w+") f = open(_pointer, "w+")
f.close() f.close()
class Epub:
"""All Epub file handling"""
def __init__(self):
global config
self.book_path = config.book_path
self.Catalogue = Catalogue()
def import_books(self):
book_list = self.Catalogue.filter_books()
for book in book_list:
extracted = self.Catalogue.extract_metadata(book_list[book])
Storage.insert_book(extracted)
Storage.commit()
def book_list(self):
pass

View File

@@ -1,7 +1,7 @@
#!/usr/bin/python #!/usr/bin/python
import sys import sys
import sqlite3 import sqlite3
sys.path.insert(1,'../') # sys.path.insert(1, '../')
from config import Config from config import Config
db_pointer = Config().catalogue_db db_pointer = Config().catalogue_db
@@ -55,6 +55,17 @@ class Storage:
print(e) print(e)
return False return False
def book_paths_list(self):
q = '''SELECT file_name FROM books'''
x = self.cursor.execute(q)
try: x = x.fetchall()
except Exception: x = []
return x
def commit(self): def commit(self):
try: self.db.commit(); return True try: self.db.commit(); return True
except Exception as e: return False except Exception as e: return e
def close(self):
self.db.close()
return True

25
main.py
View File

@@ -1,19 +1,18 @@
#!/usr/bin/python #!/usr/bin/python
import sys import sys
from PIL import Image
sys.path.insert(1, 'lib/')
from pyShelf import InitFiles, Epub
from config import Config
from library import Catalogue
config = Config() # Get configuration settings from config import Config
InitFiles(config.file_array) # Initialize file system from lib.library import Catalogue
Catalogue = Catalogue() # Open the Catalogue from lib.pyShelf import InitFiles
# This only needs to be run on first run, & when new books are added
Epub().import_books() # Filter Your books sys.path.insert(1, 'lib/')
# TODO Implement file tracking system to avoid processing already tracked books
config = Config() # Get configuration settings
InitFiles(config.file_array) # Initialize file system
Catalogue = Catalogue() # Open the Catalogue
# new_books = Catalogue.new_files()
Catalogue.import_books() # Filter Your books
# TODO Figure out a system to get books page count # TODO Figure out a system to get books page count
# TODO Update testing
# TODO Update Documentation # TODO Update Documentation
# TODO Requirements.txt # TODO Requirements.txt
# TODO Test image storage

View File

@@ -1,23 +1,10 @@
appdirs==1.4.3 bs4
beautifulsoup4==4.8.0 certifi
bs4==0.0.1 lxml
certifi==2019.9.11 Pillow
chardet==3.0.4 requests
cssselect==1.1.0 soupsieve
fake-useragent==0.1.11 urllib3
idna==2.8 urwid
lxml==4.4.1 w3lib
parse==1.12.1 websockets
Pillow==6.2.0
pyee==6.0.0
Pygments==2.4.2
pyppeteer==0.0.25
pyquery==1.4.0
requests==2.22.0
six==1.12.0
soupsieve==1.9.4
tqdm==4.36.1
urllib3==1.25.6
urwid==2.0.1
w3lib==1.21.0
websockets==8.0.2

View File

@@ -0,0 +1,2 @@
import sys
sys.path.insert(1, '../lib/')

View File

@@ -1,16 +1,20 @@
import unittest import unittest
import sys import sys
sys.path.insert(1, 'lib/') # sys.path.insert(1, '../')
from library import Catalogue from lib.library import Catalogue
Catalogue = Catalogue()
class LibraryTest(unittest.TestCase): class LibraryTest(unittest.TestCase):
def test_libray_catalogue(self): def test_libray_catalogue(self):
self.assertIsNotNone(Catalogue()) self.assertIsNotNone(Catalogue)
def test_library_catalogue_filter_books(self): def test_library_catalogue_filter_books(self):
self.assertIsNotNone(Catalogue().filter_books()) self.assertIsNotNone(Catalogue.filter_books())
def test_library_catalogue_new_files(self):
self.assertIsNot(Catalogue.new_files(), False)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@@ -1,7 +1,7 @@
import unittest import unittest
import sys import sys
sys.path.insert(1, 'lib/') # sys.path.insert(1, '../')
from storage import Storage from lib.storage import Storage
storage = Storage() storage = Storage()
@@ -13,4 +13,4 @@ class StorageTest(unittest.TestCase):
def test_Storage_create_tables(self): def test_Storage_create_tables(self):
self.assertIsNot(storage.create_tables(), Exception) self.assertIsNot(storage.create_tables(), Exception)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@@ -2,9 +2,9 @@ import unittest
import os import os
import shutil import shutil
import sys import sys
sys.path.insert(1, 'lib/') # sys.path.insert(1, '../')
from pyShelf import InitFiles from lib.pyShelf import InitFiles
from pyShelf import Epub from lib.pyShelf import Epub
class SysIoTest(unittest.TestCase): class SysIoTest(unittest.TestCase):