Merge pull request #6 from th3r00t/transfer

Changes include the ability to scan only books not already in the database, and the ability to import books not already stored.
This commit is contained in:
th3r00t
2019-10-31 14:45:17 -04:00
committed by GitHub
15 changed files with 101 additions and 97 deletions

Binary file not shown.

View File

@@ -10,4 +10,4 @@ class Config:
self.catalogue_db,
"conf/settings.json"
]
self.auto_scan = True
self.auto_scan = True

1
lib/.#library.py Symbolic link
View File

@@ -0,0 +1 @@
raelon@golumnsec.6371:1572529288

View File

@@ -1,7 +1,7 @@
#!/usr/bin/python
import sys
import requests
sys.path.insert(1, 'lib/')
# sys.path.insert(1, 'lib/')
class DuckDuckGo:
@@ -18,8 +18,11 @@ class DuckDuckGo:
try: query = query.string
except AttributeError: query = query
search_result = requests.get(self.url+query+_key)
if search_result.status_code == 200 and search_result.json()['Image'] != '':
try: image_result = search_result.json()['Image']
except ValueError:
image_result = ''
if search_result.status_code == 200 and image_result != '':
image = requests.get(search_result.json()['Image'], stream=True)
image.raw.decode_content = True
return image.raw
else: return False
else: return False

View File

@@ -3,10 +3,13 @@ import json
import os
import re
import zipfile
from PIL import Image
from bs4 import BeautifulSoup
from PIL import Image
from config import Config
from api_hooks import DuckDuckGo
from lib.api_hooks import DuckDuckGo
from lib.storage import Storage
config = Config()
@@ -14,19 +17,12 @@ config = Config()
class Catalogue:
"""Decodes and stores book information"""
"""Step One: filter_books"""
def __init__(self):
self.file_list = []
self.opf_regx = re.compile(r'\.opf')
self.cover_regx = re.compile(r'\.jpg|\.jpeg|\.png|\.bmp|\.gif')
self.html_regx = re.compile(r'\.html')
"""
with open(config.book_shelf, 'r') as f:
try:
self.catalogue = json.load(f)
self.current_files = self.scan_folder()
except Exception:
self.filter_books()
"""
def scan_folder(self, folder=config.book_path):
for f in os.listdir(folder):
@@ -46,11 +42,11 @@ class Catalogue:
def filter_books(self):
"""
Scan book folder recursively for epub files
filter_books(0) -> Catalogue.books
filter_books(1) -> self.books[]
:param ret: 0 -> create class property -> dump json
:param ret: 1 -> create & return class property
Scan book folder recursively for epub files
filter_books(0) -> Catalogue.books
filter_books(1) -> self.books[]
:param ret: 0 -> create class property -> dump json
:param ret: 1 -> create & return class property
"""
self.scan_folder()
regx = re.compile(r"\.epub")
@@ -119,8 +115,6 @@ class Catalogue:
return cover
def extract_cover_image(self, book_zip, book):
# TODO Handle books that have no Cover Image
# TODO Handle books with html covers
cover = book_zip.open(
list(
filter(self.cover_regx.search, book['files'])
@@ -130,9 +124,28 @@ class Catalogue:
except KeyError: return False
def compare_shelf_current(self):
try:
self.books
except Exception:
self.filter_books()
unique = set(self.books) - set(self.catalogue)
return unique
db = Storage()
stored = db.book_paths_list()
closed = db.close()
try: self.books
except Exception: self.filter_books()
on_disk, in_storage = [], []
for _x in self.books: on_disk.append(_x)
for _y in stored: in_storage.append(_y[0])
a, b, = set(on_disk), set(in_storage)
c = set.difference(a, b)
return c
def import_books(self, list=None):
book_list = self.compare_shelf_current()
db = Storage()
for book in book_list:
book = self.process_book(book)
extracted = self.extract_metadata(book)
db.insert_book(extracted)
inserted = db.commit()
if inserted is not True:
print(inserted)
if input('Continue ? y/n') == 'y':
pass
db.close()

View File

@@ -2,10 +2,12 @@
import os
import zipfile
from config import Config
from library import Catalogue
from storage import Storage
from lib.library import Catalogue
from lib.storage import Storage
config = Config()
Storage = Storage()
class InitFiles:
"""First run file creation operations"""
def __init__(self, file_array):
@@ -18,23 +20,5 @@ class InitFiles:
"""Create the file"""
if not os.path.isdir(os.path.split(_pointer)[0]):
os.mkdir(os.path.split(_pointer)[0])
f = open(_pointer, "w+")
f = open(_pointer, "w+")
f.close()
class Epub:
"""All Epub file handling"""
def __init__(self):
global config
self.book_path = config.book_path
self.Catalogue = Catalogue()
def import_books(self):
book_list = self.Catalogue.filter_books()
for book in book_list:
extracted = self.Catalogue.extract_metadata(book_list[book])
Storage.insert_book(extracted)
Storage.commit()
def book_list(self):
pass

View File

@@ -1,7 +1,7 @@
#!/usr/bin/python
import sys
import sqlite3
sys.path.insert(1,'../')
# sys.path.insert(1, '../')
from config import Config
db_pointer = Config().catalogue_db
@@ -55,6 +55,17 @@ class Storage:
print(e)
return False
def book_paths_list(self):
q = '''SELECT file_name FROM books'''
x = self.cursor.execute(q)
try: x = x.fetchall()
except Exception: x = []
return x
def commit(self):
try: self.db.commit(); return True
except Exception as e: return False
except Exception as e: return e
def close(self):
self.db.close()
return True

25
main.py
View File

@@ -1,19 +1,18 @@
#!/usr/bin/python
import sys
from PIL import Image
sys.path.insert(1, 'lib/')
from pyShelf import InitFiles, Epub
from config import Config
from library import Catalogue
config = Config() # Get configuration settings
InitFiles(config.file_array) # Initialize file system
Catalogue = Catalogue() # Open the Catalogue
# This only needs to be run on first run, & when new books are added
Epub().import_books() # Filter Your books
# TODO Implement file tracking system to avoid processing already tracked books
from config import Config
from lib.library import Catalogue
from lib.pyShelf import InitFiles
sys.path.insert(1, 'lib/')
config = Config() # Get configuration settings
InitFiles(config.file_array) # Initialize file system
Catalogue = Catalogue() # Open the Catalogue
# new_books = Catalogue.new_files()
Catalogue.import_books() # Filter Your books
# TODO Figure out a system to get books page count
# TODO Update testing
# TODO Update Documentation
# TODO Requirements.txt
# TODO Test image storage

View File

@@ -1,23 +1,10 @@
appdirs==1.4.3
beautifulsoup4==4.8.0
bs4==0.0.1
certifi==2019.9.11
chardet==3.0.4
cssselect==1.1.0
fake-useragent==0.1.11
idna==2.8
lxml==4.4.1
parse==1.12.1
Pillow==6.2.0
pyee==6.0.0
Pygments==2.4.2
pyppeteer==0.0.25
pyquery==1.4.0
requests==2.22.0
six==1.12.0
soupsieve==1.9.4
tqdm==4.36.1
urllib3==1.25.6
urwid==2.0.1
w3lib==1.21.0
websockets==8.0.2
bs4
certifi
lxml
Pillow
requests
soupsieve
urllib3
urwid
w3lib
websockets

View File

@@ -0,0 +1,2 @@
import sys
sys.path.insert(1, '../lib/')

View File

@@ -1,16 +1,20 @@
import unittest
import sys
sys.path.insert(1, 'lib/')
from library import Catalogue
# sys.path.insert(1, '../')
from lib.library import Catalogue
Catalogue = Catalogue()
class LibraryTest(unittest.TestCase):
def test_libray_catalogue(self):
self.assertIsNotNone(Catalogue())
self.assertIsNotNone(Catalogue)
def test_library_catalogue_filter_books(self):
self.assertIsNotNone(Catalogue().filter_books())
self.assertIsNotNone(Catalogue.filter_books())
def test_library_catalogue_new_files(self):
self.assertIsNot(Catalogue.new_files(), False)
if __name__ == '__main__':
unittest.main()

View File

@@ -1,7 +1,7 @@
import unittest
import sys
sys.path.insert(1, 'lib/')
from storage import Storage
# sys.path.insert(1, '../')
from lib.storage import Storage
storage = Storage()
@@ -13,4 +13,4 @@ class StorageTest(unittest.TestCase):
def test_Storage_create_tables(self):
self.assertIsNot(storage.create_tables(), Exception)
if __name__ == '__main__':
unittest.main()
unittest.main()

View File

@@ -2,9 +2,9 @@ import unittest
import os
import shutil
import sys
sys.path.insert(1, 'lib/')
from pyShelf import InitFiles
from pyShelf import Epub
# sys.path.insert(1, '../')
from lib.pyShelf import InitFiles
from lib.pyShelf import Epub
class SysIoTest(unittest.TestCase):