mirror of
https://github.com/th3r00t/pyShelf.git
synced 2026-04-28 01:59:35 -04:00
Implemented the return of unique files for storage
This commit is contained in:
Binary file not shown.
@@ -1,7 +1,7 @@
|
||||
class Config:
|
||||
"""Main System Configuration"""
|
||||
def __init__(self):
|
||||
self.book_path = "books/"
|
||||
self.book_path = "/home/raelon/Books/"
|
||||
self.book_shelf = "data/shelf.json"
|
||||
self.catalogue_db = "data/catalogue.db"
|
||||
self.file_array = [
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -1,7 +1,7 @@
|
||||
#!/usr/bin/python
|
||||
import sys
|
||||
import requests
|
||||
sys.path.insert(1, 'lib/')
|
||||
# sys.path.insert(1, 'lib/')
|
||||
|
||||
|
||||
class DuckDuckGo:
|
||||
@@ -18,7 +18,10 @@ class DuckDuckGo:
|
||||
try: query = query.string
|
||||
except AttributeError: query = query
|
||||
search_result = requests.get(self.url+query+_key)
|
||||
if search_result.status_code == 200 and search_result.json()['Image'] != '':
|
||||
try: image_result = search_result.json()['Image']
|
||||
except ValueError:
|
||||
image_result = ''
|
||||
if search_result.status_code == 200 and image_result != '':
|
||||
image = requests.get(search_result.json()['Image'], stream=True)
|
||||
image.raw.decode_content = True
|
||||
return image.raw
|
||||
|
||||
@@ -3,7 +3,9 @@ import json
|
||||
import os
|
||||
import re
|
||||
import zipfile
|
||||
from PIL import Image
|
||||
# import sys
|
||||
# sys.path.insert(1, '../')
|
||||
from lib.storage import Storage
|
||||
from bs4 import BeautifulSoup
|
||||
from config import Config
|
||||
from lib.api_hooks import DuckDuckGo
|
||||
@@ -19,22 +21,29 @@ class Catalogue:
|
||||
self.opf_regx = re.compile(r'\.opf')
|
||||
self.cover_regx = re.compile(r'\.jpg|\.jpeg|\.png|\.bmp|\.gif')
|
||||
self.html_regx = re.compile(r'\.html')
|
||||
self.scan_folder()
|
||||
|
||||
def scan_folder(self, folder=config.book_path):
    """Recursively collect file paths under ``folder`` and index the epubs.

    Every directory entry is appended to ``self.file_list`` as an absolute
    path (a sub-directory is appended after its own contents), and
    ``self.books`` is then rebuilt from the entries whose path matches the
    epub extension pattern.

    :param folder: directory to scan; defaults to the configured book path
    """
    for entry in os.listdir(folder):
        path = os.path.abspath(os.path.join(folder, entry))
        if os.path.isdir(path):
            # The recursive call records its findings on self.file_list and
            # returns nothing, so its result must not be appended (doing so
            # only littered the list with None placeholders).
            self.scan_folder(path)
        self.file_list.append(path)
    epub_regx = re.compile(r"\.epub")
    # filter(None, ...) is kept so that entries left behind by earlier scans
    # (e.g. None placeholders) never reach the regex.
    self.books = list(filter(epub_regx.search, filter(None, self.file_list)))
|
||||
|
||||
def scan_book(self, book):
    """Report whether ``book`` is a readable zip archive holding ``content.opf``.

    NOTE: the original author tagged this method "REMOVE ME?".

    :param book: path (or file object) of the archive to probe
    :returns: True when the archive has a ``content.opf`` member at its
        root, False when the member is missing or the archive cannot be
        opened (the reason is printed)
    """
    try:
        # Opening the archive belongs inside the guard: a corrupt or missing
        # file has to yield False rather than an uncaught exception.
        with zipfile.ZipFile(book) as epub:
            # getinfo() raises KeyError for an absent member and, unlike
            # open(), leaves no member handle behind to close.
            epub.getinfo('content.opf')
        return True
    except (KeyError, OSError, zipfile.BadZipFile) as e:
        print(e)
        return False
|
||||
|
||||
def filter_books(self, ret=0):
|
||||
"""
|
||||
@@ -44,16 +53,12 @@ class Catalogue:
|
||||
:param ret: 0 -> create class property -> dump json
|
||||
:param ret: 1 -> create & return class property
|
||||
"""
|
||||
self.scan_folder()
|
||||
regx = re.compile(r"\.epub")
|
||||
self.books = list(filter(regx.search, filter(None, self.file_list)))
|
||||
_book_list_expanded = {}
|
||||
with open(config.book_shelf, 'w') as f:
|
||||
for book in self.books:
|
||||
_book_list_expanded[book] = self.process_book(book)
|
||||
if ret != 0: return _book_list_expanded
|
||||
else:
|
||||
import ipdb; ipdb.set_trace()
|
||||
json.dump(_book_list_expanded, f)
|
||||
return _book_list_expanded
|
||||
|
||||
@@ -87,10 +92,12 @@ class Catalogue:
|
||||
title = soup.find("dc:title")
|
||||
if title == None:
|
||||
title = book['path'].split('/')[-1].rsplit('.', 1)[0]
|
||||
else: title = title.contents[0]
|
||||
else:
|
||||
title = title.contents[0]
|
||||
author = soup.find("dc:creator")
|
||||
if author != None: author = author.contents[0]
|
||||
try: cover = self.extract_cover_image(book_zip, book)
|
||||
try:
|
||||
cover = self.extract_cover_image(book_zip, book)
|
||||
except IndexError:
|
||||
# cover = self.extract_cover_html(book_zip, book)
|
||||
cover = DuckDuckGo().image_result(title)
|
||||
@@ -99,35 +106,32 @@ class Catalogue:
|
||||
|
||||
def extract_content(self, book_zip, book):
    """Open the first ``.opf`` member listed for ``book``.

    :param book_zip: open archive object exposing ``open(name)``
    :param book: mapping whose ``'files'`` entry lists archive member names
    :returns: the handle produced by ``book_zip.open`` for that member
    :raises IndexError: when none of the listed names matches the pattern
    """
    opf_members = [name for name in book['files']
                   if self.opf_regx.search(name)]
    first_opf = opf_members[0]
    return book_zip.open(first_opf)
|
||||
|
||||
def extract_cover_html(self, book_zip, book):
    """Open the first ``.html`` member listed for ``book``.

    :param book_zip: open archive object exposing ``open(name)``
    :param book: mapping whose ``'files'`` entry lists archive member names
    :returns: the handle produced by ``book_zip.open`` for that member
    :raises IndexError: when none of the listed names matches the pattern
    """
    html_members = [name for name in book['files']
                    if self.html_regx.search(name)]
    first_html = html_members[0]
    return book_zip.open(first_html)
|
||||
|
||||
def extract_cover_image(self, book_zip, book):
    """Return the raw bytes of the first image member listed for ``book``.

    :param book_zip: open archive object exposing ``read(name)``
    :param book: mapping whose ``'files'`` entry lists archive member names
    :returns: the member's bytes, or False when the listed name is not
        actually present in the archive
    :raises IndexError: when none of the listed names looks like an image;
        ``extract_metadata`` catches this to fall back to a web lookup
    """
    # TODO Handle books that have no Cover Image
    # TODO Handle books with html covers
    cover_names = [name for name in book['files']
                   if self.cover_regx.search(name)]
    cover_name = cover_names[0]
    try:
        # Read straight by name: the previous open()-then-read() sequence
        # leaked the opened handle and raised KeyError before the guard
        # below could ever turn it into False.
        return book_zip.read(cover_name)
    except KeyError:
        return False


def compare_shelf_current(self):
    """Compute which entries of ``self.books`` are absent from ``self.catalogue``.

    NOTE(review): the difference is computed and then discarded, and
    ``self.catalogue`` is not assigned anywhere in the visible code —
    confirm whether this method is still needed alongside ``new_files``.
    """
    try:
        self.books
    except Exception:
        # Populate self.books when no scan has produced it yet.
        self.filter_books(1)
    unique = set(self.books) - set(self.catalogue)
|
||||
|
||||
def new_files(self):
    """Return the scanned books that storage does not know about yet.

    :returns: set of the paths in ``self.books`` that are missing from the
        stored paths, or False when the stored paths could not be compared
    """
    storage = Storage()
    try:
        # The last column of each stored row is taken as the book's path.
        stored_paths = {row[-1] for row in storage.book_paths_list()}
        return set(self.books) - stored_paths
    except Exception as e:
        # The best-effort contract is kept (callers compare against False),
        # but the reason is reported instead of being silently dropped.
        print(e)
        return False
|
||||
|
||||
@@ -2,10 +2,12 @@
|
||||
import os
|
||||
import zipfile
|
||||
from config import Config
|
||||
from library import Catalogue
|
||||
from storage import Storage
|
||||
from lib.library import Catalogue
|
||||
from lib.storage import Storage
|
||||
config = Config()
|
||||
Storage = Storage()
|
||||
|
||||
|
||||
class InitFiles:
|
||||
"""First run file creation operations"""
|
||||
def __init__(self, file_array):
|
||||
@@ -29,8 +31,9 @@ class Epub:
|
||||
self.book_path = config.book_path
|
||||
self.Catalogue = Catalogue()
|
||||
|
||||
def import_books(self, list=None):
    """Extract metadata for each book and pass it to storage.

    :param list: books to import. May be a mapping of path -> processed
        book (the shape ``Catalogue.filter_books`` returns) or a plain
        iterable of paths (the shape ``Catalogue.new_files`` returns).
        ``None`` imports everything ``filter_books`` finds; an empty or
        False value (``new_files`` signals failure with False) imports
        nothing.
    """
    # NOTE: the parameter shadows the ``list`` builtin; the name is kept so
    # existing keyword callers keep working.
    if list is None:
        book_list = self.Catalogue.filter_books()
    else:
        book_list = list
    if not book_list:
        return
    if isinstance(book_list, dict):
        processed_books = book_list.values()
    else:
        # A bare collection of paths cannot be indexed by path, so each one
        # is processed here the same way filter_books would have done.
        processed_books = (self.Catalogue.process_book(path)
                           for path in book_list)
    for processed in processed_books:
        extracted = self.Catalogue.extract_metadata(processed)
        Storage.insert_book(extracted)
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#!/usr/bin/python
|
||||
import sys
|
||||
import sqlite3
|
||||
sys.path.insert(1,'../')
|
||||
# sys.path.insert(1, '../')
|
||||
from config import Config
|
||||
db_pointer = Config().catalogue_db
|
||||
|
||||
@@ -55,6 +55,13 @@ class Storage:
|
||||
print(e)
|
||||
return False
|
||||
|
||||
def book_paths_list(self):
    """Fetch the ``file_name`` column of every row in the ``books`` table.

    :returns: the rows exactly as ``fetchall`` yields them (one sequence per
        row), or an empty list when the query cannot be run
    """
    q = '''SELECT file_name FROM books'''
    try:
        # execute() is the call that fails when the table is missing, so it
        # has to sit inside the guard together with fetchall().
        return self.cursor.execute(q).fetchall()
    except sqlite3.Error:
        return []
|
||||
|
||||
def commit(self):
    """Commit the pending transaction on ``self.db``.

    :returns: True when the commit went through, False when it raised
    """
    try:
        self.db.commit()
    except Exception:
        return False
    return True
|
||||
4
main.py
4
main.py
@@ -1,6 +1,5 @@
|
||||
#!/usr/bin/python
|
||||
import sys
|
||||
from PIL import Image
|
||||
sys.path.insert(1, 'lib/')
|
||||
from pyShelf import InitFiles, Epub
|
||||
from config import Config
|
||||
@@ -10,7 +9,8 @@ config = Config() # Get configuration settings
|
||||
InitFiles(config.file_array) # Initialize file system
|
||||
Catalogue = Catalogue() # Open the Catalogue
|
||||
# This only needs to be run on first run, & when new books are added
|
||||
Epub().import_books() # Filter Your books
|
||||
new_books = Catalogue.new_files()
|
||||
Epub().import_books(new_books) # Filter Your books
|
||||
# TODO Implement file tracking system to avoid processing already tracked books
|
||||
# TODO Figure out a system to get books page count
|
||||
# TODO Update testing
|
||||
|
||||
@@ -1,23 +1,10 @@
|
||||
appdirs==1.4.3
|
||||
beautifulsoup4==4.8.0
|
||||
bs4==0.0.1
|
||||
certifi==2019.9.11
|
||||
chardet==3.0.4
|
||||
cssselect==1.1.0
|
||||
fake-useragent==0.1.11
|
||||
idna==2.8
|
||||
lxml==4.4.1
|
||||
parse==1.12.1
|
||||
Pillow==6.2.0
|
||||
pyee==6.0.0
|
||||
Pygments==2.4.2
|
||||
pyppeteer==0.0.25
|
||||
pyquery==1.4.0
|
||||
requests==2.22.0
|
||||
six==1.12.0
|
||||
soupsieve==1.9.4
|
||||
tqdm==4.36.1
|
||||
urllib3==1.25.6
|
||||
urwid==2.0.1
|
||||
w3lib==1.21.0
|
||||
websockets==8.0.2
|
||||
bs4
|
||||
certifi
|
||||
lxml
|
||||
Pillow
|
||||
requests
|
||||
soupsieve
|
||||
urllib3
|
||||
urwid
|
||||
w3lib
|
||||
websockets
|
||||
|
||||
@@ -0,0 +1,2 @@
|
||||
import sys
|
||||
sys.path.insert(1, '../lib/')
|
||||
|
||||
Binary file not shown.
Binary file not shown.
@@ -1,6 +1,6 @@
|
||||
import unittest
|
||||
import sys
|
||||
sys.path.insert(1, '../')
|
||||
# sys.path.insert(1, '../')
|
||||
from lib.library import Catalogue
|
||||
Catalogue = Catalogue()
|
||||
|
||||
@@ -12,5 +12,9 @@ class LibraryTest(unittest.TestCase):
|
||||
def test_library_catalogue_filter_books(self):
    """filter_books() must hand back a result rather than None."""
    filtered = Catalogue.filter_books()
    self.assertIsNotNone(filtered)
|
||||
|
||||
def test_library_catalogue_new_files(self):
    """new_files() must not return its False failure value."""
    unseen = Catalogue.new_files()
    self.assertIsNot(unseen, False)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import unittest
|
||||
import sys
|
||||
sys.path.insert(1, '../')
|
||||
# sys.path.insert(1, '../')
|
||||
from lib.storage import Storage
|
||||
|
||||
storage = Storage()
|
||||
|
||||
@@ -2,7 +2,7 @@ import unittest
|
||||
import os
|
||||
import shutil
|
||||
import sys
|
||||
sys.path.insert(1, '../')
|
||||
# sys.path.insert(1, '../')
|
||||
from lib.pyShelf import InitFiles
|
||||
from lib.pyShelf import Epub
|
||||
|
||||
|
||||
Reference in New Issue
Block a user