mirror of
https://github.com/th3r00t/pyShelf.git
synced 2026-04-28 01:59:35 -04:00
Implemented the return of unique files for storage
This commit is contained in:
Binary file not shown.
@@ -1,7 +1,7 @@
|
|||||||
class Config:
|
class Config:
|
||||||
"""Main System Configuration"""
|
"""Main System Configuration"""
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.book_path = "books/"
|
self.book_path = "/home/raelon/Books/"
|
||||||
self.book_shelf = "data/shelf.json"
|
self.book_shelf = "data/shelf.json"
|
||||||
self.catalogue_db = "data/catalogue.db"
|
self.catalogue_db = "data/catalogue.db"
|
||||||
self.file_array = [
|
self.file_array = [
|
||||||
|
|||||||
File diff suppressed because one or more lines are too long
@@ -1,7 +1,7 @@
|
|||||||
#!/usr/bin/python
|
#!/usr/bin/python
|
||||||
import sys
|
import sys
|
||||||
import requests
|
import requests
|
||||||
sys.path.insert(1, 'lib/')
|
# sys.path.insert(1, 'lib/')
|
||||||
|
|
||||||
|
|
||||||
class DuckDuckGo:
|
class DuckDuckGo:
|
||||||
@@ -18,7 +18,10 @@ class DuckDuckGo:
|
|||||||
try: query = query.string
|
try: query = query.string
|
||||||
except AttributeError: query = query
|
except AttributeError: query = query
|
||||||
search_result = requests.get(self.url+query+_key)
|
search_result = requests.get(self.url+query+_key)
|
||||||
if search_result.status_code == 200 and search_result.json()['Image'] != '':
|
try: image_result = search_result.json()['Image']
|
||||||
|
except ValueError:
|
||||||
|
image_result = ''
|
||||||
|
if search_result.status_code == 200 and image_result != '':
|
||||||
image = requests.get(search_result.json()['Image'], stream=True)
|
image = requests.get(search_result.json()['Image'], stream=True)
|
||||||
image.raw.decode_content = True
|
image.raw.decode_content = True
|
||||||
return image.raw
|
return image.raw
|
||||||
|
|||||||
@@ -3,7 +3,9 @@ import json
|
|||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import zipfile
|
import zipfile
|
||||||
from PIL import Image
|
# import sys
|
||||||
|
# sys.path.insert(1, '../')
|
||||||
|
from lib.storage import Storage
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from config import Config
|
from config import Config
|
||||||
from lib.api_hooks import DuckDuckGo
|
from lib.api_hooks import DuckDuckGo
|
||||||
@@ -19,22 +21,29 @@ class Catalogue:
|
|||||||
self.opf_regx = re.compile(r'\.opf')
|
self.opf_regx = re.compile(r'\.opf')
|
||||||
self.cover_regx = re.compile(r'\.jpg|\.jpeg|\.png|\.bmp|\.gif')
|
self.cover_regx = re.compile(r'\.jpg|\.jpeg|\.png|\.bmp|\.gif')
|
||||||
self.html_regx = re.compile(r'\.html')
|
self.html_regx = re.compile(r'\.html')
|
||||||
|
self.scan_folder()
|
||||||
|
|
||||||
def scan_folder(self, folder=config.book_path):
|
def scan_folder(self, folder=config.book_path):
|
||||||
for f in os.listdir(folder):
|
for f in os.listdir(folder):
|
||||||
_path = os.path.abspath(folder+'/'+f)
|
_path = os.path.abspath(folder + '/' + f)
|
||||||
#_path = os.path.abspath('.')+'/'+folder+f+'/'
|
# _path = os.path.abspath('.')+'/'+folder+f+'/'
|
||||||
_is_dir = os.path.isdir(_path.strip()+'/')
|
_is_dir = os.path.isdir(_path.strip() + '/')
|
||||||
if _is_dir:
|
if _is_dir:
|
||||||
self.file_list.append(self.scan_folder(_path))
|
self.file_list.append(self.scan_folder(_path))
|
||||||
self.file_list.append(_path)
|
self.file_list.append(_path)
|
||||||
|
regx = re.compile(r"\.epub")
|
||||||
|
self.books = list(filter(regx.search, filter(None, self.file_list)))
|
||||||
|
|
||||||
def scan_book(self, book):
|
def scan_book(self, book):
|
||||||
"""REMOVE ME?"""
|
"""REMOVE ME?"""
|
||||||
_epub = zipfile.ZipFile(book)
|
_epub = zipfile.ZipFile(book)
|
||||||
with _epub as _epub_open:
|
with _epub as _epub_open:
|
||||||
try: _epub_open.open('content.opf'); return True
|
try:
|
||||||
except Exception as e: print(e); return False
|
_epub_open.open('content.opf')
|
||||||
|
return True
|
||||||
|
except Exception as e:
|
||||||
|
print(e)
|
||||||
|
return False
|
||||||
|
|
||||||
def filter_books(self, ret=0):
|
def filter_books(self, ret=0):
|
||||||
"""
|
"""
|
||||||
@@ -44,16 +53,12 @@ class Catalogue:
|
|||||||
:param ret: 0 -> create class property -> dump json
|
:param ret: 0 -> create class property -> dump json
|
||||||
:param ret: 1 -> create & return class property
|
:param ret: 1 -> create & return class property
|
||||||
"""
|
"""
|
||||||
self.scan_folder()
|
|
||||||
regx = re.compile(r"\.epub")
|
|
||||||
self.books = list(filter(regx.search, filter(None, self.file_list)))
|
|
||||||
_book_list_expanded = {}
|
_book_list_expanded = {}
|
||||||
with open(config.book_shelf, 'w') as f:
|
with open(config.book_shelf, 'w') as f:
|
||||||
for book in self.books:
|
for book in self.books:
|
||||||
_book_list_expanded[book] = self.process_book(book)
|
_book_list_expanded[book] = self.process_book(book)
|
||||||
if ret != 0: return _book_list_expanded
|
if ret != 0: return _book_list_expanded
|
||||||
else:
|
else:
|
||||||
import ipdb; ipdb.set_trace()
|
|
||||||
json.dump(_book_list_expanded, f)
|
json.dump(_book_list_expanded, f)
|
||||||
return _book_list_expanded
|
return _book_list_expanded
|
||||||
|
|
||||||
@@ -87,10 +92,12 @@ class Catalogue:
|
|||||||
title = soup.find("dc:title")
|
title = soup.find("dc:title")
|
||||||
if title == None:
|
if title == None:
|
||||||
title = book['path'].split('/')[-1].rsplit('.', 1)[0]
|
title = book['path'].split('/')[-1].rsplit('.', 1)[0]
|
||||||
else: title = title.contents[0]
|
else:
|
||||||
|
title = title.contents[0]
|
||||||
author = soup.find("dc:creator")
|
author = soup.find("dc:creator")
|
||||||
if author != None: author = author.contents[0]
|
if author != None: author = author.contents[0]
|
||||||
try: cover = self.extract_cover_image(book_zip, book)
|
try:
|
||||||
|
cover = self.extract_cover_image(book_zip, book)
|
||||||
except IndexError:
|
except IndexError:
|
||||||
# cover = self.extract_cover_html(book_zip, book)
|
# cover = self.extract_cover_html(book_zip, book)
|
||||||
cover = DuckDuckGo().image_result(title)
|
cover = DuckDuckGo().image_result(title)
|
||||||
@@ -99,35 +106,32 @@ class Catalogue:
|
|||||||
|
|
||||||
def extract_content(self, book_zip, book):
|
def extract_content(self, book_zip, book):
|
||||||
content = book_zip.open(
|
content = book_zip.open(
|
||||||
list(
|
list(filter(self.opf_regx.search, book['files']))[0])
|
||||||
filter(self.opf_regx.search, book['files'])
|
|
||||||
)[0]
|
|
||||||
)
|
|
||||||
return content
|
return content
|
||||||
|
|
||||||
def extract_cover_html(self, book_zip, book):
|
def extract_cover_html(self, book_zip, book):
|
||||||
cover = book_zip.open(
|
cover = book_zip.open(
|
||||||
list(
|
list(filter(self.html_regx.search, book['files']))[0])
|
||||||
filter(self.html_regx.search, book['files'])
|
|
||||||
)[0]
|
|
||||||
)
|
|
||||||
return cover
|
return cover
|
||||||
|
|
||||||
def extract_cover_image(self, book_zip, book):
|
def extract_cover_image(self, book_zip, book):
|
||||||
# TODO Handle books that have no Cover Image
|
# TODO Handle books that have no Cover Image
|
||||||
# TODO Handle books with html covers
|
# TODO Handle books with html covers
|
||||||
cover = book_zip.open(
|
cover = book_zip.open(
|
||||||
list(
|
list(filter(self.cover_regx.search, book['files']))[0])
|
||||||
filter(self.cover_regx.search, book['files'])
|
|
||||||
)[0]
|
|
||||||
)
|
|
||||||
try: cover = book_zip.read(cover.name); return cover
|
|
||||||
except KeyError: return False
|
|
||||||
|
|
||||||
def compare_shelf_current(self):
|
|
||||||
try:
|
try:
|
||||||
self.books
|
cover = book_zip.read(cover.name)
|
||||||
except Exception:
|
return cover
|
||||||
self.filter_books(1)
|
except KeyError:
|
||||||
unique = set(self.books) - set(self.catalogue)
|
return False
|
||||||
|
|
||||||
|
def new_files(self):
|
||||||
|
storage = Storage()
|
||||||
|
try:
|
||||||
|
a = []
|
||||||
|
stored = storage.book_paths_list()
|
||||||
|
for i in stored: a.append(i[-1])
|
||||||
|
unique = set(self.books) - set(a)
|
||||||
return unique
|
return unique
|
||||||
|
except Exception:
|
||||||
|
return False
|
||||||
|
|||||||
@@ -2,10 +2,12 @@
|
|||||||
import os
|
import os
|
||||||
import zipfile
|
import zipfile
|
||||||
from config import Config
|
from config import Config
|
||||||
from library import Catalogue
|
from lib.library import Catalogue
|
||||||
from storage import Storage
|
from lib.storage import Storage
|
||||||
config = Config()
|
config = Config()
|
||||||
Storage = Storage()
|
Storage = Storage()
|
||||||
|
|
||||||
|
|
||||||
class InitFiles:
|
class InitFiles:
|
||||||
"""First run file creation operations"""
|
"""First run file creation operations"""
|
||||||
def __init__(self, file_array):
|
def __init__(self, file_array):
|
||||||
@@ -29,8 +31,9 @@ class Epub:
|
|||||||
self.book_path = config.book_path
|
self.book_path = config.book_path
|
||||||
self.Catalogue = Catalogue()
|
self.Catalogue = Catalogue()
|
||||||
|
|
||||||
def import_books(self):
|
def import_books(self, list=None):
|
||||||
book_list = self.Catalogue.filter_books()
|
if list is not None: book_list = list
|
||||||
|
else: book_list = self.Catalogue.filter_books()
|
||||||
for book in book_list:
|
for book in book_list:
|
||||||
extracted = self.Catalogue.extract_metadata(book_list[book])
|
extracted = self.Catalogue.extract_metadata(book_list[book])
|
||||||
Storage.insert_book(extracted)
|
Storage.insert_book(extracted)
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
#!/usr/bin/python
|
#!/usr/bin/python
|
||||||
import sys
|
import sys
|
||||||
import sqlite3
|
import sqlite3
|
||||||
sys.path.insert(1,'../')
|
# sys.path.insert(1, '../')
|
||||||
from config import Config
|
from config import Config
|
||||||
db_pointer = Config().catalogue_db
|
db_pointer = Config().catalogue_db
|
||||||
|
|
||||||
@@ -55,6 +55,13 @@ class Storage:
|
|||||||
print(e)
|
print(e)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
def book_paths_list(self):
|
||||||
|
q = '''SELECT file_name FROM books'''
|
||||||
|
x = self.cursor.execute(q)
|
||||||
|
try: x = x.fetchall()
|
||||||
|
except Exception: x = []
|
||||||
|
return x
|
||||||
|
|
||||||
def commit(self):
|
def commit(self):
|
||||||
try: self.db.commit(); return True
|
try: self.db.commit(); return True
|
||||||
except Exception as e: return False
|
except Exception as e: return False
|
||||||
4
main.py
4
main.py
@@ -1,6 +1,5 @@
|
|||||||
#!/usr/bin/python
|
#!/usr/bin/python
|
||||||
import sys
|
import sys
|
||||||
from PIL import Image
|
|
||||||
sys.path.insert(1, 'lib/')
|
sys.path.insert(1, 'lib/')
|
||||||
from pyShelf import InitFiles, Epub
|
from pyShelf import InitFiles, Epub
|
||||||
from config import Config
|
from config import Config
|
||||||
@@ -10,7 +9,8 @@ config = Config() # Get configuration settings
|
|||||||
InitFiles(config.file_array) # Initialize file system
|
InitFiles(config.file_array) # Initialize file system
|
||||||
Catalogue = Catalogue() # Open the Catalogue
|
Catalogue = Catalogue() # Open the Catalogue
|
||||||
# This only needs to be run on first run, & when new books are added
|
# This only needs to be run on first run, & when new books are added
|
||||||
Epub().import_books() # Filter Your books
|
new_books = Catalogue.new_files()
|
||||||
|
Epub().import_books(new_books) # Filter Your books
|
||||||
# TODO Implement file tracking system to avoid processing already tracked books
|
# TODO Implement file tracking system to avoid processing already tracked books
|
||||||
# TODO Figure out a system to get books page count
|
# TODO Figure out a system to get books page count
|
||||||
# TODO Update testing
|
# TODO Update testing
|
||||||
|
|||||||
@@ -1,23 +1,10 @@
|
|||||||
appdirs==1.4.3
|
bs4
|
||||||
beautifulsoup4==4.8.0
|
certifi
|
||||||
bs4==0.0.1
|
lxml
|
||||||
certifi==2019.9.11
|
Pillow
|
||||||
chardet==3.0.4
|
requests
|
||||||
cssselect==1.1.0
|
soupsieve
|
||||||
fake-useragent==0.1.11
|
urllib3
|
||||||
idna==2.8
|
urwid
|
||||||
lxml==4.4.1
|
w3lib
|
||||||
parse==1.12.1
|
websockets
|
||||||
Pillow==6.2.0
|
|
||||||
pyee==6.0.0
|
|
||||||
Pygments==2.4.2
|
|
||||||
pyppeteer==0.0.25
|
|
||||||
pyquery==1.4.0
|
|
||||||
requests==2.22.0
|
|
||||||
six==1.12.0
|
|
||||||
soupsieve==1.9.4
|
|
||||||
tqdm==4.36.1
|
|
||||||
urllib3==1.25.6
|
|
||||||
urwid==2.0.1
|
|
||||||
w3lib==1.21.0
|
|
||||||
websockets==8.0.2
|
|
||||||
|
|||||||
@@ -0,0 +1,2 @@
|
|||||||
|
import sys
|
||||||
|
sys.path.insert(1, '../lib/')
|
||||||
|
|||||||
Binary file not shown.
Binary file not shown.
@@ -1,6 +1,6 @@
|
|||||||
import unittest
|
import unittest
|
||||||
import sys
|
import sys
|
||||||
sys.path.insert(1, '../')
|
# sys.path.insert(1, '../')
|
||||||
from lib.library import Catalogue
|
from lib.library import Catalogue
|
||||||
Catalogue = Catalogue()
|
Catalogue = Catalogue()
|
||||||
|
|
||||||
@@ -12,5 +12,9 @@ class LibraryTest(unittest.TestCase):
|
|||||||
def test_library_catalogue_filter_books(self):
|
def test_library_catalogue_filter_books(self):
|
||||||
self.assertIsNotNone(Catalogue.filter_books())
|
self.assertIsNotNone(Catalogue.filter_books())
|
||||||
|
|
||||||
|
def test_library_catalogue_new_files(self):
|
||||||
|
self.assertIsNot(Catalogue.new_files(), False)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
import unittest
|
import unittest
|
||||||
import sys
|
import sys
|
||||||
sys.path.insert(1, '../')
|
# sys.path.insert(1, '../')
|
||||||
from lib.storage import Storage
|
from lib.storage import Storage
|
||||||
|
|
||||||
storage = Storage()
|
storage = Storage()
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ import unittest
|
|||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
import sys
|
import sys
|
||||||
sys.path.insert(1, '../')
|
# sys.path.insert(1, '../')
|
||||||
from lib.pyShelf import InitFiles
|
from lib.pyShelf import InitFiles
|
||||||
from lib.pyShelf import Epub
|
from lib.pyShelf import Epub
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user