mirror of
https://github.com/th3r00t/pyShelf.git
synced 2026-04-28 01:59:35 -04:00
Began extracting data from epub files
This commit is contained in:
@@ -1,7 +1,7 @@
|
|||||||
# pyShelf
|
# pyShelf
|
||||||
A simple terminal based ebook server
|
A simple terminal based ebook server
|
||||||
|
|
||||||
Frusterated with Calibre being my only option for hosting my eBook collection, I have decided to spin up my own.
|
Frustrated with Calibre being my only option for hosting my eBook collection, I have decided to spin up my own.
|
||||||
|
|
||||||
Calibre is a great organizational tool for your books, however not having a terminal based option for running and maintaining
|
Calibre is a great organizational tool for your books, however not having a terminal based option for running and maintaining
|
||||||
a server is cumbersome when running on older equipment. Thus I am creating pyShelf and I hope to be able to provide all
|
a server is cumbersome when running on older equipment. Thus I am creating pyShelf and I hope to be able to provide all
|
||||||
|
|||||||
BIN
__pycache__/config.cpython-37.pyc
Executable file → Normal file
BIN
__pycache__/config.cpython-37.pyc
Executable file → Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -3,9 +3,11 @@ class Config:
|
|||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.book_path = "books/"
|
self.book_path = "books/"
|
||||||
self.book_shelf = "data/shelf.json"
|
self.book_shelf = "data/shelf.json"
|
||||||
|
self.catalogue_db = "data/catalogue.db"
|
||||||
self.file_array = [
|
self.file_array = [
|
||||||
"data/catalogue.json",
|
"data/catalogue.json",
|
||||||
"data/shelf.json",
|
self.book_shelf,
|
||||||
|
self.catalogue_db,
|
||||||
"conf/settings.json"
|
"conf/settings.json"
|
||||||
]
|
]
|
||||||
self.auto_scan = True
|
self.auto_scan = True
|
||||||
BIN
data/catalogue.db
Normal file
BIN
data/catalogue.db
Normal file
Binary file not shown.
145
data/shelf.json
145
data/shelf.json
File diff suppressed because one or more lines are too long
0
lib/__init__.py
Normal file
0
lib/__init__.py
Normal file
96
lib/library.py
Executable file
96
lib/library.py
Executable file
@@ -0,0 +1,96 @@
|
|||||||
|
#!/usr/bin/python
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import zipfile
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
from config import Config
|
||||||
|
config = Config()
|
||||||
|
|
||||||
|
|
||||||
|
class Catalogue:
    """Decodes and stores book information.

    Typical use: call :meth:`filter_books` to build the shelf, then pass each
    entry of the returned dictionary to :meth:`extract_metadata`.
    """

    def __init__(self):
        """Load the stored shelf, rebuilding it when it cannot be parsed."""
        self.file_list = []
        self.catalogue = {}
        # Read and close the shelf before anything rewrites it, so a rebuild
        # never truncates a file that is still open for reading.
        with open(config.book_shelf, 'r') as f:
            try:
                stored = json.load(f)
            except ValueError:
                # json.JSONDecodeError is a ValueError: empty or corrupt shelf
                stored = None
        if stored is None:
            # Nothing usable on disk (e.g. first run): regenerate the shelf.
            self.catalogue = self.filter_books()
            self.current_files = list(self.file_list)
        else:
            self.catalogue = stored
            self.current_files = self.scan_folder()

    @staticmethod
    def _list_files(folder):
        """Return absolute paths of every non-directory entry below folder."""
        paths = []
        for name in sorted(os.listdir(folder)):
            path = os.path.abspath(os.path.join(folder, name))
            if os.path.isdir(path):
                paths.extend(Catalogue._list_files(path))
            else:
                paths.append(path)
        return paths

    def scan_folder(self, folder=None):
        """
        Recursively collect the files below a folder into self.file_list

        Paths already present in self.file_list are not added again, so the
        method can be called repeatedly without creating duplicates.
        :param folder: folder to scan, defaults to config.book_path
        :returns: list of absolute file paths found by this scan
        """
        if folder is None:
            # Resolved at call time so the class can be defined (and tested)
            # without the configuration being read at import.
            folder = config.book_path
        found = self._list_files(folder)
        known = set(self.file_list)
        for path in found:
            if path not in known:
                known.add(path)
                self.file_list.append(path)
        return found

    def scan_book(self, book):
        """
        Report whether an epub archive holds a top level content.opf

        REMOVE ME?
        :param book: path to the epub (zip) file
        :returns: True if content.opf is present, False (after printing the
                  lookup error) if it is not
        """
        with zipfile.ZipFile(book) as epub:
            try:
                # getinfo raises the same KeyError open() would, without
                # leaving an open member handle behind.
                epub.getinfo('content.opf')
                return True
            except Exception as e:
                print(e)
                return False

    def filter_books(self):
        """
        Scan book folder recursively for epub files

        Sets self.books to the epub paths found, stores the expanded listing
        as json in config.book_shelf and returns it.
        :returns: dictionary mapping each epub path to the result of
                  self.process_book for that path
        """
        self.scan_folder()
        # Match on the extension only; a '.epub' elsewhere in the path
        # (e.g. 'x.epub.bak') is not a book.
        self.books = [
            path for path in self.file_list
            if path and path.lower().endswith('.epub')
        ]
        _book_list_expanded = {}
        for book in self.books:
            try:
                _book_list_expanded[book] = self.process_book(book)
            except zipfile.BadZipFile as e:
                # One corrupt archive should not abort the whole scan.
                print("Skipping %s: %s" % (book, e))
        # Write only after every book was processed, so a failure above
        # cannot leave a truncated shelf behind.
        with open(config.book_shelf, 'w') as f:
            json.dump(_book_list_expanded, f)
        return _book_list_expanded

    def process_book(self, book):
        """
        Return dictionary of epub file contents

        :param book: path to the epub (zip) file
        :returns: {'files': archive member names containing '.opf' or
                  'cover', 'path': the path the archive was opened from}
        """
        regx = re.compile(r'\.opf|cover')
        with zipfile.ZipFile(book, 'r') as book_zip:
            details = {}
            # Zip file locations of the requested files
            details['files'] = [
                info.filename for info in book_zip.infolist()
                if regx.search(info.filename)
            ]
            details['path'] = book_zip.filename
        return details

    def extract_metadata(self, book):
        """
        Return the title element parsed from the book's .opf file

        book['path'] == Full path to ebook file
        book['files'] == list of files from self.process_book(book)
        :returns: result of soup.find("dc:title"), or None when the book
                  lists no .opf file
        """
        opf_regx = re.compile(r'\.opf')
        opf_files = list(filter(opf_regx.search, book['files']))
        if not opf_files:
            return None
        with zipfile.ZipFile(book['path'], 'r') as book_zip:
            with book_zip.open(opf_files[0]) as content:
                soup = BeautifulSoup(content, "xml")
        # TODO: also return the author ("dc:creator") and the cover
        # (meta element with name="cover"); only the title is used so far.
        return soup.find("dc:title")

    def compare_shelf_current(self):
        """
        Return the books found on disk that the stored shelf does not list

        Runs self.filter_books() first when no scan has happened yet.
        :returns: set of epub paths in self.books but not in self.catalogue
        """
        if not hasattr(self, 'books'):
            self.filter_books()
        return set(self.books) - set(getattr(self, 'catalogue', {}))
|
||||||
33
lib/pyShelf.py
Executable file
33
lib/pyShelf.py
Executable file
@@ -0,0 +1,33 @@
|
|||||||
|
#!/usr/bin/python
|
||||||
|
import os
|
||||||
|
import zipfile
|
||||||
|
from config import Config
|
||||||
|
config = Config()
|
||||||
|
|
||||||
|
class InitFiles:
    """First run file creation operations"""

    def __init__(self, file_array):
        """
        Create every file in file_array that does not exist yet

        :param file_array: iterable of file paths to ensure
        """
        print("Begining creation of file structure")
        for _pointer in file_array:
            if not os.path.isfile(_pointer):
                self.CreateFile(_pointer)

    def CreateFile(self, _pointer):
        """
        Create the file, together with any missing parent folders

        :param _pointer: path of the file to create (truncated if present)
        """
        parent = os.path.dirname(_pointer)
        # A bare file name has no parent component; os.mkdir('') would raise.
        if parent:
            # makedirs also covers parents nested more than one level deep
            os.makedirs(parent, exist_ok=True)
        with open(_pointer, "w+"):
            pass
|
||||||
|
|
||||||
|
|
||||||
|
class Epub:
    """All Epub file handling (import and listing are still stubs)"""

    def __init__(self):
        """Remember the configured book folder on the instance."""
        # Reading the module-level config needs no ``global`` declaration.
        self.book_path = config.book_path

    def import_book(self):
        """Placeholder, not implemented yet; returns None."""
        return None

    def book_list(self):
        """Placeholder, not implemented yet; returns None."""
        return None
|
||||||
48
lib/storage.py
Normal file
48
lib/storage.py
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
#!/usr/bin/python
|
||||||
|
import sys
|
||||||
|
import sqlite3
|
||||||
|
sys.path.insert(1,'../')
|
||||||
|
from config import Config
|
||||||
|
db_pointer = Config().catalogue_db
|
||||||
|
|
||||||
|
|
||||||
|
class Storage:
    """Contains all methods for system storage"""

    def __init__(self):
        """Open the catalogue database named by the configuration."""
        self.db_file = db_pointer
        self.database()

    def database(self):
        """
        Create database connection and cursor

        Closes a connection left over from an earlier call before opening
        the new one.
        :returns: True if succeeds False if not
        """
        previous = getattr(self, 'db', None)
        if previous is not None:
            previous.close()
        try:
            self.db = sqlite3.connect(self.db_file)
            self.cursor = self.db.cursor()
            return True
        except sqlite3.Error as e:
            print(e)
            return False

    def create_tables(self):
        """Create table structure, leaving an existing books table alone"""
        q_create = '''CREATE TABLE IF NOT EXISTS books(title text,
            author text, categories text, cover blob, pages int,
            progress int, file_name text)'''
        self.cursor.execute(q_create)
        self.db.commit()

    def insert_book(self, book):
        """
        Insert book in database

        :param book: mapping of column name to value; missing text/blob
                     columns are stored as NULL, pages and progress as 0
                     (NOTE(review): the intended shape of book is not shown
                     by any caller yet - confirm the key names)
        :returns: True if succeeds False if not
        """
        # Placeholders let sqlite3 do the quoting; values are never spliced
        # into the statement text.
        q = '''INSERT INTO books (title, author, categories, cover,
            pages, progress, file_name) values (?, ?, ?, ?, ?, ?, ?)'''
        values = (
            book.get('title'),
            book.get('author'),
            book.get('categories'),
            book.get('cover'),
            book.get('pages', 0),
            book.get('progress', 0),
            book.get('file_name'),
        )
        try:
            self.cursor.execute(q, values)
            self.db.commit()
            return True
        except sqlite3.Error as e:
            print(e)
            return False
|
||||||
8
main.py
8
main.py
@@ -1,5 +1,7 @@
|
|||||||
#!/usr/bin/python
|
#!/usr/bin/python
|
||||||
# import zipfile as Zip
|
# import zipfile as Zip
|
||||||
|
import sys
|
||||||
|
sys.path.insert(1, 'lib/')
|
||||||
from pyShelf import InitFiles
|
from pyShelf import InitFiles
|
||||||
from config import Config
|
from config import Config
|
||||||
from library import Catalogue
|
from library import Catalogue
|
||||||
@@ -11,5 +13,7 @@ InitFiles(config.file_array)
|
|||||||
Catalogue = Catalogue()
|
Catalogue = Catalogue()
|
||||||
# Filter Your books
|
# Filter Your books
|
||||||
# This only needs to be run on first run, & when new books are added
|
# This only needs to be run on first run, & when new books are added
|
||||||
book_list = Catalogue.filter_books(0)
|
book_list = Catalogue.filter_books()
|
||||||
|
for book in book_list:
|
||||||
|
extracted = Catalogue.extract_metadata(book_list[book])
|
||||||
|
print(extracted)
|
||||||
Binary file not shown.
BIN
tests/__pycache__/test_sysio.cpython-37.pyc
Executable file → Normal file
BIN
tests/__pycache__/test_sysio.cpython-37.pyc
Executable file → Normal file
Binary file not shown.
@@ -1,14 +1,16 @@
|
|||||||
import unittest
|
import unittest
|
||||||
|
import sys
|
||||||
|
sys.path.insert(1, 'lib/')
|
||||||
from library import Catalogue
|
from library import Catalogue
|
||||||
|
|
||||||
|
|
||||||
class Testing(unittest.TestCase):
|
class LibraryTest(unittest.TestCase):
|
||||||
|
|
||||||
def test_libray_catalogue(self):
|
def test_libray_catalogue(self):
|
||||||
self.assertIsNotNone(Catalogue())
|
self.assertIsNotNone(Catalogue())
|
||||||
|
|
||||||
def test_library_catalogue_filter_books(self):
|
def test_library_catalogue_filter_books(self):
|
||||||
self.assertIsNotNone(Catalogue().filter_books(1))
|
self.assertIsNotNone(Catalogue().filter_books())
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|||||||
16
tests/test_storage.py
Normal file
16
tests/test_storage.py
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
import unittest
|
||||||
|
import sys
|
||||||
|
sys.path.insert(1, 'lib/')
|
||||||
|
from storage import Storage
|
||||||
|
|
||||||
|
storage = Storage()
|
||||||
|
|
||||||
|
class StorageTest(unittest.TestCase):
    """Checks for the Storage database helpers"""

    def test_Storage_databasee(self):
        """database() reports success when the connection opens"""
        self.assertTrue(storage.database())

    def test_Storage_create_tables(self):
        """create_tables() leaves a books table in the database"""
        storage.create_tables()
        # Look the table up in the schema: asserting on the return value of
        # create_tables() (always None) could never fail.
        storage.cursor.execute(
            "SELECT name FROM sqlite_master "
            "WHERE type='table' AND name='books'")
        self.assertIsNotNone(storage.cursor.fetchone())
|
||||||
|
if __name__ == '__main__':
|
||||||
|
unittest.main()
|
||||||
@@ -1,6 +1,8 @@
|
|||||||
import unittest
|
import unittest
|
||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
|
import sys
|
||||||
|
sys.path.insert(1, 'lib/')
|
||||||
from pyShelf import InitFiles
|
from pyShelf import InitFiles
|
||||||
from pyShelf import Epub
|
from pyShelf import Epub
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user