Began extracting data from epub files

This commit is contained in:
Mike
2019-10-01 17:30:36 -04:00
parent 81fe90bce5
commit 8e90fc851c
18 changed files with 210 additions and 150 deletions

0
lib/__init__.py Normal file
View File

96
lib/library.py Executable file
View File

@@ -0,0 +1,96 @@
#!/usr/bin/python
import json
import os
import re
import zipfile
from bs4 import BeautifulSoup
from config import Config
# Module-wide configuration instance; the Catalogue class below reads
# `config.book_shelf` and `config.book_path` from it.
config = Config()
class Catalogue:
    """Decode and store book information.

    Step one of the workflow is ``filter_books``, which scans the book
    folder for epub files and writes the JSON shelf file.

    Attributes:
        file_list: file paths collected by ``scan_folder``.
        catalogue: shelf contents, loaded from (or freshly written to)
            ``config.book_shelf``.
        current_files: result of the folder scan done at construction.
        books: ``.epub`` paths found by ``filter_books``; only present
            once ``filter_books`` has run.
    """

    def __init__(self):
        """Load the JSON shelf, rebuilding it when missing or unreadable."""
        self.file_list = []
        try:
            with open(config.book_shelf, 'r') as f:
                self.catalogue = json.load(f)
        except (OSError, ValueError):
            # Missing, empty or corrupt shelf file (json.JSONDecodeError is a
            # ValueError): rebuild it from the book folder.  The read handle
            # is already closed here, so rewriting the file is safe.
            self.catalogue = self.filter_books()
            self.current_files = self.file_list
        else:
            self.current_files = self.scan_folder()

    def scan_folder(self, folder=None):
        """Recursively collect every file below ``folder`` into ``file_list``.

        :param folder: directory to scan; defaults to ``config.book_path``,
            resolved at call time rather than at import time.
        :returns: ``self.file_list`` (absolute paths of files, no directories)
        """
        if folder is None:
            folder = config.book_path
        for name in os.listdir(folder):
            path = os.path.abspath(os.path.join(folder, name))
            if os.path.isdir(path):
                # Recurse for the side effect only; sub-results land in the
                # shared ``self.file_list``.
                self.scan_folder(path)
            else:
                self.file_list.append(path)
        return self.file_list

    def scan_book(self, book):
        """Return True if ``book`` is a zip archive with a root ``content.opf``.

        REMOVE ME? (kept from the original; the method may be obsolete.)

        :param book: path to the candidate epub file
        :returns: True when ``content.opf`` can be opened, False otherwise
        """
        try:
            with zipfile.ZipFile(book) as epub:
                # Open and immediately close the member so no handle leaks.
                with epub.open('content.opf'):
                    return True
        except (KeyError, zipfile.BadZipFile, OSError) as e:
            # KeyError: member missing; BadZipFile/OSError: unreadable archive.
            print(e)
            return False

    def filter_books(self):
        """Scan the book folder recursively for epub files and dump the shelf.

        Sets ``self.books`` to the list of ``.epub`` paths found, maps each
        one to ``process_book(path)`` and writes that mapping as JSON to
        ``config.book_shelf``.

        :returns: dict mapping each epub path to its ``process_book`` details
        """
        # Start from a clean list so repeated scans do not pile up duplicates.
        self.file_list = []
        self.scan_folder()
        # Match on the file extension only, case-insensitively.
        self.books = [
            path for path in self.file_list if path.lower().endswith('.epub')
        ]
        # Build everything first: opening the shelf for writing truncates it,
        # so a failure in process_book must not leave an empty shelf behind.
        shelf = {book: self.process_book(book) for book in self.books}
        with open(config.book_shelf, 'w') as f:
            json.dump(shelf, f)
        return shelf

    def process_book(self, book):
        """Return a dictionary describing the interesting epub members.

        :param book: path to the epub (zip) file
        :returns: ``{'files': [...], 'path': ...}`` where ``files`` lists the
            archive member names containing ``.opf`` or ``cover`` and ``path``
            is the archive's file name
        """
        wanted = re.compile(r'\.opf|cover')
        with zipfile.ZipFile(book, 'r') as book_zip:
            return {
                # Archive-internal locations of the requested files.
                'files': [
                    name for name in book_zip.namelist() if wanted.search(name)
                ],
                'path': book_zip.filename,
            }

    def extract_metadata(self, book):
        """Return the title element parsed from the book's OPF file.

        :param book: mapping as produced by ``process_book``:
            ``book['path']`` is the full path to the ebook file and
            ``book['files']`` the list of member names.
        :returns: the ``dc:title`` element found by BeautifulSoup, or None
            when the book lists no ``.opf`` member or has no title
        """
        # TODO: author (dc:creator) and cover (<meta name="cover">) are not
        # extracted yet; only the title is returned.
        opf_files = [name for name in book['files'] if '.opf' in name]
        if not opf_files:
            return None
        with zipfile.ZipFile(book['path'], 'r') as book_zip:
            with book_zip.open(opf_files[0]) as content:
                soup = BeautifulSoup(content, "xml")
        return soup.find("dc:title")

    def compare_shelf_current(self):
        """Return the set of epub paths on disk that are not in the shelf.

        Runs ``filter_books`` first when ``self.books`` has not been built.
        """
        if not hasattr(self, 'books'):
            self.filter_books()
        return set(self.books) - set(self.catalogue)

33
lib/pyShelf.py Executable file
View File

@@ -0,0 +1,33 @@
#!/usr/bin/python
import os
import zipfile
from config import Config
# Module-wide configuration instance; Epub below reads `config.book_path`.
config = Config()
class InitFiles:
    """First run file creation operations."""

    def __init__(self, file_array):
        """Create every file in ``file_array`` that does not exist yet.

        :param file_array: iterable of file paths to ensure exist
        """
        print("Begining creation of file structure")
        for _pointer in file_array:
            if not os.path.isfile(_pointer):
                self.CreateFile(_pointer)

    def CreateFile(self, _pointer):
        """Create an empty file at ``_pointer``, including missing parents.

        :param _pointer: path of the file to create; an existing file at
            that path is truncated (mode ``"w+"``)
        """
        parent = os.path.dirname(_pointer)
        # An empty parent means "current directory": nothing to create.
        # makedirs also handles parents nested several levels deep.
        if parent:
            os.makedirs(parent, exist_ok=True)
        with open(_pointer, "w+"):
            pass
class Epub:
    """All Epub file handling"""

    def __init__(self):
        """Remember the configured book folder on the instance."""
        # Reading the module-level ``config`` needs no ``global`` statement.
        self.book_path = config.book_path

    def import_book(self):
        """Placeholder: not implemented yet, returns None."""

    def book_list(self):
        """Placeholder: not implemented yet, returns None."""

48
lib/storage.py Normal file
View File

@@ -0,0 +1,48 @@
#!/usr/bin/python
import sys
import sqlite3
# Put the parent directory on the module search path ahead of the import
# below -- presumably so that `config` resolves when this file is run from
# inside lib/; TODO confirm.
sys.path.insert(1,'../')
from config import Config
# Database location taken from the configuration; Storage connects to it.
db_pointer = Config().catalogue_db
class Storage:
    """Contains all methods for system storage"""

    def __init__(self):
        """Connect to the database file named by ``db_pointer``."""
        self.db_file = db_pointer
        self.database()

    def database(self):
        """Create the database connection and cursor.

        Sets ``self.db`` and ``self.cursor`` on success.

        :returns: True if the connection was opened, False if sqlite
            reported an error
        """
        try:
            self.db = sqlite3.connect(self.db_file)
            self.cursor = self.db.cursor()
            return True
        except sqlite3.Error as e:
            # Report instead of failing silently; callers still get False.
            print(e)
            return False

    def create_tables(self):
        """Create the ``books`` table if it does not exist yet."""
        q_create = '''CREATE TABLE IF NOT EXISTS books(title text, author text,
            categories text, cover blob, pages int, progress int,
            file_name text)'''
        # IF NOT EXISTS replaces the probe-with-SELECT-and-catch approach.
        self.cursor.execute(q_create)
        self.db.commit()

    def insert_book(self, book):
        """
        Insert book in database

        :param book: mapping of column name to value.  NOTE(review): the
            expected shape is not shown by any caller; this assumes keys named
            after the table columns (title, author, categories, cover, pages,
            progress, file_name) -- confirm.  Missing keys are stored as NULL,
            except ``pages`` and ``progress`` which default to 0.
        :returns: True if succeeds False if not
        """
        # Bound parameters (?) instead of string formatting: values are never
        # spliced into the SQL text, and all seven columns get a value.
        q = '''INSERT INTO books (title, author, categories, cover,
            pages, progress, file_name) VALUES (?, ?, ?, ?, ?, ?, ?)'''
        values = (
            book.get('title'),
            book.get('author'),
            book.get('categories'),
            book.get('cover'),
            book.get('pages', 0),
            book.get('progress', 0),
            book.get('file_name'),
        )
        try:
            self.cursor.execute(q, values)
            # Without a commit the row is lost when the connection closes.
            self.db.commit()
            return True
        except sqlite3.Error as e:
            print(e)
            return False