mirror of
https://github.com/th3r00t/pyShelf.git
synced 2026-04-28 01:59:35 -04:00
Refactored for better packaging
This commit is contained in:
154
src/backend/lib/library.py
Executable file
154
src/backend/lib/library.py
Executable file
@@ -0,0 +1,154 @@
|
||||
#!/usr/bin/python
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import zipfile
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from PIL import Image
|
||||
|
||||
from .api_hooks import DuckDuckGo
|
||||
from .config import Config
|
||||
from .storage import Storage
|
||||
|
||||
# config = Config()
|
||||
|
||||
|
||||
class Catalogue:
|
||||
"""Decodes and stores book information"""
|
||||
|
||||
"""Step One: filter_books"""
|
||||
|
||||
def __init__(self, config):
|
||||
self.file_list = []
|
||||
self.opf_regx = re.compile(r"\.opf")
|
||||
self.cover_regx = re.compile(r"\.jpg|\.jpeg|\.png|\.bmp|\.gif")
|
||||
self.html_regx = re.compile(r"\.html")
|
||||
self.root_dir = config.root
|
||||
self.book_folder = config.book_path
|
||||
self.book_shelf = config.book_shelf
|
||||
self._book_list_expanded = None
|
||||
self.books = None
|
||||
|
||||
def scan_folder(self, _path=None):
|
||||
if _path is not None:
|
||||
folder = _path
|
||||
elif os.path.isdir(self.root_dir + "/" + self.book_folder):
|
||||
folder = self.root_dir + "/" + self.book_folder
|
||||
else:
|
||||
folder = self.book_folder
|
||||
for f in os.listdir(folder):
|
||||
_path = os.path.abspath(folder + "/" + f)
|
||||
_is_dir = os.path.isdir(_path.strip() + "/")
|
||||
if _is_dir:
|
||||
self.file_list.append(self.scan_folder(_path))
|
||||
self.file_list.append(_path)
|
||||
|
||||
def filter_books(self):
|
||||
"""
|
||||
Scan book folder recursively for epub files
|
||||
filter_books(0) -> Catalogue.books
|
||||
filter_books(1) -> self.books[]
|
||||
:param ret: 0 -> create class property -> dump json
|
||||
:param ret: 1 -> create & return class property
|
||||
"""
|
||||
self.scan_folder()
|
||||
regx = re.compile(r"\.epub")
|
||||
try:
|
||||
self.books = list(filter(regx.search, filter(None, self.file_list)))
|
||||
except TypeError as e:
|
||||
print(e)
|
||||
self._book_list_expanded = {}
|
||||
with open(self.book_shelf, "w") as f:
|
||||
for book in self.books:
|
||||
self._book_list_expanded[book] = self.process_book(book)
|
||||
json.dump(self._book_list_expanded, f)
|
||||
return self._book_list_expanded
|
||||
|
||||
@staticmethod
|
||||
def process_book(book):
|
||||
"""Return dictionary of epub file contents"""
|
||||
book = zipfile.ZipFile(book, "r")
|
||||
details = {}
|
||||
with book as book_zip:
|
||||
details["files"] = []
|
||||
details["path"] = book.filename
|
||||
expanded = book_zip.infolist()
|
||||
regx = re.compile(r"\.opf|cover")
|
||||
for i in expanded:
|
||||
match = re.search(regx, i.filename)
|
||||
if match:
|
||||
# Returns zip file location of requested files
|
||||
details["files"].append(match.string)
|
||||
return details
|
||||
|
||||
def extract_metadata(self, book):
|
||||
"""
|
||||
Return extracted metadata and cover picture
|
||||
book['path'] == Full path to ebook file
|
||||
book['files'] == list of files from self.process_book(book)
|
||||
"""
|
||||
book_zip = zipfile.ZipFile(book["path"], "r")
|
||||
with book_zip as f:
|
||||
content = self.extract_content(book_zip, book)
|
||||
soup = BeautifulSoup(content, "lxml")
|
||||
title = soup.find("dc:title")
|
||||
if title is None:
|
||||
title = book["path"].split("/")[-1].rsplit(".", 1)[0]
|
||||
else:
|
||||
title = title.contents[0]
|
||||
author = soup.find("dc:creator")
|
||||
if author is not None:
|
||||
author = author.contents[0]
|
||||
try:
|
||||
cover = self.extract_cover_image(book_zip, book)
|
||||
except IndexError:
|
||||
# cover = self.extract_cover_html(book_zip, book)
|
||||
cover = DuckDuckGo().image_result(title)
|
||||
book_details = [title, author, cover, book["path"]]
|
||||
return book_details
|
||||
|
||||
def extract_content(self, book_zip, book):
|
||||
content = book_zip.open(list(filter(self.opf_regx.search, book["files"]))[0])
|
||||
return content
|
||||
|
||||
def extract_cover_html(self, book_zip, book):
|
||||
cover = book_zip.open(list(filter(self.html_regx.search, book["files"]))[0])
|
||||
return cover
|
||||
|
||||
def extract_cover_image(self, book_zip, book):
|
||||
cover = book_zip.open(list(filter(self.cover_regx.search, book["files"]))[0])
|
||||
try:
|
||||
cover = book_zip.read(cover.name)
|
||||
return cover
|
||||
except KeyError:
|
||||
return False
|
||||
|
||||
def compare_shelf_current(self):
|
||||
db = Storage()
|
||||
stored = db.book_paths_list()
|
||||
closed = db.close()
|
||||
if self.books is None:
|
||||
self.filter_books()
|
||||
on_disk, in_storage = [], []
|
||||
for _x in self.books:
|
||||
on_disk.append(_x)
|
||||
for _y in stored:
|
||||
in_storage.append(_y[0])
|
||||
a, b, = set(on_disk), set(in_storage)
|
||||
c = set.difference(a, b)
|
||||
return c
|
||||
|
||||
def import_books(self, list=None):
|
||||
book_list = self.compare_shelf_current()
|
||||
db = Storage()
|
||||
for book in book_list:
|
||||
book = self.process_book(book)
|
||||
extracted = self.extract_metadata(book)
|
||||
db.insert_book(extracted)
|
||||
inserted = db.commit()
|
||||
if inserted is not True:
|
||||
print(inserted)
|
||||
if input("Continue ? y/n") == "y":
|
||||
pass
|
||||
db.close()
|
||||
Reference in New Issue
Block a user