pyShelf/app/lib/library.py

#!/usr/bin/python
import json
import os
import re
import zipfile

from bs4 import BeautifulSoup
from PIL import Image

from .api_hooks import DuckDuckGo
from .config import Config
from .storage import Storage

# config = Config()


class Catalogue:
    """Decodes and stores book information"""

    """Step One: filter_books"""

    def __init__(self, root):
        self.file_list = []
        self.opf_regx = re.compile(r"\.opf")
        self.cover_regx = re.compile(r"\.jpg|\.jpeg|\.png|\.bmp|\.gif")
        self.html_regx = re.compile(r"\.html")
        self.root_dir = root
        _config = Config(root)
        self.book_folder = _config.book_path
        self.book_shelf = _config.book_shelf
        self._book_list_expanded = None
        self.books = None

    def scan_folder(self, _path=None):
        if _path is not None:
            folder = _path
        elif os.path.isdir(self.root_dir + "/" + self.book_folder):
            folder = self.root_dir + "/" + self.book_folder
        else:
            folder = self.book_folder
        for f in os.listdir(folder):
            _path = os.path.abspath(folder + "/" + f)
            _is_dir = os.path.isdir(_path.strip() + "/")
            if _is_dir:
                self.file_list.append(self.scan_folder(_path))
            self.file_list.append(_path)

    def filter_books(self):
        """
            Scan book folder recursively for epub files
            filter_books(0) -> Catalogue.books
            filter_books(1) -> self.books[]
            :param ret: 0 -> create class property -> dump json
            :param ret: 1 -> create & return class property
        """
        self.scan_folder()
        regx = re.compile(r"\.epub")
        try:
            self.books = list(filter(regx.search, filter(None, self.file_list)))
        except TypeError as e:
            print(e)
        self._book_list_expanded = {}
        with open(self.book_shelf, "w") as f:
            for book in self.books:
                self._book_list_expanded[book] = self.process_book(book)
            json.dump(self._book_list_expanded, f)
        return self._book_list_expanded

    @staticmethod
    def process_book(book):
        """Return dictionary of epub file contents"""
        book = zipfile.ZipFile(book, "r")
        details = {}
        with book as book_zip:
            details["files"] = []
            details["path"] = book.filename
            expanded = book_zip.infolist()
            regx = re.compile(r"\.opf|cover")
            for i in expanded:
                match = re.search(regx, i.filename)
                if match:
                    # Returns zip file location of requested files
                    details["files"].append(match.string)
        return details

    def extract_metadata(self, book):
        """
        Return extracted metadata and cover picture
        book['path'] == Full path to ebook file
        book['files'] == list of files from self.process_book(book)
        """
        book_zip = zipfile.ZipFile(book["path"], "r")
        with book_zip as f:
            content = self.extract_content(book_zip, book)
            soup = BeautifulSoup(content, "lxml")
            title = soup.find("dc:title")
            if title is None:
                title = book["path"].split("/")[-1].rsplit(".", 1)[0]
            else:
                title = title.contents[0]
            author = soup.find("dc:creator")
            if author is not None:
                author = author.contents[0]
            try:
                cover = self.extract_cover_image(book_zip, book)
            except IndexError:
                # cover = self.extract_cover_html(book_zip, book)
                cover = DuckDuckGo().image_result(title)
            book_details = [title, author, cover, book["path"]]
        return book_details

    def extract_content(self, book_zip, book):
        content = book_zip.open(list(filter(self.opf_regx.search, book["files"]))[0])
        return content

    def extract_cover_html(self, book_zip, book):
        cover = book_zip.open(list(filter(self.html_regx.search, book["files"]))[0])
        return cover

    def extract_cover_image(self, book_zip, book):
        cover = book_zip.open(list(filter(self.cover_regx.search, book["files"]))[0])
        try:
            cover = book_zip.read(cover.name)
            return cover
        except KeyError:
            return False

    def compare_shelf_current(self):
        db = Storage()
        stored = db.book_paths_list()
        closed = db.close()
        if self.books is None:
            self.filter_books()
        on_disk, in_storage = [], []
        for _x in self.books:
            on_disk.append(_x)
        for _y in stored:
            in_storage.append(_y[0])
        a, b, = set(on_disk), set(in_storage)
        c = set.difference(a, b)
        return c

    def import_books(self, list=None):
        book_list = self.compare_shelf_current()
        db = Storage()
        for book in book_list:
            book = self.process_book(book)
            extracted = self.extract_metadata(book)
            db.insert_book(extracted)
        inserted = db.commit()
        if inserted is not True:
            print(inserted)
            if input("Continue ? y/n") == "y":
                pass
        db.close()