diff --git a/docs/Automated Collections.html b/docs/Automated Collections.html index ff251a6..0d61087 100644 --- a/docs/Automated Collections.html +++ b/docs/Automated Collections.html @@ -16,7 +16,7 @@ The model should look something like this.

Development

@@ -123,6 +123,14 @@ pyShelf takes advantage of the following tools: semver standards.

+ + + +

Please note that I use task, vimwiki, & taskwiki to manage my projects. Due to this Task entries marked via * [ ] task are postfixed with hex color diff --git a/src/backend/lib/library.py b/src/backend/lib/library.py index 245242f..5df7a32 100644 --- a/src/backend/lib/library.py +++ b/src/backend/lib/library.py @@ -22,7 +22,8 @@ class Catalogue: self.cover_regx = re.compile(r"\.jpg|\.jpeg|\.png|\.bmp|\.gif") self.html_regx = re.compile(r"\.html") self.title_sanitization_regx = re.compile(r"^(Book )+[0-9]*") - self.title_sanitization_lvl2_regx = re.compile(r"^(Book )+[0-9]*\W+(-)") + self.title_sanitization_lvl2_regx = re.compile( + r"^(Book )+[0-9]*\W+(-)") self.title_sanitization_dirs_regx = re.compile(r"/") self.root_dir = config.root self.book_folder = config.book_path @@ -33,6 +34,8 @@ class Catalogue: def scan_folder(self, _path=None): """ Scan folder by _path, allows recurisive scanning + + :param _path: Path to scan """ if _path is not None: folder = _path @@ -64,7 +67,6 @@ class Catalogue: except TypeError as error: self.config.logger.error(error) - def process_by_filetype(self, book): """Determine books filetype and process.""" if book.endswith(".epub"): @@ -95,9 +97,10 @@ class Catalogue: def extract_metadata_epub(self, book): """ - Return extracted metadata and cover picture - book['path'] == Full path to ebook file - book['files'] == list of files from self.process_book(book) + Extract metadata from epub file + + :param book: Dictionary of epub file contents + :returns: Dictionary of book metadata """ book_zip = zipfile.ZipFile(book["path"], "r") with book_zip as f: diff --git a/src/backend/lib/storage.py b/src/backend/lib/storage.py index 3699a7b..08aa233 100644 --- a/src/backend/lib/storage.py +++ b/src/backend/lib/storage.py @@ -2,6 +2,7 @@ import re from sqlalchemy import create_engine, select from sqlalchemy.orm import Session +from pathlib import Path from .models import Book, Collection @@ -35,7 +36,8 @@ class Storage: self.password = self.config.password self.db_host = self.config.db_host self.db_port = self.config.db_port - self.engine = create_engine(self.get_connection_string(), pool_pre_ping=True) + self.engine = create_engine(self.get_connection_string(), + pool_pre_ping=True) def get_connection_string(self): """Get connection string. @@ -50,10 +52,10 @@ class Storage: return f"sqlite:////{self.config.root}/pyshelf.sqlite3" elif self.config.db_engine == "psql": return f"postgresql://{self.user}:{self.password}\ - @{self.db_host}:{self.db_port}/{self.sql}" + @{self.db_host}:{self.db_port}/{self.sql}" elif self.config.db_engine == "mysql": return f"mysql://{self.user}:{self.password}\ - @{self.db_host}:{self.db_port}/{self.sql}" + @{self.db_host}:{self.db_port}/{self.sql}" def create_tables(self): """Create table structure.""" @@ -84,19 +86,18 @@ class Storage: cover_image = None if not book[1]: pass - # breakpoint() - self.parse_collections_from_path(book) + collections = self.parse_collections_from_path(book) _book = Book( - title=book[0], - author=book[1], - cover=cover_image, - file_name=book[3], - description=book[4], - identifier=book[5], - publisher=book[6], - rights=book[8], - tags=book[9], - ) + title=book[0], + author=book[1], + cover=cover_image, + file_name=book[3], + description=book[4], + identifier=book[5], + publisher=book[6], + rights=book[8], + tags=book[9], + ) session.add(_book) session.commit() session.close() @@ -104,6 +105,7 @@ class Storage: return True except Exception as e: self.config.logger.error(f"{book[0][0:80]} :: {e}") + return False def book_paths_list(self): """Get file paths from database for comparison to system files. @@ -134,17 +136,12 @@ class Storage: """ collections = [] title_regx = re.compile(r"^[0-9][0-9]*|-|\ \B") - _pathing = book[3].split(self.config.book_path + "/")[1].split("/") - try: - _pathing.pop(0) - _pathing.pop(-1) - except IndexError: - pass - for _p in _pathing: - _s = _p.replace("'", "") - _x = re.sub(title_regx, "", _s) - _s = _x.strip() - collections.append(_s) + book_path: Path = Path(book[3]) + store_path: Path = Path(self.config.book_path) + relative_book_path: Path = book_path.relative_to(store_path) + for path in relative_book_path.parts: + collections.append(re.sub(title_regx, "", path).strip()) + collections.pop(-1) return collections def make_collections(self): @@ -173,11 +170,11 @@ class Storage: _s = _x.strip() _sess = Session(self.engine) _q = _sess.execute( - select(Collection.id).where( - Collection.collection == _s, - Collection.book_id == book.id, - ) - ) + select(Collection.id).where( + Collection.collection == _s, + Collection.book_id == book.id, + ) + ) _sess.close() if _q.fetchone() is None: _collection = Collection(collection=_s, book_id=book.id) @@ -207,12 +204,12 @@ class Storage: session = Session(self.engine) if collection: _result = session.execute( - select(Book) - .join(Collection) - .where(Collection.id == collection) - .offset(skip) - .limit(limit) - ).all() + select(Book) + .join(Collection) + .where(Collection.id == collection) + .offset(skip) + .limit(limit) + ).all() else: _result = session.execute(select(Book).offset(skip).limit(limit)).all() session.close() diff --git a/src/backend/pyShelf_MakeCollections.py b/src/backend/pyShelf_MakeCollections.py index 18e69ca..b778bac 100755 --- a/src/backend/pyShelf_MakeCollections.py +++ b/src/backend/pyShelf_MakeCollections.py @@ -4,8 +4,8 @@ import sys import time from .lib.config import Config -from .lib.library import Catalogue -from .lib.pyShelf import InitFiles +# from .lib.library import Catalogue +# from .lib.pyShelf import InitFiles from .lib.storage import Storage sys.path.append(os.path.abspath(".")) diff --git a/src/backend/pyShelf_ScanLibrary.py b/src/backend/pyShelf_ScanLibrary.py index c216eee..db25949 100644 --- a/src/backend/pyShelf_ScanLibrary.py +++ b/src/backend/pyShelf_ScanLibrary.py @@ -13,8 +13,10 @@ sys.path.append(os.path.abspath(".")) def execute_scan(root, **kwargs): """ Main scan execution + :param root: Project root. Required to properly execute program. Sends to configuration. """ + # TODO: Refactor for new collections system. _t1 = time.time() try: config = kwargs["config"] diff --git a/src/frontend/lib/FastAPIServer.py b/src/frontend/lib/FastAPIServer.py index 0ea1ead..339cbca 100644 --- a/src/frontend/lib/FastAPIServer.py +++ b/src/frontend/lib/FastAPIServer.py @@ -79,10 +79,6 @@ def books_tojson(obj) -> dumps: "identifier": convert_none(book[0].identifier), "publisher": convert_none(book[0].publisher), }) - # compressed = gzip.compress(dumps(_books).encode("utf-8")) - # compressed = gzip.compress(dumps(_books).encode()) - # breakpoint() - # return dumps(_books) return _books diff --git a/wiki/Automated Collections.wiki b/wiki/Automated Collections.wiki index 4b7b72c..72a279e 100644 --- a/wiki/Automated Collections.wiki +++ b/wiki/Automated Collections.wiki @@ -1,7 +1,7 @@ = Automated Collection Management System = The collection management system needs rewritten from the ground up. The model should look something like this. -* [ ] TODO :: Refactor collections algorithm. #8f46262c +* [ ] TODO :: Refactor collections algorithm. #aebaee91 {{{python Collection { id: int @@ -19,3 +19,6 @@ it is likely the book object will need rewritten as well. collection: list(Collection.id) } }}} + +{{file://$HOME/.local/builds/pyShelf/src/backend/lib/storage.py}} +{{file://$HOME/.local/builds/pyShelf/src/backend/pyShelf_ScanLibrary.py}} diff --git a/wiki/Program Flow.wiki b/wiki/Program Flow.wiki new file mode 100644 index 0000000..6d596df --- /dev/null +++ b/wiki/Program Flow.wiki @@ -0,0 +1,20 @@ += PyShelf's Program Flow Chart = + +== Entry Point == +{{{bash + > ./pyShelf.py +}}} +=== pyShelf.py === +- from src.backend.pyShelf_ScanLibrary import execute_scan +- from src.backend.pyShelf_MakeCollections import MakeCollections +- config = Config() +- PRG_PATH +- Storage(config).create_tables() +- Thread(run_import) +- FastAPIServer(config) + +==== run_import() ==== +:bookimport: +- execute_scan(PRG_PATH, config) *Book DB Insertion Point* +- MakeCollections(PRG_PATH, config) *Collection DB Insertion Point* +* [ ] TODO Move collections insertion to run in tandem with book insertion as this is the last time we have all of the information in one place. #a34ad104 diff --git a/wiki/index.wiki b/wiki/index.wiki index b7ad694..2947991 100644 --- a/wiki/index.wiki +++ b/wiki/index.wiki @@ -38,7 +38,7 @@ pyShelf supports the following formats: == TODO == * [ ] [[Book Scraping System]] #f7edafb1 * [ ] [[REST API]] #c7bc51c5 - * [ ] [[Client]] #c68b0664 + * [ ] [[Client]] #21cbc756 == Development == @@ -48,6 +48,10 @@ pyShelf takes advantage of the following tools: [[https://vimwiki.github.io/|VimWiki]] we also follow [[https://semver.org|semver]] standards. + + - [[Program Flow]] + + Please note that I use task, vimwiki, & taskwiki to manage my projects. Due to this Task entries marked via `* [ ] task` are postfixed with hex color data. Anyone who knows how to fix that I'm all ears :).