Working towards the overhaul of the collections system.

This commit is contained in:
th3r00t
2023-03-20 20:42:20 -04:00
parent 3e89273cbc
commit 4ec85dfd82
11 changed files with 149 additions and 52 deletions

View File

@@ -16,7 +16,7 @@ The model should look something like this.
</p>
<ul>
<li class="done0">
<span class="todo">TODO</span> :: Refactor collections algorithm. #8f46262c
<span class="todo">TODO</span> :: Refactor collections algorithm. #aebaee91
<pre python>
Collection {
id: int
@@ -38,5 +38,10 @@ it is likely the book object will need rewritten as well.
}
</pre>
<p>
<img src="/home/th3r00t/.local/builds/pyShelf/src/backend/lib/storage.py" />
<img src="/home/th3r00t/.local/builds/pyShelf/src/backend/pyShelf_ScanLibrary.py" />
</p>
</body>
</html>

59
docs/Program Flow.html Normal file
View File

@@ -0,0 +1,59 @@
<!DOCTYPE html>
<html>
<head>
<link rel="Stylesheet" type="text/css" href="style.css">
<link rel="alternate" type="application/rss+xml" title="RSS" href="rss.xml">
<title>Program Flow</title>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
</head>
<body>
<div id="PyShelf's Program Flow Chart"><h1 id="PyShelf's Program Flow Chart" class="header"><a href="#PyShelf's Program Flow Chart">PyShelf's Program Flow Chart</a></h1></div>
<div id="PyShelf's Program Flow Chart-Entry Point"><h2 id="Entry Point" class="header"><a href="#PyShelf's Program Flow Chart-Entry Point">Entry Point</a></h2></div>
<pre bash>
&gt; ./pyShelf.py
</pre>
<div id="PyShelf's Program Flow Chart-Entry Point-pyShelf.py"><h3 id="pyShelf.py" class="header"><a href="#PyShelf's Program Flow Chart-Entry Point-pyShelf.py">pyShelf.py</a></h3></div>
<ul>
<li>
from src.backend.pyShelf_ScanLibrary import execute_scan
<li>
from src.backend.pyShelf_MakeCollections import MakeCollections
<li>
config = Config()
<li>
PRG_PATH
<li>
Storage(config).create_tables()
<li>
Thread(run_import)
<li>
FastAPIServer(config)
</ul>
<div id="PyShelf's Program Flow Chart-Entry Point-pyShelf.py-run_import()"><h4 id="run_import()" class="header"><a href="#PyShelf's Program Flow Chart-Entry Point-pyShelf.py-run_import()">run_import()</a></h4></div>
<p>
<span id="PyShelf's Program Flow Chart-Entry Point-pyShelf.py-run_import()-bookimport"></span><span class="tag" id="bookimport">bookimport</span>
</p>
<ul>
<li>
execute_scan(PRG_PATH, config) <span id="PyShelf's Program Flow Chart-Entry Point-pyShelf.py-run_import()-Book DB Insertion Point"></span><strong id="Book DB Insertion Point">Book DB Insertion Point</strong>
<li>
MakeCollections(PRG_PATH, config) <span id="PyShelf's Program Flow Chart-Entry Point-pyShelf.py-run_import()-Collection DB Insertion Point"></span><strong id="Collection DB Insertion Point">Collection DB Insertion Point</strong>
<li class="done0">
<span class="todo">TODO</span> Move collections insertion to run in tandem with book insertion as this is the last time we have all of the information in one place. #a34ad104
</ul>
</body>
</html>

View File

@@ -108,7 +108,7 @@ Filesystem-io
<a href="REST API.html">REST API</a> #c7bc51c5
<li class="done0">
<a href="Client.html">Client</a> #c68b0664
<a href="Client.html">Client</a> #21cbc756
</ul>
<div id="pyShelf | project:pyshelf-Development"><h2 id="Development" class="header"><a href="#pyShelf | project:pyshelf-Development">Development</a></h2></div>
@@ -123,6 +123,14 @@ pyShelf takes advantage of the following tools:
<a href="https://semver.org">semver</a> standards.
</p>
<ul>
<li>
<a href="Program Flow.html">Program Flow</a>
</ul>
<p>
Please note that I use task, vimwiki, &amp; taskwiki to manage my projects.
Because of this, task entries marked via <code>* [ ] task</code> are suffixed with hex color

View File

@@ -22,7 +22,8 @@ class Catalogue:
self.cover_regx = re.compile(r"\.jpg|\.jpeg|\.png|\.bmp|\.gif")
self.html_regx = re.compile(r"\.html")
self.title_sanitization_regx = re.compile(r"^(Book )+[0-9]*")
self.title_sanitization_lvl2_regx = re.compile(r"^(Book )+[0-9]*\W+(-)")
self.title_sanitization_lvl2_regx = re.compile(
r"^(Book )+[0-9]*\W+(-)")
self.title_sanitization_dirs_regx = re.compile(r"/")
self.root_dir = config.root
self.book_folder = config.book_path
@@ -33,6 +34,8 @@ class Catalogue:
def scan_folder(self, _path=None):
"""
Scan folder by _path, allows recursive scanning
:param _path: Path to scan
"""
if _path is not None:
folder = _path
@@ -64,7 +67,6 @@ class Catalogue:
except TypeError as error:
self.config.logger.error(error)
def process_by_filetype(self, book):
"""Determine books filetype and process."""
if book.endswith(".epub"):
@@ -95,9 +97,10 @@ class Catalogue:
def extract_metadata_epub(self, book):
"""
Return extracted metadata and cover picture
book['path'] == Full path to ebook file
book['files'] == list of files from self.process_book(book)
Extract metadata from epub file
:param book: Dictionary of epub file contents
:returns: Dictionary of book metadata
"""
book_zip = zipfile.ZipFile(book["path"], "r")
with book_zip as f:

View File

@@ -2,6 +2,7 @@
import re
from sqlalchemy import create_engine, select
from sqlalchemy.orm import Session
from pathlib import Path
from .models import Book, Collection
@@ -35,7 +36,8 @@ class Storage:
self.password = self.config.password
self.db_host = self.config.db_host
self.db_port = self.config.db_port
self.engine = create_engine(self.get_connection_string(), pool_pre_ping=True)
self.engine = create_engine(self.get_connection_string(),
pool_pre_ping=True)
def get_connection_string(self):
"""Get connection string.
@@ -50,10 +52,10 @@ class Storage:
return f"sqlite:////{self.config.root}/pyshelf.sqlite3"
elif self.config.db_engine == "psql":
return f"postgresql://{self.user}:{self.password}\
@{self.db_host}:{self.db_port}/{self.sql}"
@{self.db_host}:{self.db_port}/{self.sql}"
elif self.config.db_engine == "mysql":
return f"mysql://{self.user}:{self.password}\
@{self.db_host}:{self.db_port}/{self.sql}"
@{self.db_host}:{self.db_port}/{self.sql}"
def create_tables(self):
"""Create table structure."""
@@ -84,19 +86,18 @@ class Storage:
cover_image = None
if not book[1]:
pass
# breakpoint()
self.parse_collections_from_path(book)
collections = self.parse_collections_from_path(book)
_book = Book(
title=book[0],
author=book[1],
cover=cover_image,
file_name=book[3],
description=book[4],
identifier=book[5],
publisher=book[6],
rights=book[8],
tags=book[9],
)
title=book[0],
author=book[1],
cover=cover_image,
file_name=book[3],
description=book[4],
identifier=book[5],
publisher=book[6],
rights=book[8],
tags=book[9],
)
session.add(_book)
session.commit()
session.close()
@@ -104,6 +105,7 @@ class Storage:
return True
except Exception as e:
self.config.logger.error(f"{book[0][0:80]} :: {e}")
return False
def book_paths_list(self):
"""Get file paths from database for comparison to system files.
@@ -134,17 +136,12 @@ class Storage:
"""
collections = []
title_regx = re.compile(r"^[0-9][0-9]*|-|\ \B")
_pathing = book[3].split(self.config.book_path + "/")[1].split("/")
try:
_pathing.pop(0)
_pathing.pop(-1)
except IndexError:
pass
for _p in _pathing:
_s = _p.replace("'", "")
_x = re.sub(title_regx, "", _s)
_s = _x.strip()
collections.append(_s)
book_path: Path = Path(book[3])
store_path: Path = Path(self.config.book_path)
relative_book_path: Path = book_path.relative_to(store_path)
for path in relative_book_path.parts:
collections.append(re.sub(title_regx, "", path).strip())
collections.pop(-1)
return collections
def make_collections(self):
@@ -173,11 +170,11 @@ class Storage:
_s = _x.strip()
_sess = Session(self.engine)
_q = _sess.execute(
select(Collection.id).where(
Collection.collection == _s,
Collection.book_id == book.id,
)
)
select(Collection.id).where(
Collection.collection == _s,
Collection.book_id == book.id,
)
)
_sess.close()
if _q.fetchone() is None:
_collection = Collection(collection=_s, book_id=book.id)
@@ -207,12 +204,12 @@ class Storage:
session = Session(self.engine)
if collection:
_result = session.execute(
select(Book)
.join(Collection)
.where(Collection.id == collection)
.offset(skip)
.limit(limit)
).all()
select(Book)
.join(Collection)
.where(Collection.id == collection)
.offset(skip)
.limit(limit)
).all()
else:
_result = session.execute(select(Book).offset(skip).limit(limit)).all()
session.close()

View File

@@ -4,8 +4,8 @@ import sys
import time
from .lib.config import Config
from .lib.library import Catalogue
from .lib.pyShelf import InitFiles
# from .lib.library import Catalogue
# from .lib.pyShelf import InitFiles
from .lib.storage import Storage
sys.path.append(os.path.abspath("."))

View File

@@ -13,8 +13,10 @@ sys.path.append(os.path.abspath("."))
def execute_scan(root, **kwargs):
"""
Main scan execution
:param root: Project root. Required to properly execute the program; it is passed on to the configuration.
"""
# TODO: Refactor for new collections system.
_t1 = time.time()
try:
config = kwargs["config"]

View File

@@ -79,10 +79,6 @@ def books_tojson(obj) -> dumps:
"identifier": convert_none(book[0].identifier),
"publisher": convert_none(book[0].publisher),
})
# compressed = gzip.compress(dumps(_books).encode("utf-8"))
# compressed = gzip.compress(dumps(_books).encode())
# breakpoint()
# return dumps(_books)
return _books

View File

@@ -1,7 +1,7 @@
= Automated Collection Management System =
The collection management system needs to be rewritten from the ground up.
The model should look something like this.
* [ ] TODO :: Refactor collections algorithm. #8f46262c
* [ ] TODO :: Refactor collections algorithm. #aebaee91
{{{python
Collection {
id: int
@@ -19,3 +19,6 @@ it is likely the book object will need rewritten as well.
collection: list(Collection.id)
}
}}}
{{file://$HOME/.local/builds/pyShelf/src/backend/lib/storage.py}}
{{file://$HOME/.local/builds/pyShelf/src/backend/pyShelf_ScanLibrary.py}}

20
wiki/Program Flow.wiki vendored Normal file
View File

@@ -0,0 +1,20 @@
= PyShelf's Program Flow Chart =
== Entry Point ==
{{{bash
> ./pyShelf.py
}}}
=== pyShelf.py ===
- from src.backend.pyShelf_ScanLibrary import execute_scan
- from src.backend.pyShelf_MakeCollections import MakeCollections
- config = Config()
- PRG_PATH
- Storage(config).create_tables()
- Thread(run_import)
- FastAPIServer(config)
==== run_import() ====
:bookimport:
- execute_scan(PRG_PATH, config) *Book DB Insertion Point*
- MakeCollections(PRG_PATH, config) *Collection DB Insertion Point*
* [ ] TODO Move collections insertion to run in tandem with book insertion as this is the last time we have all of the information in one place. #a34ad104

6
wiki/index.wiki vendored
View File

@@ -38,7 +38,7 @@ pyShelf supports the following formats:
== TODO ==
* [ ] [[Book Scraping System]] #f7edafb1
* [ ] [[REST API]] #c7bc51c5
* [ ] [[Client]] #c68b0664
* [ ] [[Client]] #21cbc756
== Development ==
@@ -48,6 +48,10 @@ pyShelf takes advantage of the following tools:
[[https://vimwiki.github.io/|VimWiki]] we also follow
[[https://semver.org|semver]] standards.
- [[Program Flow]]
Please note that I use task, vimwiki, & taskwiki to manage my projects.
Because of this, task entries marked via `* [ ] task` are suffixed with hex color
data. If anyone knows how to fix that, I'm all ears :).