From 13fc85f6ffa79b2bffad7aba0fe6b7439174b3e8 Mon Sep 17 00:00:00 2001
From: Raelon Masters <admin@mylt.dev>
Date: Sat, 20 Jun 2020 23:28:20 -0400
Subject: [PATCH] Added sanitization to book title collection. Stripping
 patterns matching Book 0-9*+ -

---
 src/backend/lib/library.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/src/backend/lib/library.py b/src/backend/lib/library.py
index 2013e1b..737d09e 100755
--- a/src/backend/lib/library.py
+++ b/src/backend/lib/library.py
@@ -26,6 +26,8 @@ class Catalogue:
         self.opf_regx = re.compile(r"\.opf")
         self.cover_regx = re.compile(r"\.jpg|\.jpeg|\.png|\.bmp|\.gif")
         self.html_regx = re.compile(r"\.html")
+        self.title_sanitization_regx = re.compile(r"^(Book )+[0-9]*")
+        self.title_sanitization_lvl2_regx = re.compile(r"^(Book )+[0-9]*\W+(-)")
         self.root_dir = config.root
         self.book_folder = config.book_path
         self.books = None
@@ -69,6 +71,7 @@ class Catalogue:
         """
 
     def process_by_filetype(self, book):
+        print(str(book), end='\r', flush=True)
         if book.endswith(".epub"):
             epub = self.process_epub(book)
             return self.extract_metadata_epub(epub)
@@ -107,6 +110,12 @@ class Catalogue:
                 title = book["path"].split("/")[-1].rsplit(".", 1)[0]
             else:
                 title = title.contents[0]
+            if re.match(self.title_sanitization_regx, title):
+                breakpoint()
+                if re.match(self.title_sanitization_lvl2_regx, title):
+                    title = re.split(r"-+\W", title)[1]
+                else: title = re.split(self.title_sanitization_regx, title)[2]
+
             author = soup.find("dc:creator")
             if author is not None:
                 author = author.contents[0]