Now acquiring more book details

This commit is contained in:
Raelon Masters
2020-06-13 23:36:03 -04:00
parent 7d39d1534a
commit 76b631a5a4

View File

@@ -115,13 +115,26 @@ class Catalogue:
except IndexError: except IndexError:
# cover = self.extract_cover_html(book_zip, book) # cover = self.extract_cover_html(book_zip, book)
cover = DuckDuckGo().image_result(title) cover = DuckDuckGo().image_result(title)
description = soup.find("dc:description") try: description = self.stripTags(soup.find("dc:description").text)
identifier = soup.find("dc:identifier") except AttributeError: description = None
publisher = soup.find("dc:identifier") try: identifier = self.stripTags(soup.find("dc:identifier").text)
date = soup.find("dc:date") except AttributeError: identifier = None
rights = soup.find("dc:rights") try: publisher = self.stripTags(soup.find("dc:publisher").text)
tags = soup.find_all("dc:subject") except AttributeError: publisher = None
try: date = self.stripTags(soup.find("dc:date").text)
except AttributeError: date = None
try: rights = self.stripTags(soup.find("dc:rights").text)
except AttributeError: rights = None
try: tags = self.stripTags(soup.find_all("dc:subject").text)
except AttributeError: tags = None
ftags = None
breakpoint() breakpoint()
if tags is not None:
for tag in tags:
if ftags is None:
ftags = tag
else:
ftags = ftags+","+tag
book_details = [ book_details = [
title, title,
author, author,
@@ -132,10 +145,15 @@ class Catalogue:
publisher, publisher,
date, date,
rights, rights,
tags ftags
] ]
return book_details return book_details
@staticmethod
def stripTags(source):
p = re.compile(r'<.*?>')
return p.sub('', source)
@staticmethod @staticmethod
def extract_metadata_mobi(book): def extract_metadata_mobi(book):
book = Mobi(book) book = Mobi(book)