From 81fe90bce5767d9b2e15420f76a85e61166b712b Mon Sep 17 00:00:00 2001 From: Mike Date: Fri, 27 Sep 2019 16:03:37 -0400 Subject: [PATCH] Finalized filter_books. dumps to shelf.json --- .../scratchpad-checkpoint.ipynb | 96 +++--------- .vscode/settings.json | 3 +- __pycache__/library.cpython-37.pyc | Bin 1664 -> 2642 bytes data/shelf.json | 145 +++++++++++++++++- library.py | 23 ++- main.py | 10 +- scratchpad.ipynb | 30 ++-- scratchpad.py | 29 ++++ 8 files changed, 233 insertions(+), 103 deletions(-) create mode 100644 scratchpad.py diff --git a/.ipynb_checkpoints/scratchpad-checkpoint.ipynb b/.ipynb_checkpoints/scratchpad-checkpoint.ipynb index edcdb4f..b6061a6 100644 --- a/.ipynb_checkpoints/scratchpad-checkpoint.ipynb +++ b/.ipynb_checkpoints/scratchpad-checkpoint.ipynb @@ -2,91 +2,39 @@ "cells": [ { "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [], - "source": [ - "from library import Catalogue\n", - "import zipfile\n", - "\n", - "Catalogue = Catalogue()\n", - "book_list = Catalogue.filter_books(1)\n", - "unpacked = []" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": {}, - "outputs": [ - { - "ename": "SyntaxError", - "evalue": "invalid syntax (, line 4)", - "output_type": "error", - "traceback": [ - "\u001b[0;36m File \u001b[0;32m\"\"\u001b[0;36m, line \u001b[0;32m4\u001b[0m\n\u001b[0;31m book.\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n" - ] - } - ], - "source": [ - "for book in book_list:\n", - " book = zipfile.ZipFile(book, 'r')\n", - " with book as bookzip:\n", - " book." - ] - }, - { - "cell_type": "code", - "execution_count": 23, + "execution_count": 35, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ + "\n", "\n" ] - }, - { - "ename": "KeyError", - "evalue": "\"There is no item named 'content.opf' in the archive\"", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0munpacked\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mbookzip\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mdetails\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mbookzip\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'content.opf'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdetails\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/lib64/python3.7/zipfile.py\u001b[0m in \u001b[0;36mopen\u001b[0;34m(self, name, mode, pwd, force_zip64)\u001b[0m\n\u001b[1;32m 1468\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1469\u001b[0m \u001b[0;31m# Get info object for name\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1470\u001b[0;31m \u001b[0mzinfo\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgetinfo\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1471\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1472\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mmode\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'w'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/lib64/python3.7/zipfile.py\u001b[0m in \u001b[0;36mgetinfo\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 1396\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0minfo\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1397\u001b[0m raise KeyError(\n\u001b[0;32m-> 1398\u001b[0;31m 'There is no item named %r in the archive' % name)\n\u001b[0m\u001b[1;32m 1399\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1400\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0minfo\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mKeyError\u001b[0m: \"There is no item named 'content.opf' in the archive\"" - ] } ], "source": [ - "for i in unpacked:\n", - " with i as bookzip:\n", - " details = bookzip.open('content.opf')\n", - " print(details)" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [ - { - "ename": "TypeError", - "evalue": "'ZipFile' object is not subscriptable", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mbookzip\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;31mTypeError\u001b[0m: 'ZipFile' object is not subscriptable" - ] - } - ], - "source": [ - "bookzip" + "from library import Catalogue\n", + "import zipfile\n", + "import pprint as pp\n", + "import re\n", + "\n", + "Catalogue = Catalogue()\n", + "book_list = Catalogue.filter_books(1)\n", + "unpacked = []\n", + "\n", + "for book in book_list:\n", + " book = zipfile.ZipFile(book, 'r')\n", + " with book as bookzip:\n", + " try:\n", + " content_opf = bookzip.open('content.opf')\n", + " print(content_opf)\n", + " except KeyError as e:\n", + " expanded = bookzip.infolist()\n", + " regx1 = re.compile(r'\\.opf|^cover')\n", + " for i in expanded:\n", + " if re.search(regx1, str(i)) == True: pp.pprint(i.filename); print(res)" ] }, { diff --git a/.vscode/settings.json b/.vscode/settings.json index 66bfd50..ec1cf14 100755 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -8,5 +8,6 @@ ], "python.testing.pytestEnabled": false, "python.testing.nosetestsEnabled": false, - "python.testing.unittestEnabled": true + "python.testing.unittestEnabled": true, + "python.pythonPath": "/home/raelon/.virtualenvs/spyder/bin/python" } \ No newline at end of file diff --git a/__pycache__/library.cpython-37.pyc b/__pycache__/library.cpython-37.pyc index e712bf98823de73ce931c6c9ebf547104f0de663..a2d2151e63efae2532dbbcfc82f778073acba5d2 100644 GIT binary patch delta 1439 zcmZ`(&u`pB6rQnX{A+(CP1-6z8@3fqYoN;(2~kC*LZq}kpokKr1k-47cD$*RU9UTK zAYF}Fq(mHvBHuXe{sA2L1CWroa^mQ}0?u4`&r6q3gst~x=gs$Kp1*J2epCDHeEox3 z%^~po6n?k;yM0<;qW4a(FJGfQ-O~jTT95U)$0-T;5%JgqB6PuyiC{;XXPoZ5Ngtk` ztXwyQc}zS@%nMsM$JDb$MN}c;h?=OwSP^p@#69=puN_Kjc}x3)dihnoMK9(X`WvH; zh6TXG!75;DV96G^l#HpO6PjsCJ9$6TpVgH<)&xW-l(s<_N%eh4KBOdLlnkpWQ|t)W z^bH~)_BGogpOHx0BAYOOL_Q%~FhVJ8eOT!XqBMxo=4cRiKfUkr!W@QiH|zz4>4ouq z_#q9bLDs+(JeFaUx?IlTjO`2t`yOux@xhMgltX(E2T?%-c@A?*w}4PP>G)9_@vp)( znFmLxLk+4ChcG%%thdQ1}{~l zZH6S%6&3ovCRJ*s_MU?xowPD`cnOf^J+i%!aYe=z#V1thir*nm*EdO89oJySCK^SS zv?)uGfjPh;u?b!`ZsX?N#y{2LcP9VEBJ@@XO@dyxS)xAQ zp5azwU;6!q4ARv`3peb@z)yolr{^b0BbGp|OrN~~|8R=Gbq2qJp#vHHU*gbs+(gON zOKuDLm0Qo3*xIP5!Wo?)NlKk0@?0=+(2reXs<#gN7EA`ahu&g|Y4^e;Z3l<3ABjLn zR8=<6{abxmhuAf+SHDtw2yJvR2(kAN@hZgD?^!k2IN}GlB;S2 zNiUJHsjNK$7V@}mB&p8sm|cbK)SZP3`=qu*R3$> z4v;AM8pe!=Qcdrz7Sx4-rPh=Igl~>WB&Jxd)xAQ;wrMK`eHNl_-hgf#qwhU*{nv*(& cT(i7k@&dw3W-jON8Sl*FZde<>Rch#e0eodtg#Z8m delta 476 zcmYLG%}N|W5U%QOXS-*2CW)*uNQ^mz$Y2mJqKJ5K4|)gz^$;_to7EnFcGv9EJBuPK z2(woSOCP-IBlrMb^8h*SAtztJH%RpaOch`8)erSm)4$7CLAV+OK9D_R*SotTgYXS5 z52oKuvL-(ju|P;f1~Fd)&9&%*78#322K7(4IJo&ZUeX0E`xr~DwBLtV>Eaed!TsX_ zV$rU%Cmh-(K8I!7;4^OxlFk8hYGi_-2o*x-(Ebwd_n-%LFJt7`gKD9Nnl+f&NoA_) zzUCSGS-#|H@wP;Bg&x0S2wy?N25iDM{P0|Li*hR3o&lF)GkP@<0$6hoX~7q2U|wAO4Lb(7h{ zlV+Nj>VB)$=-5m79Y5GV@-t4`8MT7Gt*eD^v2xvNyOtiC679-Yo5->MnmNmbIW^q1 j9rr&H1xghv-#4xvdHF-k7;!UY1AjjJHznz`5_\n", "\n", - "\n" - ] - }, - { - "ename": "KeyError", - "evalue": "\"There is no item named 'content.opf' in the archive\"", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mbook\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mbookzip\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbookzip\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 12\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbookzip\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'content.opf'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;32m/usr/lib64/python3.7/zipfile.py\u001b[0m in \u001b[0;36mopen\u001b[0;34m(self, name, mode, pwd, force_zip64)\u001b[0m\n\u001b[1;32m 1468\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1469\u001b[0m \u001b[0;31m# Get info object for name\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1470\u001b[0;31m \u001b[0mzinfo\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgetinfo\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1471\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1472\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mmode\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'w'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/lib64/python3.7/zipfile.py\u001b[0m in \u001b[0;36mgetinfo\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 1396\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0minfo\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1397\u001b[0m raise KeyError(\n\u001b[0;32m-> 1398\u001b[0;31m 'There is no item named %r in the archive' % name)\n\u001b[0m\u001b[1;32m 1399\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1400\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0minfo\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mKeyError\u001b[0m: \"There is no item named 'content.opf' in the archive\"" + "\n" ] } ], "source": [ "from library import Catalogue\n", "import zipfile\n", + "import pprint as pp\n", + "import re\n", "\n", "Catalogue = Catalogue()\n", "book_list = Catalogue.filter_books(1)\n", @@ -39,8 +27,14 @@ "for book in book_list:\n", " book = zipfile.ZipFile(book, 'r')\n", " with book as bookzip:\n", - " print(bookzip)\n", - " print(bookzip.open('content.opf'))" + " try:\n", + " content_opf = bookzip.open('content.opf')\n", + " print(content_opf)\n", + " except KeyError as e:\n", + " expanded = bookzip.infolist()\n", + " regx1 = re.compile(r'\\.opf|^cover')\n", + " for i in expanded:\n", + " if re.search(regx1, str(i)) == True: pp.pprint(i.filename); print(res)" ] }, { diff --git a/scratchpad.py b/scratchpad.py new file mode 100644 index 0000000..8894cef --- /dev/null +++ b/scratchpad.py @@ -0,0 +1,29 @@ +# To add a new cell, type '#%%' +# To add a new markdown cell, type '#%% [markdown]' +#%% +from library import Catalogue +import zipfile +import pprint as pp +import re + +Catalogue = Catalogue() +book_list = Catalogue.filter_books(1) +unpacked = [] + +for book in book_list: + book = zipfile.ZipFile(book, 'r') + with book as bookzip: + try: + content_opf = bookzip.open('content.opf') + print(content_opf) + except KeyError as e: + expanded = bookzip.infolist() + regx1 = re.compile(r'\.opf|^cover') + for i in expanded: + if re.search(regx1, str(i)) == True: pp.pprint(i.filename); print(res) + + +#%% + + +