Merge pull request #798 from Gluejar/dj111-errata

really fix pyepub
2018-07-29 21:07:24 -04:00 · 2018-07-29 21:07:24 -04:00 · c1ecedcb58
parent 977d9025b6 7d2b052ef0
commit c1ecedcb58
2 changed files with 10 additions and 3 deletions
--- a/core/validation.py
+++ b/core/validation.py
@@ -4,6 +4,7 @@ methods to validate and clean identifiers
 '''
 import re
 import datetime
+import logging

 from dateutil.parser import parse
 from PyPDF2 import PdfFileReader
@@ -15,6 +16,8 @@ from regluit.pyepub import EPUB
 from regluit.mobi import Mobi
 from .isbn import ISBN

+logger = logging.getLogger(__name__)
+
 ID_VALIDATION = {
    'http': (re.compile(r"(https?|ftp)://(-\.)?([^\s/?\.#]+\.?)+(/[^\s]*)?$",
                        flags=re.IGNORECASE|re.S),
@@ -105,17 +108,21 @@ def test_file(the_file, fformat):
            try:
                book = EPUB(the_file.file)
            except Exception as e:
+                logger.exception(e)
                raise ValidationError(_('Are you sure this is an EPUB file?: %s' % e))
        elif fformat == 'mobi':
            try:
                book = Mobi(the_file.file)
                book.parse()
            except Exception as e:
-                raise ValidationError(_('Are you sure this is a MOBI file?: %s' % e))
+                logger.exception(e)
+                #raise ValidationError(_('Are you sure this is a MOBI file?: %s' % e))
+                raise e
        elif fformat == 'pdf':
            try:
                PdfFileReader(the_file.file)
-            except Exception, e:
+            except Exception as e:
+                logger.exception(e)
                raise ValidationError(_('%s is not a valid PDF file' % the_file.name))
    return True

--- a/pyepub/__init__.py
+++ b/pyepub/__init__.py
@@ -107,7 +107,7 @@ class EPUB(zipfile.ZipFile):

        # Iterate over <metadata> section, fill EPUB.info["metadata"] dictionary
        for i in self.opf.find("{0}metadata".format(NAMESPACE["opf"])):
-            if i.tag:
+            if i.tag and isinstance(i.tag, str):
                tag = ns.sub('', i.tag)
                if tag not in self.info["metadata"]:
                    self.info["metadata"][tag] = i.text or i.attrib