Merge pull request #798 from Gluejar/dj111-errata

really fix pyepub
pull/95/head
eshellman 2018-07-29 21:07:24 -04:00 committed by GitHub
commit c1ecedcb58
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 10 additions and 3 deletions

View File

@@ -4,6 +4,7 @@ methods to validate and clean identifiers
'''
import re
import datetime
import logging
from dateutil.parser import parse
from PyPDF2 import PdfFileReader
@@ -15,6 +16,8 @@ from regluit.pyepub import EPUB
from regluit.mobi import Mobi
from .isbn import ISBN
logger = logging.getLogger(__name__)
ID_VALIDATION = {
'http': (re.compile(r"(https?|ftp)://(-\.)?([^\s/?\.#]+\.?)+(/[^\s]*)?$",
flags=re.IGNORECASE|re.S),
@@ -105,17 +108,21 @@ def test_file(the_file, fformat):
try:
book = EPUB(the_file.file)
except Exception as e:
logger.exception(e)
raise ValidationError(_('Are you sure this is an EPUB file?: %s' % e))
elif fformat == 'mobi':
try:
book = Mobi(the_file.file)
book.parse()
except Exception as e:
raise ValidationError(_('Are you sure this is a MOBI file?: %s' % e))
logger.exception(e)
#raise ValidationError(_('Are you sure this is a MOBI file?: %s' % e))
raise e
elif fformat == 'pdf':
try:
PdfFileReader(the_file.file)
except Exception, e:
except Exception as e:
logger.exception(e)
raise ValidationError(_('%s is not a valid PDF file' % the_file.name))
return True

View File

@@ -107,7 +107,7 @@ class EPUB(zipfile.ZipFile):
# Iterate over <metadata> section, fill EPUB.info["metadata"] dictionary
for i in self.opf.find("{0}metadata".format(NAMESPACE["opf"])):
if i.tag:
if i.tag and isinstance(i.tag, str):
tag = ns.sub('', i.tag)
if tag not in self.info["metadata"]:
self.info["metadata"][tag] = i.text or i.attrib