catch more pdf errors

pull/94/head
eric 2019-03-05 12:02:42 -05:00
parent cefbc7c56f
commit 9b12418ada
2 changed files with 11 additions and 4 deletions

View File

@@ -107,8 +107,10 @@ def dl_online(ebook, limiter=rl.delay):
# staple the chapters
pdflinks = [urlparse.urljoin(base, a['href']) for a in doc.select('a.pdf-link')]
if pdflinks:
made = make_stapled_ebook(pdflinks, ebook, user_agent=settings.GOOGLEBOT_UA)
if made:
stapled = make_stapled_ebook(pdflinks, ebook, user_agent=settings.GOOGLEBOT_UA)
if stapled:
return stapled
elif made:
return made
else:
logger.warning('couldn\'t get dl_url for %s', ebook.url)

View File

@@ -65,9 +65,10 @@ def test_pdf(pdf_file):
def staple_pdf(urllist, user_agent=settings.USER_AGENT):
merger = PdfFileMerger(strict=False)
s = requests.Session()
for url in urllist:
try:
response = requests.get(url, headers={"User-Agent": user_agent})
response = s.get(url, headers={"User-Agent": user_agent})
except requests.exceptions.ConnectionError:
logger.error("Error getting url: %s", url)
return None
@@ -80,7 +81,11 @@ def staple_pdf(urllist, user_agent=settings.USER_AGENT):
else:
return None
out = BytesIO()
merger.write(out)
try:
merger.write(out)
except PdfReadError:
logger.error("error writing pdf url: %s", url)
return None
return out
def test_test_pdf():