Merge pull request #832 from Gluejar/more_online

bugfix
pull/94/head
eshellman 2019-03-05 12:20:17 -05:00 committed by GitHub
commit bb15b48569
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 12 additions and 5 deletions

View File

@@ -107,8 +107,10 @@ def dl_online(ebook, limiter=rl.delay):
# staple the chapters
pdflinks = [urlparse.urljoin(base, a['href']) for a in doc.select('a.pdf-link')]
if pdflinks:
made = make_stapled_ebook(pdflinks, ebook, user_agent=settings.GOOGLEBOT_UA)
if made:
stapled = make_stapled_ebook(pdflinks, ebook, user_agent=settings.GOOGLEBOT_UA)
if stapled:
return stapled
elif made:
return made
else:
logger.warning('couldn\'t get dl_url for %s', ebook.url)

View File

@@ -368,7 +368,7 @@ ID_URLPATTERNS = {
def ids_from_urls(url):
ids = {}
for ident, pattern in ID_URLPATTERNS:
for ident, pattern in ID_URLPATTERNS.items():
id_match = pattern.search(url)
if id_match:
ids[ident] = id_match.group('id')

View File

@@ -65,9 +65,10 @@ def test_pdf(pdf_file):
def staple_pdf(urllist, user_agent=settings.USER_AGENT):
merger = PdfFileMerger(strict=False)
s = requests.Session()
for url in urllist:
try:
response = requests.get(url, headers={"User-Agent": user_agent})
response = s.get(url, headers={"User-Agent": user_agent})
except requests.exceptions.ConnectionError:
logger.error("Error getting url: %s", url)
return None
@@ -80,7 +81,11 @@ def staple_pdf(urllist, user_agent=settings.USER_AGENT):
else:
return None
out = BytesIO()
merger.write(out)
try:
merger.write(out)
except PdfReadError:
logger.error("error writing pdf url: %s", url)
return None
return out
def test_test_pdf():