commit
bb15b48569
|
@ -107,8 +107,10 @@ def dl_online(ebook, limiter=rl.delay):
|
|||
# staple the chapters
|
||||
pdflinks = [urlparse.urljoin(base, a['href']) for a in doc.select('a.pdf-link')]
|
||||
if pdflinks:
|
||||
made = make_stapled_ebook(pdflinks, ebook, user_agent=settings.GOOGLEBOT_UA)
|
||||
if made:
|
||||
stapled = make_stapled_ebook(pdflinks, ebook, user_agent=settings.GOOGLEBOT_UA)
|
||||
if stapled:
|
||||
return stapled
|
||||
elif made:
|
||||
return made
|
||||
else:
|
||||
logger.warning('couldn\'t get dl_url for %s', ebook.url)
|
||||
|
|
|
@ -368,7 +368,7 @@ ID_URLPATTERNS = {
|
|||
|
||||
def ids_from_urls(url):
|
||||
ids = {}
|
||||
for ident, pattern in ID_URLPATTERNS:
|
||||
for ident, pattern in ID_URLPATTERNS.items():
|
||||
id_match = pattern.search(url)
|
||||
if id_match:
|
||||
ids[ident] = id_match.group('id')
|
||||
|
|
|
@ -65,9 +65,10 @@ def test_pdf(pdf_file):
|
|||
|
||||
def staple_pdf(urllist, user_agent=settings.USER_AGENT):
|
||||
merger = PdfFileMerger(strict=False)
|
||||
s = requests.Session()
|
||||
for url in urllist:
|
||||
try:
|
||||
response = requests.get(url, headers={"User-Agent": user_agent})
|
||||
response = s.get(url, headers={"User-Agent": user_agent})
|
||||
except requests.exceptions.ConnectionError:
|
||||
logger.error("Error getting url: %s", url)
|
||||
return None
|
||||
|
@ -80,7 +81,11 @@ def staple_pdf(urllist, user_agent=settings.USER_AGENT):
|
|||
else:
|
||||
return None
|
||||
out = BytesIO()
|
||||
merger.write(out)
|
||||
try:
|
||||
merger.write(out)
|
||||
except PdfReadError:
|
||||
logger.error("error writing pdf url: %s", url)
|
||||
return None
|
||||
return out
|
||||
|
||||
def test_test_pdf():
|
||||
|
|
Loading…
Reference in New Issue