add bloomsbury harvest
parent
9d98b2a0cb
commit
f793914279
|
@ -65,6 +65,7 @@ def harvesters(ebook):
|
|||
yield ebook.url.find('link.springer') >= 0, harvest_springerlink
|
||||
yield ebook.provider == 'OAPEN Library', harvest_oapen
|
||||
yield ebook.provider == 'pulp.up.ac.za', harvest_pulp
|
||||
yield ebook.provider == 'bloomsburycollections.com', harvest_bloomsbury
|
||||
|
||||
def ebf_if_harvested(url):
|
||||
onlines = EbookFile.objects.filter(source=url)
|
||||
|
@ -110,8 +111,8 @@ def make_dl_ebook(url, ebook, user_agent=settings.USER_AGENT, method='GET'):
|
|||
logger.warning('couldn\'t get %s', url)
|
||||
return None, 0
|
||||
|
||||
def make_stapled_ebook(urllist, ebook, user_agent=settings.USER_AGENT):
|
||||
pdffile = staple_pdf(urllist, user_agent)
|
||||
def make_stapled_ebook(urllist, ebook, user_agent=settings.USER_AGENT, strip_covers=False):
|
||||
pdffile = staple_pdf(urllist, user_agent, strip_covers=strip_covers)
|
||||
if not pdffile:
|
||||
return None, 0
|
||||
return make_harvested_ebook(pdffile.getvalue(), ebook, 'pdf')
|
||||
|
@ -411,7 +412,27 @@ def harvest_pulp(ebook):
|
|||
if made == 0:
|
||||
logger.warning('couldn\'t get any dl_url for %s or %s', ebook.url, dl_url)
|
||||
return harvested, made
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
def harvest_bloomsbury(ebook):
|
||||
doc = get_soup(ebook.url)
|
||||
if doc:
|
||||
pdflinks = []
|
||||
try:
|
||||
base = doc.find('base')['href']
|
||||
except:
|
||||
base = ebook.url
|
||||
for obj in doc.select('li.pdf-chapter--title a[href]'):
|
||||
if obj:
|
||||
chap = urljoin(base, obj['href']) + '.pdf?dl'
|
||||
pdflinks.append(chap)
|
||||
if pdflinks:
|
||||
stapled = make_stapled_ebook(pdflinks, ebook, strip_covers=True)
|
||||
if stapled:
|
||||
return stapled
|
||||
else:
|
||||
logger.warning('couldn\'t staple %s', pdflinks)
|
||||
else:
|
||||
logger.warning('couldn\'t get soup for %s', ebook.url)
|
||||
return None, 0
|
||||
|
||||
|
|
Loading…
Reference in New Issue