add digitalis harvest

pull/94/head
eric 2020-07-28 20:58:25 -04:00
parent 19d39cf4a6
commit 066b81fb74
1 changed files with 15 additions and 0 deletions

View File

@ -59,6 +59,7 @@ def harvesters(ebook):
yield OPENBOOKPUB.search(ebook.url), harvest_obp
yield ebook.provider == 'Transcript-Verlag', harvest_transcript
yield ebook.provider == 'ksp.kit.edu', harvest_ksp
yield ebook.provider == 'digitalis.uc.pt', harvest_digitalis
def ebf_if_harvested(url):
@ -269,3 +270,17 @@ def harvest_ksp(ebook):
logger.warning('couldn\'t get soup for %s', ebook.url)
return None, 0
def harvest_digitalis(ebook):
    """Harvest an ebook using the page's ``citation_pdf_url`` meta tag.

    Loads ``ebook.url`` through ``get_soup`` and looks for
    ``<meta name="citation_pdf_url" content="...">``. The ``content`` value
    is resolved against ``ebook.url`` and handed to ``make_dl_ebook``.

    Returns the result of ``make_dl_ebook(dl_url, ebook)`` when a download
    URL is found, otherwise ``(None, 0)`` after logging a warning.
    """
    doc = get_soup(ebook.url)
    if not doc:
        logger.warning('couldn\'t get soup for %s', ebook.url)
        return None, 0

    obj = doc.find('meta', attrs={"name": "citation_pdf_url"})
    # Validate the raw attribute *before* joining: urljoin() hands back the
    # base URL unchanged when the reference is None or empty, so checking the
    # joined value would never detect a missing link and the HTML landing
    # page itself would be passed on as if it were the download.
    href = obj.get('content', None) if obj else None
    if not href:
        logger.warning('couldn\'t get dl_url for %s', ebook.url)
        return None, 0

    return make_dl_ebook(urljoin(ebook.url, href), ebook)