def harvest_digitalis(ebook):
    """Harvest the PDF advertised by a digitalis.uc.pt landing page.

    Fetches ``ebook.url`` with ``get_soup`` and looks for a
    ``<meta name="citation_pdf_url" content="...">`` tag. The tag's
    ``content`` value is resolved against ``ebook.url`` and passed to
    ``make_dl_ebook``.

    Returns the result of ``make_dl_ebook(dl_url, ebook)`` when a download
    URL is found; otherwise logs a warning and returns ``(None, 0)``.
    """
    doc = get_soup(ebook.url)
    if not doc:
        logger.warning('couldn\'t get soup for %s', ebook.url)
        return None, 0

    obj = doc.find('meta', attrs={"name": "citation_pdf_url"})
    content = obj.get('content', None) if obj else None
    # Check the raw attribute *before* joining: urljoin(base, None) and
    # urljoin(base, '') both return the base unchanged, which would make the
    # landing page itself look like a valid download URL.
    if not content:
        logger.warning('couldn\'t get dl_url for %s', ebook.url)
        return None, 0

    return make_dl_ebook(urljoin(ebook.url, content), ebook)