From 1f2b223c0f5703f385738759e732995c1712ebac Mon Sep 17 00:00:00 2001
From: eric
Date: Tue, 28 Jul 2020 20:59:13 -0400
Subject: [PATCH] add frontiersin harvester

---
 core/loaders/harvest.py | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/core/loaders/harvest.py b/core/loaders/harvest.py
index 67150fb9..fddba150 100644
--- a/core/loaders/harvest.py
+++ b/core/loaders/harvest.py
@@ -61,6 +61,7 @@ def harvesters(ebook):
     yield ebook.provider == 'ksp.kit.edu', harvest_ksp
     yield ebook.provider == 'digitalis.uc.pt', harvest_digitalis
     yield ebook.provider == 'nomos-elibrary.de', harvest_nomos
+    yield ebook.provider == 'frontiersin.org', harvest_frontiersin
 
 
 def ebf_if_harvested(url):
@@ -313,3 +314,39 @@ def harvest_nomos(ebook):
         logger.warning('couldn\'t get soup for %s', ebook.url)
     return None, 0
 
+
+def harvest_frontiersin(ebook):
+    """Harvest the files behind ``button[data-href]`` on a frontiersin.org page.
+
+    Fetches ``ebook.url`` with ``get_soup`` and passes the ``data-href`` of
+    each matching button to ``make_dl_ebook``.
+
+    Returns a ``(harvested, num)`` tuple: the most recent non-None ebook
+    returned by ``make_dl_ebook`` (``None`` if there was none) and the sum
+    of the counts it reported.
+    """
+    doc = get_soup(ebook.url)
+    if not doc:
+        # Same message harvest_nomos logs when the page can't be fetched.
+        logger.warning('couldn\'t get soup for %s', ebook.url)
+        return None, 0
+
+    num = 0
+    harvested = None
+    # Hoisted: the user agent is the same for every download on the page.
+    user_agent = requests.utils.default_user_agent()
+    for button in doc.select('button[data-href]'):
+        new_ebook, made = make_dl_ebook(
+            button['data-href'],
+            ebook,
+            user_agent=user_agent,
+        )
+        if new_ebook is not None:
+            # Don't let a later unsuccessful call discard an earlier result.
+            # NOTE(review): assumes make_dl_ebook yields None when it has
+            # nothing to return -- confirm against its definition.
+            harvested = new_ebook
+        num += made
+    if num == 0:
+        logger.warning('couldn\'t get any dl_url for %s', ebook.url)
+    return harvested, num