From 4b770d7a02561b818be3147b9bb8239e669fc3d3 Mon Sep 17 00:00:00 2001
From: eric
Date: Mon, 10 May 2021 13:53:38 -0400
Subject: [PATCH] add orkana, set dykinson as bookshop

---
 core/loaders/harvest.py | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/core/loaders/harvest.py b/core/loaders/harvest.py
index c93a31bd..03259db7 100644
--- a/core/loaders/harvest.py
+++ b/core/loaders/harvest.py
@@ -115,6 +115,7 @@ STOREPROVIDERS = [
     "cabi.org",
     "cdcshoppingcart.uchicago.edu",
     "checkout.sas.ac.uk",
+    "dykinson.com",
     "epubli.de",
     "iospress.nl",
     "karolinum.cz",
@@ -198,7 +199,7 @@ def harvesters(ebook):
     yield ebook.provider == 'esv.info', harvest_esv
     yield ebook.provider == 'fulcrum.org', harvest_fulcrum
     yield ebook.provider in ('epress.lib.uts.edu.au', 'utsepress.lib.uts.edu.au'), harvest_ubiquity
-    
+    yield ebook.provider == 'orkana.no', harvest_orkana
 
 def ebf_if_harvested(url):
     onlines = models.EbookFile.objects.filter(source=url)
@@ -980,4 +981,16 @@ def harvest_ubiquity(ebook):
         return doc.find_all('a', attrs={'data-category': re.compile('(epub|mobi|pdf) download')})
     return harvest_multiple_generic(ebook, selector)
 
-    
\ No newline at end of file
+
+def harvest_orkana(ebook):
+    """Harvest an orkana.no ebook using the links next to its format labels."""
+    def selector(doc):
+        # A format label is a <p> whose string matches "(PDF)" or "(E-BOK)";
+        # its link is the first <a> in the <div> following the label's <div>.
+        for label in doc.find_all('p', string=re.compile(r'\((PDF|E-BOK)\)')):
+            div = label.find_parent('div')
+            sibling = div.find_next_sibling('div') if div else None
+            link = sibling.find('a') if sibling else None
+            if link:
+                yield link
+    return harvest_multiple_generic(ebook, selector)