From b6c54dbfeb767c7fd6d3243dc5ca36f516791d7f Mon Sep 17 00:00:00 2001
From: eric
Date: Mon, 10 Aug 2020 13:40:21 -0400
Subject: [PATCH] add 4 providers

---
 core/loaders/harvest.py | 38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/core/loaders/harvest.py b/core/loaders/harvest.py
index 11e1d697..4ad44734 100644
--- a/core/loaders/harvest.py
+++ b/core/loaders/harvest.py
@@ -87,6 +87,10 @@ def harvesters(ebook):
     yield ebook.provider in CMPPROVIDERS, harvest_cmp
     yield 'mdpi' in ebook.provider.lower(), harvest_mdpi
     yield ebook.provider == 'idunn.no', harvest_idunn
+    yield ebook.provider == 'press.ucalgary.ca', harvest_calgary
+    yield ebook.provider == 'Ledizioni', harvest_badhead
+    yield ebook.provider == 'muse.jhu.edu', harvest_muse
+    yield ebook.provider == 'IOS Press Ebooks', harvest_ios
 
 def ebf_if_harvested(url):
     onlines = EbookFile.objects.filter(source=url)
@@ -575,3 +579,37 @@ def harvest_idunn(ebook):
             dl_url = 'https://www.idunn.no/file/pdf/%s/%s.pdf' % (prod_id, filename)
             return make_dl_ebook(dl_url, ebook)
     return None, 0
+
+
+def harvest_calgary(ebook):
+    def selector(doc):
+        return doc.find('a', string=re.compile('Full Text'))
+    def chap_selector(doc):
+        return doc.find_all('a', href=re.compile('/bitstream/'))
+    return harvest_stapled_generic(ebook, selector, chap_selector, strip_covers=2)
+
+
+def harvest_muse(ebook):
+    def chap_selector(doc):
+        return doc.find_all('a', href=re.compile(r'/chapter/\d+/pdf'))
+    return harvest_stapled_generic(ebook, None, chap_selector, strip_covers=1)
+
+
+def harvest_ios(ebook):
+    booknum = None
+    doc = get_soup(ebook.url)
+    if doc:
+        obj = doc.find('link', rel='image_src', href=True)
+        if obj:
+            booknum = obj['href'].replace('http://ebooks.iospress.nl/Cover/', '')
+            if booknum:
+                dl_url = 'http://ebooks.iospress.nl/Download/Pdf?id=%s' % booknum
+                return make_dl_ebook(dl_url, ebook, method='POST')
+            else:
+                logger.warning('couldn\'t get booknum for %s', ebook.url)
+        else:
+            logger.warning('couldn\'t get link for %s', ebook.url)
+    else:
+        logger.warning('couldn\'t get soup for %s', ebook.url)
+    return None, 0
+