add 4 providers

pull/94/head
eric 2020-08-10 13:40:21 -04:00
parent 904460563b
commit b6c54dbfeb
1 changed files with 38 additions and 0 deletions

View File

@ -87,6 +87,10 @@ def harvesters(ebook):
yield ebook.provider in CMPPROVIDERS, harvest_cmp
yield 'mdpi' in ebook.provider.lower(), harvest_mdpi
yield ebook.provider == 'idunn.no', harvest_idunn
yield ebook.provider == 'press.ucalgary.ca', harvest_calgary
yield ebook.provider == 'Ledizioni', harvest_badhead
yield ebook.provider == 'muse.jhu.edu', harvest_muse
yield ebook.provider == 'IOS Press Ebooks', harvest_ios
def ebf_if_harvested(url):
onlines = EbookFile.objects.filter(source=url)
@ -575,3 +579,37 @@ def harvest_idunn(ebook):
dl_url = 'https://www.idunn.no/file/pdf/%s/%s.pdf' % (prod_id, filename)
return make_dl_ebook(dl_url, ebook)
return None, 0
def harvest_calgary(ebook):
def selector(doc):
return doc.find('a', string=re.compile('Full Text'))
def chap_selector(doc):
return doc.find_all('a', href=re.compile('/bitstream/'))
return harvest_stapled_generic(ebook, selector, chap_selector, strip_covers=2)
def harvest_muse(ebook):
def chap_selector(doc):
return doc.find_all('a', href=re.compile(r'/chapter/\d+/pdf'))
return harvest_stapled_generic(ebook, None, chap_selector, strip_covers=1)
def harvest_ios(ebook):
booknum = None
doc = get_soup(ebook.url)
if doc:
obj = doc.find('link', rel='image_src', href=True)
if obj:
booknum = obj['href'].replace('http://ebooks.iospress.nl/Cover/', '')
if booknum:
dl_url = 'http://ebooks.iospress.nl/Download/Pdf?id=%s' % booknum
return make_dl_ebook(dl_url, ebook, method='POST')
else:
logger.warning('couldn\'t get booknum for %s', ebook.url)
else:
logger.warning('couldn\'t get link for %s', ebook.url)
else:
logger.warning('couldn\'t get soup for %s', ebook.url)
return None, 0