commit
fb320fc510
|
@ -61,12 +61,13 @@ CMPPROVIDERS = [
|
|||
'teiresias-supplements.mcgill.ca',
|
||||
'humanities-digital-library.org',
|
||||
'editorial.uniagustiniana.edu.co',
|
||||
'monographs.uc.pt',
|
||||
]
|
||||
|
||||
|
||||
def harvesters(ebook):
|
||||
yield ebook.url.find(u'dropbox.com/s/') >= 0, harvest_dropbox
|
||||
yield ebook.url.find(u'jbe-platform.com/content/books/') >= 0, harvest_jbe
|
||||
yield 'dropbox.com/s/' in ebook.url, harvest_dropbox
|
||||
yield 'jbe-platform.com/content/books/' in ebook.url, harvest_jbe
|
||||
yield ebook.provider == u'De Gruyter Online', harvest_degruyter
|
||||
yield OPENBOOKPUB.search(ebook.url), harvest_obp
|
||||
yield ebook.provider == 'Transcript-Verlag', harvest_transcript
|
||||
|
@ -74,16 +75,18 @@ def harvesters(ebook):
|
|||
yield ebook.provider == 'digitalis.uc.pt', harvest_digitalis
|
||||
yield ebook.provider == 'nomos-elibrary.de', harvest_nomos
|
||||
yield ebook.provider == 'frontiersin.org', harvest_frontiersin
|
||||
yield ebook.url.find('link.springer') >= 0, harvest_springerlink
|
||||
yield 'link.springer' in ebook.url, harvest_springerlink
|
||||
yield ebook.provider == 'OAPEN Library', harvest_oapen
|
||||
yield ebook.provider == 'pulp.up.ac.za', harvest_pulp
|
||||
yield ebook.provider == 'bloomsburycollections.com', harvest_bloomsbury
|
||||
yield ebook.provider == 'Athabasca University Press', harvest_athabasca
|
||||
yield ebook.url.find('digitalcommons.usu.edu') > 0, harvest_usu
|
||||
yield 'digitalcommons.usu.edu' in ebook.url, harvest_usu
|
||||
yield ebook.provider == 'libros.fahce.unlp.edu.ar', harvest_fahce
|
||||
yield ebook.provider == 'digital.library.unt.edu', harvest_unt
|
||||
yield ebook.provider == 'diposit.ub.edu', harvest_ub
|
||||
yield ebook.provider in CMPPROVIDERS, harvest_cmp
|
||||
yield 'mdpi' in ebook.provider.lower(), harvest_mdpi
|
||||
yield ebook.provider == 'idunn.no', harvest_idunn
|
||||
|
||||
def ebf_if_harvested(url):
|
||||
onlines = EbookFile.objects.filter(source=url)
|
||||
|
@ -344,7 +347,7 @@ def harvest_nomos(ebook):
|
|||
dl_url = urljoin(ebook.url, obj['href'])
|
||||
return make_dl_ebook(dl_url, ebook)
|
||||
else:
|
||||
logger.warning('will try stabling a book for %s', ebook.url)
|
||||
logger.warning('will try stapling a book for %s', ebook.url)
|
||||
|
||||
# staple the chapters
|
||||
chaps = doc.select('li.access[data-doi]')
|
||||
|
@ -495,7 +498,7 @@ def harvest_cmp(ebook):
|
|||
return doc.select('a.cmp_download_link[href]')
|
||||
def dl(url):
|
||||
return url.replace('view', 'download') + '?inline=1'
|
||||
if ebook.url.find('/view/') >= 0:
|
||||
if '/view/' in ebook.url:
|
||||
return make_dl_ebook(dl(ebook.url), ebook)
|
||||
return harvest_multiple_generic(ebook, selector, dl=dl)
|
||||
|
||||
|
@ -514,4 +517,23 @@ def harvest_unt(ebook):
|
|||
return harvest_one_generic(ebook, selector)
|
||||
|
||||
|
||||
def harvest_mdpi(ebook):
|
||||
def selector(doc):
|
||||
return doc.select_one('div.main-download-container a[alt=download]')
|
||||
if 'http://books.mdpi.com' in ebook.url:
|
||||
ebook.delete()
|
||||
return None, 0
|
||||
return harvest_one_generic(ebook, selector)
|
||||
|
||||
|
||||
def harvest_idunn(ebook):
|
||||
doc = get_soup(ebook.url)
|
||||
if doc:
|
||||
obj = doc.select_one('#accessinfo[data-product-id]')
|
||||
if obj:
|
||||
prod_id = obj['data-product-id']
|
||||
filename = obj.get('data-issue-pdf-url', ebook.url[:21])
|
||||
if prod_id and filename:
|
||||
dl_url = 'https://www.idunn.no/file/pdf/%s/%s.pdf' % (prod_id, filename)
|
||||
return make_dl_ebook(dl_url, ebook)
|
||||
return None, 0
|
||||
|
|
|
@ -61,6 +61,8 @@ def get_soup(url, user_agent=settings.USER_AGENT):
|
|||
else:
|
||||
logger.error('No head for %s', url)
|
||||
return soup
|
||||
else:
|
||||
logger.error('%s returned code %s', url, response.status_code)
|
||||
return None
|
||||
|
||||
def get_authors(book):
|
||||
|
|
Loading…
Reference in New Issue