benjamins, kiel, UA
parent
46dd2fed25
commit
454acbe3ef
|
@ -82,6 +82,7 @@ STOREPROVIDERS = [
|
|||
'karolinum.cz',
|
||||
'librumstore.com',
|
||||
'logos-verlag.de',
|
||||
'universitaetsverlag.uni-kiel.de',
|
||||
'manchesteruniversitypress.co.uk',
|
||||
'mitpress.mit.edu',
|
||||
'munishop.muni.cz',
|
||||
|
|
|
@ -247,6 +247,8 @@ def harvesters(ebook):
|
|||
yield ebook.provider == 'verlag.gta.arch.ethz.ch', harvest_gta
|
||||
yield ebook.provider == 'manchesteruniversitypress.co.uk', harvest_manu
|
||||
yield ebook.provider == 'tectum-elibrary.de', harvest_tecnum
|
||||
yield ebook.provider == 'benjamins.com', harvest_benjamins
|
||||
yield ebook.provider == 'macau.uni-kiel.de', harvest_citation_meta_generic
|
||||
|
||||
|
||||
def ebf_if_harvested(url):
|
||||
|
@ -1454,6 +1456,7 @@ def harvest_sciendo(ebook):
|
|||
logger.error('No links in json for {ebook.url}')
|
||||
return harvest_multiple_generic(ebook, selector)
|
||||
|
||||
# 2step
|
||||
def harvest_liege(ebook):
|
||||
def selector(doc):
|
||||
urls = []
|
||||
|
@ -1486,4 +1489,28 @@ def harvest_liege(ebook):
|
|||
|
||||
return harvest_multiple_generic(ebook, selector)
|
||||
|
||||
# 2step
|
||||
def harvest_benjamins(ebook):
    """Harvest download links for a benjamins.com ebook in two steps.

    The document handed to the selector is searched for a link whose href
    matches ``jbe-platform.com``. That page is then fetched, and every link
    inside its ``.access-options`` element is offered as a candidate.

    Returns whatever ``harvest_multiple_generic`` returns for the ebook.
    """
    def selector(doc):
        # Step 1: locate the link to the jbe-platform.com page.
        page = doc.find('a', href=re.compile(r'jbe-platform.com'))
        if not page:
            return

        # Step 2: fetch that page and collect its access-option links.
        base = page['href']
        base_doc = get_soup(base, follow_redirects=True)
        if not base_doc:
            return

        for link in base_doc.select('.access-options a[href]'):
            # hrefs are resolved against the URL we requested.
            # NOTE(review): if get_soup followed a redirect, the final URL
            # may differ from `base` -- confirm relative links still resolve.
            yield {'href': urljoin(base, link['href'])}

    return harvest_multiple_generic(ebook, selector)
|
||||
|
||||
def harvest_citation_meta_generic(ebook):
    """Harvest files advertised through citation_* meta values.

    The selector reads ``citation_pdf_url`` and ``citation_epub_url`` via
    ``get_meta`` and yields an ``{'href': ...}`` entry for each one that is
    truthy, PDF first. Returns the result of ``harvest_multiple_generic``.
    """
    def selector(doc):
        # Both lookups happen up front, before anything is yielded.
        candidates = (
            get_meta(doc, "citation_pdf_url"),
            get_meta(doc, "citation_epub_url"),
        )
        for target in candidates:
            if target:
                yield {'href': target}

    return harvest_multiple_generic(ebook, selector)
|
||||
|
|
|
@ -503,7 +503,8 @@ FILE_UPLOAD_MAX_MEMORY_SIZE = 20971520 #20MB
|
|||
FIREFOX_PATH = ''
|
||||
CHROMEDRIVER_PATH = ''
|
||||
GOOGLEBOT_UA = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
|
||||
CHROME_UA = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36'
|
||||
CHROME_UA = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.5 Safari/605.1.15'
|
||||
|
||||
|
||||
try:
|
||||
from .keys.common import *
|
||||
|
|
Loading…
Reference in New Issue