Merge remote-tracking branch 'Gluejar/master' into production
commit
8a9260532e
|
@ -16,10 +16,10 @@ from .smashwords import SmashwordsScraper
|
|||
def get_scraper(url):
|
||||
scrapers = [
|
||||
PressbooksScraper,
|
||||
HathitrustScraper,
|
||||
SpringerScraper,
|
||||
UbiquityScraper,
|
||||
SmashwordsScraper,
|
||||
HathitrustScraper,
|
||||
BaseScraper,
|
||||
]
|
||||
for scraper in scrapers:
|
||||
|
|
|
@ -26,10 +26,11 @@ class HathitrustScraper(BaseScraper):
|
|||
for record in records:
|
||||
self.record = record
|
||||
return
|
||||
self.record = {}
|
||||
|
||||
self.record = None # probably a hdl not pointing at Hathitrust
|
||||
self.record = None
|
||||
|
||||
def get_downloads(self):
|
||||
if self.record:
|
||||
dl_a = self.doc.select_one('#fullPdfLink')
|
||||
value = dl_a['href'] if dl_a else None
|
||||
if value:
|
||||
|
@ -37,27 +38,42 @@ class HathitrustScraper(BaseScraper):
|
|||
'download_url_{}'.format('pdf'),
|
||||
'https://babel.hathitrust.org{}'.format(value)
|
||||
)
|
||||
return super(HathitrustScraper, self).get_downloads()
|
||||
|
||||
def get_isbns(self):
    """Return the ISBNs for this work.

    When a catalog record is available, the record's 'issn' entry is run
    through the 'isbn' identifier cleaner and, if anything survives,
    returned as ``{'print': value}`` (otherwise an empty dict). Without a
    record, the answer comes from BaseScraper.get_isbns().
    """
    record = self.record
    if not record:
        # No record to read from: defer entirely to the base scraper.
        return super(HathitrustScraper, self).get_isbns()

    # NOTE(review): the value is read from the 'issn' key but cleaned as an
    # ISBN -- presumably the record parser files both identifiers under
    # that key; confirm against the parser's field mapping.
    raw_identifier = record.get('issn', [])
    clean_isbn = identifier_cleaner('isbn', quiet=True)
    cleaned = clean_isbn(raw_identifier)
    if cleaned:
        return {'print': cleaned}
    return {}
|
||||
|
||||
def get_title(self):
    """Record the title, then defer to BaseScraper.get_title().

    If a catalog record is available, its 'title' entry (empty string when
    absent) is stored under 'title' before the base implementation runs.
    """
    record = self.record
    if record:
        title = record.get('title', '')
        self.set('title', title)
    # The base scraper runs whether or not a record was available.
    return super(HathitrustScraper, self).get_title()
|
||||
|
||||
def get_keywords(self):
    """Record the subjects, then defer to BaseScraper.get_keywords().

    If a catalog record is available, its 'keywords' entry (empty list when
    absent) is stored under 'subjects' before the base implementation runs.
    """
    record = self.record
    if record:
        subjects = record.get('keywords', [])
        self.set('subjects', subjects)
    # The base scraper runs whether or not a record was available.
    return super(HathitrustScraper, self).get_keywords()
|
||||
|
||||
def get_publisher(self):
    """Record the publisher, then defer to BaseScraper.get_publisher().

    If a catalog record is available, its 'publisher' entry (empty string
    when absent) is stored under 'publisher' before the base implementation
    runs.
    """
    record = self.record
    if record:
        publisher = record.get('publisher', '')
        self.set('publisher', publisher)
    # The base scraper runs whether or not a record was available.
    return super(HathitrustScraper, self).get_publisher()
|
||||
|
||||
def get_pubdate(self):
    """Record the publication date, then defer to BaseScraper.get_pubdate().

    If a catalog record is available, its 'year' entry (empty string when
    absent) is stored under 'publication_date' before the base
    implementation runs.
    """
    record = self.record
    if record:
        year = record.get('year', '')
        self.set('publication_date', year)
    # The base scraper runs whether or not a record was available.
    return super(HathitrustScraper, self).get_pubdate()
|
||||
|
||||
def get_description(self):
    """Record the description, then defer to BaseScraper.get_description().

    If a catalog record is available, the items of its 'notes' entry (empty
    list when absent) are joined with carriage returns and stored under
    'description' before the base implementation runs.
    """
    record = self.record
    if record:
        note_items = record.get('notes', [])
        description = '\r'.join(note_items)
        self.set('description', description)
    # The base scraper runs whether or not a record was available.
    return super(HathitrustScraper, self).get_description()
|
||||
|
||||
def get_genre(self):
    """Record the genre, then defer to BaseScraper.get_genre().

    If a catalog record is available, its 'type_of_reference' entry (empty
    string when absent) is lower-cased and stored under 'genre' before the
    base implementation runs.
    """
    record = self.record
    if record:
        reference_type = record.get('type_of_reference', '')
        self.set('genre', reference_type.lower())
    # The base scraper runs whether or not a record was available.
    return super(HathitrustScraper, self).get_genre()
|
||||
|
|
Loading…
Reference in New Issue