Merge remote-tracking branch 'Gluejar/master' into production

pull/91/head
eric 2018-04-27 11:10:09 -04:00
commit 8a9260532e
2 changed files with 36 additions and 20 deletions

View File

@ -16,10 +16,10 @@ from .smashwords import SmashwordsScraper
def get_scraper(url):
scrapers = [
PressbooksScraper,
HathitrustScraper,
SpringerScraper,
UbiquityScraper,
SmashwordsScraper,
HathitrustScraper,
BaseScraper,
]
for scraper in scrapers:

View File

@ -26,10 +26,11 @@ class HathitrustScraper(BaseScraper):
for record in records:
self.record = record
return
self.record = {}
self.record = None # probably a hdl not pointing at Hathitrust
self.record = None
def get_downloads(self):
if self.record:
dl_a = self.doc.select_one('#fullPdfLink')
value = dl_a['href'] if dl_a else None
if value:
@ -37,27 +38,42 @@ class HathitrustScraper(BaseScraper):
'download_url_{}'.format('pdf'),
'https://babel.hathitrust.org{}'.format(value)
)
return super(HathitrustScraper, self).get_downloads()
def get_isbns(self):
    """Return ISBN data taken from the loaded record, else defer to the base class.

    When ``self.record`` is truthy, the record's ``'issn'`` entry (default
    ``[]``) is passed through ``identifier_cleaner('isbn', quiet=True)``.
    A truthy cleaned value is returned as ``{'print': value}``; otherwise
    an empty dict is returned. Without a record, the result of the parent
    class's ``get_isbns`` is returned.
    """
    # No record loaded: let the generic scraper handle it.
    if not self.record:
        return super(HathitrustScraper, self).get_isbns()

    # NOTE(review): the ISBN is read from the 'issn' key -- presumably the
    # record parser files ISBN/ISSN under one field; confirm against the
    # mapping used when the record is loaded.
    raw_identifier = self.record.get('issn', [])
    clean = identifier_cleaner('isbn', quiet=True)
    cleaned = clean(raw_identifier)
    if cleaned:
        return {'print': cleaned}
    return {}
def get_title(self):
    """Store the record's title, then run the base-class title lookup.

    When ``self.record`` is truthy, its ``'title'`` entry (default ``''``)
    is stored under ``'title'`` via ``self.set``. The parent class's
    ``get_title`` is then called and its result returned.
    """
    record = self.record
    if record:
        title = record.get('title', '')
        self.set('title', title)
    # NOTE(review): indentation was reconstructed from a flattened diff;
    # the base-class call is assumed to run whether or not a record was
    # found -- confirm it does not overwrite the value set above.
    return super(HathitrustScraper, self).get_title()
def get_keywords(self):
    """Store the record's keywords as subjects, then run the base-class lookup.

    When ``self.record`` is truthy, its ``'keywords'`` entry (default
    ``[]``) is stored under ``'subjects'`` via ``self.set``. The parent
    class's ``get_keywords`` is then called and its result returned.
    """
    record = self.record
    if record:
        subjects = record.get('keywords', [])
        self.set('subjects', subjects)
    # NOTE(review): indentation was reconstructed from a flattened diff;
    # the base-class call is assumed to run in both cases -- confirm.
    return super(HathitrustScraper, self).get_keywords()
def get_publisher(self):
    """Store the record's publisher, then run the base-class lookup.

    When ``self.record`` is truthy, its ``'publisher'`` entry (default
    ``''``) is stored under ``'publisher'`` via ``self.set``. The parent
    class's ``get_publisher`` is then called and its result returned.
    """
    record = self.record
    if record:
        publisher = record.get('publisher', '')
        self.set('publisher', publisher)
    # NOTE(review): indentation was reconstructed from a flattened diff;
    # the base-class call is assumed to run in both cases -- confirm.
    return super(HathitrustScraper, self).get_publisher()
def get_pubdate(self):
    """Store the record's year as the publication date, then run the base lookup.

    When ``self.record`` is truthy, its ``'year'`` entry (default ``''``)
    is stored under ``'publication_date'`` via ``self.set``. The parent
    class's ``get_pubdate`` is then called and its result returned.
    """
    record = self.record
    if record:
        year = record.get('year', '')
        self.set('publication_date', year)
    # NOTE(review): indentation was reconstructed from a flattened diff;
    # the base-class call is assumed to run in both cases -- confirm.
    return super(HathitrustScraper, self).get_pubdate()
def get_description(self):
    """Build a description from the record's notes, then run the base lookup.

    When ``self.record`` is truthy, its ``'notes'`` entries (default
    ``[]``) are joined with a carriage return and stored under
    ``'description'`` via ``self.set``. The parent class's
    ``get_description`` is then called and its result returned.
    """
    record = self.record
    if record:
        # Notes are joined with '\r' (carriage return), not a newline.
        description = '\r'.join(record.get('notes', []))
        self.set('description', description)
    # NOTE(review): indentation was reconstructed from a flattened diff;
    # the base-class call is assumed to run in both cases -- confirm.
    return super(HathitrustScraper, self).get_description()
def get_genre(self):
    """Store the record's reference type as the genre, then run the base lookup.

    When ``self.record`` is truthy, its ``'type_of_reference'`` entry
    (default ``''``) is lower-cased and stored under ``'genre'`` via
    ``self.set``. The parent class's ``get_genre`` is then called and its
    result returned.
    """
    record = self.record
    if record:
        reference_type = record.get('type_of_reference', '')
        self.set('genre', reference_type.lower())
    # NOTE(review): indentation was reconstructed from a flattened diff;
    # the base-class call is assumed to run in both cases -- confirm.
    return super(HathitrustScraper, self).get_genre()