Merge remote-tracking branch 'Gluejar/master' into production
commit
8a9260532e
|
@ -16,10 +16,10 @@ from .smashwords import SmashwordsScraper
|
||||||
def get_scraper(url):
|
def get_scraper(url):
|
||||||
scrapers = [
|
scrapers = [
|
||||||
PressbooksScraper,
|
PressbooksScraper,
|
||||||
HathitrustScraper,
|
|
||||||
SpringerScraper,
|
SpringerScraper,
|
||||||
UbiquityScraper,
|
UbiquityScraper,
|
||||||
SmashwordsScraper,
|
SmashwordsScraper,
|
||||||
|
HathitrustScraper,
|
||||||
BaseScraper,
|
BaseScraper,
|
||||||
]
|
]
|
||||||
for scraper in scrapers:
|
for scraper in scrapers:
|
||||||
|
|
|
@ -26,38 +26,54 @@ class HathitrustScraper(BaseScraper):
|
||||||
for record in records:
|
for record in records:
|
||||||
self.record = record
|
self.record = record
|
||||||
return
|
return
|
||||||
self.record = {}
|
self.record = None # probably a hdl not pointing at Hathitrust
|
||||||
|
self.record = None
|
||||||
|
|
||||||
def get_downloads(self):
|
def get_downloads(self):
|
||||||
dl_a = self.doc.select_one('#fullPdfLink')
|
if self.record:
|
||||||
value = dl_a['href'] if dl_a else None
|
dl_a = self.doc.select_one('#fullPdfLink')
|
||||||
if value:
|
value = dl_a['href'] if dl_a else None
|
||||||
self.set(
|
if value:
|
||||||
'download_url_{}'.format('pdf'),
|
self.set(
|
||||||
'https://babel.hathitrust.org{}'.format(value)
|
'download_url_{}'.format('pdf'),
|
||||||
)
|
'https://babel.hathitrust.org{}'.format(value)
|
||||||
|
)
|
||||||
|
return super(HathitrustScraper, self).get_downloads()
|
||||||
|
|
||||||
def get_isbns(self):
|
def get_isbns(self):
|
||||||
isbn = self.record.get('issn', [])
|
if self.record:
|
||||||
value = identifier_cleaner('isbn', quiet=True)(isbn)
|
isbn = self.record.get('issn', [])
|
||||||
return {'print': value} if value else {}
|
value = identifier_cleaner('isbn', quiet=True)(isbn)
|
||||||
|
return {'print': value} if value else {}
|
||||||
|
return super(HathitrustScraper, self).get_isbns()
|
||||||
|
|
||||||
def get_title(self):
|
def get_title(self):
|
||||||
self.set('title', self.record.get('title', ''))
|
if self.record:
|
||||||
|
self.set('title', self.record.get('title', ''))
|
||||||
|
return super(HathitrustScraper, self).get_title()
|
||||||
|
|
||||||
def get_keywords(self):
|
def get_keywords(self):
|
||||||
self.set('subjects', self.record.get('keywords', []))
|
if self.record:
|
||||||
|
self.set('subjects', self.record.get('keywords', []))
|
||||||
|
return super(HathitrustScraper, self).get_keywords()
|
||||||
|
|
||||||
def get_publisher(self):
|
def get_publisher(self):
|
||||||
self.set('publisher', self.record.get('publisher', ''))
|
if self.record:
|
||||||
|
self.set('publisher', self.record.get('publisher', ''))
|
||||||
|
return super(HathitrustScraper, self).get_publisher()
|
||||||
|
|
||||||
def get_pubdate(self):
|
def get_pubdate(self):
|
||||||
self.set('publication_date', self.record.get('year', ''))
|
if self.record:
|
||||||
|
self.set('publication_date', self.record.get('year', ''))
|
||||||
|
return super(HathitrustScraper, self).get_pubdate()
|
||||||
|
|
||||||
def get_description(self):
|
def get_description(self):
|
||||||
notes = self.record.get('notes', [])
|
if self.record:
|
||||||
self.set('description', '\r'.join(notes))
|
notes = self.record.get('notes', [])
|
||||||
|
self.set('description', '\r'.join(notes))
|
||||||
|
return super(HathitrustScraper, self).get_description()
|
||||||
|
|
||||||
def get_genre(self):
|
def get_genre(self):
|
||||||
self.set('genre', self.record.get('type_of_reference', '').lower())
|
if self.record:
|
||||||
|
self.set('genre', self.record.get('type_of_reference', '').lower())
|
||||||
|
return super(HathitrustScraper, self).get_genre()
|
||||||
|
|
Loading…
Reference in New Issue