fix bookloader when a bookdata is empty

pull/94/head^2
eric 2024-11-22 14:34:17 -05:00
parent 37a5486abe
commit 7bd6fba096
3 changed files with 21 additions and 17 deletions

View File

@@ -1113,13 +1113,14 @@ def add_from_bookdatas(bookdatas):
editions = [] editions = []
for bookdata in bookdatas: for bookdata in bookdatas:
edition = work = None edition = work = None
loader = BasePandataLoader(bookdata.base) if bookdata and bookdata.metadata:
pandata = Pandata() loader = BasePandataLoader(bookdata.base)
pandata.metadata = bookdata.metadata pandata = Pandata()
for metadata in pandata.get_edition_list(): pandata.metadata = bookdata.metadata
edition = loader.load_from_pandata(metadata, work) for metadata in pandata.get_edition_list():
work = edition.work edition = loader.load_from_pandata(metadata, work)
loader.load_ebooks(pandata, edition) work = edition.work
if edition: loader.load_ebooks(pandata, edition)
editions.append(edition) if edition:
editions.append(edition)
return editions return editions

View File

@@ -65,13 +65,16 @@ class BaseScraper(object):
for review in self.doc.find_all(itemtype="http://schema.org/Review"): for review in self.doc.find_all(itemtype="http://schema.org/Review"):
review.clear() review.clear()
self.get_all() self.get_all()
if not self.metadata.get('title', None): if not self.metadata.get('title', None):
self.set('title', '!!! missing title !!!') self.set('title', '!!! missing title !!!')
if not self.metadata.get('language', None): if not self.metadata.get('language', None):
self.set('language', 'en') self.set('language', 'en')
self.metadata['identifiers'] = self.identifiers
else:
self.metadata = None
else: else:
self.metadata = {} self.metadata = None
self.metadata['identifiers'] = self.identifiers
# #
# utilities # utilities

View File

@@ -146,9 +146,9 @@ class SpringerScraper(BaseScraper):
self.set('rights_url', lic_url) self.set('rights_url', lic_url)
def get_pubdate(self): def get_pubdate(self):
pubinfo = self.doc.select_one('#copyright-info') pubinfo = self.doc.find(attrs={"data-test": "electronic_isbn_publication_date"})
if not pubinfo: if not pubinfo:
pubinfo = self.doc.select_one('header .c-article-identifiers') pubinfo = self.doc.find(attrs={"data-test": "softcover_isbn_publication_date"})
if pubinfo: if pubinfo:
for yearstring in pubinfo.stripped_strings: for yearstring in pubinfo.stripped_strings:
yearmatch = HAS_YEAR.search(yearstring) yearmatch = HAS_YEAR.search(yearstring)