fix bookloader when a bookdata is empty
parent
37a5486abe
commit
7bd6fba096
|
@ -1113,13 +1113,14 @@ def add_from_bookdatas(bookdatas):
|
||||||
editions = []
|
editions = []
|
||||||
for bookdata in bookdatas:
|
for bookdata in bookdatas:
|
||||||
edition = work = None
|
edition = work = None
|
||||||
loader = BasePandataLoader(bookdata.base)
|
if bookdata and bookdata.metadata:
|
||||||
pandata = Pandata()
|
loader = BasePandataLoader(bookdata.base)
|
||||||
pandata.metadata = bookdata.metadata
|
pandata = Pandata()
|
||||||
for metadata in pandata.get_edition_list():
|
pandata.metadata = bookdata.metadata
|
||||||
edition = loader.load_from_pandata(metadata, work)
|
for metadata in pandata.get_edition_list():
|
||||||
work = edition.work
|
edition = loader.load_from_pandata(metadata, work)
|
||||||
loader.load_ebooks(pandata, edition)
|
work = edition.work
|
||||||
if edition:
|
loader.load_ebooks(pandata, edition)
|
||||||
editions.append(edition)
|
if edition:
|
||||||
|
editions.append(edition)
|
||||||
return editions
|
return editions
|
||||||
|
|
|
@ -65,13 +65,16 @@ class BaseScraper(object):
|
||||||
for review in self.doc.find_all(itemtype="http://schema.org/Review"):
|
for review in self.doc.find_all(itemtype="http://schema.org/Review"):
|
||||||
review.clear()
|
review.clear()
|
||||||
self.get_all()
|
self.get_all()
|
||||||
if not self.metadata.get('title', None):
|
if not self.metadata.get('title', None):
|
||||||
self.set('title', '!!! missing title !!!')
|
self.set('title', '!!! missing title !!!')
|
||||||
if not self.metadata.get('language', None):
|
if not self.metadata.get('language', None):
|
||||||
self.set('language', 'en')
|
self.set('language', 'en')
|
||||||
|
self.metadata['identifiers'] = self.identifiers
|
||||||
|
else:
|
||||||
|
self.metadata = None
|
||||||
else:
|
else:
|
||||||
self.metadata = {}
|
self.metadata = None
|
||||||
self.metadata['identifiers'] = self.identifiers
|
|
||||||
|
|
||||||
#
|
#
|
||||||
# utilities
|
# utilities
|
||||||
|
|
|
@ -146,9 +146,9 @@ class SpringerScraper(BaseScraper):
|
||||||
self.set('rights_url', lic_url)
|
self.set('rights_url', lic_url)
|
||||||
|
|
||||||
def get_pubdate(self):
|
def get_pubdate(self):
|
||||||
pubinfo = self.doc.select_one('#copyright-info')
|
pubinfo = self.doc.find(attrs={"data-test": "electronic_isbn_publication_date"})
|
||||||
if not pubinfo:
|
if not pubinfo:
|
||||||
pubinfo = self.doc.select_one('header .c-article-identifiers')
|
pubinfo = self.doc.find(attrs={"data-test": "softcover_isbn_publication_date"})
|
||||||
if pubinfo:
|
if pubinfo:
|
||||||
for yearstring in pubinfo.stripped_strings:
|
for yearstring in pubinfo.stripped_strings:
|
||||||
yearmatch = HAS_YEAR.search(yearstring)
|
yearmatch = HAS_YEAR.search(yearstring)
|
||||||
|
|
Loading…
Reference in New Issue