From 7bd6fba0967d6e553776b0d330b80f2a04a20d07 Mon Sep 17 00:00:00 2001 From: eric Date: Fri, 22 Nov 2024 14:34:17 -0500 Subject: [PATCH] fix bookloader when a bookdata is empty --- core/bookloader.py | 19 ++++++++++--------- core/loaders/scrape.py | 15 +++++++++------ core/loaders/springer.py | 4 ++-- 3 files changed, 21 insertions(+), 17 deletions(-) diff --git a/core/bookloader.py b/core/bookloader.py index 08644fa6..2baabe05 100755 --- a/core/bookloader.py +++ b/core/bookloader.py @@ -1113,13 +1113,14 @@ def add_from_bookdatas(bookdatas): editions = [] for bookdata in bookdatas: edition = work = None - loader = BasePandataLoader(bookdata.base) - pandata = Pandata() - pandata.metadata = bookdata.metadata - for metadata in pandata.get_edition_list(): - edition = loader.load_from_pandata(metadata, work) - work = edition.work - loader.load_ebooks(pandata, edition) - if edition: - editions.append(edition) + if bookdata and bookdata.metadata: + loader = BasePandataLoader(bookdata.base) + pandata = Pandata() + pandata.metadata = bookdata.metadata + for metadata in pandata.get_edition_list(): + edition = loader.load_from_pandata(metadata, work) + work = edition.work + loader.load_ebooks(pandata, edition) + if edition: + editions.append(edition) return editions diff --git a/core/loaders/scrape.py b/core/loaders/scrape.py index 20a79bfb..863d9271 100644 --- a/core/loaders/scrape.py +++ b/core/loaders/scrape.py @@ -65,13 +65,16 @@ class BaseScraper(object): for review in self.doc.find_all(itemtype="http://schema.org/Review"): review.clear() self.get_all() - if not self.metadata.get('title', None): - self.set('title', '!!! missing title !!!') - if not self.metadata.get('language', None): - self.set('language', 'en') + if not self.metadata.get('title', None): + self.set('title', '!!! missing title !!!') + if not self.metadata.get('language', None): + self.set('language', 'en') + self.metadata['identifiers'] = self.identifiers + else: + self.metadata = None else: - self.metadata = {} - self.metadata['identifiers'] = self.identifiers + self.metadata = None + # # utilities diff --git a/core/loaders/springer.py b/core/loaders/springer.py index 88918e7f..cf6dc8c5 100644 --- a/core/loaders/springer.py +++ b/core/loaders/springer.py @@ -146,9 +146,9 @@ class SpringerScraper(BaseScraper): self.set('rights_url', lic_url) def get_pubdate(self): - pubinfo = self.doc.select_one('#copyright-info') + pubinfo = self.doc.find(attrs={"data-test": "electronic_isbn_publication_date"}) if not pubinfo: - pubinfo = self.doc.select_one('header .c-article-identifiers') + pubinfo = self.doc.find(attrs={"data-test": "softcover_isbn_publication_date"}) if pubinfo: for yearstring in pubinfo.stripped_strings: yearmatch = HAS_YEAR.search(yearstring)