From ed79ffd379097ddd899770ee4a65c0636cc53bae Mon Sep 17 00:00:00 2001
From: eric
Date: Mon, 7 Sep 2020 10:15:23 -0400
Subject: [PATCH] "harvest" dead dois

---
 core/loaders/harvest.py | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/core/loaders/harvest.py b/core/loaders/harvest.py
index 4914a5db..ffd8f68a 100644
--- a/core/loaders/harvest.py
+++ b/core/loaders/harvest.py
@@ -138,6 +138,7 @@ def harvesters(ebook):
     yield ebook.provider == 'edition-topoi.org', harvest_topoi
     yield ebook.provider == 'meson.press', harvest_meson
     yield 'brillonline' in ebook.provider, harvest_brill
+    yield ebook.provider == 'DOI Resolver', harvest_doi
 
 
 def ebf_if_harvested(url):
@@ -761,3 +762,28 @@ def harvest_brill(ebook):
     dl_url = 'https://brill.com/downloadpdf/title/%s.pdf' % r.url[29:]
     return make_dl_ebook(dl_url, ebook, user_agent=settings.GOOGLEBOT_UA)
 
+
+def harvest_doi(ebook):
+    """Clean up an ebook whose provider is 'DOI Resolver'.
+
+    Requests ebook.url. If the response is a 404 and the ebook has no
+    ebook_files, the ebook is deleted. Otherwise ebook.url is set to the
+    url of the response, the provider is reset, and the ebook is saved.
+
+    Returns (None, -1) when the ebook was deleted, (None, 0) otherwise.
+    Exceptions raised by requests, including timeouts, are not caught.
+    """
+    # usually a 404.
+    # without a timeout, requests waits indefinitely on an unresponsive server
+    r = requests.get(ebook.url, timeout=30)
+    if r.status_code == 404 and not ebook.ebook_files.exists():
+        logger.info('deleting ebook for dead doi %s', ebook.url)
+        ebook.delete()
+        return None, -1
+
+    # not a dead doi without files: keep the ebook, point it at the response url
+    ebook.url = r.url
+    ebook.set_provider()
+    logger.info('reset provider to %s', ebook.provider)
+    ebook.save()
+    return None, 0