From 3ebccdbe887fe4f0a8bb81721c495739c0e5dbde Mon Sep 17 00:00:00 2001 From: eric Date: Thu, 25 Jun 2020 14:21:37 -0400 Subject: [PATCH] update ubiquity sites --- bookdata/sitemaps.txt | 4 +++- core/loaders/__init__.py | 3 +++ core/loaders/ubiquity.py | 4 ++-- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/bookdata/sitemaps.txt b/bookdata/sitemaps.txt index 7bc8f225..5295a956 100644 --- a/bookdata/sitemaps.txt +++ b/bookdata/sitemaps.txt @@ -1,16 +1,18 @@ https://aperio.press/sitemap.xml +https://hup.fi/sitemap.xml https://iitikship.iiti.ac.in/sitemap.xml https://oa.finlit.fi/sitemap.xml https://oa.psupress.org/sitemap.xml https://press.lse.ac.uk/sitemap.xml https://press.sjms.nu/sitemap.xml https://publishing.vt.edu/sitemap.xml -https://trystingtree.library.oregonstate.edu/sitemap.xml https://universitypress.whiterose.ac.uk/sitemap.xml +https://utsepress.lib.uts.edu.au/sitemap.xml https://www.humanities-map.net/sitemap.xml https://www.kriterium.se/sitemap.xml https://www.larcommons.net/sitemap.xml https://www.luminosoa.org/sitemap.xml +https://www.mwv-open.de/sitemap.xml https://www.stockholmuniversitypress.se/sitemap.xml https://www.ubiquitypress.com/sitemap.xml https://www.uwestminsterpress.co.uk/sitemap.xml diff --git a/core/loaders/__init__.py b/core/loaders/__init__.py index fd1a7dd6..5440845d 100755 --- a/core/loaders/__init__.py +++ b/core/loaders/__init__.py @@ -1,4 +1,5 @@ import logging +from ssl import SSLError import requests from bs4 import BeautifulSoup @@ -41,6 +42,8 @@ def scrape_sitemap(url, maxnum=None): yield scraper except requests.exceptions.RequestException as e: logger.error(e) + except SSLError as e: + logger.error(e) def add_by_webpage(url, work=None, user=None): if not url: diff --git a/core/loaders/ubiquity.py b/core/loaders/ubiquity.py index 18ae8890..2091b02d 100644 --- a/core/loaders/ubiquity.py +++ b/core/loaders/ubiquity.py @@ -9,8 +9,8 @@ HAS_EDS = re.compile(r'\(eds?\.\)') UBIQUITY_HOSTS = ["ubiquitypress.com", "kriterium.se", "oa.finlit.fi", "humanities-map.net", "oa.psupress.org", "larcommons.net", "uwestminsterpress.co.uk", "stockholmuniversitypress.se", "luminosoa.org", "iitikship.iiti.ac.in", "aperio.press", "press.lse.ac.uk", "press.sjms.nu", - "trystingtree.library.oregonstate.edu", "publishing.vt.edu", "universitypress.whiterose.ac.uk", - "www.winchesteruniversitypress.org", + "publishing.vt.edu", "universitypress.whiterose.ac.uk", "www.winchesteruniversitypress.org", + "utsepress.lib.uts.edu.au", "www.mwv-open.de", "hup.fi", ] class UbiquityScraper(BaseScraper):