Update Ubiquity sites

pull/94/head
eric 2020-06-25 14:21:37 -04:00
parent 72876831b2
commit 3ebccdbe88
3 changed files with 8 additions and 3 deletions

View File

@ -1,16 +1,18 @@
https://aperio.press/sitemap.xml https://aperio.press/sitemap.xml
https://hup.fi/sitemap.xml
https://iitikship.iiti.ac.in/sitemap.xml https://iitikship.iiti.ac.in/sitemap.xml
https://oa.finlit.fi/sitemap.xml https://oa.finlit.fi/sitemap.xml
https://oa.psupress.org/sitemap.xml https://oa.psupress.org/sitemap.xml
https://press.lse.ac.uk/sitemap.xml https://press.lse.ac.uk/sitemap.xml
https://press.sjms.nu/sitemap.xml https://press.sjms.nu/sitemap.xml
https://publishing.vt.edu/sitemap.xml https://publishing.vt.edu/sitemap.xml
https://trystingtree.library.oregonstate.edu/sitemap.xml
https://universitypress.whiterose.ac.uk/sitemap.xml https://universitypress.whiterose.ac.uk/sitemap.xml
https://utsepress.lib.uts.edu.au/sitemap.xml
https://www.humanities-map.net/sitemap.xml https://www.humanities-map.net/sitemap.xml
https://www.kriterium.se/sitemap.xml https://www.kriterium.se/sitemap.xml
https://www.larcommons.net/sitemap.xml https://www.larcommons.net/sitemap.xml
https://www.luminosoa.org/sitemap.xml https://www.luminosoa.org/sitemap.xml
https://www.mwv-open.de/sitemap.xml
https://www.stockholmuniversitypress.se/sitemap.xml https://www.stockholmuniversitypress.se/sitemap.xml
https://www.ubiquitypress.com/sitemap.xml https://www.ubiquitypress.com/sitemap.xml
https://www.uwestminsterpress.co.uk/sitemap.xml https://www.uwestminsterpress.co.uk/sitemap.xml

View File

@ -1,4 +1,5 @@
import logging import logging
from ssl import SSLError
import requests import requests
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
@ -41,6 +42,8 @@ def scrape_sitemap(url, maxnum=None):
yield scraper yield scraper
except requests.exceptions.RequestException as e: except requests.exceptions.RequestException as e:
logger.error(e) logger.error(e)
except SSLError as e:
logger.error(e)
def add_by_webpage(url, work=None, user=None): def add_by_webpage(url, work=None, user=None):
if not url: if not url:

View File

@ -9,8 +9,8 @@ HAS_EDS = re.compile(r'\(eds?\.\)')
UBIQUITY_HOSTS = ["ubiquitypress.com", "kriterium.se", "oa.finlit.fi", "humanities-map.net", UBIQUITY_HOSTS = ["ubiquitypress.com", "kriterium.se", "oa.finlit.fi", "humanities-map.net",
"oa.psupress.org", "larcommons.net", "uwestminsterpress.co.uk", "stockholmuniversitypress.se", "oa.psupress.org", "larcommons.net", "uwestminsterpress.co.uk", "stockholmuniversitypress.se",
"luminosoa.org", "iitikship.iiti.ac.in", "aperio.press", "press.lse.ac.uk", "press.sjms.nu", "luminosoa.org", "iitikship.iiti.ac.in", "aperio.press", "press.lse.ac.uk", "press.sjms.nu",
"trystingtree.library.oregonstate.edu", "publishing.vt.edu", "universitypress.whiterose.ac.uk", "publishing.vt.edu", "universitypress.whiterose.ac.uk", "www.winchesteruniversitypress.org",
"www.winchesteruniversitypress.org", "utsepress.lib.uts.edu.au", "www.mwv-open.de", "hup.fi",
] ]
class UbiquityScraper(BaseScraper): class UbiquityScraper(BaseScraper):