update ubiquity sites
parent
72876831b2
commit
3ebccdbe88
|
@ -1,16 +1,18 @@
|
||||||
https://aperio.press/sitemap.xml
|
https://aperio.press/sitemap.xml
|
||||||
|
https://hup.fi/sitemap.xml
|
||||||
https://iitikship.iiti.ac.in/sitemap.xml
|
https://iitikship.iiti.ac.in/sitemap.xml
|
||||||
https://oa.finlit.fi/sitemap.xml
|
https://oa.finlit.fi/sitemap.xml
|
||||||
https://oa.psupress.org/sitemap.xml
|
https://oa.psupress.org/sitemap.xml
|
||||||
https://press.lse.ac.uk/sitemap.xml
|
https://press.lse.ac.uk/sitemap.xml
|
||||||
https://press.sjms.nu/sitemap.xml
|
https://press.sjms.nu/sitemap.xml
|
||||||
https://publishing.vt.edu/sitemap.xml
|
https://publishing.vt.edu/sitemap.xml
|
||||||
https://trystingtree.library.oregonstate.edu/sitemap.xml
|
|
||||||
https://universitypress.whiterose.ac.uk/sitemap.xml
|
https://universitypress.whiterose.ac.uk/sitemap.xml
|
||||||
|
https://utsepress.lib.uts.edu.au/sitemap.xml
|
||||||
https://www.humanities-map.net/sitemap.xml
|
https://www.humanities-map.net/sitemap.xml
|
||||||
https://www.kriterium.se/sitemap.xml
|
https://www.kriterium.se/sitemap.xml
|
||||||
https://www.larcommons.net/sitemap.xml
|
https://www.larcommons.net/sitemap.xml
|
||||||
https://www.luminosoa.org/sitemap.xml
|
https://www.luminosoa.org/sitemap.xml
|
||||||
|
https://www.mwv-open.de/sitemap.xml
|
||||||
https://www.stockholmuniversitypress.se/sitemap.xml
|
https://www.stockholmuniversitypress.se/sitemap.xml
|
||||||
https://www.ubiquitypress.com/sitemap.xml
|
https://www.ubiquitypress.com/sitemap.xml
|
||||||
https://www.uwestminsterpress.co.uk/sitemap.xml
|
https://www.uwestminsterpress.co.uk/sitemap.xml
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
import logging
|
import logging
|
||||||
|
from ssl import SSLError
|
||||||
import requests
|
import requests
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
@ -41,6 +42,8 @@ def scrape_sitemap(url, maxnum=None):
|
||||||
yield scraper
|
yield scraper
|
||||||
except requests.exceptions.RequestException as e:
|
except requests.exceptions.RequestException as e:
|
||||||
logger.error(e)
|
logger.error(e)
|
||||||
|
except SSLError as e:
|
||||||
|
logger.error(e)
|
||||||
|
|
||||||
def add_by_webpage(url, work=None, user=None):
|
def add_by_webpage(url, work=None, user=None):
|
||||||
if not url:
|
if not url:
|
||||||
|
|
|
@ -9,8 +9,8 @@ HAS_EDS = re.compile(r'\(eds?\.\)')
|
||||||
UBIQUITY_HOSTS = ["ubiquitypress.com", "kriterium.se", "oa.finlit.fi", "humanities-map.net",
|
UBIQUITY_HOSTS = ["ubiquitypress.com", "kriterium.se", "oa.finlit.fi", "humanities-map.net",
|
||||||
"oa.psupress.org", "larcommons.net", "uwestminsterpress.co.uk", "stockholmuniversitypress.se",
|
"oa.psupress.org", "larcommons.net", "uwestminsterpress.co.uk", "stockholmuniversitypress.se",
|
||||||
"luminosoa.org", "iitikship.iiti.ac.in", "aperio.press", "press.lse.ac.uk", "press.sjms.nu",
|
"luminosoa.org", "iitikship.iiti.ac.in", "aperio.press", "press.lse.ac.uk", "press.sjms.nu",
|
||||||
"trystingtree.library.oregonstate.edu", "publishing.vt.edu", "universitypress.whiterose.ac.uk",
|
"publishing.vt.edu", "universitypress.whiterose.ac.uk", "www.winchesteruniversitypress.org",
|
||||||
"www.winchesteruniversitypress.org",
|
"utsepress.lib.uts.edu.au", "www.mwv-open.de", "hup.fi",
|
||||||
]
|
]
|
||||||
|
|
||||||
class UbiquityScraper(BaseScraper):
|
class UbiquityScraper(BaseScraper):
|
||||||
|
|
Loading…
Reference in New Issue