87 lines
3.2 KiB
Python
87 lines
3.2 KiB
Python
"""
|
|
doab_utils.py
|
|
|
|
"""
|
|
import logging
|
|
import re
|
|
from ssl import SSLError
|
|
from urllib.parse import urljoin
|
|
|
|
import requests
|
|
|
|
from oaipmh.metadata import MetadataReader
|
|
|
|
from django.conf import settings
|
|
|
|
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
# Metadata reader for DOAB records.  Every field is extracted as a
# 'textList' using the XPath expression paired with it below; the prefixes
# used in those expressions are resolved through ``namespaces``.
# Note that the 'dc', 'grantor', 'publisher' and 'oapen' prefixes are all
# bound to the same Dublin Core elements URI.
doab_reader = MetadataReader(
    fields={
        field_name: ('textList', xpath)
        for field_name, xpath in (
            ('title', 'oai_dc:dc/datacite:title/text()'),
            ('creator', 'oai_dc:dc/datacite:creator/text()'),
            ('subject', 'oai_dc:dc/datacite:subject/text()'),
            ('description', 'oai_dc:dc/dc:description/text()'),
            ('publisher', 'oai_dc:dc/dc:publisher/text()'),
            ('editor', 'oai_dc:dc/datacite:contributor[@type="Editor"]/text()'),
            ('date', 'oai_dc:dc/datacite:date[@type="Issued"]/text()'),
            ('timestamp', 'oai_dc:dc/dc:date/text()'),
            ('type', 'oai_dc:dc/oaire:resourceType/text()'),
            ('format', 'oai_dc:dc/dc:format/text()'),
            ('identifier', 'oai_dc:dc/dc:identifier/text()'),
            ('source', 'oai_dc:dc/dc:source/text()'),
            ('language', 'oai_dc:dc/dc:language/text()'),
            ('relation', 'oai_dc:dc/dc:relation/text()'),
            ('coverage', 'oai_dc:dc/dc:coverage/text()'),
            ('rights', 'oai_dc:dc/oaire:licenseCondition/@uri'),
            ('isbn', 'oai_dc:dc/datacite:alternateIdentifier[@type="ISBN"]/text()'),
            ('doi', 'oai_dc:dc/datacite:alternateIdentifier[@type="DOI"]/text()'),
        )
    },
    namespaces=dict(
        oai_dc='http://www.openarchives.org/OAI/2.0/oai_dc/',
        dc='http://purl.org/dc/elements/1.1/',
        grantor='http://purl.org/dc/elements/1.1/',
        publisher='http://purl.org/dc/elements/1.1/',
        oapen='http://purl.org/dc/elements/1.1/',
        oaire='https://raw.githubusercontent.com/rcic/openaire4/master/schemas/4.0/oaire.xsd',
        datacite='https://schema.datacite.org/meta/kernel-4.1/metadata.xsd',
        doc='http://www.lyncode.com/xoai',
    ),
)
|
|
|
|
|
|
|
|
|
|
STREAM_QUERY = 'https://directory.doabooks.org/rest/search?query=handle:{}&expand=bitstreams'


def get_streamdata(handle, timeout=30):
    """Return the THUMBNAIL bitstream record for a DOAB handle, or None.

    Queries the DOAB REST search endpoint for ``handle`` with bitstreams
    expanded, then scans the first returned item for a bitstream whose
    ``bundleName`` is ``"THUMBNAIL"``.  The matching record (a dict parsed
    from the JSON response) has ``handle`` added to it under the key
    ``'handle'`` before being returned.

    Args:
        handle: the handle to search for; interpolated into STREAM_QUERY.
        timeout: seconds passed to ``requests.get`` so that an unresponsive
            server cannot block the caller indefinitely.

    Returns:
        The thumbnail bitstream dict, or None when the request fails, the
        response is not valid JSON, no items are returned, the response has
        an unexpected structure, or no THUMBNAIL bitstream is present.
        Failures are logged rather than raised.
    """
    url = STREAM_QUERY.format(handle)
    try:
        response = requests.get(
            url,
            headers={"User-Agent": settings.USER_AGENT},
            timeout=timeout,
        )
        items = response.json()
    except (requests.exceptions.RequestException, SSLError, ValueError) as e:
        # ValueError covers JSON decoder errors.
        logger.error(e)
        return None

    if not items:
        logger.error("No items in streamdata for %s", handle)
        return None

    try:
        for stream in items[0]['bitstreams']:
            if stream['bundleName'] == "THUMBNAIL":
                stream['handle'] = handle
                return stream
    except (KeyError, IndexError, TypeError) as e:
        # The response was JSON but not shaped as a list of items carrying
        # a 'bitstreams' list of dicts; treat it like any other failure.
        logger.error("Unexpected streamdata structure for %s: %r", handle, e)
    return None
|
|
|
|
COVER_FSTRING = "https://directory.doabooks.org/bitstream/handle/{handle}/{name}?sequence={sequenceId}&isAllowed=y"


def doab_cover(doab_id):
    """Return the cover image URL for ``doab_id``, or None if unavailable.

    Looks up the thumbnail bitstream record via ``get_streamdata`` and fills
    COVER_FSTRING from its ``handle``, ``name`` and ``sequenceId`` entries.

    Args:
        doab_id: identifier passed through to ``get_streamdata``.

    Returns:
        The formatted cover URL string, or None when no stream data was
        found or the record lacks a field the URL template needs.  Both
        cases are logged.
    """
    stream_data = get_streamdata(doab_id)
    if not stream_data:
        logger.error('get_streamdata failed for %s', doab_id)
        return None
    try:
        return COVER_FSTRING.format(**stream_data)
    except KeyError as e:
        # The record is missing 'name' or 'sequenceId' (or 'handle'), so no
        # URL can be built; report it instead of raising to the caller.
        logger.error('streamdata for %s is missing field %s', doab_id, e)
        return None
|
|
|