regluit/experimental/oai2json.py

39 lines
1.0 KiB
Python
Raw Normal View History

#!/usr/bin/env python
import re
import sys
import json
import urllib
import requests
from oaipmh.client import Client
from oaipmh.metadata import MetadataRegistry, oai_dc_reader
def lookup(oai_url, set_name):
registry = MetadataRegistry()
registry.registerReader('oai_dc', oai_dc_reader)
client = Client(oai_url, registry)
for header, record, other in client.listRecords(metadataPrefix='oai_dc', set=set_name):
if not record:
continue
title = record.getField('title')[0]
authors = record.getField('creator')
urls = record.getField('identifier')
subjects = record.getField('subject')
id = header.identifier()
record = {
"id": id,
"title": title,
"authors": authors,
"subjects": subjects,
"urls": urls,
}
print json.dumps(record)
if __name__ == "__main__":
oai_url = sys.argv[1]
set_name = None
if len(sys.argv) > 2:
set_name = sys.argv[2]
lookup(oai_url, set_name)