2014-10-17 21:14:02 +00:00
|
|
|
import pymarc
|
|
|
|
import logging
|
|
|
|
from datetime import datetime
|
|
|
|
from StringIO import StringIO
|
|
|
|
|
|
|
|
from django.conf import settings
|
2014-10-27 20:57:35 +00:00
|
|
|
from django.contrib.auth.models import User
|
2014-10-17 21:14:02 +00:00
|
|
|
from django.db import models
|
|
|
|
|
2014-10-20 20:57:20 +00:00
|
|
|
from . import load
|
|
|
|
|
2014-10-17 21:14:02 +00:00
|
|
|
# weak coupling: the edition model is named by its "app_label.ModelName"
# string rather than imported directly
EDITION_MODEL = "core.Edition"

logger = logging.getLogger(__name__)

# three-letter MARC relator codes mapped to the relation names used here
marc_rels = {
    "aut": "author",
    "adp": "adapter",
    "aft": "author_of_afterword",
    "ann": "annotator",
    "arr": "arranger",
    "art": "artist",
    "aui": "author_of_introduction",
    "clb": "collaborator",
    "cmm": "commentator",
    "cmp": "composer",
    "cnd": "conductor",
    "com": "compiler",
    "ctb": "contributor",
    "dub": "dubious_author",
    "edt": "editor",
    "egr": "engineer",
    "ill": "illustrator",
    "lbt": "librettist",
    "oth": "other_contributor",
    "pbl": "publisher_contributor",
    "pht": "photographer",
    "prf": "performer",
    "prt": "printer",
    "res": "researcher",
    "trc": "transcriber",
    "trl": "translator",
    "unk": "unknown_contributor",
}
|
|
|
|
|
2014-10-20 20:57:20 +00:00
|
|
|
class AbstractEdition:
    """Defines the methods and attributes an edition should have.

    Every attribute and method here is an empty placeholder default.
    """

    # identifiers
    isbn_13 = ''
    oclc = ''

    # descriptive metadata
    title = ''
    description = ''
    publisher = ''
    publication_date = ''

    # rights and funding
    license = None
    funding_info = ''

    def downloads(self):
        """Report ebook downloads; each should have format and url attributes."""
        return []

    def download_via_url(self):
        """Report an "ebook via" url (this placeholder yields an empty list)."""
        return []

    def authnames(self):
        """Report author names; these should be last name first."""
        return []

    @staticmethod
    def get_by_isbn(isbn):
        """Get the right edition for ``isbn``; this placeholder finds none."""
        return None
|
|
|
|
|
2014-10-18 02:48:51 +00:00
|
|
|
def _xml(record):
    """Serialize ``record`` with ``pymarc.record_to_xml`` and return the result."""
    marcxml = pymarc.record_to_xml(record)
    return marcxml
|
|
|
|
|
|
|
|
def _mrc(record):
    """Write ``record`` through a ``pymarc.MARCWriter`` and return what it wrote.

    The writer targets an in-memory ``StringIO`` buffer, whose full contents
    are returned.
    """
    buf = StringIO()
    pymarc.MARCWriter(buf).write(record)
    # the whole buffer, independent of the current write position
    return buf.getvalue()
|
|
|
|
|
2014-10-17 21:14:02 +00:00
|
|
|
class MARCRecord(models.Model):
    """A stored MARC record, optionally linked to an edition and a user.

    ``guts`` is the persisted text of the record (``save()`` writes MARCXML
    into it for new records); ``_the_record`` is a plain instance attribute,
    not a database field, caching the parsed pymarc record.
    """

    # the record goes here
    guts = models.TextField()

    # storage for parsed guts
    _the_record = None

    # note capitalization of related_name
    edition = models.ForeignKey(EDITION_MODEL, related_name="MARCRecords", null=True)

    user = models.ForeignKey(User, related_name="MARCRecords", null=True)
    created = models.DateTimeField(auto_now_add=True)

    def __init__(self, *args, **kwargs):
        """Build the model, optionally seeding the in-memory record.

        Extra keyword argument:
            the_record: an already-parsed pymarc record to use as the
                in-memory record. It is removed from ``kwargs`` before the
                model constructor sees it.

        When no ``the_record`` is supplied but an ``edition`` is given
        without ``guts``, a stub record is generated via ``load.stub``.
        """
        supplied_record = kwargs.pop('the_record', None)
        super(MARCRecord, self).__init__(*args, **kwargs)
        edition = kwargs.get('edition')
        guts = kwargs.get('guts')
        if supplied_record is not None:
            # an explicitly supplied record wins over a generated stub
            self._the_record = supplied_record
        elif edition and not guts:
            # make a stub _the_record from the edition
            self._the_record = load.stub(edition)

    @property
    def accession(self):
        """Accession number: 'ung' followed by the id left-padded with zeroes to 9 characters."""
        return 'ung' + str(self.id).rjust(9, '0')

    def save(self, *args, **kwargs):
        """Save the record, stamping new records with their accession number.

        A new instance holding an in-memory record is saved twice: once with
        empty ``guts`` to obtain an id, then again after field 001 has been
        replaced by the accession number and the record serialized into
        ``guts``.
        """
        if self.id is None and self._the_record:
            # get the id first, add accession number
            self.guts = ''
            super(MARCRecord, self).save(*args, **kwargs)
            # 001 is the control number: drop whatever came with the record
            for stale001 in self._the_record.get_fields('001'):
                self._the_record.remove_field(stale001)
            self._the_record.add_ordered_field(
                pymarc.Field(tag='001', data=self.accession)
            )
            # serialize only now, so the stored XML carries the accession 001
            self.guts = _xml(self._the_record)
            # the row exists already; the second save must not force another
            # INSERT (objects.create() passes force_insert=True)
            kwargs.pop('force_insert', None)
        super(MARCRecord, self).save(*args, **kwargs)

    def load_from_file(self, source='raw'):
        """Parse ``guts`` into the in-memory record, then save.

        ``guts`` may be a string (wrapped in a ``StringIO``) or an object
        that is handed to the loader as-is (presumably file-like -- confirm
        against the ``load`` module).

        source: 'loc' parses with ``load.from_lc``; any other value parses
            with ``load.raw``.
        """
        # parse guts
        if isinstance(self.guts, (str, unicode)):
            marcfile = StringIO(self.guts)
        else:
            marcfile = self.guts
        parse = load.from_lc if source == 'loc' else load.raw
        self._the_record = parse(marcfile, self.edition)
        self.save()

    # the record without 856
    def _record(self):
        """Return the parsed record with every 856 field removed.

        Uses the cached record when there is one, otherwise parses ``guts``
        as MARCXML; the stripped record is cached on the instance.
        """
        if self._the_record:
            the_record = self._the_record
        else:
            the_record = pymarc.parse_xml_to_array(StringIO(self.guts))[0]
        for field in the_record.get_fields('856'):
            the_record.remove_field(field)
        self._the_record = the_record
        return the_record

    def direct_record(self):
        """Return the record with one 856 field per download of the edition.

        Each 856 carries the download's format (subfield 3), its content
        type looked up in ``settings.CONTENT_TYPES`` (subfield q) and its
        url (subfield u).
        """
        the_record = self._record()
        for book in self.edition.downloads():
            subfields = [
                '3', book.format + ' version',
                'q', settings.CONTENT_TYPES[book.format],
                'u', book.url,
            ]
            the_record.add_ordered_field(
                pymarc.Field(tag='856', indicators=['4', '0'], subfields=subfields)
            )
        return the_record

    def direct_record_xml(self):
        """Return ``direct_record()`` serialized by ``_xml``."""
        return _xml(self.direct_record())

    def direct_record_mrc(self):
        """Return ``direct_record()`` serialized by ``_mrc``."""
        return _mrc(self.direct_record())

    def via_record(self):
        """Return the record with a single 856 field for the edition's "via" url."""
        the_record = self._record()
        field856_via = pymarc.Field(
            tag='856',
            indicators=['4', '0'],
            subfields=['u', self.edition.download_via_url()],
        )
        the_record.add_ordered_field(field856_via)
        return the_record

    def via_record_xml(self):
        """Return ``via_record()`` serialized by ``_xml``."""
        return _xml(self.via_record())

    def via_record_mrc(self):
        """Return ``via_record()`` serialized by ``_mrc``."""
        return _mrc(self.via_record())

    def record(self, link_target='via', format='xml'):
        """Return the serialized record for a link target and format.

        link_target: 'via' or 'direct'.
        format: 'xml' or 'mrc'.

        Returns None for any other combination.
        """
        renderers = {
            ('via', 'xml'): self.via_record_xml,
            ('direct', 'xml'): self.direct_record_xml,
            ('via', 'mrc'): self.via_record_mrc,
            ('direct', 'mrc'): self.direct_record_mrc,
        }
        renderer = renderers.get((link_target, format))
        if renderer is None:
            return None
        return renderer()
|
2014-11-05 23:39:57 +00:00
|
|
|
|
|
|
|
# load many records, with minimal change
|
|
|
|
def import_records(marcfile):
    """Parse ``marcfile`` as MARCXML and store a MARCRecord per new match.

    For each parsed record, the first ``a`` subfield of its first 020 field
    is used as the ISBN to look up an edition through the edition model's
    ``get_by_isbn``. A MARCRecord is created only when an edition is found
    and no MARCRecord exists for it yet; every skipped record is logged at
    info level.

    Returns the number of records saved.
    """

    class RecordLoader(pymarc.XmlHandler):
        # resolved from the model name string to keep the coupling weak
        Edition = models.get_model(*EDITION_MODEL.split('.'))
        num_loaded = 0

        def process_record(self, record):
            # Only the 020 lookup is guarded, so an IndexError raised by the
            # edition lookup or by save() is not misreported as a missing 020.
            try:
                isbn = record.get_fields('020')[0].get_subfields('a')[0]
            except IndexError:
                logger.info('020 absent')
                return

            edition = self.Edition.get_by_isbn(isbn)
            if not edition:
                logger.info('no edition for %s', isbn)
                return

            # exists() rather than get(): the edition foreign key is not
            # unique, so several records may already be attached to it
            if MARCRecord.objects.filter(edition=edition).exists():
                logger.info('already have a record for %s', isbn)
                return

            MARCRecord(edition=edition, the_record=record).save()
            self.num_loaded += 1

    handler = RecordLoader()
    pymarc.parse_xml(marcfile, handler)
    return handler.num_loaded
|