regluit/marc/models.py

234 lines
7.1 KiB
Python
Raw Normal View History

import pymarc
import logging
from datetime import datetime
from StringIO import StringIO
from django.apps import apps
from django.conf import settings
from django.db import models
2014-10-20 20:57:20 +00:00
from . import load
# weak coupling
2016-07-26 14:34:45 +00:00
# Dotted "app_label.ModelName" reference to the edition model. Taken from
# settings.EDITION_MODEL when that setting exists, otherwise "core.Edition".
EDITION_MODEL = getattr(settings, "EDITION_MODEL", "core.Edition")
2014-11-05 23:39:57 +00:00
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Three-letter MARC relator codes mapped to the contributor-role names used
# by this module.
marc_rels = {
    "aut": "author",
    "adp": "adapter",
    "aft": "author_of_afterword",
    "ann": "annotator",
    "arr": "arranger",
    "art": "artist",
    "aui": "author_of_introduction",
    "clb": "collaborator",
    "cmm": "commentator",
    "cmp": "composer",
    "cnd": "conductor",
    "com": "compiler",
    "ctb": "contributor",
    "dsr": "designer",
    "dub": "dubious_author",
    "edt": "editor",
    "egr": "engineer",
    "ill": "illustrator",
    "lbt": "librettist",
    "oth": "other_contributor",
    "pbl": "publisher_contributor",
    "pht": "photographer",
    "prf": "performer",
    "prt": "printer",
    "res": "researcher",
    "trc": "transcriber",
    "trl": "translator",
    "unk": "unknown_contributor",
}

# Reverse lookup: role name -> relator code.
inverse_marc_rels = dict((role, code) for code, role in marc_rels.items())
2014-10-20 20:57:20 +00:00
class AbstractEdition:
    """Defines the methods and attributes an edition should have.

    Every attribute defaults to an empty value and every method returns an
    empty result.
    """

    # identifiers
    isbn_13 = ''
    oclc = ''

    # descriptive metadata
    title = ''
    publisher = ''
    publication_date = ''
    description = ''
    note = ''

    # rights and funding
    license = None
    funding_info = ''

    def downloads(self):
        """Report ebook downloads, which should have format and url attributes."""
        return list()

    def download_via_url(self):
        """Report an "ebook via" url."""
        return list()

    def authnames(self):
        """Report author names; these should be last name first."""
        return list()

    @staticmethod
    def get_by_isbn(isbn):
        """Get the right edition for ``isbn``."""
        return None
2014-10-18 02:48:51 +00:00
def _xml(record):
    """Return ``record`` serialized by ``pymarc.record_to_xml``."""
    serialized = pymarc.record_to_xml(record)
    return serialized
def _mrc(record):
    """Return ``record`` as written by a ``pymarc.MARCWriter``."""
    buf = StringIO()
    pymarc.MARCWriter(buf).write(record)
    # getvalue() yields the whole buffer, so no rewind is needed
    return buf.getvalue()
class MARCRecord(models.Model):
    """A MARC record, optionally attached to an edition and a user.

    ``guts`` stores the serialized record text. ``_the_record`` caches the
    parsed ``pymarc`` record for the lifetime of the instance; when it is
    not set, ``_record()`` parses ``guts`` as MARCXML.
    """

    # the record goes here
    guts = models.TextField()

    # storage for parsed guts
    _the_record = None

    # note capitalization of related_name
    edition = models.ForeignKey(
        EDITION_MODEL,
        on_delete=models.CASCADE,
        related_name="MARCRecords",
        null=True,
    )
    user = models.ForeignKey(
        settings.AUTH_USER_MODEL,
        on_delete=models.CASCADE,
        related_name="MARCRecords",
        null=True,
    )
    created = models.DateTimeField(auto_now_add=True)

    def __init__(self, *args, **kwargs):
        """Create a record.

        Accepts the usual model field arguments plus an optional
        ``the_record`` keyword holding an already-parsed pymarc record.
        When ``the_record`` is not given and an ``edition`` is passed
        without ``guts``, a stub record is built from the edition.
        """
        # 'the_record' is not a model field, so remove it before Django
        # sees the keyword arguments.
        the_record = kwargs.pop('the_record', None)
        edition = kwargs.get('edition')
        guts = kwargs.get('guts')
        super(MARCRecord, self).__init__(*args, **kwargs)
        if the_record is not None:
            # keep the caller's record instead of discarding it
            self._the_record = the_record
        elif edition and not guts:
            # make a stub _the_record from the edition
            self._the_record = load.stub(edition)

    @property
    def accession(self):
        """Accession number: 'ung' followed by the id left-padded to 9 characters."""
        return 'ung' + str(self.id).rjust(9, '0')

    def save(self, *args, **kwargs):
        """Save the record, stamping new records with their accession number.

        A new instance holding a parsed record is first saved with empty
        ``guts`` to obtain an id. The accession number derived from that id
        then replaces any existing 001 field, and ``guts`` is serialized
        from the stamped record before the final save.
        """
        if self.id is None and self._the_record:
            # get the id first, add accession number
            self.guts = ''
            super(MARCRecord, self).save(*args, **kwargs)
            # The row exists now, so the second save must be an UPDATE;
            # repeating force_insert=True would attempt a duplicate INSERT.
            kwargs.pop('force_insert', None)
            for stale_001 in self._the_record.get_fields('001'):
                self._the_record.remove_field(stale_001)
            field001 = pymarc.Field(tag='001', data=self.accession)
            self._the_record.add_ordered_field(field001)
            # Serialize only after 001 is in place so that the stored XML
            # carries the accession number too.
            self.guts = _xml(self._the_record)
        super(MARCRecord, self).save(*args, **kwargs)

    def load_from_file(self, source='raw'):
        """Parse ``guts`` into a record with the ``load`` module, then save.

        ``guts`` may be a string (wrapped in a StringIO) or any other object,
        which is handed to the loader unchanged. ``source='loc'`` selects
        ``load.from_lc``; any other value selects ``load.raw``.
        """
        # parse guts
        if isinstance(self.guts, str) or isinstance(self.guts, unicode):
            marcfile = StringIO(self.guts)
        else:
            marcfile = self.guts
        if source == 'loc':
            self._the_record = load.from_lc(marcfile, self.edition)
        else:
            self._the_record = load.raw(marcfile, self.edition)
        self.save()

    def _record(self):
        """Return the parsed record with every 856 field removed.

        Uses the cached record when there is one, otherwise parses ``guts``
        as MARCXML and takes the first record. The result is cached, so the
        object returned is shared between calls.
        """
        if self._the_record:
            the_record = self._the_record
        else:
            the_record = pymarc.parse_xml_to_array(StringIO(self.guts))[0]
        for field in the_record.get_fields('856'):
            the_record.remove_field(field)
        self._the_record = the_record
        return the_record

    def direct_record(self):
        """Return the record with one 856 field per edition download.

        Each 856 field carries the format label ($3), the content type
        looked up in ``settings.CONTENT_TYPES`` ($q) and the url ($u).
        """
        the_record = self._record()
        for book in self.edition.downloads():
            field856 = pymarc.Field(
                tag='856',
                indicators=['4', '0'],
                subfields=[
                    '3', book.format + ' version',
                    'q', settings.CONTENT_TYPES[book.format],
                    'u', book.url,
                ]
            )
            the_record.add_ordered_field(field856)
        return the_record

    def direct_record_xml(self):
        """Return ``direct_record()`` serialized with ``_xml``."""
        return _xml(self.direct_record())

    def direct_record_mrc(self):
        """Return ``direct_record()`` serialized with ``_mrc``."""
        return _mrc(self.direct_record())

    def via_record(self):
        """Return the record with a single 856 field for the edition's "via" url."""
        the_record = self._record()
        field856_via = pymarc.Field(
            tag='856',
            indicators=['4', '0'],
            subfields=[
                'u', self.edition.download_via_url(),
            ]
        )
        the_record.add_ordered_field(field856_via)
        return the_record

    def via_record_xml(self):
        """Return ``via_record()`` serialized with ``_xml``."""
        return _xml(self.via_record())

    def via_record_mrc(self):
        """Return ``via_record()`` serialized with ``_mrc``."""
        return _mrc(self.via_record())

    def record(self, link_target='via', format='xml'):
        """Return the serialized record for a link target and format.

        ``link_target`` is 'via' or 'direct'; ``format`` is 'xml' or 'mrc'.
        Any other combination returns None.
        """
        serializers = {
            ('via', 'xml'): self.via_record_xml,
            ('direct', 'xml'): self.direct_record_xml,
            ('via', 'mrc'): self.via_record_mrc,
            ('direct', 'mrc'): self.direct_record_mrc,
        }
        serialize = serializers.get((link_target, format))
        if serialize is None:
            return None
        return serialize()
2014-11-05 23:39:57 +00:00
# load many records at once, making minimal changes to each
def import_records(marcfile):
    """Create MARCRecords from a MARCXML file for editions that lack one.

    Each parsed record is matched to an edition through subfield $a of its
    first 020 field. Records with no 020 $a, with no matching edition, or
    whose edition already has a MARCRecord are logged and skipped.

    Returns the number of MARCRecords created.
    """
    class RecordLoader(pymarc.XmlHandler):
        Edition = apps.get_model(*EDITION_MODEL.split('.'))
        num_loaded = 0

        def process_record(self, record):
            # Only the ISBN lookup is guarded, so an IndexError raised while
            # finding or saving an edition is not misreported as "020 absent".
            try:
                isbn = record.get_fields('020')[0].get_subfields('a')[0]
            except IndexError:
                logger.info('020 absent')
                return

            edition = self.Edition.get_by_isbn(isbn)
            if not edition:
                logger.info('no edition for %s', isbn)
                return

            # exists() rather than get(): an edition may already have more
            # than one record, and get() would raise in that case.
            if MARCRecord.objects.filter(edition=edition).exists():
                logger.info('already have a record for %s', isbn)
                return

            MARCRecord(edition=edition, the_record=record).save()
            self.num_loaded += 1

    handler = RecordLoader()
    pymarc.parse_xml(marcfile, handler)
    return handler.num_loaded