regluit/core/marc.py

"""
This takes a MARCXML filename as an argument and converts it into
MARC records for the unglued edition (in .xml and .mrc formats).
Consider it a catalogolem: http://commons.wikimedia.org/wiki/File:Arcimboldo_Librarian_Stokholm.jpg
Use the MARCXML file for the non-unglued edition from Library of Congress.
"""

import pymarc
import logging
from copy import deepcopy
from datetime import datetime
from StringIO import StringIO

from django.conf import settings
from django.core.files.storage import default_storage
from django.core.urlresolvers import reverse

import regluit.core.cc as cc
from regluit.core import models

def makestub(edition):
    """Build stub MARC records for `edition` without a source MARCXML file.

    Delegates to makemarc with no marcfile, so every field is built from
    the edition itself, and returns whatever makemarc returns.
    """
    stub_pk = makemarc(marcfile=None, edition=edition)
    return stub_pk


def _mark_locally_assigned(record, tag):
    """Set the indicators of the first `tag` field of `record` to [' ', '4'].

    Used for 050 and 082, because LOC is no longer responsible for them.
    The field is removed and re-inserted with add_ordered_field, as before.
    Does nothing when the record has no such field.
    """
    matches = record.get_fields(tag)
    if not matches:
        return
    field = matches[0]
    field.indicators = [' ', '4']
    record.remove_field(field)
    record.add_ordered_field(field)


def _store_record(record, marc_record):
    """Write `record` to default storage as MARCXML and as a .mrc file.

    Target names are marc_record.xml_record and marc_record.mrc_record.
    Each file handle is closed even when the write raises.
    """
    xmlrecord = pymarc.record_to_xml(record)
    xml_file = default_storage.open(marc_record.xml_record, 'w')
    try:
        xml_file.write(xmlrecord)
    finally:
        xml_file.close()

    mrc_file = default_storage.open(marc_record.mrc_record, 'w')
    try:
        pymarc.MARCWriter(mrc_file).write(record)
    finally:
        mrc_file.close()


def makemarc(marcfile, edition):
    """Create and store MARC records for the electronic version of `edition`.

    Args:
        marcfile: MARCXML source for pymarc.parse_xml_to_array (only its
            first record is used), or a falsy value to build a stub record
            from the edition's own metadata.
        edition: the edition to describe. Its ebooks, ebook_files,
            identifiers, authors, title, publisher_name, publication_date,
            unglued flag and work are read.

    Returns:
        The pk of the MARCRecord whose 856 links through unglue.it
        (link_target 'UNGLUE', or 'B2U' when the edition only has ebook
        files), or None when the edition has neither ebooks nor ebook files.

    Side effects:
        Gets or creates MARCRecord rows and writes .xml/.mrc files to the
        default storage. When the edition has ebooks, a second 'DIRECT'
        record with one 856 per ebook file is written as well.
    """
    logger = logging.getLogger(__name__)

    try:
        rights = edition.ebooks.all()[0].rights
        ebf = None
    except IndexError:
        rights = None
        try:
            ebf = edition.ebook_files.all()[0]
        except IndexError:
            # no record if no ebooks
            return None

    logger.info("Making MARC records for edition %s ", edition)

    # save lccn for later (if there is one) before deleting it
    print_lccn = None
    if marcfile:
        record = pymarc.parse_xml_to_array(marcfile)[0]
        for lccn in record.get_fields('010'):
            for validlccn in lccn.get_subfields('a'):
                print_lccn = validlccn
        # collect first, then remove, so the record is not mutated while
        # its fields are being looked up
        fields_to_delete = []
        for tag in ('001', '003', '005', '006', '007', '010', '040'):
            fields_to_delete += record.get_fields(tag)
        for field in fields_to_delete:
            record.remove_field(field)
    else:
        record = pymarc.Record()

    # create accession number and write 001 field
    # (control field syntax is special)
    link_target = 'B2U' if ebf else 'UNGLUE'
    marc_record, _created = models.MARCRecord.objects.get_or_create(
        edition=edition, link_target=link_target
    )
    record.add_ordered_field(
        pymarc.Field(tag='001', data=marc_record.accession)
    )

    # add field indicating record originator
    record.add_ordered_field(pymarc.Field(tag='003', data='UnglueIt'))

    # update timestamp of record
    now = datetime.now()
    datestamp = now.strftime('%Y%m%d%H%M%S') + '.0'
    record.add_ordered_field(pymarc.Field(tag='005', data=datestamp))

    # change 006, 007, 008 because this is an online resource
    record.add_ordered_field(
        pymarc.Field(tag='006', data='m     o  d        ')
    )
    record.add_ordered_field(pymarc.Field(tag='007', data='cr'))

    existing008 = record.get_fields('008')
    if existing008:
        # keep the source 008 but put 'o' at position 23
        field008 = existing008[0]
        record.remove_field(field008)
        old_field_value = field008.value()
        new_field_value = old_field_value[:23] + 'o' + old_field_value[24:]
    else:
        # no 008 to adapt: assemble one from today's date and the
        # edition's publication year, filling the rest with '|'
        new_field_value = now.strftime('%y%m%d') + 's'
        # a missing publication date is treated like one that is too short
        pub_date = edition.publication_date or ''
        if len(pub_date) > 3:
            new_field_value += pub_date[0:4]
        else:
            new_field_value += '||||'
        new_field_value += '||||xx |||||o|||||||||||eng||'
    record.add_ordered_field(pymarc.Field(tag='008', data=new_field_value))

    # add ISBN for ebook where applicable; relegate print ISBN to $z
    isbn = ''
    try:
        isbn = edition.identifiers.filter(type='isbn')[0].value
    except IndexError:
        pass
    try:
        field020 = record.get_fields('020')[0]
        print_isbn = field020.get_subfields('a')[0]
        field020.delete_subfield('a')
        if isbn:
            field020.add_subfield('a', isbn)
        field020.add_subfield('z', print_isbn)
    except IndexError:
        # no 020 field, or an 020 without $a
        print_isbn = None

    # change 050 and 082 indicators because LOC is no longer responsible
    # for these; records without those fields are left alone
    _mark_locally_assigned(record, '050')
    _mark_locally_assigned(record, '082')

    # author name: only built from the database when the source has no 100
    if not record.get_fields('100'):
        authors = list(edition.authors.all())
        if authors:
            record.add_ordered_field(pymarc.Field(
                tag='100',
                indicators=['1', ' '],
                subfields=['a', authors[0].last_name_first],
            ))
        for auth in authors[1:]:
            record.add_ordered_field(pymarc.Field(
                tag='700',
                indicators=['1', ' '],
                subfields=[
                    'a', auth.last_name_first,
                    'e', 'joint author.',
                ],
            ))

    # add subfield to 245 indicating format
    fields245 = record.get_fields('245')
    if fields245:
        field245 = fields245[0]
    else:
        field245 = pymarc.Field(
            tag='245',
            indicators=['1', '0'],
            subfields=['a', edition.title],
        )
        record.add_ordered_field(field245)
    # NOTE(review): this appends a second $a; MARC 21 uses 245 $h for the
    # medium. Left unchanged to keep existing output - confirm intent.
    field245.add_subfield('a', '[electronic resource]')

    # publisher, date: only built from the database when there is no 260
    if not record.get_fields('260'):
        imprint = []
        # skip subfields whose source value is missing rather than crash
        # on publisher_name.name or emit the literal text "None"
        if edition.publisher_name is not None:
            imprint.extend(['b', edition.publisher_name.name])
        if edition.publication_date is not None:
            imprint.extend(['c', unicode(edition.publication_date)])
        if imprint:
            record.add_ordered_field(pymarc.Field(
                tag='260',
                indicators=[' ', ' '],
                subfields=imprint,
            ))

    # modify 300 field (physical description); nothing to do when the
    # record has no 300 or the 300 has no $a
    fields300 = record.get_fields('300')
    extents = fields300[0].get_subfields('a') if fields300 else []
    if extents:
        field300 = fields300[0]
        extent = extents[0]
        # drop trailing ' ;', ' :' or ' +' before wrapping in parentheses
        if extent.endswith((' ;', ' :', ' +')):
            extent = extent[:-2]
        new300a = '1 online resource (' + extent + ')'
        if field300.get_subfields('b'):
            new300a += ' :'
        field300.delete_subfield('a')
        field300.add_subfield('a', new300a)
        field300.delete_subfield('c')

    if rights:
        # add 536 field (funding information)
        if edition.unglued:
            funding_info = 'The book is available as a free download thanks to the generous support of interested readers and organizations, who made donations using the crowd-funding website Unglue.it.'
        elif rights in cc.LICENSE_LIST:
            funding_info = 'The book is available as a free download thanks to a Creative Commons license.'
        else:
            funding_info = 'The book is available as a free download because it is in the Public Domain.'
        record.add_ordered_field(pymarc.Field(
            tag='536',
            indicators=[' ', ' '],
            subfields=['a', funding_info],
        ))

        # add 540 field (terms governing use)
        record.add_ordered_field(pymarc.Field(
            tag='540',
            indicators=[' ', ' '],
            subfields=[
                'a', dict(cc.CHOICES)[rights],
                'u', dict(cc.GRANTS)[rights],
            ],
        ))

    # add 588 field (source of description) - credit where credit is due
    if print_lccn:
        record.add_ordered_field(pymarc.Field(
            tag='588',
            indicators=[' ', ' '],
            subfields=[
                'a', 'Description based on print version record from the Library of Congress.',
            ],
        ))

    # add 776 field (related editions) - preserve pISBN, LCCN, OCLCnum
    #   indicators: 0 8
    #   $i 'Print version: '
    #   $t title (text of the first 245 $a before any '/')
    #   $z pISBN, harvested from 020 (was moved from $a to $z);
    #      falls back to the database ISBN
    #   $w (DLC) LCCN, harvested from the 010 field before deletion
    #   $w (OCoLC) OCLC number, harvested from identifiers db
    title = field245.get_subfields('a')[0]
    title = title.split('/')[0]
    try:
        oclcnum = edition.identifiers.filter(type='oclc')[0].value
    except IndexError:
        oclcnum = None

    subfields = ['i', 'Print version: ', 't', title]
    if print_isbn:
        subfields.extend(['z', print_isbn])
    elif isbn:
        subfields.extend(['z', isbn])
    if print_lccn:
        subfields.extend(['w', '(DLC) ' + print_lccn])
    if oclcnum:
        subfields.extend(['w', '(OCoLC) ' + oclcnum])

    record.add_ordered_field(pymarc.Field(
        tag='776',
        indicators=['0', '8'],
        subfields=subfields,
    ))

    # strip any 9XX fields (they're for local use)
    for i in range(900, 1000):
        for field in record.get_fields(str(i)):
            record.remove_field(field)

    # add 856 fields with links for each available file
    # doing this out of order as it's the only thing that differs
    # between direct-link and via-unglue.it versions
    record_direct = None
    if not ebf:
        # deepcopy so the direct-link record gets its own field objects
        record_direct = deepcopy(record)  # 2 records for unglued stuff

        for format_tuple in settings.FORMATS:
            fmt = format_tuple[0]
            for book in edition.ebooks.filter(format=fmt):
                record_direct.add_ordered_field(pymarc.Field(
                    tag='856',
                    indicators=['4', '0'],
                    subfields=[
                        '3', fmt + ' version',
                        'q', settings.CONTENT_TYPES[fmt],
                        'u', book.url,
                    ],
                ))

    unglued_url = settings.BASE_URL_SECURE + reverse('download', args=[edition.work.id])
    record.add_ordered_field(pymarc.Field(
        tag='856',
        indicators=['4', '0'],
        subfields=['u', unglued_url],
    ))

    if record_direct is not None:
        # the direct-link record needs its own accession number
        record_direct.remove_field(record_direct.get_fields('001')[0])
        marc_record_direct, _created = models.MARCRecord.objects.get_or_create(
            edition=edition, link_target='DIRECT'
        )
        record_direct.add_ordered_field(
            pymarc.Field(tag='001', data=marc_record_direct.accession)
        )

        # write the direct-link .xml and .mrc records
        _store_record(record_direct, marc_record_direct)

    # write the via-unglue.it (or B2U) .xml and .mrc records
    _store_record(record, marc_record)

    return marc_record.pk
wiring ungluify_record to front end and s3 2013-07-17 14:34:01 +00:00			`"""`
			`This takes a MARCXML filename as an argument and converts it into`
			`MARC records for the unglued edition (in .xml and .mrc formats).`
parametrizing formats and license choices, bugfixing 2013-07-17 17:03:35 +00:00			`Consider it a catalogolem: http://commons.wikimedia.org/wiki/File:Arcimboldo_Librarian_Stokholm.jpg`
wiring ungluify_record to front end and s3 2013-07-17 14:34:01 +00:00			`Use the MARCXML file for the non-unglued edition from Library of Congress.`
			`"""`

			`import pymarc`
basic stub records 2014-10-14 14:08:08 +00:00			`import logging`
write records that link through unglueit 2013-07-23 13:41:55 +00:00			`from copy import deepcopy`
wiring ungluify_record to front end and s3 2013-07-17 14:34:01 +00:00			`from datetime import datetime`
			`from StringIO import StringIO`

parametrizing formats and license choices, bugfixing 2013-07-17 17:03:35 +00:00			`from django.conf import settings`
wiring ungluify_record to front end and s3 2013-07-17 14:34:01 +00:00			`from django.core.files.storage import default_storage`
write records that link through unglueit 2013-07-23 13:41:55 +00:00			`from django.core.urlresolvers import reverse`
wiring ungluify_record to front end and s3 2013-07-17 14:34:01 +00:00
move CC constants into a module 2014-05-08 14:21:50 +00:00			`import regluit.core.cc as cc`
wiring ungluify_record to front end and s3 2013-07-17 14:34:01 +00:00			`from regluit.core import models`

basic stub records 2014-10-14 14:08:08 +00:00			`def makestub(edition):`
			`return makemarc(None, edition)`


get license from database, not from the record submitter 2013-07-26 23:52:15 +00:00			`def makemarc(marcfile, edition):`
wiring ungluify_record to front end and s3 2013-07-17 14:34:01 +00:00			`logger = logging.getLogger(__name__)`
Adapt marc record machinery to B2U titles 2013-09-23 16:39:47 +00:00
			`try:`
			`license = edition.ebooks.all()[0].rights`
			`ebf = None`
			`except IndexError:`
			`license = None`
basic stub records 2014-10-14 14:08:08 +00:00			`try:`
			`ebf = edition.ebook_files.all()[0]`
			`except IndexError:`
			`# no record if no ebooks`
			`return None`
Adapt marc record machinery to B2U titles 2013-09-23 16:39:47 +00:00
			`logger.info("Making MARC records for edition %s " % edition)`
wiring ungluify_record to front end and s3 2013-07-17 14:34:01 +00:00
fixed error when record is not from LC 2014-09-12 15:49:07 +00:00			`# save lccn for later (if there is one) before deleting it`
			`print_lccn = None`
basic stub records 2014-10-14 14:08:08 +00:00			`if marcfile:`
			`record = pymarc.parse_xml_to_array(marcfile)[0]`
			`for lccn in record.get_fields('010'):`
			`for validlccn in lccn.get_subfields('a'):`
			`print_lccn = validlccn`
			`fields_to_delete = []`
			`fields_to_delete += record.get_fields('001')`
			`fields_to_delete += record.get_fields('003')`
			`fields_to_delete += record.get_fields('005')`
			`fields_to_delete += record.get_fields('006')`
			`fields_to_delete += record.get_fields('007')`
			`fields_to_delete += record.get_fields('010')`
			`fields_to_delete += record.get_fields('040')`
			`for field in fields_to_delete:`
			`record.remove_field(field)`
			`else:`
			`record = pymarc.Record()`
add 776 field to preserve pISBN, OCLCnum, LCCN 2013-07-25 15:09:39 +00:00
wiring ungluify_record to front end and s3 2013-07-17 14:34:01 +00:00
			`# create accession number and write 001 field`
			`# (control field syntax is special)`
Adapt marc record machinery to B2U titles 2013-09-23 16:39:47 +00:00			`if ebf:`
			`(marc_record, created) = models.MARCRecord.objects.get_or_create(edition=edition,link_target='B2U')`
			`else:`
			`(marc_record, created) = models.MARCRecord.objects.get_or_create(edition=edition,link_target='UNGLUE')`
cleaned up marc file storage issues before implementing b2u MIGRATION! 1. multi-record marc files no longer saved, sent to user directly 2. works with both s3 and file system- default storage by name used for both read and write. 3. stupid storage of computed filenames/urls eliminated 4. removed verbose logging 5. no need to have pymarc write to a StringIO 6. the MARCRecord.clean method didn't do anything 2013-09-23 04:34:51 +00:00			`field001 = pymarc.Field(tag='001', data=marc_record.accession)`
wiring ungluify_record to front end and s3 2013-07-17 14:34:01 +00:00			`record.add_ordered_field(field001)`

			`# add field indicating record originator`
			`field003 = pymarc.Field(tag='003', data='UnglueIt')`
			`record.add_ordered_field(field003)`

			`# update timestamp of record`
			`now = datetime.now()`
			`datestamp = now.strftime('%Y%m%d%H%M%S') + '.0'`
			`field005 = pymarc.Field(tag='005', data=datestamp)`
			`record.add_ordered_field(field005)`

			`# change 006, 007, 008 because this is an online resource`
			`field006 = pymarc.Field(`
			`tag='006',`
			`data='m o d '`
			`)`
			`record.add_ordered_field(field006)`
fix url-writing bug 2013-07-24 14:19:21 +00:00
wiring ungluify_record to front end and s3 2013-07-17 14:34:01 +00:00			`field007 = pymarc.Field(`
			`tag='007',`
			`data='cr'`
			`)`
			`record.add_ordered_field(field007)`

basic stub records 2014-10-14 14:08:08 +00:00			`try:`
			`field008 = record.get_fields('008')[0]`
			`record.remove_field(field008)`
			`old_field_value = field008.value()`
			`new_field_value = old_field_value[:23] + 'o' + old_field_value[24:]`
			`except IndexError:`
add more metadata from db 008, authors, publishers, isbn, pub date from db 2014-10-16 21:14:54 +00:00			`# fun fun fun`
			`new_field_value= now.strftime('%y%m%d')+'s'`
			`if len(edition.publication_date)>3:`
			`new_field_value += edition.publication_date[0:4]`
			`else:`
			`new_field_value += '\|\|\|\|'`
# in MARC spec means blank space 2014-10-17 15:30:55 +00:00			`new_field_value += '\|\|\|\|xx \|\|\|\|\|o\|\|\|\|\|\|\|\|\|\|\|eng\|\|'`
add more metadata from db 008, authors, publishers, isbn, pub date from db 2014-10-16 21:14:54 +00:00			`field008 = pymarc.Field(tag='008', data=new_field_value)`
			`record.add_ordered_field(field008)`

parametrizing formats and license choices, bugfixing 2013-07-17 17:03:35 +00:00			`# add IBSN for ebook where applicable; relegate print ISBN to $z`
documentation for CC/PD MARC records 2013-07-22 18:23:59 +00:00			`isbn = ''`
			`try:`
			`isbn = edition.identifiers.filter(type='isbn')[0].value`
			`except IndexError:`
			`pass`
lest we have no 020 field 2013-07-26 20:12:36 +00:00			`try:`
			`field020 = record.get_fields('020')[0]`
			`print_isbn = field020.get_subfields('a')[0]`
			`field020.delete_subfield('a')`
			`if isbn:`
			`field020.add_subfield('a', isbn)`
			`field020.add_subfield('z', print_isbn)`
			`except IndexError:`
			`print_isbn = None`
parametrizing formats and license choices, bugfixing 2013-07-17 17:03:35 +00:00
			`# change 050 and 082 indicators because LOC is no longer responsible for these`
			`# no easy indicator change function, so we'll just reconstruct the fields`
not all records have 050 and 082; code for 082 was putting call number in Dewey! 2013-08-12 21:22:24 +00:00			`try:`
			`field050 = record.get_fields('050')[0]`
			`field050_new = field050`
			`field050_new.indicators = [' ', '4']`
			`record.remove_field(field050)`
			`record.add_ordered_field(field050_new)`
			`except:`
			`pass # if no 050 field, don't need to change indicator`
parametrizing formats and license choices, bugfixing 2013-07-17 17:03:35 +00:00
not all records have 050 and 082; code for 082 was putting call number in Dewey! 2013-08-12 21:22:24 +00:00			`try:`
			`field082 = record.get_fields('082')[0]`
			`field082_new = field082`
			`field082_new.indicators = [' ', '4']`
			`record.remove_field(field082)`
			`record.add_ordered_field(field082_new)`
			`except:`
			`pass # if no 082 field, don't need to change indicator`
add more metadata from db 008, authors, publishers, isbn, pub date from db 2014-10-16 21:14:54 +00:00
			`# author name`
			`try:`
			`field100 = record.get_fields('100')[0]`
			`except IndexError:`
			`num_auths = edition.authors.count()`
			`if num_auths:`
			`field100 = pymarc.Field(`
			`tag='100',`
			`indicators = ['1', ' '],`
			`subfields = [`
			`'a', edition.authors.all()[0].last_name_first,`
			`]`
			`)`
			`record.add_ordered_field(field100)`
			`if num_auths > 1:`
			`for auth in edition.authors.all()[1:]:`
			`field = pymarc.Field(`
			`tag='700',`
			`indicators = ['1', ' '],`
			`subfields = [`
			`'a', auth.last_name_first,`
			`'e', 'joint author.',`
			`]`
			`)`
			`record.add_ordered_field(field)`
wiring ungluify_record to front end and s3 2013-07-17 14:34:01 +00:00			`# add subfield to 245 indicating format`
basic stub records 2014-10-14 14:08:08 +00:00			`try:`
			`field245 = record.get_fields('245')[0]`
			`except IndexError:`
			`field245 = pymarc.Field(`
			`tag='245',`
			`indicators = ['1', '0'],`
			`subfields = [`
			`'a', edition.title,`
			`]`
			`)`
			`record.add_ordered_field(field245)`
			`field245.add_subfield('a', '[electronic resource]')`
add more metadata from db 008, authors, publishers, isbn, pub date from db 2014-10-16 21:14:54 +00:00
			`# publisher, date`
			`try:`
			`field260 = record.get_fields('260')[0]`
			`except IndexError:`
			`field260 = pymarc.Field(`
			`tag='260',`
			`indicators = [' ', ' '],`
			`subfields = [`
			`'b', edition.publisher_name.name,`
			`'c', unicode(edition.publication_date),`
			`]`
			`)`
			`record.add_ordered_field(field260)`

wiring ungluify_record to front end and s3 2013-07-17 14:34:01 +00:00			`# modify 300 field (physical description)`
basic stub records 2014-10-14 14:08:08 +00:00			`try:`
			`field300 = record.get_fields('300')[0]`
			`subfield_a = field300.get_subfields('a')[0]`
			`if (`
			`subfield_a[-2:] == ' ;' or`
			`subfield_a[-2:] == ' :' or`
			`subfield_a[-2:] == ' +'`
			`):`
			`subfield_a = subfield_a[:-2]`
			`new300a = '1 online resource (' + subfield_a + ')'`
			`if field300.get_subfields('b'):`
			`new300a += ' :'`
			`field300.delete_subfield('a')`
			`field300.add_subfield('a', new300a)`
			`field300.delete_subfield('c')`
			`except:`
			`pass`
Adapt marc record machinery to B2U titles 2013-09-23 16:39:47 +00:00
			`if license:`
			`# add 536 field (funding information)`
			`if edition.unglued:`
			`funding_info = 'The book is available as a free download thanks to the generous support of interested readers and organizations, who made donations using the crowd-funding website Unglue.it.'`
funding info for non-unglued books 2013-07-29 23:44:58 +00:00			`else:`
move CC constants into a module 2014-05-08 14:21:50 +00:00			`if edition.ebooks.all()[0].rights in cc.LICENSE_LIST:`
Adapt marc record machinery to B2U titles 2013-09-23 16:39:47 +00:00			`funding_info = 'The book is available as a free download thanks to a Creative Commons license.'`
			`else:`
			`funding_info = 'The book is available as a free download because it is in the Public Domain.'`
			`field536 = pymarc.Field(`
			`tag='536',`
			`indicators = [' ', ' '],`
			`subfields = [`
			`'a', funding_info,`
			`]`
			`)`
			`record.add_ordered_field(field536)`
wiring ungluify_record to front end and s3 2013-07-17 14:34:01 +00:00
Adapt marc record machinery to B2U titles 2013-09-23 16:39:47 +00:00			`# add 540 field (terms governing use)`
			`field540 = pymarc.Field(`
			`tag='540',`
			`indicators = [' ', ' '],`
			`subfields = [`
move CC constants into a module 2014-05-08 14:21:50 +00:00			`'a', dict(cc.CHOICES)[license],`
			`'u', dict(cc.GRANTS)[license],`
Adapt marc record machinery to B2U titles 2013-09-23 16:39:47 +00:00			`]`
			`)`
			`record.add_ordered_field(field540)`
wiring ungluify_record to front end and s3 2013-07-17 14:34:01 +00:00
			`# add 588 field (source of description) - credit where credit is due`
only set LC as source if LC is source 2014-09-12 21:44:41 +00:00			`if print_lccn:`
			`field588 = pymarc.Field(`
			`tag='588',`
			`indicators = [' ', ' '],`
			`subfields = [`
			`'a', 'Description based on print version record from the Library of Congress.',`
			`]`
			`)`
			`record.add_ordered_field(field588)`
add 776 field to preserve pISBN, OCLCnum, LCCN 2013-07-25 15:09:39 +00:00
			`# add 776 field (related editions) - preserve pISBN, LCCN, OCLCnum`
			`title = record.get_fields('245')[0].get_subfields('a')[0]`
			`title = title.split('/')[0]`
			`try:`
fix oclcnum buggy code 2013-09-23 03:41:24 +00:00			`oclcnum = edition.identifiers.filter(type='oclc')[0].value`
add 776 field to preserve pISBN, OCLCnum, LCCN 2013-07-25 15:09:39 +00:00			`except IndexError:`
			`oclcnum = None`

improve logic readability 2013-07-29 23:46:58 +00:00			`subfields = ['i', 'Print version: ','t', title,]`

			`if print_isbn:`
			`subfields.extend(['z', print_isbn])`
add more metadata from db 008, authors, publishers, isbn, pub date from db 2014-10-16 21:14:54 +00:00			`elif isbn:`
			`subfields.extend(['z', isbn])`
fixed error when record is not from LC 2014-09-12 15:49:07 +00:00			`if print_lccn:`
			`subfields.extend(['w', '(DLC) ' + print_lccn, ])`
add 776 field to preserve pISBN, OCLCnum, LCCN 2013-07-25 15:09:39 +00:00			`if oclcnum:`
improve logic readability 2013-07-29 23:46:58 +00:00			`subfields.extend(['w', '(OCoLC) ' + oclcnum,])`

			`field776 = pymarc.Field(`
			`tag='776',`
			`indicators = ['0', '8'],`
			`subfields = subfields`
			`)`
add 776 field to preserve pISBN, OCLCnum, LCCN 2013-07-25 15:09:39 +00:00
			`record.add_ordered_field(field776)`
			`"""`
			`add 776 fields`
			`indicators: 0 8`
			`'$i Print version: '`
			`$t Title. <--note space`
			`$d is optional`
			`$z pISBN goes here`
			`harvest from 020 (was moved from $a to $z)`
			`$w (DLC) LCCN_goes_here`
			`harvest from 010 field before deletion`
			`$w (OCoLC) OCLCnum_goes_here`
			`harvest from identifiers db`
			`"""`
wiring ungluify_record to front end and s3 2013-07-17 14:34:01 +00:00
write records that link through unglueit 2013-07-23 13:41:55 +00:00			`# strip any 9XX fields (they're for local use)`
			`for i in range(900, 1000):`
			`fields = record.get_fields(str(i))`
			`for field in fields:`
			`record.remove_field(field)`

wiring ungluify_record to front end and s3 2013-07-17 14:34:01 +00:00			`# add 856 fields with links for each available file`
write records that link through unglueit 2013-07-23 13:41:55 +00:00			`# doing this out of order as it's the only thing that differs`
			`# between direct-link and via-unglue.it versions`
Adapt marc record machinery to B2U titles 2013-09-23 16:39:47 +00:00			`if not ebf:`
			`# need deepcopy() because omg referential transparency!`
			`record_direct = deepcopy(record) # 2 records for unglued stuff`
write records that link through unglueit 2013-07-23 13:41:55 +00:00
Adapt marc record machinery to B2U titles 2013-09-23 16:39:47 +00:00			`for format_tuple in settings.FORMATS:`
			`format = format_tuple[0]`
			`ebooks = edition.ebooks.filter(format=format)`
			`if ebooks:`
			`for book in ebooks:`
			`field856 = pymarc.Field(`
			`tag='856',`
			`indicators = ['4', '0'],`
			`subfields = [`
			`'3', format + ' version',`
			`'q', settings.CONTENT_TYPES[format],`
			`'u', book.url,`
			`]`
			`)`
			`record_direct.add_ordered_field(field856)`
write records that link through unglueit 2013-07-23 13:41:55 +00:00
			`unglued_url = settings.BASE_URL_SECURE + reverse('download', args=[edition.work.id])`
			`field856_via = pymarc.Field(`
			`tag='856',`
			`indicators = ['4', '0'],`
			`subfields = [`
			`'u', unglued_url,`
			`]`
			`)`
Adapt marc record machinery to B2U titles 2013-09-23 16:39:47 +00:00			`record.add_ordered_field(field856_via)`

			`if not ebf:`
			`# this via_unglueit record needs its own accession number`
			`field001 = record_direct.get_fields('001')[0]`
			`record_direct.remove_field(field001)`
			`(marc_record_direct, created) = models.MARCRecord.objects.get_or_create(edition=edition,link_target='DIRECT')`
			`field001 = pymarc.Field(tag='001', data=marc_record_direct.accession)`
wrong object 2013-09-24 20:22:41 +00:00			`record_direct.add_ordered_field(field001)`
wiring ungluify_record to front end and s3 2013-07-17 14:34:01 +00:00
Adapt marc record machinery to B2U titles 2013-09-23 16:39:47 +00:00			`# write the unglued MARCxml records`
			`xmlrecord = pymarc.record_to_xml(record_direct)`
			`xml_file = default_storage.open(marc_record_direct.xml_record, 'w')`
			`xml_file.write(xmlrecord)`
			`xml_file.close()`

			`# write the unglued .mrc records, then save to s3`
			`mrc_file = default_storage.open(marc_record_direct.mrc_record, 'w')`
			`writer = pymarc.MARCWriter(mrc_file)`
			`writer.write(record_direct)`
			`mrc_file.close()`
user preference affects which MARC records we make available 2013-07-23 20:07:47 +00:00
wiring ungluify_record to front end and s3 2013-07-17 14:34:01 +00:00			`xmlrecord = pymarc.record_to_xml(record)`
cleaned up marc file storage issues before implementing b2u MIGRATION! 1. multi-record marc files no longer saved, sent to user directly 2. works with both s3 and file system- default storage by name used for both read and write. 3. stupid storage of computed filenames/urls eliminated 4. removed verbose logging 5. no need to have pymarc write to a StringIO 6. the MARCRecord.clean method didn't do anything 2013-09-23 04:34:51 +00:00			`xml_file = default_storage.open(marc_record.xml_record, 'w')`
wiring ungluify_record to front end and s3 2013-07-17 14:34:01 +00:00			`xml_file.write(xmlrecord)`
persist information about where to find AWS files 2013-07-18 19:49:54 +00:00			`xml_file.close()`
write records that link through unglueit 2013-07-23 13:41:55 +00:00
cleaned up marc file storage issues before implementing b2u MIGRATION! 1. multi-record marc files no longer saved, sent to user directly 2. works with both s3 and file system- default storage by name used for both read and write. 3. stupid storage of computed filenames/urls eliminated 4. removed verbose logging 5. no need to have pymarc write to a StringIO 6. the MARCRecord.clean method didn't do anything 2013-09-23 04:34:51 +00:00			`mrc_file = default_storage.open(marc_record.mrc_record, 'w')`
			`writer = pymarc.MARCWriter(mrc_file)`
wiring ungluify_record to front end and s3 2013-07-17 14:34:01 +00:00			`writer.write(record)`
persist information about where to find AWS files 2013-07-18 19:49:54 +00:00			`mrc_file.close()`
basic stub records 2014-10-14 14:08:08 +00:00
			`return marc_record.pk`