383 lines
11 KiB
Python
383 lines
11 KiB
Python
import re
|
|
import pymarc
|
|
from pymarc import Subfield, Record, Field, MARCWriter
|
|
from datetime import datetime
|
|
from libgutenberg import GutenbergDatabase
|
|
from libgutenberg.DublinCoreMapping import DublinCoreObject
|
|
from os.path import join
|
|
|
|
|
|
# Matches a standalone four-digit year inside free text.
_YEAR_RE = re.compile(r'\b\d{4}\b')

# 008 positions 15-39, identical for every record produced here:
# place "xx " (unknown), five fill characters, form of item "o" (online),
# fourteen fill characters, a blank, and cataloging source "d" (other).
_FIELD_008_TAIL = 'xx ' + '|' * 5 + 'o' + '|' * 14 + ' d'

# Database attribute id -> MARC fields that simply copy the attribute text
# into subfield $a.  Each entry is (tag, (indicator1, indicator2)).
# Attribute 440 (series statement) is emitted as both a 490 and an 830.
_ATTRIBUTE_FIELDS = {
    10: (('010', (' ', ' ')),),
    246: (('246', ('1', ' ')),),
    250: (('250', (' ', ' ')),),
    300: (('300', (' ', ' ')),),
    440: (('490', ('1', ' ')), ('830', (' ', '0'))),
    505: (('505', ('0', ' ')),),
    508: (('508', (' ', ' ')),),
    904: (('856', ('4', ' ')),),
}


def _text_field(tag, indicators, text, code='a'):
    """Return a data field holding a single subfield with the given text."""
    return pymarc.Field(
        tag=tag,
        indicators=list(indicators),
        subfields=[Subfield(code=code, value=text)],
    )


def _find_publication_year(attributes):
    """Return the first four-digit year found in a 906 or 260 attribute.

    Returns None when no such attribute carries a four-digit year.
    """
    for att in attributes:
        if att.fk_attriblist in (906, 260):
            match = _YEAR_RE.search(str(att.text))
            if match:
                return match.group()
    return None


def _build_008(now, year=None):
    """Return the 40-character 008 value for the given entry date and year.

    With a year, 008/06 is "s" (single date), 07-10 hold the year and 11-14
    are fill characters.  Without one, positions 06-14 are all fill
    characters.
    """
    entered = now.strftime('%y%m%d')
    if year is None:
        dates = '|' * 9
    else:
        dates = 's' + year + '|' * 4
    return entered + dates + _FIELD_008_TAIL


def _format_publication_years(years):
    """Format publication years for 260 $c, e.g. "c1900, 1901.".

    NOTE(review): assumes `years` is a sequence of (kind, year) pairs where
    kind is 'copyright' or 'pubdate' -- inferred from the repr patterns the
    previous string-replace implementation matched; confirm against
    libgutenberg.  Returns '' for an empty sequence.
    """
    parts = [
        ('c' if kind == 'copyright' else '') + str(year)
        for kind, year in years
    ]
    if not parts:
        return ''
    return ', '.join(parts) + '.'


def _title_field(dc, att):
    """Build the 245 field from dc's title and the attribute's nonfiling count."""
    subfields = [Subfield(code='a', value=dc.title_no_subtitle)]
    if '\n' in dc.title:
        # Everything after the first line is treated as the remainder of the
        # title; later line breaks are flattened to spaces.
        remainder = re.sub(r'^[^\n]*\n', '', dc.title).replace('\n', ' ')
        subfields.append(Subfield(code='h', value='[electronic resource] :'))
        subfields.append(Subfield(code='b', value=remainder))
    else:
        subfields.append(Subfield(code='h', value='[electronic resource]'))
    return pymarc.Field(
        tag='245',
        indicators=['1', str(att.nonfiling)],
        subfields=subfields,
    )


def _publication_field(dc):
    """Build the 260 field (place, publisher, dates) from dc.pubinfo."""
    subfields = [
        Subfield(code='a', value=f"{dc.pubinfo.place} :"),
        Subfield(code='b', value=f"{dc.pubinfo.publisher},"),
    ]
    dates = _format_publication_years(dc.pubinfo.years)
    if dates:
        subfields.append(Subfield(code='c', value=dates))
    return pymarc.Field(tag='260', indicators=[' ', ' '], subfields=subfields)


def stub(dc):
    """Build a MARC record for the ebook described by `dc`.

    Args:
        dc: a loaded DublinCoreObject.  This function reads
            `project_gutenberg_id`, `book.attributes`, `languages`,
            `authors`, `title`, `title_no_subtitle`, `pubinfo` and (through
            add_license) `rights`.

    Returns:
        The populated pymarc.Record.  Fields are inserted with
        add_ordered_field, so the record stays sorted by tag regardless of
        the order in which they are added here.
    """
    record = pymarc.Record()
    now = datetime.now()
    attributes = dc.book.attributes

    # Leader: c - Corrected or revised, a - Language material,
    # m - Monograph/Item, 3 - Abbreviated level, u - Unknown
    record.leader[5] = 'c'
    record.leader[6] = 'a'
    record.leader[7] = 'm'
    record.leader[17] = '3'
    record.leader[18] = 'u'

    record.add_ordered_field(
        pymarc.Field(tag='001', data=str(dc.project_gutenberg_id)))
    record.add_ordered_field(pymarc.Field(tag='003', data='UtSlPG'))

    # 006: m - Computer file/Electronic resource.
    record.add_ordered_field(pymarc.Field(tag='006', data='m'))

    # 007: c - Electronic resource, r - Remote, n - Not applicable
    record.add_ordered_field(pymarc.Field(tag='007', data='cr n'))

    # 008: the publication year comes from attribute 906, or from a
    # four-digit year inside attribute 260.  Language is left uncoded because
    # the database holds ISO 639-1 codes rather than MARC language codes;
    # field 041 carries the language instead.
    year = _find_publication_year(attributes)
    record.add_ordered_field(
        pymarc.Field(tag='008', data=_build_008(now, year)))

    record.add_ordered_field(_text_field('040', (' ', ' '), 'UtSlPG'))

    if dc.languages:
        language_subfields = [
            Subfield(code='a', value=str(lang.id)) for lang in dc.languages
        ]
        language_subfields.append(Subfield(code='2', value='iso639-1'))
        record.add_ordered_field(pymarc.Field(
            tag='041',
            indicators=[' ', '7'],
            subfields=language_subfields,
        ))

    # Link to the ebook landing page: added exactly once, with the URI in $u.
    record.add_ordered_field(_text_field(
        '856', ('4', '0'),
        f"https://www.gutenberg.org/ebooks/{dc.project_gutenberg_id}",
        code='u',
    ))

    # One pass over the attributes.  Fields with the same tag keep attribute
    # order; add_ordered_field takes care of ordering across tags.
    for att in attributes:
        attr_id = att.fk_attriblist
        text = str(att.text)

        for tag, indicators in _ATTRIBUTE_FIELDS.get(attr_id, ()):
            # NOTE(review): attribute 904 text is still written to 856 $a;
            # if it holds a URL it probably belongs in $u -- confirm.
            record.add_ordered_field(_text_field(tag, indicators, text))

        if attr_id == 240:
            record.add_ordered_field(
                _text_field('240', ('1', str(att.nonfiling)), text))
        elif attr_id == 245:
            record.add_ordered_field(_title_field(dc, att))
        elif attr_id == 260:
            record.add_ordered_field(_publication_field(dc))
        elif attr_id == 500:
            # Notes can span several lines; flatten line breaks to spaces.
            note = re.sub(r'\s*[\r\n]+\s*', ' ', text)
            record.add_ordered_field(_text_field('500', (' ', ' '), note))

    # Author names: the first author is the main entry (100), the rest are
    # added entries (700).
    if dc.authors:
        record.add_ordered_field(_text_field(
            '100', ('1', ' '),
            dc.format_author_date(dc.authors[0]),  # Can do better
        ))
        for auth in dc.authors[1:]:
            record.add_ordered_field(pymarc.Field(
                tag='700',
                indicators=['1', ' '],
                subfields=[
                    Subfield(code='a', value=dc.format_author_date(auth)),
                    Subfield(code='e', value='joint author.'),
                ],
            ))

    add_license(record, dc)

    return record
|
|
|
|
def add_license(record, dc):
    """Attach a 540 field (terms governing use) built from `dc.rights`.

    Does nothing when `dc.rights` is empty or None.  Returns None; the
    record is modified in place.
    """
    rights = dc.rights
    if not rights:
        return

    terms = Subfield(code='a', value=rights)
    record.add_ordered_field(
        pymarc.Field(tag='540', indicators=[' ', ' '], subfields=[terms])
    )
|
|
|
|
|
|
def add_subject(record, dc):
    """Attach a 653 field (uncontrolled index terms) built from `dc.subjects`.

    Each subject becomes its own $a subfield.  Does nothing when
    `dc.subjects` is empty or None.  Returns None; the record is modified in
    place.
    """
    subjects = dc.subjects
    if not subjects:
        return

    # A bare string would otherwise be split into single characters.
    if isinstance(subjects, str):
        subjects = [subjects]

    # NOTE(review): str() is applied to each entry; if the entries are
    # database objects the heading text may live on an attribute instead --
    # confirm against libgutenberg.
    field653 = pymarc.Field(
        tag='653',
        indicators=[' ', ' '],
        subfields=[
            Subfield(code='a', value=str(subject)) for subject in subjects
        ],
    )
    record.add_ordered_field(field653)
|
|
|
|
|
|
# Generate 100 records (ebooks 1-100) and dump them as text.
booknums = list(range(1, 101))  # Replace with your actual book numbers
all_records = []  # Collects every generated record
for booknum in booknums[:100]:
    dc = DublinCoreObject()
    dc.load_from_database(booknum)
    record = stub(dc)
    all_records.append(record)

# Write all records to one file.  The encoding is explicit so that non-ASCII
# titles and names do not depend on the platform default.
output_path = "out/combined_output.txt100f"
with open(output_path, "w", encoding="utf-8") as text_file:
    for record in all_records:
        text_file.write(str(record) + "\n")  # Separate records with a newline

print(f"Combined records written to {output_path}")
|
|
|
|
|
|
# Generate 100 records (the first 100 ids starting at 68995) and dump them
# as text.
booknums = list(range(68995, 69195))  # Replace with your actual book numbers
all_records = []  # Collects every generated record
for booknum in booknums[:100]:
    dc = DublinCoreObject()
    dc.load_from_database(booknum)
    record = stub(dc)
    all_records.append(record)

# Write all records to one file.  The encoding is explicit so that non-ASCII
# titles and names do not depend on the platform default.
output_path = "out/combined_output.txt69000f"
with open(output_path, "w", encoding="utf-8") as text_file:
    for record in all_records:
        text_file.write(str(record) + "\n")  # Separate records with a newline

print(f"Combined records written to {output_path}")
|
|
|
|
|
|
# Generate 100 records (the first 100 ids starting at 68995) and write them
# as binary MARC.
booknums = list(range(68995, 69195))  # Replace with your actual book numbers
all_records = []  # Collects every generated record
for booknum in booknums[:100]:
    dc = DublinCoreObject()
    dc.load_from_database(booknum)
    record = stub(dc)
    all_records.append(record)

# Write all records to one MARC file
output_path = "out/combined_output.mrc"
with open(output_path, "wb") as marc_file:
    writer = MARCWriter(marc_file)
    for record in all_records:
        writer.write(record)
    writer.close()

print(f"Combined records written to {output_path}")
|
|
|