Add new relationships to db maintenance and model

Improvements to the source data for this project have been implemented to designate the following relationships for copyright entries:

- `duplicateOf` provides the UUID of the entry that the current entry duplicates. If a record contains this value, it can be skipped, as it provides no new information for the database. Given equivalent records, the earlier record is preferred.
- `partOf` indicates that the current entry is a registration of part of a larger expression. These records should return the parent expression with all children included (to accurately reflect renewal data)
- `groupRegistration` indicates that a group of similar expressions were registered together. This requires no work from the database.

To model these relationships, a self-reference has been added to the `CCE` table to allow for the creation of parent/child relationships.
add-new-regnum-params
Mike Benowitz 2019-07-09 11:25:57 -04:00
parent 5cf334a029
commit 10247a07ab
2 changed files with 32 additions and 8 deletions

View File

@ -117,6 +117,16 @@ class CCEFile():
def parseEntry(self, entry, shared=[]): def parseEntry(self, entry, shared=[]):
uuid = entry.get('id') uuid = entry.get('id')
duplicate = entry.get('duplicateOf')
partOf = self.getEntryID(entry.get('partOf')) if entry.get('partOF') else None
if duplicate is not None:
print('Found Duplicate Entry')
raise DataError(
'duplicate_entry',
uuid=uuid,
entry=entry
)
if 'regnum' not in entry.attrib: if 'regnum' not in entry.attrib:
print('Entry Missing REGNUM') print('Entry Missing REGNUM')
@ -144,14 +154,19 @@ class CCEFile():
regs = self.createRegistrations(regnums, regDates) regs = self.createRegistrations(regnums, regDates)
existingRec = self.matchUUID(uuid) existingRec = self.matchUUID(uuid)
if existingRec: if existingRec:
self.updateEntry(existingRec, entryDates, entry, shared, regs) self.updateEntry(existingRec, entryDates, entry, shared, regs, partOf)
else: else:
self.createEntry(uuid, entryDates, entry, shared, regs) self.createEntry(uuid, entryDates, entry, shared, regs, partOf)
def matchUUID(self, uuid): def matchUUID(self, uuid):
return self.session.query(CCE).filter(CCE.uuid == uuid).one_or_none() return self.session.query(CCE).filter(CCE.uuid == uuid).one_or_none()
def createEntry(self, uuid, dates, entry, shared, registrations): def getEntryID(self, uuid):
ent = self.session.query(CCE.id).filter(CCE.uuid == uuid).one_or_none()
if ent: return ent.id
else: return None
def createEntry(self, uuid, dates, entry, shared, registrations, partOf):
titles = self.createTitleList(entry, shared) titles = self.createTitleList(entry, shared)
authors = self.createAuthorList(entry, shared) authors = self.createAuthorList(entry, shared)
copies = CCEFile.fetchText(entry, 'copies') copies = CCEFile.fetchText(entry, 'copies')
@ -186,12 +201,13 @@ class CCEFile():
authors=authors, authors=authors,
publishers=publishers, publishers=publishers,
lccn=lccn, lccn=lccn,
registrations=registrations registrations=registrations,
partOf=partOf
) )
self.session.add(cceRec) self.session.add(cceRec)
print('INSERT', cceRec) print('INSERT', cceRec)
def updateEntry(self, rec, dates, entry, shared, registrations): def updateEntry(self, rec, dates, entry, shared, registrations, partOf):
rec.title = self.createTitleList(entry, shared) rec.title = self.createTitleList(entry, shared)
rec.copies = CCEFile.fetchText(entry, 'copies') rec.copies = CCEFile.fetchText(entry, 'copies')
rec.description = CCEFile.fetchText(entry, 'desc') rec.description = CCEFile.fetchText(entry, 'desc')
@ -218,7 +234,8 @@ class CCEFile():
authors=authors, authors=authors,
publishers=publishers, publishers=publishers,
lccn=lccn, lccn=lccn,
registrations=registrations registrations=registrations,
partOf=partOf
) )
print('UPDATE', rec) print('UPDATE', rec)

View File

@ -42,22 +42,25 @@ class CCE(Core, Base):
aff_date_text = Column(Unicode) aff_date_text = Column(Unicode)
volume_id = Column(Integer, ForeignKey('volume.id')) volume_id = Column(Integer, ForeignKey('volume.id'))
parent_cce_id = Column(Integer, ForeignKey('cce.id'))
registrations = relationship('Registration', backref='cce') registrations = relationship('Registration', backref='cce')
lccns = relationship('LCCN', backref='cce', cascade='all, delete-orphan') lccns = relationship('LCCN', backref='cce', cascade='all, delete-orphan')
authors = relationship('Author', backref='cce', cascade='all, delete-orphan') authors = relationship('Author', backref='cce', cascade='all, delete-orphan')
publishers = relationship('Publisher', backref='cce', cascade='all, delete-orphan') publishers = relationship('Publisher', backref='cce', cascade='all, delete-orphan')
children_cces = relationship('CCE')
def __repr__(self): def __repr__(self):
return '<CCE(regnums={}, uuid={}, title={})>'.format(self.registrations, self.uuid, self.title) return '<CCE(regnums={}, uuid={}, title={})>'.format(self.registrations, self.uuid, self.title)
def addRelationships(self, volume, xml, lccn=[], authors=[], publishers=[], registrations=[]): def addRelationships(self, volume, xml, lccn=[], authors=[], publishers=[], registrations=[], partOf=None):
self.volume = volume self.volume = volume
self.addLCCN(lccn) self.addLCCN(lccn)
self.addAuthor(authors) self.addAuthor(authors)
self.addPublisher(publishers) self.addPublisher(publishers)
self.addRegistration(registrations) self.addRegistration(registrations)
self.addXML(xml) self.addXML(xml)
self.setParentCCE(partOf)
def addLCCN(self, lccns): def addLCCN(self, lccns):
self.lccns = [ LCCN(lccn=lccn) for lccn in lccns ] self.lccns = [ LCCN(lccn=lccn) for lccn in lccns ]
@ -92,12 +95,13 @@ class CCE(Core, Base):
for reg in registrations for reg in registrations
] ]
def updateRelationships(self, xml, lccn=[], authors=[], publishers=[], registrations=[]): def updateRelationships(self, xml, lccn=[], authors=[], publishers=[], registrations=[], partOf=None):
self.addXML(xml) self.addXML(xml)
self.updateLCCN(lccn) self.updateLCCN(lccn)
self.updateAuthors(authors) self.updateAuthors(authors)
self.updatePublishers(publishers) self.updatePublishers(publishers)
self.updateRegistrations(registrations) self.updateRegistrations(registrations)
self.setParentCCE(partOf)
def updateLCCN(self, lccns): def updateLCCN(self, lccns):
currentLCCNs = [ l.lccn for l in self.lccns ] currentLCCNs = [ l.lccn for l in self.lccns ]
@ -157,6 +161,9 @@ class CCE(Core, Base):
newReg = CCE.getReg(reg.regnum, registrations) newReg = CCE.getReg(reg.regnum, registrations)
reg.update(newReg) reg.update(newReg)
# Point this entry at its parent entry by storing parentID in the
# self-referential parent_cce_id column (a foreign key to cce.id).
# addRelationships and updateRelationships both call this with their
# partOf argument, which defaults to None; None is stored as-is.
def setParentCCE(self, parentID):
self.parent_cce_id = parentID
@staticmethod @staticmethod
def getReg(regnum, newRegs): def getReg(regnum, newRegs):
for new in newRegs: for new in newRegs: