From 10247a07ab657392a6965d44b61367b35490c05a Mon Sep 17 00:00:00 2001 From: Mike Benowitz Date: Tue, 9 Jul 2019 11:25:57 -0400 Subject: [PATCH] Add new relationships to db maintenance and model Improvements to the source data for this project have been implemented to designate the following relationships for copyright entries: - `duplicateOf` provides a UUID that the current entry duplicates. If a record contains this value, it can be skipped, as it provides no new information for the database. Given equivalent records, the earlier record will be preferred. - `partOf` indicates that the current entry is a registration of part of a larger expression. These records should return the parent expression with all children included (to accurately reflect renewal data). - `groupRegistration` indicates that a group of similar expressions was registered together. This requires no work from the database. To model these relationships, a self-reference has been added to the `CCE` table to allow for the creation of parent/child relationships. 
--- builder.py | 29 +++++++++++++++++++++++------ model/cce.py | 11 +++++++++-- 2 files changed, 32 insertions(+), 8 deletions(-) diff --git a/builder.py b/builder.py index 6b4cc23..30bdf48 100644 --- a/builder.py +++ b/builder.py @@ -117,6 +117,16 @@ class CCEFile(): def parseEntry(self, entry, shared=[]): uuid = entry.get('id') + duplicate = entry.get('duplicateOf') + partOf = self.getEntryID(entry.get('partOf')) if entry.get('partOf') else None + + if duplicate is not None: + print('Found Duplicate Entry') + raise DataError( + 'duplicate_entry', + uuid=uuid, + entry=entry + ) if 'regnum' not in entry.attrib: print('Entry Missing REGNUM') @@ -144,14 +154,19 @@ class CCEFile(): regs = self.createRegistrations(regnums, regDates) existingRec = self.matchUUID(uuid) if existingRec: - self.updateEntry(existingRec, entryDates, entry, shared, regs) + self.updateEntry(existingRec, entryDates, entry, shared, regs, partOf) else: - self.createEntry(uuid, entryDates, entry, shared, regs) + self.createEntry(uuid, entryDates, entry, shared, regs, partOf) def matchUUID(self, uuid): return self.session.query(CCE).filter(CCE.uuid == uuid).one_or_none() + + def getEntryID(self, uuid): + ent = self.session.query(CCE.id).filter(CCE.uuid == uuid).one_or_none() + if ent: return ent.id + else: return None - def createEntry(self, uuid, dates, entry, shared, registrations): + def createEntry(self, uuid, dates, entry, shared, registrations, partOf): titles = self.createTitleList(entry, shared) authors = self.createAuthorList(entry, shared) copies = CCEFile.fetchText(entry, 'copies') @@ -186,12 +201,13 @@ class CCEFile(): authors=authors, publishers=publishers, lccn=lccn, - registrations=registrations + registrations=registrations, + partOf=partOf ) self.session.add(cceRec) print('INSERT', cceRec) - def updateEntry(self, rec, dates, entry, shared, registrations): + def updateEntry(self, rec, dates, entry, shared, registrations, partOf): rec.title = self.createTitleList(entry, shared) 
rec.copies = CCEFile.fetchText(entry, 'copies') rec.description = CCEFile.fetchText(entry, 'desc') @@ -218,7 +234,8 @@ class CCEFile(): authors=authors, publishers=publishers, lccn=lccn, - registrations=registrations + registrations=registrations, + partOf=partOf ) print('UPDATE', rec) diff --git a/model/cce.py b/model/cce.py index ed95b1d..ebf9689 100644 --- a/model/cce.py +++ b/model/cce.py @@ -42,22 +42,25 @@ class CCE(Core, Base): aff_date_text = Column(Unicode) volume_id = Column(Integer, ForeignKey('volume.id')) + parent_cce_id = Column(Integer, ForeignKey('cce.id')) registrations = relationship('Registration', backref='cce') lccns = relationship('LCCN', backref='cce', cascade='all, delete-orphan') authors = relationship('Author', backref='cce', cascade='all, delete-orphan') publishers = relationship('Publisher', backref='cce', cascade='all, delete-orphan') + children_cces = relationship('CCE') def __repr__(self): return ''.format(self.registrations, self.uuid, self.title) - def addRelationships(self, volume, xml, lccn=[], authors=[], publishers=[], registrations=[]): + def addRelationships(self, volume, xml, lccn=[], authors=[], publishers=[], registrations=[], partOf=None): self.volume = volume self.addLCCN(lccn) self.addAuthor(authors) self.addPublisher(publishers) self.addRegistration(registrations) self.addXML(xml) + self.setParentCCE(partOf) def addLCCN(self, lccns): self.lccns = [ LCCN(lccn=lccn) for lccn in lccns ] @@ -92,12 +95,13 @@ class CCE(Core, Base): for reg in registrations ] - def updateRelationships(self, xml, lccn=[], authors=[], publishers=[], registrations=[]): + def updateRelationships(self, xml, lccn=[], authors=[], publishers=[], registrations=[], partOf=None): self.addXML(xml) self.updateLCCN(lccn) self.updateAuthors(authors) self.updatePublishers(publishers) self.updateRegistrations(registrations) + self.setParentCCE(partOf) def updateLCCN(self, lccns): currentLCCNs = [ l.lccn for l in self.lccns ] @@ -156,6 +160,9 @@ class CCE(Core, 
Base): def updateReg(self, reg, registrations): newReg = CCE.getReg(reg.regnum, registrations) reg.update(newReg) + + def setParentCCE(self, parentID): + self.parent_cce_id = parentID @staticmethod def getReg(regnum, newRegs):