Add new relationships to db maintenance and model
Improvements to the source data for this project have been implemented to designate the following relationships for copyright entries:
- `duplicateOf` provides the UUID of the entry that the current entry duplicates. A record containing this value can be skipped, as it provides no new information for the database. Given equivalent records, the earlier record is preferred.
- `partOf` indicates that the current entry is a registration of part of a larger expression. These records should return the parent expression with all children included (to accurately reflect renewal data).
- `groupRegistration` indicates that a group of similar expressions were registered together. This requires no work from the database.

To model these relationships, a self-reference has been added to the `CCE` table to allow for the creation of parent/child relationships.
add-new-regnum-params
parent
5cf334a029
commit
10247a07ab
29
builder.py
29
builder.py
|
@ -117,6 +117,16 @@ class CCEFile():
|
||||||
|
|
||||||
def parseEntry(self, entry, shared=[]):
|
def parseEntry(self, entry, shared=[]):
|
||||||
uuid = entry.get('id')
|
uuid = entry.get('id')
|
||||||
|
duplicate = entry.get('duplicateOf')
|
||||||
|
# Read the `partOf` value once so the presence check and the lookup use the
# same key. The previous check looked up 'partOF', which never matched the
# 'partOf' key used for the lookup, so the parent id was always None.
parentUUID = entry.get('partOf')
partOf = self.getEntryID(parentUUID) if parentUUID else None
|
||||||
|
|
||||||
|
if duplicate is not None:
|
||||||
|
print('Found Duplicate Entry')
|
||||||
|
raise DataError(
|
||||||
|
'duplicate_entry',
|
||||||
|
uuid=uuid,
|
||||||
|
entry=entry
|
||||||
|
)
|
||||||
|
|
||||||
if 'regnum' not in entry.attrib:
|
if 'regnum' not in entry.attrib:
|
||||||
print('Entry Missing REGNUM')
|
print('Entry Missing REGNUM')
|
||||||
|
@ -144,14 +154,19 @@ class CCEFile():
|
||||||
regs = self.createRegistrations(regnums, regDates)
|
regs = self.createRegistrations(regnums, regDates)
|
||||||
existingRec = self.matchUUID(uuid)
|
existingRec = self.matchUUID(uuid)
|
||||||
if existingRec:
|
if existingRec:
|
||||||
self.updateEntry(existingRec, entryDates, entry, shared, regs)
|
self.updateEntry(existingRec, entryDates, entry, shared, regs, partOf)
|
||||||
else:
|
else:
|
||||||
self.createEntry(uuid, entryDates, entry, shared, regs)
|
self.createEntry(uuid, entryDates, entry, shared, regs, partOf)
|
||||||
|
|
||||||
def matchUUID(self, uuid):
    """Look up the CCE record whose ``uuid`` column equals ``uuid``.

    Returns whatever ``one_or_none()`` yields for the filtered query on
    ``self.session``: the single matching record, or ``None`` when there is
    no match.
    """
    uuidQuery = self.session.query(CCE).filter(CCE.uuid == uuid)
    return uuidQuery.one_or_none()
|
||||||
|
|
||||||
def createEntry(self, uuid, dates, entry, shared, registrations):
|
def getEntryID(self, uuid):
    """Return the database ``id`` of the CCE record with the given uuid.

    Args:
        uuid: UUID string of the entry to look up.

    Returns:
        The ``id`` value of the matching row, or ``None`` when ``uuid`` is
        empty/``None`` or no row matches.
    """
    # Without a uuid there is nothing to resolve; comparing the column to
    # None would instead match rows whose uuid is NULL.
    if not uuid:
        return None

    # Only the id column is selected, so the result is a single-column row.
    row = self.session.query(CCE.id).filter(CCE.uuid == uuid).one_or_none()
    return row.id if row else None
|
||||||
|
|
||||||
|
def createEntry(self, uuid, dates, entry, shared, registrations, partOf):
|
||||||
titles = self.createTitleList(entry, shared)
|
titles = self.createTitleList(entry, shared)
|
||||||
authors = self.createAuthorList(entry, shared)
|
authors = self.createAuthorList(entry, shared)
|
||||||
copies = CCEFile.fetchText(entry, 'copies')
|
copies = CCEFile.fetchText(entry, 'copies')
|
||||||
|
@ -186,12 +201,13 @@ class CCEFile():
|
||||||
authors=authors,
|
authors=authors,
|
||||||
publishers=publishers,
|
publishers=publishers,
|
||||||
lccn=lccn,
|
lccn=lccn,
|
||||||
registrations=registrations
|
registrations=registrations,
|
||||||
|
partOf=partOf
|
||||||
)
|
)
|
||||||
self.session.add(cceRec)
|
self.session.add(cceRec)
|
||||||
print('INSERT', cceRec)
|
print('INSERT', cceRec)
|
||||||
|
|
||||||
def updateEntry(self, rec, dates, entry, shared, registrations):
|
def updateEntry(self, rec, dates, entry, shared, registrations, partOf):
|
||||||
rec.title = self.createTitleList(entry, shared)
|
rec.title = self.createTitleList(entry, shared)
|
||||||
rec.copies = CCEFile.fetchText(entry, 'copies')
|
rec.copies = CCEFile.fetchText(entry, 'copies')
|
||||||
rec.description = CCEFile.fetchText(entry, 'desc')
|
rec.description = CCEFile.fetchText(entry, 'desc')
|
||||||
|
@ -218,7 +234,8 @@ class CCEFile():
|
||||||
authors=authors,
|
authors=authors,
|
||||||
publishers=publishers,
|
publishers=publishers,
|
||||||
lccn=lccn,
|
lccn=lccn,
|
||||||
registrations=registrations
|
registrations=registrations,
|
||||||
|
partOf=partOf
|
||||||
)
|
)
|
||||||
print('UPDATE', rec)
|
print('UPDATE', rec)
|
||||||
|
|
||||||
|
|
11
model/cce.py
11
model/cce.py
|
@ -42,22 +42,25 @@ class CCE(Core, Base):
|
||||||
aff_date_text = Column(Unicode)
|
aff_date_text = Column(Unicode)
|
||||||
|
|
||||||
volume_id = Column(Integer, ForeignKey('volume.id'))
|
volume_id = Column(Integer, ForeignKey('volume.id'))
|
||||||
|
parent_cce_id = Column(Integer, ForeignKey('cce.id'))
|
||||||
|
|
||||||
registrations = relationship('Registration', backref='cce')
|
registrations = relationship('Registration', backref='cce')
|
||||||
lccns = relationship('LCCN', backref='cce', cascade='all, delete-orphan')
|
lccns = relationship('LCCN', backref='cce', cascade='all, delete-orphan')
|
||||||
authors = relationship('Author', backref='cce', cascade='all, delete-orphan')
|
authors = relationship('Author', backref='cce', cascade='all, delete-orphan')
|
||||||
publishers = relationship('Publisher', backref='cce', cascade='all, delete-orphan')
|
publishers = relationship('Publisher', backref='cce', cascade='all, delete-orphan')
|
||||||
|
children_cces = relationship('CCE')
|
||||||
|
|
||||||
def __repr__(self):
    """Return a debugging summary built from registrations, uuid and title."""
    template = '<CCE(regnums={}, uuid={}, title={})>'
    return template.format(self.registrations, self.uuid, self.title)
|
||||||
|
|
||||||
def addRelationships(self, volume, xml, lccn=None, authors=None, publishers=None, registrations=None, partOf=None):
    """Attach the volume, child records, source XML and parent link to this entry.

    Args:
        volume: Value assigned to ``self.volume``.
        xml: Passed to ``addXML``.
        lccn: Passed to ``addLCCN``; defaults to an empty list.
        authors: Passed to ``addAuthor``; defaults to an empty list.
        publishers: Passed to ``addPublisher``; defaults to an empty list.
        registrations: Passed to ``addRegistration``; defaults to an empty list.
        partOf: Parent entry id passed to ``setParentCCE`` (``None`` for no parent).
    """
    # Use None sentinels rather than `[]` defaults: a default list is created
    # once and shared by every call, so any mutation would leak between calls.
    lccn = [] if lccn is None else lccn
    authors = [] if authors is None else authors
    publishers = [] if publishers is None else publishers
    registrations = [] if registrations is None else registrations

    self.volume = volume
    self.addLCCN(lccn)
    self.addAuthor(authors)
    self.addPublisher(publishers)
    self.addRegistration(registrations)
    self.addXML(xml)
    self.setParentCCE(partOf)
|
||||||
|
|
||||||
def addLCCN(self, lccns):
|
def addLCCN(self, lccns):
|
||||||
self.lccns = [ LCCN(lccn=lccn) for lccn in lccns ]
|
self.lccns = [ LCCN(lccn=lccn) for lccn in lccns ]
|
||||||
|
@ -92,12 +95,13 @@ class CCE(Core, Base):
|
||||||
for reg in registrations
|
for reg in registrations
|
||||||
]
|
]
|
||||||
|
|
||||||
def updateRelationships(self, xml, lccn=None, authors=None, publishers=None, registrations=None, partOf=None):
    """Refresh the source XML, child records and parent link of this entry.

    Args:
        xml: Passed to ``addXML``.
        lccn: Passed to ``updateLCCN``; defaults to an empty list.
        authors: Passed to ``updateAuthors``; defaults to an empty list.
        publishers: Passed to ``updatePublishers``; defaults to an empty list.
        registrations: Passed to ``updateRegistrations``; defaults to an empty list.
        partOf: Parent entry id passed to ``setParentCCE`` (``None`` for no parent).
    """
    # Use None sentinels rather than `[]` defaults: a default list is created
    # once and shared by every call, so any mutation would leak between calls.
    lccn = [] if lccn is None else lccn
    authors = [] if authors is None else authors
    publishers = [] if publishers is None else publishers
    registrations = [] if registrations is None else registrations

    self.addXML(xml)
    self.updateLCCN(lccn)
    self.updateAuthors(authors)
    self.updatePublishers(publishers)
    self.updateRegistrations(registrations)
    self.setParentCCE(partOf)
|
||||||
|
|
||||||
def updateLCCN(self, lccns):
|
def updateLCCN(self, lccns):
|
||||||
currentLCCNs = [ l.lccn for l in self.lccns ]
|
currentLCCNs = [ l.lccn for l in self.lccns ]
|
||||||
|
@ -157,6 +161,9 @@ class CCE(Core, Base):
|
||||||
newReg = CCE.getReg(reg.regnum, registrations)
|
newReg = CCE.getReg(reg.regnum, registrations)
|
||||||
reg.update(newReg)
|
reg.update(newReg)
|
||||||
|
|
||||||
|
def setParentCCE(self, parentID):
    """Store ``parentID`` in ``parent_cce_id``.

    Args:
        parentID: Value to assign; ``None`` is stored as-is.
    """
    self.parent_cce_id = parentID
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def getReg(regnum, newRegs):
|
def getReg(regnum, newRegs):
|
||||||
for new in newRegs:
|
for new in newRegs:
|
||||||
|
|
Loading…
Reference in New Issue