64 lines
3.0 KiB
Python
64 lines
3.0 KiB
Python
from __future__ import print_function
|
|
|
|
from django.core.management.base import BaseCommand
|
|
from django.db import IntegrityError
|
|
|
|
from regluit.core import models
|
|
from regluit.utils.text import sanitize_line, remove_badxml
|
|
|
|
|
|
class Command(BaseCommand):
    """Management command that cleans stored text fields.

    Applies ``sanitize_line`` to work titles, edition titles, author names and
    publisher names, and ``remove_badxml`` to work descriptions. Records whose
    value changes are saved, and a count of fixes is reported per field.
    """

    help = "clean work and edition titles, work descriptions, and author and publisher names"

    def handle(self, **options):
        """Run every cleanup pass in turn, reporting counts after each one.

        Output goes through ``self.stdout`` so it can be captured or
        redirected by whoever invokes the command.
        """
        work_titles_fixed, work_descriptions_fixed = self._clean_works()
        self.stdout.write("work_titles_fixed = {}".format(work_titles_fixed))
        self.stdout.write("work_descriptions_fixed = {}".format(work_descriptions_fixed))

        edition_titles_fixed = self._clean_editions()
        self.stdout.write("edition_titles_fixed = {}".format(edition_titles_fixed))

        author_names_fixed = self._clean_authors()
        self.stdout.write("author_names_fixed = {}".format(author_names_fixed))

        publisher_names_fixed = self._clean_publisher_names()
        self.stdout.write("publisher_names_fixed = {}".format(publisher_names_fixed))

    def _clean_works(self):
        """Clean work titles and descriptions; return ``(titles_fixed, descriptions_fixed)``."""
        titles_fixed = descriptions_fixed = 0
        for work in models.Work.objects.all():
            changed = False

            clean_title = sanitize_line(work.title)
            if clean_title != work.title:
                work.title = clean_title
                titles_fixed += 1
                changed = True

            # Empty or missing descriptions are left untouched.
            if work.description:
                clean_description = remove_badxml(work.description)
                if clean_description != work.description:
                    work.description = clean_description
                    descriptions_fixed += 1
                    changed = True

            if changed:
                # A single write per work, even when both fields were cleaned.
                work.save()
        return titles_fixed, descriptions_fixed

    def _clean_editions(self):
        """Clean edition titles; return the number of editions changed."""
        fixed = 0
        for edition in models.Edition.objects.all():
            clean_title = sanitize_line(edition.title)
            if clean_title == edition.title:
                continue
            edition.title = clean_title
            edition.save()
            fixed += 1
        return fixed

    def _clean_authors(self):
        """Clean author names, merging into an existing author on a name clash.

        Returns the number of authors whose name needed cleaning, whether the
        record was renamed or merged away.
        """
        fixed = 0
        for author in models.Author.objects.all():
            clean_name = sanitize_line(author.name)
            if clean_name == author.name:
                continue
            author.name = clean_name
            try:
                author.save()
            except IntegrityError:
                # Treated as a duplicate entry: an author carrying the cleaned
                # name is expected to exist already. Look it up by the exact
                # value that was rejected, repoint this author's relators at
                # it, then remove the now-redundant record.
                correct = models.Author.objects.get(name=clean_name)
                for relator in author.relator_set.all():
                    relator.author = correct
                    relator.save()
                author.delete()
            fixed += 1
        return fixed

    def _clean_publisher_names(self):
        """Clean publisher names, merging into an existing name on a clash.

        Returns the number of publisher names that needed cleaning, whether
        the record was renamed or merged away.
        """
        fixed = 0
        for publishername in models.PublisherName.objects.all():
            clean_name = sanitize_line(publishername.name)
            if clean_name == publishername.name:
                continue
            publishername.name = clean_name
            try:
                publishername.save()
            except IntegrityError:
                # Treated as a duplicate entry: move every edition and every
                # publisher that references this name over to the existing
                # record with the cleaned name, then remove this one.
                correct = models.PublisherName.objects.get(name=clean_name)
                for edition in publishername.editions.all():
                    edition.publisher_name = correct
                    edition.save()
                for publisher in publishername.key_publisher.all():
                    publisher.name = correct
                    publisher.save()
                publishername.delete()
            fixed += 1
        return fixed
|