Add a command to remove dead DOAB ids

pull/94/head
eric 2021-05-03 22:17:46 -04:00
parent beb76ea4bb
commit e0c0d98c5d
2 changed files with 36 additions and 0 deletions

View File

@ -450,6 +450,14 @@ def getdoab(url):
return id_match.group(1) return id_match.group(1)
return False return False
def get_doab_record(doab_id):
    '''
    fetch the oai_dc record for a doab id from the doab oai client;
    returns None when the client raises IdDoesNotExistError
    '''
    oai_identifier = 'oai:directory.doabooks.org:%s' % doab_id
    try:
        found = doab_client.getRecord(metadataPrefix='oai_dc', identifier=oai_identifier)
    except IdDoesNotExistError:
        # the repository does not know this identifier
        found = None
    return found
def load_doab_oai(from_date, until_date, limit=100): def load_doab_oai(from_date, until_date, limit=100):
''' '''
use oai feed to get oai updates use oai feed to get oai updates

View File

@ -0,0 +1,28 @@
from django.core.management.base import BaseCommand
from django.db.models import Count,Subquery, OuterRef, IntegerField
from regluit.core.loaders.doab import get_doab_record
from regluit.core.models import Work, Identifier
class Command(BaseCommand):
    """Management command that prunes unresolvable 'doab' identifiers.

    Only works carrying more than one 'doab' identifier are examined; a work
    with a single 'doab' identifier is never looked up or modified.
    """

    help = "remove duplicate doab ids "

    def handle(self, **options):
        """Delete each 'doab' identifier for which no DOAB record is returned.

        Every 'doab' identifier on a candidate work is passed to
        get_doab_record; when the result is falsy, a 'removing ...' line is
        written to stdout and the identifier row is deleted.
        """
        # Correlated subquery: number of 'doab' identifiers attached to the
        # outer Work row.
        doab_ids_per_work = (
            Identifier.objects
            .filter(type='doab', work=OuterRef('pk'))
            .values('work')
            .annotate(cnt=Count('pk'))
            .values('cnt')
        )
        candidates = Work.objects.annotate(
            doab_count=Subquery(doab_ids_per_work, output_field=IntegerField()),
        ).filter(doab_count__gt=1)

        for work in candidates:
            for doab_ident in work.identifiers.filter(type="doab"):
                if get_doab_record(doab_ident.value):
                    # a record came back, so the identifier is kept
                    continue
                self.stdout.write('removing %s' % doab_ident.value)
                doab_ident.delete()