Add a command to remove dead DOAB ids
parent
beb76ea4bb
commit
e0c0d98c5d
|
@ -450,6 +450,14 @@ def getdoab(url):
|
|||
return id_match.group(1)
|
||||
return False
|
||||
|
||||
|
||||
def get_doab_record(doab_id):
    '''
    look up a single record in the doab oai feed

    builds the oai identifier for doab_id and asks doab_client for it using
    the oai_dc metadata prefix. returns whatever doab_client.getRecord
    returns, or None when the lookup raises IdDoesNotExistError.
    '''
    oai_identifier = 'oai:directory.doabooks.org:%s' % doab_id
    try:
        found = doab_client.getRecord(
            identifier=oai_identifier,
            metadataPrefix='oai_dc',
        )
    except IdDoesNotExistError:
        # the feed has no record under this identifier
        return None
    return found
|
||||
|
||||
def load_doab_oai(from_date, until_date, limit=100):
|
||||
'''
|
||||
use oai feed to get oai updates
|
||||
|
|
|
@ -0,0 +1,28 @@
|
|||
from django.core.management.base import BaseCommand
|
||||
from django.db.models import Count,Subquery, OuterRef, IntegerField
|
||||
|
||||
from regluit.core.loaders.doab import get_doab_record
|
||||
from regluit.core.models import Work, Identifier
|
||||
|
||||
|
||||
class Command(BaseCommand):
    '''
    management command that prunes dead doab identifiers

    only works carrying more than one identifier of type 'doab' are examined.
    each of those identifiers is looked up with get_doab_record; when the
    lookup yields None the identifier is reported on stdout and deleted.
    '''
    help = "remove dead doab ids from works that have more than one doab id"

    def handle(self, **options):
        '''
        find works with several doab identifiers and delete the ones
        for which get_doab_record returns None
        '''
        # correlated subquery: number of 'doab' identifiers attached to each work
        doab_id_counts = (
            Identifier.objects
            .filter(type='doab', work=OuterRef('pk'))
            .values('work')
            .annotate(cnt=Count('pk'))
            .values('cnt')
        )
        doab_works = Work.objects.annotate(
            doab_count=Subquery(doab_id_counts, output_field=IntegerField()),
        )

        for work in doab_works.filter(doab_count__gt=1):
            for ident in work.identifiers.filter(type="doab"):
                # deletion is destructive, so act only on an explicit None
                # (the "no record" result), never on some other falsy value
                if get_doab_record(ident.value) is None:
                    self.stdout.write('removing %s' % ident.value)
                    ident.delete()
|
Loading…
Reference in New Issue