Add a command to remove dead DOAB ids
parent
beb76ea4bb
commit
e0c0d98c5d
|
@ -450,6 +450,14 @@ def getdoab(url):
|
||||||
return id_match.group(1)
|
return id_match.group(1)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def get_doab_record(doab_id):
    '''
    look up a single record for doab_id via doab_client.getRecord

    The identifier sent to the client is the doab_id prefixed with
    'oai:directory.doabooks.org:', requested with the 'oai_dc' metadata prefix.
    Returns whatever the client returns, or None when the client raises
    IdDoesNotExistError.
    '''
    oai_identifier = 'oai:directory.doabooks.org:%s' % doab_id
    try:
        found = doab_client.getRecord(identifier=oai_identifier, metadataPrefix='oai_dc')
    except IdDoesNotExistError:
        # the client does not know this identifier
        found = None
    return found
|
||||||
|
|
||||||
def load_doab_oai(from_date, until_date, limit=100):
|
def load_doab_oai(from_date, until_date, limit=100):
|
||||||
'''
|
'''
|
||||||
use oai feed to get oai updates
|
use oai feed to get oai updates
|
||||||
|
|
|
@ -0,0 +1,28 @@
|
||||||
|
from django.core.management.base import BaseCommand
|
||||||
|
from django.db.models import Count,Subquery, OuterRef, IntegerField
|
||||||
|
|
||||||
|
from regluit.core.loaders.doab import get_doab_record
|
||||||
|
from regluit.core.models import Work, Identifier
|
||||||
|
|
||||||
|
|
||||||
|
class Command(BaseCommand):
    '''
    Management command: for every work with more than one 'doab' identifier,
    delete those identifiers for which get_doab_record returns nothing.
    '''
    help = "remove duplicate doab ids "

    def handle(self, **options):
        '''
        Walk the works having several 'doab' identifiers, look each identifier
        up with get_doab_record, and delete (after writing a line to stdout)
        every identifier whose lookup yields a falsy result.
        '''
        # Correlated subquery: number of 'doab' identifiers on the outer work.
        doab_ids_per_work = (
            Identifier.objects
            .filter(type='doab', work=OuterRef('pk'))
            .values('work')
            .annotate(cnt=Count('pk'))
            .values('cnt')
        )
        counted_works = Work.objects.annotate(
            doab_count=Subquery(doab_ids_per_work, output_field=IntegerField()),
        )
        multi_doab_works = counted_works.filter(doab_count__gt=1)

        for work in multi_doab_works:
            for identifier in work.identifiers.filter(type="doab"):
                if get_doab_record(identifier.value):
                    # a record came back for this id, keep it
                    continue
                self.stdout.write('removing %s' % identifier.value)
                identifier.delete()
|
Loading…
Reference in New Issue