parent
057521408b
commit
9b094d02ca
|
@ -33,17 +33,14 @@ class LinkAdmin(admin.ModelAdmin):
|
||||||
return mark_safe(f'<a href="{obj.url}">{obj.url}</a>')
|
return mark_safe(f'<a href="{obj.url}">{obj.url}</a>')
|
||||||
|
|
||||||
@admin.register(models.LinkRel)
|
@admin.register(models.LinkRel)
|
||||||
class LinkAdmin(admin.ModelAdmin):
|
class LinkRelAdmin(admin.ModelAdmin):
|
||||||
list_display = ('role', 'doab', 'url',)
|
list_display = ('role', 'doab', 'url',)
|
||||||
readonly_fields = ('doab', 'url')
|
readonly_fields = ('item', 'link')
|
||||||
search_fields = ['link__url']
|
search_fields = ['link__url']
|
||||||
def doab(self, obj):
|
def doab(self, obj):
|
||||||
return mark_safe(f'<a href="/admin/doab_check/item/{obj.item.id}/">{obj.item}</a>')
|
return mark_safe(f'<a href="/admin/doab_check/item/{obj.item.id}/">{obj.item}</a>')
|
||||||
def url(self, obj):
|
def url(self, obj):
|
||||||
return mark_safe(f'<a href="/admin/doab_check/link/{obj.link.id}/">{obj.link.url}</a>')
|
return mark_safe(f'<a href="/admin/doab_check/link/{obj.link.id}/">{obj.link.url}</a>')
|
||||||
|
|
||||||
@admin.register(models.Record)
|
|
||||||
class RecordAdmin(admin.ModelAdmin):
|
|
||||||
readonly_fields = ['item']
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -12,10 +12,10 @@ from oaipmh.metadata import MetadataRegistry
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
from .doab_utils import doab_reader
|
from .doab_utils import doab_reader
|
||||||
from .models import Item, Link, Record, Timestamp
|
from .models import Item, Link, Timestamp
|
||||||
|
|
||||||
DOAB_OAIURL = 'https://directory.doabooks.org/oai/request'
|
DOAB_OAIURL = 'https://directory.doabooks.org/oai/request'
|
||||||
DOAB_PATT = re.compile(r'oai:directory\.doabooks\.org:(.*)')
|
DOAB_PATT = re.compile(r'oai:(directory\.doabooks\.org|doab-books):(.*)')
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
@ -29,10 +29,12 @@ def unlist(alist):
|
||||||
return None
|
return None
|
||||||
return alist[0]
|
return alist[0]
|
||||||
|
|
||||||
def getdoab(url):
|
def getdoab(url, new_ns=False):
|
||||||
id_match = DOAB_PATT.search(url)
|
id_match = DOAB_PATT.search(url)
|
||||||
if id_match:
|
if id_match:
|
||||||
return f'oai:doab-books:{id_match.group(1)}'
|
if new_ns:
|
||||||
|
return f'oai:directory.doabooks.org:{id_match.group(2)}'
|
||||||
|
return f'oai:doab-books:{id_match.group(2)}'
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
@ -40,7 +42,7 @@ def add_by_doab(doab_id, record=None):
|
||||||
try:
|
try:
|
||||||
record = record if record else doab_client.getRecord(
|
record = record if record else doab_client.getRecord(
|
||||||
metadataPrefix='oai_dc',
|
metadataPrefix='oai_dc',
|
||||||
identifier=doab_id
|
identifier=getdoab(doab_id, new_ns=True)
|
||||||
)
|
)
|
||||||
if record[0].isDeleted() or not record[1]:
|
if record[0].isDeleted() or not record[1]:
|
||||||
logger.warning('record %s has no content or is deleted', record)
|
logger.warning('record %s has no content or is deleted', record)
|
||||||
|
@ -57,7 +59,7 @@ def add_by_doab(doab_id, record=None):
|
||||||
publisher_name = unlist(metadata.pop('publisher', ['']))
|
publisher_name = unlist(metadata.pop('publisher', ['']))
|
||||||
item_type = unlist(metadata.pop('type', []))
|
item_type = unlist(metadata.pop('type', []))
|
||||||
timestamps = metadata.pop('timestamp', [])
|
timestamps = metadata.pop('timestamp', [])
|
||||||
added_record = load_doab_record(
|
added_item = load_doab_record(
|
||||||
doab_id,
|
doab_id,
|
||||||
title,
|
title,
|
||||||
publisher_name,
|
publisher_name,
|
||||||
|
@ -66,7 +68,7 @@ def add_by_doab(doab_id, record=None):
|
||||||
timestamps,
|
timestamps,
|
||||||
**metadata
|
**metadata
|
||||||
)
|
)
|
||||||
return added_record
|
return added_item
|
||||||
except IdDoesNotExistError as e:
|
except IdDoesNotExistError as e:
|
||||||
logger.error(e)
|
logger.error(e)
|
||||||
return None
|
return None
|
||||||
|
@ -81,16 +83,20 @@ def load_doab_record(doab_id, title, publisher_name, item_type, urls, timestamps
|
||||||
new_item.publisher_name = publisher_name
|
new_item.publisher_name = publisher_name
|
||||||
new_item.resource_type = item_type
|
new_item.resource_type = item_type
|
||||||
new_item.save()
|
new_item.save()
|
||||||
new_record = Record.objects.create(item=new_item)
|
|
||||||
for timestamp in timestamps:
|
for timestamp in timestamps:
|
||||||
(new_timestamp, created) = Timestamp.objects.get_or_create(
|
(new_timestamp, created) = Timestamp.objects.get_or_create(
|
||||||
datetime=timestamp,
|
datetime=timestamp,
|
||||||
record=new_record)
|
item=new_item)
|
||||||
for url in urls:
|
for url in urls:
|
||||||
url = url.strip()
|
url = url.strip()
|
||||||
(link, created) = Link.objects.get_or_create(url=url)
|
(link, created) = Link.objects.get_or_create(url=url)
|
||||||
link.items.add(new_item)
|
link.items.add(new_item)
|
||||||
return new_record
|
for linkrel in new_item.related.filter(role='identifier'):
|
||||||
|
if linkrel.link.url in urls:
|
||||||
|
linkrel.status = 1
|
||||||
|
else:
|
||||||
|
linkrel.status = 0
|
||||||
|
return new_item
|
||||||
|
|
||||||
|
|
||||||
def set_deleted(record):
|
def set_deleted(record):
|
||||||
|
@ -101,6 +107,9 @@ def set_deleted(record):
|
||||||
item = Item.objects.get(doab=doab)
|
item = Item.objects.get(doab=doab)
|
||||||
item.status = 0
|
item.status = 0
|
||||||
item.save()
|
item.save()
|
||||||
|
for linkrel in item.related.all():
|
||||||
|
linkrel.status = 0
|
||||||
|
linkrel.save()
|
||||||
return item
|
return item
|
||||||
except Item.DoesNotExist:
|
except Item.DoesNotExist:
|
||||||
logger.warning(f'no item {doab}')
|
logger.warning(f'no item {doab}')
|
||||||
|
@ -134,13 +143,13 @@ def load_doab_oai(from_date, until_date, limit=100):
|
||||||
doab = getdoab(ident)
|
doab = getdoab(ident)
|
||||||
if doab:
|
if doab:
|
||||||
num_doabs += 1
|
num_doabs += 1
|
||||||
rec = add_by_doab(doab, record=record)
|
item = add_by_doab(doab, record=record)
|
||||||
if not rec:
|
if not item:
|
||||||
logger.error('error for doab #%s', doab)
|
logger.error('error for doab #%s', doab)
|
||||||
continue
|
continue
|
||||||
if lasttime > start:
|
if lasttime > start:
|
||||||
new_doabs += 1
|
new_doabs += 1
|
||||||
title = rec.item.title
|
title = item.title
|
||||||
logger.info(u'updated:\t%s\t%s', doab, title)
|
logger.info(u'updated:\t%s\t%s', doab, title)
|
||||||
if num_doabs >= limit:
|
if num_doabs >= limit:
|
||||||
break
|
break
|
||||||
|
|
File diff suppressed because one or more lines are too long
|
@ -1,4 +1,4 @@
|
||||||
# Generated by Django 4.1.1 on 2023-02-20 18:43
|
# Generated by Django 4.1.7 on 2023-04-19 18:18
|
||||||
|
|
||||||
from django.db import migrations, models
|
from django.db import migrations, models
|
||||||
import django.db.models.deletion
|
import django.db.models.deletion
|
||||||
|
@ -19,7 +19,9 @@ class Migration(migrations.Migration):
|
||||||
('doab', models.CharField(max_length=40, unique=True)),
|
('doab', models.CharField(max_length=40, unique=True)),
|
||||||
('created', models.DateTimeField(auto_now_add=True, db_index=True)),
|
('created', models.DateTimeField(auto_now_add=True, db_index=True)),
|
||||||
('resource_type', models.CharField(max_length=20, null=True)),
|
('resource_type', models.CharField(max_length=20, null=True)),
|
||||||
('title', models.CharField(max_length=1000)),
|
('title', models.CharField(default='', max_length=1000)),
|
||||||
|
('publisher_name', models.CharField(default='', max_length=1000)),
|
||||||
|
('status', models.IntegerField(default=1)),
|
||||||
],
|
],
|
||||||
),
|
),
|
||||||
migrations.CreateModel(
|
migrations.CreateModel(
|
||||||
|
@ -29,30 +31,24 @@ class Migration(migrations.Migration):
|
||||||
('url', models.URLField(max_length=1024, unique=True)),
|
('url', models.URLField(max_length=1024, unique=True)),
|
||||||
('created', models.DateTimeField(auto_now_add=True)),
|
('created', models.DateTimeField(auto_now_add=True)),
|
||||||
('live', models.BooleanField(default=True)),
|
('live', models.BooleanField(default=True)),
|
||||||
],
|
('provider', models.CharField(default='', max_length=255)),
|
||||||
),
|
|
||||||
migrations.CreateModel(
|
|
||||||
name='Record',
|
|
||||||
fields=[
|
|
||||||
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
|
|
||||||
('created', models.DateTimeField(auto_now_add=True)),
|
|
||||||
('item', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='records', to='doab_check.item')),
|
|
||||||
],
|
],
|
||||||
),
|
),
|
||||||
migrations.CreateModel(
|
migrations.CreateModel(
|
||||||
name='Timestamp',
|
name='Timestamp',
|
||||||
fields=[
|
fields=[
|
||||||
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
|
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
|
||||||
('created', models.DateTimeField()),
|
('created', models.DateTimeField(auto_now_add=True)),
|
||||||
('datetime', models.DateTimeField()),
|
('datetime', models.DateTimeField()),
|
||||||
('record', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='timestamps', to='doab_check.record')),
|
('item', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='timestamps', to='doab_check.item')),
|
||||||
],
|
],
|
||||||
),
|
),
|
||||||
migrations.CreateModel(
|
migrations.CreateModel(
|
||||||
name='LinkRel',
|
name='LinkRel',
|
||||||
fields=[
|
fields=[
|
||||||
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
|
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
|
||||||
('role', models.CharField(max_length=10, null=True)),
|
('role', models.CharField(default='identifier', max_length=10)),
|
||||||
|
('status', models.IntegerField(default=1)),
|
||||||
('item', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='related', to='doab_check.item')),
|
('item', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='related', to='doab_check.item')),
|
||||||
('link', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='related', to='doab_check.link')),
|
('link', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='related', to='doab_check.link')),
|
||||||
],
|
],
|
||||||
|
|
|
@ -1,28 +0,0 @@
|
||||||
# Generated by Django 4.1.7 on 2023-02-21 00:53
|
|
||||||
|
|
||||||
from django.db import migrations, models
|
|
||||||
|
|
||||||
|
|
||||||
class Migration(migrations.Migration):
|
|
||||||
|
|
||||||
dependencies = [
|
|
||||||
('doab_check', '0001_initial'),
|
|
||||||
]
|
|
||||||
|
|
||||||
operations = [
|
|
||||||
migrations.AddField(
|
|
||||||
model_name='link',
|
|
||||||
name='provider',
|
|
||||||
field=models.CharField(default='', max_length=255),
|
|
||||||
),
|
|
||||||
migrations.AlterField(
|
|
||||||
model_name='linkrel',
|
|
||||||
name='role',
|
|
||||||
field=models.CharField(default='identifier', max_length=10),
|
|
||||||
),
|
|
||||||
migrations.AlterField(
|
|
||||||
model_name='timestamp',
|
|
||||||
name='created',
|
|
||||||
field=models.DateTimeField(auto_now_add=True),
|
|
||||||
),
|
|
||||||
]
|
|
|
@ -1,18 +0,0 @@
|
||||||
# Generated by Django 4.1.7 on 2023-03-29 23:25
|
|
||||||
from django.db import migrations, models
|
|
||||||
|
|
||||||
class Migration(migrations.Migration):
|
|
||||||
|
|
||||||
dependencies = [
|
|
||||||
('doab_check', '0002_link_provider_alter_linkrel_role_and_more'),
|
|
||||||
]
|
|
||||||
|
|
||||||
operations = [
|
|
||||||
migrations.AddField(
|
|
||||||
model_name='item',
|
|
||||||
name='publisher_name',
|
|
||||||
field=models.CharField(default='', max_length=1000),
|
|
||||||
preserve_default=False,
|
|
||||||
),
|
|
||||||
|
|
||||||
]
|
|
|
@ -1,41 +0,0 @@
|
||||||
# Generated by Django 4.1.7 on 2023-03-30 14:19
|
|
||||||
import logging
|
|
||||||
|
|
||||||
from oaipmh.error import IdDoesNotExistError
|
|
||||||
|
|
||||||
from django.db import migrations
|
|
||||||
from doab_check.doab_oai import doab_client, unlist
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
class Migration(migrations.Migration):
|
|
||||||
def noop(apps, schema_editor):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def getpub(apps, schema_editor):
|
|
||||||
Item = apps.get_model('doab_check', 'Item')
|
|
||||||
for item in Item.objects.all():
|
|
||||||
try:
|
|
||||||
record = doab_client.getRecord(
|
|
||||||
metadataPrefix='oai_dc',
|
|
||||||
identifier=item.doab
|
|
||||||
)
|
|
||||||
if not record[1]:
|
|
||||||
logger.error('No content in record %s', record)
|
|
||||||
return ''
|
|
||||||
metadata = record[1].getMap()
|
|
||||||
item.publisher_name = unlist(metadata.pop('publisher', ['']))
|
|
||||||
if item.publisher_name:
|
|
||||||
item.save()
|
|
||||||
except IdDoesNotExistError as e:
|
|
||||||
logger.error(e)
|
|
||||||
return ''
|
|
||||||
|
|
||||||
dependencies = [
|
|
||||||
('doab_check', '0003_item_publisher_name'),
|
|
||||||
]
|
|
||||||
|
|
||||||
operations = [
|
|
||||||
migrations.RunPython(getpub, reverse_code=noop, hints={'doab_check': 'Item'}),
|
|
||||||
]
|
|
|
@ -1,23 +0,0 @@
|
||||||
# Generated by Django 4.1.7 on 2023-04-11 17:26
|
|
||||||
|
|
||||||
from django.db import migrations, models
|
|
||||||
|
|
||||||
|
|
||||||
class Migration(migrations.Migration):
|
|
||||||
|
|
||||||
dependencies = [
|
|
||||||
('doab_check', '0004_auto_20230330_1419'),
|
|
||||||
]
|
|
||||||
|
|
||||||
operations = [
|
|
||||||
migrations.AlterField(
|
|
||||||
model_name='item',
|
|
||||||
name='publisher_name',
|
|
||||||
field=models.CharField(default='', max_length=1000),
|
|
||||||
),
|
|
||||||
migrations.AlterField(
|
|
||||||
model_name='item',
|
|
||||||
name='title',
|
|
||||||
field=models.CharField(default='', max_length=1000),
|
|
||||||
),
|
|
||||||
]
|
|
|
@ -1,18 +0,0 @@
|
||||||
# Generated by Django 4.1.7 on 2023-04-18 21:58
|
|
||||||
|
|
||||||
from django.db import migrations, models
|
|
||||||
|
|
||||||
|
|
||||||
class Migration(migrations.Migration):
|
|
||||||
|
|
||||||
dependencies = [
|
|
||||||
('doab_check', '0005_alter_item_publisher_name_alter_item_title'),
|
|
||||||
]
|
|
||||||
|
|
||||||
operations = [
|
|
||||||
migrations.AddField(
|
|
||||||
model_name='item',
|
|
||||||
name='status',
|
|
||||||
field=models.IntegerField(default=1),
|
|
||||||
),
|
|
||||||
]
|
|
|
@ -53,19 +53,12 @@ class Timestamp(models.Model):
|
||||||
''' timestamp of the record returned by doab. records can have multiple timestamps '''
|
''' timestamp of the record returned by doab. records can have multiple timestamps '''
|
||||||
created = models.DateTimeField(auto_now_add=True)
|
created = models.DateTimeField(auto_now_add=True)
|
||||||
datetime = models.DateTimeField()
|
datetime = models.DateTimeField()
|
||||||
record = models.ForeignKey("Record", related_name="timestamps", null=False,
|
item = models.ForeignKey("Item", related_name="timestamps", null=False,
|
||||||
on_delete=models.CASCADE)
|
on_delete=models.CASCADE)
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return f'Record for {self.record.item} on {self.datetime}'
|
return f'Record for {self.record.item} on {self.datetime}'
|
||||||
|
|
||||||
|
|
||||||
class Record(models.Model):
|
|
||||||
''' a harvested record '''
|
|
||||||
created = models.DateTimeField(auto_now_add=True)
|
|
||||||
item = models.ForeignKey("Item", related_name="records", on_delete=models.CASCADE)
|
|
||||||
def __str__(self):
|
|
||||||
return f'Record for {self.item} harvested on {self.created}'
|
|
||||||
|
|
||||||
|
|
||||||
class LinkRel(models.Model):
|
class LinkRel(models.Model):
|
||||||
''' association between an item and a link '''
|
''' association between an item and a link '''
|
||||||
|
@ -73,6 +66,7 @@ class LinkRel(models.Model):
|
||||||
role = models.CharField(max_length=10, default='identifier')
|
role = models.CharField(max_length=10, default='identifier')
|
||||||
link = models.ForeignKey("Link", related_name='related', on_delete=models.CASCADE)
|
link = models.ForeignKey("Link", related_name='related', on_delete=models.CASCADE)
|
||||||
item = models.ForeignKey("Item", related_name='related', on_delete=models.CASCADE)
|
item = models.ForeignKey("Item", related_name='related', on_delete=models.CASCADE)
|
||||||
|
status = models.IntegerField(default=1) # 0 if deleted
|
||||||
|
|
||||||
class Check(models.Model):
|
class Check(models.Model):
|
||||||
''' The results of a link check '''
|
''' The results of a link check '''
|
||||||
|
|
|
@ -29,6 +29,10 @@ class HarvestTests(TestCase):
|
||||||
add_by_doab(sample_doab)
|
add_by_doab(sample_doab)
|
||||||
item = Item.objects.get(doab=sample_doab)
|
item = Item.objects.get(doab=sample_doab)
|
||||||
self.assertTrue('Sieveking' in item.title)
|
self.assertTrue('Sieveking' in item.title)
|
||||||
|
urls = []
|
||||||
|
for linkrel in item.related.filter(status=1):
|
||||||
|
urls.append(linkrel.link.url)
|
||||||
|
self.assertTrue('http://library.oapen.org/handle/20.500.12657/27590' in urls)
|
||||||
|
|
||||||
# tweak the record to make it a delete record
|
# tweak the record to make it a delete record
|
||||||
record = doab_client.getRecord(
|
record = doab_client.getRecord(
|
||||||
|
@ -39,6 +43,7 @@ class HarvestTests(TestCase):
|
||||||
add_by_doab(sample_doab, record=record)
|
add_by_doab(sample_doab, record=record)
|
||||||
item = Item.objects.get(doab=sample_doab)
|
item = Item.objects.get(doab=sample_doab)
|
||||||
self.assertTrue(item.status == 0)
|
self.assertTrue(item.status == 0)
|
||||||
|
self.assertTrue(item.related.filter(status=1).count() == 0)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue