parent
057521408b
commit
9b094d02ca
|
@ -33,17 +33,14 @@ class LinkAdmin(admin.ModelAdmin):
|
|||
return mark_safe(f'<a href="{obj.url}">{obj.url}</a>')
|
||||
|
||||
@admin.register(models.LinkRel)
class LinkRelAdmin(admin.ModelAdmin):
    """Admin configuration for LinkRel, the association between an item and a link.

    The change list shows the role plus two computed columns (`doab`, `url`)
    that hyperlink to the admin pages of the related item and link.
    """
    list_display = ('role', 'doab', 'url',)
    # the foreign keys themselves are shown read-only on the change form
    readonly_fields = ('item', 'link')
    search_fields = ['link__url']

    def doab(self, obj):
        """Return an HTML anchor pointing at the admin page of `obj.item`."""
        from django.utils.html import format_html
        # format_html escapes its arguments; the item's string form comes
        # from harvested metadata and must not be injected as raw markup.
        return format_html(
            '<a href="/admin/doab_check/item/{}/">{}</a>',
            obj.item.id,
            obj.item,
        )

    def url(self, obj):
        """Return an HTML anchor pointing at the admin page of `obj.link`."""
        from django.utils.html import format_html
        # the link URL is harvested data, so it is escaped rather than
        # interpolated into a string that is then marked safe.
        return format_html(
            '<a href="/admin/doab_check/link/{}/">{}</a>',
            obj.link.id,
            obj.link.url,
        )
|
||||
|
||||
@admin.register(models.Record)
|
||||
class RecordAdmin(admin.ModelAdmin):
|
||||
readonly_fields = ['item']
|
||||
|
||||
|
||||
|
|
|
@ -12,10 +12,10 @@ from oaipmh.metadata import MetadataRegistry
|
|||
import requests
|
||||
|
||||
from .doab_utils import doab_reader
|
||||
from .models import Item, Link, Record, Timestamp
|
||||
from .models import Item, Link, Timestamp
|
||||
|
||||
DOAB_OAIURL = 'https://directory.doabooks.org/oai/request'
|
||||
DOAB_PATT = re.compile(r'oai:directory\.doabooks\.org:(.*)')
|
||||
DOAB_PATT = re.compile(r'oai:(directory\.doabooks\.org|doab-books):(.*)')
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
@ -29,10 +29,12 @@ def unlist(alist):
|
|||
return None
|
||||
return alist[0]
|
||||
|
||||
def getdoab(url, new_ns=False):
    """Normalize a DOAB OAI identifier found in `url`.

    `DOAB_PATT` accepts either namespace (`directory.doabooks.org` or
    `doab-books`); its second group is the part after the namespace.

    Args:
        url: string searched for a DOAB OAI identifier.
        new_ns: when true, emit the `oai:directory.doabooks.org:` prefix;
            otherwise emit the `oai:doab-books:` prefix.

    Returns:
        The identifier rewritten with the selected prefix, or False when
        `url` contains no DOAB OAI identifier.
    """
    id_match = DOAB_PATT.search(url)
    if not id_match:
        return False
    # group(1) is the namespace alternative; group(2) is the identifier tail
    namespace = 'directory.doabooks.org' if new_ns else 'doab-books'
    return f'oai:{namespace}:{id_match.group(2)}'
|
||||
|
||||
|
||||
|
@ -40,7 +42,7 @@ def add_by_doab(doab_id, record=None):
|
|||
try:
|
||||
record = record if record else doab_client.getRecord(
|
||||
metadataPrefix='oai_dc',
|
||||
identifier=doab_id
|
||||
identifier=getdoab(doab_id, new_ns=True)
|
||||
)
|
||||
if record[0].isDeleted() or not record[1]:
|
||||
logger.warning('record %s has no content or is deleted', record)
|
||||
|
@ -57,7 +59,7 @@ def add_by_doab(doab_id, record=None):
|
|||
publisher_name = unlist(metadata.pop('publisher', ['']))
|
||||
item_type = unlist(metadata.pop('type', []))
|
||||
timestamps = metadata.pop('timestamp', [])
|
||||
added_record = load_doab_record(
|
||||
added_item = load_doab_record(
|
||||
doab_id,
|
||||
title,
|
||||
publisher_name,
|
||||
|
@ -66,7 +68,7 @@ def add_by_doab(doab_id, record=None):
|
|||
timestamps,
|
||||
**metadata
|
||||
)
|
||||
return added_record
|
||||
return added_item
|
||||
except IdDoesNotExistError as e:
|
||||
logger.error(e)
|
||||
return None
|
||||
|
@ -81,16 +83,20 @@ def load_doab_record(doab_id, title, publisher_name, item_type, urls, timestamps
|
|||
new_item.publisher_name = publisher_name
|
||||
new_item.resource_type = item_type
|
||||
new_item.save()
|
||||
new_record = Record.objects.create(item=new_item)
|
||||
for timestamp in timestamps:
|
||||
(new_timestamp, created) = Timestamp.objects.get_or_create(
|
||||
datetime=timestamp,
|
||||
record=new_record)
|
||||
item=new_item)
|
||||
for url in urls:
|
||||
url = url.strip()
|
||||
(link, created) = Link.objects.get_or_create(url=url)
|
||||
link.items.add(new_item)
|
||||
return new_record
|
||||
for linkrel in new_item.related.filter(role='identifier'):
|
||||
if linkrel.link.url in urls:
|
||||
linkrel.status = 1
|
||||
else:
|
||||
linkrel.status = 0
|
||||
return new_item
|
||||
|
||||
|
||||
def set_deleted(record):
|
||||
|
@ -101,6 +107,9 @@ def set_deleted(record):
|
|||
item = Item.objects.get(doab=doab)
|
||||
item.status = 0
|
||||
item.save()
|
||||
for linkrel in item.related.all():
|
||||
linkrel.status = 0
|
||||
linkrel.save()
|
||||
return item
|
||||
except Item.DoesNotExist:
|
||||
logger.warning(f'no item {doab}')
|
||||
|
@ -134,13 +143,13 @@ def load_doab_oai(from_date, until_date, limit=100):
|
|||
doab = getdoab(ident)
|
||||
if doab:
|
||||
num_doabs += 1
|
||||
rec = add_by_doab(doab, record=record)
|
||||
if not rec:
|
||||
item = add_by_doab(doab, record=record)
|
||||
if not item:
|
||||
logger.error('error for doab #%s', doab)
|
||||
continue
|
||||
if lasttime > start:
|
||||
new_doabs += 1
|
||||
title = rec.item.title
|
||||
title = item.title
|
||||
logger.info(u'updated:\t%s\t%s', doab, title)
|
||||
if num_doabs >= limit:
|
||||
break
|
||||
|
|
File diff suppressed because one or more lines are too long
|
@ -1,4 +1,4 @@
|
|||
# Generated by Django 4.1.1 on 2023-02-20 18:43
|
||||
# Generated by Django 4.1.7 on 2023-04-19 18:18
|
||||
|
||||
from django.db import migrations, models
|
||||
import django.db.models.deletion
|
||||
|
@ -19,7 +19,9 @@ class Migration(migrations.Migration):
|
|||
('doab', models.CharField(max_length=40, unique=True)),
|
||||
('created', models.DateTimeField(auto_now_add=True, db_index=True)),
|
||||
('resource_type', models.CharField(max_length=20, null=True)),
|
||||
('title', models.CharField(max_length=1000)),
|
||||
('title', models.CharField(default='', max_length=1000)),
|
||||
('publisher_name', models.CharField(default='', max_length=1000)),
|
||||
('status', models.IntegerField(default=1)),
|
||||
],
|
||||
),
|
||||
migrations.CreateModel(
|
||||
|
@ -29,30 +31,24 @@ class Migration(migrations.Migration):
|
|||
('url', models.URLField(max_length=1024, unique=True)),
|
||||
('created', models.DateTimeField(auto_now_add=True)),
|
||||
('live', models.BooleanField(default=True)),
|
||||
],
|
||||
),
|
||||
migrations.CreateModel(
|
||||
name='Record',
|
||||
fields=[
|
||||
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
|
||||
('created', models.DateTimeField(auto_now_add=True)),
|
||||
('item', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='records', to='doab_check.item')),
|
||||
('provider', models.CharField(default='', max_length=255)),
|
||||
],
|
||||
),
|
||||
migrations.CreateModel(
|
||||
name='Timestamp',
|
||||
fields=[
|
||||
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
|
||||
('created', models.DateTimeField()),
|
||||
('created', models.DateTimeField(auto_now_add=True)),
|
||||
('datetime', models.DateTimeField()),
|
||||
('record', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='timestamps', to='doab_check.record')),
|
||||
('item', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='timestamps', to='doab_check.item')),
|
||||
],
|
||||
),
|
||||
migrations.CreateModel(
|
||||
name='LinkRel',
|
||||
fields=[
|
||||
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
|
||||
('role', models.CharField(max_length=10, null=True)),
|
||||
('role', models.CharField(default='identifier', max_length=10)),
|
||||
('status', models.IntegerField(default=1)),
|
||||
('item', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='related', to='doab_check.item')),
|
||||
('link', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='related', to='doab_check.link')),
|
||||
],
|
||||
|
|
|
@ -1,28 +0,0 @@
|
|||
# Generated by Django 4.1.7 on 2023-02-21 00:53
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('doab_check', '0001_initial'),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AddField(
|
||||
model_name='link',
|
||||
name='provider',
|
||||
field=models.CharField(default='', max_length=255),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='linkrel',
|
||||
name='role',
|
||||
field=models.CharField(default='identifier', max_length=10),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='timestamp',
|
||||
name='created',
|
||||
field=models.DateTimeField(auto_now_add=True),
|
||||
),
|
||||
]
|
|
@ -1,18 +0,0 @@
|
|||
# Generated by Django 4.1.7 on 2023-03-29 23:25
|
||||
from django.db import migrations, models
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('doab_check', '0002_link_provider_alter_linkrel_role_and_more'),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AddField(
|
||||
model_name='item',
|
||||
name='publisher_name',
|
||||
field=models.CharField(default='', max_length=1000),
|
||||
preserve_default=False,
|
||||
),
|
||||
|
||||
]
|
|
@ -1,41 +0,0 @@
|
|||
# Generated by Django 4.1.7 on 2023-03-30 14:19
|
||||
import logging
|
||||
|
||||
from oaipmh.error import IdDoesNotExistError
|
||||
|
||||
from django.db import migrations
|
||||
from doab_check.doab_oai import doab_client, unlist
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
def noop(apps, schema_editor):
|
||||
pass
|
||||
|
||||
def getpub(apps, schema_editor):
|
||||
Item = apps.get_model('doab_check', 'Item')
|
||||
for item in Item.objects.all():
|
||||
try:
|
||||
record = doab_client.getRecord(
|
||||
metadataPrefix='oai_dc',
|
||||
identifier=item.doab
|
||||
)
|
||||
if not record[1]:
|
||||
logger.error('No content in record %s', record)
|
||||
return ''
|
||||
metadata = record[1].getMap()
|
||||
item.publisher_name = unlist(metadata.pop('publisher', ['']))
|
||||
if item.publisher_name:
|
||||
item.save()
|
||||
except IdDoesNotExistError as e:
|
||||
logger.error(e)
|
||||
return ''
|
||||
|
||||
dependencies = [
|
||||
('doab_check', '0003_item_publisher_name'),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.RunPython(getpub, reverse_code=noop, hints={'doab_check': 'Item'}),
|
||||
]
|
|
@ -1,23 +0,0 @@
|
|||
# Generated by Django 4.1.7 on 2023-04-11 17:26
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('doab_check', '0004_auto_20230330_1419'),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterField(
|
||||
model_name='item',
|
||||
name='publisher_name',
|
||||
field=models.CharField(default='', max_length=1000),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='item',
|
||||
name='title',
|
||||
field=models.CharField(default='', max_length=1000),
|
||||
),
|
||||
]
|
|
@ -1,18 +0,0 @@
|
|||
# Generated by Django 4.1.7 on 2023-04-18 21:58
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('doab_check', '0005_alter_item_publisher_name_alter_item_title'),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AddField(
|
||||
model_name='item',
|
||||
name='status',
|
||||
field=models.IntegerField(default=1),
|
||||
),
|
||||
]
|
|
@ -53,19 +53,12 @@ class Timestamp(models.Model):
|
|||
''' timestamp of the record returned by doab. records can have multiple timestamps '''
|
||||
created = models.DateTimeField(auto_now_add=True)
|
||||
datetime = models.DateTimeField()
|
||||
record = models.ForeignKey("Record", related_name="timestamps", null=False,
|
||||
item = models.ForeignKey("Item", related_name="timestamps", null=False,
|
||||
on_delete=models.CASCADE)
|
||||
def __str__(self):
    ''' human-readable label: the timestamp's item and the doab datetime '''
    # the timestamp points straight at its Item through the `item` foreign key;
    # there is no `record` attribute to traverse
    return f'Record for {self.item} on {self.datetime}'
|
||||
|
||||
|
||||
class Record(models.Model):
|
||||
''' a harvested record '''
|
||||
created = models.DateTimeField(auto_now_add=True)
|
||||
item = models.ForeignKey("Item", related_name="records", on_delete=models.CASCADE)
|
||||
def __str__(self):
|
||||
return f'Record for {self.item} harvested on {self.created}'
|
||||
|
||||
|
||||
class LinkRel(models.Model):
|
||||
''' association between an item and a link '''
|
||||
|
@ -73,6 +66,7 @@ class LinkRel(models.Model):
|
|||
role = models.CharField(max_length=10, default='identifier')
|
||||
link = models.ForeignKey("Link", related_name='related', on_delete=models.CASCADE)
|
||||
item = models.ForeignKey("Item", related_name='related', on_delete=models.CASCADE)
|
||||
status = models.IntegerField(default=1) # 0 if deleted
|
||||
|
||||
class Check(models.Model):
|
||||
''' The results of a link check '''
|
||||
|
|
|
@ -29,7 +29,11 @@ class HarvestTests(TestCase):
|
|||
add_by_doab(sample_doab)
|
||||
item = Item.objects.get(doab=sample_doab)
|
||||
self.assertTrue('Sieveking' in item.title)
|
||||
|
||||
urls = []
|
||||
for linkrel in item.related.filter(status=1):
|
||||
urls.append(linkrel.link.url)
|
||||
self.assertTrue('http://library.oapen.org/handle/20.500.12657/27590' in urls)
|
||||
|
||||
# tweak the record to make it a delete record
|
||||
record = doab_client.getRecord(
|
||||
metadataPrefix='oai_dc',
|
||||
|
@ -39,6 +43,7 @@ class HarvestTests(TestCase):
|
|||
add_by_doab(sample_doab, record=record)
|
||||
item = Item.objects.get(doab=sample_doab)
|
||||
self.assertTrue(item.status == 0)
|
||||
self.assertTrue(item.related.filter(status=1).count() == 0)
|
||||
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue