Move the functionality intended for Record to LinkRel

reset the database, too
main
eric 2023-04-19 15:12:44 -04:00
parent 057521408b
commit 9b094d02ca
11 changed files with 43 additions and 170 deletions

View File

@ -33,17 +33,14 @@ class LinkAdmin(admin.ModelAdmin):
return mark_safe(f'<a href="{obj.url}">{obj.url}</a>')
@admin.register(models.LinkRel)
class LinkRelAdmin(admin.ModelAdmin):
    """Admin for LinkRel rows (the association between an item and a link).

    The change list shows the role plus two computed columns, ``doab`` and
    ``url``, which link to the admin pages of the related item and link.
    """
    list_display = ('role', 'doab', 'url',)
    readonly_fields = ('item', 'link')
    search_fields = ['link__url']

    def doab(self, obj):
        """Return an HTML anchor pointing at the admin page of ``obj.item``."""
        # Imported locally so this block does not depend on the module's
        # import header. format_html escapes its arguments, so harvested
        # text cannot inject markup into the admin page.
        from django.utils.html import format_html
        return format_html(
            '<a href="/admin/doab_check/item/{}/">{}</a>',
            obj.item.id,
            obj.item,
        )

    def url(self, obj):
        """Return an HTML anchor pointing at the admin page of ``obj.link``."""
        from django.utils.html import format_html
        return format_html(
            '<a href="/admin/doab_check/link/{}/">{}</a>',
            obj.link.id,
            obj.link.url,
        )
class RecordAdmin(admin.ModelAdmin):
    """Admin for Record rows; the ``item`` field is shown read-only."""
    readonly_fields = ['item']


# Explicit registration, equivalent to decorating the class with
# @admin.register(models.Record).
admin.site.register(models.Record, RecordAdmin)

View File

@ -12,10 +12,10 @@ from oaipmh.metadata import MetadataRegistry
import requests
from .doab_utils import doab_reader
from .models import Item, Link, Record, Timestamp
from .models import Item, Link, Timestamp
# URL of the DOAB OAI request endpoint (presumably consumed by the OAI client
# set up elsewhere in this module -- confirm against the full file).
DOAB_OAIURL = 'https://directory.doabooks.org/oai/request'
# Earlier single-group pattern; the name is rebound by the assignment that
# follows, so this value is never the one in effect.
DOAB_PATT = re.compile(r'oai:directory\.doabooks\.org:(.*)')
# Matches OAI identifiers in either namespace: group 1 captures the namespace
# ("directory.doabooks.org" or "doab-books"), group 2 captures the remainder.
DOAB_PATT = re.compile(r'oai:(directory\.doabooks\.org|doab-books):(.*)')
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
@ -29,10 +29,12 @@ def unlist(alist):
return None
return alist[0]
def getdoab(url, new_ns=False):
    """Normalise an OAI identifier found in *url* to one of the two namespaces.

    Args:
        url: text containing an ``oai:directory.doabooks.org:...`` or
            ``oai:doab-books:...`` identifier.
        new_ns: when true, return the identifier in the
            ``oai:directory.doabooks.org:`` namespace; otherwise (the
            default) in the ``oai:doab-books:`` namespace.

    Returns:
        The rewritten identifier string, or ``False`` when *url* contains no
        recognisable identifier.
    """
    id_match = DOAB_PATT.search(url)
    if not id_match:
        return False
    # With the two-group pattern, group(1) is the namespace and group(2) is
    # the identifier proper; only the latter is carried into the result.
    namespace = 'directory.doabooks.org' if new_ns else 'doab-books'
    return f'oai:{namespace}:{id_match.group(2)}'
@ -40,7 +42,7 @@ def add_by_doab(doab_id, record=None):
try:
record = record if record else doab_client.getRecord(
metadataPrefix='oai_dc',
identifier=doab_id
identifier=getdoab(doab_id, new_ns=True)
)
if record[0].isDeleted() or not record[1]:
logger.warning('record %s has no content or is deleted', record)
@ -57,7 +59,7 @@ def add_by_doab(doab_id, record=None):
publisher_name = unlist(metadata.pop('publisher', ['']))
item_type = unlist(metadata.pop('type', []))
timestamps = metadata.pop('timestamp', [])
added_record = load_doab_record(
added_item = load_doab_record(
doab_id,
title,
publisher_name,
@ -66,7 +68,7 @@ def add_by_doab(doab_id, record=None):
timestamps,
**metadata
)
return added_record
return added_item
except IdDoesNotExistError as e:
logger.error(e)
return None
@ -81,16 +83,20 @@ def load_doab_record(doab_id, title, publisher_name, item_type, urls, timestamps
new_item.publisher_name = publisher_name
new_item.resource_type = item_type
new_item.save()
new_record = Record.objects.create(item=new_item)
for timestamp in timestamps:
(new_timestamp, created) = Timestamp.objects.get_or_create(
datetime=timestamp,
record=new_record)
item=new_item)
for url in urls:
url = url.strip()
(link, created) = Link.objects.get_or_create(url=url)
link.items.add(new_item)
return new_record
for linkrel in new_item.related.filter(role='identifier'):
if linkrel.link.url in urls:
linkrel.status = 1
else:
linkrel.status = 0
return new_item
def set_deleted(record):
@ -101,6 +107,9 @@ def set_deleted(record):
item = Item.objects.get(doab=doab)
item.status = 0
item.save()
for linkrel in item.related.all():
linkrel.status = 0
linkrel.save()
return item
except Item.DoesNotExist:
logger.warning(f'no item {doab}')
@ -134,13 +143,13 @@ def load_doab_oai(from_date, until_date, limit=100):
doab = getdoab(ident)
if doab:
num_doabs += 1
rec = add_by_doab(doab, record=record)
if not rec:
item = add_by_doab(doab, record=record)
if not item:
logger.error('error for doab #%s', doab)
continue
if lasttime > start:
new_doabs += 1
title = rec.item.title
title = item.title
logger.info(u'updated:\t%s\t%s', doab, title)
if num_doabs >= limit:
break

File diff suppressed because one or more lines are too long

View File

@ -1,4 +1,4 @@
# Generated by Django 4.1.1 on 2023-02-20 18:43
# Generated by Django 4.1.7 on 2023-04-19 18:18
from django.db import migrations, models
import django.db.models.deletion
@ -19,7 +19,9 @@ class Migration(migrations.Migration):
('doab', models.CharField(max_length=40, unique=True)),
('created', models.DateTimeField(auto_now_add=True, db_index=True)),
('resource_type', models.CharField(max_length=20, null=True)),
('title', models.CharField(max_length=1000)),
('title', models.CharField(default='', max_length=1000)),
('publisher_name', models.CharField(default='', max_length=1000)),
('status', models.IntegerField(default=1)),
],
),
migrations.CreateModel(
@ -29,30 +31,24 @@ class Migration(migrations.Migration):
('url', models.URLField(max_length=1024, unique=True)),
('created', models.DateTimeField(auto_now_add=True)),
('live', models.BooleanField(default=True)),
],
),
migrations.CreateModel(
name='Record',
fields=[
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('created', models.DateTimeField(auto_now_add=True)),
('item', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='records', to='doab_check.item')),
('provider', models.CharField(default='', max_length=255)),
],
),
migrations.CreateModel(
name='Timestamp',
fields=[
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('created', models.DateTimeField()),
('created', models.DateTimeField(auto_now_add=True)),
('datetime', models.DateTimeField()),
('record', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='timestamps', to='doab_check.record')),
('item', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='timestamps', to='doab_check.item')),
],
),
migrations.CreateModel(
name='LinkRel',
fields=[
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('role', models.CharField(max_length=10, null=True)),
('role', models.CharField(default='identifier', max_length=10)),
('status', models.IntegerField(default=1)),
('item', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='related', to='doab_check.item')),
('link', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='related', to='doab_check.link')),
],

View File

@ -1,28 +0,0 @@
# Generated by Django 4.1.7 on 2023-02-21 00:53
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add ``Link.provider`` and alter ``LinkRel.role`` and ``Timestamp.created``."""

    dependencies = [('doab_check', '0001_initial')]

    operations = [
        migrations.AddField(
            'link',
            'provider',
            field=models.CharField(max_length=255, default=''),
        ),
        migrations.AlterField(
            'linkrel',
            'role',
            field=models.CharField(max_length=10, default='identifier'),
        ),
        migrations.AlterField(
            'timestamp',
            'created',
            field=models.DateTimeField(auto_now_add=True),
        ),
    ]

View File

@ -1,18 +0,0 @@
# Generated by Django 4.1.7 on 2023-03-29 23:25
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add the ``publisher_name`` column to ``Item``."""

    dependencies = [('doab_check', '0002_link_provider_alter_linkrel_role_and_more')]

    operations = [
        migrations.AddField(
            'item',
            'publisher_name',
            field=models.CharField(max_length=1000, default=''),
            # The '' default is only used to fill existing rows.
            preserve_default=False,
        ),
    ]

View File

@ -1,41 +0,0 @@
# Generated by Django 4.1.7 on 2023-03-30 14:19
import logging
from oaipmh.error import IdDoesNotExistError
from django.db import migrations
from doab_check.doab_oai import doab_client, unlist
logger = logging.getLogger(__name__)
class Migration(migrations.Migration):
    """Data migration: back-fill ``Item.publisher_name`` from DOAB.

    For every existing Item, the ``oai_dc`` record is fetched through
    ``doab_client`` and its ``publisher`` value is stored on the item.
    """

    def noop(apps, schema_editor):
        """Reverse step: there is nothing to undo."""

    def getpub(apps, schema_editor):
        """Fetch each item's record and save its publisher name when present.

        Items whose record is missing or empty are logged and skipped so that
        one bad record does not stop the remaining items from being processed.
        """
        Item = apps.get_model('doab_check', 'Item')
        for item in Item.objects.all():
            try:
                record = doab_client.getRecord(
                    metadataPrefix='oai_dc',
                    identifier=item.doab,
                )
            except IdDoesNotExistError as e:
                logger.error(e)
                continue
            if not record[1]:
                logger.error('No content in record %s', record)
                continue
            metadata = record[1].getMap()
            item.publisher_name = unlist(metadata.pop('publisher', ['']))
            # Only write rows for which a publisher was actually found.
            if item.publisher_name:
                item.save()

    dependencies = [
        ('doab_check', '0003_item_publisher_name'),
    ]

    operations = [
        migrations.RunPython(getpub, reverse_code=noop, hints={'doab_check': 'Item'}),
    ]

View File

@ -1,23 +0,0 @@
# Generated by Django 4.1.7 on 2023-04-11 17:26
from django.db import migrations, models
class Migration(migrations.Migration):
    """Give ``Item.publisher_name`` and ``Item.title`` an empty-string default."""

    dependencies = [('doab_check', '0004_auto_20230330_1419')]

    operations = [
        migrations.AlterField(
            'item',
            'publisher_name',
            field=models.CharField(max_length=1000, default=''),
        ),
        migrations.AlterField(
            'item',
            'title',
            field=models.CharField(max_length=1000, default=''),
        ),
    ]

View File

@ -1,18 +0,0 @@
# Generated by Django 4.1.7 on 2023-04-18 21:58
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add the integer ``status`` column (default 1) to ``Item``."""

    dependencies = [('doab_check', '0005_alter_item_publisher_name_alter_item_title')]

    operations = [
        migrations.AddField(
            'item',
            'status',
            field=models.IntegerField(default=1),
        ),
    ]

View File

class Timestamp(models.Model):
    """Timestamp of the record returned by DOAB.

    An item can carry several timestamps; each row ties one reported
    ``datetime`` to its Item.
    """
    # When this row was stored locally.
    created = models.DateTimeField(auto_now_add=True)
    # The timestamp value reported in the harvested record.
    datetime = models.DateTimeField()
    item = models.ForeignKey("Item", related_name="timestamps", null=False,
                             on_delete=models.CASCADE)

    def __str__(self):
        # The foreign key is ``item``; there is no ``record`` attribute to
        # traverse any more.
        return f'Record for {self.item} on {self.datetime}'
class Record(models.Model):
    """A harvested record, attached to the Item it describes."""
    # Set once, when the row is first saved.
    created = models.DateTimeField(auto_now_add=True)
    item = models.ForeignKey(
        "Item",
        on_delete=models.CASCADE,
        related_name="records",
    )

    def __str__(self):
        return f'Record for {self.item} harvested on {self.created}'
class LinkRel(models.Model):
''' association between an item and a link '''
@ -73,6 +66,7 @@ class LinkRel(models.Model):
role = models.CharField(max_length=10, default='identifier')
link = models.ForeignKey("Link", related_name='related', on_delete=models.CASCADE)
item = models.ForeignKey("Item", related_name='related', on_delete=models.CASCADE)
status = models.IntegerField(default=1) # 0 if deleted
class Check(models.Model):
''' The results of a link check '''

View File

@ -29,6 +29,10 @@ class HarvestTests(TestCase):
add_by_doab(sample_doab)
item = Item.objects.get(doab=sample_doab)
self.assertTrue('Sieveking' in item.title)
urls = []
for linkrel in item.related.filter(status=1):
urls.append(linkrel.link.url)
self.assertTrue('http://library.oapen.org/handle/20.500.12657/27590' in urls)
# tweak the record to make it a delete record
record = doab_client.getRecord(
@ -39,6 +43,7 @@ class HarvestTests(TestCase):
add_by_doab(sample_doab, record=record)
item = Item.objects.get(doab=sample_doab)
self.assertTrue(item.status == 0)
self.assertTrue(item.related.filter(status=1).count() == 0)