Move the intended Record functionality to LinkRel

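Reset the database, too: the later migrations are deleted and their schema changes are folded into a regenerated 0001_initial.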
main
eric 2023-04-19 15:12:44 -04:00
parent 057521408b
commit 9b094d02ca
11 changed files with 43 additions and 170 deletions

View File

@ -33,17 +33,14 @@ class LinkAdmin(admin.ModelAdmin):
return mark_safe(f'<a href="{obj.url}">{obj.url}</a>') return mark_safe(f'<a href="{obj.url}">{obj.url}</a>')
@admin.register(models.LinkRel) @admin.register(models.LinkRel)
class LinkAdmin(admin.ModelAdmin): class LinkRelAdmin(admin.ModelAdmin):
list_display = ('role', 'doab', 'url',) list_display = ('role', 'doab', 'url',)
readonly_fields = ('doab', 'url') readonly_fields = ('item', 'link')
search_fields = ['link__url'] search_fields = ['link__url']
def doab(self, obj): def doab(self, obj):
return mark_safe(f'<a href="/admin/doab_check/item/{obj.item.id}/">{obj.item}</a>') return mark_safe(f'<a href="/admin/doab_check/item/{obj.item.id}/">{obj.item}</a>')
def url(self, obj): def url(self, obj):
return mark_safe(f'<a href="/admin/doab_check/link/{obj.link.id}/">{obj.link.url}</a>') return mark_safe(f'<a href="/admin/doab_check/link/{obj.link.id}/">{obj.link.url}</a>')
@admin.register(models.Record)
class RecordAdmin(admin.ModelAdmin):
readonly_fields = ['item']

View File

@ -12,10 +12,10 @@ from oaipmh.metadata import MetadataRegistry
import requests import requests
from .doab_utils import doab_reader from .doab_utils import doab_reader
from .models import Item, Link, Record, Timestamp from .models import Item, Link, Timestamp
DOAB_OAIURL = 'https://directory.doabooks.org/oai/request' DOAB_OAIURL = 'https://directory.doabooks.org/oai/request'
DOAB_PATT = re.compile(r'oai:directory\.doabooks\.org:(.*)') DOAB_PATT = re.compile(r'oai:(directory\.doabooks\.org|doab-books):(.*)')
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -29,10 +29,12 @@ def unlist(alist):
return None return None
return alist[0] return alist[0]
def getdoab(url): def getdoab(url, new_ns=False):
id_match = DOAB_PATT.search(url) id_match = DOAB_PATT.search(url)
if id_match: if id_match:
return f'oai:doab-books:{id_match.group(1)}' if new_ns:
return f'oai:directory.doabooks.org:{id_match.group(2)}'
return f'oai:doab-books:{id_match.group(2)}'
return False return False
@ -40,7 +42,7 @@ def add_by_doab(doab_id, record=None):
try: try:
record = record if record else doab_client.getRecord( record = record if record else doab_client.getRecord(
metadataPrefix='oai_dc', metadataPrefix='oai_dc',
identifier=doab_id identifier=getdoab(doab_id, new_ns=True)
) )
if record[0].isDeleted() or not record[1]: if record[0].isDeleted() or not record[1]:
logger.warning('record %s has no content or is deleted', record) logger.warning('record %s has no content or is deleted', record)
@ -57,7 +59,7 @@ def add_by_doab(doab_id, record=None):
publisher_name = unlist(metadata.pop('publisher', [''])) publisher_name = unlist(metadata.pop('publisher', ['']))
item_type = unlist(metadata.pop('type', [])) item_type = unlist(metadata.pop('type', []))
timestamps = metadata.pop('timestamp', []) timestamps = metadata.pop('timestamp', [])
added_record = load_doab_record( added_item = load_doab_record(
doab_id, doab_id,
title, title,
publisher_name, publisher_name,
@ -66,7 +68,7 @@ def add_by_doab(doab_id, record=None):
timestamps, timestamps,
**metadata **metadata
) )
return added_record return added_item
except IdDoesNotExistError as e: except IdDoesNotExistError as e:
logger.error(e) logger.error(e)
return None return None
@ -81,16 +83,20 @@ def load_doab_record(doab_id, title, publisher_name, item_type, urls, timestamps
new_item.publisher_name = publisher_name new_item.publisher_name = publisher_name
new_item.resource_type = item_type new_item.resource_type = item_type
new_item.save() new_item.save()
new_record = Record.objects.create(item=new_item)
for timestamp in timestamps: for timestamp in timestamps:
(new_timestamp, created) = Timestamp.objects.get_or_create( (new_timestamp, created) = Timestamp.objects.get_or_create(
datetime=timestamp, datetime=timestamp,
record=new_record) item=new_item)
for url in urls: for url in urls:
url = url.strip() url = url.strip()
(link, created) = Link.objects.get_or_create(url=url) (link, created) = Link.objects.get_or_create(url=url)
link.items.add(new_item) link.items.add(new_item)
return new_record for linkrel in new_item.related.filter(role='identifier'):
if linkrel.link.url in urls:
linkrel.status = 1
else:
linkrel.status = 0
return new_item
def set_deleted(record): def set_deleted(record):
@ -101,6 +107,9 @@ def set_deleted(record):
item = Item.objects.get(doab=doab) item = Item.objects.get(doab=doab)
item.status = 0 item.status = 0
item.save() item.save()
for linkrel in item.related.all():
linkrel.status = 0
linkrel.save()
return item return item
except Item.DoesNotExist: except Item.DoesNotExist:
logger.warning(f'no item {doab}') logger.warning(f'no item {doab}')
@ -134,13 +143,13 @@ def load_doab_oai(from_date, until_date, limit=100):
doab = getdoab(ident) doab = getdoab(ident)
if doab: if doab:
num_doabs += 1 num_doabs += 1
rec = add_by_doab(doab, record=record) item = add_by_doab(doab, record=record)
if not rec: if not item:
logger.error('error for doab #%s', doab) logger.error('error for doab #%s', doab)
continue continue
if lasttime > start: if lasttime > start:
new_doabs += 1 new_doabs += 1
title = rec.item.title title = item.title
logger.info(u'updated:\t%s\t%s', doab, title) logger.info(u'updated:\t%s\t%s', doab, title)
if num_doabs >= limit: if num_doabs >= limit:
break break

File diff suppressed because one or more lines are too long

View File

@ -1,4 +1,4 @@
# Generated by Django 4.1.1 on 2023-02-20 18:43 # Generated by Django 4.1.7 on 2023-04-19 18:18
from django.db import migrations, models from django.db import migrations, models
import django.db.models.deletion import django.db.models.deletion
@ -19,7 +19,9 @@ class Migration(migrations.Migration):
('doab', models.CharField(max_length=40, unique=True)), ('doab', models.CharField(max_length=40, unique=True)),
('created', models.DateTimeField(auto_now_add=True, db_index=True)), ('created', models.DateTimeField(auto_now_add=True, db_index=True)),
('resource_type', models.CharField(max_length=20, null=True)), ('resource_type', models.CharField(max_length=20, null=True)),
('title', models.CharField(max_length=1000)), ('title', models.CharField(default='', max_length=1000)),
('publisher_name', models.CharField(default='', max_length=1000)),
('status', models.IntegerField(default=1)),
], ],
), ),
migrations.CreateModel( migrations.CreateModel(
@ -29,30 +31,24 @@ class Migration(migrations.Migration):
('url', models.URLField(max_length=1024, unique=True)), ('url', models.URLField(max_length=1024, unique=True)),
('created', models.DateTimeField(auto_now_add=True)), ('created', models.DateTimeField(auto_now_add=True)),
('live', models.BooleanField(default=True)), ('live', models.BooleanField(default=True)),
], ('provider', models.CharField(default='', max_length=255)),
),
migrations.CreateModel(
name='Record',
fields=[
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('created', models.DateTimeField(auto_now_add=True)),
('item', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='records', to='doab_check.item')),
], ],
), ),
migrations.CreateModel( migrations.CreateModel(
name='Timestamp', name='Timestamp',
fields=[ fields=[
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('created', models.DateTimeField()), ('created', models.DateTimeField(auto_now_add=True)),
('datetime', models.DateTimeField()), ('datetime', models.DateTimeField()),
('record', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='timestamps', to='doab_check.record')), ('item', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='timestamps', to='doab_check.item')),
], ],
), ),
migrations.CreateModel( migrations.CreateModel(
name='LinkRel', name='LinkRel',
fields=[ fields=[
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('role', models.CharField(max_length=10, null=True)), ('role', models.CharField(default='identifier', max_length=10)),
('status', models.IntegerField(default=1)),
('item', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='related', to='doab_check.item')), ('item', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='related', to='doab_check.item')),
('link', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='related', to='doab_check.link')), ('link', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='related', to='doab_check.link')),
], ],

View File

@ -1,28 +0,0 @@
# Generated by Django 4.1.7 on 2023-02-21 00:53
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('doab_check', '0001_initial'),
]
operations = [
migrations.AddField(
model_name='link',
name='provider',
field=models.CharField(default='', max_length=255),
),
migrations.AlterField(
model_name='linkrel',
name='role',
field=models.CharField(default='identifier', max_length=10),
),
migrations.AlterField(
model_name='timestamp',
name='created',
field=models.DateTimeField(auto_now_add=True),
),
]

View File

@ -1,18 +0,0 @@
# Generated by Django 4.1.7 on 2023-03-29 23:25
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('doab_check', '0002_link_provider_alter_linkrel_role_and_more'),
]
operations = [
migrations.AddField(
model_name='item',
name='publisher_name',
field=models.CharField(default='', max_length=1000),
preserve_default=False,
),
]

View File

@ -1,41 +0,0 @@
# Generated by Django 4.1.7 on 2023-03-30 14:19
import logging
from oaipmh.error import IdDoesNotExistError
from django.db import migrations
from doab_check.doab_oai import doab_client, unlist
logger = logging.getLogger(__name__)
class Migration(migrations.Migration):
def noop(apps, schema_editor):
pass
def getpub(apps, schema_editor):
Item = apps.get_model('doab_check', 'Item')
for item in Item.objects.all():
try:
record = doab_client.getRecord(
metadataPrefix='oai_dc',
identifier=item.doab
)
if not record[1]:
logger.error('No content in record %s', record)
return ''
metadata = record[1].getMap()
item.publisher_name = unlist(metadata.pop('publisher', ['']))
if item.publisher_name:
item.save()
except IdDoesNotExistError as e:
logger.error(e)
return ''
dependencies = [
('doab_check', '0003_item_publisher_name'),
]
operations = [
migrations.RunPython(getpub, reverse_code=noop, hints={'doab_check': 'Item'}),
]

View File

@ -1,23 +0,0 @@
# Generated by Django 4.1.7 on 2023-04-11 17:26
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('doab_check', '0004_auto_20230330_1419'),
]
operations = [
migrations.AlterField(
model_name='item',
name='publisher_name',
field=models.CharField(default='', max_length=1000),
),
migrations.AlterField(
model_name='item',
name='title',
field=models.CharField(default='', max_length=1000),
),
]

View File

@ -1,18 +0,0 @@
# Generated by Django 4.1.7 on 2023-04-18 21:58
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('doab_check', '0005_alter_item_publisher_name_alter_item_title'),
]
operations = [
migrations.AddField(
model_name='item',
name='status',
field=models.IntegerField(default=1),
),
]

View File

@ -53,19 +53,12 @@ class Timestamp(models.Model):
''' timestamp of the record returned by doab. records can have multiple timestamps ''' ''' timestamp of the record returned by doab. records can have multiple timestamps '''
created = models.DateTimeField(auto_now_add=True) created = models.DateTimeField(auto_now_add=True)
datetime = models.DateTimeField() datetime = models.DateTimeField()
record = models.ForeignKey("Record", related_name="timestamps", null=False, item = models.ForeignKey("Item", related_name="timestamps", null=False,
on_delete=models.CASCADE) on_delete=models.CASCADE)
def __str__(self): def __str__(self):
return f'Record for {self.record.item} on {self.datetime}' return f'Record for {self.record.item} on {self.datetime}'
class Record(models.Model):
''' a harvested record '''
created = models.DateTimeField(auto_now_add=True)
item = models.ForeignKey("Item", related_name="records", on_delete=models.CASCADE)
def __str__(self):
return f'Record for {self.item} harvested on {self.created}'
class LinkRel(models.Model): class LinkRel(models.Model):
''' association between an item and a link ''' ''' association between an item and a link '''
@ -73,6 +66,7 @@ class LinkRel(models.Model):
role = models.CharField(max_length=10, default='identifier') role = models.CharField(max_length=10, default='identifier')
link = models.ForeignKey("Link", related_name='related', on_delete=models.CASCADE) link = models.ForeignKey("Link", related_name='related', on_delete=models.CASCADE)
item = models.ForeignKey("Item", related_name='related', on_delete=models.CASCADE) item = models.ForeignKey("Item", related_name='related', on_delete=models.CASCADE)
status = models.IntegerField(default=1) # 0 if deleted
class Check(models.Model): class Check(models.Model):
''' The results of a link check ''' ''' The results of a link check '''

View File

@ -29,6 +29,10 @@ class HarvestTests(TestCase):
add_by_doab(sample_doab) add_by_doab(sample_doab)
item = Item.objects.get(doab=sample_doab) item = Item.objects.get(doab=sample_doab)
self.assertTrue('Sieveking' in item.title) self.assertTrue('Sieveking' in item.title)
urls = []
for linkrel in item.related.filter(status=1):
urls.append(linkrel.link.url)
self.assertTrue('http://library.oapen.org/handle/20.500.12657/27590' in urls)
# tweak the record to make it a delete record # tweak the record to make it a delete record
record = doab_client.getRecord( record = doab_client.getRecord(
@ -39,6 +43,7 @@ class HarvestTests(TestCase):
add_by_doab(sample_doab, record=record) add_by_doab(sample_doab, record=record)
item = Item.objects.get(doab=sample_doab) item = Item.objects.get(doab=sample_doab)
self.assertTrue(item.status == 0) self.assertTrue(item.status == 0)
self.assertTrue(item.related.filter(status=1).count() == 0)