From e92fb443655ca1ab06a44f1c7c04a440932e0955 Mon Sep 17 00:00:00 2001 From: eric Date: Wed, 25 Oct 2023 17:32:04 -0400 Subject: [PATCH] fix live not being set to false when link removed --- doab_check/doab_oai.py | 4 +++ doab_check/management/commands/dump_checks.py | 4 +-- .../management/commands/kill_dead_links.py | 19 ++++++++++++++ doab_check/models.py | 6 +++++ doab_check/views.py | 26 +++++++++---------- 5 files changed, 44 insertions(+), 15 deletions(-) create mode 100644 doab_check/management/commands/kill_dead_links.py diff --git a/doab_check/doab_oai.py b/doab_check/doab_oai.py index 28b67f0..eeed09b 100644 --- a/doab_check/doab_oai.py +++ b/doab_check/doab_oai.py @@ -101,6 +101,9 @@ def load_doab_record(doab_id, title, publisher_name, item_type, urls, timestamps linkrel.status = 1 else: linkrel.status = 0 + linkrel.save() + linkrel.link.save() + return new_item @@ -115,6 +118,7 @@ def set_deleted(record): for linkrel in item.related.all(): linkrel.status = 0 linkrel.save() + linkrel.link.save() return item except Item.DoesNotExist: logger.warning(f'no item {doab}') diff --git a/doab_check/management/commands/dump_checks.py b/doab_check/management/commands/dump_checks.py index c7eefa4..fd3f37b 100644 --- a/doab_check/management/commands/dump_checks.py +++ b/doab_check/management/commands/dump_checks.py @@ -18,7 +18,7 @@ class Command(BaseCommand): def check_data(self, item): link_dict = {'doab': item.doab} - for link in item.links.filter(live=True): + for link in item.links.filter(related__status=1): link_dict['url'] = link.url if link.recent_check: link_dict['checked'] = link.recent_check.created @@ -44,5 +44,5 @@ class Command(BaseCommand): end_time = datetime.datetime.now() logger.info(f'wrote {num} link checks in {end_time - start_time}') - self.stdout.write(f'wrote {num} link checks in {end_time - start_time}') + self.stdout.write(f'wrote link checks for {num} items in {end_time - start_time}') diff --git a/doab_check/management/commands/kill_dead_links.py b/doab_check/management/commands/kill_dead_links.py new file mode 100644 index 0000000..b84e3b7 --- /dev/null +++ b/doab_check/management/commands/kill_dead_links.py @@ -0,0 +1,19 @@ +from django.core.management.base import BaseCommand + + +from doab_check.models import Link + + +class Command(BaseCommand): + help = "set live attribute for all links" + + def handle(self, **options): + changed = 0 + for link in Link.objects.all(): + live = link.live + link.save() + if live != link.live: + changed += 1 + + self.stdout.write(f'changed {changed} links') + diff --git a/doab_check/models.py b/doab_check/models.py index e1cca98..b5d6c69 100644 --- a/doab_check/models.py +++ b/doab_check/models.py @@ -52,6 +52,12 @@ class Link(models.Model): if netloc.startswith('www.'): netloc = netloc[4:] self.provider = netloc + if self.id: + live = False + for linkrel in self.related.filter(status=1): + live = True + break + self.live = live super().save(*args, **kwargs) diff --git a/doab_check/views.py b/doab_check/views.py index cd6a70a..65928fc 100644 --- a/doab_check/views.py +++ b/doab_check/views.py @@ -15,8 +15,8 @@ class HomepageView(generic.TemplateView): template_name = 'index.html' def get_context_data(self, **kwargs): - active_links = Link.objects.filter(recent_check__isnull=False).only( - 'recent_check').order_by('-recent_check__return_code') + active_links = Link.objects.filter(recent_check__isnull=False, live=True + ).only('recent_check').order_by('-recent_check__return_code') codes = active_links.values( 'recent_check__return_code').distinct() num_checked = active_links.count() @@ -40,9 +40,9 @@ class ProblemsView(generic.TemplateView): def get_context_data(self, **kwargs): code = kwargs['code'] - problems = Link.objects.exclude(recent_check__isnull=True).filter( - recent_check__return_code__exact=code).order_by('provider' - ).annotate(title=F('items__title')) + problems = Link.objects.exclude(recent_check__isnull=True + ).filter(recent_check__return_code__exact=code, live=True + ).order_by('provider').annotate(title=F('items__title')) providers = problems.values('provider').distinct().annotate(Count('provider')) for provider in providers: provider['links'] = problems.filter(provider=provider['provider']) @@ -67,7 +67,7 @@ class ProviderView(generic.TemplateView): provider = {'provider': prov} provider_links = Link.objects.filter( provider=prov, live=True, recent_check__isnull=False - ).annotate(title=F('items__title')) + ).annotate(title=F('items__title')) provider['link_count'] = provider_links.count() codes = provider_links.order_by('-recent_check__return_code').values( 'recent_check__return_code').distinct().annotate(Count('recent_check__return_code')) @@ -121,7 +121,7 @@ class PublisherView(generic.TemplateView): if pub == NOPUBNAME: pub = '' publisher_links = Link.objects.filter( - items__publisher_name=pub, items__status=1, recent_check__isnull=False + items__publisher_name=pub, live=True, recent_check__isnull=False ).annotate(title=F('items__title')) link_count = publisher_links.distinct().count() @@ -151,12 +151,12 @@ def link_api_view(request, doab): data['status'] = 'found' links = [] data['links'] = links - for link in item.links.filter(live=True): - link_dict = {'url': link.url} - if link.recent_check: - link_dict['checked'] = link.recent_check.created - link_dict['return_code'] = link.recent_check.return_code - link_dict['content_type'] = link.recent_check.content_type + for linkrel in item.related.filter(status=1): + link_dict = {'url': linkrel.link.url} + if linkrel.link.recent_check: + link_dict['checked'] = linkrel.link.recent_check.created + link_dict['return_code'] = linkrel.link.recent_check.return_code + link_dict['content_type'] = linkrel.link.recent_check.content_type links.append(link_dict) return JsonResponse(data)