add problem links by publisher
parent
acaf8777bf
commit
b69aeaeaa8
|
@ -21,7 +21,7 @@ We've grouped the links by the publisher's name and by the server url to make it
|
|||
We have a few ways to view the results.
|
||||
<ul>
|
||||
<li>
|
||||
View the links which appear to have <a href="#codes">problems</a>.
|
||||
View the links which appear to have problems <a href="{% url 'probpubs' %}">by publisher name</a>, or below, by the return code.
|
||||
</li>
|
||||
<li>
|
||||
View <a href="{% url 'providers' %}">the list of servers we've checked</a>.
|
||||
|
@ -41,7 +41,7 @@ When a link is checked we record the status code and content type returned by th
|
|||
<li>"404" means the link is broken - the resource is not found.
|
||||
<li>"408" means the website didn't respond in a reasonable time.
|
||||
<li>"500" means something has gone wrong at the website server.
|
||||
<li>"502" means is a gateway error. Some websites use load balancers or content distribution networks; if these gateways have a problem connecting with the sorce website, they send a 502 response.
|
||||
<li>"502" means there is a gateway error. Some websites use load balancers or content distribution networks; if these gateways have a problem connecting with the source website, they send a 502 response.
|
||||
<li>"503" means that a website couldn’t be reached. This could happen because the server was too busy, under maintenance, or something else. Amazon's robot blocker returns 503 codes, so these must be checked manually.
|
||||
<li>"504" indicates that the server, while acting as a gateway or proxy, did not get a response in time from an upstream server.
|
||||
<li>"511" indicates a problem with the security of the connection - most often an incomplete certificate.
|
||||
|
|
|
@ -20,7 +20,7 @@ DOAB links with Status code: {{ code | default:'0 or None' }}
|
|||
<ul>
|
||||
{% for link in provider.links %}
|
||||
<li>
|
||||
<p><a href="{{link.url}}">{{link.url}}</a> ({{link.items.first.publisher_name}})
|
||||
<p><a href="{{link.url}}">{{link.url}}</a> ({{link.items.first.publisher_name | default:'*** no publisher name ***'}})
|
||||
<table>
|
||||
{% for check in link.recent_checks %}
|
||||
<tr>
|
||||
|
|
|
@ -0,0 +1,53 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>DOAB Link Checking Problems by Publisher</title>
|
||||
</head>
|
||||
<body>
|
||||
<h2>
|
||||
DOAB Link Checking Problems by Publisher
|
||||
</h2>
|
||||
|
||||
<h3>Problem Link Summary</h3>
|
||||
<table>
|
||||
<tr>
|
||||
<th>Publisher</th>
|
||||
<th>Number</th>
|
||||
</tr>
|
||||
{% for pub in pubs %}
|
||||
<tr style="color:red">
|
||||
<td> <a href="#{{ pub.pub | default:'*** no publisher name ***'| urlencode}}">{{ pub.pub | default:'*** no publisher name ***' }}</a> </td>
|
||||
<td> {{ pub.bad_links.count }} </td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</table>
|
||||
|
||||
<h3>Problem links by publisher</h3>
|
||||
<ul>
|
||||
{% for pub in pubs %}
|
||||
<li id="{{ pub.pub | default:'*** no publisher name ***' | urlencode }}"><h4>{{ pub.pub | default:'*** no publisher name ***'}}</h4>
|
||||
{% for link in pub.bad_links.all %}
|
||||
<table>
|
||||
<tr>
|
||||
<th>
|
||||
<a href="{{ link.url }}">{{ link.url }}</a>
|
||||
</th>
|
||||
</tr>
<tr>
|
||||
<td>
|
||||
<table>
|
||||
{% for check in link.recent_checks %}
|
||||
<tr>
|
||||
<td>{{ check.created }}:</td>
|
||||
<td style="color:red">{{ check.return_code }}</td>
|
||||
<td>{{ check.content_type }}</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</table>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
{% endfor %}
|
||||
</li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
</body>
|
||||
</html>
|
|
@ -23,6 +23,8 @@ class PageTests(TestCase):
|
|||
self.assertEqual(r.status_code, 200)
|
||||
r = self.client.get("/problems/404/")
|
||||
self.assertEqual(r.status_code, 200)
|
||||
r = self.client.get("/problems/publishers/")
|
||||
self.assertEqual(r.status_code, 200)
|
||||
|
||||
|
||||
sample_doab = 'oai:doab-books:20.500.12854/25850'
|
||||
|
|
|
@ -9,6 +9,7 @@ from . import views
|
|||
urlpatterns = [
|
||||
path('', views.HomepageView.as_view(), name='home'),
|
||||
path('admin/', admin.site.urls),
|
||||
path('problems/publishers/', views.ProblemPublishersView.as_view(), name='probpubs'),
|
||||
path('problems/<str:code>/', views.ProblemsView.as_view(), name='problems'),
|
||||
path('providers/', views.ProvidersView.as_view(), name='providers'),
|
||||
path('providers/<str:provider>/', views.ProviderView.as_view(), name='provider'),
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
"""doab_check views
|
||||
"""
|
||||
|
||||
from django.db.models import Count
|
||||
from django.db.models import Count, OuterRef, Subquery
|
||||
from django.http import HttpResponseRedirect
|
||||
from django.shortcuts import get_object_or_404, render
|
||||
from django.urls import reverse
|
||||
|
@ -69,6 +69,7 @@ class ProviderView(generic.TemplateView):
|
|||
|
||||
return {'provider': provider, 'links': provider_links, 'codes': codes}
|
||||
|
||||
|
||||
class PublishersView(generic.TemplateView):
|
||||
template_name = 'publishers.html'
|
||||
|
||||
|
@ -79,6 +80,23 @@ class PublishersView(generic.TemplateView):
|
|||
publisher_name=publisher['publisher_name'], status=1).count()
|
||||
return {'publisher_list': publishers}
|
||||
|
||||
|
||||
class ProblemPublishersView(generic.TemplateView):
    """Render the problem links grouped by publisher name.

    Uses the 'probpubs.html' template, which expects a 'pubs' context entry:
    an iterable of dicts, each with a 'pub' key (the publisher annotation,
    possibly None) and a 'bad_links' key (a queryset of the problem links
    annotated with that publisher).
    """
    template_name = 'probpubs.html'

    def get_context_data(self, **kwargs):
        """Return the template context: {'pubs': [{'pub': ..., 'bad_links': ...}, ...]}."""
        # Correlated subquery selecting a single publisher name per outer Link.
        # NOTE(review): this compares `items` with the outer Link's pk -- confirm
        # that this is the intended correlation.
        onepub = Link.objects.filter(items=OuterRef("pk"))[:1].values('items__publisher_name')

        # A link is a "problem" when it has a recent check whose return code
        # is something other than 200.
        problinks = Link.objects.exclude(
            recent_check__isnull=True).exclude(
            recent_check__return_code__exact=200)
        probpubs = problinks.annotate(pub=onepub).order_by('pub')

        # Materialize the distinct publisher rows before attaching extra keys,
        # so the mutated dicts are exactly what the template iterates over
        # (rather than relying on the queryset's internal result cache).
        pubs = list(probpubs.values('pub').distinct())
        for publisher in pubs:
            publisher['bad_links'] = probpubs.filter(pub=publisher['pub'])
        return {'pubs': pubs}
|
||||
|
||||
|
||||
class PublisherView(generic.TemplateView):
|
||||
template_name = 'publisher.html'
|
||||
|
||||
|
|
Loading…
Reference in New Issue