add problem links by publisher
parent
acaf8777bf
commit
b69aeaeaa8
|
@ -21,7 +21,7 @@ We've grouped the links by the publisher's name and by the server url to make it
|
||||||
We have a few ways to view the results.
|
We have a few ways to view the results.
|
||||||
<ul>
|
<ul>
|
||||||
<li>
|
<li>
|
||||||
View the links which appear to have <a href="#codes">problems</a>.
|
View the links which appear to have problems <a href="{% url 'probpubs' %}">by publisher name</a>, or below, by the return code.
|
||||||
</li>
|
</li>
|
||||||
<li>
|
<li>
|
||||||
View <a href="{% url 'providers' %}">the list of servers we've checked</a>.
|
View <a href="{% url 'providers' %}">the list of servers we've checked</a>.
|
||||||
|
@ -41,7 +41,7 @@ When a link is checked we record the status code and content type returned by th
|
||||||
<li>"404" means the link is broken - the resource is not found.
|
<li>"404" means the link is broken - the resource is not found.
|
||||||
<li>"408" means the website didn't respond in a reasonable time.
|
<li>"408" means the website didn't respond in a reasonable time.
|
||||||
<li>"500" means something has gone wrong at the website server.
|
<li>"500" means something has gone wrong at the website server.
|
||||||
<li>"502" means is a gateway error. Some websites use load balancers or content distribution networks; if these gateways have a problem connecting with the sorce website, they send a 502 response.
|
<li>"502" means there is a gateway error. Some websites use load balancers or content distribution networks; if these gateways have a problem connecting with the source website, they send a 502 response.
|
||||||
<li>"503" means that a website couldn’t be reached. This could happen because the server was too busy, under maintenance, or something else. Amazon's robot blocker returns 503 codes, so these must be checked manually.
|
<li>"503" means that a website couldn’t be reached. This could happen because the server was too busy, under maintenance, or something else. Amazon's robot blocker returns 503 codes, so these must be checked manually.
|
||||||
<li>"504" indicates that the server, while acting as a gateway or proxy did not get a response in time from an upstream server.
|
<li>"504" indicates that the server, while acting as a gateway or proxy did not get a response in time from an upstream server.
|
||||||
<li>"511" indicates a problem with the security of the connection - most often an incomplete certificate.
|
<li>"511" indicates a problem with the security of the connection - most often an incomplete certificate.
|
||||||
|
|
|
@ -20,7 +20,7 @@ DOAB links with Status code: {{ code | default:'0 or None' }}
|
||||||
<ul>
|
<ul>
|
||||||
{% for link in provider.links %}
|
{% for link in provider.links %}
|
||||||
<li>
|
<li>
|
||||||
<p><a href="{{link.url}}">{{link.url}}</a> ({{link.items.first.publisher_name}})
|
<p><a href="{{link.url}}">{{link.url}}</a> ({{link.items.first.publisher_name | default:'*** no publisher name ***'}})
|
||||||
<table>
|
<table>
|
||||||
{% for check in link.recent_checks %}
|
{% for check in link.recent_checks %}
|
||||||
<tr>
|
<tr>
|
||||||
|
|
|
@ -0,0 +1,53 @@
|
||||||
|
<html>
{# Context: `pubs` is an iterable of entries exposing `pub` (publisher name, may be empty) and `bad_links` (queryset of links). #}
<head>
<title>DOAB Link Checking Problems by Publisher</title>
</head>
<body>
<h2>
DOAB Link Checking Problems by Publisher
</h2>

{# Summary: one row per publisher, linking to that publisher's section further down the page. #}
<h3>Problem Link Summary</h3>
<table>
<tr>
<th>Publisher</th>
<th>Number</th>
</tr>
{% for pub in pubs %}
<tr style="color:red">
<td> <a href="#{{ pub.pub | default:'*** no publisher name ***' | urlencode }}">{{ pub.pub | default:'*** no publisher name ***' }}</a> </td>
<td> {{ pub.bad_links.count }} </td>
</tr>
{% endfor %}
</table>

{# Detail: the id below must use the same default + urlencode chain as the summary href so the anchors match. #}
<h3>Problem links by publisher</h3>
<ul>
{% for pub in pubs %}
<li id="{{ pub.pub | default:'*** no publisher name ***' | urlencode }}"><h4>{{ pub.pub | default:'*** no publisher name ***'}}</h4>
{% for link in pub.bad_links.all %}
{# One table per link; it is opened and closed inside the loop so the markup stays balanced. #}
<table>
<tr>
<th>
<a href="{{ link.url }}">{{ link.url }}</a>
</th>
</tr>
<tr>
<td>
<table>
{% for check in link.recent_checks %}
<tr>
<td>{{ check.created }}:</td>
<td style="color:red">{{ check.return_code }}</td>
<td>{{ check.content_type }}</td>
</tr>
{% endfor %}
</table>
</td>
</tr>
</table>
{% endfor %}
</li>
{% endfor %}
</ul>
</body>
</html>
|
|
@ -23,6 +23,8 @@ class PageTests(TestCase):
|
||||||
self.assertEqual(r.status_code, 200)
|
self.assertEqual(r.status_code, 200)
|
||||||
r = self.client.get("/problems/404/")
|
r = self.client.get("/problems/404/")
|
||||||
self.assertEqual(r.status_code, 200)
|
self.assertEqual(r.status_code, 200)
|
||||||
|
r = self.client.get("/problems/publishers/")
|
||||||
|
self.assertEqual(r.status_code, 200)
|
||||||
|
|
||||||
|
|
||||||
sample_doab = 'oai:doab-books:20.500.12854/25850'
|
sample_doab = 'oai:doab-books:20.500.12854/25850'
|
||||||
|
|
|
@ -9,6 +9,7 @@ from . import views
|
||||||
urlpatterns = [
|
urlpatterns = [
|
||||||
path('', views.HomepageView.as_view(), name='home'),
|
path('', views.HomepageView.as_view(), name='home'),
|
||||||
path('admin/', admin.site.urls),
|
path('admin/', admin.site.urls),
|
||||||
|
path('problems/publishers/', views.ProblemPublishersView.as_view(), name='probpubs'),
|
||||||
path('problems/<str:code>/', views.ProblemsView.as_view(), name='problems'),
|
path('problems/<str:code>/', views.ProblemsView.as_view(), name='problems'),
|
||||||
path('providers/', views.ProvidersView.as_view(), name='providers'),
|
path('providers/', views.ProvidersView.as_view(), name='providers'),
|
||||||
path('providers/<str:provider>/', views.ProviderView.as_view(), name='provider'),
|
path('providers/<str:provider>/', views.ProviderView.as_view(), name='provider'),
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
"""doab_check views
|
"""doab_check views
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from django.db.models import Count
|
from django.db.models import Count, OuterRef, Subquery
|
||||||
from django.http import HttpResponseRedirect
|
from django.http import HttpResponseRedirect
|
||||||
from django.shortcuts import get_object_or_404, render
|
from django.shortcuts import get_object_or_404, render
|
||||||
from django.urls import reverse
|
from django.urls import reverse
|
||||||
|
@ -69,6 +69,7 @@ class ProviderView(generic.TemplateView):
|
||||||
|
|
||||||
return {'provider': provider, 'links': provider_links, 'codes': codes}
|
return {'provider': provider, 'links': provider_links, 'codes': codes}
|
||||||
|
|
||||||
|
|
||||||
class PublishersView(generic.TemplateView):
|
class PublishersView(generic.TemplateView):
|
||||||
template_name = 'publishers.html'
|
template_name = 'publishers.html'
|
||||||
|
|
||||||
|
@ -79,6 +80,23 @@ class PublishersView(generic.TemplateView):
|
||||||
publisher_name=publisher['publisher_name'], status=1).count()
|
publisher_name=publisher['publisher_name'], status=1).count()
|
||||||
return {'publisher_list': publishers}
|
return {'publisher_list': publishers}
|
||||||
|
|
||||||
|
|
||||||
|
class ProblemPublishersView(generic.TemplateView):
    """Page listing problem links grouped by publisher name.

    Rendered with ``probpubs.html``.
    """
    template_name = 'probpubs.html'

    def get_context_data(self, **kwargs):
        """Build the template context.

        Returns:
            dict: ``{'pubs': [...]}`` where each entry is a dict with
            ``'pub'`` (the annotated publisher name, possibly ``None``) and
            ``'bad_links'`` (a queryset of the problem links carrying that
            annotation).
        """
        # Subquery yielding a single publisher name per link.
        # NOTE(review): the correlation is items=OuterRef("pk") -- confirm
        # this matches the Link/Item relationship as intended.
        onepub = Link.objects.filter(items=OuterRef("pk"))[:1].values('items__publisher_name')

        # A problem link has a recent check whose return code is not 200.
        problinks = Link.objects.exclude(
            recent_check__isnull=True).exclude(
            recent_check__return_code__exact=200)
        probpubs = problinks.annotate(pub=onepub).order_by('pub')

        # Materialize the distinct publisher rows so the 'bad_links' keys
        # added below live on plain dicts rather than depending on the
        # queryset's internal result cache.
        pubs = list(probpubs.values('pub').distinct())
        for publisher in pubs:
            publisher['bad_links'] = probpubs.filter(pub=publisher['pub'])
        return {'pubs': pubs}
|
||||||
|
|
||||||
|
|
||||||
class PublisherView(generic.TemplateView):
|
class PublisherView(generic.TemplateView):
|
||||||
template_name = 'publisher.html'
|
template_name = 'publisher.html'
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue