display problems by hostname

main
eric 2023-05-02 15:52:03 -04:00
parent 221a56ba8d
commit 12f25515e9
5 changed files with 56 additions and 29 deletions

View File

@ -15,43 +15,49 @@ Welcome to DOAB Check!
DOAB Check is a collaboration between the Free Ebook Foundation and the Directory of Open Access Books. It checks each of the links in DOAB about once a month, looking for any links that are broken or misconfigured.
</p>
<p>
We've grouped the links by the hostname for the links url to make it easier for publishers to see whether links they've entered into the DOAB database are working as suspected.
We've grouped the links by the publisher's name and by the server url to make it easier for publishers to see whether links they've entered into the DOAB database are working as expected.
</p>
<p>
When a link is checked we record the status code returned by the web server.
</p>
<ul>
<li>"403" indicates a misconfigured server that is not allowing access to the promised resource.
<li>"404" means the link is broken - the resource is not found.
<li>"500" means something has gone wrong.
<li>"503" means that a website couldnt be reached. This could happen because the server was too busy, under maintenance, or something else.
<li>"504" indicates that the server, while acting as a gateway or proxy did not get a response in time from an upstream server.
<li>"511" indicates a problem with the security of the connection - most often an incomplete certificate.
<li>"None" or "0" means something has gone terribly wrong.
</ul>
We have a few ways to view the results.
<ul>
<li>
View <a href="{% url 'problems' %}">the links which appear to have problems</a>.
View the links which appear to have <a href="#codes">problems</a>.
</li>
<li>
View <a href="{% url 'providers' %}">the list of host names we've checked</a>.
View <a href="{% url 'providers' %}">the list of servers we've checked</a>.
</li>
<li>
View <a href="{% url 'publishers' %}">the list of publishers whose links we've checked</a>.
</li>
</ul>
<h3>Summary of link checks</h3>
<table>
<p>
When a link is checked we record the status code and content type returned by the web server.
</p>
<ul>
<li>"403" indicates a misconfigured server that is not allowing access to the promised resource.
<li>"404" means the link is broken - the resource is not found.
<li>"500" means something has gone wrong.
<li>"503" means that a website couldn't be reached. This could happen because the server was too busy, under maintenance, or something else. Amazon's robot blocker returns 503 codes, so these must be checked manually.
<li>"504" indicates that the server, while acting as a gateway or proxy, did not get a response in time from an upstream server.
<li>"511" indicates a problem with the security of the connection - most often an incomplete certificate.
<li>"None" or "0" means something has gone terribly wrong.
</ul>
<p> {{ num_checked }} links have been checked. </p>
<table id="codes">
<tr>
<th>HTTP code</th>
<th>number of links</th>
<th>percent of total</th>
</tr>
{% for code in codes %}
<tr {% if code.recent_check__return_code != 200 %} style="color:red"{% endif %}>
<td> {{ code.recent_check__return_code }} </td>
<td> {{ code.count }} </td>
<td>{% if code.recent_check__return_code != 200 %}<a href="{% url 'problems' code.recent_check__return_code %}"> {{ code.count }} </a>
{% else %} {{ code.count }} {% endif %}</td>
<td> {{ code.percent }} </td>
</tr>
{% endfor %}
</table>

View File

@ -4,11 +4,21 @@
</head>
<body>
<h2>
DOAB links with apparent problems
DOAB links with Status code: {{ code | default:'0 or None' }}
</h2>
<ul>
{% for link in problem_list %}
{% for provider in providers %}
<li> <a href="#{{ provider.provider | urlencode }}">{{ provider.provider }}</a>:
{{ provider.count }} problem links </li>
{% endfor %}
</ul>
<dl>
{% for provider in providers %}
<dt id="{{ provider.provider | urlencode }}"><h3>{{ provider.provider }}</h3></dt>
<dd>
<ul>
{% for link in provider.links %}
<li>
<p><a href="{{link.url}}">{{link.url}}</a> ({{link.items.first.publisher_name}})
<table>
@ -22,5 +32,8 @@ DOAB links with apparent problems
</li>
{% endfor %}
</ul>
</dd>
{% endfor %}
</dl>
</body>
</html>

View File

@ -4,7 +4,7 @@
</head>
<body>
<h2>
DOAB Link Checking by Hostnames
DOAB Link Checking by Server Hostname
</h2>
<ul>

View File

@ -9,7 +9,7 @@ from . import views
urlpatterns = [
path('', views.HomepageView.as_view(), name='home'),
path('admin/', admin.site.urls),
path('problems/', views.ProblemsView.as_view(), name='problems'),
path('problems/<str:code>/', views.ProblemsView.as_view(), name='problems'),
path('providers/', views.ProvidersView.as_view(), name='providers'),
path('providers/<str:provider>/', views.ProviderView.as_view(), name='provider'),
path('publishers/', views.PublishersView.as_view(), name='publishers'),

View File

@ -14,24 +14,32 @@ class HomepageView(generic.TemplateView):
template_name = 'index.html'
def get_context_data(self, **kwargs):
codes = Link.objects.filter(recent_check__isnull=False).order_by('-recent_check__return_code').values(
codes = Link.objects.filter(recent_check__isnull=False).order_by(
'-recent_check__return_code').values(
'recent_check__return_code').distinct()
num_checked = Link.objects.filter(
recent_check__return_code__isnull=False).distinct().count()
for code in codes:
code['count'] = Link.objects.filter(
recent_check__return_code=code['recent_check__return_code'],
).distinct().count()
return {'codes': codes}
code['percent'] = '{:.2%}'.format(code['count'] / num_checked)
return {'num_checked': num_checked, 'codes': codes}
class ProblemsView(generic.TemplateView):
template_name = 'problems.html'
def get_context_data(self, **kwargs):
problems = Link.objects.exclude(
recent_check__return_code__exact=200).exclude(recent_check__isnull=True
).order_by(
'-recent_check__return_code', 'provider')
return {'problem_list': problems}
code = kwargs['code']
problems = Link.objects.exclude(recent_check__isnull=True).filter(
recent_check__return_code__exact=code).order_by('provider')
providers = problems.values('provider').distinct()
for provider in providers:
provider['links'] = problems.filter(provider=provider['provider'])
provider['count'] = provider['links'].count()
return {'code': code, 'providers': providers}
class ProvidersView(generic.TemplateView):