From be3fde9a6e64d62f93d0c725de2c377134fbfdda Mon Sep 17 00:00:00 2001
From: eric
Date: Thu, 4 May 2023 12:04:31 -0400
Subject: [PATCH] add handling for bad redirects
---
doab_check/check.py | 11 ++++++++++-
doab_check/templates/index.html | 4 +++-
2 files changed, 13 insertions(+), 2 deletions(-)
diff --git a/doab_check/check.py b/doab_check/check.py
index bc0e35c..68440d2 100755
--- a/doab_check/check.py
+++ b/doab_check/check.py
@@ -36,8 +36,17 @@ class ContentTyper(object):
r = requests.get(url, verify=False)
r.status_code = 511
return r
- except:
+ except requests.exceptions.ConnectionError:
+ try:
+ r = requests.get(url, allow_redirects=False, headers=HEADERS)
+ return r
+ except Exception as e:
+ # unexplained error
+ logger.exception(e)
+ return None
+ except Exception as e:
# unexplained error
+ logger.exception(e)
return None
def calc_type(self, url):
diff --git a/doab_check/templates/index.html b/doab_check/templates/index.html
index bb8b425..48f5a9e 100644
--- a/doab_check/templates/index.html
+++ b/doab_check/templates/index.html
@@ -36,9 +36,11 @@ View the list of publishers whose links we've c
When a link is checked we record the status code and content type returned by the web server.
+- "301" or "302" indicates a bad redirect.
- "403" indicates a misconfigured server that is not allowing access to the promised resource.
- "404" means the link is broken - the resource is not found.
-
- "500" means something has gone wrong.
+
- "500" means something has gone wrong at the website server.
+
- "502" indicates a gateway error. Some websites use load balancers or content distribution networks; if these gateways have a problem connecting with the source website, they send a 502 response.
- "503" means that a website couldn’t be reached. This could happen because the server was too busy, under maintenance, or something else. Amazon's robot blocker returns 503 codes, so these must be checked manually.
- "504" indicates that the server, while acting as a gateway or proxy did not get a response in time from an upstream server.
- "511" indicates a problem with the security of the connection - most often an incomplete certificate.