added handling for hung servers

main
eric 2023-05-04 16:22:11 -04:00
parent be3fde9a6e
commit 98639afac4
2 changed files with 16 additions and 9 deletions

View File

@ -33,17 +33,21 @@ class ContentTyper(object):
r = requests.get(url, headers=HEADERS)
return r
except requests.exceptions.SSLError:
r = requests.get(url, verify=False)
r = requests.get(url, verify=False)
r.status_code = 511
return r
except requests.exceptions.ConnectionError:
try:
r = requests.get(url, allow_redirects=False, headers=HEADERS)
return r
except Exception as e:
# unexplained error
logger.exception(e)
return None
except requests.exceptions.ConnectionError as ce:
if '[Errno 8]' in str(ce):
try:
r = requests.get(url, allow_redirects=False, headers=HEADERS)
return r
except Exception as e:
pass
elif '[Errno 60]' in str(ce):
return (408, '', '')
# unexplained error
logger.exception(e)
return None
except Exception as e:
# unexplained error
logger.exception(e)
@ -77,6 +81,8 @@ def response_parts(response):
''' return code, content type, content disposition handling any missing data'''
if response == None:
return 0, '', ''
if isinstance(response, tuple):
return response
try:
if response.status_code == 404:
return 404, '', ''

View File

@ -39,6 +39,7 @@ When a link is checked we record the status code and content type returned by th
<li>"301" or "302" indicates a bad redirect.
<li>"403" indicates a misconfigured server that is not allowing access to the promised resource.
<li>"404" means the link is broken - the resource is not found.
<li>"408" means the website didn't respond in a reasonable time.
<li>"500" means something has gone wrong at the website server.
<li>"502" means a gateway error. Some websites use load balancers or content distribution networks; if these gateways have a problem connecting with the source website, they send a 502 response.
<li>"503" means that a website couldn't be reached. This could happen because the server was too busy, under maintenance, or something else. Amazon's robot blocker returns 503 codes, so these must be checked manually.