added handling for hung servers
parent
be3fde9a6e
commit
98639afac4
|
@ -33,17 +33,21 @@ class ContentTyper(object):
|
|||
r = requests.get(url, headers=HEADERS)
|
||||
return r
|
||||
except requests.exceptions.SSLError:
|
||||
r = requests.get(url, verify=False)
|
||||
r = requests.get(url, verify=False)
|
||||
r.status_code = 511
|
||||
return r
|
||||
except requests.exceptions.ConnectionError:
|
||||
try:
|
||||
r = requests.get(url, allow_redirects=False, headers=HEADERS)
|
||||
return r
|
||||
except Exception as e:
|
||||
# unexplained error
|
||||
logger.exception(e)
|
||||
return None
|
||||
except requests.exceptions.ConnectionError as ce:
|
||||
if '[Errno 8]' in str(ce):
|
||||
try:
|
||||
r = requests.get(url, allow_redirects=False, headers=HEADERS)
|
||||
return r
|
||||
except Exception as e:
|
||||
pass
|
||||
elif '[Errno 60]' in str(ce):
|
||||
return (408, '', '')
|
||||
# unexplained error
|
||||
logger.exception(e)
|
||||
return None
|
||||
except Exception as e:
|
||||
# unexplained error
|
||||
logger.exception(e)
|
||||
|
@ -77,6 +81,8 @@ def response_parts(response):
|
|||
''' return code, content type, content disposition handling any missing data'''
|
||||
if response == None:
|
||||
return 0, '', ''
|
||||
if isinstance(response, tuple):
|
||||
return response
|
||||
try:
|
||||
if response.status_code == 404:
|
||||
return 404, '', ''
|
||||
|
|
|
@ -39,6 +39,7 @@ When a link is checked we record the status code and content type returned by th
|
|||
<li>"301" or "302" indicates a bad redirect.
|
||||
<li>"403" indicates a misconfigured server that is not allowing access to the promised resource.
|
||||
<li>"404" means the link is broken - the resource is not found.
|
||||
<li>"408" means the website didn't respond in a reasonable time.
|
||||
<li>"500" means something has gone wrong at the website server.
|
||||
<li>"502" means a gateway error. Some websites use load balancers or content distribution networks; if these gateways have a problem connecting with the source website, they send a 502 response.
|
||||
<li>"503" means that a website couldn’t be reached. This could happen because the server was too busy, under maintenance, or something else. Amazon's robot blocker returns 503 codes, so these must be checked manually.
|
||||
|
|
Loading…
Reference in New Issue