Fix parsing for keyword search feature. Update HTML and text table output. Add reputation check filtering based on PR from @christruncer
parent
26dd64870d
commit
e3985c2c37
|
@ -8,6 +8,11 @@ This Python based tool was written to quickly query the Expireddomains.net searc
|
|||
|
||||
## Changes
|
||||
|
||||
- 6 May 2018
|
||||
+ Fixed expired domains parsing when performing a keyword search
|
||||
+ Minor HTML and text table output updates
|
||||
+ Filtered reputation checks to only execute for .COM, .ORG, and .NET domains and removed check for Archive.org records when performing a default or keyword search. Credit to @christruncer for the original PR and idea.
|
||||
|
||||
- 11 April 2018
|
||||
+ Added OCR support for CAPTCHA solving with tesseract. Thanks to t94j0 for the idea in [AIRMASTER](https://github.com/t94j0/AIRMASTER)
|
||||
+ Added support for input file list of potential domains (-f/--filename)
|
||||
|
@ -85,11 +90,11 @@ List DomainHunter options
|
|||
|
||||
Use defaults to check for most recent 100 domains and check reputation
|
||||
|
||||
python ./domainhunter.py
|
||||
python3 ./domainhunter.py
|
||||
|
||||
Search for 1000 most recently expired/deleted domains, but don't check reputation
|
||||
|
||||
python ./domainhunter.py -r 1000
|
||||
python3 ./domainhunter.py -r 1000
|
||||
|
||||
Perform all reputation checks for a single domain
|
||||
|
||||
|
|
192
domainhunter.py
192
domainhunter.py
|
@ -4,17 +4,15 @@
|
|||
## Author: @joevest and @andrewchiles
|
||||
## Description: Checks expired domains, reputation/categorization, and Archive.org history to determine
|
||||
## good candidates for phishing and C2 domain names
|
||||
# Add OCR support for BlueCoat/SiteReview CAPTCHA using tesseract
|
||||
# Add support for input file list of potential domains
|
||||
# Add additional error checking for ExpiredDomains.net parsing
|
||||
# Changed -q/--query switch to -k/--keyword to better match its purpose
|
||||
|
||||
import time
|
||||
import random
|
||||
import argparse
|
||||
import json
|
||||
import base64
|
||||
import os
|
||||
|
||||
__version__ = "20180411"
|
||||
__version__ = "20180506"
|
||||
|
||||
## Functions
|
||||
|
||||
|
@ -34,7 +32,7 @@ def doSleep(timing):
|
|||
def checkBluecoat(domain):
|
||||
try:
|
||||
url = 'https://sitereview.bluecoat.com/resource/lookup'
|
||||
postData = {'url':domain,'captcha':''} # HTTP POST Parameters
|
||||
postData = {'url':domain,'captcha':''}
|
||||
headers = {'User-Agent':useragent,
|
||||
'Content-Type':'application/json; charset=UTF-8',
|
||||
'Referer':'https://sitereview.bluecoat.com/lookup'}
|
||||
|
@ -51,16 +49,14 @@ def checkBluecoat(domain):
|
|||
# Print notice if CAPTCHAs are blocking accurate results and attempt to solve if --ocr
|
||||
if a == 'captcha':
|
||||
if ocr:
|
||||
# This request is performed in a browser, but is not needed for our purposes
|
||||
# This request is also performed by a browser, but is not needed for our purposes
|
||||
#captcharequestURL = 'https://sitereview.bluecoat.com/resource/captcha-request'
|
||||
#print('[*] Requesting CAPTCHA')
|
||||
#response = s.get(url=captcharequestURL,headers=headers,cookies=cookies,verify=False)
|
||||
|
||||
print('[*] Received CAPTCHA challenge!')
|
||||
captcha = solveCaptcha('https://sitereview.bluecoat.com/resource/captcha.jpg',s)
|
||||
|
||||
if captcha:
|
||||
b64captcha = base64.b64encode(captcha.encode('utf-8')).decode('utf-8')
|
||||
b64captcha = base64.urlsafe_b64encode(captcha.encode('utf-8')).decode('utf-8')
|
||||
|
||||
# Send CAPTCHA solution via GET since inclusion with the domain categorization request doesn't work anymore
|
||||
captchasolutionURL = 'https://sitereview.bluecoat.com/resource/captcha-request/{0}'.format(b64captcha)
|
||||
|
@ -124,7 +120,7 @@ def checkIBMXForce(domain):
|
|||
return "-"
|
||||
|
||||
def checkTalos(domain):
|
||||
url = "https://www.talosintelligence.com/sb_api/query_lookup?query=%2Fapi%2Fv2%2Fdetails%2Fdomain%2F&query_entry={0}&offset=0&order=ip+asc".format(domain)
|
||||
url = 'https://www.talosintelligence.com/sb_api/query_lookup?query=%2Fapi%2Fv2%2Fdetails%2Fdomain%2F&query_entry={0}&offset=0&order=ip+asc'.format(domain)
|
||||
headers = {'User-Agent':useragent,
|
||||
'Referer':url}
|
||||
|
||||
|
@ -222,10 +218,7 @@ def checkDomain(domain):
|
|||
|
||||
if domain in maldomainsList:
|
||||
print("[!] {}: Identified as known malware domain (malwaredomains.com)".format(domain))
|
||||
|
||||
mxtoolbox = checkMXToolbox(domain)
|
||||
print("[+] {}: {}".format(domain, mxtoolbox))
|
||||
|
||||
|
||||
bluecoat = checkBluecoat(domain)
|
||||
print("[+] {}: {}".format(domain, bluecoat))
|
||||
|
||||
|
@ -234,13 +227,21 @@ def checkDomain(domain):
|
|||
|
||||
ciscotalos = checkTalos(domain)
|
||||
print("[+] {}: {}".format(domain, ciscotalos))
|
||||
|
||||
mxtoolbox = checkMXToolbox(domain)
|
||||
print("[+] {}: {}".format(domain, mxtoolbox))
|
||||
|
||||
print("")
|
||||
return
|
||||
|
||||
results = [domain,bluecoat,ibmxforce,ciscotalos,mxtoolbox]
|
||||
return results
|
||||
|
||||
def solveCaptcha(url,session):
|
||||
# Downloads CAPTCHA image and saves to current directory for OCR with tesseract
|
||||
# Returns CAPTCHA string or False if an error occurred
|
||||
|
||||
jpeg = 'captcha.jpg'
|
||||
|
||||
try:
|
||||
response = session.get(url=url,headers=headers,verify=False, stream=True)
|
||||
if response.status_code == 200:
|
||||
|
@ -251,13 +252,32 @@ def solveCaptcha(url,session):
|
|||
print('[-] Error downloading CAPTCHA file!')
|
||||
return False
|
||||
|
||||
# Perform basic OCR without additional image enhancement
|
||||
text = pytesseract.image_to_string(Image.open(jpeg))
|
||||
text = text.replace(" ", "")
|
||||
|
||||
# Remove CAPTCHA file
|
||||
try:
|
||||
os.remove(jpeg)
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
return text
|
||||
|
||||
except Exception as e:
|
||||
print("[-] Error solving CAPTCHA - {0}".format(e))
|
||||
|
||||
return False
|
||||
|
||||
def drawTable(header,data):
|
||||
|
||||
data.insert(0,header)
|
||||
t = Texttable(max_width=maxwidth)
|
||||
t.add_rows(data)
|
||||
t.header(header)
|
||||
|
||||
return(t.draw())
|
||||
|
||||
## MAIN
|
||||
if __name__ == "__main__":
|
||||
|
||||
|
@ -265,7 +285,7 @@ if __name__ == "__main__":
|
|||
parser.add_argument('-k','--keyword', help='Keyword used to refine search results', required=False, default=False, type=str, dest='keyword')
|
||||
parser.add_argument('-c','--check', help='Perform domain reputation checks', required=False, default=False, action='store_true', dest='check')
|
||||
parser.add_argument('-f','--filename', help='Specify input file of line delimited domain names to check', required=False, default=False, type=str, dest='filename')
|
||||
parser.add_argument('--ocr', help='Perform OCR on CAPTCHAs when present', required=False, default=False, action='store_true')
|
||||
parser.add_argument('--ocr', help='Perform OCR on CAPTCHAs when challenged', required=False, default=False, action='store_true')
|
||||
parser.add_argument('-r','--maxresults', help='Number of results to return when querying latest expired/deleted domains', required=False, default=100, type=int, dest='maxresults')
|
||||
parser.add_argument('-s','--single', help='Performs detailed reputation checks against a single domain name/IP.', required=False, default=False, dest='single')
|
||||
parser.add_argument('-t','--timing', help='Modifies request timing to avoid CAPTCHAs. Slowest(0) = 90-120 seconds, Default(3) = 10-20 seconds, Fastest(5) = no delay', required=False, default=3, type=int, choices=range(0,6), dest='timing')
|
||||
|
@ -294,9 +314,9 @@ if __name__ == "__main__":
|
|||
except Exception as e:
|
||||
print("Expired Domains Reputation Check")
|
||||
print("[-] Missing OCR dependencies: {}".format(str(e)))
|
||||
print("[*] Install required Python dependencies by running `pip3 install -r requirements.txt`")
|
||||
print("[*] Ubuntu\Debian - Install tesseract by running `apt-get install tesseract-ocr python3-imaging`")
|
||||
print("[*] MAC OSX - Install tesseract with homebrew by running `brew install tesseract`")
|
||||
print("[*] Install required Python dependencies by running: pip3 install -r requirements.txt")
|
||||
print("[*] Ubuntu\Debian - Install tesseract by running: apt-get install tesseract-ocr python3-imaging")
|
||||
print("[*] macOS - Install tesseract with homebrew by running: brew install tesseract")
|
||||
quit(0)
|
||||
|
||||
## Variables
|
||||
|
@ -314,14 +334,16 @@ if __name__ == "__main__":
|
|||
|
||||
maxwidth = args.maxwidth
|
||||
|
||||
malwaredomainsURL = 'http://mirror1.malwaredomains.com/files/justdomains'
|
||||
expireddomainsqueryURL = 'https://www.expireddomains.net/domain-name-search'
|
||||
|
||||
ocr = args.ocr
|
||||
|
||||
malwaredomainsURL = 'http://mirror1.malwaredomains.com/files/justdomains'
|
||||
|
||||
expireddomainsqueryURL = 'https://www.expireddomains.net/domain-name-search'
|
||||
|
||||
timestamp = time.strftime("%Y%m%d_%H%M%S")
|
||||
|
||||
useragent = 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)'
|
||||
|
||||
headers = {'User-Agent':useragent}
|
||||
|
||||
requests.packages.urllib3.disable_warnings()
|
||||
|
@ -329,8 +351,6 @@ if __name__ == "__main__":
|
|||
# HTTP Session container, used to manage cookies, session tokens and other session information
|
||||
s = requests.Session()
|
||||
|
||||
data = []
|
||||
|
||||
title = '''
|
||||
____ ___ __ __ _ ___ _ _ _ _ _ _ _ _ _____ _____ ____
|
||||
| _ \ / _ \| \/ | / \ |_ _| \ | | | | | | | | | \ | |_ _| ____| _ \
|
||||
|
@ -357,22 +377,29 @@ If you plan to use this content for illegal purpose, don't. Have a nice day :)'
|
|||
# Retrieve reputation for a single chosen domain (Quick Mode)
|
||||
if single:
|
||||
checkDomain(single)
|
||||
quit(0)
|
||||
exit(0)
|
||||
|
||||
# Perform detailed domain reputation checks against input file
|
||||
# Perform detailed domain reputation checks against input file, print table, and quit
|
||||
if filename:
|
||||
# Initialize our list with an empty row for the header
|
||||
data = []
|
||||
try:
|
||||
with open(filename, 'r') as domainsList:
|
||||
for line in domainsList.read().splitlines():
|
||||
checkDomain(line)
|
||||
data.append(checkDomain(line))
|
||||
doSleep(timing)
|
||||
|
||||
# Print results table
|
||||
header = ['Domain', 'BlueCoat', 'IBM X-Force', 'Cisco Talos', 'MXToolbox']
|
||||
print(drawTable(header,data))
|
||||
|
||||
except KeyboardInterrupt:
|
||||
print('Caught keyboard interrupt. Exiting!')
|
||||
quit(0)
|
||||
exit(0)
|
||||
except Exception as e:
|
||||
print('[-] {}'.format(e))
|
||||
quit(1)
|
||||
quit(0)
|
||||
print('[-] Error: {}'.format(e))
|
||||
exit(1)
|
||||
exit(0)
|
||||
|
||||
# Generic Proxy support
|
||||
# TODO: add as a parameter
|
||||
|
@ -389,8 +416,9 @@ If you plan to use this content for illegal purpose, don't. Have a nice day :)'
|
|||
|
||||
# Generate list of URLs to query for expired/deleted domains
|
||||
urls = []
|
||||
domain_list = []
|
||||
|
||||
# Use the keyword string to narrow domain search if provided
|
||||
# Use the keyword string to narrow domain search if provided. This generates a list of URLs to query
|
||||
if keyword:
|
||||
print('[*] Fetching expired or deleted domains containing "{}"'.format(keyword))
|
||||
for i in range (0,maxresults,25):
|
||||
|
@ -404,14 +432,12 @@ If you plan to use this content for illegal purpose, don't. Have a nice day :)'
|
|||
# If no keyword provided, retrieve list of recently expired domains in batches of 25 results.
|
||||
else:
|
||||
print('[*] Fetching expired or deleted domains...')
|
||||
# Calculate number of URLs to request since we're performing a request for four different resources instead of one
|
||||
numresults = int(maxresults / 4)
|
||||
# Calculate number of URLs to request since we're performing a request for two different resources instead of one
|
||||
numresults = int(maxresults / 2)
|
||||
for i in range (0,(numresults),25):
|
||||
urls.append('https://www.expireddomains.net/backorder-expired-domains?start={}&o=changed&r=a'.format(i))
|
||||
urls.append('https://www.expireddomains.net/deleted-com-domains/?start={}&o=changed&r=a'.format(i))
|
||||
urls.append('https://www.expireddomains.net/deleted-net-domains/?start={}&o=changed&r=a'.format(i))
|
||||
urls.append('https://www.expireddomains.net/deleted-org-domains/?start={}&o=changed&r=a'.format(i))
|
||||
|
||||
|
||||
for url in urls:
|
||||
|
||||
print("[*] {}".format(url))
|
||||
|
@ -423,7 +449,6 @@ If you plan to use this content for illegal purpose, don't. Have a nice day :)'
|
|||
|
||||
r1 = random.randint(100000,999999)
|
||||
|
||||
|
||||
# Known good example _pk_id.10.dd0a cookie: 5abbbc772cbacfb1.1496760705.2.1496760705.1496760705
|
||||
pk_str = '5abbbc772cbacfb1' + '.1496' + str(r1) + '.2.1496' + str(r1) + '.1496' + str(r1)
|
||||
|
||||
|
@ -435,10 +460,10 @@ If you plan to use this content for illegal purpose, don't. Have a nice day :)'
|
|||
#domainrequest = s.get(url,headers=headers,verify=False,cookies=jar,proxies=proxies)
|
||||
|
||||
domains = domainrequest.text
|
||||
|
||||
|
||||
# Turn the HTML into a Beautiful Soup object
|
||||
soup = BeautifulSoup(domains, 'lxml')
|
||||
|
||||
soup = BeautifulSoup(domains, 'lxml')
|
||||
#print(soup)
|
||||
try:
|
||||
table = soup.find("table")
|
||||
for row in table.findAll('tr')[1:]:
|
||||
|
@ -449,8 +474,6 @@ If you plan to use this content for illegal purpose, don't. Have a nice day :)'
|
|||
cells = row.findAll("td")
|
||||
|
||||
if len(cells) >= 1:
|
||||
output = ""
|
||||
|
||||
if keyword:
|
||||
|
||||
c0 = row.find('td').find('a').text # domain
|
||||
|
@ -466,10 +489,9 @@ If you plan to use this content for illegal purpose, don't. Have a nice day :)'
|
|||
c10 = cells[10].find(text=True) # status org
|
||||
c11 = cells[11].find(text=True) # status de
|
||||
c12 = cells[12].find(text=True) # tld registered
|
||||
c13 = cells[13].find(text=True) # Related Domains
|
||||
c14 = cells[14].find(text=True) # Domain list
|
||||
c15 = cells[15].find(text=True) # status
|
||||
c16 = cells[16].find(text=True) # related links
|
||||
c13 = cells[13].find(text=True) # Source List
|
||||
c14 = cells[14].find(text=True) # Domain Status
|
||||
c15 = "" # Related Domains
|
||||
|
||||
else:
|
||||
c0 = cells[0].find(text=True) # domain
|
||||
|
@ -487,12 +509,6 @@ If you plan to use this content for illegal purpose, don't. Have a nice day :)'
|
|||
c12 = cells[12].find(text=True) # tld registered
|
||||
c13 = cells[13].find(text=True) # changes
|
||||
c14 = cells[14].find(text=True) # whois
|
||||
c15 = "" # not used
|
||||
c16 = "" # not used
|
||||
c17 = "" # not used
|
||||
|
||||
# Expired Domains results have an additional 'Availability' column that breaks parsing "deleted" domains
|
||||
#c15 = cells[15].find(text=True) # related links
|
||||
|
||||
available = ''
|
||||
if c8 == "available":
|
||||
|
@ -507,42 +523,49 @@ If you plan to use this content for illegal purpose, don't. Have a nice day :)'
|
|||
if c11 == "available":
|
||||
available += ".de "
|
||||
|
||||
# Only grab status for keyword searches since it doesn't exist otherwise
|
||||
status = ""
|
||||
if c15:
|
||||
status = c15
|
||||
if keyword:
|
||||
status = c14
|
||||
|
||||
bluecoat = ''
|
||||
ibmxforce = ''
|
||||
ciscotalos = ''
|
||||
|
||||
if check == True:
|
||||
# Only perform reputation checks if domain is a .com .net. .org and not in maldomains list
|
||||
if (c0.lower().endswith(".com") or c0.lower().endswith(".net") or c0.lower().endswith(".org")) and (c0 not in maldomainsList):
|
||||
|
||||
# Skip additional reputation checks if this domain is already categorized as malicious
|
||||
if c0 in maldomainsList:
|
||||
print("[-] Skipping {} - Identified as known malware domain").format(c0)
|
||||
else:
|
||||
bluecoat = ''
|
||||
ibmxforce = ''
|
||||
if c3 == '-':
|
||||
bluecoat = 'ignored'
|
||||
ibmxforce = 'ignored'
|
||||
elif check == True:
|
||||
bluecoat = checkBluecoat(c0)
|
||||
print("[+] {}: {}".format(c0, bluecoat))
|
||||
ibmxforce = checkIBMXForce(c0)
|
||||
print("[+] {}: {}".format(c0, ibmxforce))
|
||||
ciscotalos = checkTalos(c0)
|
||||
print("[+] {}: {}".format(c0, ciscotalos))
|
||||
# Sleep to avoid captchas
|
||||
doSleep(timing)
|
||||
else:
|
||||
bluecoat = "skipped"
|
||||
ibmxforce = "skipped"
|
||||
# Append parsed domain data to list
|
||||
data.append([c0,c3,c4,available,status,bluecoat,ibmxforce])
|
||||
bluecoat = 'skipped'
|
||||
ibmxforce = 'skipped'
|
||||
ciscotalos = 'skipped'
|
||||
|
||||
# Append parsed domain data to list
|
||||
domain_list.append([c0,c3,c4,available,status,bluecoat,ibmxforce,ciscotalos])
|
||||
|
||||
except Exception as e:
|
||||
#print(e)
|
||||
# print(e)
|
||||
pass
|
||||
|
||||
# Add additional sleep on requests to ExpiredDomains.net to avoid errors
|
||||
time.sleep(5)
|
||||
|
||||
# Check for valid results before continuing
|
||||
if not(data):
|
||||
print("[-] No results found for keyword: {0}".format(keyword))
|
||||
quit(0)
|
||||
if len(domain_list) == 0:
|
||||
print("[-] No domain results found")
|
||||
exit(0)
|
||||
|
||||
# Sort domain list by column 2 (Birth Year)
|
||||
sortedData = sorted(data, key=lambda x: x[1], reverse=True)
|
||||
sortedDomains = sorted(domain_list, key=lambda x: x[1], reverse=True)
|
||||
|
||||
# Build HTML Table
|
||||
html = ''
|
||||
|
@ -556,9 +579,11 @@ If you plan to use this content for illegal purpose, don't. Have a nice day :)'
|
|||
<th>Entries</th>
|
||||
<th>TLDs Available</th>
|
||||
<th>Status</th>
|
||||
<th>Symantec</th>
|
||||
<th>BlueCoat</th>
|
||||
<th>Categorization</th>
|
||||
<th>IBM-xForce</th>
|
||||
<th>IBM X-Force</th>
|
||||
<th>Categorization</th>
|
||||
<th>Cisco Talos</th>
|
||||
<th>Categorization</th>
|
||||
<th>WatchGuard</th>
|
||||
<th>Namecheap</th>
|
||||
|
@ -570,7 +595,7 @@ If you plan to use this content for illegal purpose, don't. Have a nice day :)'
|
|||
htmlFooter = '</body></html>'
|
||||
|
||||
# Build HTML table contents
|
||||
for i in sortedData:
|
||||
for i in sortedDomains:
|
||||
htmlTableBody += '<tr>'
|
||||
htmlTableBody += '<td>{}</td>'.format(i[0]) # Domain
|
||||
htmlTableBody += '<td>{}</td>'.format(i[1]) # Birth
|
||||
|
@ -578,10 +603,12 @@ If you plan to use this content for illegal purpose, don't. Have a nice day :)'
|
|||
htmlTableBody += '<td>{}</td>'.format(i[3]) # TLDs
|
||||
htmlTableBody += '<td>{}</td>'.format(i[4]) # Status
|
||||
|
||||
htmlTableBody += '<td><a href="https://sitereview.bluecoat.com/sitereview#/?search={}" target="_blank">Bluecoat</a></td>'.format(i[0]) # Bluecoat
|
||||
htmlTableBody += '<td><a href="https://sitereview.bluecoat.com/" target="_blank">Bluecoat</a></td>'.format(i[0]) # Bluecoat
|
||||
htmlTableBody += '<td>{}</td>'.format(i[5]) # Bluecoat Categorization
|
||||
htmlTableBody += '<td><a href="https://exchange.xforce.ibmcloud.com/url/{}" target="_blank">IBM-xForce</a></td>'.format(i[0]) # IBM xForce
|
||||
htmlTableBody += '<td>{}</td>'.format(i[6]) # IBM x-Force Categorization
|
||||
htmlTableBody += '<td><a href="https://www.talosintelligence.com/reputation_center/lookup?search={0}" target="_blank">Cisco Talos</a></td>'.format(i[0]) # Cisco Talos
|
||||
htmlTableBody += '<td>{}</td>'.format(i[7]) # Cisco Talos
|
||||
htmlTableBody += '<td><a href="http://www.borderware.com/domain_lookup.php?ip={}" target="_blank">WatchGuard</a></td>'.format(i[0]) # Borderware WatchGuard
|
||||
htmlTableBody += '<td><a href="https://www.namecheap.com/domains/registration/results.aspx?domain={}" target="_blank">Namecheap</a></td>'.format(i[0]) # Namecheap
|
||||
htmlTableBody += '<td><a href="http://web.archive.org/web/*/{}" target="_blank">Archive.org</a></td>'.format(i[0]) # Archive.org
|
||||
|
@ -598,8 +625,5 @@ If you plan to use this content for illegal purpose, don't. Have a nice day :)'
|
|||
print("[*] Log written to {}\n".format(logfilename))
|
||||
|
||||
# Print Text Table
|
||||
t = Texttable(max_width=maxwidth)
|
||||
t.add_rows(sortedData)
|
||||
header = ['Domain', 'Birth', '#', 'TLDs', 'Status', 'Symantec', 'IBM']
|
||||
t.header(header)
|
||||
print(t.draw())
|
||||
header = ['Domain', 'Birth', '#', 'TLDs', 'Status', 'BlueCoat', 'IBM', 'Cisco Talos']
|
||||
print(drawTable(header,sortedDomains))
|
||||
|
|
Loading…
Reference in New Issue