Added option to provide input file of domain names to check against reputation services

master
Andrew Chiles 2017-03-01 16:50:42 +01:00
parent 95d4a3bdbb
commit 416f81e8e0
2 changed files with 101 additions and 58 deletions

View File

@ -10,7 +10,8 @@ This Python based tool was written to quickly query the Expireddomains.net searc
- Retrieves specified number of recently expired and deleted domains (.com, .net, .org primarily) - Retrieves specified number of recently expired and deleted domains (.com, .net, .org primarily)
- Retrieves available domains based on keyword search - Retrieves available domains based on keyword search
- Performs reputation checks against the Blue Coat Site Review service - Reads line-delimited input file of potential domain names to check against reputation services
- Performs reputation checks against the Blue Coat Site Review and IBM x-Force services
- Sorts results by domain age (if known) - Sorts results by domain age (if known)
- Text-based table and HTML report output with links to reputation sources and Archive.org entry - Text-based table and HTML report output with links to reputation sources and Archive.org entry
@ -38,6 +39,8 @@ List DomainHunter options
-r MAXRESULTS, --maxresults MAXRESULTS -r MAXRESULTS, --maxresults MAXRESULTS
Number of results to return when querying latest Number of results to return when querying latest
expired/deleted domains (min. 100) expired/deleted domains (min. 100)
-f FILE, --file FILE Input file containing potential domain names to check
(1 per line)
Use defaults to check for most recent 100 domains and check reputation Use defaults to check for most recent 100 domains and check reputation
@ -47,6 +50,10 @@ Search for 1000 most recently expired/deleted domains, but don't check reputatio
python ./domainhunter.py -r 1000 -n python ./domainhunter.py -r 1000 -n
Retrieve reputation information from domains in an input file
python ./domainhunter.py -f <filename>
Search for available domains with search term of "dog" and max results of 100 Search for available domains with search term of "dog" and max results of 100
./domainhunter.py -q dog -r 100 -c ./domainhunter.py -q dog -r 100 -c

View File

@ -9,6 +9,7 @@
# Add reputation categorizations to identify desireable vs undesireable domains # Add reputation categorizations to identify desireable vs undesireable domains
# Code cleanup/optimization # Code cleanup/optimization
# Read in list of desired domain names # Read in list of desired domain names
# Add Authenticated "Members-Only" option to download CSV/txt (https://member.expireddomains.net/domains/expiredcom/)
import time import time
import random import random
@ -96,7 +97,7 @@ if __name__ == "__main__":
parser.add_argument('-c','--check', help='Perform slow reputation checks', required=False, default=False, action='store_true') parser.add_argument('-c','--check', help='Perform slow reputation checks', required=False, default=False, action='store_true')
parser.add_argument('-r','--maxresults', help='Number of results to return when querying latest expired/deleted domains (min. 100)', required=False, type=int, default=100) parser.add_argument('-r','--maxresults', help='Number of results to return when querying latest expired/deleted domains (min. 100)', required=False, type=int, default=100)
parser.add_argument('-w','--maxwidth', help='Width of text table', required=False, type=int, default=400) parser.add_argument('-w','--maxwidth', help='Width of text table', required=False, type=int, default=400)
parser.add_argument('-f','--file', help='Input file containing potential domain names to check (1 per line)', required=False, type=str)
args = parser.parse_args() args = parser.parse_args()
## Variables ## Variables
@ -112,6 +113,10 @@ if __name__ == "__main__":
maxwidth=args.maxwidth maxwidth=args.maxwidth
inputfile = False
if args.file:
inputfile = args.file
t = Texttable(max_width=maxwidth) t = Texttable(max_width=maxwidth)
malwaredomains = 'http://mirror1.malwaredomains.com/files/justdomains' malwaredomains = 'http://mirror1.malwaredomains.com/files/justdomains'
expireddomainsqueryurl = 'https://www.expireddomains.net/domain-name-search' expireddomainsqueryurl = 'https://www.expireddomains.net/domain-name-search'
@ -193,70 +198,98 @@ If you plan to use this content for illegal purpose, don't. Have a nice day :)'
# Turn the HTML into a Beautiful Soup object # Turn the HTML into a Beautiful Soup object
soup = BeautifulSoup(domains, 'lxml') soup = BeautifulSoup(domains, 'lxml')
table = soup.find("table") table = soup.find("table")
try:
for row in table.findAll('tr')[1:]:
for row in table.findAll('tr')[1:]: # Alternative way to extract domain name
# domain = row.find('td').find('a').text
# Alternative way to extract domain name cells = row.findAll("td")
# domain = row.find('td').find('a').text if len(cells) >= 1:
output = ""
c0 = row.find('td').find('a').text # domain
c1 = cells[1].find(text=True) # bl
c2 = cells[2].find(text=True) # domainpop
c3 = cells[3].find(text=True) # birth
c4 = cells[4].find(text=True) # entries
c5 = cells[5].find(text=True) # similarweb
c6 = cells[6].find(text=True) # similarweb country code
c7 = cells[7].find(text=True) # moz
c8 = cells[8].find(text=True) # status com
c9 = cells[9].find(text=True) # status net
c10 = cells[10].find(text=True) # status org
c11 = cells[11].find(text=True) # status de
c12 = cells[12].find(text=True) # tld registered
c13 = cells[13].find(text=True) # monthly searches
c14 = cells[14].find(text=True) # adwords competition
c15 = cells[15].find(text=True) # list
c16 = cells[16].find(text=True) # status
c17 = cells[17].find(text=True) # related links
cells = row.findAll("td") available = ''
if len(cells) >= 1: if c8 == "available":
output = "" available += ".com "
c0 = row.find('td').find('a').text # domain
c1 = cells[1].find(text=True) # bl
c2 = cells[2].find(text=True) # domainpop
c3 = cells[3].find(text=True) # birth
c4 = cells[4].find(text=True) # entries
c5 = cells[5].find(text=True) # similarweb
c6 = cells[6].find(text=True) # similarweb country code
c7 = cells[7].find(text=True) # moz
c8 = cells[8].find(text=True) # status com
c9 = cells[9].find(text=True) # status net
c10 = cells[10].find(text=True) # status org
c11 = cells[11].find(text=True) # status de
c12 = cells[12].find(text=True) # tld registered
c13 = cells[13].find(text=True) # monthly searches
c14 = cells[14].find(text=True) # adwords competition
c15 = cells[15].find(text=True) # list
c16 = cells[16].find(text=True) # status
c17 = cells[17].find(text=True) # related links
available = '' if c9 == "available":
if c8 == "available": available += ".net "
available += ".com "
if c9 == "available": if c10 == "available":
available += ".net " available += ".org "
if c10 == "available": if c11 == "available":
available += ".org " available += ".de "
if c11 == "available": # Skip additional reputation checks if this domain is already categorized as malicious
available += ".de " if c0 in maldomains_list:
print("[-] Skipping {} - Identified as known malware domain").format(c0)
# Skip additional reputation checks if this domain is already categorized as malicious
if c0 in maldomains_list:
print("[-] Skipping {} - Identified as known malware domain").format(c0)
else:
bluecoat = ''
ibmxforce = ''
if c3 == '-':
bluecoat = 'ignored'
ibmxforce = 'ignored'
elif check == True:
bluecoat = checkBluecoat(c0)
print "[+] {} is categorized as: {}".format(c0, bluecoat)
ibmxforce = checkIBMxForce(c0)
print "[+] {} is categorized as: {}".format(c0, ibmxforce)
# Sleep to avoid captchas
time.sleep(random.randrange(10,20))
else: else:
bluecoat = "skipped" bluecoat = ''
ibmxforce = "skipped" ibmxforce = ''
# Append parsed domain data to list if c3 == '-':
data.append([c0,c3,c4,available,bluecoat,ibmxforce]) bluecoat = 'ignored'
ibmxforce = 'ignored'
elif check == True:
bluecoat = checkBluecoat(c0)
print "[+] {} is categorized as: {}".format(c0, bluecoat)
ibmxforce = checkIBMxForce(c0)
print "[+] {} is categorized as: {}".format(c0, ibmxforce)
# Sleep to avoid captchas
time.sleep(random.randrange(10,20))
else:
bluecoat = "skipped"
ibmxforce = "skipped"
# Append parsed domain data to list
data.append([c0,c3,c4,available,bluecoat,ibmxforce])
except:
print "[-] Error: No results found on this page!"
# Retrieve the most recent expired/deleted domain results # Retrieve the most recent expired/deleted domain results
elif inputfile:
print('[*] Fetching domain reputation from file: {}').format(inputfile)
# read in file contents to list
try:
domains = [line.rstrip('\r\n') for line in open(inputfile, "r")]
except IOError:
print '[-] Error: {} does not appear to exist.'.format(inputfile)
exit()
print('[*] Domains loaded: {}').format(len(domains))
for domain in domains:
if domain in maldomains_list:
print("[-] Skipping {} - Identified as known malware domain").format(domain)
else:
bluecoat = ''
ibmxforce = ''
bluecoat = checkBluecoat(domain)
print "[+] {} is categorized as: {}".format(domain, bluecoat)
ibmxforce = checkIBMxForce(domain)
print "[+] {} is categorized as: {}".format(domain, ibmxforce)
# Sleep to avoid captchas
time.sleep(random.randrange(10,20))
data.append([domain,'-','-','-',bluecoat,ibmxforce])
else: else:
print('[*] Fetching {} expired or deleted domains...').format(query) print('[*] Fetching {} expired or deleted domains...').format(query)
@ -279,10 +312,9 @@ If you plan to use this content for illegal purpose, don't. Have a nice day :)'
soup = BeautifulSoup(expireddomains, 'lxml') soup = BeautifulSoup(expireddomains, 'lxml')
table = soup.find("table") table = soup.find("table")
try:
for row in table.findAll('tr')[1:]: for row in table.findAll('tr')[1:]:
#print(row)
#domain = row.find('td').find('a').text
cells = row.findAll("td") cells = row.findAll("td")
if len(cells) >= 1: if len(cells) >= 1:
output = "" output = ""
@ -338,6 +370,8 @@ If you plan to use this content for illegal purpose, don't. Have a nice day :)'
ibmxforce = "skipped" ibmxforce = "skipped"
# Append parsed domain data to list # Append parsed domain data to list
data.append([c0,c3,c4,available,bluecoat,ibmxforce]) data.append([c0,c3,c4,available,bluecoat,ibmxforce])
except:
print "[-] Error: No results found on this page!"
# Sort domain list by column 2 (Birth Year) # Sort domain list by column 2 (Birth Year)
sortedData = sorted(data, key=lambda x: x[1], reverse=True) sortedData = sorted(data, key=lambda x: x[1], reverse=True)
@ -398,3 +432,5 @@ If you plan to use this content for illegal purpose, don't. Have a nice day :)'
print(t.draw()) print(t.draw())