tweak logic and fix table column indexes

master
Joe Vest 2018-10-04 10:58:35 -05:00
parent 8f8abdb6e3
commit 4be8c803eb
3 changed files with 138 additions and 59 deletions

6
.gitignore vendored
View File

@ -1,3 +1,9 @@
*.html *.html
*.txt *.txt
*.jpg *.jpg
.vscode/*
!.vscode/settings.json
!.vscode/tasks.json
!.vscode/launch.json
!.vscode/extensions.json

View File

@ -8,6 +8,12 @@ This Python based tool was written to quickly query the Expireddomains.net searc
## Changes ## Changes
- 4 October 2018
+ Tweaked parsing logic
+ Fixed parsed table column indexes
+ Added additional TLDs to the search list if the TLD is marked available.
+ If thisistest.com is found and thisistest.org is marked available, thisistest.org will be added to the search list
- 17 September 2018 - 17 September 2018
+ Fixed Symantec WebPulse Site Review parsing errors caused by service updates + Fixed Symantec WebPulse Site Review parsing errors caused by service updates
@ -67,37 +73,42 @@ Optional - Install additional OCR support dependencies
## Usage ## Usage
List DomainHunter options usage: domainhunter.py [-h] [-a] [-k KEYWORD] [-c] [-f FILENAME] [--ocr]
[-r MAXRESULTS] [-s SINGLE] [-t {0,1,2,3,4,5}]
python3 domainhunter.py -h [-w MAXWIDTH] [-V]
usage: domainhunter.py [-h] [-q QUERY] [-c] [-r MAXRESULTS] [-s SINGLE]
[-w MAXWIDTH] [-v]
Finds expired domains, domain categorization, and Archive.org history to Finds expired domains, domain categorization, and Archive.org history to determine good candidates for C2 and phishing domains
determine good candidates for C2 and phishing domains
optional arguments: optional arguments:
-h, --help show this help message and exit -h, --help show this help message and exit
-k KEYWORD, --keyword KEYWORD -a, --alexa Filter results to Alexa listings
-k KEYWORD, --keyword KEYWORD
Keyword used to refine search results Keyword used to refine search results
-c, --check Perform domain reputation checks -c, --check Perform domain reputation checks
-f FILENAME, --filename FILENAME -f FILENAME, --filename FILENAME
Specify input file of line delimited domain names to Specify input file of line delimited domain names to
check check
--ocr Perform OCR on CAPTCHAs when present --ocr Perform OCR on CAPTCHAs when challenged
-r MAXRESULTS, --maxresults MAXRESULTS -r MAXRESULTS, --maxresults MAXRESULTS
Number of results to return when querying latest Number of results to return when querying latest
expired/deleted domains expired/deleted domains
-s SINGLE, --single SINGLE -s SINGLE, --single SINGLE
Performs detailed reputation checks against a single Performs detailed reputation checks against a single
domain name/IP. domain name/IP.
-t {0,1,2,3,4,5}, --timing {0,1,2,3,4,5} -t {0,1,2,3,4,5}, --timing {0,1,2,3,4,5}
Modifies request timing to avoid CAPTCHAs. Slowest(0) Modifies request timing to avoid CAPTCHAs. Slowest(0)
= 90-120 seconds, Default(3) = 10-20 seconds, = 90-120 seconds, Default(3) = 10-20 seconds,
Fastest(5) = no delay Fastest(5) = no delay
-w MAXWIDTH, --maxwidth MAXWIDTH -w MAXWIDTH, --maxwidth MAXWIDTH
Width of text table Width of text table
-V, --version show program's version number and exit -V, --version show program's version number and exit
Examples:
./domainhunter.py -k apples -c --ocr -t5
./domainhunter.py --check --ocr -t3
./domainhunter.py --single mydomain.com
./domainhunter.py --keyword tech --check --ocr --timing 5 --alexa
./domainhunter.py --filename inputlist.txt --ocr --timing 5
Use defaults to check for most recent 100 domains and check reputation Use defaults to check for most recent 100 domains and check reputation

View File

@ -293,7 +293,8 @@ if __name__ == "__main__":
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
description='Finds expired domains, domain categorization, and Archive.org history to determine good candidates for C2 and phishing domains', description='Finds expired domains, domain categorization, and Archive.org history to determine good candidates for C2 and phishing domains',
epilog = '''Examples: epilog = '''
Examples:
./domainhunter.py -k apples -c --ocr -t5 ./domainhunter.py -k apples -c --ocr -t5
./domainhunter.py --check --ocr -t3 ./domainhunter.py --check --ocr -t3
./domainhunter.py --single mydomain.com ./domainhunter.py --single mydomain.com
@ -313,8 +314,6 @@ if __name__ == "__main__":
parser.add_argument('-V','--version', action='version',version='%(prog)s {version}'.format(version=__version__)) parser.add_argument('-V','--version', action='version',version='%(prog)s {version}'.format(version=__version__))
args = parser.parse_args() args = parser.parse_args()
# Load dependent modules # Load dependent modules
try: try:
import requests import requests
@ -495,6 +494,8 @@ If you plan to use this content for illegal purpose, don't. Have a nice day :)'
#print(soup) #print(soup)
try: try:
table = soup.find("table") table = soup.find("table")
rows = table.findAll('tr')[1:]
for row in table.findAll('tr')[1:]: for row in table.findAll('tr')[1:]:
# Alternative way to extract domain name # Alternative way to extract domain name
@ -510,60 +511,118 @@ If you plan to use this content for illegal purpose, don't. Have a nice day :)'
c2 = cells[2].find(text=True) # domainpop c2 = cells[2].find(text=True) # domainpop
c3 = cells[3].find(text=True) # birth c3 = cells[3].find(text=True) # birth
c4 = cells[4].find(text=True) # Archive.org entries c4 = cells[4].find(text=True) # Archive.org entries
c5 = cells[5].find(text=True) # similarweb c5 = cells[5].find(text=True) # Alexa
c6 = cells[6].find(text=True) # similarweb country code c6 = cells[6].find(text=True) # Dmoz.org
c7 = cells[7].find(text=True) # Dmoz.org c7 = cells[7].find(text=True) # status com
c8 = cells[8].find(text=True) # status com c8 = cells[8].find(text=True) # status net
c9 = cells[9].find(text=True) # status net c9 = cells[9].find(text=True) # status org
c10 = cells[10].find(text=True) # status org c10 = cells[10].find(text=True) # status de
c11 = cells[11].find(text=True) # status de c11 = cells[11].find(text=True) # TLDs
c12 = cells[12].find(text=True) # tld registered c12 = cells[12].find(text=True) # RDT
c13 = cells[13].find(text=True) # Source List c13 = cells[13].find(text=True) # List
c14 = cells[14].find(text=True) # Domain Status c14 = cells[14].find(text=True) # Status
c15 = "" # Related Domains c15 = "" # Links
# create available TLD list
available = ''
if c7 == "available":
available += ".com "
if c8 == "available":
available += ".net "
if c9 == "available":
available += ".org "
if c10 == "available":
available += ".de "
# Only grab status for keyword searches since it doesn't exist otherwise
status = ""
if keyword:
status = c14
# Only add Expired, not Pending, Backorder, etc
if c13 == "Expired":
# Append parsed domain data to list if it matches our criteria (.com|.net|.org and not a known malware domain)
#if (c0.lower().endswith(".com") or c0.lower().endswith(".net") or c0.lower().endswith(".org")) and (c0 not in maldomainsList):
# domain_list.append([c0,c3,c4,available,status])
# Add other TLDs to list if marked available
if (c7 == "available") and (c0 not in maldomainsList):
dom = c0.split(".")[0] + ".com"
domain_list.append([dom,c3,c4,available,status])
if (c8 == "available") and (c0 not in maldomainsList):
dom = c0.split(".")[0] + ".net"
domain_list.append([dom,c3,c4,available,status])
if (c9 == "available") and (c0 not in maldomainsList):
dom = c0.split(".")[0] + ".org"
domain_list.append([dom,c3,c4,available,status])
if (c10 == "available") and (c0 not in maldomainsList):
dom = c0.split(".")[0] + ".de"
domain_list.append([dom,c3,c4,available,status])
# Non-keyword search table format is slightly different # Non-keyword search table format is slightly different
else: else:
c0 = cells[0].find(text=True) # domain c0 = cells[0].find(text=True) # domain
c1 = cells[1].find(text=True) # bl c1 = cells[1].find(text=True) # bl
c2 = cells[2].find(text=True) # domainpop c2 = cells[2].find(text=True) # domainpop
c3 = cells[3].find(text=True) # birth c3 = cells[3].find(text=True) # birth
c4 = cells[4].find(text=True) # Archive.org entries c4 = cells[4].find(text=True) # Archive.org entries
c5 = cells[5].find(text=True) # similarweb c5 = cells[5].find(text=True) # Alexa
c6 = cells[6].find(text=True) # similarweb country code c6 = cells[6].find(text=True) # Dmoz.org
c7 = cells[7].find(text=True) # Dmoz.org c7 = cells[7].find(text=True) # status com
c8 = cells[8].find(text=True) # status com c8 = cells[8].find(text=True) # status net
c9 = cells[9].find(text=True) # status net c9 = cells[9].find(text=True) # status org
c10 = cells[10].find(text=True) # status org c10 = cells[10].find(text=True) # status de
c11 = cells[11].find(text=True) # status de c11 = cells[11].find(text=True) # TLDs
c12 = cells[12].find(text=True) # tld registered c12 = cells[12].find(text=True) # RDT
c13 = cells[13].find(text=True) # changes c13 = cells[13].find(text=True) # End Date
c14 = cells[14].find(text=True) # whois c14 = cells[14].find(text=True) # Links
# create available TLD list
available = ''
if c7 == "available":
available += ".com "
available = '' if c8 == "available":
if c8 == "available": available += ".net "
available += ".com "
if c9 == "available": if c9 == "available":
available += ".net " available += ".org "
if c10 == "available": if c10 == "available":
available += ".org " available += ".de "
if c11 == "available": status = ""
available += ".de "
# Only grab status for keyword searches since it doesn't exist otherwise # Add other TLDs to list if marked available
status = "" if (c7 == "available") and (c0 not in maldomainsList):
if keyword: dom = c0.split(".")[0] + ".com"
status = c14 domain_list.append([dom,c3,c4,available,status])
# Append parsed domain data to list if it matches our criteria (.com|.net|.org and not a known malware domain) if (c8 == "available") and (c0 not in maldomainsList):
if (c0.lower().endswith(".com") or c0.lower().endswith(".net") or c0.lower().endswith(".org")) and (c0 not in maldomainsList): dom = c0.split(".")[0] + ".net"
domain_list.append([c0,c3,c4,available,status]) domain_list.append([dom,c3,c4,available,status])
if (c9 == "available") and (c0 not in maldomainsList):
dom = c0.split(".")[0] + ".org"
domain_list.append([dom,c3,c4,available,status])
if (c10 == "available") and (c0 not in maldomainsList):
dom = c0.split(".")[0] + ".de"
domain_list.append([dom,c3,c4,available,status])
# Append original parsed domain data to list if it matches our criteria (.com|.net|.org and not a known malware domain)
#if (c0.lower().endswith(".com") or c0.lower().endswith(".net") or c0.lower().endswith(".org")) and (c0 not in maldomainsList):
# domain_list.append([c0,c3,c4,available,status])
except Exception as e: except Exception as e:
#print(e) print("[!] Error: ", e)
pass pass
# Add additional sleep on requests to ExpiredDomains.net to avoid errors # Add additional sleep on requests to ExpiredDomains.net to avoid errors
@ -577,7 +636,10 @@ If you plan to use this content for illegal purpose, don't. Have a nice day :)'
if check: if check:
print("\n[*] Performing reputation checks for {} domains".format(len(domain_list))) print("\n[*] Performing reputation checks for {} domains".format(len(domain_list)))
for domain_entry in domain_list: domain_list_unique = []
[domain_list_unique.append(item) for item in domain_list if item not in domain_list_unique]
for domain_entry in domain_list_unique:
domain = domain_entry[0] domain = domain_entry[0]
birthdate = domain_entry[1] birthdate = domain_entry[1]
archiveentries = domain_entry[2] archiveentries = domain_entry[2]