tweak logic and fix table column indexes

master
Joe Vest 2018-10-04 10:58:35 -05:00
parent 8f8abdb6e3
commit 4be8c803eb
3 changed files with 138 additions and 59 deletions

6
.gitignore vendored
View File

@ -1,3 +1,9 @@
*.html
*.txt
*.jpg
.vscode/*
!.vscode/settings.json
!.vscode/tasks.json
!.vscode/launch.json
!.vscode/extensions.json

View File

@ -8,6 +8,12 @@ This Python based tool was written to quickly query the Expireddomains.net searc
## Changes
- 4 October 2018
+ Tweaked parsing logic
+ Fixed the parsed column indexes
+ Added additional TLDs of a found domain to the search list if the TLD is marked available.
+ If thisistest.com is found and thisistest.org is marked available, thisistest.org will be added to the search list
- 17 September 2018
+ Fixed Symantec WebPulse Site Review parsing errors caused by service updates
@ -67,37 +73,42 @@ Optional - Install additional OCR support dependencies
## Usage
List DomainHunter options
python3 domainhunter.py -h
usage: domainhunter.py [-h] [-q QUERY] [-c] [-r MAXRESULTS] [-s SINGLE]
[-w MAXWIDTH] [-v]
usage: domainhunter.py [-h] [-a] [-k KEYWORD] [-c] [-f FILENAME] [--ocr]
[-r MAXRESULTS] [-s SINGLE] [-t {0,1,2,3,4,5}]
[-w MAXWIDTH] [-V]
Finds expired domains, domain categorization, and Archive.org history to
determine good candidates for C2 and phishing domains
Finds expired domains, domain categorization, and Archive.org history to determine good candidates for C2 and phishing domains
optional arguments:
-h, --help show this help message and exit
-k KEYWORD, --keyword KEYWORD
-h, --help show this help message and exit
-a, --alexa Filter results to Alexa listings
-k KEYWORD, --keyword KEYWORD
Keyword used to refine search results
-c, --check Perform domain reputation checks
-f FILENAME, --filename FILENAME
-c, --check Perform domain reputation checks
-f FILENAME, --filename FILENAME
Specify input file of line delimited domain names to
check
--ocr Perform OCR on CAPTCHAs when present
-r MAXRESULTS, --maxresults MAXRESULTS
--ocr Perform OCR on CAPTCHAs when challenged
-r MAXRESULTS, --maxresults MAXRESULTS
Number of results to return when querying latest
expired/deleted domains
-s SINGLE, --single SINGLE
-s SINGLE, --single SINGLE
Performs detailed reputation checks against a single
domain name/IP.
-t {0,1,2,3,4,5}, --timing {0,1,2,3,4,5}
-t {0,1,2,3,4,5}, --timing {0,1,2,3,4,5}
Modifies request timing to avoid CAPTCHAs. Slowest(0)
= 90-120 seconds, Default(3) = 10-20 seconds,
Fastest(5) = no delay
-w MAXWIDTH, --maxwidth MAXWIDTH
-w MAXWIDTH, --maxwidth MAXWIDTH
Width of text table
-V, --version show program's version number and exit
-V, --version show program's version number and exit
Examples:
./domainhunter.py -k apples -c --ocr -t5
./domainhunter.py --check --ocr -t3
./domainhunter.py --single mydomain.com
./domainhunter.py --keyword tech --check --ocr --timing 5 --alexa
./domainhunter.py --filename inputlist.txt --ocr --timing 5
Use defaults to check for most recent 100 domains and check reputation

View File

@ -293,7 +293,8 @@ if __name__ == "__main__":
parser = argparse.ArgumentParser(
description='Finds expired domains, domain categorization, and Archive.org history to determine good candidates for C2 and phishing domains',
epilog = '''Examples:
epilog = '''
Examples:
./domainhunter.py -k apples -c --ocr -t5
./domainhunter.py --check --ocr -t3
./domainhunter.py --single mydomain.com
@ -313,8 +314,6 @@ if __name__ == "__main__":
parser.add_argument('-V','--version', action='version',version='%(prog)s {version}'.format(version=__version__))
args = parser.parse_args()
# Load dependent modules
try:
import requests
@ -495,6 +494,8 @@ If you plan to use this content for illegal purpose, don't. Have a nice day :)'
#print(soup)
try:
table = soup.find("table")
rows = table.findAll('tr')[1:]
for row in table.findAll('tr')[1:]:
# Alternative way to extract domain name
@ -510,60 +511,118 @@ If you plan to use this content for illegal purpose, don't. Have a nice day :)'
c2 = cells[2].find(text=True) # domainpop
c3 = cells[3].find(text=True) # birth
c4 = cells[4].find(text=True) # Archive.org entries
c5 = cells[5].find(text=True) # similarweb
c6 = cells[6].find(text=True) # similarweb country code
c7 = cells[7].find(text=True) # Dmoz.org
c8 = cells[8].find(text=True) # status com
c9 = cells[9].find(text=True) # status net
c10 = cells[10].find(text=True) # status org
c11 = cells[11].find(text=True) # status de
c12 = cells[12].find(text=True) # tld registered
c13 = cells[13].find(text=True) # Source List
c14 = cells[14].find(text=True) # Domain Status
c15 = "" # Related Domains
c5 = cells[5].find(text=True) # Alexa
c6 = cells[6].find(text=True) # Dmoz.org
c7 = cells[7].find(text=True) # status com
c8 = cells[8].find(text=True) # status net
c9 = cells[9].find(text=True) # status org
c10 = cells[10].find(text=True) # status de
c11 = cells[11].find(text=True) # TLDs
c12 = cells[12].find(text=True) # RDT
c13 = cells[13].find(text=True) # List
c14 = cells[14].find(text=True) # Status
c15 = "" # Links
# create available TLD list
available = ''
if c7 == "available":
available += ".com "
if c8 == "available":
available += ".net "
if c9 == "available":
available += ".org "
if c10 == "available":
available += ".de "
# Only grab status for keyword searches since it doesn't exist otherwise
status = ""
if keyword:
status = c14
# Only add Expired, not Pending, Backorder, etc
if c13 == "Expired":
# Append parsed domain data to list if it matches our criteria (.com|.net|.org and not a known malware domain)
#if (c0.lower().endswith(".com") or c0.lower().endswith(".net") or c0.lower().endswith(".org")) and (c0 not in maldomainsList):
# domain_list.append([c0,c3,c4,available,status])
# Add other TLDs to list if marked available
if (c7 == "available") and (c0 not in maldomainsList):
dom = c0.split(".")[0] + ".com"
domain_list.append([dom,c3,c4,available,status])
if (c8 == "available") and (c0 not in maldomainsList):
dom = c0.split(".")[0] + ".net"
domain_list.append([dom,c3,c4,available,status])
if (c9 == "available") and (c0 not in maldomainsList):
dom = c0.split(".")[0] + ".org"
domain_list.append([dom,c3,c4,available,status])
if (c10 == "available") and (c0 not in maldomainsList):
dom = c0.split(".")[0] + ".de"
domain_list.append([dom,c3,c4,available,status])
# Non-keyword search table format is slightly different
else:
c0 = cells[0].find(text=True) # domain
c1 = cells[1].find(text=True) # bl
c2 = cells[2].find(text=True) # domainpop
c3 = cells[3].find(text=True) # birth
c4 = cells[4].find(text=True) # Archive.org entries
c5 = cells[5].find(text=True) # similarweb
c6 = cells[6].find(text=True) # similarweb country code
c7 = cells[7].find(text=True) # Dmoz.org
c8 = cells[8].find(text=True) # status com
c9 = cells[9].find(text=True) # status net
c10 = cells[10].find(text=True) # status org
c11 = cells[11].find(text=True) # status de
c12 = cells[12].find(text=True) # tld registered
c13 = cells[13].find(text=True) # changes
c14 = cells[14].find(text=True) # whois
c5 = cells[5].find(text=True) # Alexa
c6 = cells[6].find(text=True) # Dmoz.org
c7 = cells[7].find(text=True) # status com
c8 = cells[8].find(text=True) # status net
c9 = cells[9].find(text=True) # status org
c10 = cells[10].find(text=True) # status de
c11 = cells[11].find(text=True) # TLDs
c12 = cells[12].find(text=True) # RDT
c13 = cells[13].find(text=True) # End Date
c14 = cells[14].find(text=True) # Links
# create available TLD list
available = ''
if c7 == "available":
available += ".com "
available = ''
if c8 == "available":
available += ".com "
if c8 == "available":
available += ".net "
if c9 == "available":
available += ".net "
if c9 == "available":
available += ".org "
if c10 == "available":
available += ".org "
if c10 == "available":
available += ".de "
if c11 == "available":
available += ".de "
status = ""
# Only grab status for keyword searches since it doesn't exist otherwise
status = ""
if keyword:
status = c14
# Add other TLDs to list if marked available
if (c7 == "available") and (c0 not in maldomainsList):
dom = c0.split(".")[0] + ".com"
domain_list.append([dom,c3,c4,available,status])
# Append parsed domain data to list if it matches our criteria (.com|.net|.org and not a known malware domain)
if (c0.lower().endswith(".com") or c0.lower().endswith(".net") or c0.lower().endswith(".org")) and (c0 not in maldomainsList):
domain_list.append([c0,c3,c4,available,status])
if (c8 == "available") and (c0 not in maldomainsList):
dom = c0.split(".")[0] + ".net"
domain_list.append([dom,c3,c4,available,status])
if (c9 == "available") and (c0 not in maldomainsList):
dom = c0.split(".")[0] + ".org"
domain_list.append([dom,c3,c4,available,status])
if (c10 == "available") and (c0 not in maldomainsList):
dom = c0.split(".")[0] + ".de"
domain_list.append([dom,c3,c4,available,status])
# Append original parsed domain data to list if it matches our criteria (.com|.net|.org and not a known malware domain)
#if (c0.lower().endswith(".com") or c0.lower().endswith(".net") or c0.lower().endswith(".org")) and (c0 not in maldomainsList):
# domain_list.append([c0,c3,c4,available,status])
except Exception as e:
#print(e)
print("[!] Error: ", e)
pass
# Add additional sleep on requests to ExpiredDomains.net to avoid errors
@ -577,7 +636,10 @@ If you plan to use this content for illegal purpose, don't. Have a nice day :)'
if check:
print("\n[*] Performing reputation checks for {} domains".format(len(domain_list)))
for domain_entry in domain_list:
domain_list_unique = []
[domain_list_unique.append(item) for item in domain_list if item not in domain_list_unique]
for domain_entry in domain_list_unique:
domain = domain_entry[0]
birthdate = domain_entry[1]
archiveentries = domain_entry[2]