tweak logic and fix table column indexes
parent
8f8abdb6e3
commit
4be8c803eb
|
@ -1,3 +1,9 @@
|
||||||
*.html
|
*.html
|
||||||
*.txt
|
*.txt
|
||||||
*.jpg
|
*.jpg
|
||||||
|
|
||||||
|
.vscode/*
|
||||||
|
!.vscode/settings.json
|
||||||
|
!.vscode/tasks.json
|
||||||
|
!.vscode/launch.json
|
||||||
|
!.vscode/extensions.json
|
45
README.md
45
README.md
|
@ -8,6 +8,12 @@ This Python based tool was written to quickly query the Expireddomains.net searc
|
||||||
|
|
||||||
## Changes
|
## Changes
|
||||||
|
|
||||||
|
- 4 October 2018
|
||||||
|
+ Tweaked parsing logic
|
||||||
|
+ Fixed parsed table column indexes
|
||||||
|
+ Added a found domain's other TLD variants to the results when those TLDs are marked available.
|
||||||
|
+ If thisistest.com is found and thisistest.org is marked available, thisistest.org will be added to the search list
|
||||||
|
|
||||||
- 17 September 2018
|
- 17 September 2018
|
||||||
+ Fixed Symantec WebPulse Site Review parsing errors caused by service updates
|
+ Fixed Symantec WebPulse Site Review parsing errors caused by service updates
|
||||||
|
|
||||||
|
@ -67,37 +73,42 @@ Optional - Install additional OCR support dependencies
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
|
|
||||||
List DomainHunter options
|
usage: domainhunter.py [-h] [-a] [-k KEYWORD] [-c] [-f FILENAME] [--ocr]
|
||||||
|
[-r MAXRESULTS] [-s SINGLE] [-t {0,1,2,3,4,5}]
|
||||||
python3 domainhunter.py -h
|
[-w MAXWIDTH] [-V]
|
||||||
usage: domainhunter.py [-h] [-q QUERY] [-c] [-r MAXRESULTS] [-s SINGLE]
|
|
||||||
[-w MAXWIDTH] [-v]
|
|
||||||
|
|
||||||
Finds expired domains, domain categorization, and Archive.org history to
|
Finds expired domains, domain categorization, and Archive.org history to determine good candidates for C2 and phishing domains
|
||||||
determine good candidates for C2 and phishing domains
|
|
||||||
|
|
||||||
optional arguments:
|
optional arguments:
|
||||||
-h, --help show this help message and exit
|
-h, --help show this help message and exit
|
||||||
-k KEYWORD, --keyword KEYWORD
|
-a, --alexa Filter results to Alexa listings
|
||||||
|
-k KEYWORD, --keyword KEYWORD
|
||||||
Keyword used to refine search results
|
Keyword used to refine search results
|
||||||
-c, --check Perform domain reputation checks
|
-c, --check Perform domain reputation checks
|
||||||
-f FILENAME, --filename FILENAME
|
-f FILENAME, --filename FILENAME
|
||||||
Specify input file of line delimited domain names to
|
Specify input file of line delimited domain names to
|
||||||
check
|
check
|
||||||
--ocr Perform OCR on CAPTCHAs when present
|
--ocr Perform OCR on CAPTCHAs when challenged
|
||||||
-r MAXRESULTS, --maxresults MAXRESULTS
|
-r MAXRESULTS, --maxresults MAXRESULTS
|
||||||
Number of results to return when querying latest
|
Number of results to return when querying latest
|
||||||
expired/deleted domains
|
expired/deleted domains
|
||||||
-s SINGLE, --single SINGLE
|
-s SINGLE, --single SINGLE
|
||||||
Performs detailed reputation checks against a single
|
Performs detailed reputation checks against a single
|
||||||
domain name/IP.
|
domain name/IP.
|
||||||
-t {0,1,2,3,4,5}, --timing {0,1,2,3,4,5}
|
-t {0,1,2,3,4,5}, --timing {0,1,2,3,4,5}
|
||||||
Modifies request timing to avoid CAPTCHAs. Slowest(0)
|
Modifies request timing to avoid CAPTCHAs. Slowest(0)
|
||||||
= 90-120 seconds, Default(3) = 10-20 seconds,
|
= 90-120 seconds, Default(3) = 10-20 seconds,
|
||||||
Fastest(5) = no delay
|
Fastest(5) = no delay
|
||||||
-w MAXWIDTH, --maxwidth MAXWIDTH
|
-w MAXWIDTH, --maxwidth MAXWIDTH
|
||||||
Width of text table
|
Width of text table
|
||||||
-V, --version show program's version number and exit
|
-V, --version show program's version number and exit
|
||||||
|
|
||||||
|
Examples:
|
||||||
|
./domainhunter.py -k apples -c --ocr -t5
|
||||||
|
./domainhunter.py --check --ocr -t3
|
||||||
|
./domainhunter.py --single mydomain.com
|
||||||
|
./domainhunter.py --keyword tech --check --ocr --timing 5 --alexa
|
||||||
|
./domainhunter.py --filename inputlist.txt --ocr --timing 5
|
||||||
|
|
||||||
Use defaults to check for most recent 100 domains and check reputation
|
Use defaults to check for most recent 100 domains and check reputation
|
||||||
|
|
||||||
|
|
146
domainhunter.py
146
domainhunter.py
|
@ -293,7 +293,8 @@ if __name__ == "__main__":
|
||||||
|
|
||||||
parser = argparse.ArgumentParser(
|
parser = argparse.ArgumentParser(
|
||||||
description='Finds expired domains, domain categorization, and Archive.org history to determine good candidates for C2 and phishing domains',
|
description='Finds expired domains, domain categorization, and Archive.org history to determine good candidates for C2 and phishing domains',
|
||||||
epilog = '''Examples:
|
epilog = '''
|
||||||
|
Examples:
|
||||||
./domainhunter.py -k apples -c --ocr -t5
|
./domainhunter.py -k apples -c --ocr -t5
|
||||||
./domainhunter.py --check --ocr -t3
|
./domainhunter.py --check --ocr -t3
|
||||||
./domainhunter.py --single mydomain.com
|
./domainhunter.py --single mydomain.com
|
||||||
|
@ -313,8 +314,6 @@ if __name__ == "__main__":
|
||||||
parser.add_argument('-V','--version', action='version',version='%(prog)s {version}'.format(version=__version__))
|
parser.add_argument('-V','--version', action='version',version='%(prog)s {version}'.format(version=__version__))
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Load dependent modules
|
# Load dependent modules
|
||||||
try:
|
try:
|
||||||
import requests
|
import requests
|
||||||
|
@ -495,6 +494,8 @@ If you plan to use this content for illegal purpose, don't. Have a nice day :)'
|
||||||
#print(soup)
|
#print(soup)
|
||||||
try:
|
try:
|
||||||
table = soup.find("table")
|
table = soup.find("table")
|
||||||
|
|
||||||
|
rows = table.findAll('tr')[1:]
|
||||||
for row in table.findAll('tr')[1:]:
|
for row in table.findAll('tr')[1:]:
|
||||||
|
|
||||||
# Alternative way to extract domain name
|
# Alternative way to extract domain name
|
||||||
|
@ -510,60 +511,118 @@ If you plan to use this content for illegal purpose, don't. Have a nice day :)'
|
||||||
c2 = cells[2].find(text=True) # domainpop
|
c2 = cells[2].find(text=True) # domainpop
|
||||||
c3 = cells[3].find(text=True) # birth
|
c3 = cells[3].find(text=True) # birth
|
||||||
c4 = cells[4].find(text=True) # Archive.org entries
|
c4 = cells[4].find(text=True) # Archive.org entries
|
||||||
c5 = cells[5].find(text=True) # similarweb
|
c5 = cells[5].find(text=True) # Alexa
|
||||||
c6 = cells[6].find(text=True) # similarweb country code
|
c6 = cells[6].find(text=True) # Dmoz.org
|
||||||
c7 = cells[7].find(text=True) # Dmoz.org
|
c7 = cells[7].find(text=True) # status com
|
||||||
c8 = cells[8].find(text=True) # status com
|
c8 = cells[8].find(text=True) # status net
|
||||||
c9 = cells[9].find(text=True) # status net
|
c9 = cells[9].find(text=True) # status org
|
||||||
c10 = cells[10].find(text=True) # status org
|
c10 = cells[10].find(text=True) # status de
|
||||||
c11 = cells[11].find(text=True) # status de
|
c11 = cells[11].find(text=True) # TLDs
|
||||||
c12 = cells[12].find(text=True) # tld registered
|
c12 = cells[12].find(text=True) # RDT
|
||||||
c13 = cells[13].find(text=True) # Source List
|
c13 = cells[13].find(text=True) # List
|
||||||
c14 = cells[14].find(text=True) # Domain Status
|
c14 = cells[14].find(text=True) # Status
|
||||||
c15 = "" # Related Domains
|
c15 = "" # Links
|
||||||
|
|
||||||
|
# create available TLD list
|
||||||
|
available = ''
|
||||||
|
if c7 == "available":
|
||||||
|
available += ".com "
|
||||||
|
|
||||||
|
if c8 == "available":
|
||||||
|
available += ".net "
|
||||||
|
|
||||||
|
if c9 == "available":
|
||||||
|
available += ".org "
|
||||||
|
|
||||||
|
if c10 == "available":
|
||||||
|
available += ".de "
|
||||||
|
|
||||||
|
# Only grab status for keyword searches since it doesn't exist otherwise
|
||||||
|
status = ""
|
||||||
|
if keyword:
|
||||||
|
status = c14
|
||||||
|
|
||||||
|
# Only add Expired, not Pending, Backorder, etc
|
||||||
|
if c13 == "Expired":
|
||||||
|
# Append parsed domain data to list if it matches our criteria (.com|.net|.org and not a known malware domain)
|
||||||
|
#if (c0.lower().endswith(".com") or c0.lower().endswith(".net") or c0.lower().endswith(".org")) and (c0 not in maldomainsList):
|
||||||
|
# domain_list.append([c0,c3,c4,available,status])
|
||||||
|
|
||||||
|
# Add other TLDs to list if marked available
|
||||||
|
if (c7 == "available") and (c0 not in maldomainsList):
|
||||||
|
dom = c0.split(".")[0] + ".com"
|
||||||
|
domain_list.append([dom,c3,c4,available,status])
|
||||||
|
|
||||||
|
if (c8 == "available") and (c0 not in maldomainsList):
|
||||||
|
dom = c0.split(".")[0] + ".net"
|
||||||
|
domain_list.append([dom,c3,c4,available,status])
|
||||||
|
|
||||||
|
if (c9 == "available") and (c0 not in maldomainsList):
|
||||||
|
dom = c0.split(".")[0] + ".org"
|
||||||
|
domain_list.append([dom,c3,c4,available,status])
|
||||||
|
|
||||||
|
if (c10 == "available") and (c0 not in maldomainsList):
|
||||||
|
dom = c0.split(".")[0] + ".de"
|
||||||
|
domain_list.append([dom,c3,c4,available,status])
|
||||||
|
|
||||||
# Non-keyword search table format is slightly different
|
# Non-keyword search table format is slightly different
|
||||||
else:
|
else:
|
||||||
|
|
||||||
c0 = cells[0].find(text=True) # domain
|
c0 = cells[0].find(text=True) # domain
|
||||||
c1 = cells[1].find(text=True) # bl
|
c1 = cells[1].find(text=True) # bl
|
||||||
c2 = cells[2].find(text=True) # domainpop
|
c2 = cells[2].find(text=True) # domainpop
|
||||||
c3 = cells[3].find(text=True) # birth
|
c3 = cells[3].find(text=True) # birth
|
||||||
c4 = cells[4].find(text=True) # Archive.org entries
|
c4 = cells[4].find(text=True) # Archive.org entries
|
||||||
c5 = cells[5].find(text=True) # similarweb
|
c5 = cells[5].find(text=True) # Alexa
|
||||||
c6 = cells[6].find(text=True) # similarweb country code
|
c6 = cells[6].find(text=True) # Dmoz.org
|
||||||
c7 = cells[7].find(text=True) # Dmoz.org
|
c7 = cells[7].find(text=True) # status com
|
||||||
c8 = cells[8].find(text=True) # status com
|
c8 = cells[8].find(text=True) # status net
|
||||||
c9 = cells[9].find(text=True) # status net
|
c9 = cells[9].find(text=True) # status org
|
||||||
c10 = cells[10].find(text=True) # status org
|
c10 = cells[10].find(text=True) # status de
|
||||||
c11 = cells[11].find(text=True) # status de
|
c11 = cells[11].find(text=True) # TLDs
|
||||||
c12 = cells[12].find(text=True) # tld registered
|
c12 = cells[12].find(text=True) # RDT
|
||||||
c13 = cells[13].find(text=True) # changes
|
c13 = cells[13].find(text=True) # End Date
|
||||||
c14 = cells[14].find(text=True) # whois
|
c14 = cells[14].find(text=True) # Links
|
||||||
|
|
||||||
|
# create available TLD list
|
||||||
|
available = ''
|
||||||
|
if c7 == "available":
|
||||||
|
available += ".com "
|
||||||
|
|
||||||
available = ''
|
if c8 == "available":
|
||||||
if c8 == "available":
|
available += ".net "
|
||||||
available += ".com "
|
|
||||||
|
|
||||||
if c9 == "available":
|
if c9 == "available":
|
||||||
available += ".net "
|
available += ".org "
|
||||||
|
|
||||||
if c10 == "available":
|
if c10 == "available":
|
||||||
available += ".org "
|
available += ".de "
|
||||||
|
|
||||||
if c11 == "available":
|
status = ""
|
||||||
available += ".de "
|
|
||||||
|
|
||||||
# Only grab status for keyword searches since it doesn't exist otherwise
|
# Add other TLDs to list if marked available
|
||||||
status = ""
|
if (c7 == "available") and (c0 not in maldomainsList):
|
||||||
if keyword:
|
dom = c0.split(".")[0] + ".com"
|
||||||
status = c14
|
domain_list.append([dom,c3,c4,available,status])
|
||||||
|
|
||||||
# Append parsed domain data to list if it matches our criteria (.com|.net|.org and not a known malware domain)
|
if (c8 == "available") and (c0 not in maldomainsList):
|
||||||
if (c0.lower().endswith(".com") or c0.lower().endswith(".net") or c0.lower().endswith(".org")) and (c0 not in maldomainsList):
|
dom = c0.split(".")[0] + ".net"
|
||||||
domain_list.append([c0,c3,c4,available,status])
|
domain_list.append([dom,c3,c4,available,status])
|
||||||
|
|
||||||
|
if (c9 == "available") and (c0 not in maldomainsList):
|
||||||
|
dom = c0.split(".")[0] + ".org"
|
||||||
|
domain_list.append([dom,c3,c4,available,status])
|
||||||
|
|
||||||
|
if (c10 == "available") and (c0 not in maldomainsList):
|
||||||
|
dom = c0.split(".")[0] + ".de"
|
||||||
|
domain_list.append([dom,c3,c4,available,status])
|
||||||
|
|
||||||
|
# Append original parsed domain data to list if it matches our criteria (.com|.net|.org and not a known malware domain)
|
||||||
|
#if (c0.lower().endswith(".com") or c0.lower().endswith(".net") or c0.lower().endswith(".org")) and (c0 not in maldomainsList):
|
||||||
|
# domain_list.append([c0,c3,c4,available,status])
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
#print(e)
|
print("[!] Error: ", e)
|
||||||
pass
|
pass
|
||||||
|
|
||||||
# Add additional sleep on requests to ExpiredDomains.net to avoid errors
|
# Add additional sleep on requests to ExpiredDomains.net to avoid errors
|
||||||
|
@ -577,7 +636,10 @@ If you plan to use this content for illegal purpose, don't. Have a nice day :)'
|
||||||
if check:
|
if check:
|
||||||
print("\n[*] Performing reputation checks for {} domains".format(len(domain_list)))
|
print("\n[*] Performing reputation checks for {} domains".format(len(domain_list)))
|
||||||
|
|
||||||
for domain_entry in domain_list:
|
domain_list_unique = []
|
||||||
|
[domain_list_unique.append(item) for item in domain_list if item not in domain_list_unique]
|
||||||
|
|
||||||
|
for domain_entry in domain_list_unique:
|
||||||
domain = domain_entry[0]
|
domain = domain_entry[0]
|
||||||
birthdate = domain_entry[1]
|
birthdate = domain_entry[1]
|
||||||
archiveentries = domain_entry[2]
|
archiveentries = domain_entry[2]
|
||||||
|
|
Loading…
Reference in New Issue