tweak logic and fix table column indexes
parent
8f8abdb6e3
commit
4be8c803eb
|
@ -1,3 +1,9 @@
|
|||
*.html
|
||||
*.txt
|
||||
*.jpg
|
||||
|
||||
.vscode/*
|
||||
!.vscode/settings.json
|
||||
!.vscode/tasks.json
|
||||
!.vscode/launch.json
|
||||
!.vscode/extensions.json
|
45
README.md
45
README.md
|
@ -8,6 +8,12 @@ This Python based tool was written to quickly query the Expireddomains.net searc
|
|||
|
||||
## Changes
|
||||
|
||||
- 4 October 2018
|
||||
+ Tweaked parsing logic
|
||||
+ Fixed the parsed table column indexes, which had changed
|
||||
+ Added the other TLDs of a found domain to the results if those TLDs are marked available.
|
||||
+ If thisistest.com is found and thisistest.org is marked available, thisistest.org will be added to the search list
|
||||
|
||||
- 17 September 2018
|
||||
+ Fixed Symantec WebPulse Site Review parsing errors caused by service updates
|
||||
|
||||
|
@ -67,37 +73,42 @@ Optional - Install additional OCR support dependencies
|
|||
|
||||
## Usage
|
||||
|
||||
List DomainHunter options
|
||||
|
||||
python3 domainhunter.py -h
|
||||
usage: domainhunter.py [-h] [-q QUERY] [-c] [-r MAXRESULTS] [-s SINGLE]
|
||||
[-w MAXWIDTH] [-v]
|
||||
usage: domainhunter.py [-h] [-a] [-k KEYWORD] [-c] [-f FILENAME] [--ocr]
|
||||
[-r MAXRESULTS] [-s SINGLE] [-t {0,1,2,3,4,5}]
|
||||
[-w MAXWIDTH] [-V]
|
||||
|
||||
Finds expired domains, domain categorization, and Archive.org history to
|
||||
determine good candidates for C2 and phishing domains
|
||||
Finds expired domains, domain categorization, and Archive.org history to determine good candidates for C2 and phishing domains
|
||||
|
||||
optional arguments:
|
||||
-h, --help show this help message and exit
|
||||
-k KEYWORD, --keyword KEYWORD
|
||||
-h, --help show this help message and exit
|
||||
-a, --alexa Filter results to Alexa listings
|
||||
-k KEYWORD, --keyword KEYWORD
|
||||
Keyword used to refine search results
|
||||
-c, --check Perform domain reputation checks
|
||||
-f FILENAME, --filename FILENAME
|
||||
-c, --check Perform domain reputation checks
|
||||
-f FILENAME, --filename FILENAME
|
||||
Specify input file of line delimited domain names to
|
||||
check
|
||||
--ocr Perform OCR on CAPTCHAs when present
|
||||
-r MAXRESULTS, --maxresults MAXRESULTS
|
||||
--ocr Perform OCR on CAPTCHAs when challenged
|
||||
-r MAXRESULTS, --maxresults MAXRESULTS
|
||||
Number of results to return when querying latest
|
||||
expired/deleted domains
|
||||
-s SINGLE, --single SINGLE
|
||||
-s SINGLE, --single SINGLE
|
||||
Performs detailed reputation checks against a single
|
||||
domain name/IP.
|
||||
-t {0,1,2,3,4,5}, --timing {0,1,2,3,4,5}
|
||||
-t {0,1,2,3,4,5}, --timing {0,1,2,3,4,5}
|
||||
Modifies request timing to avoid CAPTCHAs. Slowest(0)
|
||||
= 90-120 seconds, Default(3) = 10-20 seconds,
|
||||
Fastest(5) = no delay
|
||||
-w MAXWIDTH, --maxwidth MAXWIDTH
|
||||
-w MAXWIDTH, --maxwidth MAXWIDTH
|
||||
Width of text table
|
||||
-V, --version show program's version number and exit
|
||||
-V, --version show program's version number and exit
|
||||
|
||||
Examples:
|
||||
./domainhunter.py -k apples -c --ocr -t5
|
||||
./domainhunter.py --check --ocr -t3
|
||||
./domainhunter.py --single mydomain.com
|
||||
./domainhunter.py --keyword tech --check --ocr --timing 5 --alexa
|
||||
./domainhunter.py --filename inputlist.txt --ocr --timing 5
|
||||
|
||||
Use defaults to check for most recent 100 domains and check reputation
|
||||
|
||||
|
|
146
domainhunter.py
146
domainhunter.py
|
@ -293,7 +293,8 @@ if __name__ == "__main__":
|
|||
|
||||
parser = argparse.ArgumentParser(
|
||||
description='Finds expired domains, domain categorization, and Archive.org history to determine good candidates for C2 and phishing domains',
|
||||
epilog = '''Examples:
|
||||
epilog = '''
|
||||
Examples:
|
||||
./domainhunter.py -k apples -c --ocr -t5
|
||||
./domainhunter.py --check --ocr -t3
|
||||
./domainhunter.py --single mydomain.com
|
||||
|
@ -313,8 +314,6 @@ if __name__ == "__main__":
|
|||
parser.add_argument('-V','--version', action='version',version='%(prog)s {version}'.format(version=__version__))
|
||||
args = parser.parse_args()
|
||||
|
||||
|
||||
|
||||
# Load dependent modules
|
||||
try:
|
||||
import requests
|
||||
|
@ -495,6 +494,8 @@ If you plan to use this content for illegal purpose, don't. Have a nice day :)'
|
|||
#print(soup)
|
||||
try:
|
||||
table = soup.find("table")
|
||||
|
||||
rows = table.findAll('tr')[1:]
|
||||
for row in table.findAll('tr')[1:]:
|
||||
|
||||
# Alternative way to extract domain name
|
||||
|
@ -510,60 +511,118 @@ If you plan to use this content for illegal purpose, don't. Have a nice day :)'
|
|||
c2 = cells[2].find(text=True) # domainpop
|
||||
c3 = cells[3].find(text=True) # birth
|
||||
c4 = cells[4].find(text=True) # Archive.org entries
|
||||
c5 = cells[5].find(text=True) # similarweb
|
||||
c6 = cells[6].find(text=True) # similarweb country code
|
||||
c7 = cells[7].find(text=True) # Dmoz.org
|
||||
c8 = cells[8].find(text=True) # status com
|
||||
c9 = cells[9].find(text=True) # status net
|
||||
c10 = cells[10].find(text=True) # status org
|
||||
c11 = cells[11].find(text=True) # status de
|
||||
c12 = cells[12].find(text=True) # tld registered
|
||||
c13 = cells[13].find(text=True) # Source List
|
||||
c14 = cells[14].find(text=True) # Domain Status
|
||||
c15 = "" # Related Domains
|
||||
c5 = cells[5].find(text=True) # Alexa
|
||||
c6 = cells[6].find(text=True) # Dmoz.org
|
||||
c7 = cells[7].find(text=True) # status com
|
||||
c8 = cells[8].find(text=True) # status net
|
||||
c9 = cells[9].find(text=True) # status org
|
||||
c10 = cells[10].find(text=True) # status de
|
||||
c11 = cells[11].find(text=True) # TLDs
|
||||
c12 = cells[12].find(text=True) # RDT
|
||||
c13 = cells[13].find(text=True) # List
|
||||
c14 = cells[14].find(text=True) # Status
|
||||
c15 = "" # Links
|
||||
|
||||
# create available TLD list
|
||||
available = ''
|
||||
if c7 == "available":
|
||||
available += ".com "
|
||||
|
||||
if c8 == "available":
|
||||
available += ".net "
|
||||
|
||||
if c9 == "available":
|
||||
available += ".org "
|
||||
|
||||
if c10 == "available":
|
||||
available += ".de "
|
||||
|
||||
# Only grab status for keyword searches since it doesn't exist otherwise
|
||||
status = ""
|
||||
if keyword:
|
||||
status = c14
|
||||
|
||||
# Only add Expired, not Pending, Backorder, etc
|
||||
if c13 == "Expired":
|
||||
# Append parsed domain data to list if it matches our criteria (.com|.net|.org and not a known malware domain)
|
||||
#if (c0.lower().endswith(".com") or c0.lower().endswith(".net") or c0.lower().endswith(".org")) and (c0 not in maldomainsList):
|
||||
# domain_list.append([c0,c3,c4,available,status])
|
||||
|
||||
# Add other TLDs to list if marked available
|
||||
if (c7 == "available") and (c0 not in maldomainsList):
|
||||
dom = c0.split(".")[0] + ".com"
|
||||
domain_list.append([dom,c3,c4,available,status])
|
||||
|
||||
if (c8 == "available") and (c0 not in maldomainsList):
|
||||
dom = c0.split(".")[0] + ".net"
|
||||
domain_list.append([dom,c3,c4,available,status])
|
||||
|
||||
if (c9 == "available") and (c0 not in maldomainsList):
|
||||
dom = c0.split(".")[0] + ".org"
|
||||
domain_list.append([dom,c3,c4,available,status])
|
||||
|
||||
if (c10 == "available") and (c0 not in maldomainsList):
|
||||
dom = c0.split(".")[0] + ".de"
|
||||
domain_list.append([dom,c3,c4,available,status])
|
||||
|
||||
# Non-keyword search table format is slightly different
|
||||
else:
|
||||
|
||||
c0 = cells[0].find(text=True) # domain
|
||||
c1 = cells[1].find(text=True) # bl
|
||||
c2 = cells[2].find(text=True) # domainpop
|
||||
c3 = cells[3].find(text=True) # birth
|
||||
c4 = cells[4].find(text=True) # Archive.org entries
|
||||
c5 = cells[5].find(text=True) # similarweb
|
||||
c6 = cells[6].find(text=True) # similarweb country code
|
||||
c7 = cells[7].find(text=True) # Dmoz.org
|
||||
c8 = cells[8].find(text=True) # status com
|
||||
c9 = cells[9].find(text=True) # status net
|
||||
c10 = cells[10].find(text=True) # status org
|
||||
c11 = cells[11].find(text=True) # status de
|
||||
c12 = cells[12].find(text=True) # tld registered
|
||||
c13 = cells[13].find(text=True) # changes
|
||||
c14 = cells[14].find(text=True) # whois
|
||||
c5 = cells[5].find(text=True) # Alexa
|
||||
c6 = cells[6].find(text=True) # Dmoz.org
|
||||
c7 = cells[7].find(text=True) # status com
|
||||
c8 = cells[8].find(text=True) # status net
|
||||
c9 = cells[9].find(text=True) # status org
|
||||
c10 = cells[10].find(text=True) # status de
|
||||
c11 = cells[11].find(text=True) # TLDs
|
||||
c12 = cells[12].find(text=True) # RDT
|
||||
c13 = cells[13].find(text=True) # End Date
|
||||
c14 = cells[14].find(text=True) # Links
|
||||
|
||||
# create available TLD list
|
||||
available = ''
|
||||
if c7 == "available":
|
||||
available += ".com "
|
||||
|
||||
available = ''
|
||||
if c8 == "available":
|
||||
available += ".com "
|
||||
if c8 == "available":
|
||||
available += ".net "
|
||||
|
||||
if c9 == "available":
|
||||
available += ".net "
|
||||
if c9 == "available":
|
||||
available += ".org "
|
||||
|
||||
if c10 == "available":
|
||||
available += ".org "
|
||||
if c10 == "available":
|
||||
available += ".de "
|
||||
|
||||
if c11 == "available":
|
||||
available += ".de "
|
||||
status = ""
|
||||
|
||||
# Only grab status for keyword searches since it doesn't exist otherwise
|
||||
status = ""
|
||||
if keyword:
|
||||
status = c14
|
||||
# Add other TLDs to list if marked available
|
||||
if (c7 == "available") and (c0 not in maldomainsList):
|
||||
dom = c0.split(".")[0] + ".com"
|
||||
domain_list.append([dom,c3,c4,available,status])
|
||||
|
||||
# Append parsed domain data to list if it matches our criteria (.com|.net|.org and not a known malware domain)
|
||||
if (c0.lower().endswith(".com") or c0.lower().endswith(".net") or c0.lower().endswith(".org")) and (c0 not in maldomainsList):
|
||||
domain_list.append([c0,c3,c4,available,status])
|
||||
if (c8 == "available") and (c0 not in maldomainsList):
|
||||
dom = c0.split(".")[0] + ".net"
|
||||
domain_list.append([dom,c3,c4,available,status])
|
||||
|
||||
if (c9 == "available") and (c0 not in maldomainsList):
|
||||
dom = c0.split(".")[0] + ".org"
|
||||
domain_list.append([dom,c3,c4,available,status])
|
||||
|
||||
if (c10 == "available") and (c0 not in maldomainsList):
|
||||
dom = c0.split(".")[0] + ".de"
|
||||
domain_list.append([dom,c3,c4,available,status])
|
||||
|
||||
# Append original parsed domain data to list if it matches our criteria (.com|.net|.org and not a known malware domain)
|
||||
#if (c0.lower().endswith(".com") or c0.lower().endswith(".net") or c0.lower().endswith(".org")) and (c0 not in maldomainsList):
|
||||
# domain_list.append([c0,c3,c4,available,status])
|
||||
|
||||
except Exception as e:
|
||||
#print(e)
|
||||
print("[!] Error: ", e)
|
||||
pass
|
||||
|
||||
# Add additional sleep on requests to ExpiredDomains.net to avoid errors
|
||||
|
@ -577,7 +636,10 @@ If you plan to use this content for illegal purpose, don't. Have a nice day :)'
|
|||
if check:
|
||||
print("\n[*] Performing reputation checks for {} domains".format(len(domain_list)))
|
||||
|
||||
for domain_entry in domain_list:
|
||||
domain_list_unique = []
|
||||
[domain_list_unique.append(item) for item in domain_list if item not in domain_list_unique]
|
||||
|
||||
for domain_entry in domain_list_unique:
|
||||
domain = domain_entry[0]
|
||||
birthdate = domain_entry[1]
|
||||
archiveentries = domain_entry[2]
|
||||
|
|
Loading…
Reference in New Issue