#!/usr/bin/env python

## Title:       domainhunter.py
## Author:      @joevest and @andrewchiles
## Description: Checks expired domains, reputation/categorization, and Archive.org history to determine
##              good candidates for phishing and C2 domain names

# If the expected response format from a provider changes, use the traceback module to get a
# full stack trace without removing try/except blocks
#import traceback
#traceback.print_exc()

import time
import random
import argparse
import json
import base64
import os

__version__ = "20180917"
## Functions

def doSleep(timing):
    if timing == 0:
        time.sleep(random.randrange(90, 120))
    elif timing == 1:
        time.sleep(random.randrange(60, 90))
    elif timing == 2:
        time.sleep(random.randrange(30, 60))
    elif timing == 3:
        time.sleep(random.randrange(10, 20))
    elif timing == 4:
        time.sleep(random.randrange(5, 10))
    # There's no elif timing == 5 here because we don't want to sleep for -t 5
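
# Example: with the default -t 3, doSleep(timing) pauses 10-19 seconds between requests;
# -t 0 stretches the pause to 90-119 seconds, and -t 5 skips the delay entirely.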

def checkBluecoat(domain):
    try:
        url = 'https://sitereview.bluecoat.com/resource/lookup'
        postData = {'url': domain, 'captcha': ''}
        headers = {'User-Agent': useragent,
                   'Accept': 'application/json, text/plain, */*',
                   'Content-Type': 'application/json; charset=UTF-8',
                   'Referer': 'https://sitereview.bluecoat.com/lookup'}

        print('[*] BlueCoat: {}'.format(domain))
        response = s.post(url, headers=headers, json=postData, verify=False)
        responseJSON = json.loads(response.text)

        if 'errorType' in responseJSON:
            a = responseJSON['errorType']
        else:
            a = responseJSON['categorization'][0]['name']

        # Print notice if CAPTCHAs are blocking accurate results and attempt to solve if --ocr
        if a == 'captcha':
            if ocr:
                # This request is also performed by a browser, but is not needed for our purposes
                #captcharequestURL = 'https://sitereview.bluecoat.com/resource/captcha-request'
                print('[*] Received CAPTCHA challenge!')
                captcha = solveCaptcha('https://sitereview.bluecoat.com/resource/captcha.jpg', s)

                if captcha:
                    b64captcha = base64.urlsafe_b64encode(captcha.encode('utf-8')).decode('utf-8')

                    # Send the CAPTCHA solution via GET since inclusion with the domain categorization request doesn't work anymore
                    captchasolutionURL = 'https://sitereview.bluecoat.com/resource/captcha-request/{0}'.format(b64captcha)
                    print('[*] Submitting CAPTCHA at {0}'.format(captchasolutionURL))
                    response = s.get(url=captchasolutionURL, headers=headers, verify=False)

                    # Try the categorization request again
                    response = s.post(url, headers=headers, json=postData, verify=False)
                    responseJSON = json.loads(response.text)

                    if 'errorType' in responseJSON:
                        a = responseJSON['errorType']
                    else:
                        a = responseJSON['categorization'][0]['name']
                else:
                    print('[-] Error: Failed to solve BlueCoat CAPTCHA with OCR! Manually solve at "https://sitereview.bluecoat.com/sitereview.jsp"')
            else:
                print('[-] Error: BlueCoat CAPTCHA received. Try the --ocr flag or manually solve a CAPTCHA at "https://sitereview.bluecoat.com/sitereview.jsp"')

        return a

    except Exception as e:
        print('[-] Error retrieving Bluecoat reputation! {0}'.format(e))
        return "-"
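
# For reference, the code above expects a successful lookup to return JSON shaped like
#   {"categorization": [{"name": "<category>"}, ...], ...}
# and an error or CAPTCHA challenge to return {"errorType": "captcha"} (or another error string).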

def checkIBMXForce(domain):
    try:
        url = 'https://exchange.xforce.ibmcloud.com/url/{}'.format(domain)
        headers = {'User-Agent': useragent,
                   'Accept': 'application/json, text/plain, */*',
                   'x-ui': 'XFE',
                   'Origin': url,
                   'Referer': url}

        print('[*] IBM X-Force: {}'.format(domain))

        url = 'https://api.xforce.ibmcloud.com/url/{}'.format(domain)
        response = s.get(url, headers=headers, verify=False)
        responseJSON = json.loads(response.text)

        if 'error' in responseJSON:
            a = responseJSON['error']
        elif not responseJSON['result']['cats']:
            a = 'Uncategorized'
        ## TODO: Add a notice when the "intrusion" category is returned, as this indicates
        ## the endpoint's rate limit / brute-force protection was tripped
        else:
            # Append each dictionary key to a single string to get the category names
            categories = ''
            for key in responseJSON["result"]['cats']:
                categories += '{0}, '.format(str(key))

            a = '{0}(Score: {1})'.format(categories, str(responseJSON['result']['score']))

        return a

    except:
        print('[-] Error retrieving IBM X-Force reputation!')
        return "-"
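
# For reference, the code above expects the X-Force API to return JSON shaped like
#   {"result": {"cats": {"<category>": <confidence>, ...}, "score": <score>}}
# on success, or a top-level "error" key when the URL is unknown or the request fails.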

def checkTalos(domain):
    url = 'https://www.talosintelligence.com/sb_api/query_lookup?query=%2Fapi%2Fv2%2Fdetails%2Fdomain%2F&query_entry={0}&offset=0&order=ip+asc'.format(domain)
    headers = {'User-Agent': useragent,
               'Referer': url}

    print('[*] Cisco Talos: {}'.format(domain))
    try:
        response = s.get(url, headers=headers, verify=False)
        responseJSON = json.loads(response.text)

        if 'error' in responseJSON:
            a = str(responseJSON['error'])
            if a == "Unfortunately, we can't find any results for your search.":
                a = 'Uncategorized'
        elif responseJSON['category'] is None:
            a = 'Uncategorized'
        else:
            a = '{0} (Score: {1})'.format(str(responseJSON['category']['description']), str(responseJSON['web_score_name']))

        return a

    except:
        print('[-] Error retrieving Talos reputation!')
        return "-"
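
# For reference, the code above expects the Talos endpoint to return JSON shaped like
#   {"category": {"description": "<category>"}, "web_score_name": "<score name>"}
# on success, with "category" set to null for uncategorized domains.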

def checkMXToolbox(domain):
    url = 'https://mxtoolbox.com/Public/Tools/BrandReputation.aspx'
    headers = {'User-Agent': useragent,
               'Origin': url,
               'Referer': url}

    print('[*] Google SafeBrowsing and PhishTank: {}'.format(domain))
    try:
        response = s.get(url=url, headers=headers)
        soup = BeautifulSoup(response.content, 'lxml')

        # Scrape the ASP.NET form state fields required for the POST
        viewstate = soup.select('input[name=__VIEWSTATE]')[0]['value']
        viewstategenerator = soup.select('input[name=__VIEWSTATEGENERATOR]')[0]['value']
        eventvalidation = soup.select('input[name=__EVENTVALIDATION]')[0]['value']

        data = {
            "__EVENTTARGET": "",
            "__EVENTARGUMENT": "",
            "__VIEWSTATE": viewstate,
            "__VIEWSTATEGENERATOR": viewstategenerator,
            "__EVENTVALIDATION": eventvalidation,
            "ctl00$ContentPlaceHolder1$brandReputationUrl": domain,
            "ctl00$ContentPlaceHolder1$brandReputationDoLookup": "Brand Reputation Lookup",
            "ctl00$ucSignIn$hfRegCode": 'missing',
            "ctl00$ucSignIn$hfRedirectSignUp": '/Public/Tools/BrandReputation.aspx',
            "ctl00$ucSignIn$hfRedirectLogin": '',
            "ctl00$ucSignIn$txtEmailAddress": '',
            "ctl00$ucSignIn$cbNewAccount": 'cbNewAccount',
            "ctl00$ucSignIn$txtFullName": '',
            "ctl00$ucSignIn$txtModalNewPassword": '',
            "ctl00$ucSignIn$txtPhone": '',
            "ctl00$ucSignIn$txtCompanyName": '',
            "ctl00$ucSignIn$drpTitle": '',
            "ctl00$ucSignIn$txtTitleName": '',
            "ctl00$ucSignIn$txtModalPassword": ''
        }

        response = s.post(url=url, headers=headers, data=data)
        soup = BeautifulSoup(response.content, 'lxml')

        a = ''
        if soup.select('div[id=ctl00_ContentPlaceHolder1_noIssuesFound]'):
            a = 'No issues found'
            return a
        else:
            if soup.select('div[id=ctl00_ContentPlaceHolder1_googleSafeBrowsingIssuesFound]'):
                a = 'Google SafeBrowsing Issues Found. '
            if soup.select('div[id=ctl00_ContentPlaceHolder1_phishTankIssuesFound]'):
                a += 'PhishTank Issues Found'
            return a

    except Exception as e:
        print('[-] Error retrieving Google SafeBrowsing and PhishTank reputation! {0}'.format(e))
        return "-"
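
# Note: checkMXToolbox works by replaying MXToolbox's ASP.NET postback; the __VIEWSTATE,
# __VIEWSTATEGENERATOR, and __EVENTVALIDATION tokens scraped from the initial GET are
# required for the server to accept the form submission.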

def downloadMalwareDomains(malwaredomainsURL):
    url = malwaredomainsURL
    response = s.get(url=url, headers=headers, verify=False)
    responseText = response.text

    if response.status_code == 200:
        return responseText
    else:
        print("[-] Error reaching: {} Status: {}".format(url, response.status_code))

def checkDomain(domain):
    print('[*] Fetching domain reputation for: {}'.format(domain))

    if domain in maldomainsList:
        print("[!] {}: Identified as known malware domain (malwaredomains.com)".format(domain))

    bluecoat = checkBluecoat(domain)
    print("[+] {}: {}".format(domain, bluecoat))

    ibmxforce = checkIBMXForce(domain)
    print("[+] {}: {}".format(domain, ibmxforce))

    ciscotalos = checkTalos(domain)
    print("[+] {}: {}".format(domain, ciscotalos))

    mxtoolbox = checkMXToolbox(domain)
    print("[+] {}: {}".format(domain, mxtoolbox))

    print("")

    results = [domain, bluecoat, ibmxforce, ciscotalos, mxtoolbox]
    return results
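
# Illustrative checkDomain() return value (actual categorizations vary by service and over time):
#   ['example.com', 'Education', 'Education, (Score: 1)', 'Uncategorized', 'No issues found']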

def solveCaptcha(url, session):
    # Downloads the CAPTCHA image and saves it to the current directory for OCR with tesseract
    # Returns the CAPTCHA string or False if an error occurred

    jpeg = 'captcha.jpg'

    try:
        response = session.get(url=url, headers=headers, verify=False, stream=True)
        if response.status_code == 200:
            with open(jpeg, 'wb') as f:
                response.raw.decode_content = True
                shutil.copyfileobj(response.raw, f)
        else:
            print('[-] Error downloading CAPTCHA file!')
            return False

        # Perform basic OCR without additional image enhancement
        text = pytesseract.image_to_string(Image.open(jpeg))
        text = text.replace(" ", "")

        # Remove the CAPTCHA file
        try:
            os.remove(jpeg)
        except OSError:
            pass

        return text

    except Exception as e:
        print("[-] Error solving CAPTCHA - {0}".format(e))
        return False
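
# Example usage, as in checkBluecoat() above (requires the --ocr dependencies
# pytesseract, Pillow, and shutil to be loaded):
#   captcha = solveCaptcha('https://sitereview.bluecoat.com/resource/captcha.jpg', s)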

def drawTable(header, data):
    data.insert(0, header)
    t = Texttable(max_width=maxwidth)
    t.add_rows(data)
    t.header(header)
    return(t.draw())
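
# Example (illustrative):
#   header = ['Domain', 'BlueCoat']
#   print(drawTable(header, [['example.com', 'Uncategorized']]))
# renders a bordered two-column text table no wider than maxwidth characters.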

## MAIN
if __name__ == "__main__":

    parser = argparse.ArgumentParser(
        description='Finds expired domains, domain categorization, and Archive.org history to determine good candidates for C2 and phishing domains',
        epilog='''Examples:
./domainhunter.py -k apples -c --ocr -t5
./domainhunter.py --check --ocr -t3
./domainhunter.py --single mydomain.com
./domainhunter.py --keyword tech --check --ocr --timing 5 --alexa
./domainhunter.py --filename inputlist.txt --ocr --timing 5''',
        formatter_class=argparse.RawDescriptionHelpFormatter)

    parser.add_argument('-a', '--alexa', help='Filter results to Alexa listings', required=False, default=0, action='store_const', const=1)
    parser.add_argument('-k', '--keyword', help='Keyword used to refine search results', required=False, default=False, type=str, dest='keyword')
    parser.add_argument('-c', '--check', help='Perform domain reputation checks', required=False, default=False, action='store_true', dest='check')
    parser.add_argument('-f', '--filename', help='Specify input file of line delimited domain names to check', required=False, default=False, type=str, dest='filename')
    parser.add_argument('--ocr', help='Perform OCR on CAPTCHAs when challenged', required=False, default=False, action='store_true')
    parser.add_argument('-r', '--maxresults', help='Number of results to return when querying latest expired/deleted domains', required=False, default=100, type=int, dest='maxresults')
    parser.add_argument('-s', '--single', help='Performs detailed reputation checks against a single domain name/IP.', required=False, default=False, dest='single')
    parser.add_argument('-t', '--timing', help='Modifies request timing to avoid CAPTCHAs. Slowest(0) = 90-120 seconds, Default(3) = 10-20 seconds, Fastest(5) = no delay', required=False, default=3, type=int, choices=range(0, 6), dest='timing')
    parser.add_argument('-w', '--maxwidth', help='Width of text table', required=False, default=400, type=int, dest='maxwidth')
    parser.add_argument('-V', '--version', action='version', version='%(prog)s {version}'.format(version=__version__))
    args = parser.parse_args()

    # Load dependent modules
    try:
        import requests
        from bs4 import BeautifulSoup
        from texttable import Texttable
    except Exception as e:
        print("Expired Domains Reputation Check")
        print("[-] Missing basic dependencies: {}".format(str(e)))
        print("[*] Install required dependencies by running `pip3 install -r requirements.txt`")
        quit(0)

    # Load OCR related modules only if the --ocr flag is set since these can be difficult to get working
    if args.ocr:
        try:
            import pytesseract
            from PIL import Image
            import shutil
        except Exception as e:
            print("Expired Domains Reputation Check")
            print("[-] Missing OCR dependencies: {}".format(str(e)))
            print("[*] Install required Python dependencies by running: pip3 install -r requirements.txt")
            print("[*] Ubuntu/Debian - Install tesseract by running: apt-get install tesseract-ocr python3-imaging")
            print("[*] macOS - Install tesseract with homebrew by running: brew install tesseract")
            quit(0)

    ## Variables
    alexa = args.alexa
    keyword = args.keyword
    check = args.check
    filename = args.filename
    maxresults = args.maxresults
    single = args.single
    timing = args.timing
    maxwidth = args.maxwidth
    ocr = args.ocr

    malwaredomainsURL = 'http://mirror1.malwaredomains.com/files/justdomains'
    expireddomainsqueryURL = 'https://www.expireddomains.net/domain-name-search'

    timestamp = time.strftime("%Y%m%d_%H%M%S")
    useragent = 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)'
    headers = {'User-Agent': useragent}

    requests.packages.urllib3.disable_warnings()

    # HTTP Session container, used to manage cookies, session tokens and other session information
    s = requests.Session()

    title = '''
 ____   ___  __  __    _    ___ _   _   _   _ _   _ _   _ _____ _____ ____
|  _ \ / _ \|  \/  |  / \  |_ _| \ | | | | | | | | | \ | |_   _| ____|  _ \\
| | | | | | | |\/| | / _ \  | ||  \| | | |_| | | | |  \| | | | |  _| | |_) |
| |_| | |_| | |  | |/ ___ \ | || |\  | |  _  | |_| | |\  | | | | |___|  _ <
|____/ \___/|_|  |_/_/   \_\___|_| \_| |_| |_|\___/|_| \_| |_| |_____|_| \_\\'''

    print(title)
    print("")
    print("Expired Domains Reputation Checker")
    print("Authors: @joevest and @andrewchiles\n")
    print("DISCLAIMER: This is for educational purposes only!")
    disclaimer = '''It is designed to promote education and the improvement of computer/cyber security.
The authors or employers are not liable for any illegal act or misuse performed by any user of this tool.
If you plan to use this content for illegal purpose, don't. Have a nice day :)'''
    print(disclaimer)
    print("")

    # Download known malware domains
    print('[*] Downloading malware domain list from {}\n'.format(malwaredomainsURL))
    maldomains = downloadMalwareDomains(malwaredomainsURL)
    maldomainsList = maldomains.split("\n")

    # Retrieve reputation for a single chosen domain (Quick Mode)
    if single:
        checkDomain(single)
        exit(0)

    # Perform detailed domain reputation checks against an input file, print table, and quit
    if filename:
        # List of per-domain reputation results
        data = []
        try:
            with open(filename, 'r') as domainsList:
                for line in domainsList.read().splitlines():
                    data.append(checkDomain(line))
                    doSleep(timing)

                # Print results table
                header = ['Domain', 'BlueCoat', 'IBM X-Force', 'Cisco Talos', 'MXToolbox']
                print(drawTable(header, data))

        except KeyboardInterrupt:
            print('Caught keyboard interrupt. Exiting!')
            exit(0)
        except Exception as e:
            print('[-] Error: {}'.format(e))
            exit(1)
        exit(0)

    # Generic proxy support
    # TODO: add as a parameter
    proxies = {
        'http': 'http://127.0.0.1:8080',
        'https': 'http://127.0.0.1:8080',
    }

    # Create an initial session
    domainrequest = s.get("https://www.expireddomains.net", headers=headers, verify=False)

    # Use a proxy like Burp for debugging request/parsing errors
    #domainrequest = s.get("https://www.expireddomains.net",headers=headers,verify=False,proxies=proxies)

    # Lists for our ExpiredDomains results
    domain_list = []
    data = []

    # Generate the list of URLs to query for expired/deleted domains
    urls = []

    # Use the keyword string to narrow the domain search if provided
    if keyword:
        print('[*] Fetching expired or deleted domains containing "{}"'.format(keyword))
        for i in range(0, maxresults, 25):
            if i == 0:
                urls.append("{}/?q={}&fwhois=22&falexa={}".format(expireddomainsqueryURL, keyword, alexa))
                headers['Referer'] = 'https://www.expireddomains.net/domain-name-search/?q={}&start=1'.format(keyword)
            else:
                urls.append("{}/?start={}&q={}&fwhois=22&falexa={}".format(expireddomainsqueryURL, i, keyword, alexa))
                headers['Referer'] = 'https://www.expireddomains.net/domain-name-search/?start={}&q={}'.format((i - 25), keyword)

    # If no keyword is provided, generate a list of recently expired domain URLs (batches of 25 results)
    else:
        print('[*] Fetching expired or deleted domains...')

        # Calculate the number of URLs to request since we're performing a request for two different resources instead of one
        numresults = int(maxresults / 2)
        for i in range(0, numresults, 25):
            urls.append('https://www.expireddomains.net/backorder-expired-domains?start={}&ftlds[]=2&ftlds[]=3&ftlds[]=4&falexa={}'.format(i, alexa))
            urls.append('https://www.expireddomains.net/deleted-com-domains/?start={}&ftlds[]=2&ftlds[]=3&ftlds[]=4&falexa={}'.format(i, alexa))
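
    # Illustrative keyword-search URL produced above (keyword='tech', Alexa filter off):
    #   https://www.expireddomains.net/domain-name-search/?start=25&q=tech&fwhois=22&falexa=0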

    for url in urls:
        print("[*] {}".format(url))

        # Annoyingly, when querying specific keywords the expireddomains.net site requires additional
        # cookies that are set in JavaScript and not recognized by Requests, so we add them here manually.
        # May not be needed, but the _pk_id.10.dd0a cookie only requires a single . to be successful.
        # Random integers are introduced to somewhat match a real cookie while still being different.
        r1 = random.randint(100000, 999999)

        # Known good example _pk_id.10.dd0a cookie: 5abbbc772cbacfb1.1496760705.2.1496760705.1496760705
        pk_str = '5abbbc772cbacfb1' + '.1496' + str(r1) + '.2.1496' + str(r1) + '.1496' + str(r1)

        jar = requests.cookies.RequestsCookieJar()
        jar.set('_pk_ses.10.dd0a', '*', domain='expireddomains.net', path='/')
        jar.set('_pk_id.10.dd0a', pk_str, domain='expireddomains.net', path='/')

        domainrequest = s.get(url, headers=headers, verify=False, cookies=jar)
        #domainrequest = s.get(url,headers=headers,verify=False,cookies=jar,proxies=proxies)

        domains = domainrequest.text

        # Turn the HTML into a Beautiful Soup object
        soup = BeautifulSoup(domains, 'lxml')
        #print(soup)

        try:
            table = soup.find("table")

            for row in table.findAll('tr')[1:]:
                # Alternative way to extract the domain name
                # domain = row.find('td').find('a').text
                cells = row.findAll("td")

                if len(cells) >= 1:
                    if keyword:
                        c0 = row.find('td').find('a').text  # domain
                        c1 = cells[1].find(text=True)       # bl
                        c2 = cells[2].find(text=True)       # domainpop
                        c3 = cells[3].find(text=True)       # birth
                        c4 = cells[4].find(text=True)       # Archive.org entries
                        c5 = cells[5].find(text=True)       # similarweb
                        c6 = cells[6].find(text=True)       # similarweb country code
                        c7 = cells[7].find(text=True)       # Dmoz.org
                        c8 = cells[8].find(text=True)       # status com
                        c9 = cells[9].find(text=True)       # status net
                        c10 = cells[10].find(text=True)     # status org
                        c11 = cells[11].find(text=True)     # status de
                        c12 = cells[12].find(text=True)     # tld registered
                        c13 = cells[13].find(text=True)     # Source List
                        c14 = cells[14].find(text=True)     # Domain Status
                        c15 = ""                            # Related Domains

                    # Non-keyword search table format is slightly different
                    else:
                        c0 = cells[0].find(text=True)       # domain
                        c1 = cells[1].find(text=True)       # bl
                        c2 = cells[2].find(text=True)       # domainpop
                        c3 = cells[3].find(text=True)       # birth
                        c4 = cells[4].find(text=True)       # Archive.org entries
                        c5 = cells[5].find(text=True)       # similarweb
                        c6 = cells[6].find(text=True)       # similarweb country code
                        c7 = cells[7].find(text=True)       # Dmoz.org
                        c8 = cells[8].find(text=True)       # status com
                        c9 = cells[9].find(text=True)       # status net
                        c10 = cells[10].find(text=True)     # status org
                        c11 = cells[11].find(text=True)     # status de
                        c12 = cells[12].find(text=True)     # tld registered
                        c13 = cells[13].find(text=True)     # changes
                        c14 = cells[14].find(text=True)     # whois

                    # Build a string of the TLDs shown as available for this domain
                    available = ''
                    if c8 == "available":
                        available += ".com "
                    if c9 == "available":
                        available += ".net "
                    if c10 == "available":
                        available += ".org "
                    if c11 == "available":
                        available += ".de "

                    # Only grab status for keyword searches since it doesn't exist otherwise
                    status = ""
                    if keyword:
                        status = c14

                    # Append parsed domain data to the list if it matches our criteria (.com|.net|.org and not a known malware domain)
                    if (c0.lower().endswith(".com") or c0.lower().endswith(".net") or c0.lower().endswith(".org")) and (c0 not in maldomainsList):
                        domain_list.append([c0, c3, c4, available, status])

        except Exception as e:
            #print(e)
            pass

        # Add an additional sleep on requests to ExpiredDomains.net to avoid errors
        time.sleep(5)
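
    # Illustrative domain_list entry at this point, as built above
    # ([domain, birth year, Archive.org entries, available TLDs, status]):
    #   ['example.com', '2005', '42', '.net .org ', '']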

    # Check for valid list results before continuing
    if len(domain_list) == 0:
        print("[-] No domain results found or none are currently available for purchase!")
        exit(0)
    else:
        if check:
            print("\n[*] Performing reputation checks for {} domains".format(len(domain_list)))

        for domain_entry in domain_list:
            domain = domain_entry[0]
            birthdate = domain_entry[1]
            archiveentries = domain_entry[2]
            availabletlds = domain_entry[3]
            status = domain_entry[4]
            bluecoat = ''
            ibmxforce = ''
            ciscotalos = ''

            # Perform domain reputation checks
            if check:
                bluecoat = checkBluecoat(domain)
                print("[+] {}: {}".format(domain, bluecoat))

                ibmxforce = checkIBMXForce(domain)
                print("[+] {}: {}".format(domain, ibmxforce))

                ciscotalos = checkTalos(domain)
                print("[+] {}: {}".format(domain, ciscotalos))

                # Sleep to avoid CAPTCHAs
                doSleep(timing)
            # Mark reputation checks as skipped if the -c flag is not present
            else:
                bluecoat = '-'
                ibmxforce = '-'
                ciscotalos = '-'

            # Skip entries that every service reports as uncategorized or undesirable; keep any
            # domain where at least one service returned a potentially usable categorization
            if not ((bluecoat in ('Uncategorized', 'badurl', 'Suspicious', 'Malicious Sources/Malnets', 'captcha', 'Phishing')) and ibmxforce == "Not found." and ciscotalos == "Uncategorized"):
                data.append([domain, birthdate, archiveentries, availabletlds, status, bluecoat, ibmxforce, ciscotalos])

        # Sort the domain list by column 2 (Birth Year)
        sortedDomains = sorted(data, key=lambda x: x[1], reverse=True)

        if len(sortedDomains) == 0:
            print("\n[-] No domains discovered with a desirable categorization!")
            exit(0)
        else:
            print("\n[*] {} of {} domains discovered with a potentially desirable categorization!".format(len(sortedDomains), len(domain_list)))

        # Build HTML Table
        html = ''
        htmlHeader = '<html><head><title>Expired Domain List</title></head>'
        htmlBody = '<body><p>The following available domains report was generated at {}</p>'.format(timestamp)
        htmlTableHeader = '''
            <table border="1" align="center">
                <th>Domain</th>
                <th>Birth</th>
                <th>Entries</th>
                <th>TLDs Available</th>
                <th>Status</th>
                <th>BlueCoat</th>
                <th>IBM X-Force</th>
                <th>Cisco Talos</th>
                <th>WatchGuard</th>
                <th>Namecheap</th>
                <th>Archive.org</th>
            '''
        htmlTableBody = ''
        htmlTableFooter = '</table>'
        htmlFooter = '</body></html>'

        # Build HTML table contents
        for i in sortedDomains:
            htmlTableBody += '<tr>'
            htmlTableBody += '<td>{}</td>'.format(i[0])  # Domain
            htmlTableBody += '<td>{}</td>'.format(i[1])  # Birth
            htmlTableBody += '<td>{}</td>'.format(i[2])  # Entries
            htmlTableBody += '<td>{}</td>'.format(i[3])  # TLDs
            htmlTableBody += '<td>{}</td>'.format(i[4])  # Status
            htmlTableBody += '<td><a href="https://sitereview.bluecoat.com/" target="_blank">{}</a></td>'.format(i[5])  # BlueCoat
            htmlTableBody += '<td><a href="https://exchange.xforce.ibmcloud.com/url/{}" target="_blank">{}</a></td>'.format(i[0], i[6])  # IBM X-Force categorization
            htmlTableBody += '<td><a href="https://www.talosintelligence.com/reputation_center/lookup?search={}" target="_blank">{}</a></td>'.format(i[0], i[7])  # Cisco Talos
            htmlTableBody += '<td><a href="http://www.borderware.com/domain_lookup.php?ip={}" target="_blank">WatchGuard</a></td>'.format(i[0])  # Borderware WatchGuard
            htmlTableBody += '<td><a href="https://www.namecheap.com/domains/registration/results.aspx?domain={}" target="_blank">Namecheap</a></td>'.format(i[0])  # Namecheap
            htmlTableBody += '<td><a href="http://web.archive.org/web/*/{}" target="_blank">Archive.org</a></td>'.format(i[0])  # Archive.org
            htmlTableBody += '</tr>'

        html = htmlHeader + htmlBody + htmlTableHeader + htmlTableBody + htmlTableFooter + htmlFooter

        logfilename = "{}_domainreport.html".format(timestamp)
        log = open(logfilename, 'w')
        log.write(html)
        log.close()

        print("\n[*] Search complete")
        print("[*] Log written to {}\n".format(logfilename))

        # Print the text table
        header = ['Domain', 'Birth', '#', 'TLDs', 'Status', 'BlueCoat', 'IBM', 'Cisco Talos']
        print(drawTable(header, sortedDomains))