robust/robust.py

67 lines
1.8 KiB
Python
Raw Normal View History

2022-10-10 18:33:39 +00:00
import requests
from duckduckgo_search import ddg
import tldextract
import numpy as np
import threading
2022-11-17 04:58:45 +00:00
import argparse
2022-10-10 18:33:39 +00:00
import re
2022-11-17 04:58:45 +00:00
def parser():
    """Parse the command-line options and start the scan via ``runrobust``.

    Options:
        --url      Base URL that harvested entries are appended to (required).
        --codes    One or more HTTP status codes that count as a hit (required).
        -nosplit   Flag; keep each Disallow path whole instead of splitting it
                   on "/".  A trailing value is still accepted so that the
                   older ``-nosplit <anything>`` spelling keeps working.
        -cookie    Words joined with spaces to form the Cookie header.
        -uagent    Words joined with spaces to form the User-Agent header.
    """
    # Named differently from the enclosing function so it is not shadowed.
    arg_parser = argparse.ArgumentParser(description="robust args")
    arg_parser.add_argument(
        "--url",
        required=True,
        help="base URL to probe, e.g. https://example.com",
    )
    arg_parser.add_argument(
        "--codes",
        nargs="+",
        required=True,
        help="HTTP status codes to report, e.g. 200 301 403",
    )
    # nargs="?" + const turns this into a real switch: a bare -nosplit yields
    # True, while a value after it is still accepted and is truthy as before.
    arg_parser.add_argument(
        "-nosplit",
        nargs="?",
        const=True,
        default=False,
        help="probe each Disallow path as a whole instead of per segment",
    )
    arg_parser.add_argument(
        "-cookie",
        nargs="+",
        help="cookie header value (words are joined with spaces)",
    )
    arg_parser.add_argument(
        "-uagent",
        nargs="+",
        help="user-agent header value (words are joined with spaces)",
    )
    runrobust(arg_parser.parse_args())
def _disallow_entries(line, nosplit):
    """Return the cleaned candidate path names carried by one robots.txt line.

    Args:
        line: A single line of a robots.txt file.
        nosplit: When truthy the Disallow path is kept whole; otherwise it is
            split on "/" and every segment becomes its own candidate.

    Returns:
        A list of non-empty strings shorter than 25 characters that contain
        no "#", with any query string ("?..." / "&...") and "*" removed.
        The list is empty when the line has no "Disallow: " directive.
    """
    if "Disallow: " not in line:
        return []
    path = line.replace("Disallow: ", "").strip()
    # Wrap the unsplit path in a list so the loop below sees one candidate
    # rather than iterating over the characters of a string.  The leading "/"
    # is dropped because the caller joins candidates to the base URL with "/".
    parts = [path.lstrip("/")] if nosplit else path.split("/")

    entries = []
    for part in parts:
        part = part.split("?")[0].replace("*", "")
        part = part.split("&")[0].strip()
        if part and "#" not in part and len(part) < 25:
            entries.append(part)
    return entries


def runrobust(args):
    """Harvest path names from public robots.txt files and probe them on a target.

    Prints ``banner.txt``, searches DuckDuckGo for robots.txt files, extracts
    the Disallow entries of each one and requests ``<args.url>/<entry>``.
    Entries whose response status code is listed in ``args.codes`` are printed
    and remembered so they are not reported twice.

    Args:
        args: Parsed options exposing ``url``, ``codes``, ``nosplit``,
            ``cookie`` and ``uagent`` (the attributes set up by ``parser``).

    Raises:
        ValueError: If ``args.url`` is empty or missing.
    """
    if not args.url:
        raise ValueError("a target URL (--url) is required")

    base_url = args.url.rstrip("/")  # avoid "//" when joining with an entry
    wanted_codes = set(args.codes or ())
    timeout = 10  # seconds; without one a stalled server blocks its thread forever

    # Only send the headers that were actually supplied; joining a missing
    # option (None) would raise TypeError in every worker thread.
    headers = {}
    if args.cookie:
        headers["cookie"] = " ".join(args.cookie)
    if args.uagent:
        headers["user-agent"] = " ".join(args.uagent)

    found = []

    def bust(href):
        """Fetch one robots.txt and probe each usable Disallow entry."""
        try:
            robots = requests.get(href, timeout=timeout)
        except requests.RequestException:
            return  # best effort: an unreachable robots.txt is simply skipped

        # Entries containing the source site's own name are skipped below.
        site_name = tldextract.extract(href).domain.split(".")[0].lower()

        # splitlines() also copes with CRLF files; set() drops repeated lines.
        for line in set(robots.text.splitlines()):
            for entry in _disallow_entries(line, args.nosplit):
                if site_name in entry.lower() or entry in found:
                    continue
                try:
                    probe = requests.get(
                        f"{base_url}/{entry}",
                        allow_redirects=False,
                        headers=headers,
                        timeout=timeout,
                    )
                except requests.RequestException:
                    continue  # one failed probe should not discard the rest
                if str(probe.status_code) in wanted_codes:
                    print(f"found: {entry} [{probe.status_code}]")
                    found.append(entry)

    with open("banner.txt", "r") as banner:
        print(banner.read())

    keywords = "filetype:TXT +inurl:\"robots.txt\""
    results = ddg(keywords, max_results=250)
    # Guard against a search that comes back with no result list at all.
    hrefs = [res["href"] for res in results or []]

    # Work through the links in five batches, one thread per link, waiting
    # for a batch to finish before the next one is started.
    for chunk in np.array_split(hrefs, 5):
        threads = []
        for href in chunk:
            worker = threading.Thread(target=bust, args=(href,))
            threads.append(worker)
            worker.start()
        for worker in threads:
            worker.join()
2022-11-17 06:55:29 +00:00
2022-11-17 04:58:45 +00:00
# Script entry point: parse the command-line arguments and launch the scan.
parser()