dirgenerate/dirgenerate.py

35 lines
984 B
Python

import requests
from duckduckgo_search import DDGS
import tldextract
# Collect candidate directory names from the Disallow rules of robots.txt
# files found through DuckDuckGo and append the new ones to dirlist.txt.

REQUEST_TIMEOUT = 10    # seconds to wait for a single robots.txt download
MAX_ENTRY_LENGTH = 25   # entries of this length or longer are discarded
OUTPUT_PATH = "dirlist.txt"


def parse_disallow_entries(robots_text: str, site_label: str = "") -> list:
    """Return candidate directory names from the Disallow rules of a robots.txt body.

    Each ``Disallow`` value is stripped of any inline ``#`` comment and of its
    query string, split on ``/``, and every segment has ``*`` wildcards
    removed and surrounding whitespace trimmed.  A segment is dropped when it
    is empty, when it is ``MAX_ENTRY_LENGTH`` characters or longer, or when it
    contains ``site_label`` (compared case-insensitively).

    Args:
        robots_text: Raw text of a robots.txt file.
        site_label: Name of the site the file came from; segments containing
            it are skipped.  An empty label disables this filter.

    Returns:
        The surviving segments in file order, original case preserved.
        Duplicates are NOT removed here; the caller deduplicates across files.
    """
    label = site_label.lower()
    entries = []
    # splitlines() also handles CRLF files, so no "\r" leaks into entries.
    for line in robots_text.splitlines():
        directive, sep, value = line.partition(":")
        # Directive names are matched case-insensitively and the space after
        # the colon is optional; commented-out rules ("# Disallow: ...") fail
        # this comparison and are ignored.
        if not sep or directive.strip().lower() != "disallow":
            continue
        # Cut the inline comment and the query string BEFORE splitting on "/",
        # so that "/search?q=/x" does not yield "x" as a directory.
        path = value.split("#", 1)[0].split("?", 1)[0]
        for segment in path.split("/"):
            entry = segment.replace("*", "").strip()
            if not entry or len(entry) >= MAX_ENTRY_LENGTH:
                continue
            # Guard on `label`: "" is a substring of every string and would
            # otherwise reject every entry.
            if label and label in entry.lower():
                continue
            entries.append(entry)
    return entries


def main():
    """Prompt for a site, search for robots.txt files, and record new entries.

    Prints banner.txt, asks for an optional site restriction, queries
    DuckDuckGo, downloads each result, and appends every not-yet-seen
    Disallow segment to ``OUTPUT_PATH``.  Downloads that fail are reported
    and skipped.
    """
    with open("banner.txt", "r") as banner:
        print(banner.read())

    site = input("robots of (leave blank for all results): ").strip()
    if site != "":
        keywords = "site:{} +filetype:TXT +inurl:\"robots.txt\"".format(site)
    else:
        keywords = "filetype:TXT +inurl:\"robots.txt\""

    results = DDGS().text(keywords)

    seen = set()  # lowercased entries already written during this run
    with open(OUTPUT_PATH, "a") as mylist:
        for result in results:
            url = result["href"]
            try:
                response = requests.get(url, timeout=REQUEST_TIMEOUT)
            except requests.RequestException as exc:
                # Best effort: one unreachable host must not stop the run.
                print("skipped {}: {}".format(url, exc))
                continue

            # Computed once per URL rather than once per path segment.
            site_label = tldextract.extract(url).domain.split(".")[0]
            for entry in parse_disallow_entries(response.text, site_label):
                key = entry.lower()
                if key in seen:
                    continue
                seen.add(key)
                mylist.write(entry + "\n")
                print("added entry: " + entry)

    print("done")


if __name__ == "__main__":
    main()