Update Reconix.py

Added a pages option (-P/--pages) that limits how many search result pages are scraped per dork (default 5, max 30).
Branch: main
Authored by ShadowByte on 2024-08-20 12:04:11 +10:00, committed by GitHub
parent 8804ba2c98
commit 9013ef07f5
1 changed file with 42 additions and 19 deletions


@@ -45,7 +45,7 @@ def wait_for_user_acceptance():
             continue
 
 # Function to scrape Google search results for a given dork
-def scrape_dork(dork):
+def scrape_dork(dork, max_pages):
     try:
         print(f"Processing dork: {dork.strip()}")  # Debugging line
         # Construct the Google search URL
@@ -71,23 +71,42 @@ def scrape_dork(dork):
 
         # Wait for the user to accept any prompts
         wait_for_user_acceptance()
 
-        # Manually iterate over search result links
-        links = driver.find_elements(By.XPATH, "//a[@href]")
-        print(f"Found {len(links)} links.")  # Debugging line
-        with open(output_file, 'a') as f:
-            for link in links:
-                try:
-                    url = link.get_attribute("href")
-                    if is_valid_url(url):
-                        # Capture URLs that do not contain 'google'
-                        print(f"Saving URL: {url}")  # Output to console
-                        f.write(url + "\n")  # Write to file
-                    else:
-                        print(f"Skipping URL: {url}")  # Skip Google-related URLs
-                except NoSuchElementException:
-                    print("Element not found. Skipping...")
-                    continue
+        page_count = 0
+        while True:
+            # Manually iterate over search result links
+            links = driver.find_elements(By.XPATH, "//a[@href]")
+            print(f"Found {len(links)} links on page {page_count + 1}.")  # Debugging line
+            with open(output_file, 'a') as f:
+                for link in links:
+                    try:
+                        url = link.get_attribute("href")
+                        if is_valid_url(url):
+                            # Capture URLs that do not contain 'google'
+                            print(f"Saving URL: {url}")  # Output to console
+                            f.write(url + "\n")  # Write to file
+                        else:
+                            print(f"Skipping URL: {url}")  # Skip Google-related URLs
+                    except NoSuchElementException:
+                        print("Element not found. Skipping...")
+                        continue
+
+            # Increment page counter
+            page_count += 1
+
+            # Check if we've reached the max number of pages
+            if page_count >= max_pages:
+                print(f"Reached the maximum number of pages ({max_pages}) for this dork.")
+                break
+
+            # Check if there's a "Next" button to go to the next page
+            try:
+                next_button = driver.find_element(By.XPATH, "//a[@id='pnnext']")
+                next_button.click()
+                time.sleep(3)  # Wait for the next page to load
+            except NoSuchElementException:
+                print("No more pages left.")
+                break
 
     except Exception as e:
         print(f"An error occurred for dork: {dork} -> {e}")
@@ -97,18 +116,22 @@ def main():
     parser = argparse.ArgumentParser(description="Google Dork Scraper")
     parser.add_argument("-D", "--dork", help="Single Google dork to use", required=False)
     parser.add_argument("-F", "--file", help="File containing a list of Google dorks", required=False)
+    parser.add_argument("-P", "--pages", help="Maximum number of pages to scrape per dork (default: 5, max: 30)", type=int, default=5)
     args = parser.parse_args()
 
+    # Validate the number of pages
+    max_pages = min(max(1, args.pages), 30)
+
     # Check if the user provided a dork or a file
     if args.dork:
-        scrape_dork(args.dork)
+        scrape_dork(args.dork, max_pages)
     elif args.file:
         if os.path.isfile(args.file):
             with open(args.file, 'r') as file:
                 dorks = file.readlines()
             for dork in dorks:
-                scrape_dork(dork)
+                scrape_dork(dork, max_pages)
                 time.sleep(10)  # Sleep to prevent being flagged
         else:
             print(f"File {args.file} does not exist.")
 