Create Reconix.py

main
ShadowByte 2024-08-20 08:58:35 +10:00 committed by GitHub
parent 74d6ed7bca
commit 8eb2fac3bc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed files with 106 additions and 0 deletions

106
Reconix.py Normal file
View File

@ -0,0 +1,106 @@
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
import time
# --- File paths ---
dorks_file = "dorks.txt"     # input: one Google dork per line
output_file = "scraped.txt"  # output: matching result URLs are appended here

# --- Setup Firefox with Geckodriver ---
# FIX: `options.headless = True` was deprecated in Selenium 4 and removed in
# later releases; the argument form enables headless mode on all 4.x versions.
# NOTE(review): the script later asks the user to solve CAPTCHAs / accept
# cookie prompts manually, which is impossible with no visible window —
# confirm headless is really intended here.
options = Options()
options.add_argument("--headless")  # Run Firefox in headless mode
driver = webdriver.Firefox(options=options)
# Function to check for CAPTCHA
def check_for_captcha():
    """Return True if Google's CAPTCHA interstitial is on the current page.

    Reads the module-level ``driver``. ``find_element`` raises
    ``NoSuchElementException`` when the element is absent, so merely
    reaching the line after it proves the element exists — the original
    ``True if captcha_present else False`` conditional was dead code.
    """
    try:
        driver.find_element(By.ID, "captcha-form")
        return True
    except NoSuchElementException:
        return False
# Function to filter URLs
def is_valid_url(url):
    """Return True for a non-empty URL whose text does not mention 'google'.

    Used to drop Google's own navigation/search links from the results;
    None and empty strings are rejected as well.
    """
    if not url:
        return False
    return "google" not in url.lower()
# Function to handle user acceptance prompts
def wait_for_user_acceptance():
    """Block until the current page's <body> element is reachable.

    Polls every 5 seconds via the module-level ``driver``; intended to give
    the user time to click through cookie/consent prompts before scraping.
    """
    print("Please accept any cookies or agreements if prompted.")
    while True:
        time.sleep(5)
        try:
            # If the body is present, assume the prompts have been dealt with.
            driver.find_element(By.TAG_NAME, "body")
        except NoSuchElementException:
            print("Waiting for user to accept the prompt...")
            continue
        break
# Function to scrape Google search results for a given dork
def scrape_dork(dork):
    """Run one Google ``inurl:`` search for *dork* and append hits to output_file.

    Side effects: navigates the module-level ``driver``, prints progress to
    stdout, and appends every non-Google result URL to ``output_file``.
    Any unexpected error is reported and swallowed so the caller's loop can
    continue with the next dork.
    """
    try:
        query = dork.strip()
        print(f"Processing dork: {query}")  # Debugging line
        # Construct the Google search URL
        search_url = f"https://www.google.com/search?q=inurl:\"{query}\""
        # Open the Google search URL
        driver.get(search_url)
        time.sleep(3)  # Wait for the page to load
        # Check if a CAPTCHA is present
        if check_for_captcha():
            print("CAPTCHA detected. Please solve it manually.")
            # BUG FIX: the original `while not solved:` loop could only exit
            # once the CAPTCHA was gone, so its "not solved ... Moving on"
            # branch was unreachable and the script hung forever on an
            # unsolved CAPTCHA. Poll with an upper bound instead.
            solved = False
            for _ in range(30):  # up to ~5 minutes (30 polls x 10 s)
                time.sleep(10)
                if not check_for_captcha():
                    solved = True
                    break
            if solved:
                print("CAPTCHA solved. Continuing...")
            else:
                print("CAPTCHA not solved after waiting. Moving on.")
                return  # Skip this dork if CAPTCHA is still present
        # Wait for the user to accept any prompts
        wait_for_user_acceptance()
        # Manually iterate over search result links
        links = driver.find_elements(By.XPATH, "//a[@href]")
        print(f"Found {len(links)} links.")  # Debugging line
        with open(output_file, 'a') as f:
            for link in links:
                try:
                    url = link.get_attribute("href")
                    if is_valid_url(url):
                        # Capture URLs that do not contain 'google'
                        print(f"Saving URL: {url}")  # Output to console
                        f.write(url + "\n")  # Write to file
                    else:
                        print(f"Skipping URL: {url}")  # Skip Google-related URLs
                except NoSuchElementException:
                    print("Element not found. Skipping...")
                    continue
    except Exception as e:
        print(f"An error occurred for dork: {dork} -> {e}")
# Read dorks from the file
with open(dorks_file, 'r') as file:
    dorks = file.readlines()

# Iterate over all dorks and scrape Google search results.
# FIX: the original quit() was only reached on a clean run; an uncaught
# exception (e.g. Ctrl-C during the sleep) leaked a headless Firefox
# process. try/finally guarantees the browser is always closed.
try:
    for dork in dorks:
        scrape_dork(dork)
        time.sleep(10)  # Sleep to prevent being flagged
finally:
    # Close the browser
    driver.quit()

print("Scraping completed. Results are saved in scraped.txt")