from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException, StaleElementReferenceException
from urllib.parse import quote_plus
import time

# File paths
dorks_file = "dorks.txt"
output_file = "scraped.txt"

# Set up Firefox with Geckodriver.
# Note: the script below asks the user to solve CAPTCHAs and accept prompts
# manually, which requires a visible browser window; drop the --headless
# argument if you expect to interact with the page.
options = Options()
options.add_argument("--headless")  # Run Firefox in headless mode (Selenium 4 syntax)
driver = webdriver.Firefox(options=options)


def check_for_captcha():
    """Return True if Google's CAPTCHA form is present on the current page."""
    try:
        # find_element raises NoSuchElementException when the element is absent,
        # so reaching the return statement means the CAPTCHA div exists.
        driver.find_element(By.ID, "captcha-form")
        return True
    except NoSuchElementException:
        return False


def is_valid_url(url):
    """Keep only non-empty URLs that do not point back to Google itself."""
    return bool(url) and "google" not in url.lower()


def wait_for_user_acceptance():
    """Give the user time to accept any cookie or consent prompts."""
    print("Please accept any cookies or agreements if prompted.")
    accepted = False
    while not accepted:
        time.sleep(5)
        try:
            # A present <body> only confirms that a page has loaded; it is a
            # rough proxy for "the prompt has been dismissed".
            driver.find_element(By.TAG_NAME, "body")
            accepted = True
        except NoSuchElementException:
            print("Waiting for user to accept the prompt...")


def scrape_dork(dork):
    """Run one Google search for the given dork and append result URLs to the output file."""
    try:
        print(f"Processing dork: {dork}")  # Debugging line

        # Construct the Google search URL, quoting the dork so special
        # characters survive the query string.
        search_url = "https://www.google.com/search?q=" + quote_plus(f'inurl:"{dork}"')
        driver.get(search_url)
        time.sleep(3)  # Wait for the page to load

        # If a CAPTCHA is present, poll for a bounded time while the user solves it.
        if check_for_captcha():
            print("CAPTCHA detected. Please solve it manually.")
            solved = False
            for _ in range(6):  # Poll for up to ~60 seconds
                time.sleep(10)
                if not check_for_captcha():
                    solved = True
                    break
            if solved:
                print("CAPTCHA solved. Continuing...")
            else:
                print("CAPTCHA not solved after waiting. Moving on.")
                return  # Skip this dork if the CAPTCHA is still present

        # Wait for the user to accept any prompts
        wait_for_user_acceptance()

        # Collect every link on the results page and keep the non-Google ones.
        links = driver.find_elements(By.XPATH, "//a[@href]")
        print(f"Found {len(links)} links.")  # Debugging line
        with open(output_file, "a") as f:
            for link in links:
                try:
                    url = link.get_attribute("href")
                    if is_valid_url(url):
                        print(f"Saving URL: {url}")
                        f.write(url + "\n")
                    else:
                        print(f"Skipping URL: {url}")  # Google-internal link
                except (NoSuchElementException, StaleElementReferenceException):
                    # The DOM may change under us; skip links that went stale.
                    print("Element not found. Skipping...")
    except Exception as e:
        print(f"An error occurred for dork: {dork} -> {e}")


# Read dorks from the file, dropping blank lines and surrounding whitespace.
with open(dorks_file, "r") as file:
    dorks = [line.strip() for line in file if line.strip()]

# Iterate over all dorks and scrape Google search results.
for dork in dorks:
    scrape_dork(dork)
    time.sleep(10)  # Pause between queries to reduce the chance of being flagged

# Close the browser.
driver.quit()
print(f"Scraping completed. Results are saved in {output_file}")
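
# Optional refinement (a sketch, not wired into the script above): Selenium's
# explicit waits can replace the fixed time.sleep(3) after driver.get(), so the
# script proceeds as soon as the results render instead of always pausing.
# The element id "search" is an assumption about Google's result-page markup
# and may need adjusting if the layout changes.
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException


def wait_for_results(timeout=10):
    """Block until the results container appears, or return False on timeout."""
    try:
        WebDriverWait(driver, timeout).until(
            EC.presence_of_element_located((By.ID, "search"))  # assumed container id
        )
        return True
    except TimeoutException:
        return False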