#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Regex-driven scan of PHP sources for risky calls and embedded secrets.

The module walks a directory tree, analyses every file whose name contains
``.php`` and reports findings through ``display`` (star-imported from the
project's helper modules). Two module-level counters keep track of the number
of findings and of analysed files.
"""
import os
import re
import math
from indicators import *
from functions import *

result_count = 0
result_files = 0

# Keywords that flag an assignment as a potential hardcoded credential.
_CREDENTIAL_KEYWORDS = ('pass', 'secret', 'token', 'pwd')

# `$name = "constant"` assignments and `define("...")` calls.
# Raw strings: `\$`, `\w`, `\s` and `\(` are not valid Python string escapes.
# NOTE(review): `[\"|']` also accepts a literal `|` as a quote character;
# kept as-is so detection results do not change.
_CREDENTIAL_PATTERN = r"\$[\w\s]+\s?=\s?[\"|'].*[\"|']|define\([\"|'].*[\"|']\)"

# Any `something = "quoted value"` fragment, used for the entropy check.
_ASSIGNMENT_PATTERN = r".*?=\s?[\"|'].*?[\"|'].*?"

_BASE64_CHARS = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="
_HEX_CHARS = "1234567890abcdefABCDEF"


# Compute a Shannon entropy for a string based on an iterator
def shannon_entropy(data, iterator):
    """Return the Shannon entropy (base 2) of *data* over the given alphabet.

    Borrowed from
    http://blog.dkbza.org/2007/05/scanning-data-for-entropy-anomalies.html

    Only the symbols yielded by *iterator* contribute to the sum, but the
    frequencies are still divided by the full length of *data*, so characters
    outside the alphabet lower the result.

    :param data: string to measure.
    :param iterator: iterable of the symbols (alphabet) to count in *data*.
    :return: ``0`` for empty *data*, otherwise the entropy as a float.
    """
    if not data:
        return 0

    length = len(data)
    entropy = 0
    for symbol in iterator:
        p_x = float(data.count(symbol)) / length
        if p_x > 0:
            entropy += -p_x * math.log(p_x, 2)
    return entropy


def _scan_hardcoded_credentials(path, plain, content):
    """Report constant assignments whose text contains a credential keyword."""
    # The candidate list does not depend on the keyword: compute it once.
    candidates = re.compile(_CREDENTIAL_PATTERN, re.I).findall(content.replace(' ', ''))

    # Keyword-major order, so findings are reported in the historical order
    # (a candidate containing several keywords is reported once per keyword).
    for credential in _CREDENTIAL_KEYWORDS:
        for vuln_content in candidates:
            if credential in vuln_content.lower():
                payload = ["", "Hardcoded Credential", []]
                add_vuln_var(payload, plain, path, vuln_content, content, _CREDENTIAL_PATTERN)


def _scan_high_entropy_strings(path, plain, content):
    """Report quoted assignments that look like base64 or hexadecimal secrets."""
    candidates = re.compile(_ASSIGNMENT_PATTERN, re.I).findall(content.replace(' ', ''))

    for vuln_content in candidates:
        looks_random = (
            shannon_entropy(vuln_content, _BASE64_CHARS) >= 4.1
            or shannon_entropy(vuln_content, _HEX_CHARS) >= 2.5
        )
        if looks_random:
            payload = ["", "High Entropy String", []]
            add_vuln_var(payload, plain, path, vuln_content, content, _ASSIGNMENT_PATTERN)


def _scan_payloads(path, plain, content):
    """Report unprotected uses of the function payloads (RCE/SQLI/LFI/RFI/RFU/XSS/...)."""
    global result_count

    # Handle "require something" vs "require(something)":
    # dirty trick to force a parenthesis before the function's argument.
    marked_content = content.replace(" ", "(PLACEHOLDER")

    # Extracts every variable from a matched call (indicator regex without
    # its first and last two characters).
    variable_regex = re.compile(regex_indicators[2:-2])

    for payload in payloads:
        call_regex = re.compile(payload[0] + regex_indicators)

        for raw_match in call_regex.findall(marked_content):
            # Undo the placeholder trick on every captured fragment.
            vuln_content = [
                fragment.replace("(PLACEHOLDER", " ").replace("PLACEHOLDER", "")
                for fragment in raw_match
            ]

            # Security hole detected, is it protected ?
            if check_protection(payload[2], vuln_content):
                continue

            # Shared by all variables of this match: a variable that needs no
            # declaration lookup reuses the values of the previous one.
            declaration_text, line = "", ""
            occurence = 0

            # Managing multiple variable in a single line/function
            for vulnerable_var in variable_regex.findall("".join(vuln_content)):
                false_positive = False
                occurence += 1

                # No declaration for $_GET, $_POST ...
                if not check_exception(vulnerable_var[1]):
                    # Look for the declaration of $something = xxxxx
                    false_positive, declaration_text, line = check_declaration(
                        content, vulnerable_var[1], path)

                    # Set false positive if protection is in the variable's declaration
                    is_protected = check_protection(payload[2], declaration_text)
                    if is_protected:
                        false_positive = is_protected

                line_vuln = find_line_vuln(payload, vuln_content, content)

                # Check for not $dest="constant"; $dest='cste'; $dest=XX;
                if "$_" not in vulnerable_var[1]:
                    if "$" not in declaration_text.replace(vulnerable_var[1], ''):
                        false_positive = True

                if not false_positive:
                    result_count = result_count + 1
                    display(path, payload, vuln_content, line_vuln, declaration_text,
                            line, vulnerable_var[1], occurence, plain)


# Analyse the source code of a single page
def analysis(path, plain):
    """Analyse one source file and report every finding through ``display``.

    Runs three passes over the cleaned source: hardcoded credentials, high
    entropy strings, then the function-based payloads. Increments the
    module-level ``result_files`` counter; findings increment ``result_count``.

    :param path: path of the file to analyse.
    :param plain: forwarded unchanged to ``display``.
    """
    global result_files
    result_files += 1

    # Read everything up front so the file handle is released before analysis.
    with open(path, 'r', encoding='utf-8', errors='replace') as content_file:
        # Clean source for a better detection
        content = clean_source_and_format(content_file.read())

    # Hardcoded credentials (work as an exception, it's not function based)
    _scan_hardcoded_credentials(path, plain, content)
    _scan_high_entropy_strings(path, plain, content)
    _scan_payloads(path, plain, content)


# Run through every file and subdirectory
def recursive(dir, progress, plain):
    """Walk *dir* recursively and analyse every file whose name contains ``.php``.

    :param dir: directory to scan.
    :param progress: current depth; drives the length of the progress bar.
    :param plain: selects the progress character and is forwarded to ``analysis``.

    Prints an error and exits with status -1 when an ``OSError`` is raised
    while listing or reading.
    """
    progress += 1
    progress_indicator = "█" if plain else '⬛'

    try:
        for name in os.listdir(dir):
            print('\tAnalyzing : ' + progress_indicator * progress + '\r', end="\r")

            entry = os.path.join(dir, name)
            if os.path.isfile(entry):
                # Targeting only PHP files. Test the file name, not the whole
                # path: a directory called "site.php" must not make every file
                # below it look like PHP.
                if ".php" in name:
                    analysis(dir + "/" + name, plain)
            elif os.path.isdir(entry):
                # Only descend into real directories: dangling symlinks and
                # special files would make os.listdir fail and abort the scan.
                recursive(dir + "/" + name, progress, plain)

    except OSError:
        print("Error 404 - Not Found, maybe you need more right ?" + " " * 30)
        exit(-1)


# Display basic information about the scan
def scanresults():
    """Print the number of findings and of analysed files."""
    print("Found {} vulnerabilities in {} files".format(result_count, result_files))


def _locate_finding(vuln_content, page_content, regex_var_detect):
    """Return the 0-based index of the line of *page_content* holding the finding.

    Findings are extracted from a copy of the source without spaces, so each
    line is compared with its spaces removed. When no line contains the
    finding (for instance a match spanning several lines), fall back to the
    last line matching *regex_var_detect*, or ``-1`` when there is none.
    """
    pattern = re.compile(regex_var_detect, re.I)
    needle = vuln_content.replace(' ', '') if isinstance(vuln_content, str) else ''

    last_generic_hit = -1
    for index, text in enumerate(page_content.split('\n')):
        if needle and needle in text.replace(' ', ''):
            return index
        if pattern.search(text):
            last_generic_hit = index
    return last_generic_hit


def add_vuln_var(payload, plain, path, vuln_content, page_content, regex_var_detect, occurence=1):
    """Report a variable-based finding and count it.

    :param payload: payload description forwarded to ``display``.
    :param plain: forwarded unchanged to ``display``.
    :param path: path of the analysed file.
    :param vuln_content: matched text; also used as declaration and as the
        highlighted part of the report.
    :param page_content: cleaned source in which the line is looked up.
    :param regex_var_detect: pattern (string) that produced *vuln_content*.
    :param occurence: occurrence number forwarded to ``display``.
    """
    global result_count

    # Get the line of the vulnerability
    line_vuln = _locate_finding(vuln_content, page_content, regex_var_detect)

    # display the result
    display(
        path,            # path
        payload,         # payload
        vuln_content,    # vulnerability
        line_vuln,       # line
        vuln_content,    # declaration_text
        str(line_vuln),  # declaration_line
        vuln_content,    # colored
        occurence,       # occurence
        plain            # plain
    )

    # increment the global vulnerability count
    result_count = result_count + 1