Vulny-Code-Static-Analysis/detection.py

178 lines
6.7 KiB
Python

#!/usr/bin/python
# -*- coding: utf-8 -*-
import os
import re
import math
from indicators import *
from functions import *
# Number of findings reported so far; incremented by analysis() and add_vuln_var()
result_count = 0
# Number of files handed to analysis() so far
result_files = 0
# Compute a Shannon entropy for a string based on an iterator
def shannon_entropy(data, iterator):
    """
    Return the Shannon entropy (in bits) of ``data`` over a given alphabet.

    Borrowed from http://blog.dkbza.org/2007/05/scanning-data-for-entropy-anomalies.html

    Only the symbols listed in ``iterator`` contribute to the result. The
    probability of a symbol is its number of occurrences in ``data`` divided
    by ``len(data)``, so content outside the alphabet lowers the total.

    :param data: sequence to measure (a string in this module).
    :param iterator: iterable of the symbols to count; a symbol listed
        several times is only taken into account once.
    :return: the entropy, or 0 when ``data`` is empty.
    """
    if not data:
        return 0

    length = len(data)
    entropy = 0
    # dict.fromkeys() drops repeated symbols while keeping their order;
    # counting the same symbol twice would inflate the entropy.
    for symbol in dict.fromkeys(iterator):
        occurrences = data.count(symbol)
        if occurrences:
            p_x = occurrences / length
            entropy -= p_x * math.log2(p_x)
    return entropy
# Analyse the source code of a single page
def analysis(path, plain):
    """
    Scan one source file and report every finding.

    Three passes are run over the cleaned source:

    1. Hardcoded credentials: assignments / ``define(...)`` calls with a
       quoted value whose text contains one of the credential keywords.
    2. High entropy strings: quoted assignments whose Shannon entropy over
       the base64 or hexadecimal alphabet reaches a fixed threshold.
    3. Payload patterns: every entry of ``payloads`` combined with
       ``regex_indicators``, filtered through the protection, exception and
       declaration checks.

    Side effects: increments the module-level ``result_files`` once and
    ``result_count`` once per finding of the third pass (the first two
    passes report through ``add_vuln_var``).

    :param path: path of the file to read; it is decoded as UTF-8 and
        undecodable bytes are replaced.
    :param plain: forwarded unchanged to ``add_vuln_var`` and ``display``.
    """
    global result_count
    global result_files
    result_files += 1

    # Only the read needs the file handle; release it before the analysis.
    with open(path, 'r', encoding='utf-8', errors='replace') as content_file:
        content = content_file.read()

    # Clean source for a better detection
    content = clean_source_and_format(content)
    content_pure = content.replace(' ', '')

    # Hardcoded credentials (work as an exception, it's not function based)
    credz = ['pass', 'secret', 'token', 'pwd']
    # Raw string: the pattern text is unchanged, but its backslashes are no
    # longer invalid string escapes.
    regex_var_detect = r"""\$[\w\s]+\s?=\s?["|'].*["|']|define\(["|'].*["|']\)"""
    # The candidates do not depend on the keyword, so collect them once.
    matches = re.compile(regex_var_detect, re.I).findall(content_pure)
    for credential in credz:
        # If we find a variable with a constant for a given indicator
        for vuln_content in matches:
            if credential in vuln_content.lower():
                payload = ["", "Hardcoded Credential", []]
                add_vuln_var(payload, plain, path, vuln_content, content, regex_var_detect)

    # High Entropy String
    regex_var_detect = r""".*?=\s?["|'].*?["|'].*?"""
    matches = re.compile(regex_var_detect, re.I).findall(content_pure)
    BASE64_CHARS = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="
    HEX_CHARS = "1234567890abcdefABCDEF"
    for vuln_content in matches:
        if (shannon_entropy(vuln_content, BASE64_CHARS) >= 4.1
                or shannon_entropy(vuln_content, HEX_CHARS) >= 2.5):
            payload = ["", "High Entropy String", []]
            add_vuln_var(payload, plain, path, vuln_content, content, regex_var_detect)

    # Detection of RCE/SQLI/LFI/RFI/RFU/XSS/...
    # The indicator pattern without its first two and last two characters is
    # used to pick the individual variables out of a matched call.
    variable_regex = re.compile(regex_indicators[2:-2])
    # Handle "require something" vs "require(something)"
    # Dirty trick to force a parenthesis before the function's argument
    placeholder_content = content.replace(" ", "(PLACEHOLDER")
    for payload in payloads:
        matches = re.compile(payload[0] + regex_indicators).findall(placeholder_content)

        for vuln_content in matches:
            # Undo the placeholder trick on every captured part
            vuln_content = [
                part.replace("(PLACEHOLDER", " ").replace("PLACEHOLDER", "")
                for part in vuln_content
            ]

            # Security hole detected, is it protected ?
            if check_protection(payload[2], vuln_content):
                continue

            occurence = 0
            # These keep the values of the previous variable of the same call
            # when the current one needs no declaration lookup.
            declaration_text, line = "", ""

            # Managing multiple variable in a single line/function
            sentence = "".join(vuln_content)
            for vulnerable_var in variable_regex.findall(sentence):
                false_positive = False
                occurence += 1

                # No declaration for $_GET, $_POST ...
                if not check_exception(vulnerable_var[1]):
                    # Look for the declaration of $something = xxxxx
                    false_positive, declaration_text, line = check_declaration(
                        content,
                        vulnerable_var[1],
                        path)

                    # Set false positive if protection is in the variable's declaration
                    is_protected = check_protection(payload[2], declaration_text)
                    false_positive = is_protected or false_positive

                # Display all the vuln
                line_vuln = find_line_vuln(payload, vuln_content, content)

                # Check for not $dest="constant"; $dest='cste'; $dest=XX;
                if "$_" not in vulnerable_var[1]:
                    if "$" not in declaration_text.replace(vulnerable_var[1], ''):
                        false_positive = True

                if not false_positive:
                    result_count = result_count + 1
                    display(path, payload, vuln_content, line_vuln, declaration_text, line, vulnerable_var[1], occurence, plain)
# Run through every file and subdirectory
def recursive(dir, progress, plain):
    """
    Walk ``dir`` recursively and run ``analysis`` on every PHP file found.

    A file is analysed when its own name contains ``.php``; the names of its
    parent directories are not taken into account. Entries that are neither a
    regular file nor a directory (e.g. broken symlinks) are skipped.

    :param dir: directory to walk.
    :param progress: number of progress marks printed by the caller's level;
        it is increased by one for this level.
    :param plain: selects the progress mark and is forwarded to ``analysis``.
    :raises SystemExit: with code -1 when an ``OSError`` is raised while
        listing or analysing the tree.
    """
    progress += 1
    # NOTE(review): both branches assign an empty string, so the progress bar
    # prints no marks - the original characters may have been lost; confirm.
    progress_indicator = ''
    if plain:
        progress_indicator = ""
    try:
        for name in os.listdir(dir):
            print('\tAnalyzing : ' + progress_indicator * progress + '\r', end="\r")

            full_path = os.path.join(dir, name)
            # Targetting only PHP Files
            if os.path.isfile(full_path):
                # Test the file name only: a ".php" inside a parent directory
                # name must not select every file below it.
                if ".php" in name:
                    analysis(dir + "/" + name, plain)
            elif os.path.isdir(full_path):
                recursive(dir + "/" + name, progress, plain)
    except OSError:
        print("Error 404 - Not Found, maybe you need more right ?" + " " * 30)
        # Same effect as exit(-1) without relying on the site module helper
        raise SystemExit(-1)
# Display basic information about the scan
def scanresults():
    """Print how many findings were reported and how many files were scanned."""
    # The module-level counters are only read here, so no ``global`` is needed.
    summary = "Found {} vulnerabilities in {} files".format(result_count, result_files)
    print(summary)
def add_vuln_var(payload, plain, path, vuln_content, page_content, regex_var_detect, occurence=1):
    """
    Report a variable-based finding and count it.

    The reported line is the 0-based index of the first line of
    ``page_content`` that contains ``vuln_content`` once spaces are removed
    (the callers in this file extract ``vuln_content`` from space-stripped
    source). When no line contains it, the last line matching
    ``regex_var_detect`` is used, and -1 when none matches.

    Side effects: calls ``display`` and increments the module-level
    ``result_count``.

    :param payload: payload description forwarded to ``display``.
    :param plain: forwarded to ``display``.
    :param path: path of the analysed file.
    :param vuln_content: the flagged text; also used as declaration text and
        as the highlighted part of the report.
    :param page_content: full source in which the line is looked up.
    :param regex_var_detect: pattern (compiled case-insensitively) used for
        the fallback line lookup.
    :param occurence: occurrence number forwarded to ``display``.
    """
    global result_count

    # Get the line of the vulnerability
    regex = re.compile(regex_var_detect, re.I)
    line_vuln = -1
    for index, text in enumerate(page_content.split('\n')):
        if vuln_content and vuln_content in text.replace(' ', ''):
            # Exact hit: this is the line the finding comes from
            line_vuln = index
            break
        if regex.search(text):
            # Fallback candidate: remember the latest line matching the pattern
            line_vuln = index

    # display the result
    display(
        path,            # path
        payload,         # payload
        vuln_content,    # vulnerability
        line_vuln,       # line
        vuln_content,    # declaration_text
        str(line_vuln),  # declaration_line
        vuln_content,    # colored
        occurence,       # occurence
        plain            # plain
    )

    # increment the global vulnerability count
    result_count = result_count + 1