metasploit-framework/lib/rex/exploitation/jsobfu.rb

489 lines
12 KiB
Ruby

##
# $Id$
##
require 'rex/text'
require 'rkelly'
module Rex
module Exploitation
#
# Obfuscate JavaScript by randomizing as much as possible and removing
# easily-signaturable string constants.
#
# Example:
# js = ::Rex::Exploitation::JSObfu.new %Q|
# var a = "0\\612\\063\\x34\\x35\\x36\\x37\\x38\\u0039";
# var b = { foo : "foo", bar : "bar" }
# alert(a);
# alert(b.foo);
# |
# js.obfuscate
# puts js
# Example Output:
# var VwxvESbCgv = String.fromCharCode(0x30,0x31,062,063,064,53,0x36,067,070,0x39);
# var ToWZPn = {
# "\146\157\x6f": (function () { var yDyv="o",YnCL="o",Qcsa="f"; return Qcsa+YnCL+yDyv })(),
# "\142ar": String.fromCharCode(0142,97,0162)
# };
# alert(VwxvESbCgv);
# alert(ToWZPn.foo);
#
# NOTE: Variables MUST be declared with a 'var' statement BEFORE first use (or
# not at all) for this to generate correct code! If variables are not declared
# they will not be randomized but the generated code will be correct.
#
# Bad Example Javascript:
# a = "asdf"; // this variable hasn't been declared and will not be randomized
# var a;
# alert(a); // real js engines will alert "asdf" here
# Bad Example Obfuscated:
# a = (function () { var hpHu="f",oyTm="asd"; return oyTm+hpHu })();
# var zSrnHpEfJZtg;
# alert(zSrnHpEfJZtg);
# Notice that the first usage of +a+ (before it was declared) is not
# randomized. Thus, the obfuscated version will alert 'undefined' instead of
# "asdf".
#
class JSObfu
#
# Abstract Syntax Tree generated by RKelly::Parser#parse
#
attr_reader :ast
#
# Saves +code+ for later obfuscation with #obfuscate
#
def initialize(code)
@code = code
@funcs = {}
@vars = {}
@debug = false
end
#
# Add +str+ to the un-obfuscated code.
#
# Calling this method after #obfuscate is undefined
#
def <<(str)
@code << str
end
#
# Return the (possibly obfuscated) code as a string.
#
# If #obfuscate has not been called before this, returns the parsed,
# unobfuscated code. This can be useful for example to remove comments and
# standardize spacing.
#
def to_s
parse if not @ast
@ast.to_ecma
end
#
# Return the obfuscated name of a symbol
#
# You MUST call #obfuscate before this method!
#
def sym(lookup)
if @vars[lookup]
ret = @vars[lookup]
elsif @funcs[lookup]
ret = @funcs[lookup]
else
ret = lookup
end
ret
end
#
# Parse and obfuscate
#
def obfuscate
parse
obfuscate_r(@ast)
end
protected
#
# Recursive method to obfuscate the given +ast+.
#
# +ast+ should be the result of RKelly::Parser#parse
#
def obfuscate_r(ast)
ast.each do |node|
#if node.respond_to? :value and node.value.kind_of? String and node.value =~ /bodyOnLoad/i
# $stdout.puts("bodyOnLoad: #{node.class}: #{node.value}")
#end
case node
when nil
nil
when ::RKelly::Nodes::SourceElementsNode
# Recurse
obfuscate_r(node.value)
#when ::RKelly::Nodes::ObjectLiteralNode
# TODO
#$stdout.puts(node.methods - Object.new.methods)
#$stdout.puts(node.value.inspect)
when ::RKelly::Nodes::PropertyNode
# Property names must be bare words or string literals NOT
# expressions! Can't use transform_string() here
if node.name =~ /^[a-zA-Z_][a-zA-Z0-9_]*$/
n = '"'
node.name.unpack("C*") { |c|
case rand(3)
when 0; n << "\\x%02x"%(c)
when 1; n << "\\#{c.to_s 8}"
when 2; n << [c].pack("C")
end
}
n << '"'
node.name = n
end
# Variables
when ::RKelly::Nodes::VarDeclNode
if @vars[node.name].nil?
#@vars[node.name] = "var_#{Rex::Text.rand_text_alpha(3+rand(12))}_#{node.name}"
@vars[node.name] = "#{Rex::Text.rand_text_alpha(3+rand(12))}"
end
node.name = @vars[node.name]
when ::RKelly::Nodes::ParameterNode
if @vars[node.value].nil?
#@vars[node.value] = "param_#{Rex::Text.rand_text_alpha(3+rand(12))}_#{node.value}"
@vars[node.value] = "#{Rex::Text.rand_text_alpha(3+rand(12))}"
end
node.value = @vars[node.value]
when ::RKelly::Nodes::ResolveNode
#$stdout.puts("Resolve bodyOnload: #{@vars[node.value]}") if "bodyOnLoad" == node.value
node.value = @vars[node.value] if @vars[node.value]
when ::RKelly::Nodes::DotAccessorNode
case node.value
when ::RKelly::Nodes::ResolveNode
if @vars[node.value.value]
node.value.value = @vars[node.value.value]
end
#else
# $stderr.puts("Non-resolve node as target of dotaccessor: #{node.value.class}")
end
# Functions
when ::RKelly::Nodes::FunctionDeclNode
#$stdout.puts("FunctionDecl: #{node.value}")
# Functions can also act as objects, so store them in the vars
# and the functions list so we can replace them in both places
if @funcs[node.value].nil? and not @funcs.values.include?(node.value)
#@funcs[node.value] = "func_#{Rex::Text.rand_text_alpha(3+rand(12))}_#{node.value}"
@funcs[node.value] = "#{Rex::Text.rand_text_alpha(3+rand(12))}"
if @vars[node.value].nil?
@vars[node.value] = @funcs[node.value]
end
node.value = @funcs[node.value]
end
when ::RKelly::Nodes::FunctionCallNode
# The value of a FunctionCallNode is some sort of accessor node or a ResolveNode
# so this is basically useless
#$stdout.puts("Function call: #{node.name} => #{@funcs[node.name]}")
#node.value = @funcs[node.value] if @funcs[node.value]
# Transformers
when ::RKelly::Nodes::NumberNode
node.value = transform_number(node.value)
when ::RKelly::Nodes::StringNode
node.value = transform_string(node.value)
else
#$stderr.puts "#{node.class}: #{node.value}"
#$stderr.puts "#{node.class}"
end
#unless node.kind_of? ::RKelly::Nodes::SourceElementsNode
# $stderr.puts "#{node.class}: #{node.value}"
#end
end
nil
end
#
# Generate an Abstract Syntax Tree (#ast) for later obfuscation
#
def parse
parser = RKelly::Parser.new
@ast = parser.parse(@code)
end
#
# Convert a number to a random base (decimal, octal, or hexedecimal).
#
# Given 10 as input, the possible return values are:
# "10"
# "0xa"
# "012"
#
def rand_base(num)
case rand(3)
when 0; num.to_s
when 1; "0%o" % num
when 2; "0x%x" % num
end
end
#
# Return a mathematical expression that will evaluate to the given number
# +num+.
#
# +num+ can be a float or an int, but should never be negative.
#
def transform_number(num)
case num
when Fixnum
if num == 0
r = rand(10) + 1
transformed = "('#{Rex::Text.rand_text_alpha(r)}'.length - #{r})"
elsif num > 0 and num < 10
# use a random string.length for small numbers
transformed = "'#{Rex::Text.rand_text_alpha(num)}'.length"
else
transformed = "("
divisor = rand(num) + 1
a = num / divisor.to_i
b = num - (a * divisor)
# recurse half the time for a
a = (rand(2) == 0) ? transform_number(a) : rand_base(a)
# recurse half the time for divisor
divisor = (rand(2) == 0) ? transform_number(divisor) : rand_base(divisor)
transformed << "#{a}*#{divisor}"
transformed << "+#{b}"
transformed << ")"
end
when Float
transformed = "(#{num - num.floor} + #{rand_base(num.floor)})"
end
#puts("#{num} == #{transformed}")
transformed
end
#
# Convert a javascript string into something that will generate that string.
#
# Randomly calls one of the +transform_string_*+ methods
#
def transform_string(str)
quote = str[0,1]
# pull off the quotes
str = str[1,str.length - 2]
return quote*2 if str.length == 0
case rand(2)
when 0
transformed = transform_string_split_concat(str, quote)
when 1
transformed = transform_string_fromCharCode(str)
#when 2
# # Currently no-op
# transformed = transform_string_unescape(str)
end
#$stderr.puts "Obfuscating str: #{str.ljust 30} #{transformed}"
transformed
end
#
# Split a javascript string, +str+, without breaking escape sequences.
#
# The maximum length of each piece of the string is half the total length
# of the string, ensuring we (almost) always split into at least two
# pieces. This won't always be true when given a string like "AA\x41",
# where escape sequences artificially increase the total length (escape
# sequences are considered a single character).
#
# Returns an array of two-element arrays. The zeroeth element is a
# randomly generated variable name, the first is a piece of the string
# contained in +quote+s.
#
# See #escape_length
#
def safe_split(str, quote)
parts = []
max_len = str.length / 2
while str.length > 0
len = 0
loop do
e_len = escape_length(str[len..-1])
e_len = 1 if e_len.nil?
len += e_len
# if we've reached the end of the string, bail
break unless str[len]
break if len > max_len
# randomize the length of each part
break if (rand(4) == 0)
end
part = str.slice!(0, len)
var = Rex::Text.rand_text_alpha(4)
parts.push( [ var, "#{quote}#{part}#{quote}" ] )
end
parts
end
#
# Stolen from obfuscatejs.rb
#
# Determines the length of an escape sequence
#
def escape_length(str)
esc_len = nil
if str[0,1] == "\\"
case str[1,1]
when "u"; esc_len = 6 # unicode \u1234
when "x"; esc_len = 4 # hex, \x41
when /[0-7]/ # octal, \123, \0
str[1,3] =~ /([0-7]{1,3})/
if $1.to_i(8) > 255
str[1,3] =~ /([0-7]{1,2})/
end
esc_len = 1 + $1.length
else; esc_len = 2 # \" \n, etc.
end
end
esc_len
end
#
# Split a javascript string, +str+, into multiple randomly-ordered parts
# and return an anonymous javascript function that joins them in the
# correct order. This method can be called safely on strings containing
# escape sequences. See #safe_split.
#
def transform_string_split_concat(str, quote)
parts = safe_split(str, quote)
func = "(function () { var "
ret = "; return "
parts.sort { |a,b| rand }.each do |part|
func << "#{part[0]}=#{part[1]},"
end
func.chop!
ret << parts.map{|part| part[0]}.join("+")
final = func + ret + " })()"
final
end
# TODO
#def transform_string_unescape(str)
# str
#end
#
# Return a call to String.fromCharCode() with each char of the input as arguments
#
# Example:
# input : "A\n"
# output: String.fromCharCode(0x41, 10)
#
def transform_string_fromCharCode(str)
buf = "String.fromCharCode("
bytes = str.unpack("C*")
len = 0
while str.length > 0
if str[0,1] == "\\"
str.slice!(0,1)
# then this is an escape sequence and we need to deal with all
# the special cases
case str[0,1]
# For chars that contain their non-escaped selves, step past
# the backslash and let the rand_base() below decide how to
# represent the character.
when '"', "'", "\\", " "
char = str.slice!(0,1).unpack("C").first
# For symbolic escapes, use the known value
when "n"; char = 0x0a; str.slice!(0,1)
when "t"; char = 0x09; str.slice!(0,1)
# Lastly, if it's a hex, unicode, or octal escape, pull out the
# real value and use that
when "x"
# Strip the x
str.slice!(0,1)
char = str.slice!(0,2).to_i 16
when "u"
# This can potentially lose information in the case of
# characters like \u0041, but since regular ascii is stored
# as unicode internally, String.fromCharCode(0x41) will be
# represented as 00 41 in memory anyway, so it shouldn't
# matter.
str.slice!(0,1)
char = str.slice!(0,4).to_i 16
when /[0-7]/
# Octals are a bit harder since they are variable width and
# don't necessarily mean what you might think. For example,
# "\61" == "1" and "\610" == "10". 610 is a valid octal
# number, but not a valid ascii character. Javascript will
# interpreter as much as it can as a char and use the rest
# as a literal. Boo.
str =~ /([0-7]{1,3})/
char = $1.to_i 8
if char > 255
str =~ /([0-7]{1,2})/
char = $1.to_i 8
end
str.slice!(0,$1.length)
end
else
char = str.slice!(0,1).unpack("C").first
end
buf << "#{rand_base(char)},"
end
# Strip off the last comma
buf = buf[0,buf.length-1] + ")"
transformed = buf
transformed
end
end
end
end
=begin
if __FILE__ == $0
if ARGV[0]
code = File.read(ARGV[0])
else
#require 'rex/exploitation/javascriptosdetect'
#code = Rex::Exploitation::JavascriptOSDetect.new.to_s
code = <<-EOS
// Should alert "0123456789"
var a = "0\\612\\063\\x34\\x35\\x36\\x37\\x38\\u0039";
var a,b=2,c=3;
alert(a);
// should alert "asdfjkl;"
var d = (function() { var foo = "jkl;", blah = "asdf"; return blah + foo; })();
alert(d);
EOS
end
js = Rex::Exploitation::JSObfu.new(code)
js.obfuscate
puts js.to_s
end
=end