metasploit-framework/lib/rex/exploitation/jsobfu.rb

514 lines
14 KiB
Ruby

# -*- coding: binary -*-
require 'rex/text'
require 'rex/random_identifier_generator'
require 'rkelly'
module Rex
module Exploitation
#
# Obfuscate JavaScript by randomizing as much as possible and removing
# easily-signaturable string constants.
#
# Example:
# js = ::Rex::Exploitation::JSObfu.new %Q|
# var a = "0\\612\\063\\x34\\x35\\x36\\x37\\x38\\u0039";
# var b = { foo : "foo", bar : "bar" }
# alert(a);
# alert(b.foo);
# |
# js.obfuscate
# puts js
# Example Output:
# var VwxvESbCgv = String.fromCharCode(0x30,0x31,062,063,064,53,0x36,067,070,0x39);
# var ToWZPn = {
# "\146\157\x6f": (function () { var yDyv="o",YnCL="o",Qcsa="f"; return Qcsa+YnCL+yDyv })(),
# "\142ar": String.fromCharCode(0142,97,0162)
# };
# alert(VwxvESbCgv);
# alert(ToWZPn.foo);
#
# NOTE: Variables MUST be declared with a 'var' statement BEFORE first use (or
# not at all) for this to generate correct code! If variables are not declared
# they will not be randomized but the generated code will be correct.
#
# Bad Example Javascript:
# a = "asdf"; // this variable hasn't been declared and will not be randomized
# var a;
# alert(a); // real js engines will alert "asdf" here
# Bad Example Obfuscated:
# a = (function () { var hpHu="f",oyTm="asd"; return oyTm+hpHu })();
# var zSrnHpEfJZtg;
# alert(zSrnHpEfJZtg);
# Notice that the first usage of +a+ (before it was declared) is not
# randomized. Thus, the obfuscated version will alert 'undefined' instead of
# "asdf".
#
class JSObfu
# Words that must never be handed out as a random variable name because a
# JavaScript engine would reject (or mis-parse) them as identifiers.
#
# Covers the current keywords, the literals +null+/+true+/+false+, the
# strict-mode and module-only words, and the future reserved words of older
# ECMAScript editions. The list is frozen so it cannot be altered at runtime.
#
# source: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Reserved_Words
RESERVED_KEYWORDS = %w(
  break case catch continue debugger default delete do else finally
  for function if in instanceof new return switch this throw try
  typeof var void while with class enum export extends import super
  implements interface let package private protected public static yield
  const await null true false
  abstract boolean byte char double final float goto int long native
  short synchronized throws transient volatile
).freeze
#
# Abstract Syntax Tree generated by RKelly::Parser#parse.
#
# Unset (nil) until #parse has run, which happens on the first call to
# #obfuscate or #to_s.
#
attr_reader :ast
#
# Keeps a reference to +code+ so it can be obfuscated later with #obfuscate,
# and sets up empty rename tables for variables and functions.
#
# Random identifiers are at most 15 characters long; they start with a
# letter, "_" or "$" and continue with letters, digits, "_" or "$".
#
# @param code [String] the JavaScript source to work on
#
def initialize(code)
  @code  = code
  @funcs = {}
  @vars  = {}
  @debug = false

  leading_chars = Rex::Text::Alpha + "_$"
  trailing_chars = Rex::Text::AlphaNumeric + "_$"
  @rand_gen = Rex::RandomIdentifierGenerator.new(
    :max_length => 15,
    :first_char_set => leading_chars,
    :char_set => trailing_chars
  )
end
#
# Append +str+ to the stored, not-yet-obfuscated source.
#
# The result of calling this after #obfuscate is undefined.
#
# @param str [String] JavaScript text to append
# @return [String] the stored source after the append
#
def <<(str)
  @code.concat(str)
end
#
# Render the current syntax tree as JavaScript source.
#
# When nothing has been parsed yet the code is parsed first, so calling this
# without #obfuscate returns the parsed but unobfuscated program. That can
# be handy for stripping comments and normalizing spacing.
#
# @return [String] the (possibly obfuscated) JavaScript
#
def to_s
  parse unless @ast
  @ast.to_ecma
end
#
# Look up the obfuscated name of a symbol.
#
# Variables are consulted first, then functions; a name that was never
# renamed is returned unchanged.
#
# You MUST call #obfuscate before this method!
#
# @param lookup [String] the original identifier
# @return [String] the randomized identifier, or +lookup+ itself
#
def sym(lookup)
  @vars[lookup] || @funcs[lookup] || lookup
end
#
# Parse the stored source, then obfuscate the resulting tree in place.
#
# @return [nil]
#
def obfuscate
  parse
  tree = @ast
  obfuscate_r(tree)
end
# Keeps drawing random identifiers until one is usable.
#
# A candidate is rejected when it is a reserved word or when it has already
# been issued. Both rename tables are checked: a function's random name is
# only mirrored into +@vars+ when no variable of the same source name
# exists, so looking at +@vars+ alone could hand the same identifier out
# twice.
#
# @return [String] a unique random var name that is not a reserved keyword
def random_var_name
  loop do
    candidate = random_string
    next if RESERVED_KEYWORDS.include?(candidate)
    next if @vars.value?(candidate) || @funcs.value?(candidate)
    return candidate
  end
end
protected
# Draws one identifier candidate from the generator built in #initialize.
#
# @return [String] a random string
def random_string
  generator = @rand_gen
  generator.generate
end
#
# Recursive method to obfuscate the given +ast+.
#
# +ast+ should be the result of RKelly::Parser#parse
#
# Every node yielded by +ast.each+ is dispatched on its class and mutated in
# place:
#
# * SourceElementsNode - recurse into +node.value+
# * PropertyNode       - re-encode identifier-like property names as quoted
#                        strings with randomly escaped characters
# * VarDeclNode / ParameterNode - assign (or reuse) a random name in +@vars+
# * ResolveNode / DotAccessorNode - rewrite references to renamed variables
# * FunctionDeclNode   - assign a random name, recorded in +@funcs+ and,
#                        unless a variable already uses the name, in +@vars+
# * NumberNode / StringNode - replaced via #transform_number and
#                        #transform_string
#
# Anything else is left untouched. Always returns nil.
#
def obfuscate_r(ast)
ast.each do |node|
#if node.respond_to? :value and node.value.kind_of? String and node.value =~ /bodyOnLoad/i
# $stdout.puts("bodyOnLoad: #{node.class}: #{node.value}")
#end
case node
when nil
nil
when ::RKelly::Nodes::SourceElementsNode
# Recurse
obfuscate_r(node.value)
#when ::RKelly::Nodes::ObjectLiteralNode
# TODO
#$stdout.puts(node.methods - Object.new.methods)
#$stdout.puts(node.value.inspect)
when ::RKelly::Nodes::PropertyNode
# Property names must be bare words or string literals NOT
# expressions! Can't use transform_string() here
# Only names that look like plain identifiers are rewritten; anything else
# (e.g. a name that is already quoted) is left as it is.
if node.name =~ /^[a-zA-Z_][a-zA-Z0-9_]*$/
n = '"'
# Relies on String#unpack yielding each byte value to the block. Every
# byte is written as a \xNN escape, an octal escape, or the raw character,
# chosen at random.
node.name.unpack("C*") { |c|
case rand(3)
when 0; n << "\\x%02x"%(c)
when 1; n << "\\#{c.to_s 8}"
when 2; n << [c].pack("C")
end
}
n << '"'
node.name = n
end
# Variables
when ::RKelly::Nodes::VarDeclNode
# First declaration of a name allocates its random replacement; later
# declarations of the same name reuse it.
if @vars[node.name].nil?
@vars[node.name] = random_var_name
end
node.name = @vars[node.name]
when ::RKelly::Nodes::ParameterNode
# Function parameters share the same rename table as variables.
if @vars[node.value].nil?
@vars[node.value] = random_var_name
end
node.value = @vars[node.value]
when ::RKelly::Nodes::ResolveNode
#$stdout.puts("Resolve bodyOnload: #{@vars[node.value]}") if "bodyOnLoad" == node.value
# Identifier references are only rewritten when the name is already known,
# which is why variables must be declared before their first use.
node.value = @vars[node.value] if @vars[node.value]
when ::RKelly::Nodes::DotAccessorNode
# Only the object on the left of the dot is renamed, and only when it is
# a plain identifier that has a replacement.
case node.value
when ::RKelly::Nodes::ResolveNode
if @vars[node.value.value]
node.value.value = @vars[node.value.value]
end
#else
# $stderr.puts("Non-resolve node as target of dotaccessor: #{node.value.class}")
end
# Functions
when ::RKelly::Nodes::FunctionDeclNode
#$stdout.puts("FunctionDecl: #{node.value}")
# Functions can also act as objects, so store them in the vars
# and the functions list so we can replace them in both places
# The second condition skips a node whose name is already one of the
# generated names. NOTE(review): presumably this guards against the same
# declaration being reached twice (via +each+ and via the explicit
# recursion above) - confirm against RKelly's traversal.
if @funcs[node.value].nil? and not @funcs.values.include?(node.value)
@funcs[node.value] = random_var_name
if @vars[node.value].nil?
@vars[node.value] = @funcs[node.value]
end
node.value = @funcs[node.value]
end
when ::RKelly::Nodes::FunctionCallNode
# The value of a FunctionCallNode is some sort of accessor node or a ResolveNode
# so this is basically useless
#$stdout.puts("Function call: #{node.name} => #{@funcs[node.name]}")
#node.value = @funcs[node.value] if @funcs[node.value]
# Transformers
when ::RKelly::Nodes::NumberNode
node.value = transform_number(node.value)
when ::RKelly::Nodes::StringNode
node.value = transform_string(node.value)
else
#$stderr.puts "#{node.class}: #{node.value}"
#$stderr.puts "#{node.class}"
end
#unless node.kind_of? ::RKelly::Nodes::SourceElementsNode
# $stderr.puts "#{node.class}: #{node.value}"
#end
end
nil
end
#
# Build the Abstract Syntax Tree (#ast) from the stored source so it can be
# obfuscated afterwards. Any previously built tree is replaced.
#
# @return [Object] the tree returned by RKelly::Parser#parse
#
def parse
  @ast = RKelly::Parser.new.parse(@code)
end
#
# Render +num+ as a JavaScript literal in a randomly chosen base: decimal,
# octal (leading "0") or hexadecimal (leading "0x").
#
# Given 10 as input, the possible return values are:
# "10"
# "0xa"
# "012"
#
# @param num [Integer] the value to render
# @return [String] the literal
#
def rand_base(num)
  # Index 0 is decimal, 1 is octal, 2 is hexadecimal; only the selected
  # formatter is evaluated.
  formatters = [
    lambda { |value| value.to_s },
    lambda { |value| "0%o" % value },
    lambda { |value| "0x%x" % value }
  ]
  formatters[rand(3)].call(num)
end
#
# Return a JavaScript expression that evaluates to the given number +num+.
#
# * 0 becomes the length of a random string minus that length.
# * 1..9 become the +.length+ of a random string of that size.
# * Larger integers become "(a*divisor+b)" for a random divisor, where +a+
#   and +divisor+ are each either expanded recursively or written in a
#   random base (see #rand_base).
# * Floats become "(fraction + integer_part)".
# * Negative values are emitted as the negation of the expression for their
#   absolute value, because #rand_base cannot format negative numbers.
# * Non-finite floats and anything that is not an Integer or Float are
#   returned as their +to_s+ form instead of nil.
#
# Matches on Integer rather than Fixnum: Fixnum no longer exists in current
# Ruby versions and never covered large (Bignum) values.
#
# @param num [Integer, Float] the numeric literal's value
# @return [String] the replacement expression
#
def transform_number(num)
  return "(-#{transform_number(-num)})" if num.is_a?(Numeric) && num < 0

  case num
  when Integer
    if num == 0
      pad = rand(10) + 1
      "('#{Rex::Text.rand_text_alpha(pad)}'.length - #{pad})"
    elsif num < 10
      # use a random string.length for small numbers
      "'#{Rex::Text.rand_text_alpha(num)}'.length"
    else
      divisor = rand(num) + 1
      quotient = num / divisor
      remainder = num - (quotient * divisor)
      # recurse half the time for the quotient
      quotient = rand(2) == 0 ? transform_number(quotient) : rand_base(quotient)
      # recurse half the time for the divisor
      divisor = rand(2) == 0 ? transform_number(divisor) : rand_base(divisor)
      "(#{quotient}*#{divisor}+#{remainder})"
    end
  when Float
    # Infinity and NaN have no integer part to split off
    return num.to_s unless num.finite?
    whole = num.floor
    "(#{num - whole} + #{rand_base(whole)})"
  else
    num.to_s
  end
end
#
# Turn a quoted JavaScript string literal into an expression that builds the
# same string at runtime.
#
# The surrounding quote characters are stripped first. An empty literal is
# returned as a pair of its own quotes; otherwise one of the
# +transform_string_*+ strategies is picked at random.
#
# @param str [String] the literal including its surrounding quotes
# @return [String] the replacement expression
#
def transform_string(str)
  quote = str[0, 1]
  # drop the leading and trailing quote
  body = str[1, str.length - 2]
  return quote * 2 if body.empty?

  if rand(2) == 0
    transform_string_split_concat(body, quote)
  else
    transform_string_fromCharCode(body)
  end
end
#
# Split a javascript string, +str+, without breaking escape sequences.
#
# The maximum length of each piece of the string is half the total length
# of the string, ensuring we (almost) always split into at least two
# pieces. This won't always be true when given a string like "AA\x41",
# where escape sequences artificially increase the total length (escape
# sequences are considered a single character).
#
# Returns an array of two-element arrays. The zeroeth element is a
# randomly generated variable name, the first is a piece of the string
# contained in +quote+s.
#
# The variable names are unique within one call and are never one of
# RESERVED_KEYWORDS, since a four-letter random name can otherwise come out
# as e.g. "this" or "with", or repeat an earlier piece's name. +str+ itself
# is left untouched; the work is done on a copy.
#
# See #escape_length
#
# @param str [String] the string body, without its surrounding quotes
# @param quote [String] the quote character to wrap each piece in
# @return [Array<Array(String, String)>] [variable name, quoted piece] pairs
#
def safe_split(str, quote)
  remaining = str.dup
  max_len = remaining.length / 2
  taken = {}
  parts = []

  until remaining.empty?
    len = 0
    loop do
      # step over one character, or one whole escape sequence
      len += escape_length(remaining[len..-1]) || 1
      # if we've reached the end of the string, bail
      break unless remaining[len]
      break if len > max_len
      # randomize the length of each part
      break if rand(4) == 0
    end
    piece = remaining.slice!(0, len)

    name = Rex::Text.rand_text_alpha(4)
    while taken[name] || RESERVED_KEYWORDS.include?(name)
      name = Rex::Text.rand_text_alpha(4)
    end
    taken[name] = true

    parts.push([name, "#{quote}#{piece}#{quote}"])
  end

  parts
end
#
# Stolen from obfuscatejs.rb
#
# Measure the escape sequence at the very start of +str+.
#
# * "\uXXXX" is 6 characters, "\xXX" is 4.
# * Octal escapes are the backslash plus up to three octal digits. When
#   three digits would exceed 255 only the first two belong to the escape.
# * Any other escaped character (\" \n, etc.) is 2.
#
# @param str [String] text to inspect
# @return [Integer, nil] the length of the escape, or nil when +str+ does
#   not begin with a backslash
#
def escape_length(str)
  return nil unless str[0, 1] == "\\"

  marker = str[1, 1]
  return 6 if marker == "u" # unicode \u1234
  return 4 if marker == "x" # hex, \x41
  return 2 unless /[0-7]/ === marker # \" \n, etc.

  # octal, \123, \0
  digits = str[1, 3][/[0-7]{1,3}/]
  digits = digits[0, 2] if digits.to_i(8) > 255
  1 + digits.length
end
#
# Split a javascript string, +str+, into multiple randomly-ordered parts
# and return an anonymous javascript function that joins them in the
# correct order. This method can be called safely on strings containing
# escape sequences. See #safe_split.
#
# The variable declarations are shuffled with Array#shuffle. A sort block
# that returns +rand+ always yields a positive number, so it never produced
# a random order. The +return+ expression still concatenates the pieces in
# their original order.
#
# An empty +str+ produces an empty literal (+quote+ twice) rather than a
# function with an empty +var+ statement.
#
# @param str [String] the string body, without its surrounding quotes
# @param quote [String] the quote character used for each piece
# @return [String] a self-invoking function expression
#
def transform_string_split_concat(str, quote)
  parts = safe_split(str, quote)
  return quote * 2 if parts.empty?

  declarations = parts.shuffle.map { |name, literal| "#{name}=#{literal}" }.join(",")
  concatenation = parts.map { |name, _literal| name }.join("+")

  "(function () { var #{declarations}; return #{concatenation} })()"
end
# TODO
#def transform_string_unescape(str)
# str
#end
#
# Return a call to String.fromCharCode() with each char of the input as arguments
#
# Example:
# input : "A\n"
# output: String.fromCharCode(0x41, 10)
#
# Escape sequences in +str+ are decoded to the value they stand for:
#
# * \n \t \r \b \f \v use their known character codes
# * \xXX and \uXXXX use the hexadecimal value (only the four-digit form of
#   \u is recognized)
# * octal escapes take as many digits (up to three) as still fit in one
#   byte; "\610" is "\61" followed by a literal "0"
# * a backslash followed by a line break is a line continuation and adds
#   nothing
# * any other escaped character stands for itself
#
# Characters of a valid UTF-8 string are emitted as UTF-16 code units
# (surrogate pairs above U+FFFF); strings in any other encoding are emitted
# byte by byte. Each code is written in a random base, see #rand_base.
#
# +str+ is not modified. An empty +str+ yields "String.fromCharCode()".
#
# @param str [String] the string body, without its surrounding quotes
# @return [String] the String.fromCharCode(...) call
#
def transform_string_fromCharCode(str)
  # work on a copy so the caller's string is never consumed
  rest = str.dup
  symbolic = { "n" => 0x0a, "t" => 0x09, "r" => 0x0d, "b" => 0x08, "f" => 0x0c, "v" => 0x0b }

  # Map one character to the code unit(s) String.fromCharCode() needs
  code_units = lambda do |chr|
    next chr.bytes unless chr.encoding == Encoding::UTF_8 && chr.valid_encoding?
    point = chr.ord
    next [point] if point <= 0xFFFF
    point -= 0x10000
    [0xD800 + (point >> 10), 0xDC00 + (point & 0x3FF)]
  end

  codes = []
  until rest.empty?
    # A plain character, or a lone backslash at the very end
    unless rest[0, 1] == "\\" && rest.length > 1
      codes.concat(code_units.call(rest.slice!(0, 1)))
      next
    end

    # Step past the backslash and look at what is being escaped
    rest.slice!(0, 1)
    marker = rest[0, 1]
    case marker
    when "x"
      rest.slice!(0, 1)
      codes << rest.slice!(0, 2).to_i(16)
    when "u"
      # This can potentially lose information in the case of characters
      # like \u0041, but String.fromCharCode(0x41) yields the same
      # character, so it shouldn't matter.
      rest.slice!(0, 1)
      codes << rest.slice!(0, 4).to_i(16)
    when "0".."7"
      # Octals are variable width: "\61" == "1" and "\610" == "10".
      # Javascript takes as many digits as still make a valid char and
      # treats the rest as literal text.
      digits = rest[0, 3].each_char.take_while { |c| c.between?("0", "7") }.join
      digits = digits[0, 2] if digits.to_i(8) > 255
      rest.slice!(0, digits.length)
      codes << digits.to_i(8)
    when "\n", "\r"
      # line continuation: contributes no character at all
      rest.slice!(0, 1)
      rest.slice!(0, 1) if marker == "\r" && rest[0, 1] == "\n"
    else
      rest.slice!(0, 1)
      if symbolic.key?(marker)
        codes << symbolic[marker]
      else
        # quotes, backslash, space, "/" ... stand for themselves
        codes.concat(code_units.call(marker))
      end
    end
  end

  "String.fromCharCode(#{codes.map { |code| rand_base(code) }.join(',')})"
end
end
end
end
=begin
if __FILE__ == $0
if ARGV[0]
code = File.read(ARGV[0])
else
#require 'rex/exploitation/javascriptosdetect'
#code = Rex::Exploitation::JavascriptOSDetect.new.to_s
code = <<-EOS
// Should alert "0123456789"
var a = "0\\612\\063\\x34\\x35\\x36\\x37\\x38\\u0039";
var a,b=2,c=3;
alert(a);
// should alert "asdfjkl;"
var d = (function() { var foo = "jkl;", blah = "asdf"; return blah + foo; })();
alert(d);
EOS
end
js = Rex::Exploitation::JSObfu.new(code)
js.obfuscate
puts js.to_s
end
=end