##
# $Id$
##

require 'rex/text'
require 'rkelly'

module Rex
module Exploitation

#
# Obfuscate JavaScript by randomizing as much as possible and removing
# easily-signaturable string constants.
#
# Example:
#   js = ::Rex::Exploitation::JSObfu.new %Q|
#     var a = "0\\612\\063\\x34\\x35\\x36\\x37\\x38\\u0039";
#     var b = { foo : "foo", bar : "bar" }
#     alert(a);
#     alert(b.foo);
#   |
#   js.obfuscate
#   puts js
# Example Output:
#   var VwxvESbCgv = String.fromCharCode(0x30,0x31,062,063,064,53,0x36,067,070,0x39);
#   var ToWZPn = {
#     "\146\157\x6f": (function () { var yDyv="o",YnCL="o",Qcsa="f"; return Qcsa+YnCL+yDyv })(),
#     "\142ar": String.fromCharCode(0142,97,0162)
#   };
#   alert(VwxvESbCgv);
#   alert(ToWZPn.foo);
#
# NOTE: Variables MUST be declared with a 'var' statement BEFORE first use (or
# not at all) for this to generate correct code!  If variables are not declared
# they will not be randomized but the generated code will be correct.
#
# Bad Example Javascript:
#   a = "asdf"; // this variable hasn't been declared and will not be randomized
#   var a;
#   alert(a); // real js engines will alert "asdf" here
# Bad Example Obfuscated:
#   a = (function () { var hpHu="f",oyTm="asd"; return oyTm+hpHu })();
#   var zSrnHpEfJZtg;
#   alert(zSrnHpEfJZtg);
# Notice that the first usage of +a+ (before it was declared) is not
# randomized.  Thus, the obfuscated version will alert 'undefined' instead of
# "asdf".
#
class JSObfu

  #
  # ECMAScript reserved words.  A randomly generated identifier must never be
  # one of these, otherwise the emitted 'var' statement is a syntax error.
  #
  JS_RESERVED_WORDS = %w[
    break case catch class const continue debugger default delete do else
    enum export extends false finally for function if implements import in
    instanceof interface let new null package private protected public
    return static super switch this throw true try typeof var void while
    with yield
  ].freeze

  #
  # Single-character escapes that stand for a control character rather than
  # for the escaped character itself.
  #
  SYMBOLIC_ESCAPES = {
    "b" => 0x08,
    "t" => 0x09,
    "n" => 0x0a,
    "v" => 0x0b,
    "f" => 0x0c,
    "r" => 0x0d
  }.freeze

  #
  # Abstract Syntax Tree generated by RKelly::Parser#parse
  #
  attr_reader :ast

  #
  # Saves +code+ for later obfuscation with #obfuscate
  #
  # A private copy of +code+ is kept so that #<< neither modifies the
  # caller's string nor fails when the caller passed a frozen literal.
  #
  def initialize(code)
    @code  = code.dup
    @funcs = {}
    @vars  = {}
    @debug = false
  end

  #
  # Add +str+ to the un-obfuscated code.
  #
  # Calling this method after #obfuscate is undefined
  #
  def <<(str)
    @code << str
  end

  #
  # Return the (possibly obfuscated) code as a string.
  #
  # If #obfuscate has not been called before this, returns the parsed,
  # unobfuscated code.  This can be useful for example to remove comments and
  # standardize spacing.
  #
  def to_s
    parse unless @ast
    @ast.to_ecma
  end

  #
  # Return the obfuscated name of a symbol
  #
  # Variables are looked up first, then functions.  A name that was never
  # randomized is returned unchanged.
  #
  # You MUST call #obfuscate before this method!
  #
  def sym(lookup)
    @vars[lookup] || @funcs[lookup] || lookup
  end

  #
  # Parse and obfuscate
  #
  def obfuscate
    parse
    obfuscate_r(@ast)
  end

protected

  #
  # Recursive method to obfuscate the given +ast+.
  #
  # +ast+ should be the result of RKelly::Parser#parse (or, when recursing,
  # the array of statements held by a SourceElementsNode).  Nodes are
  # modified in place; the return value is always nil.
  #
  def obfuscate_r(ast)
    ast.each do |node|
      case node
      when nil
        nil

      when ::RKelly::Nodes::SourceElementsNode
        # Recurse
        obfuscate_r(node.value)

      when ::RKelly::Nodes::PropertyNode
        # Property names must be bare words or string literals NOT
        # expressions!  Can't use transform_string() here
        if node.name =~ /\A[a-zA-Z_][a-zA-Z0-9_]*\z/
          node.name = obfuscate_property_name(node.name)
        end

      # Variables
      when ::RKelly::Nodes::VarDeclNode
        @vars[node.name] ||= random_identifier
        node.name = @vars[node.name]

      when ::RKelly::Nodes::ParameterNode
        @vars[node.value] ||= random_identifier
        node.value = @vars[node.value]

      when ::RKelly::Nodes::ResolveNode
        node.value = @vars[node.value] if @vars[node.value]

      when ::RKelly::Nodes::DotAccessorNode
        target = node.value
        if target.kind_of?(::RKelly::Nodes::ResolveNode) && @vars[target.value]
          target.value = @vars[target.value]
        end

      # Functions
      when ::RKelly::Nodes::FunctionDeclNode
        # Functions can also act as objects, so store them in the vars
        # and the functions list so we can replace them in both places.
        #
        # The same declaration can be handed to us twice (once by the walk
        # over the tree and once by the SourceElementsNode recursion above),
        # so skip nodes whose name is already one of our generated names.
        unless @funcs.values.include?(node.value)
          original = node.value
          # Reuse an existing mapping so that a repeated declaration, or a
          # function sharing its name with an earlier 'var', ends up with
          # the same name that references to it are rewritten to.
          @funcs[original] ||= (@vars[original] || random_identifier)
          @vars[original]  ||= @funcs[original]
          node.value = @funcs[original]
        end

      when ::RKelly::Nodes::FunctionCallNode
        # The value of a FunctionCallNode is some sort of accessor node or
        # a ResolveNode, both of which are handled above, so there is
        # nothing to do here.

      # Transformers
      when ::RKelly::Nodes::NumberNode
        node.value = transform_number(node.value)

      when ::RKelly::Nodes::StringNode
        node.value = transform_string(node.value)
      end
    end

    nil
  end

  #
  # Generate an Abstract Syntax Tree (#ast) for later obfuscation
  #
  def parse
    parser = RKelly::Parser.new
    @ast = parser.parse(@code)
  end

  #
  # Return a random alphabetic identifier of +len+ characters that is
  # neither a JavaScript reserved word nor one of the names in +taken+.
  #
  # By default the length is random (3 to 14 characters) and every name
  # already handed out for a variable or function is considered taken.
  #
  def random_identifier(len = 3 + rand(12), taken = @vars.values + @funcs.values)
    loop do
      name = Rex::Text.rand_text_alpha(len)
      return name unless JS_RESERVED_WORDS.include?(name) || taken.include?(name)
    end
  end

  #
  # Return +name+ as a double-quoted JavaScript string literal in which each
  # byte is randomly written as a hex escape, an octal escape or itself.
  #
  # +name+ must consist of ASCII letters, digits and underscores only.
  #
  def obfuscate_property_name(name)
    escaped = name.bytes.map do |byte|
      case rand(3)
      when 0 then "\\x%02x" % byte
      when 1 then "\\#{byte.to_s(8)}"
      else byte.chr
      end
    end

    "\"#{escaped.join}\""
  end

  #
  # Convert a number to a random base (decimal, octal, or hexadecimal).
  #
  # Given 10 as input, the possible return values are:
  #   "10"
  #   "0xa"
  #   "012"
  #
  def rand_base(num)
    case rand(3)
    when 0; num.to_s
    when 1; "0%o" % num
    when 2; "0x%x" % num
    end
  end

  #
  # Return a mathematical expression that will evaluate to the given number
  # +num+.
  #
  # +num+ can be a float or an int of any size.  The parser never hands us a
  # negative number, but if one shows up it is emitted as the negation of
  # the transformed absolute value.  Anything that is not a number is
  # returned unchanged.
  #
  def transform_number(num)
    case num
    when Integer
      if num < 0
        "(-#{transform_number(-num)})"
      elsif num == 0
        r = rand(10) + 1
        "('#{Rex::Text.rand_text_alpha(r)}'.length - #{r})"
      elsif num < 10
        # use a random string.length for small numbers
        "'#{Rex::Text.rand_text_alpha(num)}'.length"
      else
        # num == a * divisor + b
        divisor = rand(num) + 1
        a = num / divisor
        b = num - (a * divisor)
        # recurse half the time for a
        a = (rand(2) == 0) ? transform_number(a) : rand_base(a)
        # recurse half the time for divisor
        divisor = (rand(2) == 0) ? transform_number(divisor) : rand_base(divisor)
        "(#{a}*#{divisor}+#{b})"
      end
    when Float
      if !num.finite?
        num.to_s
      elsif num < 0
        "(-#{transform_number(-num)})"
      else
        "(#{num - num.floor} + #{rand_base(num.floor)})"
      end
    else
      num
    end
  end

  #
  # Convert a javascript string into something that will generate that string.
  #
  # +str+ is the literal including its surrounding quotes.  Randomly calls
  # one of the +transform_string_*+ methods; an empty literal is returned
  # as-is.
  #
  def transform_string(str)
    # Not a quoted literal at all; nothing sensible to do with it
    return str if str.length < 2

    quote = str[0, 1]
    # pull off the quotes
    body = str[1, str.length - 2]
    return quote * 2 if body.empty?

    if rand(2) == 0
      transform_string_split_concat(body, quote)
    else
      transform_string_fromCharCode(body)
    end
  end

  #
  # Split a javascript string, +str+, without breaking escape sequences.
  #
  # The maximum length of each piece of the string is half the total length
  # of the string, ensuring we (almost) always split into at least two
  # pieces.  This won't always be true when given a string like "AA\x41",
  # where escape sequences artificially increase the total length (escape
  # sequences are considered a single character).
  #
  # Returns an array of two-element arrays.  The zeroeth element is a
  # randomly generated variable name (unique within the returned array and
  # never a reserved word), the first is a piece of the string contained in
  # +quote+s.  +str+ itself is left untouched.
  #
  # See #escape_length
  #
  def safe_split(str, quote)
    remaining = str.dup
    parts = []
    names = []
    max_len = remaining.length / 2

    until remaining.empty?
      len = 0
      loop do
        len += escape_length(remaining[len..-1]) || 1
        # if we've reached the end of the string, bail
        break unless remaining[len]
        break if len > max_len
        # randomize the length of each part
        break if rand(4) == 0
      end

      part = remaining.slice!(0, len)

      var = random_identifier(4, names)
      names << var
      parts.push([var, "#{quote}#{part}#{quote}"])
    end

    parts
  end

  #
  # Stolen from obfuscatejs.rb
  #
  # Determines the length of the escape sequence at the start of +str+.
  # Returns nil when +str+ does not start with a backslash.
  #
  def escape_length(str)
    return nil unless str[0, 1] == "\\"

    case str[1, 1]
    when "u" then 6 # unicode \u1234
    when "x" then 4 # hex, \x41
    when /[0-7]/    # octal, \123, \0
      digits = str[1, 3][/[0-7]{1,3}/]
      # Three digits that don't fit in a byte: only the first two belong
      # to the escape, the third is a literal character.
      digits = digits[0, 2] if digits.to_i(8) > 255
      1 + digits.length
    else
      2 # \" \n, etc.
    end
  end

  #
  # Split a javascript string, +str+, into multiple randomly-ordered parts
  # and return an anonymous javascript function that joins them in the
  # correct order.  This method can be called safely on strings containing
  # escape sequences.  See #safe_split.
  #
  def transform_string_split_concat(str, quote)
    parts = safe_split(str, quote)
    return quote * 2 if parts.empty?

    # Declare the pieces in random order, concatenate them in the real one
    declarations = parts.shuffle.map { |part| "#{part[0]}=#{part[1]}" }.join(",")
    concatenation = parts.map { |part| part[0] }.join("+")

    "(function () { var #{declarations}; return #{concatenation} })()"
  end

  # TODO
  #def transform_string_unescape(str)
  #  str
  #end

  #
  # Return a call to String.fromCharCode() with each char of the input as arguments
  #
  # +str+ is the body of a javascript string literal (without the quotes) and
  # is not modified.
  #
  # Example:
  #   input : "A\n"
  #   output: String.fromCharCode(0x41, 10)
  #
  def transform_string_fromCharCode(str)
    rest = str.dup
    codes = []

    until rest.empty?
      ch = rest.slice!(0, 1)
      codes.concat(ch == "\\" ? escape_code_units(rest) : char_code_units(ch))
    end

    "String.fromCharCode(#{codes.map { |code| rand_base(code) }.join(',')})"
  end

  #
  # Consume one escape sequence from the front of +rest+ (the backslash has
  # already been removed) and return the character codes it stands for.
  #
  # +rest+ is modified in place.
  #
  def escape_code_units(rest)
    esc = rest.slice!(0, 1)

    case esc
    when "", nil
      # A lone backslash at the very end; keep it as a literal backslash
      [0x5c]
    when "x"
      [rest.slice!(0, 2).to_i(16)]
    when "u"
      # This can potentially lose information in the case of characters
      # like \u0041, but since regular ascii is stored as unicode
      # internally, String.fromCharCode(0x41) will be represented as
      # 00 41 in memory anyway, so it shouldn't matter.
      [rest.slice!(0, 4).to_i(16)]
    when "0", "1", "2", "3", "4", "5", "6", "7"
      # Octals are a bit harder since they are variable width and don't
      # necessarily mean what you might think.  For example, "\61" == "1"
      # and "\610" == "10".  610 is a valid octal number, but not a valid
      # ascii character.  Javascript will interpret as much as it can as
      # a char and use the rest as a literal.
      digits = (esc + rest[0, 2])[/\A[0-7]{1,3}/]
      digits = digits[0, 2] if digits.to_i(8) > 255
      rest.slice!(0, digits.length - 1)
      [digits.to_i(8)]
    when "\n"
      # Line continuation contributes nothing to the string
      []
    when "\r"
      # Line continuation, possibly with a CRLF line ending
      rest.slice!(0, 1) if rest[0, 1] == "\n"
      []
    else
      if SYMBOLIC_ESCAPES.key?(esc)
        [SYMBOLIC_ESCAPES[esc]]
      else
        # Quotes, backslash, space and every other character simply
        # escape to themselves.
        char_code_units(esc)
      end
    end
  end

  #
  # Return the String.fromCharCode() argument(s) for the single character
  # +ch+: its code point, split into a surrogate pair when it does not fit
  # in 16 bits.  A character that is not valid in its encoding is
  # represented by its first byte.
  #
  def char_code_units(ch)
    return [ch.unpack("C").first] unless ch.valid_encoding?

    code = ch.ord
    return [code] if code <= 0xFFFF

    code -= 0x10000
    [0xD800 + (code >> 10), 0xDC00 + (code & 0x3FF)]
  end

end

end
end

=begin
if __FILE__ == $0
  if ARGV[0]
    code = File.read(ARGV[0])
  else
    #require 'rex/exploitation/javascriptosdetect'
    #code = Rex::Exploitation::JavascriptOSDetect.new.to_s
    code = <<-EOS
      // Should alert "0123456789"
      var a = "0\\612\\063\\x34\\x35\\x36\\x37\\x38\\u0039";
      var a,b=2,c=3;
      alert(a);
      // should alert "asdfjkl;"
      var d = (function() { var foo = "jkl;", blah = "asdf"; return blah + foo; })();
      alert(d);
    EOS
  end
  js = Rex::Exploitation::JSObfu.new(code)
  js.obfuscate
  puts js.to_s

end

=end