metasploit-framework/lib/rex/exploitation/jsobfu.rb

# -*- coding: binary -*-

require 'rex/text'
require 'rex/random_identifier_generator'
require 'rkelly'

module Rex
module Exploitation


#
# Obfuscate JavaScript by randomizing as much as possible and removing
# easily-signaturable string constants.
#
# Example:
#   js = ::Rex::Exploitation::JSObfu.new %Q|
#     var a = "0\\612\\063\\x34\\x35\\x36\\x37\\x38\\u0039";
#     var b = { foo : "foo", bar : "bar" }
#     alert(a);
#     alert(b.foo);
#   |
#   js.obfuscate
#   puts js
# Example Output:
#   var VwxvESbCgv = String.fromCharCode(0x30,0x31,062,063,064,53,0x36,067,070,0x39);
#   var ToWZPn = {
#     "\146\157\x6f": (function () { var yDyv="o",YnCL="o",Qcsa="f"; return Qcsa+YnCL+yDyv })(),
#     "\142ar": String.fromCharCode(0142,97,0162)
#   };
#   alert(VwxvESbCgv);
#   alert(ToWZPn.foo);
#
# NOTE: Variables MUST be declared with a 'var' statement BEFORE first use (or
# not at all) for this to generate correct code!  If variables are not declared
# they will not be randomized but the generated code will be correct.
#
# Bad Example Javascript:
#   a = "asdf"; // this variable hasn't been declared and will not be randomized
#   var a;
#   alert(a); // real js engines will alert "asdf" here
# Bad Example Obfuscated:
#   a = (function () { var hpHu="f",oyTm="asd"; return oyTm+hpHu })();
#   var zSrnHpEfJZtg;
#   alert(zSrnHpEfJZtg);
# Notice that the first usage of +a+ (before it was declared) is not
# randomized.  Thus, the obfuscated version will alert 'undefined' instead of
# "asdf".
#
class JSObfu

  # Words that must never be handed out as a random variable name because
  # JavaScript reserves them: keywords, future reserved words and the
  # +null+/+true+/+false+ literals.
  # source: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Reserved_Words
  RESERVED_KEYWORDS = %w(
    break case catch continue debugger default delete do else finally
    for function if in instanceof new return switch this throw try
    typeof var void while with class enum export extends import super
    implements interface let package private protected public static yield
    const await null true false
  ).freeze

  #
  # Abstract Syntax Tree generated by RKelly::Parser#parse.
  #
  # It is assigned by #parse (which #to_s and #obfuscate call); until then
  # it is nil.
  #
  attr_reader :ast

  #
  # Remember +code+ so that it can be parsed and obfuscated later on by
  # #obfuscate (or merely parsed by #to_s).
  #
  # Also sets up the empty rename tables and the generator used to produce
  # random identifiers.
  #
  def initialize(code)
    # Identifiers may start with a letter, "_" or "$"; subsequent characters
    # may additionally be digits.
    leading_chars = Rex::Text::Alpha+"_$"
    following_chars = Rex::Text::AlphaNumeric+"_$"

    @rand_gen = Rex::RandomIdentifierGenerator.new(
      :max_length => 15,
      :first_char_set => leading_chars,
      :char_set => following_chars
    )
    @debug = false
    @vars  = {}
    @funcs = {}
    @code = code
  end

  #
  # Append +str+ to the stored, not-yet-obfuscated source code.
  #
  # The effect of calling this method after #obfuscate is undefined.
  #
  def <<(str)
    @code.concat(str)
  end

  #
  # Render the current syntax tree back into JavaScript source.
  #
  # When nothing has been parsed yet, the code is parsed first, so calling
  # this without #obfuscate yields the parsed but unobfuscated code.  That
  # can be handy to strip comments and normalize spacing.
  #
  def to_s
    parse unless @ast
    @ast.to_ecma
  end

  #
  # Look up the obfuscated name of a symbol.
  #
  # Variables take precedence over functions; a name that was never renamed
  # is returned unchanged.
  #
  # You MUST call #obfuscate before this method!
  #
  def sym(lookup)
    @vars[lookup] || @funcs[lookup] || lookup
  end

  #
  # Parse the stored code, then obfuscate the resulting syntax tree in place.
  #
  def obfuscate
    parse
    tree = @ast
    obfuscate_r(tree)
  end

  # @return [String] a random name that is neither already assigned to
  #   another variable nor a reserved keyword
  def random_var_name
    loop do
      candidate = random_string
      next if @vars.has_value?(candidate)
      next if RESERVED_KEYWORDS.include?(candidate)
      return candidate
    end
  end

protected

  # @return [String] whatever the random identifier generator created in
  #   #initialize produces next
  def random_string
    generator = @rand_gen
    generator.generate
  end

  #
  # Recursive method to obfuscate the given +ast+.
  #
  # +ast+ should be the result of RKelly::Parser#parse (or anything that
  # responds to #each and yields RKelly nodes).  Nodes are rewritten in
  # place, depending on their class:
  #
  # * property names become quoted strings with randomly escaped characters
  # * declared variables, parameters and functions get random names, which
  #   are remembered in @vars / @funcs
  # * references to already-renamed names are updated
  # * number and string literals are replaced by equivalent expressions
  #
  # Names are only replaced once they are known, so a reference that is
  # visited before its declaration keeps its original name.
  #
  # Always returns nil.
  #
  def obfuscate_r(ast)
    ast.each do |node|
      #if node.respond_to? :value and node.value.kind_of? String and node.value =~ /bodyOnLoad/i
      #  $stdout.puts("bodyOnLoad: #{node.class}: #{node.value}")
      #end

      case node
      when nil
        nil

      when ::RKelly::Nodes::SourceElementsNode
        # Recurse into the elements held by this node
        obfuscate_r(node.value)

      #when ::RKelly::Nodes::ObjectLiteralNode
        # TODO
        #$stdout.puts(node.methods - Object.new.methods)
        #$stdout.puts(node.value.inspect)

      when ::RKelly::Nodes::PropertyNode
        # Property names must be bare words or string literals NOT
        # expressions!  Can't use transform_string() here
        #
        # Only bare-word names are touched.  They are turned into a
        # double-quoted string in which every byte is randomly written as a
        # hex escape, an octal escape or the literal character.
        if node.name =~ /^[a-zA-Z_][a-zA-Z0-9_]*$/
          n = '"'
          node.name.unpack("C*") { |c|
            case rand(3)
            when 0; n << "\\x%02x"%(c)
            when 1; n << "\\#{c.to_s 8}"
            when 2; n << [c].pack("C")
            end
          }
          n << '"'
          node.name = n
        end

      # Variables
      when ::RKelly::Nodes::VarDeclNode
        # First declaration of this name: pick a random replacement and
        # remember it.  Later declarations reuse the same replacement.
        if @vars[node.name].nil?
          @vars[node.name] = random_var_name
        end
        node.name = @vars[node.name]
      when ::RKelly::Nodes::ParameterNode
        # Parameters share the @vars table with declared variables
        if @vars[node.value].nil?
          @vars[node.value] = random_var_name
        end
        node.value = @vars[node.value]
      when ::RKelly::Nodes::ResolveNode
        # A reference to a name; only renamed when the name is already known
        #$stdout.puts("Resolve bodyOnload: #{@vars[node.value]}") if "bodyOnLoad" == node.value
        node.value = @vars[node.value] if @vars[node.value]
      when ::RKelly::Nodes::DotAccessorNode
        # Rename the object being accessed when it is a known variable; the
        # accessed member itself is left alone.
        case node.value
        when ::RKelly::Nodes::ResolveNode
          if @vars[node.value.value]
            node.value.value = @vars[node.value.value]
          end
        #else
        #  $stderr.puts("Non-resolve node as target of dotaccessor: #{node.value.class}")
        end

      # Functions
      when ::RKelly::Nodes::FunctionDeclNode
        #$stdout.puts("FunctionDecl: #{node.value}")
        # Functions can also act as objects, so store them in the vars
        # and the functions list so we can replace them in both places
        #
        # The second condition skips a name that is itself one of the
        # generated names -- presumably so that a node which is visited
        # again after being renamed is not renamed a second time.
        if @funcs[node.value].nil? and not @funcs.values.include?(node.value)
          @funcs[node.value] = random_var_name
          if @vars[node.value].nil?
            @vars[node.value] = @funcs[node.value]
          end
          node.value = @funcs[node.value]
        end
      when ::RKelly::Nodes::FunctionCallNode
        # The value of a FunctionCallNode is some sort of accessor node or a ResolveNode
        # so this is basically useless
        #$stdout.puts("Function call: #{node.name} => #{@funcs[node.name]}")
        #node.value = @funcs[node.value] if @funcs[node.value]

      # Transformers
      when ::RKelly::Nodes::NumberNode
        node.value = transform_number(node.value)
      when ::RKelly::Nodes::StringNode
        node.value = transform_string(node.value)
      else
        # Every other node type is left untouched
        #$stderr.puts "#{node.class}: #{node.value}"
        #$stderr.puts "#{node.class}"
      end

      #unless node.kind_of? ::RKelly::Nodes::SourceElementsNode
      #  $stderr.puts "#{node.class}: #{node.value}"
      #end
    end

    nil
  end

  #
  # Parse the stored code with RKelly and keep the resulting Abstract Syntax
  # Tree in #ast for later obfuscation.
  #
  def parse
    @ast = RKelly::Parser.new.parse(@code)
  end

  #
  # Render a number in a randomly chosen base (decimal, octal, or
  # hexadecimal).
  #
  # Given 10 as input, the possible return values are:
  #   "10"
  #   "012"
  #   "0xa"
  #
  def rand_base(num)
    pick = rand(3)
    if pick == 0
      num.to_s
    elsif pick == 1
      "0%o" % num
    elsif pick == 2
      "0x%x" % num
    end
  end

  #
  # Return a JavaScript expression (as a String) that evaluates to the given
  # number +num+.
  #
  # +num+ can be a float or an int, but should never be negative.  Any other
  # kind of value yields nil.
  #
  # Integers are matched with +Integer+ rather than +Fixnum+: the latter was
  # removed from recent Ruby versions and never covered very large values.
  #
  def transform_number(num)
    case num
    when Integer
      if num == 0
        # a random string's length minus that same length
        r = rand(10) + 1
        transformed = "('#{Rex::Text.rand_text_alpha(r)}'.length - #{r})"
      elsif num > 0 and num < 10
        # use a random string.length for small numbers
        transformed = "'#{Rex::Text.rand_text_alpha(num)}'.length"
      else
        # num == a * divisor + b
        divisor = rand(num) + 1
        a = num / divisor
        b = num - (a * divisor)
        # recurse half the time for a
        a = (rand(2) == 0) ? transform_number(a) : rand_base(a)
        # recurse half the time for divisor
        divisor = (rand(2) == 0) ? transform_number(divisor) : rand_base(divisor)
        transformed = "(#{a}*#{divisor}+#{b})"
      end
    when Float
      # fractional part, as is, plus the integral part in a random base
      whole = num.floor
      transformed = "(#{num - whole} + #{rand_base(whole)})"
    end

    transformed
  end

  #
  # Turn a quoted javascript string literal into an expression that builds
  # the same string.
  #
  # +str+ includes its surrounding quotes.  An empty literal is returned as
  # an empty literal; anything else is handed, without its quotes, to one of
  # the +transform_string_*+ methods picked at random.
  #
  def transform_string(str)
    quote = str[0,1]
    # everything between the quotes
    inner = str[1,str.length - 2]
    return quote + quote if inner.length == 0

    strategy = rand(2)
    if strategy == 0
      transform_string_split_concat(inner, quote)
    elsif strategy == 1
      transform_string_fromCharCode(inner)
    end
  end

  #
  # Split a javascript string, +str+, without breaking escape sequences.
  #
  # The maximum length of each piece of the string is half the total length
  # of the string, ensuring we (almost) always split into at least two
  # pieces.  This won't always be true when given a string like "AA\x41",
  # where escape sequences artificially increase the total length (escape
  # sequences are considered a single character).
  #
  # Returns an array of two-element arrays.  The zeroth element is a
  # randomly generated variable name, the first is a piece of the string
  # contained in +quote+s.  The variable names are unique within the
  # returned array and are never reserved keywords, so they can safely be
  # declared side by side in one +var+ statement.
  #
  # NOTE: +str+ is consumed; it is empty when this method returns.
  #
  # See #escape_length
  #
  def safe_split(str, quote)
    parts = []
    used_names = {}
    max_len = str.length / 2
    while str.length > 0
      len = 0
      loop do
        # advance over one character or one whole escape sequence
        e_len = escape_length(str[len..-1])
        e_len = 1 if e_len.nil?
        len += e_len
        # if we've reached the end of the string, bail
        break unless str[len]
        break if len > max_len
        # randomize the length of each part
        break if (rand(4) == 0)
      end

      part = str.slice!(0, len)

      # Four random letters can repeat, or spell a keyword such as "void";
      # either would break the generated code, so draw again in that case.
      var = Rex::Text.rand_text_alpha(4)
      while used_names[var] or RESERVED_KEYWORDS.include?(var)
        var = Rex::Text.rand_text_alpha(4)
      end
      used_names[var] = true

      parts.push( [ var, "#{quote}#{part}#{quote}" ] )
    end

    parts
  end

  #
  # Borrowed from obfuscatejs.rb
  #
  # Work out how many characters the escape sequence at the very start of
  # +str+ occupies, backslash included.  Returns nil when +str+ does not
  # start with a backslash.
  #
  def escape_length(str)
    return nil unless str[0,1] == "\\"

    marker = str[1,1]
    if marker == "u"
      # unicode \u1234
      6
    elsif marker == "x"
      # hex, \x41
      4
    elsif marker =~ /[0-7]/
      # octal, \123, \0 -- up to three digits, but only two when three
      # would exceed the largest single-byte value
      digits = str[1,3][/[0-7]{1,3}/]
      digits = str[1,3][/[0-7]{1,2}/] if digits.to_i(8) > 255
      1 + digits.length
    else
      # \" \n, etc.
      2
    end
  end

  #
  # Split a javascript string, +str+, into multiple parts and return an
  # anonymous javascript function that declares them in random order and
  # joins them in the correct order.  This method can be called safely on
  # strings containing escape sequences.  See #safe_split.
  #
  # When the split yields no parts at all (empty +str+), an empty string
  # literal is returned instead.
  #
  def transform_string_split_concat(str, quote)
    parts = safe_split(str, quote)
    return "#{quote}#{quote}" if parts.empty?

    # Declare the pieces in a shuffled order...
    declarations = parts.shuffle.map { |name, literal| "#{name}=#{literal}" }.join(",")
    # ...but concatenate them in their original order.
    expression = parts.map { |name, _literal| name }.join("+")

    "(function () { var #{declarations}; return #{expression} })()"
  end


  # TODO
  #def transform_string_unescape(str)
  #	str
  #end

  #
  # Return a call to String.fromCharCode() with each char of the input as
  # arguments, every argument being written in a random base.
  #
  # +str+ is the content of a javascript string literal, without its quotes
  # and with escape sequences still in source form.  It is not modified.
  #
  # Example:
  #   input : "A\n"
  #   output: String.fromCharCode(0x41,10)
  #
  def transform_string_fromCharCode(str)
    # Single-character escapes that stand for a control character
    simple_escapes = {
      "n" => 0x0a, "t" => 0x09, "r" => 0x0d,
      "b" => 0x08, "f" => 0x0c, "v" => 0x0b
    }

    # Work on a copy so the caller's string is left alone
    remaining = str.dup
    codes = []

    while remaining.length > 0
      if remaining[0,1] == "\\"
        # then this is an escape sequence and we need to deal with all
        # the special cases
        remaining.slice!(0,1)
        marker = remaining[0,1]
        case marker
        when "x"
          # Strip the x, the next two chars are the hex value
          remaining.slice!(0,1)
          char = remaining.slice!(0,2).to_i 16
        when "u"
          # This can potentially lose information in the case of
          # characters like \u0041, but since regular ascii is stored
          # as unicode internally, String.fromCharCode(0x41) will be
          # represented as 00 41 in memory anyway, so it shouldn't
          # matter.
          remaining.slice!(0,1)
          char = remaining.slice!(0,4).to_i 16
        when /[0-7]/
          # Octals are a bit harder since they are variable width and
          # don't necessarily mean what you might think. For example,
          # "\61" == "1" and "\610" == "10".  610 is a valid octal
          # number, but not a valid ascii character.  Javascript will
          # interpret as much as it can as a char and use the rest
          # as a literal.
          digits = remaining[/\A[0-7]{1,3}/]
          digits = digits[0,2] if digits.to_i(8) > 255
          char = digits.to_i 8
          remaining.slice!(0, digits.length)
        when "\r", "\n"
          # A backslash right before a line break is a line continuation
          # and contributes nothing to the string.
          remaining.slice!(0,1)
          remaining.slice!(0,1) if marker == "\r" and remaining[0,1] == "\n"
          next
        when ""
          # Lone backslash at the very end: keep it as a backslash
          char = 0x5c
        else
          # Symbolic escapes use their known value; any other escaped
          # character (quotes, backslash, "/", ...) stands for itself, so
          # let rand_base() below decide how to represent it.
          remaining.slice!(0,1)
          char = simple_escapes[marker] || marker.unpack("C").first
        end
      else
        char = remaining.slice!(0,1).unpack("C").first
      end
      codes << rand_base(char)
    end

    "String.fromCharCode(#{codes.join(',')})"
  end


end
end
end


=begin
if __FILE__ == $0
  if ARGV[0]
    code = File.read(ARGV[0])
  else
    #require 'rex/exploitation/javascriptosdetect'
    #code = Rex::Exploitation::JavascriptOSDetect.new.to_s
    code = <<-EOS
      // Should alert "0123456789"
      var a = "0\\612\\063\\x34\\x35\\x36\\x37\\x38\\u0039";
      var a,b=2,c=3;
      alert(a);
      // should alert "asdfjkl;"
      var d = (function() { var foo = "jkl;", blah = "asdf"; return blah + foo; })();
      alert(d);
    EOS
  end
  js = Rex::Exploitation::JSObfu.new(code)
  js.obfuscate
  puts js.to_s

end

=end