metasploit-framework/lib/metasm/metasm/main.rb

1319 lines
42 KiB
Ruby

# This file is part of Metasm, the Ruby assembly manipulation suite
# Copyright (C) 2006-2009 Yoann GUILLOT
#
# Licence is LGPL, see LICENCE in the top-level directory
module Metasm
VERSION = 0x0001 # major major minor minor
# superclass for all metasm exceptions
class Exception < RuntimeError ; end
# parse error
class ParseError < Exception ; end
# invalid exeformat signature
class InvalidExeFormat < Exception ; end
# cannot honor .offset specification, reloc fixup overflow
class EncodeError < Exception ; end
# holds context of a processor
# endianness, current mode, opcode list...
class CPU
attr_accessor :valid_args, :valid_props, :fields_mask
attr_accessor :endianness, :size
attr_accessor :generate_PIC
def opcode_list
@opcode_list ||= init_opcode_list
end
def opcode_list=(l) @opcode_list = l end
def initialize
@fields_mask = {}
@fields_shift= {}
@valid_args = {}
@valid_props = { :setip => true, :saveip => true, :stopexec => true }
@generate_PIC = true
end
# returns a hash opcode_name => array of opcodes with this name
def opcode_list_byname
@opcode_list_byname ||= opcode_list.inject({}) { |h, o| (h[o.name] ||= []) << o ; h }
end
# sets up the C parser : standard macro definitions, type model (size of int etc)
def tune_cparser(cp)
case @size
when 64; cp.lp64
when 32; cp.ilp32
when 16; cp.ilp16
end
cp.endianness = @endianness
cp.lexer.define_weak('_STDC', 1)
# TODO gcc -dM -E - </dev/null
tune_prepro(cp.lexer)
end
def tune_prepro(pp)
# TODO pp.define('BIGENDIAN')
end
# return a new AsmPreprocessor
def new_asmprepro(str='', exe=nil)
pp = AsmPreprocessor.new(str, exe)
tune_prepro(pp)
exe.tune_prepro(pp) if exe
pp
end
# returns a new & tuned C::Parser
def new_cparser
C::Parser.new(self)
end
# returns a new C::Compiler
def new_ccompiler(parser, exe=ExeFormat.new)
exe.cpu = self if not exe.instance_variable_get("@cpu")
C::Compiler.new(parser, exe)
end
def shortname
self.class.name.sub(/.*::/, '').downcase
end
# some userinterface wants to hilight a word, return a regexp
# useful for register aliases
# the regexp will be enclosed in \b and should not contain captures
def gui_hilight_word_regexp(word)
Regexp.escape(word)
end
# returns true if the name is invalid as a label name (eg register name)
def check_reserved_name(name)
end
end
# generic CPU, with no instructions, just size/endianness
class UnknownCPU < CPU
def initialize(size, endianness)
super()
@size, @endianness = size, endianness
end
end
# a cpu instruction 'formal' description
class Opcode
# the name of the instruction
attr_accessor :name
# formal description of arguments (array of cpu-specific symbols)
attr_accessor :args
# binary encoding of the opcode (integer for risc, array of bytes for cisc)
attr_accessor :bin
# list of bit fields in the binary encoding
# hash position => field
# position is bit shift for risc, [byte index, bit shift] for risc
# field is cpu-specific
attr_accessor :fields
# hash of opcode generic properties/restrictions (mostly property => true/false)
attr_accessor :props
# binary mask for decoding
attr_accessor :bin_mask
def initialize(name, bin=nil)
@name = name
@bin = bin
@args = []
@fields = {}
@props = {}
end
def basename
@name.sub(/\..*/, '')
end
def dup
o = Opcode.new(@name.dup, @bin)
o.bin = @bin.dup if @bin.kind_of?(::Array)
o.args = @args.dup
o.fields = @fields.dup
o.props = @props.dup
o
end
end
# defines an attribute self.backtrace (array of filename/lineno)
# and a method backtrace_str which dumps this array to a human-readable form
module Backtrace
# array [file, lineno, file, lineno]
# if file 'A' does #include 'B' you'll get ['A', linenoA, 'B', linenoB]
attr_accessor :backtrace
# builds a readable string from self.backtrace
def backtrace_str
Backtrace.backtrace_str(@backtrace)
end
# builds a readable backtrace string from an array of [file, lineno, file, lineno, ..]
def self.backtrace_str(ary)
return '' if not ary
i = ary.length
bt = ''
while i > 0
bt << ",\n\tincluded from " if ary[i]
i -= 2
bt << "#{ary[i].inspect} line #{ary[i+1]}"
end
bt
end
def exception(msg='syntax error')
ParseError.new "at #{backtrace_str}: #{msg}"
end
end
# an instruction: opcode name + arguments
class Instruction
# arguments (cpu-specific objects)
attr_accessor :args
# hash of prefixes (unused in simple cpus)
attr_accessor :prefix
# name of the associated opcode
attr_accessor :opname
# reference to the cpu which issued this instruction (used for rendering)
attr_accessor :cpu
include Backtrace
def initialize(cpu, opname=nil, args=[], pfx=nil, backtrace=nil)
@cpu = cpu
@opname = opname
@args = args
@prefix = pfx if pfx
@backtrace = backtrace
end
# duplicates the argument list and prefix hash
def dup
Instruction.new(@cpu, (@opname.dup if opname), @args.dup, (@prefix.dup if prefix), (@backtrace.dup if backtrace))
end
end
# all kind of data description (including repeated/uninitialized)
class Data
# maps data type to Expression parameters (signedness/bit size)
INT_TYPE = {'db' => :a8, 'dw' => :a16, 'dd' => :a32, 'dq' => :a64}
# an Expression, an Array of Data, a String, or :uninitialized
attr_accessor :data
# the data type, from INT_TYPE (TODO store directly Expression parameters ?)
attr_accessor :type
# the repetition count of the data parameter (dup constructs)
attr_accessor :count
include Backtrace
def initialize(type, data, count=1, backtrace=nil)
@data, @type, @count, @backtrace = data, type, count, backtrace
end
end
# a name for a location
class Label
attr_accessor :name
include Backtrace
def initialize(name, backtrace=nil)
@name, @backtrace = name, backtrace
end
end
# alignment directive
class Align
# the size to align to
attr_accessor :val
# the Data used to pad
attr_accessor :fillwith
include Backtrace
def initialize(val, fillwith=nil, backtrace=nil)
@val, @fillwith, @backtrace = val, fillwith, backtrace
end
end
# padding directive
class Padding
# Data used to pad
attr_accessor :fillwith
include Backtrace
def initialize(fillwith=nil, backtrace=nil)
@fillwith, @backtrace = fillwith, backtrace
end
end
# offset directive
# can be used to fix padding length or to assert some code/data compiled length
class Offset
# the assembler will arrange to make this pseudo-instruction
# be at this offset from beginning of current section
attr_accessor :val
include Backtrace
def initialize(val, backtrace=nil)
@val, @backtrace = val, backtrace
end
end
# the superclass of all real executable formats
# main methods:
# self.decode(str) => decodes the file format (imports/relocs/etc), no asm disassembly
# parse(source) => parses assembler source, fills self.source
# assemble => assembles self.source in binary sections/segments/whatever
# encode => builds imports/relocs tables, put all this together, links everything in self.encoded
class ExeFormat
# array of Data/Instruction/Align/Padding/Offset/Label, populated in parse
attr_accessor :cursource
# contains the binary version of the compiled program (EncodedData)
attr_accessor :encoded
# hash of labels generated by new_label
attr_accessor :unique_labels_cache
# initializes self.cpu, creates an empty self.encoded
def initialize(cpu=nil)
@cpu = cpu
@encoded = EncodedData.new
@unique_labels_cache = {}
end
attr_writer :cpu # custom reader
def cpu
@cpu ||= cpu_from_headers
end
# return the label name corresponding to the specified offset of the encodeddata, creates it if necessary
def label_at(edata, offset, base = '')
if not l = edata.inv_export[offset]
edata.add_export(l = new_label(base), offset)
end
l
end
# creates a new label, that is guaranteed to never be returned again as long as this object (ExeFormat) exists
def new_label(base = '')
base = base.dup.tr('^a-zA-Z0-9_', '_')
# use %x with absolute value to avoid negative number formatting
base = (base << '_uuid' << ('%08x' % base.object_id.abs)).freeze if base.empty? or @unique_labels_cache[base]
@unique_labels_cache[base] = true
base
end
# share self.unique_labels_cache with other, checks for conflicts, returns self
def share_namespace(other)
return self if other.unique_labels_cache.equal? @unique_labels_cache
raise "share_ns #{(other.unique_labels_cache.keys & @unique_labels_cache.keys).inspect}" if !(other.unique_labels_cache.keys & @unique_labels_cache.keys).empty?
@unique_labels_cache.update other.unique_labels_cache
other.unique_labels_cache = @unique_labels_cache
self
end
end
# superclass for classes similar to Expression
# must define #bind, #reduce_rec, #match_rec, #externals
class ExpressionType
def +(o) Expression[self, :+, o].reduce end
def -(o) Expression[self, :-, o].reduce end
end
# handle immediate values, and arbitrary arithmetic/logic expression involving variables
# boolean values are treated as in C : true is 1, false is 0
# TODO replace #type with #size => bits + #type => [:signed/:unsigned/:any/:floating]
# TODO handle floats
class Expression < ExpressionType
INT_SIZE = {}
INT_MIN = {}
INT_MAX = {}
[8, 16, 32, 64].each { |sz|
INT_SIZE["i#{sz}".to_sym] =
INT_SIZE["u#{sz}".to_sym] =
INT_SIZE["a#{sz}".to_sym] = sz
INT_MIN["a#{sz}".to_sym] =
INT_MIN["i#{sz}".to_sym] = -(1 << (sz-1)) # -0x8000
INT_MIN["u#{sz}".to_sym] = 0
INT_MAX["i#{sz}".to_sym] = (1 << (sz-1)) - 1 # 0x7fff
INT_MAX["a#{sz}".to_sym] =
INT_MAX["u#{sz}".to_sym] = (1 << sz) - 1 # 0xffff
}
# alternative constructor
# in operands order, and allows nesting using sub-arrays
# ex: Expression[[:-, 42], :*, [1, :+, [4, :*, 7]]]
# with a single argument, return it if already an Expression, else construct a new one (using unary +/-)
def self.[](l, op=nil, r=nil)
if not r # need to shift args
if not op
raise ArgumentError, 'invalid Expression[nil]' if not l
return l if l.kind_of? Expression
if l.kind_of?(::Numeric) and l < 0
r = -l
op = :'-'
else
r = l
op = :'+'
end
else
r = op
op = l
end
l = nil
else
l = self[*l] if l.kind_of?(::Array)
end
r = self[*r] if r.kind_of?(::Array)
new(op, r, l)
end
# checks if a given Expression/Integer is in the type range
# returns true if it is, false if it overflows, and nil if cannot be determined (eg unresolved variable)
def self.in_range?(val, type)
val = val.reduce if val.kind_of? self
return unless val.kind_of?(::Numeric)
if INT_MIN[type]
val == val.to_i and
val >= INT_MIN[type] and val <= INT_MAX[type]
end
end
# casts an unsigned value to a two-complement signed if the sign bit is set
def self.make_signed(val, bitlength)
case val
when Integer
val = val - (1 << bitlength) if val > 0 and val >> (bitlength - 1) == 1
when Expression
val = Expression[val, :-, [(1<<bitlength), :*, [[val, :>>, (bitlength-1)], :==, 1]]]
end
val
end
# the operator (symbol)
attr_accessor :op
# the lefthandside expression (nil for unary expressions)
attr_accessor :lexpr
# the righthandside expression
attr_accessor :rexpr
# basic constructor
# XXX funny args order, you should use +Expression[]+ instead
def initialize(op, rexpr, lexpr)
raise ArgumentError, "Expression: invalid arg order: #{[lexpr, op, rexpr].inspect}" if not op.kind_of?(::Symbol)
@op = op
@lexpr = lexpr
@rexpr = rexpr
end
# recursive check of equity using #==
# will not match 1+2 and 2+1
def ==(o)
# shortcircuit recursion
o.object_id == object_id or (o.kind_of?(Expression) and @op == o.op and @lexpr == o.lexpr and @rexpr == o.rexpr)
end
# make it useable as Hash key (see +==+)
def hash
(@lexpr.hash + @op.hash + @rexpr.hash) & 0x7fff_ffff
end
alias eql? ==
# returns a new Expression with all variables found in the binding replaced with their value
# does not check the binding's key class except for numeric
# calls lexpr/rexpr #bind if they respond_to? it
def bind(binding = {})
if binding[self]
return binding[self].dup
end
l = @lexpr
r = @rexpr
if l and binding[l]
raise "internal error - bound #{l.inspect}" if l.kind_of?(::Numeric)
l = binding[l]
elsif l.kind_of? ExpressionType
l = l.bind(binding)
end
if r and binding[r]
raise "internal error - bound #{r.inspect}" if r.kind_of?(::Numeric)
r = binding[r]
elsif r.kind_of? ExpressionType
r = r.bind(binding)
end
Expression.new(@op, r, l)
end
# bind in place (replace self.lexpr/self.rexpr with the binding value)
# only recurse with Expressions (does not use respond_to?)
def bind!(binding = {})
if @lexpr.kind_of?(Expression)
@lexpr.bind!(binding)
elsif @lexpr
@lexpr = binding[@lexpr] || @lexpr
end
if @rexpr.kind_of?(Expression)
@rexpr.bind!(binding)
elsif @rexpr
@rexpr = binding[@rexpr] || @rexpr
end
self
end
# reduce_lambda is a callback called after the standard reduction procedure for custom algorithms
# the lambda may return a new expression or nil (to keep the old expr)
# exemple: lambda { |e| e.lexpr if e.kind_of? Expression and e.op == :& and e.rexpr == 0xffff_ffff }
# returns old lambda
def self.reduce_lambda(&b)
old = @@reduce_lambda
@@reduce_lambda = b if block_given?
old
end
def self.reduce_lambda=(p)
@@reduce_lambda = p
end
@@reduce_lambda = nil
# returns a simplified copy of self
# can return an +Expression+ or a +Numeric+, may return self
# see +reduce_rec+ for simplifications description
# if given a block, it will temporarily overwrite the global @@reduce_lambda XXX THIS IS NOT THREADSAFE
def reduce(&b)
old_rp, @@reduce_lambda = @@reduce_lambda, b if b
case e = reduce_rec
when Expression, Numeric; e
else Expression[e]
end
ensure
@@reduce_lambda = old_rp if b
end
# resolves logic operations (true || false, etc)
# computes numeric operations (1 + 3)
# expands substractions to addition of the opposite
# reduces double-oppositions (-(-1) => 1)
# reduces addition of 0 and unary +
# canonicalize additions: put variables in the lhs, descend addition tree in the rhs => (a + (b + (c + 12)))
# make formal reduction if finds somewhere in addition tree (a) and (-a)
def reduce_rec
l = @lexpr.kind_of?(ExpressionType) ? @lexpr.reduce_rec : @lexpr
r = @rexpr.kind_of?(ExpressionType) ? @rexpr.reduce_rec : @rexpr
if @@reduce_lambda
l = @@reduce_lambda[l] || l if not @lexpr.kind_of? Expression
r = @@reduce_lambda[r] || r if not @rexpr.kind_of? Expression
end
v =
if r.kind_of?(::Numeric) and (not l or l.kind_of?(::Numeric))
case @op
when :+; l ? l + r : r
when :-; l ? l - r : -r
when :'!'; raise 'internal error' if l ; (r == 0) ? 1 : 0
when :'~'; raise 'internal error' if l ; ~r
when :'&&', :'||', :'>', :'<', :'>=', :'<=', :'==', :'!='
raise 'internal error' if not l
case @op
when :'&&'; (l != 0) && (r != 0)
when :'||'; (l != 0) || (r != 0)
when :'>' ; l > r
when :'>='; l >= r
when :'<' ; l < r
when :'<='; l <= r
when :'=='; l == r
when :'!='; l != r
end ? 1 : 0
else
l.send(@op, r)
end
elsif rp = @@reduce_op[@op]
rp[self, l, r]
end
ret = case v
when nil
# no dup if no new value
(r == :unknown or l == :unknown) ? :unknown :
((r == @rexpr and l == @lexpr) ? self : Expression.new(@op, r, l))
when Expression
(v.lexpr == :unknown or v.rexpr == :unknown) ? :unknown : v
else v
end
if @@reduce_lambda and ret.kind_of? ExpressionType and newret = @@reduce_lambda[ret] and newret != ret
if newret.kind_of? ExpressionType
ret = newret.reduce_rec
else
ret = newret
end
end
ret
end
@@reduce_op = {
:+ => lambda { |e, l, r| e.reduce_op_plus(l, r) },
:- => lambda { |e, l, r| e.reduce_op_minus(l, r) },
:'&&' => lambda { |e, l, r| e.reduce_op_andand(l, r) },
:'||' => lambda { |e, l, r| e.reduce_op_oror(l, r) },
:>> => lambda { |e, l, r| e.reduce_op_shr(l, r) },
:<< => lambda { |e, l, r| e.reduce_op_shl(l, r) },
:'!' => lambda { |e, l, r| e.reduce_op_not(l, r) },
:== => lambda { |e, l, r| e.reduce_op_eql(l, r) },
:'!=' => lambda { |e, l, r| e.reduce_op_neq(l, r) },
:^ => lambda { |e, l, r| e.reduce_op_xor(l, r) },
:& => lambda { |e, l, r| e.reduce_op_and(l, r) },
:| => lambda { |e, l, r| e.reduce_op_or(l, r) },
:* => lambda { |e, l, r| e.reduce_op_times(l, r) },
:/ => lambda { |e, l, r| e.reduce_op_div(l, r) },
:% => lambda { |e, l, r| e.reduce_op_mod(l, r) },
}
def self.reduce_op
@@reduce_op
end
def reduce_op_plus(l, r)
if not l; r # +x => x
elsif r == 0; l # x+0 => x
elsif l == :unknown or r == :unknown; :unknown
elsif l.kind_of?(::Numeric)
if r.kind_of? Expression and r.op == :+
# 1+(x+y) => x+(y+1)
Expression[r.lexpr, :+, [r.rexpr, :+, l]].reduce_rec
else
# 1+a => a+1
Expression[r, :+, l].reduce_rec
end
# (a+b)+foo => a+(b+foo)
elsif l.kind_of? Expression and l.op == :+; Expression[l.lexpr, :+, [l.rexpr, :+, r]].reduce_rec
elsif l.kind_of? Expression and r.kind_of? Expression and l.op == :% and r.op == :% and l.rexpr.kind_of?(::Integer) and l.rexpr == r.rexpr
Expression[[l.lexpr, :+, r.lexpr], :%, l.rexpr].reduce_rec
elsif l.kind_of? Expression and l.op == :- and not l.lexpr
reduce_rec_add_rec(r, l.rexpr)
elsif l.kind_of? Expression and r.kind_of? Expression and l.op == :& and r.op == :& and l.rexpr.kind_of?(::Integer) and r.rexpr.kind_of?(::Integer) and l.rexpr & r.rexpr == 0
# (a&0xf0)+(b&0x0f) => (a&0xf0)|(b&0x0f)
Expression[l, :|, r].reduce_rec
else
reduce_rec_add_rec(r, Expression.new(:-, l, nil))
end
end
def reduce_rec_add_rec(cur, neg_l)
if neg_l == cur
# -l found
0
elsif cur.kind_of?(Expression) and cur.op == :+
# recurse
if newl = reduce_rec_add_rec(cur.lexpr, neg_l)
Expression[newl, cur.op, cur.rexpr].reduce_rec
elsif newr = reduce_rec_add_rec(cur.rexpr, neg_l)
Expression[cur.lexpr, cur.op, newr].reduce_rec
end
end
end
def reduce_op_minus(l, r)
if l == :unknown or r == :unknown; :unknown
elsif not l and r.kind_of? Expression and (r.op == :- or r.op == :+)
if r.op == :- # no lexpr (reduced)
# -(-x) => x
r.rexpr
else # :+ and lexpr (r is reduced)
# -(a+b) => (-a)+(-b)
Expression.new(:+, Expression.new(:-, r.rexpr, nil), Expression.new(:-, r.lexpr, nil)).reduce_rec
end
elsif l.kind_of? Expression and l.op == :+ and l.lexpr == r
# shortcircuit for a common occurence [citation needed]
# (a+b)-a
l.rexpr
elsif l
# a-b => a+(-b)
Expression[l, :+, [:-, r]].reduce_rec
end
end
def reduce_op_andand(l, r)
if l == 0 # shortcircuit eval
0
elsif l == 1
Expression[r, :'!=', 0].reduce_rec
elsif r == 0
0 # XXX l could be a special ExprType with sideeffects ?
end
end
def reduce_op_oror(l, r)
if l.kind_of?(::Numeric) and l != 0 # shortcircuit eval
1
elsif l == 0
Expression[r, :'!=', 0].reduce_rec
elsif r == 0
Expression[l, :'!=', 0].reduce_rec
end
end
def reduce_op_shr(l, r)
if l == 0; 0
elsif r == 0; l
elsif l.kind_of? Expression and l.op == :>>
Expression[l.lexpr, :>>, [l.rexpr, :+, r]].reduce_rec
elsif r.kind_of? Integer and l.kind_of? Expression and [:&, :|, :^].include? l.op
# (a | b) << i => (a<<i | b<<i)
Expression[[l.lexpr, :>>, r], l.op, [l.rexpr, :>>, r]].reduce_rec
end
end
def reduce_op_shl(l, r)
if l == 0; 0
elsif r == 0; l
elsif l.kind_of? Expression and l.op == :<<
Expression[l.lexpr, :<<, [l.rexpr, :+, r]].reduce_rec
elsif l.kind_of? Expression and l.op == :>> and r.kind_of? Integer and l.rexpr.kind_of? Integer
# (a >> 1) << 1 == a & 0xfffffe
if r == l.rexpr
Expression[l.lexpr, :&, (-1 << r)].reduce_rec
elsif r > l.rexpr
Expression[[l.lexpr, :<<, r-l.rexpr], :&, (-1 << r)].reduce_rec
else
Expression[[l.lexpr, :>>, l.rexpr-r], :&, (-1 << r)].reduce_rec
end
elsif r.kind_of? Integer and l.kind_of? Expression and [:&, :|, :^].include? l.op
# (a | b) << i => (a<<i | b<<i)
Expression[[l.lexpr, :<<, r], l.op, [l.rexpr, :<<, r]].reduce_rec
end
end
NEG_OP = {:'==' => :'!=', :'!=' => :'==', :< => :>=, :> => :<=, :<= => :>, :>= => :<}
def reduce_op_not(l, r)
if r.kind_of? Expression and nop = NEG_OP[r.op]
Expression[r.lexpr, nop, r.rexpr].reduce_rec
end
end
def reduce_op_eql(l, r)
if l == r; 1
elsif r == 0 and l.kind_of? Expression and nop = NEG_OP[l.op]
Expression[l.lexpr, nop, l.rexpr].reduce_rec
elsif r == 1 and l.kind_of? Expression and NEG_OP[l.op]
l
elsif r == 0 and l.kind_of? Expression and l.op == :+
if l.rexpr.kind_of? Expression and l.rexpr.op == :- and not l.rexpr.lexpr
Expression[l.lexpr, :==, l.rexpr.rexpr].reduce_rec
elsif l.rexpr.kind_of?(::Integer)
Expression[l.lexpr, :==, -l.rexpr].reduce_rec
end
end
end
def reduce_op_neq(l, r)
if l == r; 0
end
end
def reduce_op_xor(l, r)
if l == :unknown or r == :unknown; :unknown
elsif l == 0; r
elsif r == 0; l
elsif l == r; 0
elsif r == 1 and l.kind_of? Expression and NEG_OP[l.op]
Expression[nil, :'!', l].reduce_rec
elsif l.kind_of?(::Numeric)
if r.kind_of? Expression and r.op == :^
# 1^(x^y) => x^(y^1)
Expression[r.lexpr, :^, [r.rexpr, :^, l]].reduce_rec
else
# 1^a => a^1
Expression[r, :^, l].reduce_rec
end
elsif l.kind_of? Expression and l.op == :^
# (a^b)^c => a^(b^c)
Expression[l.lexpr, :^, [l.rexpr, :^, r]].reduce_rec
elsif r.kind_of? Expression and r.op == :^
if r.rexpr == l
# a^(a^b) => b
r.lexpr
elsif r.lexpr == l
# a^(b^a) => b
r.rexpr
else
# a^(b^(c^(a^d))) => b^(a^(c^(a^d)))
# XXX ugly..
tr = r
found = false
while not found and tr.kind_of?(Expression) and tr.op == :^
found = true if tr.lexpr == l or tr.rexpr == l
tr = tr.rexpr
end
if found
Expression[r.lexpr, :^, [l, :^, r.rexpr]].reduce_rec
end
end
elsif l.kind_of?(Expression) and l.op == :& and l.rexpr.kind_of?(::Integer) and (l.rexpr & (l.rexpr+1)) == 0
if r.kind_of?(::Integer) and r & l.rexpr == r
# (a&0xfff)^12 => (a^12)&0xfff
Expression[[l.lexpr, :^, r], :&, l.rexpr].reduce_rec
elsif r.kind_of?(Expression) and r.op == :& and r.rexpr.kind_of?(::Integer) and r.rexpr == l.rexpr
# (a&0xfff)^(b&0xfff) => (a^b)&0xfff
Expression[[l.lexpr, :^, r.lexpr], :&, l.rexpr].reduce_rec
end
end
end
def reduce_op_and(l, r)
if l == 0 or r == 0; 0
elsif r == 1 and l.kind_of?(Expression) and [:'==', :'!=', :<, :>, :<=, :>=].include?(l.op)
l
elsif l == r; l
elsif l.kind_of?(Integer); Expression[r, :&, l].reduce_rec
elsif l.kind_of?(Expression) and l.op == :&; Expression[l.lexpr, :&, [l.rexpr, :&, r]].reduce_rec
elsif l.kind_of?(Expression) and [:|, :^].include?(l.op) and r.kind_of?(Integer) and (l.op == :| or (r & (r+1)) != 0)
# (a ^| b) & i => (a&i ^| b&i)
Expression[[l.lexpr, :&, r], l.op, [l.rexpr, :&, r]].reduce_rec
elsif r.kind_of?(::Integer) and l.kind_of?(Expression) and (r & (r+1)) == 0
# foo & 0xffff
case l.op
when :+, :^
if l.lexpr.kind_of?(Expression) and l.lexpr.op == :& and
l.lexpr.rexpr.kind_of?(::Integer) and l.lexpr.rexpr & r == r
# ((a&m) + b) & m => (a+b) & m
Expression[[l.lexpr.lexpr, l.op, l.rexpr], :&, r].reduce_rec
elsif l.rexpr.kind_of?(Expression) and l.rexpr.op == :& and
l.rexpr.rexpr.kind_of?(::Integer) and l.rexpr.rexpr & r == r
# (a + (b&m)) & m => (a+b) & m
Expression[[l.lexpr, l.op, l.rexpr.lexpr], :&, r].reduce_rec
else
Expression[l, :&, r]
end
when :|
# rol/ror composition
reduce_rec_composerol l, r
else
Expression[l, :&, r]
end
end
end
# a check to see if an Expr is the composition of two rotations (rol eax, 4 ; rol eax, 6 => rol eax, 10)
# this is a bit too ugly to stay in the main reduce_rec body.
def reduce_rec_composerol(e, mask)
m = Expression[['var', :sh_op, 'amt'], :|, ['var', :inv_sh_op, 'inv_amt']]
if vars = e.match(m, 'var', :sh_op, 'amt', :inv_sh_op, 'inv_amt') and vars[:sh_op] == {:>> => :<<, :<< => :>>}[vars[:inv_sh_op]] and
((vars['amt'].kind_of?(::Integer) and vars['inv_amt'].kind_of?(::Integer) and ampl = vars['amt'] + vars['inv_amt']) or
(vars['amt'].kind_of? Expression and vars['amt'].op == :% and vars['amt'].rexpr.kind_of?(::Integer) and
vars['inv_amt'].kind_of? Expression and vars['inv_amt'].op == :% and vars['amt'].rexpr == vars['inv_amt'].rexpr and ampl = vars['amt'].rexpr)) and
mask == (1<<ampl)-1 and vars['var'].kind_of? Expression and # it's a rotation
vars['var'].op == :& and vars['var'].rexpr == mask and
ivars = vars['var'].lexpr.match(m, 'var', :sh_op, 'amt', :inv_sh_op, 'inv_amt') and ivars[:sh_op] == {:>> => :<<, :<< => :>>}[ivars[:inv_sh_op]] and
((ivars['amt'].kind_of?(::Integer) and ivars['inv_amt'].kind_of?(::Integer) and ampl = ivars['amt'] + ivars['inv_amt']) or
(ivars['amt'].kind_of? Expression and ivars['amt'].op == :% and ivars['amt'].rexpr.kind_of?(::Integer) and
ivars['inv_amt'].kind_of? Expression and ivars['inv_amt'].op == :% and ivars['amt'].rexpr == ivars['inv_amt'].rexpr and ampl = ivars['amt'].rexpr))
if ivars[:sh_op] != vars[:sh_op]
# ensure the rotations are the same orientation
ivars[:sh_op], ivars[:inv_sh_op] = ivars[:inv_sh_op], ivars[:sh_op]
ivars['amt'], ivars['inv_amt'] = ivars['inv_amt'], ivars['amt']
end
amt = Expression[[vars['amt'], :+, ivars['amt']], :%, ampl]
invamt = Expression[[vars['inv_amt'], :+, ivars['inv_amt']], :%, ampl]
Expression[[[[ivars['var'], :&, mask], vars[:sh_op], amt], :|, [[ivars['var'], :&, mask], vars[:inv_sh_op], invamt]], :&, mask].reduce_rec
else
Expression[e, :&, mask]
end
end
def reduce_op_or(l, r)
if l == 0; r
elsif r == 0; l
elsif l == -1 or r == -1; -1
elsif l == r; l
elsif l.kind_of? Integer; Expression[r, :|, l].reduce_rec
elsif l.kind_of? Expression and l.op == :|
# (a|b)|c => a|(b|c)
Expression[l.lexpr, :|, [l.rexpr, :|, r]].reduce_rec
elsif l.kind_of? Expression and l.op == :& and r.kind_of? Expression and r.op == :& and l.lexpr == r.lexpr
# (a&b)|(a&c) => a&(b|c)
Expression[l.lexpr, :&, [l.rexpr, :|, r.rexpr]].reduce_rec
end
end
def reduce_op_times(l, r)
if l == 0 or r == 0; 0
elsif l == 1; r
elsif r == 1; l
elsif r.kind_of? Integer; Expression[r, :*, l].reduce_rec
elsif r.kind_of? Expression and r.op == :*; Expression[[l, :*, r.lexpr], :*, r.rexpr].reduce_rec
elsif l.kind_of? Integer and r.kind_of? Expression and r.op == :* and r.lexpr.kind_of? Integer; Expression[l*r.lexpr, :*, r.rexpr].reduce_rec # XXX need & regsize..
elsif l.kind_of? Integer and r.kind_of? Expression and r.op == :+ and r.rexpr.kind_of? Integer; Expression[[l, :*, r.lexpr], :+, l*r.rexpr].reduce_rec
end
end
def reduce_op_div(l, r)
if r == 0
elsif r.kind_of? Integer and l.kind_of? Expression and l.op == :+ and l.rexpr.kind_of? Integer and l.rexpr % r == 0
Expression[[l.lexpr, :/, r], :+, l.rexpr/r].reduce_rec
elsif r.kind_of? Integer and l.kind_of? Expression and l.op == :* and l.lexpr % r == 0
Expression[l.lexpr/r, :*, l.rexpr].reduce_rec
end
end
def reduce_op_mod(l, r)
if r.kind_of?(Integer) and r != 0 and (r & (r-1) == 0)
Expression[l, :&, r-1].reduce_rec
end
end
# a pattern-matching method
# Expression[42, :+, 28].match(Expression['any', :+, 28], 'any') => {'any' => 42}
# Expression[42, :+, 28].match(Expression['any', :+, 'any'], 'any') => false
# Expression[42, :+, 42].match(Expression['any', :+, 'any'], 'any') => {'any' => 42}
# vars can match anything except nil
def match(target, *vars)
match_rec(target, vars.inject({}) { |h, v| h.update v => nil })
end
def match_rec(target, vars)
return false if not target.kind_of? Expression
[target.lexpr, target.op, target.rexpr].zip([@lexpr, @op, @rexpr]) { |targ, exp|
if targ and vars[targ]
return false if exp != vars[targ]
elsif targ and vars.has_key? targ
return false if not vars[targ] = exp
elsif targ.kind_of? ExpressionType
return false if not exp.kind_of? ExpressionType or not exp.match_rec(targ, vars)
else
return false if targ != exp
end
}
vars
end
# returns the array of non-numeric members of the expression
# if a variables appears 3 times, it will be present 3 times in the returned array
def externals
a = []
[@rexpr, @lexpr].each { |e|
case e
when ExpressionType; a.concat e.externals
when nil, ::Numeric; a
else a << e
end
}
a
end
# returns the externals that appears in the expression, does not walk through other ExpressionType
def expr_externals(include_exprs=false)
a = []
[@rexpr, @lexpr].each { |e|
case e
when Expression; a.concat e.expr_externals(include_exprs)
when nil, ::Numeric; a
when ExpressionType; include_exprs ? a << e : a
else a << e
end
}
a
end
def inspect
"Expression[#{@lexpr.inspect.sub(/^Expression/, '') + ', ' if @lexpr}#{@op.inspect + ', ' if @lexpr or @op != :+}#{@rexpr.inspect.sub(/^Expression/, '')}]"
end
Unknown = self[:unknown]
end
# An Expression with a custom string representation
# used to show #define constants, struct offsets, func local vars, etc
class ExpressionString < ExpressionType
attr_accessor :expr, :str, :type, :hide_str
def reduce; expr.reduce; end
def reduce_rec; expr.reduce_rec; end
def bind(*a); expr.bind(*a); end
def externals; expr.externals; end
def expr_externals; expr.expr_externals; end
def match_rec(*a); expr.match_rec(*a); end
def initialize(expr, str, type=nil)
@expr = Expression[expr]
@str = str
@type = type
end
def render_str ; [str] ; end
def inspect ; "ExpressionString.new(#{@expr.inspect}, #{str.inspect}, #{type.inspect})" ; end
end
# an EncodedData relocation, specifies a value to patch in
class Relocation
# the relocation value (an Expression)
attr_accessor :target
# the relocation expression type
attr_accessor :type
# the endianness of the relocation
attr_accessor :endianness
include Backtrace
def initialize(target, type, endianness, backtrace = nil)
raise ArgumentError, "bad args #{[target, type, endianness].inspect}" if not target.kind_of? Expression or not type.kind_of?(::Symbol) or not endianness.kind_of?(::Symbol)
@target, @type, @endianness, @backtrace = target, type, endianness, backtrace
end
# fixup the encodeddata with value (reloc starts at off)
def fixup(edata, off, value)
str = Expression.encode_imm(value, @type, @endianness, @backtrace)
edata.fill off
edata.data[off, str.length] = str
end
# size of the relocation field, in bytes
def length
Expression::INT_SIZE[@type]/8
end
end
# a String-like, with export/relocation informations added
class EncodedData
# string with raw data
attr_accessor :data
# hash, key = offset within data, value = +Relocation+
attr_accessor :reloc
# hash, key = export name, value = offset within data - use add_export to update
attr_accessor :export
# hash, key = offset, value = 1st export name
attr_accessor :inv_export
# virtual size of data (all 0 by default, see +fill+)
attr_accessor :virtsize
# arbitrary pointer, often used when decoding immediates
# may be initialized with an export value
attr_reader :ptr # custom writer
def ptr=(p) @ptr = @export[p] || p end
# opts' keys in :reloc, :export, :virtsize, defaults to empty/empty/data.length
def initialize(data='', opts={})
if data.respond_to?(:force_encoding) and data.encoding.name != 'ASCII-8BIT' and data.length > 0
puts "Forcing edata.data.encoding = BINARY at", caller if $DEBUG
data = data.dup.force_encoding('binary')
end
@data = data
@reloc = opts[:reloc] || {}
@export = opts[:export] || {}
@inv_export = @export.invert
@virtsize = opts[:virtsize] || @data.length
@ptr = 0
end
def add_export(label, off=@ptr, set_inv=false)
@export[label] = off
if set_inv or not @inv_export[off]
@inv_export[off] = label
end
label
end
def del_export(label, off=@export[label])
@export.delete label
if e = @export.index(off)
@inv_export[off] = e
else
@inv_export.delete off
end
end
# returns the size of raw data, that is [data.length, last relocation end].max
def rawsize
[@data.length, *@reloc.map { |off, rel| off + rel.length } ].max
end
# String-like
alias length virtsize
# String-like
alias size virtsize
def empty?
@virtsize == 0
end
def eos?
ptr.to_i >= @virtsize
end
# returns a copy of itself, with reloc/export duped (but not deep)
def dup
self.class.new @data.dup, :reloc => @reloc.dup, :export => @export.dup, :virtsize => @virtsize
end
# resolve relocations:
# calculate each reloc target using Expression#bind(binding)
# if numeric, replace the raw data with the encoding of this value (+fill+s preceding data if needed) and remove the reloc
# if replace_target is true, the reloc target is replaced with its bound counterpart
def fixup_choice(binding, replace_target)
return if binding.empty?
@reloc.keys.each { |off|
val = @reloc[off].target.bind(binding).reduce
if val.kind_of? Integer
reloc = @reloc[off]
reloc.fixup(self, off, val)
@reloc.delete(off) # delete only if not overflowed
elsif replace_target
@reloc[off].target = val
end
}
end
# +fixup_choice+ binding, false
def fixup(binding)
fixup_choice(binding, false)
end
# +fixup_choice+ binding, true
def fixup!(binding)
fixup_choice(binding, true)
end
# returns a default binding suitable for use in +fixup+
# every export is expressed as base + offset
# base defaults to the first export name + its offset
def binding(base = nil)
if not base
key = @export.index(@export.values.min)
return {} if not key
base = (@export[key] == 0 ? key : Expression[key, :-, @export[key]])
end
binding = {}
@export.each { |n, o| binding.update n => Expression.new(:+, o, base) }
binding
end
# returns an array of variables that needs to be defined for a complete #fixup
# ie the list of externals for all relocations
def reloc_externals(interns = @export.keys)
@reloc.values.map { |r| r.target.externals }.flatten.uniq - interns
end
# returns the offset where the relocation for target t is to be applied
def offset_of_reloc(t)
t = Expression[t]
@reloc.keys.find { |off| @reloc[off].target == t }
end
# fill virtual space by repeating pattern (String) up to len
# expand self if len is larger than self.virtsize
def fill(len = @virtsize, pattern = [0].pack('C'))
@virtsize = len if len > @virtsize
@data = @data.to_str.ljust(len, pattern) if len > @data.length
end
# rounds up virtsize to next multiple of len
def align(len, pattern=nil)
@virtsize = EncodedData.align_size(@virtsize, len)
fill(@virtsize, pattern) if pattern
end
# returns the value val rounded up to next multiple of len
def self.align_size(val, len)
return val if len == 0
((val + len - 1) / len).to_i * len
end
# concatenation of another +EncodedData+ (or nil/Fixnum/anything supporting String#<<)
def <<(other)
case other
when nil
when ::Fixnum
fill
@data = @data.to_str if not @data.kind_of? String
@data << other
@virtsize += 1
when EncodedData
fill if not other.data.empty?
other.reloc.each { |k, v| @reloc[k + @virtsize] = v } if not other.reloc.empty?
if not other.export.empty?
other.export.each { |k, v|
if @export[k] and @export[k] != v + @virtsize
cf = (other.export.keys & @export.keys).find_all { |k_| other.export[k_] != @export[k_] - @virtsize }
raise "edata merge: label conflict #{cf.inspect}"
end
@export[k] = v + @virtsize
}
other.inv_export.each { |k, v| @inv_export[@virtsize + k] = v }
end
if @data.empty?; @data = other.data.dup
elsif not @data.kind_of?(String); @data = @data.to_str << other.data
else @data << other.data
end
@virtsize += other.virtsize
else
fill
if other.respond_to?(:force_encoding) and other.encoding.name != 'ASCII-8BIT'
puts "Forcing edata.data.encoding = BINARY at", caller if $DEBUG
other = other.dup.force_encoding('binary')
end
if @data.empty?; @data = other.dup
elsif not @data.kind_of?(String); @data = @data.to_str << other
else @data << other
end
@virtsize += other.length
end
self
end
# equivalent to dup << other, filters out Integers & nil
def +(other)
raise ArgumentError if not other or other.kind_of?(Integer)
dup << other
end
# slice
def [](from, len=nil)
if not len and from.kind_of? Range
b = from.begin
e = from.end
b = @export[b] if @export[b]
e = @export[e] if @export[e]
b = b + @virtsize if b < 0
e = e + @virtsize if e < 0
len = e - b
len += 1 if not from.exclude_end?
from = b
end
from = @export[from] if @export[from]
from = from + @virtsize if from < 0
return if from > @virtsize or from < 0
return @data[from] if not len
len = @virtsize - from if from+len > @virtsize
ret = EncodedData.new @data[from, len]
ret.virtsize = len
@reloc.each { |o, r|
ret.reloc[o - from] = r if o >= from and o + r.length <= from+len
}
@export.each { |e_, o|
ret.export[e_] = o - from if o >= from and o <= from+len # XXX include end ?
}
@inv_export.each { |o, e_|
ret.inv_export[o-from] = e_ if o >= from and o <= from+len
}
ret
end
# slice replacement, supports size change (shifts following relocs/exports)
# discards old exports/relocs from the overwritten space
def []=(from, len, val=nil)
if not val
val = len
len = nil
end
if not len and from.kind_of?(::Range)
b = from.begin
e = from.end
b = @export[b] if @export[b]
e = @export[e] if @export[e]
b = b + @virtsize if b < 0
e = e + @virtsize if e < 0
len = e - b
len += 1 if not from.exclude_end?
from = b
end
from = @export[from] || from
raise "invalid offset #{from}" if not from.kind_of?(::Integer)
from = from + @virtsize if from < 0
if not len
val = val.chr if val.kind_of?(::Integer)
len = val.length
end
raise "invalid slice length #{len}" if not len.kind_of?(::Integer) or len < 0
if from >= @virtsize
len = 0
elsif from+len > @virtsize
len = @virtsize-from
end
val = EncodedData.new << val
# remove overwritten metadata
@export.delete_if { |name, off| off > from and off < from + len }
@reloc.delete_if { |off, rel| off - rel.length > from and off < from + len }
# shrink/grow
if val.length != len
diff = val.length - len
@export.keys.each { |name| @export[name] = @export[name] + diff if @export[name] > from }
@inv_export.keys.each { |off| @inv_export[off+diff] = @inv_export.delete(off) if off > from }
@reloc.keys.each { |off| @reloc[off + diff] = @reloc.delete(off) if off > from }
if @virtsize >= from+len
@virtsize += diff
end
end
@virtsize = from + val.length if @virtsize < from + val.length
if from + len < @data.length # patch real data
val.fill
@data[from, len] = val.data
elsif not val.data.empty? # patch end of real data
@data << ([0].pack('C')*(from-@data.length)) if @data.length < from
@data[from..-1] = val.data
else # patch end of real data with fully virtual
@data = @data[0, from]
end
val.export.each { |name, off| @export[name] = from + off }
val.inv_export.each { |off, name| @inv_export[from+off] = name }
val.reloc.each { |off, rel| @reloc[from + off] = rel }
end
# replace a portion of self
# from/to may be Integers (offsets) or labels (from self.export)
# content is a String or an EncodedData, which will be inserted in the specified location (padded if necessary)
# raise if the string does not fit in.
def patch(from, to, content)
from = @export[from] || from
raise "invalid offset specification #{from}" if not from.kind_of? Integer
to = @export[to] || to
raise "invalid offset specification #{to}" if not to.kind_of? Integer
raise EncodeError, 'cannot patch data: new content too long' if to - from < content.length
self[from, content.length] = content
end
# returns a list of offsets where /pat/ can be found inside @data
# scan is done per chunk of chunksz bytes, with a margin for chunk-overlapping patterns
# yields each offset found, and only include it in the result if the block returns !false
def pattern_scan(pat, chunksz=nil, margin=nil)
chunksz ||= 4*1024*1024 # scan 4MB at a time
margin ||= 65536 # add this much bytes at each chunk to find /pat/ over chunk boundaries
pat = Regexp.new(Regexp.escape(pat)) if pat.kind_of?(::String)
found = []
chunkoff = 0
while chunkoff < @data.length
chunk = @data[chunkoff, chunksz+margin].to_str
off = 0
while match = chunk[off..-1].match(pat)
off += match.pre_match.length
m_l = match[0].length
break if off >= chunksz # match fully in margin
match_addr = chunkoff + off
found << match_addr if not block_given? or yield(match_addr)
off += m_l
end
chunkoff += chunksz
end
found
end
end
end