564 lines
15 KiB
Ruby
564 lines
15 KiB
Ruby
# This file is part of Metasm, the Ruby assembly manipulation suite
|
|
# Copyright (C) 2006-2009 Yoann GUILLOT
|
|
#
|
|
# Licence is LGPL, see LICENCE in the top-level directory
|
|
|
|
# This sample hacks in the ruby interpreter to allow dynamic loading of shellcodes as object methods
|
|
# Also it allows raw modifications to the ruby interpreter memory, for all kind of purposes
|
|
# Includes methods to dump the ruby parser AST from the interpreter memory
|
|
# elf/linux/x86 only
|
|
|
|
require 'metasm'
|
|
|
|
|
|
module Metasm
|
|
module RubyHack
|
|
# directory where load_bootstrap stores (and reloads) the compiled metasm_binload.so
CACHEDIR = File.expand_path('~/.metasm/jit_cache/')
|
|
# basic C defs for ruby internals - 1.8 only !
|
|
RUBY_H = <<EOS
|
|
typedef unsigned long VALUE;
|
|
|
|
struct st_table;
|
|
|
|
struct klass {
|
|
long flags;
|
|
VALUE klass;
|
|
struct st_table *iv_tbl;
|
|
struct st_table *m_tbl;
|
|
VALUE super;
|
|
};
|
|
#define RClass(x) ((struct klass *)(x))
|
|
#define RModule RClass
|
|
|
|
struct string {
|
|
long flags;
|
|
VALUE klass;
|
|
long len;
|
|
char *ptr;
|
|
union {
|
|
long capa;
|
|
VALUE shared;
|
|
} aux;
|
|
};
|
|
#define RString(x) ((struct string *)(x))
|
|
|
|
struct node {
|
|
long flags;
|
|
char *file;
|
|
long a1;
|
|
long a2;
|
|
long a3;
|
|
};
|
|
#define FL_USHIFT 11
|
|
#define nd_type(n) ((((struct node*)n)->flags >> FL_USHIFT) & 0xff)
|
|
|
|
extern VALUE rb_cObject;
|
|
extern VALUE rb_eRuntimeError;
|
|
#define Qfalse ((VALUE)0)
|
|
#define Qtrue ((VALUE)2)
|
|
#define Qnil ((VALUE)4)
|
|
#define FIX2LONG(x) (((long)x) >> 1)
|
|
|
|
VALUE rb_uint2inum(unsigned long);
|
|
unsigned long rb_num2ulong(VALUE);
|
|
|
|
VALUE rb_str_new(const char* ptr, long len); // alloc + memcpy + 0term
|
|
|
|
int rb_intern(char *);
|
|
VALUE rb_funcall(VALUE recv, int id, int nargs, ...);
|
|
VALUE rb_const_get(VALUE, int);
|
|
VALUE rb_raise(VALUE, char*);
|
|
void rb_define_method(VALUE, char *, VALUE (*)(), int);
|
|
void rb_define_singleton_method(VALUE, char *, VALUE (*)(), int);
|
|
int rb_to_id(VALUE);
|
|
struct node* rb_method_node(VALUE klass, int id);
|
|
VALUE rb_str_new(char*, int);
|
|
|
|
|
|
// TODO setup those vars auto or define a standard .import/.export (elf/pe/macho)
|
|
#ifdef METASM_TARGET_ELF
|
|
asm .global "rb_cObject" undef type=NOTYPE; // TODO fix elf encoder to not need this
|
|
asm .global "rb_eRuntimeError" undef type=NOTYPE;
|
|
#endif
|
|
EOS
|
|
# symbolic names of the interpreter node types, indexed by the numeric type
# that read_node extracts from a node's flags word
# (index 50 is :ivar, index 100 is :memo)
NODETYPE = %w[
  method fbody cfunc scope block
  if case when opt_n while
  until iter for break next
  redo retry begin rescue resbody
  ensure and or not masgn
  lasgn dasgn dasgn_curr gasgn iasgn
  cdecl cvasgn cvdecl op_asgn1 op_asgn2
  op_asgn_and op_asgn_or call fcall vcall
  super zsuper array zarray hash
  return yield lvar dvar gvar
  ivar const cvar nth_ref back_ref
  match match2 match3 lit str
  dstr xstr dxstr evstr dregx
  dregx_once args argscat argspush splat
  to_ary svalue block_arg block_pass defn
  defs alias valias undef class
  module sclass colon2 colon3 cref
  dot2 dot3 flip2 flip3 attrset
  self nil true false defined
  newline postexe alloca dmethod bmethod
  memo ifunc dsym attrasgn last
].map { |name| name.to_sym }
|
|
|
|
|
|
# create and load a ruby module that allows
# to use a ruby string as the binary code implementing a ruby method
# enable the use of .load_binary_method(class, methodname, string)
#
# The C source below is compiled to an ELF shared object cached in CACHEDIR
# (rebuilt whenever this file is newer than the cached object) and then
# required. Its Init function registers these singleton methods on
# Metasm::RubyHack: set_class_method_raw, memory_read, memory_write,
# memory_read_int, memory_write_int, get_method_node_ptr, dlsym and id2ref.
def self.load_bootstrap
  c_source = <<EOS
#define METASM_TARGET_ELF

#{RUBY_H}

void mprotect(int, int, int);
asm .global mprotect undef;

static VALUE set_class_method_raw(VALUE self, VALUE klass, VALUE methname, VALUE rawcode, VALUE nparams)
{
	int raw = (int)RString(rawcode)->ptr;
	mprotect(raw & 0xfffff000, ((raw+RString(rawcode)->len+0xfff) & 0xfffff000) - (raw&0xfffff000), 7);	// RWX
	rb_define_method(klass, RString(methname)->ptr, RString(rawcode)->ptr, FIX2LONG(nparams));
	return Qtrue;
}

static VALUE memory_read(VALUE self, VALUE addr, VALUE len)
{
	return rb_str_new((char*)rb_num2ulong(addr), (int)rb_num2ulong(len));
}

static VALUE memory_write(VALUE self, VALUE addr, VALUE val)
{
	char *src = RString(val)->ptr;
	char *dst = (char*)rb_num2ulong(addr);
	int len = RString(val)->len;
	while (len--)
		*dst++ = *src++;
	return val;
}

static VALUE memory_read_int(VALUE self, VALUE addr)
{
	return rb_uint2inum(*(unsigned long*)rb_num2ulong(addr));
}

static VALUE memory_write_int(VALUE self, VALUE addr, VALUE val)
{
	*(unsigned long*)rb_num2ulong(addr) = rb_num2ulong(val);
	return val;
}

extern void *dlsym(int handle, char *symname);
#define RTLD_DEFAULT 0
asm .global dlsym undef;

static VALUE dl_dlsym(VALUE self, VALUE symname)
{
	return rb_uint2inum((unsigned)dlsym(RTLD_DEFAULT, RString(symname)->ptr));
}

static VALUE get_method_node_ptr(VALUE self, VALUE klass, VALUE id)
{
	return rb_uint2inum((unsigned)rb_method_node(klass, rb_to_id(id)));
}

static VALUE id2ref(VALUE self, VALUE id)
{
	return rb_num2ulong(id);
}

int Init_metasm_binload(void)
{
	VALUE metasm = rb_const_get(rb_cObject, rb_intern("Metasm"));
	VALUE rubyhack = rb_const_get(metasm, rb_intern("RubyHack"));
	rb_define_singleton_method(rubyhack, "set_class_method_raw", set_class_method_raw, 4);
	rb_define_singleton_method(rubyhack, "memory_read", memory_read, 2);
	rb_define_singleton_method(rubyhack, "memory_write", memory_write, 2);
	rb_define_singleton_method(rubyhack, "memory_read_int", memory_read_int, 1);
	rb_define_singleton_method(rubyhack, "memory_write_int", memory_write_int, 2);
	rb_define_singleton_method(rubyhack, "get_method_node_ptr", get_method_node_ptr, 2);
	rb_define_singleton_method(rubyhack, "dlsym", dl_dlsym, 1);
	rb_define_singleton_method(rubyhack, "id2ref", id2ref, 1);
	return 0;
}
asm .global Init_metasm_binload;

asm .soname "metasm_binload";
asm .nointerp;
asm .pt_gnu_stack rw;
EOS

  # create the cache directory without going through a shell, so that a home
  # directory containing spaces or shell metacharacters is handled correctly
  require 'fileutils'
  FileUtils.mkdir_p(CACHEDIR) unless File.directory?(CACHEDIR)

  stat = File.stat(__FILE__)	# may be relative, do it before chdir
  Dir.chdir(CACHEDIR) {
    # rebuild when the cached object is missing or older than this source file
    if not File.exist? 'metasm_binload.so' or File.stat('metasm_binload.so').mtime < stat.mtime
      compile_c(c_source, ELF).encode_file('metasm_binload.so')
    end
    require 'metasm_binload'
  }
  # TODO Windows support
  # TODO PaX support (write + mmap, in user-configurable dir?)
end
|
|
|
|
# returns the cpu object used for every compilation in this module;
# a single Ia32 instance is created on first use and reused afterwards
def self.cpu
  # TODO check runtime environment etc
  @cpu = Ia32.new unless @cpu
  @cpu
end
|
|
|
|
# compiles the C source c_src for the module cpu
# exeformat is the object whose compile_c(cpu, source) is called
# (Shellcode by default); its return value is returned unchanged
def self.compile_c(c_src, exeformat=Shellcode)
  target_cpu = cpu
  exeformat.compile_c(target_cpu, c_src)
end
|
|
|
|
# build (when needed) and load the native helper while the module is being defined:
# the methods below call the singleton methods it registers (memory_read_int, dlsym, ...)
load_bootstrap
|
|
|
|
# sets up rawopcodes as the method implementation for class klass
# rawopcodes must implement the expected ABI or things will break horribly
# this method is VERY UNSAFE, and breaks everything put in place by the ruby interpreter
# use with EXTREME CAUTION
# nargs  arglist
# -2     self, arg_ary
# -1     argc, VALUE*argv, self
# >=0    self, arg0, arg1..
#
# raw is either a String holding the machine code, or an EncodedData: in that
# case it is relocated at the address of its own data buffer, and every
# external symbol it references is resolved through dlsym.
# raises a RuntimeError when an external symbol cannot be resolved
def self.set_method_binary(klass, methodname, raw, nargs=-2)
  if raw.kind_of? EncodedData
    # read the buffer pointer of the data String: field 'ptr' of struct string
    # in RUBY_H, at offset 12 with the 4-byte fields of the x86 target
    baseaddr = memory_read_int((raw.data.object_id << 1) + 12)
    bd = raw.binding(baseaddr)
    raw.reloc_externals.uniq.each { |ext|
      addr = dlsym(ext)
      # the native dlsym helper returns the Integer 0 for an unknown symbol;
      # 0 is truthy in ruby, so it must be tested explicitly
      raise "unknown symbol #{ext}" if not addr or addr == 0
      bd[ext] = addr
    }
    raw.fixup(bd)
    raw = raw.data
  end
  # keep a reference to the code string so that the garbage collector
  # does not free the buffer the method now points into
  (@prevent_gc ||= {})[[klass, methodname]] = raw
  set_class_method_raw(klass, methodname.to_s, raw, nargs)
end
|
|
|
|
# same as set_method_binary but with an object and not a class:
# the method is installed on the singleton class of obj
# the remaining arguments are forwarded unchanged
def self.set_object_method_binary(obj, *a)
  metaclass = class << obj
    self
  end
  set_method_binary(metaclass, *a)
end
|
|
|
|
# returns twice the object_id of obj, truncated to 32 bits
# NOTE(review): presumably the address of the object on a 32-bit ruby 1.8 - confirm
def self.object_pointer(obj)
  doubled = obj.object_id << 1
  doubled & 0xffffffff
end
|
|
|
|
# decodes the interpreter AST node found at address ptr into nested ruby arrays
# returns nil for a null pointer, otherwise an array whose first element is the
# node type (a symbol from NODETYPE) followed by type-specific data
# cur is used internally to accumulate the elements of chained
# :block / :array / :hash nodes into a single array
# node types without a dedicated decoder are reported on stdout and returned
# as [type, v1, v2, v3] with the three raw field values
def self.read_node(ptr, cur=nil)
  return if ptr == 0

  # same computation as nd_type in RUBY_H: (flags >> FL_USHIFT) & 0xff
  type = NODETYPE[(memory_read_int(ptr) >> 11) & 0xff]
  # the three generic fields a1/a2/a3 of struct node
  v1 = memory_read_int(ptr+8)
  v2 = memory_read_int(ptr+12)
  v3 = memory_read_int(ptr+16)

  case type
  when :block, :array, :hash
    # linked list: v1 is the element, v3 the next list node of the same type
    cur = nil if cur and cur[0] != type
    cur ||= [type]
    cur << read_node(v1)
    n = read_node(v3, cur)
    raise "block->next = #{n.inspect}" if n and n[0] != type
    cur
  when :newline
    read_node(v3)	# debug/trace usage only
  when :if
    [type, read_node(v1), read_node(v2), read_node(v3)]
  when :cfunc
    [type, {:fptr => v1,	# c func pointer
            :arity => v2}]
  when :scope
    [type, {:localnr => memory_read_int(v1),	# nr of local vars (+2 for $_/$~)
            :cref => v2},	# node, starting point for const resolution
     read_node(v3)]
  when :call, :fcall, :vcall
    # TODO check fcall/vcall
    ret = [type, read_node(v1), v2.id2name]
    if args = read_node(v3)
      raise "#{ret.inspect} with args != array: #{args.inspect}" if args[0] != :array
      ret.concat args[1..-1]
    end
    ret
  when :zarray
    [:array, []]
  when :lasgn
    [type, v3, read_node(v2)]
  when :iasgn, :dasgn, :dasgn_curr, :gasgn, :cvasgn
    [type, v1.id2name, read_node(v2)]
  when :masgn
    [type, read_node(v1), read_node(v2)]	# multiple assignment: a, b = 42 / lambda { |x, y| }.call(1, 2)
  when :attrasgn
    [type, ((v1 == 1) ? :self : read_node(v1)), v2.id2name, read_node(v3)]
  when :lvar
    [type, v3]
  when :ivar, :dvar, :gvar, :cvar, :const
    [type, v1.id2name]
  when :str
    # cannot use _id2ref here, probably the parser does not use standard alloced objects
    # v1 points to a struct string (see RUBY_H): 'len' is at offset 8 and 'ptr'
    # at offset 12 ; offset 16 is the aux union (capa/shared), not the length
    s = memory_read(memory_read_int(v1+12), memory_read_int(v1+8))
    [type, s]
  when :lit
    [type, id2ref(v1)]
  when :args	# specialcased by rb_call0, invalid in rb_eval
    cnt = v3	# nr of required args, copied directly to local_vars
    opt = read_node(v1)	# :block to execute for each missing arg / with N optargs specified, skip N 1st statements
    rest = read_node(v2)	# catchall arg in def foo(rq1, rq2, *rest)
    [type, cnt, opt, rest]
  when :and, :or
    [type, read_node(v1), read_node(v2)]	# shortcircuit
  when :not
    [type, read_node(v2)]
  when :nil, :true, :false, :self
    [type]
  when :redo, :retry
    [type]
  when :case, :when
    [type, read_node(v1), read_node(v2), read_node(v3)]
  when :iter
    # save a block for the following funcall
    args = read_node(v1)	# assignments with nil, not realized, just to store the arg list (multi args -> :masgn)
    body = read_node(v2)	# the body statements (multi -> :block)
    subj = read_node(v3)	# the stuff which is passed the block, probably a :call
    [type, args, body, subj]
  when :while
    [type, read_node(v1), read_node(v2), v3]
  when :return, :break, :next
    [type, read_node(v1)]
  when :colon3	# ::Stuff
    [type, v2.id2name]
  else
    puts "unhandled #{type.inspect}"
    [type, v1, v2, v3]
  end
end
|
|
|
|
# raw memory read
#  RubyHack[range]     => memory_read of the bytes covered by the range
#  RubyHack[addr, len] => memory_read of len bytes at addr
#  RubyHack[addr]      => memory_read_int at addr
def self.[](a, l=nil)
  case a
  when Range
    span = a.end - a.begin
    span += 1 unless a.exclude_end?
    return memory_read(a.begin, span)
  end
  return memory_read(a, l) if l
  memory_read_int(a)
end
|
|
|
|
# raw memory write
#  RubyHack[range] = str     => memory_write of str at range.begin
#  RubyHack[addr, len] = str => memory_write of str at addr
#  RubyHack[addr] = int      => memory_write_int at addr
# in the first two forms the whole string is written, whatever the range or len
def self.[]=(a, l, v=nil)
  # two-argument form: the value arrives in l, there is no length
  unless v
    l, v = v, l
  end
  return memory_write(a.begin, v) if a.kind_of?(Range)
  l ? memory_write(a, v) : memory_write_int(a, v)
end
|
|
|
|
# replaces the ruby method meth of klass with native code generated from its AST
# the decoded AST and the generated C source are printed on stdout
# returns nil, leaving the method untouched, when ruby_ast_to_c gives no C source
def self.compile_ruby(klass, meth)
  require 'pp'
  node_ptr = get_method_node_ptr(klass, meth)
  ast = read_node(node_ptr)
  pp ast
  c = ruby_ast_to_c(ast)
  return unless c
  puts c
  raw = compile_c(c).encoded
  arity = klass.instance_method(meth).arity
  set_method_binary(klass, meth, raw, arity)
end
|
|
|
|
# converts a method AST (as returned by read_node) to the C source of a
# function 'VALUE meth(VALUE self)'
# returns nil when ast is nil (read_node returns nil for a null node pointer)
# or when it is not a :scope node
def self.ruby_ast_to_c(ast)
  return if not ast or ast[0] != :scope
  cp = cpu.new_cparser
  cp.parse RUBY_H
  # parse an empty function, then patch its return type to VALUE and
  # let the RubyCompiler fill its body
  cp.parse 'void meth(VALUE self) { }'
  cp.toplevel.symbol['meth'].type.type = cp.toplevel.symbol['VALUE']
  scope = cp.toplevel.symbol['meth'].initializer
  RubyCompiler.new(cp).compile(ast, scope)
  cp.dump_definition('meth')
end
|
|
end
|
|
|
|
# translates a ruby AST (nested arrays, as built by RubyHack.read_node)
# into C statements appended to a metasm C block
class RubyCompiler
  # cp is the C parser object: the compiler looks up 'VALUE', 'rb_intern',
  # 'rb_funcall' and 'rb_str_new' in its toplevel symbols
  def initialize(cp)
    @cp = cp
  end

  # compiles the :scope node ast into scope: declares one VALUE variable per
  # ruby local, appends the statements of the body, then a return statement
  def compile(ast, scope)
    @scope = scope
    # the first 2 slots are skipped
    # TODO check usage of $~ / $_
    (2...ast[1][:localnr]).each { |idx|
      # TODO args
      # TODO analyse to find numeric locals (to avoid useless INT2FIX)
      var = C::Variable.new("local_#{idx}", value)
      var.initializer = C::CExpression[[nil.object_id], var.type]
      scope.symbol[var.name] = var
      scope.statements << C::Declaration.new(var)
    }
    result = ast_to_c(ast[2], scope)
    scope.statements << C::Return.new(result)
  end

  # the C type registered as VALUE in the parser
  def value
    @cp.toplevel.symbol['VALUE']
  end

  # the C variable declared by compile for the ruby local number n
  def local(n)
    @scope.symbol["local_#{n}"]
  end

  # C expression calling rb_intern(n)
  def rb_intern(n)
    callee = @cp.toplevel.symbol['rb_intern']
    C::CExpression[callee, :funcall, [n]]
  end

  # C expression calling rb_funcall(recv, rb_intern(meth), nargs, args...)
  def rb_funcall(recv, meth, *args)
    callee = @cp.toplevel.symbol['rb_funcall']
    arglist = [recv, rb_intern(meth), [args.length]].concat(args)
    C::CExpression[callee, :funcall, arglist]
  end

  # compiles one AST node: statements are appended to scope, and the C
  # expression holding the value of the node is returned (cast to VALUE)
  # unsupported nodes are reported on stdout and evaluate to nil
  def ast_to_c(ast, scope)
    ret =
      case ast.to_a[0]
      when :block
        ast[1..-1].map { |a| ast_to_c(a, scope) }.last
      when :lasgn
        l = local(ast[1])
        scope.statements << C::CExpression[l, :'=', ast_to_c(ast[2], scope)]
        l
      when :lvar
        local(ast[1])
      when :lit
        # anything but a Symbol is true/false/nil/fixnum
        ast[1].kind_of?(Symbol) ? rb_intern(ast[1]) : ast[1].object_id
      when :str
        C::CExpression[@cp.toplevel.symbol['rb_str_new'], :funcall, [ast[1], [ast[1].length]]]
      when :iter
        iter_to_c(ast, scope)
      when :call
        call_to_c(ast, scope)
      when nil, :nil, :args
        nil.object_id
      else
        puts "unsupported #{ast.inspect}"
        nil.object_id
      end
    ret = [ret] if ret.kind_of? Integer
    C::CExpression[ret, value]
  end

  # compiles an :iter node; only 'x.times { }' is supported, as a C for loop
  def iter_to_c(ast, scope)
    b_args, b_body, b_recv = ast[1, 3]
    unless b_recv[0] == :call and b_recv[2] == 'times'	# TODO check its Fixnum#times
      puts "unsupported #{ast.inspect}"
      return nil.object_id
    end

    recv = ast_to_c(b_recv[1], scope)
    counter = C::Variable.new("cntr", C::BaseType.new(:int))	# TODO uniq name etc
    counter.initializer = C::CExpression[[0]]
    loop_init = C::Block.new(scope)
    loop_init.symbol[counter.name] = counter
    loop_body = C::Block.new(loop_init)
    # the receiver is shifted right by 1 before the comparison with the C counter
    loop_test = C::CExpression[counter, :<, [recv, :>>, 1]]
    loop_step = C::CExpression[:'++', counter]
    scope.statements << C::For.new(loop_init, loop_test, loop_step, loop_body)
    loop_body.symbol[counter.name] = counter
    ast_to_c(b_body, loop_body)
    recv
  end

  # compiles a :call node to an rb_funcall ; '+' and '-' additionally get an
  # inline path taken when the low bit of both operands is set
  def call_to_c(ast, scope)
    recv = ast_to_c(ast[1], scope)
    args = ast[3..-1].map { |a| ast_to_c(a, scope) }
    f = rb_funcall(recv, ast[2], *args)
    return f unless ['+', '-'].include?(ast[2])

    tmp = C::Variable.new('tmp', value)
    if not scope.symbol_ancestors['tmp']
      scope.symbol['tmp'] = tmp
      scope.statements << C::Declaration.new(tmp)
    end
    a1 = [ast_to_c(ast[1], scope), C::BaseType.new(:int)]
    a3 = [ast_to_c(ast[3], scope), C::BaseType.new(:int)]
    both_tagged = C::CExpression[[a1, :&, a3], :&, 1]	# XXX overflow to Bignum
    inline_op = C::CExpression[tmp, :'=', [a1, ast[2].to_sym, [a3, :-, [1]]]]
    generic_op = C::CExpression[tmp, :'=', f]
    scope.statements << C::If.new(both_tagged, inline_op, generic_op)
    tmp
  end
end
|
|
end
|
|
|
|
|
|
|
|
|
|
# demo code, run only when this file is executed directly
if __FILE__ == $0

# without argument run the jit test, otherwise dump the AST of <class> <method>
demo = :dump_ruby_ast
demo = :test_jit if ARGV.empty?

# chose your use case !
if demo == :inlineasm

  # cnt.times { sys_write str }
  src_asm = <<EOS
mov ecx, [ebp+8]
again:
push ecx
mov eax, 4
mov ebx, 1
mov ecx, [ebp+12]
mov edx, [ebp+16]
int 80h
pop ecx
loop again
EOS

  src = <<EOS

#{Metasm::RubyHack::RUBY_H}

void doit(int, char*, int);
VALUE foo(VALUE self, VALUE count, VALUE str) {
	doit(FIX2LONG(count), RString(str)->ptr, RString(str)->len);
	return count;
}

void doit(int count, char *str, int strlen) {
	asm(#{src_asm.inspect});
}
EOS

  code = Metasm::RubyHack.compile_c(src).encode_string

  # install the compiled code as method 'bar' of a fresh object
  target = Object.new
  Metasm::RubyHack.set_object_method_binary(target, 'bar', code, 2)

  puts "test1"
  target.bar(4, "blabla\n")
  puts "test2"
  target.bar(2, "foo\n")

elsif demo == :dump_ruby_ast

  abort 'need <class> <method> args' unless ARGV.length == 2
  klass = Metasm.const_get(ARGV.shift)
  methname = ARGV.shift
  node_ptr = Metasm::RubyHack.get_method_node_ptr(klass, methname)
  require 'pp'
  pp Metasm::RubyHack.read_node(node_ptr)

elsif demo == :test_jit

  class Foo
    def bla
      i = 0
      20_000_000.times { i += 1 }
      i
    end
  end

  # time the compilation, then one run of the compiled method
  t0 = Time.now
  Metasm::RubyHack.compile_ruby(Foo, :bla)
  t1 = Time.now
  p Foo.new.bla
  t2 = Time.now

  puts format("compile %.3fs run %.3fs", t1-t0, t2-t1)

end

end
|