#!/usr/bin/env ruby
#    This file is part of Metasm, the Ruby assembly manipulation suite
#    Copyright (C) 2006-2009 Yoann GUILLOT
#
#    Licence is LGPL, see LICENCE in the top-level directory

# This sample implements a trivial binary diffing algorithm between two programs
# the programs have first to be disassembled, and then the diff algorithm will
# (try to) identify identical functions in both dasm graphs
# Currently there is NO fuzzy matching whatsoever, so the function graphs have to
# be exactly the same in both programs to be recognized.
# You can still force a comparison between two functions, but the results will be bad.
#
# This file can be run as a standalone application (eg 'ruby bindiff file1 file2')
# or as a disassembler plugin (see dasm-plugin/bindiff)

require 'metasm'

module ::Metasm
  # Widget driving the comparison of two disassemblers (dasm1 / dasm2).
  # It caches, per program, the function graphs and their statistics,
  # holds the function matches found so far, and colorizes the instructions
  # of both disassembler views according to how well they match.
  class BinDiffWidget < Gui::DrawableWidget
    # text drawn by #paint; nil is displayed as 'idle'
    attr_accessor :status

    # background color for each instruction match class (see #match_instr)
    COLORS = { :same => '8f8', :similar => 'cfc', :badarg => 'fcc', :badop => 'faa', :default => '888' }

    # d1, d2: optional disassemblers to compare
    def initialize_widget(d1=nil, d2=nil)
      self.dasm1 = d1 if d1
      self.dasm2 = d2 if d2
      @status = nil
    end

    def dasm1; @dasm1 end

    # install the first disassembler: drop every cache derived from the
    # previous one and hook the background color callback of its gui
    def dasm1=(d)
      @dasm1 = d
      @func1 = nil
      @funcstat1 = nil
      @dasmcol1 = {}
      @dasm1.gui.bg_color_callback = lambda { |a1| COLORS[@dasmcol1[a1] || :default] }
      @match_func = nil
    end

    def dasm2; @dasm2 end

    # install the second disassembler: drop every cache derived from the
    # previous one and hook the background color callback of its gui
    def dasm2=(d)
      @dasm2 = d
      @func2 = nil
      # the stats cache of program 2 must be dropped here (this used to reset @funcstat1)
      @funcstat2 = nil
      @dasmcol2 = {}
      @dasm2.gui.bg_color_callback = lambda { |a2| COLORS[@dasmcol2[a2] || :default] }
      @match_func = nil
    end

    # address currently shown in each disassembler gui
    def curaddr1; @dasm1.gui.curaddr end
    def curaddr2; @dasm2.gui.curaddr end

    # start address of the function holding the current address
    def curfunc1; @dasm1.find_function_start(curaddr1) end
    def curfunc2; @dasm2.find_function_start(curaddr2) end

    # lazily built function graphs (see #create_funcs)
    def func1; @func1 ||= set_status('funcs 1') { create_funcs(@dasm1) } end
    def func2; @func2 ||= set_status('funcs 2') { create_funcs(@dasm2) } end

    # lazily built function statistics (see #create_funcs_stats)
    def funcstat1; @funcstat1 ||= set_status('func stats 1') { create_funcs_stats(func1, @dasm1) } end
    def funcstat2; @funcstat2 ||= set_status('func stats 2') { create_funcs_stats(func2, @dasm2) } end

    # draw the current status text
    def paint
      draw_string_color(:black, @font_width, 3*@font_height, @status || 'idle')
    end

    # refresh both disassembler views and this widget
    def gui_update
      # best effort: either disassembler (or its gui) may be missing
      @dasm1.gui.gui_update rescue nil
      @dasm2.gui.gui_update rescue nil
      redraw
    end

    # change the displayed status text
    # with a block: show st while the block runs (through protect), restore
    # the previous status afterwards and return the block value
    # without a block: returns nil
    def set_status(st=nil)
      ost = @status
      @status = st
      redraw
      if block_given?
        ret = protect { yield }
        set_status ost
        ret
      end
    end

    # keyboard shortcuts
    def keypress(key)
      case key
      when ?A
        keypress(?D)
        keypress(?f)
        # NOTE(review): no 'when ?i' branch exists below, so this call does nothing -- confirm the intended key
        keypress(?i)
      when ?D
        disassemble_all
      when ?c
        disassemble
      when ?C
        disassemble(:disassemble_fast)
      when ?f
        funcstat1
        funcstat2
      when ?g
        inputbox('address to go', :text => Expression[@dasm1.gui.curaddr]) { |v|
          @dasm1.gui.focus_addr_autocomplete(v)
          @dasm2.gui.focus_addr_autocomplete(v)
        }
      when ?M
        show_match_funcs
      when ?m
        match_one_func(curfunc1, curfunc2)
      when ?r
        puts 'reload'
        load __FILE__
        gui_update
      when ?Q
        Gui.main_quit
      end
    end

    # keyboard shortcuts with the control key held
    def keypress_ctrl(key)
      case key
      when ?C
        disassemble(:disassemble_fast_deep)
      when ?r
        # debugging helper: evaluates ruby code typed by the user
        inputbox('code to eval') { |c| messagebox eval(c).inspect[0, 512], 'eval' }
      end
    end

    # disassemble the whole '.text' section of both programs using the dasm_all plugin
    def disassemble_all
      @func1 = @func2 = @funcstat1 = @funcstat2 = nil
      @dasm1.load_plugin 'dasm_all'
      @dasm2.load_plugin 'dasm_all'
      set_status('dasm_all 1') { @dasm1.dasm_all_section '.text' }
      set_status('dasm_all 2') { @dasm2.dasm_all_section '.text' }
      gui_update
    end

    # disassemble both programs from their current address
    # method: name of the disassembler method to call with that address
    def disassemble(method=:disassemble)
      @func1 = @func2 = @funcstat1 = @funcstat2 = nil
      set_status('dasm 1') {
        @dasm1.send(method, curaddr1)
        @dasm1.gui.focus_addr(curaddr1, :graph)
      }
      set_status('dasm 2') {
        @dasm2.send(method, curaddr2)
        @dasm2.gui.focus_addr(curaddr2, :graph)
      }
      gui_update
    end

    # match all functions, then show a list window with the matched pairs
    # and their score, followed by the functions left unmatched on each side
    def show_match_funcs
      matches = match_funcs

      gui_update
      Gui.main_iter

      list = [['addr 1', 'addr 2', 'score']]
      f1 = func1.keys
      f2 = func2.keys
      matches.each { |a1, (a2, s)|
        list << [(@dasm1.get_label_at(a1) || Expression[a1]), (@dasm2.get_label_at(a2) || Expression[a2]), '%.4f' % s]
        f1.delete a1
        f2.delete a2
      }
      f1.each { |a1| list << [(@dasm1.get_label_at(a1) || Expression[a1]), '?', 'nomatch'] }
      f2.each { |a2| list << ['?', (@dasm2.get_label_at(a2) || Expression[a2]), 'nomatch'] }
      listwindow("matches", list) { |i| @dasm1.gui.focus_addr i[0], nil, true ; @dasm2.gui.focus_addr i[1], nil, true }
    end

    # build the graph of every function of dasm
    # returns { func addr => { funcblock => list of funcblock to } }
    def create_funcs(dasm)
      f = {}
      # make sure the entrypoints are known as functions
      dasm.entrypoints.to_a.each { |ep| dasm.function[ep] ||= DecodedFunction.new }
      dasm.function.each_key { |a|
        next if not dasm.di_at(a)
        f[a] = create_func(dasm, a)
        Gui.main_iter
      }
      f
    end

    # build the graph of the function starting at address a
    # returns { block addr => list of successor block addrs in the same function }
    # successors with no decoded instruction are ignored
    def create_func(dasm, a)
      h = {}
      todo = [a]
      while a = todo.pop
        next if h[a]
        h[a] = []
        dasm.decoded[a].block.each_to_samefunc(dasm) { |ta|
          next if not dasm.di_at(ta)
          todo << ta
          h[a] << ta
        }
      end
      h
    end

    # compute the statistics of every function graph of f (as returned by create_funcs)
    # returns { func addr => stats hash }
    def create_funcs_stats(f, dasm)
      fs = {}
      f.each { |a, g|
        fs[a] = create_func_stats(dasm, a, g)
        Gui.main_iter
      }
      fs
    end

    # compute the layout statistics of the function graph g starting at address a
    # returns a hash with the keys :blocks, :edges, :leaves, :ext_calls, :loops
    def create_func_stats(dasm, a, g)
      s = {}
      s[:blocks] = g.length
      s[:edges] = 0      # nr of edges
      s[:leaves] = 0     # nr of nodes with no successor
      s[:ext_calls] = 0  # nr of jumps out_of_func
      s[:loops] = 0      # nr of jump back

      todo = [a]
      done = {}
      while aa = todo.pop
        next if done[aa]
        done[aa] = true
        todo.concat g[aa]

        s[:edges] += g[aa].length
        s[:leaves] += 1 if g[aa].empty?
        dasm.decoded[aa].block.each_to_otherfunc(dasm) { s[:ext_calls] += 1 }
      end

      # loop detection
      # find the longest distance to the root w/o loops
      g = g.dup  # eliminate_one_loop patches the hash it receives, work on a copy
      while eliminate_one_loop(a, g)
        s[:loops] += 1
      end

      s
    end

    # search the graph g for one loop reachable from a, and remove one edge
    # going back to the loop entry
    # g is patched: the edge list of one node is replaced by a new array
    # returns true if a loop was found and cut, false otherwise
    def eliminate_one_loop(a, g)
      stack = []
      index = {}
      reach_index = {}
      done = false

      curindex = 0

      # depth-first walk in the style of Tarjan's strongly connected components algorithm
      tarjan = lambda { |e|
        index[e] = curindex
        reach_index[e] = curindex
        curindex += 1
        stack << e
        g[e].each { |ne|
          if not index[ne]
            tarjan[ne]
            break if done
            reach_index[e] = [reach_index[e], reach_index[ne]].min
          elsif stack.include? ne
            reach_index[e] = [reach_index[e], reach_index[ne]].min
          end
        }
        next if done  # a loop was already cut deeper in the walk: unwind

        if index[e] == reach_index[e]
          if (e == stack.last and not g[e].include?(e))
            # e alone, with no edge to itself: not a loop
            stack.pop
            next
          end
          # e is the entry in the loop, cut the loop here
          tail = reach_index.keys.find { |ee| reach_index[ee] == index[e] and g[ee].include? e }
          g[tail] -= [e]  # patch g, but don't modify the original g value (ie -= instead of delete)
          done = true     # one loop found & removed, try again
        end
      }

      tarjan[a]

      done
    end

    # forget all known matches and match everything again
    def rematch_funcs
      @match_funcs = nil
      match_funcs
    end

    # match every function of dasm1 that has no known match yet against dasm2
    # the candidates are the functions having exactly the same statistics,
    # the one with the lowest match_func score is retained
    # returns (and caches) { addr in dasm1 => [addr in dasm2, score] }
    def match_funcs
      @match_funcs ||= {}

      layout_match = {}

      set_status('match func layout') {
        funcstat1.each { |a, s|
          next if @match_funcs[a]
          layout_match[a] = []
          funcstat2.each { |aa, ss|
            layout_match[a] << aa if s == ss
          }
          Gui.main_iter
        }
      }

      set_status('match funcs') {
        # refine the layout matching with actual function matching
        # start from the functions of dasm2 paired by a previous call, so that
        # calling this method again never gives them a second partner
        already_matched = @match_funcs.values.map { |m| m[0] }
        layout_match.each { |f1, list|
          puts "matching #{Expression[f1]}" if $VERBOSE
          begin
            f2 = (list - already_matched).sort_by { |f| match_func(f1, f, false, false) }.first
            if f2
              already_matched << f2
              score = match_func(f1, f2)
              @match_funcs[f1] = [f2, score]
            end
          rescue Interrupt
            puts 'abort this one'
            sleep 0.2  # allow a 2nd ^c to escalate
          end
          Gui.main_iter
        }
      }

      puts "matched #{@match_funcs.length} - unmatched #{func1.length - @match_funcs.length}"
      @match_funcs
    end

    # force the match of the function a1 of dasm1 with the function a2 of dasm2
    def match_one_func(a1, a2)
      s = match_func(a1, a2)
      puts "match score: #{s}"
      @match_funcs ||= {}
      @match_funcs[a1] = [a2, s]
      gui_update
    end

    # return how much match a func in d1 and a func in d2
    # the lower the score, the better the match
    # do_colorize: colorize the instructions of the blocks paired together
    # verb: currently unused
    # raises a RuntimeError if a1 or a2 is not a known function start
    def match_func(a1, a2, do_colorize=true, verb=true)
      f1 = func1[a1]
      f2 = func2[a2]
      raise "dasm1 has no function at #{Expression[a1]}" if not f1
      raise "dasm2 has no function at #{Expression[a2]}" if not f2
      todo1 = [a1]
      todo2 = [a2]
      done1 = []
      done2 = []
      score = 0.0  # average of the (local best) match_block scores
      score += 0.01 if @dasm1.get_label_at(a1) != @dasm2.get_label_at(a2)  # for thunks
      score_div = [f1.length, f2.length].max.to_f
      # XXX this is stupid and only good for perfect matches (and even then it may fail)
      # TODO handle block split etc (eg instr-level diff VS block-level)
      while blk1 = todo1.shift
        next if done1.include? blk1
        # score this block against every pending block of the other function
        candidates = todo2.map { |a| [a, match_block(@dasm1.decoded[blk1].block, @dasm2.decoded[a].block)] }
        best = candidates.sort_by { |a, s| s }.first
        break if not best  # no block left in the other function
        blk2, blk_score = best
        score += blk_score / score_div
        done1 << blk1
        done2 << blk2
        todo1.concat f1[blk1]
        todo2.concat f2[blk2]
        todo2 -= done2
        colorize_blocks(blk1, blk2) if do_colorize
      end

      score += (f1.length - f2.length).abs * 3 / score_div  # block count difference -> +3 per block

      score
    end

    # return how much two instruction blocks match, the lower the better
    # if a block is given, it is yielded the number of instructions matching
    # at the start and at the end of the blocks
    def match_block(b1, b2)
      # 0 = perfect match (same opcodes, same args)
      # 1 = same opcodes, same arg type
      # 2 = same opcodes, diff argtypes
      # 3 = some opcode difference
      # 4 = full block difference
      score = 0
      score_div = [b1.list.length, b2.list.length].max.to_f
      common_start = 0
      common_end = 0

      # basic diff-style: compare start while it's good, then end, then whats left
      # should handle most simples cases well
      len = [b1.list.length, b2.list.length].min
      while common_start < len and (s = match_instr(b1.list[common_start], b2.list[common_start])) <= 1
        score += s / score_div
        common_start += 1
      end

      while common_start+common_end < len and (s = match_instr(b1.list[-1-common_end], b2.list[-1-common_end])) <= 1
        score += s / score_div
        common_end += 1
      end

      # TODO improve the middle part matching (allow insertions/suppressions/swapping)
      b1.list[common_start..-1-common_end].zip(b2.list[common_start..-1-common_end]).each { |di1, di2|
        score += match_instr(di1, di2) / score_div
      }

      yield(common_start, common_end) if block_given?  # used by colorize_blocks

      score += (b1.list.length - b2.list.length).abs * 3 / score_div  # instr count difference -> +3 per instr

      score
    end

    # set the background color of the instructions of the blocks at a1 (dasm1)
    # and a2 (dasm2) according to how the instructions match
    def colorize_blocks(a1, a2)
      b1 = @dasm1.decoded[a1].block
      b2 = @dasm2.decoded[a2].block

      common_start = common_end = 0
      match_block(b1, b2) { |a, b| common_start = a ; common_end = b }

      colorize = lambda { |l1, l2|
        l1.zip(l2).each { |di1, di2|
          next if not di1 or not di2
          @dasmcol1[di1.address] = @dasmcol2[di2.address] = [:same, :similar, :badarg, :badop][match_instr(di1, di2)]
        }
      }

      # everything but the common end, aligned on the block start
      colorize[b1.list[0..-1-common_end], b2.list[0..-1-common_end]]
      # the common end, aligned on the block end
      # list[-0..-1] is the whole list, which the line above already handled
      colorize[b1.list[-common_end..-1], b2.list[-common_end..-1]] if common_end > 0
    end

    # return how much two decoded instructions match
    # 0 = same instruction (ignoring the suffix of loc_ labels)
    # 1 = same opcode name and argument classes
    # 2 = same opcode name, different argument classes
    # 3 = different opcode name, or a missing instruction
    def match_instr(di1, di2)
      if not di1 or not di2 or di1.opcode.name != di2.opcode.name
        3
      elsif di1.instruction.args.map { |a| a.class } != di2.instruction.args.map { |a| a.class }
        2
      elsif di1.instruction.to_s.gsub(/loc_\w+/, 'loc_') != di2.instruction.to_s.gsub(/loc_\w+/, 'loc_')
        # local labels TODO compare blocks targeted
        1
      else
        0
      end
    end

    # show in window 1 the match of the function found in win 2
    def sync1
      c2 = curfunc2
      if a1 = match_funcs.find { |k, (a2, s)| a2 == c2 }
        @dasm1.gui.focus_addr(a1[0])
      end
    end

    # show in window 2 the match of the function found in win 1
    def sync2
      if a2 = match_funcs[curfunc1]
        @dasm2.gui.focus_addr(a2[0])
      end
    end
  end

  # Top-level window holding a BinDiffWidget and its menu
  class BinDiffWindow < Gui::Window
    def initialize_window(d1=nil, d2=nil)
      self.widget = BinDiffWidget.new(d1, d2)
    end

    def build_menu
      menu = new_menu
      addsubmenu(menu, 'load file 1') { openfile('file 1') { |f| loadfile1(f) } }
      addsubmenu(menu, 'load file 2') { openfile('file 2') { |f| loadfile2(f) } }
      addsubmenu(menu)
      addsubmenu(menu, '_disassemble from there', '^C') { widget.disassemble(:disassemble_fast_deep) }
      addsubmenu(menu, 'co_mpare current functions', 'm') { widget.match_one_func(widget.curfunc1, widget.curfunc2) }
      addsubmenu(menu, 'compare all funct_ions', 'M') { widget.show_match_funcs }
      addsubmenu(menu, '_goto', 'g') { widget.keypress ?g }
      addsubmenu(menu)
      addsubmenu(menu, 'sync win 2', '2') { widget.sync2 }
      addsubmenu(menu, 'sync win 1', '1') { widget.sync1 }
      addsubmenu(menu)
      addsubmenu(menu, '_quit', 'Q') { Gui.main_quit }

      addsubmenu(@menu, '_File', menu)
    end

    # decode the file f, show it in a new disassembler window and use it as program 1
    def loadfile1(f)
      exe = AutoExe.orshellcode { Ia32.new }.decode_file(f)
      d = exe.init_disassembler
      Gui::DasmWindow.new("bindiff - 1 - #{f}").display(d)
      widget.dasm1 = d
    end

    # decode the file f, show it in a new disassembler window and use it as program 2
    def loadfile2(f)
      exe = AutoExe.orshellcode { Ia32.new }.decode_file(f)
      d = exe.init_disassembler
      Gui::DasmWindow.new("bindiff - 2 - #{f}").display(d)
      widget.dasm2 = d
    end
  end
end

if $0 == __FILE__ and not defined? $bindiff_loaded
  # allow reloading the file for easier diff algorithm test
  $bindiff_loaded = true

  require 'optparse'

  $VERBOSE = true

  # parse arguments
  # every switch taking a value needs its <placeholder>: without one,
  # OptionParser declares a switch with no argument and yields true
  opts = {}
  OptionParser.new { |opt|
    opt.banner = 'Usage: bindiff.rb [options] <executable1> <executable2> [<entrypoints>]'
    opt.on('-P <plugin>', '--plugin <plugin>', 'load a metasm disassembler plugin') { |h| (opts[:plugin] ||= []) << h }
    opt.on('-e <code>', '--eval <code>', 'eval a ruby code') { |h| (opts[:hookstr] ||= []) << h }
    opt.on('--map1 <mapfile>', 'load a map file (addr <-> name association)') { |f| opts[:map1] = f }
    opt.on('--map2 <mapfile>', 'load a map file (addr <-> name association)') { |f| opts[:map2] = f }
    opt.on('-c <header>', '--c-header <header>', 'read C function prototypes (for external library functions)') { |h| opts[:cheader] = h }
    opt.on('-a', '--autoload', 'loads all relevant files with same filename (.h, .map..)') { opts[:autoload] = true }
    opt.on('-v', '--verbose') { $VERBOSE = true }  # default
    opt.on('-q', '--no-verbose') { $VERBOSE = false }
    opt.on('-d', '--debug') { $DEBUG = $VERBOSE = true }
    opt.on('-A', 'match everything on start') { opts[:doit] = true }
  }.parse!(ARGV)

  # first positional argument: program 1
  if exename1 = ARGV.shift
    w1 = Metasm::Gui::DasmWindow.new("#{exename1} - bindiff1 - metasm disassembler")
    exe1 = w1.loadfile(exename1)
    if opts[:autoload]
      basename1 = exename1.sub(/\.\w\w?\w?$/, '')
      opts[:map1] ||= basename1 + '.map' if File.exist?(basename1 + '.map')
      opts[:cheader] ||= basename1 + '.h' if File.exist?(basename1 + '.h')
    end
  end

  # second positional argument: program 2
  if exename2 = ARGV.shift
    w2 = Metasm::Gui::DasmWindow.new("#{exename2} - bindiff2 - metasm disassembler")
    exe2 = w2.loadfile(exename2)
    if opts[:autoload]
      basename2 = exename2.sub(/\.\w\w?\w?$/, '')
      opts[:map2] ||= basename2 + '.map' if File.exist?(basename2 + '.map')
      opts[:cheader] ||= basename2 + '.h' if File.exist?(basename2 + '.h')
    end
  end

  if exe1
    dasm1 = exe1.init_disassembler

    dasm1.load_map opts[:map1] if opts[:map1]
    dasm1.parse_c_file opts[:cheader] if opts[:cheader]
  end

  if exe2
    dasm2 = exe2.init_disassembler

    dasm2.load_map opts[:map2] if opts[:map2]
    dasm2.parse_c_file opts[:cheader] if opts[:cheader]
  end

  # the remaining arguments are entrypoints, used for both programs
  ep = ARGV.dup

  w1.dasm_widget.focus_addr ep.first if w1 and not ep.empty?
  w2.dasm_widget.focus_addr ep.first if w2 and not ep.empty?

  opts[:plugin].to_a.each { |p| dasm1.load_plugin(p) if dasm1 ; dasm2.load_plugin(p) if dasm2 }
  # the -e option runs ruby code given on the command line, by design
  opts[:hookstr].to_a.each { |f| eval f }

  ep.each { |e| dasm1.disassemble_fast_deep(e) if dasm1 ; dasm2.disassemble_fast_deep(e) if dasm2 }

  bd = Metasm::BinDiffWindow.new(dasm1, dasm2)
  bd.widget.keypress ?A if opts[:doit]

  Metasm::Gui.main
end