From 72b1f1373f17a31276a0963a014aaf64b15632b4 Mon Sep 17 00:00:00 2001 From: David Maloney Date: Wed, 2 Apr 2014 13:44:02 -0500 Subject: [PATCH 1/5] pull JJ's latest changes in for c64 compiler for x86_64 has some bugs, this is JJ's latest fixes --- lib/metasm/metasm.rb | 133 +- lib/metasm/metasm/compile_c.rb | 2900 +++--- lib/metasm/metasm/cpu/arc/decode.rb | 714 +- lib/metasm/metasm/cpu/arc/main.rb | 324 +- lib/metasm/metasm/cpu/arc/opcodes.rb | 1152 +-- lib/metasm/metasm/cpu/arm/debug.rb | 42 +- lib/metasm/metasm/cpu/arm/decode.rb | 274 +- lib/metasm/metasm/cpu/arm/encode.rb | 148 +- lib/metasm/metasm/cpu/arm/main.rb | 102 +- lib/metasm/metasm/cpu/arm/opcodes.rb | 538 +- lib/metasm/metasm/cpu/arm/parse.rb | 254 +- lib/metasm/metasm/cpu/arm/render.rb | 80 +- lib/metasm/metasm/cpu/bpf/decode.rb | 224 +- lib/metasm/metasm/cpu/bpf/main.rb | 78 +- lib/metasm/metasm/cpu/bpf/opcodes.rb | 118 +- lib/metasm/metasm/cpu/bpf/render.rb | 54 +- lib/metasm/metasm/cpu/cy16/decode.rb | 416 +- lib/metasm/metasm/cpu/cy16/main.rb | 86 +- lib/metasm/metasm/cpu/cy16/opcodes.rb | 120 +- lib/metasm/metasm/cpu/cy16/render.rb | 54 +- lib/metasm/metasm/cpu/dalvik/decode.rb | 376 +- lib/metasm/metasm/cpu/dalvik/main.rb | 168 +- lib/metasm/metasm/cpu/dalvik/opcodes.rb | 610 +- lib/metasm/metasm/cpu/ia32/compile_c.rb | 3006 +++---- lib/metasm/metasm/cpu/ia32/debug.rb | 316 +- lib/metasm/metasm/cpu/ia32/decode.rb | 2626 +++--- lib/metasm/metasm/cpu/ia32/decompile.rb | 1038 +-- lib/metasm/metasm/cpu/ia32/encode.rb | 548 +- lib/metasm/metasm/cpu/ia32/main.rb | 450 +- lib/metasm/metasm/cpu/ia32/opcodes.rb | 2802 +++--- lib/metasm/metasm/cpu/ia32/parse.rb | 622 +- lib/metasm/metasm/cpu/ia32/render.rb | 182 +- lib/metasm/metasm/cpu/mips/debug.rb | 50 +- lib/metasm/metasm/cpu/mips/decode.rb | 480 +- lib/metasm/metasm/cpu/mips/encode.rb | 70 +- lib/metasm/metasm/cpu/mips/main.rb | 110 +- lib/metasm/metasm/cpu/mips/opcodes.rb | 794 +- lib/metasm/metasm/cpu/mips/parse.rb | 74 +- 
lib/metasm/metasm/cpu/mips/render.rb | 58 +- lib/metasm/metasm/cpu/pic16c/decode.rb | 46 +- lib/metasm/metasm/cpu/pic16c/main.rb | 10 +- lib/metasm/metasm/cpu/pic16c/opcodes.rb | 104 +- lib/metasm/metasm/cpu/ppc/decode.rb | 448 +- lib/metasm/metasm/cpu/ppc/decompile.rb | 442 +- lib/metasm/metasm/cpu/ppc/encode.rb | 66 +- lib/metasm/metasm/cpu/ppc/main.rb | 204 +- lib/metasm/metasm/cpu/ppc/opcodes.rb | 770 +- lib/metasm/metasm/cpu/ppc/parse.rb | 80 +- lib/metasm/metasm/cpu/python/decode.rb | 208 +- lib/metasm/metasm/cpu/python/main.rb | 36 +- lib/metasm/metasm/cpu/python/opcodes.rb | 280 +- lib/metasm/metasm/cpu/sh4/decode.rb | 632 +- lib/metasm/metasm/cpu/sh4/main.rb | 442 +- lib/metasm/metasm/cpu/sh4/opcodes.rb | 734 +- lib/metasm/metasm/cpu/x86_64/compile_c.rb | 1913 ++-- lib/metasm/metasm/cpu/x86_64/debug.rb | 86 +- lib/metasm/metasm/cpu/x86_64/decode.rb | 510 +- lib/metasm/metasm/cpu/x86_64/encode.rb | 510 +- lib/metasm/metasm/cpu/x86_64/main.rb | 222 +- lib/metasm/metasm/cpu/x86_64/opcodes.rb | 218 +- lib/metasm/metasm/cpu/x86_64/parse.rb | 112 +- lib/metasm/metasm/cpu/x86_64/render.rb | 34 +- lib/metasm/metasm/cpu/z80/decode.rb | 530 +- lib/metasm/metasm/cpu/z80/main.rb | 94 +- lib/metasm/metasm/cpu/z80/opcodes.rb | 358 +- lib/metasm/metasm/cpu/z80/render.rb | 78 +- lib/metasm/metasm/debug.rb | 2870 +++--- lib/metasm/metasm/decode.rb | 392 +- lib/metasm/metasm/decompile.rb | 5274 +++++------ lib/metasm/metasm/disassemble.rb | 4068 ++++----- lib/metasm/metasm/disassemble_api.rb | 3556 ++++---- lib/metasm/metasm/dynldr.rb | 1890 ++-- lib/metasm/metasm/encode.rb | 592 +- lib/metasm/metasm/exe_format/a_out.rb | 315 +- lib/metasm/metasm/exe_format/autoexe.rb | 52 +- lib/metasm/metasm/exe_format/bflt.rb | 343 +- lib/metasm/metasm/exe_format/coff.rb | 760 +- lib/metasm/metasm/exe_format/coff_decode.rb | 1805 ++-- lib/metasm/metasm/exe_format/coff_encode.rb | 2126 ++--- lib/metasm/metasm/exe_format/dex.rb | 871 +- lib/metasm/metasm/exe_format/dol.rb | 241 +- 
lib/metasm/metasm/exe_format/elf.rb | 1536 ++-- lib/metasm/metasm/exe_format/elf_decode.rb | 2110 ++--- lib/metasm/metasm/exe_format/elf_encode.rb | 2832 +++--- lib/metasm/metasm/exe_format/gb.rb | 79 +- lib/metasm/metasm/exe_format/javaclass.rb | 817 +- lib/metasm/metasm/exe_format/macho.rb | 1905 ++-- lib/metasm/metasm/exe_format/main.rb | 388 +- lib/metasm/metasm/exe_format/mz.rb | 257 +- lib/metasm/metasm/exe_format/nds.rb | 277 +- lib/metasm/metasm/exe_format/pe.rb | 751 +- lib/metasm/metasm/exe_format/pyc.rb | 275 +- lib/metasm/metasm/exe_format/serialstruct.rb | 477 +- lib/metasm/metasm/exe_format/shellcode.rb | 178 +- lib/metasm/metasm/exe_format/shellcode_rwx.rb | 184 +- lib/metasm/metasm/exe_format/swf.rb | 327 +- lib/metasm/metasm/exe_format/xcoff.rb | 276 +- lib/metasm/metasm/exe_format/zip.rb | 528 +- lib/metasm/metasm/gui.rb | 20 +- lib/metasm/metasm/gui/cstruct.rb | 648 +- lib/metasm/metasm/gui/dasm_coverage.rb | 322 +- lib/metasm/metasm/gui/dasm_decomp.rb | 630 +- lib/metasm/metasm/gui/dasm_funcgraph.rb | 160 +- lib/metasm/metasm/gui/dasm_graph.rb | 3354 +++---- lib/metasm/metasm/gui/dasm_hex.rb | 984 +- lib/metasm/metasm/gui/dasm_listing.rb | 1100 +-- lib/metasm/metasm/gui/dasm_main.rb | 2204 ++--- lib/metasm/metasm/gui/dasm_opcodes.rb | 468 +- lib/metasm/metasm/gui/debug.rb | 2282 ++--- lib/metasm/metasm/gui/gtk.rb | 1592 ++-- lib/metasm/metasm/gui/qt.rb | 718 +- lib/metasm/metasm/gui/win32.rb | 3128 +++---- lib/metasm/metasm/gui/x11.rb | 778 +- lib/metasm/metasm/main.rb | 2204 ++--- lib/metasm/metasm/os/gnu_exports.rb | 22 +- lib/metasm/metasm/os/linux.rb | 2636 +++--- lib/metasm/metasm/os/main.rb | 536 +- lib/metasm/metasm/os/remote.rb | 1048 +-- lib/metasm/metasm/os/windows.rb | 2427 ++--- lib/metasm/metasm/os/windows_exports.rb | 74 +- lib/metasm/metasm/parse.rb | 1518 ++-- lib/metasm/metasm/parse_c.rb | 7958 ++++++++--------- lib/metasm/metasm/preprocessor.rb | 2534 +++--- lib/metasm/metasm/render.rb | 142 +- lib/metasm/misc/bottleneck.rb | 
34 +- lib/metasm/misc/cheader-findpppath.rb | 80 +- lib/metasm/misc/hexdiff.rb | 98 +- lib/metasm/misc/hexdump.rb | 70 +- lib/metasm/misc/lint.rb | 68 +- lib/metasm/misc/metasm-all.rb | 4 +- lib/metasm/misc/objdiff.rb | 68 +- lib/metasm/misc/objscan.rb | 58 +- lib/metasm/misc/pdfparse.rb | 974 +- lib/metasm/misc/ppc_pdf2oplist.rb | 286 +- lib/metasm/misc/tcp_proxy_hex.rb | 94 +- lib/metasm/misc/txt2html.rb | 768 +- lib/metasm/samples/asmsyntax.rb | 4 +- lib/metasm/samples/bindiff.rb | 850 +- lib/metasm/samples/compilation-steps.rb | 20 +- lib/metasm/samples/cparser_makestackoffset.rb | 12 +- lib/metasm/samples/dasm-plugins/bindiff.rb | 4 +- lib/metasm/samples/dasm-plugins/bookmark.rb | 218 +- .../samples/dasm-plugins/c_constants.rb | 76 +- .../dasm-plugins/colortheme_solarized.rb | 204 +- .../samples/dasm-plugins/cppobj_funcall.rb | 74 +- lib/metasm/samples/dasm-plugins/dasm_all.rb | 108 +- .../samples/dasm-plugins/demangle_cpp.rb | 34 +- .../samples/dasm-plugins/deobfuscate.rb | 254 +- lib/metasm/samples/dasm-plugins/dump_text.rb | 42 +- .../samples/dasm-plugins/export_graph_svg.rb | 114 +- lib/metasm/samples/dasm-plugins/findgadget.rb | 98 +- lib/metasm/samples/dasm-plugins/hl_opcode.rb | 44 +- .../samples/dasm-plugins/hotfix_gtk_dbg.rb | 10 +- lib/metasm/samples/dasm-plugins/imm2off.rb | 36 +- .../samples/dasm-plugins/match_libsigs.rb | 104 +- lib/metasm/samples/dasm-plugins/patch_file.rb | 128 +- .../samples/dasm-plugins/scanfuncstart.rb | 46 +- lib/metasm/samples/dasm-plugins/scanxrefs.rb | 28 +- lib/metasm/samples/dasm-plugins/selfmodify.rb | 268 +- .../samples/dasm-plugins/stringsxrefs.rb | 24 +- lib/metasm/samples/dasmnavig.rb | 566 +- lib/metasm/samples/dbg-apihook.rb | 336 +- lib/metasm/samples/dbg-plugins/heapscan.rb | 414 +- .../samples/dbg-plugins/heapscan/graphheap.rb | 1148 +-- .../samples/dbg-plugins/heapscan/heapscan.rb | 1216 +-- lib/metasm/samples/dbg-plugins/trace_func.rb | 292 +- lib/metasm/samples/dbghelp.rb | 130 +- 
lib/metasm/samples/disassemble-gui.rb | 154 +- lib/metasm/samples/disassemble.rb | 180 +- lib/metasm/samples/dump_upx.rb | 142 +- lib/metasm/samples/dynamic_ruby.rb | 3434 +++---- lib/metasm/samples/elf_list_needed.rb | 42 +- lib/metasm/samples/elf_listexports.rb | 28 +- lib/metasm/samples/exeencode.rb | 124 +- .../samples/factorize-headers-elfimports.rb | 44 +- .../samples/factorize-headers-peimports.rb | 68 +- lib/metasm/samples/factorize-headers.rb | 4 +- lib/metasm/samples/generate_libsigs.rb | 112 +- lib/metasm/samples/hotfix_gtk_dbg.rb | 46 +- lib/metasm/samples/install_win_env.rb | 62 +- lib/metasm/samples/lindebug.rb | 1290 +-- lib/metasm/samples/linux_injectsyscall.rb | 124 +- lib/metasm/samples/metasm-shell.rb | 110 +- lib/metasm/samples/pe-ia32-cpuid.rb | 178 +- lib/metasm/samples/pe-shutdown.rb | 30 +- lib/metasm/samples/pe-testrsrc.rb | 4 +- lib/metasm/samples/pe_listexports.rb | 32 +- lib/metasm/samples/peencode.rb | 4 +- lib/metasm/samples/peldr.rb | 732 +- lib/metasm/samples/r0trace.rb | 118 +- lib/metasm/samples/scan_pt_gnu_stack.rb | 50 +- lib/metasm/samples/scanpeexports.rb | 58 +- lib/metasm/samples/shellcode-c.rb | 6 +- lib/metasm/samples/shellcode-dynlink.rb | 164 +- lib/metasm/samples/struct_offset.rb | 18 +- lib/metasm/samples/testraw.rb | 10 +- lib/metasm/samples/win32genloader.rb | 84 +- lib/metasm/samples/win32hooker-advanced.rb | 118 +- lib/metasm/samples/win32hooker.rb | 24 +- lib/metasm/samples/win32livedasm.rb | 4 +- lib/metasm/samples/win32remotescan.rb | 22 +- lib/metasm/samples/wintrace.rb | 128 +- lib/metasm/tests/arc.rb | 26 +- lib/metasm/tests/dasm.rb | 48 +- lib/metasm/tests/dynldr.rb | 66 +- lib/metasm/tests/encodeddata.rb | 166 +- lib/metasm/tests/expression.rb | 64 +- lib/metasm/tests/graph_layout.rb | 116 +- lib/metasm/tests/ia32.rb | 166 +- lib/metasm/tests/mips.rb | 78 +- lib/metasm/tests/parse_c.rb | 280 +- lib/metasm/tests/preprocessor.rb | 246 +- lib/metasm/tests/x86_64.rb | 153 +- 213 files changed, 64001 
insertions(+), 63857 deletions(-) diff --git a/lib/metasm/metasm.rb b/lib/metasm/metasm.rb index 81c3d40aea..6da1d2d0f0 100644 --- a/lib/metasm/metasm.rb +++ b/lib/metasm/metasm.rb @@ -5,83 +5,84 @@ module Metasm - # root directory for metasm files - # used by some scripts, eg to find samples/dasm-plugin directory - Metasmdir = File.dirname(__FILE__) - # add it to the ruby library path - $: << Metasmdir + # root directory for metasm files + # used by some scripts, eg to find samples/dasm-plugin directory + Metasmdir = File.dirname(__FILE__) + # add it to the ruby library path + $: << Metasmdir - # constants defined in the same file as another - Const_autorequire_equiv = { - 'X86' => 'Ia32', 'PPC' => 'PowerPC', - 'X64' => 'X86_64', 'AMD64' => 'X86_64', - 'MIPS64' => 'MIPS', - 'UniversalBinary' => 'MachO', 'COFFArchive' => 'COFF', - 'DEY' => 'DEX', - 'PTrace' => 'LinOS', 'FatELF' => 'ELF', - 'LoadedELF' => 'ELF', 'LoadedPE' => 'PE', - 'LoadedAutoExe' => 'AutoExe', - 'LinuxRemoteString' => 'LinOS', - 'LinDebugger' => 'LinOS', - 'WinAPI' => 'WinOS', - 'WindowsRemoteString' => 'WinOS', 'WinDbgAPI' => 'WinOS', - 'WinDebugger' => 'WinOS', - 'GdbRemoteString' => 'GdbClient', 'GdbRemoteDebugger' => 'GdbClient', - 'DecodedInstruction' => 'Disassembler', 'DecodedFunction' => 'Disassembler', - 'InstructionBlock' => 'Disassembler', - } + # constants defined in the same file as another + Const_autorequire_equiv = { + 'X86' => 'Ia32', 'PPC' => 'PowerPC', + 'X64' => 'X86_64', 'AMD64' => 'X86_64', + 'MIPS64' => 'MIPS', + 'UniversalBinary' => 'MachO', 'COFFArchive' => 'COFF', + 'DEY' => 'DEX', + 'PTrace' => 'LinOS', 'FatELF' => 'ELF', + 'LoadedELF' => 'ELF', 'LoadedPE' => 'PE', + 'LoadedAutoExe' => 'AutoExe', + 'LinuxRemoteString' => 'LinOS', + 'LinDebugger' => 'LinOS', + 'WinAPI' => 'WinOS', + 'WindowsRemoteString' => 'WinOS', 'WinDbgAPI' => 'WinOS', + 'WinDebugger' => 'WinOS', + 'GdbRemoteString' => 'GdbClient', 'GdbRemoteDebugger' => 'GdbClient', + 'DecodedInstruction' => 
'Disassembler', 'DecodedFunction' => 'Disassembler', + 'InstructionBlock' => 'Disassembler', + } - # files to require to get the definition of those constants - Const_autorequire = { - 'Ia32' => 'cpu/ia32', 'MIPS' => 'cpu/mips', 'PowerPC' => 'cpu/ppc', 'ARM' => 'cpu/arm', - 'X86_64' => 'cpu/x86_64', 'Sh4' => 'cpu/sh4', 'Dalvik' => 'cpu/dalvik', 'ARC' => 'cpu/arc', - 'Python' => 'cpu/python', 'Z80' => 'cpu/z80', 'CY16' => 'cpu/cy16', 'BPF' => 'cpu/bpf', - 'C' => 'compile_c', - 'MZ' => 'exe_format/mz', 'PE' => 'exe_format/pe', - 'ELF' => 'exe_format/elf', 'COFF' => 'exe_format/coff', - 'Shellcode' => 'exe_format/shellcode', 'AutoExe' => 'exe_format/autoexe', - 'AOut' => 'exe_format/a_out', 'MachO' => 'exe_format/macho', - 'DEX' => 'exe_format/dex', - 'NDS' => 'exe_format/nds', 'XCoff' => 'exe_format/xcoff', - 'GameBoyRom' => 'exe_format/gb', - 'Bflt' => 'exe_format/bflt', 'Dol' => 'exe_format/dol', - 'PYC' => 'exe_format/pyc', 'JavaClass' => 'exe_format/javaclass', - 'SWF' => 'exe_format/swf', 'ZIP' => 'exe_format/zip', - 'Shellcode_RWX' => 'exe_format/shellcode_rwx', - 'Gui' => 'gui', - 'WindowsExports' => 'os/windows_exports', - 'GNUExports' => 'os/gnu_exports', - 'Debugger' => 'debug', - 'LinOS' => 'os/linux', 'WinOS' => 'os/windows', - 'GdbClient' => 'os/remote', - 'Disassembler' => 'disassemble', - 'Decompiler' => 'decompile', - 'DynLdr' => 'dynldr', - } + # files to require to get the definition of those constants + Const_autorequire = { + 'Ia32' => 'cpu/ia32', 'MIPS' => 'cpu/mips', 'PowerPC' => 'cpu/ppc', 'ARM' => 'cpu/arm', + 'X86_64' => 'cpu/x86_64', 'Sh4' => 'cpu/sh4', 'Dalvik' => 'cpu/dalvik', 'ARC' => 'cpu/arc', + 'Python' => 'cpu/python', 'Z80' => 'cpu/z80', 'CY16' => 'cpu/cy16', 'BPF' => 'cpu/bpf', + 'MSP430' => 'cpu/msp430', + 'C' => 'compile_c', + 'MZ' => 'exe_format/mz', 'PE' => 'exe_format/pe', + 'ELF' => 'exe_format/elf', 'COFF' => 'exe_format/coff', + 'Shellcode' => 'exe_format/shellcode', 'AutoExe' => 'exe_format/autoexe', + 'AOut' => 
'exe_format/a_out', 'MachO' => 'exe_format/macho', + 'DEX' => 'exe_format/dex', + 'NDS' => 'exe_format/nds', 'XCoff' => 'exe_format/xcoff', + 'GameBoyRom' => 'exe_format/gb', + 'Bflt' => 'exe_format/bflt', 'Dol' => 'exe_format/dol', + 'PYC' => 'exe_format/pyc', 'JavaClass' => 'exe_format/javaclass', + 'SWF' => 'exe_format/swf', 'ZIP' => 'exe_format/zip', + 'Shellcode_RWX' => 'exe_format/shellcode_rwx', + 'Gui' => 'gui', + 'WindowsExports' => 'os/windows_exports', + 'GNUExports' => 'os/gnu_exports', + 'Debugger' => 'debug', + 'LinOS' => 'os/linux', 'WinOS' => 'os/windows', + 'GdbClient' => 'os/remote', + 'Disassembler' => 'disassemble', + 'Decompiler' => 'decompile', + 'DynLdr' => 'dynldr', + } - # use the Module.autoload ruby functionnality to load framework components on demand - Const_autorequire.each { |cst, file| - autoload cst, File.join('metasm', file) - } + # use the Module.autoload ruby functionnality to load framework components on demand + Const_autorequire.each { |cst, file| + autoload cst, File.join('metasm', file) + } - Const_autorequire_equiv.each { |cst, eqv| - file = Const_autorequire[eqv] - autoload cst, File.join('metasm', file) - } + Const_autorequire_equiv.each { |cst, eqv| + file = Const_autorequire[eqv] + autoload cst, File.join('metasm', file) + } end # load Metasm core files %w[main encode decode render exe_format/main os/main].each { |f| - require File.join('metasm', f) + require File.join('metasm', f) } # remove an 1.9 warning, couldn't find a compatible way... 
if Hash.new.respond_to?(:key) - puts "using ruby1.9 workaround for Hash#index warning" if $DEBUG - class Hash - alias index_premetasm index rescue nil - undef index rescue nil - alias index key - end + puts "using ruby1.9 workaround for Hash#index warning" if $DEBUG + class Hash + alias index_premetasm index rescue nil + undef index rescue nil + alias index key + end end diff --git a/lib/metasm/metasm/compile_c.rb b/lib/metasm/metasm/compile_c.rb index 63a1b8b86d..eb430deb7d 100644 --- a/lib/metasm/metasm/compile_c.rb +++ b/lib/metasm/metasm/compile_c.rb @@ -9,1455 +9,1455 @@ require 'metasm/parse_c' module Metasm module C - class Parser - def precompile - @toplevel.precompile(Compiler.new(self, @program)) - self - end - end - - # each CPU defines a subclass of this one - class Compiler - # an ExeFormat (mostly used for unique label creation, and cpu.check_reserved_name) - attr_accessor :exeformat - # the C Parser (destroyed by compilation) - attr_accessor :parser - # an array of assembler statements (strings) - attr_accessor :source - # list of unique labels generated (to recognize user-defined ones) - attr_accessor :auto_label_list - # map asm name -> original C name (for exports etc) - attr_accessor :label_oldname - - attr_accessor :curexpr - # allows 'raise self' (eg struct.offsetof) - def exception(msg='EOF unexpected') - ParseError.new "near #@curexpr: #{msg}" - end - - # creates a new CCompiler from an ExeFormat and a C Parser - def initialize(parser, exeformat=nil, source=[]) - exeformat ||= ExeFormat.new - @parser, @exeformat, @source = parser, exeformat, source - @auto_label_list = {} - @label_oldname = {} - end - - def new_label(base='') - lbl = @exeformat.new_label base - @auto_label_list[lbl] = true - lbl - end - - def toplevel ; @parser.toplevel end - def typesize ; @parser.typesize end - def sizeof(*a) @parser.sizeof(*a) end - - # compiles the c parser toplevel to assembler statements in self.source (::Array of ::String) - # - # starts by 
precompiling parser.toplevel (destructively): - # static symbols are converted to toplevel ones, as nested functions - # uses an ExeFormat (the argument) to create unique label/variable names - # - # remove typedefs/enums - # CExpressions: all expr types are converted to __int8/__int16/__int32/__int64 (sign kept) (incl. ptr), + void - # struct member dereference/array indexes are converted to *(ptr + off) - # coma are converted to 2 statements, ?: are converted to If - # :|| and :&& are converted to If + assignment to temporary - # immediate quotedstrings/floats are converted to references to const static toplevel - # postincrements are replaced by a temporary (XXX arglist) - # compound statements are unnested - # Asm are kept (TODO precompile clobber types) - # Declarations: initializers are converted to separate assignment CExpressions - # Blocks are kept unless empty - # structure dereferences/array indexing are converted to *(ptr + offset) - # While/For/DoWhile/Switch are converted to If/Goto - # Continue/Break are converted to Goto - # Cases are converted to Labels during Switch conversion - # Label statements are removed - # Return: 'return ;' => 'return ; goto ;', 'return;' => 'goto ;' - # If: 'if (a) b; else c;' => 'if (a) goto l1; { c; }; goto l2; l1: { b; } l2:' - # && and || in condition are expanded to multiple If - # functions returning struct are precompiled (in Declaration/CExpression/Return) - # - # in a second phase, unused labels are removed from functions, as noop goto (goto x; x:) - # dead code is removed ('goto foo; bar; baz:' => 'goto foo; baz:') (TODO) - # - # after that, toplevel is no longer valid C (bad types, blocks moved...) - # - # then toplevel statements are sorted (.text, .data, .rodata, .bss) and compiled into asm statements in self.source - # - # returns the asm source in a single string - def compile - cf = @exeformat.unique_labels_cache.keys & @auto_label_list.keys - raise "compile_c name conflict: #{cf.inspect}" if not cf.empty? 
- @exeformat.unique_labels_cache.update @auto_label_list - - @parser.toplevel.precompile(self) - - # reorder statements (arrays of Variables) following exe section typical order - funcs, rwdata, rodata, udata = [], [], [], [] - @parser.toplevel.statements.each { |st| - if st.kind_of? Asm - @source << st.body - next - end - raise 'non-declaration at toplevel! ' + st.inspect if not st.kind_of? Declaration - v = st.var - if v.type.kind_of? Function - funcs << v if v.initializer # no initializer == storage :extern - elsif v.storage == :extern - elsif v.initializer - if v.type.qualifier.to_a.include?(:const) or - (v.type.kind_of? Array and v.type.type.qualifier.to_a.include?(:const)) - rodata << v - else - rwdata << v - end - else - udata << v - end - } - - if not funcs.empty? - @exeformat.compile_setsection @source, '.text' - funcs.each { |func| c_function(func) } - c_program_epilog - end - - align = 1 - if not rwdata.empty? - @exeformat.compile_setsection @source, '.data' - rwdata.each { |data| align = c_idata(data, align) } - end - - if not rodata.empty? - @exeformat.compile_setsection @source, '.rodata' - rodata.each { |data| align = c_idata(data, align) } - end - - if not udata.empty? 
- @exeformat.compile_setsection @source, '.bss' - udata.each { |data| align = c_udata(data, align) } - end - - # needed to allow asm parser to use our autogenerated label names - @exeformat.unique_labels_cache.delete_if { |k, v| @auto_label_list[k] } - - @source.join("\n") - end - - # compiles a C function +func+ to asm source into the array of strings +str+ - # in a first pass the stack variable offsets are computed, - # then each statement is compiled in turn - def c_function(func) - # must wait the Declaration to run the CExpr for dynamic auto offsets, - # and must run those statements once only - # TODO alloc a stack variable to maintain the size for each dynamic array - # TODO offset of arguments - # TODO nested function - c_init_state(func) - - # hide the full @source while compiling, then add prolog/epilog (saves 1 pass) - @source << '' - @source << "#{@label_oldname[func.name]}:" if @label_oldname[func.name] - @source << "#{func.name}:" - presource, @source = @source, [] - - c_block(func.initializer) - - tmpsource, @source = @source, presource - c_prolog - @source.concat tmpsource - c_epilog - @source << '' - end - - def c_block(blk) - c_block_enter(blk) - blk.statements.each { |stmt| - case stmt - when CExpression; c_cexpr(stmt) - when Declaration; c_decl(stmt.var) - when If; c_ifgoto(stmt.test, stmt.bthen.target) - when Goto; c_goto(stmt.target) - when Label; c_label(stmt.name) - when Return; c_return(stmt.value) - when Asm; c_asm(stmt) - when Block; c_block(stmt) - else raise - end - } - c_block_exit(blk) - end - - def c_block_enter(blk) - end - - def c_block_exit(blk) - end - - def c_label(name) - @source << "#{name}:" - end - - # fills @state.offset (empty hash) - # automatic variable => stack offset, (recursive) - # offset is an ::Integer or a CExpression (dynamic array) - # assumes offset 0 is a ptr-size-aligned address - # TODO registerize automatic variables - def c_reserve_stack(block, off = 0) - block.statements.each { |stmt| - case stmt - when 
Declaration - next if stmt.var.type.kind_of? Function - off = c_reserve_stack_var(stmt.var, off) - @state.offset[stmt.var] = off - when Block - c_reserve_stack(stmt, off) - # do not update off, not nested subblocks can overlap - end - } - end - - # computes the new stack offset for var - # off is either an offset from stack start (:ptr-size-aligned) or - # a CExpression [[[expr, +, 7], &, -7], +, off] - def c_reserve_stack_var(var, off) - if (arr_type = var.type).kind_of? Array and (arr_sz = arr_type.length).kind_of? CExpression - # dynamic array ! - arr_sz = CExpression.new(arr_sz, :*, sizeof(nil, arr_type.type), - BaseType.new(:long, :unsigned)).precompile_inner(@parser, nil) - off = CExpression.new(arr_sz, :+, off, arr_sz.type) - off = CExpression.new(off, :+, 7, off.type) - off = CExpression.new(off, :&, -7, off.type) - CExpression.new(off, :+, 0, off.type) - else - al = var.type.align(@parser) - sz = sizeof(var) - case off - when CExpression; CExpression.new(off.lexpr, :+, ((off.rexpr + sz + al - 1) / al * al), off.type) - else (off + sz + al - 1) / al * al - end - end - end - - # here you can add thing like stubs for PIC code - def c_program_epilog - end - - # compiles a C static data definition into an asm string - # returns the new alignment value - def c_idata(data, align) - w = data.type.align(@parser) - @source << ".align #{align = w}" if w > align - - @source << "#{@label_oldname[data.name]}:" if @label_oldname[data.name] - @source << data.name.dup - len = c_idata_inner(data.type, data.initializer) - len %= w - len == 0 ? w : len - end - - # dumps an anonymous variable definition, appending to the last line of source - # source.last is a label name or is empty before calling here - # return the length of the data written - def c_idata_inner(type, value) - case type - when BaseType - value ||= 0 - - if type.name == :void - @source.last << ':' if not @source.last.empty? 
- return 0 - end - - @source.last << - case type.name - when :__int8; ' db ' - when :__int16; ' dw ' - when :__int32; ' dd ' - when :__int64; ' dq ' - when :ptr; " d#{%w[x b w x d x x x q][@parser.typesize[type.name]]} " - when :float; ' db ' + [value].pack(@parser.endianness == :little ? 'e' : 'g').unpack('C*').join(', ') + ' // ' - when :double; ' db ' + [value].pack(@parser.endianness == :little ? 'E' : 'G').unpack('C*').join(', ') + ' // ' - when :longdouble; ' db ' + [value].pack(@parser.endianness == :little ? 'E' : 'G').unpack('C*').join(', ') + ' // ' # XXX same as :double - else raise "unknown idata type #{type.inspect} #{value.inspect}" - end - - @source.last << c_idata_inner_cexpr(value) - - @parser.typesize[type.name] - - when Struct - value ||= [] - @source.last << ':' if not @source.last.empty? - # could .align here, but if there is our label name just before, it should have been .aligned too.. - raise "unknown struct initializer #{value.inspect}" if not value.kind_of? ::Array - sz = 0 - type.members.zip(value).each { |m, v| - if m.name and wsz = type.offsetof(@parser, m.name) and sz < wsz - @source << "db #{wsz-sz} dup(?)" - end - @source << '' - flen = c_idata_inner(m.type, v) - sz += flen - } - - sz - - when Union - value ||= [] - @source.last << ':' if not @source.last.empty? - len = sizeof(nil, type) - raise "unknown union initializer #{value.inspect}" if not value.kind_of? ::Array - idx = value.rindex(value.compact.last) || 0 - raise "empty union initializer" if not idx - wlen = c_idata_inner(type.members[idx].type, value[idx]) - @source << "db #{'0' * (len - wlen) * ', '}" if wlen < len - - len - - when Array - value ||= [] - if value.kind_of? CExpression and not value.op and value.rexpr.kind_of? 
::String - elen = sizeof(nil, value.type.type) - @source.last << - case elen - when 1; ' db ' - when 2; ' dw ' - else raise 'bad char* type ' + value.inspect - end << value.rexpr.inspect - - len = type.length || (value.rexpr.length+1) - if len > value.rexpr.length - @source.last << (', 0' * (len - value.rexpr.length)) - end - - elen * len - - elsif value.kind_of? ::Array - @source.last << ':' if not @source.last.empty? - len = type.length || value.length - value.each { |v| - @source << '' - c_idata_inner(type.type, v) - } - len -= value.length - if len > 0 - @source << " db #{len * sizeof(nil, type.type)} dup(0)" - end - - sizeof(nil, type.type) * len - - else raise "unknown static array initializer #{value.inspect}" - end - end - end - - def c_idata_inner_cexpr(expr) - expr = expr.reduce(@parser) if expr.kind_of? CExpression - case expr - when ::Integer; (expr >= 4096) ? ('0x%X' % expr) : expr.to_s - when ::Numeric; expr.to_s - when Variable - case expr.type - when Array; expr.name - else c_idata_inner_cexpr(expr.initializer) - end - when CExpression - if not expr.lexpr - case expr.op - when :& - case expr.rexpr - when Variable; expr.rexpr.name - else raise 'unhandled addrof in initializer ' + expr.rexpr.inspect - end - #when :* - when :+; c_idata_inner_cexpr(expr.rexpr) - when :-; ' -' << c_idata_inner_cexpr(expr.rexpr) - when nil - e = c_idata_inner_cexpr(expr.rexpr) - if expr.rexpr.kind_of? CExpression - e = '(' << e << " & 0#{'ff'*sizeof(expr)}h)" - end - e - else raise 'unhandled initializer expr ' + expr.inspect - end - else - case expr.op - when :+, :-, :*, :/, :%, :<<, :>>, :&, :|, :^ - e = '(' << c_idata_inner_cexpr(expr.lexpr) << - expr.op.to_s << c_idata_inner_cexpr(expr.rexpr) << ')' - if expr.type.integral? - # db are unsigned - e = '(' << e << " & 0#{'ff'*sizeof(expr)}h)" - end - e - #when :'.' 
- #when :'->' - #when :'[]' - else raise 'unhandled initializer expr ' + expr.inspect - end - end - else raise 'unhandled initializer ' + expr.inspect - end - end - - def c_udata(data, align) - @source << "#{@label_oldname[data.name]}:" if @label_oldname[data.name] - @source << "#{data.name} " - @source.last << - case data.type - when BaseType - len = @parser.typesize[data.type.name] - case data.type.name - when :__int8; 'db ?' - when :__int16; 'dw ?' - when :__int32; 'dd ?' - when :__int64; 'dq ?' - else "db #{len} dup(?)" - end - else - len = sizeof(data) - "db #{len} dup(?)" - end - len %= align - len == 0 ? align : len - end - - # return non-nil if the variable name is unsuitable to appear as is in the asm listing - # eg filter out asm instruction names - def check_reserved_name(var) - return true if @exeformat.cpu and @exeformat.cpu.check_reserved_name(var.name) - %w[db dw dd dq].include?(var.name) - end - end - - class Statement - # all Statements/Declaration must define a precompile(compiler, scope) method - # it must append itself to scope.statements - - # turns a statement into a new block - def precompile_make_block(scope) - b = Block.new scope - b.statements << self - b - end - end - - class Block - # precompile all statements, then simplifies symbols/structs types - def precompile(compiler, scope=nil) - stmts = @statements.dup - @statements.clear - stmts.each { |st| - compiler.curexpr = st - st.precompile(compiler, self) - } - - # cleanup declarations - @symbol.delete_if { |n, s| not s.kind_of? Variable } - @struct.delete_if { |n, s| not s.kind_of? Union } - @symbol.each_value { |var| - CExpression.precompile_type(compiler, self, var, true) - } - @struct.each_value { |var| - next if not var.members - var.members.each { |m| - CExpression.precompile_type(compiler, self, m, true) - } - } - scope.statements << self if scope and not @statements.empty? 
- end - - # removes unused labels, and in-place goto (goto toto; toto:) - def precompile_optimize - list = [] - precompile_optimize_inner(list, 1) - precompile_optimize_inner(list, 2) - end - - # step 1: list used labels/unused goto - # step 2: remove unused labels - def precompile_optimize_inner(list, step) - lastgoto = nil - hadref = false - walk = lambda { |expr| - next if not expr.kind_of? CExpression - # gcc's unary && support - if not expr.op and not expr.lexpr and expr.rexpr.kind_of? Label - list << expr.rexpr.name - else - walk[expr.lexpr] - if expr.rexpr.kind_of? ::Array - expr.rexpr.each { |r| walk[r] } - else - walk[expr.rexpr] - end - end - } - @statements.dup.each { |s| - lastgoto = nil if not s.kind_of? Label - case s - when Block - s.precompile_optimize_inner(list, step) - @statements.delete s if step == 2 and s.statements.empty? - when CExpression; walk[s] if step == 1 - when Label - case step - when 1 - if lastgoto and lastgoto.target == s.name - list << lastgoto - list.delete s.name if not hadref - end - when 2; @statements.delete s if not list.include? s.name - end - when Goto, If - s.kind_of?(If) ? g = s.bthen : g = s - case step - when 1 - hadref = list.include? g.target - lastgoto = g - list << g.target - when 2 - if list.include? g - idx = @statements.index s - @statements.delete s - @statements[idx, 0] = s.test if s != g and not s.test.constant? - end - end - end - } - list - end - - # noop - def precompile_make_block(scope) self end - - def continue_label ; defined?(@continue_label) ? @continue_label : @outer.continue_label end - def continue_label=(l) @continue_label = l end - def break_label ; defined?(@break_label) ? @break_label : @outer.break_label end - def break_label=(l) @break_label = l end - def return_label ; defined?(@return_label) ? @return_label : @outer.return_label end - def return_label=(l) @return_label = l end - def nonauto_label=(l) @nonauto_label = l end - def nonauto_label ; defined?(@nonauto_label) ? 
@nonauto_label : @outer.nonauto_label end - def function ; defined?(@function) ? @function : @outer.function end - def function=(f) @function = f end - end - - class Declaration - def precompile(compiler, scope) - if (@var.type.kind_of? Function and @var.initializer and scope != compiler.toplevel) or @var.storage == :static or compiler.check_reserved_name(@var) - old = @var.name - ref = scope.symbol.delete old - if scope == compiler.toplevel or (@var.type.kind_of?(Function) and not @var.initializer) - if n = compiler.label_oldname.index(old) - # reuse same name as predeclarations - @var.name = n - else - newname = old - newname = compiler.new_label newname until newname != old - if not compiler.check_reserved_name(@var) - compiler.label_oldname[newname] = old - end - @var.name = newname - end - ref ||= scope.symbol[@var.name] || @var - # append only one actual declaration for all predecls (the one with init, or the last uninit) - scope.statements << self if ref.eql?(@var) - else - @var.name = compiler.new_label @var.name until @var.name != old - compiler.toplevel.statements << self - end - compiler.toplevel.symbol[@var.name] = ref - else - scope.symbol[@var.name] ||= @var - appendme = true if scope.symbol[@var.name].eql?(@var) - end - - if i = @var.initializer - if @var.type.kind_of? Function - if @var.type.type.kind_of? Union - s = @var.type.type - v = Variable.new - v.name = compiler.new_label('return_struct_ptr') - v.type = Pointer.new(s) - CExpression.precompile_type(compiler, scope, v) - @var.type.args.unshift v - @var.type.type = v.type - end - i.function = @var - i.return_label = compiler.new_label('epilog') - i.nonauto_label = {} - i.precompile(compiler) - Label.new(i.return_label).precompile(compiler, i) - i.precompile_optimize - # append now so that static dependencies are declared before us - # TODO no pure inline if addrof(func) needed - scope.statements << self if appendme and not @var.attributes.to_a.include? 
'inline' - elsif scope != compiler.toplevel and @var.storage != :static - scope.statements << self if appendme - Declaration.precompile_dyn_initializer(compiler, scope, @var, @var.type, i) - @var.initializer = nil - else - scope.statements << self if appendme - @var.initializer = Declaration.precompile_static_initializer(compiler, @var.type, i) - end - else - scope.statements << self if appendme - end - end - - # turns an initializer to CExpressions in scope.statements - def self.precompile_dyn_initializer(compiler, scope, var, type, init) - case type = type.untypedef - when Array - # XXX TODO type.length may be dynamic !! - case init - when CExpression - # char toto[] = "42" - if not init.kind_of? CExpression or init.op or init.lexpr or not init.rexpr.kind_of? ::String - raise "unknown initializer #{init.inspect} for #{var.inspect}" - end - init = init.rexpr.unpack('C*') + [0] - init.map! { |chr| CExpression.new(nil, nil, chr, type.type) } - precompile_dyn_initializer(compiler, scope, var, type, init) - - when ::Array - type.length ||= init.length - # len is an Integer - init.each_with_index { |it, idx| - next if not it - break if idx >= type.length - idx = CExpression.new(nil, nil, idx, BaseType.new(:long, :unsigned)) - v = CExpression.new(var, :'[]', idx, type.type) - precompile_dyn_initializer(compiler, scope, v, type.type, it) - } - else raise "unknown initializer #{init.inspect} for #{var.inspect}" - end - when Union - case init - when CExpression, Variable - if init.type.untypedef.kind_of? BaseType - # works for struct foo bar[] = {0}; ... - type.members.each { |m| - v = CExpression.new(var, :'.', m.name, m.type) - precompile_dyn_initializer(compiler, scope, v, v.type, init) - } - elsif init.type.untypedef.kind_of? 
type.class - CExpression.new(var, :'=', init, type).precompile(compiler, scope) - else - raise "bad initializer #{init.inspect} for #{var.inspect}" - end - when ::Array - init.each_with_index{ |it, idx| - next if not it - m = type.members[idx] - v = CExpression.new(var, :'.', m.name, m.type) - precompile_dyn_initializer(compiler, scope, v, m.type, it) - } - else raise "unknown initializer #{init.inspect} for #{var.inspect}" - end - else - case init - when CExpression - CExpression.new(var, :'=', init, type).precompile(compiler, scope) - else raise "unknown initializer #{init.inspect} for #{var.inspect}" - end - end - end - - # returns a precompiled static initializer (eg string constants) - def self.precompile_static_initializer(compiler, type, init) - # TODO - case type = type.untypedef - when Array - if init.kind_of? ::Array - init.map { |i| precompile_static_initializer(compiler, type.type, i) } - else - init - end - when Union - if init.kind_of? ::Array - init.zip(type.members).map { |i, m| precompile_static_initializer(compiler, m.type, i) } - else - init - end - else - if init.kind_of? CExpression and init = init.reduce(compiler) and init.kind_of? CExpression - if not init.op and init.rexpr.kind_of? ::String - v = Variable.new - v.storage = :static - v.name = 'char_' + init.rexpr.gsub(/[^a-zA-Z]/, '')[0, 8] - v.type = Array.new(type.type) - v.type.length = init.rexpr.length + 1 - v.type.type.qualifier = [:const] - v.initializer = CExpression.new(nil, nil, init.rexpr, type) - Declaration.new(v).precompile(compiler, compiler.toplevel) - init.rexpr = v - end - init.rexpr = precompile_static_initializer(compiler, init.rexpr.type, init.rexpr) if init.rexpr.kind_of? CExpression - init.lexpr = precompile_static_initializer(compiler, init.lexpr.type, init.lexpr) if init.lexpr.kind_of? CExpression - end - init - end - end - end - - class If - def precompile(compiler, scope) - expr = lambda { |e| e.kind_of?(CExpression) ? 
e : CExpression.new(nil, nil, e, e.type) } - - if @bthen.kind_of? Goto or @bthen.kind_of? Break or @bthen.kind_of? Continue - # if () goto l; else b; => if () goto l; b; - if belse - t1 = @belse - @belse = nil - end - - # need to convert user-defined Goto target ! - @bthen.precompile(compiler, scope) - @bthen = scope.statements.pop # break => goto break_label - elsif belse - # if () a; else b; => if () goto then; b; goto end; then: a; end: - t1 = @belse - t2 = @bthen - l2 = compiler.new_label('if_then') - @bthen = Goto.new(l2) - @belse = nil - l3 = compiler.new_label('if_end') - else - # if () a; => if (!) goto end; a; end: - t1 = @bthen - l2 = compiler.new_label('if_end') - @bthen = Goto.new(l2) - @test = CExpression.negate(@test) - end - - @test = expr[@test] - case @test.op - when :'&&' - # if (c1 && c2) goto a; => if (!c1) goto b; if (c2) goto a; b: - l1 = compiler.new_label('if_nand') - If.new(CExpression.negate(@test.lexpr), Goto.new(l1)).precompile(compiler, scope) - @test = expr[@test.rexpr] - precompile(compiler, scope) - when :'||' - l1 = compiler.new_label('if_or') - If.new(expr[@test.lexpr], Goto.new(@bthen.target)).precompile(compiler, scope) - @test = expr[@test.rexpr] - precompile(compiler, scope) - else - @test = CExpression.precompile_inner(compiler, scope, @test) - t = @test.reduce(compiler) - if t.kind_of? 
::Integer - if t == 0 - Label.new(l1, nil).precompile(compiler, scope) if l1 - t1.precompile(compiler, scope) if t1 - Label.new(l2, nil).precompile(compiler, scope) if l2 - Label.new(l3, nil).precompile(compiler, scope) if l3 - else - scope.statements << @bthen - Label.new(l1, nil).precompile(compiler, scope) if l1 - Label.new(l2, nil).precompile(compiler, scope) if l2 - t2.precompile(compiler, scope) if t2 - Label.new(l3, nil).precompile(compiler, scope) if l3 - end - return - end - scope.statements << self - end - - Label.new(l1, nil).precompile(compiler, scope) if l1 - t1.precompile(compiler, scope) if t1 - Goto.new(l3).precompile(compiler, scope) if l3 - Label.new(l2, nil).precompile(compiler, scope) if l2 - t2.precompile(compiler, scope) if t2 - Label.new(l3, nil).precompile(compiler, scope) if l3 - end - end - - class For - def precompile(compiler, scope) - if init - @init.precompile(compiler, scope) - scope = @init if @init.kind_of? Block - end - - @body = @body.precompile_make_block scope - @body.continue_label = compiler.new_label 'for_continue' - @body.break_label = compiler.new_label 'for_break' - label_test = compiler.new_label 'for_test' - - Label.new(label_test).precompile(compiler, scope) - if test - If.new(CExpression.negate(@test), Goto.new(@body.break_label)).precompile(compiler, scope) - end - - @body.precompile(compiler, scope) - - Label.new(@body.continue_label).precompile(compiler, scope) - if iter - @iter.precompile(compiler, scope) - end - - Goto.new(label_test).precompile(compiler, scope) - Label.new(@body.break_label).precompile(compiler, scope) - end - end - - class While - def precompile(compiler, scope) - @body = @body.precompile_make_block scope - @body.continue_label = compiler.new_label('while_continue') - @body.break_label = compiler.new_label('while_break') - - Label.new(@body.continue_label).precompile(compiler, scope) - - If.new(CExpression.negate(@test), Goto.new(@body.break_label)).precompile(compiler, scope) - - 
@body.precompile(compiler, scope) - - Goto.new(@body.continue_label).precompile(compiler, scope) - Label.new(@body.break_label).precompile(compiler, scope) - end - end - - class DoWhile - def precompile(compiler, scope) - @body = @body.precompile_make_block scope - @body.continue_label = compiler.new_label('dowhile_continue') - @body.break_label = compiler.new_label('dowhile_break') - loop_start = compiler.new_label('dowhile_start') - - Label.new(loop_start).precompile(compiler, scope) - - @body.precompile(compiler, scope) - - Label.new(@body.continue_label).precompile(compiler, scope) - - If.new(@test, Goto.new(loop_start)).precompile(compiler, scope) - - Label.new(@body.break_label).precompile(compiler, scope) - end - end - - class Switch - def precompile(compiler, scope) - var = Variable.new - var.storage = :register - var.name = compiler.new_label('switch') - var.type = @test.type - var.initializer = @test - CExpression.precompile_type(compiler, scope, var) - Declaration.new(var).precompile(compiler, scope) - - @body = @body.precompile_make_block scope - @body.break_label = compiler.new_label('switch_break') - @body.precompile(compiler) - default = @body.break_label - # recursive lambda to change Case to Labels - # dynamically creates the If sequence - walk = lambda { |blk| - blk.statements.each_with_index { |s, i| - case s - when Case - label = compiler.new_label('case') - if s.expr == 'default' - default = label - elsif s.exprup - If.new(CExpression.new(CExpression.new(var, :'>=', s.expr, BaseType.new(:int)), :'&&', - CExpression.new(var, :'<=', s.exprup, BaseType.new(:int)), - BaseType.new(:int)), Goto.new(label)).precompile(compiler, scope) - else - If.new(CExpression.new(var, :'==', s.expr, BaseType.new(:int)), - Goto.new(label)).precompile(compiler, scope) - end - blk.statements[i] = Label.new(label) - when Block - walk[s] - end - } - } - walk[@body] - Goto.new(default).precompile(compiler, scope) - scope.statements << @body - 
Label.new(@body.break_label).precompile(compiler, scope) - end - end - - class Continue - def precompile(compiler, scope) - Goto.new(scope.continue_label).precompile(compiler, scope) - end - end - - class Break - def precompile(compiler, scope) - Goto.new(scope.break_label).precompile(compiler, scope) - end - end - - class Return - def precompile(compiler, scope) - if @value - @value = CExpression.new(nil, nil, @value, @value.type) if not @value.kind_of? CExpression - if @value.type.untypedef.kind_of? Union - @value = @value.precompile_inner(compiler, scope) - func = scope.function.type - CExpression.new(CExpression.new(nil, :*, func.args.first, @value.type), :'=', @value, @value.type).precompile(compiler, scope) - @value = func.args.first - else - # cast to function return type - @value = CExpression.new(nil, nil, @value, scope.function.type.type).precompile_inner(compiler, scope) - end - scope.statements << self - end - Goto.new(scope.return_label).precompile(compiler, scope) - end - end - - class Label - def precompile(compiler, scope) - if name and (not compiler.auto_label_list[@name]) - @name = scope.nonauto_label[@name] ||= compiler.new_label(@name) - end - scope.statements << self - if statement - @statement.precompile(compiler, scope) - @statement = nil - end - end - end - - class Case - def precompile(compiler, scope) - @expr = CExpression.precompile_inner(compiler, scope, @expr) - @exprup = CExpression.precompile_inner(compiler, scope, @exprup) if exprup - super(compiler, scope) - end - end - - class Goto - def precompile(compiler, scope) - if not compiler.auto_label_list[@target] - @target = scope.nonauto_label[@target] ||= compiler.new_label(@target) - end - scope.statements << self - end - end - - class Asm - def precompile(compiler, scope) - scope.statements << self - # TODO CExpr.precompile_type(clobbers) - end - end - - class CExpression - def precompile(compiler, scope) - i = precompile_inner(compiler, scope, false) - scope.statements << i if i - 
end - - # changes obj.type to a precompiled type - # keeps struct/union, change everything else to __int\d - # except Arrays if declaration is true (need to know variable allocation sizes etc) - # returns the type - def self.precompile_type(compiler, scope, obj, declaration = false) - case t = obj.type.untypedef - when BaseType - case t.name - when :void - when :float, :double, :longdouble - else t = BaseType.new("__int#{compiler.typesize[t.name]*8}".to_sym, t.specifier) - end - when Array - if declaration; precompile_type(compiler, scope, t, declaration) - else t = BaseType.new("__int#{compiler.typesize[:ptr]*8}".to_sym, :unsigned) - end - when Pointer - if t.type.untypedef.kind_of? Function - precompile_type(compiler, scope, t, declaration) - else - t = BaseType.new("__int#{compiler.typesize[:ptr]*8}".to_sym, :unsigned) - end - when Enum; t = BaseType.new("__int#{compiler.typesize[:int]*8}".to_sym) - when Function - precompile_type(compiler, scope, t) - t.args ||= [] - t.args.each { |a| precompile_type(compiler, scope, a) } - when Union - if declaration and t.members and not t.name # anonymous struct - t.members.each { |a| precompile_type(compiler, scope, a, true) } - end - else raise 'bad type ' + t.inspect - end - (t.qualifier ||= []).concat obj.type.qualifier if obj.type.qualifier and t != obj.type - (t.attributes ||= []).concat obj.type.attributes if obj.type.attributes and t != obj.type - while obj.type.kind_of? 
TypeDef - obj.type = obj.type.type - (t.qualifier ||= []).concat obj.type.qualifier if obj.type.qualifier and t != obj.type - (t.attributes ||= []).concat obj.type.attributes if obj.type.attributes and t != obj.type - end - obj.type = t - end - - def self.precompile_inner(compiler, scope, expr, nested = true) - case expr - when CExpression; expr.precompile_inner(compiler, scope, nested) - else expr - end - end - - # returns a new CExpression with simplified self.type, computes structure offsets - # turns char[]/float immediates to reference to anonymised const - # TODO 'a = b += c' => 'b += c; a = b' (use nested argument) - # TODO handle precompile_inner return nil - # TODO struct.bits - def precompile_inner(compiler, scope, nested = true) - case @op - when :'.' - # a.b => (&a)->b - lexpr = CExpression.precompile_inner(compiler, scope, @lexpr) - ll = lexpr - ll = lexpr.rexpr while ll.kind_of? CExpression and not ll.op - if ll.kind_of? CExpression and ll.op == :'*' and not ll.lexpr - # do not change lexpr.rexpr.type directly to a pointer, might retrigger (ptr+imm) => (ptr + imm*sizeof(*ptr)) - @lexpr = CExpression.new(nil, nil, ll.rexpr, Pointer.new(lexpr.type)) - else - @lexpr = CExpression.new(nil, :'&', lexpr, Pointer.new(lexpr.type)) - end - @op = :'->' - precompile_inner(compiler, scope) - when :'->' - # a->b => *(a + off(b)) - struct = @lexpr.type.untypedef.type.untypedef - lexpr = CExpression.precompile_inner(compiler, scope, @lexpr) - @lexpr = nil - @op = nil - if struct.kind_of? Union and (off = struct.offsetof(compiler, @rexpr)) != 0 - off = CExpression.new(nil, nil, off, BaseType.new(:int, :unsigned)) - @rexpr = CExpression.new(lexpr, :'+', off, lexpr.type) - # ensure the (ptr + value) is not expanded to (ptr + value * sizeof(*ptr)) - CExpression.precompile_type(compiler, scope, @rexpr) - else - # union or 1st struct member - @rexpr = lexpr - end - if @type.kind_of? 
Array # Array member type is already an address - else - @rexpr = CExpression.new(nil, :*, @rexpr, @rexpr.type) - end - precompile_inner(compiler, scope) - when :'[]' - rexpr = CExpression.precompile_inner(compiler, scope, @rexpr) - if rexpr.kind_of? CExpression and not rexpr.op and rexpr.rexpr == 0 - @rexpr = @lexpr - else - @rexpr = CExpression.new(@lexpr, :'+', rexpr, @lexpr.type) - end - @op = :'*' - @lexpr = nil - precompile_inner(compiler, scope) - when :'?:' - # cannot precompile in place, a conditionnal expression may have a coma: must turn into If - if @lexpr.kind_of? CExpression - @lexpr = @lexpr.precompile_inner(compiler, scope) - if not @lexpr.lexpr and not @lexpr.op and @lexpr.rexpr.kind_of? ::Numeric - if @lexpr.rexpr == 0 - e = @rexpr[1] - else - e = @rexpr[0] - end - e = CExpression.new(nil, nil, e, e.type) if not e.kind_of? CExpression - return e.precompile_inner(compiler, scope) - end - end - raise 'conditional in toplevel' if scope == compiler.toplevel # just in case - var = Variable.new - var.storage = :register - var.name = compiler.new_label('ternary') - var.type = @rexpr[0].type - CExpression.precompile_type(compiler, scope, var) - Declaration.new(var).precompile(compiler, scope) - If.new(@lexpr, CExpression.new(var, :'=', @rexpr[0], var.type), CExpression.new(var, :'=', @rexpr[1], var.type)).precompile(compiler, scope) - @lexpr = nil - @op = nil - @rexpr = var - precompile_inner(compiler, scope) - when :'&&' - if scope == compiler.toplevel - @lexpr = CExpression.precompile_inner(compiler, scope, @lexpr) - @rexpr = CExpression.precompile_inner(compiler, scope, @rexpr) - CExpression.precompile_type(compiler, scope, self) - self - else - var = Variable.new - var.storage = :register - var.name = compiler.new_label('and') - var.type = @type - CExpression.precompile_type(compiler, scope, var) - var.initializer = CExpression.new(nil, nil, 0, var.type) - Declaration.new(var).precompile(compiler, scope) - l = @lexpr.kind_of?(CExpression) ? 
@lexpr : CExpression.new(nil, nil, @lexpr, @lexpr.type) - r = @rexpr.kind_of?(CExpression) ? @rexpr : CExpression.new(nil, nil, @rexpr, @rexpr.type) - If.new(l, If.new(r, CExpression.new(var, :'=', CExpression.new(nil, nil, 1, var.type), var.type))).precompile(compiler, scope) - @lexpr = nil - @op = nil - @rexpr = var - precompile_inner(compiler, scope) - end - when :'||' - if scope == compiler.toplevel - @lexpr = CExpression.precompile_inner(compiler, scope, @lexpr) - @rexpr = CExpression.precompile_inner(compiler, scope, @rexpr) - CExpression.precompile_type(compiler, scope, self) - self - else - var = Variable.new - var.storage = :register - var.name = compiler.new_label('or') - var.type = @type - CExpression.precompile_type(compiler, scope, var) - var.initializer = CExpression.new(nil, nil, 1, var.type) - Declaration.new(var).precompile(compiler, scope) - l = @lexpr.kind_of?(CExpression) ? @lexpr : CExpression.new(nil, nil, @lexpr, @lexpr.type) - l = CExpression.new(nil, :'!', l, var.type) - r = @rexpr.kind_of?(CExpression) ? @rexpr : CExpression.new(nil, nil, @rexpr, @rexpr.type) - r = CExpression.new(nil, :'!', r, var.type) - If.new(l, If.new(r, CExpression.new(var, :'=', CExpression.new(nil, nil, 0, var.type), var.type))).precompile(compiler, scope) - @lexpr = nil - @op = nil - @rexpr = var - precompile_inner(compiler, scope) - end - when :funcall - if @lexpr.kind_of? Variable and @lexpr.type.kind_of? Function and @lexpr.attributes and @lexpr.attributes.include? 'inline' and @lexpr.initializer - # TODO check recursive call (direct or indirect) - raise 'inline varargs unsupported' if @lexpr.type.varargs - rtype = @lexpr.type.type.untypedef - if not rtype.kind_of? 
BaseType or rtype.name != :void - rval = Variable.new - rval.name = compiler.new_label('inline_return') - rval.type = @lexpr.type.type - Declaration.new(rval).precompile(compiler, scope) - end - inline_label = {} - locals = @lexpr.type.args.zip(@rexpr).inject({}) { |h, (fa, a)| - h.update fa => CExpression.new(nil, nil, a, fa.type).precompile_inner(compiler, scope) - } - copy_inline_ce = lambda { |ce| - case ce - when CExpression; CExpression.new(copy_inline_ce[ce.lexpr], ce.op, copy_inline_ce[ce.rexpr], ce.type) - when Variable; locals[ce] || ce - when ::Array; ce.map { |e_| copy_inline_ce[e_] } - else ce - end - } - copy_inline = lambda { |stmt, scp| - case stmt - when Block - b = Block.new(scp) - stmt.statements.each { |s| - s = copy_inline[s, b] - b.statements << s if s - } - b - when If; If.new(copy_inline_ce[stmt.test], copy_inline[stmt.bthen, scp]) # re-precompile ? - when Label; Label.new(inline_label[stmt.name] ||= compiler.new_label('inline_'+stmt.name)) - when Goto; Goto.new(inline_label[stmt.target] ||= compiler.new_label('inline_'+stmt.target)) - when Return; CExpression.new(rval, :'=', copy_inline_ce[stmt.value], rval.type).precompile_inner(compiler, scp) if stmt.value - when CExpression; copy_inline_ce[stmt] - when Declaration - nv = stmt.var.dup - if nv.type.kind_of? Array and nv.type.length.kind_of? CExpression - nv.type = Array.new(nv.type.type, copy_inline_ce[nv.type.length]) # XXX nested dynamic? - end - locals[stmt.var] = nv - scp.symbol[nv.name] = nv - Declaration.new(nv) - else raise 'unexpected inline statement ' + stmt.inspect - end - } - scope.statements << copy_inline[@lexpr.initializer, scope] # body already precompiled - CExpression.new(nil, nil, rval, rval.type).precompile_inner(compiler, scope) - elsif @type.kind_of? 
Union - var = Variable.new - var.name = compiler.new_label('return_struct') - var.type = @type - Declaration.new(var).precompile(compiler, scope) - @rexpr.unshift CExpression.new(nil, :&, var, Pointer.new(var.type)) - - var2 = Variable.new - var2.name = compiler.new_label('return_struct_ptr') - var2.type = Pointer.new(@type) - var2.storage = :register - CExpression.precompile_type(compiler, scope, var2) - Declaration.new(var2).precompile(compiler, scope) - @type = var2.type - CExpression.new(var2, :'=', self, var2.type).precompile(compiler, scope) - - CExpression.new(nil, :'*', var2, var.type).precompile_inner(compiler, scope) - else - t = @lexpr.type.untypedef - t = t.type.untypedef if t.pointer? - @lexpr = CExpression.precompile_inner(compiler, scope, @lexpr) - types = t.args.map { |a| a.type } - # cast args to func prototype - @rexpr.map! { |e_| (types.empty? ? e_ : CExpression.new(nil, nil, e_, types.shift)).precompile_inner(compiler, scope) } - CExpression.precompile_type(compiler, scope, self) - self - end - when :',' - lexpr = @lexpr.kind_of?(CExpression) ? @lexpr : CExpression.new(nil, nil, @lexpr, @lexpr.type) - rexpr = @rexpr.kind_of?(CExpression) ? @rexpr : CExpression.new(nil, nil, @rexpr, @rexpr.type) - lexpr.precompile(compiler, scope) - rexpr.precompile_inner(compiler, scope) - when :'!' - CExpression.precompile_type(compiler, scope, self) - if @rexpr.kind_of?(CExpression) - case @rexpr.op - when :'<', :'>', :'<=', :'>=', :'==', :'!=' - @op = { :'<' => :'>=', :'>' => :'<=', :'<=' => :'>', :'>=' => :'<', - :'==' => :'!=', :'!=' => :'==' }[@rexpr.op] - @lexpr = @rexpr.lexpr - @rexpr = @rexpr.rexpr - precompile_inner(compiler, scope) - when :'&&', :'||' - @op = { :'&&' => :'||', :'||' => :'&&' }[@rexpr.op] - @lexpr = CExpression.new(nil, :'!', @rexpr.lexpr, @type) - @rexpr = CExpression.new(nil, :'!', @rexpr.rexpr, @type) - precompile_inner(compiler, scope) - when :'!' - if @rexpr.rexpr.kind_of? 
CExpression - @op = nil - @rexpr = @rexpr.rexpr - else - @op = :'!=' - @lexpr = @rexpr.rexpr - @rexpr = CExpression.new(nil, nil, 0, @lexpr.type) - end - precompile_inner(compiler, scope) - else - @rexpr = CExpression.precompile_inner(compiler, scope, @rexpr) - self - end - else - @rexpr = CExpression.precompile_inner(compiler, scope, @rexpr) - self - end - when :'++', :'--' - if not @rexpr - var = Variable.new - var.storage = :register - var.name = compiler.new_label('postincrement') - var.type = @type - Declaration.new(var).precompile(compiler, scope) - CExpression.new(var, :'=', @lexpr, @type).precompile(compiler, scope) - CExpression.new(nil, @op, @lexpr, @type).precompile(compiler, scope) - @lexpr = nil - @op = nil - @rexpr = var - precompile_inner(compiler, scope) - elsif @type.pointer? and compiler.sizeof(nil, @type.untypedef.type.untypedef) != 1 - # ++ptr => ptr += sizeof(*ptr) (done in += precompiler) - @op = { :'++' => :'+=', :'--' => :'-=' }[@op] - @lexpr = @rexpr - @rexpr = CExpression.new(nil, nil, 1, BaseType.new(:ptr, :unsigned)) - precompile_inner(compiler, scope) - else - CExpression.precompile_type(compiler, scope, self) - @rexpr = CExpression.precompile_inner(compiler, scope, @rexpr) - self - end - when :'=' - # handle structure assignment/array assignment - case @lexpr.type.untypedef - when Union - # rexpr may be a :funcall - @rexpr = CExpression.precompile_inner(compiler, scope, @rexpr) - @lexpr.type.untypedef.members.zip(@rexpr.type.untypedef.members) { |m1, m2| - # assume m1 and m2 are compatible - v1 = CExpression.new(@lexpr, :'.', m1.name, m1.type) - v2 = CExpression.new(@rexpr, :'.', m2.name, m1.type) - CExpression.new(v1, :'=', v2, v1.type).precompile(compiler, scope) - } - # (foo = bar).toto - @op = nil - @rexpr = @lexpr - @lexpr = nil - @type = @rexpr.type - precompile_inner(compiler, scope) if nested - when Array - if not len = @lexpr.type.untypedef.length - @rexpr = CExpression.precompile_inner(compiler, scope, @rexpr) - # char toto[] 
= "bla" - if @rexpr.kind_of? CExpression and not @rexpr.lexpr and not @rexpr.op and - @rexpr.rexpr.kind_of? Variable and @rexpr.rexpr.type.kind_of? Array - len = @rexpr.rexpr.type.length - end - end - raise 'array initializer with no length !' if not len - # TODO optimize... - len.times { |i| - i = CExpression.new(nil, nil, i, BaseType.new(:long, :unsigned)) - v1 = CExpression.new(@lexpr, :'[]', i, @lexpr.type.untypedef.type) - v2 = CExpression.new(@rexpr, :'[]', i, v1.type) - CExpression.new(v1, :'=', v2, v1.type).precompile(compiler, scope) - } - @op = nil - @rexpr = @lexpr - @lexpr = nil - @type = @rexpr.type - precompile_inner(compiler, scope) if nested - else - @lexpr = CExpression.precompile_inner(compiler, scope, @lexpr) - @rexpr = CExpression.precompile_inner(compiler, scope, @rexpr) - CExpression.precompile_type(compiler, scope, self) - self - end - when nil - case @rexpr - when Block - # compound statements - raise 'compound statement in toplevel' if scope == compiler.toplevel # just in case - var = Variable.new - var.storage = :register - var.name = compiler.new_label('compoundstatement') - var.type = @type - CExpression.precompile_type(compiler, scope, var) - Declaration.new(var).precompile(compiler, scope) - if @rexpr.statements.last.kind_of? 
CExpression - @rexpr.statements[-1] = CExpression.new(var, :'=', @rexpr.statements[-1], var.type) - @rexpr.precompile(compiler, scope) - end - @rexpr = var - precompile_inner(compiler, scope) - when ::String - # char[] immediate - v = Variable.new - v.storage = :static - v.name = 'char_' + @rexpr.tr('^a-zA-Z', '')[0, 8] - v.type = Array.new(@type.type) - v.type.length = @rexpr.length + 1 - v.type.type.qualifier = [:const] - v.initializer = CExpression.new(nil, nil, @rexpr, @type) - Declaration.new(v).precompile(compiler, scope) - @rexpr = v - precompile_inner(compiler, scope) - when ::Float - # float immediate - v = Variable.new - v.storage = :static - v.name = @type.untypedef.name.to_s - v.type = @type - v.type.qualifier = [:const] - v.initializer = CExpression.new(nil, nil, @rexpr, @type) - Declaration.new(v).precompile(compiler, scope) - @rexpr = CExpression.new(nil, :'*', v, v.type) - precompile_inner(compiler, scope) - when CExpression - # simplify casts - CExpression.precompile_type(compiler, scope, self) - # propagate type first so that __uint64 foo() { return -1 } => 0xffffffffffffffff - @rexpr.type = @type if @rexpr.kind_of? CExpression and @rexpr.op == :- and not @rexpr.lexpr and @type.kind_of? BaseType and @type.name == :__int64 # XXX kill me - @rexpr = @rexpr.precompile_inner(compiler, scope) - if @type.kind_of? BaseType and @rexpr.type.kind_of? BaseType - if @rexpr.type == @type - # noop cast - @lexpr, @op, @rexpr = @rexpr.lexpr, @rexpr.op, @rexpr.rexpr - elsif not @rexpr.op and @type.integral? and @rexpr.type.integral? - if @rexpr.rexpr.kind_of? ::Numeric and (val = reduce(compiler)).kind_of? ::Numeric - @rexpr = val - elsif compiler.typesize[@type.name] < compiler.typesize[@rexpr.type.name] - # (char)(short)(int)(long)foo => (char)foo - @rexpr = @rexpr.rexpr - end - end - end - self - else - CExpression.precompile_type(compiler, scope, self) - self - end - else - # int+ptr => ptr+int - if @op == :+ and @lexpr and @lexpr.type.integral? 
and @rexpr.type.pointer? - @rexpr, @lexpr = @lexpr, @rexpr - end - - # handle pointer + 2 == ((char *)pointer) + 2*sizeof(*pointer) - if @rexpr and [:'+', :'+=', :'-', :'-='].include? @op and - @type.pointer? and @rexpr.type.integral? - sz = compiler.sizeof(nil, @type.untypedef.type.untypedef) - if sz != 1 - sz = CExpression.new(nil, nil, sz, @rexpr.type) - @rexpr = CExpression.new(@rexpr, :'*', sz, @rexpr.type) - end - end - - # type promotion => cast - case @op - when :+, :-, :*, :/, :&, :|, :^, :% - if @lexpr - if @lexpr.type != @type - @lexpr = CExpression.new(nil, nil, @lexpr, @lexpr.type) if not @lexpr.kind_of? CExpression - @lexpr = CExpression.new(nil, nil, @lexpr, @type) - end - if @rexpr.type != @type - @rexpr = CExpression.new(nil, nil, @rexpr, @rexpr.type) if not @rexpr.kind_of? CExpression - @rexpr = CExpression.new(nil, nil, @rexpr, @type) - end - end - when :>>, :<< - # char => int - if @lexpr.type != @type - @lexpr = CExpression.new(nil, nil, @lexpr, @lexpr.type) if not @lexpr.kind_of? CExpression - @lexpr = CExpression.new(nil, nil, @lexpr, @type) - end - when :'+=', :'-=', :'*=', :'/=', :'&=', :'|=', :'^=', :'%=' - if @rexpr.type != @lexpr.type - @rexpr = CExpression.new(nil, nil, @rexpr, @rexpr.type) if not @rexpr.kind_of? CExpression - @rexpr = CExpression.new(nil, nil, @rexpr, @type) - end - end - - @lexpr = CExpression.precompile_inner(compiler, scope, @lexpr) - @rexpr = CExpression.precompile_inner(compiler, scope, @rexpr) - - if @op == :'&' and not @lexpr - rr = @rexpr - rr = rr.rexpr while rr.kind_of? CExpression and not rr.op - if rr.kind_of? CExpression and rr.op == :'*' and not rr.lexpr - @lexpr = nil - @op = nil - @rexpr = rr.rexpr - return precompile_inner(compiler, scope) - elsif rr != @rexpr - @rexpr = rr - return precompile_inner(compiler, scope) - end - end - - CExpression.precompile_type(compiler, scope, self) - - isnumeric = lambda { |e_| e_.kind_of?(::Numeric) or (e_.kind_of? 
CExpression and - not e_.lexpr and not e_.op and e_.rexpr.kind_of? ::Numeric) } - - # calc numeric - # XXX do not simplify operations involving variables (for type overflow etc) - if isnumeric[@rexpr] and (not @lexpr or isnumeric[@lexpr]) and (val = reduce(compiler)).kind_of? ::Numeric - @lexpr = nil - @op = nil - @rexpr = val - end - - self - end - end - end + class Parser + def precompile + @toplevel.precompile(Compiler.new(self, @program)) + self + end + end + + # each CPU defines a subclass of this one + class Compiler + # an ExeFormat (mostly used for unique label creation, and cpu.check_reserved_name) + attr_accessor :exeformat + # the C Parser (destroyed by compilation) + attr_accessor :parser + # an array of assembler statements (strings) + attr_accessor :source + # list of unique labels generated (to recognize user-defined ones) + attr_accessor :auto_label_list + # map asm name -> original C name (for exports etc) + attr_accessor :label_oldname + + attr_accessor :curexpr + # allows 'raise self' (eg struct.offsetof) + def exception(msg='EOF unexpected') + ParseError.new "near #@curexpr: #{msg}" + end + + # creates a new CCompiler from an ExeFormat and a C Parser + def initialize(parser, exeformat=nil, source=[]) + exeformat ||= ExeFormat.new + @parser, @exeformat, @source = parser, exeformat, source + @auto_label_list = {} + @label_oldname = {} + end + + def new_label(base='') + lbl = @exeformat.new_label base + @auto_label_list[lbl] = true + lbl + end + + def toplevel ; @parser.toplevel end + def typesize ; @parser.typesize end + def sizeof(*a) @parser.sizeof(*a) end + + # compiles the c parser toplevel to assembler statements in self.source (::Array of ::String) + # + # starts by precompiling parser.toplevel (destructively): + # static symbols are converted to toplevel ones, as nested functions + # uses an ExeFormat (the argument) to create unique label/variable names + # + # remove typedefs/enums + # CExpressions: all expr types are converted to 
__int8/__int16/__int32/__int64 (sign kept) (incl. ptr), + void + # struct member dereference/array indexes are converted to *(ptr + off) + # coma are converted to 2 statements, ?: are converted to If + # :|| and :&& are converted to If + assignment to temporary + # immediate quotedstrings/floats are converted to references to const static toplevel + # postincrements are replaced by a temporary (XXX arglist) + # compound statements are unnested + # Asm are kept (TODO precompile clobber types) + # Declarations: initializers are converted to separate assignment CExpressions + # Blocks are kept unless empty + # structure dereferences/array indexing are converted to *(ptr + offset) + # While/For/DoWhile/Switch are converted to If/Goto + # Continue/Break are converted to Goto + # Cases are converted to Labels during Switch conversion + # Label statements are removed + # Return: 'return ;' => 'return ; goto ;', 'return;' => 'goto ;' + # If: 'if (a) b; else c;' => 'if (a) goto l1; { c; }; goto l2; l1: { b; } l2:' + # && and || in condition are expanded to multiple If + # functions returning struct are precompiled (in Declaration/CExpression/Return) + # + # in a second phase, unused labels are removed from functions, as noop goto (goto x; x:) + # dead code is removed ('goto foo; bar; baz:' => 'goto foo; baz:') (TODO) + # + # after that, toplevel is no longer valid C (bad types, blocks moved...) + # + # then toplevel statements are sorted (.text, .data, .rodata, .bss) and compiled into asm statements in self.source + # + # returns the asm source in a single string + def compile + cf = @exeformat.unique_labels_cache.keys & @auto_label_list.keys + raise "compile_c name conflict: #{cf.inspect}" if not cf.empty? 
+ @exeformat.unique_labels_cache.update @auto_label_list + + @parser.toplevel.precompile(self) + + # reorder statements (arrays of Variables) following exe section typical order + funcs, rwdata, rodata, udata = [], [], [], [] + @parser.toplevel.statements.each { |st| + if st.kind_of? Asm + @source << st.body + next + end + raise 'non-declaration at toplevel! ' + st.inspect if not st.kind_of? Declaration + v = st.var + if v.type.kind_of? Function + funcs << v if v.initializer # no initializer == storage :extern + elsif v.storage == :extern + elsif v.initializer + if v.type.qualifier.to_a.include?(:const) or + (v.type.kind_of? Array and v.type.type.qualifier.to_a.include?(:const)) + rodata << v + else + rwdata << v + end + else + udata << v + end + } + + if not funcs.empty? + @exeformat.compile_setsection @source, '.text' + funcs.each { |func| c_function(func) } + c_program_epilog + end + + align = 1 + if not rwdata.empty? + @exeformat.compile_setsection @source, '.data' + rwdata.each { |data| align = c_idata(data, align) } + end + + if not rodata.empty? + @exeformat.compile_setsection @source, '.rodata' + rodata.each { |data| align = c_idata(data, align) } + end + + if not udata.empty? 
+ @exeformat.compile_setsection @source, '.bss' + udata.each { |data| align = c_udata(data, align) } + end + + # needed to allow asm parser to use our autogenerated label names + @exeformat.unique_labels_cache.delete_if { |k, v| @auto_label_list[k] } + + @source.join("\n") + end + + # compiles a C function +func+ to asm source into the array of strings +str+ + # in a first pass the stack variable offsets are computed, + # then each statement is compiled in turn + def c_function(func) + # must wait the Declaration to run the CExpr for dynamic auto offsets, + # and must run those statements once only + # TODO alloc a stack variable to maintain the size for each dynamic array + # TODO offset of arguments + # TODO nested function + c_init_state(func) + + # hide the full @source while compiling, then add prolog/epilog (saves 1 pass) + @source << '' + @source << "#{@label_oldname[func.name]}:" if @label_oldname[func.name] + @source << "#{func.name}:" + presource, @source = @source, [] + + c_block(func.initializer) + + tmpsource, @source = @source, presource + c_prolog + @source.concat tmpsource + c_epilog + @source << '' + end + + def c_block(blk) + c_block_enter(blk) + blk.statements.each { |stmt| + case stmt + when CExpression; c_cexpr(stmt) + when Declaration; c_decl(stmt.var) + when If; c_ifgoto(stmt.test, stmt.bthen.target) + when Goto; c_goto(stmt.target) + when Label; c_label(stmt.name) + when Return; c_return(stmt.value) + when Asm; c_asm(stmt) + when Block; c_block(stmt) + else raise + end + } + c_block_exit(blk) + end + + def c_block_enter(blk) + end + + def c_block_exit(blk) + end + + def c_label(name) + @source << "#{name}:" + end + + # fills @state.offset (empty hash) + # automatic variable => stack offset, (recursive) + # offset is an ::Integer or a CExpression (dynamic array) + # assumes offset 0 is a ptr-size-aligned address + # TODO registerize automatic variables + def c_reserve_stack(block, off = 0) + block.statements.each { |stmt| + case stmt + when 
Declaration + next if stmt.var.type.kind_of? Function + off = c_reserve_stack_var(stmt.var, off) + @state.offset[stmt.var] = off + when Block + c_reserve_stack(stmt, off) + # do not update off, not nested subblocks can overlap + end + } + end + + # computes the new stack offset for var + # off is either an offset from stack start (:ptr-size-aligned) or + # a CExpression [[[expr, +, 7], &, -7], +, off] + def c_reserve_stack_var(var, off) + if (arr_type = var.type).kind_of? Array and (arr_sz = arr_type.length).kind_of? CExpression + # dynamic array ! + arr_sz = CExpression.new(arr_sz, :*, sizeof(nil, arr_type.type), + BaseType.new(:long, :unsigned)).precompile_inner(@parser, nil) + off = CExpression.new(arr_sz, :+, off, arr_sz.type) + off = CExpression.new(off, :+, 7, off.type) + off = CExpression.new(off, :&, -7, off.type) + CExpression.new(off, :+, 0, off.type) + else + al = var.type.align(@parser) + sz = sizeof(var) + case off + when CExpression; CExpression.new(off.lexpr, :+, ((off.rexpr + sz + al - 1) / al * al), off.type) + else (off + sz + al - 1) / al * al + end + end + end + + # here you can add thing like stubs for PIC code + def c_program_epilog + end + + # compiles a C static data definition into an asm string + # returns the new alignment value + def c_idata(data, align) + w = data.type.align(@parser) + @source << ".align #{align = w}" if w > align + + @source << "#{@label_oldname[data.name]}:" if @label_oldname[data.name] + @source << data.name.dup + len = c_idata_inner(data.type, data.initializer) + len %= w + len == 0 ? w : len + end + + # dumps an anonymous variable definition, appending to the last line of source + # source.last is a label name or is empty before calling here + # return the length of the data written + def c_idata_inner(type, value) + case type + when BaseType + value ||= 0 + + if type.name == :void + @source.last << ':' if not @source.last.empty? 
+ return 0 + end + + @source.last << + case type.name + when :__int8; ' db ' + when :__int16; ' dw ' + when :__int32; ' dd ' + when :__int64; ' dq ' + when :ptr; " d#{%w[x b w x d x x x q][@parser.typesize[type.name]]} " + when :float; ' db ' + [value].pack(@parser.endianness == :little ? 'e' : 'g').unpack('C*').join(', ') + ' // ' + when :double; ' db ' + [value].pack(@parser.endianness == :little ? 'E' : 'G').unpack('C*').join(', ') + ' // ' + when :longdouble; ' db ' + [value].pack(@parser.endianness == :little ? 'E' : 'G').unpack('C*').join(', ') + ' // ' # XXX same as :double + else raise "unknown idata type #{type.inspect} #{value.inspect}" + end + + @source.last << c_idata_inner_cexpr(value) + + @parser.typesize[type.name] + + when Struct + value ||= [] + @source.last << ':' if not @source.last.empty? + # could .align here, but if there is our label name just before, it should have been .aligned too.. + raise "unknown struct initializer #{value.inspect}" if not value.kind_of? ::Array + sz = 0 + type.members.zip(value).each { |m, v| + if m.name and wsz = type.offsetof(@parser, m.name) and sz < wsz + @source << "db #{wsz-sz} dup(?)" + end + @source << '' + flen = c_idata_inner(m.type, v) + sz += flen + } + + sz + + when Union + value ||= [] + @source.last << ':' if not @source.last.empty? + len = sizeof(nil, type) + raise "unknown union initializer #{value.inspect}" if not value.kind_of? ::Array + idx = value.rindex(value.compact.last) || 0 + raise "empty union initializer" if not idx + wlen = c_idata_inner(type.members[idx].type, value[idx]) + @source << "db #{'0' * (len - wlen) * ', '}" if wlen < len + + len + + when Array + value ||= [] + if value.kind_of? CExpression and not value.op and value.rexpr.kind_of? 
::String + elen = sizeof(nil, value.type.type) + @source.last << + case elen + when 1; ' db ' + when 2; ' dw ' + else raise 'bad char* type ' + value.inspect + end << value.rexpr.inspect + + len = type.length || (value.rexpr.length+1) + if len > value.rexpr.length + @source.last << (', 0' * (len - value.rexpr.length)) + end + + elen * len + + elsif value.kind_of? ::Array + @source.last << ':' if not @source.last.empty? + len = type.length || value.length + value.each { |v| + @source << '' + c_idata_inner(type.type, v) + } + len -= value.length + if len > 0 + @source << " db #{len * sizeof(nil, type.type)} dup(0)" + end + + sizeof(nil, type.type) * len + + else raise "unknown static array initializer #{value.inspect}" + end + end + end + + def c_idata_inner_cexpr(expr) + expr = expr.reduce(@parser) if expr.kind_of? CExpression + case expr + when ::Integer; (expr >= 4096) ? ('0x%X' % expr) : expr.to_s + when ::Numeric; expr.to_s + when Variable + case expr.type + when Array; expr.name + else c_idata_inner_cexpr(expr.initializer) + end + when CExpression + if not expr.lexpr + case expr.op + when :& + case expr.rexpr + when Variable; expr.rexpr.name + else raise 'unhandled addrof in initializer ' + expr.rexpr.inspect + end + #when :* + when :+; c_idata_inner_cexpr(expr.rexpr) + when :-; ' -' << c_idata_inner_cexpr(expr.rexpr) + when nil + e = c_idata_inner_cexpr(expr.rexpr) + if expr.rexpr.kind_of? CExpression + e = '(' << e << " & 0#{'ff'*sizeof(expr)}h)" + end + e + else raise 'unhandled initializer expr ' + expr.inspect + end + else + case expr.op + when :+, :-, :*, :/, :%, :<<, :>>, :&, :|, :^ + e = '(' << c_idata_inner_cexpr(expr.lexpr) << + expr.op.to_s << c_idata_inner_cexpr(expr.rexpr) << ')' + if expr.type.integral? + # db are unsigned + e = '(' << e << " & 0#{'ff'*sizeof(expr)}h)" + end + e + #when :'.' 
+ #when :'->' + #when :'[]' + else raise 'unhandled initializer expr ' + expr.inspect + end + end + else raise 'unhandled initializer ' + expr.inspect + end + end + + def c_udata(data, align) + @source << "#{@label_oldname[data.name]}:" if @label_oldname[data.name] + @source << "#{data.name} " + @source.last << + case data.type + when BaseType + len = @parser.typesize[data.type.name] + case data.type.name + when :__int8; 'db ?' + when :__int16; 'dw ?' + when :__int32; 'dd ?' + when :__int64; 'dq ?' + else "db #{len} dup(?)" + end + else + len = sizeof(data) + "db #{len} dup(?)" + end + len %= align + len == 0 ? align : len + end + + # return non-nil if the variable name is unsuitable to appear as is in the asm listing + # eg filter out asm instruction names + def check_reserved_name(var) + return true if @exeformat.cpu and @exeformat.cpu.check_reserved_name(var.name) + %w[db dw dd dq].include?(var.name) + end + end + + class Statement + # all Statements/Declaration must define a precompile(compiler, scope) method + # it must append itself to scope.statements + + # turns a statement into a new block + def precompile_make_block(scope) + b = Block.new scope + b.statements << self + b + end + end + + class Block + # precompile all statements, then simplifies symbols/structs types + def precompile(compiler, scope=nil) + stmts = @statements.dup + @statements.clear + stmts.each { |st| + compiler.curexpr = st + st.precompile(compiler, self) + } + + # cleanup declarations + @symbol.delete_if { |n, s| not s.kind_of? Variable } + @struct.delete_if { |n, s| not s.kind_of? Union } + @symbol.each_value { |var| + CExpression.precompile_type(compiler, self, var, true) + } + @struct.each_value { |var| + next if not var.members + var.members.each { |m| + CExpression.precompile_type(compiler, self, m, true) + } + } + scope.statements << self if scope and not @statements.empty? 
+ end + + # removes unused labels, and in-place goto (goto toto; toto:) + def precompile_optimize + list = [] + precompile_optimize_inner(list, 1) + precompile_optimize_inner(list, 2) + end + + # step 1: list used labels/unused goto + # step 2: remove unused labels + def precompile_optimize_inner(list, step) + lastgoto = nil + hadref = false + walk = lambda { |expr| + next if not expr.kind_of? CExpression + # gcc's unary && support + if not expr.op and not expr.lexpr and expr.rexpr.kind_of? Label + list << expr.rexpr.name + else + walk[expr.lexpr] + if expr.rexpr.kind_of? ::Array + expr.rexpr.each { |r| walk[r] } + else + walk[expr.rexpr] + end + end + } + @statements.dup.each { |s| + lastgoto = nil if not s.kind_of? Label + case s + when Block + s.precompile_optimize_inner(list, step) + @statements.delete s if step == 2 and s.statements.empty? + when CExpression; walk[s] if step == 1 + when Label + case step + when 1 + if lastgoto and lastgoto.target == s.name + list << lastgoto + list.delete s.name if not hadref + end + when 2; @statements.delete s if not list.include? s.name + end + when Goto, If + s.kind_of?(If) ? g = s.bthen : g = s + case step + when 1 + hadref = list.include? g.target + lastgoto = g + list << g.target + when 2 + if list.include? g + idx = @statements.index s + @statements.delete s + @statements[idx, 0] = s.test if s != g and not s.test.constant? + end + end + end + } + list + end + + # noop + def precompile_make_block(scope) self end + + def continue_label ; defined?(@continue_label) ? @continue_label : @outer.continue_label end + def continue_label=(l) @continue_label = l end + def break_label ; defined?(@break_label) ? @break_label : @outer.break_label end + def break_label=(l) @break_label = l end + def return_label ; defined?(@return_label) ? @return_label : @outer.return_label end + def return_label=(l) @return_label = l end + def nonauto_label=(l) @nonauto_label = l end + def nonauto_label ; defined?(@nonauto_label) ? 
@nonauto_label : @outer.nonauto_label end + def function ; defined?(@function) ? @function : @outer.function end + def function=(f) @function = f end + end + + class Declaration + def precompile(compiler, scope) + if (@var.type.kind_of? Function and @var.initializer and scope != compiler.toplevel) or @var.storage == :static or compiler.check_reserved_name(@var) + old = @var.name + ref = scope.symbol.delete old + if scope == compiler.toplevel or (@var.type.kind_of?(Function) and not @var.initializer) + if n = compiler.label_oldname.index(old) + # reuse same name as predeclarations + @var.name = n + else + newname = old + newname = compiler.new_label newname until newname != old + if not compiler.check_reserved_name(@var) + compiler.label_oldname[newname] = old + end + @var.name = newname + end + ref ||= scope.symbol[@var.name] || @var + # append only one actual declaration for all predecls (the one with init, or the last uninit) + scope.statements << self if ref.eql?(@var) + else + @var.name = compiler.new_label @var.name until @var.name != old + compiler.toplevel.statements << self + end + compiler.toplevel.symbol[@var.name] = ref + else + scope.symbol[@var.name] ||= @var + appendme = true if scope.symbol[@var.name].eql?(@var) + end + + if i = @var.initializer + if @var.type.kind_of? Function + if @var.type.type.kind_of? Union + s = @var.type.type + v = Variable.new + v.name = compiler.new_label('return_struct_ptr') + v.type = Pointer.new(s) + CExpression.precompile_type(compiler, scope, v) + @var.type.args.unshift v + @var.type.type = v.type + end + i.function = @var + i.return_label = compiler.new_label('epilog') + i.nonauto_label = {} + i.precompile(compiler) + Label.new(i.return_label).precompile(compiler, i) + i.precompile_optimize + # append now so that static dependencies are declared before us + # TODO no pure inline if addrof(func) needed + scope.statements << self if appendme and not @var.attributes.to_a.include? 
'inline' + elsif scope != compiler.toplevel and @var.storage != :static + scope.statements << self if appendme + Declaration.precompile_dyn_initializer(compiler, scope, @var, @var.type, i) + @var.initializer = nil + else + scope.statements << self if appendme + @var.initializer = Declaration.precompile_static_initializer(compiler, @var.type, i) + end + else + scope.statements << self if appendme + end + end + + # turns an initializer to CExpressions in scope.statements + def self.precompile_dyn_initializer(compiler, scope, var, type, init) + case type = type.untypedef + when Array + # XXX TODO type.length may be dynamic !! + case init + when CExpression + # char toto[] = "42" + if not init.kind_of? CExpression or init.op or init.lexpr or not init.rexpr.kind_of? ::String + raise "unknown initializer #{init.inspect} for #{var.inspect}" + end + init = init.rexpr.unpack('C*') + [0] + init.map! { |chr| CExpression.new(nil, nil, chr, type.type) } + precompile_dyn_initializer(compiler, scope, var, type, init) + + when ::Array + type.length ||= init.length + # len is an Integer + init.each_with_index { |it, idx| + next if not it + break if idx >= type.length + idx = CExpression.new(nil, nil, idx, BaseType.new(:long, :unsigned)) + v = CExpression.new(var, :'[]', idx, type.type) + precompile_dyn_initializer(compiler, scope, v, type.type, it) + } + else raise "unknown initializer #{init.inspect} for #{var.inspect}" + end + when Union + case init + when CExpression, Variable + if init.type.untypedef.kind_of? BaseType + # works for struct foo bar[] = {0}; ... + type.members.each { |m| + v = CExpression.new(var, :'.', m.name, m.type) + precompile_dyn_initializer(compiler, scope, v, v.type, init) + } + elsif init.type.untypedef.kind_of? 
type.class + CExpression.new(var, :'=', init, type).precompile(compiler, scope) + else + raise "bad initializer #{init.inspect} for #{var.inspect}" + end + when ::Array + init.each_with_index{ |it, idx| + next if not it + m = type.members[idx] + v = CExpression.new(var, :'.', m.name, m.type) + precompile_dyn_initializer(compiler, scope, v, m.type, it) + } + else raise "unknown initializer #{init.inspect} for #{var.inspect}" + end + else + case init + when CExpression + CExpression.new(var, :'=', init, type).precompile(compiler, scope) + else raise "unknown initializer #{init.inspect} for #{var.inspect}" + end + end + end + + # returns a precompiled static initializer (eg string constants) + def self.precompile_static_initializer(compiler, type, init) + # TODO + case type = type.untypedef + when Array + if init.kind_of? ::Array + init.map { |i| precompile_static_initializer(compiler, type.type, i) } + else + init + end + when Union + if init.kind_of? ::Array + init.zip(type.members).map { |i, m| precompile_static_initializer(compiler, m.type, i) } + else + init + end + else + if init.kind_of? CExpression and init = init.reduce(compiler) and init.kind_of? CExpression + if not init.op and init.rexpr.kind_of? ::String + v = Variable.new + v.storage = :static + v.name = 'char_' + init.rexpr.gsub(/[^a-zA-Z]/, '')[0, 8] + v.type = Array.new(type.type) + v.type.length = init.rexpr.length + 1 + v.type.type.qualifier = [:const] + v.initializer = CExpression.new(nil, nil, init.rexpr, type) + Declaration.new(v).precompile(compiler, compiler.toplevel) + init.rexpr = v + end + init.rexpr = precompile_static_initializer(compiler, init.rexpr.type, init.rexpr) if init.rexpr.kind_of? CExpression + init.lexpr = precompile_static_initializer(compiler, init.lexpr.type, init.lexpr) if init.lexpr.kind_of? CExpression + end + init + end + end + end + + class If + def precompile(compiler, scope) + expr = lambda { |e| e.kind_of?(CExpression) ? 
e : CExpression.new(nil, nil, e, e.type) } + + if @bthen.kind_of? Goto or @bthen.kind_of? Break or @bthen.kind_of? Continue + # if () goto l; else b; => if () goto l; b; + if belse + t1 = @belse + @belse = nil + end + + # need to convert user-defined Goto target ! + @bthen.precompile(compiler, scope) + @bthen = scope.statements.pop # break => goto break_label + elsif belse + # if () a; else b; => if () goto then; b; goto end; then: a; end: + t1 = @belse + t2 = @bthen + l2 = compiler.new_label('if_then') + @bthen = Goto.new(l2) + @belse = nil + l3 = compiler.new_label('if_end') + else + # if () a; => if (!) goto end; a; end: + t1 = @bthen + l2 = compiler.new_label('if_end') + @bthen = Goto.new(l2) + @test = CExpression.negate(@test) + end + + @test = expr[@test] + case @test.op + when :'&&' + # if (c1 && c2) goto a; => if (!c1) goto b; if (c2) goto a; b: + l1 = compiler.new_label('if_nand') + If.new(CExpression.negate(@test.lexpr), Goto.new(l1)).precompile(compiler, scope) + @test = expr[@test.rexpr] + precompile(compiler, scope) + when :'||' + l1 = compiler.new_label('if_or') + If.new(expr[@test.lexpr], Goto.new(@bthen.target)).precompile(compiler, scope) + @test = expr[@test.rexpr] + precompile(compiler, scope) + else + @test = CExpression.precompile_inner(compiler, scope, @test) + t = @test.reduce(compiler) + if t.kind_of? 
::Integer + if t == 0 + Label.new(l1, nil).precompile(compiler, scope) if l1 + t1.precompile(compiler, scope) if t1 + Label.new(l2, nil).precompile(compiler, scope) if l2 + Label.new(l3, nil).precompile(compiler, scope) if l3 + else + scope.statements << @bthen + Label.new(l1, nil).precompile(compiler, scope) if l1 + Label.new(l2, nil).precompile(compiler, scope) if l2 + t2.precompile(compiler, scope) if t2 + Label.new(l3, nil).precompile(compiler, scope) if l3 + end + return + end + scope.statements << self + end + + Label.new(l1, nil).precompile(compiler, scope) if l1 + t1.precompile(compiler, scope) if t1 + Goto.new(l3).precompile(compiler, scope) if l3 + Label.new(l2, nil).precompile(compiler, scope) if l2 + t2.precompile(compiler, scope) if t2 + Label.new(l3, nil).precompile(compiler, scope) if l3 + end + end + + class For + def precompile(compiler, scope) + if init + @init.precompile(compiler, scope) + scope = @init if @init.kind_of? Block + end + + @body = @body.precompile_make_block scope + @body.continue_label = compiler.new_label 'for_continue' + @body.break_label = compiler.new_label 'for_break' + label_test = compiler.new_label 'for_test' + + Label.new(label_test).precompile(compiler, scope) + if test + If.new(CExpression.negate(@test), Goto.new(@body.break_label)).precompile(compiler, scope) + end + + @body.precompile(compiler, scope) + + Label.new(@body.continue_label).precompile(compiler, scope) + if iter + @iter.precompile(compiler, scope) + end + + Goto.new(label_test).precompile(compiler, scope) + Label.new(@body.break_label).precompile(compiler, scope) + end + end + + class While + def precompile(compiler, scope) + @body = @body.precompile_make_block scope + @body.continue_label = compiler.new_label('while_continue') + @body.break_label = compiler.new_label('while_break') + + Label.new(@body.continue_label).precompile(compiler, scope) + + If.new(CExpression.negate(@test), Goto.new(@body.break_label)).precompile(compiler, scope) + + 
@body.precompile(compiler, scope) + + Goto.new(@body.continue_label).precompile(compiler, scope) + Label.new(@body.break_label).precompile(compiler, scope) + end + end + + class DoWhile + def precompile(compiler, scope) + @body = @body.precompile_make_block scope + @body.continue_label = compiler.new_label('dowhile_continue') + @body.break_label = compiler.new_label('dowhile_break') + loop_start = compiler.new_label('dowhile_start') + + Label.new(loop_start).precompile(compiler, scope) + + @body.precompile(compiler, scope) + + Label.new(@body.continue_label).precompile(compiler, scope) + + If.new(@test, Goto.new(loop_start)).precompile(compiler, scope) + + Label.new(@body.break_label).precompile(compiler, scope) + end + end + + class Switch + def precompile(compiler, scope) + var = Variable.new + var.storage = :register + var.name = compiler.new_label('switch') + var.type = @test.type + var.initializer = @test + CExpression.precompile_type(compiler, scope, var) + Declaration.new(var).precompile(compiler, scope) + + @body = @body.precompile_make_block scope + @body.break_label = compiler.new_label('switch_break') + @body.precompile(compiler) + default = @body.break_label + # recursive lambda to change Case to Labels + # dynamically creates the If sequence + walk = lambda { |blk| + blk.statements.each_with_index { |s, i| + case s + when Case + label = compiler.new_label('case') + if s.expr == 'default' + default = label + elsif s.exprup + If.new(CExpression.new(CExpression.new(var, :'>=', s.expr, BaseType.new(:int)), :'&&', + CExpression.new(var, :'<=', s.exprup, BaseType.new(:int)), + BaseType.new(:int)), Goto.new(label)).precompile(compiler, scope) + else + If.new(CExpression.new(var, :'==', s.expr, BaseType.new(:int)), + Goto.new(label)).precompile(compiler, scope) + end + blk.statements[i] = Label.new(label) + when Block + walk[s] + end + } + } + walk[@body] + Goto.new(default).precompile(compiler, scope) + scope.statements << @body + 
Label.new(@body.break_label).precompile(compiler, scope) + end + end + + class Continue + def precompile(compiler, scope) + Goto.new(scope.continue_label).precompile(compiler, scope) + end + end + + class Break + def precompile(compiler, scope) + Goto.new(scope.break_label).precompile(compiler, scope) + end + end + + class Return + def precompile(compiler, scope) + if @value + @value = CExpression.new(nil, nil, @value, @value.type) if not @value.kind_of? CExpression + if @value.type.untypedef.kind_of? Union + @value = @value.precompile_inner(compiler, scope) + func = scope.function.type + CExpression.new(CExpression.new(nil, :*, func.args.first, @value.type), :'=', @value, @value.type).precompile(compiler, scope) + @value = func.args.first + else + # cast to function return type + @value = CExpression.new(nil, nil, @value, scope.function.type.type).precompile_inner(compiler, scope) + end + scope.statements << self + end + Goto.new(scope.return_label).precompile(compiler, scope) + end + end + + class Label + def precompile(compiler, scope) + if name and (not compiler.auto_label_list[@name]) + @name = scope.nonauto_label[@name] ||= compiler.new_label(@name) + end + scope.statements << self + if statement + @statement.precompile(compiler, scope) + @statement = nil + end + end + end + + class Case + def precompile(compiler, scope) + @expr = CExpression.precompile_inner(compiler, scope, @expr) + @exprup = CExpression.precompile_inner(compiler, scope, @exprup) if exprup + super(compiler, scope) + end + end + + class Goto + def precompile(compiler, scope) + if not compiler.auto_label_list[@target] + @target = scope.nonauto_label[@target] ||= compiler.new_label(@target) + end + scope.statements << self + end + end + + class Asm + def precompile(compiler, scope) + scope.statements << self + # TODO CExpr.precompile_type(clobbers) + end + end + + class CExpression + def precompile(compiler, scope) + i = precompile_inner(compiler, scope, false) + scope.statements << i if i + 
end + + # changes obj.type to a precompiled type + # keeps struct/union, change everything else to __int\d + # except Arrays if declaration is true (need to know variable allocation sizes etc) + # returns the type + def self.precompile_type(compiler, scope, obj, declaration = false) + case t = obj.type.untypedef + when BaseType + case t.name + when :void + when :float, :double, :longdouble + else t = BaseType.new("__int#{compiler.typesize[t.name]*8}".to_sym, t.specifier) + end + when Array + if declaration; precompile_type(compiler, scope, t, declaration) + else t = BaseType.new("__int#{compiler.typesize[:ptr]*8}".to_sym, :unsigned) + end + when Pointer + if t.type.untypedef.kind_of? Function + precompile_type(compiler, scope, t, declaration) + else + t = BaseType.new("__int#{compiler.typesize[:ptr]*8}".to_sym, :unsigned) + end + when Enum; t = BaseType.new("__int#{compiler.typesize[:int]*8}".to_sym) + when Function + precompile_type(compiler, scope, t) + t.args ||= [] + t.args.each { |a| precompile_type(compiler, scope, a) } + when Union + if declaration and t.members and not t.name # anonymous struct + t.members.each { |a| precompile_type(compiler, scope, a, true) } + end + else raise 'bad type ' + t.inspect + end + (t.qualifier ||= []).concat obj.type.qualifier if obj.type.qualifier and t != obj.type + (t.attributes ||= []).concat obj.type.attributes if obj.type.attributes and t != obj.type + while obj.type.kind_of? 
TypeDef + obj.type = obj.type.type + (t.qualifier ||= []).concat obj.type.qualifier if obj.type.qualifier and t != obj.type + (t.attributes ||= []).concat obj.type.attributes if obj.type.attributes and t != obj.type + end + obj.type = t + end + + def self.precompile_inner(compiler, scope, expr, nested = true) + case expr + when CExpression; expr.precompile_inner(compiler, scope, nested) + else expr + end + end + + # returns a new CExpression with simplified self.type, computes structure offsets + # turns char[]/float immediates to reference to anonymised const + # TODO 'a = b += c' => 'b += c; a = b' (use nested argument) + # TODO handle precompile_inner return nil + # TODO struct.bits + def precompile_inner(compiler, scope, nested = true) + case @op + when :'.' + # a.b => (&a)->b + lexpr = CExpression.precompile_inner(compiler, scope, @lexpr) + ll = lexpr + ll = lexpr.rexpr while ll.kind_of? CExpression and not ll.op + if ll.kind_of? CExpression and ll.op == :'*' and not ll.lexpr + # do not change lexpr.rexpr.type directly to a pointer, might retrigger (ptr+imm) => (ptr + imm*sizeof(*ptr)) + @lexpr = CExpression.new(nil, nil, ll.rexpr, Pointer.new(lexpr.type)) + else + @lexpr = CExpression.new(nil, :'&', lexpr, Pointer.new(lexpr.type)) + end + @op = :'->' + precompile_inner(compiler, scope) + when :'->' + # a->b => *(a + off(b)) + struct = @lexpr.type.untypedef.type.untypedef + lexpr = CExpression.precompile_inner(compiler, scope, @lexpr) + @lexpr = nil + @op = nil + if struct.kind_of? Union and (off = struct.offsetof(compiler, @rexpr)) != 0 + off = CExpression.new(nil, nil, off, BaseType.new(:int, :unsigned)) + @rexpr = CExpression.new(lexpr, :'+', off, lexpr.type) + # ensure the (ptr + value) is not expanded to (ptr + value * sizeof(*ptr)) + CExpression.precompile_type(compiler, scope, @rexpr) + else + # union or 1st struct member + @rexpr = lexpr + end + if @type.kind_of? 
Array # Array member type is already an address + else + @rexpr = CExpression.new(nil, :*, @rexpr, @rexpr.type) + end + precompile_inner(compiler, scope) + when :'[]' + rexpr = CExpression.precompile_inner(compiler, scope, @rexpr) + if rexpr.kind_of? CExpression and not rexpr.op and rexpr.rexpr == 0 + @rexpr = @lexpr + else + @rexpr = CExpression.new(@lexpr, :'+', rexpr, @lexpr.type) + end + @op = :'*' + @lexpr = nil + precompile_inner(compiler, scope) + when :'?:' + # cannot precompile in place, a conditionnal expression may have a coma: must turn into If + if @lexpr.kind_of? CExpression + @lexpr = @lexpr.precompile_inner(compiler, scope) + if not @lexpr.lexpr and not @lexpr.op and @lexpr.rexpr.kind_of? ::Numeric + if @lexpr.rexpr == 0 + e = @rexpr[1] + else + e = @rexpr[0] + end + e = CExpression.new(nil, nil, e, e.type) if not e.kind_of? CExpression + return e.precompile_inner(compiler, scope) + end + end + raise 'conditional in toplevel' if scope == compiler.toplevel # just in case + var = Variable.new + var.storage = :register + var.name = compiler.new_label('ternary') + var.type = @rexpr[0].type + CExpression.precompile_type(compiler, scope, var) + Declaration.new(var).precompile(compiler, scope) + If.new(@lexpr, CExpression.new(var, :'=', @rexpr[0], var.type), CExpression.new(var, :'=', @rexpr[1], var.type)).precompile(compiler, scope) + @lexpr = nil + @op = nil + @rexpr = var + precompile_inner(compiler, scope) + when :'&&' + if scope == compiler.toplevel + @lexpr = CExpression.precompile_inner(compiler, scope, @lexpr) + @rexpr = CExpression.precompile_inner(compiler, scope, @rexpr) + CExpression.precompile_type(compiler, scope, self) + self + else + var = Variable.new + var.storage = :register + var.name = compiler.new_label('and') + var.type = @type + CExpression.precompile_type(compiler, scope, var) + var.initializer = CExpression.new(nil, nil, 0, var.type) + Declaration.new(var).precompile(compiler, scope) + l = @lexpr.kind_of?(CExpression) ? 
@lexpr : CExpression.new(nil, nil, @lexpr, @lexpr.type) + r = @rexpr.kind_of?(CExpression) ? @rexpr : CExpression.new(nil, nil, @rexpr, @rexpr.type) + If.new(l, If.new(r, CExpression.new(var, :'=', CExpression.new(nil, nil, 1, var.type), var.type))).precompile(compiler, scope) + @lexpr = nil + @op = nil + @rexpr = var + precompile_inner(compiler, scope) + end + when :'||' + if scope == compiler.toplevel + @lexpr = CExpression.precompile_inner(compiler, scope, @lexpr) + @rexpr = CExpression.precompile_inner(compiler, scope, @rexpr) + CExpression.precompile_type(compiler, scope, self) + self + else + var = Variable.new + var.storage = :register + var.name = compiler.new_label('or') + var.type = @type + CExpression.precompile_type(compiler, scope, var) + var.initializer = CExpression.new(nil, nil, 1, var.type) + Declaration.new(var).precompile(compiler, scope) + l = @lexpr.kind_of?(CExpression) ? @lexpr : CExpression.new(nil, nil, @lexpr, @lexpr.type) + l = CExpression.new(nil, :'!', l, var.type) + r = @rexpr.kind_of?(CExpression) ? @rexpr : CExpression.new(nil, nil, @rexpr, @rexpr.type) + r = CExpression.new(nil, :'!', r, var.type) + If.new(l, If.new(r, CExpression.new(var, :'=', CExpression.new(nil, nil, 0, var.type), var.type))).precompile(compiler, scope) + @lexpr = nil + @op = nil + @rexpr = var + precompile_inner(compiler, scope) + end + when :funcall + if @lexpr.kind_of? Variable and @lexpr.type.kind_of? Function and @lexpr.attributes and @lexpr.attributes.include? 'inline' and @lexpr.initializer + # TODO check recursive call (direct or indirect) + raise 'inline varargs unsupported' if @lexpr.type.varargs + rtype = @lexpr.type.type.untypedef + if not rtype.kind_of? 
BaseType or rtype.name != :void + rval = Variable.new + rval.name = compiler.new_label('inline_return') + rval.type = @lexpr.type.type + Declaration.new(rval).precompile(compiler, scope) + end + inline_label = {} + locals = @lexpr.type.args.zip(@rexpr).inject({}) { |h, (fa, a)| + h.update fa => CExpression.new(nil, nil, a, fa.type).precompile_inner(compiler, scope) + } + copy_inline_ce = lambda { |ce| + case ce + when CExpression; CExpression.new(copy_inline_ce[ce.lexpr], ce.op, copy_inline_ce[ce.rexpr], ce.type) + when Variable; locals[ce] || ce + when ::Array; ce.map { |e_| copy_inline_ce[e_] } + else ce + end + } + copy_inline = lambda { |stmt, scp| + case stmt + when Block + b = Block.new(scp) + stmt.statements.each { |s| + s = copy_inline[s, b] + b.statements << s if s + } + b + when If; If.new(copy_inline_ce[stmt.test], copy_inline[stmt.bthen, scp]) # re-precompile ? + when Label; Label.new(inline_label[stmt.name] ||= compiler.new_label('inline_'+stmt.name)) + when Goto; Goto.new(inline_label[stmt.target] ||= compiler.new_label('inline_'+stmt.target)) + when Return; CExpression.new(rval, :'=', copy_inline_ce[stmt.value], rval.type).precompile_inner(compiler, scp) if stmt.value + when CExpression; copy_inline_ce[stmt] + when Declaration + nv = stmt.var.dup + if nv.type.kind_of? Array and nv.type.length.kind_of? CExpression + nv.type = Array.new(nv.type.type, copy_inline_ce[nv.type.length]) # XXX nested dynamic? + end + locals[stmt.var] = nv + scp.symbol[nv.name] = nv + Declaration.new(nv) + else raise 'unexpected inline statement ' + stmt.inspect + end + } + scope.statements << copy_inline[@lexpr.initializer, scope] # body already precompiled + CExpression.new(nil, nil, rval, rval.type).precompile_inner(compiler, scope) + elsif @type.kind_of? 
Union + var = Variable.new + var.name = compiler.new_label('return_struct') + var.type = @type + Declaration.new(var).precompile(compiler, scope) + @rexpr.unshift CExpression.new(nil, :&, var, Pointer.new(var.type)) + + var2 = Variable.new + var2.name = compiler.new_label('return_struct_ptr') + var2.type = Pointer.new(@type) + var2.storage = :register + CExpression.precompile_type(compiler, scope, var2) + Declaration.new(var2).precompile(compiler, scope) + @type = var2.type + CExpression.new(var2, :'=', self, var2.type).precompile(compiler, scope) + + CExpression.new(nil, :'*', var2, var.type).precompile_inner(compiler, scope) + else + t = @lexpr.type.untypedef + t = t.type.untypedef if t.pointer? + @lexpr = CExpression.precompile_inner(compiler, scope, @lexpr) + types = t.args.map { |a| a.type } + # cast args to func prototype + @rexpr.map! { |e_| (types.empty? ? e_ : CExpression.new(nil, nil, e_, types.shift)).precompile_inner(compiler, scope) } + CExpression.precompile_type(compiler, scope, self) + self + end + when :',' + lexpr = @lexpr.kind_of?(CExpression) ? @lexpr : CExpression.new(nil, nil, @lexpr, @lexpr.type) + rexpr = @rexpr.kind_of?(CExpression) ? @rexpr : CExpression.new(nil, nil, @rexpr, @rexpr.type) + lexpr.precompile(compiler, scope) + rexpr.precompile_inner(compiler, scope) + when :'!' + CExpression.precompile_type(compiler, scope, self) + if @rexpr.kind_of?(CExpression) + case @rexpr.op + when :'<', :'>', :'<=', :'>=', :'==', :'!=' + @op = { :'<' => :'>=', :'>' => :'<=', :'<=' => :'>', :'>=' => :'<', + :'==' => :'!=', :'!=' => :'==' }[@rexpr.op] + @lexpr = @rexpr.lexpr + @rexpr = @rexpr.rexpr + precompile_inner(compiler, scope) + when :'&&', :'||' + @op = { :'&&' => :'||', :'||' => :'&&' }[@rexpr.op] + @lexpr = CExpression.new(nil, :'!', @rexpr.lexpr, @type) + @rexpr = CExpression.new(nil, :'!', @rexpr.rexpr, @type) + precompile_inner(compiler, scope) + when :'!' + if @rexpr.rexpr.kind_of? 
CExpression + @op = nil + @rexpr = @rexpr.rexpr + else + @op = :'!=' + @lexpr = @rexpr.rexpr + @rexpr = CExpression.new(nil, nil, 0, @lexpr.type) + end + precompile_inner(compiler, scope) + else + @rexpr = CExpression.precompile_inner(compiler, scope, @rexpr) + self + end + else + @rexpr = CExpression.precompile_inner(compiler, scope, @rexpr) + self + end + when :'++', :'--' + if not @rexpr + var = Variable.new + var.storage = :register + var.name = compiler.new_label('postincrement') + var.type = @type + Declaration.new(var).precompile(compiler, scope) + CExpression.new(var, :'=', @lexpr, @type).precompile(compiler, scope) + CExpression.new(nil, @op, @lexpr, @type).precompile(compiler, scope) + @lexpr = nil + @op = nil + @rexpr = var + precompile_inner(compiler, scope) + elsif @type.pointer? and compiler.sizeof(nil, @type.untypedef.type.untypedef) != 1 + # ++ptr => ptr += sizeof(*ptr) (done in += precompiler) + @op = { :'++' => :'+=', :'--' => :'-=' }[@op] + @lexpr = @rexpr + @rexpr = CExpression.new(nil, nil, 1, BaseType.new(:ptr, :unsigned)) + precompile_inner(compiler, scope) + else + CExpression.precompile_type(compiler, scope, self) + @rexpr = CExpression.precompile_inner(compiler, scope, @rexpr) + self + end + when :'=' + # handle structure assignment/array assignment + case @lexpr.type.untypedef + when Union + # rexpr may be a :funcall + @rexpr = CExpression.precompile_inner(compiler, scope, @rexpr) + @lexpr.type.untypedef.members.zip(@rexpr.type.untypedef.members) { |m1, m2| + # assume m1 and m2 are compatible + v1 = CExpression.new(@lexpr, :'.', m1.name, m1.type) + v2 = CExpression.new(@rexpr, :'.', m2.name, m1.type) + CExpression.new(v1, :'=', v2, v1.type).precompile(compiler, scope) + } + # (foo = bar).toto + @op = nil + @rexpr = @lexpr + @lexpr = nil + @type = @rexpr.type + precompile_inner(compiler, scope) if nested + when Array + if not len = @lexpr.type.untypedef.length + @rexpr = CExpression.precompile_inner(compiler, scope, @rexpr) + # char toto[] 
= "bla" + if @rexpr.kind_of? CExpression and not @rexpr.lexpr and not @rexpr.op and + @rexpr.rexpr.kind_of? Variable and @rexpr.rexpr.type.kind_of? Array + len = @rexpr.rexpr.type.length + end + end + raise 'array initializer with no length !' if not len + # TODO optimize... + len.times { |i| + i = CExpression.new(nil, nil, i, BaseType.new(:long, :unsigned)) + v1 = CExpression.new(@lexpr, :'[]', i, @lexpr.type.untypedef.type) + v2 = CExpression.new(@rexpr, :'[]', i, v1.type) + CExpression.new(v1, :'=', v2, v1.type).precompile(compiler, scope) + } + @op = nil + @rexpr = @lexpr + @lexpr = nil + @type = @rexpr.type + precompile_inner(compiler, scope) if nested + else + @lexpr = CExpression.precompile_inner(compiler, scope, @lexpr) + @rexpr = CExpression.precompile_inner(compiler, scope, @rexpr) + CExpression.precompile_type(compiler, scope, self) + self + end + when nil + case @rexpr + when Block + # compound statements + raise 'compound statement in toplevel' if scope == compiler.toplevel # just in case + var = Variable.new + var.storage = :register + var.name = compiler.new_label('compoundstatement') + var.type = @type + CExpression.precompile_type(compiler, scope, var) + Declaration.new(var).precompile(compiler, scope) + if @rexpr.statements.last.kind_of? 
CExpression + @rexpr.statements[-1] = CExpression.new(var, :'=', @rexpr.statements[-1], var.type) + @rexpr.precompile(compiler, scope) + end + @rexpr = var + precompile_inner(compiler, scope) + when ::String + # char[] immediate + v = Variable.new + v.storage = :static + v.name = 'char_' + @rexpr.tr('^a-zA-Z', '')[0, 8] + v.type = Array.new(@type.type) + v.type.length = @rexpr.length + 1 + v.type.type.qualifier = [:const] + v.initializer = CExpression.new(nil, nil, @rexpr, @type) + Declaration.new(v).precompile(compiler, scope) + @rexpr = v + precompile_inner(compiler, scope) + when ::Float + # float immediate + v = Variable.new + v.storage = :static + v.name = @type.untypedef.name.to_s + v.type = @type + v.type.qualifier = [:const] + v.initializer = CExpression.new(nil, nil, @rexpr, @type) + Declaration.new(v).precompile(compiler, scope) + @rexpr = CExpression.new(nil, :'*', v, v.type) + precompile_inner(compiler, scope) + when CExpression + # simplify casts + CExpression.precompile_type(compiler, scope, self) + # propagate type first so that __uint64 foo() { return -1 } => 0xffffffffffffffff + @rexpr.type = @type if @rexpr.kind_of? CExpression and @rexpr.op == :- and not @rexpr.lexpr and @type.kind_of? BaseType and @type.name == :__int64 # XXX kill me + @rexpr = @rexpr.precompile_inner(compiler, scope) + if @type.kind_of? BaseType and @rexpr.type.kind_of? BaseType + if @rexpr.type == @type + # noop cast + @lexpr, @op, @rexpr = @rexpr.lexpr, @rexpr.op, @rexpr.rexpr + elsif not @rexpr.op and @type.integral? and @rexpr.type.integral? + if @rexpr.rexpr.kind_of? ::Numeric and (val = reduce(compiler)).kind_of? ::Numeric + @rexpr = val + elsif compiler.typesize[@type.name] < compiler.typesize[@rexpr.type.name] + # (char)(short)(int)(long)foo => (char)foo + @rexpr = @rexpr.rexpr + end + end + end + self + else + CExpression.precompile_type(compiler, scope, self) + self + end + else + # int+ptr => ptr+int + if @op == :+ and @lexpr and @lexpr.type.integral? 
and @rexpr.type.pointer? + @rexpr, @lexpr = @lexpr, @rexpr + end + + # handle pointer + 2 == ((char *)pointer) + 2*sizeof(*pointer) + if @rexpr and [:'+', :'+=', :'-', :'-='].include? @op and + @type.pointer? and @rexpr.type.integral? + sz = compiler.sizeof(nil, @type.untypedef.type.untypedef) + if sz != 1 + sz = CExpression.new(nil, nil, sz, @rexpr.type) + @rexpr = CExpression.new(@rexpr, :'*', sz, @rexpr.type) + end + end + + # type promotion => cast + case @op + when :+, :-, :*, :/, :&, :|, :^, :% + if @lexpr + if @lexpr.type != @type + @lexpr = CExpression.new(nil, nil, @lexpr, @lexpr.type) if not @lexpr.kind_of? CExpression + @lexpr = CExpression.new(nil, nil, @lexpr, @type) + end + if @rexpr.type != @type + @rexpr = CExpression.new(nil, nil, @rexpr, @rexpr.type) if not @rexpr.kind_of? CExpression + @rexpr = CExpression.new(nil, nil, @rexpr, @type) + end + end + when :>>, :<< + # char => int + if @lexpr.type != @type + @lexpr = CExpression.new(nil, nil, @lexpr, @lexpr.type) if not @lexpr.kind_of? CExpression + @lexpr = CExpression.new(nil, nil, @lexpr, @type) + end + when :'+=', :'-=', :'*=', :'/=', :'&=', :'|=', :'^=', :'%=' + if @rexpr.type != @lexpr.type + @rexpr = CExpression.new(nil, nil, @rexpr, @rexpr.type) if not @rexpr.kind_of? CExpression + @rexpr = CExpression.new(nil, nil, @rexpr, @type) + end + end + + @lexpr = CExpression.precompile_inner(compiler, scope, @lexpr) + @rexpr = CExpression.precompile_inner(compiler, scope, @rexpr) + + if @op == :'&' and not @lexpr + rr = @rexpr + rr = rr.rexpr while rr.kind_of? CExpression and not rr.op + if rr.kind_of? CExpression and rr.op == :'*' and not rr.lexpr + @lexpr = nil + @op = nil + @rexpr = rr.rexpr + return precompile_inner(compiler, scope) + elsif rr != @rexpr + @rexpr = rr + return precompile_inner(compiler, scope) + end + end + + CExpression.precompile_type(compiler, scope, self) + + isnumeric = lambda { |e_| e_.kind_of?(::Numeric) or (e_.kind_of? 
CExpression and + not e_.lexpr and not e_.op and e_.rexpr.kind_of? ::Numeric) } + + # calc numeric + # XXX do not simplify operations involving variables (for type overflow etc) + if isnumeric[@rexpr] and (not @lexpr or isnumeric[@lexpr]) and (val = reduce(compiler)).kind_of? ::Numeric + @lexpr = nil + @op = nil + @rexpr = val + end + + self + end + end + end end end diff --git a/lib/metasm/metasm/cpu/arc/decode.rb b/lib/metasm/metasm/cpu/arc/decode.rb index 9b93a3528b..6aa455a552 100644 --- a/lib/metasm/metasm/cpu/arc/decode.rb +++ b/lib/metasm/metasm/cpu/arc/decode.rb @@ -8,418 +8,418 @@ require 'metasm/decode' module Metasm class ARC - def major_opcode(val, sz = 16) - return val >> (sz == 16 ? 0xB : 0x1B) - end + def major_opcode(val, sz = 16) + return val >> (sz == 16 ? 0xB : 0x1B) + end - def sub_opcode(val) - return ((val >> 16) & 0x3f) - end + def sub_opcode(val) + return ((val >> 16) & 0x3f) + end - def build_opcode_bin_mask(op, sz) - op.bin_mask = 0 - op.args.each { |f| op.bin_mask |= @fields_mask[f] << @fields_shift[f]} - op.bin_mask = ((1 << sz)-1) ^ op.bin_mask - end + def build_opcode_bin_mask(op, sz) + op.bin_mask = 0 + op.args.each { |f| op.bin_mask |= @fields_mask[f] << @fields_shift[f]} + op.bin_mask = ((1 << sz)-1) ^ op.bin_mask + end - def build_bin_lookaside - bin_lookaside = {} - opcode_list.each{|mode,oplist| - lookaside = {} - # 2nd level to speed up lookaside for major 5 - lookaside[5] = {} - oplist.each { |op| - next if not op.bin.kind_of? Integer - build_opcode_bin_mask(op, mode) - mj = major_opcode(op.bin, mode) - if mode == 32 and mj == 5 - (lookaside[mj][sub_opcode(op.bin)] ||= []) << op - else - (lookaside[mj] ||= []) << op - end - } - bin_lookaside[mode] = lookaside - } - bin_lookaside - end + def build_bin_lookaside + bin_lookaside = {} + opcode_list.each{|mode,oplist| + lookaside = {} + # 2nd level to speed up lookaside for major 5 + lookaside[5] = {} + oplist.each { |op| + next if not op.bin.kind_of? 
Integer + build_opcode_bin_mask(op, mode) + mj = major_opcode(op.bin, mode) + if mode == 32 and mj == 5 + (lookaside[mj][sub_opcode(op.bin)] ||= []) << op + else + (lookaside[mj] ||= []) << op + end + } + bin_lookaside[mode] = lookaside + } + bin_lookaside + end - def instruction_size(edata) - val = major_opcode(edata.decode_imm(:u16, @endianness)) - edata.ptr -= 2 - (val >= 0xC) ? 16 : 32 - end + def instruction_size(edata) + val = major_opcode(edata.decode_imm(:u16, @endianness)) + edata.ptr -= 2 + (val >= 0xC) ? 16 : 32 + end - def memref_size(di) - case di.opcode.name - when 'ldb_s', 'stb_s', 'extb_s', 'sexb_s'; 1 - when 'ldw_s', 'stw_s', 'extw_s', 'sexw_s'; 2 - else 4 - end - end + def memref_size(di) + case di.opcode.name + when 'ldb_s', 'stb_s', 'extb_s', 'sexb_s'; 1 + when 'ldw_s', 'stw_s', 'extw_s', 'sexw_s'; 2 + else 4 + end + end - def decode_bin(edata, sz) - case sz - when 16; edata.decode_imm(:u16, @endianness) - when 32 - # wordswap - val = edata.decode_imm(:u32, :little) - ((val >> 16) & 0xffff) | ((val & 0xffff) << 16) - end - end + def decode_bin(edata, sz) + case sz + when 16; edata.decode_imm(:u16, @endianness) + when 32 + # wordswap + val = edata.decode_imm(:u32, :little) + ((val >> 16) & 0xffff) | ((val & 0xffff) << 16) + end + end - def decode_findopcode(edata) - di = DecodedInstruction.new(self) + def decode_findopcode(edata) + di = DecodedInstruction.new(self) - @instrlength = instruction_size(edata) - val = decode_bin(edata, @instrlength) - edata.ptr -= @instrlength/8 + @instrlength = instruction_size(edata) + val = decode_bin(edata, @instrlength) + edata.ptr -= @instrlength/8 - maj = major_opcode(val, @instrlength) - lookaside = @bin_lookaside[@instrlength][maj] - lookaside = lookaside[sub_opcode(val)] if @instrlength == 32 and maj == 5 + maj = major_opcode(val, @instrlength) + lookaside = @bin_lookaside[@instrlength][maj] + lookaside = lookaside[sub_opcode(val)] if @instrlength == 32 and maj == 5 - op = lookaside.select { |opcode| - if 
$ARC_DEBUG and (val & opcode.bin_mask) == opcode.bin - puts "#{opcode.bin_mask.to_s(16)} - #{opcode.bin.to_s(16)} - #{(val & opcode.bin_mask).to_s(16)} - #{opcode.name} - #{opcode.args}" - end - (val & opcode.bin_mask) == opcode.bin - } + op = lookaside.select { |opcode| + if $ARC_DEBUG and (val & opcode.bin_mask) == opcode.bin + puts "#{opcode.bin_mask.to_s(16)} - #{opcode.bin.to_s(16)} - #{(val & opcode.bin_mask).to_s(16)} - #{opcode.name} - #{opcode.args}" + end + (val & opcode.bin_mask) == opcode.bin + } - if op.size == 2 and op.first.name == 'mov' and op.last.name == 'nop' - op = op.last - elsif op == nil or op.size != 1 - puts "[> I sense a disturbance in the force <]" - op.to_a.each { |opcode| puts "#{opcode.name} - #{opcode.args} - #{Expression[opcode.bin]} - #{Expression[opcode.bin_mask]}" } - puts "current value: #{Expression[val]}" - puts "current value: 0b#{val.to_s(2)}" - op = nil - else - op = op.first - end + if op.size == 2 and op.first.name == 'mov' and op.last.name == 'nop' + op = op.last + elsif op == nil or op.size != 1 + puts "[> I sense a disturbance in the force <]" + op.to_a.each { |opcode| puts "#{opcode.name} - #{opcode.args} - #{Expression[opcode.bin]} - #{Expression[opcode.bin_mask]}" } + puts "current value: #{Expression[val]}" + puts "current value: 0b#{val.to_s(2)}" + op = nil + else + op = op.first + end - di if di.opcode = op - end + di if di.opcode = op + end - Reduced_reg = [0, 1, 2, 3, 12, 13, 14, 15] - def reduced_reg_set(i) - Reduced_reg[i] - end + Reduced_reg = [0, 1, 2, 3, 12, 13, 14, 15] + def reduced_reg_set(i) + Reduced_reg[i] + end - def decode_instr_op(edata, di) - before_ptr = edata.ptr - op = di.opcode - di.instruction.opname = op.name - val = decode_bin(edata, @instrlength) + def decode_instr_op(edata, di) + before_ptr = edata.ptr + op = di.opcode + di.instruction.opname = op.name + val = decode_bin(edata, @instrlength) - field_val = lambda { |f| - r = (val >> @fields_shift[f]) & @fields_mask[f] - case f + field_val = 
lambda { |f| + r = (val >> @fields_shift[f]) & @fields_mask[f] + case f - # 16-bits instruction operands ------------------------------------------" - when :ca, :cb, :cb2, :cb3, :cc; r = reduced_reg_set(r) - when :ch - r = (((r & 7) << 3) | (r >> 5)) - when :@cbu7, :@cbu6, :@cbu5 - r = r & 0b11111 - r = (f == :@cbu7) ? r << 2 : ( (f == :@cbu6) ? r << 1 : r) - when :cu5ee; r = r << 2 - when :cdisps13 - r = (Expression.make_signed(r,11) << 2) + ((di.address >> 2) << 2) - when :cdisps10 - r = (Expression.make_signed(r, 9) << 1) + ((di.address >> 2) << 2) - when :cdisps8 - r = (Expression.make_signed(r, 7) << 1) + ((di.address >> 2) << 2) - when :cdisps7 - r = (Expression.make_signed(r, 6) << 1) + ((di.address >> 2) << 2) - when :cs9, :cs10, :cs11 - r = Expression.make_signed(r, ((f== :cs11 ? 11 : (f == :cs10 ? 10 : 9) ))) - r = (f == :cs11) ? r << 2 : ((f == :cs10) ? r << 1 : r) - when :@cspu7; - r = r << 2 + # 16-bits instruction operands ------------------------------------------" + when :ca, :cb, :cb2, :cb3, :cc; r = reduced_reg_set(r) + when :ch + r = (((r & 7) << 3) | (r >> 5)) + when :@cbu7, :@cbu6, :@cbu5 + r = r & 0b11111 + r = (f == :@cbu7) ? r << 2 : ( (f == :@cbu6) ? r << 1 : r) + when :cu5ee; r = r << 2 + when :cdisps13 + r = (Expression.make_signed(r,11) << 2) + ((di.address >> 2) << 2) + when :cdisps10 + r = (Expression.make_signed(r, 9) << 1) + ((di.address >> 2) << 2) + when :cdisps8 + r = (Expression.make_signed(r, 7) << 1) + ((di.address >> 2) << 2) + when :cdisps7 + r = (Expression.make_signed(r, 6) << 1) + ((di.address >> 2) << 2) + when :cs9, :cs10, :cs11 + r = Expression.make_signed(r, ((f== :cs11 ? 11 : (f == :cs10 ? 10 : 9) ))) + r = (f == :cs11) ? r << 2 : ((f == :cs10) ? 
r << 1 : r) + when :@cspu7; + r = r << 2 - # 32-bits instruction operands ------------------------------------------" - when :b - r = (r >> 12) | ((r & 0x7) << 3) - when :s8e - r = ((r & 0x1) << 7) | (r >> 2) - r = (Expression.make_signed(r, 8) << 1) + ((di.address >> 2) << 2) + # 32-bits instruction operands ------------------------------------------" + when :b + r = (r >> 12) | ((r & 0x7) << 3) + when :s8e + r = ((r & 0x1) << 7) | (r >> 2) + r = (Expression.make_signed(r, 8) << 1) + ((di.address >> 2) << 2) - when :u6e - r = (r << 1) + ((di.address >> 2) << 2) - when :s9 - r = (Expression.make_signed(r, 7) << 1) + ((di.address >> 2) << 2) + when :u6e + r = (r << 1) + ((di.address >> 2) << 2) + when :s9 + r = (Expression.make_signed(r, 7) << 1) + ((di.address >> 2) << 2) - when :s12 - r = (r >> 6) | ((r & 0x3f) << 6) - r = Expression.make_signed(r, 12) + when :s12 + r = (r >> 6) | ((r & 0x3f) << 6) + r = Expression.make_signed(r, 12) - when :s12e - r = (r >> 6) | ((r & 0x3f) << 6) - r = (Expression.make_signed(r, 12) <<1 ) + ((di.address >> 2) << 2) + when :s12e + r = (r >> 6) | ((r & 0x3f) << 6) + r = (Expression.make_signed(r, 12) <<1 ) + ((di.address >> 2) << 2) - when :s21e - r = ((r & 0x3ff) << 10) | (r >> 11) - r = (Expression.make_signed(r, 20) << 1) + ((di.address >> 2) << 2) + when :s21e + r = ((r & 0x3ff) << 10) | (r >> 11) + r = (Expression.make_signed(r, 20) << 1) + ((di.address >> 2) << 2) - when :s21ee # pc-relative - r = ((r & 0x3ff) << 9) | (r >> 12) - r = (Expression.make_signed(r, 19) << 2) + ((di.address >> 2) << 2) + when :s21ee # pc-relative + r = ((r & 0x3ff) << 9) | (r >> 12) + r = (Expression.make_signed(r, 19) << 2) + ((di.address >> 2) << 2) - when :s25e # pc-relative - r = ((r & 0xf) << 20) | (((r >> 6) & 0x3ff) << 10) | (r >> 17) - r = (Expression.make_signed(r, 24) << 1) + ((di.address >> 2) << 2) + when :s25e # pc-relative + r = ((r & 0xf) << 20) | (((r >> 6) & 0x3ff) << 10) | (r >> 17) + r = (Expression.make_signed(r, 24) << 1) + 
((di.address >> 2) << 2) - when :s25ee # pc-relative - r = ((r & 0xf) << 19) | (((r >> 6) & 0x3ff) << 9) | (r >> 18) - r = (Expression.make_signed(r, 23) << 2) + ((di.address >> 2) << 2) + when :s25ee # pc-relative + r = ((r & 0xf) << 19) | (((r >> 6) & 0x3ff) << 9) | (r >> 18) + r = (Expression.make_signed(r, 23) << 2) + ((di.address >> 2) << 2) - when :@bs9 - r = r >> 3 - s9 = ((r & 1) << 8) | ((r >> 1) & 0xff) - r = Expression.make_signed(s9, 9) + when :@bs9 + r = r >> 3 + s9 = ((r & 1) << 8) | ((r >> 1) & 0xff) + r = Expression.make_signed(s9, 9) - when :bext, :cext, :@cext - if ((r = field_val[(f == :bext) ? :b : :c]) == 0x3E) - tmp = edata.decode_imm(:u32, :little) - r = Expression[(tmp >> 16) | ((tmp & 0xffff) << 16)] - else - r = GPR.new(r) - end + when :bext, :cext, :@cext + if ((r = field_val[(f == :bext) ? :b : :c]) == 0x3E) + tmp = edata.decode_imm(:u32, :little) + r = Expression[(tmp >> 16) | ((tmp & 0xffff) << 16)] + else + r = GPR.new(r) + end - else r - end - r - } + else r + end + r + } - # decode properties fields - op.args.each { |a| - case a - when :flags15, :flags16 - di.instruction.opname += '.f' if field_val[a] != 0 - when :ccond - di.instruction.opname += ('.' + @cond_suffix[field_val[a]]) if field_val[a] != 0 - when :delay5, :delay16 - di.instruction.opname += '.d' if field_val[a] != 0 - when :cache5, :cache11, :cache16 - di.instruction.opname +='.di' if field_val[a] != 0 - when :signext6, :signext16 - di.instruction.opname += '.x' if field_val[a] != 0 - when :wb3, :wb9, :wb22 - case field_val[a] - when 1; di.instruction.opname += ((memref_size(di) == 2) ? 
'.ab' : '.a') - when 2; di.instruction.opname += '.ab' - when 3; di.instruction.opname += '.as' - end - when :sz1, :sz7, :sz16, :sz17 - case field_val[a] - when 1; di.instruction.opname += 'b' - when 2; di.instruction.opname += 'w' - end - else - di.instruction.args << case a + # decode properties fields + op.args.each { |a| + case a + when :flags15, :flags16 + di.instruction.opname += '.f' if field_val[a] != 0 + when :ccond + di.instruction.opname += ('.' + @cond_suffix[field_val[a]]) if field_val[a] != 0 + when :delay5, :delay16 + di.instruction.opname += '.d' if field_val[a] != 0 + when :cache5, :cache11, :cache16 + di.instruction.opname +='.di' if field_val[a] != 0 + when :signext6, :signext16 + di.instruction.opname += '.x' if field_val[a] != 0 + when :wb3, :wb9, :wb22 + case field_val[a] + when 1; di.instruction.opname += ((memref_size(di) == 2) ? '.ab' : '.a') + when 2; di.instruction.opname += '.ab' + when 3; di.instruction.opname += '.as' + end + when :sz1, :sz7, :sz16, :sz17 + case field_val[a] + when 1; di.instruction.opname += 'b' + when 2; di.instruction.opname += 'w' + end + else + di.instruction.args << case a - # 16-bits instruction operands ------------------------------------------" - when :cr0; GPR.new 0 - when :ca, :cb, :cb2, :cb3, :cc; GPR.new(field_val[a]) - when :ch - if ((r = field_val[a]) == 0x3E) - tmp = edata.decode_imm(:u32, :little) - Expression[(tmp >> 16) | ((tmp & 0xffff) << 16)] - else - GPR.new(r) - end + # 16-bits instruction operands ------------------------------------------" + when :cr0; GPR.new 0 + when :ca, :cb, :cb2, :cb3, :cc; GPR.new(field_val[a]) + when :ch + if ((r = field_val[a]) == 0x3E) + tmp = edata.decode_imm(:u32, :little) + Expression[(tmp >> 16) | ((tmp & 0xffff) << 16)] + else + GPR.new(r) + end - when :@gps9, :@gps10, :@gps11 - imm = (a == :@gps11) ? :cs11 : (a == :@gps10) ? 
:cs10 : :cs9 - Memref.new(GPR.new(26), Expression[field_val[imm]], memref_size(di)) + when :@gps9, :@gps10, :@gps11 + imm = (a == :@gps11) ? :cs11 : (a == :@gps10) ? :cs10 : :cs9 + Memref.new(GPR.new(26), Expression[field_val[imm]], memref_size(di)) - when :cu3, :cu5, :cu5ee, :cu6, :cu7, :cu7l, :cu8; Expression[field_val[a]] - when :cs9, :cs10, :cs11; Expression[field_val[a]] - when :cdisps7, :cdisps8, :cdisps10, :cdisps13; Expression[field_val[a]] - when :@cb; Memref.new(GPR.new(field_val[:cb]), nil, memref_size(di)) - when :@cbu7, :@cbu6, :@cbu5; Memref.new(GPR.new(field_val[:cb]), Expression[field_val[a]], memref_size(di)) - when :@cspu7; Memref.new(GPR.new(28), field_val[a], memref_size(di)) - when :@cbcc; Memref.new(field_val[:cb], field_val[:cc], memref_size(di)) + when :cu3, :cu5, :cu5ee, :cu6, :cu7, :cu7l, :cu8; Expression[field_val[a]] + when :cs9, :cs10, :cs11; Expression[field_val[a]] + when :cdisps7, :cdisps8, :cdisps10, :cdisps13; Expression[field_val[a]] + when :@cb; Memref.new(GPR.new(field_val[:cb]), nil, memref_size(di)) + when :@cbu7, :@cbu6, :@cbu5; Memref.new(GPR.new(field_val[:cb]), Expression[field_val[a]], memref_size(di)) + when :@cspu7; Memref.new(GPR.new(28), field_val[a], memref_size(di)) + when :@cbcc; Memref.new(field_val[:cb], field_val[:cc], memref_size(di)) - # 32-bits instruction operands ------------------------------------------" - when :a, :b - ((r = field_val[a]) == 0x3E) ? :zero : GPR.new(r) - when :b2; GPR.new field_val[:b] - when :c; GPR.new field_val[a] - when :bext, :cext; field_val[a] - when :@cext - target = field_val[a] - (di.opcode.props[:setip] and target.kind_of? GPR) ? Memref.new(target, nil, memref_size(di)) : target + # 32-bits instruction operands ------------------------------------------" + when :a, :b + ((r = field_val[a]) == 0x3E) ? 
:zero : GPR.new(r) + when :b2; GPR.new field_val[:b] + when :c; GPR.new field_val[a] + when :bext, :cext; field_val[a] + when :@cext + target = field_val[a] + (di.opcode.props[:setip] and target.kind_of? GPR) ? Memref.new(target, nil, memref_size(di)) : target - when :@bextcext - tmp = field_val[a] - #c = tmp & 0x3F - tmp = tmp >> 6 - b = (tmp >> 12) | ((tmp & 0x7) << 3) - Memref.new(field_val[:bext], field_val[:cext], memref_size(di)) + when :@bextcext + tmp = field_val[a] + #c = tmp & 0x3F + tmp = tmp >> 6 + b = (tmp >> 12) | ((tmp & 0x7) << 3) + Memref.new(field_val[:bext], field_val[:cext], memref_size(di)) - when :u6, :u6e, :s8e, :s9, :s12; Expression[field_val[a]] - when :s12e, :s21e, :s21ee, :s25e, :s25ee; Expression[field_val[a]] - when :auxs12; AUX.new field_val[:s12] - when :@c; Memref.new(GPR.new(field_val[a]), nil, memref_size(di)) - when :@bcext; Memref.new(field_val[a], nil, memref_size(di)) - when :@bcext; Memref.new(field_val[:b], field_val[:cext], memref_size(di)) - when :@bs9 - # [b,s9] or [limm] if b = 0x3E - base = field_val[:bext] - Memref.new(base, (base.kind_of? GPR) ? Expression[field_val[a]] : nil, memref_size(di)) + when :u6, :u6e, :s8e, :s9, :s12; Expression[field_val[a]] + when :s12e, :s21e, :s21ee, :s25e, :s25ee; Expression[field_val[a]] + when :auxs12; AUX.new field_val[:s12] + when :@c; Memref.new(GPR.new(field_val[a]), nil, memref_size(di)) + when :@bcext; Memref.new(field_val[a], nil, memref_size(di)) + when :@bcext; Memref.new(field_val[:b], field_val[:cext], memref_size(di)) + when :@bs9 + # [b,s9] or [limm] if b = 0x3E + base = field_val[:bext] + Memref.new(base, (base.kind_of? GPR) ? 
Expression[field_val[a]] : nil, memref_size(di)) - # common instruction operands ------------------------------------------" - when :zero; Expression[0] - when :gp; GPR.new(26) - when :sp, :sp2; GPR.new(28) - when :blink; GPR.new(31) - when :@ilink1; Memref.new(GPR.new(29), nil, memref_size(di)) - when :@ilink2; Memref.new(GPR.new(30), nil, memref_size(di)) - when :@blink; Memref.new(GPR.new(31), nil, memref_size(di)) + # common instruction operands ------------------------------------------" + when :zero; Expression[0] + when :gp; GPR.new(26) + when :sp, :sp2; GPR.new(28) + when :blink; GPR.new(31) + when :@ilink1; Memref.new(GPR.new(29), nil, memref_size(di)) + when :@ilink2; Memref.new(GPR.new(30), nil, memref_size(di)) + when :@blink; Memref.new(GPR.new(31), nil, memref_size(di)) - else raise SyntaxError, "Internal error: invalid argument #{a} in #{op.name}" - end - end - } + else raise SyntaxError, "Internal error: invalid argument #{a} in #{op.name}" + end + end + } - di.bin_length += edata.ptr - before_ptr + di.bin_length += edata.ptr - before_ptr - return if edata.ptr > edata.virtsize + return if edata.ptr > edata.virtsize - di - end + di + end - def disassembler_default_func - df = DecodedFunction.new - df.backtrace_binding = {} - 15.times { |i| - df.backtrace_binding["r#{i}".to_sym] = Expression::Unknown - } - df.backtracked_for = [] - df.btfor_callback = lambda { |dasm, btfor, funcaddr, calladdr| - if funcaddr != :default - btfor - elsif di = dasm.decoded[calladdr] and di.opcode.props[:saveip] - btfor - else [] - end - } - df - end + def disassembler_default_func + df = DecodedFunction.new + df.backtrace_binding = {} + 15.times { |i| + df.backtrace_binding["r#{i}".to_sym] = Expression::Unknown + } + df.backtracked_for = [] + df.btfor_callback = lambda { |dasm, btfor, funcaddr, calladdr| + if funcaddr != :default + btfor + elsif di = dasm.decoded[calladdr] and di.opcode.props[:saveip] + btfor + else [] + end + } + df + end - REG_SYMS = [:r26, :r27, :r28, 
:r29, :r30, :r31, :r60] - def register_symbols - REG_SYMS - end + REG_SYMS = [:r26, :r27, :r28, :r29, :r30, :r31, :r60] + def register_symbols + REG_SYMS + end - def backtrace_binding - @backtrace_binding ||= init_backtrace_binding - end + def backtrace_binding + @backtrace_binding ||= init_backtrace_binding + end - def opshift(op) - op[/\d/].to_i - end + def opshift(op) + op[/\d/].to_i + end - def with_res(arg) - arg != :zero - end + def with_res(arg) + arg != :zero + end - def init_backtrace_binding - sp = :r28 - blink = :r31 + def init_backtrace_binding + sp = :r28 + blink = :r31 - @backtrace_binding ||= {} + @backtrace_binding ||= {} - mask = lambda { |sz| (1 << sz)-1 } # 32bits => 0xffff_ffff + mask = lambda { |sz| (1 << sz)-1 } # 32bits => 0xffff_ffff - opcode_list.each{|mode, oplist| - oplist.map { |ol| ol.name }.uniq.each { |op| - binding = case op - when /^add/, /^sub/ - lambda { |di, a0, a1, a2| - if (shift = opshift(op)) == 0 - { a0 => Expression[[a1, :+, a2], :&, mask[32]] } - else - { a0 => Expression[[a1, :+, [a2, :<<, shift]], :&, mask[32]] } - end - } - when /^and/ - lambda { |di, a0, a1, a2| { a0 => Expression[a1, :&, a2] } } - when /^asl/ - lambda { |di, *a| { a[0] => Expression[[a[1], :<<, (a[2] ? a[2]:1)], :&, mask[32]] } } - when /^bxor/ - lambda { |di, a0, a1, a2| { a0 => Expression[a1, :^, [1, :<<, a2]] }} - when /^bclr/; lambda { |di, a0, a1, a2| { a0 => Expression[a1, :&, Expression[mask[32], :^, Expression[1, :<<, a2]]] } } - when /^bset/; lambda { |di, a0, a1, a2| { a0 => Expression[a1, :|, Expression[1, :<<, a2]] } } - when /^jl/; lambda { |di, a0| { blink => Expression[di.next_addr] } } - when 'bl', 'bl_s', /^bl\./ - # FIXME handle delay slot - # "This address is taken either from the first instruction following the branch (current PC) or the - # instruction after that (next PC) according to the delay slot mode (.d)." 
- lambda { |di, a0| { blink => Expression[di.next_addr] } } - when /^mov/, /^lr/, /^ld/; lambda { |di, a0, a1| { a0 => a1 } } - when /^neg/; lambda { |di, a0, a1| { a0 => Expression[[0, :-, a1], :&, mask[32]] } } - when /^not/; lambda { |di, a0, a1| { a0 => Expression[[:~, a1], :&, mask[32]] } } - when /^or/; lambda { |di, a0, a1, a2| { a0 => Expression[a1, :|, a2] } } - when /^st/, /^sr/; lambda { |di, a0, a1| { a1 => a0 } } - when /^ex/; lambda { |di, a0, a1| { a1 => a0 , a0 => a1 } } - when 'push_s' - lambda { |di, a0| { - sp => Expression[sp, :-, 4], - Indirection[sp, @size/8, di.address] => Expression[a0] - } } - when 'pop_s' - lambda { |di, a0| { - a0 => Indirection[sp, @size/8, di.address], - sp => Expression[sp, :+, 4] - } } - end - @backtrace_binding[op] ||= binding if binding - } - } + opcode_list.each{|mode, oplist| + oplist.map { |ol| ol.name }.uniq.each { |op| + binding = case op + when /^add/, /^sub/ + lambda { |di, a0, a1, a2| + if (shift = opshift(op)) == 0 + { a0 => Expression[[a1, :+, a2], :&, mask[32]] } + else + { a0 => Expression[[a1, :+, [a2, :<<, shift]], :&, mask[32]] } + end + } + when /^and/ + lambda { |di, a0, a1, a2| { a0 => Expression[a1, :&, a2] } } + when /^asl/ + lambda { |di, *a| { a[0] => Expression[[a[1], :<<, (a[2] ? a[2]:1)], :&, mask[32]] } } + when /^bxor/ + lambda { |di, a0, a1, a2| { a0 => Expression[a1, :^, [1, :<<, a2]] }} + when /^bclr/; lambda { |di, a0, a1, a2| { a0 => Expression[a1, :&, Expression[mask[32], :^, Expression[1, :<<, a2]]] } } + when /^bset/; lambda { |di, a0, a1, a2| { a0 => Expression[a1, :|, Expression[1, :<<, a2]] } } + when /^jl/; lambda { |di, a0| { blink => Expression[di.next_addr] } } + when 'bl', 'bl_s', /^bl\./ + # FIXME handle delay slot + # "This address is taken either from the first instruction following the branch (current PC) or the + # instruction after that (next PC) according to the delay slot mode (.d)." 
+ lambda { |di, a0| { blink => Expression[di.next_addr] } } + when /^mov/, /^lr/, /^ld/; lambda { |di, a0, a1| { a0 => a1 } } + when /^neg/; lambda { |di, a0, a1| { a0 => Expression[[0, :-, a1], :&, mask[32]] } } + when /^not/; lambda { |di, a0, a1| { a0 => Expression[[:~, a1], :&, mask[32]] } } + when /^or/; lambda { |di, a0, a1, a2| { a0 => Expression[a1, :|, a2] } } + when /^st/, /^sr/; lambda { |di, a0, a1| { a1 => a0 } } + when /^ex/; lambda { |di, a0, a1| { a1 => a0 , a0 => a1 } } + when 'push_s' + lambda { |di, a0| { + sp => Expression[sp, :-, 4], + Indirection[sp, @size/8, di.address] => Expression[a0] + } } + when 'pop_s' + lambda { |di, a0| { + a0 => Indirection[sp, @size/8, di.address], + sp => Expression[sp, :+, 4] + } } + end + @backtrace_binding[op] ||= binding if binding + } + } - @backtrace_binding - end + @backtrace_binding + end - def get_backtrace_binding(di) - a = di.instruction.args.map { |arg| - case arg - when GPR; arg.symbolic - when Memref; arg.symbolic(di.address) - else arg - end - } + def get_backtrace_binding(di) + a = di.instruction.args.map { |arg| + case arg + when GPR; arg.symbolic + when Memref; arg.symbolic(di.address) + else arg + end + } - if binding = backtrace_binding[di.opcode.basename] - binding[di, *a] - else - puts "unhandled instruction to backtrace: #{di}" if $VERBOSE - { :incomplete_binding => Expression[1] } - end - end + if binding = backtrace_binding[di.opcode.basename] + binding[di, *a] + else + puts "unhandled instruction to backtrace: #{di}" if $VERBOSE + { :incomplete_binding => Expression[1] } + end + end - def get_xrefs_x(dasm, di) - return [] if not di.opcode.props[:setip] + def get_xrefs_x(dasm, di) + return [] if not di.opcode.props[:setip] - arg = case di.opcode.name - when 'b', 'b_s', /^j/, /^bl/, /^br/, 'lp' - expr = di.instruction.args.last - expr.kind_of?(Memref) ? 
expr.base : expr - else di.instruction.args.last - end + arg = case di.opcode.name + when 'b', 'b_s', /^j/, /^bl/, /^br/, 'lp' + expr = di.instruction.args.last + expr.kind_of?(Memref) ? expr.base : expr + else di.instruction.args.last + end - [Expression[(arg.kind_of?(Reg) ? arg.symbolic : arg)]] - end + [Expression[(arg.kind_of?(Reg) ? arg.symbolic : arg)]] + end - def backtrace_is_function_return(expr, di=nil) - Expression[expr].reduce == Expression[register_symbols[5]] - end + def backtrace_is_function_return(expr, di=nil) + Expression[expr].reduce == Expression[register_symbols[5]] + end - def delay_slot(di=nil) - return 0 if (not di) or (not di.opcode.props[:setip]) - return 1 if di.opcode.props[:delay_slot] - (di.instruction.opname =~ /\.d/) ? 0 : 1 - end + def delay_slot(di=nil) + return 0 if (not di) or (not di.opcode.props[:setip]) + return 1 if di.opcode.props[:delay_slot] + (di.instruction.opname =~ /\.d/) ? 0 : 1 + end end end diff --git a/lib/metasm/metasm/cpu/arc/main.rb b/lib/metasm/metasm/cpu/arc/main.rb index 2d0a5bd042..747d6be6e2 100644 --- a/lib/metasm/metasm/cpu/arc/main.rb +++ b/lib/metasm/metasm/cpu/arc/main.rb @@ -7,185 +7,185 @@ require 'metasm/main' module Metasm class ARC < CPU - def initialize(e = :little) - super() - @endianness = e - @size = 32 - end + def initialize(e = :little) + super() + @endianness = e + @size = 32 + end - class Reg - include Renderable + class Reg + include Renderable - attr_accessor :i + attr_accessor :i - def initialize(i); @i = i end + def initialize(i); @i = i end - def ==(o) - o.class == self.class and o.i == i - end - end + def ==(o) + o.class == self.class and o.i == i + end + end - # general purpose reg - # Result R0-R1 - # Arguments R0-R7 - # Caller Saved Registers R0-R12 - # Callee Saved Registers R13-R25 - # Static chain pointer (if required) R11 - # Register for temp calculation R12 - # Global Pointer R26 (GP) - # Frame Pointer R27 (FP) - # Stack Pointer R28 (SP) - # Interrupt Link Register 1 R29 
(ILINK1) - # Interrupt Link Register 2 R30 (ILINK2) - # Branch Link Register R31 (BLINK) - class GPR < Reg - Sym = (0..64).map { |i| "r#{i}".to_sym } - def symbolic; Sym[@i] end + # general purpose reg + # Result R0-R1 + # Arguments R0-R7 + # Caller Saved Registers R0-R12 + # Callee Saved Registers R13-R25 + # Static chain pointer (if required) R11 + # Register for temp calculation R12 + # Global Pointer R26 (GP) + # Frame Pointer R27 (FP) + # Stack Pointer R28 (SP) + # Interrupt Link Register 1 R29 (ILINK1) + # Interrupt Link Register 2 R30 (ILINK2) + # Branch Link Register R31 (BLINK) + class GPR < Reg + Sym = (0..64).map { |i| "r#{i}".to_sym } + def symbolic; Sym[@i] end - Render = { - 26 => 'gp', # global pointer, used to point to small sets of shared data throughout execution of a program - 27 => 'fp', # frame pointer - 28 => 'sp', # stak pointer - 29 => 'ilink1', # maskable interrupt link register - 30 => 'ilink2', # maskable interrupt link register 2 - 31 => 'blink', # branch link register - 60 => 'lp_count', # loop count register (24 bits) - # "When a destination register is set to r62 there is no destination for the result of the instruction so the - # result is discarded. Any flag updates will still occur according to the set flags directive (.F or implicit - # in the instruction)." - 62 => 'zero' - } + Render = { + 26 => 'gp', # global pointer, used to point to small sets of shared data throughout execution of a program + 27 => 'fp', # frame pointer + 28 => 'sp', # stak pointer + 29 => 'ilink1', # maskable interrupt link register + 30 => 'ilink2', # maskable interrupt link register 2 + 31 => 'blink', # branch link register + 60 => 'lp_count', # loop count register (24 bits) + # "When a destination register is set to r62 there is no destination for the result of the instruction so the + # result is discarded. Any flag updates will still occur according to the set flags directive (.F or implicit + # in the instruction)." 
+ 62 => 'zero' + } - def render - if s = Render[i] - [s] - else - # r0-r28 general purpose registers - # r32-r59 reserved for extentions - ["r#@i"] - end - end + def render + if s = Render[i] + [s] + else + # r0-r28 general purpose registers + # r32-r59 reserved for extentions + ["r#@i"] + end + end - end + end - class AUX < Reg - def symbolic; "aux#{i}".to_sym end + class AUX < Reg + def symbolic; "aux#{i}".to_sym end - Render = { - 0x00 => 'status', # Status register (Original ARCtangent-A4 processor format) - 0x01 => 'semaphore', # Inter-process/Host semaphore register - 0x02 => 'lp_start', # Loop start address (32-bit) - 0x03 => 'lp_end', # Loop end address (32-bit) - 0x04 => 'identity', # Processor Identification register - 0x05 => 'debug', # Debug register - 0x06 => 'pc', # PC register (32-bit) - 0x0A => 'status32', # Status register (32-bit) - 0x0B => 'status32_l1', # Status register save for level 1 interrupts - 0x0C => 'status32_l2', # Status register save for level 2 interrupts - 0x10 => 'ic_ivic', # Cache invalidate - 0x11 => 'ic_ctrl', # Mode bits for cache controller - 0x12 => 'mulhi', # High part of Multiply - 0x19 => 'ic_ivil', - 0x21 => 'timer0_cnt', # Processor Timer 0 Count value - 0x22 => 'timer0_ctrl', # Processor Timer 0 Control value - 0x23 => 'timer0_limit', # Processor Timer 0 Limit value - 0x25 => 'int_vector_base', # Interrupt Vector Base address - 0x40 => 'im_set_dc_ctrl', - 0x41 => 'aux_macmode', # Extended Arithmetic Status and Mode - 0x43 => 'aux_irq_lv12', # Interrupt Level Status - 0x47 => 'dc_ivdc', # Invalidate cache - 0x48 => 'dc_ctrl', # Cache control register - 0x49 => 'dc_ldl', # Lock data line - 0x4A => 'dc_ivdl', # Invalidate data line - 0x4B => 'dc_flsh', # Flush data cache - 0x4C => 'dc_fldl', # Flush data line - 0x58 => 'dc_ram_addr', # Access RAM address - 0x59 => 'dc_tag', # Tag Access - 0x5A => 'dc_wp', # Way Pointer Access - 0x5B => 'dc_data', # Data Access - 0x62 => 'crc_bcr', - 0x64 => 'dvfb_bcr', - 0x65 => 
'extarith_bcr', - 0x68 => 'vecbase_bcr', - 0x69 => 'perbase_bcr', - 0x6f => 'mmu_bcr', - 0x72 => 'd_cache_build', # Build: Data Cache - 0x73 => 'madi_build', # Build: Multiple ARC Debug I/F - 0x74 => 'ldstram_build', # Build: LD/ST RAM - 0x75 => 'timer_build', # Build: Timer - 0x76 => 'ap_build', # Build: Actionpoints - 0x77 => 'i_cache_build', # Build: I-Cache - 0x78 => 'addsub_build', # Build: Saturated Add/Sub - 0x79 => 'dspram_build', # Build: Scratch RAM & XY Memory - 0x7B => 'multiply_build', # Build: Multiply - 0x7C => 'swap_build', # Build: Swap - 0x7D => 'norm_build', # Build: Normalise - 0x7E => 'minmax_build', # Build: Min/Max - 0x7F => 'barrel_build', # Build: Barrel Shift - 0x100 => 'timer1_cnt', # Processor Timer 1 Count value - 0x101 => 'timer1_ctrl', # Processor Timer 1 Control value - 0x102 => 'timer1_limit', # Processor Timer 1 Limit value - 0x200 => 'aux_irq_lev', # Interrupt Level Programming - 0x201 => 'aux_irq_hint', # Software Triggered Interrupt - 0x202 => 'aux_irq_mask', # Masked bits for Interrupts - 0x203 => 'aux_irq_base', # Interrupt Vector base address - 0x400 => 'eret', # Exception Return Address - 0x401 => 'erbta', # Exception Return Branch Target Address - 0x402 => 'erstatus', # Exception Return Status - 0x403 => 'ecr', # Exception Cause Register - 0x404 => 'efa', # Exception Fault Address - 0x40A => 'icause1', # Level 1 Interrupt Cause Register - 0x40B => 'icause2', # Level 2 Interrupt Cause Register - 0x40C => 'aux_ienable', # Interrupt Mask Programming - 0x40D => 'aux_itrigger', # Interrupt Sensitivity Programming - 0x410 => 'xpu', # User Mode Extension Enables - 0x412 => 'bta', # Branch Target Address - 0x413 => 'bta_l1', # Level 1 Return Branch Target - 0x414 => 'bta_l2', # Level 2 Return Branch Target - 0x415 => 'aux_irq_pulse_cancel', # Interrupt Pulse Cancel - 0x416 => 'aux_irq_pending', # Interrupt Pending Register - } + Render = { + 0x00 => 'status', # Status register (Original ARCtangent-A4 processor format) + 0x01 => 
'semaphore', # Inter-process/Host semaphore register + 0x02 => 'lp_start', # Loop start address (32-bit) + 0x03 => 'lp_end', # Loop end address (32-bit) + 0x04 => 'identity', # Processor Identification register + 0x05 => 'debug', # Debug register + 0x06 => 'pc', # PC register (32-bit) + 0x0A => 'status32', # Status register (32-bit) + 0x0B => 'status32_l1', # Status register save for level 1 interrupts + 0x0C => 'status32_l2', # Status register save for level 2 interrupts + 0x10 => 'ic_ivic', # Cache invalidate + 0x11 => 'ic_ctrl', # Mode bits for cache controller + 0x12 => 'mulhi', # High part of Multiply + 0x19 => 'ic_ivil', + 0x21 => 'timer0_cnt', # Processor Timer 0 Count value + 0x22 => 'timer0_ctrl', # Processor Timer 0 Control value + 0x23 => 'timer0_limit', # Processor Timer 0 Limit value + 0x25 => 'int_vector_base', # Interrupt Vector Base address + 0x40 => 'im_set_dc_ctrl', + 0x41 => 'aux_macmode', # Extended Arithmetic Status and Mode + 0x43 => 'aux_irq_lv12', # Interrupt Level Status + 0x47 => 'dc_ivdc', # Invalidate cache + 0x48 => 'dc_ctrl', # Cache control register + 0x49 => 'dc_ldl', # Lock data line + 0x4A => 'dc_ivdl', # Invalidate data line + 0x4B => 'dc_flsh', # Flush data cache + 0x4C => 'dc_fldl', # Flush data line + 0x58 => 'dc_ram_addr', # Access RAM address + 0x59 => 'dc_tag', # Tag Access + 0x5A => 'dc_wp', # Way Pointer Access + 0x5B => 'dc_data', # Data Access + 0x62 => 'crc_bcr', + 0x64 => 'dvfb_bcr', + 0x65 => 'extarith_bcr', + 0x68 => 'vecbase_bcr', + 0x69 => 'perbase_bcr', + 0x6f => 'mmu_bcr', + 0x72 => 'd_cache_build', # Build: Data Cache + 0x73 => 'madi_build', # Build: Multiple ARC Debug I/F + 0x74 => 'ldstram_build', # Build: LD/ST RAM + 0x75 => 'timer_build', # Build: Timer + 0x76 => 'ap_build', # Build: Actionpoints + 0x77 => 'i_cache_build', # Build: I-Cache + 0x78 => 'addsub_build', # Build: Saturated Add/Sub + 0x79 => 'dspram_build', # Build: Scratch RAM & XY Memory + 0x7B => 'multiply_build', # Build: Multiply + 0x7C => 
'swap_build', # Build: Swap + 0x7D => 'norm_build', # Build: Normalise + 0x7E => 'minmax_build', # Build: Min/Max + 0x7F => 'barrel_build', # Build: Barrel Shift + 0x100 => 'timer1_cnt', # Processor Timer 1 Count value + 0x101 => 'timer1_ctrl', # Processor Timer 1 Control value + 0x102 => 'timer1_limit', # Processor Timer 1 Limit value + 0x200 => 'aux_irq_lev', # Interrupt Level Programming + 0x201 => 'aux_irq_hint', # Software Triggered Interrupt + 0x202 => 'aux_irq_mask', # Masked bits for Interrupts + 0x203 => 'aux_irq_base', # Interrupt Vector base address + 0x400 => 'eret', # Exception Return Address + 0x401 => 'erbta', # Exception Return Branch Target Address + 0x402 => 'erstatus', # Exception Return Status + 0x403 => 'ecr', # Exception Cause Register + 0x404 => 'efa', # Exception Fault Address + 0x40A => 'icause1', # Level 1 Interrupt Cause Register + 0x40B => 'icause2', # Level 2 Interrupt Cause Register + 0x40C => 'aux_ienable', # Interrupt Mask Programming + 0x40D => 'aux_itrigger', # Interrupt Sensitivity Programming + 0x410 => 'xpu', # User Mode Extension Enables + 0x412 => 'bta', # Branch Target Address + 0x413 => 'bta_l1', # Level 1 Return Branch Target + 0x414 => 'bta_l2', # Level 2 Return Branch Target + 0x415 => 'aux_irq_pulse_cancel', # Interrupt Pulse Cancel + 0x416 => 'aux_irq_pending', # Interrupt Pending Register + } - def render - if s = Render[i] - [s] - else - ["aux#@i"] - end - end - end + def render + if s = Render[i] + [s] + else + ["aux#@i"] + end + end + end - class Memref - attr_accessor :base, :disp + class Memref + attr_accessor :base, :disp - def initialize(base, disp, sz) - @base, @disp, @size = base, disp, sz - end + def initialize(base, disp, sz) + @base, @disp, @size = base, disp, sz + end - def symbolic(orig) - b = @base - b = b.symbolic if b.kind_of? Reg + def symbolic(orig) + b = @base + b = b.symbolic if b.kind_of? Reg - if disp - o = @disp - o = o.symbolic if o.kind_of? 
Reg - e = Expression[b, :+, o].reduce - else - e = Expression[b].reduce - end + if disp + o = @disp + o = o.symbolic if o.kind_of? Reg + e = Expression[b, :+, o].reduce + else + e = Expression[b].reduce + end - Indirection[e, @size, orig] - end + Indirection[e, @size, orig] + end - include Renderable + include Renderable - def render - if @disp and @disp != 0 - ['[', @base, ', ', @disp, ']'] - else - ['[', @base, ']'] - end - end - end + def render + if @disp and @disp != 0 + ['[', @base, ', ', @disp, ']'] + else + ['[', @base, ']'] + end + end + end end end diff --git a/lib/metasm/metasm/cpu/arc/opcodes.rb b/lib/metasm/metasm/cpu/arc/opcodes.rb index 66417d93fd..8494fabe8a 100644 --- a/lib/metasm/metasm/cpu/arc/opcodes.rb +++ b/lib/metasm/metasm/cpu/arc/opcodes.rb @@ -7,582 +7,582 @@ require 'metasm/cpu/arc/main' module Metasm class ARC - def addop32(name, bin, *args) - addop(:ac32, name, bin, *args) - end - - def addop16(name, bin, *args) - addop(:ac16, name, bin, *args) - end - - def addop(mode, name, bin, *args) - o = Opcode.new(name) - o.bin = bin - args.each { |a| - o.args << a if @fields_mask[a] - o.props[a] = true if @valid_props[a] - o.fields[a] = [@fields_mask[a], @fields_shift[a]] if @fields_mask[a] - } - (mode == :ac16) ? 
(@opcode_list16 << o) : (@opcode_list32 << o) - end - - def init_opcode_list - @opcode_list16 = [] - @opcode_list32 = [] - - @valid_props.update :flag_update => true, :delay_slot => true - @cond_suffix = [''] + %w[z nz p n cs cc vs vc gt ge lt le hi ls pnz] - #The remaining 16 condition codes (10-1F) are available for extension - @cond_suffix += (0x10..0x1f).map{ |i| "extcc#{i.to_s(16)}" } - - # Compact 16-bits operands field masks - fields_mask16 = { - :ca => 0x7, :cb => 0x7, :cb2 => 0x7, :cb3 => 0x7, :cc => 0x7, - :cu => 0x1f, - :ch => 0b11100111, - - # immediate (un)signed - :cu3 => 0x7, :cu8 => 0xff, - # cu7 is 32-bit aligned, cu6 is 16-bit aligned, cu6 is 8-bit aligned - :cu5 => 0x1f, :cu5ee => 0x1f, :cu6 => 0x3f, :cu7 => 0x7f, - - :cs9 => 0x1ff, :cs9ee => 0x1ff, :cs10 => 0x1ff, :cs11 => 0x1ff, - - # signed displacement - :cdisps7=> 0x3f, :cdisps8 => 0x7f, :cdisps10 => 0x1ff, :cdisps13 => 0x7FF, - - # memref [b+u], [sp,u], etc. - :@cb => 0x7, :@cbu7 => 0b11100011111, :@cbu6 => 0b11100011111, :@cbu5 => 0b11100011111, - :@cspu7 => 0b11111, :@cbcc => 0b111111, - :@gps9 => 0x1ff, :@gps10 => 0x1ff, :@gps11 => 0x1ff, - - # implicit operands - :climm => 0x0, :cr0 => 0x0, - :blink => 0x0, :@blink => 0x0, :gp => 0x0, :sp => 0x0, :sp2 => 0x0, :zero => 0x0 - } - - fields_shift16 = { - :ca => 0x0, :cb => 0x8, :cb2 => 0x8, :cb3 => 0x8, :cc => 0x5, - :cu => 0x0, - - # immediate (un)signed - :ch => 0x0, - :cu3 => 0x0, :cu5 => 0, :cu5ee => 0, :cu6 => 5, :cu7 => 0x0, :cu8 => 0x0, - :cs9 => 0x0, :cs9ee => 0x0, :cs10 => 0x0, :cs11 => 0x0, - - # signed displacement - :cdisps7=> 0x0, :cdisps8 => 0x0, :cdisps10 => 0x0, :cdisps13 => 0x0, - - # memref [b+u] - :@cb => 0x8, :@cbu7 => 0x0, :@cbu6 => 0x0, :@cbu5 => 0x0, - :@cspu7 => 0x0, :@cbcc => 0x5, - :@gps9 => 0x0, :@gps10 => 0x0, :@gps11 => 0x0, - - # implicit operands - :climm => 0x0, :cr0 => 0x0, - :blink => 0x0, :@blink => 0x0, :gp => 0x0, :sp => 0x0, :sp2 => 0x0, :zero => 0x0, - } - - fields_mask32 = { - :a => 0x3f, :b => 
0b111000000000111, :bext => 0b111000000000111, - :c => 0x3f, :@c => 0x3f, :cext => 0x3f, :@cext => 0x3f, - - :u6 => 0x3f, :u6e => 0x3f, - :s8e => 0x1fd, :s9 => 0x7f, - :s12 => 0xfff, :s12e => 0xfff, - :s21e => 0x1ffBff, :s21ee => 0x1ff3ff, - :s25e => 0x7feffcf, :s25ee => 0x7fcffcf, - - :@bs9 => 0x7fff, :@bc => 0x1ff, :@bextcext => 0x1C01FF, - - :limm => 0x0, :@limm => 0x0, - :@limmc => 0x3f, :@blimm => 0x7, - - :auxlimm => 0x0, :auxs12 => 0xfff, - - :ccond => 0x1f, #condition codes - :delay5 => 1, :delay16 => 1,# delay slot - :flags15 => 0x1, :flags16 => 0x1, - :signext6 => 0x1, :signext16 => 0x1, - :cache5 => 0x1, :cache11 => 0x1, :cache16 => 0x1, # data cache mode field - :sz1 => 0x3, :sz7 => 0x3, :sz16 => 0x3, :sz17 => 0x3, #data size field - :wb3 => 0x3, :wb9 => 0x3, :wb22 => 0x3, #write-back flag - :zero => 0x0, :b2 => 0x0, :@ilink1 => 0x0, :@ilink2 => 0x0 - } - #FIXME - - fields_shift32 = { - :a => 0x0, :b => 0xC, :bext => 0xC, - :c => 0x6, :@c => 0x6, :cext => 0x6, :@cext => 0x6, - - :u6 => 0x6, :u6e =>0x6, - :s8e => 15, :s9 => 0x11, - :s12 => 0x0, :s12e => 0, - :s21e => 0x6, :s21ee => 0x6, - :s25e => 0, :s25ee => 0, - - :limm => 0x0, :@limm => 0x0, - :@limmc => 0x6, :@blimm => 0x18, - - :auxlimm => 0x0, :auxs12 => 0, - - :@bs9 => 12, :@bc => 6, :@bextcext => 6, - - :ccond => 0, #condition codes - :delay5 => 5, :delay16 => 16,# delay slot - :flags15 => 15, :flags16 => 16, - :signext6 => 6, :signext16 => 16, - :cache5 => 5, :cache11 => 11, :cache16 => 16, # data cache mode field - :sz1 => 1, :sz7 => 7, :sz16 => 16, :sz17 => 17, #data size field - :wb3 => 3, :wb9 => 9, :wb22 => 22, #write-back flag - :zero => 0x0, :b2 => 0x0, :@ilink1 => 0, :@ilink2 => 0, - } - - @fields_mask = fields_mask16.merge(fields_mask32) - @fields_shift = fields_shift16.merge(fields_shift32) - - init_arc_compact16() - init_arc_compact32() - - {16 => @opcode_list16, 32 => @opcode_list32} - end - - def add_artihm_op(op, majorcode, subcode, *flags) - # 0bxxxxxbbb00xxxxxxFBBBCCCCCCAAAAAA - 
addop32 op, 0b00000000000000000000000000000000 | majorcode << 0x1b | subcode << 16, :a, :bext, :cext, :flags15 - # 0bxxxxxbbb01xxxxxxFBBBuuuuuuAAAAAA - addop32 op, 0b00000000010000000000000000000000 | majorcode << 0x1b | subcode << 16, :a, :b, :u6, :flags15 - # 0bxxxxxbbb10xxxxxxFBBBssssssSSSSSS - addop32 op, 0b00000000100000000000000000000000 | majorcode << 0x1b | subcode << 16, :b, :b2, :s12, :flags15 - # 0bxxxxxbbb11xxxxxxFBBBCCCCCC0QQQQQ - addop32 op, 0b00000000110000000000000000000000 | majorcode << 0x1b | subcode << 16, :b, :b2, :cext, :ccond, :flags15 - # 0bxxxxxbbb11xxxxxxFBBBuuuuuu1QQQQQ - addop32 op, 0b00000000110000000000000000100000 | majorcode << 0x1b | subcode << 16, :b, :b2, :u6, :ccond, :flags15 - end - - def add_logical_op(op, majorcode, subcode, *flags) - # 0b00100bbb00xxxxxxFBBBCCCCCCAAAAAA - addop32 op, 0b00100000000000000000000000000000 | majorcode << 0x1b | subcode << 16, :a, :bext, :c, :flags15 - # 0b00100bbb01xxxxxxFBBBuuuuuuAAAAAA - addop32 op, 0b00100000010000000000000000000000 | majorcode << 0x1b | subcode << 16, :a, :b, :u6, :flags15 - # 0b00100bbb11xxxxxxFBBBCCCCCC0QQQQQ - # WTF - addop32 op, 0b00100000110000000000000000000000 | majorcode << 0x1b | subcode << 16, :b, :b2, :c, :ccond, :flags15 - # 0b00100bbb11xxxxxxFBBBuuuuuu1QQQQQ - addop32 op, 0b00100000110000000000000000100000 | majorcode << 0x1b | subcode << 16, :b, :b2, :u6, :ccond, :flags15 - end - - def add_artihm_op_reduce(op, majorcode, subcode) - # 0bxxxxxbbb00101111FBBBCCCCCCxxxxxx - addop32 op, 0b00000000001011110000000000000000 | majorcode << 0x1b | subcode, :b, :cext, :flags15 - # 0bxxxxxbbb01101111FBBBuuuuuuxxxxxx - addop32 op, 0b00000000011011110000000000000000 | majorcode << 0x1b | subcode, :b, :u6, :flags15 - end - - def add_condbranch_op(op, ccond) - # 0b00001bbbsssssss1SBBBUUUUUUN0xxxx - addop32 op, 0b00001000000000010000000000000000 | ccond, :bext, :cext, :s8e, :setip, :delay5 - # 0b00001bbbsssssss1SBBBUUUUUUN1xxxx - addop32 op, 0b00001000000000010000000000010000 | 
ccond, :b, :u6, :s8e, :setip, :delay5 - end - - def add_condjmp_op() - # 0b00100RRR1110000D0RRRCCCCCC0QQQQQ - addop32 'j', 0b00100000111000000000000000000000, :@cext, :ccond, :setip, :delay16 - # 0b00100RRR1110000D0RRRuuuuuu1QQQQQ - addop32 'j', 0b00100000111000000000000000100000, :u6, :ccond, :setip, :delay16 - # 0b00100RRR111000001RRR0111010QQQQQ - addop32 'j', 0b00100000111000001000011101000000, :@ilink1, :ccond, :setip, :flag_update - # 0b00100RRR111000001RRR0111100QQQQQ - addop32 'j', 0b00100000111000001000011110000000, :@ilink2, :ccond, :setip, :flag_update - end - - def add_condjmplink_op() - # 0b00100RRR111000100RRRCCCCCC0QQQQQ - addop32 'jl', 0b00100000111000100000000000000000, :@cext, :ccond, :setip, :saveip, :delay16 - # 0b00100RRR111000100RRRuuuuuu1QQQQQ - addop32 'jl', 0b00100000111000100000000000100000, :u6, :ccond, :setip, :saveip, :delay16 - end - - def init_arc_compact32 - - add_artihm_op_reduce 'abs', 0b00100, 0b001001 - add_artihm_op_reduce 'abss', 0b00101, 0b000101 - add_artihm_op_reduce 'abssw', 0b00101, 0b000100 - - add_artihm_op 'adc', 0b00100, 0b000001 - add_artihm_op 'add', 0b00100, 0b000000 - add_artihm_op 'add1', 0b00100, 0b010100 - add_artihm_op 'add2', 0b00100, 0b010101 - add_artihm_op 'add3', 0b00100, 0b010110 - add_artihm_op 'adds', 0b00101, 0b000110 - add_artihm_op 'addsw', 0b00101, 0b010101, :extended - add_artihm_op 'addsdw',0b00101, 0b101000, :extended - add_artihm_op 'and' ,0b00100, 0b000100 - - add_artihm_op_reduce 'asl', 0b00100, 0b000000 - - add_artihm_op 'asl', 0b00101, 0b000000, :extended - add_artihm_op 'asls', 0b00101, 0b001010, :extended - - add_artihm_op_reduce 'asr', 0b00100, 0b000001 - - add_artihm_op 'asr', 0b00101, 0b000010 - add_artihm_op 'asrs', 0b00101, 0b001011 - - # 0b00001bbbsssssss1SBBBCCCCCCN01110 - addop32 'bbit0', 0b00001000000000010000000000001110, :b, :c, :s9, :delay5, :setip - # 0b00001bbbsssssss1SBBBuuuuuuN11110 - addop32 'bbit0', 0b00001000000000010000000000011110, :b, :u6, :s9, :delay5, :setip - # 
0b00001bbbsssssss1SBBBCCCCCCN01111 - addop32 'bbit1', 0b00001000000000010000000000001111, :b, :c, :s9, :delay5, :setip - # 0b00001bbbsssssss1SBBBuuuuuuN11111 - addop32 'bbit1', 0b00001000000000010000000000011111, :b, :u6, :s9, :delay5, :setip - - # 0b00000ssssssssss0SSSSSSSSSSNQQQQQ - addop32 'b', 0b00000000000000000000000000000000, :s21e, :ccond, :delay5, :setip - # 0b00000ssssssssss1SSSSSSSSSSNRtttt - addop32 'b', 0b00000000000000010000000000000000, :s25e, :delay5, :setip, :stopexec - # WTF: unknown encoding, bit 5 should be reserved - addop32 'b', 0b00000000000000010000000000010000, :s25e, :delay5, :setip, :stopexec - - add_logical_op 'bclr', 0b00100, 0b010000 - add_artihm_op 'bic', 0b00100, 0b000110 - - # 0b00001sssssssss00SSSSSSSSSSNQQQQQ - addop32 'bl', 0b00001000000000000000000000000000, :s21ee, :ccond, :delay5, :setip, :saveip - # 0b00001sssssssss10SSSSSSSSSSNRtttt - addop32 'bl', 0b00001000000000100000000000000000, :s25ee, :delay5, :setip, :saveip, :stopexec - - add_logical_op 'bmsk', 0b00100, 0b010011 - - add_condbranch_op 'breq', 0b0000 - add_condbranch_op 'brne', 0b0001 - add_condbranch_op 'brlt', 0b0010 - add_condbranch_op 'brge', 0b0011 - add_condbranch_op 'brlo', 0b0100 - add_condbranch_op 'brhs', 0b0101 - - addop32 'brk', 0b00100101011011110000000000111111, :stopexec - - add_logical_op 'bset', 0b00100, 0b001111 - - # 0b00100bbb110100011BBBCCCCCC0QQQQQ - addop32 'btst', 0b00100000110100011000000000000000, :bext, :c, :ccond - # 0b00100bbb110100011BBBuuuuuu1QQQQQ - addop32 'btst', 0b00100000110100011000000000100000, :b, :u6, :ccond - # WTF 0b00100bbb010100011BBBuuuuuu0QQQQQ - addop32 'btst', 0b00100000010100011000000000000000, :b, :u6, :ccond - - add_logical_op 'bxor', 0b00100, 0b010010 - - # 0b00100bbb100011001BBBssssssSSSSSS - addop32 'cmp', 0b00100000100011001000000000000000, :b, :s12 - # WTF unknown encoding ... 
- # 0b00100bbb010011001BBBssssssSSSSSS - addop32 'cmp', 0b00100000010011001000000000000000, :b, :s12 - # 0b00100bbb110011001BBBuuuuuu1QQQQQ - addop32 'cmp', 0b00100000110011001000000000100000, :b, :u6, :ccond - # WTF unknown encoding ... - # 0b00100bbb010011001BBBssssssSSSSSS - addop32 'cmp', 0b00100000000011001000000000000000, :bext, :cext, :ccond - # 0b00100bbb110011001BBBCCCCCC0QQQQQ - addop32 'cmp', 0b00100000110011001000000000000000, :bext, :cext, :ccond - - add_artihm_op 'divaw', 0b00101, 0b001000, :extended - - # 0b00100bbb00101111DBBBCCCCCC001100 - addop32 'ex', 0b00100000001011110000000000001100, :b, :@cext, :cache16 - # 0b00100bbb01101111DBBBuuuuuu001100 - addop32 'ex', 0b00100000011011110000000000001100, :b, :@u6, :cache16 - - add_artihm_op_reduce 'extb', 0b00100, 0b000111 - add_artihm_op_reduce 'extw', 0b00100, 0b001000 - - # WTF unknown encoding ... - # 0b00100rrr111010010RRRCCCCCC0QQQQQ - addop32 'flag', 0b00100000001010010000000000000000, :cext, :ccond, :flag_update - # 0b00100rrr111010010RRRuuuuuu1QQQQQ - addop32 'flag', 0b00100000001010010000000000100000, :u6, :ccond, :flag_update - # 0b00100rrr101010010RRRssssssSSSSSS - addop32 'flag', 0b00100000011010010000000000000000, :s12, :flag_update - - add_condjmp_op() - add_condjmplink_op() - - # 0b00100RRR001000000RRRCCCCCCRRRRRR - addop32 'j', 0b00100000001000000000000000000000, :@cext, :delay16, :setip, :stopexec - # 0b00100RRR011000000RRRuuuuuuRRRRRR - addop32 'j', 0b00100000011000000000000000000000, :u6, :delay16, :setip, :stopexec - # 0b00100RRR101000000RRRssssssSSSSSS - addop32 'j', 0b00100000101000000000000000000000, :s12, :delay16, :setip, :stopexec - # 0b00100RRR001000001RRR011101RRRRRR - addop32 'j.f', 0b00100000001000001000011101000000, :@ilink1, :flag_update, :setip, :stopexec - # 0b00100RRR001000001RRR011110RRRRRR - addop32 'j.f', 0b00100000001000001000011110000000, :@ilink2, :flag_update, :setip, :stopexec - - # 0b00100RRR0010001D0RRRCCCCCCRRRRRR - addop32 'jl', 
0b00100000001000100000000000000000, :@cext, :delay16, :setip, :saveip, :stopexec - # 0b00100RRR0110001D0RRRuuuuuuRRRRRR - addop32 'jl', 0b00100000011000100000000000000000, :u6, :delay16, :setip, :saveip, :stopexec - # 0b00100RRR1010001D0RRRssssssSSSSSS - addop32 'jl', 0b00100000101000100000000000000000, :s12, :delay16, :setip, :saveip, :stopexec - - # 0b00010bbbssssssssSBBBDaaZZXAAAAAA - addop32 'ld', 0b00010000000000000000000000000000, :a, :@bs9, :sz7, :signext6, :wb9, :cache11 - - # 0b00100bbbaa110ZZXDBBBCCCCCCAAAAAA - addop32 'ld', 0b00100000001100000000000000000000, :a, :@bextcext, :sz17, :signext16, :wb22, :cache11 - - # 0b00100RRR111010000RRRuuuuuu1QQQQQ - addop32 'lp', 0b00100000111010000000000000100000, :u6e, :ccond, :setip - # 0b00100RRR101010000RRRssssssSSSSSS - addop32 'lp', 0b00100000101010000000000000000000, :s12e, :setip - - # 0b00100bbb001010100BBBCCCCCCRRRRRR - addop32 'lr', 0b00100000101010100000000000000000, :b, :@c - # 0b00100bbb001010100BBB111110RRRRRR - addop32 'lr', 0b00100000001010100000111110000000, :b, :auxlimm - # 0b00100bbb101010100BBBssssssSSSSSS - addop32 'lr', 0b00100000011010100000000000000000, :b, :auxs12 - # WTF unknown encoding ... - # 0b00100bbb101010100BBBssssssSSSSSS - addop32 'lr', 0b00100000101010100000000000000000, :b, :auxs12 - - add_artihm_op_reduce 'lsr', 0b00100, 0b000010 - - add_artihm_op 'lsr', 0b00101, 0b000001 - add_artihm_op 'max', 0b00100, 0b001000 - add_artihm_op 'min', 0b00100, 0b001001 - - # 0b00100bbb10001010FBBBssssssSSSSSS - addop32 'mov', 0b00100000100010100000000000000000, :b, :s12, :flags15 - # WTF unknown encoding ... - # 0b00100bbb01001010FBBBssssssSSSSSS - addop32 'mov', 0b00100000010010100000000000000000, :b, :s12, :flags15 - # 0b00100bbb11001010FBBBCCCCCC0QQQQQ - addop32 'mov', 0b00100000110010100000000000000000, :b, :cext, :ccond , :flags15 - # WTF unknown encoding .. 
- # 0b00100bbb00001010FBBBCCCCCC0QQQQQ - addop32 'mov', 0b00100000000010100000000000000000, :b, :cext, :ccond , :flags15 - # 0b00100bbb11001010FBBBuuuuuu1QQQQQ - addop32 'mov', 0b00100000110010100000000000100000, :b, :u6, :ccond , :flags15 - - add_artihm_op 'mpy', 0b00100, 0b011010, :extended - add_artihm_op 'mpyh', 0b00100, 0b011011, :extended - add_artihm_op 'mpyhu', 0b00100, 0b011100, :extended - add_artihm_op 'mpyu', 0b00100, 0b011101, :extended - - # WTF: neg instruction is not differenciated from a rsub :a, :b, :u6 - # : 0b00100bbb01001110FBBB000000AAAAAA - #addop32 'neg', 0b00100000010011100000000000000000, :a, :b, :flags15 - - # WTF: neg instruction is not differenciated from a rsub :b, :b2, :u6 - # 0b00100bbb11001110FBBB0000001QQQQQ - #addop32 'neg', 0b00100000110011100000000000100000, :b, :b2, :ccond , :flags15 - - add_artihm_op_reduce 'negs', 0b00101, 0b000111 - add_artihm_op_reduce 'negsw', 0b00101, 0b000110 - - # nop is an alias over mov null, 0 (mov - [:b, :s12, :flags15]) - addop32 'nop', 0b00100110010010100111000000000000 - - add_artihm_op_reduce 'norm', 0b00101, 0b000001 - add_artihm_op_reduce 'normw', 0b00101, 0b001000 - add_artihm_op_reduce 'not', 0b00100, 0b001010 - - add_artihm_op 'or', 0b00100, 0b000101 - - # 0b00010bbbssssssssSBBB0aa000111110 - addop32 'prefetch', 0b00010000000000000000000000111110, :@bs9, :wb - # 0b00100bbbaa1100000BBBCCCCCC111110 - addop32 'prefetch', 0b00100000001100000000000000111110, :@bextcext, :wb22 - - # 0b00100bbb100011011BBBssssssSSSSSS - addop32 'rcmp', 0b00100000100011011000000000000000, :b, :s12 - # 0b00100bbb110011011BBBCCCCCC0QQQQQ - addop32 'rcmp', 0b00100000110011011000000000000000, :bext, :cext, :ccond - # 0b00100bbb110011011BBBuuuuuu1QQQQQ - addop32 'rcmp', 0b00100000110011011000000000100000, :b, :u6, :ccond - - add_artihm_op_reduce 'rlc', 0b00100, 0b001011 - add_artihm_op_reduce 'rnd16', 0b00101, 0b000011 - add_artihm_op_reduce 'ror', 0b00100, 0b000011 - - add_artihm_op 'ror', 0b00101, 0b000011, :extended 
- - add_artihm_op_reduce 'rrc', 0b00100, 0b000100 - - add_artihm_op 'rsub', 0b00100, 0b001110 - - addop32 'rtie', 0b00100100011011110000000000111111, :setip, :stopexec - - add_artihm_op_reduce 'sat16', 0b00101, 0b000010 - - add_artihm_op 'sbc', 0b00100, 0b000011 - - add_artihm_op_reduce 'sexb', 0b00100, 0b000101 - add_artihm_op_reduce 'sexbw', 0b00100, 0b000110 - - # 0b00100001011011110000uuuuuu111111 - addop32 'sleep', 0b00100001011011110000000000111111, :u6 - - # 0b00100bbb001010110BBBCCCCCCRRRRRR - addop32 'sr', 0b00100000001010110000000000000000, :bext, :@cext - # 0b00100110101010110111CCCCCCRRRRRR - addop32 'sr', 0b00100000101010110000000000000000, :bext, :auxs12 - # WTF: unknown encoding - addop32 'sr', 0b00100000011010110000000000000000, :bext, :auxs12 - - # 0b00011bbbssssssssSBBBCCCCCCDaaZZR - addop32 'st', 0b00011000000000000000000000000000, :cext, :@bs9, :sz1, :wb3, :cache5 - - add_artihm_op 'sub', 0b00100, 0b000010 - add_artihm_op 'sub1', 0b00100, 0b010111 - add_artihm_op 'sub2', 0b00100, 0b011000 - add_artihm_op 'sub3', 0b00100, 0b011001 - - # WTF: same encoding as xor instructions - #add_artihm_op 'subs', 0b00100, 0b000111 - - add_artihm_op 'subsdw', 0b00101, 0b101001, :extended - - add_artihm_op_reduce 'swap', 0b00101, 0b000000 - - addop32 'swi', 0b00100010011011110000000000111111, :setip, :stopexec - addop32 'sync', 0b00100011011011110000000000111111 - - # 0b00100bbb100010111BBBssssssSSSSSS - addop32 'tst', 0b00100000100010111000000000000000, :b, :s12 - # 0b00100bbb110010111BBBCCCCCC0QQQQQ - addop32 'tst', 0b00100000110010111000000000000000, :bext, :cext, :ccond - # 0b00100bbb110010111BBBuuuuuu1QQQQQ - addop32 'tst', 0b00100000110010111000000000100000, :b, :u6, :ccond - - add_artihm_op 'xor', 0b00100, 0b000111 - end - - # ARCompact 16-bit instructions - def init_arc_compact16 - addop16 'abs_s', 0x7811, :cb, :cc - addop16 'add_s', 0x6018, :ca, :cb, :cc - addop16 'add_s', 0x7000, :cb, :cb2, :ch - addop16 'add_s', 0x6800, :cc, :cb, :cu3 - addop16 
'add_s', 0xe000, :cb, :cb2, :cu7 - - # same encoding as add_s b,b,h - #addop16 'add_s', 0x70c7, :cb, :cb2, :climm - - addop16 'add_s', 0xc080, :cb, :sp, :cu5ee - addop16 'add_s', 0xc0a0, :sp, :sp2, :cu5ee - addop16 'add_s', 0xce00, :cr0, :gp, :cs9 - addop16 'add1_s', 0x7814, :cb, :cb2, :cc - addop16 'add2_s', 0x7815, :cb, :cb2, :cc - addop16 'add3_s', 0x7816, :cb, :cb2, :cc - addop16 'and_s', 0x7804, :cb, :cb2, :cc - addop16 'asl_s', 0x7818, :cb, :cb2, :cc - addop16 'asl_s', 0x6810, :cc, :cb, :cu3 - addop16 'asl_s', 0xb800, :cb, :cb2, :cu5 - addop16 'asl_s', 0x781b, :cb, :cc - addop16 'asr_s', 0x781a, :cb, :cb2, :cc - addop16 'asr_s', 0x6818, :cc, :cb, :cu3 - addop16 'asr_s', 0xb840, :cb, :cb2, :cu5 - addop16 'asr_s', 0x781c, :cb, :cc - addop16 'b_s', 0xf000, :cdisps10, :setip, :stopexec - addop16 'beq_s', 0xf200, :cdisps10, :setip - addop16 'bne_s', 0xf400, :cdisps10, :setip - addop16 'bgt_s', 0xf600, :cdisps7, :setip - addop16 'bge_s', 0xf640, :cdisps7, :setip - addop16 'blt_s', 0xf680, :cdisps7, :setip - addop16 'ble_s', 0xf6c0, :cdisps7, :setip - addop16 'bhi_s', 0xf700, :cdisps7, :setip - addop16 'bhs_s', 0xf740, :cdisps7, :setip - addop16 'blo_s', 0xf780, :cdisps7, :setip - addop16 'bls_s', 0xf7c0, :cdisps7, :setip - addop16 'bclr_s', 0xb8a0, :cb, :cb2, :cu5 - addop16 'bic_s', 0x7806, :cb, :cb2, :cc - addop16 'bl_s', 0xf800, :cdisps13, :setip, :saveip, :stopexec - addop16 'bmsk_s', 0xb8c0, :cb, :cb2, :cu5 - addop16 'breq_s', 0xe800, :cb, :zero, :cdisps8, :setip - addop16 'brne_s', 0xe880, :cb, :zero, :cdisps8, :setip - addop16 'brk_s', 0x7fff - addop16 'bset_s', 0xb880, :cb, :cb2, :cu5 - addop16 'btst_s', 0xb8e0, :cb, :cu5 - addop16 'cmp_s', 0x7010, :cb, :ch - addop16 'cmp_s', 0xe080, :cb, :cu7 - - # encoded over cmp_s b,h - # addop16 'cmp_s', 0x70d7, :cb, :limm - - addop16 'extb_s', 0x780f, :cb, :cc - addop16 'extw_s', 0x7810, :cb, :cc - addop16 'j_s', 0x7800, :@cb, :setip, :stopexec - addop16 'j_s.d', 0x7820, :@cb, :setip, :stopexec, :delay_slot - addop16 
'j_s', 0x7ee0, :@blink, :setip, :stopexec - addop16 'j_s.d', 0x7fe0, :@blink, :setip, :stopexec, :delay_slot - addop16 'jeq_s', 0x7ce0, :@blink, :setip - addop16 'jne_s', 0x7de0, :@blink, :setip - addop16 'jl_s', 0x7840, :@cb, :setip, :saveip, :stopexec - addop16 'jl_s.d', 0x7860, :@cb, :setip, :saveip, :stopexec, :delay_slot - addop16 'ld_s', 0x6000, :ca, :@cbcc - addop16 'ldb_s', 0x6008, :ca, :@cbcc - addop16 'ldw_s', 0x6010, :ca, :@cbcc - addop16 'ld_s', 0x8000, :cc, :@cbu7 - addop16 'ldb_s', 0x8800, :cc, :@cbu5 - addop16 'ldw_s', 0x9000, :cc, :@cbu6 - addop16 'ldw_s.x', 0x9800, :cc, :@cbu6 - addop16 'ld_s', 0xc000, :cb, :@cspu7 - addop16 'ldb_s', 0xc020, :cb, :@cspu7 - addop16 'ld_s', 0xc800, :cr0, :@gps11 - addop16 'ldb_s', 0xca00, :cr0, :@gps9 - addop16 'ldw_s', 0xcc00, :cr0, :@gps10 - addop16 'ld_s', 0xd000, :cb, :@pclu10 - - # FIXME: exact same encoding as asl_s instructions - #addop16 'lsl_s', 0x7818, :cb, :cb2, :cc - #addop16 'lsl_s', 0x6810, :cc, :cb, :cu3 - #addop16 'lsl_s', 0xb800, :cb, :cb2, :cu5 - #addop16 'lsl_s', 0x781d, :cb, :cc - - addop16 'lsr_s', 0x7819, :cb, :cb2, :cc - addop16 'lsr_s', 0xb820, :cb, :cb2, :cu5 - addop16 'lsr_s', 0x781d, :cb, :cc - addop16 'mov_s', 0x7008, :cb, :ch - - # FIXME: same encoding as previous instruction - #addop16 'mov_s', 0x70cf, :cb, :limm - - addop16 'mov_s', 0xd800, :cb, :cu8 - addop16 'mov_s', 0x7018, :ch, :cb - - # TODO seems to overlap with previous instruction - addop16 'mov_s', 0x70df, :zero, :cb - addop16 'mul64_s', 0x780c, :zero, :cb, :cc - addop16 'neg_s', 0x7813, :cb, :cc - addop16 'not_s', 0x7812, :cb, :cc - addop16 'nop_s',0x78e0 - addop16 'unimp_s', 0x79e0 - addop16 'or_s', 0x7805, :cb, :cb2, :cc - addop16 'pop_s', 0xc0c1, :cb - addop16 'pop_s', 0xc0d1, :blink - addop16 'push_s', 0xc0e1, :cb - addop16 'push_s', 0xc0f1, :blink - addop16 'sexb_s', 0x780d, :cb, :cc - addop16 'sexw_s', 0x780e, :cb, :cc - addop16 'st_s', 0xc040, :cb, :@cspu7 - addop16 'stb_s', 0xc060, :cb, :@cspu7 - addop16 'st_s', 
0xa000, :cc, :@cbu7 - addop16 'stb_s', 0xa800, :cc, :@cbu5 - addop16 'stw_s', 0xb000, :cc, :@cbu6 - addop16 'sub_s', 0x7802, :cb, :cb2, :cc - addop16 'sub_s', 0x6808, :cc, :cb, :cu3 - addop16 'sub_s', 0xb860, :cb, :cb2, :cu5 - addop16 'sub_s', 0xc1a0, :sp, :sp2, :cu5ee - addop16 'sub_s.ne', 0x78c0, :cb, :c2, :cb3 - addop16 'trap_s', 0x781E, :cu6, :setip, :stopexec - addop16 'tst_s', 0x780b, :cb, :cc - addop16 'xor_s', 0x7807, :cb, :cb2, :cc - end + def addop32(name, bin, *args) + addop(:ac32, name, bin, *args) + end + + def addop16(name, bin, *args) + addop(:ac16, name, bin, *args) + end + + def addop(mode, name, bin, *args) + o = Opcode.new(name) + o.bin = bin + args.each { |a| + o.args << a if @fields_mask[a] + o.props[a] = true if @valid_props[a] + o.fields[a] = [@fields_mask[a], @fields_shift[a]] if @fields_mask[a] + } + (mode == :ac16) ? (@opcode_list16 << o) : (@opcode_list32 << o) + end + + def init_opcode_list + @opcode_list16 = [] + @opcode_list32 = [] + + @valid_props.update :flag_update => true, :delay_slot => true + @cond_suffix = [''] + %w[z nz p n cs cc vs vc gt ge lt le hi ls pnz] + #The remaining 16 condition codes (10-1F) are available for extension + @cond_suffix += (0x10..0x1f).map{ |i| "extcc#{i.to_s(16)}" } + + # Compact 16-bits operands field masks + fields_mask16 = { + :ca => 0x7, :cb => 0x7, :cb2 => 0x7, :cb3 => 0x7, :cc => 0x7, + :cu => 0x1f, + :ch => 0b11100111, + + # immediate (un)signed + :cu3 => 0x7, :cu8 => 0xff, + # cu7 is 32-bit aligned, cu6 is 16-bit aligned, cu6 is 8-bit aligned + :cu5 => 0x1f, :cu5ee => 0x1f, :cu6 => 0x3f, :cu7 => 0x7f, + + :cs9 => 0x1ff, :cs9ee => 0x1ff, :cs10 => 0x1ff, :cs11 => 0x1ff, + + # signed displacement + :cdisps7=> 0x3f, :cdisps8 => 0x7f, :cdisps10 => 0x1ff, :cdisps13 => 0x7FF, + + # memref [b+u], [sp,u], etc. 
+ :@cb => 0x7, :@cbu7 => 0b11100011111, :@cbu6 => 0b11100011111, :@cbu5 => 0b11100011111, + :@cspu7 => 0b11111, :@cbcc => 0b111111, + :@gps9 => 0x1ff, :@gps10 => 0x1ff, :@gps11 => 0x1ff, + + # implicit operands + :climm => 0x0, :cr0 => 0x0, + :blink => 0x0, :@blink => 0x0, :gp => 0x0, :sp => 0x0, :sp2 => 0x0, :zero => 0x0 + } + + fields_shift16 = { + :ca => 0x0, :cb => 0x8, :cb2 => 0x8, :cb3 => 0x8, :cc => 0x5, + :cu => 0x0, + + # immediate (un)signed + :ch => 0x0, + :cu3 => 0x0, :cu5 => 0, :cu5ee => 0, :cu6 => 5, :cu7 => 0x0, :cu8 => 0x0, + :cs9 => 0x0, :cs9ee => 0x0, :cs10 => 0x0, :cs11 => 0x0, + + # signed displacement + :cdisps7=> 0x0, :cdisps8 => 0x0, :cdisps10 => 0x0, :cdisps13 => 0x0, + + # memref [b+u] + :@cb => 0x8, :@cbu7 => 0x0, :@cbu6 => 0x0, :@cbu5 => 0x0, + :@cspu7 => 0x0, :@cbcc => 0x5, + :@gps9 => 0x0, :@gps10 => 0x0, :@gps11 => 0x0, + + # implicit operands + :climm => 0x0, :cr0 => 0x0, + :blink => 0x0, :@blink => 0x0, :gp => 0x0, :sp => 0x0, :sp2 => 0x0, :zero => 0x0, + } + + fields_mask32 = { + :a => 0x3f, :b => 0b111000000000111, :bext => 0b111000000000111, + :c => 0x3f, :@c => 0x3f, :cext => 0x3f, :@cext => 0x3f, + + :u6 => 0x3f, :u6e => 0x3f, + :s8e => 0x1fd, :s9 => 0x7f, + :s12 => 0xfff, :s12e => 0xfff, + :s21e => 0x1ffBff, :s21ee => 0x1ff3ff, + :s25e => 0x7feffcf, :s25ee => 0x7fcffcf, + + :@bs9 => 0x7fff, :@bc => 0x1ff, :@bextcext => 0x1C01FF, + + :limm => 0x0, :@limm => 0x0, + :@limmc => 0x3f, :@blimm => 0x7, + + :auxlimm => 0x0, :auxs12 => 0xfff, + + :ccond => 0x1f, #condition codes + :delay5 => 1, :delay16 => 1,# delay slot + :flags15 => 0x1, :flags16 => 0x1, + :signext6 => 0x1, :signext16 => 0x1, + :cache5 => 0x1, :cache11 => 0x1, :cache16 => 0x1, # data cache mode field + :sz1 => 0x3, :sz7 => 0x3, :sz16 => 0x3, :sz17 => 0x3, #data size field + :wb3 => 0x3, :wb9 => 0x3, :wb22 => 0x3, #write-back flag + :zero => 0x0, :b2 => 0x0, :@ilink1 => 0x0, :@ilink2 => 0x0 + } + #FIXME + + fields_shift32 = { + :a => 0x0, :b => 0xC, :bext => 0xC, + :c 
=> 0x6, :@c => 0x6, :cext => 0x6, :@cext => 0x6, + + :u6 => 0x6, :u6e =>0x6, + :s8e => 15, :s9 => 0x11, + :s12 => 0x0, :s12e => 0, + :s21e => 0x6, :s21ee => 0x6, + :s25e => 0, :s25ee => 0, + + :limm => 0x0, :@limm => 0x0, + :@limmc => 0x6, :@blimm => 0x18, + + :auxlimm => 0x0, :auxs12 => 0, + + :@bs9 => 12, :@bc => 6, :@bextcext => 6, + + :ccond => 0, #condition codes + :delay5 => 5, :delay16 => 16,# delay slot + :flags15 => 15, :flags16 => 16, + :signext6 => 6, :signext16 => 16, + :cache5 => 5, :cache11 => 11, :cache16 => 16, # data cache mode field + :sz1 => 1, :sz7 => 7, :sz16 => 16, :sz17 => 17, #data size field + :wb3 => 3, :wb9 => 9, :wb22 => 22, #write-back flag + :zero => 0x0, :b2 => 0x0, :@ilink1 => 0, :@ilink2 => 0, + } + + @fields_mask = fields_mask16.merge(fields_mask32) + @fields_shift = fields_shift16.merge(fields_shift32) + + init_arc_compact16() + init_arc_compact32() + + {16 => @opcode_list16, 32 => @opcode_list32} + end + + def add_artihm_op(op, majorcode, subcode, *flags) + # 0bxxxxxbbb00xxxxxxFBBBCCCCCCAAAAAA + addop32 op, 0b00000000000000000000000000000000 | majorcode << 0x1b | subcode << 16, :a, :bext, :cext, :flags15 + # 0bxxxxxbbb01xxxxxxFBBBuuuuuuAAAAAA + addop32 op, 0b00000000010000000000000000000000 | majorcode << 0x1b | subcode << 16, :a, :b, :u6, :flags15 + # 0bxxxxxbbb10xxxxxxFBBBssssssSSSSSS + addop32 op, 0b00000000100000000000000000000000 | majorcode << 0x1b | subcode << 16, :b, :b2, :s12, :flags15 + # 0bxxxxxbbb11xxxxxxFBBBCCCCCC0QQQQQ + addop32 op, 0b00000000110000000000000000000000 | majorcode << 0x1b | subcode << 16, :b, :b2, :cext, :ccond, :flags15 + # 0bxxxxxbbb11xxxxxxFBBBuuuuuu1QQQQQ + addop32 op, 0b00000000110000000000000000100000 | majorcode << 0x1b | subcode << 16, :b, :b2, :u6, :ccond, :flags15 + end + + def add_logical_op(op, majorcode, subcode, *flags) + # 0b00100bbb00xxxxxxFBBBCCCCCCAAAAAA + addop32 op, 0b00100000000000000000000000000000 | majorcode << 0x1b | subcode << 16, :a, :bext, :c, :flags15 + # 
0b00100bbb01xxxxxxFBBBuuuuuuAAAAAA + addop32 op, 0b00100000010000000000000000000000 | majorcode << 0x1b | subcode << 16, :a, :b, :u6, :flags15 + # 0b00100bbb11xxxxxxFBBBCCCCCC0QQQQQ + # WTF + addop32 op, 0b00100000110000000000000000000000 | majorcode << 0x1b | subcode << 16, :b, :b2, :c, :ccond, :flags15 + # 0b00100bbb11xxxxxxFBBBuuuuuu1QQQQQ + addop32 op, 0b00100000110000000000000000100000 | majorcode << 0x1b | subcode << 16, :b, :b2, :u6, :ccond, :flags15 + end + + def add_artihm_op_reduce(op, majorcode, subcode) + # 0bxxxxxbbb00101111FBBBCCCCCCxxxxxx + addop32 op, 0b00000000001011110000000000000000 | majorcode << 0x1b | subcode, :b, :cext, :flags15 + # 0bxxxxxbbb01101111FBBBuuuuuuxxxxxx + addop32 op, 0b00000000011011110000000000000000 | majorcode << 0x1b | subcode, :b, :u6, :flags15 + end + + def add_condbranch_op(op, ccond) + # 0b00001bbbsssssss1SBBBUUUUUUN0xxxx + addop32 op, 0b00001000000000010000000000000000 | ccond, :bext, :cext, :s8e, :setip, :delay5 + # 0b00001bbbsssssss1SBBBUUUUUUN1xxxx + addop32 op, 0b00001000000000010000000000010000 | ccond, :b, :u6, :s8e, :setip, :delay5 + end + + def add_condjmp_op() + # 0b00100RRR1110000D0RRRCCCCCC0QQQQQ + addop32 'j', 0b00100000111000000000000000000000, :@cext, :ccond, :setip, :delay16 + # 0b00100RRR1110000D0RRRuuuuuu1QQQQQ + addop32 'j', 0b00100000111000000000000000100000, :u6, :ccond, :setip, :delay16 + # 0b00100RRR111000001RRR0111010QQQQQ + addop32 'j', 0b00100000111000001000011101000000, :@ilink1, :ccond, :setip, :flag_update + # 0b00100RRR111000001RRR0111100QQQQQ + addop32 'j', 0b00100000111000001000011110000000, :@ilink2, :ccond, :setip, :flag_update + end + + def add_condjmplink_op() + # 0b00100RRR111000100RRRCCCCCC0QQQQQ + addop32 'jl', 0b00100000111000100000000000000000, :@cext, :ccond, :setip, :saveip, :delay16 + # 0b00100RRR111000100RRRuuuuuu1QQQQQ + addop32 'jl', 0b00100000111000100000000000100000, :u6, :ccond, :setip, :saveip, :delay16 + end + + def init_arc_compact32 + + add_artihm_op_reduce 'abs', 
0b00100, 0b001001 + add_artihm_op_reduce 'abss', 0b00101, 0b000101 + add_artihm_op_reduce 'abssw', 0b00101, 0b000100 + + add_artihm_op 'adc', 0b00100, 0b000001 + add_artihm_op 'add', 0b00100, 0b000000 + add_artihm_op 'add1', 0b00100, 0b010100 + add_artihm_op 'add2', 0b00100, 0b010101 + add_artihm_op 'add3', 0b00100, 0b010110 + add_artihm_op 'adds', 0b00101, 0b000110 + add_artihm_op 'addsw', 0b00101, 0b010101, :extended + add_artihm_op 'addsdw',0b00101, 0b101000, :extended + add_artihm_op 'and' ,0b00100, 0b000100 + + add_artihm_op_reduce 'asl', 0b00100, 0b000000 + + add_artihm_op 'asl', 0b00101, 0b000000, :extended + add_artihm_op 'asls', 0b00101, 0b001010, :extended + + add_artihm_op_reduce 'asr', 0b00100, 0b000001 + + add_artihm_op 'asr', 0b00101, 0b000010 + add_artihm_op 'asrs', 0b00101, 0b001011 + + # 0b00001bbbsssssss1SBBBCCCCCCN01110 + addop32 'bbit0', 0b00001000000000010000000000001110, :b, :c, :s9, :delay5, :setip + # 0b00001bbbsssssss1SBBBuuuuuuN11110 + addop32 'bbit0', 0b00001000000000010000000000011110, :b, :u6, :s9, :delay5, :setip + # 0b00001bbbsssssss1SBBBCCCCCCN01111 + addop32 'bbit1', 0b00001000000000010000000000001111, :b, :c, :s9, :delay5, :setip + # 0b00001bbbsssssss1SBBBuuuuuuN11111 + addop32 'bbit1', 0b00001000000000010000000000011111, :b, :u6, :s9, :delay5, :setip + + # 0b00000ssssssssss0SSSSSSSSSSNQQQQQ + addop32 'b', 0b00000000000000000000000000000000, :s21e, :ccond, :delay5, :setip + # 0b00000ssssssssss1SSSSSSSSSSNRtttt + addop32 'b', 0b00000000000000010000000000000000, :s25e, :delay5, :setip, :stopexec + # WTF: unknown encoding, bit 5 should be reserved + addop32 'b', 0b00000000000000010000000000010000, :s25e, :delay5, :setip, :stopexec + + add_logical_op 'bclr', 0b00100, 0b010000 + add_artihm_op 'bic', 0b00100, 0b000110 + + # 0b00001sssssssss00SSSSSSSSSSNQQQQQ + addop32 'bl', 0b00001000000000000000000000000000, :s21ee, :ccond, :delay5, :setip, :saveip + # 0b00001sssssssss10SSSSSSSSSSNRtttt + addop32 'bl', 
0b00001000000000100000000000000000, :s25ee, :delay5, :setip, :saveip, :stopexec + + add_logical_op 'bmsk', 0b00100, 0b010011 + + add_condbranch_op 'breq', 0b0000 + add_condbranch_op 'brne', 0b0001 + add_condbranch_op 'brlt', 0b0010 + add_condbranch_op 'brge', 0b0011 + add_condbranch_op 'brlo', 0b0100 + add_condbranch_op 'brhs', 0b0101 + + addop32 'brk', 0b00100101011011110000000000111111, :stopexec + + add_logical_op 'bset', 0b00100, 0b001111 + + # 0b00100bbb110100011BBBCCCCCC0QQQQQ + addop32 'btst', 0b00100000110100011000000000000000, :bext, :c, :ccond + # 0b00100bbb110100011BBBuuuuuu1QQQQQ + addop32 'btst', 0b00100000110100011000000000100000, :b, :u6, :ccond + # WTF 0b00100bbb010100011BBBuuuuuu0QQQQQ + addop32 'btst', 0b00100000010100011000000000000000, :b, :u6, :ccond + + add_logical_op 'bxor', 0b00100, 0b010010 + + # 0b00100bbb100011001BBBssssssSSSSSS + addop32 'cmp', 0b00100000100011001000000000000000, :b, :s12 + # WTF unknown encoding ... + # 0b00100bbb010011001BBBssssssSSSSSS + addop32 'cmp', 0b00100000010011001000000000000000, :b, :s12 + # 0b00100bbb110011001BBBuuuuuu1QQQQQ + addop32 'cmp', 0b00100000110011001000000000100000, :b, :u6, :ccond + # WTF unknown encoding ... + # 0b00100bbb010011001BBBssssssSSSSSS + addop32 'cmp', 0b00100000000011001000000000000000, :bext, :cext, :ccond + # 0b00100bbb110011001BBBCCCCCC0QQQQQ + addop32 'cmp', 0b00100000110011001000000000000000, :bext, :cext, :ccond + + add_artihm_op 'divaw', 0b00101, 0b001000, :extended + + # 0b00100bbb00101111DBBBCCCCCC001100 + addop32 'ex', 0b00100000001011110000000000001100, :b, :@cext, :cache16 + # 0b00100bbb01101111DBBBuuuuuu001100 + addop32 'ex', 0b00100000011011110000000000001100, :b, :@u6, :cache16 + + add_artihm_op_reduce 'extb', 0b00100, 0b000111 + add_artihm_op_reduce 'extw', 0b00100, 0b001000 + + # WTF unknown encoding ... 
+ # 0b00100rrr111010010RRRCCCCCC0QQQQQ + addop32 'flag', 0b00100000001010010000000000000000, :cext, :ccond, :flag_update + # 0b00100rrr111010010RRRuuuuuu1QQQQQ + addop32 'flag', 0b00100000001010010000000000100000, :u6, :ccond, :flag_update + # 0b00100rrr101010010RRRssssssSSSSSS + addop32 'flag', 0b00100000011010010000000000000000, :s12, :flag_update + + add_condjmp_op() + add_condjmplink_op() + + # 0b00100RRR001000000RRRCCCCCCRRRRRR + addop32 'j', 0b00100000001000000000000000000000, :@cext, :delay16, :setip, :stopexec + # 0b00100RRR011000000RRRuuuuuuRRRRRR + addop32 'j', 0b00100000011000000000000000000000, :u6, :delay16, :setip, :stopexec + # 0b00100RRR101000000RRRssssssSSSSSS + addop32 'j', 0b00100000101000000000000000000000, :s12, :delay16, :setip, :stopexec + # 0b00100RRR001000001RRR011101RRRRRR + addop32 'j.f', 0b00100000001000001000011101000000, :@ilink1, :flag_update, :setip, :stopexec + # 0b00100RRR001000001RRR011110RRRRRR + addop32 'j.f', 0b00100000001000001000011110000000, :@ilink2, :flag_update, :setip, :stopexec + + # 0b00100RRR0010001D0RRRCCCCCCRRRRRR + addop32 'jl', 0b00100000001000100000000000000000, :@cext, :delay16, :setip, :saveip, :stopexec + # 0b00100RRR0110001D0RRRuuuuuuRRRRRR + addop32 'jl', 0b00100000011000100000000000000000, :u6, :delay16, :setip, :saveip, :stopexec + # 0b00100RRR1010001D0RRRssssssSSSSSS + addop32 'jl', 0b00100000101000100000000000000000, :s12, :delay16, :setip, :saveip, :stopexec + + # 0b00010bbbssssssssSBBBDaaZZXAAAAAA + addop32 'ld', 0b00010000000000000000000000000000, :a, :@bs9, :sz7, :signext6, :wb9, :cache11 + + # 0b00100bbbaa110ZZXDBBBCCCCCCAAAAAA + addop32 'ld', 0b00100000001100000000000000000000, :a, :@bextcext, :sz17, :signext16, :wb22, :cache11 + + # 0b00100RRR111010000RRRuuuuuu1QQQQQ + addop32 'lp', 0b00100000111010000000000000100000, :u6e, :ccond, :setip + # 0b00100RRR101010000RRRssssssSSSSSS + addop32 'lp', 0b00100000101010000000000000000000, :s12e, :setip + + # 0b00100bbb001010100BBBCCCCCCRRRRRR + addop32 'lr', 
0b00100000101010100000000000000000, :b, :@c + # 0b00100bbb001010100BBB111110RRRRRR + addop32 'lr', 0b00100000001010100000111110000000, :b, :auxlimm + # 0b00100bbb101010100BBBssssssSSSSSS + addop32 'lr', 0b00100000011010100000000000000000, :b, :auxs12 + # WTF unknown encoding ... + # 0b00100bbb101010100BBBssssssSSSSSS + addop32 'lr', 0b00100000101010100000000000000000, :b, :auxs12 + + add_artihm_op_reduce 'lsr', 0b00100, 0b000010 + + add_artihm_op 'lsr', 0b00101, 0b000001 + add_artihm_op 'max', 0b00100, 0b001000 + add_artihm_op 'min', 0b00100, 0b001001 + + # 0b00100bbb10001010FBBBssssssSSSSSS + addop32 'mov', 0b00100000100010100000000000000000, :b, :s12, :flags15 + # WTF unknown encoding ... + # 0b00100bbb01001010FBBBssssssSSSSSS + addop32 'mov', 0b00100000010010100000000000000000, :b, :s12, :flags15 + # 0b00100bbb11001010FBBBCCCCCC0QQQQQ + addop32 'mov', 0b00100000110010100000000000000000, :b, :cext, :ccond , :flags15 + # WTF unknown encoding .. + # 0b00100bbb00001010FBBBCCCCCC0QQQQQ + addop32 'mov', 0b00100000000010100000000000000000, :b, :cext, :ccond , :flags15 + # 0b00100bbb11001010FBBBuuuuuu1QQQQQ + addop32 'mov', 0b00100000110010100000000000100000, :b, :u6, :ccond , :flags15 + + add_artihm_op 'mpy', 0b00100, 0b011010, :extended + add_artihm_op 'mpyh', 0b00100, 0b011011, :extended + add_artihm_op 'mpyhu', 0b00100, 0b011100, :extended + add_artihm_op 'mpyu', 0b00100, 0b011101, :extended + + # WTF: neg instruction is not differenciated from a rsub :a, :b, :u6 + # : 0b00100bbb01001110FBBB000000AAAAAA + #addop32 'neg', 0b00100000010011100000000000000000, :a, :b, :flags15 + + # WTF: neg instruction is not differenciated from a rsub :b, :b2, :u6 + # 0b00100bbb11001110FBBB0000001QQQQQ + #addop32 'neg', 0b00100000110011100000000000100000, :b, :b2, :ccond , :flags15 + + add_artihm_op_reduce 'negs', 0b00101, 0b000111 + add_artihm_op_reduce 'negsw', 0b00101, 0b000110 + + # nop is an alias over mov null, 0 (mov - [:b, :s12, :flags15]) + addop32 'nop', 
0b00100110010010100111000000000000 + + add_artihm_op_reduce 'norm', 0b00101, 0b000001 + add_artihm_op_reduce 'normw', 0b00101, 0b001000 + add_artihm_op_reduce 'not', 0b00100, 0b001010 + + add_artihm_op 'or', 0b00100, 0b000101 + + # 0b00010bbbssssssssSBBB0aa000111110 + addop32 'prefetch', 0b00010000000000000000000000111110, :@bs9, :wb + # 0b00100bbbaa1100000BBBCCCCCC111110 + addop32 'prefetch', 0b00100000001100000000000000111110, :@bextcext, :wb22 + + # 0b00100bbb100011011BBBssssssSSSSSS + addop32 'rcmp', 0b00100000100011011000000000000000, :b, :s12 + # 0b00100bbb110011011BBBCCCCCC0QQQQQ + addop32 'rcmp', 0b00100000110011011000000000000000, :bext, :cext, :ccond + # 0b00100bbb110011011BBBuuuuuu1QQQQQ + addop32 'rcmp', 0b00100000110011011000000000100000, :b, :u6, :ccond + + add_artihm_op_reduce 'rlc', 0b00100, 0b001011 + add_artihm_op_reduce 'rnd16', 0b00101, 0b000011 + add_artihm_op_reduce 'ror', 0b00100, 0b000011 + + add_artihm_op 'ror', 0b00101, 0b000011, :extended + + add_artihm_op_reduce 'rrc', 0b00100, 0b000100 + + add_artihm_op 'rsub', 0b00100, 0b001110 + + addop32 'rtie', 0b00100100011011110000000000111111, :setip, :stopexec + + add_artihm_op_reduce 'sat16', 0b00101, 0b000010 + + add_artihm_op 'sbc', 0b00100, 0b000011 + + add_artihm_op_reduce 'sexb', 0b00100, 0b000101 + add_artihm_op_reduce 'sexbw', 0b00100, 0b000110 + + # 0b00100001011011110000uuuuuu111111 + addop32 'sleep', 0b00100001011011110000000000111111, :u6 + + # 0b00100bbb001010110BBBCCCCCCRRRRRR + addop32 'sr', 0b00100000001010110000000000000000, :bext, :@cext + # 0b00100110101010110111CCCCCCRRRRRR + addop32 'sr', 0b00100000101010110000000000000000, :bext, :auxs12 + # WTF: unknown encoding + addop32 'sr', 0b00100000011010110000000000000000, :bext, :auxs12 + + # 0b00011bbbssssssssSBBBCCCCCCDaaZZR + addop32 'st', 0b00011000000000000000000000000000, :cext, :@bs9, :sz1, :wb3, :cache5 + + add_artihm_op 'sub', 0b00100, 0b000010 + add_artihm_op 'sub1', 0b00100, 0b010111 + add_artihm_op 'sub2', 0b00100, 
0b011000 + add_artihm_op 'sub3', 0b00100, 0b011001 + + # WTF: same encoding as xor instructions + #add_artihm_op 'subs', 0b00100, 0b000111 + + add_artihm_op 'subsdw', 0b00101, 0b101001, :extended + + add_artihm_op_reduce 'swap', 0b00101, 0b000000 + + addop32 'swi', 0b00100010011011110000000000111111, :setip, :stopexec + addop32 'sync', 0b00100011011011110000000000111111 + + # 0b00100bbb100010111BBBssssssSSSSSS + addop32 'tst', 0b00100000100010111000000000000000, :b, :s12 + # 0b00100bbb110010111BBBCCCCCC0QQQQQ + addop32 'tst', 0b00100000110010111000000000000000, :bext, :cext, :ccond + # 0b00100bbb110010111BBBuuuuuu1QQQQQ + addop32 'tst', 0b00100000110010111000000000100000, :b, :u6, :ccond + + add_artihm_op 'xor', 0b00100, 0b000111 + end + + # ARCompact 16-bit instructions + def init_arc_compact16 + addop16 'abs_s', 0x7811, :cb, :cc + addop16 'add_s', 0x6018, :ca, :cb, :cc + addop16 'add_s', 0x7000, :cb, :cb2, :ch + addop16 'add_s', 0x6800, :cc, :cb, :cu3 + addop16 'add_s', 0xe000, :cb, :cb2, :cu7 + + # same encoding as add_s b,b,h + #addop16 'add_s', 0x70c7, :cb, :cb2, :climm + + addop16 'add_s', 0xc080, :cb, :sp, :cu5ee + addop16 'add_s', 0xc0a0, :sp, :sp2, :cu5ee + addop16 'add_s', 0xce00, :cr0, :gp, :cs9 + addop16 'add1_s', 0x7814, :cb, :cb2, :cc + addop16 'add2_s', 0x7815, :cb, :cb2, :cc + addop16 'add3_s', 0x7816, :cb, :cb2, :cc + addop16 'and_s', 0x7804, :cb, :cb2, :cc + addop16 'asl_s', 0x7818, :cb, :cb2, :cc + addop16 'asl_s', 0x6810, :cc, :cb, :cu3 + addop16 'asl_s', 0xb800, :cb, :cb2, :cu5 + addop16 'asl_s', 0x781b, :cb, :cc + addop16 'asr_s', 0x781a, :cb, :cb2, :cc + addop16 'asr_s', 0x6818, :cc, :cb, :cu3 + addop16 'asr_s', 0xb840, :cb, :cb2, :cu5 + addop16 'asr_s', 0x781c, :cb, :cc + addop16 'b_s', 0xf000, :cdisps10, :setip, :stopexec + addop16 'beq_s', 0xf200, :cdisps10, :setip + addop16 'bne_s', 0xf400, :cdisps10, :setip + addop16 'bgt_s', 0xf600, :cdisps7, :setip + addop16 'bge_s', 0xf640, :cdisps7, :setip + addop16 'blt_s', 0xf680, :cdisps7, :setip 
+ addop16 'ble_s', 0xf6c0, :cdisps7, :setip + addop16 'bhi_s', 0xf700, :cdisps7, :setip + addop16 'bhs_s', 0xf740, :cdisps7, :setip + addop16 'blo_s', 0xf780, :cdisps7, :setip + addop16 'bls_s', 0xf7c0, :cdisps7, :setip + addop16 'bclr_s', 0xb8a0, :cb, :cb2, :cu5 + addop16 'bic_s', 0x7806, :cb, :cb2, :cc + addop16 'bl_s', 0xf800, :cdisps13, :setip, :saveip, :stopexec + addop16 'bmsk_s', 0xb8c0, :cb, :cb2, :cu5 + addop16 'breq_s', 0xe800, :cb, :zero, :cdisps8, :setip + addop16 'brne_s', 0xe880, :cb, :zero, :cdisps8, :setip + addop16 'brk_s', 0x7fff + addop16 'bset_s', 0xb880, :cb, :cb2, :cu5 + addop16 'btst_s', 0xb8e0, :cb, :cu5 + addop16 'cmp_s', 0x7010, :cb, :ch + addop16 'cmp_s', 0xe080, :cb, :cu7 + + # encoded over cmp_s b,h + # addop16 'cmp_s', 0x70d7, :cb, :limm + + addop16 'extb_s', 0x780f, :cb, :cc + addop16 'extw_s', 0x7810, :cb, :cc + addop16 'j_s', 0x7800, :@cb, :setip, :stopexec + addop16 'j_s.d', 0x7820, :@cb, :setip, :stopexec, :delay_slot + addop16 'j_s', 0x7ee0, :@blink, :setip, :stopexec + addop16 'j_s.d', 0x7fe0, :@blink, :setip, :stopexec, :delay_slot + addop16 'jeq_s', 0x7ce0, :@blink, :setip + addop16 'jne_s', 0x7de0, :@blink, :setip + addop16 'jl_s', 0x7840, :@cb, :setip, :saveip, :stopexec + addop16 'jl_s.d', 0x7860, :@cb, :setip, :saveip, :stopexec, :delay_slot + addop16 'ld_s', 0x6000, :ca, :@cbcc + addop16 'ldb_s', 0x6008, :ca, :@cbcc + addop16 'ldw_s', 0x6010, :ca, :@cbcc + addop16 'ld_s', 0x8000, :cc, :@cbu7 + addop16 'ldb_s', 0x8800, :cc, :@cbu5 + addop16 'ldw_s', 0x9000, :cc, :@cbu6 + addop16 'ldw_s.x', 0x9800, :cc, :@cbu6 + addop16 'ld_s', 0xc000, :cb, :@cspu7 + addop16 'ldb_s', 0xc020, :cb, :@cspu7 + addop16 'ld_s', 0xc800, :cr0, :@gps11 + addop16 'ldb_s', 0xca00, :cr0, :@gps9 + addop16 'ldw_s', 0xcc00, :cr0, :@gps10 + addop16 'ld_s', 0xd000, :cb, :@pclu10 + + # FIXME: exact same encoding as asl_s instructions + #addop16 'lsl_s', 0x7818, :cb, :cb2, :cc + #addop16 'lsl_s', 0x6810, :cc, :cb, :cu3 + #addop16 'lsl_s', 0xb800, :cb, :cb2, 
:cu5 + #addop16 'lsl_s', 0x781d, :cb, :cc + + addop16 'lsr_s', 0x7819, :cb, :cb2, :cc + addop16 'lsr_s', 0xb820, :cb, :cb2, :cu5 + addop16 'lsr_s', 0x781d, :cb, :cc + addop16 'mov_s', 0x7008, :cb, :ch + + # FIXME: same encoding as previous instruction + #addop16 'mov_s', 0x70cf, :cb, :limm + + addop16 'mov_s', 0xd800, :cb, :cu8 + addop16 'mov_s', 0x7018, :ch, :cb + + # TODO seems to overlap with previous instruction + addop16 'mov_s', 0x70df, :zero, :cb + addop16 'mul64_s', 0x780c, :zero, :cb, :cc + addop16 'neg_s', 0x7813, :cb, :cc + addop16 'not_s', 0x7812, :cb, :cc + addop16 'nop_s',0x78e0 + addop16 'unimp_s', 0x79e0 + addop16 'or_s', 0x7805, :cb, :cb2, :cc + addop16 'pop_s', 0xc0c1, :cb + addop16 'pop_s', 0xc0d1, :blink + addop16 'push_s', 0xc0e1, :cb + addop16 'push_s', 0xc0f1, :blink + addop16 'sexb_s', 0x780d, :cb, :cc + addop16 'sexw_s', 0x780e, :cb, :cc + addop16 'st_s', 0xc040, :cb, :@cspu7 + addop16 'stb_s', 0xc060, :cb, :@cspu7 + addop16 'st_s', 0xa000, :cc, :@cbu7 + addop16 'stb_s', 0xa800, :cc, :@cbu5 + addop16 'stw_s', 0xb000, :cc, :@cbu6 + addop16 'sub_s', 0x7802, :cb, :cb2, :cc + addop16 'sub_s', 0x6808, :cc, :cb, :cu3 + addop16 'sub_s', 0xb860, :cb, :cb2, :cu5 + addop16 'sub_s', 0xc1a0, :sp, :sp2, :cu5ee + addop16 'sub_s.ne', 0x78c0, :cb, :c2, :cb3 + addop16 'trap_s', 0x781E, :cu6, :setip, :stopexec + addop16 'tst_s', 0x780b, :cb, :cc + addop16 'xor_s', 0x7807, :cb, :cb2, :cc + end end end diff --git a/lib/metasm/metasm/cpu/arm/debug.rb b/lib/metasm/metasm/cpu/arm/debug.rb index 6c115c47f4..972a9ed581 100644 --- a/lib/metasm/metasm/cpu/arm/debug.rb +++ b/lib/metasm/metasm/cpu/arm/debug.rb @@ -8,32 +8,32 @@ require 'metasm/cpu/arm/opcodes' module Metasm class ARM - def dbg_register_pc - @dbg_register_pc ||= :pc - end - def dbg_register_flags - @dbg_register_flags ||= :flags - end + def dbg_register_pc + @dbg_register_pc ||= :pc + end + def dbg_register_flags + @dbg_register_flags ||= :flags + end - def dbg_register_list - @dbg_register_list ||= 
[:r0, :r1, :r2, :r3, :r4, :r5, :r6, :r7, :r8, :r9, :r10, :r11, :r12, :sp, :lr, :pc] - end + def dbg_register_list + @dbg_register_list ||= [:r0, :r1, :r2, :r3, :r4, :r5, :r6, :r7, :r8, :r9, :r10, :r11, :r12, :sp, :lr, :pc] + end - def dbg_flag_list - @dbg_flag_list ||= [] - end + def dbg_flag_list + @dbg_flag_list ||= [] + end - def dbg_register_size - @dbg_register_size ||= Hash.new(32) - end + def dbg_register_size + @dbg_register_size ||= Hash.new(32) + end - def dbg_need_stepover(dbg, addr, di) - di and di.opcode.props[:saveip] - end + def dbg_need_stepover(dbg, addr, di) + di and di.opcode.props[:saveip] + end - def dbg_end_stepout(dbg, addr, di) - di and di.opcode.name == 'foobar' # TODO - end + def dbg_end_stepout(dbg, addr, di) + di and di.opcode.name == 'foobar' # TODO + end end end diff --git a/lib/metasm/metasm/cpu/arm/decode.rb b/lib/metasm/metasm/cpu/arm/decode.rb index dae5f1093e..ef56a5c41d 100644 --- a/lib/metasm/metasm/cpu/arm/decode.rb +++ b/lib/metasm/metasm/cpu/arm/decode.rb @@ -8,161 +8,161 @@ require 'metasm/decode' module Metasm class ARM - # create the bin_mask for a given opcode - def build_opcode_bin_mask(op) - # bit = 0 if can be mutated by an field value, 1 if fixed by opcode - op.bin_mask = 0 - op.fields.each { |k, (m, s)| - op.bin_mask |= m << s - } - op.bin_mask = 0xffffffff ^ op.bin_mask - end + # create the bin_mask for a given opcode + def build_opcode_bin_mask(op) + # bit = 0 if can be mutated by an field value, 1 if fixed by opcode + op.bin_mask = 0 + op.fields.each { |k, (m, s)| + op.bin_mask |= m << s + } + op.bin_mask = 0xffffffff ^ op.bin_mask + end - # create the lookaside hash from the first byte of the opcode - def build_bin_lookaside - lookaside = Array.new(256) { [] } + # create the lookaside hash from the first byte of the opcode + def build_bin_lookaside + lookaside = Array.new(256) { [] } - opcode_list.each { |op| - build_opcode_bin_mask op + opcode_list.each { |op| + build_opcode_bin_mask op - b = (op.bin >> 20) & 
0xff - msk = (op.bin_mask >> 20) & 0xff - b &= msk + b = (op.bin >> 20) & 0xff + msk = (op.bin_mask >> 20) & 0xff + b &= msk - for i in b..(b | (255^msk)) - lookaside[i] << op if i & msk == b - end - } + for i in b..(b | (255^msk)) + lookaside[i] << op if i & msk == b + end + } - lookaside - end + lookaside + end - def decode_findopcode(edata) - return if edata.ptr+4 > edata.length - di = DecodedInstruction.new(self) - val = edata.decode_imm(:u32, @endianness) - di.instance_variable_set('@raw', val) - di if di.opcode = @bin_lookaside[(val >> 20) & 0xff].find { |op| - (not op.props[:cond] or - ((val >> @fields_shift[:cond]) & @fields_mask[:cond]) != 0xf) and - (op.bin & op.bin_mask) == (val & op.bin_mask) - } - end + def decode_findopcode(edata) + return if edata.ptr+4 > edata.length + di = DecodedInstruction.new(self) + val = edata.decode_imm(:u32, @endianness) + di.instance_variable_set('@raw', val) + di if di.opcode = @bin_lookaside[(val >> 20) & 0xff].find { |op| + (not op.props[:cond] or + ((val >> @fields_shift[:cond]) & @fields_mask[:cond]) != 0xf) and + (op.bin & op.bin_mask) == (val & op.bin_mask) + } + end - def disassembler_default_func - df = DecodedFunction.new - df - end + def disassembler_default_func + df = DecodedFunction.new + df + end - def decode_instr_op(edata, di) - op = di.opcode - di.instruction.opname = op.name - val = di.instance_variable_get('@raw') + def decode_instr_op(edata, di) + op = di.opcode + di.instruction.opname = op.name + val = di.instance_variable_get('@raw') - field_val = lambda { |f| - r = (val >> @fields_shift[f]) & @fields_mask[f] - case f - when :i12; Expression.make_signed(r, 12) - when :i24; Expression.make_signed(r, 24) - when :i8_12; ((r >> 4) & 0xf0) | (r & 0xf) - when :stype; [:lsl, :lsr, :asr, :ror][r] - when :u; [:-, :+][r] - else r - end - } + field_val = lambda { |f| + r = (val >> @fields_shift[f]) & @fields_mask[f] + case f + when :i12; Expression.make_signed(r, 12) + when :i24; Expression.make_signed(r, 24) + 
when :i8_12; ((r >> 4) & 0xf0) | (r & 0xf) + when :stype; [:lsl, :lsr, :asr, :ror][r] + when :u; [:-, :+][r] + else r + end + } - if op.props[:cond] - cd = %w[eq ne cs cc mi pl vs vc hi ls ge lt gt le al][field_val[:cond]] - if cd != 'al' - di.opcode = di.opcode.dup - di.instruction.opname = di.opcode.name.dup - di.instruction.opname[(op.props[:cond_name_off] || di.opcode.name.length), 0] = cd - if di.opcode.props[:stopexec] - di.opcode.props = di.opcode.props.dup - di.opcode.props.delete :stopexec - end - end - end + if op.props[:cond] + cd = %w[eq ne cs cc mi pl vs vc hi ls ge lt gt le al][field_val[:cond]] + if cd != 'al' + di.opcode = di.opcode.dup + di.instruction.opname = di.opcode.name.dup + di.instruction.opname[(op.props[:cond_name_off] || di.opcode.name.length), 0] = cd + if di.opcode.props[:stopexec] + di.opcode.props = di.opcode.props.dup + di.opcode.props.delete :stopexec + end + end + end - op.args.each { |a| - di.instruction.args << case a - when :rd, :rn, :rm; Reg.new field_val[a] - when :rm_rs; Reg.new field_val[:rm], field_val[:stype], Reg.new(field_val[:rs]) - when :rm_is; Reg.new field_val[:rm], field_val[:stype], field_val[:shifti] - when :i12; Expression[field_val[a]] - when :i24; Expression[field_val[a] << 2] - when :i8_r - i = field_val[:i8] - r = field_val[:rotate]*2 - Expression[((i >> r) | (i << (32-r))) & 0xffff_ffff] - when :mem_rn_rm, :mem_rn_i8_12, :mem_rn_rms, :mem_rn_i12 - b = Reg.new(field_val[:rn]) - o = case a - when :mem_rn_rm; Reg.new(field_val[:rm]) - when :mem_rn_i8_12; field_val[:i8_12] - when :mem_rn_rms; Reg.new(field_val[:rm], field_val[:stype], field_val[:shifti]) - when :mem_rn_i12; field_val[:i12] - end - Memref.new(b, o, field_val[:u], op.props[:baseincr]) - when :reglist - di.instruction.args.last.updated = true if op.props[:baseincr] - msk = field_val[a] - l = RegList.new((0..15).map { |n| Reg.new(n) if (msk & (1 << n)) > 0 }.compact) - l.usermoderegs = true if op.props[:usermoderegs] - l - else raise SyntaxError, 
"Internal error: invalid argument #{a} in #{op.name}" - end - } + op.args.each { |a| + di.instruction.args << case a + when :rd, :rn, :rm; Reg.new field_val[a] + when :rm_rs; Reg.new field_val[:rm], field_val[:stype], Reg.new(field_val[:rs]) + when :rm_is; Reg.new field_val[:rm], field_val[:stype], field_val[:shifti] + when :i12; Expression[field_val[a]] + when :i24; Expression[field_val[a] << 2] + when :i8_r + i = field_val[:i8] + r = field_val[:rotate]*2 + Expression[((i >> r) | (i << (32-r))) & 0xffff_ffff] + when :mem_rn_rm, :mem_rn_i8_12, :mem_rn_rms, :mem_rn_i12 + b = Reg.new(field_val[:rn]) + o = case a + when :mem_rn_rm; Reg.new(field_val[:rm]) + when :mem_rn_i8_12; field_val[:i8_12] + when :mem_rn_rms; Reg.new(field_val[:rm], field_val[:stype], field_val[:shifti]) + when :mem_rn_i12; field_val[:i12] + end + Memref.new(b, o, field_val[:u], op.props[:baseincr]) + when :reglist + di.instruction.args.last.updated = true if op.props[:baseincr] + msk = field_val[a] + l = RegList.new((0..15).map { |n| Reg.new(n) if (msk & (1 << n)) > 0 }.compact) + l.usermoderegs = true if op.props[:usermoderegs] + l + else raise SyntaxError, "Internal error: invalid argument #{a} in #{op.name}" + end + } - di.bin_length = 4 - di - end + di.bin_length = 4 + di + end - def decode_instr_interpret(di, addr) - if di.opcode.args[-1] == :i24 - di.instruction.args[-1] = Expression[di.instruction.args[-1] + addr + 8] - end - di - end + def decode_instr_interpret(di, addr) + if di.opcode.args[-1] == :i24 + di.instruction.args[-1] = Expression[di.instruction.args[-1] + addr + 8] + end + di + end - def backtrace_binding - @backtrace_binding ||= init_backtrace_binding - end + def backtrace_binding + @backtrace_binding ||= init_backtrace_binding + end - def init_backtrace_binding - @backtrace_binding ||= {} - end + def init_backtrace_binding + @backtrace_binding ||= {} + end - def get_backtrace_binding(di) - a = di.instruction.args.map { |arg| - case arg - when Reg; arg.symbolic - when 
Memref; arg.symbolic(di.address) - else arg - end - } + def get_backtrace_binding(di) + a = di.instruction.args.map { |arg| + case arg + when Reg; arg.symbolic + when Memref; arg.symbolic(di.address) + else arg + end + } - if binding = backtrace_binding[di.opcode.name] - binding[di, *a] - else - puts "unhandled instruction to backtrace: #{di}" if $VERBOSE - # assume nothing except the 1st arg is modified - case a[0] - when Indirection, Symbol; { a[0] => Expression::Unknown } - when Expression; (x = a[0].externals.first) ? { x => Expression::Unknown } : {} - else {} - end.update(:incomplete_binding => Expression[1]) - end + if binding = backtrace_binding[di.opcode.name] + binding[di, *a] + else + puts "unhandled instruction to backtrace: #{di}" if $VERBOSE + # assume nothing except the 1st arg is modified + case a[0] + when Indirection, Symbol; { a[0] => Expression::Unknown } + when Expression; (x = a[0].externals.first) ? { x => Expression::Unknown } : {} + else {} + end.update(:incomplete_binding => Expression[1]) + end - end + end - def get_xrefs_x(dasm, di) - if di.opcode.props[:setip] - [di.instruction.args.last] - else - # TODO ldr pc, .. - [] - end - end + def get_xrefs_x(dasm, di) + if di.opcode.props[:setip] + [di.instruction.args.last] + else + # TODO ldr pc, .. 
+ [] + end + end end end diff --git a/lib/metasm/metasm/cpu/arm/encode.rb b/lib/metasm/metasm/cpu/arm/encode.rb index bf641d1088..246ce7f150 100644 --- a/lib/metasm/metasm/cpu/arm/encode.rb +++ b/lib/metasm/metasm/cpu/arm/encode.rb @@ -9,84 +9,84 @@ require 'metasm/encode' module Metasm class ARM - def encode_instr_op(program, instr, op) - base = op.bin - set_field = lambda { |f, v| - v = v.reduce if v.kind_of?(Expression) - case f - when :i8_12 - base = Expression[base, :|, [[v, :&, 0xf], :|, [[v, :<<, 4], :&, 0xf00]]] - next - when :stype; v = [:lsl, :lsr, :asr, :ror].index(v) - when :u; v = [:-, :+].index(v) - end - base = Expression[base, :|, [[v, :&, @fields_mask[f]], :<<, @fields_shift[f]]] - } + def encode_instr_op(program, instr, op) + base = op.bin + set_field = lambda { |f, v| + v = v.reduce if v.kind_of?(Expression) + case f + when :i8_12 + base = Expression[base, :|, [[v, :&, 0xf], :|, [[v, :<<, 4], :&, 0xf00]]] + next + when :stype; v = [:lsl, :lsr, :asr, :ror].index(v) + when :u; v = [:-, :+].index(v) + end + base = Expression[base, :|, [[v, :&, @fields_mask[f]], :<<, @fields_shift[f]]] + } - val, mask, shift = 0, 0, 0 + val, mask, shift = 0, 0, 0 - if op.props[:cond] - coff = op.props[:cond_name_off] || op.name.length - cd = instr.opname[coff, 2] - cdi = %w[eq ne cs cc mi pl vs vc hi ls ge lt gt le al].index(cd) || 14 # default = al - set_field[:cond, cdi] - end + if op.props[:cond] + coff = op.props[:cond_name_off] || op.name.length + cd = instr.opname[coff, 2] + cdi = %w[eq ne cs cc mi pl vs vc hi ls ge lt gt le al].index(cd) || 14 # default = al + set_field[:cond, cdi] + end - op.args.zip(instr.args).each { |sym, arg| - case sym - when :rd, :rs, :rn, :rm; set_field[sym, arg.i] - when :rm_rs - set_field[:rm, arg.i] - set_field[:stype, arg.stype] - set_field[:rs, arg.shift.i] - when :rm_is - set_field[:rm, arg.i] - set_field[:stype, arg.stype] - set_field[:shifti, arg.shift] - when :mem_rn_rm, :mem_rn_rms, :mem_rn_i8_12, :mem_rn_i12 - set_field[:rn, 
arg.base.i] - case sym - when :mem_rn_rm - set_field[:rm, arg.offset.i] - when :mem_rn_rms - set_field[:rm, arg.offset.i] - set_field[:stype, arg.offset.stype] - set_field[:rs, arg.offset.shift.i] - when :mem_rn_i8_12 - set_field[:i8_12, arg.offset] - when :mem_rn_i12 - set_field[:i12, arg.offset] - end - # TODO set_field[:u] etc - when :reglist - set_field[sym, arg.list.inject(0) { |rl, r| rl | (1 << r.i) }] - when :i8_r - b = arg.reduce & 0xffffffff - r = (0..15).find { - next true if b < 0x100 - b = ((b << 2) & 0xffff_ffff) | ((b >> 30) & 3) - false - } - raise EncodeError, "Invalid constant" if not r - set_field[:i8, b] - set_field[:rotate, r] - when :i12, :i24 - val, mask, shift = arg, @fields_mask[sym], @fields_shift[sym] - end - } + op.args.zip(instr.args).each { |sym, arg| + case sym + when :rd, :rs, :rn, :rm; set_field[sym, arg.i] + when :rm_rs + set_field[:rm, arg.i] + set_field[:stype, arg.stype] + set_field[:rs, arg.shift.i] + when :rm_is + set_field[:rm, arg.i] + set_field[:stype, arg.stype] + set_field[:shifti, arg.shift] + when :mem_rn_rm, :mem_rn_rms, :mem_rn_i8_12, :mem_rn_i12 + set_field[:rn, arg.base.i] + case sym + when :mem_rn_rm + set_field[:rm, arg.offset.i] + when :mem_rn_rms + set_field[:rm, arg.offset.i] + set_field[:stype, arg.offset.stype] + set_field[:rs, arg.offset.shift.i] + when :mem_rn_i8_12 + set_field[:i8_12, arg.offset] + when :mem_rn_i12 + set_field[:i12, arg.offset] + end + # TODO set_field[:u] etc + when :reglist + set_field[sym, arg.list.inject(0) { |rl, r| rl | (1 << r.i) }] + when :i8_r + b = arg.reduce & 0xffffffff + r = (0..15).find { + next true if b < 0x100 + b = ((b << 2) & 0xffff_ffff) | ((b >> 30) & 3) + false + } + raise EncodeError, "Invalid constant" if not r + set_field[:i8, b] + set_field[:rotate, r] + when :i12, :i24 + val, mask, shift = arg, @fields_mask[sym], @fields_shift[sym] + end + } - if op.args[-1] == :i24 - # convert label name for branch to relative offset - label = program.new_label('l_'+op.name) - 
target = val - target = target.rexpr if target.kind_of?(Expression) and target.op == :+ and not target.lexpr - val = Expression[[target, :-, [label, :+, 8]], :>>, 2] + if op.args[-1] == :i24 + # convert label name for branch to relative offset + label = program.new_label('l_'+op.name) + target = val + target = target.rexpr if target.kind_of?(Expression) and target.op == :+ and not target.lexpr + val = Expression[[target, :-, [label, :+, 8]], :>>, 2] - EncodedData.new('', :export => { label => 0 }) << - Expression[base, :|, [[val, :<<, shift], :&, mask]].encode(:u32, @endianness) - else - Expression[base, :|, [[val, :<<, shift], :&, mask]].encode(:u32, @endianness) - end - end + EncodedData.new('', :export => { label => 0 }) << + Expression[base, :|, [[val, :<<, shift], :&, mask]].encode(:u32, @endianness) + else + Expression[base, :|, [[val, :<<, shift], :&, mask]].encode(:u32, @endianness) + end + end end end diff --git a/lib/metasm/metasm/cpu/arm/main.rb b/lib/metasm/metasm/cpu/arm/main.rb index d474e6702c..06355a7f1c 100644 --- a/lib/metasm/metasm/cpu/arm/main.rb +++ b/lib/metasm/metasm/cpu/arm/main.rb @@ -8,65 +8,65 @@ require 'metasm/main' module Metasm class ARM < CPU - class Reg - class << self - attr_accessor :s_to_i, :i_to_s - end - @i_to_s = %w[r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 r10 r11 r12 sp lr pc] - @s_to_i = { 'wr' => 7, 'sb' => 9, 'sl' => 10, 'fp' => 11, 'ip' => 12, 'sp' => 13, 'lr' => 14, 'pc' => 15 } - 15.times { |i| @s_to_i["r#{i}"] = i } - 4.times { |i| @s_to_i["a#{i+1}"] = i } - 8.times { |i| @s_to_i["v#{i+1}"] = i+4 } + class Reg + class << self + attr_accessor :s_to_i, :i_to_s + end + @i_to_s = %w[r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 r10 r11 r12 sp lr pc] + @s_to_i = { 'wr' => 7, 'sb' => 9, 'sl' => 10, 'fp' => 11, 'ip' => 12, 'sp' => 13, 'lr' => 14, 'pc' => 15 } + 15.times { |i| @s_to_i["r#{i}"] = i } + 4.times { |i| @s_to_i["a#{i+1}"] = i } + 8.times { |i| @s_to_i["v#{i+1}"] = i+4 } - attr_accessor :i, :stype, :shift, :updated - def initialize(i, 
stype=:lsl, shift=0) - @i = i - @stype = stype - @shift = shift - end + attr_accessor :i, :stype, :shift, :updated + def initialize(i, stype=:lsl, shift=0) + @i = i + @stype = stype + @shift = shift + end - def symbolic - r = self.class.i_to_s[@i].to_sym - if @stype == :lsl and @shift == 0 - r - else - r # TODO shift/rotate/... - end - end - end + def symbolic + r = self.class.i_to_s[@i].to_sym + if @stype == :lsl and @shift == 0 + r + else + r # TODO shift/rotate/... + end + end + end - class Memref - attr_accessor :base, :offset, :sign, :incr - def initialize(base, offset, sign=:+, incr=nil) - @base, @offset, @sign, @incr = base, offset, sign, incr - end + class Memref + attr_accessor :base, :offset, :sign, :incr + def initialize(base, offset, sign=:+, incr=nil) + @base, @offset, @sign, @incr = base, offset, sign, incr + end - def symbolic(len=4, orig=nil) - o = @offset - o = o.symbolic if o.kind_of? Reg - p = Expression[@base.symbolic, @sign, o].reduce - Indirection[p, len, orig] - end - end + def symbolic(len=4, orig=nil) + o = @offset + o = o.symbolic if o.kind_of? 
Reg + p = Expression[@base.symbolic, @sign, o].reduce + Indirection[p, len, orig] + end + end - class RegList - attr_accessor :list, :usermoderegs + class RegList + attr_accessor :list, :usermoderegs - def initialize(l=[]) - @list = l - end - end + def initialize(l=[]) + @list = l + end + end - def initialize(endianness = :little) - super() - @endianness = endianness - @size = 32 - end + def initialize(endianness = :little) + super() + @endianness = endianness + @size = 32 + end - def init_opcode_list - init_latest - @opcode_list - end + def init_opcode_list + init_latest + @opcode_list + end end end diff --git a/lib/metasm/metasm/cpu/arm/opcodes.rb b/lib/metasm/metasm/cpu/arm/opcodes.rb index c535077a7b..372fce5732 100644 --- a/lib/metasm/metasm/cpu/arm/opcodes.rb +++ b/lib/metasm/metasm/cpu/arm/opcodes.rb @@ -8,316 +8,316 @@ require 'metasm/cpu/arm/main' module Metasm class ARM - private + private - # ARM MODE + # ARM MODE - def addop(name, bin, *args) - args << :cond if not args.delete :uncond + def addop(name, bin, *args) + args << :cond if not args.delete :uncond - suppl = nil - o = Opcode.new name, bin - args.each { |a| - # Should Be One fields - if a == :sbo16 ; o.bin |= 0b1111 << 16 ; next ; end - if a == :sbo12 ; o.bin |= 0b1111 << 12 ; next ; end - if a == :sbo8 ; o.bin |= 0b1111 << 8 ; next ; end - if a == :sbo0 ; o.bin |= 0b1111 << 0 ; next ; end + suppl = nil + o = Opcode.new name, bin + args.each { |a| + # Should Be One fields + if a == :sbo16 ; o.bin |= 0b1111 << 16 ; next ; end + if a == :sbo12 ; o.bin |= 0b1111 << 12 ; next ; end + if a == :sbo8 ; o.bin |= 0b1111 << 8 ; next ; end + if a == :sbo0 ; o.bin |= 0b1111 << 0 ; next ; end - o.args << a if @valid_args[a] - o.props[a] = true if @valid_props[a] - o.props.update a if a.kind_of?(Hash) - # special args -> multiple fields - suppl ||= { :i8_r => [:i8, :rotate], :rm_is => [:rm, :stype, :shifti], - :rm_rs => [:rm, :stype, :rs], :mem_rn_rm => [:rn, :rm, :rsx, :u], - :mem_rn_i8_12 => [:rn, :i8_12, 
:u], - :mem_rn_rms => [:rn, :rm, :stype, :shifti, :i], - :mem_rn_i12 => [:rn, :i12, :u] - }[a] - } + o.args << a if @valid_args[a] + o.props[a] = true if @valid_props[a] + o.props.update a if a.kind_of?(Hash) + # special args -> multiple fields + suppl ||= { :i8_r => [:i8, :rotate], :rm_is => [:rm, :stype, :shifti], + :rm_rs => [:rm, :stype, :rs], :mem_rn_rm => [:rn, :rm, :rsx, :u], + :mem_rn_i8_12 => [:rn, :i8_12, :u], + :mem_rn_rms => [:rn, :rm, :stype, :shifti, :i], + :mem_rn_i12 => [:rn, :i12, :u] + }[a] + } - args.concat suppl if suppl + args.concat suppl if suppl - args.each { |a| o.fields[a] = [@fields_mask[a], @fields_shift[a]] if @fields_mask[a] } + args.each { |a| o.fields[a] = [@fields_mask[a], @fields_shift[a]] if @fields_mask[a] } - @opcode_list << o - end + @opcode_list << o + end - def addop_data_s(name, op, a1, a2, *h) - addop name, op | (1 << 25), a1, a2, :i8_r, :rotate, *h - addop name, op, a1, a2, :rm_is, *h - addop name, op | (1 << 4), a1, a2, :rm_rs, *h - end - def addop_data(name, op, a1, a2) - addop_data_s name, op << 21, a1, a2 - addop_data_s name+'s', (op << 21) | (1 << 20), a1, a2, :cond_name_off => name.length - end + def addop_data_s(name, op, a1, a2, *h) + addop name, op | (1 << 25), a1, a2, :i8_r, :rotate, *h + addop name, op, a1, a2, :rm_is, *h + addop name, op | (1 << 4), a1, a2, :rm_rs, *h + end + def addop_data(name, op, a1, a2) + addop_data_s name, op << 21, a1, a2 + addop_data_s name+'s', (op << 21) | (1 << 20), a1, a2, :cond_name_off => name.length + end - def addop_load_puw(name, op, *a) - addop name, op, {:baseincr => :post}, :rd, :u, *a - addop name, op | (1 << 24), :rd, :u, *a - addop name, op | (1 << 24) | (1 << 21), {:baseincr => :pre}, :rd, :u, *a - end - def addop_load_lsh_o(name, op) - addop_load_puw name, op, :rsz, :mem_rn_rm, {:cond_name_off => 3} - addop_load_puw name, op | (1 << 22), :mem_rn_i8_12, {:cond_name_off => 3} - end - def addop_load_lsh - op = 9 << 4 - addop_load_lsh_o 'strh', op | (1 << 5) - 
addop_load_lsh_o 'ldrd', op | (1 << 6) - addop_load_lsh_o 'strd', op | (1 << 6) | (1 << 5) - addop_load_lsh_o 'ldrh', op | (1 << 20) | (1 << 5) - addop_load_lsh_o 'ldrsb', op | (1 << 20) | (1 << 6) - addop_load_lsh_o 'ldrsh', op | (1 << 20) | (1 << 6) | (1 << 5) - end + def addop_load_puw(name, op, *a) + addop name, op, {:baseincr => :post}, :rd, :u, *a + addop name, op | (1 << 24), :rd, :u, *a + addop name, op | (1 << 24) | (1 << 21), {:baseincr => :pre}, :rd, :u, *a + end + def addop_load_lsh_o(name, op) + addop_load_puw name, op, :rsz, :mem_rn_rm, {:cond_name_off => 3} + addop_load_puw name, op | (1 << 22), :mem_rn_i8_12, {:cond_name_off => 3} + end + def addop_load_lsh + op = 9 << 4 + addop_load_lsh_o 'strh', op | (1 << 5) + addop_load_lsh_o 'ldrd', op | (1 << 6) + addop_load_lsh_o 'strd', op | (1 << 6) | (1 << 5) + addop_load_lsh_o 'ldrh', op | (1 << 20) | (1 << 5) + addop_load_lsh_o 'ldrsb', op | (1 << 20) | (1 << 6) + addop_load_lsh_o 'ldrsh', op | (1 << 20) | (1 << 6) | (1 << 5) + end - def addop_load_puwt(name, op, *a) - addop_load_puw name, op, *a - addop name+'t', op | (1 << 21), {:baseincr => :post, :cond_name_off => name.length}, :rd, :u, *a - end - def addop_load_o(name, op, *a) - addop_load_puwt name, op, :mem_rn_i12, *a - addop_load_puwt name, op | (1 << 25), :mem_rn_rms, *a - end - def addop_load(name, op) - addop_load_o name, op - addop_load_o name+'b', op | (1 << 22), :cond_name_off => name.length - end + def addop_load_puwt(name, op, *a) + addop_load_puw name, op, *a + addop name+'t', op | (1 << 21), {:baseincr => :post, :cond_name_off => name.length}, :rd, :u, *a + end + def addop_load_o(name, op, *a) + addop_load_puwt name, op, :mem_rn_i12, *a + addop_load_puwt name, op | (1 << 25), :mem_rn_rms, *a + end + def addop_load(name, op) + addop_load_o name, op + addop_load_o name+'b', op | (1 << 22), :cond_name_off => name.length + end - def addop_ldm_go(name, op, *a) - addop name, op, :rn, :reglist, {:cond_name_off => 3}, *a - end - def 
addop_ldm_w(name, op, *a) - addop_ldm_go name, op, *a # base reg untouched - addop_ldm_go name, op | (1 << 21), {:baseincr => :post}, *a # base updated - end - def addop_ldm_s(name, op) - addop_ldm_w name, op # transfer regs - addop_ldm_w name, op | (1 << 22), :usermoderegs # transfer usermode regs - end - def addop_ldm_p(name, op) - addop_ldm_s name+'a', op # target memory included - addop_ldm_s name+'b', op | (1 << 24) # target memory excluded, transfer starts at next addr - end - def addop_ldm_u(name, op) - addop_ldm_p name+'d', op # transfer made downward - addop_ldm_p name+'i', op | (1 << 23) # transfer made upward - end - def addop_ldm(name, op) - addop_ldm_u name, op - end + def addop_ldm_go(name, op, *a) + addop name, op, :rn, :reglist, {:cond_name_off => 3}, *a + end + def addop_ldm_w(name, op, *a) + addop_ldm_go name, op, *a # base reg untouched + addop_ldm_go name, op | (1 << 21), {:baseincr => :post}, *a # base updated + end + def addop_ldm_s(name, op) + addop_ldm_w name, op # transfer regs + addop_ldm_w name, op | (1 << 22), :usermoderegs # transfer usermode regs + end + def addop_ldm_p(name, op) + addop_ldm_s name+'a', op # target memory included + addop_ldm_s name+'b', op | (1 << 24) # target memory excluded, transfer starts at next addr + end + def addop_ldm_u(name, op) + addop_ldm_p name+'d', op # transfer made downward + addop_ldm_p name+'i', op | (1 << 23) # transfer made upward + end + def addop_ldm(name, op) + addop_ldm_u name, op + end - # ARMv6 instruction set, aka arm7/arm9 - def init_arm_v6 - @opcode_list = [] + # ARMv6 instruction set, aka arm7/arm9 + def init_arm_v6 + @opcode_list = [] - [:baseincr, :cond, :cond_name_off, :usermoderegs, :tothumb, :tojazelle - ].each { |p| @valid_props[p] = true } + [:baseincr, :cond, :cond_name_off, :usermoderegs, :tothumb, :tojazelle + ].each { |p| @valid_props[p] = true } - [:rn, :rd, :rm, :crn, :crd, :crm, :cpn, :reglist, :i24, :rm_rs, :rm_is, - :i8_r, :mem_rn_rm, :mem_rn_i8_12, :mem_rn_rms, 
:mem_rn_i12 - ].each { |p| @valid_args[p] = true } + [:rn, :rd, :rm, :crn, :crd, :crm, :cpn, :reglist, :i24, :rm_rs, :rm_is, + :i8_r, :mem_rn_rm, :mem_rn_i8_12, :mem_rn_rms, :mem_rn_i12 + ].each { |p| @valid_args[p] = true } - @fields_mask.update :rn => 0xf, :rd => 0xf, :rs => 0xf, :rm => 0xf, - :crn => 0xf, :crd => 0xf, :crm => 0xf, :cpn => 0xf, - :rnx => 0xf, :rdx => 0xf, :rsx => 0xf, - :shifti => 0x1f, :stype => 3, :rotate => 0xf, :reglist => 0xffff, - :i8 => 0xff, :i12 => 0xfff, :i24 => 0xff_ffff, :i8_12 => 0xf0f, - :u => 1, :mask => 0xf, :sbo => 0xf, :cond => 0xf + @fields_mask.update :rn => 0xf, :rd => 0xf, :rs => 0xf, :rm => 0xf, + :crn => 0xf, :crd => 0xf, :crm => 0xf, :cpn => 0xf, + :rnx => 0xf, :rdx => 0xf, :rsx => 0xf, + :shifti => 0x1f, :stype => 3, :rotate => 0xf, :reglist => 0xffff, + :i8 => 0xff, :i12 => 0xfff, :i24 => 0xff_ffff, :i8_12 => 0xf0f, + :u => 1, :mask => 0xf, :sbo => 0xf, :cond => 0xf - @fields_shift.update :rn => 16, :rd => 12, :rs => 8, :rm => 0, - :crn => 16, :crd => 12, :crm => 0, :cpn => 8, - :rnx => 16, :rdx => 12, :rsx => 8, - :shifti => 7, :stype => 5, :rotate => 8, :reglist => 0, - :i8 => 0, :i12 => 0, :i24 => 0, :i8_12 => 0, - :u => 23, :mask => 16, :sbo => 12, :cond => 28 + @fields_shift.update :rn => 16, :rd => 12, :rs => 8, :rm => 0, + :crn => 16, :crd => 12, :crm => 0, :cpn => 8, + :rnx => 16, :rdx => 12, :rsx => 8, + :shifti => 7, :stype => 5, :rotate => 8, :reglist => 0, + :i8 => 0, :i12 => 0, :i24 => 0, :i8_12 => 0, + :u => 23, :mask => 16, :sbo => 12, :cond => 28 - addop_data 'and', 0, :rd, :rn - addop_data 'eor', 1, :rd, :rn - addop_data 'xor', 1, :rd, :rn - addop_data 'sub', 2, :rd, :rn - addop_data 'rsb', 3, :rd, :rn - addop_data 'add', 4, :rd, :rn - addop_data 'adc', 5, :rd, :rn - addop_data 'sbc', 6, :rd, :rn - addop_data 'rsc', 7, :rd, :rn - addop_data_s 'tst', (8 << 21) | (1 << 20), :rdx, :rn - addop_data_s 'teq', (9 << 21) | (1 << 20), :rdx, :rn - addop_data_s 'cmp', (10 << 21) | (1 << 20), :rdx, :rn - 
addop_data_s 'cmn', (11 << 21) | (1 << 20), :rdx, :rn - addop_data 'orr', 12, :rd, :rn - addop_data 'or', 12, :rd, :rn - addop_data 'mov', 13, :rd, :rnx - addop_data 'bic', 14, :rd, :rn - addop_data 'mvn', 15, :rd, :rnx + addop_data 'and', 0, :rd, :rn + addop_data 'eor', 1, :rd, :rn + addop_data 'xor', 1, :rd, :rn + addop_data 'sub', 2, :rd, :rn + addop_data 'rsb', 3, :rd, :rn + addop_data 'add', 4, :rd, :rn + addop_data 'adc', 5, :rd, :rn + addop_data 'sbc', 6, :rd, :rn + addop_data 'rsc', 7, :rd, :rn + addop_data_s 'tst', (8 << 21) | (1 << 20), :rdx, :rn + addop_data_s 'teq', (9 << 21) | (1 << 20), :rdx, :rn + addop_data_s 'cmp', (10 << 21) | (1 << 20), :rdx, :rn + addop_data_s 'cmn', (11 << 21) | (1 << 20), :rdx, :rn + addop_data 'orr', 12, :rd, :rn + addop_data 'or', 12, :rd, :rn + addop_data 'mov', 13, :rd, :rnx + addop_data 'bic', 14, :rd, :rn + addop_data 'mvn', 15, :rd, :rnx - addop 'b', 0b1010 << 24, :setip, :stopexec, :i24 - addop 'bl', 0b1011 << 24, :setip, :stopexec, :i24, :saveip - addop 'bkpt', (0b00010010 << 20) | (0b0111 << 4) # other fields are available&unused, also cnd != AL is undef - addop 'blx', 0b1111101 << 25, :setip, :stopexec, :saveip, :tothumb, :h, :uncond, :i24 - addop 'blx', (0b00010010 << 20) | (0b0011 << 4), :setip, :stopexec, :saveip, :tothumb, :rm, :sbo16, :sbo12, :sbo8 - addop 'bx', (0b00010010 << 20) | (0b0001 << 4), :setip, :stopexec, :rm, :sbo16, :sbo12, :sbo8 - addop 'bxj', (0b00010010 << 20) | (0b0010 << 4), :setip, :stopexec, :rm, :tojazelle, :sbo16, :sbo12, :sbo8 + addop 'b', 0b1010 << 24, :setip, :stopexec, :i24 + addop 'bl', 0b1011 << 24, :setip, :stopexec, :i24, :saveip + addop 'bkpt', (0b00010010 << 20) | (0b0111 << 4) # other fields are available&unused, also cnd != AL is undef + addop 'blx', 0b1111101 << 25, :setip, :stopexec, :saveip, :tothumb, :h, :uncond, :i24 + addop 'blx', (0b00010010 << 20) | (0b0011 << 4), :setip, :stopexec, :saveip, :tothumb, :rm, :sbo16, :sbo12, :sbo8 + addop 'bx', (0b00010010 << 20) | (0b0001 
<< 4), :setip, :stopexec, :rm, :sbo16, :sbo12, :sbo8 + addop 'bxj', (0b00010010 << 20) | (0b0010 << 4), :setip, :stopexec, :rm, :tojazelle, :sbo16, :sbo12, :sbo8 - addop_load 'str', (1 << 26) - addop_load 'ldr', (1 << 26) | (1 << 20) - addop_load_lsh - addop_ldm 'stm', (1 << 27) - addop_ldm 'ldm', (1 << 27) | (1 << 20) - # TODO aliases (http://www.davespace.co.uk/arm/introduction-to-arm/stack.html) - # fd = full descending stmfd/ldmfd = stmdb/ldmia - # ed = empty descending stmed/ldmed = stmda/ldmib - # fa = full ascending stmfa/ldmfa = stmib/ldmda - # ea = empty ascending stmea/ldmea = stmia/ldmdb + addop_load 'str', (1 << 26) + addop_load 'ldr', (1 << 26) | (1 << 20) + addop_load_lsh + addop_ldm 'stm', (1 << 27) + addop_ldm 'ldm', (1 << 27) | (1 << 20) + # TODO aliases (http://www.davespace.co.uk/arm/introduction-to-arm/stack.html) + # fd = full descending stmfd/ldmfd = stmdb/ldmia + # ed = empty descending stmed/ldmed = stmda/ldmib + # fa = full ascending stmfa/ldmfa = stmib/ldmda + # ea = empty ascending stmea/ldmea = stmia/ldmdb - # TODO mrs, [qus]add/sub* - addop 'clz', (0b00010110 << 20) | (0b0001 << 4), :rd, :rm, :sbo16, :sbo8 - addop 'ldrex', (0b00011001 << 20) | (0b1001 << 4), :rd, :rn, :sbo8, :sbo0 - addop 'strex', (0b00011000 << 20) | (0b1001 << 4), :rd, :rm, :rn, :sbo8 - addop 'rev', (0b01101011 << 20) | (0b0011 << 4), :rd, :rm, :sbo16, :sbo8 - addop 'rev16', (0b01101011 << 20) | (0b1011 << 4), :rd, :rm, :sbo16, :sbo8 - addop 'revsh', (0b01101111 << 20) | (0b1011 << 4), :rd, :rm, :sbo16, :sbo8 - addop 'sel', (0b01101000 << 20) | (0b1011 << 4), :rd, :rn, :rm, :sbo8 + # TODO mrs, [qus]add/sub* + addop 'clz', (0b00010110 << 20) | (0b0001 << 4), :rd, :rm, :sbo16, :sbo8 + addop 'ldrex', (0b00011001 << 20) | (0b1001 << 4), :rd, :rn, :sbo8, :sbo0 + addop 'strex', (0b00011000 << 20) | (0b1001 << 4), :rd, :rm, :rn, :sbo8 + addop 'rev', (0b01101011 << 20) | (0b0011 << 4), :rd, :rm, :sbo16, :sbo8 + addop 'rev16', (0b01101011 << 20) | (0b1011 << 4), :rd, :rm, 
:sbo16, :sbo8 + addop 'revsh', (0b01101111 << 20) | (0b1011 << 4), :rd, :rm, :sbo16, :sbo8 + addop 'sel', (0b01101000 << 20) | (0b1011 << 4), :rd, :rn, :rm, :sbo8 - end + end - # THUMB2 MODE + # THUMB2 MODE - def addop_t(name, bin, *args) - o = Opcode.new name, bin - args.each { |a| - o.args << a if @valid_args[a] - o.props[a] = true if @valid_props[a] - o.props.update a if a.kind_of?(Hash) - } + def addop_t(name, bin, *args) + o = Opcode.new name, bin + args.each { |a| + o.args << a if @valid_args[a] + o.props[a] = true if @valid_props[a] + o.props.update a if a.kind_of?(Hash) + } - args.each { |a| o.fields[a] = [@fields_mask[a], @fields_shift[a]] if @fields_mask[a] } + args.each { |a| o.fields[a] = [@fields_mask[a], @fields_shift[a]] if @fields_mask[a] } - @opcode_list_t << o - end + @opcode_list_t << o + end - def init_arm_thumb2 - @opcode_list_t = [] - @valid_props_t = {} - @valid_args_t = {} - @fields_mask_t = {} - @fields_shift_t = {} + def init_arm_thumb2 + @opcode_list_t = [] + @valid_props_t = {} + @valid_args_t = {} + @fields_mask_t = {} + @fields_shift_t = {} - [:i16, :i16_3_8, :i16_rd].each { |p| @valid_props_t[p] = true } - [:i5, :rm, :rn, :rd].each { |p| @valid_args_t[p] = true } - @fields_mask_t.update :i5 => 0x1f, :i3 => 7, :i51 => 0x5f, - :rm => 7, :rn => 7, :rd => 7, :rdn => 7, :rdn8 => 7 - @fields_shift_t.update :i5 => 6, :i3 => 6, :i51 => 3, - :rm => 6, :rn => 3, :rd => 0, :rdn => 0, :rdn8 => 8 + [:i16, :i16_3_8, :i16_rd].each { |p| @valid_props_t[p] = true } + [:i5, :rm, :rn, :rd].each { |p| @valid_args_t[p] = true } + @fields_mask_t.update :i5 => 0x1f, :i3 => 7, :i51 => 0x5f, + :rm => 7, :rn => 7, :rd => 7, :rdn => 7, :rdn8 => 7 + @fields_shift_t.update :i5 => 6, :i3 => 6, :i51 => 3, + :rm => 6, :rn => 3, :rd => 0, :rdn => 0, :rdn8 => 8 - addop_t 'mov', 0b000_00 << 11, :rd, :rm - addop_t 'lsl', 0b000_00 << 11, :rd, :rm, :i5 - addop_t 'lsr', 0b000_01 << 11, :rd, :rm, :i5 - addop_t 'asr', 0b000_10 << 11, :rd, :rm, :i5 + addop_t 'mov', 0b000_00 
<< 11, :rd, :rm + addop_t 'lsl', 0b000_00 << 11, :rd, :rm, :i5 + addop_t 'lsr', 0b000_01 << 11, :rd, :rm, :i5 + addop_t 'asr', 0b000_10 << 11, :rd, :rm, :i5 - addop_t 'add', 0b000_1100 << 9, :rd, :rn, :rm - addop_t 'add', 0b000_1110 << 9, :rd, :rn, :i3 - addop_t 'sub', 0b000_1101 << 9, :rd, :rn, :rm - addop_t 'sub', 0b000_1111 << 9, :rd, :rn, :i3 + addop_t 'add', 0b000_1100 << 9, :rd, :rn, :rm + addop_t 'add', 0b000_1110 << 9, :rd, :rn, :i3 + addop_t 'sub', 0b000_1101 << 9, :rd, :rn, :rm + addop_t 'sub', 0b000_1111 << 9, :rd, :rn, :i3 - addop_t 'mov', 0b001_00 << 10, :rdn8, :i8 - addop_t 'cmp', 0b001_01 << 10, :rdn8, :i8 - addop_t 'add', 0b001_10 << 10, :rdn8, :i8 - addop_t 'sub', 0b001_11 << 10, :rdn8, :i8 + addop_t 'mov', 0b001_00 << 10, :rdn8, :i8 + addop_t 'cmp', 0b001_01 << 10, :rdn8, :i8 + addop_t 'add', 0b001_10 << 10, :rdn8, :i8 + addop_t 'sub', 0b001_11 << 10, :rdn8, :i8 - addop_t 'and', (0b010000 << 10) | ( 0 << 6), :rdn, :rm - addop_t 'eor', (0b010000 << 10) | ( 1 << 6), :rdn, :rm # xor - addop_t 'lsl', (0b010000 << 10) | ( 2 << 6), :rdn, :rm - addop_t 'lsr', (0b010000 << 10) | ( 3 << 6), :rdn, :rm - addop_t 'asr', (0b010000 << 10) | ( 4 << 6), :rdn, :rm - addop_t 'adc', (0b010000 << 10) | ( 5 << 6), :rdn, :rm - addop_t 'sbc', (0b010000 << 10) | ( 6 << 6), :rdn, :rm - addop_t 'ror', (0b010000 << 10) | ( 7 << 6), :rdn, :rm - addop_t 'tst', (0b010000 << 10) | ( 8 << 6), :rdn, :rm - addop_t 'rsb', (0b010000 << 10) | ( 9 << 6), :rdn, :rm - addop_t 'cmp', (0b010000 << 10) | (10 << 6), :rdn, :rm - addop_t 'cmn', (0b010000 << 10) | (11 << 6), :rdn, :rm - addop_t 'orr', (0b010000 << 10) | (12 << 6), :rdn, :rm # or - addop_t 'mul', (0b010000 << 10) | (13 << 6), :rdn, :rm - addop_t 'bic', (0b010000 << 10) | (14 << 6), :rdn, :rm - addop_t 'mvn', (0b010000 << 10) | (15 << 6), :rdn, :rm + addop_t 'and', (0b010000 << 10) | ( 0 << 6), :rdn, :rm + addop_t 'eor', (0b010000 << 10) | ( 1 << 6), :rdn, :rm # xor + addop_t 'lsl', (0b010000 << 10) | ( 2 << 6), :rdn, :rm + 
addop_t 'lsr', (0b010000 << 10) | ( 3 << 6), :rdn, :rm + addop_t 'asr', (0b010000 << 10) | ( 4 << 6), :rdn, :rm + addop_t 'adc', (0b010000 << 10) | ( 5 << 6), :rdn, :rm + addop_t 'sbc', (0b010000 << 10) | ( 6 << 6), :rdn, :rm + addop_t 'ror', (0b010000 << 10) | ( 7 << 6), :rdn, :rm + addop_t 'tst', (0b010000 << 10) | ( 8 << 6), :rdn, :rm + addop_t 'rsb', (0b010000 << 10) | ( 9 << 6), :rdn, :rm + addop_t 'cmp', (0b010000 << 10) | (10 << 6), :rdn, :rm + addop_t 'cmn', (0b010000 << 10) | (11 << 6), :rdn, :rm + addop_t 'orr', (0b010000 << 10) | (12 << 6), :rdn, :rm # or + addop_t 'mul', (0b010000 << 10) | (13 << 6), :rdn, :rm + addop_t 'bic', (0b010000 << 10) | (14 << 6), :rdn, :rm + addop_t 'mvn', (0b010000 << 10) | (15 << 6), :rdn, :rm - addop_t 'add', 0b010001_00 << 8, :rdn, :rm, :dn - addop_t 'cmp', 0b010001_01 << 8, :rdn, :rm, :dn - addop_t 'mov', 0b010001_10 << 8, :rdn, :rm, :dn + addop_t 'add', 0b010001_00 << 8, :rdn, :rm, :dn + addop_t 'cmp', 0b010001_01 << 8, :rdn, :rm, :dn + addop_t 'mov', 0b010001_10 << 8, :rdn, :rm, :dn - addop_t 'bx', 0b010001_110 << 7, :rm - addop_t 'blx', 0b010001_111 << 7, :rm + addop_t 'bx', 0b010001_110 << 7, :rm + addop_t 'blx', 0b010001_111 << 7, :rm - addop_t 'ldr', 0b01001 << 11, :rd, :pc_i8 - addop_t 'str', 0b0101_000 << 9, :rd, :rn, :rm - addop_t 'strh', 0b0101_001 << 9, :rd, :rn, :rm - addop_t 'strb', 0b0101_010 << 9, :rd, :rn, :rm - addop_t 'ldrsb', 0b0101_011 << 9, :rd, :rn, :rm - addop_t 'ldr', 0b0101_100 << 9, :rd, :rn, :rm - addop_t 'ldrh', 0b0101_101 << 9, :rd, :rn, :rm - addop_t 'ldrb', 0b0101_110 << 9, :rd, :rn, :rm - addop_t 'ldrsh', 0b0101_111 << 9, :rd, :rn, :rm + addop_t 'ldr', 0b01001 << 11, :rd, :pc_i8 + addop_t 'str', 0b0101_000 << 9, :rd, :rn, :rm + addop_t 'strh', 0b0101_001 << 9, :rd, :rn, :rm + addop_t 'strb', 0b0101_010 << 9, :rd, :rn, :rm + addop_t 'ldrsb', 0b0101_011 << 9, :rd, :rn, :rm + addop_t 'ldr', 0b0101_100 << 9, :rd, :rn, :rm + addop_t 'ldrh', 0b0101_101 << 9, :rd, :rn, :rm + addop_t 'ldrb', 
0b0101_110 << 9, :rd, :rn, :rm + addop_t 'ldrsh', 0b0101_111 << 9, :rd, :rn, :rm - addop_t 'str', 0b01100 << 11, :rd, :rn, :i5 - addop_t 'ldr', 0b01101 << 11, :rd, :rn, :i5 - addop_t 'strb', 0b01110 << 11, :rd, :rn, :i5 - addop_t 'ldrb', 0b01111 << 11, :rd, :rn, :i5 - addop_t 'strh', 0b10000 << 11, :rd, :rn, :i5 - addop_t 'ldrh', 0b10001 << 11, :rd, :rn, :i5 - addop_t 'str', 0b10010 << 11, :rd, :sp_i8 - addop_t 'ldr', 0b10011 << 11, :rd, :sp_i8 - addop_t 'adr', 0b10100 << 11, :rd, :pc, :i8 - addop_t 'add', 0b10101 << 11, :rd, :sp, :i8 + addop_t 'str', 0b01100 << 11, :rd, :rn, :i5 + addop_t 'ldr', 0b01101 << 11, :rd, :rn, :i5 + addop_t 'strb', 0b01110 << 11, :rd, :rn, :i5 + addop_t 'ldrb', 0b01111 << 11, :rd, :rn, :i5 + addop_t 'strh', 0b10000 << 11, :rd, :rn, :i5 + addop_t 'ldrh', 0b10001 << 11, :rd, :rn, :i5 + addop_t 'str', 0b10010 << 11, :rd, :sp_i8 + addop_t 'ldr', 0b10011 << 11, :rd, :sp_i8 + addop_t 'adr', 0b10100 << 11, :rd, :pc, :i8 + addop_t 'add', 0b10101 << 11, :rd, :sp, :i8 - # 0b1011 misc - addop_t 'add', 0b1011_0000_0 << 7, :sp, :i7 - addop_t 'sub', 0b1011_0000_1 << 7, :sp, :i7 - addop_t 'sxth', 0b1011_0010_00 << 6, :rd, :rn - addop_t 'sxtb', 0b1011_0010_01 << 6, :rd, :rn - addop_t 'uxth', 0b1011_0010_10 << 6, :rd, :rn - addop_t 'uxtb', 0b1011_0010_11 << 6, :rd, :rn - addop_t 'cbz', 0b1011_0001 << 8, :rd, :i51 - addop_t 'cbnz', 0b1011_1001 << 8, :rd, :i51 - addop_t 'push', 0b1011_0100 << 8, :rlist - addop_t 'push', 0b1011_0101 << 8, :rlist - addop_t 'pop', 0b1011_1100 << 8, :rlist - addop_t 'pop', 0b1011_1101 << 8, :rlist - #addop_t 'unpredictable', 0b1011_0110_0100_0000, :i4 - addop_t 'setendle', 0b1011_0110_0101_0000 - addop_t 'setendbe', 0b1011_0110_0101_1000 - addop_t 'cps', 0b1011_0110_0110_0000 - #addop_t 'unpredictable', 0b1011_0110_0110_1000, :msk_0001_0111 - addop_t 'rev', 0b1011_1010_00 << 6, :rd, :rn - addop_t 'rev16', 0b1011_1010_01 << 6, :rd, :rn - addop_t 'revsh', 0b1011_1010_11 << 6, :rd, :rn - addop_t 'bkpt', 0b1011_1110 << 8, :i8 - 
addop_t 'it', 0b1011_1111 << 8, :itcond, :itmsk - addop_t 'nop', 0b1011_1111_0000_0000 - addop_t 'yield', 0b1011_1111_0000_0001 - addop_t 'wfe', 0b1011_1111_0000_0010 - addop_t 'wfi', 0b1011_1111_0000_0011 - addop_t 'sev', 0b1011_1111_0000_0100 - addop_t 'nop', 0b1011_1111_0000_0000, :i4 + # 0b1011 misc + addop_t 'add', 0b1011_0000_0 << 7, :sp, :i7 + addop_t 'sub', 0b1011_0000_1 << 7, :sp, :i7 + addop_t 'sxth', 0b1011_0010_00 << 6, :rd, :rn + addop_t 'sxtb', 0b1011_0010_01 << 6, :rd, :rn + addop_t 'uxth', 0b1011_0010_10 << 6, :rd, :rn + addop_t 'uxtb', 0b1011_0010_11 << 6, :rd, :rn + addop_t 'cbz', 0b1011_0001 << 8, :rd, :i51 + addop_t 'cbnz', 0b1011_1001 << 8, :rd, :i51 + addop_t 'push', 0b1011_0100 << 8, :rlist + addop_t 'push', 0b1011_0101 << 8, :rlist + addop_t 'pop', 0b1011_1100 << 8, :rlist + addop_t 'pop', 0b1011_1101 << 8, :rlist + #addop_t 'unpredictable', 0b1011_0110_0100_0000, :i4 + addop_t 'setendle', 0b1011_0110_0101_0000 + addop_t 'setendbe', 0b1011_0110_0101_1000 + addop_t 'cps', 0b1011_0110_0110_0000 + #addop_t 'unpredictable', 0b1011_0110_0110_1000, :msk_0001_0111 + addop_t 'rev', 0b1011_1010_00 << 6, :rd, :rn + addop_t 'rev16', 0b1011_1010_01 << 6, :rd, :rn + addop_t 'revsh', 0b1011_1010_11 << 6, :rd, :rn + addop_t 'bkpt', 0b1011_1110 << 8, :i8 + addop_t 'it', 0b1011_1111 << 8, :itcond, :itmsk + addop_t 'nop', 0b1011_1111_0000_0000 + addop_t 'yield', 0b1011_1111_0000_0001 + addop_t 'wfe', 0b1011_1111_0000_0010 + addop_t 'wfi', 0b1011_1111_0000_0011 + addop_t 'sev', 0b1011_1111_0000_0100 + addop_t 'nop', 0b1011_1111_0000_0000, :i4 - addop_t 'stmia', 0b11000 << 11, :rn, :rlist # stmea - addop_t 'ldmia', 0b11001 << 11, :rn, :rlist # ldmfd - addop_t 'undef', 0b1101_1110 << 8, :i8 - addop_t 'svc', 0b1101_1111 << 8, :i8 - addop_t 'b', 0b1101 << 12, :cond, :i8 - addop_t 'b', 0b11100 << 11, :i11 + addop_t 'stmia', 0b11000 << 11, :rn, :rlist # stmea + addop_t 'ldmia', 0b11001 << 11, :rn, :rlist # ldmfd + addop_t 'undef', 0b1101_1110 << 8, :i8 + addop_t 
'svc', 0b1101_1111 << 8, :i8 + addop_t 'b', 0b1101 << 12, :cond, :i8 + addop_t 'b', 0b11100 << 11, :i11 - # thumb-32 - end + # thumb-32 + end - def init_arm_v6_thumb2 - init_arm_v6 - init_arm_thumb2 - end - alias init_latest init_arm_v6_thumb2 + def init_arm_v6_thumb2 + init_arm_v6 + init_arm_thumb2 + end + alias init_latest init_arm_v6_thumb2 end end diff --git a/lib/metasm/metasm/cpu/arm/parse.rb b/lib/metasm/metasm/cpu/arm/parse.rb index 2a1bdfe44c..4923ca1cd0 100644 --- a/lib/metasm/metasm/cpu/arm/parse.rb +++ b/lib/metasm/metasm/cpu/arm/parse.rb @@ -9,134 +9,134 @@ require 'metasm/parse' module Metasm class ARM - def opcode_list_byname - @opcode_list_byname ||= opcode_list.inject({}) { |h, o| - (h[o.name] ||= []) << o - if o.props[:cond] - coff = o.props[:cond_name_off] || o.name.length - %w[eq ne cs cc mi pl vs vc hi ls ge lt gt le al].each { |cd| - n = o.name.dup - n[coff, 0] = cd - (h[n] ||= []) << o - } - end - h - } - end + def opcode_list_byname + @opcode_list_byname ||= opcode_list.inject({}) { |h, o| + (h[o.name] ||= []) << o + if o.props[:cond] + coff = o.props[:cond_name_off] || o.name.length + %w[eq ne cs cc mi pl vs vc hi ls ge lt gt le al].each { |cd| + n = o.name.dup + n[coff, 0] = cd + (h[n] ||= []) << o + } + end + h + } + end - def parse_arg_valid?(op, sym, arg) - case sym - when :rd, :rs, :rn, :rm; arg.kind_of?(Reg) and arg.shift == 0 and (arg.updated ? 
op.props[:baseincr] : !op.props[:baseincr]) - when :rm_rs; arg.kind_of?(Reg) and arg.shift.kind_of?(Reg) - when :rm_is; arg.kind_of?(Reg) and arg.shift.kind_of?(Integer) - when :i12, :i24, :i8_12; arg.kind_of?(Expression) - when :i8_r - if arg.kind_of?(Expression) - b = arg.reduce - !b.kind_of?(Integer) or (0..15).find { - b = ((b << 2) & 0xffff_ffff) | ((b >> 30) & 3) - b < 0x100 } - end - when :mem_rn_rm, :mem_rn_i8_12, :mem_rn_rms, :mem_rn_i12 - os = case sym - when :mem_rn_rm; :rm - when :mem_rn_i8_12; :i8_12 - when :mem_rn_rms; :rm_rs - when :mem_rn_i12; :i12 - end - arg.kind_of?(Memref) and parse_arg_valid?(op, os, arg.offset) - when :reglist; arg.kind_of?(RegList) - end - # TODO check flags on reglist, check int values - end + def parse_arg_valid?(op, sym, arg) + case sym + when :rd, :rs, :rn, :rm; arg.kind_of?(Reg) and arg.shift == 0 and (arg.updated ? op.props[:baseincr] : !op.props[:baseincr]) + when :rm_rs; arg.kind_of?(Reg) and arg.shift.kind_of?(Reg) + when :rm_is; arg.kind_of?(Reg) and arg.shift.kind_of?(Integer) + when :i12, :i24, :i8_12; arg.kind_of?(Expression) + when :i8_r + if arg.kind_of?(Expression) + b = arg.reduce + !b.kind_of?(Integer) or (0..15).find { + b = ((b << 2) & 0xffff_ffff) | ((b >> 30) & 3) + b < 0x100 } + end + when :mem_rn_rm, :mem_rn_i8_12, :mem_rn_rms, :mem_rn_i12 + os = case sym + when :mem_rn_rm; :rm + when :mem_rn_i8_12; :i8_12 + when :mem_rn_rms; :rm_rs + when :mem_rn_i12; :i12 + end + arg.kind_of?(Memref) and parse_arg_valid?(op, os, arg.offset) + when :reglist; arg.kind_of?(RegList) + end + # TODO check flags on reglist, check int values + end - def parse_argument(lexer) - raise lexer, "unexpected EOS" if not lexer.nexttok - if Reg.s_to_i[lexer.nexttok.raw] - arg = Reg.new Reg.s_to_i[lexer.readtok.raw] - lexer.skip_space - case lexer.nexttok.raw.downcase - when 'lsl', 'lsr', 'asr', 'ror' - arg.stype = lexer.readtok.raw.downcase.to_sym - lexer.skip_space - if Reg.s_to_i[lexer.nexttok.raw] - arg.shift = Reg.new 
Reg.s_to_i[lexer.readtok.raw] - else - arg.shift = Expression.parse(lexer).reduce - end - when 'rrx' - lexer.readtok - arg.stype = :ror - when '!' - lexer.readtok - arg.updated = true - end if lexer.nexttok - elsif lexer.nexttok.raw == '{' - lexer.readtok - arg = RegList.new - loop do - lexer.skip_space - raise "unterminated reglist" if lexer.eos? - if Reg.s_to_i[lexer.nexttok.raw] - arg.list << Reg.new(Reg.s_to_i[lexer.readtok.raw]) - lexer.skip_space - raise "unterminated reglist" if lexer.eos? - end - case lexer.nexttok.raw - when ','; lexer.readtok - when '-' - lexer.readtok - lexer.skip_space - raise "unterminated reglist" if lexer.eos? - if not r = Reg.s_to_i[lexer.nexttok.raw] - raise lexer, "reglist parse error: invalid range" - end - lexer.readtok - (arg.list.last.i+1..r).each { |v| - arg.list << Reg.new(v) - } - when '}'; lexer.readtok ; break - else raise lexer, "reglist parse error: ',' or '}' expected, got #{lexer.nexttok.raw.inspect}" - end - end - if lexer.nexttok and lexer.nexttok.raw == '^' - lexer.readtok - arg.usermoderegs = true - end - elsif lexer.nexttok.raw == '[' - lexer.readtok - raise "unexpected EOS" if lexer.eos? - if not base = Reg.s_to_i[lexer.nexttok.raw] - raise lexer, 'invalid mem base (reg expected)' - end - base = Reg.new Reg.s_to_i[lexer.readtok.raw] - raise "unexpected EOS" if lexer.eos? - if lexer.nexttok.raw == ']' - lexer.readtok - #closed = true - end - if !lexer.nexttok or lexer.nexttok.raw != ',' - raise lexer, 'mem off expected' - end - lexer.readtok - off = parse_argument(lexer) - if not off.kind_of?(Expression) and not off.kind_of?(Reg) - raise lexer, 'invalid mem off (reg/imm expected)' - end - case lexer.nexttok and lexer.nexttok.raw - when ']' - when ',' - end - lexer.readtok - arg = Memref.new(base, off) - if lexer.nexttok and lexer.nexttok.raw == '!' 
- lexer.readtok - arg.incr = :pre # TODO :post - end - else - arg = Expression.parse lexer - end - arg - end + def parse_argument(lexer) + raise lexer, "unexpected EOS" if not lexer.nexttok + if Reg.s_to_i[lexer.nexttok.raw] + arg = Reg.new Reg.s_to_i[lexer.readtok.raw] + lexer.skip_space + case lexer.nexttok.raw.downcase + when 'lsl', 'lsr', 'asr', 'ror' + arg.stype = lexer.readtok.raw.downcase.to_sym + lexer.skip_space + if Reg.s_to_i[lexer.nexttok.raw] + arg.shift = Reg.new Reg.s_to_i[lexer.readtok.raw] + else + arg.shift = Expression.parse(lexer).reduce + end + when 'rrx' + lexer.readtok + arg.stype = :ror + when '!' + lexer.readtok + arg.updated = true + end if lexer.nexttok + elsif lexer.nexttok.raw == '{' + lexer.readtok + arg = RegList.new + loop do + lexer.skip_space + raise "unterminated reglist" if lexer.eos? + if Reg.s_to_i[lexer.nexttok.raw] + arg.list << Reg.new(Reg.s_to_i[lexer.readtok.raw]) + lexer.skip_space + raise "unterminated reglist" if lexer.eos? + end + case lexer.nexttok.raw + when ','; lexer.readtok + when '-' + lexer.readtok + lexer.skip_space + raise "unterminated reglist" if lexer.eos? + if not r = Reg.s_to_i[lexer.nexttok.raw] + raise lexer, "reglist parse error: invalid range" + end + lexer.readtok + (arg.list.last.i+1..r).each { |v| + arg.list << Reg.new(v) + } + when '}'; lexer.readtok ; break + else raise lexer, "reglist parse error: ',' or '}' expected, got #{lexer.nexttok.raw.inspect}" + end + end + if lexer.nexttok and lexer.nexttok.raw == '^' + lexer.readtok + arg.usermoderegs = true + end + elsif lexer.nexttok.raw == '[' + lexer.readtok + raise "unexpected EOS" if lexer.eos? + if not base = Reg.s_to_i[lexer.nexttok.raw] + raise lexer, 'invalid mem base (reg expected)' + end + base = Reg.new Reg.s_to_i[lexer.readtok.raw] + raise "unexpected EOS" if lexer.eos? 
+ if lexer.nexttok.raw == ']' + lexer.readtok + #closed = true + end + if !lexer.nexttok or lexer.nexttok.raw != ',' + raise lexer, 'mem off expected' + end + lexer.readtok + off = parse_argument(lexer) + if not off.kind_of?(Expression) and not off.kind_of?(Reg) + raise lexer, 'invalid mem off (reg/imm expected)' + end + case lexer.nexttok and lexer.nexttok.raw + when ']' + when ',' + end + lexer.readtok + arg = Memref.new(base, off) + if lexer.nexttok and lexer.nexttok.raw == '!' + lexer.readtok + arg.incr = :pre # TODO :post + end + else + arg = Expression.parse lexer + end + arg + end end end diff --git a/lib/metasm/metasm/cpu/arm/render.rb b/lib/metasm/metasm/cpu/arm/render.rb index a39e808752..0aecb66a1e 100644 --- a/lib/metasm/metasm/cpu/arm/render.rb +++ b/lib/metasm/metasm/cpu/arm/render.rb @@ -8,48 +8,48 @@ require 'metasm/cpu/arm/opcodes' module Metasm class ARM - class Reg - include Renderable - def render - r = self.class.i_to_s[@i] - r += '!' if updated - if @stype == :lsl and @shift == 0 - [r] - elsif @stype == :ror and @shift == 0 - ["#{r} RRX"] - else - case s = @shift - when Integer; s = Expression[s == 0 ? 32 : s] # lsl and ror already accounted for - when Reg; s = self.class.i_to_s[s.i] - end - ["#{r} #{@stype.to_s.upcase} #{s}"] - end - end - end + class Reg + include Renderable + def render + r = self.class.i_to_s[@i] + r += '!' if updated + if @stype == :lsl and @shift == 0 + [r] + elsif @stype == :ror and @shift == 0 + ["#{r} RRX"] + else + case s = @shift + when Integer; s = Expression[s == 0 ? 32 : s] # lsl and ror already accounted for + when Reg; s = self.class.i_to_s[s.i] + end + ["#{r} #{@stype.to_s.upcase} #{s}"] + end + end + end - class Memref - include Renderable - def render - o = @offset - o = Expression[o] if o.kind_of? 
Integer - case @incr - when nil; ['[', @base, ', ', o, ']'] - when :pre; ['[', @base, ', ', o, ']!'] - when :post; ['[', @base, '], ', o] - end - end - end + class Memref + include Renderable + def render + o = @offset + o = Expression[o] if o.kind_of? Integer + case @incr + when nil; ['[', @base, ', ', o, ']'] + when :pre; ['[', @base, ', ', o, ']!'] + when :post; ['[', @base, '], ', o] + end + end + end - class RegList - include Renderable - def render - r = ['{'] - @list.each { |l| r << l << ', ' } - r[-1] = '}' - r << '^' if usermoderegs - r - end - end + class RegList + include Renderable + def render + r = ['{'] + @list.each { |l| r << l << ', ' } + r[-1] = '}' + r << '^' if usermoderegs + r + end + end end end diff --git a/lib/metasm/metasm/cpu/bpf/decode.rb b/lib/metasm/metasm/cpu/bpf/decode.rb index 30451389fc..363ea7680c 100644 --- a/lib/metasm/metasm/cpu/bpf/decode.rb +++ b/lib/metasm/metasm/cpu/bpf/decode.rb @@ -9,134 +9,134 @@ require 'metasm/decode' module Metasm class BPF - def build_bin_lookaside - opcode_list.inject({}) { |h, op| h.update op.bin => op } - end + def build_bin_lookaside + opcode_list.inject({}) { |h, op| h.update op.bin => op } + end - # tries to find the opcode encoded at edata.ptr - def decode_findopcode(edata) - return if edata.ptr > edata.data.length-8 - di = DecodedInstruction.new self - code = edata.data[edata.ptr, 2].unpack('v')[0] - return di if di.opcode = @bin_lookaside[code] - end + # tries to find the opcode encoded at edata.ptr + def decode_findopcode(edata) + return if edata.ptr > edata.data.length-8 + di = DecodedInstruction.new self + code = edata.data[edata.ptr, 2].unpack('v')[0] + return di if di.opcode = @bin_lookaside[code] + end - def decode_instr_op(edata, di) - op = di.opcode - di.instruction.opname = op.name - di.bin_length = 8 - code, jt, jf, k = edata.read(8).unpack('vCCV') + def decode_instr_op(edata, di) + op = di.opcode + di.instruction.opname = op.name + di.bin_length = 8 + code, jt, jf, k = 
edata.read(8).unpack('vCCV') - op.args.each { |a| - di.instruction.args << case a - when :k; Expression[k] - when :x; Reg.new(:x) - when :a; Reg.new(:a) - when :len; Reg.new(:len) - when :p_k; PktRef.new(nil, Expression[k], op.props[:msz]) - when :p_xk; PktRef.new(Reg.new(:x), Expression[k], op.props[:msz]) - when :m_k; MemRef.new(nil, Expression[4*k], 4) - when :jt; Expression[jt] - when :jf; Expression[jf] - else raise "unhandled arg #{a}" - end - } + op.args.each { |a| + di.instruction.args << case a + when :k; Expression[k] + when :x; Reg.new(:x) + when :a; Reg.new(:a) + when :len; Reg.new(:len) + when :p_k; PktRef.new(nil, Expression[k], op.props[:msz]) + when :p_xk; PktRef.new(Reg.new(:x), Expression[k], op.props[:msz]) + when :m_k; MemRef.new(nil, Expression[4*k], 4) + when :jt; Expression[jt] + when :jf; Expression[jf] + else raise "unhandled arg #{a}" + end + } - # je a, x, 0, 12 -> jne a, x, 12 - # je a, x, 12, 0 -> je a, x, 12 - if op.args[2] == :jt and di.instruction.args[2] == Expression[0] - di.opcode = op.dup - di.opcode.props.delete :stopexec - di.instruction.opname = { 'jg' => 'jle', 'jge' => 'jl', 'je' => 'jne', 'jtest' => 'jntest' }[di.instruction.opname] - di.instruction.args.delete_at(2) - elsif op.args[3] == :jf and di.instruction.args[3] == Expression[0] - di.opcode = op.dup - di.opcode.props.delete :stopexec - di.instruction.args.delete_at(3) - end + # je a, x, 0, 12 -> jne a, x, 12 + # je a, x, 12, 0 -> je a, x, 12 + if op.args[2] == :jt and di.instruction.args[2] == Expression[0] + di.opcode = op.dup + di.opcode.props.delete :stopexec + di.instruction.opname = { 'jg' => 'jle', 'jge' => 'jl', 'je' => 'jne', 'jtest' => 'jntest' }[di.instruction.opname] + di.instruction.args.delete_at(2) + elsif op.args[3] == :jf and di.instruction.args[3] == Expression[0] + di.opcode = op.dup + di.opcode.props.delete :stopexec + di.instruction.args.delete_at(3) + end - di - end + di + end - def decode_instr_interpret(di, addr) - if di.opcode.props[:setip] - 
delta = di.instruction.args[-1].reduce + 1 - arg = Expression[addr, :+, 8*delta].reduce - di.instruction.args[-1] = Expression[arg] + def decode_instr_interpret(di, addr) + if di.opcode.props[:setip] + delta = di.instruction.args[-1].reduce + 1 + arg = Expression[addr, :+, 8*delta].reduce + di.instruction.args[-1] = Expression[arg] - if di.instruction.args.length == 4 - delta = di.instruction.args[2].reduce + 1 - arg = Expression[addr, :+, 8*delta].reduce - di.instruction.args[2] = Expression[arg] - end - end + if di.instruction.args.length == 4 + delta = di.instruction.args[2].reduce + 1 + arg = Expression[addr, :+, 8*delta].reduce + di.instruction.args[2] = Expression[arg] + end + end - di - end + di + end - # hash opcode_name => lambda { |dasm, di, *symbolic_args| instr_binding } - def backtrace_binding - @backtrace_binding ||= init_backtrace_binding - end - def backtrace_binding=(b) @backtrace_binding = b end + # hash opcode_name => lambda { |dasm, di, *symbolic_args| instr_binding } + def backtrace_binding + @backtrace_binding ||= init_backtrace_binding + end + def backtrace_binding=(b) @backtrace_binding = b end - # populate the @backtrace_binding hash with default values - def init_backtrace_binding - @backtrace_binding ||= {} + # populate the @backtrace_binding hash with default values + def init_backtrace_binding + @backtrace_binding ||= {} - opcode_list.map { |ol| ol.basename }.uniq.sort.each { |op| - binding = case op - when 'mov'; lambda { |di, a0, a1| { a0 => Expression[a1] } } - when 'add'; lambda { |di, a0, a1| { a0 => Expression[a0, :+, a1] } } - when 'sub'; lambda { |di, a0, a1| { a0 => Expression[a0, :-, a1] } } - when 'mul'; lambda { |di, a0, a1| { a0 => Expression[a0, :*, a1] } } - when 'div'; lambda { |di, a0, a1| { a0 => Expression[a0, :/, a1] } } - when 'shl'; lambda { |di, a0, a1| { a0 => Expression[a0, :<<, a1] } } - when 'shr'; lambda { |di, a0, a1| { a0 => Expression[a0, :>>, a1] } } - when 'neg'; lambda { |di, a0| { a0 => Expression[:-, 
a0] } } - when 'msh'; lambda { |di, a0, a1| { a0 => Expression[[a1, :&, 0xf], :<<, 2] } } - when 'jmp', 'jg', 'jge', 'je', 'jtest', 'ret'; lambda { |di, *a| { } } - end - @backtrace_binding[op] ||= binding if binding - } + opcode_list.map { |ol| ol.basename }.uniq.sort.each { |op| + binding = case op + when 'mov'; lambda { |di, a0, a1| { a0 => Expression[a1] } } + when 'add'; lambda { |di, a0, a1| { a0 => Expression[a0, :+, a1] } } + when 'sub'; lambda { |di, a0, a1| { a0 => Expression[a0, :-, a1] } } + when 'mul'; lambda { |di, a0, a1| { a0 => Expression[a0, :*, a1] } } + when 'div'; lambda { |di, a0, a1| { a0 => Expression[a0, :/, a1] } } + when 'shl'; lambda { |di, a0, a1| { a0 => Expression[a0, :<<, a1] } } + when 'shr'; lambda { |di, a0, a1| { a0 => Expression[a0, :>>, a1] } } + when 'neg'; lambda { |di, a0| { a0 => Expression[:-, a0] } } + when 'msh'; lambda { |di, a0, a1| { a0 => Expression[[a1, :&, 0xf], :<<, 2] } } + when 'jmp', 'jg', 'jge', 'je', 'jtest', 'ret'; lambda { |di, *a| { } } + end + @backtrace_binding[op] ||= binding if binding + } - @backtrace_binding - end + @backtrace_binding + end - def get_backtrace_binding(di) - a = di.instruction.args.map { |arg| - case arg - when PktRef, MemRef, Reg; arg.symbolic(di) - else arg - end - } + def get_backtrace_binding(di) + a = di.instruction.args.map { |arg| + case arg + when PktRef, MemRef, Reg; arg.symbolic(di) + else arg + end + } - if binding = backtrace_binding[di.opcode.name] - binding[di, *a] - else - puts "unhandled instruction to backtrace: #{di}" if $VERBOSE - {:incomplete_binding => Expression[1]} - end - end + if binding = backtrace_binding[di.opcode.name] + binding[di, *a] + else + puts "unhandled instruction to backtrace: #{di}" if $VERBOSE + {:incomplete_binding => Expression[1]} + end + end - def get_xrefs_x(dasm, di) - return [] if not di.opcode.props[:setip] + def get_xrefs_x(dasm, di) + return [] if not di.opcode.props[:setip] - if di.instruction.args.length == 4 - 
di.instruction.args[-2, 2] - else - di.instruction.args[-1, 1] - end - end + if di.instruction.args.length == 4 + di.instruction.args[-2, 2] + else + di.instruction.args[-1, 1] + end + end - # updates an instruction's argument replacing an expression with another (eg label renamed) - def replace_instr_arg_immediate(i, old, new) - i.args.map! { |a| - case a - when Expression; a == old ? new : Expression[a.bind(old => new).reduce] - else a - end - } - end + # updates an instruction's argument replacing an expression with another (eg label renamed) + def replace_instr_arg_immediate(i, old, new) + i.args.map! { |a| + case a + when Expression; a == old ? new : Expression[a.bind(old => new).reduce] + else a + end + } + end end end diff --git a/lib/metasm/metasm/cpu/bpf/main.rb b/lib/metasm/metasm/cpu/bpf/main.rb index b070065cd7..7352ef1b5a 100644 --- a/lib/metasm/metasm/cpu/bpf/main.rb +++ b/lib/metasm/metasm/cpu/bpf/main.rb @@ -8,53 +8,53 @@ require 'metasm/main' module Metasm class BPF < CPU - class Reg - attr_accessor :v - def initialize(v) - @v = v - end + class Reg + attr_accessor :v + def initialize(v) + @v = v + end - def symbolic(orig=nil) ; @v ; end - end + def symbolic(orig=nil) ; @v ; end + end - class MemRef - attr_accessor :base, :offset, :msz + class MemRef + attr_accessor :base, :offset, :msz - def memtype - :mem - end + def memtype + :mem + end - def initialize(base, offset, msz) - @base = base - @offset = offset - @msz = msz - end + def initialize(base, offset, msz) + @base = base + @offset = offset + @msz = msz + end - def symbolic(orig) - p = Expression[memtype] - p = Expression[p, :+, @base.symbolic] if base - p = Expression[p, :+, @offset] if offset - Indirection[p, @msz, orig] - end - end + def symbolic(orig) + p = Expression[memtype] + p = Expression[p, :+, @base.symbolic] if base + p = Expression[p, :+, @offset] if offset + Indirection[p, @msz, orig] + end + end - class PktRef < MemRef - def memtype - :pkt - end - end + class PktRef < MemRef + 
def memtype + :pkt + end + end - def initialize(family = :latest) - super() - @endianness = :big - @size = 32 - @family = family - end + def initialize(family = :latest) + super() + @endianness = :big + @size = 32 + @family = family + end - def init_opcode_list - send("init_#@family") - @opcode_list - end + def init_opcode_list + send("init_#@family") + @opcode_list + end end end diff --git a/lib/metasm/metasm/cpu/bpf/opcodes.rb b/lib/metasm/metasm/cpu/bpf/opcodes.rb index 35a55eb0b7..f25d265704 100644 --- a/lib/metasm/metasm/cpu/bpf/opcodes.rb +++ b/lib/metasm/metasm/cpu/bpf/opcodes.rb @@ -8,74 +8,74 @@ require 'metasm/cpu/bpf/main' module Metasm class BPF - def addop(name, bin, *args) - o = Opcode.new name, bin - args.each { |a| - o.args << a if @valid_args[a] - o.props.update a if a.kind_of?(::Hash) - } - @opcode_list << o - end + def addop(name, bin, *args) + o = Opcode.new name, bin + args.each { |a| + o.args << a if @valid_args[a] + o.props.update a if a.kind_of?(::Hash) + } + @opcode_list << o + end - def addop_ldx(bin, src) - addop 'mov', bin | 0x00, :a, src - addop 'mov', bin | 0x01, :x, src - end + def addop_ldx(bin, src) + addop 'mov', bin | 0x00, :a, src + addop 'mov', bin | 0x01, :x, src + end - def addop_ldsz(bin, src) - addop 'mov', bin | 0x00, :a, src, :msz => 4 - addop 'mov', bin | 0x08, :a, src, :msz => 2 - addop 'mov', bin | 0x10, :a, src, :msz => 1 - end + def addop_ldsz(bin, src) + addop 'mov', bin | 0x00, :a, src, :msz => 4 + addop 'mov', bin | 0x08, :a, src, :msz => 2 + addop 'mov', bin | 0x10, :a, src, :msz => 1 + end - def addop_alu(name, bin) - addop name, bin | 0x04, :a, :k - addop name, bin | 0x0C, :a, :x - end + def addop_alu(name, bin) + addop name, bin | 0x04, :a, :k + addop name, bin | 0x0C, :a, :x + end - def addop_j(name, bin) - addop name, bin | 0x05 | 0x00, :a, :k, :jt, :jf, :setip => true, :stopexec => true - addop name, bin | 0x05 | 0x08, :a, :x, :jt, :jf, :setip => true, :stopexec => true - end + def addop_j(name, bin) + addop 
name, bin | 0x05 | 0x00, :a, :k, :jt, :jf, :setip => true, :stopexec => true + addop name, bin | 0x05 | 0x08, :a, :x, :jt, :jf, :setip => true, :stopexec => true + end - def init_bpf - @opcode_list = [] - [:a, :k, :x, :len, :m_k, :p_k, :p_xk, :jt, :jf].each { |a| @valid_args[a] = true } + def init_bpf + @opcode_list = [] + [:a, :k, :x, :len, :m_k, :p_k, :p_xk, :jt, :jf].each { |a| @valid_args[a] = true } - # LD/ST - addop_ldx 0x00, :k - addop_ldsz 0x20, :p_k - addop_ldsz 0x40, :p_xk - addop_ldx 0x60, :m_k - addop_ldx 0x80, :len - addop 'msh', 0xB1, :x, :p_k, :msz => 1 - addop 'mov', 0x02, :m_k, :a - addop 'mov', 0x03, :m_k, :x + # LD/ST + addop_ldx 0x00, :k + addop_ldsz 0x20, :p_k + addop_ldsz 0x40, :p_xk + addop_ldx 0x60, :m_k + addop_ldx 0x80, :len + addop 'msh', 0xB1, :x, :p_k, :msz => 1 + addop 'mov', 0x02, :m_k, :a + addop 'mov', 0x03, :m_k, :x - # ALU - addop_alu 'add', 0x00 - addop_alu 'sub', 0x10 - addop_alu 'mul', 0x20 - addop_alu 'div', 0x30 - addop_alu 'or', 0x40 - addop_alu 'and', 0x50 - addop_alu 'shl', 0x60 - addop_alu 'shr', 0x70 - addop 'neg', 0x84, :a + # ALU + addop_alu 'add', 0x00 + addop_alu 'sub', 0x10 + addop_alu 'mul', 0x20 + addop_alu 'div', 0x30 + addop_alu 'or', 0x40 + addop_alu 'and', 0x50 + addop_alu 'shl', 0x60 + addop_alu 'shr', 0x70 + addop 'neg', 0x84, :a - # JMP - addop 'jmp', 0x05, :k, :setip => true, :stopexec => true - addop_j 'je', 0x10 - addop_j 'jg', 0x20 - addop_j 'jge', 0x30 - addop_j 'jtest',0x40 - addop 'ret', 0x06, :k, :stopexec => true - addop 'ret', 0x16, :a, :stopexec => true + # JMP + addop 'jmp', 0x05, :k, :setip => true, :stopexec => true + addop_j 'je', 0x10 + addop_j 'jg', 0x20 + addop_j 'jge', 0x30 + addop_j 'jtest',0x40 + addop 'ret', 0x06, :k, :stopexec => true + addop 'ret', 0x16, :a, :stopexec => true - addop 'mov', 0x07, :x, :a - addop 'mov', 0x87, :a, :x - end + addop 'mov', 0x07, :x, :a + addop 'mov', 0x87, :a, :x + end - alias init_latest init_bpf + alias init_latest init_bpf end end diff --git 
a/lib/metasm/metasm/cpu/bpf/render.rb b/lib/metasm/metasm/cpu/bpf/render.rb index 1b4a68a27a..1cef86acf9 100644 --- a/lib/metasm/metasm/cpu/bpf/render.rb +++ b/lib/metasm/metasm/cpu/bpf/render.rb @@ -9,33 +9,33 @@ require 'metasm/render' module Metasm class BPF - class Reg - include Renderable - def render ; [@v.to_s] end - end - class MemRef - include Renderable - def render - r = [] - r << memtype - r << [nil, ' byte ', ' word ', nil, ' dword '][@msz] - r << '[' - r << @base if @base - r << '+' if @base and @offset - r << @offset if @offset - r << ']' - end - end + class Reg + include Renderable + def render ; [@v.to_s] end + end + class MemRef + include Renderable + def render + r = [] + r << memtype + r << [nil, ' byte ', ' word ', nil, ' dword '][@msz] + r << '[' + r << @base if @base + r << '+' if @base and @offset + r << @offset if @offset + r << ']' + end + end - def render_instruction(i) - r = [] - r << i.opname - if not i.args.empty? - r << ' ' - i.args.each { |a_| r << a_ << ', ' } - r.pop - end - r - end + def render_instruction(i) + r = [] + r << i.opname + if not i.args.empty? 
+ r << ' ' + i.args.each { |a_| r << a_ << ', ' } + r.pop + end + r + end end end diff --git a/lib/metasm/metasm/cpu/cy16/decode.rb b/lib/metasm/metasm/cpu/cy16/decode.rb index c3bc9a5812..b9c5cb6d4b 100644 --- a/lib/metasm/metasm/cpu/cy16/decode.rb +++ b/lib/metasm/metasm/cpu/cy16/decode.rb @@ -9,245 +9,245 @@ require 'metasm/decode' module Metasm class CY16 - def build_opcode_bin_mask(op) - # bit = 0 if can be mutated by an field value, 1 if fixed by opcode - op.bin_mask = 0 - op.fields.each { |f, off| - op.bin_mask |= (@fields_mask[f] << off) - } - op.bin_mask ^= 0xffff - end + def build_opcode_bin_mask(op) + # bit = 0 if can be mutated by an field value, 1 if fixed by opcode + op.bin_mask = 0 + op.fields.each { |f, off| + op.bin_mask |= (@fields_mask[f] << off) + } + op.bin_mask ^= 0xffff + end - def build_bin_lookaside - # sets up a hash byte value => list of opcodes that may match - # opcode.bin_mask is built here - lookaside = Array.new(256) { [] } - opcode_list.each { |op| - build_opcode_bin_mask op - b = (op.bin >> 8) & 0xff - msk = (op.bin_mask >> 8) & 0xff - for i in b..(b | (255^msk)) - lookaside[i] << op if i & msk == b & msk - end - } - lookaside - end + def build_bin_lookaside + # sets up a hash byte value => list of opcodes that may match + # opcode.bin_mask is built here + lookaside = Array.new(256) { [] } + opcode_list.each { |op| + build_opcode_bin_mask op + b = (op.bin >> 8) & 0xff + msk = (op.bin_mask >> 8) & 0xff + for i in b..(b | (255^msk)) + lookaside[i] << op if i & msk == b & msk + end + } + lookaside + end - def decode_findopcode(edata) - di = DecodedInstruction.new self - return if edata.ptr+2 > edata.length - bin = edata.decode_imm(:u16, @endianness) - edata.ptr -= 2 - return di if di.opcode = @bin_lookaside[(bin >> 8) & 0xff].find { |op| - bin & op.bin_mask == op.bin & op.bin_mask - } - end + def decode_findopcode(edata) + di = DecodedInstruction.new self + return if edata.ptr+2 > edata.length + bin = edata.decode_imm(:u16, 
@endianness) + edata.ptr -= 2 + return di if di.opcode = @bin_lookaside[(bin >> 8) & 0xff].find { |op| + bin & op.bin_mask == op.bin & op.bin_mask + } + end - def decode_instr_op_r(val, edata) - bw = ((val & 0b1000) > 0 ? 1 : 2) - case val & 0b11_0000 - when 0b00_0000 - Reg.new(val) - when 0b01_0000 - if val == 0b01_1111 - Expression[edata.decode_imm(:u16, @endianness)] - else - Memref.new(Reg.new(8+(val&7)), nil, bw) - end - when 0b10_0000 - if val & 7 == 7 - Memref.new(nil, edata.decode_imm(:u16, @endianness), bw) - else - Memref.new(Reg.new(8+(val&7)), nil, bw, true) - end - when 0b11_0000 - Memref.new(Reg.new(8+(val&7)), edata.decode_imm(:u16, @endianness), bw) - end + def decode_instr_op_r(val, edata) + bw = ((val & 0b1000) > 0 ? 1 : 2) + case val & 0b11_0000 + when 0b00_0000 + Reg.new(val) + when 0b01_0000 + if val == 0b01_1111 + Expression[edata.decode_imm(:u16, @endianness)] + else + Memref.new(Reg.new(8+(val&7)), nil, bw) + end + when 0b10_0000 + if val & 7 == 7 + Memref.new(nil, edata.decode_imm(:u16, @endianness), bw) + else + Memref.new(Reg.new(8+(val&7)), nil, bw, true) + end + when 0b11_0000 + Memref.new(Reg.new(8+(val&7)), edata.decode_imm(:u16, @endianness), bw) + end - end + end - def decode_instr_op(edata, di) - before_ptr = edata.ptr - op = di.opcode - di.instruction.opname = op.name - bin = edata.decode_imm(:u16, @endianness) + def decode_instr_op(edata, di) + before_ptr = edata.ptr + op = di.opcode + di.instruction.opname = op.name + bin = edata.decode_imm(:u16, @endianness) - field_val = lambda { |f| - if off = op.fields[f] - (bin >> off) & @fields_mask[f] - end - } + field_val = lambda { |f| + if off = op.fields[f] + (bin >> off) & @fields_mask[f] + end + } - op.args.each { |a| - di.instruction.args << case a - when :rs, :rd; decode_instr_op_r(field_val[a], edata) - when :o7; Expression[2*Expression.make_signed(field_val[a], 7)] - when :x7; Expression[field_val[a]] - when :u3; Expression[field_val[a]+1] - else raise SyntaxError, "Internal 
error: invalid argument #{a} in #{op.name}" - end - } + op.args.each { |a| + di.instruction.args << case a + when :rs, :rd; decode_instr_op_r(field_val[a], edata) + when :o7; Expression[2*Expression.make_signed(field_val[a], 7)] + when :x7; Expression[field_val[a]] + when :u3; Expression[field_val[a]+1] + else raise SyntaxError, "Internal error: invalid argument #{a} in #{op.name}" + end + } - di.instruction.args.reverse! + di.instruction.args.reverse! - di.bin_length += edata.ptr - before_ptr + di.bin_length += edata.ptr - before_ptr - di - rescue InvalidRD - end + di + rescue InvalidRD + end - def decode_instr_interpret(di, addr) - if di.opcode.props[:setip] and di.opcode.args.last == :o7 - delta = di.instruction.args.last.reduce - arg = Expression[[addr, :+, di.bin_length], :+, delta].reduce - di.instruction.args[-1] = Expression[arg] - end + def decode_instr_interpret(di, addr) + if di.opcode.props[:setip] and di.opcode.args.last == :o7 + delta = di.instruction.args.last.reduce + arg = Expression[[addr, :+, di.bin_length], :+, delta].reduce + di.instruction.args[-1] = Expression[arg] + end - di - end + di + end - # hash opcode_name => lambda { |dasm, di, *symbolic_args| instr_binding } - def backtrace_binding - @backtrace_binding ||= init_backtrace_binding - end - def backtrace_binding=(b) @backtrace_binding = b end + # hash opcode_name => lambda { |dasm, di, *symbolic_args| instr_binding } + def backtrace_binding + @backtrace_binding ||= init_backtrace_binding + end + def backtrace_binding=(b) @backtrace_binding = b end - # populate the @backtrace_binding hash with default values - def init_backtrace_binding - @backtrace_binding ||= {} + # populate the @backtrace_binding hash with default values + def init_backtrace_binding + @backtrace_binding ||= {} - mask = 0xffff + mask = 0xffff - opcode_list.map { |ol| ol.basename }.uniq.sort.each { |op| - binding = case op - when 'mov'; lambda { |di, a0, a1| { a0 => Expression[a1] } } - when 'add', 'adc', 'sub', 'sbc', 
'and', 'xor', 'or', 'addi', 'subi' - lambda { |di, a0, a1| - e_op = { 'add' => :+, 'adc' => :+, 'sub' => :-, 'sbc' => :-, 'and' => :&, - 'xor' => :^, 'or' => :|, 'addi' => :+, 'subi' => :- }[op] - ret = Expression[a0, e_op, a1] - ret = Expression[ret, e_op, :flag_c] if op == 'adc' or op == 'sbb' - # optimises eax ^ eax => 0 - # avoid hiding memory accesses (to not hide possible fault) - ret = Expression[ret.reduce] if not a0.kind_of? Indirection - { a0 => ret } - } - when 'cmp', 'test'; lambda { |di, *a| {} } - when 'not'; lambda { |di, a0| { a0 => Expression[a0, :^, mask] } } - when 'call' - lambda { |di, a0| { :sp => Expression[:sp, :-, 2], - Indirection[:sp, 2, di.address] => Expression[di.next_addr] } - } - when 'ret'; lambda { |di, *a| { :sp => Expression[:sp, :+, 2] } } - # TODO callCC, retCC ... - when /^j/; lambda { |di, *a| {} } - end + opcode_list.map { |ol| ol.basename }.uniq.sort.each { |op| + binding = case op + when 'mov'; lambda { |di, a0, a1| { a0 => Expression[a1] } } + when 'add', 'adc', 'sub', 'sbc', 'and', 'xor', 'or', 'addi', 'subi' + lambda { |di, a0, a1| + e_op = { 'add' => :+, 'adc' => :+, 'sub' => :-, 'sbc' => :-, 'and' => :&, + 'xor' => :^, 'or' => :|, 'addi' => :+, 'subi' => :- }[op] + ret = Expression[a0, e_op, a1] + ret = Expression[ret, e_op, :flag_c] if op == 'adc' or op == 'sbb' + # optimises eax ^ eax => 0 + # avoid hiding memory accesses (to not hide possible fault) + ret = Expression[ret.reduce] if not a0.kind_of? Indirection + { a0 => ret } + } + when 'cmp', 'test'; lambda { |di, *a| {} } + when 'not'; lambda { |di, a0| { a0 => Expression[a0, :^, mask] } } + when 'call' + lambda { |di, a0| { :sp => Expression[:sp, :-, 2], + Indirection[:sp, 2, di.address] => Expression[di.next_addr] } + } + when 'ret'; lambda { |di, *a| { :sp => Expression[:sp, :+, 2] } } + # TODO callCC, retCC ... + when /^j/; lambda { |di, *a| {} } + end - # TODO flags ? + # TODO flags ? 
- @backtrace_binding[op] ||= binding if binding - } - @backtrace_binding - end + @backtrace_binding[op] ||= binding if binding + } + @backtrace_binding + end - def get_backtrace_binding(di) - a = di.instruction.args.map { |arg| - case arg - when Memref, Reg; arg.symbolic(di) - else arg - end - } + def get_backtrace_binding(di) + a = di.instruction.args.map { |arg| + case arg + when Memref, Reg; arg.symbolic(di) + else arg + end + } - if binding = backtrace_binding[di.opcode.basename] - bd = {} - di.instruction.args.each { |aa| bd[aa.base.symbolic] = Expression[aa.base.symbolic, :+, aa.sz] if aa.kind_of?(Memref) and aa.autoincr } - bd.update binding[di, *a] - else - puts "unhandled instruction to backtrace: #{di}" if $VERBOSE - # assume nothing except the 1st arg is modified - case a[0] - when Indirection, Symbol; { a[0] => Expression::Unknown } - when Expression; (x = a[0].externals.first) ? { x => Expression::Unknown } : {} - else {} - end.update(:incomplete_binding => Expression[1]) - end - end + if binding = backtrace_binding[di.opcode.basename] + bd = {} + di.instruction.args.each { |aa| bd[aa.base.symbolic] = Expression[aa.base.symbolic, :+, aa.sz] if aa.kind_of?(Memref) and aa.autoincr } + bd.update binding[di, *a] + else + puts "unhandled instruction to backtrace: #{di}" if $VERBOSE + # assume nothing except the 1st arg is modified + case a[0] + when Indirection, Symbol; { a[0] => Expression::Unknown } + when Expression; (x = a[0].externals.first) ? 
{ x => Expression::Unknown } : {} + else {} + end.update(:incomplete_binding => Expression[1]) + end + end - # patch a forward binding from the backtrace binding - def fix_fwdemu_binding(di, fbd) - case di.opcode.name - when 'call'; fbd[Indirection[[:sp, :-, 2], 2]] = fbd.delete(Indirection[:sp, 2]) - end - fbd - end + # patch a forward binding from the backtrace binding + def fix_fwdemu_binding(di, fbd) + case di.opcode.name + when 'call'; fbd[Indirection[[:sp, :-, 2], 2]] = fbd.delete(Indirection[:sp, 2]) + end + fbd + end - def get_xrefs_x(dasm, di) - return [] if not di.opcode.props[:setip] + def get_xrefs_x(dasm, di) + return [] if not di.opcode.props[:setip] - return [Indirection[:sp, 2, di.address]] if di.opcode.name =~ /^r/ + return [Indirection[:sp, 2, di.address]] if di.opcode.name =~ /^r/ - case tg = di.instruction.args.first - when Memref; [Expression[tg.symbolic(di)]] - when Reg; [Expression[tg.symbolic(di)]] - when Expression, ::Integer; [Expression[tg]] - else - puts "unhandled setip at #{di.address} #{di.instruction}" if $DEBUG - [] - end - end + case tg = di.instruction.args.first + when Memref; [Expression[tg.symbolic(di)]] + when Reg; [Expression[tg.symbolic(di)]] + when Expression, ::Integer; [Expression[tg]] + else + puts "unhandled setip at #{di.address} #{di.instruction}" if $DEBUG + [] + end + end - # checks if expr is a valid return expression matching the :saveip instruction - def backtrace_is_function_return(expr, di=nil) - expr = Expression[expr].reduce_rec - expr.kind_of?(Indirection) and expr.len == 2 and expr.target == Expression[:sp] - end + # checks if expr is a valid return expression matching the :saveip instruction + def backtrace_is_function_return(expr, di=nil) + expr = Expression[expr].reduce_rec + expr.kind_of?(Indirection) and expr.len == 2 and expr.target == Expression[:sp] + end - # updates the function backtrace_binding - # if the function is big and no specific register is given, do nothing (the binding will be lazily 
updated later, on demand) - def backtrace_update_function_binding(dasm, faddr, f, retaddrlist, *wantregs) - b = f.backtrace_binding + # updates the function backtrace_binding + # if the function is big and no specific register is given, do nothing (the binding will be lazily updated later, on demand) + def backtrace_update_function_binding(dasm, faddr, f, retaddrlist, *wantregs) + b = f.backtrace_binding - bt_val = lambda { |r| - next if not retaddrlist - b[r] = Expression::Unknown - bt = [] - retaddrlist.each { |retaddr| - bt |= dasm.backtrace(Expression[r], retaddr, :include_start => true, - :snapshot_addr => faddr, :origin => retaddr) - } - if bt.length != 1 - b[r] = Expression::Unknown - else - b[r] = bt.first - end - } + bt_val = lambda { |r| + next if not retaddrlist + b[r] = Expression::Unknown + bt = [] + retaddrlist.each { |retaddr| + bt |= dasm.backtrace(Expression[r], retaddr, :include_start => true, + :snapshot_addr => faddr, :origin => retaddr) + } + if bt.length != 1 + b[r] = Expression::Unknown + else + b[r] = bt.first + end + } - if not wantregs.empty? - wantregs.each(&bt_val) - else - bt_val[:sp] - end + if not wantregs.empty? + wantregs.each(&bt_val) + else + bt_val[:sp] + end - b - end + b + end - # returns true if the expression is an address on the stack - def backtrace_is_stack_address(expr) - Expression[expr].expr_externals.include?(:sp) - end + # returns true if the expression is an address on the stack + def backtrace_is_stack_address(expr) + Expression[expr].expr_externals.include?(:sp) + end - # updates an instruction's argument replacing an expression with another (eg label renamed) - def replace_instr_arg_immediate(i, old, new) - i.args.map! { |a| - case a - when Expression; a == old ? new : Expression[a.bind(old => new).reduce] - when Memref - a.offset = (a.offset == old ? 
new : Expression[a.offset.bind(old => new).reduce]) if a.offset - a - else a - end - } - end + # updates an instruction's argument replacing an expression with another (eg label renamed) + def replace_instr_arg_immediate(i, old, new) + i.args.map! { |a| + case a + when Expression; a == old ? new : Expression[a.bind(old => new).reduce] + when Memref + a.offset = (a.offset == old ? new : Expression[a.offset.bind(old => new).reduce]) if a.offset + a + else a + end + } + end end end diff --git a/lib/metasm/metasm/cpu/cy16/main.rb b/lib/metasm/metasm/cpu/cy16/main.rb index 704bda76c5..7ca90ecf8a 100644 --- a/lib/metasm/metasm/cpu/cy16/main.rb +++ b/lib/metasm/metasm/cpu/cy16/main.rb @@ -8,56 +8,56 @@ require 'metasm/main' module Metasm class CY16 < CPU - class Reg - class << self - attr_accessor :s_to_i, :i_to_s - end - @i_to_s = (0..14).inject({}) { |h, i| h.update i => "r#{i}" } - @i_to_s[15] = 'sp' - @s_to_i = @i_to_s.invert + class Reg + class << self + attr_accessor :s_to_i, :i_to_s + end + @i_to_s = (0..14).inject({}) { |h, i| h.update i => "r#{i}" } + @i_to_s[15] = 'sp' + @s_to_i = @i_to_s.invert - attr_accessor :i - def initialize(i) - @i = i - end + attr_accessor :i + def initialize(i) + @i = i + end - def symbolic(orig=nil) ; to_s.to_sym ; end + def symbolic(orig=nil) ; to_s.to_sym ; end - def self.from_str(s) - raise "Bad name #{s.inspect}" if not x = @s_to_i[s] - new(x) - end - end + def self.from_str(s) + raise "Bad name #{s.inspect}" if not x = @s_to_i[s] + new(x) + end + end - class Memref - attr_accessor :base, :offset, :sz, :autoincr - def initialize(base, offset, sz=nil, autoincr=nil) - @base = base - offset = Expression[offset] if offset - @offset = offset - @sz = sz - @autoincr = autoincr - end + class Memref + attr_accessor :base, :offset, :sz, :autoincr + def initialize(base, offset, sz=nil, autoincr=nil) + @base = base + offset = Expression[offset] if offset + @offset = offset + @sz = sz + @autoincr = autoincr + end - def symbolic(orig) - p = nil 
- p = Expression[p, :+, @base.symbolic] if base - p = Expression[p, :+, @offset] if offset - Indirection[p.reduce, @sz, orig] - end - end + def symbolic(orig) + p = nil + p = Expression[p, :+, @base.symbolic] if base + p = Expression[p, :+, @offset] if offset + Indirection[p.reduce, @sz, orig] + end + end - def initialize(family = :latest) - super() - @endianness = :little - @size = 16 - @family = family - end + def initialize(family = :latest) + super() + @endianness = :little + @size = 16 + @family = family + end - def init_opcode_list - send("init_#@family") - @opcode_list - end + def init_opcode_list + send("init_#@family") + @opcode_list + end end end diff --git a/lib/metasm/metasm/cpu/cy16/opcodes.rb b/lib/metasm/metasm/cpu/cy16/opcodes.rb index 1fb20e4953..230a7ec08b 100644 --- a/lib/metasm/metasm/cpu/cy16/opcodes.rb +++ b/lib/metasm/metasm/cpu/cy16/opcodes.rb @@ -8,71 +8,71 @@ require 'metasm/cpu/cy16/main' module Metasm class CY16 - def addop(name, bin, *args) - o = Opcode.new name, bin - args.each { |a| - o.args << a if @fields_mask[a] or @valid_args[a] - o.props[a] = true if @valid_props[a] - o.fields[a] = @fields_shift[a] if @fields_mask[a] - raise "wtf #{a.inspect}" unless @valid_args[a] or @valid_props[a] or @fields_mask[a] - } - @opcode_list << o - end + def addop(name, bin, *args) + o = Opcode.new name, bin + args.each { |a| + o.args << a if @fields_mask[a] or @valid_args[a] + o.props[a] = true if @valid_props[a] + o.fields[a] = @fields_shift[a] if @fields_mask[a] + raise "wtf #{a.inspect}" unless @valid_args[a] or @valid_props[a] or @fields_mask[a] + } + @opcode_list << o + end - def addop_macrocc(name, bin, *args) - %w[z nz b ae s ns o no a be g ge l le].each_with_index { |cc, i| - dbin = bin - dbin |= i << 8 - addop name + cc, dbin, *args - } - end + def addop_macrocc(name, bin, *args) + %w[z nz b ae s ns o no a be g ge l le].each_with_index { |cc, i| + dbin = bin + dbin |= i << 8 + addop name + cc, dbin, *args + } + end - def init_cy16 - 
@opcode_list = [] - @valid_args.update [:rs, :rd, :o7 - ].inject({}) { |h, v| h.update v => true } - @fields_mask.update :rs => 0x3f, :rd => 0x3f, :o7 => 0x7f, :x7 => 0x7f, :u3 => 7 - @fields_shift.update :rs => 6, :rd => 0, :o7 => 0, :x7 => 0, :u3 => 6 + def init_cy16 + @opcode_list = [] + @valid_args.update [:rs, :rd, :o7 + ].inject({}) { |h, v| h.update v => true } + @fields_mask.update :rs => 0x3f, :rd => 0x3f, :o7 => 0x7f, :x7 => 0x7f, :u3 => 7 + @fields_shift.update :rs => 6, :rd => 0, :o7 => 0, :x7 => 0, :u3 => 6 - addop 'mov', 0<<12, :rs, :rd - addop 'add', 1<<12, :rs, :rd - addop 'adc', 2<<12, :rs, :rd - addop 'addc',2<<12, :rs, :rd - addop 'sub', 3<<12, :rs, :rd - addop 'sbb', 4<<12, :rs, :rd - addop 'subb',4<<12, :rs, :rd - addop 'cmp', 5<<12, :rs, :rd - addop 'and', 6<<12, :rs, :rd - addop 'test',7<<12, :rs, :rd - addop 'or', 8<<12, :rs, :rd - addop 'xor', 9<<12, :rs, :rd + addop 'mov', 0<<12, :rs, :rd + addop 'add', 1<<12, :rs, :rd + addop 'adc', 2<<12, :rs, :rd + addop 'addc',2<<12, :rs, :rd + addop 'sub', 3<<12, :rs, :rd + addop 'sbb', 4<<12, :rs, :rd + addop 'subb',4<<12, :rs, :rd + addop 'cmp', 5<<12, :rs, :rd + addop 'and', 6<<12, :rs, :rd + addop 'test',7<<12, :rs, :rd + addop 'or', 8<<12, :rs, :rd + addop 'xor', 9<<12, :rs, :rd - addop_macrocc 'int', (10<<12), :x7 - addop 'int', (10<<12) | (15<<8), :x7 - addop_macrocc 'c', (10<<12) | (1<<7), :setip, :saveip, :rd - addop 'call',(10<<12) | (15<<8) | (1<<7), :setip, :stopexec, :saveip, :rd - addop_macrocc 'r', (12<<12) | (1<<7) | 0b010111, :setip # must come before absolute jmp - addop 'ret', (12<<12) | (15<<8) | (1<<7) | 0b010111, :setip, :stopexec - addop_macrocc 'j', (12<<12), :setip, :o7 # relative - addop 'jmp', (12<<12) | (15<<8), :setip, :stopexec, :o7 # relative - addop_macrocc 'j', (12<<12) | (1<<7), :setip, :rd # absolute - addop 'jmp', (12<<12) | (15<<8) | (1<<7), :setip, :stopexec, :rd # absolute + addop_macrocc 'int', (10<<12), :x7 + addop 'int', (10<<12) | (15<<8), :x7 + addop_macrocc 
'c', (10<<12) | (1<<7), :setip, :saveip, :rd + addop 'call',(10<<12) | (15<<8) | (1<<7), :setip, :stopexec, :saveip, :rd + addop_macrocc 'r', (12<<12) | (1<<7) | 0b010111, :setip # must come before absolute jmp + addop 'ret', (12<<12) | (15<<8) | (1<<7) | 0b010111, :setip, :stopexec + addop_macrocc 'j', (12<<12), :setip, :o7 # relative + addop 'jmp', (12<<12) | (15<<8), :setip, :stopexec, :o7 # relative + addop_macrocc 'j', (12<<12) | (1<<7), :setip, :rd # absolute + addop 'jmp', (12<<12) | (15<<8) | (1<<7), :setip, :stopexec, :rd # absolute - addop 'shr', (13<<12) | (0<<9), :u3, :rd - addop 'shl', (13<<12) | (1<<9), :u3, :rd - addop 'ror', (13<<12) | (2<<9), :u3, :rd - addop 'rol', (13<<12) | (3<<9), :u3, :rd - addop 'addi',(13<<12) | (4<<9), :u3, :rd - addop 'subi',(13<<12) | (5<<9), :u3, :rd - addop 'not', (13<<12) | (7<<9) | (0<<6), :rd - addop 'neg', (13<<12) | (7<<9) | (1<<6), :rd - addop 'cbw', (13<<12) | (7<<9) | (4<<6), :rd - addop 'sti', (13<<12) | (7<<9) | (7<<6) | 0 - addop 'cli', (13<<12) | (7<<9) | (7<<6) | 1 - addop 'stc', (13<<12) | (7<<9) | (7<<6) | 2 - addop 'clc', (13<<12) | (7<<9) | (7<<6) | 3 - end + addop 'shr', (13<<12) | (0<<9), :u3, :rd + addop 'shl', (13<<12) | (1<<9), :u3, :rd + addop 'ror', (13<<12) | (2<<9), :u3, :rd + addop 'rol', (13<<12) | (3<<9), :u3, :rd + addop 'addi',(13<<12) | (4<<9), :u3, :rd + addop 'subi',(13<<12) | (5<<9), :u3, :rd + addop 'not', (13<<12) | (7<<9) | (0<<6), :rd + addop 'neg', (13<<12) | (7<<9) | (1<<6), :rd + addop 'cbw', (13<<12) | (7<<9) | (4<<6), :rd + addop 'sti', (13<<12) | (7<<9) | (7<<6) | 0 + addop 'cli', (13<<12) | (7<<9) | (7<<6) | 1 + addop 'stc', (13<<12) | (7<<9) | (7<<6) | 2 + addop 'clc', (13<<12) | (7<<9) | (7<<6) | 3 + end - alias init_latest init_cy16 + alias init_latest init_cy16 end end diff --git a/lib/metasm/metasm/cpu/cy16/render.rb b/lib/metasm/metasm/cpu/cy16/render.rb index 2c2a680435..506fc6f8b2 100644 --- a/lib/metasm/metasm/cpu/cy16/render.rb +++ 
b/lib/metasm/metasm/cpu/cy16/render.rb @@ -9,33 +9,33 @@ require 'metasm/render' module Metasm class CY16 - class Reg - include Renderable - def render ; [self.class.i_to_s[@i]] end - end - class Memref - include Renderable - def render - r = [] - r << (@sz == 1 ? 'byte ptr ' : 'word ptr ') - r << '[' - r << @base if @base - r << '++' if @autoincr - r << ' + ' if @base and @offset - r << @offset if @offset - r << ']' - end - end + class Reg + include Renderable + def render ; [self.class.i_to_s[@i]] end + end + class Memref + include Renderable + def render + r = [] + r << (@sz == 1 ? 'byte ptr ' : 'word ptr ') + r << '[' + r << @base if @base + r << '++' if @autoincr + r << ' + ' if @base and @offset + r << @offset if @offset + r << ']' + end + end - def render_instruction(i) - r = [] - r << i.opname - if not i.args.empty? - r << ' ' - i.args.each { |a_| r << a_ << ', ' } - r.pop - end - r - end + def render_instruction(i) + r = [] + r << i.opname + if not i.args.empty? + r << ' ' + i.args.each { |a_| r << a_ << ', ' } + r.pop + end + r + end end end diff --git a/lib/metasm/metasm/cpu/dalvik/decode.rb b/lib/metasm/metasm/cpu/dalvik/decode.rb index a52257b797..14616347e2 100644 --- a/lib/metasm/metasm/cpu/dalvik/decode.rb +++ b/lib/metasm/metasm/cpu/dalvik/decode.rb @@ -8,211 +8,211 @@ require 'metasm/decode' module Metasm class Dalvik - def build_bin_lookaside - end + def build_bin_lookaside + end - def decode_findopcode(edata) - return if edata.ptr+2 > edata.length - di = DecodedInstruction.new(self) - di.opcode = opcode_list[edata.decode_imm(:u16, @endianness) & 0xff] - edata.ptr -= 2 - di - end + def decode_findopcode(edata) + return if edata.ptr+2 > edata.length + di = DecodedInstruction.new(self) + di.opcode = opcode_list[edata.decode_imm(:u16, @endianness) & 0xff] + edata.ptr -= 2 + di + end - def decode_instr_op(edata, di) - op = di.opcode - di.instruction.opname = op.name + def decode_instr_op(edata, di) + op = di.opcode + di.instruction.opname = op.name - 
val = [edata.decode_imm(:u16, @endianness)] + val = [edata.decode_imm(:u16, @endianness)] - op.args.each { |a| - di.instruction.args << case a - when :i16 - val << edata.decode_imm(:i16, @endianness) - Expression[val.last] - when :u16 - val << edata.decode_imm(:u16, @endianness) - Expression[val.last] - when :r16 - val << edata.decode_imm(:u16, @endianness) - Reg.new(val.last) - when :i16_32hi - val << edata.decode_imm(:i16, @endianness) - Expression[val.last << 16] - when :i16_64hi - val << edata.decode_imm(:i16, @endianness) - Expression[val.last << 48] - when :i32 - val << edata.decode_imm(:u16, @endianness) - val << edata.decode_imm(:i16, @endianness) - Expression[val[-2] | (val[-1] << 16)] - when :u32 - val << edata.decode_imm(:u16, @endianness) - val << edata.decode_imm(:u16, @endianness) - Expression[val[-2] | (val[-1] << 16)] - when :u64 - val << edata.decode_imm(:u16, @endianness) - val << edata.decode_imm(:u16, @endianness) - val << edata.decode_imm(:u16, @endianness) - val << edata.decode_imm(:u16, @endianness) - Expression[val[-4] | (val[-3] << 16) | (val[-2] << 32) | (val[-1] << 48)] - when :ra - Reg.new((val[0] >> 8) & 0xf) - when :rb - Reg.new((val[0] >> 12) & 0xf) - when :ib - Expression[Expression.make_signed((val[0] >> 12) & 0xf, 4)] - when :raa - Reg.new((val[0] >> 8) & 0xff) - when :iaa - Expression[Expression.make_signed((val[0] >> 8) & 0xff, 8)] - when :rbb - val[1] ||= edata.decode_imm(:u16, @endianness) - Reg.new(val[1] & 0xff) - when :ibb - val[1] ||= edata.decode_imm(:u16, @endianness) - Expression[Expression.make_signed(val[1] & 0xff, 8)] - when :rcc - val[1] ||= edata.decode_imm(:u16, @endianness) - Reg.new((val[1] >> 8) & 0xff) - when :icc - val[1] ||= edata.decode_imm(:u16, @endianness) - Expression[Expression.make_signed((val[1] >> 8) & 0xff, 8)] - when :rlist4, :rlist5 - cnt = (val[0] >> 12) & 0xf - val << edata.decode_imm(:u16, @endianness) - [cnt, 4].min.times { - di.instruction.args << Reg.new(val[-1] & 0xf) - val[-1] >>= 4 - } - 
di.instruction.args << Reg.new((val[0] >> 8) & 0xf) if cnt > 4 - next - when :rlist16 - cnt = (val[0] >> 8) & 0xff - val << edata.decode_imm(:u16, @endianness) - cnt.times { |c| - di.instruction.args << Reg.new(val[-1] + c) - } - next - when :m16 - val << edata.decode_imm(:u16, @endianness) - DexMethod.new(@dex, val.last) - when :fld16 - val << edata.decode_imm(:u16, @endianness) - DexField.new(@dex, val.last) - when :typ16 - val << edata.decode_imm(:u16, @endianness) - DexType.new(@dex, val.last) - when :str16 - val << edata.decode_imm(:u16, @endianness) - DexString.new(@dex, val.last) - else raise SyntaxError, "Internal error: invalid argument #{a} in #{op.name}" - end - } + op.args.each { |a| + di.instruction.args << case a + when :i16 + val << edata.decode_imm(:i16, @endianness) + Expression[val.last] + when :u16 + val << edata.decode_imm(:u16, @endianness) + Expression[val.last] + when :r16 + val << edata.decode_imm(:u16, @endianness) + Reg.new(val.last) + when :i16_32hi + val << edata.decode_imm(:i16, @endianness) + Expression[val.last << 16] + when :i16_64hi + val << edata.decode_imm(:i16, @endianness) + Expression[val.last << 48] + when :i32 + val << edata.decode_imm(:u16, @endianness) + val << edata.decode_imm(:i16, @endianness) + Expression[val[-2] | (val[-1] << 16)] + when :u32 + val << edata.decode_imm(:u16, @endianness) + val << edata.decode_imm(:u16, @endianness) + Expression[val[-2] | (val[-1] << 16)] + when :u64 + val << edata.decode_imm(:u16, @endianness) + val << edata.decode_imm(:u16, @endianness) + val << edata.decode_imm(:u16, @endianness) + val << edata.decode_imm(:u16, @endianness) + Expression[val[-4] | (val[-3] << 16) | (val[-2] << 32) | (val[-1] << 48)] + when :ra + Reg.new((val[0] >> 8) & 0xf) + when :rb + Reg.new((val[0] >> 12) & 0xf) + when :ib + Expression[Expression.make_signed((val[0] >> 12) & 0xf, 4)] + when :raa + Reg.new((val[0] >> 8) & 0xff) + when :iaa + Expression[Expression.make_signed((val[0] >> 8) & 0xff, 8)] + when :rbb + 
val[1] ||= edata.decode_imm(:u16, @endianness) + Reg.new(val[1] & 0xff) + when :ibb + val[1] ||= edata.decode_imm(:u16, @endianness) + Expression[Expression.make_signed(val[1] & 0xff, 8)] + when :rcc + val[1] ||= edata.decode_imm(:u16, @endianness) + Reg.new((val[1] >> 8) & 0xff) + when :icc + val[1] ||= edata.decode_imm(:u16, @endianness) + Expression[Expression.make_signed((val[1] >> 8) & 0xff, 8)] + when :rlist4, :rlist5 + cnt = (val[0] >> 12) & 0xf + val << edata.decode_imm(:u16, @endianness) + [cnt, 4].min.times { + di.instruction.args << Reg.new(val[-1] & 0xf) + val[-1] >>= 4 + } + di.instruction.args << Reg.new((val[0] >> 8) & 0xf) if cnt > 4 + next + when :rlist16 + cnt = (val[0] >> 8) & 0xff + val << edata.decode_imm(:u16, @endianness) + cnt.times { |c| + di.instruction.args << Reg.new(val[-1] + c) + } + next + when :m16 + val << edata.decode_imm(:u16, @endianness) + DexMethod.new(@dex, val.last) + when :fld16 + val << edata.decode_imm(:u16, @endianness) + DexField.new(@dex, val.last) + when :typ16 + val << edata.decode_imm(:u16, @endianness) + DexType.new(@dex, val.last) + when :str16 + val << edata.decode_imm(:u16, @endianness) + DexString.new(@dex, val.last) + else raise SyntaxError, "Internal error: invalid argument #{a} in #{op.name}" + end + } - di.bin_length = val.length*2 + di.bin_length = val.length*2 - return if edata.ptr > edata.length + return if edata.ptr > edata.length - di - end + di + end - def decode_instr_interpret(di, addr) - if di.opcode.props[:setip] and di.instruction.args.last.kind_of? Expression and di.instruction.opname =~ /^if|^goto/ - arg = Expression[addr, :+, [di.instruction.args.last, :*, 2]].reduce - di.instruction.args[-1] = Expression[arg] - end + def decode_instr_interpret(di, addr) + if di.opcode.props[:setip] and di.instruction.args.last.kind_of? 
Expression and di.instruction.opname =~ /^if|^goto/ + arg = Expression[addr, :+, [di.instruction.args.last, :*, 2]].reduce + di.instruction.args[-1] = Expression[arg] + end - di - end + di + end - def backtrace_binding - @backtrace_binding ||= init_backtrace_binding - end + def backtrace_binding + @backtrace_binding ||= init_backtrace_binding + end - def init_backtrace_binding - @backtrace_binding ||= {} - sz = @size/8 - @opcode_list.each { |op| - case op.name - when /invoke/ - @backtrace_binding[op.name] = lambda { |di, *args| { - :callstack => Expression[:callstack, :-, sz], - Indirection[:callstack, sz] => Expression[di.next_addr] - } } - when /return/ - @backtrace_binding[op.name] = lambda { |di, *args| { - :callstack => Expression[:callstack, :+, sz] - } } - end - } - @backtrace_binding - end + def init_backtrace_binding + @backtrace_binding ||= {} + sz = @size/8 + @opcode_list.each { |op| + case op.name + when /invoke/ + @backtrace_binding[op.name] = lambda { |di, *args| { + :callstack => Expression[:callstack, :-, sz], + Indirection[:callstack, sz] => Expression[di.next_addr] + } } + when /return/ + @backtrace_binding[op.name] = lambda { |di, *args| { + :callstack => Expression[:callstack, :+, sz] + } } + end + } + @backtrace_binding + end - def get_backtrace_binding(di) - a = di.instruction.args.map { |arg| - case arg - when Reg; arg.symbolic - else arg - end - } + def get_backtrace_binding(di) + a = di.instruction.args.map { |arg| + case arg + when Reg; arg.symbolic + else arg + end + } - if binding = backtrace_binding[di.opcode.name] - binding[di, *a] - else - puts "unhandled instruction to backtrace: #{di}" if $VERBOSE - # assume nothing except the 1st arg is modified - case a[0] - when Indirection, Symbol; { a[0] => Expression::Unknown } - when Expression; (x = a[0].externals.first) ? 
{ x => Expression::Unknown } : {} - else {} - end.update(:incomplete_binding => Expression[1]) - end + if binding = backtrace_binding[di.opcode.name] + binding[di, *a] + else + puts "unhandled instruction to backtrace: #{di}" if $VERBOSE + # assume nothing except the 1st arg is modified + case a[0] + when Indirection, Symbol; { a[0] => Expression::Unknown } + when Expression; (x = a[0].externals.first) ? { x => Expression::Unknown } : {} + else {} + end.update(:incomplete_binding => Expression[1]) + end - end + end - def get_xrefs_x(dasm, di) - if di.opcode.props[:saveip] - m = di.instruction.args.first - if m.kind_of?(DexMethod) and m.off - [m.off] - else - [:default] - end - elsif di.opcode.props[:setip] - if di.opcode.name =~ /^return/ - [Indirection[:callstack, @size/8]] - elsif di.opcode.name =~ /^if|^goto/ - [di.instruction.args.last] - else - [] # [di.instruction.args.last] - end - else - [] - end - end + def get_xrefs_x(dasm, di) + if di.opcode.props[:saveip] + m = di.instruction.args.first + if m.kind_of?(DexMethod) and m.off + [m.off] + else + [:default] + end + elsif di.opcode.props[:setip] + if di.opcode.name =~ /^return/ + [Indirection[:callstack, @size/8]] + elsif di.opcode.name =~ /^if|^goto/ + [di.instruction.args.last] + else + [] # [di.instruction.args.last] + end + else + [] + end + end - # returns a DecodedFunction suitable for :default - # uses disassembler_default_bt{for/bind}_callback - def disassembler_default_func - df = DecodedFunction.new - ra = Indirection[:callstack, @size/8] - df.backtracked_for << BacktraceTrace.new(ra, :default, ra, :x, nil) - df.backtrace_binding[:callstack] = Expression[:callstack, :+, @size/8] - df.btfor_callback = lambda { |dasm, btfor, funcaddr, calladdr| - if funcaddr != :default - btfor - elsif di = dasm.decoded[calladdr] and di.opcode.props[:saveip] - btfor - else [] - end - } + # returns a DecodedFunction suitable for :default + # uses disassembler_default_bt{for/bind}_callback + def 
disassembler_default_func + df = DecodedFunction.new + ra = Indirection[:callstack, @size/8] + df.backtracked_for << BacktraceTrace.new(ra, :default, ra, :x, nil) + df.backtrace_binding[:callstack] = Expression[:callstack, :+, @size/8] + df.btfor_callback = lambda { |dasm, btfor, funcaddr, calladdr| + if funcaddr != :default + btfor + elsif di = dasm.decoded[calladdr] and di.opcode.props[:saveip] + btfor + else [] + end + } - df - end + df + end - def backtrace_is_function_return(expr, di=nil) - expr and Expression[expr] == Expression[Indirection[:callstack, @size/8]] - end + def backtrace_is_function_return(expr, di=nil) + expr and Expression[expr] == Expression[Indirection[:callstack, @size/8]] + end end end diff --git a/lib/metasm/metasm/cpu/dalvik/main.rb b/lib/metasm/metasm/cpu/dalvik/main.rb index 264192cfb4..92665907fe 100644 --- a/lib/metasm/metasm/cpu/dalvik/main.rb +++ b/lib/metasm/metasm/cpu/dalvik/main.rb @@ -8,102 +8,102 @@ require 'metasm/main' module Metasm class Dalvik < CPU - class Reg - attr_accessor :i - def initialize(i) - @i = i - end + class Reg + attr_accessor :i + def initialize(i) + @i = i + end - def symbolic - "r#@i".to_sym - end + def symbolic + "r#@i".to_sym + end - def to_s - "r#@i" - end - end + def to_s + "r#@i" + end + end - class DexMethod - attr_accessor :dex, :midx, :off - def initialize(dex, midx) - @dex = dex - @midx = midx - if @dex and m = @dex.methods[midx] and c = @dex.classes[m.classidx] and c.data and - me = (c.data.direct_methods+c.data.virtual_methods).find { |mm| mm.methodid == midx } - # FIXME this doesnt work - @off = me.codeoff + me.code.insns_off - end - end + class DexMethod + attr_accessor :dex, :midx, :off + def initialize(dex, midx) + @dex = dex + @midx = midx + if @dex and m = @dex.methods[midx] and c = @dex.classes[m.classidx] and c.data and + me = (c.data.direct_methods+c.data.virtual_methods).find { |mm| mm.methodid == midx } + # FIXME this doesnt work + @off = me.codeoff + me.code.insns_off + end + end - 
def to_s - if @dex and m = @dex.methods[@midx] - @dex.types[m.classidx] + '->' + @dex.strings[m.nameidx] - #dex.encoded.inv_export[@off] - else - "method_#@midx" - end - end - end + def to_s + if @dex and m = @dex.methods[@midx] + @dex.types[m.classidx] + '->' + @dex.strings[m.nameidx] + #dex.encoded.inv_export[@off] + else + "method_#@midx" + end + end + end - class DexField - attr_accessor :dex, :fidx - def initialize(dex, fidx) - @dex = dex - @fidx = fidx - end + class DexField + attr_accessor :dex, :fidx + def initialize(dex, fidx) + @dex = dex + @fidx = fidx + end - def to_s - if @dex and f = @dex.fields[@fidx] - @dex.types[f.classidx] + '->' + @dex.strings[f.nameidx] - else - "field_#@fidx" - end - end - end + def to_s + if @dex and f = @dex.fields[@fidx] + @dex.types[f.classidx] + '->' + @dex.strings[f.nameidx] + else + "field_#@fidx" + end + end + end - class DexType - attr_accessor :dex, :tidx - def initialize(dex, tidx) - @dex = dex - @tidx = tidx - end + class DexType + attr_accessor :dex, :tidx + def initialize(dex, tidx) + @dex = dex + @tidx = tidx + end - def to_s - if @dex and f = @dex.types[@tidx] - f - else - "type_#@tidx" - end - end - end + def to_s + if @dex and f = @dex.types[@tidx] + f + else + "type_#@tidx" + end + end + end - class DexString - attr_accessor :dex, :sidx - def initialize(dex, sidx) - @dex = dex - @sidx = sidx - end + class DexString + attr_accessor :dex, :sidx + def initialize(dex, sidx) + @dex = dex + @sidx = sidx + end - def to_s - if @dex and f = @dex.strings[@sidx] - f.inspect - else - "string_#@sidx" - end - end - end + def to_s + if @dex and f = @dex.strings[@sidx] + f.inspect + else + "string_#@sidx" + end + end + end - def initialize(*args) - super() - @size = args.grep(Integer).first || 32 - @dex = args.grep(ExeFormat).first - @endianness = args.delete(:little) || args.delete(:big) || (@dex ? 
@dex.endianness : :little) - end + def initialize(*args) + super() + @size = args.grep(Integer).first || 32 + @dex = args.grep(ExeFormat).first + @endianness = args.delete(:little) || args.delete(:big) || (@dex ? @dex.endianness : :little) + end - def init_opcode_list - init_latest - @opcode_list - end + def init_opcode_list + init_latest + @opcode_list + end end end diff --git a/lib/metasm/metasm/cpu/dalvik/opcodes.rb b/lib/metasm/metasm/cpu/dalvik/opcodes.rb index bef8d243bd..9faf467767 100644 --- a/lib/metasm/metasm/cpu/dalvik/opcodes.rb +++ b/lib/metasm/metasm/cpu/dalvik/opcodes.rb @@ -15,7 +15,7 @@ require 'metasm/cpu/dalvik/main' module Metasm class Dalvik - OPCODES = %w[nop move move_from16 move_16 move_wide move_wide_from16 + OPCODES = %w[nop move move_from16 move_16 move_wide move_wide_from16 move_wide_16 move_object move_object_from16 move_object_16 move_result move_result_wide move_result_object move_exception return_void return return_wide return_object @@ -60,315 +60,315 @@ iget_quick iget_wide_quick iget_object_quick iput_quick iput_wide_quick iput_obj invoke_virtual_quick invoke_virtual_quick_range invoke_super_quick invoke_super_quick_range unused_fc unused_fd unused_fe unused_ff] - def init_dalvik - @valid_props[:canthrow] = true - [:i16, :i16_32hi, :i16_64hi, :i32, :iaa, :ib, :icc, :u16, :u32, :u64, - :r16, :ra, :raa, :rb, :rbb, :rcc, :rlist16, :rlist4, :rlist5, - :m16, :fld16, :typ16, :str16 - ].each { |a| @valid_args[a] = true } - @opcode_list = [] + def init_dalvik + @valid_props[:canthrow] = true + [:i16, :i16_32hi, :i16_64hi, :i32, :iaa, :ib, :icc, :u16, :u32, :u64, + :r16, :ra, :raa, :rb, :rbb, :rcc, :rlist16, :rlist4, :rlist5, + :m16, :fld16, :typ16, :str16 + ].each { |a| @valid_args[a] = true } + @opcode_list = [] - OPCODES.each_with_index { |n, b| - op = Opcode.new(n, b) - addop_args(op) - addop_props(op) - @opcode_list << op - } + OPCODES.each_with_index { |n, b| + op = Opcode.new(n, b) + addop_args(op) + addop_props(op) + @opcode_list 
<< op + } - raise "Internal error #{@opcode_list.length}" if @opcode_list.length != 256 - end - alias init_latest init_dalvik + raise "Internal error #{@opcode_list.length}" if @opcode_list.length != 256 + end + alias init_latest init_dalvik - def addop_args(op) - fmt = case op.name - when 'goto' - :fmt10t - when 'nop', 'return_void' - :fmt10x - when 'const_4' - :fmt11n - when 'const_high16' - :fmt21h - when 'const_wide_high16' - :fmt21hh - when 'move_result', 'move_result_wide', 'move_result_object', - 'move_exception', 'return', 'return_wide', - 'return_object', 'monitor_enter', 'monitor_exit', - 'throw' - :fmt11x - when 'move', 'move_wide', 'move_object', 'array_length', - 'neg_int', 'not_int', 'neg_long', 'not_long', - 'neg_float', 'neg_double', 'int_to_long', - 'int_to_float', 'int_to_double', 'long_to_int', - 'long_to_float', 'long_to_double', 'float_to_int', - 'float_to_long', 'float_to_double', 'double_to_int', - 'double_to_long', 'double_to_float', 'int_to_byte', - 'int_to_char', 'int_to_short', 'add_int_2addr', - 'sub_int_2addr', 'mul_int_2addr', 'div_int_2addr', - 'rem_int_2addr', 'and_int_2addr', 'or_int_2addr', - 'xor_int_2addr', 'shl_int_2addr', 'shr_int_2addr', - 'ushr_int_2addr', 'add_long_2addr', 'sub_long_2addr', - 'mul_long_2addr', 'div_long_2addr', 'rem_long_2addr', - 'and_long_2addr', 'or_long_2addr', 'xor_long_2addr', - 'shl_long_2addr', 'shr_long_2addr', 'ushr_long_2addr', - 'add_float_2addr', 'sub_float_2addr', 'mul_float_2addr', - 'div_float_2addr', 'rem_float_2addr', - 'add_double_2addr', 'sub_double_2addr', - 'mul_double_2addr', 'div_double_2addr', - 'rem_double_2addr' - :fmt12x - when 'goto_16' - :fmt20t - when 'goto_32' - :fmt30t - when 'const_string' - :fmt21c_str - when 'const_class', 'check_cast', - 'new_instance' - :fmt21c_typ - when 'sget', 'sget_wide', 'sget_object', - 'sget_boolean', 'sget_byte', 'sget_char', 'sget_short', - 'sput', 'sput_wide', 'sput_object', 'sput_boolean', - 'sput_byte', 'sput_char', 'sput_short' - :fmt21c_fld 
- when 'const_16', 'const_wide_16' - :fmt21s - when 'if_eqz', 'if_nez', 'if_ltz', 'if_gez', 'if_gtz', 'if_lez' - :fmt21t - when 'fill_array_data', 'packed_switch', 'sparse_switch' - :fmt31t - when 'add_int_lit8', 'rsub_int_lit8', 'mul_int_lit8', - 'div_int_lit8', 'rem_int_lit8', 'and_int_lit8', - 'or_int_lit8', 'xor_int_lit8', 'shl_int_lit8', - 'shr_int_lit8', 'ushr_int_lit8' - :fmt22b - when 'instance_of', 'new_array', 'iget', 'iget_wide', - 'iget_object', 'iget_boolean', 'iget_byte', - 'iget_char', 'iget_short', 'iput', 'iput_wide', - 'iput_object', 'iput_boolean', 'iput_byte', - 'iput_char', 'iput_short' - :fmt22c - when 'add_int_lit16', 'rsub_int', 'mul_int_lit16', - 'div_int_lit16', 'rem_int_lit16', 'and_int_lit16', - 'or_int_lit16', 'xor_int_lit16' - :fmt22s - when 'if_eq', 'if_ne', 'if_lt', 'if_ge', 'if_gt', 'if_le' - :fmt22t - when 'move_from16', 'move_wide_from16', 'move_object_from16' - :fmt22x - when 'cmpl_float', 'cmpg_float', 'cmpl_double', 'cmpg_double', - 'cmp_long', 'aget', 'aget_wide', 'aget_object', - 'aget_boolean', 'aget_byte', 'aget_char', 'aget_short', - 'aput', 'aput_wide', 'aput_object', 'aput_boolean', - 'aput_byte', 'aput_char', 'aput_short', 'add_int', - 'sub_int', 'mul_int', 'div_int', 'rem_int', 'and_int', - 'or_int', 'xor_int', 'shl_int', 'shr_int', 'ushr_int', - 'add_long', 'sub_long', 'mul_long', 'div_long', - 'rem_long', 'and_long', 'or_long', 'xor_long', - 'shl_long', 'shr_long', 'ushr_long', 'add_float', - 'sub_float', 'mul_float', 'div_float', 'rem_float', - 'add_double', 'sub_double', 'mul_double', 'div_double', - 'rem_double' - :fmt23x - when 'const', 'const_wide_32' - :fmt31i - when 'const_string_jumbo' - :fmt31c - when 'move_16', 'move_wide_16', 'move_object_16' - :fmt32x - when 'filled_new_array' - :fmt35ca - when 'invoke_virtual', 'invoke_super', - 'invoke_direct', 'invoke_static', 'invoke_interface' - :fmt35c - when 'filled_new_array_range', 'invoke_virtual_range', - 'invoke_super_range', 'invoke_direct_range', - 
'invoke_static_range', 'invoke_interface_range' - :fmt3rc - when 'const_wide' - :fmt51l - when 'throw_verification_error' - :fmt20bc - when 'iget_quick', 'iget_wide_quick', 'iget_object_quick', - 'iput_quick', 'iput_wide_quick', 'iput_object_quick' - :fmt22cs - when 'invoke_virtual_quick', 'invoke_super_quick' - :fmt35ms - when 'invoke_virtual_quick_range', 'invoke_super_quick_range' - :fmt3rms - when 'execute_inline' - :fmt3inline - when 'invoke_direct_empty' - :fmt35c - when 'unused_3e', 'unused_3f', 'unused_40', 'unused_41', - 'unused_42', 'unused_43', 'unused_73', 'unused_79', - 'unused_7a', 'unused_e3', 'unused_e4', 'unused_e5', - 'unused_e6', 'unused_e7', 'unused_e8', 'unused_e9', - 'unused_ea', 'unused_eb', 'unused_ec', 'unused_ef', - 'unused_f1', 'unused_fc', 'unused_fd', 'unused_fe', - 'unused_ff' - :fmtUnknown - else - raise "Internal error #{op.name}" - end + def addop_args(op) + fmt = case op.name + when 'goto' + :fmt10t + when 'nop', 'return_void' + :fmt10x + when 'const_4' + :fmt11n + when 'const_high16' + :fmt21h + when 'const_wide_high16' + :fmt21hh + when 'move_result', 'move_result_wide', 'move_result_object', + 'move_exception', 'return', 'return_wide', + 'return_object', 'monitor_enter', 'monitor_exit', + 'throw' + :fmt11x + when 'move', 'move_wide', 'move_object', 'array_length', + 'neg_int', 'not_int', 'neg_long', 'not_long', + 'neg_float', 'neg_double', 'int_to_long', + 'int_to_float', 'int_to_double', 'long_to_int', + 'long_to_float', 'long_to_double', 'float_to_int', + 'float_to_long', 'float_to_double', 'double_to_int', + 'double_to_long', 'double_to_float', 'int_to_byte', + 'int_to_char', 'int_to_short', 'add_int_2addr', + 'sub_int_2addr', 'mul_int_2addr', 'div_int_2addr', + 'rem_int_2addr', 'and_int_2addr', 'or_int_2addr', + 'xor_int_2addr', 'shl_int_2addr', 'shr_int_2addr', + 'ushr_int_2addr', 'add_long_2addr', 'sub_long_2addr', + 'mul_long_2addr', 'div_long_2addr', 'rem_long_2addr', + 'and_long_2addr', 'or_long_2addr', 
'xor_long_2addr', + 'shl_long_2addr', 'shr_long_2addr', 'ushr_long_2addr', + 'add_float_2addr', 'sub_float_2addr', 'mul_float_2addr', + 'div_float_2addr', 'rem_float_2addr', + 'add_double_2addr', 'sub_double_2addr', + 'mul_double_2addr', 'div_double_2addr', + 'rem_double_2addr' + :fmt12x + when 'goto_16' + :fmt20t + when 'goto_32' + :fmt30t + when 'const_string' + :fmt21c_str + when 'const_class', 'check_cast', + 'new_instance' + :fmt21c_typ + when 'sget', 'sget_wide', 'sget_object', + 'sget_boolean', 'sget_byte', 'sget_char', 'sget_short', + 'sput', 'sput_wide', 'sput_object', 'sput_boolean', + 'sput_byte', 'sput_char', 'sput_short' + :fmt21c_fld + when 'const_16', 'const_wide_16' + :fmt21s + when 'if_eqz', 'if_nez', 'if_ltz', 'if_gez', 'if_gtz', 'if_lez' + :fmt21t + when 'fill_array_data', 'packed_switch', 'sparse_switch' + :fmt31t + when 'add_int_lit8', 'rsub_int_lit8', 'mul_int_lit8', + 'div_int_lit8', 'rem_int_lit8', 'and_int_lit8', + 'or_int_lit8', 'xor_int_lit8', 'shl_int_lit8', + 'shr_int_lit8', 'ushr_int_lit8' + :fmt22b + when 'instance_of', 'new_array', 'iget', 'iget_wide', + 'iget_object', 'iget_boolean', 'iget_byte', + 'iget_char', 'iget_short', 'iput', 'iput_wide', + 'iput_object', 'iput_boolean', 'iput_byte', + 'iput_char', 'iput_short' + :fmt22c + when 'add_int_lit16', 'rsub_int', 'mul_int_lit16', + 'div_int_lit16', 'rem_int_lit16', 'and_int_lit16', + 'or_int_lit16', 'xor_int_lit16' + :fmt22s + when 'if_eq', 'if_ne', 'if_lt', 'if_ge', 'if_gt', 'if_le' + :fmt22t + when 'move_from16', 'move_wide_from16', 'move_object_from16' + :fmt22x + when 'cmpl_float', 'cmpg_float', 'cmpl_double', 'cmpg_double', + 'cmp_long', 'aget', 'aget_wide', 'aget_object', + 'aget_boolean', 'aget_byte', 'aget_char', 'aget_short', + 'aput', 'aput_wide', 'aput_object', 'aput_boolean', + 'aput_byte', 'aput_char', 'aput_short', 'add_int', + 'sub_int', 'mul_int', 'div_int', 'rem_int', 'and_int', + 'or_int', 'xor_int', 'shl_int', 'shr_int', 'ushr_int', + 'add_long', 'sub_long', 
'mul_long', 'div_long', + 'rem_long', 'and_long', 'or_long', 'xor_long', + 'shl_long', 'shr_long', 'ushr_long', 'add_float', + 'sub_float', 'mul_float', 'div_float', 'rem_float', + 'add_double', 'sub_double', 'mul_double', 'div_double', + 'rem_double' + :fmt23x + when 'const', 'const_wide_32' + :fmt31i + when 'const_string_jumbo' + :fmt31c + when 'move_16', 'move_wide_16', 'move_object_16' + :fmt32x + when 'filled_new_array' + :fmt35ca + when 'invoke_virtual', 'invoke_super', + 'invoke_direct', 'invoke_static', 'invoke_interface' + :fmt35c + when 'filled_new_array_range', 'invoke_virtual_range', + 'invoke_super_range', 'invoke_direct_range', + 'invoke_static_range', 'invoke_interface_range' + :fmt3rc + when 'const_wide' + :fmt51l + when 'throw_verification_error' + :fmt20bc + when 'iget_quick', 'iget_wide_quick', 'iget_object_quick', + 'iput_quick', 'iput_wide_quick', 'iput_object_quick' + :fmt22cs + when 'invoke_virtual_quick', 'invoke_super_quick' + :fmt35ms + when 'invoke_virtual_quick_range', 'invoke_super_quick_range' + :fmt3rms + when 'execute_inline' + :fmt3inline + when 'invoke_direct_empty' + :fmt35c + when 'unused_3e', 'unused_3f', 'unused_40', 'unused_41', + 'unused_42', 'unused_43', 'unused_73', 'unused_79', + 'unused_7a', 'unused_e3', 'unused_e4', 'unused_e5', + 'unused_e6', 'unused_e7', 'unused_e8', 'unused_e9', + 'unused_ea', 'unused_eb', 'unused_ec', 'unused_ef', + 'unused_f1', 'unused_fc', 'unused_fd', 'unused_fe', + 'unused_ff' + :fmtUnknown + else + raise "Internal error #{op.name}" + end - case fmt - when :fmt10x; op.args << :iaa - when :fmt12x; op.args << :ra << :rb - when :fmt11n; op.args << :ra << :ib - when :fmt11x; op.args << :raa - when :fmt10t; op.args << :iaa - when :fmt20t; op.args << :i16 - when :fmt20bc; op.args << :iaa << :u16 - when :fmt21c_str; op.args << :raa << :str16 - when :fmt21c_typ; op.args << :raa << :typ16 - when :fmt21c_fld; op.args << :raa << :fld16 - when :fmt22x; op.args << :raa << :r16 - when :fmt21s, :fmt21t; op.args 
<< :raa << :i16 - when :fmt21h; op.args << :raa << :i16_32hi - when :fmt21hh; op.args << :raa << :i16_64hi - when :fmt23x; op.args << :raa << :rbb << :rcc - when :fmt22b; op.args << :raa << :rbb << :icc - when :fmt22s, :fmt22t; op.args << :ra << :rb << :i16 - when :fmt22c, :fmt22cs; op.args << :ra << :rb << :fld16 - when :fmt30t; op.args << :i32 - when :fmt31t, :fmt31c; op.args << :raa << :u32 - when :fmt32x; op.args << :r16 << :r16 - when :fmt31i; op.args << :raa << :i32 - when :fmt35ca - op.args << :r16 << :rlist5 - when :fmt35c, :fmt35ms - # rlist: - # nr of regs in :ib (max 5) - # regs: :ib.times { reg :i16 & 0xf ; :i16 >>= 4 } - # reg :ra if :ib == 5 - op.args << :m16 << :rlist5 - when :fmt3inline - op.args << :r16 << :rlist4 - when :fmt3rc, :fmt3rms - # rlist = :r16, :r16+1, :r16+2, ..., :r16+:iaa-1 - op.args << :r16 << :rlist16 - when :fmt51l - # u64 = u16 | (u16 << 16) | ... - op.args << :raa << :u64 - when :fmtUnknown - op.args << :iaa - else - raise "Internal error #{fmt.inspect}" - end - end + case fmt + when :fmt10x; op.args << :iaa + when :fmt12x; op.args << :ra << :rb + when :fmt11n; op.args << :ra << :ib + when :fmt11x; op.args << :raa + when :fmt10t; op.args << :iaa + when :fmt20t; op.args << :i16 + when :fmt20bc; op.args << :iaa << :u16 + when :fmt21c_str; op.args << :raa << :str16 + when :fmt21c_typ; op.args << :raa << :typ16 + when :fmt21c_fld; op.args << :raa << :fld16 + when :fmt22x; op.args << :raa << :r16 + when :fmt21s, :fmt21t; op.args << :raa << :i16 + when :fmt21h; op.args << :raa << :i16_32hi + when :fmt21hh; op.args << :raa << :i16_64hi + when :fmt23x; op.args << :raa << :rbb << :rcc + when :fmt22b; op.args << :raa << :rbb << :icc + when :fmt22s, :fmt22t; op.args << :ra << :rb << :i16 + when :fmt22c, :fmt22cs; op.args << :ra << :rb << :fld16 + when :fmt30t; op.args << :i32 + when :fmt31t, :fmt31c; op.args << :raa << :u32 + when :fmt32x; op.args << :r16 << :r16 + when :fmt31i; op.args << :raa << :i32 + when :fmt35ca + op.args << :r16 << 
:rlist5 + when :fmt35c, :fmt35ms + # rlist: + # nr of regs in :ib (max 5) + # regs: :ib.times { reg :i16 & 0xf ; :i16 >>= 4 } + # reg :ra if :ib == 5 + op.args << :m16 << :rlist5 + when :fmt3inline + op.args << :r16 << :rlist4 + when :fmt3rc, :fmt3rms + # rlist = :r16, :r16+1, :r16+2, ..., :r16+:iaa-1 + op.args << :r16 << :rlist16 + when :fmt51l + # u64 = u16 | (u16 << 16) | ... + op.args << :raa << :u64 + when :fmtUnknown + op.args << :iaa + else + raise "Internal error #{fmt.inspect}" + end + end - def addop_props(op) - case op.name - when 'nop', 'move', 'move_from16', 'move_16', 'move_wide', - 'move_wide_from16', 'move_wide_16', 'move_object', - 'move_object_from16', 'move_object_16', 'move_result', - 'move_result_wide', 'move_result_object', - 'move_exception', 'const_4', 'const_16', 'const', - 'const_high16', 'const_wide_16', 'const_wide_32', - 'const_wide', 'const_wide_high16', 'fill_array_data', - 'cmpl_float', 'cmpg_float', 'cmpl_double', - 'cmpg_double', 'cmp_long', 'neg_int', 'not_int', - 'neg_long', 'not_long', 'neg_float', 'neg_double', - 'int_to_long', 'int_to_float', 'int_to_double', - 'long_to_int', 'long_to_float', 'long_to_double', - 'float_to_int', 'float_to_long', 'float_to_double', - 'double_to_int', 'double_to_long', 'double_to_float', - 'int_to_byte', 'int_to_char', 'int_to_short', 'add_int', - 'sub_int', 'mul_int', 'and_int', 'or_int', 'xor_int', - 'shl_int', 'shr_int', 'ushr_int', 'add_long', - 'sub_long', 'mul_long', 'and_long', 'or_long', - 'xor_long', 'shl_long', 'shr_long', 'ushr_long', - 'add_float', 'sub_float', 'mul_float', 'div_float', - 'rem_float', 'add_double', 'sub_double', 'mul_double', - 'div_double', 'rem_double', 'add_int_2addr', - 'sub_int_2addr', 'mul_int_2addr', 'and_int_2addr', - 'or_int_2addr', 'xor_int_2addr', 'shl_int_2addr', - 'shr_int_2addr', 'ushr_int_2addr', 'add_long_2addr', - 'sub_long_2addr', 'mul_long_2addr', 'and_long_2addr', - 'or_long_2addr', 'xor_long_2addr', 'shl_long_2addr', - 'shr_long_2addr', 
'ushr_long_2addr', 'add_float_2addr', - 'sub_float_2addr', 'mul_float_2addr', 'div_float_2addr', - 'rem_float_2addr', 'add_double_2addr', - 'sub_double_2addr', 'mul_double_2addr', - 'div_double_2addr', 'rem_double_2addr', 'add_int_lit16', - 'rsub_int', 'mul_int_lit16', 'and_int_lit16', - 'or_int_lit16', 'xor_int_lit16', 'add_int_lit8', - 'rsub_int_lit8', 'mul_int_lit8', 'and_int_lit8', - 'or_int_lit8', 'xor_int_lit8', 'shl_int_lit8', - 'shr_int_lit8', 'ushr_int_lit8' - # normal opcode, continues to next, nothing raised - when 'const_string', 'const_string_jumbo', 'const_class', - 'monitor_enter', 'monitor_exit', 'check_cast', - 'instance_of', 'array_length', 'new_instance', - 'new_array', 'filled_new_array', - 'filled_new_array_range', 'aget', 'aget_boolean', - 'aget_byte', 'aget_char', 'aget_short', 'aget_wide', - 'aget_object', 'aput', 'aput_boolean', 'aput_byte', - 'aput_char', 'aput_short', 'aput_wide', 'aput_object', - 'iget', 'iget_boolean', 'iget_byte', 'iget_char', - 'iget_short', 'iget_wide', 'iget_object', 'iput', - 'iput_boolean', 'iput_byte', 'iput_char', 'iput_short', - 'iput_wide', 'iput_object', 'sget', 'sget_boolean', - 'sget_byte', 'sget_char', 'sget_short', 'sget_wide', - 'sget_object', 'sput', 'sput_boolean', 'sput_byte', - 'sput_char', 'sput_short', 'sput_wide', 'sput_object', - 'div_int', 'rem_int', 'div_long', 'rem_long', - 'div_int_2addr', 'rem_int_2addr', 'div_long_2addr', - 'rem_long_2addr', 'div_int_lit16', 'rem_int_lit16', - 'div_int_lit8', 'rem_int_lit8' - op.props[:canthrow] = true - when 'invoke_virtual', 'invoke_virtual_range', 'invoke_super', - 'invoke_super_range', 'invoke_direct', - 'invoke_direct_range', 'invoke_static', - 'invoke_static_range', 'invoke_interface', - 'invoke_interface_range' - op.props[:canthrow] = true - op.props[:saveip] = true - op.props[:setip] = true - op.props[:stopexec] = true - when 'return_void', 'return', 'return_wide', 'return_object' - op.props[:setip] = true - op.props[:stopexec] = true - when 'throw' 
- op.props[:canthrow] = true - op.props[:stopexec] = true - when 'goto', 'goto_16', 'goto_32' - op.props[:setip] = true - op.props[:stopexec] = true - when 'if_eq', 'if_ne', 'if_lt', 'if_ge', 'if_gt', 'if_le', - 'if_eqz', 'if_nez', 'if_ltz', 'if_gez', 'if_gtz', - 'if_lez' - op.props[:setip] = true - when 'packed_switch', 'sparse_switch' - op.props[:setip] = true # if no table match, nostopexec - op.props[:setip] = true - when 'throw_verification_error' - op.props[:canthrow] = true - op.props[:stopexec] = true - when 'execute_inline' - when 'iget_quick', 'iget_wide_quick', 'iget_object_quick', - 'iput_quick', 'iput_wide_quick', 'iput_object_quick' - op.props[:canthrow] = true - when 'invoke_virtual_quick', 'invoke_virtual_quick_range', - 'invoke_super_quick', 'invoke_super_quick_range', - 'invoke_direct_empty' - op.props[:canthrow] = true - op.props[:saveip] = true - op.props[:setip] = true - op.props[:stopexec] = true - when 'unused_3e', 'unused_3f', 'unused_40', 'unused_41', - 'unused_42', 'unused_43', 'unused_73', 'unused_79', - 'unused_7a', 'unused_e3', 'unused_e4', 'unused_e5', - 'unused_e6', 'unused_e7', 'unused_e8', 'unused_e9', - 'unused_ea', 'unused_eb', 'unused_ec', 'unused_ef', - 'unused_f1', 'unused_fc', 'unused_fd', 'unused_fe', - 'unused_ff' - op.props[:stopexec] = true - else - raise "Internal error #{op.name}" - end - end + def addop_props(op) + case op.name + when 'nop', 'move', 'move_from16', 'move_16', 'move_wide', + 'move_wide_from16', 'move_wide_16', 'move_object', + 'move_object_from16', 'move_object_16', 'move_result', + 'move_result_wide', 'move_result_object', + 'move_exception', 'const_4', 'const_16', 'const', + 'const_high16', 'const_wide_16', 'const_wide_32', + 'const_wide', 'const_wide_high16', 'fill_array_data', + 'cmpl_float', 'cmpg_float', 'cmpl_double', + 'cmpg_double', 'cmp_long', 'neg_int', 'not_int', + 'neg_long', 'not_long', 'neg_float', 'neg_double', + 'int_to_long', 'int_to_float', 'int_to_double', + 'long_to_int', 
'long_to_float', 'long_to_double', + 'float_to_int', 'float_to_long', 'float_to_double', + 'double_to_int', 'double_to_long', 'double_to_float', + 'int_to_byte', 'int_to_char', 'int_to_short', 'add_int', + 'sub_int', 'mul_int', 'and_int', 'or_int', 'xor_int', + 'shl_int', 'shr_int', 'ushr_int', 'add_long', + 'sub_long', 'mul_long', 'and_long', 'or_long', + 'xor_long', 'shl_long', 'shr_long', 'ushr_long', + 'add_float', 'sub_float', 'mul_float', 'div_float', + 'rem_float', 'add_double', 'sub_double', 'mul_double', + 'div_double', 'rem_double', 'add_int_2addr', + 'sub_int_2addr', 'mul_int_2addr', 'and_int_2addr', + 'or_int_2addr', 'xor_int_2addr', 'shl_int_2addr', + 'shr_int_2addr', 'ushr_int_2addr', 'add_long_2addr', + 'sub_long_2addr', 'mul_long_2addr', 'and_long_2addr', + 'or_long_2addr', 'xor_long_2addr', 'shl_long_2addr', + 'shr_long_2addr', 'ushr_long_2addr', 'add_float_2addr', + 'sub_float_2addr', 'mul_float_2addr', 'div_float_2addr', + 'rem_float_2addr', 'add_double_2addr', + 'sub_double_2addr', 'mul_double_2addr', + 'div_double_2addr', 'rem_double_2addr', 'add_int_lit16', + 'rsub_int', 'mul_int_lit16', 'and_int_lit16', + 'or_int_lit16', 'xor_int_lit16', 'add_int_lit8', + 'rsub_int_lit8', 'mul_int_lit8', 'and_int_lit8', + 'or_int_lit8', 'xor_int_lit8', 'shl_int_lit8', + 'shr_int_lit8', 'ushr_int_lit8' + # normal opcode, continues to next, nothing raised + when 'const_string', 'const_string_jumbo', 'const_class', + 'monitor_enter', 'monitor_exit', 'check_cast', + 'instance_of', 'array_length', 'new_instance', + 'new_array', 'filled_new_array', + 'filled_new_array_range', 'aget', 'aget_boolean', + 'aget_byte', 'aget_char', 'aget_short', 'aget_wide', + 'aget_object', 'aput', 'aput_boolean', 'aput_byte', + 'aput_char', 'aput_short', 'aput_wide', 'aput_object', + 'iget', 'iget_boolean', 'iget_byte', 'iget_char', + 'iget_short', 'iget_wide', 'iget_object', 'iput', + 'iput_boolean', 'iput_byte', 'iput_char', 'iput_short', + 'iput_wide', 'iput_object', 'sget', 
'sget_boolean', + 'sget_byte', 'sget_char', 'sget_short', 'sget_wide', + 'sget_object', 'sput', 'sput_boolean', 'sput_byte', + 'sput_char', 'sput_short', 'sput_wide', 'sput_object', + 'div_int', 'rem_int', 'div_long', 'rem_long', + 'div_int_2addr', 'rem_int_2addr', 'div_long_2addr', + 'rem_long_2addr', 'div_int_lit16', 'rem_int_lit16', + 'div_int_lit8', 'rem_int_lit8' + op.props[:canthrow] = true + when 'invoke_virtual', 'invoke_virtual_range', 'invoke_super', + 'invoke_super_range', 'invoke_direct', + 'invoke_direct_range', 'invoke_static', + 'invoke_static_range', 'invoke_interface', + 'invoke_interface_range' + op.props[:canthrow] = true + op.props[:saveip] = true + op.props[:setip] = true + op.props[:stopexec] = true + when 'return_void', 'return', 'return_wide', 'return_object' + op.props[:setip] = true + op.props[:stopexec] = true + when 'throw' + op.props[:canthrow] = true + op.props[:stopexec] = true + when 'goto', 'goto_16', 'goto_32' + op.props[:setip] = true + op.props[:stopexec] = true + when 'if_eq', 'if_ne', 'if_lt', 'if_ge', 'if_gt', 'if_le', + 'if_eqz', 'if_nez', 'if_ltz', 'if_gez', 'if_gtz', + 'if_lez' + op.props[:setip] = true + when 'packed_switch', 'sparse_switch' + op.props[:setip] = true # if no table match, nostopexec + op.props[:setip] = true + when 'throw_verification_error' + op.props[:canthrow] = true + op.props[:stopexec] = true + when 'execute_inline' + when 'iget_quick', 'iget_wide_quick', 'iget_object_quick', + 'iput_quick', 'iput_wide_quick', 'iput_object_quick' + op.props[:canthrow] = true + when 'invoke_virtual_quick', 'invoke_virtual_quick_range', + 'invoke_super_quick', 'invoke_super_quick_range', + 'invoke_direct_empty' + op.props[:canthrow] = true + op.props[:saveip] = true + op.props[:setip] = true + op.props[:stopexec] = true + when 'unused_3e', 'unused_3f', 'unused_40', 'unused_41', + 'unused_42', 'unused_43', 'unused_73', 'unused_79', + 'unused_7a', 'unused_e3', 'unused_e4', 'unused_e5', + 'unused_e6', 'unused_e7', 
'unused_e8', 'unused_e9', + 'unused_ea', 'unused_eb', 'unused_ec', 'unused_ef', + 'unused_f1', 'unused_fc', 'unused_fd', 'unused_fe', + 'unused_ff' + op.props[:stopexec] = true + else + raise "Internal error #{op.name}" + end + end end end diff --git a/lib/metasm/metasm/cpu/ia32/compile_c.rb b/lib/metasm/metasm/cpu/ia32/compile_c.rb index dc7a6aaad9..bf24c77c54 100644 --- a/lib/metasm/metasm/cpu/ia32/compile_c.rb +++ b/lib/metasm/metasm/cpu/ia32/compile_c.rb @@ -10,1512 +10,1512 @@ require 'metasm/compile_c' module Metasm class Ia32 class CCompiler < C::Compiler - # holds compiler state information for a function - # registers are saved as register number (see Ia32::Reg) - # TODO cache eflags ? or just z ? (may be defered to asm_optimize) - class State - # variable => offset from ebp (::Integer or CExpression) - attr_accessor :offset - # the current function - attr_accessor :func - # register => CExpression - attr_accessor :cache - # array of register values used in the function (to save/restore at prolog/epilog) - attr_accessor :dirty - # the array of register values currently not available - attr_accessor :used - # the array of args in use (reg/modrm/composite) the reg dependencies are in +used+ - attr_accessor :inuse - # variable => register for current scope (variable never on the stack) - # bound registers are also in +used+ - attr_accessor :bound - # list of reg values that are not kept across function call - attr_accessor :abi_flushregs_call - # list of regs we can trash without restoring them - attr_accessor :abi_trashregs - - # +used+ includes ebp if true - # nil if ebp is not reserved for stack variable addressing - # Reg if used - attr_accessor :saved_ebp - - def initialize(func) - @func = func - @offset = {} - @cache = {} - @dirty = [] - @used = [4] # esp is always in use - @inuse = [] - @bound = {} - @abi_flushregs_call = [0, 1, 2] # eax, ecx, edx (r8 etc ?) 
- @abi_trashregs = [0, 1, 2] - end - end - - # tracks 2 registers storing a value bigger than each - class Composite - attr_accessor :low, :high - def initialize(low, high) - @low, @high = low, high - end - def sz; 64 end - end - - # some address - class Address - attr_accessor :modrm, :target - def initialize(modrm, target=nil) - @modrm, @target = modrm, target - end - def sz; @modrm.adsz end - def to_s; "#" end - end - - - def initialize(*a) - super(*a) - @cpusz = @exeformat.cpu.size - @regnummax = (@cpusz == 64 ? 15 : 7) - end - - # shortcut to add an instruction to the source - def instr(name, *args) - # XXX parse_postfix ? - @source << Instruction.new(@exeformat.cpu, name, args) - end - - # returns an available register, tries to find one not in @state.cache - # do not use with sz==8 (aliasing ah=>esp) - # does not put it in @state.inuse - # TODO multipass for reg cache optimization - # TODO dynamic regval for later fixup (need a value to be in ecx for shl, etc) - def findreg(sz = @cpusz) - caching = @state.cache.keys.grep(Reg).map { |r| r.val } - if not regval = ([*0..@regnummax] - @state.used - caching).first || - ([*0..@regnummax] - @state.used).first - raise 'need more registers! (or a better compiler?)' - end - getreg(regval, sz) - end - - # returns a Reg from a regval, mark it as dirty, flush old cache dependencies - def getreg(regval, sz=@cpusz) - flushcachereg(regval) - @state.dirty |= [regval] - Reg.new(regval, sz) - end - - # remove the cache keys that depends on the register - def flushcachereg(regval) - @state.cache.delete_if { |e, val| - case e - when Reg; e.val == regval - when Address; e = e.modrm ; redo - when ModRM; e.b && (e.b.val == regval) or e.i && (e.i.val == regval) - when Composite; e.low.val == regval or e.high.val == regval - end - } - end - - # removes elements from @state.inuse, free @state.used if unreferenced - # must be the exact object present in inuse - def unuse(*vals) - vals.each { |val| - val = val.modrm if val.kind_of? 
Address - @state.inuse.delete val - } - # XXX cache exempt - exempt = @state.bound.values.map { |r| r.kind_of? Composite ? [r.low.val, r.high.val] : r.val }.flatten - exempt << 4 - exempt << 5 if @state.saved_ebp - @state.used.delete_if { |regval| - next if exempt.include? regval - not @state.inuse.find { |val| - case val - when Reg; val.val == regval - when ModRM; (val.b and val.b.val == regval) or (val.i and val.i.val == regval) - when Composite; val.low.val == regval or val.high.val == regval - else raise 'internal error - inuse ' + val.inspect - end - } - } - end - - # marks an arg as in use, returns the arg - def inuse(v) - case v - when Reg; @state.used |= [v.val] - when ModRM - @state.used |= [v.i.val] if v.i - @state.used |= [v.b.val] if v.b - when Composite; @state.used |= [v.low.val, v.high.val] - when Address; inuse v.modrm ; return v - else return v - end - @state.inuse |= [v] - v - end - - # returns a variable storage (ModRM for stack/global, Reg/Composite for register-bound) - def findvar(var) - if ret = @state.bound[var] - return ret - end - - if ret = @state.cache.index(var) - ret = ret.dup - inuse ret - return ret - end - - sz = 8*sizeof(var) rescue nil # extern char foo[]; - - case off = @state.offset[var] - when C::CExpression - # stack, dynamic address - # TODO - # no need to update state.cache here, never recursive - v = raise "find dynamic addr of #{var.name}" - when ::Integer - # stack - # TODO -fomit-frame-pointer ( => state.cache dependant on stack_offset... ) - v = ModRM.new(@cpusz, sz, nil, nil, @state.saved_ebp, Expression[-off]) - when nil - # global - if @exeformat.cpu.generate_PIC - if not reg = @state.cache.index('metasm_intern_geteip') - @need_geteip_stub = true - if @state.used.include? 6 # esi - reg = findreg - else - reg = getreg 6 - end - if reg.val != 0 - if @state.used.include? 
0 - eax = Reg.new(0, @cpusz) - instr 'mov', reg, eax - else - eax = getreg 0 - end - end - - instr 'call', Expression['metasm_intern_geteip'] - - if reg.val != 0 - if @state.used.include? 0 - instr 'xchg', eax, reg - else - instr 'mov', reg, eax - end - end - - @state.cache[reg] = 'metasm_intern_geteip' - end - v = ModRM.new(@cpusz, sz, nil, nil, reg, Expression[var.name, :-, 'metasm_intern_geteip']) - else - v = ModRM.new(@cpusz, sz, nil, nil, nil, Expression[var.name]) - end - end - - case var.type - when C::Array; inuse Address.new(v) - else inuse v - end - end - - # resolves the Address to Reg/Expr (may encode an 'lea') - def resolve_address(e) - r = e.modrm - unuse e - if r.imm and not r.b and not r.i - reg = r.imm - elsif not r.imm and ((not r.b and r.s == 1) or not r.i) - reg = r.b || r.i - elsif reg = @state.cache.index(e) - reg = reg.dup - else - reg = findreg - r.sz = reg.sz - instr 'lea', reg, r - end - inuse reg - @state.cache[reg] = e - reg - end - - # copies the arg e to a volatile location (register/composite) if it is not already - # unuses the old storage - # may return a register bigger than the type size (eg __int8 are stored in full reg size) - # use rsz only to force 32bits-return on a 16bits cpu - def make_volatile(e, type, rsz=@cpusz) - if e.kind_of? ModRM or @state.bound.index(e) - if type.integral? or type.pointer? - oldval = @state.cache[e] - if type.integral? and type.name == :__int64 and @cpusz != 64 - e2l = inuse findreg(32) - unuse e - e2h = inuse findreg(32) - el, eh = get_composite_parts e - instr 'mov', e2l, el - instr 'mov', e2h, eh - e2 = inuse Composite.new(e2l, e2h) - unuse e2l, e2h - else - unuse e - n = type.integral? ? type.name : :ptr - if (sz = typesize[n]*8) < @cpusz or sz < rsz or e.sz < rsz - e2 = inuse findreg(rsz) - op = ((type.specifier == :unsigned) ? 
'movzx' : 'movsx') - op = 'mov' if e.sz == e2.sz - else - e2 = inuse findreg(sz) - op = 'mov' - end - instr op, e2, e - end - @state.cache[e2] = oldval if oldval and e.kind_of? ModRM - e2 - elsif type.float? - raise 'bad float static' + e.inspect if not e.kind_of? ModRM - unuse e - instr 'fld', e - FpReg.new nil - else raise - end - elsif e.kind_of? Address - make_volatile resolve_address(e), type, rsz - elsif e.kind_of? Expression - if type.integral? or type.pointer? - if type.integral? and type.name == :__int64 and @cpusz != 64 - e2 = inuse Composite.new(inuse(findreg(32)), findreg(32)) - instr 'mov', e2.low, Expression[e, :&, 0xffff_ffff] - instr 'mov', e2.high, Expression[e, :>>, 32] - else - e2 = inuse findreg - instr 'mov', e2, e - end - e2 - elsif type.float? - case e.reduce - when 0; instr 'fldz' - when 1; instr 'fld1' - else - esp = Reg.new(4, @cpusz) - instr 'push.i32', Expression[e, :>>, 32] - instr 'push.i32', Expression[e, :&, 0xffff_ffff] - instr 'fild', ModRM.new(@cpusz, 64, nil, nil, esp, nil) - instr 'add', esp, 8 - end - FpReg.new nil - end - else - e - end - end - - # returns two args corresponding to the low and high 32bits of the 64bits composite arg - def get_composite_parts(e) - case e - when ModRM - el = e.dup - el.sz = 32 - eh = el.dup - eh.imm = Expression[eh.imm, :+, 4] - when Expression - el = Expression[e, :&, 0xffff_ffff] - eh = Expression[e, :>>, 32] - when Composite - el = e.low - eh = e.high - when Reg - el = e - eh = findreg - else raise - end - [el, eh] - end - - # returns the instruction suffix for a comparison operator - def getcc(op, type) - case op - when :'=='; 'z' - when :'!='; 'nz' - when :'<' ; 'b' - when :'>' ; 'a' - when :'<='; 'be' - when :'>='; 'ae' - else raise "bad comparison op #{op}" - end.tr((type.specifier == :unsigned ? 
'' : 'ab'), 'gl') - end - - # compiles a c expression, returns an Ia32 instruction argument - def c_cexpr_inner(expr) - case expr - when ::Integer; Expression[expr] - when C::Variable; findvar(expr) - when C::CExpression - if not expr.lexpr or not expr.rexpr - inuse c_cexpr_inner_nol(expr) - else - inuse c_cexpr_inner_l(expr) - end - when C::Label; findvar(C::Variable.new(expr.name, C::Array.new(C::BaseType.new(:void), 1))) - else puts "ia32/c_ce_i: unsupported #{expr}" if $VERBOSE - end - end - - # compile a CExpression with no lexpr - def c_cexpr_inner_nol(expr) - case expr.op - when nil - r = c_cexpr_inner(expr.rexpr) - if (expr.rexpr.kind_of? C::CExpression or expr.rexpr.kind_of? C::Variable) and - expr.type.kind_of? C::BaseType and expr.rexpr.type.kind_of? C::BaseType - r = c_cexpr_inner_cast(expr, r) - end - r - when :+ - c_cexpr_inner(expr.rexpr) - when :- - r = c_cexpr_inner(expr.rexpr) - r = make_volatile(r, expr.type) - if expr.type.integral? or expr.type.pointer? - if r.kind_of? Composite - instr 'neg', r.low - instr 'adc', r.high, Expression[0] - instr 'neg', r.high - else - instr 'neg', r - end - elsif expr.type.float? - instr 'fchs' - else raise - end - r - when :'++', :'--' - r = c_cexpr_inner(expr.rexpr) - inc = true if expr.op == :'++' - if expr.type.integral? or expr.type.pointer? - if expr.type.integral? and expr.type.name == :__int64 and @cpusz != 64 - rl, rh = get_composite_parts r - instr 'add', rl, Expression[inc ? 1 : -1] - instr 'adc', rh, Expression[inc ? 0 : -1] - else - op = (inc ? 'inc' : 'dec') - instr op, r - end - elsif expr.type.float? - raise 'bad lvalue' if not r.kind_of? ModRM - instr 'fld1' - op = (inc ? 'faddp' : 'fsubp') - instr op, r - instr 'fstp', r - end - r - when :& - raise 'bad precompiler ' + expr.to_s if not expr.rexpr.kind_of? C::Variable - @state.cache.each { |r_, c| - return inuse(r_) if c.kind_of? Address and c.target == expr.rexpr - } - r = c_cexpr_inner(expr.rexpr) - raise 'bad lvalue' if not r.kind_of? 
ModRM - unuse r - r = Address.new(r) - inuse r - r.target = expr.rexpr - r - when :* - expr.rexpr.type.name = :ptr if expr.rexpr.kind_of? C::CExpression and expr.rexpr.type.kind_of? C::BaseType and typesize[expr.rexpr.type.name] == typesize[:ptr] # hint to use Address - e = c_cexpr_inner(expr.rexpr) - sz = 8*sizeof(expr) - case e - when Address - unuse e - e = e.modrm.dup - e.sz = sz - inuse e - when ModRM; e = make_volatile(e, expr.rexpr.type) if not expr.rexpr.type.float? - end - case e - when Reg; unuse e ; e = inuse ModRM.new(@cpusz, sz, nil, nil, e, nil) - when Expression; e = inuse ModRM.new(@cpusz, sz, nil, nil, nil, e) - end - e - when :'!' - r = c_cexpr_inner(expr.rexpr) - r = make_volatile(r, expr.rexpr.type) - if expr.rexpr.type.integral? or expr.type.pointer? - if expr.type.integral? and expr.rexpr.type.name == :__int64 and @cpusz != 64 - raise # TODO - end - r = make_volatile(r, expr.rexpr.type) - instr 'test', r, r - elsif expr.rexpr.type.float? - if @exeformat.cpu.opcode_list_byname['fucomip'] - instr 'fldz' - instr 'fucomip' - else - raise # TODO - end - r = inuse findreg - else raise 'bad comparison ' + expr.to_s - end - if @exeformat.cpu.opcode_list_byname['setz'] - instr 'setz', Reg.new(r.val, 8) - instr 'and', r, Expression[1] - else - instr 'mov', r, Expression[1] - label = new_label('setcc') - instr 'jz', Expression[label] - instr 'mov', r, Expression[0] - @source << Label.new(label) - end - r - else raise 'mmh ? ' + expr.to_s - end - end - - # compile a cast (BaseType to BaseType) - def c_cexpr_inner_cast(expr, r) - esp = Reg.new(4, @cpusz) - if expr.type.float? and expr.rexpr.type.float? - if expr.type.name != expr.rexpr.type.name and r.kind_of? ModRM - instr 'fld', r - unuse r - r = FpReg.new nil - end - elsif expr.type.float? and expr.rexpr.type.integral? - r = resolve_address r if r.kind_of? Address - return make_volatile(r, expr.type) if r.kind_of? 
Expression - unuse r - if expr.rexpr.type.specifier == :unsigned and r.sz != 64 - instr 'push.i32', Expression[0] - end - case r - when ModRM - case expr.rexpr.type.name - when :__int8, :__int16 - r = make_volatile(r, expr.rexpr.type, 32) - instr 'push', r - else - if expr.rexpr.type.specifier != :unsigned - instr 'fild', r - return FpReg.new(nil) - end - if r.sz == 64 - get_composite_parts(r).reverse_each { |rp| instr 'push', rp } - else - instr 'push', r - end - end - when Composite - instr 'push', r.high - instr 'push', r.low - when Reg - if r.sz == 16 - op = ((expr.rexpr.type.specifier == :unsigned) ? 'movzx' : 'movsx') - rr = r.dup - rr.sz = 32 - instr op, rr, r - r = rr - end - instr 'push', r - end - m = ModRM.new(@cpusz, r.sz, nil, nil, esp, nil) - instr 'fild', m - instr 'add', esp, (expr.rexpr.type.specifier == :unsigned ? 8 : Expression[r.sz/8]) - if expr.rexpr.type.specifier == :unsigned and r.sz == 64 - label = new_label('unsign_float') - if m.sz == 64 and @cpusz < 64 - m = get_composite_parts(m)[1] - end - m2 = m - m2 = make_volatile(m, expr.rexpr.type) if m.kind_of? ModRM - m2 = get_composite_parts(m2)[0] if m2.kind_of? Composite - instr 'test', m2, m2 - instr 'jns', Expression[label] - instr 'push.i32', Expression[0x7fff_ffff] - instr 'push.i32', Expression[0xffff_ffff] - instr 'fild', m - instr 'add', esp, 8 - instr 'faddp', FpReg.new(1) - instr 'fld1' - instr 'faddp', FpReg.new(1) - @source << Label.new(label) - end - r = FpReg.new nil - elsif expr.type.integral? and expr.rexpr.type.float? 
- r = make_volatile(r, expr.rexpr.type) # => ST(0) - - if expr.type.name == :__int64 - instr 'sub', esp, Expression[8] - instr 'fistp', ModRM.new(@cpusz, 64, nil, nil, esp, nil) - if @cpusz == 64 - r = findreg - instr 'pop', r - else - r = Composite.new(findreg(32), findreg(32)) - instr 'pop', r.low - instr 'pop', r.high - end - else - instr 'sub', esp, Expression[4] - instr 'fistp', ModRM.new(@cpusz, 32, nil, nil, esp, nil) - r = findreg(32) - instr 'pop', r - tto = typesize[expr.type.name]*8 - instr 'and', r, Expression[(1< tto - end - inuse r - elsif (expr.type.integral? or expr.type.pointer?) and (expr.rexpr.type.integral? or expr.rexpr.type.pointer?) - tto = typesize[expr.type.integral? ? expr.type.name : :ptr]*8 - tfrom = typesize[expr.rexpr.type.integral? ? expr.rexpr.type.name : :ptr]*8 - r = resolve_address r if r.kind_of? Address - if r.kind_of? Expression - r = make_volatile r, expr.type - elsif tfrom > tto - if tfrom == 64 and r.kind_of? Composite - unuse r - r = r.low - inuse r - end - case r - when ModRM - unuse r - r = r.dup - r.sz = tto - inuse r - when Reg - instr 'and', r, Expression[(1< tto - end - elsif tto > tfrom - if tto == 64 and @cpusz != 64 - high = findreg(32) - unuse r - if not r.kind_of? Reg or r.sz != 32 - inuse high - low = findreg(32) - unuse high - op = (r.sz == 32 ? 'mov' : (expr.rexpr.type.specifier == :unsigned ? 'movzx' : 'movsx')) - instr op, low, r - r = low - end - r = inuse Composite.new(r, high) - if expr.type.specifier == :unsigned - instr 'xor', r.high, r.high - else - instr 'mov', r.high, r.low - instr 'sar', r.high, Expression[31] - end - elsif not r.kind_of? Reg or r.sz != @cpusz - unuse r - reg = inuse findreg - op = (r.sz == reg.sz ? 'mov' : (expr.rexpr.type.specifier == :unsigned ? 
'movzx' : 'movsx')) - instr op, reg, r - r = reg - end - end - end - r - end - - # compiles a CExpression, not arithmetic (assignment, comparison etc) - def c_cexpr_inner_l(expr) - case expr.op - when :funcall - c_cexpr_inner_funcall(expr) - when :'+=', :'-=', :'*=', :'/=', :'%=', :'^=', :'&=', :'|=', :'<<=', :'>>=' - l = c_cexpr_inner(expr.lexpr) - raise 'bad lvalue' if not l.kind_of? ModRM and not @state.bound.index(l) - instr 'fld', l if expr.type.float? - r = c_cexpr_inner(expr.rexpr) - op = expr.op.to_s.chop.to_sym - c_cexpr_inner_arith(l, op, r, expr.type) - instr 'fstp', l if expr.type.float? - l - when :'+', :'-', :'*', :'/', :'%', :'^', :'&', :'|', :'<<', :'>>' - # both sides are already cast to the same type by the precompiler - # XXX expr.type.pointer? - if expr.type.integral? and expr.type.name == :ptr and expr.lexpr.type.kind_of? C::BaseType and - typesize[expr.lexpr.type.name] == typesize[:ptr] - expr.lexpr.type.name = :ptr - end - l = c_cexpr_inner(expr.lexpr) - l = make_volatile(l, expr.type) if not l.kind_of? Address - if expr.type.integral? and expr.type.name == :ptr and l.kind_of? Reg - unuse l - l = Address.new ModRM.new(l.sz, @cpusz, nil, nil, l, nil) - inuse l - end - if l.kind_of? Address and expr.type.integral? - l.modrm.imm = nil if l.modrm.imm and not l.modrm.imm.op and l.modrm.imm.rexpr == 0 - if l.modrm.b and l.modrm.i and l.modrm.s == 1 and l.modrm.b.val == l.modrm.i.val - unuse l.modrm.b if l.modrm.b != l.modrm.i - l.modrm.b = nil - l.modrm.s = 2 - end - case expr.op - when :+ - rexpr = expr.rexpr - rexpr = rexpr.rexpr while rexpr.kind_of? C::CExpression and not rexpr.op and rexpr.type.integral? and - rexpr.rexpr.kind_of? C::CExpression and rexpr.rexpr.type.integral? and - typesize[rexpr.type.name] == typesize[rexpr.rexpr.type.name] - if rexpr.kind_of? C::CExpression and rexpr.op == :* and rexpr.lexpr - r1 = c_cexpr_inner(rexpr.lexpr) - r2 = c_cexpr_inner(rexpr.rexpr) - r1, r2 = r2, r1 if r1.kind_of? Expression - if r2.kind_of? 
Expression and [1, 2, 4, 8].include?(rr2 = r2.reduce) - case r1 - when ModRM, Address, Reg - r1 = make_volatile(r1, rexpr.type) if not r1.kind_of? Reg - if not l.modrm.i or (l.modrm.i.val == r1.val and l.modrm.s == 1 and rr2 == 1) - unuse l, r1, r2 - l = Address.new(l.modrm.dup) - inuse l - l.modrm.i = r1 - l.modrm.s = (l.modrm.s || 0) + rr2 - return l - end - end - end - r = make_volatile(r1, rexpr.type) - c_cexpr_inner_arith(r, :*, r2, rexpr.type) - else - r = c_cexpr_inner(rexpr) - end - r = resolve_address r if r.kind_of? Address - r = make_volatile(r, rexpr.type) if r.kind_of? ModRM - case r - when Reg - unuse l - l = Address.new(l.modrm.dup) - inuse l - if l.modrm.b - if not l.modrm.i or (l.modrm.i.val == r.val and l.modrm.s == 1) - l.modrm.i = r - l.modrm.s = (l.modrm.s || 0) + 1 - unuse r - return l - end - else - l.modrm.b = r - unuse r - return l - end - when Expression - unuse l, r - l = Address.new(l.modrm.dup) - inuse l - l.modrm.imm = Expression[l.modrm.imm, :+, r] - return l - end - when :- - r = c_cexpr_inner(expr.rexpr) - r = resolve_address r if r.kind_of? Address - if r.kind_of? Expression - unuse l, r - l = Address.new(l.modrm.dup) - inuse l - l.modrm.imm = Expression[l.modrm.imm, :-, r] - return l - end - when :* - r = c_cexpr_inner(expr.rexpr) - if r.kind_of? Expression and [1, 2, 4, 8].includre?(rr = r.reduce) - if l.modrm.b and not l.modrm.i - if rr != 1 - l.modrm.i = l.modrm.b - l.modrm.s = rr - l.modrm.imm = Expression[l.modrm.imm, :*, rr] if l.modrm.imm - end - unuse r - return l - elsif l.modrm.i and not l.modrm.b and l.modrm.s*rr <= 8 - l.modrm.s *= rr - l.modrm.imm = Expression[l.modrm.imm, :*, rr] if l.modrm.imm and rr != 1 - unuse r - return l - end - end - end - end - l = make_volatile(l, expr.type) if l.kind_of? Address - r ||= c_cexpr_inner(expr.rexpr) - c_cexpr_inner_arith(l, expr.op, r, expr.type) - l - when :'=' - r = c_cexpr_inner(expr.rexpr) - l = c_cexpr_inner(expr.lexpr) - raise 'bad lvalue ' + l.inspect if not l.kind_of? 
ModRM and not @state.bound.index(l) - r = resolve_address r if r.kind_of? Address - r = make_volatile(r, expr.type) if l.kind_of? ModRM and r.kind_of? ModRM - unuse r - if expr.type.integral? or expr.type.pointer? - if expr.type.integral? and expr.type.name == :__int64 and @cpusz != 64 - ll, lh = get_composite_parts l - rl, rh = get_composite_parts r - instr 'mov', ll, rl - instr 'mov', lh, rh - elsif r.kind_of? Address - m = r.modrm.dup - m.sz = l.sz - instr 'lea', l, m - else - if l.kind_of? ModRM and r.kind_of? Reg and l.sz != r.sz - raise if l.sz > r.sz - if l.sz == 8 and r.val >= 4 - reg = ([0, 1, 2, 3] - @state.used).first - if not reg - eax = Reg.new(0, r.sz) - instr 'push', eax - instr 'mov', eax, r - instr 'mov', l, Reg.new(eax.val, 8) - instr 'pop', eax - else - flushecachereg(reg) - instr 'mov', Reg.new(reg, r.sz), r - instr 'mov', l, Reg.new(reg, 8) - end - else - instr 'mov', l, Reg.new(r.val, l.sz) - end - else - instr 'mov', l, r - end - end - elsif expr.type.float? - r = make_volatile(r, expr.type) if r.kind_of? Expression - instr 'fstp', l - end - l - when :>, :<, :>=, :<=, :==, :'!=' - l = c_cexpr_inner(expr.lexpr) - l = make_volatile(l, expr.type) - r = c_cexpr_inner(expr.rexpr) - unuse r - if expr.lexpr.type.integral? or expr.lexpr.type.pointer? - if expr.lexpr.type.integral? and expr.lexpr.type.name == :__int64 and @cpusz != 64 - raise # TODO - end - instr 'cmp', l, r - elsif expr.lexpr.type.float? 
- raise # TODO - instr 'fucompp', l, r - l = inuse findreg - else raise 'bad comparison ' + expr.to_s - end - opcc = getcc(expr.op, expr.type) - if @exeformat.cpu.opcode_list_byname['set'+opcc] - instr 'set'+opcc, Reg.new(l.val, 8) - instr 'and', l, 1 - else - instr 'mov', l, Expression[1] - label = new_label('setcc') - instr 'j'+opcc, Expression[label] - instr 'mov', l, Expression[0] - @source << Label.new(label) - end - l - else - raise 'unhandled cexpr ' + expr.to_s - end - end - - # compiles a subroutine call - def c_cexpr_inner_funcall(expr) - # check if an obj has an attribute - check on obj and its type - hasattr = lambda { |o, a| (o.kind_of?(C::Variable) and o.has_attribute(a)) or o.type.has_attribute(a) } - hasattrv = lambda { |o, a| (o.kind_of?(C::Variable) and o.has_attribute_var(a)) or o.type.has_attribute_var(a) } - - fargs = expr.lexpr.type.pointer? ? expr.lexpr.type.pointed.args : expr.lexpr.type.args - - backup = [] - if hasattr[expr.lexpr, 'fastcall'] - regargs = [1, 2][0, expr.rexpr.length] - regargs += [nil] * (expr.rexpr.length-2) if expr.rexpr.length > 2 - else - regargs = fargs.map { |a| hasattrv[a, 'register'] }.map { |a| Reg.from_str(a).val if a } - end - @state.abi_flushregs_call.each { |reg| - next if reg == 4 - next if reg == 5 and @state.saved_ebp - if not @state.used.include? reg - if not @state.abi_trashregs.include? reg - # XXX should exclude other code compiled by us (we wont trash reg) - @state.dirty |= [reg] - end - next - end - backup << reg - unuse reg - instr 'push', Reg.new(reg, [@cpusz, 32].max) - } - regargs_list = regargs.compact - regargs_list.each { |reg| - next if backup.include? reg - @state.dirty |= [reg] - next if not @state.used.include? reg - backup << reg - instr 'push', Reg.new(reg, [@cpusz, 32].max) - } - expr.rexpr.reverse_each { |arg| - a = c_cexpr_inner(arg) - a = resolve_address a if a.kind_of? 
Address - unuse a - if r = regargs.pop - inuse r - instr 'mov', Reg.new(r, 32), a - next - end - case arg.type - when C::Pointer - instr 'push', a - when C::BaseType - case t = arg.type.name - when :__int8 - a = make_volatile(a, arg.type) if a.kind_of? ModRM - unuse a - instr 'push', a - when :__int16 - # XXX __int8 unuse, why not here - if @cpusz != 16 and a.kind_of? Reg - instr 'push', Reg.new(a.val, @cpusz) - else - a = make_volatile(a, arg.type) - unuse a - instr 'push', a - end - when :__int32 - instr 'push', a - when :__int64 - case a - when Composite - instr 'push', a.high - instr 'push', a.low - when Reg - instr 'push', a - when ModRM - if @cpusz == 64 - instr 'push', a - else - ml, mh = get_composite_parts a - instr 'push', mh - instr 'push', ml - end - when Expression - instr 'push.i32', Expression[a, :>>, 32] - instr 'push.i32', Expression[a, :&, 0xffff_ffff] - end - when :float, :double, :longdouble - esp = Reg.new(4, @cpusz) - case a - when Expression - # assume expr is integral - a = load_fp_imm(a) - unuse a - when ModRM - instr 'fld', a - end - instr 'sub', esp, typesize[t] - instr 'fstp', ModRM.new(@cpusz, (t == :longdouble ? 80 : (t == :double ? 64 : 32)), nil, nil, esp, nil) - end - when C::Union - raise 'want a modrm ! ' + a.inspect if not a.kind_of? ModRM - al = typesize[:ptr] - argsz = (sizeof(arg) + al - 1) / al * al - while argsz > 0 - argsz -= al - m = a.dup - m.sz = 8*al - m.imm = Expression[m.imm, :+, argsz] - instr 'push', m - end - end - } - if expr.lexpr.kind_of? C::Variable and expr.lexpr.type.kind_of? C::Function - instr 'call', Expression[expr.lexpr.name] - if not hasattr[expr.lexpr, 'stdcall'] and not hasattr[expr.lexpr, 'fastcall'] - al = typesize[:ptr] - argsz = expr.rexpr.zip(fargs).inject(0) { |sum, (a, af)| - af && hasattrv[af, 'register'] ? sum : sum + (sizeof(a) + al - 1) / al * al - } - instr 'add', Reg.new(4, @cpusz), Expression[argsz] if argsz > 0 - end - else - ptr = c_cexpr_inner(expr.lexpr) - unuse ptr - if ptr.kind_of? 
Address - if ptr.target.kind_of? C::Variable and not @state.offset[ptr.target] - # call an existing global function, maybe after casting to another ABI - ptr = Expression[ptr.target.name] - else - ptr = make_volatile(ptr, expr.lexpr.type) - end - end - instr 'call', ptr - f = expr.lexpr - f = f.rexpr while f.kind_of? C::CExpression and not f.op and f.rexpr.kind_of? C::Typed and f.type == f.rexpr.type - if not hasattr[f, 'stdcall'] and not hasattr[f, 'fastcall'] - al = typesize[:ptr] - argsz = expr.rexpr.zip(fargs).inject(0) { |sum, (a, af)| - af && hasattrv[af, 'register'] ? sum : sum + (sizeof(a) + al - 1) / al * al - } - instr 'add', Reg.new(4, @cpusz), Expression[argsz] if argsz > 0 - end - end - @state.abi_flushregs_call.each { |reg| flushcachereg reg } - if expr.type.float? - retreg = FpReg.new(nil) - elsif not expr.type.kind_of? C::BaseType or expr.type.name != :void - if @state.used.include? 0 - retreg = inuse findreg - else - retreg = inuse getreg(0) - end - if expr.type.integral? and expr.type.name == :__int64 and @cpusz != 64 - retreg.sz = 32 - if @state.used.include? 2 - retreg = inuse Composite.new(retreg, findreg(32)) - else - retreg = inuse Composite.new(retreg, getreg(2, 32)) - end - unuse retreg.low - end - end - regargs_list.each { |reg| unuse reg } - backup.reverse_each { |reg| - sz = [@cpusz, 32].max - if retreg.kind_of? Composite and reg == 0 - # XXX wtf ? and what if retreg.low.val == 2 and it was saved too.. - instr 'pop', Reg.new(retreg.low.val, sz) - instr 'xchg', Reg.new(reg, sz), Reg.new(retreg.low.val, sz) - elsif retreg.kind_of? Composite and reg == 2 - # ..boom ! - instr 'pop', Reg.new(retreg.high.val, sz) - instr 'xchg', Reg.new(reg, sz), Reg.new(retreg.high.val, sz) - elsif retreg.kind_of? 
Reg and reg == 0 - instr 'pop', Reg.new(retreg.val, sz) - instr 'xchg', Reg.new(reg, sz), Reg.new(retreg.val, sz) - else - instr 'pop', Reg.new(reg, sz) - end - inuse reg - } - retreg - end - - # compiles/optimizes arithmetic operations - def c_cexpr_inner_arith(l, op, r, type) - # optimizes *2 -> <<1 - if r.kind_of? Expression and (rr = r.reduce).kind_of? ::Integer - if type.integral? - log2 = lambda { |v| - # TODO lol - i = 0 - i += 1 while (1 << i) < v - i if (1 << i) == v - } - if (lr = log2[rr]).kind_of? ::Integer - case op - when :*; return c_cexpr_inner_arith(l, :<<, Expression[lr], type) - when :/; return c_cexpr_inner_arith(l, :>>, Expression[lr], type) - when :%; return c_cexpr_inner_arith(l, :&, Expression[rr-1], type) - end - else - # TODO /r => *(r^(-1)), *3 => stuff with magic constants.. - end - elsif type.float? - case op - when :<<; return c_cexpr_inner_arith(l, :*, Expression[1<>; return c_cexpr_inner_arith(l, :/, Expression[1<>; type.specifier == :unsigned ? 'shr' : 'sar' - when :<<; 'shl' - when :*; 'mul' - when :/; 'div' - when :%; 'mod' - end - - case op - when 'add', 'sub', 'and', 'or', 'xor' - r = make_volatile(r, type) if l.kind_of? ModRM and r.kind_of? ModRM - unuse r - r = Reg.new(r.val, l.sz) if r.kind_of?(Reg) and l.kind_of?(ModRM) and l.sz and l.sz != r.sz # add byte ptr [eax], bl - instr op, l, r - when 'shr', 'sar', 'shl' - if r.kind_of? Expression - instr op, l, r - else - # XXX bouh - r = make_volatile(r, C::BaseType.new(:__int8, :unsigned)) - unuse r - if r.val != 1 - ecx = Reg.new(1, 32) - instr 'xchg', ecx, Reg.new(r.val, 32) - l = Reg.new(r.val, l.sz) if l.kind_of? Reg and l.val == 1 - @state.used.delete r.val if not @state.used.include? 1 - inuse ecx - end - instr op, l, Reg.new(1, 8) - instr 'xchg', ecx, Reg.new(r.val, 32) if r.val != 1 - end - when 'mul' - if l.kind_of? ModRM - if r.kind_of? 
Expression - ll = findreg - instr 'imul', ll, l, r - else - ll = make_volatile(l, type) - unuse ll - instr 'imul', ll, r - end - instr 'mov', l, ll - else - instr 'imul', l, r - end - unuse r - when 'div', 'mod' - lv = l.val if l.kind_of? Reg - eax = Reg.from_str 'eax' - edx = Reg.from_str 'edx' - if @state.used.include? eax.val and lv != eax.val - instr 'push', eax - saved_eax = true - end - if @state.used.include? edx.val and lv != edx.val - instr 'push', edx - saved_edx = true - end - - instr 'mov', eax, l if lv != eax.val - - if r.kind_of? Expression - instr 'push', r - esp = Reg.from_str 'esp' - r = ModRM.new(@cpusz, 32, nil, nil, esp, nil) - need_pop = true - end - - if type.specifier == :unsigned - instr 'mov', edx, Expression[0] - instr 'div', r - else - instr 'cdq' - instr 'idiv', r - end - unuse r - - instr 'add', esp, 4 if need_pop - - if op == 'div' - instr 'mov', l, eax if lv != eax.val - else - instr 'mov', l, edx if lv != edx.val - end - - instr 'pop', edx if saved_edx - instr 'pop', eax if saved_eax - end - end - - # compile an integral arithmetic 64-bits expression on a non-64 cpu - def c_cexpr_inner_arith_int64compose(l, op, r, type) - op = case op - when :+; 'add' - when :-; 'sub' - when :&; 'and' - when :|; 'or' - when :^; 'xor' - when :>>; type.specifier == :unsigned ? 'shr' : 'sar' - when :<<; 'shl' - when :*; 'mul' - when :/; 'div' - when :%; 'mod' - end - - ll, lh = get_composite_parts l - # 1ULL << 2 -> 2 is not ULL - r = make_volatile(r, C::BaseType.new("__int#{r.sz}".to_sym)) if l.kind_of? ModRM and r.kind_of? ModRM - rl, rh = get_composite_parts(r) if not r.kind_of? Reg - - case op - when 'add', 'sub', 'and', 'or', 'xor' - unuse r - instr op, ll, rl - op = {'add' => 'adc', 'sub' => 'sbb'}[op] || op - instr op, lh, rh unless (op == 'or' or op == 'xor') and rh.kind_of?(Expression) and rh.reduce == 0 - when 'shl', 'shr', 'sar' - rlc = r.reduce if r.kind_of? 
Expression - opd = { 'shl' => 'shld', 'shr' => 'shrd', 'sar' => 'shrd' }[op] - - ll, lh = lh, ll if op != 'shl' # OMGHAX - llv = ll - if llv.kind_of? ModRM - llv = make_volatile(llv, C::BaseType.new(:__int32)) - inuse ll - end - - if rlc.kind_of? Integer - case rlc - when 0 - when 1..31 - instr opd, llv, lh, Expression[rlc] - instr op, ll, Expression[rlc] - when 32..63 - instr 'mov', lh, llv - if op == 'sar' - instr 'sar', ll, Expression[31] - else - instr 'mov', ll, Expression[0] - end - instr op, lh, Expression[rlc-32] if rlc != 32 - else - if op == 'sar' - instr 'sar', ll, Expression[31] - instr 'mov', lh, llv - else - instr 'mov', ll, Expression[0] - instr 'mov', lh, Expression[0] - end - end - else - r = make_volatile(r, C::BaseType.new(:__int8, :unsigned)) - r = r.low if r.kind_of? Composite - rl ||= r - - cl = Reg.new(1, 8) - ecx = Reg.new(1, 32) - if r.val != 1 - instr 'xchg', ecx, Reg.new(r.val, 32) - lh = Reg.new(r.val, lh.sz) if lh.kind_of?(Reg) and lh.val == 1 - ll = Reg.new(r.val, ll.sz) if ll.kind_of?(Reg) and ll.val == 1 - llv = Reg.new(r.val, llv.sz) if llv.kind_of?(Reg) and llv.val == 1 - @state.used.delete r.val if not @state.used.include? 
1 - inuse ecx - end - - labelh = new_label('shldh') - labeld = new_label('shldd') - instr 'test', ecx, Expression[0x20] - instr 'jnz', Expression[labelh] - instr opd, llv, lh, cl - instr op, ll, cl - instr 'jmp', Expression[labeld] - @source << Label.new(labelh) - instr op, llv, cl - instr 'mov', lh, llv - if op == 'sar' - instr 'sar', ll, Expression[31] - else - instr 'mov', ll, Expression[0] - end - @source << Label.new(labeld) - - instr 'xchg', ecx, Reg.new(r.val, 32) if r.val != 1 - unuse ecx - unuse r - end - when 'mul' - # high = (low1*high2) + (high1*low2) + (low1*low2).high - t1 = findreg(32) - t2 = findreg(32) - unuse t1, t2, r - instr 'mov', t1, ll - instr 'mov', t2, rl - instr 'imul', t1, rh - instr 'imul', t2, lh - instr 'add', t1, t2 - - raise # TODO push eax/edx, mul, pop - instr 'mov', eax, ll - if rl.kind_of? Expression - instr 'mov', t2, rl - instr 'mul', t2 - else - instr 'mul', rl - end - instr 'add', t1, edx - instr 'mov', lh, t1 - instr 'mov', ll, eax - - when 'div' - raise # TODO - when 'mod' - raise # TODO - end - end - - def c_cexpr(expr) - case expr.op - when :+, :-, :*, :/, :&, :|, :^, :%, :[], nil, :'.', :'->', - :>, :<, :<=, :>=, :==, :'!=', :'!' - # skip no-ops - c_cexpr(expr.lexpr) if expr.lexpr.kind_of? C::CExpression - c_cexpr(expr.rexpr) if expr.rexpr.kind_of? C::CExpression - else unuse c_cexpr_inner(expr) - end - end - - def c_block_exit(block) - @state.cache.delete_if { |k, v| - case v - when C::Variable; block.symbol.index v - when Address; block.symbol.index v.target - end - } - block.symbol.each { |s| - unuse @state.bound.delete(s) - } - end - - def c_decl(var) - if var.type.kind_of? C::Array and - var.type.length.kind_of? C::CExpression - reg = c_cexpr_inner(var.type.length) - unuse reg - instr 'sub', Reg.new(4, @cpusz), reg - # TODO - end - end - - def c_ifgoto(expr, target) - case o = expr.op - when :<, :>, :<=, :>=, :==, :'!=' - l = c_cexpr_inner(expr.lexpr) - r = c_cexpr_inner(expr.rexpr) - if l.kind_of? 
Expression - o = { :< => :>, :> => :<, :>= => :<=, :<= => :>= }[o] || o - l, r = r, l - end - r = make_volatile(r, expr.type) if r.kind_of? ModRM and l.kind_of? ModRM - unuse l, r - if expr.lexpr.type.integral? - if expr.lexpr.type.name == :__int64 and @cpusz != 64 - raise # TODO - end - instr 'cmp', l, r - elsif expr.lexpr.type.float? - raise # TODO - instr 'fcmpp', l, r - else raise 'bad comparison ' + expr.to_s - end - op = 'j' + getcc(o, expr.lexpr.type) - instr op, Expression[target] - when :'!' - r = c_cexpr_inner(expr.rexpr) - r = make_volatile(r, expr.rexpr.type) - unuse r - instr 'test', r, r - instr 'jz', Expression[target] - else - r = c_cexpr_inner(expr) - r = make_volatile(r, expr.type) - unuse r - instr 'test', r, r - instr 'jnz', Expression[target] - end - end - - def c_goto(target) - instr 'jmp', Expression[target] - end - - def c_label(name) - @state.cache.clear - @source << '' << Label.new(name) - end - - def c_return(expr) - return if not expr - @state.cache.delete_if { |r, v| r.kind_of? Reg and r.val == 0 and expr != v } - r = c_cexpr_inner(expr) - r = make_volatile(r, expr.type) - unuse r - case r - when Composite - if r.low.val == 2 - instr 'xchg', r.low, r.high - instr 'mov', Reg.new(0, 32), r.low if r.high.val != 0 - else - instr 'mov', Reg.new(2, 32), r.high if r.high.val != 2 - instr 'mov', Reg.new(0, 32), r.low if r.low.val != 0 - end - when Reg - instr 'mov', Reg.new(0, r.sz), r if r.val != 0 - when FpReg - instr 'fld', FpReg.new(r.val) if r.val and r.val != 0 - end - end - - def c_asm(stmt) - if stmt.output or stmt.input or stmt.clobber - raise # TODO (handle %%0 => eax, gas, etc) - else - raise if @state.func.initializer.symbol.keys.find { |sym| stmt.body =~ /\b#{Regexp.escape(sym)}\b/ } # gsub ebp+off ? - @source << stmt.body - end - end - - def c_init_state(func) - @state = State.new(func) - # ET_DYN trashes ebx too - # XXX hope we're not a Shellcode to be embedded in an ELF.. 
- @state.abi_flushregs_call << 3 if @exeformat and @exeformat.shortname == 'elf' - - c_reserve_stack(func.initializer) - off = @state.offset.values.max.to_i # where to store register args - off = 0 if off < 0 - - al = typesize[:ptr] - argoff = 2*al - fa = func.type.args.dup - if func.has_attribute('fastcall') - 2.times { - if a = fa.shift - off = c_reserve_stack_var(a, off) - @state.offset[a] = off - end - } - end - fa.each { |a| - if a.has_attribute_var('register') or a.type.has_attribute_var('register') - off = c_reserve_stack_var(a, off) - @state.offset[a] = off - next - end - @state.offset[a] = -argoff - argoff = (argoff + sizeof(a) + al - 1) / al * al - } - if not @state.offset.values.grep(::Integer).empty? - @state.saved_ebp = Reg.new(5, @cpusz) - @state.used << 5 - end - end - - def c_prolog - localspc = @state.offset.values.grep(::Integer).max - return if @state.func.has_attribute('naked') - if localspc - al = typesize[:ptr] - localspc = (localspc + al - 1) / al * al - ebp = @state.saved_ebp - esp = Reg.new(4, ebp.sz) - instr 'push', ebp - instr 'mov', ebp, esp - instr 'sub', esp, Expression[localspc] if localspc > 0 - - if @state.func.has_attribute('fastcall') - if a0 = @state.func.type.args[0] - instr 'mov', findvar(a0), Reg.new(1, 32) - end - if a1 = @state.func.type.args[1] - instr 'mov', findvar(a1), Reg.new(2, 32) - end - else - @state.func.type.args.each { |a| - if r = (a.has_attribute_var('register') or a.type.has_attribute_var('register')) - # XXX if r == ebp, then prepend_prolog mov [esp-off], ebp... - # XXX this would break when calling anyway (mov ebp, 42; ; call func) - instr 'mov', findvar(a), Reg.from_str(r) - end - } - end - end - @state.dirty -= @state.abi_trashregs # XXX ABI - @state.dirty.each { |reg| - instr 'push', Reg.new(reg, @cpusz) - } - end - - def c_epilog - return if @state.func.attributes.to_a.include? 
'naked' - # TODO revert dynamic array alloc - @state.dirty.reverse_each { |reg| - instr 'pop', Reg.new(reg, @cpusz) - } - if ebp = @state.saved_ebp - instr 'mov', Reg.new(4, ebp.sz), ebp - instr 'pop', ebp - end - f = @state.func - if f.has_attribute('stdcall') or f.has_attribute('fastcall') - al = typesize[:ptr] - fa = f.type.args.dup - 2.times { fa.shift } if f.has_attribute('fastcall') - argsz = fa.inject(0) { |sum, a| - (a.has_attribute_var('register') or a.type.has_attribute_var('register')) ? sum : sum + (sizeof(a) + al - 1) / al * al - } - if argsz > 0 - instr 'ret', Expression[argsz] - else - instr 'ret' - end - else - instr 'ret' - end - end - - # adds the metasm_intern_geteip function, which returns its own address in eax (used for PIC addressing) - def c_program_epilog - if defined? @need_geteip_stub and @need_geteip_stub - return if new_label('metasm_intern_geteip') != 'metasm_intern_geteip' # already defined elsewhere - - eax = Reg.new(0, @cpusz) - label = new_label('geteip') - - @source << Label.new('metasm_intern_geteip') - instr 'call', Expression[label] - @source << Label.new(label) - instr 'pop', eax - instr 'add', eax, Expression['metasm_intern_geteip', :-, label] - instr 'ret' - end + # holds compiler state information for a function + # registers are saved as register number (see Ia32::Reg) + # TODO cache eflags ? or just z ? 
(may be defered to asm_optimize) + class State + # variable => offset from ebp (::Integer or CExpression) + attr_accessor :offset + # the current function + attr_accessor :func + # register => CExpression + attr_accessor :cache + # array of register values used in the function (to save/restore at prolog/epilog) + attr_accessor :dirty + # the array of register values currently not available + attr_accessor :used + # the array of args in use (reg/modrm/composite) the reg dependencies are in +used+ + attr_accessor :inuse + # variable => register for current scope (variable never on the stack) + # bound registers are also in +used+ + attr_accessor :bound + # list of reg values that are not kept across function call + attr_accessor :abi_flushregs_call + # list of regs we can trash without restoring them + attr_accessor :abi_trashregs + + # +used+ includes ebp if true + # nil if ebp is not reserved for stack variable addressing + # Reg if used + attr_accessor :saved_ebp + + def initialize(func) + @func = func + @offset = {} + @cache = {} + @dirty = [] + @used = [4] # esp is always in use + @inuse = [] + @bound = {} + @abi_flushregs_call = [0, 1, 2] # eax, ecx, edx (r8 etc ?) + @abi_trashregs = [0, 1, 2] + end + end + + # tracks 2 registers storing a value bigger than each + class Composite + attr_accessor :low, :high + def initialize(low, high) + @low, @high = low, high + end + def sz; 64 end + end + + # some address + class Address + attr_accessor :modrm, :target + def initialize(modrm, target=nil) + @modrm, @target = modrm, target + end + def sz; @modrm.adsz end + def to_s; "#" end + end + + + def initialize(*a) + super(*a) + @cpusz = @exeformat.cpu.size + @regnummax = (@cpusz == 64 ? 15 : 7) + end + + # shortcut to add an instruction to the source + def instr(name, *args) + # XXX parse_postfix ? 
+ @source << Instruction.new(@exeformat.cpu, name, args) + end + + # returns an available register, tries to find one not in @state.cache + # do not use with sz==8 (aliasing ah=>esp) + # does not put it in @state.inuse + # TODO multipass for reg cache optimization + # TODO dynamic regval for later fixup (need a value to be in ecx for shl, etc) + def findreg(sz = @cpusz) + caching = @state.cache.keys.grep(Reg).map { |r| r.val } + if not regval = ([*0..@regnummax] - @state.used - caching).first || + ([*0..@regnummax] - @state.used).first + raise 'need more registers! (or a better compiler?)' + end + getreg(regval, sz) + end + + # returns a Reg from a regval, mark it as dirty, flush old cache dependencies + def getreg(regval, sz=@cpusz) + flushcachereg(regval) + @state.dirty |= [regval] + Reg.new(regval, sz) + end + + # remove the cache keys that depends on the register + def flushcachereg(regval) + @state.cache.delete_if { |e, val| + case e + when Reg; e.val == regval + when Address; e = e.modrm ; redo + when ModRM; e.b && (e.b.val == regval) or e.i && (e.i.val == regval) + when Composite; e.low.val == regval or e.high.val == regval + end + } + end + + # removes elements from @state.inuse, free @state.used if unreferenced + # must be the exact object present in inuse + def unuse(*vals) + vals.each { |val| + val = val.modrm if val.kind_of? Address + @state.inuse.delete val + } + # XXX cache exempt + exempt = @state.bound.values.map { |r| r.kind_of? Composite ? [r.low.val, r.high.val] : r.val }.flatten + exempt << 4 + exempt << 5 if @state.saved_ebp + @state.used.delete_if { |regval| + next if exempt.include? 
regval + not @state.inuse.find { |val| + case val + when Reg; val.val == regval + when ModRM; (val.b and val.b.val == regval) or (val.i and val.i.val == regval) + when Composite; val.low.val == regval or val.high.val == regval + else raise 'internal error - inuse ' + val.inspect + end + } + } + end + + # marks an arg as in use, returns the arg + def inuse(v) + case v + when Reg; @state.used |= [v.val] + when ModRM + @state.used |= [v.i.val] if v.i + @state.used |= [v.b.val] if v.b + when Composite; @state.used |= [v.low.val, v.high.val] + when Address; inuse v.modrm ; return v + else return v + end + @state.inuse |= [v] + v + end + + # returns a variable storage (ModRM for stack/global, Reg/Composite for register-bound) + def findvar(var) + if ret = @state.bound[var] + return ret + end + + if ret = @state.cache.index(var) + ret = ret.dup + inuse ret + return ret + end + + sz = 8*sizeof(var) rescue nil # extern char foo[]; + + case off = @state.offset[var] + when C::CExpression + # stack, dynamic address + # TODO + # no need to update state.cache here, never recursive + v = raise "find dynamic addr of #{var.name}" + when ::Integer + # stack + # TODO -fomit-frame-pointer ( => state.cache dependant on stack_offset... ) + v = ModRM.new(@cpusz, sz, nil, nil, @state.saved_ebp, Expression[-off]) + when nil + # global + if @exeformat.cpu.generate_PIC + if not reg = @state.cache.index('metasm_intern_geteip') + @need_geteip_stub = true + if @state.used.include? 6 # esi + reg = findreg + else + reg = getreg 6 + end + if reg.val != 0 + if @state.used.include? 0 + eax = Reg.new(0, @cpusz) + instr 'mov', reg, eax + else + eax = getreg 0 + end + end + + instr 'call', Expression['metasm_intern_geteip'] + + if reg.val != 0 + if @state.used.include? 
0 + instr 'xchg', eax, reg + else + instr 'mov', reg, eax + end + end + + @state.cache[reg] = 'metasm_intern_geteip' + end + v = ModRM.new(@cpusz, sz, nil, nil, reg, Expression[var.name, :-, 'metasm_intern_geteip']) + else + v = ModRM.new(@cpusz, sz, nil, nil, nil, Expression[var.name]) + end + end + + case var.type + when C::Array; inuse Address.new(v) + else inuse v + end + end + + # resolves the Address to Reg/Expr (may encode an 'lea') + def resolve_address(e) + r = e.modrm + unuse e + if r.imm and not r.b and not r.i + reg = r.imm + elsif not r.imm and ((not r.b and r.s == 1) or not r.i) + reg = r.b || r.i + elsif reg = @state.cache.index(e) + reg = reg.dup + else + reg = findreg + r.sz = reg.sz + instr 'lea', reg, r + end + inuse reg + @state.cache[reg] = e + reg + end + + # copies the arg e to a volatile location (register/composite) if it is not already + # unuses the old storage + # may return a register bigger than the type size (eg __int8 are stored in full reg size) + # use rsz only to force 32bits-return on a 16bits cpu + def make_volatile(e, type, rsz=@cpusz) + if e.kind_of? ModRM or @state.bound.index(e) + if type.integral? or type.pointer? + oldval = @state.cache[e] + if type.integral? and type.name == :__int64 and @cpusz != 64 + e2l = inuse findreg(32) + unuse e + e2h = inuse findreg(32) + el, eh = get_composite_parts e + instr 'mov', e2l, el + instr 'mov', e2h, eh + e2 = inuse Composite.new(e2l, e2h) + unuse e2l, e2h + else + unuse e + n = type.integral? ? type.name : :ptr + if (sz = typesize[n]*8) < @cpusz or sz < rsz or e.sz < rsz + e2 = inuse findreg(rsz) + op = ((type.specifier == :unsigned) ? 'movzx' : 'movsx') + op = 'mov' if e.sz == e2.sz + else + e2 = inuse findreg(sz) + op = 'mov' + end + instr op, e2, e + end + @state.cache[e2] = oldval if oldval and e.kind_of? ModRM + e2 + elsif type.float? + raise 'bad float static' + e.inspect if not e.kind_of? ModRM + unuse e + instr 'fld', e + FpReg.new nil + else raise + end + elsif e.kind_of? 
Address + make_volatile resolve_address(e), type, rsz + elsif e.kind_of? Expression + if type.integral? or type.pointer? + if type.integral? and type.name == :__int64 and @cpusz != 64 + e2 = inuse Composite.new(inuse(findreg(32)), findreg(32)) + instr 'mov', e2.low, Expression[e, :&, 0xffff_ffff] + instr 'mov', e2.high, Expression[e, :>>, 32] + else + e2 = inuse findreg + instr 'mov', e2, e + end + e2 + elsif type.float? + case e.reduce + when 0; instr 'fldz' + when 1; instr 'fld1' + else + esp = Reg.new(4, @cpusz) + instr 'push.i32', Expression[e, :>>, 32] + instr 'push.i32', Expression[e, :&, 0xffff_ffff] + instr 'fild', ModRM.new(@cpusz, 64, nil, nil, esp, nil) + instr 'add', esp, 8 + end + FpReg.new nil + end + else + e + end + end + + # returns two args corresponding to the low and high 32bits of the 64bits composite arg + def get_composite_parts(e) + case e + when ModRM + el = e.dup + el.sz = 32 + eh = el.dup + eh.imm = Expression[eh.imm, :+, 4] + when Expression + el = Expression[e, :&, 0xffff_ffff] + eh = Expression[e, :>>, 32] + when Composite + el = e.low + eh = e.high + when Reg + el = e + eh = findreg + else raise + end + [el, eh] + end + + # returns the instruction suffix for a comparison operator + def getcc(op, type) + case op + when :'=='; 'z' + when :'!='; 'nz' + when :'<' ; 'b' + when :'>' ; 'a' + when :'<='; 'be' + when :'>='; 'ae' + else raise "bad comparison op #{op}" + end.tr((type.specifier == :unsigned ? 
'' : 'ab'), 'gl') + end + + # compiles a c expression, returns an Ia32 instruction argument + def c_cexpr_inner(expr) + case expr + when ::Integer; Expression[expr] + when C::Variable; findvar(expr) + when C::CExpression + if not expr.lexpr or not expr.rexpr + inuse c_cexpr_inner_nol(expr) + else + inuse c_cexpr_inner_l(expr) + end + when C::Label; findvar(C::Variable.new(expr.name, C::Array.new(C::BaseType.new(:void), 1))) + else puts "ia32/c_ce_i: unsupported #{expr}" if $VERBOSE + end + end + + # compile a CExpression with no lexpr + def c_cexpr_inner_nol(expr) + case expr.op + when nil + r = c_cexpr_inner(expr.rexpr) + if (expr.rexpr.kind_of? C::CExpression or expr.rexpr.kind_of? C::Variable) and + expr.type.kind_of? C::BaseType and expr.rexpr.type.kind_of? C::BaseType + r = c_cexpr_inner_cast(expr, r) + end + r + when :+ + c_cexpr_inner(expr.rexpr) + when :- + r = c_cexpr_inner(expr.rexpr) + r = make_volatile(r, expr.type) + if expr.type.integral? or expr.type.pointer? + if r.kind_of? Composite + instr 'neg', r.low + instr 'adc', r.high, Expression[0] + instr 'neg', r.high + else + instr 'neg', r + end + elsif expr.type.float? + instr 'fchs' + else raise + end + r + when :'++', :'--' + r = c_cexpr_inner(expr.rexpr) + inc = true if expr.op == :'++' + if expr.type.integral? or expr.type.pointer? + if expr.type.integral? and expr.type.name == :__int64 and @cpusz != 64 + rl, rh = get_composite_parts r + instr 'add', rl, Expression[inc ? 1 : -1] + instr 'adc', rh, Expression[inc ? 0 : -1] + else + op = (inc ? 'inc' : 'dec') + instr op, r + end + elsif expr.type.float? + raise 'bad lvalue' if not r.kind_of? ModRM + instr 'fld1' + op = (inc ? 'faddp' : 'fsubp') + instr op, r + instr 'fstp', r + end + r + when :& + raise 'bad precompiler ' + expr.to_s if not expr.rexpr.kind_of? C::Variable + @state.cache.each { |r_, c| + return inuse(r_) if c.kind_of? Address and c.target == expr.rexpr + } + r = c_cexpr_inner(expr.rexpr) + raise 'bad lvalue' if not r.kind_of? 
ModRM + unuse r + r = Address.new(r) + inuse r + r.target = expr.rexpr + r + when :* + expr.rexpr.type.name = :ptr if expr.rexpr.kind_of? C::CExpression and expr.rexpr.type.kind_of? C::BaseType and typesize[expr.rexpr.type.name] == typesize[:ptr] # hint to use Address + e = c_cexpr_inner(expr.rexpr) + sz = 8*sizeof(expr) + case e + when Address + unuse e + e = e.modrm.dup + e.sz = sz + inuse e + when ModRM; e = make_volatile(e, expr.rexpr.type) if not expr.rexpr.type.float? + end + case e + when Reg; unuse e ; e = inuse ModRM.new(@cpusz, sz, nil, nil, e, nil) + when Expression; e = inuse ModRM.new(@cpusz, sz, nil, nil, nil, e) + end + e + when :'!' + r = c_cexpr_inner(expr.rexpr) + r = make_volatile(r, expr.rexpr.type) + if expr.rexpr.type.integral? or expr.type.pointer? + if expr.type.integral? and expr.rexpr.type.name == :__int64 and @cpusz != 64 + raise # TODO + end + r = make_volatile(r, expr.rexpr.type) + instr 'test', r, r + elsif expr.rexpr.type.float? + if @exeformat.cpu.opcode_list_byname['fucomip'] + instr 'fldz' + instr 'fucomip' + else + raise # TODO + end + r = inuse findreg + else raise 'bad comparison ' + expr.to_s + end + if @exeformat.cpu.opcode_list_byname['setz'] + instr 'setz', Reg.new(r.val, 8) + instr 'and', r, Expression[1] + else + instr 'mov', r, Expression[1] + label = new_label('setcc') + instr 'jz', Expression[label] + instr 'mov', r, Expression[0] + @source << Label.new(label) + end + r + else raise 'mmh ? ' + expr.to_s + end + end + + # compile a cast (BaseType to BaseType) + def c_cexpr_inner_cast(expr, r) + esp = Reg.new(4, @cpusz) + if expr.type.float? and expr.rexpr.type.float? + if expr.type.name != expr.rexpr.type.name and r.kind_of? ModRM + instr 'fld', r + unuse r + r = FpReg.new nil + end + elsif expr.type.float? and expr.rexpr.type.integral? + r = resolve_address r if r.kind_of? Address + return make_volatile(r, expr.type) if r.kind_of? 
Expression + unuse r + if expr.rexpr.type.specifier == :unsigned and r.sz != 64 + instr 'push.i32', Expression[0] + end + case r + when ModRM + case expr.rexpr.type.name + when :__int8, :__int16 + r = make_volatile(r, expr.rexpr.type, 32) + instr 'push', r + else + if expr.rexpr.type.specifier != :unsigned + instr 'fild', r + return FpReg.new(nil) + end + if r.sz == 64 + get_composite_parts(r).reverse_each { |rp| instr 'push', rp } + else + instr 'push', r + end + end + when Composite + instr 'push', r.high + instr 'push', r.low + when Reg + if r.sz == 16 + op = ((expr.rexpr.type.specifier == :unsigned) ? 'movzx' : 'movsx') + rr = r.dup + rr.sz = 32 + instr op, rr, r + r = rr + end + instr 'push', r + end + m = ModRM.new(@cpusz, r.sz, nil, nil, esp, nil) + instr 'fild', m + instr 'add', esp, (expr.rexpr.type.specifier == :unsigned ? 8 : Expression[r.sz/8]) + if expr.rexpr.type.specifier == :unsigned and r.sz == 64 + label = new_label('unsign_float') + if m.sz == 64 and @cpusz < 64 + m = get_composite_parts(m)[1] + end + m2 = m + m2 = make_volatile(m, expr.rexpr.type) if m.kind_of? ModRM + m2 = get_composite_parts(m2)[0] if m2.kind_of? Composite + instr 'test', m2, m2 + instr 'jns', Expression[label] + instr 'push.i32', Expression[0x7fff_ffff] + instr 'push.i32', Expression[0xffff_ffff] + instr 'fild', m + instr 'add', esp, 8 + instr 'faddp', FpReg.new(1) + instr 'fld1' + instr 'faddp', FpReg.new(1) + @source << Label.new(label) + end + r = FpReg.new nil + elsif expr.type.integral? and expr.rexpr.type.float? 
+ r = make_volatile(r, expr.rexpr.type) # => ST(0) + + if expr.type.name == :__int64 + instr 'sub', esp, Expression[8] + instr 'fistp', ModRM.new(@cpusz, 64, nil, nil, esp, nil) + if @cpusz == 64 + r = findreg + instr 'pop', r + else + r = Composite.new(findreg(32), findreg(32)) + instr 'pop', r.low + instr 'pop', r.high + end + else + instr 'sub', esp, Expression[4] + instr 'fistp', ModRM.new(@cpusz, 32, nil, nil, esp, nil) + r = findreg(32) + instr 'pop', r + tto = typesize[expr.type.name]*8 + instr 'and', r, Expression[(1<<tto)-1] if r.sz > tto + end + inuse r + elsif (expr.type.integral? or expr.type.pointer?) and (expr.rexpr.type.integral? or expr.rexpr.type.pointer?) + tto = typesize[expr.type.integral? ? expr.type.name : :ptr]*8 + tfrom = typesize[expr.rexpr.type.integral? ? expr.rexpr.type.name : :ptr]*8 + r = resolve_address r if r.kind_of? Address + if r.kind_of? Expression + r = make_volatile r, expr.type + elsif tfrom > tto + if tfrom == 64 and r.kind_of? Composite + unuse r + r = r.low + inuse r + end + case r + when ModRM + unuse r + r = r.dup + r.sz = tto + inuse r + when Reg + instr 'and', r, Expression[(1<<tto)-1] if r.sz > tto + end + elsif tto > tfrom + if tto == 64 and @cpusz != 64 + high = findreg(32) + unuse r + if not r.kind_of? Reg or r.sz != 32 + inuse high + low = findreg(32) + unuse high + op = (r.sz == 32 ? 'mov' : (expr.rexpr.type.specifier == :unsigned ? 'movzx' : 'movsx')) + instr op, low, r + r = low + end + r = inuse Composite.new(r, high) + if expr.type.specifier == :unsigned + instr 'xor', r.high, r.high + else + instr 'mov', r.high, r.low + instr 'sar', r.high, Expression[31] + end + elsif not r.kind_of? Reg or r.sz != @cpusz + unuse r + reg = inuse findreg + op = (r.sz == reg.sz ? 'mov' : (expr.rexpr.type.specifier == :unsigned ? 
'movzx' : 'movsx')) + instr op, reg, r + r = reg + end + end + end + r + end + + # compiles a CExpression, not arithmetic (assignment, comparison etc) + def c_cexpr_inner_l(expr) + case expr.op + when :funcall + c_cexpr_inner_funcall(expr) + when :'+=', :'-=', :'*=', :'/=', :'%=', :'^=', :'&=', :'|=', :'<<=', :'>>=' + l = c_cexpr_inner(expr.lexpr) + raise 'bad lvalue' if not l.kind_of? ModRM and not @state.bound.index(l) + instr 'fld', l if expr.type.float? + r = c_cexpr_inner(expr.rexpr) + op = expr.op.to_s.chop.to_sym + c_cexpr_inner_arith(l, op, r, expr.type) + instr 'fstp', l if expr.type.float? + l + when :'+', :'-', :'*', :'/', :'%', :'^', :'&', :'|', :'<<', :'>>' + # both sides are already cast to the same type by the precompiler + # XXX expr.type.pointer? + if expr.type.integral? and expr.type.name == :ptr and expr.lexpr.type.kind_of? C::BaseType and + typesize[expr.lexpr.type.name] == typesize[:ptr] + expr.lexpr.type.name = :ptr + end + l = c_cexpr_inner(expr.lexpr) + l = make_volatile(l, expr.type) if not l.kind_of? Address + if expr.type.integral? and expr.type.name == :ptr and l.kind_of? Reg + unuse l + l = Address.new ModRM.new(l.sz, @cpusz, nil, nil, l, nil) + inuse l + end + if l.kind_of? Address and expr.type.integral? + l.modrm.imm = nil if l.modrm.imm and not l.modrm.imm.op and l.modrm.imm.rexpr == 0 + if l.modrm.b and l.modrm.i and l.modrm.s == 1 and l.modrm.b.val == l.modrm.i.val + unuse l.modrm.b if l.modrm.b != l.modrm.i + l.modrm.b = nil + l.modrm.s = 2 + end + case expr.op + when :+ + rexpr = expr.rexpr + rexpr = rexpr.rexpr while rexpr.kind_of? C::CExpression and not rexpr.op and rexpr.type.integral? and + rexpr.rexpr.kind_of? C::CExpression and rexpr.rexpr.type.integral? and + typesize[rexpr.type.name] == typesize[rexpr.rexpr.type.name] + if rexpr.kind_of? C::CExpression and rexpr.op == :* and rexpr.lexpr + r1 = c_cexpr_inner(rexpr.lexpr) + r2 = c_cexpr_inner(rexpr.rexpr) + r1, r2 = r2, r1 if r1.kind_of? Expression + if r2.kind_of? 
Expression and [1, 2, 4, 8].include?(rr2 = r2.reduce) + case r1 + when ModRM, Address, Reg + r1 = make_volatile(r1, rexpr.type) if not r1.kind_of? Reg + if not l.modrm.i or (l.modrm.i.val == r1.val and l.modrm.s == 1 and rr2 == 1) + unuse l, r1, r2 + l = Address.new(l.modrm.dup) + inuse l + l.modrm.i = r1 + l.modrm.s = (l.modrm.s || 0) + rr2 + return l + end + end + end + r = make_volatile(r1, rexpr.type) + c_cexpr_inner_arith(r, :*, r2, rexpr.type) + else + r = c_cexpr_inner(rexpr) + end + r = resolve_address r if r.kind_of? Address + r = make_volatile(r, rexpr.type) if r.kind_of? ModRM + case r + when Reg + unuse l + l = Address.new(l.modrm.dup) + inuse l + if l.modrm.b + if not l.modrm.i or (l.modrm.i.val == r.val and l.modrm.s == 1) + l.modrm.i = r + l.modrm.s = (l.modrm.s || 0) + 1 + unuse r + return l + end + else + l.modrm.b = r + unuse r + return l + end + when Expression + unuse l, r + l = Address.new(l.modrm.dup) + inuse l + l.modrm.imm = Expression[l.modrm.imm, :+, r] + return l + end + when :- + r = c_cexpr_inner(expr.rexpr) + r = resolve_address r if r.kind_of? Address + if r.kind_of? Expression + unuse l, r + l = Address.new(l.modrm.dup) + inuse l + l.modrm.imm = Expression[l.modrm.imm, :-, r] + return l + end + when :* + r = c_cexpr_inner(expr.rexpr) + if r.kind_of? Expression and [1, 2, 4, 8].includre?(rr = r.reduce) + if l.modrm.b and not l.modrm.i + if rr != 1 + l.modrm.i = l.modrm.b + l.modrm.s = rr + l.modrm.imm = Expression[l.modrm.imm, :*, rr] if l.modrm.imm + end + unuse r + return l + elsif l.modrm.i and not l.modrm.b and l.modrm.s*rr <= 8 + l.modrm.s *= rr + l.modrm.imm = Expression[l.modrm.imm, :*, rr] if l.modrm.imm and rr != 1 + unuse r + return l + end + end + end + end + l = make_volatile(l, expr.type) if l.kind_of? Address + r ||= c_cexpr_inner(expr.rexpr) + c_cexpr_inner_arith(l, expr.op, r, expr.type) + l + when :'=' + r = c_cexpr_inner(expr.rexpr) + l = c_cexpr_inner(expr.lexpr) + raise 'bad lvalue ' + l.inspect if not l.kind_of? 
ModRM and not @state.bound.index(l) + r = resolve_address r if r.kind_of? Address + r = make_volatile(r, expr.type) if l.kind_of? ModRM and r.kind_of? ModRM + unuse r + if expr.type.integral? or expr.type.pointer? + if expr.type.integral? and expr.type.name == :__int64 and @cpusz != 64 + ll, lh = get_composite_parts l + rl, rh = get_composite_parts r + instr 'mov', ll, rl + instr 'mov', lh, rh + elsif r.kind_of? Address + m = r.modrm.dup + m.sz = l.sz + instr 'lea', l, m + else + if l.kind_of? ModRM and r.kind_of? Reg and l.sz != r.sz + raise if l.sz > r.sz + if l.sz == 8 and r.val >= 4 + reg = ([0, 1, 2, 3] - @state.used).first + if not reg + eax = Reg.new(0, r.sz) + instr 'push', eax + instr 'mov', eax, r + instr 'mov', l, Reg.new(eax.val, 8) + instr 'pop', eax + else + flushecachereg(reg) + instr 'mov', Reg.new(reg, r.sz), r + instr 'mov', l, Reg.new(reg, 8) + end + else + instr 'mov', l, Reg.new(r.val, l.sz) + end + else + instr 'mov', l, r + end + end + elsif expr.type.float? + r = make_volatile(r, expr.type) if r.kind_of? Expression + instr 'fstp', l + end + l + when :>, :<, :>=, :<=, :==, :'!=' + l = c_cexpr_inner(expr.lexpr) + l = make_volatile(l, expr.type) + r = c_cexpr_inner(expr.rexpr) + unuse r + if expr.lexpr.type.integral? or expr.lexpr.type.pointer? + if expr.lexpr.type.integral? and expr.lexpr.type.name == :__int64 and @cpusz != 64 + raise # TODO + end + instr 'cmp', l, r + elsif expr.lexpr.type.float? 
+ raise # TODO + instr 'fucompp', l, r + l = inuse findreg + else raise 'bad comparison ' + expr.to_s + end + opcc = getcc(expr.op, expr.type) + if @exeformat.cpu.opcode_list_byname['set'+opcc] + instr 'set'+opcc, Reg.new(l.val, 8) + instr 'and', l, 1 + else + instr 'mov', l, Expression[1] + label = new_label('setcc') + instr 'j'+opcc, Expression[label] + instr 'mov', l, Expression[0] + @source << Label.new(label) + end + l + else + raise 'unhandled cexpr ' + expr.to_s + end + end + + # compiles a subroutine call + def c_cexpr_inner_funcall(expr) + # check if an obj has an attribute - check on obj and its type + hasattr = lambda { |o, a| (o.kind_of?(C::Variable) and o.has_attribute(a)) or o.type.has_attribute(a) } + hasattrv = lambda { |o, a| (o.kind_of?(C::Variable) and o.has_attribute_var(a)) or o.type.has_attribute_var(a) } + + fargs = expr.lexpr.type.pointer? ? expr.lexpr.type.pointed.args : expr.lexpr.type.args + + backup = [] + if hasattr[expr.lexpr, 'fastcall'] + regargs = [1, 2][0, expr.rexpr.length] + regargs += [nil] * (expr.rexpr.length-2) if expr.rexpr.length > 2 + else + regargs = fargs.map { |a| hasattrv[a, 'register'] }.map { |a| Reg.from_str(a).val if a } + end + @state.abi_flushregs_call.each { |reg| + next if reg == 4 + next if reg == 5 and @state.saved_ebp + if not @state.used.include? reg + if not @state.abi_trashregs.include? reg + # XXX should exclude other code compiled by us (we wont trash reg) + @state.dirty |= [reg] + end + next + end + backup << reg + unuse reg + instr 'push', Reg.new(reg, [@cpusz, 32].max) + } + regargs_list = regargs.compact + regargs_list.each { |reg| + next if backup.include? reg + @state.dirty |= [reg] + next if not @state.used.include? reg + backup << reg + instr 'push', Reg.new(reg, [@cpusz, 32].max) + } + expr.rexpr.reverse_each { |arg| + a = c_cexpr_inner(arg) + a = resolve_address a if a.kind_of? 
Address + unuse a + if r = regargs.pop + inuse r + instr 'mov', Reg.new(r, 32), a + next + end + case arg.type + when C::Pointer + instr 'push', a + when C::BaseType + case t = arg.type.name + when :__int8 + a = make_volatile(a, arg.type) if a.kind_of? ModRM + unuse a + instr 'push', a + when :__int16 + # XXX __int8 unuse, why not here + if @cpusz != 16 and a.kind_of? Reg + instr 'push', Reg.new(a.val, @cpusz) + else + a = make_volatile(a, arg.type) + unuse a + instr 'push', a + end + when :__int32 + instr 'push', a + when :__int64 + case a + when Composite + instr 'push', a.high + instr 'push', a.low + when Reg + instr 'push', a + when ModRM + if @cpusz == 64 + instr 'push', a + else + ml, mh = get_composite_parts a + instr 'push', mh + instr 'push', ml + end + when Expression + instr 'push.i32', Expression[a, :>>, 32] + instr 'push.i32', Expression[a, :&, 0xffff_ffff] + end + when :float, :double, :longdouble + esp = Reg.new(4, @cpusz) + case a + when Expression + # assume expr is integral + a = load_fp_imm(a) + unuse a + when ModRM + instr 'fld', a + end + instr 'sub', esp, typesize[t] + instr 'fstp', ModRM.new(@cpusz, (t == :longdouble ? 80 : (t == :double ? 64 : 32)), nil, nil, esp, nil) + end + when C::Union + raise 'want a modrm ! ' + a.inspect if not a.kind_of? ModRM + al = typesize[:ptr] + argsz = (sizeof(arg) + al - 1) / al * al + while argsz > 0 + argsz -= al + m = a.dup + m.sz = 8*al + m.imm = Expression[m.imm, :+, argsz] + instr 'push', m + end + end + } + if expr.lexpr.kind_of? C::Variable and expr.lexpr.type.kind_of? C::Function + instr 'call', Expression[expr.lexpr.name] + if not hasattr[expr.lexpr, 'stdcall'] and not hasattr[expr.lexpr, 'fastcall'] + al = typesize[:ptr] + argsz = expr.rexpr.zip(fargs).inject(0) { |sum, (a, af)| + af && hasattrv[af, 'register'] ? sum : sum + (sizeof(a) + al - 1) / al * al + } + instr 'add', Reg.new(4, @cpusz), Expression[argsz] if argsz > 0 + end + else + ptr = c_cexpr_inner(expr.lexpr) + unuse ptr + if ptr.kind_of? 
Address + if ptr.target.kind_of? C::Variable and not @state.offset[ptr.target] + # call an existing global function, maybe after casting to another ABI + ptr = Expression[ptr.target.name] + else + ptr = make_volatile(ptr, expr.lexpr.type) + end + end + instr 'call', ptr + f = expr.lexpr + f = f.rexpr while f.kind_of? C::CExpression and not f.op and f.rexpr.kind_of? C::Typed and f.type == f.rexpr.type + if not hasattr[f, 'stdcall'] and not hasattr[f, 'fastcall'] + al = typesize[:ptr] + argsz = expr.rexpr.zip(fargs).inject(0) { |sum, (a, af)| + af && hasattrv[af, 'register'] ? sum : sum + (sizeof(a) + al - 1) / al * al + } + instr 'add', Reg.new(4, @cpusz), Expression[argsz] if argsz > 0 + end + end + @state.abi_flushregs_call.each { |reg| flushcachereg reg } + if expr.type.float? + retreg = FpReg.new(nil) + elsif not expr.type.kind_of? C::BaseType or expr.type.name != :void + if @state.used.include? 0 + retreg = inuse findreg + else + retreg = inuse getreg(0) + end + if expr.type.integral? and expr.type.name == :__int64 and @cpusz != 64 + retreg.sz = 32 + if @state.used.include? 2 + retreg = inuse Composite.new(retreg, findreg(32)) + else + retreg = inuse Composite.new(retreg, getreg(2, 32)) + end + unuse retreg.low + end + end + regargs_list.each { |reg| unuse reg } + backup.reverse_each { |reg| + sz = [@cpusz, 32].max + if retreg.kind_of? Composite and reg == 0 + # XXX wtf ? and what if retreg.low.val == 2 and it was saved too.. + instr 'pop', Reg.new(retreg.low.val, sz) + instr 'xchg', Reg.new(reg, sz), Reg.new(retreg.low.val, sz) + elsif retreg.kind_of? Composite and reg == 2 + # ..boom ! + instr 'pop', Reg.new(retreg.high.val, sz) + instr 'xchg', Reg.new(reg, sz), Reg.new(retreg.high.val, sz) + elsif retreg.kind_of? 
Reg and reg == 0 + instr 'pop', Reg.new(retreg.val, sz) + instr 'xchg', Reg.new(reg, sz), Reg.new(retreg.val, sz) + else + instr 'pop', Reg.new(reg, sz) + end + inuse reg + } + retreg + end + + # compiles/optimizes arithmetic operations + def c_cexpr_inner_arith(l, op, r, type) + # optimizes *2 -> <<1 + if r.kind_of? Expression and (rr = r.reduce).kind_of? ::Integer + if type.integral? + log2 = lambda { |v| + # TODO lol + i = 0 + i += 1 while (1 << i) < v + i if (1 << i) == v + } + if (lr = log2[rr]).kind_of? ::Integer + case op + when :*; return c_cexpr_inner_arith(l, :<<, Expression[lr], type) + when :/; return c_cexpr_inner_arith(l, :>>, Expression[lr], type) + when :%; return c_cexpr_inner_arith(l, :&, Expression[rr-1], type) + end + else + # TODO /r => *(r^(-1)), *3 => stuff with magic constants.. + end + elsif type.float? + case op + when :<<; return c_cexpr_inner_arith(l, :*, Expression[1<>; return c_cexpr_inner_arith(l, :/, Expression[1<>; type.specifier == :unsigned ? 'shr' : 'sar' + when :<<; 'shl' + when :*; 'mul' + when :/; 'div' + when :%; 'mod' + end + + case op + when 'add', 'sub', 'and', 'or', 'xor' + r = make_volatile(r, type) if l.kind_of? ModRM and r.kind_of? ModRM + unuse r + r = Reg.new(r.val, l.sz) if r.kind_of?(Reg) and l.kind_of?(ModRM) and l.sz and l.sz != r.sz # add byte ptr [eax], bl + instr op, l, r + when 'shr', 'sar', 'shl' + if r.kind_of? Expression + instr op, l, r + else + # XXX bouh + r = make_volatile(r, C::BaseType.new(:__int8, :unsigned)) + unuse r + if r.val != 1 + ecx = Reg.new(1, 32) + instr 'xchg', ecx, Reg.new(r.val, 32) + l = Reg.new(r.val, l.sz) if l.kind_of? Reg and l.val == 1 + @state.used.delete r.val if not @state.used.include? 1 + inuse ecx + end + instr op, l, Reg.new(1, 8) + instr 'xchg', ecx, Reg.new(r.val, 32) if r.val != 1 + end + when 'mul' + if l.kind_of? ModRM + if r.kind_of? 
Expression + ll = findreg + instr 'imul', ll, l, r + else + ll = make_volatile(l, type) + unuse ll + instr 'imul', ll, r + end + instr 'mov', l, ll + else + instr 'imul', l, r + end + unuse r + when 'div', 'mod' + lv = l.val if l.kind_of? Reg + eax = Reg.from_str 'eax' + edx = Reg.from_str 'edx' + if @state.used.include? eax.val and lv != eax.val + instr 'push', eax + saved_eax = true + end + if @state.used.include? edx.val and lv != edx.val + instr 'push', edx + saved_edx = true + end + + instr 'mov', eax, l if lv != eax.val + + if r.kind_of? Expression + instr 'push', r + esp = Reg.from_str 'esp' + r = ModRM.new(@cpusz, 32, nil, nil, esp, nil) + need_pop = true + end + + if type.specifier == :unsigned + instr 'mov', edx, Expression[0] + instr 'div', r + else + instr 'cdq' + instr 'idiv', r + end + unuse r + + instr 'add', esp, 4 if need_pop + + if op == 'div' + instr 'mov', l, eax if lv != eax.val + else + instr 'mov', l, edx if lv != edx.val + end + + instr 'pop', edx if saved_edx + instr 'pop', eax if saved_eax + end + end + + # compile an integral arithmetic 64-bits expression on a non-64 cpu + def c_cexpr_inner_arith_int64compose(l, op, r, type) + op = case op + when :+; 'add' + when :-; 'sub' + when :&; 'and' + when :|; 'or' + when :^; 'xor' + when :>>; type.specifier == :unsigned ? 'shr' : 'sar' + when :<<; 'shl' + when :*; 'mul' + when :/; 'div' + when :%; 'mod' + end + + ll, lh = get_composite_parts l + # 1ULL << 2 -> 2 is not ULL + r = make_volatile(r, C::BaseType.new("__int#{r.sz}".to_sym)) if l.kind_of? ModRM and r.kind_of? ModRM + rl, rh = get_composite_parts(r) if not r.kind_of? Reg + + case op + when 'add', 'sub', 'and', 'or', 'xor' + unuse r + instr op, ll, rl + op = {'add' => 'adc', 'sub' => 'sbb'}[op] || op + instr op, lh, rh unless (op == 'or' or op == 'xor') and rh.kind_of?(Expression) and rh.reduce == 0 + when 'shl', 'shr', 'sar' + rlc = r.reduce if r.kind_of? 
Expression + opd = { 'shl' => 'shld', 'shr' => 'shrd', 'sar' => 'shrd' }[op] + + ll, lh = lh, ll if op != 'shl' # OMGHAX + llv = ll + if llv.kind_of? ModRM + llv = make_volatile(llv, C::BaseType.new(:__int32)) + inuse ll + end + + if rlc.kind_of? Integer + case rlc + when 0 + when 1..31 + instr opd, llv, lh, Expression[rlc] + instr op, ll, Expression[rlc] + when 32..63 + instr 'mov', lh, llv + if op == 'sar' + instr 'sar', ll, Expression[31] + else + instr 'mov', ll, Expression[0] + end + instr op, lh, Expression[rlc-32] if rlc != 32 + else + if op == 'sar' + instr 'sar', ll, Expression[31] + instr 'mov', lh, llv + else + instr 'mov', ll, Expression[0] + instr 'mov', lh, Expression[0] + end + end + else + r = make_volatile(r, C::BaseType.new(:__int8, :unsigned)) + r = r.low if r.kind_of? Composite + rl ||= r + + cl = Reg.new(1, 8) + ecx = Reg.new(1, 32) + if r.val != 1 + instr 'xchg', ecx, Reg.new(r.val, 32) + lh = Reg.new(r.val, lh.sz) if lh.kind_of?(Reg) and lh.val == 1 + ll = Reg.new(r.val, ll.sz) if ll.kind_of?(Reg) and ll.val == 1 + llv = Reg.new(r.val, llv.sz) if llv.kind_of?(Reg) and llv.val == 1 + @state.used.delete r.val if not @state.used.include? 
1 + inuse ecx + end + + labelh = new_label('shldh') + labeld = new_label('shldd') + instr 'test', ecx, Expression[0x20] + instr 'jnz', Expression[labelh] + instr opd, llv, lh, cl + instr op, ll, cl + instr 'jmp', Expression[labeld] + @source << Label.new(labelh) + instr op, llv, cl + instr 'mov', lh, llv + if op == 'sar' + instr 'sar', ll, Expression[31] + else + instr 'mov', ll, Expression[0] + end + @source << Label.new(labeld) + + instr 'xchg', ecx, Reg.new(r.val, 32) if r.val != 1 + unuse ecx + unuse r + end + when 'mul' + # high = (low1*high2) + (high1*low2) + (low1*low2).high + t1 = findreg(32) + t2 = findreg(32) + unuse t1, t2, r + instr 'mov', t1, ll + instr 'mov', t2, rl + instr 'imul', t1, rh + instr 'imul', t2, lh + instr 'add', t1, t2 + + raise # TODO push eax/edx, mul, pop + instr 'mov', eax, ll + if rl.kind_of? Expression + instr 'mov', t2, rl + instr 'mul', t2 + else + instr 'mul', rl + end + instr 'add', t1, edx + instr 'mov', lh, t1 + instr 'mov', ll, eax + + when 'div' + raise # TODO + when 'mod' + raise # TODO + end + end + + def c_cexpr(expr) + case expr.op + when :+, :-, :*, :/, :&, :|, :^, :%, :[], nil, :'.', :'->', + :>, :<, :<=, :>=, :==, :'!=', :'!' + # skip no-ops + c_cexpr(expr.lexpr) if expr.lexpr.kind_of? C::CExpression + c_cexpr(expr.rexpr) if expr.rexpr.kind_of? C::CExpression + else unuse c_cexpr_inner(expr) + end + end + + def c_block_exit(block) + @state.cache.delete_if { |k, v| + case v + when C::Variable; block.symbol.index v + when Address; block.symbol.index v.target + end + } + block.symbol.each { |s| + unuse @state.bound.delete(s) + } + end + + def c_decl(var) + if var.type.kind_of? C::Array and + var.type.length.kind_of? C::CExpression + reg = c_cexpr_inner(var.type.length) + unuse reg + instr 'sub', Reg.new(4, @cpusz), reg + # TODO + end + end + + def c_ifgoto(expr, target) + case o = expr.op + when :<, :>, :<=, :>=, :==, :'!=' + l = c_cexpr_inner(expr.lexpr) + r = c_cexpr_inner(expr.rexpr) + if l.kind_of? 
Expression + o = { :< => :>, :> => :<, :>= => :<=, :<= => :>= }[o] || o + l, r = r, l + end + r = make_volatile(r, expr.type) if r.kind_of? ModRM and l.kind_of? ModRM + unuse l, r + if expr.lexpr.type.integral? + if expr.lexpr.type.name == :__int64 and @cpusz != 64 + raise # TODO + end + instr 'cmp', l, r + elsif expr.lexpr.type.float? + raise # TODO + instr 'fcmpp', l, r + else raise 'bad comparison ' + expr.to_s + end + op = 'j' + getcc(o, expr.lexpr.type) + instr op, Expression[target] + when :'!' + r = c_cexpr_inner(expr.rexpr) + r = make_volatile(r, expr.rexpr.type) + unuse r + instr 'test', r, r + instr 'jz', Expression[target] + else + r = c_cexpr_inner(expr) + r = make_volatile(r, expr.type) + unuse r + instr 'test', r, r + instr 'jnz', Expression[target] + end + end + + def c_goto(target) + instr 'jmp', Expression[target] + end + + def c_label(name) + @state.cache.clear + @source << '' << Label.new(name) + end + + def c_return(expr) + return if not expr + @state.cache.delete_if { |r, v| r.kind_of? Reg and r.val == 0 and expr != v } + r = c_cexpr_inner(expr) + r = make_volatile(r, expr.type) + unuse r + case r + when Composite + if r.low.val == 2 + instr 'xchg', r.low, r.high + instr 'mov', Reg.new(0, 32), r.low if r.high.val != 0 + else + instr 'mov', Reg.new(2, 32), r.high if r.high.val != 2 + instr 'mov', Reg.new(0, 32), r.low if r.low.val != 0 + end + when Reg + instr 'mov', Reg.new(0, r.sz), r if r.val != 0 + when FpReg + instr 'fld', FpReg.new(r.val) if r.val and r.val != 0 + end + end + + def c_asm(stmt) + if stmt.output or stmt.input or stmt.clobber + raise # TODO (handle %%0 => eax, gas, etc) + else + raise if @state.func.initializer.symbol.keys.find { |sym| stmt.body =~ /\b#{Regexp.escape(sym)}\b/ } # gsub ebp+off ? + @source << stmt.body + end + end + + def c_init_state(func) + @state = State.new(func) + # ET_DYN trashes ebx too + # XXX hope we're not a Shellcode to be embedded in an ELF.. 
+ @state.abi_flushregs_call << 3 if @exeformat and @exeformat.shortname == 'elf' + + c_reserve_stack(func.initializer) + off = @state.offset.values.max.to_i # where to store register args + off = 0 if off < 0 + + al = typesize[:ptr] + argoff = 2*al + fa = func.type.args.dup + if func.has_attribute('fastcall') + 2.times { + if a = fa.shift + off = c_reserve_stack_var(a, off) + @state.offset[a] = off + end + } + end + fa.each { |a| + if a.has_attribute_var('register') or a.type.has_attribute_var('register') + off = c_reserve_stack_var(a, off) + @state.offset[a] = off + next + end + @state.offset[a] = -argoff + argoff = (argoff + sizeof(a) + al - 1) / al * al + } + if not @state.offset.values.grep(::Integer).empty? + @state.saved_ebp = Reg.new(5, @cpusz) + @state.used << 5 + end + end + + def c_prolog + localspc = @state.offset.values.grep(::Integer).max + return if @state.func.has_attribute('naked') + if localspc + al = typesize[:ptr] + localspc = (localspc + al - 1) / al * al + ebp = @state.saved_ebp + esp = Reg.new(4, ebp.sz) + instr 'push', ebp + instr 'mov', ebp, esp + instr 'sub', esp, Expression[localspc] if localspc > 0 + + if @state.func.has_attribute('fastcall') + if a0 = @state.func.type.args[0] + instr 'mov', findvar(a0), Reg.new(1, 32) + end + if a1 = @state.func.type.args[1] + instr 'mov', findvar(a1), Reg.new(2, 32) + end + else + @state.func.type.args.each { |a| + if r = (a.has_attribute_var('register') or a.type.has_attribute_var('register')) + # XXX if r == ebp, then prepend_prolog mov [esp-off], ebp... + # XXX this would break when calling anyway (mov ebp, 42; ; call func) + instr 'mov', findvar(a), Reg.from_str(r) + end + } + end + end + @state.dirty -= @state.abi_trashregs # XXX ABI + @state.dirty.each { |reg| + instr 'push', Reg.new(reg, @cpusz) + } + end + + def c_epilog + return if @state.func.attributes.to_a.include? 
'naked' + # TODO revert dynamic array alloc + @state.dirty.reverse_each { |reg| + instr 'pop', Reg.new(reg, @cpusz) + } + if ebp = @state.saved_ebp + instr 'mov', Reg.new(4, ebp.sz), ebp + instr 'pop', ebp + end + f = @state.func + if f.has_attribute('stdcall') or f.has_attribute('fastcall') + al = typesize[:ptr] + fa = f.type.args.dup + 2.times { fa.shift } if f.has_attribute('fastcall') + argsz = fa.inject(0) { |sum, a| + (a.has_attribute_var('register') or a.type.has_attribute_var('register')) ? sum : sum + (sizeof(a) + al - 1) / al * al + } + if argsz > 0 + instr 'ret', Expression[argsz] + else + instr 'ret' + end + else + instr 'ret' + end + end + + # adds the metasm_intern_geteip function, which returns its own address in eax (used for PIC addressing) + def c_program_epilog + if defined? @need_geteip_stub and @need_geteip_stub + return if new_label('metasm_intern_geteip') != 'metasm_intern_geteip' # already defined elsewhere + + eax = Reg.new(0, @cpusz) + label = new_label('geteip') + + @source << Label.new('metasm_intern_geteip') + instr 'call', Expression[label] + @source << Label.new(label) + instr 'pop', eax + instr 'add', eax, Expression['metasm_intern_geteip', :-, label] + instr 'ret' + end #File.open('m-dbg-precomp.c', 'w') { |fd| fd.puts @parser } #File.open('m-dbg-src.asm', 'w') { |fd| fd.puts @source } - end + end end - def new_ccompiler(parser, exe=ExeFormat.new) - exe.cpu = self if not exe.instance_variable_get("@cpu") - CCompiler.new(parser, exe) - end + def new_ccompiler(parser, exe=ExeFormat.new) + exe.cpu = self if not exe.instance_variable_get("@cpu") + CCompiler.new(parser, exe) + end end end diff --git a/lib/metasm/metasm/cpu/ia32/debug.rb b/lib/metasm/metasm/cpu/ia32/debug.rb index 952c72fb47..1cb2694bcf 100644 --- a/lib/metasm/metasm/cpu/ia32/debug.rb +++ b/lib/metasm/metasm/cpu/ia32/debug.rb @@ -8,186 +8,186 @@ require 'metasm/cpu/ia32/opcodes' module Metasm class Ia32 - def dbg_register_pc - @dbg_register_pc ||= :eip - end - def 
dbg_register_sp - @dbg_register_sp ||= dbg_register_list[7] - end - def dbg_register_flags - @dbg_register_flags ||= :eflags - end + def dbg_register_pc + @dbg_register_pc ||= :eip + end + def dbg_register_sp + @dbg_register_sp ||= dbg_register_list[7] + end + def dbg_register_flags + @dbg_register_flags ||= :eflags + end - def dbg_register_list - @dbg_register_list ||= [:eax, :ebx, :ecx, :edx, :esi, :edi, :ebp, :esp, :eip] - end + def dbg_register_list + @dbg_register_list ||= [:eax, :ebx, :ecx, :edx, :esi, :edi, :ebp, :esp, :eip] + end - def dbg_register_size - @dbg_register_size ||= Hash.new(32).update(:cs => 16, :ds => 16, :es => 16, :fs => 16, :gs => 16) - end + def dbg_register_size + @dbg_register_size ||= Hash.new(32).update(:cs => 16, :ds => 16, :es => 16, :fs => 16, :gs => 16) + end - def dbg_flag_list - @dbg_flag_list ||= [:c, :p, :a, :z, :s, :i, :d, :o] - end + def dbg_flag_list + @dbg_flag_list ||= [:c, :p, :a, :z, :s, :i, :d, :o] + end - DBG_FLAGS = { :c => 0, :p => 2, :a => 4, :z => 6, :s => 7, :t => 8, :i => 9, :d => 10, :o => 11 } - def dbg_get_flag(dbg, f) - (dbg.get_reg_value(dbg_register_flags) >> DBG_FLAGS[f]) & 1 - end - def dbg_set_flag(dbg, f) - fl = dbg.get_reg_value(dbg_register_flags) - fl |= 1 << DBG_FLAGS[f] - dbg.set_reg_value(dbg_register_flags, fl) - end - def dbg_unset_flag(dbg, f) - fl = dbg.get_reg_value(dbg_register_flags) - fl &= ~(1 << DBG_FLAGS[f]) - dbg.set_reg_value(dbg_register_flags, fl) - end + DBG_FLAGS = { :c => 0, :p => 2, :a => 4, :z => 6, :s => 7, :t => 8, :i => 9, :d => 10, :o => 11 } + def dbg_get_flag(dbg, f) + (dbg.get_reg_value(dbg_register_flags) >> DBG_FLAGS[f]) & 1 + end + def dbg_set_flag(dbg, f) + fl = dbg.get_reg_value(dbg_register_flags) + fl |= 1 << DBG_FLAGS[f] + dbg.set_reg_value(dbg_register_flags, fl) + end + def dbg_unset_flag(dbg, f) + fl = dbg.get_reg_value(dbg_register_flags) + fl &= ~(1 << DBG_FLAGS[f]) + dbg.set_reg_value(dbg_register_flags, fl) + end - def dbg_enable_singlestep(dbg) - 
dbg_set_flag(dbg, :t) if dbg_get_flag(dbg, :t) == 0 - end - def dbg_disable_singlestep(dbg) - dbg_unset_flag(dbg, :t) if dbg_get_flag(dbg, :t) != 0 - end + def dbg_enable_singlestep(dbg) + dbg_set_flag(dbg, :t) if dbg_get_flag(dbg, :t) == 0 + end + def dbg_disable_singlestep(dbg) + dbg_unset_flag(dbg, :t) if dbg_get_flag(dbg, :t) != 0 + end - def dbg_enable_bp(dbg, bp) - case bp.type - when :bpx; dbg_enable_bpx( dbg, bp) - else dbg_enable_bphw(dbg, bp) - end - end + def dbg_enable_bp(dbg, bp) + case bp.type + when :bpx; dbg_enable_bpx( dbg, bp) + else dbg_enable_bphw(dbg, bp) + end + end - def dbg_disable_bp(dbg, bp) - case bp.type - when :bpx; dbg_disable_bpx( dbg, bp) - else dbg_disable_bphw(dbg, bp) - end - end + def dbg_disable_bp(dbg, bp) + case bp.type + when :bpx; dbg_disable_bpx( dbg, bp) + else dbg_disable_bphw(dbg, bp) + end + end - def dbg_enable_bpx(dbg, bp) - bp.internal[:previous] ||= dbg.memory[bp.address, 1] - dbg.memory[bp.address, 1] = "\xcc" - end + def dbg_enable_bpx(dbg, bp) + bp.internal[:previous] ||= dbg.memory[bp.address, 1] + dbg.memory[bp.address, 1] = "\xcc" + end - def dbg_disable_bpx(dbg, bp) - dbg.memory[bp.address, 1] = bp.internal[:previous] - end + def dbg_disable_bpx(dbg, bp) + dbg.memory[bp.address, 1] = bp.internal[:previous] + end - # allocate a debug register for a hwbp by checking the list of hwbp existing in dbg - def dbg_alloc_bphw(dbg, bp) - if not bp.internal[:dr] - may = [0, 1, 2, 3] - dbg.breakpoint_thread.values.each { |bb| may.delete bb.internal[:dr] } - raise 'alloc_bphw: no free debugregister' if may.empty? - bp.internal[:dr] = may.first - end - bp.internal[:type] ||= :x - bp.internal[:len] ||= 1 - bp.internal[:dr] - end + # allocate a debug register for a hwbp by checking the list of hwbp existing in dbg + def dbg_alloc_bphw(dbg, bp) + if not bp.internal[:dr] + may = [0, 1, 2, 3] + dbg.breakpoint_thread.values.each { |bb| may.delete bb.internal[:dr] } + raise 'alloc_bphw: no free debugregister' if may.empty? 
+ bp.internal[:dr] = may.first + end + bp.internal[:type] ||= :x + bp.internal[:len] ||= 1 + bp.internal[:dr] + end - def dbg_enable_bphw(dbg, bp) - nr = dbg_alloc_bphw(dbg, bp) - dr7 = dbg[:dr7] - l = { 1 => 0, 2 => 1, 4 => 3, 8 => 2 }[bp.internal[:len]] - rw = { :x => 0, :w => 1, :r => 3 }[bp.internal[:type]] - raise "enable_bphw: invalid breakpoint #{bp.inspect}" if not l or not rw - dr7 &= ~((15 << (16+4*nr)) | (3 << (2*nr))) # clear - dr7 |= ((l << 2) | rw) << (16+4*nr) # set drN len/rw - dr7 |= 3 << (2*nr) # enable global/local drN + def dbg_enable_bphw(dbg, bp) + nr = dbg_alloc_bphw(dbg, bp) + dr7 = dbg[:dr7] + l = { 1 => 0, 2 => 1, 4 => 3, 8 => 2 }[bp.internal[:len]] + rw = { :x => 0, :w => 1, :r => 3 }[bp.internal[:type]] + raise "enable_bphw: invalid breakpoint #{bp.inspect}" if not l or not rw + dr7 &= ~((15 << (16+4*nr)) | (3 << (2*nr))) # clear + dr7 |= ((l << 2) | rw) << (16+4*nr) # set drN len/rw + dr7 |= 3 << (2*nr) # enable global/local drN - dbg["dr#{nr}"] = bp.address - dbg[:dr7] = dr7 - end + dbg["dr#{nr}"] = bp.address + dbg[:dr7] = dr7 + end - def dbg_disable_bphw(dbg, bp) - nr = bp.internal[:dr] - dr7 = dbg[:dr7] - dr7 &= ~(3 << (2*nr)) - dbg[:dr7] = dr7 - end + def dbg_disable_bphw(dbg, bp) + nr = bp.internal[:dr] + dr7 = dbg[:dr7] + dr7 &= ~(3 << (2*nr)) + dbg[:dr7] = dr7 + end - def dbg_check_pre_run(dbg) - if dbg[:dr6] == 0 and dbg[:dr7] == 0 - dbg[:dr7] = 0x10000 # some OS (eg Windows) only return dr6 if dr7 != 0 - end - dbg[:dr6] = 0 if dbg[:dr6] & 0x400f != 0 - end + def dbg_check_pre_run(dbg) + if dbg[:dr6] == 0 and dbg[:dr7] == 0 + dbg[:dr7] = 0x10000 # some OS (eg Windows) only return dr6 if dr7 != 0 + end + dbg[:dr6] = 0 if dbg[:dr6] & 0x400f != 0 + end - def dbg_evt_bpx(dbg, b) - if b.address == dbg.pc-1 - dbg.pc -= 1 - end - end + def dbg_evt_bpx(dbg, b) + if b.address == dbg.pc-1 + dbg.pc -= 1 + end + end - def dbg_find_bpx(dbg) - return if dbg[:dr6] & 0x4000 != 0 - pc = dbg.pc - dbg.breakpoint[pc-1] || dbg.breakpoint[pc] - end 
+ def dbg_find_bpx(dbg) + return if dbg[:dr6] & 0x4000 != 0 + pc = dbg.pc + dbg.breakpoint[pc-1] || dbg.breakpoint[pc] + end - def dbg_find_hwbp(dbg) - dr6 = dbg[:dr6] - return if dr6 & 0xf == 0 - dn = (0..3).find { |n| dr6 & (1 << n) } - dbg.breakpoint_thread.values.find { |b| b.internal[:dr] == dn } - end + def dbg_find_hwbp(dbg) + dr6 = dbg[:dr6] + return if dr6 & 0xf == 0 + dn = (0..3).find { |n| dr6 & (1 << n) } + dbg.breakpoint_thread.values.find { |b| b.internal[:dr] == dn } + end - def dbg_need_stepover(dbg, addr, di) - di and ((di.instruction.prefix and di.instruction.prefix[:rep]) or di.opcode.props[:saveip]) - end + def dbg_need_stepover(dbg, addr, di) + di and ((di.instruction.prefix and di.instruction.prefix[:rep]) or di.opcode.props[:saveip]) + end - def dbg_end_stepout(dbg, addr, di) - di and di.opcode.name == 'ret' - end + def dbg_end_stepout(dbg, addr, di) + di and di.opcode.name == 'ret' + end - # return (yield) a list of [addr, symbolic name] - def dbg_stacktrace(dbg, rec=500) - ret = [] - s = dbg.addrname!(dbg.pc) - yield(dbg.pc, s) if block_given? - ret << [dbg.pc, s] - fp = dbg.get_reg_value(dbg_register_list[6]) - stack = dbg.get_reg_value(dbg_register_list[7]) - 8 - while fp > stack and fp <= stack+0x10000 and rec != 0 - rec -= 1 - ra = dbg.resolve_expr Indirection[fp+4, 4] - s = dbg.addrname!(ra) - yield(ra, s) if block_given? - ret << [ra, s] - stack = fp # ensure we walk the stack upwards - fp = dbg.resolve_expr Indirection[fp, 4] - end - ret - end + # return (yield) a list of [addr, symbolic name] + def dbg_stacktrace(dbg, rec=500) + ret = [] + s = dbg.addrname!(dbg.pc) + yield(dbg.pc, s) if block_given? + ret << [dbg.pc, s] + fp = dbg.get_reg_value(dbg_register_list[6]) + stack = dbg.get_reg_value(dbg_register_list[7]) - 8 + while fp > stack and fp <= stack+0x10000 and rec != 0 + rec -= 1 + ra = dbg.resolve_expr Indirection[fp+4, 4] + s = dbg.addrname!(ra) + yield(ra, s) if block_given? 
+ ret << [ra, s] + stack = fp # ensure we walk the stack upwards + fp = dbg.resolve_expr Indirection[fp, 4] + end + ret + end - # retrieve the current function return value - # only valid at function exit - def dbg_func_retval(dbg) - dbg.get_reg_value(dbg_register_list[0]) - end - def dbg_func_retval_set(dbg, val) - dbg.set_reg_value(dbg_register_list[0], val) - end + # retrieve the current function return value + # only valid at function exit + def dbg_func_retval(dbg) + dbg.get_reg_value(dbg_register_list[0]) + end + def dbg_func_retval_set(dbg, val) + dbg.set_reg_value(dbg_register_list[0], val) + end - # retrieve the current function return address - # to be called only on entry of the subfunction - def dbg_func_retaddr(dbg) - dbg.memory_read_int(dbg_register_list[7]) - end - def dbg_func_retaddr_set(dbg, ret) - dbg.memory_write_int(dbg_register_list[7], ret) - end + # retrieve the current function return address + # to be called only on entry of the subfunction + def dbg_func_retaddr(dbg) + dbg.memory_read_int(dbg_register_list[7]) + end + def dbg_func_retaddr_set(dbg, ret) + dbg.memory_write_int(dbg_register_list[7], ret) + end - # retrieve the current function arguments - # only valid at function entry (eg right after the call) - def dbg_func_arg(dbg, argnr) - dbg.memory_read_int(Expression[:esp, :+, 4*(argnr+1)]) - end - def dbg_func_arg_set(dbg, argnr, arg) - dbg.memory_write_int(Expression[:esp, :+, 4*(argnr+1)], arg) - end + # retrieve the current function arguments + # only valid at function entry (eg right after the call) + def dbg_func_arg(dbg, argnr) + dbg.memory_read_int(Expression[:esp, :+, 4*(argnr+1)]) + end + def dbg_func_arg_set(dbg, argnr, arg) + dbg.memory_write_int(Expression[:esp, :+, 4*(argnr+1)], arg) + end end end diff --git a/lib/metasm/metasm/cpu/ia32/decode.rb b/lib/metasm/metasm/cpu/ia32/decode.rb index de90f087d5..064fb6b16a 100644 --- a/lib/metasm/metasm/cpu/ia32/decode.rb +++ b/lib/metasm/metasm/cpu/ia32/decode.rb @@ -9,1318 
+9,1318 @@ require 'metasm/decode' module Metasm class Ia32 - class ModRM - def self.decode(edata, byte, endianness, adsz, opsz, seg=nil, regclass=Reg, h = {}) - m = (byte >> 6) & 3 - rm = byte & 7 - - if m == 3 - return regclass.new(rm, opsz) - end - - sum = Sum[adsz][m][rm] - - s, i, b, imm = nil - sum.each { |a| - case a - when Integer - if not b - b = Reg.new(a, adsz) - else - s = 1 - if h[:mrmvex] - i = SimdReg.new(a, h[:mrmvex]) - else - i = Reg.new(a, adsz) - end - end - - when :sib - sib = edata.get_byte.to_i - - ii = ((sib >> 3) & 7) - if ii != 4 - s = 1 << ((sib >> 6) & 3) - if h[:mrmvex] - i = SimdReg.new(ii, h[:mrmvex]) - else - i = Reg.new(ii, adsz) - end - end - - bb = sib & 7 - if bb == 5 and m == 0 - imm = Expression[edata.decode_imm("i#{adsz}".to_sym, endianness)] - else - b = Reg.new(bb, adsz) - end - - when :i8, :i16, :i32 - imm = Expression[edata.decode_imm(a, endianness)] - end - } - - if imm and ir = imm.reduce and ir.kind_of?(Integer) and ir < 0 and (ir < -0x10_0000 or (!b and !i)) - # probably a base address -> unsigned - imm = Expression[imm.reduce & ((1 << (adsz || 32)) - 1)] - end - - opsz = h[:argsz] if h[:argsz] - new adsz, opsz, s, i, b, imm, seg - end - end - - class Farptr - def self.decode(edata, endianness, adsz) - addr = Expression[edata.decode_imm("u#{adsz}".to_sym, endianness)] - seg = Expression[edata.decode_imm(:u16, endianness)] - new seg, addr - end - end - - def build_opcode_bin_mask(op) - # bit = 0 if can be mutated by an field value, 1 if fixed by opcode - op.bin_mask = Array.new(op.bin.length, 0) - op.fields.each { |f, (oct, off)| - op.bin_mask[oct] |= (@fields_mask[f] << off) - } - op.bin_mask.map! 
{ |v| 255 ^ v } - end - - def build_bin_lookaside - # sets up a hash byte value => list of opcodes that may match - # opcode.bin_mask is built here - lookaside = Array.new(256) { [] } - opcode_list.each { |op| - - build_opcode_bin_mask op - - b = op.bin[0] - msk = op.bin_mask[0] - - for i in b..(b | (255^msk)) - lookaside[i] << op if i & msk == b & msk - end - } - lookaside - end - - def decode_prefix(instr, byte) - instr.prefix ||= {} - (instr.prefix[:list] ||= []) << byte - - # XXX actual limit = 15-instr.length - return false if instr.prefix[:list].length >= 15 - - case byte - when 0x66; instr.prefix[:opsz] = true - when 0x67; instr.prefix[:adsz] = true - when 0xF0; instr.prefix[:lock] = true - when 0xF2; instr.prefix[:rep] = :nz - when 0xF3; instr.prefix[:rep] = :z # postprocessed by decode_instr - when 0x26, 0x2E, 0x36, 0x3E, 0x64, 0x65 - if byte & 0x40 == 0 - v = (byte >> 3) & 3 - else - v = byte & 7 - end - instr.prefix[:seg] = SegReg.new(v) - else - return false - end - true - end - - # tries to find the opcode encoded at edata.ptr - # if no match, tries to match a prefix (update di.instruction.prefix) - # on match, edata.ptr points to the first byte of the opcode (after prefixes) - def decode_findopcode(edata) - di = DecodedInstruction.new self - while edata.ptr < edata.data.length - pfx = di.instruction.prefix || {} - byte = edata.data[edata.ptr] - byte = byte.unpack('C').first if byte.kind_of?(::String) - return di if di.opcode = @bin_lookaside[byte].find { |op| - # fetch the relevant bytes from edata - bseq = edata.data[edata.ptr, op.bin.length].unpack('C*') - - # check against full opcode mask - op.bin.zip(bseq, op.bin_mask).all? 
{ |b1, b2, m| b2 and ((b1 & m) == (b2 & m)) } and - # check special cases - !( - # fail if any of those is true - (fld = op.fields[:seg2A] and (bseq[fld[0]] >> fld[1]) & @fields_mask[:seg2A] == 1) or - (fld = op.fields[:seg3A] and (bseq[fld[0]] >> fld[1]) & @fields_mask[:seg3A] < 4) or - (fld = op.fields[:seg3A] || op.fields[:seg3] and (bseq[fld[0]] >> fld[1]) & @fields_mask[:seg3] > 5) or - (op.props[:modrmA] and fld = op.fields[:modrm] and (bseq[fld[0]] >> fld[1]) & 0xC0 == 0xC0) or - (op.props[:modrmR] and fld = op.fields[:modrm] and (bseq[fld[0]] >> fld[1]) & 0xC0 != 0xC0) or - (fld = op.fields[:vex_vvvv] and @size != 64 and (bseq[fld[0]] >> fld[1]) & @fields_mask[:vex_vvvv] < 8) or - (sz = op.props[:opsz] and opsz(di, op) != sz) or - (sz = op.props[:adsz] and adsz(di, op) != sz) or - (ndpfx = op.props[:needpfx] and not pfx[:list].to_a.include? ndpfx) or - (pfx[:adsz] and op.props[:adsz] and op.props[:adsz] == @size) or - # return non-ambiguous opcode (eg push.i16 in 32bit mode) / sync with addop_post in opcode.rb - (pfx[:opsz] and not op.props[:opsz] and (op.args == [:i] or op.args == [:farptr] or op.name == 'ret')) or - (pfx[:adsz] and not op.props[:adsz] and (op.props[:strop] or op.props[:stropz] or op.args.include?(:mrm_imm) or op.args.include?(:modrm) or op.name =~ /loop|xlat/)) or - (op.name == 'nop' and op.bin[0] == 0x90 and di.instruction.prefix and di.instruction.prefix[:rex_b]) - ) - } - - break if not decode_prefix(di.instruction, edata.get_byte) - di.bin_length += 1 - end - end - - def decode_instr_op(edata, di) - before_ptr = edata.ptr - op = di.opcode - di.instruction.opname = op.name - bseq = edata.read(op.bin.length).unpack('C*') # decode_findopcode ensures that data >= op.length - pfx = di.instruction.prefix || {} - - case op.props[:needpfx] - when 0x66; pfx.delete :opsz - when 0x67; pfx.delete :adsz - when 0xF2, 0xF3; pfx.delete :rep - end - - if op.props[:setip] and not op.props[:stopexec] and pfx[:seg] - case pfx.delete(:seg).val - when 1; 
pfx[:jmphint] = 'hintnojmp' - when 3; pfx[:jmphint] = 'hintjmp' - end - end - - field_val = lambda { |f| - if fld = op.fields[f] - (bseq[fld[0]] >> fld[1]) & @fields_mask[f] - end - } - - opsz = op.props[:argsz] || opsz(di) - adsz = (pfx[:adsz] ? 48 - @size : @size) - - mmxsz = ((op.props[:xmmx] && pfx[:opsz]) ? 128 : 64) - op.args.each { |a| - di.instruction.args << case a - when :reg; Reg.new field_val[a], opsz - when :eeec; CtrlReg.new field_val[a] - when :eeed; DbgReg.new field_val[a] - when :eeet; TstReg.new field_val[a] - when :seg2, :seg2A, :seg3, :seg3A; SegReg.new field_val[a] - when :regfp; FpReg.new field_val[a] - when :regmmx; SimdReg.new field_val[a], mmxsz - when :regxmm; SimdReg.new field_val[a], 128 - when :regymm; SimdReg.new field_val[a], 256 - - when :farptr; Farptr.decode edata, @endianness, opsz - when :i8, :u8, :u16; Expression[edata.decode_imm(a, @endianness)] - when :i; Expression[edata.decode_imm("#{op.props[:unsigned_imm] ? 'a' : 'i'}#{opsz}".to_sym, @endianness)] - - when :mrm_imm; ModRM.decode edata, (adsz == 16 ? 
6 : 5), @endianness, adsz, opsz, pfx.delete(:seg) - when :modrm; ModRM.decode edata, field_val[:modrm], @endianness, adsz, opsz, pfx.delete(:seg) - when :modrmmmx; ModRM.decode edata, field_val[:modrm], @endianness, adsz, mmxsz, pfx.delete(:seg), SimdReg, :argsz => op.props[:argsz] - when :modrmxmm; ModRM.decode edata, field_val[:modrm], @endianness, adsz, 128, pfx.delete(:seg), SimdReg, :argsz => op.props[:argsz], :mrmvex => op.props[:mrmvex] - when :modrmymm; ModRM.decode edata, field_val[:modrm], @endianness, adsz, 256, pfx.delete(:seg), SimdReg, :argsz => op.props[:argsz], :mrmvex => op.props[:mrmvex] - - when :vexvreg; Reg.new((field_val[:vex_vvvv] ^ 0xf), opsz) - when :vexvxmm; SimdReg.new((field_val[:vex_vvvv] ^ 0xf), 128) - when :vexvymm; SimdReg.new((field_val[:vex_vvvv] ^ 0xf), 256) - when :i4xmm; SimdReg.new((edata.decode_imm(:u8, @endianness) >> 4) & 7, 128) - when :i4ymm; SimdReg.new((edata.decode_imm(:u8, @endianness) >> 4) & 7, 256) - - when :imm_val1; Expression[1] - when :imm_val3; Expression[3] - when :reg_cl; Reg.new 1, 8 - when :reg_eax; Reg.new 0, opsz - when :reg_dx; Reg.new 2, 16 - when :regfp0; FpReg.new nil - else raise SyntaxError, "Internal error: invalid argument #{a} in #{op.name}" - end - } - - di.bin_length += edata.ptr - before_ptr - - return false if edata.ptr > edata.length - - if op.name == 'movsx' or op.name == 'movzx' - if di.opcode.props[:argsz] == 8 - di.instruction.args[1].sz = 8 - else - di.instruction.args[1].sz = 16 - end - if pfx[:opsz] - di.instruction.args[0].sz = 48-@size - else - di.instruction.args[0].sz = @size - end - elsif op.name == 'crc32' - di.instruction.args[0].sz = 32 - end - - case pfx.delete(:rep) - when :nz - if di.opcode.props[:strop] - pfx[:rep] = 'rep' - elsif di.opcode.props[:stropz] - pfx[:rep] = 'repnz' - end - when :z - if di.opcode.props[:strop] - pfx[:rep] = 'rep' - elsif di.opcode.props[:stropz] - pfx[:rep] = 'repz' - end - end - - di - end - - # converts relative jump/call offsets to absolute 
addresses - # adds the eip delta to the offset +off+ of the instruction (may be an Expression) + its bin_length - # do not call twice on the same di ! - def decode_instr_interpret(di, addr) - if di.opcode.props[:setip] and di.instruction.args.last.kind_of? Expression and di.instruction.opname !~ /^i?ret/ - delta = di.instruction.args.last.reduce - arg = Expression[[addr, :+, di.bin_length], :+, delta].reduce - di.instruction.args[-1] = Expression[arg] - end - - di - end - - # return the list of registers as symbols in the order used by pushad - # for use in backtrace and stuff, for compatibility with x64 - # esp is [4] - REG_SYMS = [:eax, :ecx, :edx, :ebx, :esp, :ebp, :esi, :edi] - def register_symbols - REG_SYMS - end - - # interprets a condition code (in an opcode name) as an expression involving backtracked eflags - # eflag_p is never computed, and this returns Expression::Unknown for this flag - # ex: 'z' => Expression[:eflag_z] - def decode_cc_to_expr(cc) - case cc - when 'o'; Expression[:eflag_o] - when 'no'; Expression[:'!', :eflag_o] - when 'b', 'nae', 'c'; Expression[:eflag_c] - when 'nb', 'ae', 'nc'; Expression[:'!', :eflag_c] - when 'z', 'e'; Expression[:eflag_z] - when 'nz', 'ne'; Expression[:'!', :eflag_z] - when 'be', 'na'; Expression[:eflag_c, :|, :eflag_z] - when 'nbe', 'a'; Expression[:'!', [:eflag_c, :|, :eflag_z]] - when 's'; Expression[:eflag_s] - when 'ns'; Expression[:'!', :eflag_s] - when 'p', 'pe'; Expression::Unknown - when 'np', 'po'; Expression::Unknown - when 'l', 'nge'; Expression[:eflag_s, :'!=', :eflag_o] - when 'nl', 'ge'; Expression[:eflag_s, :==, :eflag_o] - when 'le', 'ng'; Expression[[:eflag_s, :'!=', :eflag_o], :|, :eflag_z] - when 'nle', 'g'; Expression[[:eflag_s, :==, :eflag_o], :&, :eflag_z] - when 'ecxz'; Expression[:'!', register_symbols[1]] - when 'cxz'; Expression[:'!', [register_symbols[1], :&, 0xffff]] - end - end - - # hash opcode_name => lambda { |dasm, di, *symbolic_args| instr_binding } - def backtrace_binding - 
@backtrace_binding ||= init_backtrace_binding - end - def backtrace_binding=(b) @backtrace_binding = b end - - def opsz(di, op=nil) - if di and di.instruction.prefix and di.instruction.prefix[:opsz] and (op || di.opcode).props[:needpfx] != 0x66; 48-@size - else @size - end - end - - def adsz(di, op=nil) - if di and di.instruction.prefix and di.instruction.prefix[:adsz] and (op || di.opcode).props[:needpfx] != 0x67; 48-@size - else @size - end - end - - # populate the @backtrace_binding hash with default values - def init_backtrace_binding - @backtrace_binding ||= {} - - eax, ecx, edx, ebx, esp, ebp, esi, edi = register_symbols - ebx = ebx - - mask = lambda { |di| (1 << opsz(di))-1 } # 32bits => 0xffff_ffff - sign = lambda { |v, di| Expression[[[v, :&, mask[di]], :>>, opsz(di)-1], :'!=', 0] } - - opcode_list.map { |ol| ol.basename }.uniq.sort.each { |op| - binding = case op - when 'mov', 'movzx', 'movd', 'movq'; lambda { |di, a0, a1| { a0 => Expression[a1] } } - when 'movsx', 'movsxd' - lambda { |di, a0, a1| - sz1 = di.instruction.args[1].sz - sign1 = Expression[[a1, :>>, sz1-1], :&, 1] - { a0 => Expression[[a1, :|, [sign1, :*, (-1 << sz1)]], :&, mask[di]] } - } - when 'lea'; lambda { |di, a0, a1| { a0 => a1.target } } - when 'xchg'; lambda { |di, a0, a1| { a0 => Expression[a1], a1 => Expression[a0] } } - when 'add', 'sub', 'or', 'xor', 'and', 'pxor', 'adc', 'sbb' - lambda { |di, a0, a1| - e_op = { 'add' => :+, 'sub' => :-, 'or' => :|, 'and' => :&, 'xor' => :^, 'pxor' => :^, 'adc' => :+, 'sbb' => :- }[op] - ret = Expression[a0, e_op, a1] - ret = Expression[ret, e_op, :eflag_c] if op == 'adc' or op == 'sbb' - # optimises eax ^ eax => 0 - # avoid hiding memory accesses (to not hide possible fault) - ret = Expression[ret.reduce] if not a0.kind_of? 
Indirection - { a0 => ret } - } - when 'xadd'; lambda { |di, a0, a1| { a0 => Expression[a0, :+, a1], a1 => Expression[a0] } } - when 'inc'; lambda { |di, a0| { a0 => Expression[a0, :+, 1] } } - when 'dec'; lambda { |di, a0| { a0 => Expression[a0, :-, 1] } } - when 'not'; lambda { |di, a0| { a0 => Expression[a0, :^, mask[di]] } } - when 'neg'; lambda { |di, a0| { a0 => Expression[:-, a0] } } - when 'rol', 'ror' - lambda { |di, a0, a1| - e_op = (op[2] == ?r ? :>> : :<<) - inv_op = {:<< => :>>, :>> => :<< }[e_op] - sz = [a1, :%, opsz(di)] - isz = [[opsz(di), :-, a1], :%, opsz(di)] - # ror a, b => (a >> b) | (a << (32-b)) - { a0 => Expression[[[a0, e_op, sz], :|, [a0, inv_op, isz]], :&, mask[di]] } - } - when 'sar', 'shl', 'sal'; lambda { |di, a0, a1| { a0 => Expression[a0, (op[-1] == ?r ? :>> : :<<), [a1, :%, [opsz(di), 32].max]] } } - when 'shr'; lambda { |di, a0, a1| { a0 => Expression[[a0, :&, mask[di]], :>>, [a1, :%, opsz(di)]] } } - when 'cwd', 'cdq', 'cqo'; lambda { |di| { Expression[edx, :&, mask[di]] => Expression[mask[di], :*, sign[eax, di]] } } - when 'cbw', 'cwde', 'cdqe'; lambda { |di| - o2 = opsz(di)/2 ; m2 = (1 << o2) - 1 - { Expression[eax, :&, mask[di]] => Expression[[eax, :&, m2], :|, [m2 << o2, :*, [[eax, :>>, o2-1], :&, 1]]] } } - when 'push' - lambda { |di, a0| { esp => Expression[esp, :-, opsz(di)/8], - Indirection[esp, opsz(di)/8, di.address] => Expression[a0] } } - when 'pop' - lambda { |di, a0| { esp => Expression[esp, :+, opsz(di)/8], - a0 => Indirection[esp, opsz(di)/8, di.address] } } - when 'pushfd', 'pushf' - # TODO Unknown per bit - lambda { |di| - efl = Expression[0x202] - bts = lambda { |pos, v| efl = Expression[efl, :|, [[v, :&, 1], :<<, pos]] } - bts[0, :eflag_c] - bts[6, :eflag_z] - bts[7, :eflag_s] - bts[11, :eflag_o] - { esp => Expression[esp, :-, opsz(di)/8], Indirection[esp, opsz(di)/8, di.address] => efl } - } - when 'popfd', 'popf' - lambda { |di| bt = lambda { |pos| Expression[[Indirection[esp, opsz(di)/8, di.address], :>>, 
pos], :&, 1] } - { esp => Expression[esp, :+, opsz(di)/8], :eflag_c => bt[0], :eflag_z => bt[6], :eflag_s => bt[7], :eflag_o => bt[11] } } - when 'sahf' - lambda { |di| bt = lambda { |pos| Expression[[eax, :>>, pos], :&, 1] } - { :eflag_c => bt[0], :eflag_z => bt[6], :eflag_s => bt[7] } } - when 'lahf' - lambda { |di| - efl = Expression[2] - bts = lambda { |pos, v| efl = Expression[efl, :|, [[v, :&, 1], :<<, pos]] } - bts[0, :eflag_c] #bts[2, :eflag_p] #bts[4, :eflag_a] - bts[6, :eflag_z] - bts[7, :eflag_s] - { eax => efl } - } - when 'pushad' - lambda { |di| - ret = {} - st_off = 0 - register_symbols.reverse_each { |r| - ret[Indirection[Expression[esp, :+, st_off].reduce, opsz(di)/8, di.address]] = Expression[r] - st_off += opsz(di)/8 - } - ret[esp] = Expression[esp, :-, st_off] - ret - } - when 'popad' - lambda { |di| - ret = {} - st_off = 0 - register_symbols.reverse_each { |r| - ret[r] = Indirection[Expression[esp, :+, st_off].reduce, opsz(di)/8, di.address] - st_off += opsz(di)/8 - } - ret[esp] = Expression[esp, :+, st_off] # esp is not popped - ret - } - when 'call' - lambda { |di, a0| - sz = opsz(di)/8 - if a0.kind_of? 
Farptr - { esp => Expression[esp, :-, 2*sz], - Indirection[esp, sz, di.address] => Expression[di.next_addr], - Indirection[[esp, :+, sz], sz, di.address] => Expression::Unknown } - else - { esp => Expression[esp, :-, sz], - Indirection[esp, sz, di.address] => Expression[di.next_addr] } - end - } - when 'callf' - lambda { |di, a0| - sz = opsz(di)/8 - { esp => Expression[esp, :-, 2*sz], - Indirection[esp, sz, di.address] => Expression[di.next_addr], - Indirection[[esp, :+, sz], sz, di.address] => Expression::Unknown } } - when 'ret'; lambda { |di, *a| { esp => Expression[esp, :+, [opsz(di)/8, :+, a[0] || 0]] } } - when 'retf';lambda { |di, *a| { esp => Expression[esp, :+, [opsz(di)/4, :+, a[0] || 0]] } } - when 'loop', 'loopz', 'loopnz'; lambda { |di, a0| { ecx => Expression[ecx, :-, 1] } } - when 'enter' - lambda { |di, a0, a1| - sz = opsz(di)/8 - depth = a1.reduce % 32 - b = { Indirection[ebp, sz, di.address] => Expression[ebp], - Indirection[[esp, :+, a0.reduce+sz*depth], sz, di.address] => Expression[ebp], - ebp => Expression[esp, :-, sz], - esp => Expression[esp, :-, a0.reduce+sz*depth+sz] } - (1..depth).each { |i| - b[Indirection[[esp, :+, a0.reduce+i*sz], sz, di.address]] = - b[Indirection[[ebp, :-, i*sz], sz, di.address]] = - Expression::Unknown # TODO Indirection[[ebp, :-, i*sz], sz, di.address] - } - b - } - when 'leave'; lambda { |di| { ebp => Indirection[[ebp], opsz(di)/8, di.address], esp => Expression[ebp, :+, opsz(di)/8] } } - when 'aaa'; lambda { |di| { eax => Expression::Unknown, :incomplete_binding => Expression[1] } } - when 'imul' - lambda { |di, *a| - if not a[1] - # 1 operand from: store result in edx:eax - bd = {} - m = mask[di] - s = opsz(di) - e = Expression[Expression.make_signed(Expression[a[0], :&, m], s), :*, Expression.make_signed(Expression[eax, :&, m], s)] - if s == 8 - bd[Expression[eax, :&, 0xffff]] = e - else - bd[Expression[eax, :&, m]] = Expression[e, :&, m] - bd[Expression[edx, :&, m]] = Expression[[e, :>>, opsz(di)], :&, m] - 
end - # XXX eflags? - next bd - end - - if a[2]; e = Expression[a[1], :*, a[2]] - else e = Expression[[a[0], :*, a[1]], :&, (1 << (di.instruction.args.first.sz || opsz(di))) - 1] - end - { a[0] => e } - } - when 'mul' - lambda { |di, *a| - m = mask[di] - e = Expression[a, :*, [eax, :&, m]] - if opsz(di) == 8 - { Expression[eax, :&, 0xffff] => e } - else - { Expression[eax, :&, m] => Expression[e, :&, m], - Expression[edx, :&, m] => Expression[[e, :>>, opsz(di)], :&, m] } - end - } - when 'div', 'idiv'; lambda { |di, *a| { eax => Expression::Unknown, edx => Expression::Unknown, :incomplete_binding => Expression[1] } } - when 'rdtsc'; lambda { |di| { eax => Expression::Unknown, edx => Expression::Unknown, :incomplete_binding => Expression[1] } } - when /^(stos|movs|lods|scas|cmps)[bwd]$/ - lambda { |di, *a| - next {:incomplete_binding => 1} if di.opcode.args.include?(:regxmm) # XXX movsd xmm0, xmm1... - op =~ /^(stos|movs|lods|scas|cmps)([bwd])$/ - e_op = $1 - sz = { 'b' => 1, 'w' => 2, 'd' => 4 }[$2] - eax_ = Reg.new(0, 8*sz).symbolic - dir = :+ - if di.block and (di.block.list.find { |ddi| ddi.opcode.name == 'std' } rescue nil) - dir = :- - end - pesi = Indirection[esi, sz, di.address] - pedi = Indirection[edi, sz, di.address] - pfx = di.instruction.prefix || {} - bd = - case e_op - when 'movs' - case pfx[:rep] - when nil; { pedi => pesi, esi => Expression[esi, dir, sz], edi => Expression[edi, dir, sz] } - else { pedi => pesi, esi => Expression[esi, dir, [sz ,:*, ecx]], edi => Expression[edi, dir, [sz, :*, ecx]], ecx => 0 } - end - when 'stos' - case pfx[:rep] - when nil; { pedi => Expression[eax_], edi => Expression[edi, dir, sz] } - else { pedi => Expression[eax_], edi => Expression[edi, dir, [sz, :*, ecx]], ecx => 0 } - end - when 'lods' - case pfx[:rep] - when nil; { eax_ => pesi, esi => Expression[esi, dir, sz] } - else { eax_ => Indirection[[esi, dir, [sz, :*, [ecx, :-, 1]]], sz, di.address], esi => Expression[esi, dir, [sz, :*, ecx]], ecx => 0 } - end - when 
'scas' - case pfx[:rep] - when nil; { edi => Expression[edi, dir, sz] } - else { edi => Expression::Unknown, ecx => Expression::Unknown } - end - when 'cmps' - case pfx[:rep] - when nil; { edi => Expression[edi, dir, sz], esi => Expression[esi, dir, sz] } - else { edi => Expression::Unknown, esi => Expression::Unknown, ecx => Expression::Unknown } - end - end - bd[:incomplete_binding] = Expression[1] if pfx[:rep] - bd - } - when 'clc'; lambda { |di| { :eflag_c => Expression[0] } } - when 'stc'; lambda { |di| { :eflag_c => Expression[1] } } - when 'cmc'; lambda { |di| { :eflag_c => Expression[:'!', :eflag_c] } } - when 'cld'; lambda { |di| { :eflag_d => Expression[0] } } - when 'std'; lambda { |di| { :eflag_d => Expression[1] } } - when 'setalc'; lambda { |di| { Reg.new(0, 8).symbolic => Expression[:eflag_c, :*, 0xff] } } - when /^set/; lambda { |di, *a| { a[0] => Expression[decode_cc_to_expr(op[/^set(.*)/, 1])] } } - when /^cmov/; lambda { |di, *a| fl = decode_cc_to_expr(op[/^cmov(.*)/, 1]) ; { a[0] => Expression[[fl, :*, a[1]], :|, [[1, :-, fl], :*, a[0]]] } } - when /^j/ - lambda { |di, a0| - ret = { 'dummy_metasm_0' => Expression[a0] } # mark modr/m as read - if fl = decode_cc_to_expr(op[/^j(.*)/, 1]) and fl != Expression::Unknown - ret['dummy_metasm_1'] = fl # mark eflags as read - end - ret - } - when 'fstenv', 'fnstenv' - lambda { |di, a0| - # stores the address of the last non-control fpu instr run - lastfpuinstr = di.block.list[0...di.block.list.index(di)].reverse.find { |pdi| - case pdi.opcode.name - when /fn?init|fn?clex|fldcw|fn?st[cs]w|fn?stenv|fldenv|fn?save|frstor|f?wait/ - when /^f/; true - end - } if di.block - lastfpuinstr = lastfpuinstr.address if lastfpuinstr - ret = {} - save_at = lambda { |off, val| ret[Indirection[a0.target + off, 4, di.address]] = val } - save_at[0, Expression::Unknown] - save_at[4, Expression::Unknown] - save_at[8, Expression::Unknown] - save_at[12, lastfpuinstr || Expression::Unknown] - save_at[16, Expression::Unknown] - 
save_at[20, Expression::Unknown] - save_at[24, Expression::Unknown] - ret - } - when 'bt'; lambda { |di, a0, a1| { :eflag_c => Expression[[a0, :>>, [a1, :%, opsz(di)]], :&, 1] } } - when 'bts'; lambda { |di, a0, a1| { :eflag_c => Expression[[a0, :>>, [a1, :%, opsz(di)]], :&, 1], - a0 => Expression[a0, :|, [1, :<<, [a1, :%, opsz(di)]]] } } - when 'btr'; lambda { |di, a0, a1| { :eflag_c => Expression[[a0, :>>, [a1, :%, opsz(di)]], :&, 1], - a0 => Expression[a0, :&, [[1, :<<, [a1, :%, opsz(di)]], :^, mask[di]]] } } - when 'btc'; lambda { |di, a0, a1| { :eflag_c => Expression[[a0, :>>, [a1, :%, opsz(di)]], :&, 1], - a0 => Expression[a0, :^, [1, :<<, [a1, :%, opsz(di)]]] } } - when 'bswap' - lambda { |di, a0| - case opsz(di) - when 64 - { a0 => Expression[ - [[[[a0, :&, 0xff000000_00000000], :>>, 56], :|, - [[a0, :&, 0x00ff0000_00000000], :>>, 40]], :|, - [[[a0, :&, 0x0000ff00_00000000], :>>, 24], :|, - [[a0, :&, 0x000000ff_00000000], :>>, 8]]], :|, - [[[[a0, :&, 0x00000000_ff000000], :<<, 8], :|, - [[a0, :&, 0x00000000_00ff0000], :<<, 24]], :|, - [[[a0, :&, 0x00000000_0000ff00], :<<, 40], :|, - [[a0, :&, 0x00000000_000000ff], :<<, 56]]]] } - when 32 - { a0 => Expression[ - [[[a0, :&, 0xff000000], :>>, 24], :|, - [[a0, :&, 0x00ff0000], :>>, 8]], :|, - [[[a0, :&, 0x0000ff00], :<<, 8], :|, - [[a0, :&, 0x000000ff], :<<, 24]]] } - when 16 - # bswap ax => mov ax, 0 - { a0 => 0 } - end - } - when 'nop', 'pause', 'wait', 'cmp', 'test'; lambda { |di, *a| {} } - end - - # add eflags side-effects - - full_binding = case op - when 'adc', 'add', 'and', 'cmp', 'or', 'sbb', 'sub', 'xor', 'test', 'xadd' - lambda { |di, a0, a1| - e_op = { 'adc' => :+, 'add' => :+, 'xadd' => :+, 'and' => :&, 'cmp' => :-, 'or' => :|, 'sbb' => :-, 'sub' => :-, 'xor' => :^, 'test' => :& }[op] - res = Expression[[a0, :&, mask[di]], e_op, [a1, :&, mask[di]]] - res = Expression[res, e_op, :eflag_c] if op == 'adc' or op == 'sbb' - - ret = (binding ? 
binding[di, a0, a1] : {}) - ret[:eflag_z] = Expression[[res, :&, mask[di]], :==, 0] - ret[:eflag_s] = sign[res, di] - ret[:eflag_c] = case e_op - when :+; Expression[res, :>, mask[di]] - when :-; Expression[[a0, :&, mask[di]], :<, [a1, :&, mask[di]]] - else Expression[0] - end - ret[:eflag_o] = case e_op - when :+; Expression[[sign[a0, di], :==, sign[a1, di]], :'&&', [sign[a0, di], :'!=', sign[res, di]]] - when :-; Expression[[sign[a0, di], :==, [:'!', sign[a1, di]]], :'&&', [sign[a0, di], :'!=', sign[res, di]]] - else Expression[0] - end - ret - } - when 'inc', 'dec', 'neg', 'shl', 'shr', 'sar', 'ror', 'rol', 'rcr', 'rcl', 'shld', 'shrd' - lambda { |di, a0, *a| - ret = (binding ? binding[di, a0, *a] : {}) - res = ret[a0] || Expression::Unknown - ret[:eflag_z] = Expression[[res, :&, mask[di]], :==, 0] - ret[:eflag_s] = sign[res, di] - case op - when 'neg'; ret[:eflag_c] = Expression[[res, :&, mask[di]], :'!=', 0] - when 'inc', 'dec' # don't touch carry flag - else ret[:eflag_c] = Expression::Unknown # :incomplete_binding ? - end - ret[:eflag_o] = case op - when 'inc'; Expression[[a0, :&, mask[di]], :==, mask[di] >> 1] - when 'dec'; Expression[[res , :&, mask[di]], :==, mask[di] >> 1] - when 'neg'; Expression[[a0, :&, mask[di]], :==, (mask[di]+1) >> 1] - else Expression::Unknown - end - ret - } - when 'imul', 'mul', 'idiv', 'div', /^(scas|cmps)[bwdq]$/ - lambda { |di, *a| - ret = (binding ? binding[di, *a] : {}) - ret[:eflag_z] = ret[:eflag_s] = ret[:eflag_c] = ret[:eflag_o] = Expression::Unknown # :incomplete_binding ? 
- ret - } - end - - @backtrace_binding[op] ||= full_binding || binding if full_binding || binding - } - @backtrace_binding - end - - # returns the condition (bool Expression) under which a conditionnal jump is taken - # returns nil if not a conditionnal jump - # backtrace for the condition must include the jump itself (eg loop -> ecx--) - def get_jump_condition(di) - ecx = register_symbols[1] - case di.opcode.name - when /^j(.*)/ - decode_cc_to_expr($1) - when /^loop(.+)?/ - e = Expression[ecx, :'!=', 0] - e = Expression[e, :'||', decode_cc_to_expr($1)] if $1 - e - end - end - - def get_backtrace_binding(di) - a = di.instruction.args.map { |arg| - case arg - when ModRM, Reg, SimdReg; arg.symbolic(di) - else arg - end - } - - if binding = backtrace_binding[di.opcode.basename] - bd = binding[di, *a] - # handle modifications to al/ah etc - bd.keys.grep(Expression).each { |e| - # must be in the form (x & mask), with x either :reg or (:reg >> shift) eg ah == ((eax >> 8) & 0xff) - if e.op == :& and mask = e.rexpr and mask.kind_of? Integer - reg = e.lexpr - reg = reg.lexpr if reg.kind_of? Expression and reg.op == :>> and shift = reg.rexpr and shift.kind_of? Integer - next if not reg.kind_of? Symbol - if bd.has_key? reg - # xchg ah, al ; pop sp.. - puts "backtrace: conflict for #{di}: #{e} vs #{reg}" if $VERBOSE - bd[reg] = Expression::Unknown - next - end - val = bd.delete e - mask <<= shift if shift - invmask = mask ^ (@size == 64 ? 
0xffff_ffff_ffff_ffff : 0xffff_ffff) - if invmask == 0xffff_ffff_0000_0000 and not di.opcode.props[:op32no64] - bd[reg] = Expression[val, :&, 0xffff_ffff] - elsif invmask == 0 - bd[reg] = val - else - val = Expression[val, :<<, shift] if shift - bd[reg] = Expression[[reg, :&, invmask], :|, [val, :&, mask]] - end - end - } - bd - else - puts "unhandled instruction to backtrace: #{di}" if $VERBOSE - # assume nothing except the 1st arg is modified - case a[0] - when Indirection, Symbol; { a[0] => Expression::Unknown } - when Expression; (x = a[0].externals.first) ? { x => Expression::Unknown } : {} - else {} - end.update(:incomplete_binding => Expression[1]) - end - end - - # patch a forward binding from the backtrace binding - # fixes fwdemu for push/pop/call/ret - def fix_fwdemu_binding(di, fbd) - if di.instruction.args.grep(ModRM).find { |m| m.seg and m.symbolic(di).target.lexpr =~ /^segment_base_/ } - fbd = fbd.dup - fbd[:incomplete_binding] = Expression[1] - end - - case di.opcode.name - when 'push', 'call' - fbd = fbd.dup - sz = opsz(di)/8 - esp = register_symbols[4] - if i = fbd.delete(Indirection[esp, sz]) - fbd[Indirection[[esp, :-, sz], sz]] = i - end - when 'pop', 'ret' # nothing to do - when /^(push|pop|call|ret|enter|leave|stos|movs|lods|scas|cmps)/ - fbd = fbd.dup - fbd[:incomplete_binding] = Expression[1] # TODO - end - fbd - end - - def get_xrefs_x(dasm, di) - return [] if not di.opcode.props[:setip] - - sz = opsz(di) - case di.opcode.basename - when 'ret'; return [Indirection[register_symbols[4], sz/8, di.address]] - when 'jmp', 'call' - a = di.instruction.args.first - if dasm and a.kind_of?(ModRM) and a.imm and (a.s == sz/8 or a.s == 4) and not a.b and dasm.get_section_at(a.imm) - return get_xrefs_x_jmptable(dasm, di, a, a.s*8) - end - end - - case tg = di.instruction.args.first - when ModRM - tg.sz ||= sz if tg.kind_of? 
ModRM - [Expression[tg.symbolic(di)]] - when Reg; [Expression[tg.symbolic(di)]] - when Expression, ::Integer; [Expression[tg]] - when Farptr; tg.seg.reduce < 0x30 ? [tg.addr] : [Expression[[tg.seg, :*, 0x10], :+, tg.addr]] - else - puts "unhandled setip at #{di.address} #{di.instruction}" if $DEBUG - [] - end - end - - # we detected a jmp table (jmp [base+4*idx]) - # try to return an accurate dest list - def get_xrefs_x_jmptable(dasm, di, mrm, sz) - # include the symbolic dest for backtrack stuff - ret = [Expression[mrm.symbolic(di)]] - i = mrm.i - if di.block.list.length == 2 and di.block.list[0].opcode.name =~ /^mov/ and a0 = di.block.list[0].instruction.args[0] and - a0.respond_to? :symbolic and a0.symbolic == i.symbolic - i = di.block.list[0].instruction.args[1] - end - pb = di.block.from_normal.to_a - if pb.length == 1 and pdi = dasm.decoded[pb[0]] and pdi.opcode.name =~ /^jn?be?/ and ppdi = pdi.block.list[-2] and ppdi.opcode.name == 'cmp' and - ppdi.instruction.args[0].symbolic == i.symbolic and lim = Expression[ppdi.instruction.args[1]].reduce and lim.kind_of? 
Integer - # cmp eax, 42 ; jbe switch ; switch: jmp [base+4*eax] - s = dasm.get_section_at(mrm.imm) - lim += 1 if pdi.opcode.name[-1] == ?e - lim.times { |v| - dasm.add_xref(s[1]+s[0].ptr, Xref.new(:r, di.address, sz/8)) - ret << Indirection[[mrm.imm, :+, v*sz/8], sz/8, di.address] - s[0].read(sz/8) - } - l = dasm.auto_label_at(mrm.imm, 'jmp_table', 'xref') - replace_instr_arg_immediate(di.instruction, mrm.imm, Expression[l]) - # add 'case 1' comments - cases = {} - ret.each_with_index { |ind, idx| - idx -= 1 # ret[0] = symbolic - next if idx < 0 - a = dasm.backtrace(ind, di.address) - if a.length == 1 and a[0].kind_of?(Expression) and addr = a[0].reduce and addr.kind_of?(::Integer) - (cases[addr] ||= []) << idx - end - } - cases.each { |addr, list| - dasm.add_comment(addr, "case #{list.join(', ')}:") - } - return ret - end - - puts "unrecognized jmp table pattern, using wild guess for #{di}" if $VERBOSE - di.add_comment 'wildguess' - if s = dasm.get_section_at(mrm.imm - 3*sz/8) - v = -3 - else - s = dasm.get_section_at(mrm.imm) - v = 0 - end - while s[0].ptr < s[0].length - ptr = dasm.normalize s[0].decode_imm("u#{sz}".to_sym, @endianness) - diff = Expression[ptr, :-, di.address].reduce - if (diff.kind_of? ::Integer and diff.abs < 4096) or (di.opcode.basename == 'call' and ptr != 0 and dasm.get_section_at(ptr)) - dasm.add_xref(s[1]+s[0].ptr-sz/8, Xref.new(:r, di.address, sz/8)) - ret << Indirection[[mrm.imm, :+, v*sz/8], sz/8, di.address] - elsif v > 0 - break - end - v += 1 - end - ret - end - - # checks if expr is a valid return expression matching the :saveip instruction - def backtrace_is_function_return(expr, di=nil) - expr = Expression[expr].reduce_rec - expr.kind_of? 
Indirection and expr.len == @size/8 and expr.target == Expression[register_symbols[4]] - end - - # updates the function backtrace_binding - # if the function is big and no specific register is given, do nothing (the binding will be lazily updated later, on demand) - # XXX assume retaddrlist is either a list of addr of ret or a list with a single entry which is an external function name (thunk) - def backtrace_update_function_binding(dasm, faddr, f, retaddrlist, *wantregs) - b = f.backtrace_binding - - esp, ebp = register_symbols[4, 2] - - # XXX handle retaddrlist for multiple/mixed thunks - if retaddrlist and not dasm.decoded[retaddrlist.first] and di = dasm.decoded[faddr] - # no return instruction, must be a thunk : find the last instruction (to backtrace from it) - done = [] - while ndi = dasm.decoded[di.block.to_subfuncret.to_a.first] || dasm.decoded[di.block.to_normal.to_a.first] and ndi.kind_of? DecodedInstruction and not done.include? ndi.address - done << ndi.address - di = ndi - end - if not di.block.to_subfuncret.to_a.first and di.block.to_normal and di.block.to_normal.length > 1 - thunklast = di.block.list.last.address - end - end - - bt_val = lambda { |r| - next if not retaddrlist - b[r] = Expression::Unknown # TODO :pending or something ? (for recursive lazy functions) - bt = [] - retaddrlist.each { |retaddr| - bt |= dasm.backtrace(Expression[r], (thunklast ? thunklast : retaddr), - :include_start => true, :snapshot_addr => faddr, :origin => retaddr, :from_subfuncret => thunklast) - } - if bt.length != 1 - b[r] = Expression::Unknown - else - b[r] = bt.first - end - } - - if not wantregs.empty? 
- wantregs.each(&bt_val) - else - if dasm.function_blocks(faddr, true).length < 20 - register_symbols.each(&bt_val) - else - [ebp, esp].each(&bt_val) - end - end - - backtrace_update_function_binding_check(dasm, faddr, f, b, &bt_val) - - b - end - - def backtrace_update_function_binding_check(dasm, faddr, f, b) - sz = @size/8 - if b[:ebp] and b[:ebp] != Expression[:ebp] - # may be a custom 'enter' function (eg recent Visual Studio) - # TODO put all memory writes in the binding ? - [[:ebp], [:esp, :+, 1*sz], [:esp, :+, 2*sz], [:esp, :+, 3*sz]].each { |ptr| - ind = Indirection[ptr, sz, faddr] - yield(ind) - b.delete(ind) if b[ind] and not [:ebx, :edx, :esi, :edi, :ebp].include? b[ind].reduce_rec - } - end - if dasm.funcs_stdabi - if b[:esp] and b[:esp] == Expression::Unknown and not f.btbind_callback - puts "update_func_bind: #{Expression[faddr]} has esp -> unknown, use dynamic callback" if $DEBUG - f.btbind_callback = disassembler_default_btbind_callback - end - [:ebp, :ebx, :esi, :edi].each { |reg| - if b[reg] and b[reg] == Expression::Unknown - puts "update_func_bind: #{Expression[faddr]} has #{reg} -> unknown, presume it is preserved" if $DEBUG - b[reg] = Expression[reg] - end - } - else - if b[:esp] and not Expression[b[:esp], :-, :esp].reduce.kind_of?(::Integer) - puts "update_func_bind: #{Expression[faddr]} has esp -> #{b[:esp]}" if $DEBUG - end - end - - # rename some functions - # TODO database and real signatures - rename = - if b[:eax] and Expression[b[:eax], :-, faddr].reduce == 0 - 'geteip' # metasm pic linker - elsif b[:eax] and b[:ebx] and Expression[b[:eax], :-, :eax].reduce == 0 and Expression[b[:ebx], :-, Indirection[:esp, sz, nil]].reduce == 0 - 'get_pc_thunk_ebx' # elf pic convention - elsif b[:esp] and Expression[b[:esp], :-, [:esp, :-, Indirection[[:esp, :+, 2*sz], sz]]].reduce.kind_of? 
::Integer and - dasm.decoded[faddr].block.list.find { |di| di.backtrace_binding[Indirection['segment_base_fs', sz]] } - '__SEH_prolog' - elsif b[:esp] == Expression[:ebp, :+, sz] and - dasm.decoded[faddr].block.list.find { |di| di.backtrace_binding[Indirection['segment_base_fs', sz]] } - '__SEH_epilog' - end - dasm.auto_label_at(faddr, rename, 'loc', 'sub') if rename - end - - # returns true if the expression is an address on the stack - def backtrace_is_stack_address(expr) - Expression[expr].expr_externals.include? register_symbols[4] - end - - # updates an instruction's argument replacing an expression with another (eg label renamed) - def replace_instr_arg_immediate(i, old, new) - i.args.map! { |a| - case a - when Expression; a == old ? new : Expression[a.bind(old => new).reduce] - when ModRM - a.imm = (a.imm == old ? new : Expression[a.imm.bind(old => new).reduce]) if a.imm - a - else a - end - } - end - - # returns a DecodedFunction from a parsed C function prototype - # TODO rebacktrace already decoded functions (load a header file after dasm finished) - # TODO walk structs args - def decode_c_function_prototype(cp, sym, orig=nil) - sym = cp.toplevel.symbol[sym] if sym.kind_of?(::String) - df = DecodedFunction.new - orig ||= Expression[sym.name] - - new_bt = lambda { |expr, rlen| - df.backtracked_for << BacktraceTrace.new(expr, orig, expr, rlen ? 
:r : :x, rlen) - } - - # return instr emulation - if sym.has_attribute 'noreturn' or sym.has_attribute '__noreturn__' - df.noreturn = true - else - new_bt[Indirection[:esp, @size/8, orig], nil] - end - - # register dirty (XXX assume standard ABI) - [:eax, :ecx, :edx].each { |r| - df.backtrace_binding.update r => Expression::Unknown - } - - # emulate ret - al = cp.typesize[:ptr] - stackoff = al - if sym.has_attribute 'fastcall' - stackoff = sym.type.args.to_a[2..-1].to_a.inject(al) { |sum, a| sum += (cp.sizeof(a) + al - 1) / al * al } - elsif sym.has_attribute 'stdcall' - stackoff = sym.type.args.to_a.inject(al) { |sum, a| sum += (cp.sizeof(a) + al - 1) / al * al } - end - df.backtrace_binding[:esp] = Expression[:esp, :+, stackoff] - - # scan args for function pointers - # TODO walk structs/unions.. - stackoff = al - sym.type.args.to_a.each { |a| - p = Indirection[[:esp, :+, stackoff], al, orig] - stackoff += (cp.sizeof(a) + al - 1) / al * al - if a.type.untypedef.kind_of? C::Pointer - pt = a.type.untypedef.type.untypedef - if pt.kind_of? C::Function - new_bt[p, nil] - df.backtracked_for.last.detached = true - elsif pt.kind_of? 
C::Struct - new_bt[p, al] - else - new_bt[p, cp.sizeof(nil, pt)] - end - end - } - - df - end - - # the lambda for the :default backtrace_binding callback of the disassembler - # tries to determine the stack offset of unprototyped functions - # working: - # checks that origin is a ret, that expr is an indirection from esp and that expr.origin is the ret - # bt_walk from calladdr until we finds a call into us, and assumes it is the current function start - # TODO handle foo: call bar ; bar: pop eax ; call ; ret -> bar is not the function start (foo is) - # then backtrace expr from calladdr to funcstart (snapshot), using esp -> esp+ - # from the result, compute stackoffvariable (only if trivial) - # will not work if the current function calls any other unknown function (unless all are __cdecl) - # will not work if the current function is framed (ebp leave ret): in this case the function will return, but its esp will be unknown - # if the stack offset is found and funcaddr is a string, fixup the static binding and remove the dynamic binding - # TODO dynamise thunks bt_for & bt_cb - def disassembler_default_btbind_callback - esp = register_symbols[4] - - lambda { |dasm, bind, funcaddr, calladdr, expr, origin, maxdepth| - @dasm_func_default_off ||= {} - if off = @dasm_func_default_off[[dasm, calladdr]] - bind = bind.merge(esp => Expression[esp, :+, off]) - break bind - end - break bind if not odi = dasm.decoded[origin] or odi.opcode.basename != 'ret' - expr = expr.reduce_rec if expr.kind_of? Expression - break bind unless expr.kind_of? 
Indirection and expr.origin == origin - break bind unless expr.externals.reject { |e| e =~ /^autostackoffset_/ } == [esp] - - curfunc = dasm.function[funcaddr] - if curfunc.backtrace_binding and tk = curfunc.backtrace_binding[:thunk] and dasm.function[tk] - curfunc = dasm.function[tk] - end - - # scan from calladdr for the probable parent function start - func_start = nil - dasm.backtrace_walk(true, calladdr, false, false, nil, maxdepth) { |ev, foo, h| - if ev == :up and h[:sfret] != :subfuncret and di = dasm.decoded[h[:to]] and di.opcode.basename == 'call' - func_start = h[:from] - break - elsif ev == :end - # entrypoints are functions too - func_start = h[:addr] - break - end - } - break bind if not func_start - puts "automagic #{Expression[funcaddr]}: found func start for #{dasm.decoded[origin]} at #{Expression[func_start]}" if dasm.debug_backtrace - s_off = "autostackoffset_#{Expression[funcaddr]}_#{Expression[calladdr]}" - list = dasm.backtrace(expr.bind(esp => Expression[esp, :+, s_off]), calladdr, :include_start => true, :snapshot_addr => func_start, :maxdepth => maxdepth, :origin => origin) - # check if this backtrace made us find our binding - if off = @dasm_func_default_off[[dasm, calladdr]] - bind = bind.merge(esp => Expression[esp, :+, off]) - break bind - elsif not curfunc.btbind_callback - break curfunc.backtrace_binding - end - e_expr = list.find { |e_expr_| - # TODO cleanup this - e_expr_ = Expression[e_expr_].reduce_rec - next if not e_expr_.kind_of? Indirection - off = Expression[[esp, :+, s_off], :-, e_expr_.target].reduce - off.kind_of? Integer and off >= @size/8 and off < 10*@size/8 and (off % (@size/8)) == 0 - } || list.first - - e_expr = e_expr.rexpr if e_expr.kind_of? Expression and e_expr.op == :+ and not e_expr.lexpr - break bind unless e_expr.kind_of? Indirection - - off = Expression[[esp, :+, s_off], :-, e_expr.target].reduce - if off.kind_of? 
Expression - bd = off.externals.grep(/^autostackoffset_/).inject({}) { |bd_, xt| bd_.update xt => @size/8 } - bd.delete s_off - if off.bind(bd).reduce == @size/8 - # all __cdecl - off = @size/8 - else - # check if all calls are to the same extern func - bd.delete_if { |k, v| k !~ /^autostackoffset_#{Expression[funcaddr]}_/ } - bd.each_key { |k| bd[k] = 0 } - if off.bind(bd).reduce.kind_of? Integer - off = off.bind(bd).reduce / (bd.length + 1) - end - end - end - if off.kind_of? Integer - if off < @size/8 or off > 20*@size/8 or (off % (@size/8)) != 0 - puts "autostackoffset: ignoring off #{off} for #{Expression[funcaddr]} from #{dasm.decoded[calladdr]}" if $VERBOSE - off = :unknown - end - end - - bind = bind.merge esp => Expression[esp, :+, off] if off != :unknown - if funcaddr != :default - if not off.kind_of? ::Integer - #XXX we allow the current function to return, so we should handle the func backtracking its esp - #(and other register that are saved and restored in epilog) - puts "stackoff #{dasm.decoded[calladdr]} | #{Expression[func_start]} | #{expr} | #{e_expr} | #{off}" if dasm.debug_backtrace - else - puts "autostackoffset: found #{off} for #{Expression[funcaddr]} from #{dasm.decoded[calladdr]}" if $VERBOSE - curfunc.btbind_callback = nil - curfunc.backtrace_binding = bind - - # rebacktrace the return address, so that other unknown funcs that depend on us are solved - dasm.backtrace(Indirection[esp, @size/8, origin], origin, :origin => origin) - end - else - if off.kind_of? 
::Integer and dasm.decoded[calladdr] - puts "autostackoffset: found #{off-@size/8} for #{dasm.decoded[calladdr]}" if $VERBOSE - di = dasm.decoded[calladdr] - di.comment.delete_if { |c| c =~ /^stackoff=/ } if di.comment - di.add_comment "stackoff=#{off-@size/8}" - @dasm_func_default_off[[dasm, calladdr]] = off - - dasm.backtrace(Indirection[esp, @size/8, origin], origin, :origin => origin) - elsif cachedoff = @dasm_func_default_off[[dasm, calladdr]] - bind[esp] = Expression[esp, :+, cachedoff] - elsif off.kind_of? ::Integer - dasm.decoded[calladdr].add_comment "stackoff=#{off-@size/8}" - end - - puts "stackoff #{dasm.decoded[calladdr]} | #{Expression[func_start]} | #{expr} | #{e_expr} | #{off}" if dasm.debug_backtrace - end - - bind - } - end - - # the :default backtracked_for callback - # returns empty unless funcaddr is not default or calladdr is a call or a jmp - def disassembler_default_btfor_callback - lambda { |dasm, btfor, funcaddr, calladdr| - if funcaddr != :default; btfor - elsif di = dasm.decoded[calladdr] and (di.opcode.name == 'call' or di.opcode.name == 'jmp'); btfor - else [] - end - } - end - - # returns a DecodedFunction suitable for :default - # uses disassembler_default_bt{for/bind}_callback - def disassembler_default_func - esp = register_symbols[4] - cp = new_cparser - cp.parse 'void stdfunc(void);' - f = decode_c_function_prototype(cp, 'stdfunc', :default) - f.backtrace_binding[esp] = Expression[esp, :+, :unknown] - f.btbind_callback = disassembler_default_btbind_callback - f.btfor_callback = disassembler_default_btfor_callback - f - end - - # returns a hash { :retval => r, :changed => [] } - def abi_funcall - { :retval => register_symbols[0], :changed => register_symbols[0, 3] } - end - - - # computes the binding of the sequence of code starting at entry included - # the binding is a hash showing the value of modified elements at the - # end of the code sequence, relative to their value at entry - # the elements are all the registers and the 
memory written to - # if finish is nil, the binding will include :ip, which is the address - # to be executed next (if it exists) - # the binding will not include memory access from subfunctions - # entry should be an entrypoint of the disassembler if finish is nil - # the code sequence must have only one end, with no to_normal - def code_binding(dasm, entry, finish=nil) - entry = dasm.normalize(entry) - finish = dasm.normalize(finish) if finish - lastdi = nil - binding = {} - bt = lambda { |from, expr, inc_start| - ret = dasm.backtrace(Expression[expr], from, :snapshot_addr => entry, :include_start => inc_start) - ret.length == 1 ? ret.first : Expression::Unknown - } - - # walk blocks, search for finish, scan memory writes - todo = [entry] - done = [Expression::Unknown] - while addr = todo.pop - addr = dasm.normalize(addr) - next if done.include? addr or addr == finish or not dasm.decoded[addr].kind_of? DecodedInstruction - done << addr - b = dasm.decoded[addr].block - - next if b.list.find { |di| - a = di.address - if a == finish - lastdi = b.list[b.list.index(di) - 1] - true - else - # check writes from the instruction - get_xrefs_w(dasm, di).each { |waddr, len| - # we want the ptr expressed with reg values at entry - ptr = bt[a, waddr, false] - binding[Indirection[ptr, len, a]] = bt[a, Indirection[waddr, len, a], true] - } - false - end - } - - hasnext = false - b.each_to_samefunc(dasm) { |t| - hasnext = true - if t == finish - lastdi = b.list.last - else - todo << t - end - } - - # check end of sequence - if not hasnext - raise "two-ended code_binding #{lastdi} & #{b.list.last}" if lastdi - lastdi = b.list.last - if lastdi.opcode.props[:setip] - e = get_xrefs_x(dasm, lastdi) - raise 'bad code_binding ending' if e.to_a.length != 1 or not lastdi.opcode.props[:stopexec] - binding[:ip] = bt[lastdi.address, e.first, false] - elsif not lastdi.opcode.props[:stopexec] - binding[:ip] = lastdi.next_addr - end - end - end - binding.delete_if { |k, v| Expression[k] == 
Expression[v] } - - # add register binding - raise "no code_binding end" if not lastdi and not finish - register_symbols.each { |reg| - val = - if lastdi; bt[lastdi.address, reg, true] - else bt[finish, reg, false] - end - next if val == Expression[reg] - mask = 0xffff_ffff # dont use 1<<@size, because 16bit code may use e.g. edi (through opszoverride) - mask = 0xffff_ffff_ffff_ffff if @size == 64 - val = Expression[val, :&, mask].reduce - binding[reg] = Expression[val] - } - - binding - end - - # trace the stack pointer register across a function, rename occurences of esp+XX to esp+var_XX - def name_local_vars(dasm, funcaddr) - esp = register_symbols[4] - func = dasm.function[funcaddr] - subs = [] - dasm.trace_function_register(funcaddr, esp => 0) { |di, r, off, trace| - next if r.to_s =~ /flag/ - if di.opcode.name == 'call' and tf = di.block.to_normal.find { |t| dasm.function[t] and dasm.function[t].localvars } - subs << [trace[esp], dasm.function[tf].localvars] - end - di.instruction.args.grep(ModRM).each { |mrm| - b = mrm.b || (mrm.i if mrm.s == 1) - # its a modrm => b is read, so ignore r/off (not yet applied), use trace only - stackoff = trace[b.symbolic] if b - next if not stackoff - imm = mrm.imm || Expression[0] - frameoff = imm + stackoff - if frameoff.kind_of?(::Integer) - # XXX register args ? non-ABI standard register args ? (eg optimized x64) - str = 'var_%X' % (-frameoff) - str = 'arg_%X' % (frameoff-@size/8) if frameoff > 0 - str = func.get_localvar_stackoff(frameoff, di, str) if func - imm = imm.expr if imm.kind_of?(ExpressionString) - mrm.imm = ExpressionString.new(imm, str, :stackvar) - end - } - off = off.reduce if off.kind_of?(Expression) - next unless off.kind_of?(Integer) - off - } - # if subfunctions are called at a fixed stack offset, rename var_3c -> subarg_0 - if func and func.localvars and not subs.empty? and subs.all? 
{ |sb| sb[0] == subs.first[0] } - func.localvars.each { |varoff, varname| - subargnames = subs.map { |o, sb| sb[varoff-o+@size/8] }.compact - if subargnames.uniq.length == 1 - varname.replace 'sub'+subargnames[0] - end - } - end - end + class ModRM + def self.decode(edata, byte, endianness, adsz, opsz, seg=nil, regclass=Reg, h = {}) + m = (byte >> 6) & 3 + rm = byte & 7 + + if m == 3 + return regclass.new(rm, opsz) + end + + sum = Sum[adsz][m][rm] + + s, i, b, imm = nil + sum.each { |a| + case a + when Integer + if not b + b = Reg.new(a, adsz) + else + s = 1 + if h[:mrmvex] + i = SimdReg.new(a, h[:mrmvex]) + else + i = Reg.new(a, adsz) + end + end + + when :sib + sib = edata.get_byte.to_i + + ii = ((sib >> 3) & 7) + if ii != 4 + s = 1 << ((sib >> 6) & 3) + if h[:mrmvex] + i = SimdReg.new(ii, h[:mrmvex]) + else + i = Reg.new(ii, adsz) + end + end + + bb = sib & 7 + if bb == 5 and m == 0 + imm = Expression[edata.decode_imm("i#{adsz}".to_sym, endianness)] + else + b = Reg.new(bb, adsz) + end + + when :i8, :i16, :i32 + imm = Expression[edata.decode_imm(a, endianness)] + end + } + + if imm and ir = imm.reduce and ir.kind_of?(Integer) and ir < 0 and (ir < -0x10_0000 or (!b and !i)) + # probably a base address -> unsigned + imm = Expression[imm.reduce & ((1 << (adsz || 32)) - 1)] + end + + opsz = h[:argsz] if h[:argsz] + new adsz, opsz, s, i, b, imm, seg + end + end + + class Farptr + def self.decode(edata, endianness, adsz) + addr = Expression[edata.decode_imm("u#{adsz}".to_sym, endianness)] + seg = Expression[edata.decode_imm(:u16, endianness)] + new seg, addr + end + end + + def build_opcode_bin_mask(op) + # bit = 0 if can be mutated by an field value, 1 if fixed by opcode + op.bin_mask = Array.new(op.bin.length, 0) + op.fields.each { |f, (oct, off)| + op.bin_mask[oct] |= (@fields_mask[f] << off) + } + op.bin_mask.map! 
{ |v| 255 ^ v } + end + + def build_bin_lookaside + # sets up a hash byte value => list of opcodes that may match + # opcode.bin_mask is built here + lookaside = Array.new(256) { [] } + opcode_list.each { |op| + + build_opcode_bin_mask op + + b = op.bin[0] + msk = op.bin_mask[0] + + for i in b..(b | (255^msk)) + lookaside[i] << op if i & msk == b & msk + end + } + lookaside + end + + def decode_prefix(instr, byte) + instr.prefix ||= {} + (instr.prefix[:list] ||= []) << byte + + # XXX actual limit = 15-instr.length + return false if instr.prefix[:list].length >= 15 + + case byte + when 0x66; instr.prefix[:opsz] = true + when 0x67; instr.prefix[:adsz] = true + when 0xF0; instr.prefix[:lock] = true + when 0xF2; instr.prefix[:rep] = :nz + when 0xF3; instr.prefix[:rep] = :z # postprocessed by decode_instr + when 0x26, 0x2E, 0x36, 0x3E, 0x64, 0x65 + if byte & 0x40 == 0 + v = (byte >> 3) & 3 + else + v = byte & 7 + end + instr.prefix[:seg] = SegReg.new(v) + else + return false + end + true + end + + # tries to find the opcode encoded at edata.ptr + # if no match, tries to match a prefix (update di.instruction.prefix) + # on match, edata.ptr points to the first byte of the opcode (after prefixes) + def decode_findopcode(edata) + di = DecodedInstruction.new self + while edata.ptr < edata.data.length + pfx = di.instruction.prefix || {} + byte = edata.data[edata.ptr] + byte = byte.unpack('C').first if byte.kind_of?(::String) + return di if di.opcode = @bin_lookaside[byte].find { |op| + # fetch the relevant bytes from edata + bseq = edata.data[edata.ptr, op.bin.length].unpack('C*') + + # check against full opcode mask + op.bin.zip(bseq, op.bin_mask).all? 
{ |b1, b2, m| b2 and ((b1 & m) == (b2 & m)) } and + # check special cases + !( + # fail if any of those is true + (fld = op.fields[:seg2A] and (bseq[fld[0]] >> fld[1]) & @fields_mask[:seg2A] == 1) or + (fld = op.fields[:seg3A] and (bseq[fld[0]] >> fld[1]) & @fields_mask[:seg3A] < 4) or + (fld = op.fields[:seg3A] || op.fields[:seg3] and (bseq[fld[0]] >> fld[1]) & @fields_mask[:seg3] > 5) or + (op.props[:modrmA] and fld = op.fields[:modrm] and (bseq[fld[0]] >> fld[1]) & 0xC0 == 0xC0) or + (op.props[:modrmR] and fld = op.fields[:modrm] and (bseq[fld[0]] >> fld[1]) & 0xC0 != 0xC0) or + (fld = op.fields[:vex_vvvv] and @size != 64 and (bseq[fld[0]] >> fld[1]) & @fields_mask[:vex_vvvv] < 8) or + (sz = op.props[:opsz] and opsz(di, op) != sz) or + (sz = op.props[:adsz] and adsz(di, op) != sz) or + (ndpfx = op.props[:needpfx] and not pfx[:list].to_a.include? ndpfx) or + (pfx[:adsz] and op.props[:adsz] and op.props[:adsz] == @size) or + # return non-ambiguous opcode (eg push.i16 in 32bit mode) / sync with addop_post in opcode.rb + (pfx[:opsz] and not op.props[:opsz] and (op.args == [:i] or op.args == [:farptr] or op.name == 'ret')) or + (pfx[:adsz] and not op.props[:adsz] and (op.props[:strop] or op.props[:stropz] or op.args.include?(:mrm_imm) or op.args.include?(:modrm) or op.name =~ /loop|xlat/)) or + (op.name == 'nop' and op.bin[0] == 0x90 and di.instruction.prefix and di.instruction.prefix[:rex_b]) + ) + } + + break if not decode_prefix(di.instruction, edata.get_byte) + di.bin_length += 1 + end + end + + def decode_instr_op(edata, di) + before_ptr = edata.ptr + op = di.opcode + di.instruction.opname = op.name + bseq = edata.read(op.bin.length).unpack('C*') # decode_findopcode ensures that data >= op.length + pfx = di.instruction.prefix || {} + + case op.props[:needpfx] + when 0x66; pfx.delete :opsz + when 0x67; pfx.delete :adsz + when 0xF2, 0xF3; pfx.delete :rep + end + + if op.props[:setip] and not op.props[:stopexec] and pfx[:seg] + case pfx.delete(:seg).val + when 1; 
pfx[:jmphint] = 'hintnojmp' + when 3; pfx[:jmphint] = 'hintjmp' + end + end + + field_val = lambda { |f| + if fld = op.fields[f] + (bseq[fld[0]] >> fld[1]) & @fields_mask[f] + end + } + + opsz = op.props[:argsz] || opsz(di) + adsz = (pfx[:adsz] ? 48 - @size : @size) + + mmxsz = ((op.props[:xmmx] && pfx[:opsz]) ? 128 : 64) + op.args.each { |a| + di.instruction.args << case a + when :reg; Reg.new field_val[a], opsz + when :eeec; CtrlReg.new field_val[a] + when :eeed; DbgReg.new field_val[a] + when :eeet; TstReg.new field_val[a] + when :seg2, :seg2A, :seg3, :seg3A; SegReg.new field_val[a] + when :regfp; FpReg.new field_val[a] + when :regmmx; SimdReg.new field_val[a], mmxsz + when :regxmm; SimdReg.new field_val[a], 128 + when :regymm; SimdReg.new field_val[a], 256 + + when :farptr; Farptr.decode edata, @endianness, opsz + when :i8, :u8, :u16; Expression[edata.decode_imm(a, @endianness)] + when :i; Expression[edata.decode_imm("#{op.props[:unsigned_imm] ? 'a' : 'i'}#{opsz}".to_sym, @endianness)] + + when :mrm_imm; ModRM.decode edata, (adsz == 16 ? 
6 : 5), @endianness, adsz, opsz, pfx.delete(:seg) + when :modrm; ModRM.decode edata, field_val[:modrm], @endianness, adsz, opsz, pfx.delete(:seg) + when :modrmmmx; ModRM.decode edata, field_val[:modrm], @endianness, adsz, mmxsz, pfx.delete(:seg), SimdReg, :argsz => op.props[:argsz] + when :modrmxmm; ModRM.decode edata, field_val[:modrm], @endianness, adsz, 128, pfx.delete(:seg), SimdReg, :argsz => op.props[:argsz], :mrmvex => op.props[:mrmvex] + when :modrmymm; ModRM.decode edata, field_val[:modrm], @endianness, adsz, 256, pfx.delete(:seg), SimdReg, :argsz => op.props[:argsz], :mrmvex => op.props[:mrmvex] + + when :vexvreg; Reg.new((field_val[:vex_vvvv] ^ 0xf), opsz) + when :vexvxmm; SimdReg.new((field_val[:vex_vvvv] ^ 0xf), 128) + when :vexvymm; SimdReg.new((field_val[:vex_vvvv] ^ 0xf), 256) + when :i4xmm; SimdReg.new((edata.decode_imm(:u8, @endianness) >> 4) & 7, 128) + when :i4ymm; SimdReg.new((edata.decode_imm(:u8, @endianness) >> 4) & 7, 256) + + when :imm_val1; Expression[1] + when :imm_val3; Expression[3] + when :reg_cl; Reg.new 1, 8 + when :reg_eax; Reg.new 0, opsz + when :reg_dx; Reg.new 2, 16 + when :regfp0; FpReg.new nil + else raise SyntaxError, "Internal error: invalid argument #{a} in #{op.name}" + end + } + + di.bin_length += edata.ptr - before_ptr + + return false if edata.ptr > edata.length + + if op.name == 'movsx' or op.name == 'movzx' + if di.opcode.props[:argsz] == 8 + di.instruction.args[1].sz = 8 + else + di.instruction.args[1].sz = 16 + end + if pfx[:opsz] + di.instruction.args[0].sz = 48-@size + else + di.instruction.args[0].sz = @size + end + elsif op.name == 'crc32' + di.instruction.args[0].sz = 32 + end + + case pfx.delete(:rep) + when :nz + if di.opcode.props[:strop] + pfx[:rep] = 'rep' + elsif di.opcode.props[:stropz] + pfx[:rep] = 'repnz' + end + when :z + if di.opcode.props[:strop] + pfx[:rep] = 'rep' + elsif di.opcode.props[:stropz] + pfx[:rep] = 'repz' + end + end + + di + end + + # converts relative jump/call offsets to absolute 
addresses + # adds the eip delta to the offset +off+ of the instruction (may be an Expression) + its bin_length + # do not call twice on the same di ! + def decode_instr_interpret(di, addr) + if di.opcode.props[:setip] and di.instruction.args.last.kind_of? Expression and di.instruction.opname !~ /^i?ret/ + delta = di.instruction.args.last.reduce + arg = Expression[[addr, :+, di.bin_length], :+, delta].reduce + di.instruction.args[-1] = Expression[arg] + end + + di + end + + # return the list of registers as symbols in the order used by pushad + # for use in backtrace and stuff, for compatibility with x64 + # esp is [4] + REG_SYMS = [:eax, :ecx, :edx, :ebx, :esp, :ebp, :esi, :edi] + def register_symbols + REG_SYMS + end + + # interprets a condition code (in an opcode name) as an expression involving backtracked eflags + # eflag_p is never computed, and this returns Expression::Unknown for this flag + # ex: 'z' => Expression[:eflag_z] + def decode_cc_to_expr(cc) + case cc + when 'o'; Expression[:eflag_o] + when 'no'; Expression[:'!', :eflag_o] + when 'b', 'nae', 'c'; Expression[:eflag_c] + when 'nb', 'ae', 'nc'; Expression[:'!', :eflag_c] + when 'z', 'e'; Expression[:eflag_z] + when 'nz', 'ne'; Expression[:'!', :eflag_z] + when 'be', 'na'; Expression[:eflag_c, :|, :eflag_z] + when 'nbe', 'a'; Expression[:'!', [:eflag_c, :|, :eflag_z]] + when 's'; Expression[:eflag_s] + when 'ns'; Expression[:'!', :eflag_s] + when 'p', 'pe'; Expression::Unknown + when 'np', 'po'; Expression::Unknown + when 'l', 'nge'; Expression[:eflag_s, :'!=', :eflag_o] + when 'nl', 'ge'; Expression[:eflag_s, :==, :eflag_o] + when 'le', 'ng'; Expression[[:eflag_s, :'!=', :eflag_o], :|, :eflag_z] + when 'nle', 'g'; Expression[[:eflag_s, :==, :eflag_o], :&, :eflag_z] + when 'ecxz'; Expression[:'!', register_symbols[1]] + when 'cxz'; Expression[:'!', [register_symbols[1], :&, 0xffff]] + end + end + + # hash opcode_name => lambda { |dasm, di, *symbolic_args| instr_binding } + def backtrace_binding + 
@backtrace_binding ||= init_backtrace_binding + end + def backtrace_binding=(b) @backtrace_binding = b end + + def opsz(di, op=nil) + if di and di.instruction.prefix and di.instruction.prefix[:opsz] and (op || di.opcode).props[:needpfx] != 0x66; 48-@size + else @size + end + end + + def adsz(di, op=nil) + if di and di.instruction.prefix and di.instruction.prefix[:adsz] and (op || di.opcode).props[:needpfx] != 0x67; 48-@size + else @size + end + end + + # populate the @backtrace_binding hash with default values + def init_backtrace_binding + @backtrace_binding ||= {} + + eax, ecx, edx, ebx, esp, ebp, esi, edi = register_symbols + ebx = ebx + + mask = lambda { |di| (1 << opsz(di))-1 } # 32bits => 0xffff_ffff + sign = lambda { |v, di| Expression[[[v, :&, mask[di]], :>>, opsz(di)-1], :'!=', 0] } + + opcode_list.map { |ol| ol.basename }.uniq.sort.each { |op| + binding = case op + when 'mov', 'movzx', 'movd', 'movq'; lambda { |di, a0, a1| { a0 => Expression[a1] } } + when 'movsx', 'movsxd' + lambda { |di, a0, a1| + sz1 = di.instruction.args[1].sz + sign1 = Expression[[a1, :>>, sz1-1], :&, 1] + { a0 => Expression[[a1, :|, [sign1, :*, (-1 << sz1)]], :&, mask[di]] } + } + when 'lea'; lambda { |di, a0, a1| { a0 => a1.target } } + when 'xchg'; lambda { |di, a0, a1| { a0 => Expression[a1], a1 => Expression[a0] } } + when 'add', 'sub', 'or', 'xor', 'and', 'pxor', 'adc', 'sbb' + lambda { |di, a0, a1| + e_op = { 'add' => :+, 'sub' => :-, 'or' => :|, 'and' => :&, 'xor' => :^, 'pxor' => :^, 'adc' => :+, 'sbb' => :- }[op] + ret = Expression[a0, e_op, a1] + ret = Expression[ret, e_op, :eflag_c] if op == 'adc' or op == 'sbb' + # optimises eax ^ eax => 0 + # avoid hiding memory accesses (to not hide possible fault) + ret = Expression[ret.reduce] if not a0.kind_of? 
Indirection + { a0 => ret } + } + when 'xadd'; lambda { |di, a0, a1| { a0 => Expression[a0, :+, a1], a1 => Expression[a0] } } + when 'inc'; lambda { |di, a0| { a0 => Expression[a0, :+, 1] } } + when 'dec'; lambda { |di, a0| { a0 => Expression[a0, :-, 1] } } + when 'not'; lambda { |di, a0| { a0 => Expression[a0, :^, mask[di]] } } + when 'neg'; lambda { |di, a0| { a0 => Expression[:-, a0] } } + when 'rol', 'ror' + lambda { |di, a0, a1| + e_op = (op[2] == ?r ? :>> : :<<) + inv_op = {:<< => :>>, :>> => :<< }[e_op] + sz = [a1, :%, opsz(di)] + isz = [[opsz(di), :-, a1], :%, opsz(di)] + # ror a, b => (a >> b) | (a << (32-b)) + { a0 => Expression[[[a0, e_op, sz], :|, [a0, inv_op, isz]], :&, mask[di]] } + } + when 'sar', 'shl', 'sal'; lambda { |di, a0, a1| { a0 => Expression[a0, (op[-1] == ?r ? :>> : :<<), [a1, :%, [opsz(di), 32].max]] } } + when 'shr'; lambda { |di, a0, a1| { a0 => Expression[[a0, :&, mask[di]], :>>, [a1, :%, opsz(di)]] } } + when 'cwd', 'cdq', 'cqo'; lambda { |di| { Expression[edx, :&, mask[di]] => Expression[mask[di], :*, sign[eax, di]] } } + when 'cbw', 'cwde', 'cdqe'; lambda { |di| + o2 = opsz(di)/2 ; m2 = (1 << o2) - 1 + { Expression[eax, :&, mask[di]] => Expression[[eax, :&, m2], :|, [m2 << o2, :*, [[eax, :>>, o2-1], :&, 1]]] } } + when 'push' + lambda { |di, a0| { esp => Expression[esp, :-, opsz(di)/8], + Indirection[esp, opsz(di)/8, di.address] => Expression[a0] } } + when 'pop' + lambda { |di, a0| { esp => Expression[esp, :+, opsz(di)/8], + a0 => Indirection[esp, opsz(di)/8, di.address] } } + when 'pushfd', 'pushf' + # TODO Unknown per bit + lambda { |di| + efl = Expression[0x202] + bts = lambda { |pos, v| efl = Expression[efl, :|, [[v, :&, 1], :<<, pos]] } + bts[0, :eflag_c] + bts[6, :eflag_z] + bts[7, :eflag_s] + bts[11, :eflag_o] + { esp => Expression[esp, :-, opsz(di)/8], Indirection[esp, opsz(di)/8, di.address] => efl } + } + when 'popfd', 'popf' + lambda { |di| bt = lambda { |pos| Expression[[Indirection[esp, opsz(di)/8, di.address], :>>, 
pos], :&, 1] } + { esp => Expression[esp, :+, opsz(di)/8], :eflag_c => bt[0], :eflag_z => bt[6], :eflag_s => bt[7], :eflag_o => bt[11] } } + when 'sahf' + lambda { |di| bt = lambda { |pos| Expression[[eax, :>>, pos], :&, 1] } + { :eflag_c => bt[0], :eflag_z => bt[6], :eflag_s => bt[7] } } + when 'lahf' + lambda { |di| + efl = Expression[2] + bts = lambda { |pos, v| efl = Expression[efl, :|, [[v, :&, 1], :<<, pos]] } + bts[0, :eflag_c] #bts[2, :eflag_p] #bts[4, :eflag_a] + bts[6, :eflag_z] + bts[7, :eflag_s] + { eax => efl } + } + when 'pushad' + lambda { |di| + ret = {} + st_off = 0 + register_symbols.reverse_each { |r| + ret[Indirection[Expression[esp, :+, st_off].reduce, opsz(di)/8, di.address]] = Expression[r] + st_off += opsz(di)/8 + } + ret[esp] = Expression[esp, :-, st_off] + ret + } + when 'popad' + lambda { |di| + ret = {} + st_off = 0 + register_symbols.reverse_each { |r| + ret[r] = Indirection[Expression[esp, :+, st_off].reduce, opsz(di)/8, di.address] + st_off += opsz(di)/8 + } + ret[esp] = Expression[esp, :+, st_off] # esp is not popped + ret + } + when 'call' + lambda { |di, a0| + sz = opsz(di)/8 + if a0.kind_of? 
Farptr + { esp => Expression[esp, :-, 2*sz], + Indirection[esp, sz, di.address] => Expression[di.next_addr], + Indirection[[esp, :+, sz], sz, di.address] => Expression::Unknown } + else + { esp => Expression[esp, :-, sz], + Indirection[esp, sz, di.address] => Expression[di.next_addr] } + end + } + when 'callf' + lambda { |di, a0| + sz = opsz(di)/8 + { esp => Expression[esp, :-, 2*sz], + Indirection[esp, sz, di.address] => Expression[di.next_addr], + Indirection[[esp, :+, sz], sz, di.address] => Expression::Unknown } } + when 'ret'; lambda { |di, *a| { esp => Expression[esp, :+, [opsz(di)/8, :+, a[0] || 0]] } } + when 'retf';lambda { |di, *a| { esp => Expression[esp, :+, [opsz(di)/4, :+, a[0] || 0]] } } + when 'loop', 'loopz', 'loopnz'; lambda { |di, a0| { ecx => Expression[ecx, :-, 1] } } + when 'enter' + lambda { |di, a0, a1| + sz = opsz(di)/8 + depth = a1.reduce % 32 + b = { Indirection[ebp, sz, di.address] => Expression[ebp], + Indirection[[esp, :+, a0.reduce+sz*depth], sz, di.address] => Expression[ebp], + ebp => Expression[esp, :-, sz], + esp => Expression[esp, :-, a0.reduce+sz*depth+sz] } + (1..depth).each { |i| + b[Indirection[[esp, :+, a0.reduce+i*sz], sz, di.address]] = + b[Indirection[[ebp, :-, i*sz], sz, di.address]] = + Expression::Unknown # TODO Indirection[[ebp, :-, i*sz], sz, di.address] + } + b + } + when 'leave'; lambda { |di| { ebp => Indirection[[ebp], opsz(di)/8, di.address], esp => Expression[ebp, :+, opsz(di)/8] } } + when 'aaa'; lambda { |di| { eax => Expression::Unknown, :incomplete_binding => Expression[1] } } + when 'imul' + lambda { |di, *a| + if not a[1] + # 1 operand from: store result in edx:eax + bd = {} + m = mask[di] + s = opsz(di) + e = Expression[Expression.make_signed(Expression[a[0], :&, m], s), :*, Expression.make_signed(Expression[eax, :&, m], s)] + if s == 8 + bd[Expression[eax, :&, 0xffff]] = e + else + bd[Expression[eax, :&, m]] = Expression[e, :&, m] + bd[Expression[edx, :&, m]] = Expression[[e, :>>, opsz(di)], :&, m] + 
end + # XXX eflags? + next bd + end + + if a[2]; e = Expression[a[1], :*, a[2]] + else e = Expression[[a[0], :*, a[1]], :&, (1 << (di.instruction.args.first.sz || opsz(di))) - 1] + end + { a[0] => e } + } + when 'mul' + lambda { |di, *a| + m = mask[di] + e = Expression[a, :*, [eax, :&, m]] + if opsz(di) == 8 + { Expression[eax, :&, 0xffff] => e } + else + { Expression[eax, :&, m] => Expression[e, :&, m], + Expression[edx, :&, m] => Expression[[e, :>>, opsz(di)], :&, m] } + end + } + when 'div', 'idiv'; lambda { |di, *a| { eax => Expression::Unknown, edx => Expression::Unknown, :incomplete_binding => Expression[1] } } + when 'rdtsc'; lambda { |di| { eax => Expression::Unknown, edx => Expression::Unknown, :incomplete_binding => Expression[1] } } + when /^(stos|movs|lods|scas|cmps)[bwd]$/ + lambda { |di, *a| + next {:incomplete_binding => 1} if di.opcode.args.include?(:regxmm) # XXX movsd xmm0, xmm1... + op =~ /^(stos|movs|lods|scas|cmps)([bwd])$/ + e_op = $1 + sz = { 'b' => 1, 'w' => 2, 'd' => 4 }[$2] + eax_ = Reg.new(0, 8*sz).symbolic + dir = :+ + if di.block and (di.block.list.find { |ddi| ddi.opcode.name == 'std' } rescue nil) + dir = :- + end + pesi = Indirection[esi, sz, di.address] + pedi = Indirection[edi, sz, di.address] + pfx = di.instruction.prefix || {} + bd = + case e_op + when 'movs' + case pfx[:rep] + when nil; { pedi => pesi, esi => Expression[esi, dir, sz], edi => Expression[edi, dir, sz] } + else { pedi => pesi, esi => Expression[esi, dir, [sz ,:*, ecx]], edi => Expression[edi, dir, [sz, :*, ecx]], ecx => 0 } + end + when 'stos' + case pfx[:rep] + when nil; { pedi => Expression[eax_], edi => Expression[edi, dir, sz] } + else { pedi => Expression[eax_], edi => Expression[edi, dir, [sz, :*, ecx]], ecx => 0 } + end + when 'lods' + case pfx[:rep] + when nil; { eax_ => pesi, esi => Expression[esi, dir, sz] } + else { eax_ => Indirection[[esi, dir, [sz, :*, [ecx, :-, 1]]], sz, di.address], esi => Expression[esi, dir, [sz, :*, ecx]], ecx => 0 } + end + when 
'scas' + case pfx[:rep] + when nil; { edi => Expression[edi, dir, sz] } + else { edi => Expression::Unknown, ecx => Expression::Unknown } + end + when 'cmps' + case pfx[:rep] + when nil; { edi => Expression[edi, dir, sz], esi => Expression[esi, dir, sz] } + else { edi => Expression::Unknown, esi => Expression::Unknown, ecx => Expression::Unknown } + end + end + bd[:incomplete_binding] = Expression[1] if pfx[:rep] + bd + } + when 'clc'; lambda { |di| { :eflag_c => Expression[0] } } + when 'stc'; lambda { |di| { :eflag_c => Expression[1] } } + when 'cmc'; lambda { |di| { :eflag_c => Expression[:'!', :eflag_c] } } + when 'cld'; lambda { |di| { :eflag_d => Expression[0] } } + when 'std'; lambda { |di| { :eflag_d => Expression[1] } } + when 'setalc'; lambda { |di| { Reg.new(0, 8).symbolic => Expression[:eflag_c, :*, 0xff] } } + when /^set/; lambda { |di, *a| { a[0] => Expression[decode_cc_to_expr(op[/^set(.*)/, 1])] } } + when /^cmov/; lambda { |di, *a| fl = decode_cc_to_expr(op[/^cmov(.*)/, 1]) ; { a[0] => Expression[[fl, :*, a[1]], :|, [[1, :-, fl], :*, a[0]]] } } + when /^j/ + lambda { |di, a0| + ret = { 'dummy_metasm_0' => Expression[a0] } # mark modr/m as read + if fl = decode_cc_to_expr(op[/^j(.*)/, 1]) and fl != Expression::Unknown + ret['dummy_metasm_1'] = fl # mark eflags as read + end + ret + } + when 'fstenv', 'fnstenv' + lambda { |di, a0| + # stores the address of the last non-control fpu instr run + lastfpuinstr = di.block.list[0...di.block.list.index(di)].reverse.find { |pdi| + case pdi.opcode.name + when /fn?init|fn?clex|fldcw|fn?st[cs]w|fn?stenv|fldenv|fn?save|frstor|f?wait/ + when /^f/; true + end + } if di.block + lastfpuinstr = lastfpuinstr.address if lastfpuinstr + ret = {} + save_at = lambda { |off, val| ret[Indirection[a0.target + off, 4, di.address]] = val } + save_at[0, Expression::Unknown] + save_at[4, Expression::Unknown] + save_at[8, Expression::Unknown] + save_at[12, lastfpuinstr || Expression::Unknown] + save_at[16, Expression::Unknown] + 
save_at[20, Expression::Unknown] + save_at[24, Expression::Unknown] + ret + } + when 'bt'; lambda { |di, a0, a1| { :eflag_c => Expression[[a0, :>>, [a1, :%, opsz(di)]], :&, 1] } } + when 'bts'; lambda { |di, a0, a1| { :eflag_c => Expression[[a0, :>>, [a1, :%, opsz(di)]], :&, 1], + a0 => Expression[a0, :|, [1, :<<, [a1, :%, opsz(di)]]] } } + when 'btr'; lambda { |di, a0, a1| { :eflag_c => Expression[[a0, :>>, [a1, :%, opsz(di)]], :&, 1], + a0 => Expression[a0, :&, [[1, :<<, [a1, :%, opsz(di)]], :^, mask[di]]] } } + when 'btc'; lambda { |di, a0, a1| { :eflag_c => Expression[[a0, :>>, [a1, :%, opsz(di)]], :&, 1], + a0 => Expression[a0, :^, [1, :<<, [a1, :%, opsz(di)]]] } } + when 'bswap' + lambda { |di, a0| + case opsz(di) + when 64 + { a0 => Expression[ + [[[[a0, :&, 0xff000000_00000000], :>>, 56], :|, + [[a0, :&, 0x00ff0000_00000000], :>>, 40]], :|, + [[[a0, :&, 0x0000ff00_00000000], :>>, 24], :|, + [[a0, :&, 0x000000ff_00000000], :>>, 8]]], :|, + [[[[a0, :&, 0x00000000_ff000000], :<<, 8], :|, + [[a0, :&, 0x00000000_00ff0000], :<<, 24]], :|, + [[[a0, :&, 0x00000000_0000ff00], :<<, 40], :|, + [[a0, :&, 0x00000000_000000ff], :<<, 56]]]] } + when 32 + { a0 => Expression[ + [[[a0, :&, 0xff000000], :>>, 24], :|, + [[a0, :&, 0x00ff0000], :>>, 8]], :|, + [[[a0, :&, 0x0000ff00], :<<, 8], :|, + [[a0, :&, 0x000000ff], :<<, 24]]] } + when 16 + # bswap ax => mov ax, 0 + { a0 => 0 } + end + } + when 'nop', 'pause', 'wait', 'cmp', 'test'; lambda { |di, *a| {} } + end + + # add eflags side-effects + + full_binding = case op + when 'adc', 'add', 'and', 'cmp', 'or', 'sbb', 'sub', 'xor', 'test', 'xadd' + lambda { |di, a0, a1| + e_op = { 'adc' => :+, 'add' => :+, 'xadd' => :+, 'and' => :&, 'cmp' => :-, 'or' => :|, 'sbb' => :-, 'sub' => :-, 'xor' => :^, 'test' => :& }[op] + res = Expression[[a0, :&, mask[di]], e_op, [a1, :&, mask[di]]] + res = Expression[res, e_op, :eflag_c] if op == 'adc' or op == 'sbb' + + ret = (binding ? 
binding[di, a0, a1] : {}) + ret[:eflag_z] = Expression[[res, :&, mask[di]], :==, 0] + ret[:eflag_s] = sign[res, di] + ret[:eflag_c] = case e_op + when :+; Expression[res, :>, mask[di]] + when :-; Expression[[a0, :&, mask[di]], :<, [a1, :&, mask[di]]] + else Expression[0] + end + ret[:eflag_o] = case e_op + when :+; Expression[[sign[a0, di], :==, sign[a1, di]], :'&&', [sign[a0, di], :'!=', sign[res, di]]] + when :-; Expression[[sign[a0, di], :==, [:'!', sign[a1, di]]], :'&&', [sign[a0, di], :'!=', sign[res, di]]] + else Expression[0] + end + ret + } + when 'inc', 'dec', 'neg', 'shl', 'shr', 'sar', 'ror', 'rol', 'rcr', 'rcl', 'shld', 'shrd' + lambda { |di, a0, *a| + ret = (binding ? binding[di, a0, *a] : {}) + res = ret[a0] || Expression::Unknown + ret[:eflag_z] = Expression[[res, :&, mask[di]], :==, 0] + ret[:eflag_s] = sign[res, di] + case op + when 'neg'; ret[:eflag_c] = Expression[[res, :&, mask[di]], :'!=', 0] + when 'inc', 'dec' # don't touch carry flag + else ret[:eflag_c] = Expression::Unknown # :incomplete_binding ? + end + ret[:eflag_o] = case op + when 'inc'; Expression[[a0, :&, mask[di]], :==, mask[di] >> 1] + when 'dec'; Expression[[res , :&, mask[di]], :==, mask[di] >> 1] + when 'neg'; Expression[[a0, :&, mask[di]], :==, (mask[di]+1) >> 1] + else Expression::Unknown + end + ret + } + when 'imul', 'mul', 'idiv', 'div', /^(scas|cmps)[bwdq]$/ + lambda { |di, *a| + ret = (binding ? binding[di, *a] : {}) + ret[:eflag_z] = ret[:eflag_s] = ret[:eflag_c] = ret[:eflag_o] = Expression::Unknown # :incomplete_binding ? 
+ ret + } + end + + @backtrace_binding[op] ||= full_binding || binding if full_binding || binding + } + @backtrace_binding + end + + # returns the condition (bool Expression) under which a conditionnal jump is taken + # returns nil if not a conditionnal jump + # backtrace for the condition must include the jump itself (eg loop -> ecx--) + def get_jump_condition(di) + ecx = register_symbols[1] + case di.opcode.name + when /^j(.*)/ + decode_cc_to_expr($1) + when /^loop(.+)?/ + e = Expression[ecx, :'!=', 0] + e = Expression[e, :'||', decode_cc_to_expr($1)] if $1 + e + end + end + + def get_backtrace_binding(di) + a = di.instruction.args.map { |arg| + case arg + when ModRM, Reg, SimdReg; arg.symbolic(di) + else arg + end + } + + if binding = backtrace_binding[di.opcode.basename] + bd = binding[di, *a] + # handle modifications to al/ah etc + bd.keys.grep(Expression).each { |e| + # must be in the form (x & mask), with x either :reg or (:reg >> shift) eg ah == ((eax >> 8) & 0xff) + if e.op == :& and mask = e.rexpr and mask.kind_of? Integer + reg = e.lexpr + reg = reg.lexpr if reg.kind_of? Expression and reg.op == :>> and shift = reg.rexpr and shift.kind_of? Integer + next if not reg.kind_of? Symbol + if bd.has_key? reg + # xchg ah, al ; pop sp.. + puts "backtrace: conflict for #{di}: #{e} vs #{reg}" if $VERBOSE + bd[reg] = Expression::Unknown + next + end + val = bd.delete e + mask <<= shift if shift + invmask = mask ^ (@size == 64 ? 
0xffff_ffff_ffff_ffff : 0xffff_ffff) + if invmask == 0xffff_ffff_0000_0000 and not di.opcode.props[:op32no64] + bd[reg] = Expression[val, :&, 0xffff_ffff] + elsif invmask == 0 + bd[reg] = val + else + val = Expression[val, :<<, shift] if shift + bd[reg] = Expression[[reg, :&, invmask], :|, [val, :&, mask]] + end + end + } + bd + else + puts "unhandled instruction to backtrace: #{di}" if $VERBOSE + # assume nothing except the 1st arg is modified + case a[0] + when Indirection, Symbol; { a[0] => Expression::Unknown } + when Expression; (x = a[0].externals.first) ? { x => Expression::Unknown } : {} + else {} + end.update(:incomplete_binding => Expression[1]) + end + end + + # patch a forward binding from the backtrace binding + # fixes fwdemu for push/pop/call/ret + def fix_fwdemu_binding(di, fbd) + if di.instruction.args.grep(ModRM).find { |m| m.seg and m.symbolic(di).target.lexpr =~ /^segment_base_/ } + fbd = fbd.dup + fbd[:incomplete_binding] = Expression[1] + end + + case di.opcode.name + when 'push', 'call' + fbd = fbd.dup + sz = opsz(di)/8 + esp = register_symbols[4] + if i = fbd.delete(Indirection[esp, sz]) + fbd[Indirection[[esp, :-, sz], sz]] = i + end + when 'pop', 'ret' # nothing to do + when /^(push|pop|call|ret|enter|leave|stos|movs|lods|scas|cmps)/ + fbd = fbd.dup + fbd[:incomplete_binding] = Expression[1] # TODO + end + fbd + end + + def get_xrefs_x(dasm, di) + return [] if not di.opcode.props[:setip] + + sz = opsz(di) + case di.opcode.basename + when 'ret'; return [Indirection[register_symbols[4], sz/8, di.address]] + when 'jmp', 'call' + a = di.instruction.args.first + if dasm and a.kind_of?(ModRM) and a.imm and (a.s == sz/8 or a.s == 4) and not a.b and dasm.get_section_at(a.imm) + return get_xrefs_x_jmptable(dasm, di, a, a.s*8) + end + end + + case tg = di.instruction.args.first + when ModRM + tg.sz ||= sz if tg.kind_of? 
ModRM + [Expression[tg.symbolic(di)]] + when Reg; [Expression[tg.symbolic(di)]] + when Expression, ::Integer; [Expression[tg]] + when Farptr; tg.seg.reduce < 0x30 ? [tg.addr] : [Expression[[tg.seg, :*, 0x10], :+, tg.addr]] + else + puts "unhandled setip at #{di.address} #{di.instruction}" if $DEBUG + [] + end + end + + # we detected a jmp table (jmp [base+4*idx]) + # try to return an accurate dest list + def get_xrefs_x_jmptable(dasm, di, mrm, sz) + # include the symbolic dest for backtrack stuff + ret = [Expression[mrm.symbolic(di)]] + i = mrm.i + if di.block.list.length == 2 and di.block.list[0].opcode.name =~ /^mov/ and a0 = di.block.list[0].instruction.args[0] and + a0.respond_to? :symbolic and a0.symbolic == i.symbolic + i = di.block.list[0].instruction.args[1] + end + pb = di.block.from_normal.to_a + if pb.length == 1 and pdi = dasm.decoded[pb[0]] and pdi.opcode.name =~ /^jn?be?/ and ppdi = pdi.block.list[-2] and ppdi.opcode.name == 'cmp' and + ppdi.instruction.args[0].symbolic == i.symbolic and lim = Expression[ppdi.instruction.args[1]].reduce and lim.kind_of? 
Integer + # cmp eax, 42 ; jbe switch ; switch: jmp [base+4*eax] + s = dasm.get_section_at(mrm.imm) + lim += 1 if pdi.opcode.name[-1] == ?e + lim.times { |v| + dasm.add_xref(s[1]+s[0].ptr, Xref.new(:r, di.address, sz/8)) + ret << Indirection[[mrm.imm, :+, v*sz/8], sz/8, di.address] + s[0].read(sz/8) + } + l = dasm.auto_label_at(mrm.imm, 'jmp_table', 'xref') + replace_instr_arg_immediate(di.instruction, mrm.imm, Expression[l]) + # add 'case 1' comments + cases = {} + ret.each_with_index { |ind, idx| + idx -= 1 # ret[0] = symbolic + next if idx < 0 + a = dasm.backtrace(ind, di.address) + if a.length == 1 and a[0].kind_of?(Expression) and addr = a[0].reduce and addr.kind_of?(::Integer) + (cases[addr] ||= []) << idx + end + } + cases.each { |addr, list| + dasm.add_comment(addr, "case #{list.join(', ')}:") + } + return ret + end + + puts "unrecognized jmp table pattern, using wild guess for #{di}" if $VERBOSE + di.add_comment 'wildguess' + if s = dasm.get_section_at(mrm.imm - 3*sz/8) + v = -3 + else + s = dasm.get_section_at(mrm.imm) + v = 0 + end + while s[0].ptr < s[0].length + ptr = dasm.normalize s[0].decode_imm("u#{sz}".to_sym, @endianness) + diff = Expression[ptr, :-, di.address].reduce + if (diff.kind_of? ::Integer and diff.abs < 4096) or (di.opcode.basename == 'call' and ptr != 0 and dasm.get_section_at(ptr)) + dasm.add_xref(s[1]+s[0].ptr-sz/8, Xref.new(:r, di.address, sz/8)) + ret << Indirection[[mrm.imm, :+, v*sz/8], sz/8, di.address] + elsif v > 0 + break + end + v += 1 + end + ret + end + + # checks if expr is a valid return expression matching the :saveip instruction + def backtrace_is_function_return(expr, di=nil) + expr = Expression[expr].reduce_rec + expr.kind_of? 
Indirection and expr.len == @size/8 and expr.target == Expression[register_symbols[4]] + end + + # updates the function backtrace_binding + # if the function is big and no specific register is given, do nothing (the binding will be lazily updated later, on demand) + # XXX assume retaddrlist is either a list of addr of ret or a list with a single entry which is an external function name (thunk) + def backtrace_update_function_binding(dasm, faddr, f, retaddrlist, *wantregs) + b = f.backtrace_binding + + esp, ebp = register_symbols[4, 2] + + # XXX handle retaddrlist for multiple/mixed thunks + if retaddrlist and not dasm.decoded[retaddrlist.first] and di = dasm.decoded[faddr] + # no return instruction, must be a thunk : find the last instruction (to backtrace from it) + done = [] + while ndi = dasm.decoded[di.block.to_subfuncret.to_a.first] || dasm.decoded[di.block.to_normal.to_a.first] and ndi.kind_of? DecodedInstruction and not done.include? ndi.address + done << ndi.address + di = ndi + end + if not di.block.to_subfuncret.to_a.first and di.block.to_normal and di.block.to_normal.length > 1 + thunklast = di.block.list.last.address + end + end + + bt_val = lambda { |r| + next if not retaddrlist + b[r] = Expression::Unknown # TODO :pending or something ? (for recursive lazy functions) + bt = [] + retaddrlist.each { |retaddr| + bt |= dasm.backtrace(Expression[r], (thunklast ? thunklast : retaddr), + :include_start => true, :snapshot_addr => faddr, :origin => retaddr, :from_subfuncret => thunklast) + } + if bt.length != 1 + b[r] = Expression::Unknown + else + b[r] = bt.first + end + } + + if not wantregs.empty? 
+ wantregs.each(&bt_val) + else + if dasm.function_blocks(faddr, true).length < 20 + register_symbols.each(&bt_val) + else + [ebp, esp].each(&bt_val) + end + end + + backtrace_update_function_binding_check(dasm, faddr, f, b, &bt_val) + + b + end + + def backtrace_update_function_binding_check(dasm, faddr, f, b) + sz = @size/8 + if b[:ebp] and b[:ebp] != Expression[:ebp] + # may be a custom 'enter' function (eg recent Visual Studio) + # TODO put all memory writes in the binding ? + [[:ebp], [:esp, :+, 1*sz], [:esp, :+, 2*sz], [:esp, :+, 3*sz]].each { |ptr| + ind = Indirection[ptr, sz, faddr] + yield(ind) + b.delete(ind) if b[ind] and not [:ebx, :edx, :esi, :edi, :ebp].include? b[ind].reduce_rec + } + end + if dasm.funcs_stdabi + if b[:esp] and b[:esp] == Expression::Unknown and not f.btbind_callback + puts "update_func_bind: #{Expression[faddr]} has esp -> unknown, use dynamic callback" if $DEBUG + f.btbind_callback = disassembler_default_btbind_callback + end + [:ebp, :ebx, :esi, :edi].each { |reg| + if b[reg] and b[reg] == Expression::Unknown + puts "update_func_bind: #{Expression[faddr]} has #{reg} -> unknown, presume it is preserved" if $DEBUG + b[reg] = Expression[reg] + end + } + else + if b[:esp] and not Expression[b[:esp], :-, :esp].reduce.kind_of?(::Integer) + puts "update_func_bind: #{Expression[faddr]} has esp -> #{b[:esp]}" if $DEBUG + end + end + + # rename some functions + # TODO database and real signatures + rename = + if b[:eax] and Expression[b[:eax], :-, faddr].reduce == 0 + 'geteip' # metasm pic linker + elsif b[:eax] and b[:ebx] and Expression[b[:eax], :-, :eax].reduce == 0 and Expression[b[:ebx], :-, Indirection[:esp, sz, nil]].reduce == 0 + 'get_pc_thunk_ebx' # elf pic convention + elsif b[:esp] and Expression[b[:esp], :-, [:esp, :-, Indirection[[:esp, :+, 2*sz], sz]]].reduce.kind_of? 
::Integer and + dasm.decoded[faddr].block.list.find { |di| di.backtrace_binding[Indirection['segment_base_fs', sz]] } + '__SEH_prolog' + elsif b[:esp] == Expression[:ebp, :+, sz] and + dasm.decoded[faddr].block.list.find { |di| di.backtrace_binding[Indirection['segment_base_fs', sz]] } + '__SEH_epilog' + end + dasm.auto_label_at(faddr, rename, 'loc', 'sub') if rename + end + + # returns true if the expression is an address on the stack + def backtrace_is_stack_address(expr) + Expression[expr].expr_externals.include? register_symbols[4] + end + + # updates an instruction's argument replacing an expression with another (eg label renamed) + def replace_instr_arg_immediate(i, old, new) + i.args.map! { |a| + case a + when Expression; a == old ? new : Expression[a.bind(old => new).reduce] + when ModRM + a.imm = (a.imm == old ? new : Expression[a.imm.bind(old => new).reduce]) if a.imm + a + else a + end + } + end + + # returns a DecodedFunction from a parsed C function prototype + # TODO rebacktrace already decoded functions (load a header file after dasm finished) + # TODO walk structs args + def decode_c_function_prototype(cp, sym, orig=nil) + sym = cp.toplevel.symbol[sym] if sym.kind_of?(::String) + df = DecodedFunction.new + orig ||= Expression[sym.name] + + new_bt = lambda { |expr, rlen| + df.backtracked_for << BacktraceTrace.new(expr, orig, expr, rlen ? 
:r : :x, rlen) + } + + # return instr emulation + if sym.has_attribute 'noreturn' or sym.has_attribute '__noreturn__' + df.noreturn = true + else + new_bt[Indirection[:esp, @size/8, orig], nil] + end + + # register dirty (XXX assume standard ABI) + [:eax, :ecx, :edx].each { |r| + df.backtrace_binding.update r => Expression::Unknown + } + + # emulate ret + al = cp.typesize[:ptr] + stackoff = al + if sym.has_attribute 'fastcall' + stackoff = sym.type.args.to_a[2..-1].to_a.inject(al) { |sum, a| sum += (cp.sizeof(a) + al - 1) / al * al } + elsif sym.has_attribute 'stdcall' + stackoff = sym.type.args.to_a.inject(al) { |sum, a| sum += (cp.sizeof(a) + al - 1) / al * al } + end + df.backtrace_binding[:esp] = Expression[:esp, :+, stackoff] + + # scan args for function pointers + # TODO walk structs/unions.. + stackoff = al + sym.type.args.to_a.each { |a| + p = Indirection[[:esp, :+, stackoff], al, orig] + stackoff += (cp.sizeof(a) + al - 1) / al * al + if a.type.untypedef.kind_of? C::Pointer + pt = a.type.untypedef.type.untypedef + if pt.kind_of? C::Function + new_bt[p, nil] + df.backtracked_for.last.detached = true + elsif pt.kind_of? 
C::Struct + new_bt[p, al] + else + new_bt[p, cp.sizeof(nil, pt)] + end + end + } + + df + end + + # the lambda for the :default backtrace_binding callback of the disassembler + # tries to determine the stack offset of unprototyped functions + # working: + # checks that origin is a ret, that expr is an indirection from esp and that expr.origin is the ret + # bt_walk from calladdr until we finds a call into us, and assumes it is the current function start + # TODO handle foo: call bar ; bar: pop eax ; call ; ret -> bar is not the function start (foo is) + # then backtrace expr from calladdr to funcstart (snapshot), using esp -> esp+ + # from the result, compute stackoffvariable (only if trivial) + # will not work if the current function calls any other unknown function (unless all are __cdecl) + # will not work if the current function is framed (ebp leave ret): in this case the function will return, but its esp will be unknown + # if the stack offset is found and funcaddr is a string, fixup the static binding and remove the dynamic binding + # TODO dynamise thunks bt_for & bt_cb + def disassembler_default_btbind_callback + esp = register_symbols[4] + + lambda { |dasm, bind, funcaddr, calladdr, expr, origin, maxdepth| + @dasm_func_default_off ||= {} + if off = @dasm_func_default_off[[dasm, calladdr]] + bind = bind.merge(esp => Expression[esp, :+, off]) + break bind + end + break bind if not odi = dasm.decoded[origin] or odi.opcode.basename != 'ret' + expr = expr.reduce_rec if expr.kind_of? Expression + break bind unless expr.kind_of? 
Indirection and expr.origin == origin + break bind unless expr.externals.reject { |e| e =~ /^autostackoffset_/ } == [esp] + + curfunc = dasm.function[funcaddr] + if curfunc.backtrace_binding and tk = curfunc.backtrace_binding[:thunk] and dasm.function[tk] + curfunc = dasm.function[tk] + end + + # scan from calladdr for the probable parent function start + func_start = nil + dasm.backtrace_walk(true, calladdr, false, false, nil, maxdepth) { |ev, foo, h| + if ev == :up and h[:sfret] != :subfuncret and di = dasm.decoded[h[:to]] and di.opcode.basename == 'call' + func_start = h[:from] + break + elsif ev == :end + # entrypoints are functions too + func_start = h[:addr] + break + end + } + break bind if not func_start + puts "automagic #{Expression[funcaddr]}: found func start for #{dasm.decoded[origin]} at #{Expression[func_start]}" if dasm.debug_backtrace + s_off = "autostackoffset_#{Expression[funcaddr]}_#{Expression[calladdr]}" + list = dasm.backtrace(expr.bind(esp => Expression[esp, :+, s_off]), calladdr, :include_start => true, :snapshot_addr => func_start, :maxdepth => maxdepth, :origin => origin) + # check if this backtrace made us find our binding + if off = @dasm_func_default_off[[dasm, calladdr]] + bind = bind.merge(esp => Expression[esp, :+, off]) + break bind + elsif not curfunc.btbind_callback + break curfunc.backtrace_binding + end + e_expr = list.find { |e_expr_| + # TODO cleanup this + e_expr_ = Expression[e_expr_].reduce_rec + next if not e_expr_.kind_of? Indirection + off = Expression[[esp, :+, s_off], :-, e_expr_.target].reduce + off.kind_of? Integer and off >= @size/8 and off < 10*@size/8 and (off % (@size/8)) == 0 + } || list.first + + e_expr = e_expr.rexpr if e_expr.kind_of? Expression and e_expr.op == :+ and not e_expr.lexpr + break bind unless e_expr.kind_of? Indirection + + off = Expression[[esp, :+, s_off], :-, e_expr.target].reduce + if off.kind_of? 
Expression + bd = off.externals.grep(/^autostackoffset_/).inject({}) { |bd_, xt| bd_.update xt => @size/8 } + bd.delete s_off + if off.bind(bd).reduce == @size/8 + # all __cdecl + off = @size/8 + else + # check if all calls are to the same extern func + bd.delete_if { |k, v| k !~ /^autostackoffset_#{Expression[funcaddr]}_/ } + bd.each_key { |k| bd[k] = 0 } + if off.bind(bd).reduce.kind_of? Integer + off = off.bind(bd).reduce / (bd.length + 1) + end + end + end + if off.kind_of? Integer + if off < @size/8 or off > 20*@size/8 or (off % (@size/8)) != 0 + puts "autostackoffset: ignoring off #{off} for #{Expression[funcaddr]} from #{dasm.decoded[calladdr]}" if $VERBOSE + off = :unknown + end + end + + bind = bind.merge esp => Expression[esp, :+, off] if off != :unknown + if funcaddr != :default + if not off.kind_of? ::Integer + #XXX we allow the current function to return, so we should handle the func backtracking its esp + #(and other register that are saved and restored in epilog) + puts "stackoff #{dasm.decoded[calladdr]} | #{Expression[func_start]} | #{expr} | #{e_expr} | #{off}" if dasm.debug_backtrace + else + puts "autostackoffset: found #{off} for #{Expression[funcaddr]} from #{dasm.decoded[calladdr]}" if $VERBOSE + curfunc.btbind_callback = nil + curfunc.backtrace_binding = bind + + # rebacktrace the return address, so that other unknown funcs that depend on us are solved + dasm.backtrace(Indirection[esp, @size/8, origin], origin, :origin => origin) + end + else + if off.kind_of? 
::Integer and dasm.decoded[calladdr] + puts "autostackoffset: found #{off-@size/8} for #{dasm.decoded[calladdr]}" if $VERBOSE + di = dasm.decoded[calladdr] + di.comment.delete_if { |c| c =~ /^stackoff=/ } if di.comment + di.add_comment "stackoff=#{off-@size/8}" + @dasm_func_default_off[[dasm, calladdr]] = off + + dasm.backtrace(Indirection[esp, @size/8, origin], origin, :origin => origin) + elsif cachedoff = @dasm_func_default_off[[dasm, calladdr]] + bind[esp] = Expression[esp, :+, cachedoff] + elsif off.kind_of? ::Integer + dasm.decoded[calladdr].add_comment "stackoff=#{off-@size/8}" + end + + puts "stackoff #{dasm.decoded[calladdr]} | #{Expression[func_start]} | #{expr} | #{e_expr} | #{off}" if dasm.debug_backtrace + end + + bind + } + end + + # the :default backtracked_for callback + # returns empty unless funcaddr is not default or calladdr is a call or a jmp + def disassembler_default_btfor_callback + lambda { |dasm, btfor, funcaddr, calladdr| + if funcaddr != :default; btfor + elsif di = dasm.decoded[calladdr] and (di.opcode.name == 'call' or di.opcode.name == 'jmp'); btfor + else [] + end + } + end + + # returns a DecodedFunction suitable for :default + # uses disassembler_default_bt{for/bind}_callback + def disassembler_default_func + esp = register_symbols[4] + cp = new_cparser + cp.parse 'void stdfunc(void);' + f = decode_c_function_prototype(cp, 'stdfunc', :default) + f.backtrace_binding[esp] = Expression[esp, :+, :unknown] + f.btbind_callback = disassembler_default_btbind_callback + f.btfor_callback = disassembler_default_btfor_callback + f + end + + # returns a hash { :retval => r, :changed => [] } + def abi_funcall + { :retval => register_symbols[0], :changed => register_symbols[0, 3] } + end + + + # computes the binding of the sequence of code starting at entry included + # the binding is a hash showing the value of modified elements at the + # end of the code sequence, relative to their value at entry + # the elements are all the registers and the 
memory written to + # if finish is nil, the binding will include :ip, which is the address + # to be executed next (if it exists) + # the binding will not include memory access from subfunctions + # entry should be an entrypoint of the disassembler if finish is nil + # the code sequence must have only one end, with no to_normal + def code_binding(dasm, entry, finish=nil) + entry = dasm.normalize(entry) + finish = dasm.normalize(finish) if finish + lastdi = nil + binding = {} + bt = lambda { |from, expr, inc_start| + ret = dasm.backtrace(Expression[expr], from, :snapshot_addr => entry, :include_start => inc_start) + ret.length == 1 ? ret.first : Expression::Unknown + } + + # walk blocks, search for finish, scan memory writes + todo = [entry] + done = [Expression::Unknown] + while addr = todo.pop + addr = dasm.normalize(addr) + next if done.include? addr or addr == finish or not dasm.decoded[addr].kind_of? DecodedInstruction + done << addr + b = dasm.decoded[addr].block + + next if b.list.find { |di| + a = di.address + if a == finish + lastdi = b.list[b.list.index(di) - 1] + true + else + # check writes from the instruction + get_xrefs_w(dasm, di).each { |waddr, len| + # we want the ptr expressed with reg values at entry + ptr = bt[a, waddr, false] + binding[Indirection[ptr, len, a]] = bt[a, Indirection[waddr, len, a], true] + } + false + end + } + + hasnext = false + b.each_to_samefunc(dasm) { |t| + hasnext = true + if t == finish + lastdi = b.list.last + else + todo << t + end + } + + # check end of sequence + if not hasnext + raise "two-ended code_binding #{lastdi} & #{b.list.last}" if lastdi + lastdi = b.list.last + if lastdi.opcode.props[:setip] + e = get_xrefs_x(dasm, lastdi) + raise 'bad code_binding ending' if e.to_a.length != 1 or not lastdi.opcode.props[:stopexec] + binding[:ip] = bt[lastdi.address, e.first, false] + elsif not lastdi.opcode.props[:stopexec] + binding[:ip] = lastdi.next_addr + end + end + end + binding.delete_if { |k, v| Expression[k] == 
Expression[v] } + + # add register binding + raise "no code_binding end" if not lastdi and not finish + register_symbols.each { |reg| + val = + if lastdi; bt[lastdi.address, reg, true] + else bt[finish, reg, false] + end + next if val == Expression[reg] + mask = 0xffff_ffff # dont use 1<<@size, because 16bit code may use e.g. edi (through opszoverride) + mask = 0xffff_ffff_ffff_ffff if @size == 64 + val = Expression[val, :&, mask].reduce + binding[reg] = Expression[val] + } + + binding + end + + # trace the stack pointer register across a function, rename occurences of esp+XX to esp+var_XX + def name_local_vars(dasm, funcaddr) + esp = register_symbols[4] + func = dasm.function[funcaddr] + subs = [] + dasm.trace_function_register(funcaddr, esp => 0) { |di, r, off, trace| + next if r.to_s =~ /flag/ + if di.opcode.name == 'call' and tf = di.block.to_normal.find { |t| dasm.function[t] and dasm.function[t].localvars } + subs << [trace[esp], dasm.function[tf].localvars] + end + di.instruction.args.grep(ModRM).each { |mrm| + b = mrm.b || (mrm.i if mrm.s == 1) + # its a modrm => b is read, so ignore r/off (not yet applied), use trace only + stackoff = trace[b.symbolic] if b + next if not stackoff + imm = mrm.imm || Expression[0] + frameoff = imm + stackoff + if frameoff.kind_of?(::Integer) + # XXX register args ? non-ABI standard register args ? (eg optimized x64) + str = 'var_%X' % (-frameoff) + str = 'arg_%X' % (frameoff-@size/8) if frameoff > 0 + str = func.get_localvar_stackoff(frameoff, di, str) if func + imm = imm.expr if imm.kind_of?(ExpressionString) + mrm.imm = ExpressionString.new(imm, str, :stackvar) + end + } + off = off.reduce if off.kind_of?(Expression) + next unless off.kind_of?(Integer) + off + } + # if subfunctions are called at a fixed stack offset, rename var_3c -> subarg_0 + if func and func.localvars and not subs.empty? and subs.all? 
{ |sb| sb[0] == subs.first[0] } + func.localvars.each { |varoff, varname| + subargnames = subs.map { |o, sb| sb[varoff-o+@size/8] }.compact + if subargnames.uniq.length == 1 + varname.replace 'sub'+subargnames[0] + end + } + end + end end end diff --git a/lib/metasm/metasm/cpu/ia32/decompile.rb b/lib/metasm/metasm/cpu/ia32/decompile.rb index 06035defd7..a32950ddb7 100644 --- a/lib/metasm/metasm/cpu/ia32/decompile.rb +++ b/lib/metasm/metasm/cpu/ia32/decompile.rb @@ -8,557 +8,557 @@ require 'metasm/cpu/ia32/main' module Metasm class Ia32 - # temporarily setup dasm.address_binding so that backtracking - # stack-related offsets resolve in :frameptr (relative to func start) - def decompile_makestackvars(dasm, funcstart, blocks) - oldfuncbd = dasm.address_binding[funcstart] - dasm.address_binding[funcstart] = { :esp => :frameptr } # this would suffice, the rest here is just optimisation + # temporarily setup dasm.address_binding so that backtracking + # stack-related offsets resolve in :frameptr (relative to func start) + def decompile_makestackvars(dasm, funcstart, blocks) + oldfuncbd = dasm.address_binding[funcstart] + dasm.address_binding[funcstart] = { :esp => :frameptr } # this would suffice, the rest here is just optimisation - patched_binding = [funcstart] # list of addresses to cleanup later - ebp_frame = true + patched_binding = [funcstart] # list of addresses to cleanup later + ebp_frame = true - # pretrace esp and ebp for each function block (cleared later) - # TODO with more than 1 unknown __stdcall ext func per path, esp -> unknown, which makes very ugly C (*esp-- = 12...); add heuristics ? - blocks.each { |block| - blockstart = block.address - if not dasm.address_binding[blockstart] - patched_binding << blockstart - dasm.address_binding[blockstart] = {} - foo = dasm.backtrace(:esp, blockstart, :snapshot_addr => funcstart) - if foo.length == 1 and ee = foo.first and ee.kind_of? 
Expression and (ee == Expression[:frameptr] or - (ee.lexpr == :frameptr and ee.op == :+ and ee.rexpr.kind_of? ::Integer)) - dasm.address_binding[blockstart][:esp] = ee - end - if ebp_frame - foo = dasm.backtrace(:ebp, blockstart, :snapshot_addr => funcstart) - if foo.length == 1 and ee = foo.first and ee.kind_of? Expression and (ee == Expression[:frameptr] or - (ee.lexpr == :frameptr and ee.op == :+ and ee.rexpr.kind_of? ::Integer)) - dasm.address_binding[blockstart][:ebp] = ee - else - ebp_frame = false # func does not use ebp as frame ptr, no need to bt for later blocks - end - end - end + # pretrace esp and ebp for each function block (cleared later) + # TODO with more than 1 unknown __stdcall ext func per path, esp -> unknown, which makes very ugly C (*esp-- = 12...); add heuristics ? + blocks.each { |block| + blockstart = block.address + if not dasm.address_binding[blockstart] + patched_binding << blockstart + dasm.address_binding[blockstart] = {} + foo = dasm.backtrace(:esp, blockstart, :snapshot_addr => funcstart) + if foo.length == 1 and ee = foo.first and ee.kind_of? Expression and (ee == Expression[:frameptr] or + (ee.lexpr == :frameptr and ee.op == :+ and ee.rexpr.kind_of? ::Integer)) + dasm.address_binding[blockstart][:esp] = ee + end + if ebp_frame + foo = dasm.backtrace(:ebp, blockstart, :snapshot_addr => funcstart) + if foo.length == 1 and ee = foo.first and ee.kind_of? Expression and (ee == Expression[:frameptr] or + (ee.lexpr == :frameptr and ee.op == :+ and ee.rexpr.kind_of? 
::Integer)) + dasm.address_binding[blockstart][:ebp] = ee + else + ebp_frame = false # func does not use ebp as frame ptr, no need to bt for later blocks + end + end + end - yield block - } + yield block + } - ensure - patched_binding.each { |a| dasm.address_binding.delete a } - dasm.address_binding[funcstart] = oldfuncbd if oldfuncbd - end + ensure + patched_binding.each { |a| dasm.address_binding.delete a } + dasm.address_binding[funcstart] = oldfuncbd if oldfuncbd + end - # list variable dependency for each block, remove useless writes - # returns { blockaddr => [list of vars that are needed by a following block] } - def decompile_func_finddeps(dcmp, blocks, func) - deps_r = {} ; deps_w = {} ; deps_to = {} - deps_subfunc = {} # things read/written by subfuncs + # list variable dependency for each block, remove useless writes + # returns { blockaddr => [list of vars that are needed by a following block] } + def decompile_func_finddeps(dcmp, blocks, func) + deps_r = {} ; deps_w = {} ; deps_to = {} + deps_subfunc = {} # things read/written by subfuncs - # find read/writes by each block - blocks.each { |b, to| - deps_r[b] = [] ; deps_w[b] = [] ; deps_to[b] = to - deps_subfunc[b] = [] + # find read/writes by each block + blocks.each { |b, to| + deps_r[b] = [] ; deps_w[b] = [] ; deps_to[b] = to + deps_subfunc[b] = [] - blk = dcmp.dasm.decoded[b].block - blk.list.each { |di| - a = di.backtrace_binding.values - w = [] - di.backtrace_binding.keys.each { |k| - case k - when ::Symbol; w |= [k] - else a |= Expression[k].externals # if dword [eax] <- 42, eax is read - end - } - a << :eax if di.opcode.name == 'ret' and (not func.type.kind_of? 
C::BaseType or func.type.type.name != :void) # standard ABI + blk = dcmp.dasm.decoded[b].block + blk.list.each { |di| + a = di.backtrace_binding.values + w = [] + di.backtrace_binding.keys.each { |k| + case k + when ::Symbol; w |= [k] + else a |= Expression[k].externals # if dword [eax] <- 42, eax is read + end + } + a << :eax if di.opcode.name == 'ret' and (not func.type.kind_of? C::BaseType or func.type.type.name != :void) # standard ABI - deps_r[b] |= a.map { |ee| Expression[ee].externals.grep(::Symbol) }.flatten - [:unknown] - deps_w[b] - deps_w[b] |= w.map { |ee| Expression[ee].externals.grep(::Symbol) }.flatten - [:unknown] - } - stackoff = nil - blk.each_to_normal { |t| - t = dcmp.backtrace_target(t, blk.list.last.address) - next if not t = dcmp.c_parser.toplevel.symbol[t] - t.type = C::Function.new(C::BaseType.new(:int)) if not t.type.kind_of? C::Function # XXX this may seem a bit extreme, and yes, it is. - stackoff ||= Expression[dcmp.dasm.backtrace(:esp, blk.list.last.address, :snapshot_addr => blocks.first[0]).first, :-, :esp].reduce + deps_r[b] |= a.map { |ee| Expression[ee].externals.grep(::Symbol) }.flatten - [:unknown] - deps_w[b] + deps_w[b] |= w.map { |ee| Expression[ee].externals.grep(::Symbol) }.flatten - [:unknown] + } + stackoff = nil + blk.each_to_normal { |t| + t = dcmp.backtrace_target(t, blk.list.last.address) + next if not t = dcmp.c_parser.toplevel.symbol[t] + t.type = C::Function.new(C::BaseType.new(:int)) if not t.type.kind_of? C::Function # XXX this may seem a bit extreme, and yes, it is. 
+ stackoff ||= Expression[dcmp.dasm.backtrace(:esp, blk.list.last.address, :snapshot_addr => blocks.first[0]).first, :-, :esp].reduce - # things that are needed by the subfunction - if t.has_attribute('fastcall') - a = t.type.args.to_a - dep = [:ecx, :edx] - dep.shift if not a[0] or a[0].has_attribute('unused') - dep.pop if not a[1] or a[1].has_attribute('unused') - deps_subfunc[b] |= dep - end - t.type.args.to_a.each { |arg| - if reg = arg.has_attribute('register') - deps_subfunc[b] |= [reg.to_sym] - end - } - } - if stackoff # last block instr == subfunction call - deps_r[b] |= deps_subfunc[b] - deps_w[b] - deps_w[b] |= [:eax, :ecx, :edx] # standard ABI - end - } + # things that are needed by the subfunction + if t.has_attribute('fastcall') + a = t.type.args.to_a + dep = [:ecx, :edx] + dep.shift if not a[0] or a[0].has_attribute('unused') + dep.pop if not a[1] or a[1].has_attribute('unused') + deps_subfunc[b] |= dep + end + t.type.args.to_a.each { |arg| + if reg = arg.has_attribute('register') + deps_subfunc[b] |= [reg.to_sym] + end + } + } + if stackoff # last block instr == subfunction call + deps_r[b] |= deps_subfunc[b] - deps_w[b] + deps_w[b] |= [:eax, :ecx, :edx] # standard ABI + end + } - bt = blocks.transpose - roots = bt[0] - bt[1].flatten # XXX jmp 1stblock ? + bt = blocks.transpose + roots = bt[0] - bt[1].flatten # XXX jmp 1stblock ? - # find regs read and never written (must have been set by caller and are part of the func ABI) - uninitialized = lambda { |b, r, done| - if not deps_r[b] - elsif deps_r[b].include?(r) - blk = dcmp.dasm.decoded[b].block - bw = [] - rdi = blk.list.find { |di| - a = di.backtrace_binding.values - w = [] - di.backtrace_binding.keys.each { |k| - case k - when ::Symbol; w |= [k] - else a |= Expression[k].externals # if dword [eax] <- 42, eax is read - end - } - a << :eax if di.opcode.name == 'ret' and (not func.type.kind_of? 
C::BaseType or func.type.type.name != :void) # standard ABI + # find regs read and never written (must have been set by caller and are part of the func ABI) + uninitialized = lambda { |b, r, done| + if not deps_r[b] + elsif deps_r[b].include?(r) + blk = dcmp.dasm.decoded[b].block + bw = [] + rdi = blk.list.find { |di| + a = di.backtrace_binding.values + w = [] + di.backtrace_binding.keys.each { |k| + case k + when ::Symbol; w |= [k] + else a |= Expression[k].externals # if dword [eax] <- 42, eax is read + end + } + a << :eax if di.opcode.name == 'ret' and (not func.type.kind_of? C::BaseType or func.type.type.name != :void) # standard ABI - next true if (a.map { |ee| Expression[ee].externals.grep(::Symbol) }.flatten - [:unknown] - bw).include? r - bw |= w.map { |ee| Expression[ee].externals.grep(::Symbol) }.flatten - [:unknown] - false - } - if r == :eax and (rdi || blk.list.last).opcode.name == 'ret' - func.type.type = C::BaseType.new(:void) - false - elsif rdi and rdi.backtrace_binding[r] - false # mov al, 42 ; ret -> don't regarg eax - else - true - end - elsif deps_w[b].include?(r) - else - done << b - (deps_to[b] - done).find { |tb| uninitialized[tb, r, done] } - end - } + next true if (a.map { |ee| Expression[ee].externals.grep(::Symbol) }.flatten - [:unknown] - bw).include? 
r + bw |= w.map { |ee| Expression[ee].externals.grep(::Symbol) }.flatten - [:unknown] + false + } + if r == :eax and (rdi || blk.list.last).opcode.name == 'ret' + func.type.type = C::BaseType.new(:void) + false + elsif rdi and rdi.backtrace_binding[r] + false # mov al, 42 ; ret -> don't regarg eax + else + true + end + elsif deps_w[b].include?(r) + else + done << b + (deps_to[b] - done).find { |tb| uninitialized[tb, r, done] } + end + } - regargs = [] - register_symbols.each { |r| - if roots.find { |root| uninitialized[root, r, []] } - regargs << r - end - } + regargs = [] + register_symbols.each { |r| + if roots.find { |root| uninitialized[root, r, []] } + regargs << r + end + } - # TODO honor user-defined prototype if available (eg no, really, eax is not read in this function returning al) - regargs.sort_by { |r| r.to_s }.each { |r| - a = C::Variable.new(r.to_s, C::BaseType.new(:int, :unsigned)) - a.add_attribute("register(#{r})") - func.type.args << a - } + # TODO honor user-defined prototype if available (eg no, really, eax is not read in this function returning al) + regargs.sort_by { |r| r.to_s }.each { |r| + a = C::Variable.new(r.to_s, C::BaseType.new(:int, :unsigned)) + a.add_attribute("register(#{r})") + func.type.args << a + } - # remove writes from a block if no following block read the value - dw = {} - deps_w.each { |b, deps| - dw[b] = deps.reject { |dep| - ret = true - done = [] - todo = deps_to[b].dup - while a = todo.pop - next if done.include? a - done << a - if not deps_r[a] or deps_r[a].include? dep - ret = false - break - elsif not deps_w[a].include? dep - todo.concat deps_to[a] - end - end - ret - } - } + # remove writes from a block if no following block read the value + dw = {} + deps_w.each { |b, deps| + dw[b] = deps.reject { |dep| + ret = true + done = [] + todo = deps_to[b].dup + while a = todo.pop + next if done.include? a + done << a + if not deps_r[a] or deps_r[a].include? dep + ret = false + break + elsif not deps_w[a].include? 
dep + todo.concat deps_to[a] + end + end + ret + } + } - dw - end + dw + end - def decompile_blocks(dcmp, myblocks, deps, func, nextaddr = nil) - scope = func.initializer - func.type.args.each { |a| scope.symbol[a.name] = a } - stmts = scope.statements - blocks_toclean = myblocks.dup - func_entry = myblocks.first[0] - until myblocks.empty? - b, to = myblocks.shift - if l = dcmp.dasm.get_label_at(b) - stmts << C::Label.new(l) - end + def decompile_blocks(dcmp, myblocks, deps, func, nextaddr = nil) + scope = func.initializer + func.type.args.each { |a| scope.symbol[a.name] = a } + stmts = scope.statements + blocks_toclean = myblocks.dup + func_entry = myblocks.first[0] + until myblocks.empty? + b, to = myblocks.shift + if l = dcmp.dasm.get_label_at(b) + stmts << C::Label.new(l) + end - # list of assignments [[dest reg, expr assigned]] - ops = [] - # reg binding (reg => value, values.externals = regs at block start) - binding = {} - # Expr => CExpr - ce = lambda { |*e| dcmp.decompile_cexpr(Expression[Expression[*e].reduce], scope) } - # Expr => Expr.bind(binding) => CExpr - ceb = lambda { |*e| ce[Expression[*e].bind(binding)] } + # list of assignments [[dest reg, expr assigned]] + ops = [] + # reg binding (reg => value, values.externals = regs at block start) + binding = {} + # Expr => CExpr + ce = lambda { |*e| dcmp.decompile_cexpr(Expression[Expression[*e].reduce], scope) } + # Expr => Expr.bind(binding) => CExpr + ceb = lambda { |*e| ce[Expression[*e].bind(binding)] } - # dumps a CExprs that implements an assignment to a reg (uses ops[], patches op => [reg, nil]) - commit = lambda { - deps[b].map { |k| - [k, ops.rindex(ops.reverse.find { |r, v| r == k })] - }.sort_by { |k, i| i.to_i }.each { |k, i| - next if not i or not binding[k] - e = k - final = [] - ops[0..i].reverse_each { |r, v| - final << r if not v - e = Expression[e].bind(r => v).reduce if not final.include? 
r - } - ops[i][1] = nil - binding.delete k - stmts << ce[k, :'=', e] if k != e - } - } + # dumps a CExprs that implements an assignment to a reg (uses ops[], patches op => [reg, nil]) + commit = lambda { + deps[b].map { |k| + [k, ops.rindex(ops.reverse.find { |r, v| r == k })] + }.sort_by { |k, i| i.to_i }.each { |k, i| + next if not i or not binding[k] + e = k + final = [] + ops[0..i].reverse_each { |r, v| + final << r if not v + e = Expression[e].bind(r => v).reduce if not final.include? r + } + ops[i][1] = nil + binding.delete k + stmts << ce[k, :'=', e] if k != e + } + } - # returns an array to use as funcall arguments - get_func_args = lambda { |di, f| - # XXX see remarks in #finddeps - bt = dcmp.dasm.backtrace(:esp, di.address, :snapshot_addr => func_entry, :include_start => true) - stackoff = Expression[[bt, :+, @size/8], :-, :esp].bind(:esp => :frameptr).reduce rescue nil - args_todo = f.type.args.to_a.dup - args = [] - if f.has_attribute('fastcall') # XXX DRY - if a = args_todo.shift - mask = (1 << (8*dcmp.c_parser.sizeof(a))) - 1 - mask = 0 if a.has_attribute('unused') - args << Expression[:ecx, :&, mask] - end - if a = args_todo.shift - mask = (1 << (8*dcmp.c_parser.sizeof(a))) - 1 # char => dl - mask = 0 if a.has_attribute('unused') - args << Expression[:edx, :&, mask] - end - end - args_todo.each { |a_| - if r = a_.has_attribute_var('register') - args << Expression[r.to_sym] - elsif stackoff.kind_of? 
Integer - args << Indirection[[:frameptr, :+, stackoff], @size/8] - stackoff += [dcmp.sizeof(a_), @size/8].max - else - args << Expression[0] - end - } + # returns an array to use as funcall arguments + get_func_args = lambda { |di, f| + # XXX see remarks in #finddeps + bt = dcmp.dasm.backtrace(:esp, di.address, :snapshot_addr => func_entry, :include_start => true) + stackoff = Expression[[bt, :+, @size/8], :-, :esp].bind(:esp => :frameptr).reduce rescue nil + args_todo = f.type.args.to_a.dup + args = [] + if f.has_attribute('fastcall') # XXX DRY + if a = args_todo.shift + mask = (1 << (8*dcmp.c_parser.sizeof(a))) - 1 + mask = 0 if a.has_attribute('unused') + args << Expression[:ecx, :&, mask] + end + if a = args_todo.shift + mask = (1 << (8*dcmp.c_parser.sizeof(a))) - 1 # char => dl + mask = 0 if a.has_attribute('unused') + args << Expression[:edx, :&, mask] + end + end + args_todo.each { |a_| + if r = a_.has_attribute_var('register') + args << Expression[r.to_sym] + elsif stackoff.kind_of? Integer + args << Indirection[[:frameptr, :+, stackoff], @size/8] + stackoff += [dcmp.sizeof(a_), @size/8].max + else + args << Expression[0] + end + } - if f.type.varargs and f.type.args.last.type.pointer? and stackoff.kind_of? Integer - # check if last arg is a fmtstring - bt = dcmp.dasm.backtrace(args.last, di.address, :snapshot_addr => func_entry, :include_start => true) - if bt.length == 1 and s = dcmp.dasm.get_section_at(bt.first) - fmt = s[0].read(512) - fmt = fmt.unpack('v*').pack('C*') if dcmp.sizeof(f.type.args.last.type.untypedef.type) == 2 - if fmt.index(?\0) - fmt = fmt[0...fmt.index(?\0)] - fmt.gsub('%%', '').count('%').times { # XXX %.*s etc.. - args << Indirection[[:frameptr, :+, stackoff], @size/8] - stackoff += @size/8 - } - end - end - end + if f.type.varargs and f.type.args.last.type.pointer? and stackoff.kind_of? 
Integer + # check if last arg is a fmtstring + bt = dcmp.dasm.backtrace(args.last, di.address, :snapshot_addr => func_entry, :include_start => true) + if bt.length == 1 and s = dcmp.dasm.get_section_at(bt.first) + fmt = s[0].read(512) + fmt = fmt.unpack('v*').pack('C*') if dcmp.sizeof(f.type.args.last.type.untypedef.type) == 2 + if fmt.index(?\0) + fmt = fmt[0...fmt.index(?\0)] + fmt.gsub('%%', '').count('%').times { # XXX %.*s etc.. + args << Indirection[[:frameptr, :+, stackoff], @size/8] + stackoff += @size/8 + } + end + end + end - args.map { |e| ceb[e] } - } + args.map { |e| ceb[e] } + } - # go ! - dcmp.dasm.decoded[b].block.list.each_with_index { |di, didx| - a = di.instruction.args - if di.opcode.props[:setip] and not di.opcode.props[:stopexec] - # conditional jump - commit[] - n = dcmp.backtrace_target(get_xrefs_x(dcmp.dasm, di).first, di.address) - if di.opcode.name =~ /^loop(.+)?/ - cx = C::CExpression[:'--', ceb[:ecx]] - cc = $1 ? C::CExpression[cx, :'&&', ceb[decode_cc_to_expr($1)]] : cx - else - cc = ceb[decode_cc_to_expr(di.opcode.name[1..-1])] - end - # XXX switch/indirect/multiple jmp - stmts << C::If.new(C::CExpression[cc], C::Goto.new(n)) - to.delete dcmp.dasm.normalize(n) - next - end + # go ! + dcmp.dasm.decoded[b].block.list.each_with_index { |di, didx| + a = di.instruction.args + if di.opcode.props[:setip] and not di.opcode.props[:stopexec] + # conditional jump + commit[] + n = dcmp.backtrace_target(get_xrefs_x(dcmp.dasm, di).first, di.address) + if di.opcode.name =~ /^loop(.+)?/ + cx = C::CExpression[:'--', ceb[:ecx]] + cc = $1 ? 
C::CExpression[cx, :'&&', ceb[decode_cc_to_expr($1)]] : cx + else + cc = ceb[decode_cc_to_expr(di.opcode.name[1..-1])] + end + # XXX switch/indirect/multiple jmp + stmts << C::If.new(C::CExpression[cc], C::Goto.new(n)) + to.delete dcmp.dasm.normalize(n) + next + end - if di.opcode.name == 'mov' - # mov cr0 etc - a1, a2 = di.instruction.args - case a1 - when Ia32::CtrlReg, Ia32::DbgReg, Ia32::TstReg, Ia32::SegReg - sz = a1.kind_of?(Ia32::SegReg) ? 16 : 32 - if not dcmp.c_parser.toplevel.symbol["intrinsic_set_#{a1}"] - dcmp.c_parser.parse("void intrinsic_set_#{a1}(__int#{sz});") - end - f = dcmp.c_parser.toplevel.symbol["intrinsic_set_#{a1}"] - a2 = a2.symbolic(di) - a2 = [a2, :&, 0xffff] if sz == 16 - stmts << C::CExpression.new(f, :funcall, [ceb[a2]], f.type.type) - next - end - case a2 - when Ia32::CtrlReg, Ia32::DbgReg, Ia32::TstReg, Ia32::SegReg - if not dcmp.c_parser.toplevel.symbol["intrinsic_get_#{a2}"] - sz = a2.kind_of?(Ia32::SegReg) ? 16 : 32 - dcmp.c_parser.parse("__int#{sz} intrinsic_get_#{a2}(void);") - end - f = dcmp.c_parser.toplevel.symbol["intrinsic_get_#{a2}"] - t = f.type.type - binding.delete a1.symbolic(di) - stmts << C::CExpression.new(ce[a1.symbolic(di)], :'=', C::CExpression.new(f, :funcall, [], t), t) - next - end - end + if di.opcode.name == 'mov' + # mov cr0 etc + a1, a2 = di.instruction.args + case a1 + when Ia32::CtrlReg, Ia32::DbgReg, Ia32::TstReg, Ia32::SegReg + sz = a1.kind_of?(Ia32::SegReg) ? 16 : 32 + if not dcmp.c_parser.toplevel.symbol["intrinsic_set_#{a1}"] + dcmp.c_parser.parse("void intrinsic_set_#{a1}(__int#{sz});") + end + f = dcmp.c_parser.toplevel.symbol["intrinsic_set_#{a1}"] + a2 = a2.symbolic(di) + a2 = [a2, :&, 0xffff] if sz == 16 + stmts << C::CExpression.new(f, :funcall, [ceb[a2]], f.type.type) + next + end + case a2 + when Ia32::CtrlReg, Ia32::DbgReg, Ia32::TstReg, Ia32::SegReg + if not dcmp.c_parser.toplevel.symbol["intrinsic_get_#{a2}"] + sz = a2.kind_of?(Ia32::SegReg) ? 
16 : 32 + dcmp.c_parser.parse("__int#{sz} intrinsic_get_#{a2}(void);") + end + f = dcmp.c_parser.toplevel.symbol["intrinsic_get_#{a2}"] + t = f.type.type + binding.delete a1.symbolic(di) + stmts << C::CExpression.new(ce[a1.symbolic(di)], :'=', C::CExpression.new(f, :funcall, [], t), t) + next + end + end - case di.opcode.name - when 'ret' - commit[] - ret = nil - ret = C::CExpression[ceb[:eax]] unless func.type.type.kind_of? C::BaseType and func.type.type.name == :void - stmts << C::Return.new(ret) - when 'call' # :saveip - n = dcmp.backtrace_target(get_xrefs_x(dcmp.dasm, di).first, di.address) - args = [] - if f = dcmp.c_parser.toplevel.symbol[n] and f.type.kind_of? C::Function and f.type.args - args = get_func_args[di, f] - elsif defined? @dasm_func_default_off and o = @dasm_func_default_off[[dcmp.dasm, di.address]] and o.kind_of? Integer and o > @size/8 - f = C::Variable.new - f.type = C::Function.new(C::BaseType.new(:int), []) - ((o/(@size/8))-1).times { f.type.args << C::Variable.new(nil,C::BaseType.new(:int)) } - args = get_func_args[di, f] - end - commit[] - #next if not di.block.to_subfuncret + case di.opcode.name + when 'ret' + commit[] + ret = nil + ret = C::CExpression[ceb[:eax]] unless func.type.type.kind_of? C::BaseType and func.type.type.name == :void + stmts << C::Return.new(ret) + when 'call' # :saveip + n = dcmp.backtrace_target(get_xrefs_x(dcmp.dasm, di).first, di.address) + args = [] + if f = dcmp.c_parser.toplevel.symbol[n] and f.type.kind_of? C::Function and f.type.args + args = get_func_args[di, f] + elsif defined? @dasm_func_default_off and o = @dasm_func_default_off[[dcmp.dasm, di.address]] and o.kind_of? Integer and o > @size/8 + f = C::Variable.new + f.type = C::Function.new(C::BaseType.new(:int), []) + ((o/(@size/8))-1).times { f.type.args << C::Variable.new(nil,C::BaseType.new(:int)) } + args = get_func_args[di, f] + end + commit[] + #next if not di.block.to_subfuncret - if not n.kind_of? ::String or (f and not f.type.kind_of? 
C::Function) - # indirect funcall - fptr = ceb[n] - binding.delete n - proto = C::Function.new(C::BaseType.new(:int)) - proto = f.type if f and f.type.kind_of? C::Function - f = C::CExpression[[fptr], C::Pointer.new(proto)] - elsif not f - # internal functions are predeclared, so this one is extern - f = C::Variable.new - f.name = n - f.type = C::Function.new(C::BaseType.new(:int)) - if dcmp.recurse > 0 - dcmp.c_parser.toplevel.symbol[n] = f - dcmp.c_parser.toplevel.statements << C::Declaration.new(f) - end - end - commit[] - binding.delete :eax - e = C::CExpression[f, :funcall, args] - e = C::CExpression[ce[:eax], :'=', e, f.type.type] if deps[b].include? :eax and f.type.type != C::BaseType.new(:void) - stmts << e - when 'jmp' - #if di.comment.to_a.include? 'switch' - # n = di.instruction.args.first.symbolic(di) - # fptr = ceb[n] - # binding.delete n - # commit[] - # sw = C::Switch.new(fptr, C::Block.new(scope)) - # di.block.to_normal.to_a.each { |addr| - # addr = dcmp.dasm.normalize addr - # to.delete addr - # next if not l = dcmp.dasm.get_label_at(addr) - # sw.body.statements << C::Goto.new(l) - # } - # stmts << sw - a = di.instruction.args.first - if a.kind_of? Expression - elsif not a.respond_to? :symbolic - stmts << C::Asm.new(di.instruction.to_s, nil, [], [], nil, nil) - else - n = di.instruction.args.first.symbolic(di) - fptr = ceb[n] - binding.delete n - commit[] - if fptr.kind_of? C::CExpression and fptr.type.pointer? and fptr.type.untypedef.type.kind_of? 
C::Function - proto = fptr.type.untypedef.type - args = get_func_args[di, fptr.type] - else - proto = C::Function.new(C::BaseType.new(:void)) - fptr = C::CExpression[[fptr], C::Pointer.new(proto)] - args = [] - end - ret = C::Return.new(C::CExpression[fptr, :funcall, args]) - class << ret ; attr_accessor :from_instr end - ret.from_instr = di - stmts << ret - to = [] - end - when 'lgdt' - if not dcmp.c_parser.toplevel.struct['segment_descriptor'] - dcmp.c_parser.parse('struct segment_descriptor { __int16 limit; __int16 base0_16; __int8 base16_24; __int8 flags1; __int8 flags2_limit_16_20; __int8 base24_32; };') - dcmp.c_parser.parse('struct segment_table { __int16 size; struct segment_descriptor *table; } __attribute__((pack(2)));') - end - if not dcmp.c_parser.toplevel.symbol['intrinsic_lgdt'] - dcmp.c_parser.parse('void intrinsic_lgdt(struct segment_table *);') - end - # need a way to transform arg => :frameptr+12 - arg = di.backtrace_binding.keys.grep(Indirection).first.pointer - stmts << C::CExpression.new(dcmp.c_parser.toplevel.symbol['intrinsic_lgdt'], :funcall, [ceb[arg]], C::BaseType.new(:void)) - when 'lidt' - if not dcmp.c_parser.toplevel.struct['interrupt_descriptor'] - dcmp.c_parser.parse('struct interrupt_descriptor { __int16 offset0_16; __int16 segment; __int16 flags; __int16 offset16_32; };') - dcmp.c_parser.parse('struct interrupt_table { __int16 size; struct interrupt_descriptor *table; } __attribute__((pack(2)));') - end - if not dcmp.c_parser.toplevel.symbol['intrinsic_lidt'] - dcmp.c_parser.parse('void intrinsic_lidt(struct interrupt_table *);') - end - arg = di.backtrace_binding.keys.grep(Indirection).first.pointer - stmts << C::CExpression.new(dcmp.c_parser.toplevel.symbol['intrinsic_lidt'], :funcall, [ceb[arg]], C::BaseType.new(:void)) - when 'ltr', 'lldt' - if not dcmp.c_parser.toplevel.symbol["intrinsic_#{di.opcode.name}"] - dcmp.c_parser.parse("void intrinsic_#{di.opcode.name}(int);") - end - arg = di.backtrace_binding.keys.first - stmts << 
C::CExpression.new(dcmp.c_parser.toplevel.symbol["intrinsic_#{di.opcode.name}"], :funcall, [ceb[arg]], C::BaseType.new(:void)) - when 'out' - sz = di.instruction.args.find { |a_| a_.kind_of? Ia32::Reg and a_.val == 0 }.sz - if not dcmp.c_parser.toplevel.symbol["intrinsic_out#{sz}"] - dcmp.c_parser.parse("void intrinsic_out#{sz}(unsigned short port, __int#{sz} value);") - end - port = di.instruction.args.grep(Expression).first || :edx - stmts << C::CExpression.new(dcmp.c_parser.toplevel.symbol["intrinsic_out#{sz}"], :funcall, [ceb[port], ceb[:eax]], C::BaseType.new(:void)) - when 'in' - sz = di.instruction.args.find { |a_| a_.kind_of? Ia32::Reg and a_.val == 0 }.sz - if not dcmp.c_parser.toplevel.symbol["intrinsic_in#{sz}"] - dcmp.c_parser.parse("__int#{sz} intrinsic_in#{sz}(unsigned short port);") - end - port = di.instruction.args.grep(Expression).first || :edx - f = dcmp.c_parser.toplevel.symbol["intrinsic_in#{sz}"] - binding.delete :eax - stmts << C::CExpression.new(ce[:eax], :'=', C::CExpression.new(f, :funcall, [ceb[port]], f.type.type), f.type.type) - when 'sti', 'cli' - stmts << C::Asm.new(di.instruction.to_s, nil, [], [], nil, nil) - when /^(mov|sto|lod)s([bwdq])/ - op, sz = $1, $2 - commit[] - sz = { 'b' => 1, 'w' => 2, 'd' => 4, 'q' => 8 }[sz] - pt = C::Pointer.new(C::BaseType.new("__int#{sz*8}".to_sym)) + if not n.kind_of? ::String or (f and not f.type.kind_of? C::Function) + # indirect funcall + fptr = ceb[n] + binding.delete n + proto = C::Function.new(C::BaseType.new(:int)) + proto = f.type if f and f.type.kind_of? 
C::Function + f = C::CExpression[[fptr], C::Pointer.new(proto)] + elsif not f + # internal functions are predeclared, so this one is extern + f = C::Variable.new + f.name = n + f.type = C::Function.new(C::BaseType.new(:int)) + if dcmp.recurse > 0 + dcmp.c_parser.toplevel.symbol[n] = f + dcmp.c_parser.toplevel.statements << C::Declaration.new(f) + end + end + commit[] + binding.delete :eax + e = C::CExpression[f, :funcall, args] + e = C::CExpression[ce[:eax], :'=', e, f.type.type] if deps[b].include? :eax and f.type.type != C::BaseType.new(:void) + stmts << e + when 'jmp' + #if di.comment.to_a.include? 'switch' + # n = di.instruction.args.first.symbolic(di) + # fptr = ceb[n] + # binding.delete n + # commit[] + # sw = C::Switch.new(fptr, C::Block.new(scope)) + # di.block.to_normal.to_a.each { |addr| + # addr = dcmp.dasm.normalize addr + # to.delete addr + # next if not l = dcmp.dasm.get_label_at(addr) + # sw.body.statements << C::Goto.new(l) + # } + # stmts << sw + a = di.instruction.args.first + if a.kind_of? Expression + elsif not a.respond_to? :symbolic + stmts << C::Asm.new(di.instruction.to_s, nil, [], [], nil, nil) + else + n = di.instruction.args.first.symbolic(di) + fptr = ceb[n] + binding.delete n + commit[] + if fptr.kind_of? C::CExpression and fptr.type.pointer? and fptr.type.untypedef.type.kind_of? 
C::Function + proto = fptr.type.untypedef.type + args = get_func_args[di, fptr.type] + else + proto = C::Function.new(C::BaseType.new(:void)) + fptr = C::CExpression[[fptr], C::Pointer.new(proto)] + args = [] + end + ret = C::Return.new(C::CExpression[fptr, :funcall, args]) + class << ret ; attr_accessor :from_instr end + ret.from_instr = di + stmts << ret + to = [] + end + when 'lgdt' + if not dcmp.c_parser.toplevel.struct['segment_descriptor'] + dcmp.c_parser.parse('struct segment_descriptor { __int16 limit; __int16 base0_16; __int8 base16_24; __int8 flags1; __int8 flags2_limit_16_20; __int8 base24_32; };') + dcmp.c_parser.parse('struct segment_table { __int16 size; struct segment_descriptor *table; } __attribute__((pack(2)));') + end + if not dcmp.c_parser.toplevel.symbol['intrinsic_lgdt'] + dcmp.c_parser.parse('void intrinsic_lgdt(struct segment_table *);') + end + # need a way to transform arg => :frameptr+12 + arg = di.backtrace_binding.keys.grep(Indirection).first.pointer + stmts << C::CExpression.new(dcmp.c_parser.toplevel.symbol['intrinsic_lgdt'], :funcall, [ceb[arg]], C::BaseType.new(:void)) + when 'lidt' + if not dcmp.c_parser.toplevel.struct['interrupt_descriptor'] + dcmp.c_parser.parse('struct interrupt_descriptor { __int16 offset0_16; __int16 segment; __int16 flags; __int16 offset16_32; };') + dcmp.c_parser.parse('struct interrupt_table { __int16 size; struct interrupt_descriptor *table; } __attribute__((pack(2)));') + end + if not dcmp.c_parser.toplevel.symbol['intrinsic_lidt'] + dcmp.c_parser.parse('void intrinsic_lidt(struct interrupt_table *);') + end + arg = di.backtrace_binding.keys.grep(Indirection).first.pointer + stmts << C::CExpression.new(dcmp.c_parser.toplevel.symbol['intrinsic_lidt'], :funcall, [ceb[arg]], C::BaseType.new(:void)) + when 'ltr', 'lldt' + if not dcmp.c_parser.toplevel.symbol["intrinsic_#{di.opcode.name}"] + dcmp.c_parser.parse("void intrinsic_#{di.opcode.name}(int);") + end + arg = di.backtrace_binding.keys.first + stmts << 
C::CExpression.new(dcmp.c_parser.toplevel.symbol["intrinsic_#{di.opcode.name}"], :funcall, [ceb[arg]], C::BaseType.new(:void)) + when 'out' + sz = di.instruction.args.find { |a_| a_.kind_of? Ia32::Reg and a_.val == 0 }.sz + if not dcmp.c_parser.toplevel.symbol["intrinsic_out#{sz}"] + dcmp.c_parser.parse("void intrinsic_out#{sz}(unsigned short port, __int#{sz} value);") + end + port = di.instruction.args.grep(Expression).first || :edx + stmts << C::CExpression.new(dcmp.c_parser.toplevel.symbol["intrinsic_out#{sz}"], :funcall, [ceb[port], ceb[:eax]], C::BaseType.new(:void)) + when 'in' + sz = di.instruction.args.find { |a_| a_.kind_of? Ia32::Reg and a_.val == 0 }.sz + if not dcmp.c_parser.toplevel.symbol["intrinsic_in#{sz}"] + dcmp.c_parser.parse("__int#{sz} intrinsic_in#{sz}(unsigned short port);") + end + port = di.instruction.args.grep(Expression).first || :edx + f = dcmp.c_parser.toplevel.symbol["intrinsic_in#{sz}"] + binding.delete :eax + stmts << C::CExpression.new(ce[:eax], :'=', C::CExpression.new(f, :funcall, [ceb[port]], f.type.type), f.type.type) + when 'sti', 'cli' + stmts << C::Asm.new(di.instruction.to_s, nil, [], [], nil, nil) + when /^(mov|sto|lod)s([bwdq])/ + op, sz = $1, $2 + commit[] + sz = { 'b' => 1, 'w' => 2, 'd' => 4, 'q' => 8 }[sz] + pt = C::Pointer.new(C::BaseType.new("__int#{sz*8}".to_sym)) - blk = C::Block.new(scope) - case op - when 'mov' - blk.statements << C::CExpression[[:*, [[ceb[:edi]], pt]], :'=', [:*, [[ceb[:esi]], pt]]] - blk.statements << C::CExpression[ceb[:edi], :'=', [ceb[:edi], :+, [sz]]] - blk.statements << C::CExpression[ceb[:esi], :'=', [ceb[:esi], :+, [sz]]] - when 'sto' - blk.statements << C::CExpression[[:*, [[ceb[:edi]], pt]], :'=', ceb[:eax]] - blk.statements << C::CExpression[ceb[:edi], :'=', [ceb[:edi], :+, [sz]]] - when 'lod' - blk.statements << C::CExpression[ceb[:eax], :'=', [:*, [[ceb[:esi]], pt]]] - blk.statements << C::CExpression[ceb[:esi], :'=', [ceb[:esi], :+, [sz]]] - #when 'sca' - #when 'cmp' - end + blk = 
C::Block.new(scope) + case op + when 'mov' + blk.statements << C::CExpression[[:*, [[ceb[:edi]], pt]], :'=', [:*, [[ceb[:esi]], pt]]] + blk.statements << C::CExpression[ceb[:edi], :'=', [ceb[:edi], :+, [sz]]] + blk.statements << C::CExpression[ceb[:esi], :'=', [ceb[:esi], :+, [sz]]] + when 'sto' + blk.statements << C::CExpression[[:*, [[ceb[:edi]], pt]], :'=', ceb[:eax]] + blk.statements << C::CExpression[ceb[:edi], :'=', [ceb[:edi], :+, [sz]]] + when 'lod' + blk.statements << C::CExpression[ceb[:eax], :'=', [:*, [[ceb[:esi]], pt]]] + blk.statements << C::CExpression[ceb[:esi], :'=', [ceb[:esi], :+, [sz]]] + #when 'sca' + #when 'cmp' + end - case (di.instruction.prefix || {})[:rep] - when nil - stmts.concat blk.statements - when 'rep' - blk.statements << C::CExpression[ceb[:ecx], :'=', [ceb[:ecx], :-, [1]]] - stmts << C::While.new(C::CExpression[ceb[:ecx]], blk) - #when 'repz' # sca/cmp only - #when 'repnz' - end - next - else - bd = get_fwdemu_binding(di) - if di.backtrace_binding[:incomplete_binding] - commit[] - stmts << C::Asm.new(di.instruction.to_s, nil, nil, nil, nil, nil) - else - update = {} - bd.each { |k, v| - if k.kind_of? ::Symbol and not deps[b].include? k - ops << [k, v] - update[k] = Expression[Expression[v].bind(binding).reduce] - else - stmts << ceb[k, :'=', v] - stmts.pop if stmts.last.kind_of? C::Variable # [:eflag_s, :=, :unknown].reduce - end - } - binding.update update - end - end - } - commit[] + case (di.instruction.prefix || {})[:rep] + when nil + stmts.concat blk.statements + when 'rep' + blk.statements << C::CExpression[ceb[:ecx], :'=', [ceb[:ecx], :-, [1]]] + stmts << C::While.new(C::CExpression[ceb[:ecx]], blk) + #when 'repz' # sca/cmp only + #when 'repnz' + end + next + else + bd = get_fwdemu_binding(di) + if di.backtrace_binding[:incomplete_binding] + commit[] + stmts << C::Asm.new(di.instruction.to_s, nil, nil, nil, nil, nil) + else + update = {} + bd.each { |k, v| + if k.kind_of? ::Symbol and not deps[b].include? 
k + ops << [k, v] + update[k] = Expression[Expression[v].bind(binding).reduce] + else + stmts << ceb[k, :'=', v] + stmts.pop if stmts.last.kind_of? C::Variable # [:eflag_s, :=, :unknown].reduce + end + } + binding.update update + end + end + } + commit[] - case to.length - when 0 - if not myblocks.empty? and not %w[ret jmp].include? dcmp.dasm.decoded[b].block.list.last.instruction.opname - puts " block #{Expression[b]} has no to and don't end in ret" - end - when 1 - if (myblocks.empty? ? nextaddr != to[0] : myblocks.first.first != to[0]) - stmts << C::Goto.new(dcmp.dasm.auto_label_at(to[0], 'unknown_goto')) - end - else - puts " block #{Expression[b]} with multiple to" - end - end + case to.length + when 0 + if not myblocks.empty? and not %w[ret jmp].include? dcmp.dasm.decoded[b].block.list.last.instruction.opname + puts " block #{Expression[b]} has no to and don't end in ret" + end + when 1 + if (myblocks.empty? ? nextaddr != to[0] : myblocks.first.first != to[0]) + stmts << C::Goto.new(dcmp.dasm.auto_label_at(to[0], 'unknown_goto')) + end + else + puts " block #{Expression[b]} with multiple to" + end + end - # cleanup di.bt_binding (we set :frameptr etc in those, this may confuse the dasm) - blocks_toclean.each { |b_, to_| - dcmp.dasm.decoded[b_].block.list.each { |di| - di.backtrace_binding = nil - } - } - end + # cleanup di.bt_binding (we set :frameptr etc in those, this may confuse the dasm) + blocks_toclean.each { |b_, to_| + dcmp.dasm.decoded[b_].block.list.each { |di| + di.backtrace_binding = nil + } + } + end - def decompile_check_abi(dcmp, entry, func) - a = func.type.args || [] - a.delete_if { |arg| arg.has_attribute_var('register') and arg.has_attribute('unused') } - ra = a.map { |arg| arg.has_attribute_var('register') }.compact - if (a.length == 1 and ra == ['ecx']) or (a.length >= 2 and ra.sort == ['ecx', 'edx']) - func.add_attribute 'fastcall' - # reorder args - ecx = a.find { |arg| arg.has_attribute_var('register') == 'ecx' } - edx = a.find { |arg| 
arg.has_attribute_var('register') == 'edx' } - a.insert(0, a.delete(ecx)) - a.insert(1, a.delete(edx)) if edx - end + def decompile_check_abi(dcmp, entry, func) + a = func.type.args || [] + a.delete_if { |arg| arg.has_attribute_var('register') and arg.has_attribute('unused') } + ra = a.map { |arg| arg.has_attribute_var('register') }.compact + if (a.length == 1 and ra == ['ecx']) or (a.length >= 2 and ra.sort == ['ecx', 'edx']) + func.add_attribute 'fastcall' + # reorder args + ecx = a.find { |arg| arg.has_attribute_var('register') == 'ecx' } + edx = a.find { |arg| arg.has_attribute_var('register') == 'edx' } + a.insert(0, a.delete(ecx)) + a.insert(1, a.delete(edx)) if edx + end - if not f = dcmp.dasm.function[entry] or not f.return_address - #func.add_attribute 'noreturn' - else - adj = f.return_address.map { |ra_| dcmp.dasm.backtrace(:esp, ra_, :include_start => true, :stopaddr => entry) }.flatten.uniq - if adj.length == 1 and so = Expression[adj.first, :-, :esp].reduce and so.kind_of? ::Integer - argsz = a.map { |fa| - next if not fa.stackoff - (fa.stackoff + [dcmp.sizeof(fa), dcmp.c_parser.typesize[:ptr]].max-1) / dcmp.c_parser.typesize[:ptr] - }.compact.max.to_i - so /= dcmp.dasm.cpu.size/8 - so -= 1 - if so > argsz - aso = a.empty? ? 
0 : a.last.stackoff.to_i + dcmp.c_parser.typesize[:ptr] - (so-argsz).times { - a << C::Variable.new(dcmp.stackoff_to_varname(aso), C::BaseType.new(:int)) - a.last.add_attribute('unused') - aso += dcmp.sizeof(a.last) - } - argsz = so - end - case so - when 0 - when argsz - func.add_attribute 'stdcall' if not func.has_attribute('fastcall') - else - func.add_attribute "stackoff:#{so*dcmp.dasm.cpu.size/8}" - end - else - func.add_attribute "breakstack:#{adj.inspect}" - end - end - end + if not f = dcmp.dasm.function[entry] or not f.return_address + #func.add_attribute 'noreturn' + else + adj = f.return_address.map { |ra_| dcmp.dasm.backtrace(:esp, ra_, :include_start => true, :stopaddr => entry) }.flatten.uniq + if adj.length == 1 and so = Expression[adj.first, :-, :esp].reduce and so.kind_of? ::Integer + argsz = a.map { |fa| + next if not fa.stackoff + (fa.stackoff + [dcmp.sizeof(fa), dcmp.c_parser.typesize[:ptr]].max-1) / dcmp.c_parser.typesize[:ptr] + }.compact.max.to_i + so /= dcmp.dasm.cpu.size/8 + so -= 1 + if so > argsz + aso = a.empty? ? 
0 : a.last.stackoff.to_i + dcmp.c_parser.typesize[:ptr] + (so-argsz).times { + a << C::Variable.new(dcmp.stackoff_to_varname(aso), C::BaseType.new(:int)) + a.last.add_attribute('unused') + aso += dcmp.sizeof(a.last) + } + argsz = so + end + case so + when 0 + when argsz + func.add_attribute 'stdcall' if not func.has_attribute('fastcall') + else + func.add_attribute "stackoff:#{so*dcmp.dasm.cpu.size/8}" + end + else + func.add_attribute "breakstack:#{adj.inspect}" + end + end + end end end diff --git a/lib/metasm/metasm/cpu/ia32/encode.rb b/lib/metasm/metasm/cpu/ia32/encode.rb index bda4feb6b9..aaf9072ded 100644 --- a/lib/metasm/metasm/cpu/ia32/encode.rb +++ b/lib/metasm/metasm/cpu/ia32/encode.rb @@ -9,312 +9,312 @@ require 'metasm/encode' module Metasm class Ia32 - class InvalidModRM < Exception ; end - class ModRM - # returns the byte representing the register encoded as modrm - # works with Reg/SimdReg - def self.encode_reg(reg, mregval = 0) - 0xc0 | (mregval << 3) | reg.val - end + class InvalidModRM < Exception ; end + class ModRM + # returns the byte representing the register encoded as modrm + # works with Reg/SimdReg + def self.encode_reg(reg, mregval = 0) + 0xc0 | (mregval << 3) | reg.val + end - # The argument is an integer representing the 'reg' field of the mrm - # - # caller is responsible for setting the adsz - # returns an array, 1 element per possible immediate size (for un-reduce()able Expression) - def encode(reg = 0, endianness = :little) - reg = reg.val if reg.kind_of? Argument - case @adsz - when 16; encode16(reg, endianness) - when 32; encode32(reg, endianness) - end - end + # The argument is an integer representing the 'reg' field of the mrm + # + # caller is responsible for setting the adsz + # returns an array, 1 element per possible immediate size (for un-reduce()able Expression) + def encode(reg = 0, endianness = :little) + reg = reg.val if reg.kind_of? 
Argument + case @adsz + when 16; encode16(reg, endianness) + when 32; encode32(reg, endianness) + end + end - private - def encode16(reg, endianness) - if not b - # imm only - return [EncodedData.new << (6 | (reg << 3)) << @imm.encode(:u16, endianness)] - end + private + def encode16(reg, endianness) + if not b + # imm only + return [EncodedData.new << (6 | (reg << 3)) << @imm.encode(:u16, endianness)] + end - imm = @imm.reduce if self.imm - imm = nil if imm == 0 - ret = EncodedData.new - ret << - case [@b.val, (@i.val if i)] - when [3, 6], [6, 3]; 0 - when [3, 7], [7, 3]; 1 - when [5, 6], [6, 5]; 2 - when [5, 7], [7, 5]; 3 - when [6, nil]; 4 - when [7, nil]; 5 - when [5, nil] - imm ||= 0 - 6 - when [3, nil]; 7 - else raise InvalidModRM, 'invalid modrm16' - end + imm = @imm.reduce if self.imm + imm = nil if imm == 0 + ret = EncodedData.new + ret << + case [@b.val, (@i.val if i)] + when [3, 6], [6, 3]; 0 + when [3, 7], [7, 3]; 1 + when [5, 6], [6, 5]; 2 + when [5, 7], [7, 5]; 3 + when [6, nil]; 4 + when [7, nil]; 5 + when [5, nil] + imm ||= 0 + 6 + when [3, nil]; 7 + else raise InvalidModRM, 'invalid modrm16' + end - # add bits in the first octet of ret.data (1.9 compatibility layer) - or_bits = lambda { |v| # rape me - if ret.data[0].kind_of? Integer - ret.data[0] |= v - else - ret.data[0] = (ret.data[0].unpack('C').first | v).chr - end - } + # add bits in the first octet of ret.data (1.9 compatibility layer) + or_bits = lambda { |v| # rape me + if ret.data[0].kind_of? 
Integer + ret.data[0] |= v + else + ret.data[0] = (ret.data[0].unpack('C').first | v).chr + end + } - or_bits[reg << 3] + or_bits[reg << 3] - if imm - case Expression.in_range?(imm, :i8) - when true - or_bits[1 << 6] - [ret << Expression.encode_imm(imm, :i8, endianness)] - when false - or_bits[2 << 6] - [ret << Expression.encode_imm(imm, :a16, endianness)] - when nil - rets = ret.dup - or_bits[1<<6] - ret << @imm.encode(:i8, endianness) - ret, rets = rets, ret # or_bits uses ret - or_bits[2<<6] - ret << @imm.encode(:a16, endianness) - [ret, rets] - end - else - [ret] - end - end + if imm + case Expression.in_range?(imm, :i8) + when true + or_bits[1 << 6] + [ret << Expression.encode_imm(imm, :i8, endianness)] + when false + or_bits[2 << 6] + [ret << Expression.encode_imm(imm, :a16, endianness)] + when nil + rets = ret.dup + or_bits[1<<6] + ret << @imm.encode(:i8, endianness) + ret, rets = rets, ret # or_bits uses ret + or_bits[2<<6] + ret << @imm.encode(:a16, endianness) + [ret, rets] + end + else + [ret] + end + end - def encode32(reg, endianness) - # 0 => [ [0 ], [1 ], [2 ], [3 ], [:sib ], [:i32 ], [6 ], [7 ] ], \ - # 1 => [ [0, :i8 ], [1, :i8 ], [2, :i8 ], [3, :i8 ], [:sib, :i8 ], [5, :i8 ], [6, :i8 ], [7, :i8 ] ], \ - # 2 => [ [0, :i32], [1, :i32], [2, :i32], [3, :i32], [:sib, :i32], [5, :i32], [6, :i32], [7, :i32] ] - # - # b => 0 1 2 3 4 5+i|i 6 7 - # i => 0 1 2 3 nil 5 6 7 + def encode32(reg, endianness) + # 0 => [ [0 ], [1 ], [2 ], [3 ], [:sib ], [:i32 ], [6 ], [7 ] ], \ + # 1 => [ [0, :i8 ], [1, :i8 ], [2, :i8 ], [3, :i8 ], [:sib, :i8 ], [5, :i8 ], [6, :i8 ], [7, :i8 ] ], \ + # 2 => [ [0, :i32], [1, :i32], [2, :i32], [3, :i32], [:sib, :i32], [5, :i32], [6, :i32], [7, :i32] ] + # + # b => 0 1 2 3 4 5+i|i 6 7 + # i => 0 1 2 3 nil 5 6 7 - ret = EncodedData.new << (reg << 3) + ret = EncodedData.new << (reg << 3) - # add bits in the first octet of ret.data (1.9 compatibility layer) - or_bits = lambda { |v| # rape me - if ret.data[0].kind_of? 
Integer - ret.data[0] |= v - else - ret.data[0] = (ret.data[0].unpack('C').first | v).chr - end - } + # add bits in the first octet of ret.data (1.9 compatibility layer) + or_bits = lambda { |v| # rape me + if ret.data[0].kind_of? Integer + ret.data[0] |= v + else + ret.data[0] = (ret.data[0].unpack('C').first | v).chr + end + } - if not self.b and not self.i - or_bits[5] - [ret << @imm.encode(:a32, endianness)] + if not self.b and not self.i + or_bits[5] + [ret << @imm.encode(:a32, endianness)] - elsif not self.b and self.s != 1 - # sib with no b - raise EncodeError, "Invalid ModRM #{self}" if @i.val == 4 - or_bits[4] - s = {8=>3, 4=>2, 2=>1}[@s] - imm = self.imm || Expression[0] - fu = (s << 6) | (@i.val << 3) | 5 - fu = fu.chr if s >= 2 # rb1.9 encoding fix - [ret << fu << imm.encode(:a32, endianness)] - else - imm = @imm.reduce if self.imm - imm = nil if imm == 0 + elsif not self.b and self.s != 1 + # sib with no b + raise EncodeError, "Invalid ModRM #{self}" if @i.val == 4 + or_bits[4] + s = {8=>3, 4=>2, 2=>1}[@s] + imm = self.imm || Expression[0] + fu = (s << 6) | (@i.val << 3) | 5 + fu = fu.chr if s >= 2 # rb1.9 encoding fix + [ret << fu << imm.encode(:a32, endianness)] + else + imm = @imm.reduce if self.imm + imm = nil if imm == 0 - if not self.i or (not self.b and self.s == 1) - # no sib byte (except for [esp]) - b = self.b || self.i + if not self.i or (not self.b and self.s == 1) + # no sib byte (except for [esp]) + b = self.b || self.i - or_bits[b.val] - ret << 0x24 if b.val == 4 - else - # sib - or_bits[4] + or_bits[b.val] + ret << 0x24 if b.val == 4 + else + # sib + or_bits[4] - i, b = @i, @b - b, i = i, b if @s == 1 and (i.val == 4 or b.val == 5) + i, b = @i, @b + b, i = i, b if @s == 1 and (i.val == 4 or b.val == 5) - raise EncodeError, "Invalid ModRM #{self}" if i.val == 4 + raise EncodeError, "Invalid ModRM #{self}" if i.val == 4 - s = {8=>3, 4=>2, 2=>1, 1=>0}[@s] - fu = (s << 6) | (i.val << 3) | b.val - fu = fu.chr if s >= 2 # rb1.9 encoding fix - 
ret << fu - end + s = {8=>3, 4=>2, 2=>1, 1=>0}[@s] + fu = (s << 6) | (i.val << 3) | b.val + fu = fu.chr if s >= 2 # rb1.9 encoding fix + ret << fu + end - imm ||= 0 if b.val == 5 - if imm - case Expression.in_range?(imm, :i8) - when true - or_bits[1<<6] - [ret << Expression.encode_imm(imm, :i8, endianness)] - when false - or_bits[2<<6] - [ret << Expression.encode_imm(imm, :a32, endianness)] - when nil - rets = ret.dup - or_bits[1<<6] - ret << @imm.encode(:i8, endianness) - rets, ret = ret, rets # or_bits[] modifies ret directly - or_bits[2<<6] - ret << @imm.encode(:a32, endianness) - [ret, rets] - end - else - [ret] - end - end - end - end + imm ||= 0 if b.val == 5 + if imm + case Expression.in_range?(imm, :i8) + when true + or_bits[1<<6] + [ret << Expression.encode_imm(imm, :i8, endianness)] + when false + or_bits[2<<6] + [ret << Expression.encode_imm(imm, :a32, endianness)] + when nil + rets = ret.dup + or_bits[1<<6] + ret << @imm.encode(:i8, endianness) + rets, ret = ret, rets # or_bits[] modifies ret directly + or_bits[2<<6] + ret << @imm.encode(:a32, endianness) + [ret, rets] + end + else + [ret] + end + end + end + end - class Farptr - def encode(endianness, atype) - @addr.encode(atype, endianness) << @seg.encode(:u16, endianness) - end - end + class Farptr + def encode(endianness, atype) + @addr.encode(atype, endianness) << @seg.encode(:u16, endianness) + end + end - # returns all forms of the encoding of instruction i using opcode op - # program may be used to create a new label for relative jump/call - def encode_instr_op(program, i, op) - base = op.bin.dup - oi = op.args.zip(i.args) - set_field = lambda { |f, v| - v ||= 0 # ST => ST(0) - fld = op.fields[f] - base[fld[0]] |= v << fld[1] - } + # returns all forms of the encoding of instruction i using opcode op + # program may be used to create a new label for relative jump/call + def encode_instr_op(program, i, op) + base = op.bin.dup + oi = op.args.zip(i.args) + set_field = lambda { |f, v| + v ||= 0 # ST 
=> ST(0) + fld = op.fields[f] + base[fld[0]] |= v << fld[1] + } - size = i.prefix[:sz] || @size + size = i.prefix[:sz] || @size - # - # handle prefixes and bit fields - # - pfx = i.prefix.map { |k, v| - case k - when :jmp; {:jmp => 0x3e, :nojmp => 0x2e}[v] - when :lock; 0xf0 - when :rep; {'repnz' => 0xf2, 'repz' => 0xf3, 'rep' => 0xf2}[v] - when :jmphint; {'hintjmp' => 0x3e, 'hintnojmp' => 0x2e}[v] - when :seg; [0x26, 0x2E, 0x36, 0x3E, 0x64, 0x65][v.val] - end - }.compact.pack 'C*' + # + # handle prefixes and bit fields + # + pfx = i.prefix.map { |k, v| + case k + when :jmp; {:jmp => 0x3e, :nojmp => 0x2e}[v] + when :lock; 0xf0 + when :rep; {'repnz' => 0xf2, 'repz' => 0xf3, 'rep' => 0xf2}[v] + when :jmphint; {'hintjmp' => 0x3e, 'hintnojmp' => 0x2e}[v] + when :seg; [0x26, 0x2E, 0x36, 0x3E, 0x64, 0x65][v.val] + end + }.compact.pack 'C*' - if op.name == 'movsx' or op.name == 'movzx' - pfx << 0x66 if size == 48-i.args[0].sz - elsif op.name == 'crc32' - pfx << 0x66 if size == 48-i.args[1].sz - else - opsz = op.props[:argsz] - oi.each { |oa, ia| - case oa - when :reg, :reg_eax, :modrm, :mrm_imm - raise EncodeError, "Incompatible arg size in #{i}" if ia.sz and opsz and opsz != ia.sz - opsz = ia.sz - end - } - pfx << 0x66 if (op.props[:opsz] and size == 48 - op.props[:opsz]) or - (not op.props[:argsz] and opsz and size == 48 - opsz) - opsz ||= op.props[:opsz] - end - opsz ||= size + if op.name == 'movsx' or op.name == 'movzx' + pfx << 0x66 if size == 48-i.args[0].sz + elsif op.name == 'crc32' + pfx << 0x66 if size == 48-i.args[1].sz + else + opsz = op.props[:argsz] + oi.each { |oa, ia| + case oa + when :reg, :reg_eax, :modrm, :mrm_imm + raise EncodeError, "Incompatible arg size in #{i}" if ia.sz and opsz and opsz != ia.sz + opsz = ia.sz + end + } + pfx << 0x66 if (op.props[:opsz] and size == 48 - op.props[:opsz]) or + (not op.props[:argsz] and opsz and size == 48 - opsz) + opsz ||= op.props[:opsz] + end + opsz ||= size - if op.props[:adsz] and size == 48 - op.props[:adsz] - 
pfx << 0x67 - adsz = 48 - size - end - adsz ||= size - # addrsize override / segment override - if mrm = i.args.grep(ModRM).first - if not op.props[:adsz] and ((mrm.b and mrm.b.sz == 48 - adsz) or (mrm.i and mrm.i.sz == 48 - adsz)) - pfx << 0x67 - adsz = 48 - adsz - end - pfx << [0x26, 0x2E, 0x36, 0x3E, 0x64, 0x65][mrm.seg.val] if mrm.seg - end + if op.props[:adsz] and size == 48 - op.props[:adsz] + pfx << 0x67 + adsz = 48 - size + end + adsz ||= size + # addrsize override / segment override + if mrm = i.args.grep(ModRM).first + if not op.props[:adsz] and ((mrm.b and mrm.b.sz == 48 - adsz) or (mrm.i and mrm.i.sz == 48 - adsz)) + pfx << 0x67 + adsz = 48 - adsz + end + pfx << [0x26, 0x2E, 0x36, 0x3E, 0x64, 0x65][mrm.seg.val] if mrm.seg + end - # - # encode embedded arguments - # - postponed = [] - oi.each { |oa, ia| - case oa - when :reg, :seg3, :seg3A, :seg2, :seg2A, :eeec, :eeed, :eeet, :regfp, :regmmx, :regxmm, :regymm - # field arg - set_field[oa, ia.val] - pfx << 0x66 if oa == :regmmx and op.props[:xmmx] and ia.sz == 128 - when :vexvreg, :vexvxmm, :vexvymm - set_field[:vex_vvvv, ia.val ^ 0xf] - when :imm_val1, :imm_val3, :reg_cl, :reg_eax, :reg_dx, :regfp0 - # implicit - else - postponed << [oa, ia] - end - } + # + # encode embedded arguments + # + postponed = [] + oi.each { |oa, ia| + case oa + when :reg, :seg3, :seg3A, :seg2, :seg2A, :eeec, :eeed, :eeet, :regfp, :regmmx, :regxmm, :regymm + # field arg + set_field[oa, ia.val] + pfx << 0x66 if oa == :regmmx and op.props[:xmmx] and ia.sz == 128 + when :vexvreg, :vexvxmm, :vexvymm + set_field[:vex_vvvv, ia.val ^ 0xf] + when :imm_val1, :imm_val3, :reg_cl, :reg_eax, :reg_dx, :regfp0 + # implicit + else + postponed << [oa, ia] + end + } - if !(op.args & [:modrm, :modrmmmx, :modrmxmm, :modrmymm]).empty? - # reg field of modrm - regval = (base[-1] >> 3) & 7 - base.pop - end + if !(op.args & [:modrm, :modrmmmx, :modrmxmm, :modrmymm]).empty? 
+ # reg field of modrm + regval = (base[-1] >> 3) & 7 + base.pop + end - # convert label name for jmp/call/loop to relative offset - if op.props[:setip] and op.name[0, 3] != 'ret' and i.args.first.kind_of? Expression - postlabel = program.new_label('post'+op.name) - target = postponed.first[1] - target = target.rexpr if target.kind_of? Expression and target.op == :+ and not target.lexpr - postponed.first[1] = Expression[target, :-, postlabel] - end + # convert label name for jmp/call/loop to relative offset + if op.props[:setip] and op.name[0, 3] != 'ret' and i.args.first.kind_of? Expression + postlabel = program.new_label('post'+op.name) + target = postponed.first[1] + target = target.rexpr if target.kind_of? Expression and target.op == :+ and not target.lexpr + postponed.first[1] = Expression[target, :-, postlabel] + end - pfx << op.props[:needpfx] if op.props[:needpfx] + pfx << op.props[:needpfx] if op.props[:needpfx] - # - # append other arguments - # - ret = EncodedData.new(pfx + base.pack('C*')) + # + # append other arguments + # + ret = EncodedData.new(pfx + base.pack('C*')) - postponed.each { |oa, ia| - case oa - when :farptr; ed = ia.encode(@endianness, "a#{opsz}".to_sym) - when :modrm, :modrmmmx, :modrmxmm, :modrmymm - if ia.kind_of? 
ModRM - ed = ia.encode(regval, @endianness) - if ed.kind_of?(::Array) - if ed.length > 1 - # we know that no opcode can have more than 1 modrm - ary = [] - ed.each { |m| - ary << (ret.dup << m) - } - ret = ary - next - else - ed = ed.first - end - end - else - ed = ModRM.encode_reg(ia, regval) - end - when :mrm_imm; ed = ia.imm.encode("a#{adsz}".to_sym, @endianness) - when :i8, :u8, :u16; ed = ia.encode(oa, @endianness) - when :i; ed = ia.encode("a#{opsz}".to_sym, @endianness) - when :i4xmm, :i4ymm; ed = ia.val << 4 # u8 - else raise SyntaxError, "Internal error: want to encode field #{oa.inspect} as arg in #{i}" - end + postponed.each { |oa, ia| + case oa + when :farptr; ed = ia.encode(@endianness, "a#{opsz}".to_sym) + when :modrm, :modrmmmx, :modrmxmm, :modrmymm + if ia.kind_of? ModRM + ed = ia.encode(regval, @endianness) + if ed.kind_of?(::Array) + if ed.length > 1 + # we know that no opcode can have more than 1 modrm + ary = [] + ed.each { |m| + ary << (ret.dup << m) + } + ret = ary + next + else + ed = ed.first + end + end + else + ed = ModRM.encode_reg(ia, regval) + end + when :mrm_imm; ed = ia.imm.encode("a#{adsz}".to_sym, @endianness) + when :i8, :u8, :u16; ed = ia.encode(oa, @endianness) + when :i; ed = ia.encode("a#{opsz}".to_sym, @endianness) + when :i4xmm, :i4ymm; ed = ia.val << 4 # u8 + else raise SyntaxError, "Internal error: want to encode field #{oa.inspect} as arg in #{i}" + end - if ret.kind_of?(::Array) - ret.each { |e| e << ed } - else - ret << ed - end - } + if ret.kind_of?(::Array) + ret.each { |e| e << ed } + else + ret << ed + end + } - # we know that no opcode with setip accept both modrm and immediate arg, so ret is not an ::Array - ret.add_export(postlabel, ret.virtsize) if postlabel + # we know that no opcode with setip accept both modrm and immediate arg, so ret is not an ::Array + ret.add_export(postlabel, ret.virtsize) if postlabel - ret - end + ret + end end end diff --git a/lib/metasm/metasm/cpu/ia32/main.rb 
b/lib/metasm/metasm/cpu/ia32/main.rb index ac2fc115ba..9a37b998ff 100644 --- a/lib/metasm/metasm/cpu/ia32/main.rb +++ b/lib/metasm/metasm/cpu/ia32/main.rb @@ -12,270 +12,270 @@ module Metasm # currently limited to 16 and 32bit modes class Ia32 < CPU - # some ruby magic to declare classes with index -> name association (registers) - class Argument - class << self - # for subclasses - attr_accessor :i_to_s, :s_to_i - end + # some ruby magic to declare classes with index -> name association (registers) + class Argument + class << self + # for subclasses + attr_accessor :i_to_s, :s_to_i + end - private - # index -> name, name -> index - def self.simple_map(a) - # { 1 => 'dr1' } - @i_to_s = Hash[*a.flatten] - # { 'dr1' => 1 } - @s_to_i = @i_to_s.invert + private + # index -> name, name -> index + def self.simple_map(a) + # { 1 => 'dr1' } + @i_to_s = Hash[*a.flatten] + # { 'dr1' => 1 } + @s_to_i = @i_to_s.invert - class_eval { - attr_accessor :val - def initialize(v) - raise Exception, "invalid #{self.class} #{v}" unless self.class.i_to_s[v] - @val = v - end + class_eval { + attr_accessor :val + def initialize(v) + raise Exception, "invalid #{self.class} #{v}" unless self.class.i_to_s[v] + @val = v + end - def ==(o) - self.class == o.class and val == o.val - end + def ==(o) + self.class == o.class and val == o.val + end - def self.from_str(s) new(@s_to_i[s]) end - } - end + def self.from_str(s) new(@s_to_i[s]) end + } + end - # size -> (index -> name), name -> [index, size] - def self.double_map(h) - # { 32 => { 1 => 'ecx' } } - @i_to_s = h - # { 'ecx' => [1, 32] } - @s_to_i = {} ; @i_to_s.each { |sz, hh| hh.each_with_index { |r, i| @s_to_i[r] = [i, sz] } } + # size -> (index -> name), name -> [index, size] + def self.double_map(h) + # { 32 => { 1 => 'ecx' } } + @i_to_s = h + # { 'ecx' => [1, 32] } + @s_to_i = {} ; @i_to_s.each { |sz, hh| hh.each_with_index { |r, i| @s_to_i[r] = [i, sz] } } - class_eval { - attr_accessor :val, :sz - def initialize(v, sz) - raise 
Exception, "invalid #{self.class} #{sz}/#{v}" unless self.class.i_to_s[sz] and self.class.i_to_s[sz][v] - @val = v - @sz = sz - end + class_eval { + attr_accessor :val, :sz + def initialize(v, sz) + raise Exception, "invalid #{self.class} #{sz}/#{v}" unless self.class.i_to_s[sz] and self.class.i_to_s[sz][v] + @val = v + @sz = sz + end - def ==(o) - self.class == o.class and val == o.val and sz == o.sz - end + def ==(o) + self.class == o.class and val == o.val and sz == o.sz + end - def self.from_str(s) - raise "Bad #{name} #{s.inspect}" if not x = @s_to_i[s] - new(*x) - end - } - end - end + def self.from_str(s) + raise "Bad #{name} #{s.inspect}" if not x = @s_to_i[s] + new(*x) + end + } + end + end - # segment register: es, cs, ss, ds, fs, gs and the theoretical segr6/7 - class SegReg < Argument - simple_map((0..7).zip(%w(es cs ss ds fs gs segr6 segr7))) - end + # segment register: es, cs, ss, ds, fs, gs and the theoretical segr6/7 + class SegReg < Argument + simple_map((0..7).zip(%w(es cs ss ds fs gs segr6 segr7))) + end - # debug register (dr0..dr3, dr6, dr7), and theoretical dr4/5 - class DbgReg < Argument - simple_map((0..7).map { |i| [i, "dr#{i}"] }) - end + # debug register (dr0..dr3, dr6, dr7), and theoretical dr4/5 + class DbgReg < Argument + simple_map((0..7).map { |i| [i, "dr#{i}"] }) + end - # control register (cr0, cr2, cr3, cr4) and theoretical cr1/5/6/7 - class CtrlReg < Argument - simple_map((0..7).map { |i| [i, "cr#{i}"] }) - end + # control register (cr0, cr2, cr3, cr4) and theoretical cr1/5/6/7 + class CtrlReg < Argument + simple_map((0..7).map { |i| [i, "cr#{i}"] }) + end - # test registers (tr0..tr7) (undocumented) - class TstReg < Argument - simple_map((0..7).map { |i| [i, "tr#{i}"] }) - end + # test registers (tr0..tr7) (undocumented) + class TstReg < Argument + simple_map((0..7).map { |i| [i, "tr#{i}"] }) + end - # floating point registers - class FpReg < Argument - simple_map((0..7).map { |i| [i, "ST(#{i})"] } << [nil, 'ST']) - end + # 
floating point registers + class FpReg < Argument + simple_map((0..7).map { |i| [i, "ST(#{i})"] } << [nil, 'ST']) + end - # Single Instr Multiple Data register (mm0..mm7, xmm0..xmm7, ymm0..ymm7) - class SimdReg < Argument - double_map 64 => (0..7).map { |n| "mm#{n}" }, - 128 => (0..7).map { |n| "xmm#{n}" }, - 256 => (0..7).map { |n| "ymm#{n}" } - def symbolic(di=nil) ; to_s.to_sym end - end + # Single Instr Multiple Data register (mm0..mm7, xmm0..xmm7, ymm0..ymm7) + class SimdReg < Argument + double_map 64 => (0..7).map { |n| "mm#{n}" }, + 128 => (0..7).map { |n| "xmm#{n}" }, + 256 => (0..7).map { |n| "ymm#{n}" } + def symbolic(di=nil) ; to_s.to_sym end + end - # general purpose registers, all sizes - class Reg < Argument - double_map 8 => %w{ al cl dl bl ah ch dh bh}, - 16 => %w{ ax cx dx bx sp bp si di}, - 32 => %w{eax ecx edx ebx esp ebp esi edi} + # general purpose registers, all sizes + class Reg < Argument + double_map 8 => %w{ al cl dl bl ah ch dh bh}, + 16 => %w{ ax cx dx bx sp bp si di}, + 32 => %w{eax ecx edx ebx esp ebp esi edi} - Sym = @i_to_s[32].map { |s| s.to_sym } + Sym = @i_to_s[32].map { |s| s.to_sym } - # returns a symbolic representation of the register: - # eax => :eax - # cx => :ecx & 0xffff - # ah => (:eax >> 8) & 0xff - def symbolic(di=nil) - s = Sym[@val] - if @sz == 8 and to_s[-1] == ?h - Expression[[Sym[@val-4], :>>, 8], :&, 0xff] - elsif @sz == 8 - Expression[s, :&, 0xff] - elsif @sz == 16 - Expression[s, :&, 0xffff] - else - s - end - end + # returns a symbolic representation of the register: + # eax => :eax + # cx => :ecx & 0xffff + # ah => (:eax >> 8) & 0xff + def symbolic(di=nil) + s = Sym[@val] + if @sz == 8 and to_s[-1] == ?h + Expression[[Sym[@val-4], :>>, 8], :&, 0xff] + elsif @sz == 8 + Expression[s, :&, 0xff] + elsif @sz == 16 + Expression[s, :&, 0xffff] + else + s + end + end - # checks if two registers have bits in common - def share?(other) - other.val % (other.sz >> 1) == @val % (@sz >> 1) and (other.sz != @sz or @sz != 8 
or other.val == @val) - end - end + # checks if two registers have bits in common + def share?(other) + other.val % (other.sz >> 1) == @val % (@sz >> 1) and (other.sz != @sz or @sz != 8 or other.val == @val) + end + end - # a far pointer - # an immediate (numeric) pointer and an immediate segment selector - class Farptr < Argument - attr_accessor :seg, :addr - def initialize(seg, addr) - @seg, @addr = seg, addr - end + # a far pointer + # an immediate (numeric) pointer and an immediate segment selector + class Farptr < Argument + attr_accessor :seg, :addr + def initialize(seg, addr) + @seg, @addr = seg, addr + end - def ==(o) - self.class == o.class and seg == o.seg and addr == o.addr - end - end + def ==(o) + self.class == o.class and seg == o.seg and addr == o.addr + end + end - # ModRM represents indirections in x86 (eg dword ptr [eax+4*ebx+12h]) - class ModRM < Argument - # valid combinaisons for a modrm - # ints are reg indexes, symbols are immediates, except :sib - Sum = { - 16 => { - 0 => [ [3, 6], [3, 7], [5, 6], [5, 7], [6], [7], [:i16], [3] ], - 1 => [ [3, 6, :i8 ], [3, 7, :i8 ], [5, 6, :i8 ], [5, 7, :i8 ], [6, :i8 ], [7, :i8 ], [5, :i8 ], [3, :i8 ] ], - 2 => [ [3, 6, :i16], [3, 7, :i16], [5, 6, :i16], [5, 7, :i16], [6, :i16], [7, :i16], [5, :i16], [3, :i16] ] - }, - 32 => { - 0 => [ [0], [1], [2], [3], [:sib], [:i32], [6], [7] ], - 1 => [ [0, :i8 ], [1, :i8 ], [2, :i8 ], [3, :i8 ], [:sib, :i8 ], [5, :i8 ], [6, :i8 ], [7, :i8 ] ], - 2 => [ [0, :i32], [1, :i32], [2, :i32], [3, :i32], [:sib, :i32], [5, :i32], [6, :i32], [7, :i32] ] - } - } + # ModRM represents indirections in x86 (eg dword ptr [eax+4*ebx+12h]) + class ModRM < Argument + # valid combinaisons for a modrm + # ints are reg indexes, symbols are immediates, except :sib + Sum = { + 16 => { + 0 => [ [3, 6], [3, 7], [5, 6], [5, 7], [6], [7], [:i16], [3] ], + 1 => [ [3, 6, :i8 ], [3, 7, :i8 ], [5, 6, :i8 ], [5, 7, :i8 ], [6, :i8 ], [7, :i8 ], [5, :i8 ], [3, :i8 ] ], + 2 => [ [3, 6, :i16], [3, 7, 
:i16], [5, 6, :i16], [5, 7, :i16], [6, :i16], [7, :i16], [5, :i16], [3, :i16] ] + }, + 32 => { + 0 => [ [0], [1], [2], [3], [:sib], [:i32], [6], [7] ], + 1 => [ [0, :i8 ], [1, :i8 ], [2, :i8 ], [3, :i8 ], [:sib, :i8 ], [5, :i8 ], [6, :i8 ], [7, :i8 ] ], + 2 => [ [0, :i32], [1, :i32], [2, :i32], [3, :i32], [:sib, :i32], [5, :i32], [6, :i32], [7, :i32] ] + } + } - attr_accessor :adsz, :sz - attr_accessor :seg - attr_accessor :s, :i, :b, :imm + attr_accessor :adsz, :sz + attr_accessor :seg + attr_accessor :s, :i, :b, :imm - # creates a new ModRM with the specified attributes: - # - adsz (16/32), sz (8/16/32: byte ptr, word ptr, dword ptr) - # - s, i, b, imm - # - segment selector override - def initialize(adsz, sz, s, i, b, imm, seg = nil) - @adsz, @sz = adsz, sz - @s, @i = s, i if i - @b = b if b - @imm = imm if imm - @seg = seg if seg - end + # creates a new ModRM with the specified attributes: + # - adsz (16/32), sz (8/16/32: byte ptr, word ptr, dword ptr) + # - s, i, b, imm + # - segment selector override + def initialize(adsz, sz, s, i, b, imm, seg = nil) + @adsz, @sz = adsz, sz + @s, @i = s, i if i + @b = b if b + @imm = imm if imm + @seg = seg if seg + end - # returns the symbolic representation of the ModRM (ie an Indirection) - # segment selectors are represented as eg "segment_base_fs" - # not present when same as implicit (ds:edx, ss:esp) - def symbolic(di=nil) - p = nil - p = Expression[p, :+, @b.symbolic(di)] if b - p = Expression[p, :+, [@s, :*, @i.symbolic(di)]] if i - p = Expression[p, :+, @imm] if imm - p = Expression["segment_base_#@seg", :+, p] if seg and seg.val != ((b && (@b.val == 4 || @b.val == 5)) ? 
2 : 3) - Indirection[p.reduce, @sz/8, (di.address if di)] - end + # returns the symbolic representation of the ModRM (ie an Indirection) + # segment selectors are represented as eg "segment_base_fs" + # not present when same as implicit (ds:edx, ss:esp) + def symbolic(di=nil) + p = nil + p = Expression[p, :+, @b.symbolic(di)] if b + p = Expression[p, :+, [@s, :*, @i.symbolic(di)]] if i + p = Expression[p, :+, @imm] if imm + p = Expression["segment_base_#@seg", :+, p] if seg and seg.val != ((b && (@b.val == 4 || @b.val == 5)) ? 2 : 3) + Indirection[p.reduce, @sz/8, (di.address if di)] + end - def ==(o) - self.class == o.class and s == o.s and i == o.i and b == o.b and imm == o.imm and seg == o.seg and adsz == o.adsz and sz == o.sz - end - end + def ==(o) + self.class == o.class and s == o.s and i == o.i and b == o.b and imm == o.imm and seg == o.seg and adsz == o.adsz and sz == o.sz + end + end - # Create a new instance of an Ia32 cpu - # arguments (any order) - # - size in bits (16, 32) [32] - # - instruction set (386, 486, pentium...) [latest] - # - endianness [:little] - def initialize(*a) - super() - @size = (a & [16, 32]).first || 32 - a.delete @size - @endianness = (a & [:big, :little]).first || :little - a.delete @endianness - @family = a.pop || :latest - raise "Invalid arguments #{a.inspect}" if not a.empty? - raise "Invalid Ia32 family #{@family.inspect}" if not respond_to?("init_#@family") - end + # Create a new instance of an Ia32 cpu + # arguments (any order) + # - size in bits (16, 32) [32] + # - instruction set (386, 486, pentium...) [latest] + # - endianness [:little] + def initialize(*a) + super() + @size = (a & [16, 32]).first || 32 + a.delete @size + @endianness = (a & [:big, :little]).first || :little + a.delete @endianness + @family = a.pop || :latest + raise "Invalid arguments #{a.inspect}" if not a.empty? 
+ raise "Invalid Ia32 family #{@family.inspect}" if not respond_to?("init_#@family") + end - # wrapper to transparently forward Ia32.new(64) to X86_64.new - def self.new(*a) - return X86_64.new(*a) if a.include? 64 and self == Ia32 - super(*a) - end + # wrapper to transparently forward Ia32.new(64) to X86_64.new + def self.new(*a) + return X86_64.new(*a) if a.include? 64 and self == Ia32 + super(*a) + end - # initializes the @opcode_list according to @family - def init_opcode_list - send("init_#@family") - @opcode_list - end + # initializes the @opcode_list according to @family + def init_opcode_list + send("init_#@family") + @opcode_list + end - # defines some preprocessor macros to say who we are: - # _M_IX86 = 500, _X86_, __i386__ - # pass any value in nodefine to just call super w/o defining anything of our own - def tune_prepro(pp, nodefine = false) - super(pp) - return if nodefine - pp.define_weak('_M_IX86', 500) - pp.define_weak('_X86_') - pp.define_weak('__i386__') - end + # defines some preprocessor macros to say who we are: + # _M_IX86 = 500, _X86_, __i386__ + # pass any value in nodefine to just call super w/o defining anything of our own + def tune_prepro(pp, nodefine = false) + super(pp) + return if nodefine + pp.define_weak('_M_IX86', 500) + pp.define_weak('_X86_') + pp.define_weak('__i386__') + end - # returns a Reg/SimdReg object if the arg is a valid register (eg 'ax' => Reg.new(0, 16)) - # returns nil if str is invalid - def str_to_reg(str) - Reg.s_to_i.has_key?(str) ? Reg.from_str(str) : SimdReg.s_to_i.has_key?(str) ? SimdReg.from_str(str) : nil - end + # returns a Reg/SimdReg object if the arg is a valid register (eg 'ax' => Reg.new(0, 16)) + # returns nil if str is invalid + def str_to_reg(str) + Reg.s_to_i.has_key?(str) ? Reg.from_str(str) : SimdReg.s_to_i.has_key?(str) ? 
SimdReg.from_str(str) : nil + end - # returns the list of Regs in the instruction arguments - # may be converted into symbols through Reg#symbolic - def instr_args_regs(i) - i = i.instruction if i.kind_of?(DecodedInstruction) - i.args.grep(Reg) - end + # returns the list of Regs in the instruction arguments + # may be converted into symbols through Reg#symbolic + def instr_args_regs(i) + i = i.instruction if i.kind_of?(DecodedInstruction) + i.args.grep(Reg) + end - # returns the list of ModRMs in the instruction arguments - # may be converted into Indirection through ModRM#symbolic - def instr_args_memoryptr(i) - i = i.instruction if i.kind_of?(DecodedInstruction) - i.args.grep(ModRM) - end + # returns the list of ModRMs in the instruction arguments + # may be converted into Indirection through ModRM#symbolic + def instr_args_memoryptr(i) + i = i.instruction if i.kind_of?(DecodedInstruction) + i.args.grep(ModRM) + end - # return the 'base' of the ModRM (Reg/nil) - def instr_args_memoryptr_getbase(mrm) - mrm.b || (mrm.i if mrm.s == 1) - end + # return the 'base' of the ModRM (Reg/nil) + def instr_args_memoryptr_getbase(mrm) + mrm.b || (mrm.i if mrm.s == 1) + end - # return the offset of the ModRM (Expression/nil) - def instr_args_memoryptr_getoffset(mrm) - mrm.imm - end + # return the offset of the ModRM (Expression/nil) + def instr_args_memoryptr_getoffset(mrm) + mrm.imm + end - # define ModRM offset (eg to changing imm into an ExpressionString) - def instr_args_memoryptr_setoffset(mrm, imm) - mrm.imm = (imm ? Expression[imm] : imm) - end + # define ModRM offset (eg to changing imm into an ExpressionString) + def instr_args_memoryptr_setoffset(mrm, imm) + mrm.imm = (imm ? 
Expression[imm] : imm) + end - def shortname - "ia32#{'_16' if @size == 16}#{'_be' if @endianness == :big}" - end + def shortname + "ia32#{'_16' if @size == 16}#{'_be' if @endianness == :big}" + end end X86 = Ia32 end diff --git a/lib/metasm/metasm/cpu/ia32/opcodes.rb b/lib/metasm/metasm/cpu/ia32/opcodes.rb index 0aa16cbf43..23f45be198 100644 --- a/lib/metasm/metasm/cpu/ia32/opcodes.rb +++ b/lib/metasm/metasm/cpu/ia32/opcodes.rb @@ -8,1417 +8,1417 @@ require 'metasm/cpu/ia32/main' module Metasm class Ia32 - def init_cpu_constants - @opcode_list ||= [] - @fields_mask.update :w => 1, :s => 1, :d => 1, :modrm => 0xC7, - :reg => 7, :eeec => 7, :eeed => 7, :eeet => 7, :seg2 => 3, :seg3 => 7, - :regfp => 7, :regmmx => 7, :regxmm => 7, :regymm => 7, - :vex_r => 1, :vex_b => 1, :vex_x => 1, :vex_w => 1, - :vex_vvvv => 0xF - @fields_mask[:seg2A] = @fields_mask[:seg2] - @fields_mask[:seg3A] = @fields_mask[:seg3] + def init_cpu_constants + @opcode_list ||= [] + @fields_mask.update :w => 1, :s => 1, :d => 1, :modrm => 0xC7, + :reg => 7, :eeec => 7, :eeed => 7, :eeet => 7, :seg2 => 3, :seg3 => 7, + :regfp => 7, :regmmx => 7, :regxmm => 7, :regymm => 7, + :vex_r => 1, :vex_b => 1, :vex_x => 1, :vex_w => 1, + :vex_vvvv => 0xF + @fields_mask[:seg2A] = @fields_mask[:seg2] + @fields_mask[:seg3A] = @fields_mask[:seg3] - [:i, :i8, :u8, :u16, :reg, :seg2, :seg2A, - :seg3, :seg3A, :eeec, :eeed, :eeet, :modrm, :mrm_imm, - :farptr, :imm_val1, :imm_val3, :reg_cl, :reg_eax, - :reg_dx, :regfp, :regfp0, :modrmmmx, :regmmx, - :modrmxmm, :regxmm, :modrmymm, :regymm, - :vexvxmm, :vexvymm, :vexvreg, :i4xmm, :i4ymm - ].each { |a| @valid_args[a] = true } + [:i, :i8, :u8, :u16, :reg, :seg2, :seg2A, + :seg3, :seg3A, :eeec, :eeed, :eeet, :modrm, :mrm_imm, + :farptr, :imm_val1, :imm_val3, :reg_cl, :reg_eax, + :reg_dx, :regfp, :regfp0, :modrmmmx, :regmmx, + :modrmxmm, :regxmm, :modrmymm, :regymm, + :vexvxmm, :vexvymm, :vexvreg, :i4xmm, :i4ymm + ].each { |a| @valid_args[a] = true } - [:strop, :stropz, 
:opsz, :adsz, :argsz, :setip, - :stopexec, :saveip, :unsigned_imm, :random, :needpfx, - :xmmx, :modrmR, :modrmA, :mrmvex - ].each { |a| @valid_props[a] = true } - end + [:strop, :stropz, :opsz, :adsz, :argsz, :setip, + :stopexec, :saveip, :unsigned_imm, :random, :needpfx, + :xmmx, :modrmR, :modrmA, :mrmvex + ].each { |a| @valid_props[a] = true } + end - # only most common instructions from the 386 instruction set - # inexhaustive list : - # no aaa, arpl, mov crX, call/jmp/ret far, in/out, bts, xchg... - def init_386_common_only - init_cpu_constants + # only most common instructions from the 386 instruction set + # inexhaustive list : + # no aaa, arpl, mov crX, call/jmp/ret far, in/out, bts, xchg... + def init_386_common_only + init_cpu_constants - addop_macro1 'adc', 2 - addop_macro1 'add', 0 - addop_macro1 'and', 4, :unsigned_imm - addop 'bswap', [0x0F, 0xC8], :reg - addop 'call', [0xE8], nil, :stopexec, :setip, :i, :saveip - addop 'call', [0xFF], 2, :stopexec, :setip, :saveip - addop('cbw', [0x98]) { |o| o.props[:opsz] = 16 } - addop('cwde', [0x98]) { |o| o.props[:opsz] = 32 } - addop('cwd', [0x99]) { |o| o.props[:opsz] = 16 } - addop('cdq', [0x99]) { |o| o.props[:opsz] = 32 } - addop_macro1 'cmp', 7 - addop_macrostr 'cmps', [0xA6], :stropz - addop 'dec', [0x48], :reg - addop 'dec', [0xFE], 1, {:w => [0, 0]} - addop 'div', [0xF6], 6, {:w => [0, 0]} - addop 'enter', [0xC8], nil, :u16, :u8 - addop 'idiv', [0xF6], 7, {:w => [0, 0]} - addop 'imul', [0xF6], 5, {:w => [0, 0]} # implicit eax, but different semantic from imul eax, ebx (the implicit version updates edx:eax) - addop 'imul', [0x0F, 0xAF], :mrm - addop 'imul', [0x69], :mrm, {:s => [0, 1]}, :i - addop 'inc', [0x40], :reg - addop 'inc', [0xFE], 0, {:w => [0, 0]} - addop 'int', [0xCC], nil, :imm_val3, :stopexec - addop 'int', [0xCD], nil, :u8 - addop_macrotttn 'j', [0x70], nil, :setip, :i8 - addop_macrotttn('j', [0x70], nil, :setip, :i8) { |o| o.name << '.i8' } - addop_macrotttn 'j', [0x0F, 0x80], nil, :setip, 
:i - addop_macrotttn('j', [0x0F, 0x80], nil, :setip, :i) { |o| o.name << '.i' } - addop 'jmp', [0xE9], nil, {:s => [0, 1]}, :setip, :i, :stopexec - addop 'jmp', [0xFF], 4, :setip, :stopexec - addop 'lea', [0x8D], :mrmA - addop 'leave', [0xC9] - addop_macrostr 'lods', [0xAC], :strop - addop 'loop', [0xE2], nil, :setip, :i8 - addop 'loopz', [0xE1], nil, :setip, :i8 - addop 'loope', [0xE1], nil, :setip, :i8 - addop 'loopnz',[0xE0], nil, :setip, :i8 - addop 'loopne',[0xE0], nil, :setip, :i8 - addop 'mov', [0xA0], nil, {:w => [0, 0], :d => [0, 1]}, :reg_eax, :mrm_imm - addop('mov', [0x88], :mrmw,{:d => [0, 1]}) { |o| o.args.reverse! } - addop 'mov', [0xB0], :reg, {:w => [0, 3]}, :i, :unsigned_imm - addop 'mov', [0xC6], 0, {:w => [0, 0]}, :i, :unsigned_imm - addop_macrostr 'movs', [0xA4], :strop - addop 'movsx', [0x0F, 0xBE], :mrmw - addop 'movzx', [0x0F, 0xB6], :mrmw - addop 'mul', [0xF6], 4, {:w => [0, 0]} - addop 'neg', [0xF6], 3, {:w => [0, 0]} - addop 'nop', [0x90] - addop 'not', [0xF6], 2, {:w => [0, 0]} - addop_macro1 'or', 1, :unsigned_imm - addop 'pop', [0x58], :reg - addop 'pop', [0x8F], 0 - addop 'push', [0x50], :reg - addop 'push', [0xFF], 6 - addop 'push', [0x68], nil, {:s => [0, 1]}, :i, :unsigned_imm - addop 'ret', [0xC3], nil, :stopexec, :setip - addop 'ret', [0xC2], nil, :stopexec, :u16, :setip - addop_macro3 'rol', 0 - addop_macro3 'ror', 1 - addop_macro3 'sar', 7 - addop_macro1 'sbb', 3 - addop_macrostr 'scas', [0xAE], :stropz - addop_macrotttn('set', [0x0F, 0x90], 0) { |o| o.props[:argsz] = 8 } - addop_macrotttn('set', [0x0F, 0x90], :mrm) { |o| o.props[:argsz] = 8 ; o.args.reverse! 
} # :reg field is unused - addop_macro3 'shl', 4 - addop_macro3 'sal', 6 - addop 'shld', [0x0F, 0xA4], :mrm, :u8 - addop 'shld', [0x0F, 0xA5], :mrm, :reg_cl - addop_macro3 'shr', 5 - addop 'shrd', [0x0F, 0xAC], :mrm, :u8 - addop 'shrd', [0x0F, 0xAD], :mrm, :reg_cl - addop_macrostr 'stos', [0xAA], :strop - addop_macro1 'sub', 5 - addop 'test', [0x84], :mrmw - addop 'test', [0xA8], nil, {:w => [0, 0]}, :reg_eax, :i, :unsigned_imm - addop 'test', [0xF6], 0, {:w => [0, 0]}, :i, :unsigned_imm - addop 'xchg', [0x90], :reg, :reg_eax - addop('xchg', [0x90], :reg, :reg_eax) { |o| o.args.reverse! } # xchg eax, ebx == xchg ebx, eax) - addop 'xchg', [0x86], :mrmw - addop('xchg', [0x86], :mrmw) { |o| o.args.reverse! } - addop_macro1 'xor', 6, :unsigned_imm - end + addop_macro1 'adc', 2 + addop_macro1 'add', 0 + addop_macro1 'and', 4, :unsigned_imm + addop 'bswap', [0x0F, 0xC8], :reg + addop 'call', [0xE8], nil, :stopexec, :setip, :i, :saveip + addop 'call', [0xFF], 2, :stopexec, :setip, :saveip + addop('cbw', [0x98]) { |o| o.props[:opsz] = 16 } + addop('cwde', [0x98]) { |o| o.props[:opsz] = 32 } + addop('cwd', [0x99]) { |o| o.props[:opsz] = 16 } + addop('cdq', [0x99]) { |o| o.props[:opsz] = 32 } + addop_macro1 'cmp', 7 + addop_macrostr 'cmps', [0xA6], :stropz + addop 'dec', [0x48], :reg + addop 'dec', [0xFE], 1, {:w => [0, 0]} + addop 'div', [0xF6], 6, {:w => [0, 0]} + addop 'enter', [0xC8], nil, :u16, :u8 + addop 'idiv', [0xF6], 7, {:w => [0, 0]} + addop 'imul', [0xF6], 5, {:w => [0, 0]} # implicit eax, but different semantic from imul eax, ebx (the implicit version updates edx:eax) + addop 'imul', [0x0F, 0xAF], :mrm + addop 'imul', [0x69], :mrm, {:s => [0, 1]}, :i + addop 'inc', [0x40], :reg + addop 'inc', [0xFE], 0, {:w => [0, 0]} + addop 'int', [0xCC], nil, :imm_val3, :stopexec + addop 'int', [0xCD], nil, :u8 + addop_macrotttn 'j', [0x70], nil, :setip, :i8 + addop_macrotttn('j', [0x70], nil, :setip, :i8) { |o| o.name << '.i8' } + addop_macrotttn 'j', [0x0F, 0x80], nil, 
:setip, :i + addop_macrotttn('j', [0x0F, 0x80], nil, :setip, :i) { |o| o.name << '.i' } + addop 'jmp', [0xE9], nil, {:s => [0, 1]}, :setip, :i, :stopexec + addop 'jmp', [0xFF], 4, :setip, :stopexec + addop 'lea', [0x8D], :mrmA + addop 'leave', [0xC9] + addop_macrostr 'lods', [0xAC], :strop + addop 'loop', [0xE2], nil, :setip, :i8 + addop 'loopz', [0xE1], nil, :setip, :i8 + addop 'loope', [0xE1], nil, :setip, :i8 + addop 'loopnz',[0xE0], nil, :setip, :i8 + addop 'loopne',[0xE0], nil, :setip, :i8 + addop 'mov', [0xA0], nil, {:w => [0, 0], :d => [0, 1]}, :reg_eax, :mrm_imm + addop('mov', [0x88], :mrmw,{:d => [0, 1]}) { |o| o.args.reverse! } + addop 'mov', [0xB0], :reg, {:w => [0, 3]}, :i, :unsigned_imm + addop 'mov', [0xC6], 0, {:w => [0, 0]}, :i, :unsigned_imm + addop_macrostr 'movs', [0xA4], :strop + addop 'movsx', [0x0F, 0xBE], :mrmw + addop 'movzx', [0x0F, 0xB6], :mrmw + addop 'mul', [0xF6], 4, {:w => [0, 0]} + addop 'neg', [0xF6], 3, {:w => [0, 0]} + addop 'nop', [0x90] + addop 'not', [0xF6], 2, {:w => [0, 0]} + addop_macro1 'or', 1, :unsigned_imm + addop 'pop', [0x58], :reg + addop 'pop', [0x8F], 0 + addop 'push', [0x50], :reg + addop 'push', [0xFF], 6 + addop 'push', [0x68], nil, {:s => [0, 1]}, :i, :unsigned_imm + addop 'ret', [0xC3], nil, :stopexec, :setip + addop 'ret', [0xC2], nil, :stopexec, :u16, :setip + addop_macro3 'rol', 0 + addop_macro3 'ror', 1 + addop_macro3 'sar', 7 + addop_macro1 'sbb', 3 + addop_macrostr 'scas', [0xAE], :stropz + addop_macrotttn('set', [0x0F, 0x90], 0) { |o| o.props[:argsz] = 8 } + addop_macrotttn('set', [0x0F, 0x90], :mrm) { |o| o.props[:argsz] = 8 ; o.args.reverse! 
} # :reg field is unused + addop_macro3 'shl', 4 + addop_macro3 'sal', 6 + addop 'shld', [0x0F, 0xA4], :mrm, :u8 + addop 'shld', [0x0F, 0xA5], :mrm, :reg_cl + addop_macro3 'shr', 5 + addop 'shrd', [0x0F, 0xAC], :mrm, :u8 + addop 'shrd', [0x0F, 0xAD], :mrm, :reg_cl + addop_macrostr 'stos', [0xAA], :strop + addop_macro1 'sub', 5 + addop 'test', [0x84], :mrmw + addop 'test', [0xA8], nil, {:w => [0, 0]}, :reg_eax, :i, :unsigned_imm + addop 'test', [0xF6], 0, {:w => [0, 0]}, :i, :unsigned_imm + addop 'xchg', [0x90], :reg, :reg_eax + addop('xchg', [0x90], :reg, :reg_eax) { |o| o.args.reverse! } # xchg eax, ebx == xchg ebx, eax) + addop 'xchg', [0x86], :mrmw + addop('xchg', [0x86], :mrmw) { |o| o.args.reverse! } + addop_macro1 'xor', 6, :unsigned_imm + end - def init_386_only - init_cpu_constants + def init_386_only + init_cpu_constants - addop 'aaa', [0x37] - addop 'aad', [0xD5, 0x0A] - addop 'aam', [0xD4, 0x0A] - addop 'aas', [0x3F] - addop('arpl', [0x63], :mrm) { |o| o.props[:argsz] = 16 ; o.args.reverse! } - addop 'bound', [0x62], :mrmA - addop 'bsf', [0x0F, 0xBC], :mrm - addop 'bsr', [0x0F, 0xBD], :mrm - addop_macro2 'bt' , 0 - addop_macro2 'btc', 3 - addop_macro2 'btr', 2 - addop_macro2 'bts', 1 - addop 'call', [0x9A], nil, :stopexec, :setip, :farptr, :saveip - addop 'callf', [0x9A], nil, :stopexec, :setip, :farptr, :saveip - addop 'callf', [0xFF], 3, :stopexec, :setip, :saveip - addop 'clc', [0xF8] - addop 'cld', [0xFC] - addop 'cli', [0xFA] - addop 'clts', [0x0F, 0x06] - addop 'cmc', [0xF5] - addop('cmpxchg',[0x0F, 0xB0], :mrmw) { |o| o.args.reverse! 
} - addop 'cpuid', [0x0F, 0xA2] - addop 'daa', [0x27] - addop 'das', [0x2F] - addop 'hlt', [0xF4], nil, :stopexec - addop 'in', [0xE4], nil, {:w => [0, 0]}, :reg_eax, :u8 - addop 'in', [0xE4], nil, {:w => [0, 0]}, :u8 - addop 'in', [0xEC], nil, {:w => [0, 0]}, :reg_eax, :reg_dx - addop 'in', [0xEC], nil, {:w => [0, 0]}, :reg_eax - addop 'in', [0xEC], nil, {:w => [0, 0]} - addop_macrostr 'ins', [0x6C], :strop - addop 'into', [0xCE] - addop 'invd', [0x0F, 0x08] - addop 'invlpg', [0x0F, 0x01, 7<<3], :modrmA - addop('iretd', [0xCF], nil, :stopexec, :setip) { |o| o.props[:opsz] = 32 } - addop_macroret 'iret', [0xCF] - addop('jcxz', [0xE3], nil, :setip, :i8) { |o| o.props[:adsz] = 16 } - addop('jecxz', [0xE3], nil, :setip, :i8) { |o| o.props[:adsz] = 32 } - addop 'jmp', [0xEA], nil, :farptr, :setip, :stopexec - addop 'jmpf', [0xEA], nil, :farptr, :setip, :stopexec - addop 'jmpf', [0xFF], 5, :stopexec, :setip # reg ? - addop 'lahf', [0x9F] - addop 'lar', [0x0F, 0x02], :mrm - addop 'lds', [0xC5], :mrmA - addop 'les', [0xC4], :mrmA - addop 'lfs', [0x0F, 0xB4], :mrmA - addop 'lgs', [0x0F, 0xB5], :mrmA - addop 'lgdt', [0x0F, 0x01], 2, :modrmA - addop 'lidt', [0x0F, 0x01, 3<<3], :modrmA - addop 'lldt', [0x0F, 0x00], 2, :modrmA - addop 'lmsw', [0x0F, 0x01], 6 + addop 'aaa', [0x37] + addop 'aad', [0xD5, 0x0A] + addop 'aam', [0xD4, 0x0A] + addop 'aas', [0x3F] + addop('arpl', [0x63], :mrm) { |o| o.props[:argsz] = 16 ; o.args.reverse! 
} + addop 'bound', [0x62], :mrmA + addop 'bsf', [0x0F, 0xBC], :mrm + addop 'bsr', [0x0F, 0xBD], :mrm + addop_macro2 'bt' , 0 + addop_macro2 'btc', 3 + addop_macro2 'btr', 2 + addop_macro2 'bts', 1 + addop 'call', [0x9A], nil, :stopexec, :setip, :farptr, :saveip + addop 'callf', [0x9A], nil, :stopexec, :setip, :farptr, :saveip + addop 'callf', [0xFF], 3, :stopexec, :setip, :saveip + addop 'clc', [0xF8] + addop 'cld', [0xFC] + addop 'cli', [0xFA] + addop 'clts', [0x0F, 0x06] + addop 'cmc', [0xF5] + addop('cmpxchg',[0x0F, 0xB0], :mrmw) { |o| o.args.reverse! } + addop 'cpuid', [0x0F, 0xA2] + addop 'daa', [0x27] + addop 'das', [0x2F] + addop 'hlt', [0xF4], nil, :stopexec + addop 'in', [0xE4], nil, {:w => [0, 0]}, :reg_eax, :u8 + addop 'in', [0xE4], nil, {:w => [0, 0]}, :u8 + addop 'in', [0xEC], nil, {:w => [0, 0]}, :reg_eax, :reg_dx + addop 'in', [0xEC], nil, {:w => [0, 0]}, :reg_eax + addop 'in', [0xEC], nil, {:w => [0, 0]} + addop_macrostr 'ins', [0x6C], :strop + addop 'into', [0xCE] + addop 'invd', [0x0F, 0x08] + addop 'invlpg', [0x0F, 0x01, 7<<3], :modrmA + addop('iretd', [0xCF], nil, :stopexec, :setip) { |o| o.props[:opsz] = 32 } + addop_macroret 'iret', [0xCF] + addop('jcxz', [0xE3], nil, :setip, :i8) { |o| o.props[:adsz] = 16 } + addop('jecxz', [0xE3], nil, :setip, :i8) { |o| o.props[:adsz] = 32 } + addop 'jmp', [0xEA], nil, :farptr, :setip, :stopexec + addop 'jmpf', [0xEA], nil, :farptr, :setip, :stopexec + addop 'jmpf', [0xFF], 5, :stopexec, :setip # reg ? 
+ addop 'lahf', [0x9F] + addop 'lar', [0x0F, 0x02], :mrm + addop 'lds', [0xC5], :mrmA + addop 'les', [0xC4], :mrmA + addop 'lfs', [0x0F, 0xB4], :mrmA + addop 'lgs', [0x0F, 0xB5], :mrmA + addop 'lgdt', [0x0F, 0x01], 2, :modrmA + addop 'lidt', [0x0F, 0x01, 3<<3], :modrmA + addop 'lldt', [0x0F, 0x00], 2, :modrmA + addop 'lmsw', [0x0F, 0x01], 6 # prefix addop 'lock', [0xF0] - addop 'lsl', [0x0F, 0x03], :mrm - addop 'lss', [0x0F, 0xB2], :mrmA - addop 'ltr', [0x0F, 0x00], 3 - addop 'mov', [0x0F, 0x20, 0xC0], :reg, {:d => [1, 1], :eeec => [2, 3]}, :eeec - addop 'mov', [0x0F, 0x21, 0xC0], :reg, {:d => [1, 1], :eeed => [2, 3]}, :eeed - addop 'mov', [0x0F, 0x24, 0xC0], :reg, {:d => [1, 1], :eeet => [2, 3]}, :eeet - addop 'mov', [0x8C], 0, {:d => [0, 1], :seg3 => [1, 3]}, :seg3 - addop 'movbe', [0x0F, 0x38, 0xF0], :mrm, { :d => [2, 0] } - addop 'out', [0xE6], nil, {:w => [0, 0]}, :u8, :reg_eax - addop 'out', [0xE6], nil, {:w => [0, 0]}, :reg_eax, :u8 - addop 'out', [0xE6], nil, {:w => [0, 0]}, :u8 - addop 'out', [0xEE], nil, {:w => [0, 0]}, :reg_dx, :reg_eax - addop 'out', [0xEE], nil, {:w => [0, 0]}, :reg_eax, :reg_dx - addop 'out', [0xEE], nil, {:w => [0, 0]}, :reg_eax # implicit arguments - addop 'out', [0xEE], nil, {:w => [0, 0]} - addop_macrostr 'outs', [0x6E], :strop - addop 'pop', [0x07], nil, {:seg2A => [0, 3]}, :seg2A - addop 'pop', [0x0F, 0x81], nil, {:seg3A => [1, 3]}, :seg3A - addop('popa', [0x61]) { |o| o.props[:opsz] = 16 } - addop('popad', [0x61]) { |o| o.props[:opsz] = 32 } - addop('popf', [0x9D]) { |o| o.props[:opsz] = 16 } - addop('popfd', [0x9D]) { |o| o.props[:opsz] = 32 } - addop 'push', [0x06], nil, {:seg2 => [0, 3]}, :seg2 - addop 'push', [0x0F, 0x80], nil, {:seg3A => [1, 3]}, :seg3A - addop('pusha', [0x60]) { |o| o.props[:opsz] = 16 } - addop('pushad',[0x60]) { |o| o.props[:opsz] = 32 } - addop('pushf', [0x9C]) { |o| o.props[:opsz] = 16 } - addop('pushfd',[0x9C]) { |o| o.props[:opsz] = 32 } - addop_macro3 'rcl', 2 - addop_macro3 'rcr', 3 - addop 
'rdmsr', [0x0F, 0x32] - addop 'rdpmc', [0x0F, 0x33] - addop 'rdtsc', [0x0F, 0x31], nil, :random - addop_macroret 'retf', [0xCB] - addop_macroret 'retf', [0xCA], :u16 - addop 'rsm', [0x0F, 0xAA], nil, :stopexec - addop 'sahf', [0x9E] - addop 'sgdt', [0x0F, 0x01, 0<<3], :modrmA - addop 'sidt', [0x0F, 0x01, 1<<3], :modrmA - addop 'sldt', [0x0F, 0x00], 0 - addop 'smsw', [0x0F, 0x01], 4 - addop 'stc', [0xF9] - addop 'std', [0xFD] - addop 'sti', [0xFB] - addop 'str', [0x0F, 0x00], 1 - addop 'test', [0xF6], 1, {:w => [0, 0]}, :i, :unsigned_imm # undocumented alias to F6/0 - addop 'ud2', [0x0F, 0x0B] - addop 'verr', [0x0F, 0x00], 4 - addop 'verw', [0x0F, 0x00], 5 - addop 'wait', [0x9B] - addop 'wbinvd',[0x0F, 0x09] - addop 'wrmsr', [0x0F, 0x30] - addop('xadd', [0x0F, 0xC0], :mrmw) { |o| o.args.reverse! } - addop 'xlat', [0xD7] + addop 'lsl', [0x0F, 0x03], :mrm + addop 'lss', [0x0F, 0xB2], :mrmA + addop 'ltr', [0x0F, 0x00], 3 + addop 'mov', [0x0F, 0x20, 0xC0], :reg, {:d => [1, 1], :eeec => [2, 3]}, :eeec + addop 'mov', [0x0F, 0x21, 0xC0], :reg, {:d => [1, 1], :eeed => [2, 3]}, :eeed + addop 'mov', [0x0F, 0x24, 0xC0], :reg, {:d => [1, 1], :eeet => [2, 3]}, :eeet + addop 'mov', [0x8C], 0, {:d => [0, 1], :seg3 => [1, 3]}, :seg3 + addop 'movbe', [0x0F, 0x38, 0xF0], :mrm, { :d => [2, 0] } + addop 'out', [0xE6], nil, {:w => [0, 0]}, :u8, :reg_eax + addop 'out', [0xE6], nil, {:w => [0, 0]}, :reg_eax, :u8 + addop 'out', [0xE6], nil, {:w => [0, 0]}, :u8 + addop 'out', [0xEE], nil, {:w => [0, 0]}, :reg_dx, :reg_eax + addop 'out', [0xEE], nil, {:w => [0, 0]}, :reg_eax, :reg_dx + addop 'out', [0xEE], nil, {:w => [0, 0]}, :reg_eax # implicit arguments + addop 'out', [0xEE], nil, {:w => [0, 0]} + addop_macrostr 'outs', [0x6E], :strop + addop 'pop', [0x07], nil, {:seg2A => [0, 3]}, :seg2A + addop 'pop', [0x0F, 0x81], nil, {:seg3A => [1, 3]}, :seg3A + addop('popa', [0x61]) { |o| o.props[:opsz] = 16 } + addop('popad', [0x61]) { |o| o.props[:opsz] = 32 } + addop('popf', [0x9D]) { |o| 
o.props[:opsz] = 16 } + addop('popfd', [0x9D]) { |o| o.props[:opsz] = 32 } + addop 'push', [0x06], nil, {:seg2 => [0, 3]}, :seg2 + addop 'push', [0x0F, 0x80], nil, {:seg3A => [1, 3]}, :seg3A + addop('pusha', [0x60]) { |o| o.props[:opsz] = 16 } + addop('pushad',[0x60]) { |o| o.props[:opsz] = 32 } + addop('pushf', [0x9C]) { |o| o.props[:opsz] = 16 } + addop('pushfd',[0x9C]) { |o| o.props[:opsz] = 32 } + addop_macro3 'rcl', 2 + addop_macro3 'rcr', 3 + addop 'rdmsr', [0x0F, 0x32] + addop 'rdpmc', [0x0F, 0x33] + addop 'rdtsc', [0x0F, 0x31], nil, :random + addop_macroret 'retf', [0xCB] + addop_macroret 'retf', [0xCA], :u16 + addop 'rsm', [0x0F, 0xAA], nil, :stopexec + addop 'sahf', [0x9E] + addop 'sgdt', [0x0F, 0x01, 0<<3], :modrmA + addop 'sidt', [0x0F, 0x01, 1<<3], :modrmA + addop 'sldt', [0x0F, 0x00], 0 + addop 'smsw', [0x0F, 0x01], 4 + addop 'stc', [0xF9] + addop 'std', [0xFD] + addop 'sti', [0xFB] + addop 'str', [0x0F, 0x00], 1 + addop 'test', [0xF6], 1, {:w => [0, 0]}, :i, :unsigned_imm # undocumented alias to F6/0 + addop 'ud2', [0x0F, 0x0B] + addop 'verr', [0x0F, 0x00], 4 + addop 'verw', [0x0F, 0x00], 5 + addop 'wait', [0x9B] + addop 'wbinvd',[0x0F, 0x09] + addop 'wrmsr', [0x0F, 0x30] + addop('xadd', [0x0F, 0xC0], :mrmw) { |o| o.args.reverse! 
} + addop 'xlat', [0xD7] # pfx: addrsz = 0x67, lock = 0xF0, opsz = 0x66, repnz = 0xF2, rep/repz = 0xF3 # cs/nojmp = 0x2E, ds/jmp = 0x3E, es = 0x26, fs = 0x64, gs = 0x65, ss = 0x36 - # undocumented opcodes - addop 'aam', [0xD4], nil, :u8 - addop 'aad', [0xD5], nil, :u8 - addop 'setalc',[0xD6] - addop 'salc', [0xD6] - addop 'icebp', [0xF1] - #addop 'loadall',[0x0F, 0x07] # conflict with syscall - addop 'ud0', [0x0F, 0xFF] # amd - addop 'ud2', [0x0F, 0xB9], :mrm - #addop 'umov', [0x0F, 0x10], :mrmw, {:d => [1, 1]} # conflicts with movups/movhlps - end - - def init_387_only - init_cpu_constants - - addop 'f2xm1', [0xD9, 0xF0] - addop 'fabs', [0xD9, 0xE1] - addop_macrofpu1 'fadd', 0 - addop 'faddp', [0xDE, 0xC0], :regfp - addop 'faddp', [0xDE, 0xC1] - addop('fbld', [0xDF, 4<<3], :modrmA, :regfp0) { |o| o.props[:argsz] = 80 } - addop('fbstp', [0xDF, 6<<3], :modrmA, :regfp0) { |o| o.props[:argsz] = 80 } - addop 'fchs', [0xD9, 0xE0], nil, :regfp0 - addop 'fnclex', [0xDB, 0xE2] - addop_macrofpu1 'fcom', 2 - addop_macrofpu1 'fcomp', 3 - addop 'fcompp',[0xDE, 0xD9] - addop 'fcomip',[0xDF, 0xF0], :regfp - addop 'fcos', [0xD9, 0xFF], nil, :regfp0 - addop 'fdecstp', [0xD9, 0xF6] - addop_macrofpu1 'fdiv', 6 - addop_macrofpu1 'fdivr', 7 - addop 'fdivp', [0xDE, 0xF8], :regfp - addop 'fdivp', [0xDE, 0xF9] - addop 'fdivrp',[0xDE, 0xF0], :regfp - addop 'fdivrp',[0xDE, 0xF1] - addop 'ffree', [0xDD, 0xC0], nil, {:regfp => [1, 0]}, :regfp - addop_macrofpu2 'fiadd', 0 - addop_macrofpu2 'fimul', 1 - addop_macrofpu2 'ficom', 2 - addop_macrofpu2 'ficomp',3 - addop_macrofpu2 'fisub', 4 - addop_macrofpu2 'fisubr',5 - addop_macrofpu2 'fidiv', 6 - addop_macrofpu2 'fidivr',7 - addop 'fincstp', [0xD9, 0xF7] - addop 'fninit', [0xDB, 0xE3] - addop_macrofpu2 'fist', 2, 1 - addop_macrofpu3 'fild', 0 - addop_macrofpu3 'fistp',3 - addop('fld', [0xD9, 0<<3], :modrmA, :regfp0) { |o| o.props[:argsz] = 32 } - addop('fld', [0xDD, 0<<3], :modrmA, :regfp0) { |o| o.props[:argsz] = 64 } - addop('fld', [0xDB, 
5<<3], :modrmA, :regfp0) { |o| o.props[:argsz] = 80 } - addop 'fld', [0xD9, 0xC0], :regfp - - addop('fldcw', [0xD9, 5<<3], :modrmA) { |o| o.props[:argsz] = 16 } - addop 'fldenv', [0xD9, 4<<3], :modrmA - addop 'fld1', [0xD9, 0xE8] - addop 'fldl2t', [0xD9, 0xE9] - addop 'fldl2e', [0xD9, 0xEA] - addop 'fldpi', [0xD9, 0xEB] - addop 'fldlg2', [0xD9, 0xEC] - addop 'fldln2', [0xD9, 0xED] - addop 'fldz', [0xD9, 0xEE] - addop_macrofpu1 'fmul', 1 - addop 'fmulp', [0xDE, 0xC8], :regfp - addop 'fmulp', [0xDE, 0xC9] - addop 'fnop', [0xD9, 0xD0] - addop 'fpatan', [0xD9, 0xF3] - addop 'fprem', [0xD9, 0xF8] - addop 'fprem1', [0xD9, 0xF5] - addop 'fptan', [0xD9, 0xF2] - addop 'frndint',[0xD9, 0xFC] - addop 'frstor', [0xDD, 4<<3], :modrmA - addop 'fnsave', [0xDD, 6<<3], :modrmA - addop('fnstcw', [0xD9, 7<<3], :modrmA) { |o| o.props[:argsz] = 16 } - addop 'fnstenv',[0xD9, 6<<3], :modrmA - addop 'fnstsw', [0xDF, 0xE0] - addop('fnstsw', [0xDD, 7<<3], :modrmA) { |o| o.props[:argsz] = 16 } - addop 'fscale', [0xD9, 0xFD] - addop 'fsin', [0xD9, 0xFE] - addop 'fsincos',[0xD9, 0xFB] - addop 'fsqrt', [0xD9, 0xFA] - addop('fst', [0xD9, 2<<3], :modrmA, :regfp0) { |o| o.props[:argsz] = 32 } - addop('fst', [0xDD, 2<<3], :modrmA, :regfp0) { |o| o.props[:argsz] = 64 } - addop 'fst', [0xD9, 0xD0], :regfp - addop('fstp', [0xD9, 3<<3], :modrmA, :regfp0) { |o| o.props[:argsz] = 32 } - addop('fstp', [0xDD, 3<<3], :modrmA, :regfp0) { |o| o.props[:argsz] = 64 } - addop('fstp', [0xDB, 7<<3], :modrmA, :regfp0) { |o| o.props[:argsz] = 80 } - addop 'fstp', [0xDD, 0xD8], :regfp - addop_macrofpu1 'fsub', 4 - addop 'fsubp', [0xDE, 0xE8], :regfp - addop 'fsubp', [0xDE, 0xE9] - addop_macrofpu1 'fsubp', 5 - addop 'fsubrp', [0xDE, 0xE0], :regfp - addop 'fsubrp', [0xDE, 0xE1] - addop 'ftst', [0xD9, 0xE4] - addop 'fucom', [0xDD, 0xE0], :regfp - addop 'fucomp', [0xDD, 0xE8], :regfp - addop 'fucompp',[0xDA, 0xE9] - addop 'fucomi', [0xDB, 0xE8], :regfp - addop 'fxam', [0xD9, 0xE5] - addop 'fxch', [0xD9, 0xC8], :regfp - 
addop 'fxtract',[0xD9, 0xF4] - addop 'fyl2x', [0xD9, 0xF1] - addop 'fyl2xp1',[0xD9, 0xF9] - # fwait prefix - addop 'fclex', [0x9B, 0xDB, 0xE2] - addop 'finit', [0x9B, 0xDB, 0xE3] - addop 'fsave', [0x9B, 0xDD, 6<<3], :modrmA - addop('fstcw', [0x9B, 0xD9, 7<<3], :modrmA) { |o| o.props[:argsz] = 16 } - addop 'fstenv', [0x9B, 0xD9, 6<<3], :modrmA - addop 'fstsw', [0x9B, 0xDF, 0xE0] - addop('fstsw', [0x9B, 0xDD, 7<<3], :modrmA) { |o| o.props[:argsz] = 16 } - addop 'fwait', [0x9B] - end - - def init_486_only - init_cpu_constants - end - - def init_pentium_only - init_cpu_constants - - addop('cmpxchg8b', [0x0F, 0xC7], 1) { |o| o.props[:opsz] = 32 ; o.props[:argsz] = 64 } - # lock cmpxchg8b eax - #addop 'f00fbug', [0xF0, 0x0F, 0xC7, 0xC8] - - # mmx - addop 'emms', [0x0F, 0x77] - addop('movd', [0x0F, 0x6E], :mrmmmx, {:d => [1, 4]}) { |o| o.args = [:modrm, :regmmx] ; o.props[:opsz] = o.props[:argsz] = 32 } - addop('movq', [0x0F, 0x6F], :mrmmmx, {:d => [1, 4]}) { |o| o.props[:argsz] = 64 } - addop 'packssdw', [0x0F, 0x6B], :mrmmmx - addop 'packsswb', [0x0F, 0x63], :mrmmmx - addop 'packuswb', [0x0F, 0x67], :mrmmmx - addop_macrogg 0..2, 'padd', [0x0F, 0xFC], :mrmmmx - addop_macrogg 0..1, 'padds', [0x0F, 0xEC], :mrmmmx - addop_macrogg 0..1, 'paddus',[0x0F, 0xDC], :mrmmmx - addop 'pand', [0x0F, 0xDB], :mrmmmx - addop 'pandn', [0x0F, 0xDF], :mrmmmx - addop_macrogg 0..2, 'pcmpeq',[0x0F, 0x74], :mrmmmx - addop_macrogg 0..2, 'pcmpgt',[0x0F, 0x64], :mrmmmx - addop 'pmaddwd', [0x0F, 0xF5], :mrmmmx - addop 'pmulhuw', [0x0F, 0xE4], :mrmmmx - addop 'pmulhw',[0x0F, 0xE5], :mrmmmx - addop 'pmullw',[0x0F, 0xD5], :mrmmmx - addop 'por', [0x0F, 0xEB], :mrmmmx - [[1..3, 'psll', 3], [1..2, 'psra', 2], [1..3, 'psrl', 1]].each { |ggrng, name, val| - addop_macrogg ggrng, name, [0x0F, 0xC0 | (val << 4)], :mrmmmx - addop_macrogg ggrng, name, [0x0F, 0x70, 0xC0 | (val << 4)], nil, {:regmmx => [2, 0]}, :regmmx, :u8 - } - addop_macrogg 0..2, 'psub', [0x0F, 0xF8], :mrmmmx - addop_macrogg 0..1, 'psubs', 
[0x0F, 0xE8], :mrmmmx - addop_macrogg 0..1, 'psubus',[0x0F, 0xD8], :mrmmmx - addop_macrogg 1..3, 'punpckh', [0x0F, 0x68], :mrmmmx - addop_macrogg 1..3, 'punpckl', [0x0F, 0x60], :mrmmmx - addop 'pxor', [0x0F, 0xEF], :mrmmmx - end - - def init_p6_only - addop_macrotttn 'cmov', [0x0F, 0x40], :mrm - - %w{b e be u}.each_with_index { |tt, i| - addop 'fcmov' + tt, [0xDA, 0xC0 | (i << 3)], :regfp - addop 'fcmovn'+ tt, [0xDB, 0xC0 | (i << 3)], :regfp - } - addop 'fcomi', [0xDB, 0xF0], :regfp - addop('fxrstor', [0x0F, 0xAE, 1<<3], :modrmA) { |o| o.props[:argsz] = 512*8 } - addop('fxsave', [0x0F, 0xAE, 0<<3], :modrmA) { |o| o.props[:argsz] = 512*8 } - addop 'sysenter',[0x0F, 0x34] - addop 'sysexit', [0x0F, 0x35] - - addop 'syscall', [0x0F, 0x05] # AMD - addop_macroret 'sysret', [0x0F, 0x07] # AMD - end - - def init_3dnow_only - init_cpu_constants - - [['pavgusb', 0xBF], ['pfadd', 0x9E], ['pfsub', 0x9A], - ['pfsubr', 0xAA], ['pfacc', 0xAE], ['pfcmpge', 0x90], - ['pfcmpgt', 0xA0], ['fpcmpeq', 0xB0], ['pfmin', 0x94], - ['pfmax', 0xA4], ['pi2fd', 0x0D], ['pf2id', 0x1D], - ['pfrcp', 0x96], ['pfrsqrt', 0x97], ['pfmul', 0xB4], - ['pfrcpit1', 0xA6], ['pfrsqit1', 0xA7], ['pfrcpit2', 0xB6], - ['pmulhrw', 0xB7]].each { |str, bin| - addop str, [0x0F, 0x0F, bin], :mrmmmx - } - # 3dnow prefix fallback - addop '3dnow', [0x0F, 0x0F], :mrmmmx, :u8 - - addop 'femms', [0x0F, 0x0E] - addop 'prefetch', [0x0F, 0x0D, 0<<3], :modrmA - addop 'prefetchw', [0x0F, 0x0D, 1<<3], :modrmA - end - - def init_sse_only - init_cpu_constants - - addop_macrossps 'addps', [0x0F, 0x58], :mrmxmm - addop 'andnps', [0x0F, 0x55], :mrmxmm - addop 'andps', [0x0F, 0x54], :mrmxmm - addop_macrossps 'cmpps', [0x0F, 0xC2], :mrmxmm, :u8 - addop 'comiss', [0x0F, 0x2F], :mrmxmm - - addop('cvtpi2ps', [0x0F, 0x2A], :mrmxmm) { |o| o.args[o.args.index(:modrmxmm)] = :modrmmmx } - addop('cvtps2pi', [0x0F, 0x2D], :mrmmmx) { |o| o.args[o.args.index(:modrmmmx)] = :modrmxmm } - addop('cvtsi2ss', [0x0F, 0x2A], :mrmxmm) { |o| 
o.args[o.args.index(:modrmxmm)] = :modrm ; o.props[:needpfx] = 0xF3 } - addop('cvtss2si', [0x0F, 0x2D], :mrm) { |o| o.args[o.args.index(:modrm)] = :modrmxmm ; o.props[:needpfx] = 0xF3 } - addop('cvttps2pi',[0x0F, 0x2C], :mrmmmx) { |o| o.args[o.args.index(:modrmmmx)] = :modrmxmm } - addop('cvttss2si',[0x0F, 0x2C], :mrm) { |o| o.args[o.args.index(:modrm)] = :modrmxmm ; o.props[:needpfx] = 0xF3 } - - addop_macrossps 'divps', [0x0F, 0x5E], :mrmxmm - addop 'ldmxcsr', [0x0F, 0xAE, 2<<3], :modrmA - addop_macrossps 'maxps', [0x0F, 0x5F], :mrmxmm - addop_macrossps 'minps', [0x0F, 0x5D], :mrmxmm - addop 'movaps', [0x0F, 0x28], :mrmxmm, {:d => [1, 0]} - addop 'movhlps', [0x0F, 0x12], :mrmxmm, :modrmR - addop 'movlps', [0x0F, 0x12], :mrmxmm, {:d => [1, 0]}, :modrmA - addop 'movlhps', [0x0F, 0x16], :mrmxmm, :modrmR - addop 'movhps', [0x0F, 0x16], :mrmxmm, {:d => [1, 0]}, :modrmA - addop 'movmskps',[0x0F, 0x50, 0xC0], nil, {:reg => [2, 3], :regxmm => [2, 0]}, :regxmm, :reg - addop('movss', [0x0F, 0x10], :mrmxmm, {:d => [1, 0]}) { |o| o.props[:needpfx] = 0xF3 } - addop 'movups', [0x0F, 0x10], :mrmxmm, {:d => [1, 0]} - addop_macrossps 'mulps', [0x0F, 0x59], :mrmxmm - addop 'orps', [0x0F, 0x56], :mrmxmm - addop_macrossps 'rcpps', [0x0F, 0x53], :mrmxmm - addop_macrossps 'rsqrtps',[0x0F, 0x52], :mrmxmm - addop 'shufps', [0x0F, 0xC6], :mrmxmm, :u8 - addop_macrossps 'sqrtps', [0x0F, 0x51], :mrmxmm - addop 'stmxcsr', [0x0F, 0xAE, 3<<3], :modrmA - addop_macrossps 'subps', [0x0F, 0x5C], :mrmxmm - addop 'ucomiss', [0x0F, 0x2E], :mrmxmm - addop 'unpckhps',[0x0F, 0x15], :mrmxmm - addop 'unpcklps',[0x0F, 0x14], :mrmxmm - addop 'xorps', [0x0F, 0x57], :mrmxmm - - # integer instrs, mmx only - addop 'pavgb', [0x0F, 0xE0], :mrmmmx - addop 'pavgw', [0x0F, 0xE3], :mrmmmx - addop 'pextrw', [0x0F, 0xC5, 0xC0], nil, {:reg => [2, 3], :regmmx => [2, 0]}, :reg, :regmmx, :u8 - addop 'pinsrw', [0x0F, 0xC4, 0x00], nil, {:modrm => [2, 0], :regmmx => [2, 3]}, :modrm, :regmmx, :u8 - addop 'pmaxsw', [0x0F, 
0xEE], :mrmmmx - addop 'pmaxub', [0x0F, 0xDE], :mrmmmx - addop 'pminsw', [0x0F, 0xEA], :mrmmmx - addop 'pminub', [0x0F, 0xDA], :mrmmmx - addop 'pmovmskb',[0x0F, 0xD7, 0xC0], nil, {:reg => [2, 3], :regmmx => [2, 0]}, :reg, :regmmx - addop 'psadbw', [0x0F, 0xF6], :mrmmmx - addop 'pshufw', [0x0F, 0x70], :mrmmmx, :u8 - - addop 'maskmovq',[0x0F, 0xF7], :mrmmmx, :modrmR - addop('movntq', [0x0F, 0xE7], :mrmmmx) { |o| o.args.reverse! } - addop('movntps', [0x0F, 0x2B], :mrmxmm) { |o| o.args.reverse! } - addop 'prefetcht0', [0x0F, 0x18, 1<<3], :modrmA - addop 'prefetcht1', [0x0F, 0x18, 2<<3], :modrmA - addop 'prefetcht2', [0x0F, 0x18, 3<<3], :modrmA - addop 'prefetchnta',[0x0F, 0x18, 0<<3], :modrmA - addop 'sfence', [0x0F, 0xAE, 0xF8] - - # the whole row of prefetch is actually nops - addop 'nop', [0x0F, 0x1C], :mrmw, :d => [1, 1] # incl. official version = 0f1f mrm - addop 'nop_8', [0x0F, 0x18], :mrmw, :d => [1, 1] - addop 'nop_d', [0x0F, 0x0D], :mrm - addop 'nop', [0x0F, 0x1C], 0 # official asm syntax is 'nop [eax]' - end - - def init_sse2_only - init_cpu_constants - - @opcode_list.each { |o| o.props[:xmmx] = true if o.fields[:regmmx] and o.name !~ /^(?:mov(?:nt)?q|pshufw|cvt.*)$/ } - - # mirror of the init_sse part - addop_macrosdpd 'addpd', [0x0F, 0x58], :mrmxmm - addop('andnpd', [0x0F, 0x55], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } - addop('andpd', [0x0F, 0x54], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } - addop_macrosdpd 'cmppd', [0x0F, 0xC2], :mrmxmm, :u8 - addop('comisd', [0x0F, 0x2F], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } - - addop('cvtpi2pd', [0x0F, 0x2A], :mrmxmm) { |o| o.args[o.args.index(:modrmxmm)] = :modrmmmx ; o.props[:needpfx] = 0x66 } - addop('cvtpd2pi', [0x0F, 0x2D], :mrmmmx) { |o| o.args[o.args.index(:modrmmmx)] = :modrmxmm ; o.props[:needpfx] = 0x66 } - addop('cvtsi2sd', [0x0F, 0x2A], :mrmxmm) { |o| o.args[o.args.index(:modrmxmm)] = :modrm ; o.props[:needpfx] = 0xF2 } - addop('cvtsd2si', [0x0F, 0x2D], :mrm ) { |o| o.args[o.args.index(:modrm )] = 
:modrmxmm ; o.props[:needpfx] = 0xF2 } - addop('cvttpd2pi',[0x0F, 0x2C], :mrmmmx) { |o| o.args[o.args.index(:modrmmmx)] = :modrmxmm ; o.props[:needpfx] = 0x66 } - addop('cvttsd2si',[0x0F, 0x2C], :mrm ) { |o| o.args[o.args.index(:modrm )] = :modrmxmm ; o.props[:needpfx] = 0xF2 } - - addop('cvtpd2ps', [0x0F, 0x5A], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } - addop('cvtps2pd', [0x0F, 0x5A], :mrmxmm) - addop('cvtsd2ss', [0x0F, 0x5A], :mrmxmm) { |o| o.props[:needpfx] = 0xF2 } - addop('cvtss2sd', [0x0F, 0x5A], :mrmxmm) { |o| o.props[:needpfx] = 0xF3 } - - addop('cvtpd2dq', [0x0F, 0xE6], :mrmxmm) { |o| o.props[:needpfx] = 0xF2 } - addop('cvttpd2dq',[0x0F, 0xE6], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } - addop('cvtdq2pd', [0x0F, 0xE6], :mrmxmm) { |o| o.props[:needpfx] = 0xF3 } - addop('cvtps2dq', [0x0F, 0x5B], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } - addop('cvttps2dq',[0x0F, 0x5B], :mrmxmm) { |o| o.props[:needpfx] = 0xF3 } - addop('cvtdq2ps', [0x0F, 0x5B], :mrmxmm) - - addop_macrosdpd 'divpd', [0x0F, 0x5E], :mrmxmm - addop_macrosdpd 'maxpd', [0x0F, 0x5F], :mrmxmm - addop_macrosdpd 'minpd', [0x0F, 0x5D], :mrmxmm - addop('movapd', [0x0F, 0x28], :mrmxmm, {:d => [1, 0]}) { |o| o.props[:needpfx] = 0x66 } - - addop('movlpd', [0x0F, 0x12], :mrmxmm, {:d => [1, 0]}) { |o| o.props[:needpfx] = 0x66 } - addop('movhpd', [0x0F, 0x16], :mrmxmm, {:d => [1, 0]}) { |o| o.props[:needpfx] = 0x66 } - - addop('movmskpd',[0x0F, 0x50, 0xC0], nil, {:reg => [2, 3], :regxmm => [2, 0]}, :regxmm, :reg) { |o| o.props[:needpfx] = 0x66 } - addop('movsd', [0x0F, 0x10], :mrmxmm, {:d => [1, 0]}) { |o| o.props[:needpfx] = 0xF2 } - addop('movupd', [0x0F, 0x10], :mrmxmm, {:d => [1, 0]}) { |o| o.props[:needpfx] = 0x66 } - addop_macrosdpd 'mulpd', [0x0F, 0x59], :mrmxmm - addop('orpd', [0x0F, 0x56], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } - addop('shufpd', [0x0F, 0xC6], :mrmxmm, :u8) { |o| o.props[:needpfx] = 0x66 } - addop_macrosdpd 'sqrtpd', [0x0F, 0x51], :mrmxmm - addop_macrosdpd 'subpd', [0x0F, 0x5C], 
:mrmxmm - addop('ucomisd', [0x0F, 0x2E], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } - addop('unpckhpd',[0x0F, 0x15], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } - addop('unpcklpd',[0x0F, 0x14], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } - addop('xorpd', [0x0F, 0x57], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } - - addop('movdqa', [0x0F, 0x6F], :mrmxmm, {:d => [1, 4]}) { |o| o.props[:needpfx] = 0x66 } - addop('movdqu', [0x0F, 0x6F], :mrmxmm, {:d => [1, 4]}) { |o| o.props[:needpfx] = 0xF3 } - addop('movq2dq', [0x0F, 0xD6], :mrmxmm, :modrmR) { |o| o.args[o.args.index(:modrmxmm)] = :modrmmmx ; o.props[:needpfx] = 0xF3 } - addop('movdq2q', [0x0F, 0xD6], :mrmmmx, :modrmR) { |o| o.args[o.args.index(:modrmmmx)] = :modrmxmm ; o.props[:needpfx] = 0xF2 } - addop('movq', [0x0F, 0x7E], :mrmxmm) { |o| o.props[:needpfx] = 0xF3 ; o.props[:argsz] = 128 } - addop('movq', [0x0F, 0xD6], :mrmxmm) { |o| o.args.reverse! ; o.props[:needpfx] = 0x66 ; o.props[:argsz] = 128 } - - addop 'paddq', [0x0F, 0xD4], :mrmmmx, :xmmx - addop 'pmuludq', [0x0F, 0xF4], :mrmmmx, :xmmx - addop('pshuflw', [0x0F, 0x70], :mrmxmm, :u8) { |o| o.props[:needpfx] = 0xF2 } - addop('pshufhw', [0x0F, 0x70], :mrmxmm, :u8) { |o| o.props[:needpfx] = 0xF3 } - addop('pshufd', [0x0F, 0x70], :mrmxmm, :u8) { |o| o.props[:needpfx] = 0x66 } - addop('pslldq', [0x0F, 0x73, 0xF8], nil, {:regxmm => [2, 0]}, :regxmm, :u8) { |o| o.props[:needpfx] = 0x66 } - addop('psrldq', [0x0F, 0x73, 0xD8], nil, {:regxmm => [2, 0]}, :regxmm, :u8) { |o| o.props[:needpfx] = 0x66 } - addop 'psubq', [0x0F, 0xFB], :mrmmmx, :xmmx - addop('punpckhqdq', [0x0F, 0x6D], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } - addop('punpcklqdq', [0x0F, 0x6C], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } - - addop('clflush', [0x0F, 0xAE, 7<<3], :modrmA) { |o| o.props[:argsz] = 8 } - addop('maskmovdqu', [0x0F, 0xF7], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } - addop('movntpd', [0x0F, 0x2B], :mrmxmm) { |o| o.args.reverse! 
; o.props[:needpfx] = 0x66 } - addop('movntdq', [0x0F, 0xE7], :mrmxmm) { |o| o.args.reverse! ; o.props[:needpfx] = 0x66 } - addop('movnti', [0x0F, 0xC3], :mrm) { |o| o.args.reverse! } - addop('pause', [0x90]) { |o| o.props[:needpfx] = 0xF3 } - addop 'lfence', [0x0F, 0xAE, 0xE8] - addop 'mfence', [0x0F, 0xAE, 0xF0] - end - - def init_sse3_only - init_cpu_constants - - addop('addsubpd', [0x0F, 0xD0], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } - addop('addsubps', [0x0F, 0xD0], :mrmxmm) { |o| o.props[:needpfx] = 0xF2 } - addop('haddpd', [0x0F, 0x7C], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } - addop('haddps', [0x0F, 0x7C], :mrmxmm) { |o| o.props[:needpfx] = 0xF2 } - addop('hsubpd', [0x0F, 0x7D], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } - addop('hsubps', [0x0F, 0x7D], :mrmxmm) { |o| o.props[:needpfx] = 0xF2 } - - addop 'monitor', [0x0F, 0x01, 0xC8] - addop 'mwait', [0x0F, 0x01, 0xC9] - - addop('fisttp', [0xDF, 1<<3], :modrmA) { |o| o.props[:argsz] = 16 } - addop('fisttp', [0xDB, 1<<3], :modrmA) { |o| o.props[:argsz] = 32 } - addop('fisttp', [0xDD, 1<<3], :modrmA) { |o| o.props[:argsz] = 64 } - addop('lddqu', [0x0F, 0xF0], :mrmxmm, :modrmA) { |o| o.args[o.args.index(:modrmxmm)] = :modrm ; o.props[:needpfx] = 0xF2 } - addop('movddup', [0x0F, 0x12], :mrmxmm) { |o| o.props[:needpfx] = 0xF2 } - addop('movshdup', [0x0F, 0x16], :mrmxmm) { |o| o.props[:needpfx] = 0xF3 } - addop('movsldup', [0x0F, 0x12], :mrmxmm) { |o| o.props[:needpfx] = 0xF3 } - end - - def init_ssse3_only - init_cpu_constants - - addop_macrogg 0..2, 'pabs', [0x0F, 0x38, 0x1C], :mrmmmx, :xmmx - addop 'palignr', [0x0F, 0x3A, 0x0F], :mrmmmx, :u8, :xmmx - addop 'phaddd', [0x0F, 0x38, 0x02], :mrmmmx, :xmmx - addop 'phaddsw', [0x0F, 0x38, 0x03], :mrmmmx, :xmmx - addop 'phaddw', [0x0F, 0x38, 0x01], :mrmmmx, :xmmx - addop 'phsubd', [0x0F, 0x38, 0x06], :mrmmmx, :xmmx - addop 'phsubsw', [0x0F, 0x38, 0x07], :mrmmmx, :xmmx - addop 'phsubw', [0x0F, 0x38, 0x05], :mrmmmx, :xmmx - addop 'pmaddubsw',[0x0F, 0x38, 0x04], 
:mrmmmx, :xmmx - addop 'pmulhrsw', [0x0F, 0x38, 0x0B], :mrmmmx, :xmmx - addop 'pshufb', [0x0F, 0x38, 0x00], :mrmmmx, :xmmx - addop_macrogg 0..2, 'psignb', [0x0F, 0x38, 0x80], :mrmmmx, :xmmx - end - - def init_aesni_only - init_cpu_constants - - addop('aesdec', [0x0F, 0x38, 0xDE], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } - addop('aesdeclast',[0x0F, 0x38, 0xDF], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } - addop('aesenc', [0x0F, 0x38, 0xDC], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } - addop('aesenclast',[0x0F, 0x38, 0xDD], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } - addop('aesimc', [0x0F, 0x38, 0xDB], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } - addop('aeskeygenassist', [0x0F, 0x3A, 0xDF], :mrmxmm, :u8) { |o| o.props[:needpfx] = 0x66 } - - addop('pclmulqdq', [0x0F, 0x3A, 0x44], :mrmxmm, :u8) { |o| o.props[:needpfx] = 0x66 } - end - - def init_vmx_only - init_cpu_constants - - addop 'vmcall', [0x0F, 0x01, 0xC1] - addop 'vmlaunch', [0x0F, 0x01, 0xC2] - addop 'vmresume', [0x0F, 0x01, 0xC3] - addop 'vmxoff', [0x0F, 0x01, 0xC4] - addop 'vmread', [0x0F, 0x78], :mrm - addop 'vmwrite', [0x0F, 0x79], :mrm - addop('vmclear', [0x0F, 0xC7, 6<<3], :modrmA) { |o| o.props[:argsz] = 64 ; o.props[:needpfx] = 0x66 } - addop('vmxon', [0x0F, 0xC7, 6<<3], :modrmA) { |o| o.props[:argsz] = 64 ; o.props[:needpfx] = 0xF3 } - addop('vmptrld', [0x0F, 0xC7, 6<<3], :modrmA) { |o| o.props[:argsz] = 64 } - addop('vmptrrst', [0x0F, 0xC7, 7<<3], :modrmA) { |o| o.props[:argsz] = 64 } - addop('invept', [0x0F, 0x38, 0x80], :mrmA) { |o| o.props[:needpfx] = 0x66 } - addop('invvpid', [0x0F, 0x38, 0x81], :mrmA) { |o| o.props[:needpfx] = 0x66 } - - addop 'getsec', [0x0F, 0x37] - - addop 'xgetbv', [0x0F, 0x01, 0xD0] - addop 'xsetbv', [0x0F, 0x01, 0xD1] - addop 'rdtscp', [0x0F, 0x01, 0xF9] - addop 'xrstor', [0x0F, 0xAE, 5<<3], :modrmA - addop 'xsave', [0x0F, 0xAE, 4<<3], :modrmA - end - - def init_sse41_only - init_cpu_constants - - addop('blendpd', [0x0F, 0x3A, 0x0D], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } 
- addop('blendps', [0x0F, 0x3A, 0x0C], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } - addop('blendvpd', [0x0F, 0x38, 0x15], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } - addop('blendvps', [0x0F, 0x38, 0x14], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } - addop('dppd', [0x0F, 0x3A, 0x41], :mrmxmm, :u8) { |o| o.props[:needpfx] = 0x66 } - addop('dpps', [0x0F, 0x3A, 0x40], :mrmxmm, :u8) { |o| o.props[:needpfx] = 0x66 } - addop('extractps',[0x0F, 0x3A, 0x17], :mrmxmm, :u8) { |o| o.props[:needpfx] = 0x66 } - addop('insertps', [0x0F, 0x3A, 0x21], :mrmxmm, :u8) { |o| o.props[:needpfx] = 0x66 } - addop('movntdqa', [0x0F, 0x38, 0x2A], :mrmxmm, :modrmA) { |o| o.props[:needpfx] = 0x66 } - addop('mpsadbw', [0x0F, 0x3A, 0x42], :mrmxmm, :u8) { |o| o.props[:needpfx] = 0x66 } - addop('packusdw', [0x0F, 0x38, 0x2B], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } - addop('pblendvb', [0x0F, 0x38, 0x10], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } - addop('pblendw', [0x0F, 0x3A, 0x1E], :mrmxmm, :u8) { |o| o.props[:needpfx] = 0x66 } - addop('pcmpeqq', [0x0F, 0x38, 0x29], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } - addop('pextrb', [0x0F, 0x3A, 0x14], :mrmxmm, :u8) { |o| o.props[:needpfx] = 0x66; o.args[o.args.index(:modrmxmm)] = :modrm; o.props[:argsz] = 8 } - addop('pextrw', [0x0F, 0x3A, 0x15], :mrmxmm, :u8) { |o| o.props[:needpfx] = 0x66; o.args[o.args.index(:modrmxmm)] = :modrm; o.props[:argsz] = 16 } - addop('pextrd', [0x0F, 0x3A, 0x16], :mrmxmm, :u8) { |o| o.props[:needpfx] = 0x66; o.args[o.args.index(:modrmxmm)] = :modrm; o.props[:argsz] = 32 } - addop('pinsrb', [0x0F, 0x3A, 0x20], :mrmxmm, :u8) { |o| o.props[:needpfx] = 0x66; o.args[o.args.index(:modrmxmm)] = :modrm; o.props[:argsz] = 8 } - addop('pinsrw', [0x0F, 0x3A, 0x21], :mrmxmm, :u8) { |o| o.props[:needpfx] = 0x66; o.args[o.args.index(:modrmxmm)] = :modrm; o.props[:argsz] = 16 } - addop('pinsrd', [0x0F, 0x3A, 0x22], :mrmxmm, :u8) { |o| o.props[:needpfx] = 0x66; o.args[o.args.index(:modrmxmm)] = :modrm; o.props[:argsz] = 32 } - 
addop('phminposuw', [0x0F, 0x38, 0x41], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } - addop('pminsb', [0x0F, 0x38, 0x38], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } - addop('pminsd', [0x0F, 0x38, 0x39], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } - addop('pminuw', [0x0F, 0x38, 0x3A], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } - addop('pminud', [0x0F, 0x38, 0x3B], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } - addop('pmaxsb', [0x0F, 0x38, 0x3C], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } - addop('pmaxsd', [0x0F, 0x38, 0x3D], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } - addop('pmaxuw', [0x0F, 0x38, 0x3E], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } - addop('pmaxud', [0x0F, 0x38, 0x3F], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } - - addop('pmovsxbw', [0x0F, 0x38, 0x20], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } - addop('pmovsxbd', [0x0F, 0x38, 0x21], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } - addop('pmovsxbq', [0x0F, 0x38, 0x22], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } - addop('pmovsxwd', [0x0F, 0x38, 0x23], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } - addop('pmovsxwq', [0x0F, 0x38, 0x24], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } - addop('pmovsxdq', [0x0F, 0x38, 0x25], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } - addop('pmovzxbw', [0x0F, 0x38, 0x30], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } - addop('pmovzxbd', [0x0F, 0x38, 0x31], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } - addop('pmovzxbq', [0x0F, 0x38, 0x32], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } - addop('pmovzxwd', [0x0F, 0x38, 0x33], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } - addop('pmovzxwq', [0x0F, 0x38, 0x34], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } - addop('pmovzxdq', [0x0F, 0x38, 0x35], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } - - addop('pmuldq', [0x0F, 0x38, 0x28], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } - addop('pmulld', [0x0F, 0x38, 0x40], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } - addop('ptest', [0x0F, 0x38, 0x17], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } - addop('roundps', [0x0F, 0x3A, 0x08], :mrmxmm, 
:u8) { |o| o.props[:needpfx] = 0x66 } - addop('roundpd', [0x0F, 0x3A, 0x09], :mrmxmm, :u8) { |o| o.props[:needpfx] = 0x66 } - addop('roundss', [0x0F, 0x3A, 0x0A], :mrmxmm, :u8) { |o| o.props[:needpfx] = 0x66 } - addop('roundsd', [0x0F, 0x3A, 0x0B], :mrmxmm, :u8) { |o| o.props[:needpfx] = 0x66 } - end - - def init_sse42_only - init_cpu_constants - - addop('crc32', [0x0F, 0x38, 0xF0], :mrmw) { |o| o.props[:needpfx] = 0xF2 } - addop('pcmpestrm', [0x0F, 0x3A, 0x60], :mrmxmm, :i8) { |o| o.props[:needpfx] = 0x66 } - addop('pcmpestri', [0x0F, 0x3A, 0x61], :mrmxmm, :i8) { |o| o.props[:needpfx] = 0x66 } - addop('pcmpistrm', [0x0F, 0x3A, 0x62], :mrmxmm, :i8) { |o| o.props[:needpfx] = 0x66 } - addop('pcmpistri', [0x0F, 0x3A, 0x63], :mrmxmm, :i8) { |o| o.props[:needpfx] = 0x66 } - addop('pcmpgtq', [0x0F, 0x38, 0x37], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } - addop('popcnt', [0x0F, 0xB8], :mrm) { |o| o.props[:needpfx] = 0xF3 } - end - - def init_avx_only - init_cpu_constants - - add128 = {} - add256 = {} - %w[movss movsd movlhps movhpd movhlps - cvtsi2ss cvtsi2sd sqrtss sqrtsd rsqrtss rcpss - addss addsd mulss mulsd cvtss2sd cvtsd2ss subss subsd - minss minsd divss divsd maxss maxsd - punpcklb punpcklw punpckld packsswb pcmpgtb pcmpgtw pcmpgtd packuswb - punpckhb punpckhw punpckhd packssdw punpcklq punpckhq - pcmpeqb pcmpeqw pcmpeqd ldmxcsr stmxcsr - cmpss cmpsd paddq pmullw psubusb psubusw pminub - pand paddusb paddusw pmaxub pandn pavgb pavgw - pmulhuw pmulhw psubsb psubsw pminsw por paddsb paddsw pmaxsw pxor - pmuludq pmaddwd psadbw - psubb psubw psubd psubq paddb paddw paddd - phaddw phaddsw phaddd phsubw phsubsw phsubd - pmaddubsw palignr pshufb pmulhrsw psignb psignw psignd - dppd insertps mpsadbw packusdw pblendw pcmpeqq - pinsrb pinsrw pinsrd pinsrq - pmaxsb pmaxsd pmaxud pmaxuw pminsb pminsd pminud pminuw - pmuldq pmulld roundsd roundss pcmpgtq - aesdec aesdeclast aesenc aesenclast - pclmulqdq punpcklbw punpcklwd punpckldq punpckhbw punpckhwd - punpckhdq punpcklqdq 
punpckhqdq].each { |n| add128[n] = true } - - %w[movups movupd movddup movsldup - unpcklps unpcklpd unpckhps unpckhpd - movaps movshdup movapd movntps movntpd movmskps movmskpd - sqrtps sqrtpd rsqrtps rcpps andps andpd andnps andnpd - orps orpd xorps xorpd addps addpd mulps mulpd - cvtps2pd cvtpd2ps cvtdq2ps cvtps2dq cvttps2dq - subps subpd minps minpd divps divpd maxps maxpd - movdqa movdqu haddpd haddps hsubpd hsubps - cmpps cmppd shufps shufpd addsubpd addsubps - cvtpd2dq cvttpd2dq cvtdq2pd movntdq lddqu - blendps blendpd blendvps blendvpd dpps ptest - roundpd roundps].each { |n| add128[n] = add256[n] = true } - - varg = Hash.new(1) - %w[pabsb pabsw pabsd pmovmskb pshufd pshufhw pshuflw movntdqa - pmovsxbw pmovsxbd pmovsxbq pmovsxwd pmovsxwq pmovsxdq - pmovzxbw pmovzxbd pmovzxbq pmovzxwd pmovzxwq pmovzxdq - aesimc aeskeygenassist lddqu maskmovdqu movapd movaps - pcmpestri pcmpestrm pcmpistri pcmpistrm phminposuw - cvtpd2dq cvttpd2dq cvtdq2pd cvtps2pd cvtpd2ps cvtdq2ps cvtps2dq - cvttps2dq movd movq movddup movdqa movdqu movmskps movmskpd - movntdq movntps movntpd movshdup movsldup movups movupd - pextrb pextrw pextrd pextrq ptest rcpps roundps roundpd - extractps sqrtps sqrtpd comiss comisd ucomiss ucomisd - cvttss2si cvttsd2si cvtss2si cvtsd2si - ].each { |n| add128[n] = true ; varg[n] = nil } - - cvtarg128 = { :regmmx => :regxmm, :modrmmmx => :modrmxmm } - cvtarg256 = { :regmmx => :regymm, :modrmmmx => :modrmymm, - :regxmm => :regymm, :modrmxmm => :modrmymm } - - # autopromote old sseX opcodes - @opcode_list.each { |o| - next if o.bin[0] != 0x0F or not add128[o.name] # rep cmpsd / movsd - - mm = (o.bin[1] == 0x38 ? 0x0F38 : o.bin[1] == 0x3A ? 0x0F3A : 0x0F) - pp = o.props[:needpfx] - pp = 0x66 if o.props[:xmmx] - fpxlen = (mm == 0x0F ? 
1 : 2) - - addop_vex('v' + o.name, [varg[o.name], 128, pp, mm], o.bin[fpxlen], nil, *o.args.map { |oa| cvtarg128[oa] || oa }) { |oo| - oo.bin += [o.bin[fpxlen+1]] if o.bin[fpxlen+1] - dbinlen = o.bin.length - oo.bin.length - o.fields.each { |k, v| oo.fields[cvtarg128[k] || k] = [v[0]-dbinlen, v[1]] } - o.props.each { |k, v| oo.props[k] = v if k != :xmmx and k != :needpfx } - } - - next if not add256[o.name] - addop_vex('v' + o.name, [varg[o.name], 256, pp, mm], o.bin[fpxlen], nil, *o.args.map { |oa| cvtarg256[oa] || oa }) { |oo| - oo.bin += [o.bin[fpxlen+1]] if o.bin[fpxlen+1] - dbinlen = o.bin.length - oo.bin.length - o.fields.each { |k, v| oo.fields[cvtarg256[k] || k] = [v[0]-dbinlen, v[1]] } - o.props.each { |k, v| oo.props[k] = v if k != :xmmx and k != :needpfx } - } - } - - # sse promotion, special cases - addop_vex 'vpblendvb', [1, 128, 0x66, 0x0F3A, 0], 0x4C, :mrmxmm, :i4xmm - addop_vex 'vpsllw', [1, 128, 0x66, 0x0F], 0xF1, :mrmxmm - addop_vex('vpsllw', [0, 128, 0x66, 0x0F], 0x71, 6, :u8, :modrmR) { |o| o.args[o.args.index(:modrm)] = :modrmxmm } - addop_vex 'vpslld', [1, 128, 0x66, 0x0F], 0xF2, :mrmxmm - addop_vex('vpslld', [0, 128, 0x66, 0x0F], 0x72, 6, :u8, :modrmR) { |o| o.args[o.args.index(:modrm)] = :modrmxmm } - addop_vex 'vpsllq', [1, 128, 0x66, 0x0F], 0xF3, :mrmxmm - addop_vex('vpsllq', [0, 128, 0x66, 0x0F], 0x73, 6, :u8, :modrmR) { |o| o.args[o.args.index(:modrm)] = :modrmxmm } - addop_vex('vpslldq',[0, 128, 0x66, 0x0F], 0x73, 7, :u8, :modrmR) { |o| o.args[o.args.index(:modrm)] = :modrmxmm } - addop_vex 'vpsraw', [1, 128, 0x66, 0x0F], 0xE1, :mrmxmm - addop_vex('vpsraw', [0, 128, 0x66, 0x0F], 0x71, 4, :u8, :modrmR) { |o| o.args[o.args.index(:modrm)] = :modrmxmm } - addop_vex 'vpsrad', [1, 128, 0x66, 0x0F], 0xE2, :mrmxmm - addop_vex('vpsrad', [0, 128, 0x66, 0x0F], 0x72, 4, :u8, :modrmR) { |o| o.args[o.args.index(:modrm)] = :modrmxmm } - addop_vex 'vpsrlw', [1, 128, 0x66, 0x0F], 0xD1, :mrmxmm - addop_vex('vpsrlw', [0, 128, 0x66, 0x0F], 0x71, 2, :u8, 
:modrmR) { |o| o.args[o.args.index(:modrm)] = :modrmxmm } - addop_vex 'vpsrld', [1, 128, 0x66, 0x0F], 0xD2, :mrmxmm - addop_vex('vpsrld', [0, 128, 0x66, 0x0F], 0x72, 2, :u8, :modrmR) { |o| o.args[o.args.index(:modrm)] = :modrmxmm } - addop_vex 'vpsrlq', [1, 128, 0x66, 0x0F], 0xD3, :mrmxmm - addop_vex('vpsrlq', [0, 128, 0x66, 0x0F], 0x73, 2, :u8, :modrmR) { |o| o.args[o.args.index(:modrm)] = :modrmxmm } - addop_vex('vpsrldq',[0, 128, 0x66, 0x0F], 0x73, 3, :u8, :modrmR) { |o| o.args[o.args.index(:modrm)] = :modrmxmm } - - # dst==mem => no vreg - addop_vex 'vmovhps', [1, 128, nil, 0x0F], 0x16, :mrmxmm, :modrmA - addop_vex('vmovhps', [nil, 128, nil, 0x0F], 0x17, :mrmxmm, :modrmA) { |o| o.args.reverse! } - addop_vex 'vmovlpd', [1, 128, 0x66, 0x0F], 0x12, :mrmxmm, :modrmA - addop_vex('vmovlpd', [nil, 128, 0x66, 0x0F], 0x13, :mrmxmm, :modrmA) { |o| o.args.reverse! } - addop_vex 'vmovlps', [1, 128, nil, 0x0F], 0x12, :mrmxmm, :modrmA - addop_vex('vmovlps', [nil, 128, nil, 0x0F], 0x13, :mrmxmm, :modrmA) { |o| o.args.reverse! 
} - - addop_vex 'vbroadcastss', [nil, 128, 0x66, 0x0F38, 0], 0x18, :mrmxmm, :modrmA - addop_vex 'vbroadcastss', [nil, 256, 0x66, 0x0F38, 0], 0x18, :mrmymm, :modrmA - addop_vex 'vbroadcastsd', [nil, 256, 0x66, 0x0F38, 0], 0x19, :mrmymm, :modrmA - addop_vex 'vbroadcastf128', [nil, 256, 0x66, 0x0F38, 0], 0x1A, :mrmymm, :modrmA - - # general-purpose register operations - addop_vex 'andn', [1, :vexvreg, 128, nil, 0x0F38], 0xF2, :mrm - addop_vex 'bextr', [2, :vexvreg, 128, nil, 0x0F38], 0xF7, :mrm - addop_vex 'blsi', [0, :vexvreg, 128, nil, 0x0F38], 0xF3, 3 - addop_vex 'blsmsk', [0, :vexvreg, 128, nil, 0x0F38], 0xF3, 2 - addop_vex 'blsr', [0, :vexvreg, 128, nil, 0x0F38], 0xF3, 1 - addop_vex 'bzhi', [2, :vexvreg, 128, nil, 0x0F38], 0xF5, :mrm - addop('lzcnt', [0x0F, 0xBD], :mrm) { |o| o.props[:needpfx] = 0xF3 } - addop_vex 'mulx', [1, :vexvreg, 128, 0xF2, 0x0F38], 0xF6, :mrm - addop_vex 'pdep', [1, :vexvreg, 128, 0xF2, 0x0F38], 0xF5, :mrm - addop_vex 'pext', [1, :vexvreg, 128, 0xF3, 0x0F38], 0xF5, :mrm - addop_vex 'rorx', [nil, 128, 0xF2, 0x0F3A], 0xF0, :mrm, :u8 - addop_vex 'sarx', [2, :vexvreg, 128, 0xF3, 0x0F38], 0xF7, :mrm - addop_vex 'shrx', [2, :vexvreg, 128, 0xF2, 0x0F38], 0xF7, :mrm - addop_vex 'shlx', [2, :vexvreg, 128, 0x66, 0x0F38], 0xF7, :mrm - addop('tzcnt', [0x0F, 0xBC], :mrm) { |o| o.props[:needpfx] = 0xF3 } - addop('invpcid', [0x0F, 0x38, 0x82], :mrm) { |o| o.props[:needpfx] = 0x66 } - addop 'rdrand', [0x0F, 0xC7], 6, :modrmR - addop 'rdseed', [0x0F, 0xC7], 7, :modrmR - addop('adcx', [0x0F, 0x38, 0xF6], :mrm) { |o| o.props[:needpfx] = 0x66 } - addop('adox', [0x0F, 0x38, 0xF6], :mrm) { |o| o.props[:needpfx] = 0xF3 } - - # fp16 - addop_vex 'vcvtph2ps', [nil, 128, 0x66, 0x0F38, 0], 0x13, :mrmxmm - addop_vex 'vcvtph2ps', [nil, 256, 0x66, 0x0F38, 0], 0x13, :mrmymm - addop_vex('vcvtps2ph', [nil, 128, 0x66, 0x0F3A, 0], 0x1D, :mrmxmm, :u8) { |o| o.args.reverse! } - addop_vex('vcvtps2ph', [nil, 256, 0x66, 0x0F3A, 0], 0x1D, :mrmymm, :u8) { |o| o.args.reverse! 
} - - # TSE - addop 'xabort', [0xC6, 0xF8], nil, :i8 # may :stopexec - addop 'xbegin', [0xC7, 0xF8], nil, :i # may :setip: xabortreturns to $_(xbegin) + off - addop 'xend', [0x0F, 0x01, 0xD5] - addop 'xtest', [0x0F, 0x01, 0xD6] - - # SMAP - addop 'clac', [0x0F, 0x01, 0xCA] - addop 'stac', [0x0F, 0x01, 0xCB] - end - - def init_avx2_only - init_cpu_constants - - add256 = {} - %w[packsswb pcmpgtb pcmpgtw pcmpgtd packuswb packssdw - pcmpeqb pcmpeqw pcmpeqd paddq pmullw psubusb psubusw - pminub pand paddusb paddusw pmaxub pandn pavgb pavgw - pmulhuw pmulhw psubsb psubsw pminsw por paddsb paddsw - pmaxsw pxor pmuludq pmaddwd psadbw - psubb psubw psubd psubq paddb paddw paddd - phaddw phaddsw phaddd phsubw phsubsw phsubd - pmaddubsw palignr pshufb pmulhrsw psignb psignw psignd - mpsadbw packusdw pblendw pcmpeqq - pmaxsb pmaxsd pmaxud pmaxuw pminsb pminsd pminud pminuw - pmuldq pmulld pcmpgtq punpcklbw punpcklwd punpckldq - punpckhbw punpckhwd punpckhdq punpcklqdq punpckhqdq - ].each { |n| add256[n] = true } - - varg = Hash.new(1) - %w[pabsb pabsw pabsd pmovmskb pshufd pshufhw pshuflw movntdqa - pmovsxbw pmovsxbd pmovsxbq pmovsxwd pmovsxwq pmovsxdq - pmovzxbw pmovzxbd pmovzxbq pmovzxwd pmovzxwq pmovzxdq - maskmovdqu].each { |n| add256[n] = true ; varg[n] = nil } - - cvtarg256 = { :regmmx => :regymm, :modrmmmx => :modrmymm, - :regxmm => :regymm, :modrmxmm => :modrmymm } - - # autopromote old sseX opcodes - @opcode_list.each { |o| - next if o.bin[0] != 0x0F or not add256[o.name] - - mm = (o.bin[1] == 0x38 ? 0x0F38 : o.bin[1] == 0x3A ? 0x0F3A : 0x0F) - pp = o.props[:needpfx] - pp = 0x66 if o.props[:xmmx] - fpxlen = (mm == 0x0F ? 
1 : 2) - - addop_vex('v' + o.name, [varg[o.name], 256, pp, mm], o.bin[fpxlen], nil, *o.args.map { |oa| cvtarg256[oa] || oa }) { |oo| - oo.bin += [o.bin[fpxlen+1]] if o.bin[fpxlen+1] - dbinlen = o.bin.length - oo.bin.length - o.fields.each { |k, v| oo.fields[cvtarg256[k] || k] = [v[0]-dbinlen, v[1]] } - o.props.each { |k, v| oo.props[k] = v if k != :xmmx and k != :needpfx } - } - } - - # promote special cases - addop_vex 'vpblendvb', [1, 256, 0x66, 0x0F3A, 0], 0x4C, :mrmymm, :i4ymm - addop_vex 'vpsllw', [1, 256, 0x66, 0x0F], 0xF1, :mrmymm - addop_vex('vpsllw', [0, 256, 0x66, 0x0F], 0x71, 6, :u8, :modrmR) { |o| o.args[o.args.index(:modrm)] = :modrmymm } - addop_vex 'vpslld', [1, 256, 0x66, 0x0F], 0xF2, :mrmymm - addop_vex('vpslld', [0, 256, 0x66, 0x0F], 0x72, 6, :u8, :modrmR) { |o| o.args[o.args.index(:modrm)] = :modrmymm } - addop_vex 'vpsllq', [1, 256, 0x66, 0x0F], 0xF3, :mrmymm - addop_vex('vpsllq', [0, 256, 0x66, 0x0F], 0x73, 6, :u8, :modrmR) { |o| o.args[o.args.index(:modrm)] = :modrmymm } - addop_vex('vpslldq',[0, 256, 0x66, 0x0F], 0x73, 7, :u8, :modrmR) { |o| o.args[o.args.index(:modrm)] = :modrmymm } - addop_vex 'vpsraw', [1, 256, 0x66, 0x0F], 0xE1, :mrmymm - addop_vex('vpsraw', [0, 256, 0x66, 0x0F], 0x71, 4, :u8, :modrmR) { |o| o.args[o.args.index(:modrm)] = :modrmymm } - addop_vex 'vpsrad', [1, 256, 0x66, 0x0F], 0xE2, :mrmymm - addop_vex('vpsrad', [0, 256, 0x66, 0x0F], 0x72, 4, :u8, :modrmR) { |o| o.args[o.args.index(:modrm)] = :modrmymm } - addop_vex 'vpsrlw', [1, 256, 0x66, 0x0F], 0xD1, :mrmymm - addop_vex('vpsrlw', [0, 256, 0x66, 0x0F], 0x71, 2, :u8, :modrmR) { |o| o.args[o.args.index(:modrm)] = :modrmymm } - addop_vex 'vpsrld', [1, 256, 0x66, 0x0F], 0xD2, :mrmymm - addop_vex('vpsrld', [0, 256, 0x66, 0x0F], 0x72, 2, :u8, :modrmR) { |o| o.args[o.args.index(:modrm)] = :modrmymm } - addop_vex 'vpsrlq', [1, 256, 0x66, 0x0F], 0xD3, :mrmymm - addop_vex('vpsrlq', [0, 256, 0x66, 0x0F], 0x73, 2, :u8, :modrmR) { |o| o.args[o.args.index(:modrm)] = :modrmymm } - 
addop_vex('vpsrldq',[0, 256, 0x66, 0x0F], 0x73, 3, :u8, :modrmR) { |o| o.args[o.args.index(:modrm)] = :modrmymm } - - addop_vex 'vbroadcastss', [nil, 128, 0x66, 0x0F38, 0], 0x18, :mrmxmm, :modrmR - addop_vex 'vbroadcastss', [nil, 256, 0x66, 0x0F38, 0], 0x18, :mrmymm, :modrmR - addop_vex 'vbroadcastsd', [nil, 256, 0x66, 0x0F38, 0], 0x19, :mrmymm, :modrmR - addop_vex 'vbroadcasti128', [nil, 256, 0x66, 0x0F38, 0], 0x5A, :mrmymm, :modrmA - addop_vex 'vpblendd', [1, 128, 0x66, 0x0F3A, 0], 0x02, :mrmxmm, :u8 - addop_vex 'vpblendd', [1, 256, 0x66, 0x0F3A, 0], 0x02, :mrmymm, :u8 - addop_vex 'vpbroadcastb', [nil, 128, 0x66, 0x0F38, 0], 0x78, :mrmxmm - addop_vex 'vpbroadcastb', [nil, 256, 0x66, 0x0F38, 0], 0x78, :mrmymm - addop_vex 'vpbroadcastw', [nil, 128, 0x66, 0x0F38, 0], 0x79, :mrmxmm - addop_vex 'vpbroadcastw', [nil, 256, 0x66, 0x0F38, 0], 0x79, :mrmymm - addop_vex 'vpbroadcastd', [nil, 128, 0x66, 0x0F38, 0], 0x58, :mrmxmm - addop_vex 'vpbroadcastd', [nil, 256, 0x66, 0x0F38, 0], 0x58, :mrmymm - addop_vex 'vpbroadcastq', [nil, 128, 0x66, 0x0F38, 0], 0x59, :mrmxmm - addop_vex 'vpbroadcastq', [nil, 256, 0x66, 0x0F38, 0], 0x59, :mrmymm - addop_vex 'vpermd', [1, 256, 0x66, 0x0F38, 0], 0x36, :mrmymm - addop_vex 'vpermpd', [nil, 256, 0x66, 0x0F3A, 1], 0x01, :mrmymm, :u8 - addop_vex 'vpermps', [1, 256, 0x66, 0x0F38, 0], 0x16, :mrmymm, :u8 - addop_vex 'vpermq', [nil, 256, 0x66, 0x0F3A, 1], 0x00, :mrmymm, :u8 - addop_vex 'vperm2i128', [1, 256, 0x66, 0x0F3A, 0], 0x46, :mrmymm, :u8 - addop_vex 'vextracti128', [nil, 256, 0x66, 0x0F3A, 0], 0x39, :mrmymm, :u8 - addop_vex 'vinserti128', [1, 256, 0x66, 0x0F3A, 0], 0x38, :mrmymm, :u8 - addop_vex 'vpmaskmovd', [1, 128, 0x66, 0x0F38, 0], 0x8C, :mrmxmm, :modrmA - addop_vex 'vpmaskmovd', [1, 256, 0x66, 0x0F38, 0], 0x8C, :mrmymm, :modrmA - addop_vex 'vpmaskmovq', [1, 128, 0x66, 0x0F38, 1], 0x8C, :mrmxmm, :modrmA - addop_vex 'vpmaskmovq', [1, 256, 0x66, 0x0F38, 1], 0x8C, :mrmymm, :modrmA - addop_vex('vpmaskmovd', [1, 128, 0x66, 0x0F38, 0], 
0x8E, :mrmxmm, :modrmA) { |o| o.args.reverse! } - addop_vex('vpmaskmovd', [1, 256, 0x66, 0x0F38, 0], 0x8E, :mrmymm, :modrmA) { |o| o.args.reverse! } - addop_vex('vpmaskmovq', [1, 128, 0x66, 0x0F38, 1], 0x8E, :mrmxmm, :modrmA) { |o| o.args.reverse! } - addop_vex('vpmaskmovq', [1, 256, 0x66, 0x0F38, 1], 0x8E, :mrmymm, :modrmA) { |o| o.args.reverse! } - addop_vex 'vpsllvd', [1, 128, 0x66, 0x0F38, 0], 0x47, :mrmxmm - addop_vex 'vpsllvq', [1, 128, 0x66, 0x0F38, 1], 0x47, :mrmxmm - addop_vex 'vpsllvd', [1, 256, 0x66, 0x0F38, 0], 0x47, :mrmymm - addop_vex 'vpsllvq', [1, 256, 0x66, 0x0F38, 1], 0x47, :mrmymm - addop_vex 'vpsravd', [1, 128, 0x66, 0x0F38, 0], 0x46, :mrmxmm - addop_vex 'vpsravd', [1, 256, 0x66, 0x0F38, 0], 0x46, :mrmymm - addop_vex 'vpsrlvd', [1, 128, 0x66, 0x0F38, 0], 0x45, :mrmxmm - addop_vex 'vpsrlvq', [1, 128, 0x66, 0x0F38, 1], 0x45, :mrmxmm - addop_vex 'vpsrlvd', [1, 256, 0x66, 0x0F38, 0], 0x45, :mrmymm - addop_vex 'vpsrlvq', [1, 256, 0x66, 0x0F38, 1], 0x45, :mrmymm - - addop_vex('vpgatherdd', [2, 128, 0x66, 0x0F38, 0], 0x90, :mrmxmm) { |o| o.props[:argsz] = 32 ; o.props[:mrmvex] = 128 } - addop_vex('vpgatherdd', [2, 256, 0x66, 0x0F38, 0], 0x90, :mrmymm) { |o| o.props[:argsz] = 32 ; o.props[:mrmvex] = 256 } - addop_vex('vpgatherdq', [2, 128, 0x66, 0x0F38, 1], 0x90, :mrmxmm) { |o| o.props[:argsz] = 64 ; o.props[:mrmvex] = 128 } - addop_vex('vpgatherdq', [2, 256, 0x66, 0x0F38, 1], 0x90, :mrmymm) { |o| o.props[:argsz] = 64 ; o.props[:mrmvex] = 256 } - addop_vex('vpgatherqd', [2, 128, 0x66, 0x0F38, 0], 0x91, :mrmxmm) { |o| o.props[:argsz] = 32 ; o.props[:mrmvex] = 128 } - addop_vex('vpgatherqd', [2, 256, 0x66, 0x0F38, 0], 0x91, :mrmymm) { |o| o.props[:argsz] = 32 ; o.props[:mrmvex] = 256 } - addop_vex('vpgatherqq', [2, 128, 0x66, 0x0F38, 1], 0x91, :mrmxmm) { |o| o.props[:argsz] = 64 ; o.props[:mrmvex] = 128 } - addop_vex('vpgatherqq', [2, 256, 0x66, 0x0F38, 1], 0x91, :mrmymm) { |o| o.props[:argsz] = 64 ; o.props[:mrmvex] = 256 } - addop_vex('vgatherdps', [2, 
128, 0x66, 0x0F38, 0], 0x92, :mrmxmm) { |o| o.props[:argsz] = 32 ; o.props[:mrmvex] = 128 } - addop_vex('vgatherdps', [2, 256, 0x66, 0x0F38, 0], 0x92, :mrmymm) { |o| o.props[:argsz] = 32 ; o.props[:mrmvex] = 256 } - addop_vex('vgatherdpd', [2, 128, 0x66, 0x0F38, 1], 0x92, :mrmxmm) { |o| o.props[:argsz] = 64 ; o.props[:mrmvex] = 128 } - addop_vex('vgatherdpd', [2, 256, 0x66, 0x0F38, 1], 0x92, :mrmymm) { |o| o.props[:argsz] = 64 ; o.props[:mrmvex] = 256 } - addop_vex('vgatherqps', [2, 128, 0x66, 0x0F38, 0], 0x93, :mrmxmm) { |o| o.props[:argsz] = 32 ; o.props[:mrmvex] = 128 } - addop_vex('vgatherqps', [2, 256, 0x66, 0x0F38, 0], 0x93, :mrmymm) { |o| o.props[:argsz] = 32 ; o.props[:mrmvex] = 256 } - addop_vex('vgatherqpd', [2, 128, 0x66, 0x0F38, 1], 0x93, :mrmxmm) { |o| o.props[:argsz] = 64 ; o.props[:mrmvex] = 128 } - addop_vex('vgatherqpd', [2, 256, 0x66, 0x0F38, 1], 0x93, :mrmymm) { |o| o.props[:argsz] = 64 ; o.props[:mrmvex] = 256 } - end - - def init_fma_only - init_cpu_constants - - [['vfmaddsub', 'p', 0x86], - ['vfmsubadd', 'p', 0x87], - ['vfmadd', 'p', 0x88], - ['vfmadd', 's', 0x89], - ['vfmsub', 'p', 0x8A], - ['vfmsub', 's', 0x8B], - ['vfnmadd', 'p', 0x8C], - ['vfnmadd', 's', 0x8D], - ['vfnmsub', 'p', 0x8E], - ['vfnmsub', 's', 0x8F]].each { |n1, n2, bin| - addop_vex n1 + '132' + n2 + 's', [1, 128, 0x66, 0x0F38, 0], bin | 0x10, :mrmxmm - addop_vex n1 + '132' + n2 + 's', [1, 256, 0x66, 0x0F38, 0], bin | 0x10, :mrmymm - addop_vex n1 + '132' + n2 + 'd', [1, 128, 0x66, 0x0F38, 1], bin | 0x10, :mrmxmm - addop_vex n1 + '132' + n2 + 'd', [1, 256, 0x66, 0x0F38, 1], bin | 0x10, :mrmymm - addop_vex n1 + '213' + n2 + 's', [1, 128, 0x66, 0x0F38, 0], bin | 0x20, :mrmxmm - addop_vex n1 + '213' + n2 + 's', [1, 256, 0x66, 0x0F38, 0], bin | 0x20, :mrmymm - addop_vex n1 + '213' + n2 + 'd', [1, 128, 0x66, 0x0F38, 1], bin | 0x20, :mrmxmm - addop_vex n1 + '213' + n2 + 'd', [1, 256, 0x66, 0x0F38, 1], bin | 0x20, :mrmymm - addop_vex n1 + '231' + n2 + 's', [1, 128, 0x66, 0x0F38, 0], 
bin | 0x30, :mrmxmm - addop_vex n1 + '231' + n2 + 's', [1, 256, 0x66, 0x0F38, 0], bin | 0x30, :mrmymm - addop_vex n1 + '231' + n2 + 'd', [1, 128, 0x66, 0x0F38, 1], bin | 0x30, :mrmxmm - addop_vex n1 + '231' + n2 + 'd', [1, 256, 0x66, 0x0F38, 1], bin | 0x30, :mrmymm - - # pseudo-opcodes aliases (swap arg0/arg1) - addop_vex(n1 + '312' + n2 + 's', [1, 128, 0x66, 0x0F38, 0], bin | 0x10, :mrmxmm) { |o| o.args[0, 2] = o.args[0, 2].reverse } - addop_vex(n1 + '312' + n2 + 's', [1, 256, 0x66, 0x0F38, 0], bin | 0x10, :mrmymm) { |o| o.args[0, 2] = o.args[0, 2].reverse } - addop_vex(n1 + '312' + n2 + 'd', [1, 128, 0x66, 0x0F38, 1], bin | 0x10, :mrmxmm) { |o| o.args[0, 2] = o.args[0, 2].reverse } - addop_vex(n1 + '312' + n2 + 'd', [1, 256, 0x66, 0x0F38, 1], bin | 0x10, :mrmymm) { |o| o.args[0, 2] = o.args[0, 2].reverse } - addop_vex(n1 + '123' + n2 + 's', [1, 128, 0x66, 0x0F38, 0], bin | 0x20, :mrmxmm) { |o| o.args[0, 2] = o.args[0, 2].reverse } - addop_vex(n1 + '123' + n2 + 's', [1, 256, 0x66, 0x0F38, 0], bin | 0x20, :mrmymm) { |o| o.args[0, 2] = o.args[0, 2].reverse } - addop_vex(n1 + '123' + n2 + 'd', [1, 128, 0x66, 0x0F38, 1], bin | 0x20, :mrmxmm) { |o| o.args[0, 2] = o.args[0, 2].reverse } - addop_vex(n1 + '123' + n2 + 'd', [1, 256, 0x66, 0x0F38, 1], bin | 0x20, :mrmymm) { |o| o.args[0, 2] = o.args[0, 2].reverse } - addop_vex(n1 + '321' + n2 + 's', [1, 128, 0x66, 0x0F38, 0], bin | 0x30, :mrmxmm) { |o| o.args[0, 2] = o.args[0, 2].reverse } - addop_vex(n1 + '321' + n2 + 's', [1, 256, 0x66, 0x0F38, 0], bin | 0x30, :mrmymm) { |o| o.args[0, 2] = o.args[0, 2].reverse } - addop_vex(n1 + '321' + n2 + 'd', [1, 128, 0x66, 0x0F38, 1], bin | 0x30, :mrmxmm) { |o| o.args[0, 2] = o.args[0, 2].reverse } - addop_vex(n1 + '321' + n2 + 'd', [1, 256, 0x66, 0x0F38, 1], bin | 0x30, :mrmymm) { |o| o.args[0, 2] = o.args[0, 2].reverse } - } - end - - # - # CPU family dependencies - # - - def init_386_common - init_386_common_only - end - - def init_386 - init_386_common - init_386_only - end - - 
def init_387 - init_387_only - end - - def init_486 - init_386 - init_387 - init_486_only - end - - def init_pentium - init_486 - init_pentium_only - end - - def init_3dnow - init_pentium - init_3dnow_only - end - - def init_p6 - init_pentium - init_p6_only - end - - def init_sse - init_p6 - init_sse_only - end - - def init_sse2 - init_sse - init_sse2_only - end - - def init_sse3 - init_sse2 - init_sse3_only - end - - def init_ssse3 - init_sse3 - init_ssse3_only - end - - def init_sse41 - init_ssse3 - init_sse41_only - end - - def init_sse42 - init_sse41 - init_sse42_only - end - - def init_avx - init_sse42 - init_avx_only - end - - def init_avx2 - init_avx - init_fma_only - init_avx2_only - end - - def init_all - init_avx2 - init_3dnow_only - init_vmx_only - init_aesni_only - end - - alias init_latest init_all - - - # - # addop_* macros - # - - def addop_macro1(name, num, *props) - addop name, [(num << 3) | 4], nil, {:w => [0, 0]}, :reg_eax, :i, *props - addop(name, [num << 3], :mrmw, {:d => [0, 1]}) { |o| o.args.reverse! } - addop name, [0x80], num, {:w => [0, 0], :s => [0, 1]}, :i, *props - end - def addop_macro2(name, num) - addop name, [0x0F, 0xBA], (4 | num), :u8 - addop(name, [0x0F, 0xA3 | (num << 3)], :mrm) { |op| op.args.reverse! 
} - end - def addop_macro3(name, num) - addop name, [0xD0], num, {:w => [0, 0]}, :imm_val1 - addop name, [0xD2], num, {:w => [0, 0]}, :reg_cl - addop name, [0xC0], num, {:w => [0, 0]}, :u8 - end - - def addop_macrotttn(name, bin, hint, *props, &blk) - [%w{o}, %w{no}, %w{b nae c}, %w{nb ae nc}, - %w{z e}, %w{nz ne}, %w{be na}, %w{nbe a}, - %w{s}, %w{ns}, %w{p pe}, %w{np po}, - %w{l nge}, %w{nl ge}, %w{le ng}, %w{nle g}].each_with_index { |e, i| - b = bin.dup - if b[0] == 0x0F - b[1] |= i - else - b[0] |= i - end - - e.each { |k| addop(name + k, b.dup, hint, *props, &blk) } - } - end - - def addop_macrostr(name, bin, type) - # addop(name, bin.dup, {:w => [0, 0]}) { |o| o.props[type] = true } # TODO allow segment override - addop(name+'b', bin) { |o| o.props[:opsz] = 16 ; o.props[type] = true } - addop(name+'b', bin) { |o| o.props[:opsz] = 32 ; o.props[type] = true } - bin = bin.dup - bin[0] |= 1 - addop(name+'w', bin) { |o| o.props[:opsz] = 16 ; o.props[type] = true } - addop(name+'d', bin) { |o| o.props[:opsz] = 32 ; o.props[type] = true } - end - - def addop_macrofpu1(name, n) - addop(name, [0xD8, n<<3], :modrmA, :regfp0) { |o| o.props[:argsz] = 32 } - addop(name, [0xDC, n<<3], :modrmA, :regfp0) { |o| o.props[:argsz] = 64 } - addop(name, [0xD8, 0xC0|(n<<3)], :regfp, {:d => [0, 2]}) { |o| o.args.reverse! 
} - end - def addop_macrofpu2(name, n, n2=0) - addop(name, [0xDE|n2, n<<3], :modrmA, :regfp0) { |o| o.props[:argsz] = 16 } - addop(name, [0xDA|n2, n<<3], :modrmA, :regfp0) { |o| o.props[:argsz] = 32 } - end - def addop_macrofpu3(name, n) - addop_macrofpu2 name, n, 1 - addop(name, [0xDF, 0x28|(n<<3)], :modrmA, :regfp0) { |o| o.props[:argsz] = 64 } - end - - def addop_macrogg(ggrng, name, bin, *args, &blk) - ggoff = 1 - ggoff = 2 if bin[1] == 0x38 or bin[1] == 0x3A - ggrng.each { |gg| - bindup = bin.dup - bindup[ggoff] |= gg - sfx = %w(b w d q)[gg] - addop name+sfx, bindup, *args, &blk - } - end - - def addop_macrossps(name, bin, hint, *a) - addop name, bin.dup, hint, *a - addop(name.sub(/ps$/, 'ss'), bin.dup, hint, *a) { |o| o.props[:needpfx] = 0xF3 } - end - - def addop_macrosdpd(name, bin, hint, *a) - addop(name, bin.dup, hint, *a) { |o| o.props[:needpfx] = 0x66 } - addop(name.sub(/pd$/, 'sd'), bin.dup, hint, *a) { |o| o.props[:needpfx] = 0xF2 } - end - - # special ret (iret/retf), that still default to 32b mode in x64 - def addop_macroret(name, bin, *args) - addop(name + '.i32', bin.dup, nil, :stopexec, :setip, *args) { |o| o.props[:opsz] = 32 } - addop(name + '.i16', bin.dup, nil, :stopexec, :setip, *args) { |o| o.props[:opsz] = 16 } if name != 'sysret' - addop(name, bin.dup, nil, :stopexec, :setip, *args) { |o| o.props[:opsz] = @size } - end - - # add an AVX instruction needing a VEX prefix (c4h/c5h) - # the prefix is hardcoded - def addop_vex(name, vexspec, bin, *args) - argnr = vexspec.shift - argt = vexspec.shift if argnr and vexspec.first.kind_of?(::Symbol) - l = vexspec.shift - pfx = vexspec.shift - of = vexspec.shift - w = vexspec.shift - argt ||= (l == 128 ? 
:vexvxmm : :vexvymm) - - lpp = ((l >> 8) << 2) | [nil, 0x66, 0xF3, 0xF2].index(pfx) - mmmmm = [nil, 0x0F, 0x0F38, 0x0F3A].index(of) - - c4bin = [0xC4, mmmmm, lpp, bin] - c4bin[1] |= 1 << 7 if @size != 64 - c4bin[1] |= 1 << 6 if @size != 64 - c4bin[2] |= 1 << 7 if w == 1 - c4bin[2] |= 0xF << 3 if not argnr - - addop(name, c4bin, *args) { |o| - o.args.insert(argnr, argt) if argnr - - o.fields[:vex_r] = [1, 7] if @size == 64 - o.fields[:vex_x] = [1, 6] if @size == 64 - o.fields[:vex_b] = [1, 5] - o.fields[:vex_w] = [2, 7] if not w - o.fields[:vex_vvvv] = [2, 3] if argnr - - yield o if block_given? - } - - return if w == 1 or mmmmm != 1 - - c5bin = [0xC5, lpp, bin] - c5bin[1] |= 1 << 7 if @size != 64 - c5bin[1] |= 0xF << 3 if not argnr - - addop(name, c5bin, *args) { |o| - o.args.insert(argnr, argt) if argnr - - o.fields[:vex_r] = [1, 7] if @size == 64 - o.fields[:vex_vvvv] = [1, 3] if argnr - - yield o if block_given? - } - end - - # helper function: creates a new Opcode based on the arguments, eventually - # yields it for further customisation, and append it to the instruction set - # is responsible of the creation of disambiguating opcodes if necessary (:s flag hardcoding) - def addop(name, bin, hint=nil, *argprops) - fields = (argprops.first.kind_of?(Hash) ? 
argprops.shift : {}) - op = Opcode.new name, bin - op.fields.replace fields - - case hint - when nil - - when :mrm, :mrmw, :mrmA - op.fields[:reg] = [bin.length, 3] - op.fields[:modrm] = [bin.length, 0] - op.fields[:w] = [bin.length - 1, 0] if hint == :mrmw - argprops.unshift :reg, :modrm - argprops << :modrmA if hint == :mrmA - op.bin << 0 - when :reg - op.fields[:reg] = [bin.length-1, 0] - argprops.unshift :reg - when :regfp - op.fields[:regfp] = [bin.length-1, 0] - argprops.unshift :regfp, :regfp0 - when :modrmA - op.fields[:modrm] = [bin.length-1, 0] - argprops << :modrm << :modrmA - - when Integer # mod/m, reg == opcode extension = hint - op.fields[:modrm] = [bin.length, 0] - op.bin << (hint << 3) - argprops.unshift :modrm - - when :mrmmmx - op.fields[:regmmx] = [bin.length, 3] - op.fields[:modrm] = [bin.length, 0] - bin << 0 - argprops.unshift :regmmx, :modrmmmx - when :mrmxmm - op.fields[:regxmm] = [bin.length, 3] - op.fields[:modrm] = [bin.length, 0] - bin << 0 - argprops.unshift :regxmm, :modrmxmm - when :mrmymm - op.fields[:regymm] = [bin.length, 3] - op.fields[:modrm] = [bin.length, 0] - bin << 0 - argprops.unshift :regymm, :modrmymm - else - raise SyntaxError, "invalid hint #{hint.inspect} for #{name}" - end - - argprops.each { |a| - op.props[a] = true if @valid_props[a] - op.args << a if @valid_args[a] - } - - yield op if block_given? - - if $DEBUG - argprops -= @valid_props.keys + @valid_args.keys - raise "Invalid opcode definition: #{name}: unknown #{argprops.inspect}" unless argprops.empty? - - argprops = (op.props.keys - @valid_props.keys) + (op.args - @valid_args.keys) + (op.fields.keys - @fields_mask.keys) - raise "Invalid opcode customisation: #{name}: #{argprops.inspect}" unless argprops.empty? 
- end - - addop_post(op) - end - - # this recursive method is in charge of Opcode duplication (eg to hardcode some flag) - def addop_post(op) - if df = op.fields.delete(:d) - # hardcode the bit - dop = op.dup - addop_post dop - - op.bin[df[0]] |= 1 << df[1] - op.args.reverse! - addop_post op - - return - elsif wf = op.fields.delete(:w) - # hardcode the bit - dop = op.dup - dop.props[:argsz] = 8 - # 64-bit w=0 s=1 => UD - dop.fields.delete(:s) if @size == 64 - addop_post dop - - op.bin[wf[0]] |= 1 << wf[1] - addop_post op - - return - elsif sf = op.fields.delete(:s) - # add explicit choice versions, with lower precedence (so that disassembling will return the general version) - # eg "jmp", "jmp.i8", "jmp.i" - # also hardcode the bit - op32 = op - addop_post op32 - - op8 = op.dup - op8.bin[sf[0]] |= 1 << sf[1] - op8.args.map! { |arg| arg == :i ? :i8 : arg } - addop_post op8 - - op32 = op32.dup - op32.name << '.i' - addop_post op32 - - op8 = op8.dup - op8.name << '.i8' - addop_post op8 - - return - elsif op.args.first == :regfp0 - dop = op.dup - dop.args.delete :regfp0 - addop_post dop - end - - if op.props[:needpfx] - @opcode_list.unshift op - else - @opcode_list << op - end - - if (op.args == [:i] or op.args == [:farptr] or op.name == 'ret') and op.name !~ /\.i/ - # define opsz-override version for ambiguous opcodes - op16 = op.dup - op16.name << '.i16' - op16.props[:opsz] = 16 - @opcode_list << op16 - op32 = op.dup - op32.name << '.i32' - op32.props[:opsz] = 32 - @opcode_list << op32 - elsif op.props[:strop] or op.props[:stropz] or op.args.include? :mrm_imm or - op.args.include? 
:modrm or op.name =~ /loop|xlat/ - # define adsz-override version for ambiguous opcodes (TODO allow movsd edi / movsd di syntax) - # XXX loop pfx 67 = eip+cx, 66 = ip+ecx - op16 = op.dup - op16.name << '.a16' - op16.props[:adsz] = 16 - @opcode_list << op16 - op32 = op.dup - op32.name << '.a32' - op32.props[:adsz] = 32 - @opcode_list << op32 - end - end + # undocumented opcodes + addop 'aam', [0xD4], nil, :u8 + addop 'aad', [0xD5], nil, :u8 + addop 'setalc',[0xD6] + addop 'salc', [0xD6] + addop 'icebp', [0xF1] + #addop 'loadall',[0x0F, 0x07] # conflict with syscall + addop 'ud0', [0x0F, 0xFF] # amd + addop 'ud2', [0x0F, 0xB9], :mrm + #addop 'umov', [0x0F, 0x10], :mrmw, {:d => [1, 1]} # conflicts with movups/movhlps + end + + def init_387_only + init_cpu_constants + + addop 'f2xm1', [0xD9, 0xF0] + addop 'fabs', [0xD9, 0xE1] + addop_macrofpu1 'fadd', 0 + addop 'faddp', [0xDE, 0xC0], :regfp + addop 'faddp', [0xDE, 0xC1] + addop('fbld', [0xDF, 4<<3], :modrmA, :regfp0) { |o| o.props[:argsz] = 80 } + addop('fbstp', [0xDF, 6<<3], :modrmA, :regfp0) { |o| o.props[:argsz] = 80 } + addop 'fchs', [0xD9, 0xE0], nil, :regfp0 + addop 'fnclex', [0xDB, 0xE2] + addop_macrofpu1 'fcom', 2 + addop_macrofpu1 'fcomp', 3 + addop 'fcompp',[0xDE, 0xD9] + addop 'fcomip',[0xDF, 0xF0], :regfp + addop 'fcos', [0xD9, 0xFF], nil, :regfp0 + addop 'fdecstp', [0xD9, 0xF6] + addop_macrofpu1 'fdiv', 6 + addop_macrofpu1 'fdivr', 7 + addop 'fdivp', [0xDE, 0xF8], :regfp + addop 'fdivp', [0xDE, 0xF9] + addop 'fdivrp',[0xDE, 0xF0], :regfp + addop 'fdivrp',[0xDE, 0xF1] + addop 'ffree', [0xDD, 0xC0], nil, {:regfp => [1, 0]}, :regfp + addop_macrofpu2 'fiadd', 0 + addop_macrofpu2 'fimul', 1 + addop_macrofpu2 'ficom', 2 + addop_macrofpu2 'ficomp',3 + addop_macrofpu2 'fisub', 4 + addop_macrofpu2 'fisubr',5 + addop_macrofpu2 'fidiv', 6 + addop_macrofpu2 'fidivr',7 + addop 'fincstp', [0xD9, 0xF7] + addop 'fninit', [0xDB, 0xE3] + addop_macrofpu2 'fist', 2, 1 + addop_macrofpu3 'fild', 0 + addop_macrofpu3 'fistp',3 + 
addop('fld', [0xD9, 0<<3], :modrmA, :regfp0) { |o| o.props[:argsz] = 32 } + addop('fld', [0xDD, 0<<3], :modrmA, :regfp0) { |o| o.props[:argsz] = 64 } + addop('fld', [0xDB, 5<<3], :modrmA, :regfp0) { |o| o.props[:argsz] = 80 } + addop 'fld', [0xD9, 0xC0], :regfp + + addop('fldcw', [0xD9, 5<<3], :modrmA) { |o| o.props[:argsz] = 16 } + addop 'fldenv', [0xD9, 4<<3], :modrmA + addop 'fld1', [0xD9, 0xE8] + addop 'fldl2t', [0xD9, 0xE9] + addop 'fldl2e', [0xD9, 0xEA] + addop 'fldpi', [0xD9, 0xEB] + addop 'fldlg2', [0xD9, 0xEC] + addop 'fldln2', [0xD9, 0xED] + addop 'fldz', [0xD9, 0xEE] + addop_macrofpu1 'fmul', 1 + addop 'fmulp', [0xDE, 0xC8], :regfp + addop 'fmulp', [0xDE, 0xC9] + addop 'fnop', [0xD9, 0xD0] + addop 'fpatan', [0xD9, 0xF3] + addop 'fprem', [0xD9, 0xF8] + addop 'fprem1', [0xD9, 0xF5] + addop 'fptan', [0xD9, 0xF2] + addop 'frndint',[0xD9, 0xFC] + addop 'frstor', [0xDD, 4<<3], :modrmA + addop 'fnsave', [0xDD, 6<<3], :modrmA + addop('fnstcw', [0xD9, 7<<3], :modrmA) { |o| o.props[:argsz] = 16 } + addop 'fnstenv',[0xD9, 6<<3], :modrmA + addop 'fnstsw', [0xDF, 0xE0] + addop('fnstsw', [0xDD, 7<<3], :modrmA) { |o| o.props[:argsz] = 16 } + addop 'fscale', [0xD9, 0xFD] + addop 'fsin', [0xD9, 0xFE] + addop 'fsincos',[0xD9, 0xFB] + addop 'fsqrt', [0xD9, 0xFA] + addop('fst', [0xD9, 2<<3], :modrmA, :regfp0) { |o| o.props[:argsz] = 32 } + addop('fst', [0xDD, 2<<3], :modrmA, :regfp0) { |o| o.props[:argsz] = 64 } + addop 'fst', [0xD9, 0xD0], :regfp + addop('fstp', [0xD9, 3<<3], :modrmA, :regfp0) { |o| o.props[:argsz] = 32 } + addop('fstp', [0xDD, 3<<3], :modrmA, :regfp0) { |o| o.props[:argsz] = 64 } + addop('fstp', [0xDB, 7<<3], :modrmA, :regfp0) { |o| o.props[:argsz] = 80 } + addop 'fstp', [0xDD, 0xD8], :regfp + addop_macrofpu1 'fsub', 4 + addop 'fsubp', [0xDE, 0xE8], :regfp + addop 'fsubp', [0xDE, 0xE9] + addop_macrofpu1 'fsubp', 5 + addop 'fsubrp', [0xDE, 0xE0], :regfp + addop 'fsubrp', [0xDE, 0xE1] + addop 'ftst', [0xD9, 0xE4] + addop 'fucom', [0xDD, 0xE0], :regfp + 
addop 'fucomp', [0xDD, 0xE8], :regfp + addop 'fucompp',[0xDA, 0xE9] + addop 'fucomi', [0xDB, 0xE8], :regfp + addop 'fxam', [0xD9, 0xE5] + addop 'fxch', [0xD9, 0xC8], :regfp + addop 'fxtract',[0xD9, 0xF4] + addop 'fyl2x', [0xD9, 0xF1] + addop 'fyl2xp1',[0xD9, 0xF9] + # fwait prefix + addop 'fclex', [0x9B, 0xDB, 0xE2] + addop 'finit', [0x9B, 0xDB, 0xE3] + addop 'fsave', [0x9B, 0xDD, 6<<3], :modrmA + addop('fstcw', [0x9B, 0xD9, 7<<3], :modrmA) { |o| o.props[:argsz] = 16 } + addop 'fstenv', [0x9B, 0xD9, 6<<3], :modrmA + addop 'fstsw', [0x9B, 0xDF, 0xE0] + addop('fstsw', [0x9B, 0xDD, 7<<3], :modrmA) { |o| o.props[:argsz] = 16 } + addop 'fwait', [0x9B] + end + + def init_486_only + init_cpu_constants + end + + def init_pentium_only + init_cpu_constants + + addop('cmpxchg8b', [0x0F, 0xC7], 1) { |o| o.props[:opsz] = 32 ; o.props[:argsz] = 64 } + # lock cmpxchg8b eax + #addop 'f00fbug', [0xF0, 0x0F, 0xC7, 0xC8] + + # mmx + addop 'emms', [0x0F, 0x77] + addop('movd', [0x0F, 0x6E], :mrmmmx, {:d => [1, 4]}) { |o| o.args = [:modrm, :regmmx] ; o.props[:opsz] = o.props[:argsz] = 32 } + addop('movq', [0x0F, 0x6F], :mrmmmx, {:d => [1, 4]}) { |o| o.props[:argsz] = 64 } + addop 'packssdw', [0x0F, 0x6B], :mrmmmx + addop 'packsswb', [0x0F, 0x63], :mrmmmx + addop 'packuswb', [0x0F, 0x67], :mrmmmx + addop_macrogg 0..2, 'padd', [0x0F, 0xFC], :mrmmmx + addop_macrogg 0..1, 'padds', [0x0F, 0xEC], :mrmmmx + addop_macrogg 0..1, 'paddus',[0x0F, 0xDC], :mrmmmx + addop 'pand', [0x0F, 0xDB], :mrmmmx + addop 'pandn', [0x0F, 0xDF], :mrmmmx + addop_macrogg 0..2, 'pcmpeq',[0x0F, 0x74], :mrmmmx + addop_macrogg 0..2, 'pcmpgt',[0x0F, 0x64], :mrmmmx + addop 'pmaddwd', [0x0F, 0xF5], :mrmmmx + addop 'pmulhuw', [0x0F, 0xE4], :mrmmmx + addop 'pmulhw',[0x0F, 0xE5], :mrmmmx + addop 'pmullw',[0x0F, 0xD5], :mrmmmx + addop 'por', [0x0F, 0xEB], :mrmmmx + [[1..3, 'psll', 3], [1..2, 'psra', 2], [1..3, 'psrl', 1]].each { |ggrng, name, val| + addop_macrogg ggrng, name, [0x0F, 0xC0 | (val << 4)], :mrmmmx + addop_macrogg 
ggrng, name, [0x0F, 0x70, 0xC0 | (val << 4)], nil, {:regmmx => [2, 0]}, :regmmx, :u8 + } + addop_macrogg 0..2, 'psub', [0x0F, 0xF8], :mrmmmx + addop_macrogg 0..1, 'psubs', [0x0F, 0xE8], :mrmmmx + addop_macrogg 0..1, 'psubus',[0x0F, 0xD8], :mrmmmx + addop_macrogg 1..3, 'punpckh', [0x0F, 0x68], :mrmmmx + addop_macrogg 1..3, 'punpckl', [0x0F, 0x60], :mrmmmx + addop 'pxor', [0x0F, 0xEF], :mrmmmx + end + + def init_p6_only + addop_macrotttn 'cmov', [0x0F, 0x40], :mrm + + %w{b e be u}.each_with_index { |tt, i| + addop 'fcmov' + tt, [0xDA, 0xC0 | (i << 3)], :regfp + addop 'fcmovn'+ tt, [0xDB, 0xC0 | (i << 3)], :regfp + } + addop 'fcomi', [0xDB, 0xF0], :regfp + addop('fxrstor', [0x0F, 0xAE, 1<<3], :modrmA) { |o| o.props[:argsz] = 512*8 } + addop('fxsave', [0x0F, 0xAE, 0<<3], :modrmA) { |o| o.props[:argsz] = 512*8 } + addop 'sysenter',[0x0F, 0x34] + addop 'sysexit', [0x0F, 0x35] + + addop 'syscall', [0x0F, 0x05] # AMD + addop_macroret 'sysret', [0x0F, 0x07] # AMD + end + + def init_3dnow_only + init_cpu_constants + + [['pavgusb', 0xBF], ['pfadd', 0x9E], ['pfsub', 0x9A], + ['pfsubr', 0xAA], ['pfacc', 0xAE], ['pfcmpge', 0x90], + ['pfcmpgt', 0xA0], ['fpcmpeq', 0xB0], ['pfmin', 0x94], + ['pfmax', 0xA4], ['pi2fd', 0x0D], ['pf2id', 0x1D], + ['pfrcp', 0x96], ['pfrsqrt', 0x97], ['pfmul', 0xB4], + ['pfrcpit1', 0xA6], ['pfrsqit1', 0xA7], ['pfrcpit2', 0xB6], + ['pmulhrw', 0xB7]].each { |str, bin| + addop str, [0x0F, 0x0F, bin], :mrmmmx + } + # 3dnow prefix fallback + addop '3dnow', [0x0F, 0x0F], :mrmmmx, :u8 + + addop 'femms', [0x0F, 0x0E] + addop 'prefetch', [0x0F, 0x0D, 0<<3], :modrmA + addop 'prefetchw', [0x0F, 0x0D, 1<<3], :modrmA + end + + def init_sse_only + init_cpu_constants + + addop_macrossps 'addps', [0x0F, 0x58], :mrmxmm + addop 'andnps', [0x0F, 0x55], :mrmxmm + addop 'andps', [0x0F, 0x54], :mrmxmm + addop_macrossps 'cmpps', [0x0F, 0xC2], :mrmxmm, :u8 + addop 'comiss', [0x0F, 0x2F], :mrmxmm + + addop('cvtpi2ps', [0x0F, 0x2A], :mrmxmm) { |o| o.args[o.args.index(:modrmxmm)] 
= :modrmmmx } + addop('cvtps2pi', [0x0F, 0x2D], :mrmmmx) { |o| o.args[o.args.index(:modrmmmx)] = :modrmxmm } + addop('cvtsi2ss', [0x0F, 0x2A], :mrmxmm) { |o| o.args[o.args.index(:modrmxmm)] = :modrm ; o.props[:needpfx] = 0xF3 } + addop('cvtss2si', [0x0F, 0x2D], :mrm) { |o| o.args[o.args.index(:modrm)] = :modrmxmm ; o.props[:needpfx] = 0xF3 } + addop('cvttps2pi',[0x0F, 0x2C], :mrmmmx) { |o| o.args[o.args.index(:modrmmmx)] = :modrmxmm } + addop('cvttss2si',[0x0F, 0x2C], :mrm) { |o| o.args[o.args.index(:modrm)] = :modrmxmm ; o.props[:needpfx] = 0xF3 } + + addop_macrossps 'divps', [0x0F, 0x5E], :mrmxmm + addop 'ldmxcsr', [0x0F, 0xAE, 2<<3], :modrmA + addop_macrossps 'maxps', [0x0F, 0x5F], :mrmxmm + addop_macrossps 'minps', [0x0F, 0x5D], :mrmxmm + addop 'movaps', [0x0F, 0x28], :mrmxmm, {:d => [1, 0]} + addop 'movhlps', [0x0F, 0x12], :mrmxmm, :modrmR + addop 'movlps', [0x0F, 0x12], :mrmxmm, {:d => [1, 0]}, :modrmA + addop 'movlhps', [0x0F, 0x16], :mrmxmm, :modrmR + addop 'movhps', [0x0F, 0x16], :mrmxmm, {:d => [1, 0]}, :modrmA + addop 'movmskps',[0x0F, 0x50, 0xC0], nil, {:reg => [2, 3], :regxmm => [2, 0]}, :regxmm, :reg + addop('movss', [0x0F, 0x10], :mrmxmm, {:d => [1, 0]}) { |o| o.props[:needpfx] = 0xF3 } + addop 'movups', [0x0F, 0x10], :mrmxmm, {:d => [1, 0]} + addop_macrossps 'mulps', [0x0F, 0x59], :mrmxmm + addop 'orps', [0x0F, 0x56], :mrmxmm + addop_macrossps 'rcpps', [0x0F, 0x53], :mrmxmm + addop_macrossps 'rsqrtps',[0x0F, 0x52], :mrmxmm + addop 'shufps', [0x0F, 0xC6], :mrmxmm, :u8 + addop_macrossps 'sqrtps', [0x0F, 0x51], :mrmxmm + addop 'stmxcsr', [0x0F, 0xAE, 3<<3], :modrmA + addop_macrossps 'subps', [0x0F, 0x5C], :mrmxmm + addop 'ucomiss', [0x0F, 0x2E], :mrmxmm + addop 'unpckhps',[0x0F, 0x15], :mrmxmm + addop 'unpcklps',[0x0F, 0x14], :mrmxmm + addop 'xorps', [0x0F, 0x57], :mrmxmm + + # integer instrs, mmx only + addop 'pavgb', [0x0F, 0xE0], :mrmmmx + addop 'pavgw', [0x0F, 0xE3], :mrmmmx + addop 'pextrw', [0x0F, 0xC5, 0xC0], nil, {:reg => [2, 3], :regmmx => [2, 
0]}, :reg, :regmmx, :u8 + addop 'pinsrw', [0x0F, 0xC4, 0x00], nil, {:modrm => [2, 0], :regmmx => [2, 3]}, :modrm, :regmmx, :u8 + addop 'pmaxsw', [0x0F, 0xEE], :mrmmmx + addop 'pmaxub', [0x0F, 0xDE], :mrmmmx + addop 'pminsw', [0x0F, 0xEA], :mrmmmx + addop 'pminub', [0x0F, 0xDA], :mrmmmx + addop 'pmovmskb',[0x0F, 0xD7, 0xC0], nil, {:reg => [2, 3], :regmmx => [2, 0]}, :reg, :regmmx + addop 'psadbw', [0x0F, 0xF6], :mrmmmx + addop 'pshufw', [0x0F, 0x70], :mrmmmx, :u8 + + addop 'maskmovq',[0x0F, 0xF7], :mrmmmx, :modrmR + addop('movntq', [0x0F, 0xE7], :mrmmmx) { |o| o.args.reverse! } + addop('movntps', [0x0F, 0x2B], :mrmxmm) { |o| o.args.reverse! } + addop 'prefetcht0', [0x0F, 0x18, 1<<3], :modrmA + addop 'prefetcht1', [0x0F, 0x18, 2<<3], :modrmA + addop 'prefetcht2', [0x0F, 0x18, 3<<3], :modrmA + addop 'prefetchnta',[0x0F, 0x18, 0<<3], :modrmA + addop 'sfence', [0x0F, 0xAE, 0xF8] + + # the whole row of prefetch is actually nops + addop 'nop', [0x0F, 0x1C], :mrmw, :d => [1, 1] # incl. official version = 0f1f mrm + addop 'nop_8', [0x0F, 0x18], :mrmw, :d => [1, 1] + addop 'nop_d', [0x0F, 0x0D], :mrm + addop 'nop', [0x0F, 0x1C], 0 # official asm syntax is 'nop [eax]' + end + + def init_sse2_only + init_cpu_constants + + @opcode_list.each { |o| o.props[:xmmx] = true if o.fields[:regmmx] and o.name !~ /^(?:mov(?:nt)?q|pshufw|cvt.*)$/ } + + # mirror of the init_sse part + addop_macrosdpd 'addpd', [0x0F, 0x58], :mrmxmm + addop('andnpd', [0x0F, 0x55], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } + addop('andpd', [0x0F, 0x54], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } + addop_macrosdpd 'cmppd', [0x0F, 0xC2], :mrmxmm, :u8 + addop('comisd', [0x0F, 0x2F], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } + + addop('cvtpi2pd', [0x0F, 0x2A], :mrmxmm) { |o| o.args[o.args.index(:modrmxmm)] = :modrmmmx ; o.props[:needpfx] = 0x66 } + addop('cvtpd2pi', [0x0F, 0x2D], :mrmmmx) { |o| o.args[o.args.index(:modrmmmx)] = :modrmxmm ; o.props[:needpfx] = 0x66 } + addop('cvtsi2sd', [0x0F, 0x2A], :mrmxmm) { |o| 
o.args[o.args.index(:modrmxmm)] = :modrm ; o.props[:needpfx] = 0xF2 } + addop('cvtsd2si', [0x0F, 0x2D], :mrm ) { |o| o.args[o.args.index(:modrm )] = :modrmxmm ; o.props[:needpfx] = 0xF2 } + addop('cvttpd2pi',[0x0F, 0x2C], :mrmmmx) { |o| o.args[o.args.index(:modrmmmx)] = :modrmxmm ; o.props[:needpfx] = 0x66 } + addop('cvttsd2si',[0x0F, 0x2C], :mrm ) { |o| o.args[o.args.index(:modrm )] = :modrmxmm ; o.props[:needpfx] = 0xF2 } + + addop('cvtpd2ps', [0x0F, 0x5A], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } + addop('cvtps2pd', [0x0F, 0x5A], :mrmxmm) + addop('cvtsd2ss', [0x0F, 0x5A], :mrmxmm) { |o| o.props[:needpfx] = 0xF2 } + addop('cvtss2sd', [0x0F, 0x5A], :mrmxmm) { |o| o.props[:needpfx] = 0xF3 } + + addop('cvtpd2dq', [0x0F, 0xE6], :mrmxmm) { |o| o.props[:needpfx] = 0xF2 } + addop('cvttpd2dq',[0x0F, 0xE6], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } + addop('cvtdq2pd', [0x0F, 0xE6], :mrmxmm) { |o| o.props[:needpfx] = 0xF3 } + addop('cvtps2dq', [0x0F, 0x5B], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } + addop('cvttps2dq',[0x0F, 0x5B], :mrmxmm) { |o| o.props[:needpfx] = 0xF3 } + addop('cvtdq2ps', [0x0F, 0x5B], :mrmxmm) + + addop_macrosdpd 'divpd', [0x0F, 0x5E], :mrmxmm + addop_macrosdpd 'maxpd', [0x0F, 0x5F], :mrmxmm + addop_macrosdpd 'minpd', [0x0F, 0x5D], :mrmxmm + addop('movapd', [0x0F, 0x28], :mrmxmm, {:d => [1, 0]}) { |o| o.props[:needpfx] = 0x66 } + + addop('movlpd', [0x0F, 0x12], :mrmxmm, {:d => [1, 0]}) { |o| o.props[:needpfx] = 0x66 } + addop('movhpd', [0x0F, 0x16], :mrmxmm, {:d => [1, 0]}) { |o| o.props[:needpfx] = 0x66 } + + addop('movmskpd',[0x0F, 0x50, 0xC0], nil, {:reg => [2, 3], :regxmm => [2, 0]}, :regxmm, :reg) { |o| o.props[:needpfx] = 0x66 } + addop('movsd', [0x0F, 0x10], :mrmxmm, {:d => [1, 0]}) { |o| o.props[:needpfx] = 0xF2 } + addop('movupd', [0x0F, 0x10], :mrmxmm, {:d => [1, 0]}) { |o| o.props[:needpfx] = 0x66 } + addop_macrosdpd 'mulpd', [0x0F, 0x59], :mrmxmm + addop('orpd', [0x0F, 0x56], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } + addop('shufpd', 
[0x0F, 0xC6], :mrmxmm, :u8) { |o| o.props[:needpfx] = 0x66 } + addop_macrosdpd 'sqrtpd', [0x0F, 0x51], :mrmxmm + addop_macrosdpd 'subpd', [0x0F, 0x5C], :mrmxmm + addop('ucomisd', [0x0F, 0x2E], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } + addop('unpckhpd',[0x0F, 0x15], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } + addop('unpcklpd',[0x0F, 0x14], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } + addop('xorpd', [0x0F, 0x57], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } + + addop('movdqa', [0x0F, 0x6F], :mrmxmm, {:d => [1, 4]}) { |o| o.props[:needpfx] = 0x66 } + addop('movdqu', [0x0F, 0x6F], :mrmxmm, {:d => [1, 4]}) { |o| o.props[:needpfx] = 0xF3 } + addop('movq2dq', [0x0F, 0xD6], :mrmxmm, :modrmR) { |o| o.args[o.args.index(:modrmxmm)] = :modrmmmx ; o.props[:needpfx] = 0xF3 } + addop('movdq2q', [0x0F, 0xD6], :mrmmmx, :modrmR) { |o| o.args[o.args.index(:modrmmmx)] = :modrmxmm ; o.props[:needpfx] = 0xF2 } + addop('movq', [0x0F, 0x7E], :mrmxmm) { |o| o.props[:needpfx] = 0xF3 ; o.props[:argsz] = 128 } + addop('movq', [0x0F, 0xD6], :mrmxmm) { |o| o.args.reverse! 
; o.props[:needpfx] = 0x66 ; o.props[:argsz] = 128 } + + addop 'paddq', [0x0F, 0xD4], :mrmmmx, :xmmx + addop 'pmuludq', [0x0F, 0xF4], :mrmmmx, :xmmx + addop('pshuflw', [0x0F, 0x70], :mrmxmm, :u8) { |o| o.props[:needpfx] = 0xF2 } + addop('pshufhw', [0x0F, 0x70], :mrmxmm, :u8) { |o| o.props[:needpfx] = 0xF3 } + addop('pshufd', [0x0F, 0x70], :mrmxmm, :u8) { |o| o.props[:needpfx] = 0x66 } + addop('pslldq', [0x0F, 0x73, 0xF8], nil, {:regxmm => [2, 0]}, :regxmm, :u8) { |o| o.props[:needpfx] = 0x66 } + addop('psrldq', [0x0F, 0x73, 0xD8], nil, {:regxmm => [2, 0]}, :regxmm, :u8) { |o| o.props[:needpfx] = 0x66 } + addop 'psubq', [0x0F, 0xFB], :mrmmmx, :xmmx + addop('punpckhqdq', [0x0F, 0x6D], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } + addop('punpcklqdq', [0x0F, 0x6C], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } + + addop('clflush', [0x0F, 0xAE, 7<<3], :modrmA) { |o| o.props[:argsz] = 8 } + addop('maskmovdqu', [0x0F, 0xF7], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } + addop('movntpd', [0x0F, 0x2B], :mrmxmm) { |o| o.args.reverse! ; o.props[:needpfx] = 0x66 } + addop('movntdq', [0x0F, 0xE7], :mrmxmm) { |o| o.args.reverse! ; o.props[:needpfx] = 0x66 } + addop('movnti', [0x0F, 0xC3], :mrm) { |o| o.args.reverse! 
} + addop('pause', [0x90]) { |o| o.props[:needpfx] = 0xF3 } + addop 'lfence', [0x0F, 0xAE, 0xE8] + addop 'mfence', [0x0F, 0xAE, 0xF0] + end + + def init_sse3_only + init_cpu_constants + + addop('addsubpd', [0x0F, 0xD0], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } + addop('addsubps', [0x0F, 0xD0], :mrmxmm) { |o| o.props[:needpfx] = 0xF2 } + addop('haddpd', [0x0F, 0x7C], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } + addop('haddps', [0x0F, 0x7C], :mrmxmm) { |o| o.props[:needpfx] = 0xF2 } + addop('hsubpd', [0x0F, 0x7D], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } + addop('hsubps', [0x0F, 0x7D], :mrmxmm) { |o| o.props[:needpfx] = 0xF2 } + + addop 'monitor', [0x0F, 0x01, 0xC8] + addop 'mwait', [0x0F, 0x01, 0xC9] + + addop('fisttp', [0xDF, 1<<3], :modrmA) { |o| o.props[:argsz] = 16 } + addop('fisttp', [0xDB, 1<<3], :modrmA) { |o| o.props[:argsz] = 32 } + addop('fisttp', [0xDD, 1<<3], :modrmA) { |o| o.props[:argsz] = 64 } + addop('lddqu', [0x0F, 0xF0], :mrmxmm, :modrmA) { |o| o.args[o.args.index(:modrmxmm)] = :modrm ; o.props[:needpfx] = 0xF2 } + addop('movddup', [0x0F, 0x12], :mrmxmm) { |o| o.props[:needpfx] = 0xF2 } + addop('movshdup', [0x0F, 0x16], :mrmxmm) { |o| o.props[:needpfx] = 0xF3 } + addop('movsldup', [0x0F, 0x12], :mrmxmm) { |o| o.props[:needpfx] = 0xF3 } + end + + def init_ssse3_only + init_cpu_constants + + addop_macrogg 0..2, 'pabs', [0x0F, 0x38, 0x1C], :mrmmmx, :xmmx + addop 'palignr', [0x0F, 0x3A, 0x0F], :mrmmmx, :u8, :xmmx + addop 'phaddd', [0x0F, 0x38, 0x02], :mrmmmx, :xmmx + addop 'phaddsw', [0x0F, 0x38, 0x03], :mrmmmx, :xmmx + addop 'phaddw', [0x0F, 0x38, 0x01], :mrmmmx, :xmmx + addop 'phsubd', [0x0F, 0x38, 0x06], :mrmmmx, :xmmx + addop 'phsubsw', [0x0F, 0x38, 0x07], :mrmmmx, :xmmx + addop 'phsubw', [0x0F, 0x38, 0x05], :mrmmmx, :xmmx + addop 'pmaddubsw',[0x0F, 0x38, 0x04], :mrmmmx, :xmmx + addop 'pmulhrsw', [0x0F, 0x38, 0x0B], :mrmmmx, :xmmx + addop 'pshufb', [0x0F, 0x38, 0x00], :mrmmmx, :xmmx + addop_macrogg 0..2, 'psignb', [0x0F, 0x38, 0x80], :mrmmmx, 
:xmmx + end + + def init_aesni_only + init_cpu_constants + + addop('aesdec', [0x0F, 0x38, 0xDE], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } + addop('aesdeclast',[0x0F, 0x38, 0xDF], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } + addop('aesenc', [0x0F, 0x38, 0xDC], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } + addop('aesenclast',[0x0F, 0x38, 0xDD], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } + addop('aesimc', [0x0F, 0x38, 0xDB], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } + addop('aeskeygenassist', [0x0F, 0x3A, 0xDF], :mrmxmm, :u8) { |o| o.props[:needpfx] = 0x66 } + + addop('pclmulqdq', [0x0F, 0x3A, 0x44], :mrmxmm, :u8) { |o| o.props[:needpfx] = 0x66 } + end + + def init_vmx_only + init_cpu_constants + + addop 'vmcall', [0x0F, 0x01, 0xC1] + addop 'vmlaunch', [0x0F, 0x01, 0xC2] + addop 'vmresume', [0x0F, 0x01, 0xC3] + addop 'vmxoff', [0x0F, 0x01, 0xC4] + addop 'vmread', [0x0F, 0x78], :mrm + addop 'vmwrite', [0x0F, 0x79], :mrm + addop('vmclear', [0x0F, 0xC7, 6<<3], :modrmA) { |o| o.props[:argsz] = 64 ; o.props[:needpfx] = 0x66 } + addop('vmxon', [0x0F, 0xC7, 6<<3], :modrmA) { |o| o.props[:argsz] = 64 ; o.props[:needpfx] = 0xF3 } + addop('vmptrld', [0x0F, 0xC7, 6<<3], :modrmA) { |o| o.props[:argsz] = 64 } + addop('vmptrrst', [0x0F, 0xC7, 7<<3], :modrmA) { |o| o.props[:argsz] = 64 } + addop('invept', [0x0F, 0x38, 0x80], :mrmA) { |o| o.props[:needpfx] = 0x66 } + addop('invvpid', [0x0F, 0x38, 0x81], :mrmA) { |o| o.props[:needpfx] = 0x66 } + + addop 'getsec', [0x0F, 0x37] + + addop 'xgetbv', [0x0F, 0x01, 0xD0] + addop 'xsetbv', [0x0F, 0x01, 0xD1] + addop 'rdtscp', [0x0F, 0x01, 0xF9] + addop 'xrstor', [0x0F, 0xAE, 5<<3], :modrmA + addop 'xsave', [0x0F, 0xAE, 4<<3], :modrmA + end + + def init_sse41_only + init_cpu_constants + + addop('blendpd', [0x0F, 0x3A, 0x0D], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } + addop('blendps', [0x0F, 0x3A, 0x0C], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } + addop('blendvpd', [0x0F, 0x38, 0x15], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } + addop('blendvps', 
[0x0F, 0x38, 0x14], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } + addop('dppd', [0x0F, 0x3A, 0x41], :mrmxmm, :u8) { |o| o.props[:needpfx] = 0x66 } + addop('dpps', [0x0F, 0x3A, 0x40], :mrmxmm, :u8) { |o| o.props[:needpfx] = 0x66 } + addop('extractps',[0x0F, 0x3A, 0x17], :mrmxmm, :u8) { |o| o.props[:needpfx] = 0x66 } + addop('insertps', [0x0F, 0x3A, 0x21], :mrmxmm, :u8) { |o| o.props[:needpfx] = 0x66 } + addop('movntdqa', [0x0F, 0x38, 0x2A], :mrmxmm, :modrmA) { |o| o.props[:needpfx] = 0x66 } + addop('mpsadbw', [0x0F, 0x3A, 0x42], :mrmxmm, :u8) { |o| o.props[:needpfx] = 0x66 } + addop('packusdw', [0x0F, 0x38, 0x2B], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } + addop('pblendvb', [0x0F, 0x38, 0x10], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } + addop('pblendw', [0x0F, 0x3A, 0x1E], :mrmxmm, :u8) { |o| o.props[:needpfx] = 0x66 } + addop('pcmpeqq', [0x0F, 0x38, 0x29], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } + addop('pextrb', [0x0F, 0x3A, 0x14], :mrmxmm, :u8) { |o| o.props[:needpfx] = 0x66; o.args[o.args.index(:modrmxmm)] = :modrm; o.props[:argsz] = 8 } + addop('pextrw', [0x0F, 0x3A, 0x15], :mrmxmm, :u8) { |o| o.props[:needpfx] = 0x66; o.args[o.args.index(:modrmxmm)] = :modrm; o.props[:argsz] = 16 } + addop('pextrd', [0x0F, 0x3A, 0x16], :mrmxmm, :u8) { |o| o.props[:needpfx] = 0x66; o.args[o.args.index(:modrmxmm)] = :modrm; o.props[:argsz] = 32 } + addop('pinsrb', [0x0F, 0x3A, 0x20], :mrmxmm, :u8) { |o| o.props[:needpfx] = 0x66; o.args[o.args.index(:modrmxmm)] = :modrm; o.props[:argsz] = 8 } + addop('pinsrw', [0x0F, 0x3A, 0x21], :mrmxmm, :u8) { |o| o.props[:needpfx] = 0x66; o.args[o.args.index(:modrmxmm)] = :modrm; o.props[:argsz] = 16 } + addop('pinsrd', [0x0F, 0x3A, 0x22], :mrmxmm, :u8) { |o| o.props[:needpfx] = 0x66; o.args[o.args.index(:modrmxmm)] = :modrm; o.props[:argsz] = 32 } + addop('phminposuw', [0x0F, 0x38, 0x41], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } + addop('pminsb', [0x0F, 0x38, 0x38], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } + addop('pminsd', [0x0F, 0x38, 
0x39], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } + addop('pminuw', [0x0F, 0x38, 0x3A], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } + addop('pminud', [0x0F, 0x38, 0x3B], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } + addop('pmaxsb', [0x0F, 0x38, 0x3C], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } + addop('pmaxsd', [0x0F, 0x38, 0x3D], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } + addop('pmaxuw', [0x0F, 0x38, 0x3E], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } + addop('pmaxud', [0x0F, 0x38, 0x3F], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } + + addop('pmovsxbw', [0x0F, 0x38, 0x20], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } + addop('pmovsxbd', [0x0F, 0x38, 0x21], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } + addop('pmovsxbq', [0x0F, 0x38, 0x22], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } + addop('pmovsxwd', [0x0F, 0x38, 0x23], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } + addop('pmovsxwq', [0x0F, 0x38, 0x24], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } + addop('pmovsxdq', [0x0F, 0x38, 0x25], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } + addop('pmovzxbw', [0x0F, 0x38, 0x30], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } + addop('pmovzxbd', [0x0F, 0x38, 0x31], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } + addop('pmovzxbq', [0x0F, 0x38, 0x32], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } + addop('pmovzxwd', [0x0F, 0x38, 0x33], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } + addop('pmovzxwq', [0x0F, 0x38, 0x34], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } + addop('pmovzxdq', [0x0F, 0x38, 0x35], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } + + addop('pmuldq', [0x0F, 0x38, 0x28], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } + addop('pmulld', [0x0F, 0x38, 0x40], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } + addop('ptest', [0x0F, 0x38, 0x17], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } + addop('roundps', [0x0F, 0x3A, 0x08], :mrmxmm, :u8) { |o| o.props[:needpfx] = 0x66 } + addop('roundpd', [0x0F, 0x3A, 0x09], :mrmxmm, :u8) { |o| o.props[:needpfx] = 0x66 } + addop('roundss', [0x0F, 0x3A, 0x0A], :mrmxmm, :u8) { |o| 
o.props[:needpfx] = 0x66 } + addop('roundsd', [0x0F, 0x3A, 0x0B], :mrmxmm, :u8) { |o| o.props[:needpfx] = 0x66 } + end + + def init_sse42_only + init_cpu_constants + + addop('crc32', [0x0F, 0x38, 0xF0], :mrmw) { |o| o.props[:needpfx] = 0xF2 } + addop('pcmpestrm', [0x0F, 0x3A, 0x60], :mrmxmm, :i8) { |o| o.props[:needpfx] = 0x66 } + addop('pcmpestri', [0x0F, 0x3A, 0x61], :mrmxmm, :i8) { |o| o.props[:needpfx] = 0x66 } + addop('pcmpistrm', [0x0F, 0x3A, 0x62], :mrmxmm, :i8) { |o| o.props[:needpfx] = 0x66 } + addop('pcmpistri', [0x0F, 0x3A, 0x63], :mrmxmm, :i8) { |o| o.props[:needpfx] = 0x66 } + addop('pcmpgtq', [0x0F, 0x38, 0x37], :mrmxmm) { |o| o.props[:needpfx] = 0x66 } + addop('popcnt', [0x0F, 0xB8], :mrm) { |o| o.props[:needpfx] = 0xF3 } + end + + def init_avx_only + init_cpu_constants + + add128 = {} + add256 = {} + %w[movss movsd movlhps movhpd movhlps + cvtsi2ss cvtsi2sd sqrtss sqrtsd rsqrtss rcpss + addss addsd mulss mulsd cvtss2sd cvtsd2ss subss subsd + minss minsd divss divsd maxss maxsd + punpcklb punpcklw punpckld packsswb pcmpgtb pcmpgtw pcmpgtd packuswb + punpckhb punpckhw punpckhd packssdw punpcklq punpckhq + pcmpeqb pcmpeqw pcmpeqd ldmxcsr stmxcsr + cmpss cmpsd paddq pmullw psubusb psubusw pminub + pand paddusb paddusw pmaxub pandn pavgb pavgw + pmulhuw pmulhw psubsb psubsw pminsw por paddsb paddsw pmaxsw pxor + pmuludq pmaddwd psadbw + psubb psubw psubd psubq paddb paddw paddd + phaddw phaddsw phaddd phsubw phsubsw phsubd + pmaddubsw palignr pshufb pmulhrsw psignb psignw psignd + dppd insertps mpsadbw packusdw pblendw pcmpeqq + pinsrb pinsrw pinsrd pinsrq + pmaxsb pmaxsd pmaxud pmaxuw pminsb pminsd pminud pminuw + pmuldq pmulld roundsd roundss pcmpgtq + aesdec aesdeclast aesenc aesenclast + pclmulqdq punpcklbw punpcklwd punpckldq punpckhbw punpckhwd + punpckhdq punpcklqdq punpckhqdq].each { |n| add128[n] = true } + + %w[movups movupd movddup movsldup + unpcklps unpcklpd unpckhps unpckhpd + movaps movshdup movapd movntps movntpd movmskps movmskpd + 
sqrtps sqrtpd rsqrtps rcpps andps andpd andnps andnpd + orps orpd xorps xorpd addps addpd mulps mulpd + cvtps2pd cvtpd2ps cvtdq2ps cvtps2dq cvttps2dq + subps subpd minps minpd divps divpd maxps maxpd + movdqa movdqu haddpd haddps hsubpd hsubps + cmpps cmppd shufps shufpd addsubpd addsubps + cvtpd2dq cvttpd2dq cvtdq2pd movntdq lddqu + blendps blendpd blendvps blendvpd dpps ptest + roundpd roundps].each { |n| add128[n] = add256[n] = true } + + varg = Hash.new(1) + %w[pabsb pabsw pabsd pmovmskb pshufd pshufhw pshuflw movntdqa + pmovsxbw pmovsxbd pmovsxbq pmovsxwd pmovsxwq pmovsxdq + pmovzxbw pmovzxbd pmovzxbq pmovzxwd pmovzxwq pmovzxdq + aesimc aeskeygenassist lddqu maskmovdqu movapd movaps + pcmpestri pcmpestrm pcmpistri pcmpistrm phminposuw + cvtpd2dq cvttpd2dq cvtdq2pd cvtps2pd cvtpd2ps cvtdq2ps cvtps2dq + cvttps2dq movd movq movddup movdqa movdqu movmskps movmskpd + movntdq movntps movntpd movshdup movsldup movups movupd + pextrb pextrw pextrd pextrq ptest rcpps roundps roundpd + extractps sqrtps sqrtpd comiss comisd ucomiss ucomisd + cvttss2si cvttsd2si cvtss2si cvtsd2si + ].each { |n| add128[n] = true ; varg[n] = nil } + + cvtarg128 = { :regmmx => :regxmm, :modrmmmx => :modrmxmm } + cvtarg256 = { :regmmx => :regymm, :modrmmmx => :modrmymm, + :regxmm => :regymm, :modrmxmm => :modrmymm } + + # autopromote old sseX opcodes + @opcode_list.each { |o| + next if o.bin[0] != 0x0F or not add128[o.name] # rep cmpsd / movsd + + mm = (o.bin[1] == 0x38 ? 0x0F38 : o.bin[1] == 0x3A ? 0x0F3A : 0x0F) + pp = o.props[:needpfx] + pp = 0x66 if o.props[:xmmx] + fpxlen = (mm == 0x0F ? 
1 : 2) + + addop_vex('v' + o.name, [varg[o.name], 128, pp, mm], o.bin[fpxlen], nil, *o.args.map { |oa| cvtarg128[oa] || oa }) { |oo| + oo.bin += [o.bin[fpxlen+1]] if o.bin[fpxlen+1] + dbinlen = o.bin.length - oo.bin.length + o.fields.each { |k, v| oo.fields[cvtarg128[k] || k] = [v[0]-dbinlen, v[1]] } + o.props.each { |k, v| oo.props[k] = v if k != :xmmx and k != :needpfx } + } + + next if not add256[o.name] + addop_vex('v' + o.name, [varg[o.name], 256, pp, mm], o.bin[fpxlen], nil, *o.args.map { |oa| cvtarg256[oa] || oa }) { |oo| + oo.bin += [o.bin[fpxlen+1]] if o.bin[fpxlen+1] + dbinlen = o.bin.length - oo.bin.length + o.fields.each { |k, v| oo.fields[cvtarg256[k] || k] = [v[0]-dbinlen, v[1]] } + o.props.each { |k, v| oo.props[k] = v if k != :xmmx and k != :needpfx } + } + } + + # sse promotion, special cases + addop_vex 'vpblendvb', [1, 128, 0x66, 0x0F3A, 0], 0x4C, :mrmxmm, :i4xmm + addop_vex 'vpsllw', [1, 128, 0x66, 0x0F], 0xF1, :mrmxmm + addop_vex('vpsllw', [0, 128, 0x66, 0x0F], 0x71, 6, :u8, :modrmR) { |o| o.args[o.args.index(:modrm)] = :modrmxmm } + addop_vex 'vpslld', [1, 128, 0x66, 0x0F], 0xF2, :mrmxmm + addop_vex('vpslld', [0, 128, 0x66, 0x0F], 0x72, 6, :u8, :modrmR) { |o| o.args[o.args.index(:modrm)] = :modrmxmm } + addop_vex 'vpsllq', [1, 128, 0x66, 0x0F], 0xF3, :mrmxmm + addop_vex('vpsllq', [0, 128, 0x66, 0x0F], 0x73, 6, :u8, :modrmR) { |o| o.args[o.args.index(:modrm)] = :modrmxmm } + addop_vex('vpslldq',[0, 128, 0x66, 0x0F], 0x73, 7, :u8, :modrmR) { |o| o.args[o.args.index(:modrm)] = :modrmxmm } + addop_vex 'vpsraw', [1, 128, 0x66, 0x0F], 0xE1, :mrmxmm + addop_vex('vpsraw', [0, 128, 0x66, 0x0F], 0x71, 4, :u8, :modrmR) { |o| o.args[o.args.index(:modrm)] = :modrmxmm } + addop_vex 'vpsrad', [1, 128, 0x66, 0x0F], 0xE2, :mrmxmm + addop_vex('vpsrad', [0, 128, 0x66, 0x0F], 0x72, 4, :u8, :modrmR) { |o| o.args[o.args.index(:modrm)] = :modrmxmm } + addop_vex 'vpsrlw', [1, 128, 0x66, 0x0F], 0xD1, :mrmxmm + addop_vex('vpsrlw', [0, 128, 0x66, 0x0F], 0x71, 2, :u8, 
:modrmR) { |o| o.args[o.args.index(:modrm)] = :modrmxmm } + addop_vex 'vpsrld', [1, 128, 0x66, 0x0F], 0xD2, :mrmxmm + addop_vex('vpsrld', [0, 128, 0x66, 0x0F], 0x72, 2, :u8, :modrmR) { |o| o.args[o.args.index(:modrm)] = :modrmxmm } + addop_vex 'vpsrlq', [1, 128, 0x66, 0x0F], 0xD3, :mrmxmm + addop_vex('vpsrlq', [0, 128, 0x66, 0x0F], 0x73, 2, :u8, :modrmR) { |o| o.args[o.args.index(:modrm)] = :modrmxmm } + addop_vex('vpsrldq',[0, 128, 0x66, 0x0F], 0x73, 3, :u8, :modrmR) { |o| o.args[o.args.index(:modrm)] = :modrmxmm } + + # dst==mem => no vreg + addop_vex 'vmovhps', [1, 128, nil, 0x0F], 0x16, :mrmxmm, :modrmA + addop_vex('vmovhps', [nil, 128, nil, 0x0F], 0x17, :mrmxmm, :modrmA) { |o| o.args.reverse! } + addop_vex 'vmovlpd', [1, 128, 0x66, 0x0F], 0x12, :mrmxmm, :modrmA + addop_vex('vmovlpd', [nil, 128, 0x66, 0x0F], 0x13, :mrmxmm, :modrmA) { |o| o.args.reverse! } + addop_vex 'vmovlps', [1, 128, nil, 0x0F], 0x12, :mrmxmm, :modrmA + addop_vex('vmovlps', [nil, 128, nil, 0x0F], 0x13, :mrmxmm, :modrmA) { |o| o.args.reverse! 
} + + addop_vex 'vbroadcastss', [nil, 128, 0x66, 0x0F38, 0], 0x18, :mrmxmm, :modrmA + addop_vex 'vbroadcastss', [nil, 256, 0x66, 0x0F38, 0], 0x18, :mrmymm, :modrmA + addop_vex 'vbroadcastsd', [nil, 256, 0x66, 0x0F38, 0], 0x19, :mrmymm, :modrmA + addop_vex 'vbroadcastf128', [nil, 256, 0x66, 0x0F38, 0], 0x1A, :mrmymm, :modrmA + + # general-purpose register operations + addop_vex 'andn', [1, :vexvreg, 128, nil, 0x0F38], 0xF2, :mrm + addop_vex 'bextr', [2, :vexvreg, 128, nil, 0x0F38], 0xF7, :mrm + addop_vex 'blsi', [0, :vexvreg, 128, nil, 0x0F38], 0xF3, 3 + addop_vex 'blsmsk', [0, :vexvreg, 128, nil, 0x0F38], 0xF3, 2 + addop_vex 'blsr', [0, :vexvreg, 128, nil, 0x0F38], 0xF3, 1 + addop_vex 'bzhi', [2, :vexvreg, 128, nil, 0x0F38], 0xF5, :mrm + addop('lzcnt', [0x0F, 0xBD], :mrm) { |o| o.props[:needpfx] = 0xF3 } + addop_vex 'mulx', [1, :vexvreg, 128, 0xF2, 0x0F38], 0xF6, :mrm + addop_vex 'pdep', [1, :vexvreg, 128, 0xF2, 0x0F38], 0xF5, :mrm + addop_vex 'pext', [1, :vexvreg, 128, 0xF3, 0x0F38], 0xF5, :mrm + addop_vex 'rorx', [nil, 128, 0xF2, 0x0F3A], 0xF0, :mrm, :u8 + addop_vex 'sarx', [2, :vexvreg, 128, 0xF3, 0x0F38], 0xF7, :mrm + addop_vex 'shrx', [2, :vexvreg, 128, 0xF2, 0x0F38], 0xF7, :mrm + addop_vex 'shlx', [2, :vexvreg, 128, 0x66, 0x0F38], 0xF7, :mrm + addop('tzcnt', [0x0F, 0xBC], :mrm) { |o| o.props[:needpfx] = 0xF3 } + addop('invpcid', [0x0F, 0x38, 0x82], :mrm) { |o| o.props[:needpfx] = 0x66 } + addop 'rdrand', [0x0F, 0xC7], 6, :modrmR + addop 'rdseed', [0x0F, 0xC7], 7, :modrmR + addop('adcx', [0x0F, 0x38, 0xF6], :mrm) { |o| o.props[:needpfx] = 0x66 } + addop('adox', [0x0F, 0x38, 0xF6], :mrm) { |o| o.props[:needpfx] = 0xF3 } + + # fp16 + addop_vex 'vcvtph2ps', [nil, 128, 0x66, 0x0F38, 0], 0x13, :mrmxmm + addop_vex 'vcvtph2ps', [nil, 256, 0x66, 0x0F38, 0], 0x13, :mrmymm + addop_vex('vcvtps2ph', [nil, 128, 0x66, 0x0F3A, 0], 0x1D, :mrmxmm, :u8) { |o| o.args.reverse! } + addop_vex('vcvtps2ph', [nil, 256, 0x66, 0x0F3A, 0], 0x1D, :mrmymm, :u8) { |o| o.args.reverse! 
} + + # TSE + addop 'xabort', [0xC6, 0xF8], nil, :i8 # may :stopexec + addop 'xbegin', [0xC7, 0xF8], nil, :i # may :setip: xabortreturns to $_(xbegin) + off + addop 'xend', [0x0F, 0x01, 0xD5] + addop 'xtest', [0x0F, 0x01, 0xD6] + + # SMAP + addop 'clac', [0x0F, 0x01, 0xCA] + addop 'stac', [0x0F, 0x01, 0xCB] + end + + def init_avx2_only + init_cpu_constants + + add256 = {} + %w[packsswb pcmpgtb pcmpgtw pcmpgtd packuswb packssdw + pcmpeqb pcmpeqw pcmpeqd paddq pmullw psubusb psubusw + pminub pand paddusb paddusw pmaxub pandn pavgb pavgw + pmulhuw pmulhw psubsb psubsw pminsw por paddsb paddsw + pmaxsw pxor pmuludq pmaddwd psadbw + psubb psubw psubd psubq paddb paddw paddd + phaddw phaddsw phaddd phsubw phsubsw phsubd + pmaddubsw palignr pshufb pmulhrsw psignb psignw psignd + mpsadbw packusdw pblendw pcmpeqq + pmaxsb pmaxsd pmaxud pmaxuw pminsb pminsd pminud pminuw + pmuldq pmulld pcmpgtq punpcklbw punpcklwd punpckldq + punpckhbw punpckhwd punpckhdq punpcklqdq punpckhqdq + ].each { |n| add256[n] = true } + + varg = Hash.new(1) + %w[pabsb pabsw pabsd pmovmskb pshufd pshufhw pshuflw movntdqa + pmovsxbw pmovsxbd pmovsxbq pmovsxwd pmovsxwq pmovsxdq + pmovzxbw pmovzxbd pmovzxbq pmovzxwd pmovzxwq pmovzxdq + maskmovdqu].each { |n| add256[n] = true ; varg[n] = nil } + + cvtarg256 = { :regmmx => :regymm, :modrmmmx => :modrmymm, + :regxmm => :regymm, :modrmxmm => :modrmymm } + + # autopromote old sseX opcodes + @opcode_list.each { |o| + next if o.bin[0] != 0x0F or not add256[o.name] + + mm = (o.bin[1] == 0x38 ? 0x0F38 : o.bin[1] == 0x3A ? 0x0F3A : 0x0F) + pp = o.props[:needpfx] + pp = 0x66 if o.props[:xmmx] + fpxlen = (mm == 0x0F ? 
1 : 2) + + addop_vex('v' + o.name, [varg[o.name], 256, pp, mm], o.bin[fpxlen], nil, *o.args.map { |oa| cvtarg256[oa] || oa }) { |oo| + oo.bin += [o.bin[fpxlen+1]] if o.bin[fpxlen+1] + dbinlen = o.bin.length - oo.bin.length + o.fields.each { |k, v| oo.fields[cvtarg256[k] || k] = [v[0]-dbinlen, v[1]] } + o.props.each { |k, v| oo.props[k] = v if k != :xmmx and k != :needpfx } + } + } + + # promote special cases + addop_vex 'vpblendvb', [1, 256, 0x66, 0x0F3A, 0], 0x4C, :mrmymm, :i4ymm + addop_vex 'vpsllw', [1, 256, 0x66, 0x0F], 0xF1, :mrmymm + addop_vex('vpsllw', [0, 256, 0x66, 0x0F], 0x71, 6, :u8, :modrmR) { |o| o.args[o.args.index(:modrm)] = :modrmymm } + addop_vex 'vpslld', [1, 256, 0x66, 0x0F], 0xF2, :mrmymm + addop_vex('vpslld', [0, 256, 0x66, 0x0F], 0x72, 6, :u8, :modrmR) { |o| o.args[o.args.index(:modrm)] = :modrmymm } + addop_vex 'vpsllq', [1, 256, 0x66, 0x0F], 0xF3, :mrmymm + addop_vex('vpsllq', [0, 256, 0x66, 0x0F], 0x73, 6, :u8, :modrmR) { |o| o.args[o.args.index(:modrm)] = :modrmymm } + addop_vex('vpslldq',[0, 256, 0x66, 0x0F], 0x73, 7, :u8, :modrmR) { |o| o.args[o.args.index(:modrm)] = :modrmymm } + addop_vex 'vpsraw', [1, 256, 0x66, 0x0F], 0xE1, :mrmymm + addop_vex('vpsraw', [0, 256, 0x66, 0x0F], 0x71, 4, :u8, :modrmR) { |o| o.args[o.args.index(:modrm)] = :modrmymm } + addop_vex 'vpsrad', [1, 256, 0x66, 0x0F], 0xE2, :mrmymm + addop_vex('vpsrad', [0, 256, 0x66, 0x0F], 0x72, 4, :u8, :modrmR) { |o| o.args[o.args.index(:modrm)] = :modrmymm } + addop_vex 'vpsrlw', [1, 256, 0x66, 0x0F], 0xD1, :mrmymm + addop_vex('vpsrlw', [0, 256, 0x66, 0x0F], 0x71, 2, :u8, :modrmR) { |o| o.args[o.args.index(:modrm)] = :modrmymm } + addop_vex 'vpsrld', [1, 256, 0x66, 0x0F], 0xD2, :mrmymm + addop_vex('vpsrld', [0, 256, 0x66, 0x0F], 0x72, 2, :u8, :modrmR) { |o| o.args[o.args.index(:modrm)] = :modrmymm } + addop_vex 'vpsrlq', [1, 256, 0x66, 0x0F], 0xD3, :mrmymm + addop_vex('vpsrlq', [0, 256, 0x66, 0x0F], 0x73, 2, :u8, :modrmR) { |o| o.args[o.args.index(:modrm)] = :modrmymm } + 
addop_vex('vpsrldq',[0, 256, 0x66, 0x0F], 0x73, 3, :u8, :modrmR) { |o| o.args[o.args.index(:modrm)] = :modrmymm } + + addop_vex 'vbroadcastss', [nil, 128, 0x66, 0x0F38, 0], 0x18, :mrmxmm, :modrmR + addop_vex 'vbroadcastss', [nil, 256, 0x66, 0x0F38, 0], 0x18, :mrmymm, :modrmR + addop_vex 'vbroadcastsd', [nil, 256, 0x66, 0x0F38, 0], 0x19, :mrmymm, :modrmR + addop_vex 'vbroadcasti128', [nil, 256, 0x66, 0x0F38, 0], 0x5A, :mrmymm, :modrmA + addop_vex 'vpblendd', [1, 128, 0x66, 0x0F3A, 0], 0x02, :mrmxmm, :u8 + addop_vex 'vpblendd', [1, 256, 0x66, 0x0F3A, 0], 0x02, :mrmymm, :u8 + addop_vex 'vpbroadcastb', [nil, 128, 0x66, 0x0F38, 0], 0x78, :mrmxmm + addop_vex 'vpbroadcastb', [nil, 256, 0x66, 0x0F38, 0], 0x78, :mrmymm + addop_vex 'vpbroadcastw', [nil, 128, 0x66, 0x0F38, 0], 0x79, :mrmxmm + addop_vex 'vpbroadcastw', [nil, 256, 0x66, 0x0F38, 0], 0x79, :mrmymm + addop_vex 'vpbroadcastd', [nil, 128, 0x66, 0x0F38, 0], 0x58, :mrmxmm + addop_vex 'vpbroadcastd', [nil, 256, 0x66, 0x0F38, 0], 0x58, :mrmymm + addop_vex 'vpbroadcastq', [nil, 128, 0x66, 0x0F38, 0], 0x59, :mrmxmm + addop_vex 'vpbroadcastq', [nil, 256, 0x66, 0x0F38, 0], 0x59, :mrmymm + addop_vex 'vpermd', [1, 256, 0x66, 0x0F38, 0], 0x36, :mrmymm + addop_vex 'vpermpd', [nil, 256, 0x66, 0x0F3A, 1], 0x01, :mrmymm, :u8 + addop_vex 'vpermps', [1, 256, 0x66, 0x0F38, 0], 0x16, :mrmymm, :u8 + addop_vex 'vpermq', [nil, 256, 0x66, 0x0F3A, 1], 0x00, :mrmymm, :u8 + addop_vex 'vperm2i128', [1, 256, 0x66, 0x0F3A, 0], 0x46, :mrmymm, :u8 + addop_vex 'vextracti128', [nil, 256, 0x66, 0x0F3A, 0], 0x39, :mrmymm, :u8 + addop_vex 'vinserti128', [1, 256, 0x66, 0x0F3A, 0], 0x38, :mrmymm, :u8 + addop_vex 'vpmaskmovd', [1, 128, 0x66, 0x0F38, 0], 0x8C, :mrmxmm, :modrmA + addop_vex 'vpmaskmovd', [1, 256, 0x66, 0x0F38, 0], 0x8C, :mrmymm, :modrmA + addop_vex 'vpmaskmovq', [1, 128, 0x66, 0x0F38, 1], 0x8C, :mrmxmm, :modrmA + addop_vex 'vpmaskmovq', [1, 256, 0x66, 0x0F38, 1], 0x8C, :mrmymm, :modrmA + addop_vex('vpmaskmovd', [1, 128, 0x66, 0x0F38, 0], 
0x8E, :mrmxmm, :modrmA) { |o| o.args.reverse! } + addop_vex('vpmaskmovd', [1, 256, 0x66, 0x0F38, 0], 0x8E, :mrmymm, :modrmA) { |o| o.args.reverse! } + addop_vex('vpmaskmovq', [1, 128, 0x66, 0x0F38, 1], 0x8E, :mrmxmm, :modrmA) { |o| o.args.reverse! } + addop_vex('vpmaskmovq', [1, 256, 0x66, 0x0F38, 1], 0x8E, :mrmymm, :modrmA) { |o| o.args.reverse! } + addop_vex 'vpsllvd', [1, 128, 0x66, 0x0F38, 0], 0x47, :mrmxmm + addop_vex 'vpsllvq', [1, 128, 0x66, 0x0F38, 1], 0x47, :mrmxmm + addop_vex 'vpsllvd', [1, 256, 0x66, 0x0F38, 0], 0x47, :mrmymm + addop_vex 'vpsllvq', [1, 256, 0x66, 0x0F38, 1], 0x47, :mrmymm + addop_vex 'vpsravd', [1, 128, 0x66, 0x0F38, 0], 0x46, :mrmxmm + addop_vex 'vpsravd', [1, 256, 0x66, 0x0F38, 0], 0x46, :mrmymm + addop_vex 'vpsrlvd', [1, 128, 0x66, 0x0F38, 0], 0x45, :mrmxmm + addop_vex 'vpsrlvq', [1, 128, 0x66, 0x0F38, 1], 0x45, :mrmxmm + addop_vex 'vpsrlvd', [1, 256, 0x66, 0x0F38, 0], 0x45, :mrmymm + addop_vex 'vpsrlvq', [1, 256, 0x66, 0x0F38, 1], 0x45, :mrmymm + + addop_vex('vpgatherdd', [2, 128, 0x66, 0x0F38, 0], 0x90, :mrmxmm) { |o| o.props[:argsz] = 32 ; o.props[:mrmvex] = 128 } + addop_vex('vpgatherdd', [2, 256, 0x66, 0x0F38, 0], 0x90, :mrmymm) { |o| o.props[:argsz] = 32 ; o.props[:mrmvex] = 256 } + addop_vex('vpgatherdq', [2, 128, 0x66, 0x0F38, 1], 0x90, :mrmxmm) { |o| o.props[:argsz] = 64 ; o.props[:mrmvex] = 128 } + addop_vex('vpgatherdq', [2, 256, 0x66, 0x0F38, 1], 0x90, :mrmymm) { |o| o.props[:argsz] = 64 ; o.props[:mrmvex] = 256 } + addop_vex('vpgatherqd', [2, 128, 0x66, 0x0F38, 0], 0x91, :mrmxmm) { |o| o.props[:argsz] = 32 ; o.props[:mrmvex] = 128 } + addop_vex('vpgatherqd', [2, 256, 0x66, 0x0F38, 0], 0x91, :mrmymm) { |o| o.props[:argsz] = 32 ; o.props[:mrmvex] = 256 } + addop_vex('vpgatherqq', [2, 128, 0x66, 0x0F38, 1], 0x91, :mrmxmm) { |o| o.props[:argsz] = 64 ; o.props[:mrmvex] = 128 } + addop_vex('vpgatherqq', [2, 256, 0x66, 0x0F38, 1], 0x91, :mrmymm) { |o| o.props[:argsz] = 64 ; o.props[:mrmvex] = 256 } + addop_vex('vgatherdps', [2, 
128, 0x66, 0x0F38, 0], 0x92, :mrmxmm) { |o| o.props[:argsz] = 32 ; o.props[:mrmvex] = 128 } + addop_vex('vgatherdps', [2, 256, 0x66, 0x0F38, 0], 0x92, :mrmymm) { |o| o.props[:argsz] = 32 ; o.props[:mrmvex] = 256 } + addop_vex('vgatherdpd', [2, 128, 0x66, 0x0F38, 1], 0x92, :mrmxmm) { |o| o.props[:argsz] = 64 ; o.props[:mrmvex] = 128 } + addop_vex('vgatherdpd', [2, 256, 0x66, 0x0F38, 1], 0x92, :mrmymm) { |o| o.props[:argsz] = 64 ; o.props[:mrmvex] = 256 } + addop_vex('vgatherqps', [2, 128, 0x66, 0x0F38, 0], 0x93, :mrmxmm) { |o| o.props[:argsz] = 32 ; o.props[:mrmvex] = 128 } + addop_vex('vgatherqps', [2, 256, 0x66, 0x0F38, 0], 0x93, :mrmymm) { |o| o.props[:argsz] = 32 ; o.props[:mrmvex] = 256 } + addop_vex('vgatherqpd', [2, 128, 0x66, 0x0F38, 1], 0x93, :mrmxmm) { |o| o.props[:argsz] = 64 ; o.props[:mrmvex] = 128 } + addop_vex('vgatherqpd', [2, 256, 0x66, 0x0F38, 1], 0x93, :mrmymm) { |o| o.props[:argsz] = 64 ; o.props[:mrmvex] = 256 } + end + + def init_fma_only + init_cpu_constants + + [['vfmaddsub', 'p', 0x86], + ['vfmsubadd', 'p', 0x87], + ['vfmadd', 'p', 0x88], + ['vfmadd', 's', 0x89], + ['vfmsub', 'p', 0x8A], + ['vfmsub', 's', 0x8B], + ['vfnmadd', 'p', 0x8C], + ['vfnmadd', 's', 0x8D], + ['vfnmsub', 'p', 0x8E], + ['vfnmsub', 's', 0x8F]].each { |n1, n2, bin| + addop_vex n1 + '132' + n2 + 's', [1, 128, 0x66, 0x0F38, 0], bin | 0x10, :mrmxmm + addop_vex n1 + '132' + n2 + 's', [1, 256, 0x66, 0x0F38, 0], bin | 0x10, :mrmymm + addop_vex n1 + '132' + n2 + 'd', [1, 128, 0x66, 0x0F38, 1], bin | 0x10, :mrmxmm + addop_vex n1 + '132' + n2 + 'd', [1, 256, 0x66, 0x0F38, 1], bin | 0x10, :mrmymm + addop_vex n1 + '213' + n2 + 's', [1, 128, 0x66, 0x0F38, 0], bin | 0x20, :mrmxmm + addop_vex n1 + '213' + n2 + 's', [1, 256, 0x66, 0x0F38, 0], bin | 0x20, :mrmymm + addop_vex n1 + '213' + n2 + 'd', [1, 128, 0x66, 0x0F38, 1], bin | 0x20, :mrmxmm + addop_vex n1 + '213' + n2 + 'd', [1, 256, 0x66, 0x0F38, 1], bin | 0x20, :mrmymm + addop_vex n1 + '231' + n2 + 's', [1, 128, 0x66, 0x0F38, 0], 
bin | 0x30, :mrmxmm + addop_vex n1 + '231' + n2 + 's', [1, 256, 0x66, 0x0F38, 0], bin | 0x30, :mrmymm + addop_vex n1 + '231' + n2 + 'd', [1, 128, 0x66, 0x0F38, 1], bin | 0x30, :mrmxmm + addop_vex n1 + '231' + n2 + 'd', [1, 256, 0x66, 0x0F38, 1], bin | 0x30, :mrmymm + + # pseudo-opcodes aliases (swap arg0/arg1) + addop_vex(n1 + '312' + n2 + 's', [1, 128, 0x66, 0x0F38, 0], bin | 0x10, :mrmxmm) { |o| o.args[0, 2] = o.args[0, 2].reverse } + addop_vex(n1 + '312' + n2 + 's', [1, 256, 0x66, 0x0F38, 0], bin | 0x10, :mrmymm) { |o| o.args[0, 2] = o.args[0, 2].reverse } + addop_vex(n1 + '312' + n2 + 'd', [1, 128, 0x66, 0x0F38, 1], bin | 0x10, :mrmxmm) { |o| o.args[0, 2] = o.args[0, 2].reverse } + addop_vex(n1 + '312' + n2 + 'd', [1, 256, 0x66, 0x0F38, 1], bin | 0x10, :mrmymm) { |o| o.args[0, 2] = o.args[0, 2].reverse } + addop_vex(n1 + '123' + n2 + 's', [1, 128, 0x66, 0x0F38, 0], bin | 0x20, :mrmxmm) { |o| o.args[0, 2] = o.args[0, 2].reverse } + addop_vex(n1 + '123' + n2 + 's', [1, 256, 0x66, 0x0F38, 0], bin | 0x20, :mrmymm) { |o| o.args[0, 2] = o.args[0, 2].reverse } + addop_vex(n1 + '123' + n2 + 'd', [1, 128, 0x66, 0x0F38, 1], bin | 0x20, :mrmxmm) { |o| o.args[0, 2] = o.args[0, 2].reverse } + addop_vex(n1 + '123' + n2 + 'd', [1, 256, 0x66, 0x0F38, 1], bin | 0x20, :mrmymm) { |o| o.args[0, 2] = o.args[0, 2].reverse } + addop_vex(n1 + '321' + n2 + 's', [1, 128, 0x66, 0x0F38, 0], bin | 0x30, :mrmxmm) { |o| o.args[0, 2] = o.args[0, 2].reverse } + addop_vex(n1 + '321' + n2 + 's', [1, 256, 0x66, 0x0F38, 0], bin | 0x30, :mrmymm) { |o| o.args[0, 2] = o.args[0, 2].reverse } + addop_vex(n1 + '321' + n2 + 'd', [1, 128, 0x66, 0x0F38, 1], bin | 0x30, :mrmxmm) { |o| o.args[0, 2] = o.args[0, 2].reverse } + addop_vex(n1 + '321' + n2 + 'd', [1, 256, 0x66, 0x0F38, 1], bin | 0x30, :mrmymm) { |o| o.args[0, 2] = o.args[0, 2].reverse } + } + end + + # + # CPU family dependencies + # + + def init_386_common + init_386_common_only + end + + def init_386 + init_386_common + init_386_only + end + + 
def init_387 + init_387_only + end + + def init_486 + init_386 + init_387 + init_486_only + end + + def init_pentium + init_486 + init_pentium_only + end + + def init_3dnow + init_pentium + init_3dnow_only + end + + def init_p6 + init_pentium + init_p6_only + end + + def init_sse + init_p6 + init_sse_only + end + + def init_sse2 + init_sse + init_sse2_only + end + + def init_sse3 + init_sse2 + init_sse3_only + end + + def init_ssse3 + init_sse3 + init_ssse3_only + end + + def init_sse41 + init_ssse3 + init_sse41_only + end + + def init_sse42 + init_sse41 + init_sse42_only + end + + def init_avx + init_sse42 + init_avx_only + end + + def init_avx2 + init_avx + init_fma_only + init_avx2_only + end + + def init_all + init_avx2 + init_3dnow_only + init_vmx_only + init_aesni_only + end + + alias init_latest init_all + + + # + # addop_* macros + # + + def addop_macro1(name, num, *props) + addop name, [(num << 3) | 4], nil, {:w => [0, 0]}, :reg_eax, :i, *props + addop(name, [num << 3], :mrmw, {:d => [0, 1]}) { |o| o.args.reverse! } + addop name, [0x80], num, {:w => [0, 0], :s => [0, 1]}, :i, *props + end + def addop_macro2(name, num) + addop name, [0x0F, 0xBA], (4 | num), :u8 + addop(name, [0x0F, 0xA3 | (num << 3)], :mrm) { |op| op.args.reverse! 
} + end + def addop_macro3(name, num) + addop name, [0xD0], num, {:w => [0, 0]}, :imm_val1 + addop name, [0xD2], num, {:w => [0, 0]}, :reg_cl + addop name, [0xC0], num, {:w => [0, 0]}, :u8 + end + + def addop_macrotttn(name, bin, hint, *props, &blk) + [%w{o}, %w{no}, %w{b nae c}, %w{nb ae nc}, + %w{z e}, %w{nz ne}, %w{be na}, %w{nbe a}, + %w{s}, %w{ns}, %w{p pe}, %w{np po}, + %w{l nge}, %w{nl ge}, %w{le ng}, %w{nle g}].each_with_index { |e, i| + b = bin.dup + if b[0] == 0x0F + b[1] |= i + else + b[0] |= i + end + + e.each { |k| addop(name + k, b.dup, hint, *props, &blk) } + } + end + + def addop_macrostr(name, bin, type) + # addop(name, bin.dup, {:w => [0, 0]}) { |o| o.props[type] = true } # TODO allow segment override + addop(name+'b', bin) { |o| o.props[:opsz] = 16 ; o.props[type] = true } + addop(name+'b', bin) { |o| o.props[:opsz] = 32 ; o.props[type] = true } + bin = bin.dup + bin[0] |= 1 + addop(name+'w', bin) { |o| o.props[:opsz] = 16 ; o.props[type] = true } + addop(name+'d', bin) { |o| o.props[:opsz] = 32 ; o.props[type] = true } + end + + def addop_macrofpu1(name, n) + addop(name, [0xD8, n<<3], :modrmA, :regfp0) { |o| o.props[:argsz] = 32 } + addop(name, [0xDC, n<<3], :modrmA, :regfp0) { |o| o.props[:argsz] = 64 } + addop(name, [0xD8, 0xC0|(n<<3)], :regfp, {:d => [0, 2]}) { |o| o.args.reverse! 
} + end + def addop_macrofpu2(name, n, n2=0) + addop(name, [0xDE|n2, n<<3], :modrmA, :regfp0) { |o| o.props[:argsz] = 16 } + addop(name, [0xDA|n2, n<<3], :modrmA, :regfp0) { |o| o.props[:argsz] = 32 } + end + def addop_macrofpu3(name, n) + addop_macrofpu2 name, n, 1 + addop(name, [0xDF, 0x28|(n<<3)], :modrmA, :regfp0) { |o| o.props[:argsz] = 64 } + end + + def addop_macrogg(ggrng, name, bin, *args, &blk) + ggoff = 1 + ggoff = 2 if bin[1] == 0x38 or bin[1] == 0x3A + ggrng.each { |gg| + bindup = bin.dup + bindup[ggoff] |= gg + sfx = %w(b w d q)[gg] + addop name+sfx, bindup, *args, &blk + } + end + + def addop_macrossps(name, bin, hint, *a) + addop name, bin.dup, hint, *a + addop(name.sub(/ps$/, 'ss'), bin.dup, hint, *a) { |o| o.props[:needpfx] = 0xF3 } + end + + def addop_macrosdpd(name, bin, hint, *a) + addop(name, bin.dup, hint, *a) { |o| o.props[:needpfx] = 0x66 } + addop(name.sub(/pd$/, 'sd'), bin.dup, hint, *a) { |o| o.props[:needpfx] = 0xF2 } + end + + # special ret (iret/retf), that still default to 32b mode in x64 + def addop_macroret(name, bin, *args) + addop(name + '.i32', bin.dup, nil, :stopexec, :setip, *args) { |o| o.props[:opsz] = 32 } + addop(name + '.i16', bin.dup, nil, :stopexec, :setip, *args) { |o| o.props[:opsz] = 16 } if name != 'sysret' + addop(name, bin.dup, nil, :stopexec, :setip, *args) { |o| o.props[:opsz] = @size } + end + + # add an AVX instruction needing a VEX prefix (c4h/c5h) + # the prefix is hardcoded + def addop_vex(name, vexspec, bin, *args) + argnr = vexspec.shift + argt = vexspec.shift if argnr and vexspec.first.kind_of?(::Symbol) + l = vexspec.shift + pfx = vexspec.shift + of = vexspec.shift + w = vexspec.shift + argt ||= (l == 128 ? 
:vexvxmm : :vexvymm) + + lpp = ((l >> 8) << 2) | [nil, 0x66, 0xF3, 0xF2].index(pfx) + mmmmm = [nil, 0x0F, 0x0F38, 0x0F3A].index(of) + + c4bin = [0xC4, mmmmm, lpp, bin] + c4bin[1] |= 1 << 7 if @size != 64 + c4bin[1] |= 1 << 6 if @size != 64 + c4bin[2] |= 1 << 7 if w == 1 + c4bin[2] |= 0xF << 3 if not argnr + + addop(name, c4bin, *args) { |o| + o.args.insert(argnr, argt) if argnr + + o.fields[:vex_r] = [1, 7] if @size == 64 + o.fields[:vex_x] = [1, 6] if @size == 64 + o.fields[:vex_b] = [1, 5] + o.fields[:vex_w] = [2, 7] if not w + o.fields[:vex_vvvv] = [2, 3] if argnr + + yield o if block_given? + } + + return if w == 1 or mmmmm != 1 + + c5bin = [0xC5, lpp, bin] + c5bin[1] |= 1 << 7 if @size != 64 + c5bin[1] |= 0xF << 3 if not argnr + + addop(name, c5bin, *args) { |o| + o.args.insert(argnr, argt) if argnr + + o.fields[:vex_r] = [1, 7] if @size == 64 + o.fields[:vex_vvvv] = [1, 3] if argnr + + yield o if block_given? + } + end + + # helper function: creates a new Opcode based on the arguments, eventually + # yields it for further customisation, and append it to the instruction set + # is responsible of the creation of disambiguating opcodes if necessary (:s flag hardcoding) + def addop(name, bin, hint=nil, *argprops) + fields = (argprops.first.kind_of?(Hash) ? 
argprops.shift : {}) + op = Opcode.new name, bin + op.fields.replace fields + + case hint + when nil + + when :mrm, :mrmw, :mrmA + op.fields[:reg] = [bin.length, 3] + op.fields[:modrm] = [bin.length, 0] + op.fields[:w] = [bin.length - 1, 0] if hint == :mrmw + argprops.unshift :reg, :modrm + argprops << :modrmA if hint == :mrmA + op.bin << 0 + when :reg + op.fields[:reg] = [bin.length-1, 0] + argprops.unshift :reg + when :regfp + op.fields[:regfp] = [bin.length-1, 0] + argprops.unshift :regfp, :regfp0 + when :modrmA + op.fields[:modrm] = [bin.length-1, 0] + argprops << :modrm << :modrmA + + when Integer # mod/m, reg == opcode extension = hint + op.fields[:modrm] = [bin.length, 0] + op.bin << (hint << 3) + argprops.unshift :modrm + + when :mrmmmx + op.fields[:regmmx] = [bin.length, 3] + op.fields[:modrm] = [bin.length, 0] + bin << 0 + argprops.unshift :regmmx, :modrmmmx + when :mrmxmm + op.fields[:regxmm] = [bin.length, 3] + op.fields[:modrm] = [bin.length, 0] + bin << 0 + argprops.unshift :regxmm, :modrmxmm + when :mrmymm + op.fields[:regymm] = [bin.length, 3] + op.fields[:modrm] = [bin.length, 0] + bin << 0 + argprops.unshift :regymm, :modrmymm + else + raise SyntaxError, "invalid hint #{hint.inspect} for #{name}" + end + + argprops.each { |a| + op.props[a] = true if @valid_props[a] + op.args << a if @valid_args[a] + } + + yield op if block_given? + + if $DEBUG + argprops -= @valid_props.keys + @valid_args.keys + raise "Invalid opcode definition: #{name}: unknown #{argprops.inspect}" unless argprops.empty? + + argprops = (op.props.keys - @valid_props.keys) + (op.args - @valid_args.keys) + (op.fields.keys - @fields_mask.keys) + raise "Invalid opcode customisation: #{name}: #{argprops.inspect}" unless argprops.empty? 
+ end + + addop_post(op) + end + + # this recursive method is in charge of Opcode duplication (eg to hardcode some flag) + def addop_post(op) + if df = op.fields.delete(:d) + # hardcode the bit + dop = op.dup + addop_post dop + + op.bin[df[0]] |= 1 << df[1] + op.args.reverse! + addop_post op + + return + elsif wf = op.fields.delete(:w) + # hardcode the bit + dop = op.dup + dop.props[:argsz] = 8 + # 64-bit w=0 s=1 => UD + dop.fields.delete(:s) if @size == 64 + addop_post dop + + op.bin[wf[0]] |= 1 << wf[1] + addop_post op + + return + elsif sf = op.fields.delete(:s) + # add explicit choice versions, with lower precedence (so that disassembling will return the general version) + # eg "jmp", "jmp.i8", "jmp.i" + # also hardcode the bit + op32 = op + addop_post op32 + + op8 = op.dup + op8.bin[sf[0]] |= 1 << sf[1] + op8.args.map! { |arg| arg == :i ? :i8 : arg } + addop_post op8 + + op32 = op32.dup + op32.name << '.i' + addop_post op32 + + op8 = op8.dup + op8.name << '.i8' + addop_post op8 + + return + elsif op.args.first == :regfp0 + dop = op.dup + dop.args.delete :regfp0 + addop_post dop + end + + if op.props[:needpfx] + @opcode_list.unshift op + else + @opcode_list << op + end + + if (op.args == [:i] or op.args == [:farptr] or op.name == 'ret') and op.name !~ /\.i/ + # define opsz-override version for ambiguous opcodes + op16 = op.dup + op16.name << '.i16' + op16.props[:opsz] = 16 + @opcode_list << op16 + op32 = op.dup + op32.name << '.i32' + op32.props[:opsz] = 32 + @opcode_list << op32 + elsif op.props[:strop] or op.props[:stropz] or op.args.include? :mrm_imm or + op.args.include? 
:modrm or op.name =~ /loop|xlat/ + # define adsz-override version for ambiguous opcodes (TODO allow movsd edi / movsd di syntax) + # XXX loop pfx 67 = eip+cx, 66 = ip+ecx + op16 = op.dup + op16.name << '.a16' + op16.props[:adsz] = 16 + @opcode_list << op16 + op32 = op.dup + op32.name << '.a32' + op32.props[:adsz] = 32 + @opcode_list << op32 + end + end end end diff --git a/lib/metasm/metasm/cpu/ia32/parse.rb b/lib/metasm/metasm/cpu/ia32/parse.rb index 4c91da0d23..16fffb0e18 100644 --- a/lib/metasm/metasm/cpu/ia32/parse.rb +++ b/lib/metasm/metasm/cpu/ia32/parse.rb @@ -11,347 +11,349 @@ require 'metasm/parse' module Metasm class Ia32 class ModRM - # may return a SegReg - # must be called before SegReg parser (which could match only the seg part of a modrm) - def self.parse(lexer, otok, cpu) - tok = otok + # may return a SegReg + # must be called before SegReg parser (which could match only the seg part of a modrm) + def self.parse(lexer, otok, cpu) + tok = otok - # read operand size specifier - if tok and tok.type == :string and tok.raw =~ /^(?:byte|[dqo]?word|_(\d+)bits)$/ - ptsz = - if $1 - $1.to_i - else - case tok.raw - when 'byte'; 8 - when 'word'; 16 - when 'dword'; 32 - when 'qword'; 64 - when 'oword'; 128 - else raise otok, 'mrm: bad ptr size' - end - end - lexer.skip_space - if tok = lexer.readtok and tok.type == :string and tok.raw == 'ptr' - lexer.skip_space - tok = lexer.readtok - end - end + # read operand size specifier + if tok and tok.type == :string and tok.raw =~ /^(?:byte|[dqo]?word|_(\d+)bits)$/ + ptsz = + if $1 + $1.to_i + else + case tok.raw + when 'byte'; 8 + when 'word'; 16 + when 'dword'; 32 + when 'qword'; 64 + when 'oword'; 128 + else raise otok, 'mrm: bad ptr size' + end + end + lexer.skip_space + if tok = lexer.readtok and tok.type == :string and tok.raw == 'ptr' + lexer.skip_space + tok = lexer.readtok + end + end - # read segment selector - if tok and tok.type == :string and seg = SegReg.s_to_i[tok.raw] - lexer.skip_space - seg = 
SegReg.new(seg) - if not ntok = lexer.readtok or ntok.type != :punct or ntok.raw != ':' - raise otok, 'invalid modrm' if ptsz - lexer.unreadtok ntok - return seg - end - lexer.skip_space - tok = lexer.readtok - end + # read segment selector + if tok and tok.type == :string and seg = SegReg.s_to_i[tok.raw] + lexer.skip_space + seg = SegReg.new(seg) + if not ntok = lexer.readtok or ntok.type != :punct or ntok.raw != ':' + raise otok, 'invalid modrm' if ptsz + lexer.unreadtok ntok + return seg + end + lexer.skip_space + tok = lexer.readtok + end - # ensure we have a modrm - if not tok or tok.type != :punct or tok.raw != '[' - raise otok, 'invalid modrm' if ptsz or seg - return - end - lexer.skip_space_eol + # ensure we have a modrm + if not tok or tok.type != :punct or tok.raw != '[' + raise otok, 'invalid modrm' if ptsz or seg + return + end + lexer.skip_space_eol - # support fasm syntax [fs:eax] for segment selector - if tok = lexer.readtok and tok.type == :string and not seg and seg = SegReg.s_to_i[tok.raw] - raise otok, 'invalid modrm' if not ntok = lexer.readtok or ntok.type != :punct or ntok.raw != ':' - seg = SegReg.new(seg) - lexer.skip_space_eol - else - lexer.unreadtok tok - end + # support fasm syntax [fs:eax] for segment selector + if tok = lexer.readtok and tok.type == :string and not seg and seg = SegReg.s_to_i[tok.raw] + raise otok, 'invalid modrm' if not ntok = lexer.readtok or ntok.type != :punct or ntok.raw != ':' + seg = SegReg.new(seg) + lexer.skip_space_eol + else + lexer.unreadtok tok + end - # read modrm content as generic expression - content = Expression.parse(lexer) - lexer.skip_space_eol - raise(otok, 'bad modrm') if not content or not ntok = lexer.readtok or ntok.type != :punct or ntok.raw != ']' + # read modrm content as generic expression + content = Expression.parse(lexer) + lexer.skip_space_eol + raise(otok, 'bad modrm') if not content or not ntok = lexer.readtok or ntok.type != :punct or ntok.raw != ']' - # converts matching externals 
to Regs in an expression - regify = lambda { |o| - case o - when Expression - o.lexpr = regify[o.lexpr] - o.rexpr = regify[o.rexpr] - o - when String - cpu.str_to_reg(o) || o - else o - end - } + # converts matching externals to Regs in an expression + regify = lambda { |o| + case o + when Expression + o.lexpr = regify[o.lexpr] + o.rexpr = regify[o.rexpr] + o + when String + cpu.str_to_reg(o) || o + else o + end + } - s = i = b = imm = nil + s = i = b = imm = nil - # assigns the Regs in the expression to base or index field of the modrm - walker = lambda { |o| - case o - when nil - when Reg - if b - raise otok, 'mrm: too many regs' if i - i = o - s = 1 - else - b = o - end - when SimdReg - raise otok, 'mrm: too many regs' if i - i = o - s = 1 - when Expression - if o.op == :* and (o.rexpr.kind_of? Reg or o.lexpr.kind_of? Reg) - # scaled index - raise otok, 'mrm: too many indexes' if i - s = o.lexpr - i = o.rexpr - s, i = i, s if s.kind_of? Reg - raise otok, "mrm: bad scale #{s}" unless [1, 2, 4, 8].include?(s) - elsif o.op == :+ - # recurse - walker[o.lexpr] - walker[o.rexpr] - else - # found (a part of) the immediate - imm = Expression[imm, :+, o] - end - else - # found (a part of) the immediate - imm = Expression[imm, :+, o] - end - } + # assigns the Regs in the expression to base or index field of the modrm + walker = lambda { |o| + case o + when nil + when Reg + if b + raise otok, 'mrm: too many regs' if i + i = o + s = 1 + else + b = o + end + when SimdReg + raise otok, 'mrm: too many regs' if i + i = o + s = 1 + when Expression + if o.op == :* and (o.rexpr.kind_of?(Reg) or o.lexpr.kind_of?(Reg)) + # scaled index + raise otok, 'mrm: too many indexes' if i + s = o.lexpr + i = o.rexpr + s, i = i, s if s.kind_of? 
Reg + raise otok, "mrm: bad scale #{s}" unless [1, 2, 4, 8].include?(s) + elsif o.op == :+ + # recurse + walker[o.lexpr] + walker[o.rexpr] + else + # found (a part of) the immediate + imm = Expression[imm, :+, o] + end + else + # found (a part of) the immediate + imm = Expression[imm, :+, o] + end + } - # do it - walker[regify[content.reduce]] + # do it + walker[regify[content.reduce]] - # ensure found immediate is really an immediate - raise otok, 'mrm: reg in imm' if imm.kind_of? Expression and not imm.externals.grep(Reg).empty? + # ensure found immediate is really an immediate + raise otok, 'mrm: reg in imm' if imm.kind_of?(Expression) and not imm.externals.grep(Reg).empty? - # find default address size - adsz = b ? b.sz : i ? i.sz : nil - # ptsz may be nil now, will be fixed up later (in parse_instr_fixup) to match another instruction argument's size - new adsz, ptsz, s, i, b, imm, seg - end + raise otok, 'mrm: bad reg size' if b.kind_of?(Reg) and i.kind_of?(Reg) and b.sz != i.sz + + # find default address size + adsz = b ? b.sz : i ? i.sz : nil + # ptsz may be nil now, will be fixed up later (in parse_instr_fixup) to match another instruction argument's size + new adsz, ptsz, s, i, b, imm, seg + end end - # handles cpu-specific parser instruction, falls back to Ancestor's version if unknown keyword - # XXX changing the cpu size in the middle of the code may have baaad effects... - def parse_parser_instruction(lexer, instr) - case instr.raw.downcase - when '.mode', '.bits' - lexer.skip_space - if tok = lexer.readtok and tok.type == :string and (tok.raw == '16' or tok.raw == '32') - @size = tok.raw.to_i - lexer.skip_space - raise instr, 'syntax error' if ntok = lexer.nexttok and ntok.type != :eol - else - raise instr, 'invalid cpu mode' - end - else super(lexer, instr) - end - end + # handles cpu-specific parser instruction, falls back to Ancestor's version if unknown keyword + # XXX changing the cpu size in the middle of the code may have baaad effects... 
+ def parse_parser_instruction(lexer, instr) + case instr.raw.downcase + when '.mode', '.bits' + lexer.skip_space + if tok = lexer.readtok and tok.type == :string and (tok.raw == '16' or tok.raw == '32') + @size = tok.raw.to_i + lexer.skip_space + raise instr, 'syntax error' if ntok = lexer.nexttok and ntok.type != :eol + else + raise instr, 'invalid cpu mode' + end + else super(lexer, instr) + end + end - def parse_prefix(i, pfx) - # implicit 'true' return value when assignment occur - i.prefix ||= {} - case pfx - when 'lock'; i.prefix[:lock] = true - when 'rep'; i.prefix[:rep] = 'rep' - when 'repe', 'repz'; i.prefix[:rep] = 'repz' - when 'repne', 'repnz'; i.prefix[:rep] = 'repnz' - when 'code16'; i.prefix[:sz] = 16 - when 'code32'; i.prefix[:sz] = 32 - when 'hintjmp', 'ht'; i.prefix[:jmphint] = 'hintjmp' - when 'hintnojmp', 'hnt';i.prefix[:jmphint] = 'hintnojmp' - when /^seg_([c-g]s)$/; i.prefix[:seg] = SegReg.new(SegReg.s_to_i[$1]) - end - end + def parse_prefix(i, pfx) + # implicit 'true' return value when assignment occur + i.prefix ||= {} + case pfx + when 'lock'; i.prefix[:lock] = true + when 'rep'; i.prefix[:rep] = 'rep' + when 'repe', 'repz'; i.prefix[:rep] = 'repz' + when 'repne', 'repnz'; i.prefix[:rep] = 'repnz' + when 'code16'; i.prefix[:sz] = 16 + when 'code32'; i.prefix[:sz] = 32 + when 'hintjmp', 'ht'; i.prefix[:jmphint] = 'hintjmp' + when 'hintnojmp', 'hnt';i.prefix[:jmphint] = 'hintnojmp' + when /^seg_([c-g]s)$/; i.prefix[:seg] = SegReg.new(SegReg.s_to_i[$1]) + end + end - def parse_argregclasslist - [Reg, SimdReg, SegReg, DbgReg, TstReg, CtrlReg, FpReg] - end - def parse_modrm(lex, tok, cpu) - ModRM.parse(lex, tok, cpu) - end + def parse_argregclasslist + [Reg, SimdReg, SegReg, DbgReg, TstReg, CtrlReg, FpReg] + end + def parse_modrm(lex, tok, cpu) + ModRM.parse(lex, tok, cpu) + end - # parses an arbitrary ia32 instruction argument - def parse_argument(lexer) - lexer = AsmPreprocessor.new(lexer) if lexer.kind_of? 
String + # parses an arbitrary ia32 instruction argument + def parse_argument(lexer) + lexer = AsmPreprocessor.new(lexer) if lexer.kind_of? String - # reserved names (registers/segments etc) - @args_token ||= parse_argregclasslist.map { |a| a.s_to_i.keys }.flatten.inject({}) { |h, e| h.update e => true } + # reserved names (registers/segments etc) + @args_token ||= parse_argregclasslist.map { |a| a.s_to_i.keys }.flatten.inject({}) { |h, e| h.update e => true } - lexer.skip_space - return if not tok = lexer.readtok + lexer.skip_space + return if not tok = lexer.readtok - if tok.type == :string and tok.raw == 'ST' - lexer.skip_space - if ntok = lexer.readtok and ntok.type == :punct and ntok.raw == '(' - lexer.skip_space - if not nntok = lexer.readtok or nntok.type != :string or nntok.raw !~ /^[0-9]$/ or - not ntok = (lexer.skip_space; lexer.readtok) or ntok.type != :punct or ntok.raw != ')' - raise tok, 'invalid FP register' - else - tok.raw << '(' << nntok.raw << ')' - fpr = parse_argregclasslist.last - if fpr.s_to_i.has_key? tok.raw - return fpr.new(fpr.s_to_i[tok.raw]) - else - raise tok, 'invalid FP register' - end - end - else - lexer.unreadtok ntok - end - end + if tok.type == :string and tok.raw == 'ST' + lexer.skip_space + if ntok = lexer.readtok and ntok.type == :punct and ntok.raw == '(' + lexer.skip_space + if not nntok = lexer.readtok or nntok.type != :string or nntok.raw !~ /^[0-9]$/ or + not ntok = (lexer.skip_space; lexer.readtok) or ntok.type != :punct or ntok.raw != ')' + raise tok, 'invalid FP register' + else + tok.raw << '(' << nntok.raw << ')' + fpr = parse_argregclasslist.last + if fpr.s_to_i.has_key? tok.raw + return fpr.new(fpr.s_to_i[tok.raw]) + else + raise tok, 'invalid FP register' + end + end + else + lexer.unreadtok ntok + end + end - if ret = parse_modrm(lexer, tok, self) - ret - elsif @args_token[tok.raw] - parse_argregclasslist.each { |a| - return a.from_str(tok.raw) if a.s_to_i.has_key? 
tok.raw - } - raise tok, 'internal error' - else - lexer.unreadtok tok - expr = Expression.parse(lexer) - lexer.skip_space + if ret = parse_modrm(lexer, tok, self) + ret + elsif @args_token[tok.raw] + parse_argregclasslist.each { |a| + return a.from_str(tok.raw) if a.s_to_i.has_key? tok.raw + } + raise tok, 'internal error' + else + lexer.unreadtok tok + expr = Expression.parse(lexer) + lexer.skip_space - # may be a farptr - if expr and ntok = lexer.readtok and ntok.type == :punct and ntok.raw == ':' - raise tok, 'invalid farptr' if not addr = Expression.parse(lexer) - Farptr.new expr, addr - else - lexer.unreadtok ntok - Expression[expr.reduce] if expr - end - end - end + # may be a farptr + if expr and ntok = lexer.readtok and ntok.type == :punct and ntok.raw == ':' + raise tok, 'invalid farptr' if not addr = Expression.parse(lexer) + Farptr.new expr, addr + else + lexer.unreadtok ntok + Expression[expr.reduce] if expr + end + end + end - # check if the argument matches the opcode's argument spec - def parse_arg_valid?(o, spec, arg) - if o.name == 'movsx' or o.name == 'movzx' - if not arg.kind_of?(Reg) and not arg.kind_of?(ModRM) - return - elsif not arg.sz - puts "ambiguous arg size for indirection in #{o.name}" if $VERBOSE - return - elsif spec == :reg # reg=dst, modrm=src (smaller) - return (arg.kind_of?(Reg) and arg.sz >= 16) - elsif o.props[:argsz] - return arg.sz == o.props[:argsz] - else - return arg.sz == 16 - end - elsif o.name == 'crc32' - if not arg.kind_of?(Reg) and not arg.kind_of?(ModRM) - return - elsif not arg.sz - puts "ambiguous arg size for indirection in #{o.name}" if $VERBOSE - return - elsif spec == :reg - return (arg.kind_of?(Reg) and arg.sz >= 32) - elsif o.props[:argsz] - return arg.sz == o.props[:argsz] - else - return arg.sz >= 16 - end - end + # check if the argument matches the opcode's argument spec + def parse_arg_valid?(o, spec, arg) + if o.name == 'movsx' or o.name == 'movzx' + if not arg.kind_of?(Reg) and not arg.kind_of?(ModRM) 
+ return + elsif not arg.sz + puts "ambiguous arg size for indirection in #{o.name}" if $VERBOSE + return + elsif spec == :reg # reg=dst, modrm=src (smaller) + return (arg.kind_of?(Reg) and arg.sz >= 16) + elsif o.props[:argsz] + return arg.sz == o.props[:argsz] + else + return arg.sz == 16 + end + elsif o.name == 'crc32' + if not arg.kind_of?(Reg) and not arg.kind_of?(ModRM) + return + elsif not arg.sz + puts "ambiguous arg size for indirection in #{o.name}" if $VERBOSE + return + elsif spec == :reg + return (arg.kind_of?(Reg) and arg.sz >= 32) + elsif o.props[:argsz] + return arg.sz == o.props[:argsz] + else + return arg.sz >= 16 + end + end - return false if arg.kind_of? ModRM and arg.adsz and o.props[:adsz] and arg.adsz != o.props[:adsz] + return false if arg.kind_of? ModRM and arg.adsz and o.props[:adsz] and arg.adsz != o.props[:adsz] - cond = true - if s = o.props[:argsz] and (arg.kind_of? Reg or arg.kind_of? ModRM) - cond = (!arg.sz or arg.sz == s or spec == :reg_dx) - end + cond = true + if s = o.props[:argsz] and (arg.kind_of? Reg or arg.kind_of? ModRM) + cond = (!arg.sz or arg.sz == s or spec == :reg_dx) + end - cond and - case spec - when :reg; arg.kind_of? Reg and (arg.sz >= 16 or o.props[:argsz]) - when :modrm; (arg.kind_of? ModRM or arg.kind_of? Reg) and (!arg.sz or arg.sz >= 16 or o.props[:argsz]) and (!o.props[:modrmA] or arg.kind_of? ModRM) and (!o.props[:modrmR] or arg.kind_of? Reg) - when :i; arg.kind_of? Expression - when :imm_val1; arg.kind_of? Expression and arg.reduce == 1 - when :imm_val3; arg.kind_of? Expression and arg.reduce == 3 - when :reg_eax; arg.kind_of? Reg and arg.val == 0 - when :reg_cl; arg.kind_of? Reg and arg.val == 1 and arg.sz == 8 - when :reg_dx; arg.kind_of? Reg and arg.val == 2 and arg.sz == 16 - when :seg3; arg.kind_of? SegReg - when :seg3A; arg.kind_of? SegReg and arg.val > 3 - when :seg2; arg.kind_of? SegReg and arg.val < 4 - when :seg2A; arg.kind_of? SegReg and arg.val < 4 and arg.val != 1 - when :eeec; arg.kind_of? 
CtrlReg - when :eeed; arg.kind_of? DbgReg - when :eeet; arg.kind_of? TstReg - when :mrm_imm; arg.kind_of? ModRM and not arg.s and not arg.i and not arg.b - when :farptr; arg.kind_of? Farptr - when :regfp; arg.kind_of? FpReg - when :regfp0; arg.kind_of? FpReg and (arg.val == nil or arg.val == 0) - when :modrmmmx; arg.kind_of? ModRM or (arg.kind_of? SimdReg and (arg.sz == 64 or (arg.sz == 128 and o.props[:xmmx]))) and (!o.props[:modrmA] or arg.kind_of? ModRM) and (!o.props[:modrmR] or arg.kind_of? SimdReg) - when :regmmx; arg.kind_of? SimdReg and (arg.sz == 64 or (arg.sz == 128 and o.props[:xmmx])) - when :modrmxmm; arg.kind_of? ModRM or (arg.kind_of? SimdReg and arg.sz == 128) and (!o.props[:modrmA] or arg.kind_of? ModRM) and (!o.props[:modrmR] or arg.kind_of? SimdReg) - when :regxmm; arg.kind_of? SimdReg and arg.sz == 128 - when :modrmymm; arg.kind_of? ModRM or (arg.kind_of? SimdReg and arg.sz == 256) and (!o.props[:modrmA] or arg.kind_of? ModRM) and (!o.props[:modrmR] or arg.kind_of? SimdReg) - when :regymm; arg.kind_of? SimdReg and arg.sz == 256 + cond and + case spec + when :reg; arg.kind_of? Reg and (arg.sz >= 16 or o.props[:argsz]) + when :modrm; (arg.kind_of? ModRM or arg.kind_of? Reg) and (!arg.sz or arg.sz >= 16 or o.props[:argsz]) and (!o.props[:modrmA] or arg.kind_of? ModRM) and (!o.props[:modrmR] or arg.kind_of? Reg) + when :i; arg.kind_of? Expression + when :imm_val1; arg.kind_of? Expression and arg.reduce == 1 + when :imm_val3; arg.kind_of? Expression and arg.reduce == 3 + when :reg_eax; arg.kind_of? Reg and arg.val == 0 + when :reg_cl; arg.kind_of? Reg and arg.val == 1 and arg.sz == 8 + when :reg_dx; arg.kind_of? Reg and arg.val == 2 and arg.sz == 16 + when :seg3; arg.kind_of? SegReg + when :seg3A; arg.kind_of? SegReg and arg.val > 3 + when :seg2; arg.kind_of? SegReg and arg.val < 4 + when :seg2A; arg.kind_of? SegReg and arg.val < 4 and arg.val != 1 + when :eeec; arg.kind_of? CtrlReg + when :eeed; arg.kind_of? DbgReg + when :eeet; arg.kind_of? 
TstReg + when :mrm_imm; arg.kind_of? ModRM and not arg.s and not arg.i and not arg.b + when :farptr; arg.kind_of? Farptr + when :regfp; arg.kind_of? FpReg + when :regfp0; arg.kind_of? FpReg and (arg.val == nil or arg.val == 0) + when :modrmmmx; arg.kind_of? ModRM or (arg.kind_of? SimdReg and (arg.sz == 64 or (arg.sz == 128 and o.props[:xmmx]))) and (!o.props[:modrmA] or arg.kind_of? ModRM) and (!o.props[:modrmR] or arg.kind_of? SimdReg) + when :regmmx; arg.kind_of? SimdReg and (arg.sz == 64 or (arg.sz == 128 and o.props[:xmmx])) + when :modrmxmm; arg.kind_of? ModRM or (arg.kind_of? SimdReg and arg.sz == 128) and (!o.props[:modrmA] or arg.kind_of? ModRM) and (!o.props[:modrmR] or arg.kind_of? SimdReg) + when :regxmm; arg.kind_of? SimdReg and arg.sz == 128 + when :modrmymm; arg.kind_of? ModRM or (arg.kind_of? SimdReg and arg.sz == 256) and (!o.props[:modrmA] or arg.kind_of? ModRM) and (!o.props[:modrmR] or arg.kind_of? SimdReg) + when :regymm; arg.kind_of? SimdReg and arg.sz == 256 - when :vexvreg; arg.kind_of? Reg and arg.sz == @size - when :vexvxmm, :i4xmm; arg.kind_of? SimdReg and arg.sz == 128 - when :vexvymm, :i4ymm; arg.kind_of? SimdReg and arg.sz == 256 + when :vexvreg; arg.kind_of? Reg and arg.sz == @size + when :vexvxmm, :i4xmm; arg.kind_of? SimdReg and arg.sz == 128 + when :vexvymm, :i4ymm; arg.kind_of? SimdReg and arg.sz == 256 - when :i8, :u8, :u16 - arg.kind_of? Expression and - (o.props[:setip] or Expression.in_range?(arg, spec) != false) # true or nil allowed - # jz 0x28282828 may fit in :i8 depending on instr addr - else raise EncodeError, "Internal error: unknown argument specification #{spec.inspect}" - end - end + when :i8, :u8, :u16 + arg.kind_of? 
Expression and + (o.props[:setip] or Expression.in_range?(arg, spec) != false) # true or nil allowed + # jz 0x28282828 may fit in :i8 depending on instr addr + else raise EncodeError, "Internal error: unknown argument specification #{spec.inspect}" + end + end - def parse_instruction_checkproto(i) - case i.opname - when 'imul' - if i.args.length == 2 and i.args.first.kind_of? Reg and i.args.last.kind_of? Expression - i.args.unshift i.args.first.dup - end - end - super(i) - end + def parse_instruction_checkproto(i) + case i.opname + when 'imul' + if i.args.length == 2 and i.args.first.kind_of? Reg and i.args.last.kind_of? Expression + i.args.unshift i.args.first.dup + end + end + super(i) + end - # fixup the sz of a modrm argument, defaults to other argument size or current cpu mode - def parse_instruction_fixup(i) - if m = i.args.grep(ModRM).first and not m.sz - if i.opname == 'movzx' or i.opname == 'movsx' - m.sz = 8 - else - if r = i.args.grep(Reg).first - m.sz = r.sz - elsif l = opcode_list_byname[i.opname].map { |o| o.props[:argsz] }.uniq and l.length == 1 and l.first - m.sz = l.first - else - # this is also the size of ctrlreg/dbgreg etc - # XXX fpu/simd ? - m.sz = i.prefix[:sz] || @size - end - end - end - if m and not m.adsz - if opcode_list_byname[i.opname].all? { |o| o.props[:adsz] } - m.adsz = opcode_list_byname[i.opname].first.props[:adsz] - else - m.adsz = i.prefix[:sz] || @size - end - end - end + # fixup the sz of a modrm argument, defaults to other argument size or current cpu mode + def parse_instruction_fixup(i) + if m = i.args.grep(ModRM).first and not m.sz + if i.opname == 'movzx' or i.opname == 'movsx' + m.sz = 8 + else + if r = i.args.grep(Reg).first + m.sz = r.sz + elsif l = opcode_list_byname[i.opname].map { |o| o.props[:argsz] }.uniq and l.length == 1 and l.first + m.sz = l.first + else + # this is also the size of ctrlreg/dbgreg etc + # XXX fpu/simd ? 
+ m.sz = i.prefix[:sz] || @size + end + end + end + if m and not m.adsz + if opcode_list_byname[i.opname].all? { |o| o.props[:adsz] } + m.adsz = opcode_list_byname[i.opname].first.props[:adsz] + else + m.adsz = i.prefix[:sz] || @size + end + end + end - def check_reserved_name(name) - Reg.s_to_i[name] - end + def check_reserved_name(name) + Reg.s_to_i[name] + end - def instr_uncond_jump_to(target) - parse_instruction("jmp #{target}") - end + def instr_uncond_jump_to(target) + parse_instruction("jmp #{target}") + end end end diff --git a/lib/metasm/metasm/cpu/ia32/render.rb b/lib/metasm/metasm/cpu/ia32/render.rb index 2223819fdf..0533317a5f 100644 --- a/lib/metasm/metasm/cpu/ia32/render.rb +++ b/lib/metasm/metasm/cpu/ia32/render.rb @@ -10,109 +10,109 @@ require 'metasm/render' # XXX move context in another file ? module Metasm class Ia32 - class Argument - include Renderable - end + class Argument + include Renderable + end - [SegReg, DbgReg, TstReg, CtrlReg, FpReg].each { |c| c.class_eval { - def render ; [self.class.i_to_s[@val]] end - } } - [Reg, SimdReg].each { |c| c.class_eval { - def render ; [self.class.i_to_s[@sz][@val]] end - def context ; {'set sz' => lambda { |s| @sz = s }} end - } } + [SegReg, DbgReg, TstReg, CtrlReg, FpReg].each { |c| c.class_eval { + def render ; [self.class.i_to_s[@val]] end + } } + [Reg, SimdReg].each { |c| c.class_eval { + def render ; [self.class.i_to_s[@sz][@val]] end + def context ; {'set sz' => lambda { |s| @sz = s }} end + } } - class Farptr - def render - [@seg, ':', @addr] - end - end + class Farptr + def render + [@seg, ':', @addr] + end + end - class ModRM - def qualifier(sz) - { - 8 => 'byte', - 16 => 'word', - 32 => 'dword', - 64 => 'qword', - 128 => 'oword' - }.fetch(sz) { |k| "_#{sz}bits" } - end + class ModRM + def qualifier(sz) + { + 8 => 'byte', + 16 => 'word', + 32 => 'dword', + 64 => 'qword', + 128 => 'oword' + }.fetch(sz) { |k| "_#{sz}bits" } + end - attr_accessor :instruction - def render - r = [] - r << ( 
qualifier(@sz) << ' ptr ' ) if @sz and (not instruction or not @instruction.args.find { |a| a.kind_of? Reg and a.sz == @sz }) - r << @seg << ':' if seg + attr_accessor :instruction + def render + r = [] + r << ( qualifier(@sz) << ' ptr ' ) if @sz and (not instruction or not @instruction.args.find { |a| a.kind_of? Reg and a.sz == @sz }) + r << @seg << ':' if seg - e = nil - e = Expression[e, :+, @b] if b - e = Expression[e, :+, @imm] if imm - e = Expression[e, :+, (@s == 1 ? @i : [@s, :*, @i])] if s - r << '[' << e << ']' - end + e = nil + e = Expression[e, :+, @b] if b + e = Expression[e, :+, @imm] if imm + e = Expression[e, :+, (@s == 1 ? @i : [@s, :*, @i])] if s + r << '[' << e << ']' + end - def context - {'set targetsz' => lambda { |s| @sz = s }, - 'set seg' => lambda { |s| @seg = Seg.new s }} - end - end + def context + {'set targetsz' => lambda { |s| @sz = s }, + 'set seg' => lambda { |s| @seg = Seg.new s }} + end + end - def render_instruction(i) - r = [] - if pfx = i.prefix - r << 'lock ' if pfx[:lock] - r << pfx[:rep] << ' ' if pfx[:rep] - r << pfx[:jmphint] << ' ' if pfx[:jmphint] - r << 'seg_' << pfx[:seg] << ' ' if pfx[:seg] - end - r << i.opname - sep = ' ' - i.args.each { |a| - a.instruction = i if a.kind_of? ModRM - r << sep << a - sep = ', ' - } - r - end + def render_instruction(i) + r = [] + if pfx = i.prefix + r << 'lock ' if pfx[:lock] + r << pfx[:rep] << ' ' if pfx[:rep] + r << pfx[:jmphint] << ' ' if pfx[:jmphint] + r << 'seg_' << pfx[:seg] << ' ' if pfx[:seg] + end + r << i.opname + sep = ' ' + i.args.each { |a| + a.instruction = i if a.kind_of? 
ModRM + r << sep << a + sep = ', ' + } + r + end - def instruction_context(i) - # XXX - h = {} - op = opcode_list_byname[i.opname].first - if i.prefix and i.prefix[:rep] - h['toogle repz'] = lambda { i.prefix[:rep] = {'repnz' => 'repz', 'repz' => 'repnz'}[i.prefix[:rep]] } if op.props[:stropz] - h['rm rep'] = lambda { i.prefix.delete :rep } - else - h['set rep'] = lambda { (i.prefix ||= {})[:rep] = 'rep' } if op.props[:strop] - h['set rep'] = lambda { (i.prefix ||= {})[:rep] = 'repz' } if op.props[:stropz] - end - if i.args.find { |a| a.kind_of? ModRM and a.seg } - h['rm seg'] = lambda { i.args.find { |a| a.kind_of? ModRM and a.seg }.seg = nil } - end - h['toggle lock'] = lambda { (i.prefix ||= {})[:lock] = !i.prefix[:lock] } - h - end + def instruction_context(i) + # XXX + h = {} + op = opcode_list_byname[i.opname].first + if i.prefix and i.prefix[:rep] + h['toogle repz'] = lambda { i.prefix[:rep] = {'repnz' => 'repz', 'repz' => 'repnz'}[i.prefix[:rep]] } if op.props[:stropz] + h['rm rep'] = lambda { i.prefix.delete :rep } + else + h['set rep'] = lambda { (i.prefix ||= {})[:rep] = 'rep' } if op.props[:strop] + h['set rep'] = lambda { (i.prefix ||= {})[:rep] = 'repz' } if op.props[:stropz] + end + if i.args.find { |a| a.kind_of? ModRM and a.seg } + h['rm seg'] = lambda { i.args.find { |a| a.kind_of? 
ModRM and a.seg }.seg = nil } + end + h['toggle lock'] = lambda { (i.prefix ||= {})[:lock] = !i.prefix[:lock] } + h + end - def gui_hilight_word_regexp_init - ret = {} + def gui_hilight_word_regexp_init + ret = {} - %w[a b c d].each { |r| - ret["#{r}l"] = "e?#{r}x|#{r}l" - ret["#{r}h"] = "e?#{r}x|#{r}h" - ret["#{r}x"] = ret["e#{r}x"] = "e?#{r}x|#{r}[hl]" - } + %w[a b c d].each { |r| + ret["#{r}l"] = "e?#{r}x|#{r}l" + ret["#{r}h"] = "e?#{r}x|#{r}h" + ret["#{r}x"] = ret["e#{r}x"] = "e?#{r}x|#{r}[hl]" + } - %w[sp bp si di].each { |r| - ret[r] = ret["e#{r}"] = "e?#{r}" - } + %w[sp bp si di].each { |r| + ret[r] = ret["e#{r}"] = "e?#{r}" + } - ret - end + ret + end - def gui_hilight_word_regexp(word) - @gui_hilight_word_hash ||= gui_hilight_word_regexp_init - @gui_hilight_word_hash[word] or super(word) - end + def gui_hilight_word_regexp(word) + @gui_hilight_word_hash ||= gui_hilight_word_regexp_init + @gui_hilight_word_hash[word] or super(word) + end end end diff --git a/lib/metasm/metasm/cpu/mips/debug.rb b/lib/metasm/metasm/cpu/mips/debug.rb index 05e39c0b02..3ab310e8ed 100644 --- a/lib/metasm/metasm/cpu/mips/debug.rb +++ b/lib/metasm/metasm/cpu/mips/debug.rb @@ -8,35 +8,35 @@ require 'metasm/main' module Metasm class MIPS - def dbg_register_pc - @dbg_register_pc ||= :pc - end - def dbg_register_flags - @dbg_register_flags ||= :flags - end + def dbg_register_pc + @dbg_register_pc ||= :pc + end + def dbg_register_flags + @dbg_register_flags ||= :flags + end - def dbg_register_list - @dbg_register_list ||= %w[z0 at v0 v1 a0 a1 a2 a3 - t0 t1 t2 t3 t4 t5 t6 t7 - s0 s1 s2 s3 s4 s5 s6 s7 - t8 t9 k0 k1 gp sp fp ra - sr mullo mulhi badva cause pc].map { |r| r.to_sym } - end + def dbg_register_list + @dbg_register_list ||= %w[z0 at v0 v1 a0 a1 a2 a3 + t0 t1 t2 t3 t4 t5 t6 t7 + s0 s1 s2 s3 s4 s5 s6 s7 + t8 t9 k0 k1 gp sp fp ra + sr mullo mulhi badva cause pc].map { |r| r.to_sym } + end - def dbg_flag_list - @dbg_flag_list ||= [] - end + def dbg_flag_list + @dbg_flag_list ||= [] 
+ end - def dbg_register_size - @dbg_register_size ||= Hash.new(@size) - end + def dbg_register_size + @dbg_register_size ||= Hash.new(@size) + end - def dbg_need_stepover(dbg, addr, di) - di and di.opcode.props[:saveip] - end + def dbg_need_stepover(dbg, addr, di) + di and di.opcode.props[:saveip] + end - def dbg_end_stepout(dbg, addr, di) - di and di.opcode.name == 'foobar' # TODO - end + def dbg_end_stepout(dbg, addr, di) + di and di.opcode.name == 'foobar' # TODO + end end end diff --git a/lib/metasm/metasm/cpu/mips/decode.rb b/lib/metasm/metasm/cpu/mips/decode.rb index 6b61555bbb..b5703cec25 100644 --- a/lib/metasm/metasm/cpu/mips/decode.rb +++ b/lib/metasm/metasm/cpu/mips/decode.rb @@ -9,276 +9,276 @@ require 'metasm/decode' module Metasm class MIPS - def build_opcode_bin_mask(op) - # bit = 0 if can be mutated by an field value, 1 if fixed by opcode - op.bin_mask = 0 - op.args.each { |f| - op.bin_mask |= @fields_mask[f] << @fields_shift[f] - } - op.bin_mask = 0xffffffff ^ op.bin_mask - end + def build_opcode_bin_mask(op) + # bit = 0 if can be mutated by an field value, 1 if fixed by opcode + op.bin_mask = 0 + op.args.each { |f| + op.bin_mask |= @fields_mask[f] << @fields_shift[f] + } + op.bin_mask = 0xffffffff ^ op.bin_mask + end - def build_bin_lookaside - lookaside = Array.new(256) { [] } - opcode_list.each { |op| - build_opcode_bin_mask op + def build_bin_lookaside + lookaside = Array.new(256) { [] } + opcode_list.each { |op| + build_opcode_bin_mask op - b = op.bin >> 24 - msk = op.bin_mask >> 24 + b = op.bin >> 24 + msk = op.bin_mask >> 24 - for i in b..(b | (255^msk)) - next if i & msk != b & msk - lookaside[i] << op - end - } - lookaside - end + for i in b..(b | (255^msk)) + next if i & msk != b & msk + lookaside[i] << op + end + } + lookaside + end - def decode_findopcode(edata) - di = DecodedInstruction.new(self) - val = edata.decode_imm(:u32, @endianness) - edata.ptr -= 4 - if val.kind_of?(Expression) - # relocations - hval = Expression[val, :&, 
0xff000000].reduce - if hval.kind_of?(Expression) - # reloc_i26 - if hval.kind_of?(Expression) and pat = hval.match(Expression[['a', :&, 0x300_0000], :|, 'b'], 'a', 'b') - hval = pat['b'] - end - end - di if di.opcode = @bin_lookaside[hval >> 24].find { |op| - (op.bin & op.bin_mask) == Expression[val, :&, op.bin_mask].reduce - } - else - di if di.opcode = @bin_lookaside[val >> 24].find { |op| - (op.bin & op.bin_mask) == (val & op.bin_mask) - } - end - end + def decode_findopcode(edata) + di = DecodedInstruction.new(self) + val = edata.decode_imm(:u32, @endianness) + edata.ptr -= 4 + if val.kind_of?(Expression) + # relocations + hval = Expression[val, :&, 0xff000000].reduce + if hval.kind_of?(Expression) + # reloc_i26 + if hval.kind_of?(Expression) and pat = hval.match(Expression[['a', :&, 0x300_0000], :|, 'b'], 'a', 'b') + hval = pat['b'] + end + end + di if di.opcode = @bin_lookaside[hval >> 24].find { |op| + (op.bin & op.bin_mask) == Expression[val, :&, op.bin_mask].reduce + } + else + di if di.opcode = @bin_lookaside[val >> 24].find { |op| + (op.bin & op.bin_mask) == (val & op.bin_mask) + } + end + end - def decode_instr_op(edata, di) - before_ptr = edata.ptr - op = di.opcode - di.instruction.opname = op.name - val = edata.decode_imm(:u32, @endianness) + def decode_instr_op(edata, di) + before_ptr = edata.ptr + op = di.opcode + di.instruction.opname = op.name + val = edata.decode_imm(:u32, @endianness) - field_val = lambda { |f| - if val.kind_of?(Expression) - r = Expression[[val, :>>, @fields_shift[f]], :&, @fields_mask[f]].reduce - else - r = (val >> @fields_shift[f]) & @fields_mask[f] - end + field_val = lambda { |f| + if val.kind_of?(Expression) + r = Expression[[val, :>>, @fields_shift[f]], :&, @fields_mask[f]].reduce + else + r = (val >> @fields_shift[f]) & @fields_mask[f] + end - next r if r.kind_of?(Expression) - case f - when :msbd; r += 1 - when :i16; r = Expression.make_signed(r, 16) - when :i20; r = Expression.make_signed(r, 20) - when :i26; r = 
Expression.make_signed(r, 26) - else r - end - } + next r if r.kind_of?(Expression) + case f + when :msbd; r += 1 + when :i16; r = Expression.make_signed(r, 16) + when :i20; r = Expression.make_signed(r, 20) + when :i26; r = Expression.make_signed(r, 26) + else r + end + } - op.args.each { |a| - di.instruction.args << case a - when :rs, :rt, :rd; Reg.new field_val[a] - when :sa, :i16, :i20, :i26, :it, :msbd, :sel, :idb; Expression[field_val[a]] - when :rs_i16 - len = 32 - len = 64 if op.props[:m64] - len = 16 if op.props[:mi16] or op.props[:mu16] - len = 8 if op.props[:mi8 ] or op.props[:mu8] - Memref.new Reg.new(field_val[:rs]), Expression[field_val[:i16]], len - when :ft; FpReg.new field_val[a] - when :idm1; Expression['unsupported'] - else raise SyntaxError, "Internal error: invalid argument #{a} in #{op.name}" - end - } + op.args.each { |a| + di.instruction.args << case a + when :rs, :rt, :rd; Reg.new field_val[a] + when :sa, :i16, :i20, :i26, :it, :msbd, :sel, :idb; Expression[field_val[a]] + when :rs_i16 + len = 32 + len = 64 if op.props[:m64] + len = 16 if op.props[:mi16] or op.props[:mu16] + len = 8 if op.props[:mi8 ] or op.props[:mu8] + Memref.new Reg.new(field_val[:rs]), Expression[field_val[:i16]], len + when :ft; FpReg.new field_val[a] + when :idm1; Expression['unsupported'] + else raise SyntaxError, "Internal error: invalid argument #{a} in #{op.name}" + end + } - di.bin_length += edata.ptr - before_ptr + di.bin_length += edata.ptr - before_ptr - return false if edata.ptr > edata.length + return false if edata.ptr > edata.length - di - end + di + end - # converts relative branch offsets to absolute addresses - # else just add the offset +off+ of the instruction + its length (off may be an Expression) - # assumes edata.ptr points just after the instruction (as decode_instr_op left it) - # do not call twice on the same di ! - def decode_instr_interpret(di, addr) - if di.opcode.props[:setip] and di.instruction.args.last.kind_of? 
Expression and di.opcode.name[0] != ?t - delta = Expression[di.instruction.args.last, :<<, 2].reduce - if di.opcode.args.include? :i26 - # absolute jump in the 0x3ff_ffff region surrounding next_pc - if delta.kind_of? Expression and delta.op == :& and delta.rexpr == 0x3ff_fffc - # relocated arg: assume the linker mapped so that instr&target are in the same region - arg = Expression[delta.lexpr].reduce - else - arg = Expression[[[addr, :+, di.bin_length], :&, 0xfc00_0000], :+, delta].reduce - end - else - arg = Expression[[addr, :+, di.bin_length], :+, delta].reduce - end - di.instruction.args[-1] = Expression[arg] - end + # converts relative branch offsets to absolute addresses + # else just add the offset +off+ of the instruction + its length (off may be an Expression) + # assumes edata.ptr points just after the instruction (as decode_instr_op left it) + # do not call twice on the same di ! + def decode_instr_interpret(di, addr) + if di.opcode.props[:setip] and di.instruction.args.last.kind_of? Expression and di.opcode.name[0] != ?t + delta = Expression[di.instruction.args.last, :<<, 2].reduce + if di.opcode.args.include? :i26 + # absolute jump in the 0x3ff_ffff region surrounding next_pc + if delta.kind_of? 
Expression and delta.op == :& and delta.rexpr == 0x3ff_fffc + # relocated arg: assume the linker mapped so that instr&target are in the same region + arg = Expression[delta.lexpr].reduce + else + arg = Expression[[[addr, :+, di.bin_length], :&, 0xfc00_0000], :+, delta].reduce + end + else + arg = Expression[[addr, :+, di.bin_length], :+, delta].reduce + end + di.instruction.args[-1] = Expression[arg] + end - di - end + di + end - # hash opname => lambda { |di, *sym_args| binding } - def backtrace_binding - @backtrace_binding ||= init_backtrace_binding - end - def backtrace_binding=(b) @backtrace_binding = b end + # hash opname => lambda { |di, *sym_args| binding } + def backtrace_binding + @backtrace_binding ||= init_backtrace_binding + end + def backtrace_binding=(b) @backtrace_binding = b end - def init_backtrace_binding - @backtrace_binding ||= {} - opcode_list.map { |ol| ol.name }.uniq.each { |op| - binding = case op - when 'break' - when 'bltzal', 'bgezal'; lambda { |di, *a| - # XXX $ra is set only if branch is taken... - { :$ra => Expression[Expression[di.address, :+, 2*di.bin_length].reduce] } - } - when 'nop', 'j', 'jr', /^b/; lambda { |di, *a| {} } - when 'lui'; lambda { |di, a0, a1| { a0 => Expression[[a1, :&, 0xffff], :<<, 16] } } - when 'add', 'addu', 'addi', 'addiu'; lambda { |di, a0, a1, a2| { a0 => Expression[a1, :+, a2] } } # XXX addiu $sp, -40h should be addiu $sp, 0xffc0 from the books, but.. 
- when 'sub', 'subu'; lambda { |di, a0, a1, a2| { a0 => Expression[a1, :-, a2] } } - when 'slt', 'slti'; lambda { |di, a0, a1, a2| { a0 => Expression[a1, :<, a2] } } - when 'and', 'andi'; lambda { |di, a0, a1, a2| { a0 => Expression[a1, :&, a2] } } - when 'or', 'ori'; lambda { |di, a0, a1, a2| { a0 => Expression[a1, :|, a2] } } - when 'nor'; lambda { |di, a0, a1, a2| { a0 => Expression[:~, [a1, :|, a2]] } } - when 'xor'; lambda { |di, a0, a1, a2| { a0 => Expression[a1, :^, a2] } } - when 'sll', 'sllv'; lambda { |di, a0, a1, a2| { a0 => Expression[a1, :>>, a2] } } - when 'srl', 'srlv', 'sra', 'srav'; lambda { |di, a0, a1, a2| { a0 => Expression[a1, :<<, a2] } } # XXX sign-extend - when 'lw'; lambda { |di, a0, a1| { a0 => Expression[a1] } } - when 'sw'; lambda { |di, a0, a1| { a1 => Expression[a0] } } - when 'lh', 'lhu'; lambda { |di, a0, a1| { a0 => Expression[a1] } } # XXX sign-extend - when 'sh'; lambda { |di, a0, a1| { a1 => Expression[a0] } } - when 'lb', 'lbu'; lambda { |di, a0, a1| { a0 => Expression[a1] } } - when 'sb'; lambda { |di, a0, a1| { a1 => Expression[a0] } } - when /^slti?u?/; lambda { |di, a0, a1, a2| { a0 => Expression[a1, :<, a2] } } # XXX signedness - when 'mfhi'; lambda { |di, a0| { a0 => Expression[:hi] } } - when 'mflo'; lambda { |di, a0| { a0 => Expression[:lo] } } - when 'mult'; lambda { |di, a0, a1| { :hi => Expression[[a0, :*, a1], :>>, 32], :lo => Expression[[a0, :*, a1], :&, 0xffff_ffff] } } - when 'div'; lambda { |di, a0, a1| { :hi => Expression[a0, :%, a1], :lo => Expression[a0, :/, a1] } } - when 'jal', 'jalr'; lambda { |di, a0| { :$ra => Expression[Expression[di.address, :+, 2*di.bin_length].reduce] } } - when 'li', 'mov'; lambda { |di, a0, a1| { a0 => Expression[a1] } } - when 'syscall'; lambda { |di, *a| { :$v0 => Expression::Unknown } } - end + def init_backtrace_binding + @backtrace_binding ||= {} + opcode_list.map { |ol| ol.name }.uniq.each { |op| + binding = case op + when 'break' + when 'bltzal', 'bgezal'; lambda { |di, *a| + 
# XXX $ra is set only if branch is taken... + { :$ra => Expression[Expression[di.address, :+, 2*di.bin_length].reduce] } + } + when 'nop', 'j', 'jr', /^b/; lambda { |di, *a| {} } + when 'lui'; lambda { |di, a0, a1| { a0 => Expression[[a1, :&, 0xffff], :<<, 16] } } + when 'add', 'addu', 'addi', 'addiu'; lambda { |di, a0, a1, a2| { a0 => Expression[a1, :+, a2] } } # XXX addiu $sp, -40h should be addiu $sp, 0xffc0 from the books, but.. + when 'sub', 'subu'; lambda { |di, a0, a1, a2| { a0 => Expression[a1, :-, a2] } } + when 'slt', 'slti'; lambda { |di, a0, a1, a2| { a0 => Expression[a1, :<, a2] } } + when 'and', 'andi'; lambda { |di, a0, a1, a2| { a0 => Expression[a1, :&, a2] } } + when 'or', 'ori'; lambda { |di, a0, a1, a2| { a0 => Expression[a1, :|, a2] } } + when 'nor'; lambda { |di, a0, a1, a2| { a0 => Expression[:~, [a1, :|, a2]] } } + when 'xor'; lambda { |di, a0, a1, a2| { a0 => Expression[a1, :^, a2] } } + when 'sll', 'sllv'; lambda { |di, a0, a1, a2| { a0 => Expression[a1, :>>, a2] } } + when 'srl', 'srlv', 'sra', 'srav'; lambda { |di, a0, a1, a2| { a0 => Expression[a1, :<<, a2] } } # XXX sign-extend + when 'lw'; lambda { |di, a0, a1| { a0 => Expression[a1] } } + when 'sw'; lambda { |di, a0, a1| { a1 => Expression[a0] } } + when 'lh', 'lhu'; lambda { |di, a0, a1| { a0 => Expression[a1] } } # XXX sign-extend + when 'sh'; lambda { |di, a0, a1| { a1 => Expression[a0] } } + when 'lb', 'lbu'; lambda { |di, a0, a1| { a0 => Expression[a1] } } + when 'sb'; lambda { |di, a0, a1| { a1 => Expression[a0] } } + when /^slti?u?/; lambda { |di, a0, a1, a2| { a0 => Expression[a1, :<, a2] } } # XXX signedness + when 'mfhi'; lambda { |di, a0| { a0 => Expression[:hi] } } + when 'mflo'; lambda { |di, a0| { a0 => Expression[:lo] } } + when 'mult'; lambda { |di, a0, a1| { :hi => Expression[[a0, :*, a1], :>>, 32], :lo => Expression[[a0, :*, a1], :&, 0xffff_ffff] } } + when 'div'; lambda { |di, a0, a1| { :hi => Expression[a0, :%, a1], :lo => Expression[a0, :/, a1] } } + when 'jal', 
'jalr'; lambda { |di, a0| { :$ra => Expression[Expression[di.address, :+, 2*di.bin_length].reduce] } } + when 'li', 'mov'; lambda { |di, a0, a1| { a0 => Expression[a1] } } + when 'syscall'; lambda { |di, *a| { :$v0 => Expression::Unknown } } + end - @backtrace_binding[op] ||= binding if binding - } - @backtrace_binding - end + @backtrace_binding[op] ||= binding if binding + } + @backtrace_binding + end - def get_backtrace_binding(di) - a = di.instruction.args.map { |arg| - case arg - when Memref; arg.symbolic(di.address) - when Reg; arg.symbolic - else arg - end - } + def get_backtrace_binding(di) + a = di.instruction.args.map { |arg| + case arg + when Memref; arg.symbolic(di.address) + when Reg; arg.symbolic + else arg + end + } - binding = if binding = backtrace_binding[di.instruction.opname] - binding[di, *a] - else - if di.instruction.opname[0] == ?b and di.opcode.props[:setip] - else - puts "unknown instruction to emu #{di}" if $VERBOSE - end - {} - end + binding = if binding = backtrace_binding[di.instruction.opname] + binding[di, *a] + else + if di.instruction.opname[0] == ?b and di.opcode.props[:setip] + else + puts "unknown instruction to emu #{di}" if $VERBOSE + end + {} + end - binding.delete 0 # allow add $zero, 42 => nop + binding.delete 0 # allow add $zero, 42 => nop - binding - end + binding + end - def get_xrefs_x(dasm, di) - return [] if not di.opcode.props[:setip] + def get_xrefs_x(dasm, di) + return [] if not di.opcode.props[:setip] - arg = di.instruction.args.last - [Expression[ - case arg - when Memref; Indirection[[arg.base.to_s.to_sym, :+, arg.offset], @size/8, di.address] - when Reg; arg.to_s.to_sym - else arg - end]] - end + arg = di.instruction.args.last + [Expression[ + case arg + when Memref; Indirection[[arg.base.to_s.to_sym, :+, arg.offset], @size/8, di.address] + when Reg; arg.to_s.to_sym + else arg + end]] + end - def backtrace_update_function_binding(dasm, faddr, f, retaddrlist, *wantregs) - retaddrlist.map! 
{ |retaddr| dasm.decoded[retaddr] ? dasm.decoded[retaddr].block.list.last.address : retaddr } if retaddrlist - b = f.backtrace_binding + def backtrace_update_function_binding(dasm, faddr, f, retaddrlist, *wantregs) + retaddrlist.map! { |retaddr| dasm.decoded[retaddr] ? dasm.decoded[retaddr].block.list.last.address : retaddr } if retaddrlist + b = f.backtrace_binding - bt_val = lambda { |r| - next if not retaddrlist - bt = [] - b[r] = Expression::Unknown # break recursive dep - retaddrlist.each { |retaddr| - bt |= dasm.backtrace(Expression[r], retaddr, - :include_start => true, :snapshot_addr => faddr, :origin => retaddr) - } - b[r] = ((bt.length == 1) ? bt.first : Expression::Unknown) - } - wantregs = Reg.i_to_s.values if wantregs.empty? - wantregs.map { |r| r.to_sym }.each(&bt_val) + bt_val = lambda { |r| + next if not retaddrlist + bt = [] + b[r] = Expression::Unknown # break recursive dep + retaddrlist.each { |retaddr| + bt |= dasm.backtrace(Expression[r], retaddr, + :include_start => true, :snapshot_addr => faddr, :origin => retaddr) + } + b[r] = ((bt.length == 1) ? bt.first : Expression::Unknown) + } + wantregs = Reg.i_to_s.values if wantregs.empty? + wantregs.map { |r| r.to_sym }.each(&bt_val) - puts "update_func_bind: #{Expression[faddr]} has sp -> #{b[:$sp]}" if not Expression[b[:$sp], :-, :$sp].reduce.kind_of?(::Integer) if $VERBOSE - end + puts "update_func_bind: #{Expression[faddr]} has sp -> #{b[:$sp]}" if not Expression[b[:$sp], :-, :$sp].reduce.kind_of?(::Integer) if $VERBOSE + end - def backtrace_is_function_return(expr, di=nil) - expr.reduce_rec == :$ra - end + def backtrace_is_function_return(expr, di=nil) + expr.reduce_rec == :$ra + end - def backtrace_is_stack_address(expr) - Expression[expr].expr_externals.include? :$sp - end + def backtrace_is_stack_address(expr) + Expression[expr].expr_externals.include? :$sp + end - def replace_instr_arg_immediate(i, old, new) - i.args.map! { |a| - case a - when Expression; a == old ? 
new : Expression[a.bind(old => new).reduce] - when Memref - a.offset = (a.offset == old ? new : Expression[a.offset.bind(old => new).reduce]) if a.offset - a - else a - end - } - end + def replace_instr_arg_immediate(i, old, new) + i.args.map! { |a| + case a + when Expression; a == old ? new : Expression[a.bind(old => new).reduce] + when Memref + a.offset = (a.offset == old ? new : Expression[a.offset.bind(old => new).reduce]) if a.offset + a + else a + end + } + end - # make the target of the call know the value of $t9 (specified by the ABI) - # XXX hackish - def backtrace_found_result(dasm, di, expr, type, len) - if di.opcode.name == 'jalr' and di.instruction.args == [:$t9] - expr = dasm.normalize(expr) - (dasm.address_binding[expr] ||= {})[:$t9] ||= expr - end - end + # make the target of the call know the value of $t9 (specified by the ABI) + # XXX hackish + def backtrace_found_result(dasm, di, expr, type, len) + if di.opcode.name == 'jalr' and di.instruction.args == [:$t9] + expr = dasm.normalize(expr) + (dasm.address_binding[expr] ||= {})[:$t9] ||= expr + end + end - def delay_slot(di=nil) - # branch.*likely has no delay slot - # bltzal/bgezal are 'link', not 'likely', hence the check for -2 - (di and di.opcode.props[:setip] and (di.opcode.name[-1] != ?l or di.opcode.name[-2] == ?a)) ? 1 : 0 - end + def delay_slot(di=nil) + # branch.*likely has no delay slot + # bltzal/bgezal are 'link', not 'likely', hence the check for -2 + (di and di.opcode.props[:setip] and (di.opcode.name[-1] != ?l or di.opcode.name[-2] == ?a)) ? 
1 : 0 + end - def disassembler_default_func - df = DecodedFunction.new - df.backtrace_binding = %w[v0 v1 a0 a1 a2 a3 t0 t1 t2 t3 t4 t5 t6 t7 t8 t9 at k0 k1].inject({}) { |h, r| h.update "$#{r}".to_sym => Expression::Unknown } - df.backtrace_binding.update %w[gp sp fp ra s0 s1 s2 s3 s4 s5 s6 s7].inject({}) { |h, r| h.update "$#{r}".to_sym => "$#{r}".to_sym } - df.backtracked_for = [BacktraceTrace.new(Expression[:$ra], :default, Expression[:$ra], :x)] - df.btfor_callback = lambda { |dasm, btfor, funcaddr, calladdr| - if funcaddr != :default - btfor - elsif di = dasm.decoded[calladdr] and di.opcode.props[:saveip] and di.instruction.to_s != 'jr $ra' - btfor - else [] - end - } - df - end + def disassembler_default_func + df = DecodedFunction.new + df.backtrace_binding = %w[v0 v1 a0 a1 a2 a3 t0 t1 t2 t3 t4 t5 t6 t7 t8 t9 at k0 k1].inject({}) { |h, r| h.update "$#{r}".to_sym => Expression::Unknown } + df.backtrace_binding.update %w[gp sp fp ra s0 s1 s2 s3 s4 s5 s6 s7].inject({}) { |h, r| h.update "$#{r}".to_sym => "$#{r}".to_sym } + df.backtracked_for = [BacktraceTrace.new(Expression[:$ra], :default, Expression[:$ra], :x)] + df.btfor_callback = lambda { |dasm, btfor, funcaddr, calladdr| + if funcaddr != :default + btfor + elsif di = dasm.decoded[calladdr] and di.opcode.props[:saveip] and di.instruction.to_s != 'jr $ra' + btfor + else [] + end + } + df + end end end diff --git a/lib/metasm/metasm/cpu/mips/encode.rb b/lib/metasm/metasm/cpu/mips/encode.rb index 23df0694ff..1360f0bdcb 100644 --- a/lib/metasm/metasm/cpu/mips/encode.rb +++ b/lib/metasm/metasm/cpu/mips/encode.rb @@ -9,44 +9,44 @@ require 'metasm/encode' module Metasm class MIPS - private - def encode_instr_op(exe, instr, op) - base = op.bin - set_field = lambda { |f, v| - base |= (v & @fields_mask[f]) << @fields_shift[f] - } + private + def encode_instr_op(exe, instr, op) + base = op.bin + set_field = lambda { |f, v| + base |= (v & @fields_mask[f]) << @fields_shift[f] + } - val, mask, shift = 0, 0, 0 + val, 
mask, shift = 0, 0, 0 - # convert label name for jmp/call/loop to relative offset - if op.props[:setip] and op.name[0] != ?t and instr.args.last.kind_of? Expression - postlabel = exe.new_label('jmp_offset') - instr = instr.dup - if op.args.include? :i26 - pl = Expression[postlabel, :&, 0xfc00_0000] - else - pl = postlabel - end - instr.args[-1] = Expression[[instr.args[-1], :-, pl], :>>, 2] - postdata = EncodedData.new '', :export => {postlabel => 0} - else - postdata = '' - end + # convert label name for jmp/call/loop to relative offset + if op.props[:setip] and op.name[0] != ?t and instr.args.last.kind_of? Expression + postlabel = exe.new_label('jmp_offset') + instr = instr.dup + if op.args.include? :i26 + pl = Expression[postlabel, :&, 0xfc00_0000] + else + pl = postlabel + end + instr.args[-1] = Expression[[instr.args[-1], :-, pl], :>>, 2] + postdata = EncodedData.new '', :export => {postlabel => 0} + else + postdata = '' + end - op.args.zip(instr.args).each { |sym, arg| - case sym - when :rs, :rt, :rd, :ft - set_field[sym, arg.i] - when :rs_i16 - set_field[:rs, arg.base.i] - val, mask, shift = arg.offset, @fields_mask[:i16], @fields_shift[:i16] - when :sa, :i16, :i20, :i26, :it, :msbd, :sel, :idb - val, mask, shift = arg, @fields_mask[sym], @fields_shift[sym] - val = Expression[val, :-, 1] if sym == :msbd - end - } + op.args.zip(instr.args).each { |sym, arg| + case sym + when :rs, :rt, :rd, :ft + set_field[sym, arg.i] + when :rs_i16 + set_field[:rs, arg.base.i] + val, mask, shift = arg.offset, @fields_mask[:i16], @fields_shift[:i16] + when :sa, :i16, :i20, :i26, :it, :msbd, :sel, :idb + val, mask, shift = arg, @fields_mask[sym], @fields_shift[sym] + val = Expression[val, :-, 1] if sym == :msbd + end + } - Expression[base, :|, [[val, :&, mask], :<<, shift]].encode(:u32, @endianness) << postdata - end + Expression[base, :|, [[val, :&, mask], :<<, shift]].encode(:u32, @endianness) << postdata + end end end diff --git a/lib/metasm/metasm/cpu/mips/main.rb 
b/lib/metasm/metasm/cpu/mips/main.rb index f1dc5311cc..984ab95269 100644 --- a/lib/metasm/metasm/cpu/mips/main.rb +++ b/lib/metasm/metasm/cpu/mips/main.rb @@ -8,72 +8,72 @@ require 'metasm/main' module Metasm class MIPS < CPU - class Reg - class << self - attr_accessor :s_to_i, :i_to_s - end - @s_to_i = {} - @i_to_s = {} - (0..31).each { |i| @s_to_i["r#{i}"] = @s_to_i["$r#{i}"] = @s_to_i["$#{i}"] = i } - %w[zero at v0 v1 a0 a1 a2 a3 - t0 t1 t2 t3 t4 t5 t6 t7 - s0 s1 s2 s3 s4 s5 s6 s7 - t8 t9 k0 k1 gp sp fp ra].each_with_index { |r, i| @s_to_i[r] = @s_to_i['$'+r] = i ; @i_to_s[i] = '$'+r } + class Reg + class << self + attr_accessor :s_to_i, :i_to_s + end + @s_to_i = {} + @i_to_s = {} + (0..31).each { |i| @s_to_i["r#{i}"] = @s_to_i["$r#{i}"] = @s_to_i["$#{i}"] = i } + %w[zero at v0 v1 a0 a1 a2 a3 + t0 t1 t2 t3 t4 t5 t6 t7 + s0 s1 s2 s3 s4 s5 s6 s7 + t8 t9 k0 k1 gp sp fp ra].each_with_index { |r, i| @s_to_i[r] = @s_to_i['$'+r] = i ; @i_to_s[i] = '$'+r } - attr_accessor :i - def initialize(i) - @i = i - end + attr_accessor :i + def initialize(i) + @i = i + end - Sym = @i_to_s.sort.map { |k, v| v.to_sym } - def symbolic ; @i == 0 ? 0 : Sym[@i] end - end + Sym = @i_to_s.sort.map { |k, v| v.to_sym } + def symbolic ; @i == 0 ? 
0 : Sym[@i] end + end - class FpReg - class << self - attr_accessor :s_to_i, :i_to_s - end - @i_to_s = (0..31).map { |i| "$f#{i}" } - @s_to_i = (0..31).inject({}) { |h, i| h.update "f#{i}" => i, "$f#{i}" => i } + class FpReg + class << self + attr_accessor :s_to_i, :i_to_s + end + @i_to_s = (0..31).map { |i| "$f#{i}" } + @s_to_i = (0..31).inject({}) { |h, i| h.update "f#{i}" => i, "$f#{i}" => i } - attr_accessor :i - def initialize(i) - @i = i - end - end + attr_accessor :i + def initialize(i) + @i = i + end + end - class Memref - attr_accessor :base, :offset, :sz - def initialize(base, offset, sz=32) - @base, @offset, @sz = base, offset, sz - end + class Memref + attr_accessor :base, :offset, :sz + def initialize(base, offset, sz=32) + @base, @offset, @sz = base, offset, sz + end - def symbolic(orig) - p = nil - p = Expression[p, :+, @base.symbolic] if base - p = Expression[p, :+, @offset] if offset - Indirection[p.reduce, @sz/8, orig] - end - end + def symbolic(orig) + p = nil + p = Expression[p, :+, @base.symbolic] if base + p = Expression[p, :+, @offset] if offset + Indirection[p.reduce, @sz/8, orig] + end + end - def initialize(endianness = :big, family = :latest) - super() - @endianness = endianness - @size = 32 - @family = family - end + def initialize(endianness = :big, family = :latest) + super() + @endianness = endianness + @size = 32 + @family = family + end - def init_opcode_list - send("init_#@family") - @opcode_list - end + def init_opcode_list + send("init_#@family") + @opcode_list + end end class MIPS64 < MIPS - def initialize(endianness = :big, family = :latest) - super(endianness, family) - @size = 64 - end + def initialize(endianness = :big, family = :latest) + super(endianness, family) + @size = 64 + end end end diff --git a/lib/metasm/metasm/cpu/mips/opcodes.rb b/lib/metasm/metasm/cpu/mips/opcodes.rb index a447304425..7fed71be8b 100644 --- a/lib/metasm/metasm/cpu/mips/opcodes.rb +++ b/lib/metasm/metasm/cpu/mips/opcodes.rb @@ -11,502 +11,502 @@ 
require 'metasm/cpu/mips/main' module Metasm class MIPS - def addop(name, bin, *args) - o = Opcode.new name, bin - args.each { |a| - o.args << a if @fields_mask[a] - o.props[a] = true if @valid_props[a] - } - @opcode_list << o - end + def addop(name, bin, *args) + o = Opcode.new name, bin + args.each { |a| + o.args << a if @fields_mask[a] + o.props[a] = true if @valid_props[a] + } + @opcode_list << o + end - def init_mips32_obsolete - addop 'beql', 0b010100 << 26, :rt, :rs, :i16, :setip # == , exec delay slot only if jump taken - addop 'bnel', 0b010101 << 26, :rt, :rs, :i16, :setip # != - addop 'blezl',0b010110 << 26, :rt_z, :rs, :i16, :setip # <= 0 - addop 'bgtzl',0b010111 << 26, :rt_z, :rs, :i16, :setip # > 0 - addop 'bltzl',1 << 26 | 0b00010 << 16, :rs, :i16, :setip - addop 'bgezl',1 << 26 | 0b00011 << 16, :rs, :i16, :setip - addop 'bltzall', 1 << 26 | 0b10010 << 16, :rs, :i16, :setip - addop 'bgezall', 1 << 26 | 0b10011 << 16, :rs, :i16, :setip - end + def init_mips32_obsolete + addop 'beql', 0b010100 << 26, :rt, :rs, :i16, :setip # == , exec delay slot only if jump taken + addop 'bnel', 0b010101 << 26, :rt, :rs, :i16, :setip # != + addop 'blezl',0b010110 << 26, :rt_z, :rs, :i16, :setip # <= 0 + addop 'bgtzl',0b010111 << 26, :rt_z, :rs, :i16, :setip # > 0 + addop 'bltzl',1 << 26 | 0b00010 << 16, :rs, :i16, :setip + addop 'bgezl',1 << 26 | 0b00011 << 16, :rs, :i16, :setip + addop 'bltzall', 1 << 26 | 0b10010 << 16, :rs, :i16, :setip + addop 'bgezall', 1 << 26 | 0b10011 << 16, :rs, :i16, :setip + end - def init_mips32_reserved - addop 'future111011', 0b111011 << 26, :i26 + def init_mips32_reserved + addop 'future111011', 0b111011 << 26, :i26 - %w[011000 011001 011010 011011 100111 101100 101101 110100 110111 111100 111111].each { |b| - addop "reserved#{b}", b.to_i(2) << 26, :i26 - } + %w[011000 011001 011010 011011 100111 101100 101101 110100 110111 111100 111111].each { |b| + addop "reserved#{b}", b.to_i(2) << 26, :i26 + } - addop 'ase_jalx', 0b011101 << 26, 
:i26 - addop 'ase011110', 0b011110 << 26, :i26 - # TODO add all special/regimm/... - end + addop 'ase_jalx', 0b011101 << 26, :i26 + addop 'ase011110', 0b011110 << 26, :i26 + # TODO add all special/regimm/... + end - def init_mips32 - @opcode_list = [] - @fields_mask.update :rs => 0x1f, :rt => 0x1f, :rd => 0x1f, :sa => 0x1f, - :i16 => 0xffff, :i26 => 0x3ffffff, :rs_i16 => 0x3e0ffff, :it => 0x1f, - :ft => 0x1f, :idm1 => 0x1f, :idb => 0x1f, :sel => 7, :i20 => 0xfffff - @fields_shift.update :rs => 21, :rt => 16, :rd => 11, :sa => 6, - :i16 => 0, :i26 => 0, :rs_i16 => 0, :it => 16, - :ft => 16, :idm1 => 11, :idb => 11, :sel => 0, :i20 => 6 - @valid_props.update :mi8 => true, :mu8 => true, :mi16 => true, :mu16 => true + def init_mips32 + @opcode_list = [] + @fields_mask.update :rs => 0x1f, :rt => 0x1f, :rd => 0x1f, :sa => 0x1f, + :i16 => 0xffff, :i26 => 0x3ffffff, :rs_i16 => 0x3e0ffff, :it => 0x1f, + :ft => 0x1f, :idm1 => 0x1f, :idb => 0x1f, :sel => 7, :i20 => 0xfffff + @fields_shift.update :rs => 21, :rt => 16, :rd => 11, :sa => 6, + :i16 => 0, :i26 => 0, :rs_i16 => 0, :it => 16, + :ft => 16, :idm1 => 11, :idb => 11, :sel => 0, :i20 => 6 + @valid_props.update :mi8 => true, :mu8 => true, :mi16 => true, :mu16 => true - init_mips32_obsolete - init_mips32_reserved + init_mips32_obsolete + init_mips32_reserved - addop 'j', 0b000010 << 26, :i26, :setip, :stopexec # sets the program counter to (i26 << 2) | ((pc+4) & 0xfc000000) ie i26*4 in the 256M-aligned section containing the instruction in the delay slot - addop 'jal', 0b000011 << 26, :i26, :setip, :stopexec, :saveip # same thing, saves return addr in r31 + addop 'j', 0b000010 << 26, :i26, :setip, :stopexec # sets the program counter to (i26 << 2) | ((pc+4) & 0xfc000000) ie i26*4 in the 256M-aligned section containing the instruction in the delay slot + addop 'jal', 0b000011 << 26, :i26, :setip, :stopexec, :saveip # same thing, saves return addr in r31 - addop 'mov', 0b001000 << 26, :rt, :rs # rt <- rs+0 - addop 'addi', 
0b001000 << 26, :rt, :rs, :i16 # add rt <- rs+i - addop 'li', 0b001001 << 26, :rt, :i16 # addiu rt <- zero+i - addop 'addiu',0b001001 << 26, :rt, :rs, :i16 # add unsigned - addop 'slti', 0b001010 << 26, :rt, :rs, :i16 # set on less than - addop 'sltiu',0b001011 << 26, :rt, :rs, :i16 # set on less than unsigned - addop 'andi', 0b001100 << 26, :rt, :rs, :i16 # and - addop 'ori', 0b001101 << 26, :rt, :rs, :i16 # or - addop 'xori', 0b001110 << 26, :rt, :rs, :i16 # xor - addop 'lui', 0b001111 << 26, :rt, :i16 # load upper + addop 'mov', 0b001000 << 26, :rt, :rs # rt <- rs+0 + addop 'addi', 0b001000 << 26, :rt, :rs, :i16 # add rt <- rs+i + addop 'li', 0b001001 << 26, :rt, :i16 # addiu rt <- zero+i + addop 'addiu',0b001001 << 26, :rt, :rs, :i16 # add unsigned + addop 'slti', 0b001010 << 26, :rt, :rs, :i16 # set on less than + addop 'sltiu',0b001011 << 26, :rt, :rs, :i16 # set on less than unsigned + addop 'andi', 0b001100 << 26, :rt, :rs, :i16 # and + addop 'ori', 0b001101 << 26, :rt, :rs, :i16 # or + addop 'xori', 0b001110 << 26, :rt, :rs, :i16 # xor + addop 'lui', 0b001111 << 26, :rt, :i16 # load upper # addop 'li', (0b001111 << 26) << 32 | (0b001101 << 26), :rt_64, :i32 # lui + ori - addop 'b', 0b000100 << 26, :i16, :setip, :stopexec # bz $zero - addop 'bz', 0b000100 << 26, :rs, :i16, :setip # == 0 (beq $0) - addop 'bz', 0b000100 << 26, :rt, :i16, :setip # == 0 - addop 'bnz', 0b000101 << 26, :rs, :i16, :setip # != 0 - addop 'bnz', 0b000101 << 26, :rt, :i16, :setip # != 0 + addop 'b', 0b000100 << 26, :i16, :setip, :stopexec # bz $zero + addop 'bz', 0b000100 << 26, :rs, :i16, :setip # == 0 (beq $0) + addop 'bz', 0b000100 << 26, :rt, :i16, :setip # == 0 + addop 'bnz', 0b000101 << 26, :rs, :i16, :setip # != 0 + addop 'bnz', 0b000101 << 26, :rt, :i16, :setip # != 0 - addop 'beq', 0b000100 << 26, :rt, :rs, :i16, :setip # == - addop 'bne', 0b000101 << 26, :rt, :rs, :i16, :setip # != - addop 'blez', 0b000110 << 26, :rs, :i16, :setip # <= 0 - addop 'bgtz', 0b000111 << 26, :rs, 
:i16, :setip # > 0 + addop 'beq', 0b000100 << 26, :rt, :rs, :i16, :setip # == + addop 'bne', 0b000101 << 26, :rt, :rs, :i16, :setip # != + addop 'blez', 0b000110 << 26, :rs, :i16, :setip # <= 0 + addop 'bgtz', 0b000111 << 26, :rs, :i16, :setip # > 0 - addop 'lb', 0b100000 << 26, :rt, :rs_i16, :mi8 # load byte rs <- [rt+i] - addop 'lh', 0b100001 << 26, :rt, :rs_i16, :mi16 # load halfword - addop 'lwl', 0b100010 << 26, :rt, :rs_i16 # load word left - addop 'lw', 0b100011 << 26, :rt, :rs_i16 # load word - addop 'lbu', 0b100100 << 26, :rt, :rs_i16, :mu8 # load byte unsigned - addop 'lhu', 0b100101 << 26, :rt, :rs_i16, :mu16 # load halfword unsigned - addop 'lwr', 0b100110 << 26, :rt, :rs_i16 # load word right + addop 'lb', 0b100000 << 26, :rt, :rs_i16, :mi8 # load byte rs <- [rt+i] + addop 'lh', 0b100001 << 26, :rt, :rs_i16, :mi16 # load halfword + addop 'lwl', 0b100010 << 26, :rt, :rs_i16 # load word left + addop 'lw', 0b100011 << 26, :rt, :rs_i16 # load word + addop 'lbu', 0b100100 << 26, :rt, :rs_i16, :mu8 # load byte unsigned + addop 'lhu', 0b100101 << 26, :rt, :rs_i16, :mu16 # load halfword unsigned + addop 'lwr', 0b100110 << 26, :rt, :rs_i16 # load word right - addop 'sb', 0b101000 << 26, :rt, :rs_i16, :mi8 # store byte - addop 'sh', 0b101001 << 26, :rt, :rs_i16, :mi16 # store halfword - addop 'swl', 0b101010 << 26, :rt, :rs_i16 # store word left - addop 'sw', 0b101011 << 26, :rt, :rs_i16 # store word - addop 'swr', 0b101110 << 26, :rt, :rs_i16 # store word right + addop 'sb', 0b101000 << 26, :rt, :rs_i16, :mi8 # store byte + addop 'sh', 0b101001 << 26, :rt, :rs_i16, :mi16 # store halfword + addop 'swl', 0b101010 << 26, :rt, :rs_i16 # store word left + addop 'sw', 0b101011 << 26, :rt, :rs_i16 # store word + addop 'swr', 0b101110 << 26, :rt, :rs_i16 # store word right - addop 'll', 0b110000 << 26, :rt, :rs_i16 # load linked word (read for atomic r/modify/w, sc does the w) - addop 'sc', 0b111000 << 26, :rt, :rs_i16 # store conditional word + addop 'll', 0b110000 << 
26, :rt, :rs_i16 # load linked word (read for atomic r/modify/w, sc does the w) + addop 'sc', 0b111000 << 26, :rt, :rs_i16 # store conditional word - addop 'lwc1', 0b110001 << 26, :ft, :rs_i16 # load word in fpreg low - addop 'swc1', 0b111001 << 26, :ft, :rs_i16 # store low fpreg word - addop 'lwc2', 0b110010 << 26, :rt, :rs_i16 # load word to copro2 register low - addop 'swc2', 0b111010 << 26, :rt, :rs_i16 # store low coproc2 register + addop 'lwc1', 0b110001 << 26, :ft, :rs_i16 # load word in fpreg low + addop 'swc1', 0b111001 << 26, :ft, :rs_i16 # store low fpreg word + addop 'lwc2', 0b110010 << 26, :rt, :rs_i16 # load word to copro2 register low + addop 'swc2', 0b111010 << 26, :rt, :rs_i16 # store low coproc2 register - addop 'ldc1', 0b110101 << 26, :ft, :rs_i16 # load dword in fpreg low - addop 'sdc1', 0b111101 << 26, :ft, :rs_i16 # store fpreg - addop 'ldc2', 0b110110 << 26, :rt, :rs_i16 # load dword to copro2 register - addop 'sdc2', 0b111110 << 26, :rt, :rs_i16 # store coproc2 register + addop 'ldc1', 0b110101 << 26, :ft, :rs_i16 # load dword in fpreg low + addop 'sdc1', 0b111101 << 26, :ft, :rs_i16 # store fpreg + addop 'ldc2', 0b110110 << 26, :rt, :rs_i16 # load dword to copro2 register + addop 'sdc2', 0b111110 << 26, :rt, :rs_i16 # store coproc2 register - addop 'pref', 0b110011 << 26, :it, :rs_i16 # prefetch (it = %w[load store r2 r3 load_streamed store_streamed load_retained store_retained - # r8 r9 r10 r11 r12 r13 r14 r15 r16 r17 r18 r19 r20 r21 r22 r23 r24 writeback_invalidate - # id26 id27 id28 id29 prepare_for_store id31] - addop 'cache',0b101111 << 26, :it, :rs_i16 # do things with the proc cache + addop 'pref', 0b110011 << 26, :it, :rs_i16 # prefetch (it = %w[load store r2 r3 load_streamed store_streamed load_retained store_retained + # r8 r9 r10 r11 r12 r13 r14 r15 r16 r17 r18 r19 r20 r21 r22 r23 r24 writeback_invalidate + # id26 id27 id28 id29 prepare_for_store id31] + addop 'cache',0b101111 << 26, :it, :rs_i16 # do things with the proc cache - 
# special - addop 'nop', 0 - addop 'ssnop',1<<6 - addop 'ehb', 3<<6 - addop 'sll', 0b000000, :rd, :rt, :sa - addop 'movf', 0b000001, :rd, :rs, :cc - addop 'movt', 0b000001 | (1<<16), :rd, :rs, :cc - addop 'srl', 0b000010, :rd, :rt, :sa - addop 'sra', 0b000011, :rd, :rt, :sa - addop 'sllv', 0b000100, :rd, :rt, :rs - addop 'srlv', 0b000110, :rd, :rt, :rs - addop 'srav', 0b000111, :rd, :rt, :rs + # special + addop 'nop', 0 + addop 'ssnop',1<<6 + addop 'ehb', 3<<6 + addop 'sll', 0b000000, :rd, :rt, :sa + addop 'movf', 0b000001, :rd, :rs, :cc + addop 'movt', 0b000001 | (1<<16), :rd, :rs, :cc + addop 'srl', 0b000010, :rd, :rt, :sa + addop 'sra', 0b000011, :rd, :rt, :sa + addop 'sllv', 0b000100, :rd, :rt, :rs + addop 'srlv', 0b000110, :rd, :rt, :rs + addop 'srav', 0b000111, :rd, :rt, :rs - addop 'jr', 0b001000, :rs, :setip, :stopexec # hint field ? - addop 'jr.hb',0b001000 | (1<<10), :rs, :setip, :stopexec - addop 'jalr', 0b001001 | (31<<11), :rs, :setip, :stopexec, :saveip # rd = r31 implicit - addop 'jalr', 0b001001, :rd, :rs, :setip, :stopexec, :saveip - addop 'jalr.hb', 0b001001 | (1<<10) | (31<<11), :rs, :setip, :stopexec, :saveip - addop 'jalr.hb', 0b001001 | (1<<10), :rd, :rs, :setip, :stopexec, :saveip - addop 'movz', 0b001010, :rd, :rs, :rt # rt == 0 ? rd <- rs - addop 'movn', 0b001011, :rd, :rs, :rt - addop 'syscall', 0b001100, :i20 - addop 'break',0b001101, :i20, :stopexec - addop 'sync', 0b001111 # type 0 implicit - addop 'sync', 0b001111, :sa + addop 'jr', 0b001000, :rs, :setip, :stopexec # hint field ? + addop 'jr.hb',0b001000 | (1<<10), :rs, :setip, :stopexec + addop 'jalr', 0b001001 | (31<<11), :rs, :setip, :stopexec, :saveip # rd = r31 implicit + addop 'jalr', 0b001001, :rd, :rs, :setip, :stopexec, :saveip + addop 'jalr.hb', 0b001001 | (1<<10) | (31<<11), :rs, :setip, :stopexec, :saveip + addop 'jalr.hb', 0b001001 | (1<<10), :rd, :rs, :setip, :stopexec, :saveip + addop 'movz', 0b001010, :rd, :rs, :rt # rt == 0 ? 
rd <- rs + addop 'movn', 0b001011, :rd, :rs, :rt + addop 'syscall', 0b001100, :i20 + addop 'break',0b001101, :i20, :stopexec + addop 'sync', 0b001111 # type 0 implicit + addop 'sync', 0b001111, :sa - addop 'mfhi', 0b010000, :rd # copies special reg HI to reg - addop 'mthi', 0b010001, :rs # copies reg to special reg HI - addop 'mflo', 0b010010, :rd # copies special reg LO to reg - addop 'mtlo', 0b010011, :rs # copies reg to special reg LO + addop 'mfhi', 0b010000, :rd # copies special reg HI to reg + addop 'mthi', 0b010001, :rs # copies reg to special reg HI + addop 'mflo', 0b010010, :rd # copies special reg LO to reg + addop 'mtlo', 0b010011, :rs # copies reg to special reg LO - addop 'mult', 0b011000, :rs, :rt # multiplies the registers and store the result in HI:LO - addop 'multu',0b011001, :rs, :rt - addop 'div', 0b011010, :rs, :rt - addop 'divu', 0b011011, :rs, :rt - addop 'add', 0b100000, :rd, :rs, :rt - addop 'addu', 0b100001, :rd, :rs, :rt - addop 'sub', 0b100010, :rd, :rs, :rt - addop 'subu', 0b100011, :rd, :rs, :rt - addop 'and', 0b100100, :rd, :rs, :rt - addop 'or', 0b100101, :rd, :rs, :rt - addop 'xor', 0b100110, :rd, :rs, :rt - addop 'not', 0b100111, :rd, :rt # nor $0 - addop 'not', 0b100111, :rd, :rs - addop 'nor', 0b100111, :rd, :rs, :rt + addop 'mult', 0b011000, :rs, :rt # multiplies the registers and store the result in HI:LO + addop 'multu',0b011001, :rs, :rt + addop 'div', 0b011010, :rs, :rt + addop 'divu', 0b011011, :rs, :rt + addop 'add', 0b100000, :rd, :rs, :rt + addop 'addu', 0b100001, :rd, :rs, :rt + addop 'sub', 0b100010, :rd, :rs, :rt + addop 'subu', 0b100011, :rd, :rs, :rt + addop 'and', 0b100100, :rd, :rs, :rt + addop 'or', 0b100101, :rd, :rs, :rt + addop 'xor', 0b100110, :rd, :rs, :rt + addop 'not', 0b100111, :rd, :rt # nor $0 + addop 'not', 0b100111, :rd, :rs + addop 'nor', 0b100111, :rd, :rs, :rt - addop 'slt', 0b101010, :rd, :rs, :rt # rs= rt ? 
trap - addop 'tgeu', 0b110001, :rs, :rt - addop 'tlt', 0b110010, :rs, :rt - addop 'tltu', 0b110011, :rs, :rt - addop 'teq', 0b110100, :rs, :rt - addop 'tne', 0b110110, :rs, :rt + addop 'tge', 0b110000, :rs, :rt # rs >= rt ? trap + addop 'tgeu', 0b110001, :rs, :rt + addop 'tlt', 0b110010, :rs, :rt + addop 'tltu', 0b110011, :rs, :rt + addop 'teq', 0b110100, :rs, :rt + addop 'tne', 0b110110, :rs, :rt - # regimm - addop 'bltz', (1<<26) | (0b00000<<16), :rs, :i16, :setip - addop 'bgez', (1<<26) | (0b00001<<16), :rs, :i16, :setip - addop 'tgei', (1<<26) | (0b01000<<16), :rs, :i16, :setip - addop 'tgfiu',(1<<26) | (0b01001<<16), :rs, :i16, :setip - addop 'tlti', (1<<26) | (0b01010<<16), :rs, :i16, :setip - addop 'tltiu',(1<<26) | (0b01011<<16), :rs, :i16, :setip - addop 'teqi', (1<<26) | (0b01100<<16), :rs, :i16, :setip - addop 'tnei', (1<<26) | (0b01110<<16), :rs, :i16, :setip - addop 'bltzal', (1<<26) | (0b10000<<16), :rs, :i16, :setip, :saveip - addop 'bgezal', (1<<26) | (0b10001<<16), :i16, :setip, :stopexec, :saveip # bgezal $zero => unconditionnal - addop 'bgezal', (1<<26) | (0b10001<<16), :rs, :i16, :setip, :saveip + # regimm + addop 'bltz', (1<<26) | (0b00000<<16), :rs, :i16, :setip + addop 'bgez', (1<<26) | (0b00001<<16), :rs, :i16, :setip + addop 'tgei', (1<<26) | (0b01000<<16), :rs, :i16, :setip + addop 'tgfiu',(1<<26) | (0b01001<<16), :rs, :i16, :setip + addop 'tlti', (1<<26) | (0b01010<<16), :rs, :i16, :setip + addop 'tltiu',(1<<26) | (0b01011<<16), :rs, :i16, :setip + addop 'teqi', (1<<26) | (0b01100<<16), :rs, :i16, :setip + addop 'tnei', (1<<26) | (0b01110<<16), :rs, :i16, :setip + addop 'bltzal', (1<<26) | (0b10000<<16), :rs, :i16, :setip, :saveip + addop 'bgezal', (1<<26) | (0b10001<<16), :i16, :setip, :stopexec, :saveip # bgezal $zero => unconditionnal + addop 'bgezal', (1<<26) | (0b10001<<16), :rs, :i16, :setip, :saveip - # special2 - addop 'madd', (0b011100<<26) | 0b000000, :rs, :rt - addop 'maddu',(0b011100<<26) | 0b000001, :rs, :rt - addop 'mul', 
(0b011100<<26) | 0b000010, :rd, :rs, :rt - addop 'msub', (0b011100<<26) | 0b000100, :rs, :rt - addop 'msubu',(0b011100<<26) | 0b000101, :rs, :rt - addop 'clz', (0b011100<<26) | 0b100000, :rd, :rs, :rt # must have rs == rt - addop 'clo', (0b011100<<26) | 0b100001, :rd, :rs, :rt # must have rs == rt - addop 'sdbbp',(0b011100<<26) | 0b111111, :i20 + # special2 + addop 'madd', (0b011100<<26) | 0b000000, :rs, :rt + addop 'maddu',(0b011100<<26) | 0b000001, :rs, :rt + addop 'mul', (0b011100<<26) | 0b000010, :rd, :rs, :rt + addop 'msub', (0b011100<<26) | 0b000100, :rs, :rt + addop 'msubu',(0b011100<<26) | 0b000101, :rs, :rt + addop 'clz', (0b011100<<26) | 0b100000, :rd, :rs, :rt # must have rs == rt + addop 'clo', (0b011100<<26) | 0b100001, :rd, :rs, :rt # must have rs == rt + addop 'sdbbp',(0b011100<<26) | 0b111111, :i20 - # cp0 - addop 'mfc0', (0b010000<<26) | (0b00000<<21), :rt, :idb - addop 'mfc0', (0b010000<<26) | (0b00000<<21), :rt, :idb, :sel - addop 'mtc0', (0b010000<<26) | (0b00100<<21), :rt, :idb - addop 'mtc0', (0b010000<<26) | (0b00100<<21), :rt, :idb, :sel + # cp0 + addop 'mfc0', (0b010000<<26) | (0b00000<<21), :rt, :idb + addop 'mfc0', (0b010000<<26) | (0b00000<<21), :rt, :idb, :sel + addop 'mtc0', (0b010000<<26) | (0b00100<<21), :rt, :idb + addop 'mtc0', (0b010000<<26) | (0b00100<<21), :rt, :idb, :sel - addop 'tlbr', (0b010000<<26) | (1<<25) | 0b000001 - addop 'tlbwi',(0b010000<<26) | (1<<25) | 0b000010 - addop 'tlbwr',(0b010000<<26) | (1<<25) | 0b000110 - addop 'tlbp', (0b010000<<26) | (1<<25) | 0b001000 - addop 'eret', (0b010000<<26) | (1<<25) | 0b011000 - addop 'deret',(0b010000<<26) | (1<<25) | 0b011111 - addop 'wait', (0b010000<<26) | (1<<25) | 0b100000 # mode field ? 
- end + addop 'tlbr', (0b010000<<26) | (1<<25) | 0b000001 + addop 'tlbwi',(0b010000<<26) | (1<<25) | 0b000010 + addop 'tlbwr',(0b010000<<26) | (1<<25) | 0b000110 + addop 'tlbp', (0b010000<<26) | (1<<25) | 0b001000 + addop 'eret', (0b010000<<26) | (1<<25) | 0b011000 + addop 'deret',(0b010000<<26) | (1<<25) | 0b011111 + addop 'wait', (0b010000<<26) | (1<<25) | 0b100000 # mode field ? + end - def init_mips32r2 - init_mips32 + def init_mips32r2 + init_mips32 - addop 'rotr', 0b000010 | (1<<21), :rd, :rt, :sa - addop 'rotrv',0b000110 | (1<<6), :rd, :rt, :rs + addop 'rotr', 0b000010 | (1<<21), :rd, :rt, :sa + addop 'rotrv',0b000110 | (1<<6), :rd, :rt, :rs - addop 'synci',(1<<26) | (0b11111<<16), :rs_i16 + addop 'synci',(1<<26) | (0b11111<<16), :rs_i16 - # special3 - addop 'ext', (0b011111<<26) | 0b000000, :rt, :rs, :sa, :idm1 - addop 'ins', (0b011111<<26) | 0b000100, :rt, :rs, :sa, :idb - addop 'rdhwr',(0b011111<<26)| 0b111011, :rt, :rd - addop 'wsbh',(0b011111<<26) | (0b00010<<6) | 0b100000, :rd, :rt - addop 'seb', (0b011111<<26) | (0b10000<<6) | 0b100000, :rd, :rt - addop 'seh', (0b011111<<26) | (0b11000<<6) | 0b100000, :rd, :rt + # special3 + addop 'ext', (0b011111<<26) | 0b000000, :rt, :rs, :sa, :idm1 + addop 'ins', (0b011111<<26) | 0b000100, :rt, :rs, :sa, :idb + addop 'rdhwr',(0b011111<<26)| 0b111011, :rt, :rd + addop 'wsbh',(0b011111<<26) | (0b00010<<6) | 0b100000, :rd, :rt + addop 'seb', (0b011111<<26) | (0b10000<<6) | 0b100000, :rd, :rt + addop 'seh', (0b011111<<26) | (0b11000<<6) | 0b100000, :rd, :rt - # cp0 - addop 'rdpgpr', (0b010000<<26) | (0b01010<<21), :rd, :rt - addop 'wrpgpr', (0b010000<<26) | (0b01110<<21), :rd, :rt - addop 'di', (0b010000<<26) | (0b01011<<21) | (0b01100<<11) | (0<<5) - addop 'di', (0b010000<<26) | (0b01011<<21) | (0b01100<<11) | (0<<5), :rt - addop 'ei', (0b010000<<26) | (0b01011<<21) | (0b01100<<11) | (1<<5) - addop 'ei', (0b010000<<26) | (0b01011<<21) | (0b01100<<11) | (1<<5), :rt - end - alias init_latest init_mips32r2 + # cp0 + 
addop 'rdpgpr', (0b010000<<26) | (0b01010<<21), :rd, :rt + addop 'wrpgpr', (0b010000<<26) | (0b01110<<21), :rd, :rt + addop 'di', (0b010000<<26) | (0b01011<<21) | (0b01100<<11) | (0<<5) + addop 'di', (0b010000<<26) | (0b01011<<21) | (0b01100<<11) | (0<<5), :rt + addop 'ei', (0b010000<<26) | (0b01011<<21) | (0b01100<<11) | (1<<5) + addop 'ei', (0b010000<<26) | (0b01011<<21) | (0b01100<<11) | (1<<5), :rt + end + alias init_latest init_mips32r2 end class MIPS64 - def init_mips64 - init_mips32r2 - @valid_props.update :mi64 => true + def init_mips64 + init_mips32r2 + @valid_props.update :mi64 => true - addop 'ld', 0b110111 << 26, :rt, :rs_i16, :m64 - addop 'lwu', 0b100111 << 26, :rt, :rs_i16 - addop 'sd', 0b111111 << 26, :rt, :rs_i16, :m64 - addop 'scd', 0b111100 << 26, :rt, :rs_i16, :m64 - addop 'ldl', 0b011010 << 26, :rt, :rs_i16 - addop 'ldr', 0b011011 << 26, :rt, :rs_i16 - addop 'sdl', 0b101100 << 26, :rt, :rs_i16 - addop 'sdr', 0b101101 << 26, :rt, :rs_i16 - addop 'lld', 0b110100 << 26, :rt, :rs_i16 - addop 'daddi', 0b011000 << 26, :rt, :rs, :i16 - addop 'daddiu', 0b011001 << 26, :rt, :rs, :i16 + addop 'ld', 0b110111 << 26, :rt, :rs_i16, :m64 + addop 'lwu', 0b100111 << 26, :rt, :rs_i16 + addop 'sd', 0b111111 << 26, :rt, :rs_i16, :m64 + addop 'scd', 0b111100 << 26, :rt, :rs_i16, :m64 + addop 'ldl', 0b011010 << 26, :rt, :rs_i16 + addop 'ldr', 0b011011 << 26, :rt, :rs_i16 + addop 'sdl', 0b101100 << 26, :rt, :rs_i16 + addop 'sdr', 0b101101 << 26, :rt, :rs_i16 + addop 'lld', 0b110100 << 26, :rt, :rs_i16 + addop 'daddi', 0b011000 << 26, :rt, :rs, :i16 + addop 'daddiu', 0b011001 << 26, :rt, :rs, :i16 - addop 'dclo', (0b011100 << 26) | (0b100101), :rd, :rt, :rs - addop 'dclz', (0b011100 << 26) | (0b100100), :rd, :rt, :rs + addop 'dclo', (0b011100 << 26) | (0b100101), :rd, :rt, :rs + addop 'dclz', (0b011100 << 26) | (0b100100), :rd, :rt, :rs - addop 'dadd', 0b101100, :rd, :rs, :rt - addop 'daddu', 0b101101, :rd, :rs, :rt - addop 'dsub', 0b101110, :rd, :rs, :rt - addop 
'dsubu', 0b101111, :rd, :rs, :rt - addop 'dsll', 0b111000, :rd, :rt, :sa - addop 'dsll32', 0b111100, :rd, :rt, :sa - addop 'dsllv', 0b010100, :rd, :rt, :rs - addop 'dsra', 0b111011, :rd, :rt, :sa - addop 'dsra32', 0b111111, :rd, :rt, :sa - addop 'dsrav', 0b010111, :rd, :rt, :rs - addop 'dsrl', 0b111010, :rd, :rt, :sa - addop 'dsrl32', 0b111110, :rd, :rt, :sa - addop 'dsrlv', 0b010110, :rd, :rt, :rs - addop 'ddiv', 0b011110, :rs, :rt - addop 'ddivu', 0b011111, :rs, :rt - addop 'dmult', 0b011100, :rs, :rt - addop 'dmultu', 0b011101, :rs, :rt + addop 'dadd', 0b101100, :rd, :rs, :rt + addop 'daddu', 0b101101, :rd, :rs, :rt + addop 'dsub', 0b101110, :rd, :rs, :rt + addop 'dsubu', 0b101111, :rd, :rs, :rt + addop 'dsll', 0b111000, :rd, :rt, :sa + addop 'dsll32', 0b111100, :rd, :rt, :sa + addop 'dsllv', 0b010100, :rd, :rt, :rs + addop 'dsra', 0b111011, :rd, :rt, :sa + addop 'dsra32', 0b111111, :rd, :rt, :sa + addop 'dsrav', 0b010111, :rd, :rt, :rs + addop 'dsrl', 0b111010, :rd, :rt, :sa + addop 'dsrl32', 0b111110, :rd, :rt, :sa + addop 'dsrlv', 0b010110, :rd, :rt, :rs + addop 'ddiv', 0b011110, :rs, :rt + addop 'ddivu', 0b011111, :rs, :rt + addop 'dmult', 0b011100, :rs, :rt + addop 'dmultu', 0b011101, :rs, :rt - addop 'dmfc0', (0b010000<<26) | (0b00001<<21), :rt, :idb - addop 'dmfc0', (0b010000<<26) | (0b00001<<21), :rt, :idb, :sel - addop 'dmtc0', (0b010000<<26) | (0b00101<<21), :rt, :idb - addop 'dmtc0', (0b010000<<26) | (0b00101<<21), :rt, :idb, :sel - end + addop 'dmfc0', (0b010000<<26) | (0b00001<<21), :rt, :idb + addop 'dmfc0', (0b010000<<26) | (0b00001<<21), :rt, :idb, :sel + addop 'dmtc0', (0b010000<<26) | (0b00101<<21), :rt, :idb + addop 'dmtc0', (0b010000<<26) | (0b00101<<21), :rt, :idb, :sel + end - def init_mips64r2 - init_mips64 - @fields_mask.update :msbd => 0x1f - @fields_shift.update :msbd => 11 + def init_mips64r2 + init_mips64 + @fields_mask.update :msbd => 0x1f + @fields_shift.update :msbd => 11 - addop 'dext', (0b011111 << 26) | 0b000011, :rt, :rs, :sa, 
:msbd # sa => lsb - addop 'dextm', (0b011111 << 26) | 0b000001, :rt, :rs, :sa, :msbd - addop 'dextu', (0b011111 << 26) | 0b000010, :rt, :rs, :sa, :msbd - addop 'dins', (0b011111 << 26) | 0b000111, :rt, :rs, :sa, :msbd - addop 'dinsm', (0b011111 << 26) | 0b000101, :rt, :rs, :sa, :msbd - addop 'dinsu', (0b011111 << 26) | 0b000110, :rt, :rs, :sa, :msbd + addop 'dext', (0b011111 << 26) | 0b000011, :rt, :rs, :sa, :msbd # sa => lsb + addop 'dextm', (0b011111 << 26) | 0b000001, :rt, :rs, :sa, :msbd + addop 'dextu', (0b011111 << 26) | 0b000010, :rt, :rs, :sa, :msbd + addop 'dins', (0b011111 << 26) | 0b000111, :rt, :rs, :sa, :msbd + addop 'dinsm', (0b011111 << 26) | 0b000101, :rt, :rs, :sa, :msbd + addop 'dinsu', (0b011111 << 26) | 0b000110, :rt, :rs, :sa, :msbd - addop 'drotr', (1 << 21) | 0b111010, :rd, :rt, :sa - addop 'drotr32', (1 << 21) | 0b111110, :rd, :rt, :sa - addop 'drotrv', (1 << 6) | 0b010110, :rd, :rt, :rs + addop 'drotr', (1 << 21) | 0b111010, :rd, :rt, :sa + addop 'drotr32', (1 << 21) | 0b111110, :rd, :rt, :sa + addop 'drotrv', (1 << 6) | 0b010110, :rd, :rt, :rs - addop 'dsbh', (0b011111 << 26) | (0b00010 << 6) | 0b100100, :rd, :rt - addop 'dshd', (0b011111 << 26) | (0b00101 << 6) | 0b100100, :rd, :rt - end + addop 'dsbh', (0b011111 << 26) | (0b00010 << 6) | 0b100100, :rd, :rt + addop 'dshd', (0b011111 << 26) | (0b00101 << 6) | 0b100100, :rd, :rt + end - alias init_latest init_mips64r2 + alias init_latest init_mips64r2 end end __END__ - def macro_addop_cop1(name, bin, *aprops) - flds = [ :rt, :fs ] - addop name, :cop1, bin, 'rt, fs', flds, *aprops - end + def macro_addop_cop1(name, bin, *aprops) + flds = [ :rt, :fs ] + addop name, :cop1, bin, 'rt, fs', flds, *aprops + end - def macro_addop_cop1_precision(name, type, bin, fmt, *aprops) - flds = [ :ft, :fs, :fd ] - addop name+'.'+(type.to_s[5,7]), type, bin, fmt, flds, *aprops - end + def macro_addop_cop1_precision(name, type, bin, fmt, *aprops) + flds = [ :ft, :fs, :fd ] + addop name+'.'+(type.to_s[5,7]), 
type, bin, fmt, flds, *aprops + end - public - # Initialize the instruction set with the MIPS32 Instruction Set - def init_mips32 - :cc => [7, 18, :fpcc], - :op => [0x1F, 16, :op ], :cp2_rt => [0x1F, 16, :cp2_reg ], - :stype => [0x1F, 6, :imm ], - :code => [0xFFFFF, 6, :code ], - :sel => [3, 0, :sel ]}) + public + # Initialize the instruction set with the MIPS32 Instruction Set + def init_mips32 + :cc => [7, 18, :fpcc], + :op => [0x1F, 16, :op ], :cp2_rt => [0x1F, 16, :cp2_reg ], + :stype => [0x1F, 6, :imm ], + :code => [0xFFFFF, 6, :code ], + :sel => [3, 0, :sel ]}) - # --------------------------------------------------------------- - # COP0, field rs - # --------------------------------------------------------------- + # --------------------------------------------------------------- + # COP0, field rs + # --------------------------------------------------------------- - addop 'mfc0', :cop0, 0b00000, 'rt, rd, sel', [ :rt, :rd, :sel ] - addop 'mtc0', :cop0, 0b00100, 'rt, rd, sel', [ :rt, :rd, :sel ] + addop 'mfc0', :cop0, 0b00000, 'rt, rd, sel', [ :rt, :rd, :sel ] + addop 'mtc0', :cop0, 0b00100, 'rt, rd, sel', [ :rt, :rd, :sel ] - # --------------------------------------------------------------- - # COP0 when rs=C0 - # --------------------------------------------------------------- + # --------------------------------------------------------------- + # COP0 when rs=C0 + # --------------------------------------------------------------- - macro_addop_cop0_c0 'tlbr', 0b000001 - macro_addop_cop0_c0 'tlbwi', 0b000010 - macro_addop_cop0_c0 'tlwr', 0b000110 - macro_addop_cop0_c0 'tlbp', 0b001000 - macro_addop_cop0_c0 'eret', 0b011000 - macro_addop_cop0_c0 'deret', 0b011111 - macro_addop_cop0_c0 'wait', 0b100000 + macro_addop_cop0_c0 'tlbr', 0b000001 + macro_addop_cop0_c0 'tlbwi', 0b000010 + macro_addop_cop0_c0 'tlwr', 0b000110 + macro_addop_cop0_c0 'tlbp', 0b001000 + macro_addop_cop0_c0 'eret', 0b011000 + macro_addop_cop0_c0 'deret', 0b011111 + macro_addop_cop0_c0 
'wait', 0b100000 - # --------------------------------------------------------------- - # COP1, field rs - # --------------------------------------------------------------- + # --------------------------------------------------------------- + # COP1, field rs + # --------------------------------------------------------------- - macro_addop_cop1 'mfc1', 0b00000 - macro_addop_cop1 'cfc1', 0b00010 - macro_addop_cop1 'mtc1', 0b00100 - macro_addop_cop1 'ctc1', 0b00110 + macro_addop_cop1 'mfc1', 0b00000 + macro_addop_cop1 'cfc1', 0b00010 + macro_addop_cop1 'mtc1', 0b00100 + macro_addop_cop1 'ctc1', 0b00110 - addop "bc1f", :cop1, 0b01000, 'cc, off', [ :cc, :off ], :diff_bits, [ 16, 3, 0 ] - addop "bc1fl", :cop1, 0b01000, 'cc, off', [ :cc, :off ], :diff_bits, [ 16, 3, 2 ] - addop "bc1t", :cop1, 0b01000, 'cc, off', [ :cc, :off ], :diff_bits, [ 16, 3, 1 ] - addop "bc1tl", :cop1, 0b01000, 'cc, off', [ :cc, :off ], :diff_bits, [ 16, 3, 3 ] + addop "bc1f", :cop1, 0b01000, 'cc, off', [ :cc, :off ], :diff_bits, [ 16, 3, 0 ] + addop "bc1fl", :cop1, 0b01000, 'cc, off', [ :cc, :off ], :diff_bits, [ 16, 3, 2 ] + addop "bc1t", :cop1, 0b01000, 'cc, off', [ :cc, :off ], :diff_bits, [ 16, 3, 1 ] + addop "bc1tl", :cop1, 0b01000, 'cc, off', [ :cc, :off ], :diff_bits, [ 16, 3, 3 ] - # --------------------------------------------------------------- - # COP1, field rs=S/D - # --------------------------------------------------------------- + # --------------------------------------------------------------- + # COP1, field rs=S/D + # --------------------------------------------------------------- - [ :cop1_s, :cop1_d ].each do |type| - type_str = type.to_s[5,7] + [ :cop1_s, :cop1_d ].each do |type| + type_str = type.to_s[5,7] - macro_addop_cop1_precision 'add', type, 0b000000, 'fd, fs, ft' - macro_addop_cop1_precision 'sub', type, 0b000001, 'fd, fs, ft' - macro_addop_cop1_precision 'mul', type, 0b000010, 'fd, fs, ft' - macro_addop_cop1_precision 'abs', type, 0b000101, 'fd, fs', :ft_zero - 
macro_addop_cop1_precision 'mov', type, 0b000110, 'fd, fs', :ft_zero - macro_addop_cop1_precision 'neg', type, 0b000111, 'fd, fs', :ft_zero + macro_addop_cop1_precision 'add', type, 0b000000, 'fd, fs, ft' + macro_addop_cop1_precision 'sub', type, 0b000001, 'fd, fs, ft' + macro_addop_cop1_precision 'mul', type, 0b000010, 'fd, fs, ft' + macro_addop_cop1_precision 'abs', type, 0b000101, 'fd, fs', :ft_zero + macro_addop_cop1_precision 'mov', type, 0b000110, 'fd, fs', :ft_zero + macro_addop_cop1_precision 'neg', type, 0b000111, 'fd, fs', :ft_zero - macro_addop_cop1_precision 'movz', type, 0b010010, 'fd, fs, ft' - macro_addop_cop1_precision 'movn', type, 0b010011, 'fd, fs, ft' + macro_addop_cop1_precision 'movz', type, 0b010010, 'fd, fs, ft' + macro_addop_cop1_precision 'movn', type, 0b010011, 'fd, fs, ft' - addop "movf.#{type_str}", type, 0b010001, 'fd, fs, cc', [ :cc, :fs, :fd ], :diff_bits, [ 16, 1, 0 ] - addop "movt.#{type_str}", type, 0b010001, 'fd, fs, cc', [ :cc, :fs, :fd ], :diff_bits, [ 16, 1, 1 ] + addop "movf.#{type_str}", type, 0b010001, 'fd, fs, cc', [ :cc, :fs, :fd ], :diff_bits, [ 16, 1, 0 ] + addop "movt.#{type_str}", type, 0b010001, 'fd, fs, cc', [ :cc, :fs, :fd ], :diff_bits, [ 16, 1, 1 ] - %w(f un eq ueq olt ult ole ule sf ngle seq ngl lt nge le ngt).each_with_index do |cond, index| - addop "c.#{cond}.#{type_str}", type, 0b110000+index, 'cc, fs, ft', - [ :ft, :fs, :cc ] - end - end + %w(f un eq ueq olt ult ole ule sf ngle seq ngl lt nge le ngt).each_with_index do |cond, index| + addop "c.#{cond}.#{type_str}", type, 0b110000+index, 'cc, fs, ft', + [ :ft, :fs, :cc ] + end + end - # S and D Without PS + # S and D Without PS - [:cop1_s, :cop1_d].each do |type| - macro_addop_cop1_precision 'div', type, 0b000011, 'fd, fs, ft' - macro_addop_cop1_precision 'sqrt', type, 0b000100, 'fd, fs', :ft_zero + [:cop1_s, :cop1_d].each do |type| + macro_addop_cop1_precision 'div', type, 0b000011, 'fd, fs, ft' + macro_addop_cop1_precision 'sqrt', type, 0b000100, 'fd, fs', 
:ft_zero - macro_addop_cop1_precision 'round.w', type, 0b001100, 'fd, fs', :ft_zero - macro_addop_cop1_precision 'trunc.w', type, 0b001101, 'fd, fs', :ft_zero - macro_addop_cop1_precision 'ceil.w', type, 0b001110, 'fd, fs', :ft_zero - macro_addop_cop1_precision 'floor.w', type, 0b001111, 'fd, fs', :ft_zero + macro_addop_cop1_precision 'round.w', type, 0b001100, 'fd, fs', :ft_zero + macro_addop_cop1_precision 'trunc.w', type, 0b001101, 'fd, fs', :ft_zero + macro_addop_cop1_precision 'ceil.w', type, 0b001110, 'fd, fs', :ft_zero + macro_addop_cop1_precision 'floor.w', type, 0b001111, 'fd, fs', :ft_zero - end + end - # COP2 is not decoded (pretty useless) + # COP2 is not decoded (pretty useless) - [:cop1_d,:cop1_w].each { |type| macro_addop_cop1_precision 'cvt.s', type, 0b100000, 'fd, fs', :ft_zero } - [:cop1_s,:cop1_w].each { |type| macro_addop_cop1_precision 'cvt.d', type, 0b100001, 'fd, fs', :ft_zero } - [:cop1_s,:cop1_d].each { |type| macro_addop_cop1_precision 'cvt.w', type, 0b100100, 'fd, fs', :ft_zero } + [:cop1_d,:cop1_w].each { |type| macro_addop_cop1_precision 'cvt.s', type, 0b100000, 'fd, fs', :ft_zero } + [:cop1_s,:cop1_w].each { |type| macro_addop_cop1_precision 'cvt.d', type, 0b100001, 'fd, fs', :ft_zero } + [:cop1_s,:cop1_d].each { |type| macro_addop_cop1_precision 'cvt.w', type, 0b100100, 'fd, fs', :ft_zero } - [ :normal, :special, :regimm, :special2, :cop0, :cop0_c0, :cop1, :cop1_s, - :cop1_d, :cop1_w ].each \ - { |t| @@opcodes_by_class[t] = opcode_list.find_all { |o| o.type == t } } - end + [ :normal, :special, :regimm, :special2, :cop0, :cop0_c0, :cop1, :cop1_s, + :cop1_d, :cop1_w ].each \ + { |t| @@opcodes_by_class[t] = opcode_list.find_all { |o| o.type == t } } + end - # Initialize the instruction set with the MIPS32 Instruction Set Release 2 - def init_mips64 - init_mips32 + # Initialize the instruction set with the MIPS32 Instruction Set Release 2 + def init_mips64 + init_mips32 - #SPECIAL - macro_addop_special "rotr", 0b000010, 'rd, rt, sa', 
:diff_bits, [ 26, 1, 1 ] - macro_addop_special "rotrv", 0b000110, 'rd, rt, rs', :diff_bits, [ 6, 1, 1 ] + #SPECIAL + macro_addop_special "rotr", 0b000010, 'rd, rt, sa', :diff_bits, [ 26, 1, 1 ] + macro_addop_special "rotrv", 0b000110, 'rd, rt, rs', :diff_bits, [ 6, 1, 1 ] - # REGIMM - addop "synci", :regimm, 0b11111, '', {:base => [5,21], :off => [16, 0] } + # REGIMM + addop "synci", :regimm, 0b11111, '', {:base => [5,21], :off => [16, 0] } - # --------------------------------------------------------------- - # SPECIAL3 opcode encoding of function field - # --------------------------------------------------------------- + # --------------------------------------------------------------- + # SPECIAL3 opcode encoding of function field + # --------------------------------------------------------------- - addop "ext", :special3, 0b00000, 'rt, rs, pos, size', { :rs => [5, 21], :rt => [5, 16], - :msbd => [5, 11], :lsb => [5, 6] } - addop "ins", :special3, 0b00100, 'rt, rs, pos, size', { :rs => [5, 21], :rt => [5, 16], - :msb => [5, 11], :lsb => [5, 6] } + addop "ext", :special3, 0b00000, 'rt, rs, pos, size', { :rs => [5, 21], :rt => [5, 16], + :msbd => [5, 11], :lsb => [5, 6] } + addop "ins", :special3, 0b00100, 'rt, rs, pos, size', { :rs => [5, 21], :rt => [5, 16], + :msb => [5, 11], :lsb => [5, 6] } - addop "rdhwr", :special3, 0b111011, 'rt, rd', { :rt => [5, 16], :rd => [5, 11] } + addop "rdhwr", :special3, 0b111011, 'rt, rd', { :rt => [5, 16], :rd => [5, 11] } - addop "wsbh", :bshfl, 0b00010, 'rd, rt', { :rt => [5, 16], :rd => [5, 11] } - addop "seb", :bshfl, 0b10000, 'rd, rt', { :rt => [5, 16], :rd => [5, 11] } - addop "seh", :bshfl, 0b11000, 'rd, rt', { :rt => [5, 16], :rd => [5, 11] } + addop "wsbh", :bshfl, 0b00010, 'rd, rt', { :rt => [5, 16], :rd => [5, 11] } + addop "seb", :bshfl, 0b10000, 'rd, rt', { :rt => [5, 16], :rd => [5, 11] } + addop "seh", :bshfl, 0b11000, 'rd, rt', { :rt => [5, 16], :rd => [5, 11] } - # 
--------------------------------------------------------------- - # COP0 - # --------------------------------------------------------------- + # --------------------------------------------------------------- + # COP0 + # --------------------------------------------------------------- - addop "rdpgpr", :cop0, 0b01010, 'rt, rd', {:rt => [5, 16], :rd => [5, 11] } - addop "wdpgpr", :cop0, 0b01110, 'rt, rd', {:rt => [5, 16], :rd => [5, 11] } - addop "di", :cop0, 0b01011, '', {}, :diff_bits, [ 5, 1 , 0] - addop "ei", :cop0, 0b01011, '', {}, :diff_bits, [ 5, 1 , 1] + addop "rdpgpr", :cop0, 0b01010, 'rt, rd', {:rt => [5, 16], :rd => [5, 11] } + addop "wdpgpr", :cop0, 0b01110, 'rt, rd', {:rt => [5, 16], :rd => [5, 11] } + addop "di", :cop0, 0b01011, '', {}, :diff_bits, [ 5, 1 , 0] + addop "ei", :cop0, 0b01011, '', {}, :diff_bits, [ 5, 1 , 1] - # --------------------------------------------------------------- - # COP1, field rs - # --------------------------------------------------------------- + # --------------------------------------------------------------- + # COP1, field rs + # --------------------------------------------------------------- - macro_addop_cop1 "mfhc1", 0b00011 - macro_addop_cop1 "mthc1", 0b00111 + macro_addop_cop1 "mfhc1", 0b00011 + macro_addop_cop1 "mthc1", 0b00111 - # Floating point + # Floating point - [:cop1_s, :cop1_d].each do |type| - macro_addop_cop1_precision 'round.l', type, 0b001000, 'fd, fs', :ft_zero - macro_addop_cop1_precision 'trunc.l', type, 0b001001, 'fd, fs', :ft_zero - macro_addop_cop1_precision 'ceil.l', type, 0b001010, 'fd, fs', :ft_zero - macro_addop_cop1_precision 'floor.l', type, 0b001011, 'fd, fs', :ft_zero + [:cop1_s, :cop1_d].each do |type| + macro_addop_cop1_precision 'round.l', type, 0b001000, 'fd, fs', :ft_zero + macro_addop_cop1_precision 'trunc.l', type, 0b001001, 'fd, fs', :ft_zero + macro_addop_cop1_precision 'ceil.l', type, 0b001010, 'fd, fs', :ft_zero + macro_addop_cop1_precision 'floor.l', type, 0b001011, 'fd, fs', 
:ft_zero - macro_addop_cop1_precision 'recip', type, 0b010101, 'fd, fs', :ft_zero - macro_addop_cop1_precision 'rsqrt', type, 0b010110, 'fd, fs', :ft_zero + macro_addop_cop1_precision 'recip', type, 0b010101, 'fd, fs', :ft_zero + macro_addop_cop1_precision 'rsqrt', type, 0b010110, 'fd, fs', :ft_zero - macro_addop_cop1_precision 'cvt.l', type, 0b100101, 'fd, fs', :ft_zero - end - macro_addop_cop1_precision 'cvt.ps', :cop1_s, 0b100110, 'fd, fs', :ft_zero - macro_addop_cop1_precision 'cvt.s', :cop1_l, 0b100000, 'fd, fs', :ft_zero - macro_addop_cop1_precision 'cvt.d', :cop1_l, 0b100000, 'fd, fs', :ft_zero + macro_addop_cop1_precision 'cvt.l', type, 0b100101, 'fd, fs', :ft_zero + end + macro_addop_cop1_precision 'cvt.ps', :cop1_s, 0b100110, 'fd, fs', :ft_zero + macro_addop_cop1_precision 'cvt.s', :cop1_l, 0b100000, 'fd, fs', :ft_zero + macro_addop_cop1_precision 'cvt.d', :cop1_l, 0b100000, 'fd, fs', :ft_zero - macro_addop_cop1_precision 'add', :cop1_ps, 0b000000, 'fd, fs, ft' - macro_addop_cop1_precision 'sub', :cop1_ps, 0b000001, 'fd, fs, ft' - macro_addop_cop1_precision 'mul', :cop1_ps, 0b000010, 'fd, fs, ft' - macro_addop_cop1_precision 'abs', :cop1_ps, 0b000101, 'fd, fs', :ft_zero - macro_addop_cop1_precision 'mov', :cop1_ps, 0b000110, 'fd, fs', :ft_zero - macro_addop_cop1_precision 'neg', :cop1_ps, 0b000111, 'fd, fs', :ft_zero + macro_addop_cop1_precision 'add', :cop1_ps, 0b000000, 'fd, fs, ft' + macro_addop_cop1_precision 'sub', :cop1_ps, 0b000001, 'fd, fs, ft' + macro_addop_cop1_precision 'mul', :cop1_ps, 0b000010, 'fd, fs, ft' + macro_addop_cop1_precision 'abs', :cop1_ps, 0b000101, 'fd, fs', :ft_zero + macro_addop_cop1_precision 'mov', :cop1_ps, 0b000110, 'fd, fs', :ft_zero + macro_addop_cop1_precision 'neg', :cop1_ps, 0b000111, 'fd, fs', :ft_zero - macro_addop_cop1_precision 'movz', :cop1_ps, 0b010010, 'fd, fs, ft' - macro_addop_cop1_precision 'movn', :cop1_ps, 0b010011, 'fd, fs, ft' + macro_addop_cop1_precision 'movz', :cop1_ps, 0b010010, 'fd, fs, ft' + 
macro_addop_cop1_precision 'movn', :cop1_ps, 0b010011, 'fd, fs, ft' - addop "movf.#{:cop1_ps_str}", :cop1_ps, 0b010001, 'fd, fs, cc', [ :cc, :fs, :fd ] - addop "movt.#{:cop1_ps_str}", :cop1_ps, 0b010001, 'fd, fs, cc', [ :cc, :fs, :fd ] + addop "movf.#{:cop1_ps_str}", :cop1_ps, 0b010001, 'fd, fs, cc', [ :cc, :fs, :fd ] + addop "movt.#{:cop1_ps_str}", :cop1_ps, 0b010001, 'fd, fs, cc', [ :cc, :fs, :fd ] - %w(f un eq ueq olt ult ole ule sf ngle seq ngl lt nge le ngt).each_with_index do |cond, index| - addop "c.#{cond}.ps", :cop1_cond, 0b110000+index, 'cc, fs, ft', - [ :ft, :fs, :cc ] + %w(f un eq ueq olt ult ole ule sf ngle seq ngl lt nge le ngt).each_with_index do |cond, index| + addop "c.#{cond}.ps", :cop1_cond, 0b110000+index, 'cc, fs, ft', + [ :ft, :fs, :cc ] - # TODO: COP1X + # TODO: COP1X - [ :special3, :bshfl, :cop1_l, :cop1_ps ].each \ - { |t| @@opcodes_by_class[t] = opcode_list.find_all { |o| o.type == t } } - end + [ :special3, :bshfl, :cop1_l, :cop1_ps ].each \ + { |t| @@opcodes_by_class[t] = opcode_list.find_all { |o| o.type == t } } + end - end + end - # Reset all instructions - def reset - metaprops_allowed.clear - args_allowed.clear - props_allowed.clear - fields_spec.clear - opcode_list.clear - end + # Reset all instructions + def reset + metaprops_allowed.clear + args_allowed.clear + props_allowed.clear + fields_spec.clear + opcode_list.clear + end end - # Array containing all the supported opcodes - attr_accessor :opcode_list + # Array containing all the supported opcodes + attr_accessor :opcode_list - init_mips32 + init_mips32 end end diff --git a/lib/metasm/metasm/cpu/mips/parse.rb b/lib/metasm/metasm/cpu/mips/parse.rb index e7daeac27b..4a69945d0f 100644 --- a/lib/metasm/metasm/cpu/mips/parse.rb +++ b/lib/metasm/metasm/cpu/mips/parse.rb @@ -9,43 +9,43 @@ require 'metasm/parse' module Metasm class MIPS - def parse_arg_valid?(op, sym, arg) - # special case for lw reg, imm32(reg) ? 
(pseudo-instr, need to convert to 'lui t0, up imm32 ori t0 down imm32 add t0, reg lw reg, 0(t0) - case sym - when :rs, :rt, :rd; arg.kind_of? Reg - when :sa, :i16, :i20, :i26; arg.kind_of? Expression - when :rs_i16; arg.kind_of? Memref - when :ft; arg.kind_of? FpReg - else raise "internal error: mips arg #{sym.inspect}" - end - end + def parse_arg_valid?(op, sym, arg) + # special case for lw reg, imm32(reg) ? (pseudo-instr, need to convert to 'lui t0, up imm32 ori t0 down imm32 add t0, reg lw reg, 0(t0) + case sym + when :rs, :rt, :rd; arg.kind_of? Reg + when :sa, :i16, :i20, :i26; arg.kind_of? Expression + when :rs_i16; arg.kind_of? Memref + when :ft; arg.kind_of? FpReg + else raise "internal error: mips arg #{sym.inspect}" + end + end - def parse_argument(pgm) - pgm.skip_space - return if not tok = pgm.nexttok - if tok.type == :string and Reg.s_to_i[tok.raw] - pgm.readtok - arg = Reg.new Reg.s_to_i[tok.raw] - elsif tok.type == :string and FpReg.s_to_i[tok.raw] - pgm.readtok - arg = FpReg.new FpReg.s_to_i[tok.raw] - else - arg = Expression.parse pgm - pgm.skip_space - # check memory indirection: 'off(base reg)' # XXX scaled index ? 
- if arg and pgm.nexttok and pgm.nexttok.type == :punct and pgm.nexttok.raw == '(' - pgm.readtok - pgm.skip_space_eol - ntok = pgm.readtok - raise tok, "Invalid base #{ntok}" unless ntok and ntok.type == :string and Reg.s_to_i[ntok.raw] - base = Reg.new Reg.s_to_i[ntok.raw] - pgm.skip_space_eol - ntok = pgm.readtok - raise tok, "Invalid memory reference, ')' expected" if not ntok or ntok.type != :punct or ntok.raw != ')' - arg = Memref.new base, arg - end - end - arg - end + def parse_argument(pgm) + pgm.skip_space + return if not tok = pgm.nexttok + if tok.type == :string and Reg.s_to_i[tok.raw] + pgm.readtok + arg = Reg.new Reg.s_to_i[tok.raw] + elsif tok.type == :string and FpReg.s_to_i[tok.raw] + pgm.readtok + arg = FpReg.new FpReg.s_to_i[tok.raw] + else + arg = Expression.parse pgm + pgm.skip_space + # check memory indirection: 'off(base reg)' # XXX scaled index ? + if arg and pgm.nexttok and pgm.nexttok.type == :punct and pgm.nexttok.raw == '(' + pgm.readtok + pgm.skip_space_eol + ntok = pgm.readtok + raise tok, "Invalid base #{ntok}" unless ntok and ntok.type == :string and Reg.s_to_i[ntok.raw] + base = Reg.new Reg.s_to_i[ntok.raw] + pgm.skip_space_eol + ntok = pgm.readtok + raise tok, "Invalid memory reference, ')' expected" if not ntok or ntok.type != :punct or ntok.raw != ')' + arg = Memref.new base, arg + end + end + arg + end end end diff --git a/lib/metasm/metasm/cpu/mips/render.rb b/lib/metasm/metasm/cpu/mips/render.rb index 31ad15a331..9c70903f9f 100644 --- a/lib/metasm/metasm/cpu/mips/render.rb +++ b/lib/metasm/metasm/cpu/mips/render.rb @@ -9,35 +9,35 @@ require 'metasm/render' module Metasm class MIPS - class Reg - include Renderable - def render ; [self.class.i_to_s[@i]] end - end - class FpReg - include Renderable - def render ; [self.class.i_to_s[@i]] end - end - class Memref - include Renderable - def render ; [@offset, '(', @base, ')'] end - end + class Reg + include Renderable + def render ; [self.class.i_to_s[@i]] end + end + class FpReg + 
include Renderable + def render ; [self.class.i_to_s[@i]] end + end + class Memref + include Renderable + def render ; [@offset, '(', @base, ')'] end + end - def render_instruction(i) - r = [] - r << i.opname - if not i.args.empty? - r << ' ' - if (a = i.args.first).kind_of? Expression and a.op == :- and a.lexpr.kind_of? String and a.rexpr.kind_of? String and opcode_list_byname[i.opname].first.props[:setip] - # jmp foo is stored as jmp foo - bar ; bar: - r << a.lexpr - else - i.args.each { |a_| - r << a_ << ', ' - } - r.pop - end - end - r - end + def render_instruction(i) + r = [] + r << i.opname + if not i.args.empty? + r << ' ' + if (a = i.args.first).kind_of? Expression and a.op == :- and a.lexpr.kind_of? String and a.rexpr.kind_of? String and opcode_list_byname[i.opname].first.props[:setip] + # jmp foo is stored as jmp foo - bar ; bar: + r << a.lexpr + else + i.args.each { |a_| + r << a_ << ', ' + } + r.pop + end + end + r + end end end diff --git a/lib/metasm/metasm/cpu/pic16c/decode.rb b/lib/metasm/metasm/cpu/pic16c/decode.rb index 9c8ccfb1e1..d2935baf40 100644 --- a/lib/metasm/metasm/cpu/pic16c/decode.rb +++ b/lib/metasm/metasm/cpu/pic16c/decode.rb @@ -9,33 +9,33 @@ require 'metasm/decode' module Metasm class Pic16c - def build_opcode_bin_mask(op) - # bit = 0 if can be mutated by an field value, 1 if fixed by opcode - op.bin_mask = Array.new(op.bin.length, 0) - op.fields.each { |f, (oct, off)| - op.bin_mask[oct] |= (@fields_mask[f] << off) - } - op.bin_mask.map! { |v| 255 ^ v } - end + def build_opcode_bin_mask(op) + # bit = 0 if can be mutated by an field value, 1 if fixed by opcode + op.bin_mask = Array.new(op.bin.length, 0) + op.fields.each { |f, (oct, off)| + op.bin_mask[oct] |= (@fields_mask[f] << off) + } + op.bin_mask.map! 
{ |v| 255 ^ v } + end - def build_bin_lookaside - # sets up a hash byte value => list of opcodes that may match - # opcode.bin_mask is built here - lookaside = Array.new(256) { [] } - @opcode_list.each { |op| + def build_bin_lookaside + # sets up a hash byte value => list of opcodes that may match + # opcode.bin_mask is built here + lookaside = Array.new(256) { [] } + @opcode_list.each { |op| - build_opcode_bin_mask op + build_opcode_bin_mask op - b = op.bin[0] - msk = op.bin_mask[0] + b = op.bin[0] + msk = op.bin_mask[0] - for i in b..(b | (255^msk)) - ext if i & msk != b & msk + for i in b..(b | (255^msk)) + ext if i & msk != b & msk - lookaside[i] << op - end - } - lookaside - end + lookaside[i] << op + end + } + lookaside + end end end diff --git a/lib/metasm/metasm/cpu/pic16c/main.rb b/lib/metasm/metasm/cpu/pic16c/main.rb index ca4185e86d..cd94e200a0 100644 --- a/lib/metasm/metasm/cpu/pic16c/main.rb +++ b/lib/metasm/metasm/cpu/pic16c/main.rb @@ -8,10 +8,10 @@ require 'metasm/main' module Metasm class Pic16c < CPU - def initialize(endianness = :big) - super() - @endianness = endianness - init - end + def initialize(endianness = :big) + super() + @endianness = endianness + init + end end end diff --git a/lib/metasm/metasm/cpu/pic16c/opcodes.rb b/lib/metasm/metasm/cpu/pic16c/opcodes.rb index 3112af715f..32a8dfe49d 100644 --- a/lib/metasm/metasm/cpu/pic16c/opcodes.rb +++ b/lib/metasm/metasm/cpu/pic16c/opcodes.rb @@ -8,61 +8,61 @@ require 'metasm/cpu/pic16c/main' module Metasm class Pic16c - def addop(name, bin, *l) - o = Opcode.new name, bin - l.each { |ll| - if @props_allowed[ll] - o.props[ll] = true - else - o.args << ll - o.fields[ll] = @fields_off[ll] - end - } - @opcode_list << o - end + def addop(name, bin, *l) + o = Opcode.new name, bin + l.each { |ll| + if @props_allowed[ll] + o.props[ll] = true + else + o.args << ll + o.fields[ll] = @fields_off[ll] + end + } + @opcode_list << o + end - def init - @fields_mask = {:f => 0x7f, :b => 0x7, :k => 0xff, :klong 
=> 0x3ff, :d => 1 } - @props_allowed = {:setip => true, :saveip => true, :stopexec => true } - @fields_off = { :f => 0, :b => 7, :k => 0, :klong => 0, :d => 7, :d => 7 } + def init + @fields_mask = {:f => 0x7f, :b => 0x7, :k => 0xff, :klong => 0x3ff, :d => 1 } + @props_allowed = {:setip => true, :saveip => true, :stopexec => true } + @fields_off = { :f => 0, :b => 7, :k => 0, :klong => 0, :d => 7, :d => 7 } - addop 'addwf', 0b00_0111_0000_0000, :f, :d - addop 'andwf', 0b00_0101_0000_0000, :f, :d - addop 'clrf', 0b00_0001_1000_0000, :f - addop 'clrw', 0b00_0001_0000_0000 # 00_0001_0xxx_xxxx - addop 'comf', 0b00_1001_0000_0000, :f, :d - addop 'decf', 0b00_0011_0000_0000, :f, :d - addop 'decfsz',0b00_1011_0000_0000, :f, :d - addop 'incf', 0b00_1010_0000_0000, :f, :d - addop 'incfsz',0b00_1111_0000_0000, :f, :d - addop 'iorwf', 0b00_0100_0000_0000, :f, :d - addop 'movf', 0b00_1000_0000_0000, :f, :d - addop 'movwf', 0b00_0000_1000_0000, :f - addop 'nop', 0b00_0000_0000_0000 # 00_0000_0xx0_0000 - addop 'rlf', 0b00_1101_0000_0000, :f, :d - addop 'rrf', 0b00_1100_0000_0000, :f, :d - addop 'subwf', 0b00_0010_0000_0000, :f, :d - addop 'swapf', 0b00_1110_0000_0000, :f, :d - addop 'xorwf', 0b00_0110_0000_0000, :f, :d + addop 'addwf', 0b00_0111_0000_0000, :f, :d + addop 'andwf', 0b00_0101_0000_0000, :f, :d + addop 'clrf', 0b00_0001_1000_0000, :f + addop 'clrw', 0b00_0001_0000_0000 # 00_0001_0xxx_xxxx + addop 'comf', 0b00_1001_0000_0000, :f, :d + addop 'decf', 0b00_0011_0000_0000, :f, :d + addop 'decfsz',0b00_1011_0000_0000, :f, :d + addop 'incf', 0b00_1010_0000_0000, :f, :d + addop 'incfsz',0b00_1111_0000_0000, :f, :d + addop 'iorwf', 0b00_0100_0000_0000, :f, :d + addop 'movf', 0b00_1000_0000_0000, :f, :d + addop 'movwf', 0b00_0000_1000_0000, :f + addop 'nop', 0b00_0000_0000_0000 # 00_0000_0xx0_0000 + addop 'rlf', 0b00_1101_0000_0000, :f, :d + addop 'rrf', 0b00_1100_0000_0000, :f, :d + addop 'subwf', 0b00_0010_0000_0000, :f, :d + addop 'swapf', 0b00_1110_0000_0000, :f, :d + 
addop 'xorwf', 0b00_0110_0000_0000, :f, :d - addop 'bcf', 0b01_0000_0000_0000, :f, :b - addop 'bsf', 0b01_0100_0000_0000, :f, :b - addop 'btfsc', 0b01_1000_0000_0000, :f, :b, :setip - addop 'btfss', 0b01_1100_0000_0000, :f, :b, :setip + addop 'bcf', 0b01_0000_0000_0000, :f, :b + addop 'bsf', 0b01_0100_0000_0000, :f, :b + addop 'btfsc', 0b01_1000_0000_0000, :f, :b, :setip + addop 'btfss', 0b01_1100_0000_0000, :f, :b, :setip - addop 'addlw', 0b11_1110_0000_0000, :k # 00_000x_0000_0000 - addop 'andlw', 0b11_1001_0000_0000, :k - addop 'call', 0b10_0000_0000_0000, :klong, :setip, :stopexec, :saveip - addop 'clrwdt',0b00_0000_0110_0100 - addop 'goto', 0b10_1000_0000_0000, :klong, :setip, :stopexec - addop 'iorlw', 0b11_1000_0000_0000, :k - addop 'movlw', 0b11_0000_0000_0000, :k # 00_00xx_0000_0000 - addop 'retfie',0b00_0000_0000_1001, :setip, :stopexec - addop 'retlw', 0b11_0100_0000_0000, :k, :setip, :stopexec # 00_00xx_0000_0000 - addop 'return',0b00_0000_0000_1000, :setip, :stopexec - addop 'sleep', 0b00_0000_0110_0011 - addop 'sublw', 0b11_1100_0000_0000, :k # 00_000x_0000_0000 - addop 'xorlw', 0b11_1010_0000_0000, :k - end + addop 'addlw', 0b11_1110_0000_0000, :k # 00_000x_0000_0000 + addop 'andlw', 0b11_1001_0000_0000, :k + addop 'call', 0b10_0000_0000_0000, :klong, :setip, :stopexec, :saveip + addop 'clrwdt',0b00_0000_0110_0100 + addop 'goto', 0b10_1000_0000_0000, :klong, :setip, :stopexec + addop 'iorlw', 0b11_1000_0000_0000, :k + addop 'movlw', 0b11_0000_0000_0000, :k # 00_00xx_0000_0000 + addop 'retfie',0b00_0000_0000_1001, :setip, :stopexec + addop 'retlw', 0b11_0100_0000_0000, :k, :setip, :stopexec # 00_00xx_0000_0000 + addop 'return',0b00_0000_0000_1000, :setip, :stopexec + addop 'sleep', 0b00_0000_0110_0011 + addop 'sublw', 0b11_1100_0000_0000, :k # 00_000x_0000_0000 + addop 'xorlw', 0b11_1010_0000_0000, :k + end end end diff --git a/lib/metasm/metasm/cpu/ppc/decode.rb b/lib/metasm/metasm/cpu/ppc/decode.rb index 923c15265c..366cdf0a2e 100644 --- 
a/lib/metasm/metasm/cpu/ppc/decode.rb +++ b/lib/metasm/metasm/cpu/ppc/decode.rb @@ -9,262 +9,262 @@ require 'metasm/decode' module Metasm class PowerPC - def build_opcode_bin_mask(op) - # bit = 0 if can be mutated by an field value, 1 if fixed by opcode - return if not op.bin.kind_of? Integer - op.bin_mask = 0 - op.fields.each { |k, (m, s)| - op.bin_mask |= m << s - } - op.bin_mask = 0xffff_ffff ^ op.bin_mask - end + def build_opcode_bin_mask(op) + # bit = 0 if can be mutated by an field value, 1 if fixed by opcode + return if not op.bin.kind_of? Integer + op.bin_mask = 0 + op.fields.each { |k, (m, s)| + op.bin_mask |= m << s + } + op.bin_mask = 0xffff_ffff ^ op.bin_mask + end - def build_bin_lookaside - lookaside = Array.new(256) { [] } - opcode_list.each { |op| - next if not op.bin.kind_of? Integer - build_opcode_bin_mask op + def build_bin_lookaside + lookaside = Array.new(256) { [] } + opcode_list.each { |op| + next if not op.bin.kind_of? Integer + build_opcode_bin_mask op - b = op.bin >> 24 - msk = op.bin_mask >> 24 + b = op.bin >> 24 + msk = op.bin_mask >> 24 - for i in b..(b | (255^msk)) - next if i & msk != b & msk - lookaside[i] << op - end - } - lookaside - end + for i in b..(b | (255^msk)) + next if i & msk != b & msk + lookaside[i] << op + end + } + lookaside + end - def decode_findopcode(edata) - return if edata.ptr+4 > edata.length - di = DecodedInstruction.new(self) - val = edata.decode_imm(:u32, @endianness) - edata.ptr -= 4 - di if di.opcode = @bin_lookaside[val >> 24].find { |op| - (op.bin & op.bin_mask) == (val & op.bin_mask) - } - end + def decode_findopcode(edata) + return if edata.ptr+4 > edata.length + di = DecodedInstruction.new(self) + val = edata.decode_imm(:u32, @endianness) + edata.ptr -= 4 + di if di.opcode = @bin_lookaside[val >> 24].find { |op| + (op.bin & op.bin_mask) == (val & op.bin_mask) + } + end - def decode_instr_op(edata, di) - before_ptr = edata.ptr - op = di.opcode - di.instruction.opname = op.name - val = 
edata.decode_imm(:u32, @endianness) + def decode_instr_op(edata, di) + before_ptr = edata.ptr + op = di.opcode + di.instruction.opname = op.name + val = edata.decode_imm(:u32, @endianness) - field_val = lambda { |f| - r = (val >> @fields_shift[f]) & @fields_mask[f] - case f - when :bd, :d, :ds, :dq, :si, :ui; r = Expression.make_signed(r<<@fields_shift[f], 16) - when :li; r = Expression.make_signed(r<<@fields_shift[f], 26) - else r - end - } + field_val = lambda { |f| + r = (val >> @fields_shift[f]) & @fields_mask[f] + case f + when :bd, :d, :ds, :dq, :si, :ui; r = Expression.make_signed(r<<@fields_shift[f], 16) + when :li; r = Expression.make_signed(r<<@fields_shift[f], 26) + else r + end + } - op.args.each { |a| - di.instruction.args << case a - when :ra, :rb, :rs, :rt; GPR.new field_val[a] - when :fra, :frb, :frc, :frs, :frt; FPR.new field_val[a] - when :ra_i16, :ra_i16s, :ra_i16q - i = field_val[{:ra_i16 => :d, :ra_i16s => :ds, :ra_i16q => :dq}[a]] - Memref.new GPR.new(field_val[:ra]), Expression[i] - when :bd, :d, :ds, :dq, :si, :ui, :li, :sh, :mb, :me, :mb_, :me_, :u; Expression[field_val[a]] - when :ba, :bf, :bfa, :bt; CR.new field_val[a] - when :bb, :bh, :flm, :fxm, :l_, :l__, :lev, :nb, :sh_, :spr, :sr, :tbr, :th, :to - puts "PPC.decode: unsupported argument #{a.inspect}" if $VERBOSE # TODO - Expression[field_val[a]] - else raise SyntaxError, "Internal error: invalid argument #{a} in #{op.name}" - end - } + op.args.each { |a| + di.instruction.args << case a + when :ra, :rb, :rs, :rt; GPR.new field_val[a] + when :fra, :frb, :frc, :frs, :frt; FPR.new field_val[a] + when :ra_i16, :ra_i16s, :ra_i16q + i = field_val[{:ra_i16 => :d, :ra_i16s => :ds, :ra_i16q => :dq}[a]] + Memref.new GPR.new(field_val[:ra]), Expression[i] + when :bd, :d, :ds, :dq, :si, :ui, :li, :sh, :mb, :me, :mb_, :me_, :u; Expression[field_val[a]] + when :ba, :bf, :bfa, :bt; CR.new field_val[a] + when :bb, :bh, :flm, :fxm, :l_, :l__, :lev, :nb, :sh_, :spr, :sr, :tbr, :th, :to + puts 
"PPC.decode: unsupported argument #{a.inspect}" if $VERBOSE # TODO + Expression[field_val[a]] + else raise SyntaxError, "Internal error: invalid argument #{a} in #{op.name}" + end + } - di.bin_length += edata.ptr - before_ptr + di.bin_length += edata.ptr - before_ptr - return if edata.ptr > edata.length + return if edata.ptr > edata.length - decode_aliases(di.instruction) + decode_aliases(di.instruction) - di - end + di + end - def decode_aliases(i) - case i.opname - when /^n?or\.?$/ - if i.args[1] == i.args[2] - i.args.pop - i.opname = {'or' => 'mr', 'or.' => 'mr.', 'nor' => 'not', 'nor.' => 'not.'}[i.opname] - end - when /^addi/ - if a = i.args[2].reduce and a.kind_of? Integer and a < 0 - i.args[2] = Expression[-a] - i.opname = i.opname.sub('addi', 'subi') - end - end + def decode_aliases(i) + case i.opname + when /^n?or\.?$/ + if i.args[1] == i.args[2] + i.args.pop + i.opname = {'or' => 'mr', 'or.' => 'mr.', 'nor' => 'not', 'nor.' => 'not.'}[i.opname] + end + when /^addi/ + if a = i.args[2].reduce and a.kind_of? Integer and a < 0 + i.args[2] = Expression[-a] + i.opname = i.opname.sub('addi', 'subi') + end + end - case i.opname - when /^(add|sub|xor|and|or|div|mul|nand)/ - if i.args.length == 3 and i.args[0] == i.args[1] - i.args.shift - end - end + case i.opname + when /^(add|sub|xor|and|or|div|mul|nand)/ + if i.args.length == 3 and i.args[0] == i.args[1] + i.args.shift + end + end - end + end - # converts relative branch offsets to absolute addresses - # else just add the offset +off+ of the instruction + its length (off may be an Expression) - # assumes edata.ptr points just after the instruction (as decode_instr_op left it) - # do not call twice on the same di ! - def decode_instr_interpret(di, addr) - if di.opcode.props[:setip] and di.instruction.args.last.kind_of? 
Expression and di.opcode.name[0] != ?t and di.opcode.name[-1] != ?a - arg = Expression[addr, :+, di.instruction.args.last].reduce - di.instruction.args[-1] = Expression[arg] - end + # converts relative branch offsets to absolute addresses + # else just add the offset +off+ of the instruction + its length (off may be an Expression) + # assumes edata.ptr points just after the instruction (as decode_instr_op left it) + # do not call twice on the same di ! + def decode_instr_interpret(di, addr) + if di.opcode.props[:setip] and di.instruction.args.last.kind_of? Expression and di.opcode.name[0] != ?t and di.opcode.name[-1] != ?a + arg = Expression[addr, :+, di.instruction.args.last].reduce + di.instruction.args[-1] = Expression[arg] + end - di - end + di + end - # TODO - def backtrace_update_function_binding(dasm, faddr, f, retaddrlist, *wantregs) - retaddrlist.to_a.map! { |retaddr| dasm.decoded[retaddr] ? dasm.decoded[retaddr].block.list.last.address : retaddr } - b = f.backtrace_binding + # TODO + def backtrace_update_function_binding(dasm, faddr, f, retaddrlist, *wantregs) + retaddrlist.to_a.map! { |retaddr| dasm.decoded[retaddr] ? dasm.decoded[retaddr].block.list.last.address : retaddr } + b = f.backtrace_binding - bt_val = lambda { |r| - bt = [] - retaddrlist.to_a.each { |retaddr| - bt |= dasm.backtrace(Expression[r], retaddr, - :include_start => true, :snapshot_addr => faddr, :origin => retaddr) - } - b[r] = ((bt.length == 1) ? bt.first : Expression::Unknown) - } - wantregs = GPR::Sym if wantregs.empty? - wantregs.map { |r| r.to_sym }.each(&bt_val) + bt_val = lambda { |r| + bt = [] + retaddrlist.to_a.each { |retaddr| + bt |= dasm.backtrace(Expression[r], retaddr, + :include_start => true, :snapshot_addr => faddr, :origin => retaddr) + } + b[r] = ((bt.length == 1) ? bt.first : Expression::Unknown) + } + wantregs = GPR::Sym if wantregs.empty? 
+ wantregs.map { |r| r.to_sym }.each(&bt_val) - #puts "update_func_bind: #{Expression[faddr]} has sp -> #{b[:$sp]}" if not Expression[b[:$sp], :-, :$sp].reduce.kind_of?(::Integer) if $VERBOSE - end + #puts "update_func_bind: #{Expression[faddr]} has sp -> #{b[:$sp]}" if not Expression[b[:$sp], :-, :$sp].reduce.kind_of?(::Integer) if $VERBOSE + end - def backtrace_is_function_return(expr, di=nil) - expr.reduce_rec == :lr - end + def backtrace_is_function_return(expr, di=nil) + expr.reduce_rec == :lr + end - def backtrace_is_stack_address(expr) - Expression[expr].expr_externals.include? :sp - end + def backtrace_is_stack_address(expr) + Expression[expr].expr_externals.include? :sp + end - def replace_instr_arg_immediate(i, old, new) - i.args.map! { |a| - case a - when Expression; a == old ? new : Expression[a.bind(old => new).reduce] - when Memref - a.offset = (a.offset == old ? new : Expression[a.offset.bind(old => new).reduce]) if a.offset.kind_of? Expression - a - else a - end - } - end + def replace_instr_arg_immediate(i, old, new) + i.args.map! { |a| + case a + when Expression; a == old ? new : Expression[a.bind(old => new).reduce] + when Memref + a.offset = (a.offset == old ? new : Expression[a.offset.bind(old => new).reduce]) if a.offset.kind_of? Expression + a + else a + end + } + end - def disassembler_default_func - df = DecodedFunction.new - df.backtrace_binding = (0..31).inject({}) { |h, r| r != 1 ? h.update("r#{r}".to_sym => Expression::Unknown) : h } - df.backtracked_for = [BacktraceTrace.new(Expression[:lr], :default, Expression[:lr], :x)] - df.btfor_callback = lambda { |dasm, btfor, funcaddr, calladdr| - if funcaddr != :default - btfor - elsif di = dasm.decoded[calladdr] and di.opcode.props[:saveip] - btfor - else [] - end - } - df - end + def disassembler_default_func + df = DecodedFunction.new + df.backtrace_binding = (0..31).inject({}) { |h, r| r != 1 ? 
h.update("r#{r}".to_sym => Expression::Unknown) : h } + df.backtracked_for = [BacktraceTrace.new(Expression[:lr], :default, Expression[:lr], :x)] + df.btfor_callback = lambda { |dasm, btfor, funcaddr, calladdr| + if funcaddr != :default + btfor + elsif di = dasm.decoded[calladdr] and di.opcode.props[:saveip] + btfor + else [] + end + } + df + end - # hash opname => lambda { |di, *sym_args| binding } - def backtrace_binding - @backtrace_binding ||= init_backtrace_binding - end - def backtrace_binding=(b) @backtrace_binding = b end + # hash opname => lambda { |di, *sym_args| binding } + def backtrace_binding + @backtrace_binding ||= init_backtrace_binding + end + def backtrace_binding=(b) @backtrace_binding = b end - def init_backtrace_binding - @backtrace_binding ||= {} - opcode_list.map { |ol| ol.name }.uniq.each { |op| - binding = case op - when 'mr', 'li', 'la'; lambda { |di, a0, a1| { a0 => Expression[a1] } } - when 'lis'; lambda { |di, a0, a1| { a0 => Expression[a1, :<<, 16] } } - when 'mtctr'; lambda { |di, a0| { :ctr => Expression[a0] } } - when 'mfctr'; lambda { |di, a0| { a0 => Expression[:ctr] } } - when 'mtlr'; lambda { |di, a0| { :lr => Expression[a0] } } - when 'mflr'; lambda { |di, a0| { a0 => Expression[:lr] } } - when 'lwzu'; lambda { |di, a0, m| - ret = { a0 => Expression[m] } - ptr = m.pointer.externals.grep(Symbol).first - ret[ptr] = m.pointer if ptr != a0 - ret - } - when 'lwz'; lambda { |di, a0, m| { a0 => Expression[m] } } - when 'stwu'; lambda { |di, a0, m| - { m => Expression[a0], m.pointer.externals.grep(Symbol).first => m.pointer } - } - when 'stw'; lambda { |di, a0, m| { m => Expression[a0] } } - when 'rlwinm'; lambda { |di, a0, a1, sh, mb, me| - mb, me = mb.reduce, me.reduce - cpmsk = (1<<@size) - 1 - a1 = Expression[a1, :&, cpmsk] - rol = Expression[[a1, :<<, sh], :|, [a1, :>>, [@size, :-, sh]]] - if mb == me+1 - msk = cpmsk - elsif mb < me+1 - msk = (((1 << ((me+1)-mb)) - 1) << (@size-(me+1))) - else - msk = (((1 << (mb-(me+1))) - 1) << 
(@size-mb)) ^ cpmsk - end - { a0 => Expression[Expression[rol, :&, msk].reduce] } - } + def init_backtrace_binding + @backtrace_binding ||= {} + opcode_list.map { |ol| ol.name }.uniq.each { |op| + binding = case op + when 'mr', 'li', 'la'; lambda { |di, a0, a1| { a0 => Expression[a1] } } + when 'lis'; lambda { |di, a0, a1| { a0 => Expression[a1, :<<, 16] } } + when 'mtctr'; lambda { |di, a0| { :ctr => Expression[a0] } } + when 'mfctr'; lambda { |di, a0| { a0 => Expression[:ctr] } } + when 'mtlr'; lambda { |di, a0| { :lr => Expression[a0] } } + when 'mflr'; lambda { |di, a0| { a0 => Expression[:lr] } } + when 'lwzu'; lambda { |di, a0, m| + ret = { a0 => Expression[m] } + ptr = m.pointer.externals.grep(Symbol).first + ret[ptr] = m.pointer if ptr != a0 + ret + } + when 'lwz'; lambda { |di, a0, m| { a0 => Expression[m] } } + when 'stwu'; lambda { |di, a0, m| + { m => Expression[a0], m.pointer.externals.grep(Symbol).first => m.pointer } + } + when 'stw'; lambda { |di, a0, m| { m => Expression[a0] } } + when 'rlwinm'; lambda { |di, a0, a1, sh, mb, me| + mb, me = mb.reduce, me.reduce + cpmsk = (1<<@size) - 1 + a1 = Expression[a1, :&, cpmsk] + rol = Expression[[a1, :<<, sh], :|, [a1, :>>, [@size, :-, sh]]] + if mb == me+1 + msk = cpmsk + elsif mb < me+1 + msk = (((1 << ((me+1)-mb)) - 1) << (@size-(me+1))) + else + msk = (((1 << (mb-(me+1))) - 1) << (@size-mb)) ^ cpmsk + end + { a0 => Expression[Expression[rol, :&, msk].reduce] } + } - when 'add', 'addi', 'add.', 'addi.'; lambda { |di, *a| { a[0] => Expression[a[-2], :+, a[-1]] } } - when 'addis', 'addis.'; lambda { |di, *a| { a[0] => Expression[a[-2], :+, [a[-1], :<<, 16]] } } - when 'sub', 'subi', 'sub.', 'subi.'; lambda { |di, *a| { a[0] => Expression[a[-2], :-, a[-1]] } } - when 'subis', 'subis.'; lambda { |di, *a| { a[0] => Expression[a[-2], :-, [a[-1], :<<, 16]] } } - when /^b.*la?$/; lambda { |di, *a| { :lr => Expression[di.next_addr] } } - when 'nop', /^cmp/, /^b/; lambda { |di, *a| {} } - end + when 'add', 'addi', 
'add.', 'addi.'; lambda { |di, *a| { a[0] => Expression[a[-2], :+, a[-1]] } } + when 'addis', 'addis.'; lambda { |di, *a| { a[0] => Expression[a[-2], :+, [a[-1], :<<, 16]] } } + when 'sub', 'subi', 'sub.', 'subi.'; lambda { |di, *a| { a[0] => Expression[a[-2], :-, a[-1]] } } + when 'subis', 'subis.'; lambda { |di, *a| { a[0] => Expression[a[-2], :-, [a[-1], :<<, 16]] } } + when /^b.*la?$/; lambda { |di, *a| { :lr => Expression[di.next_addr] } } + when 'nop', /^cmp/, /^b/; lambda { |di, *a| {} } + end - @backtrace_binding[op] ||= binding if binding - } - @backtrace_binding - end + @backtrace_binding[op] ||= binding if binding + } + @backtrace_binding + end - def get_backtrace_binding(di) - a = di.instruction.args.map { |arg| - case arg - when Memref; arg.symbolic(di.address) - when Reg; arg.symbolic - else arg - end - } + def get_backtrace_binding(di) + a = di.instruction.args.map { |arg| + case arg + when Memref; arg.symbolic(di.address) + when Reg; arg.symbolic + else arg + end + } - binding = if binding = backtrace_binding[di.instruction.opname] - binding[di, *a] - else - puts "unknown instruction to emu #{di}" if $VERBOSE - {} - end + binding = if binding = backtrace_binding[di.instruction.opname] + binding[di, *a] + else + puts "unknown instruction to emu #{di}" if $VERBOSE + {} + end - binding - end + binding + end - def get_xrefs_x(dasm, di) - return [] if not di.opcode.props[:setip] + def get_xrefs_x(dasm, di) + return [] if not di.opcode.props[:setip] - arg = case di.instruction.opname - when 'bctr', 'bctrl'; :ctr - when 'blr', 'blrl'; :lr - else di.instruction.args.last - end + arg = case di.instruction.opname + when 'bctr', 'bctrl'; :ctr + when 'blr', 'blrl'; :lr + else di.instruction.args.last + end - [Expression[ - case arg - when Memref; Indirection[[arg.base.to_s.to_sym, :+, arg.offset], @size/8, di.address] - when Reg; arg.to_s.to_sym - else arg - end]] - end + [Expression[ + case arg + when Memref; Indirection[[arg.base.to_s.to_sym, :+, arg.offset], 
@size/8, di.address] + when Reg; arg.to_s.to_sym + else arg + end]] + end end end diff --git a/lib/metasm/metasm/cpu/ppc/decompile.rb b/lib/metasm/metasm/cpu/ppc/decompile.rb index c042903e29..500f7d35ab 100644 --- a/lib/metasm/metasm/cpu/ppc/decompile.rb +++ b/lib/metasm/metasm/cpu/ppc/decompile.rb @@ -8,244 +8,244 @@ require 'metasm/cpu/ppc/main' module Metasm class PowerPC - # temporarily setup dasm.address_binding so that backtracking - # stack-related offsets resolve in :frameptr (relative to func start) - def decompile_makestackvars(dasm, funcstart, blocks) - oldfuncbd = dasm.address_binding[funcstart] - dasm.address_binding[funcstart] = { :sp => :frameptr } # this would suffice, the rest here is just optimisation + # temporarily setup dasm.address_binding so that backtracking + # stack-related offsets resolve in :frameptr (relative to func start) + def decompile_makestackvars(dasm, funcstart, blocks) + oldfuncbd = dasm.address_binding[funcstart] + dasm.address_binding[funcstart] = { :sp => :frameptr } # this would suffice, the rest here is just optimisation - blocks.each { |block| - yield block - } + blocks.each { |block| + yield block + } - dasm.address_binding[funcstart] = oldfuncbd if oldfuncbd - end + dasm.address_binding[funcstart] = oldfuncbd if oldfuncbd + end - # list variable dependency for each block, remove useless writes - # returns { blockaddr => [list of vars that are needed by a following block] } - def decompile_func_finddeps(dcmp, blocks, func) - deps_r = {} ; deps_w = {} ; deps_to = {} - deps_subfunc = {} # things read/written by subfuncs + # list variable dependency for each block, remove useless writes + # returns { blockaddr => [list of vars that are needed by a following block] } + def decompile_func_finddeps(dcmp, blocks, func) + deps_r = {} ; deps_w = {} ; deps_to = {} + deps_subfunc = {} # things read/written by subfuncs - # find read/writes by each block - blocks.each { |b, to| - deps_r[b] = [] ; deps_w[b] = [] ; deps_to[b] = to - 
deps_subfunc[b] = [] + # find read/writes by each block + blocks.each { |b, to| + deps_r[b] = [] ; deps_w[b] = [] ; deps_to[b] = to + deps_subfunc[b] = [] - blk = dcmp.dasm.decoded[b].block - blk.list.each { |di| - a = di.backtrace_binding.values - w = [] - di.backtrace_binding.keys.each { |k| - case k - when ::Symbol; w |= [k] - else a |= Expression[k].externals # if dword [eax] <- 42, eax is read - end - } - #a << :eax if di.opcode.name == 'ret' # standard ABI + blk = dcmp.dasm.decoded[b].block + blk.list.each { |di| + a = di.backtrace_binding.values + w = [] + di.backtrace_binding.keys.each { |k| + case k + when ::Symbol; w |= [k] + else a |= Expression[k].externals # if dword [eax] <- 42, eax is read + end + } + #a << :eax if di.opcode.name == 'ret' # standard ABI - deps_r[b] |= a.map { |ee| Expression[ee].externals.grep(::Symbol) }.flatten - [:unknown] - deps_w[b] - deps_w[b] |= w.map { |ee| Expression[ee].externals.grep(::Symbol) }.flatten - [:unknown] - } - stackoff = nil - blk.each_to_normal { |t| - t = dcmp.backtrace_target(t, blk.list.last.address) - next if not t = dcmp.c_parser.toplevel.symbol[t] - t.type = C::Function.new(C::BaseType.new(:int)) if not t.type.kind_of? C::Function # XXX this may seem a bit extreme, and yes, it is. - stackoff ||= Expression[dcmp.dasm.backtrace(:sp, blk.list.last.address, :snapshot_addr => blocks.first[0]).first, :-, :sp].reduce - } - if stackoff # last block instr == subfunction call - deps_r[b] |= deps_subfunc[b] - deps_w[b] - #deps_w[b] |= [:eax, :ecx, :edx] # standard ABI - end - } + deps_r[b] |= a.map { |ee| Expression[ee].externals.grep(::Symbol) }.flatten - [:unknown] - deps_w[b] + deps_w[b] |= w.map { |ee| Expression[ee].externals.grep(::Symbol) }.flatten - [:unknown] + } + stackoff = nil + blk.each_to_normal { |t| + t = dcmp.backtrace_target(t, blk.list.last.address) + next if not t = dcmp.c_parser.toplevel.symbol[t] + t.type = C::Function.new(C::BaseType.new(:int)) if not t.type.kind_of? 
C::Function # XXX this may seem a bit extreme, and yes, it is. + stackoff ||= Expression[dcmp.dasm.backtrace(:sp, blk.list.last.address, :snapshot_addr => blocks.first[0]).first, :-, :sp].reduce + } + if stackoff # last block instr == subfunction call + deps_r[b] |= deps_subfunc[b] - deps_w[b] + #deps_w[b] |= [:eax, :ecx, :edx] # standard ABI + end + } - # find regs read and never written (must have been set by caller and are part of the func ABI) - uninitialized = lambda { |b, r, done| - from = deps_to.keys.find_all { |f| deps_to[f].include? b } - done - from.empty? or from.find { |f| - !deps_w[f].include?(r) and uninitialized[f, r, done + [b]] - } - } + # find regs read and never written (must have been set by caller and are part of the func ABI) + uninitialized = lambda { |b, r, done| + from = deps_to.keys.find_all { |f| deps_to[f].include? b } - done + from.empty? or from.find { |f| + !deps_w[f].include?(r) and uninitialized[f, r, done + [b]] + } + } - # remove writes from a block if no following block read the value - dw = {} - deps_w.each { |b, deps| - dw[b] = deps.reject { |dep| - ret = true - done = [] - todo = deps_to[b].dup - while a = todo.pop - next if done.include? a - done << a - if not deps_r[a] or deps_r[a].include? dep - ret = false - break - elsif not deps_w[a].include? dep - todo.concat deps_to[a] - end - end - ret - } - } + # remove writes from a block if no following block read the value + dw = {} + deps_w.each { |b, deps| + dw[b] = deps.reject { |dep| + ret = true + done = [] + todo = deps_to[b].dup + while a = todo.pop + next if done.include? a + done << a + if not deps_r[a] or deps_r[a].include? dep + ret = false + break + elsif not deps_w[a].include? 
dep + todo.concat deps_to[a] + end + end + ret + } + } - dw - end + dw + end - def decompile_blocks(dcmp, myblocks, deps, func, nextaddr = nil) - scope = func.initializer - func.type.args.each { |a| scope.symbol[a.name] = a } - stmts = scope.statements - func_entry = myblocks.first[0] - until myblocks.empty? - b, to = myblocks.shift - if l = dcmp.dasm.get_label_at(b) - stmts << C::Label.new(l) - end + def decompile_blocks(dcmp, myblocks, deps, func, nextaddr = nil) + scope = func.initializer + func.type.args.each { |a| scope.symbol[a.name] = a } + stmts = scope.statements + func_entry = myblocks.first[0] + until myblocks.empty? + b, to = myblocks.shift + if l = dcmp.dasm.get_label_at(b) + stmts << C::Label.new(l) + end - # list of assignments [[dest reg, expr assigned]] - ops = [] - # reg binding (reg => value, values.externals = regs at block start) - binding = {} - # Expr => CExpr - ce = lambda { |*e| dcmp.decompile_cexpr(Expression[Expression[*e].reduce], scope) } - # Expr => Expr.bind(binding) => CExpr - ceb = lambda { |*e| ce[Expression[*e].bind(binding)] } + # list of assignments [[dest reg, expr assigned]] + ops = [] + # reg binding (reg => value, values.externals = regs at block start) + binding = {} + # Expr => CExpr + ce = lambda { |*e| dcmp.decompile_cexpr(Expression[Expression[*e].reduce], scope) } + # Expr => Expr.bind(binding) => CExpr + ceb = lambda { |*e| ce[Expression[*e].bind(binding)] } - # dumps a CExprs that implements an assignment to a reg (uses ops[], patches op => [reg, nil]) - commit = lambda { - deps[b].map { |k| - [k, ops.rindex(ops.reverse.find { |r, v| r == k })] - }.sort_by { |k, i| i.to_i }.each { |k, i| - next if not i or not binding[k] - e = k - final = [] - ops[0..i].reverse_each { |r, v| - final << r if not v - e = Expression[e].bind(r => v).reduce if not final.include? 
r - } - ops[i][1] = nil - binding.delete k - stmts << ce[k, :'=', e] if k != e - } - } + # dumps a CExprs that implements an assignment to a reg (uses ops[], patches op => [reg, nil]) + commit = lambda { + deps[b].map { |k| + [k, ops.rindex(ops.reverse.find { |r, v| r == k })] + }.sort_by { |k, i| i.to_i }.each { |k, i| + next if not i or not binding[k] + e = k + final = [] + ops[0..i].reverse_each { |r, v| + final << r if not v + e = Expression[e].bind(r => v).reduce if not final.include? r + } + ops[i][1] = nil + binding.delete k + stmts << ce[k, :'=', e] if k != e + } + } - # go ! - dcmp.dasm.decoded[b].block.list.each_with_index { |di, didx| - a = di.instruction.args - if di.opcode.props[:setip] and not di.opcode.props[:stopexec] - # conditional jump - commit[] - n = dcmp.backtrace_target(get_xrefs_x(dcmp.dasm, di).first, di.address) - #cc = ceb[decode_cc_to_expr(di.opcode.name[1..-1])] - cc = ceb[:condjmp] - stmts << C::If.new(C::CExpression[cc], C::Goto.new(n)) - to.delete dcmp.dasm.normalize(n) - next - end + # go ! + dcmp.dasm.decoded[b].block.list.each_with_index { |di, didx| + a = di.instruction.args + if di.opcode.props[:setip] and not di.opcode.props[:stopexec] + # conditional jump + commit[] + n = dcmp.backtrace_target(get_xrefs_x(dcmp.dasm, di).first, di.address) + #cc = ceb[decode_cc_to_expr(di.opcode.name[1..-1])] + cc = ceb[:condjmp] + stmts << C::If.new(C::CExpression[cc], C::Goto.new(n)) + to.delete dcmp.dasm.normalize(n) + next + end - case di.opcode.name - when 'blr' - commit[] - stmts << C::Return.new(nil) - when 'bl' # :saveip - n = dcmp.backtrace_target(get_xrefs_x(dcmp.dasm, di).first, di.address) - args = [] - if t = dcmp.c_parser.toplevel.symbol[n] and t.type.args - stackoff = Expression[dcmp.dasm.backtrace(:sp, di.address, :snapshot_addr => func_entry), :-, :sp].bind(:sp => :frameptr).reduce rescue nil - args_todo = t.type.args.dup - args = [] - args_todo.each { - if stackoff.kind_of? 
Integer - var = Indirection[[:frameptr, :+, stackoff], @size/8] - stackoff += @size/8 - else - var = 0 - end - args << ceb[var] - binding.delete var - } - end - commit[] - #next if not di.block.to_subfuncret + case di.opcode.name + when 'blr' + commit[] + stmts << C::Return.new(nil) + when 'bl' # :saveip + n = dcmp.backtrace_target(get_xrefs_x(dcmp.dasm, di).first, di.address) + args = [] + if t = dcmp.c_parser.toplevel.symbol[n] and t.type.args + stackoff = Expression[dcmp.dasm.backtrace(:sp, di.address, :snapshot_addr => func_entry), :-, :sp].bind(:sp => :frameptr).reduce rescue nil + args_todo = t.type.args.dup + args = [] + args_todo.each { + if stackoff.kind_of? Integer + var = Indirection[[:frameptr, :+, stackoff], @size/8] + stackoff += @size/8 + else + var = 0 + end + args << ceb[var] + binding.delete var + } + end + commit[] + #next if not di.block.to_subfuncret - if n.kind_of? ::String - if not f = dcmp.c_parser.toplevel.symbol[n] - # internal functions are predeclared, so this one is extern - f = dcmp.c_parser.toplevel.symbol[n] = C::Variable.new - f.name = n - f.type = C::Function.new(C::BaseType.new(:int)) - dcmp.c_parser.toplevel.statements << C::Declaration.new(f) - end - commit[] - else - # indirect funcall - fptr = ceb[n] - binding.delete n - commit[] - proto = C::Function.new(C::BaseType.new(:int)) - f = C::CExpression[[fptr], proto] - end - binding.delete :eax - e = C::CExpression[f, :funcall, args] - e = C::CExpression[ce[:eax], :'=', e, f.type.type] if deps[b].include? :eax and f.type.type != C::BaseType.new(:void) - stmts << e - when 'b' - a = di.instruction.args.first - if a.kind_of? 
Expression - else - # indirect jmp, convert to return (*fptr)(); - n = di.instruction.args.first.symbolic - fptr = ceb[n] - binding.delete n - commit[] - proto = C::Function.new(C::BaseType.new(:void)) - ret = C::Return.new(C::CExpression[[[fptr], C::Pointer.new(proto)], :funcall, []]) - class << ret ; attr_accessor :from_instr end - ret.from_instr = di - stmts << ret - to = [] - end - else - bd = get_fwdemu_binding(di) - if di.backtrace_binding[:incomplete_binding] - commit[] - stmts << C::Asm.new(di.instruction.to_s, nil, nil, nil, nil, nil) - else - bd.each { |k, v| - if k.kind_of? ::Symbol - ops << [k, v] - else # memory - stmts << ceb[k, :'=', v] - binding.delete k - end - } - update = {} - bd.each { |k, v| - next if not k.kind_of? ::Symbol - update[k] = Expression[Expression[v].bind(binding).reduce] - } - binding.update update - end - end - } - commit[] + if n.kind_of? ::String + if not f = dcmp.c_parser.toplevel.symbol[n] + # internal functions are predeclared, so this one is extern + f = dcmp.c_parser.toplevel.symbol[n] = C::Variable.new + f.name = n + f.type = C::Function.new(C::BaseType.new(:int)) + dcmp.c_parser.toplevel.statements << C::Declaration.new(f) + end + commit[] + else + # indirect funcall + fptr = ceb[n] + binding.delete n + commit[] + proto = C::Function.new(C::BaseType.new(:int)) + f = C::CExpression[[fptr], proto] + end + binding.delete :eax + e = C::CExpression[f, :funcall, args] + e = C::CExpression[ce[:eax], :'=', e, f.type.type] if deps[b].include? :eax and f.type.type != C::BaseType.new(:void) + stmts << e + when 'b' + a = di.instruction.args.first + if a.kind_of? 
Expression + else + # indirect jmp, convert to return (*fptr)(); + n = di.instruction.args.first.symbolic + fptr = ceb[n] + binding.delete n + commit[] + proto = C::Function.new(C::BaseType.new(:void)) + ret = C::Return.new(C::CExpression[[[fptr], C::Pointer.new(proto)], :funcall, []]) + class << ret ; attr_accessor :from_instr end + ret.from_instr = di + stmts << ret + to = [] + end + else + bd = get_fwdemu_binding(di) + if di.backtrace_binding[:incomplete_binding] + commit[] + stmts << C::Asm.new(di.instruction.to_s, nil, nil, nil, nil, nil) + else + bd.each { |k, v| + if k.kind_of? ::Symbol + ops << [k, v] + else # memory + stmts << ceb[k, :'=', v] + binding.delete k + end + } + update = {} + bd.each { |k, v| + next if not k.kind_of? ::Symbol + update[k] = Expression[Expression[v].bind(binding).reduce] + } + binding.update update + end + end + } + commit[] - case to.length - when 0 - if not myblocks.empty? and not %w[ret jmp].include? dcmp.dasm.decoded[b].block.list.last.instruction.opname - puts " block #{Expression[b]} has no to and don't end in ret" - end - when 1 - if (myblocks.empty? ? nextaddr != to[0] : myblocks.first.first != to[0]) - stmts << C::Goto.new(dcmp.dasm.auto_label_at(to[0], 'unknown_goto')) - end - else - puts " block #{Expression[b]} with multiple to" - end - end - end + case to.length + when 0 + if not myblocks.empty? and not %w[ret jmp].include? dcmp.dasm.decoded[b].block.list.last.instruction.opname + puts " block #{Expression[b]} has no to and don't end in ret" + end + when 1 + if (myblocks.empty? ? 
nextaddr != to[0] : myblocks.first.first != to[0]) + stmts << C::Goto.new(dcmp.dasm.auto_label_at(to[0], 'unknown_goto')) + end + else + puts " block #{Expression[b]} with multiple to" + end + end + end end end diff --git a/lib/metasm/metasm/cpu/ppc/encode.rb b/lib/metasm/metasm/cpu/ppc/encode.rb index aa2c9a69c4..2484e321b5 100644 --- a/lib/metasm/metasm/cpu/ppc/encode.rb +++ b/lib/metasm/metasm/cpu/ppc/encode.rb @@ -9,43 +9,43 @@ require 'metasm/encode' module Metasm class PowerPC - private - def encode_instr_op(exe, instr, op) - base = op.bin - set_field = lambda { |f, v| - base |= (v & @fields_mask[f]) << @fields_shift[f] - } + private + def encode_instr_op(exe, instr, op) + base = op.bin + set_field = lambda { |f, v| + base |= (v & @fields_mask[f]) << @fields_shift[f] + } - val, mask, shift = 0, 0, 0 + val, mask, shift = 0, 0, 0 # TODO - # convert label name for jmp/call/loop to relative offset - if op.props[:setip] and op.name[0] != ?t and instr.args.last.kind_of? Expression - postlabel = exe.new_label('jmp_offset') - instr = instr.dup - instr.args[-1] = Expression[[instr.args[-1], :-, postlabel], :>>, 2] - postdata = EncodedData.new '', :export => {postlabel => 0} - else - postdata = '' - end + # convert label name for jmp/call/loop to relative offset + if op.props[:setip] and op.name[0] != ?t and instr.args.last.kind_of? 
Expression + postlabel = exe.new_label('jmp_offset') + instr = instr.dup + instr.args[-1] = Expression[[instr.args[-1], :-, postlabel], :>>, 2] + postdata = EncodedData.new '', :export => {postlabel => 0} + else + postdata = '' + end - op.args.zip(instr.args).each { |sym, arg| - case sym - when :rs, :rt, :rd, :ba, :bf, :bfa, :bt - set_field[sym, arg.i] - when :ft - set_field[sym, arg.i] - when :rs_i16 - set_field[:rs, arg.base.i] - val, mask, shift = arg.offset, @fields_mask[:i16], @fields_shift[:i16] - when :sa, :i16, :i20 - val, mask, shift = arg, @fields_mask[sym], @fields_shift[sym] - when :i26 - val, mask, shift = Expression[arg, :>>, 2], @fields_mask[sym], @fields_shift[sym] - end - } + op.args.zip(instr.args).each { |sym, arg| + case sym + when :rs, :rt, :rd, :ba, :bf, :bfa, :bt + set_field[sym, arg.i] + when :ft + set_field[sym, arg.i] + when :rs_i16 + set_field[:rs, arg.base.i] + val, mask, shift = arg.offset, @fields_mask[:i16], @fields_shift[:i16] + when :sa, :i16, :i20 + val, mask, shift = arg, @fields_mask[sym], @fields_shift[sym] + when :i26 + val, mask, shift = Expression[arg, :>>, 2], @fields_mask[sym], @fields_shift[sym] + end + } - Expression[base, :+, [[val, :&, mask], :<<, shift]].encode(:u32, @endianness) << postdata - end + Expression[base, :+, [[val, :&, mask], :<<, shift]].encode(:u32, @endianness) << postdata + end end end diff --git a/lib/metasm/metasm/cpu/ppc/main.rb b/lib/metasm/metasm/cpu/ppc/main.rb index b66385557a..6509ead596 100644 --- a/lib/metasm/metasm/cpu/ppc/main.rb +++ b/lib/metasm/metasm/cpu/ppc/main.rb @@ -9,126 +9,126 @@ require 'metasm/render' module Metasm class PowerPC < CPU - class Reg - include Renderable - class << self - attr_accessor :s_to_i, :i_to_s - end + class Reg + include Renderable + class << self + attr_accessor :s_to_i, :i_to_s + end - def ==(o) - o.class == self.class and (not respond_to?(:i) or o.i == i) - end + def ==(o) + o.class == self.class and (not respond_to?(:i) or o.i == i) + end - def render ; 
[self.class.i_to_s[@i]] ; end - end + def render ; [self.class.i_to_s[@i]] ; end + end - # general purpose reg - class GPR < Reg - attr_accessor :i - def initialize(i) - @i = i - end + # general purpose reg + class GPR < Reg + attr_accessor :i + def initialize(i) + @i = i + end - @s_to_i = (0..31).inject({}) { |h, i| h.update((i == 1 ? 'sp' : "r#{i}") => i) } - @i_to_s = @s_to_i.invert - Sym = @s_to_i.sort.transpose.last - def symbolic ; Sym[@i] end - end + @s_to_i = (0..31).inject({}) { |h, i| h.update((i == 1 ? 'sp' : "r#{i}") => i) } + @i_to_s = @s_to_i.invert + Sym = @s_to_i.sort.transpose.last + def symbolic ; Sym[@i] end + end - # special purpose reg - class SPR < Reg - @s_to_i = {'xer' => 1, 'lr' => 8, 'ctr' => 9, 'dec' => 22, 'srr0' => 26, 'srr1' => 27, - 'sprg0' => 272, 'sprg1' => 273, 'sprg2' => 274, 'sprg3' => 275, 'pvr' => 287} - @i_to_s = @s_to_i.invert + # special purpose reg + class SPR < Reg + @s_to_i = {'xer' => 1, 'lr' => 8, 'ctr' => 9, 'dec' => 22, 'srr0' => 26, 'srr1' => 27, + 'sprg0' => 272, 'sprg1' => 273, 'sprg2' => 274, 'sprg3' => 275, 'pvr' => 287} + @i_to_s = @s_to_i.invert - attr_accessor :i - def initialize(i) - @i = i - end + attr_accessor :i + def initialize(i) + @i = i + end - Sym = @i_to_s.sort.inject({}) { |h, (k, v)| h.update k => v.to_sym } - def symbolic ; Sym[@i] end - def render ; [self.class.i_to_s[@i] || "spr#@i"] end - end + Sym = @i_to_s.sort.inject({}) { |h, (k, v)| h.update k => v.to_sym } + def symbolic ; Sym[@i] end + def render ; [self.class.i_to_s[@i] || "spr#@i"] end + end - # floating point - class FPR < Reg - attr_accessor :i - def initialize(i) - @i = i - end + # floating point + class FPR < Reg + attr_accessor :i + def initialize(i) + @i = i + end - @s_to_i = (0..31).inject({}) { |h, i| h.update "fp#{i}" => i } - @i_to_s = @s_to_i.invert - Sym = @s_to_i.sort.transpose.last - end + @s_to_i = (0..31).inject({}) { |h, i| h.update "fp#{i}" => i } + @i_to_s = @s_to_i.invert + Sym = @s_to_i.sort.transpose.last + end - 
# machine state reg - class MSR < Reg - def symbolic ; :msr end - def render ; ['msr'] end - end + # machine state reg + class MSR < Reg + def symbolic ; :msr end + def render ; ['msr'] end + end - # condition reg (7 regs * 4 bits : lt, gt, eq, of) - class CR < Reg - attr_accessor :i - def initialize(i) - @i = i - end + # condition reg (7 regs * 4 bits : lt, gt, eq, of) + class CR < Reg + attr_accessor :i + def initialize(i) + @i = i + end - @s_to_i = (0..31).inject({}) { |h, i| h.update "cr#{i}" => i } - @i_to_s = @s_to_i.invert - Sym = @s_to_i.sort.transpose.last - def symbolic ; "cr#@i".to_sym end - end + @s_to_i = (0..31).inject({}) { |h, i| h.update "cr#{i}" => i } + @i_to_s = @s_to_i.invert + Sym = @s_to_i.sort.transpose.last + def symbolic ; "cr#@i".to_sym end + end - # indirection : reg+reg or reg+16b_off - # r0 may mean 0 in some cases (stwx) - class Memref - attr_accessor :base, :offset - def initialize(base, offset) - @base, @offset = base, offset - end + # indirection : reg+reg or reg+16b_off + # r0 may mean 0 in some cases (stwx) + class Memref + attr_accessor :base, :offset + def initialize(base, offset) + @base, @offset = base, offset + end - def symbolic(orig) - b = @base.symbolic - b = nil if b == :r0 # XXX is it true ? - o = @offset - o = o.symbolic if o.kind_of?(Reg) - Indirection[Expression[b, :+, o].reduce, 4, orig] - end + def symbolic(orig) + b = @base.symbolic + b = nil if b == :r0 # XXX is it true ? 
+ o = @offset + o = o.symbolic if o.kind_of?(Reg) + Indirection[Expression[b, :+, o].reduce, 4, orig] + end - include Renderable - def render - if @offset.kind_of?(Reg) - ['(', @base, ' + ', @offset, ')'] - else - [@offset, '(', @base, ')'] - end - end - end + include Renderable + def render + if @offset.kind_of?(Reg) + ['(', @base, ' + ', @offset, ')'] + else + [@offset, '(', @base, ')'] + end + end + end - def initialize - super() - @endianness = :big - @size = 32 - end + def initialize + super() + @endianness = :big + @size = 32 + end - def init_opcode_list - init - end + def init_opcode_list + init + end - def render_instruction(i) - r = [i.opname] - if not i.args.empty? - r << ' ' - i.args.each { |a| - r << a << ', ' - } - r.pop - end - r - end + def render_instruction(i) + r = [i.opname] + if not i.args.empty? + r << ' ' + i.args.each { |a| + r << a << ', ' + } + r.pop + end + r + end end PPC = PowerPC end diff --git a/lib/metasm/metasm/cpu/ppc/opcodes.rb b/lib/metasm/metasm/cpu/ppc/opcodes.rb index 1e03e77b9e..36f4ba95e0 100644 --- a/lib/metasm/metasm/cpu/ppc/opcodes.rb +++ b/lib/metasm/metasm/cpu/ppc/opcodes.rb @@ -8,409 +8,409 @@ require 'metasm/cpu/ppc/main' module Metasm class PowerPC - def addop(name, bin, *argprops) - o = Opcode.new name, bin - argprops.each { |a| - o.args << a if @valid_args[a] - o.fields[a] = [@fields_mask[a], @fields_shift[a]] if @fields_mask[a] - o.props[a] = true if @valid_props[a] - } - @opcode_list << o - end + def addop(name, bin, *argprops) + o = Opcode.new name, bin + argprops.each { |a| + o.args << a if @valid_args[a] + o.fields[a] = [@fields_mask[a], @fields_shift[a]] if @fields_mask[a] + o.props[a] = true if @valid_props[a] + } + @opcode_list << o + end - # generate l/a variations, add :setip/:saveip, include lr/ctr in opname - def addop_branch(nbase, bin, *argprops) - nbase += 'ctr' if argprops.delete :ctr - nbase += 'lr' if argprops.delete :lr - addop(nbase, bin, :setip, *argprops) - addop(nbase+'l', bin|1, :setip, 
:saveip, *argprops) - return if nbase[-2, 2] == 'lr' or nbase[-3, 3] == 'ctr' + # generate l/a variations, add :setip/:saveip, include lr/ctr in opname + def addop_branch(nbase, bin, *argprops) + nbase += 'ctr' if argprops.delete :ctr + nbase += 'lr' if argprops.delete :lr + addop(nbase, bin, :setip, *argprops) + addop(nbase+'l', bin|1, :setip, :saveip, *argprops) + return if nbase[-2, 2] == 'lr' or nbase[-3, 3] == 'ctr' - addop(nbase+'a', bin|2, :setip, *argprops) - addop(nbase+'la', bin|3, :setip, :saveip, *argprops) - end + addop(nbase+'a', bin|2, :setip, *argprops) + addop(nbase+'la', bin|3, :setip, :saveip, *argprops) + end - # generate condition variations, passes to addop_branch - def addop_branchcond(nbase, bin, *argprops) - # :bi & 0b11100 is the condition register to use, shift&mask == :bfa. Defaults to cr0 - # bo values - # no cc (10000 != 0) - addop_branch(nbase, bin|(0b10100<<21), :ign_bo_zzz, :stopexec, *argprops) - addop_branch(nbase+'dz', bin|(0b10010<<21), :ign_bo_at2, :stopexec, *argprops) if not argprops.include? :ctr - addop_branch(nbase+'dnz', bin|(0b10000<<21), :ign_bo_at2, :stopexec, *argprops) if not argprops.include? :ctr + # generate condition variations, passes to addop_branch + def addop_branchcond(nbase, bin, *argprops) + # :bi & 0b11100 is the condition register to use, shift&mask == :bfa. Defaults to cr0 + # bo values + # no cc (10000 != 0) + addop_branch(nbase, bin|(0b10100<<21), :ign_bo_zzz, :stopexec, *argprops) + addop_branch(nbase+'dz', bin|(0b10010<<21), :ign_bo_at2, :stopexec, *argprops) if not argprops.include? :ctr + addop_branch(nbase+'dnz', bin|(0b10000<<21), :ign_bo_at2, :stopexec, *argprops) if not argprops.include? 
:ctr - # conditionnal - %w[lt gt eq so].each_with_index { |cd, i| - ncd = {'lt' => 'gte', 'gt' => 'lte', 'eq' => 'ne', 'so' => 'nso'}[cd] - addop_branch(nbase+cd, bin|(0b1100<<21)|(i<<16), :ign_bo_at, *argprops) - addop_branch(nbase+cd, bin|(0b1100<<21)|(i<<16), :ign_bo_at, :bfa, *argprops) - addop_branch(nbase+ncd, bin|(0b100<<21)|(i<<16), :ign_bo_at, *argprops) - addop_branch(nbase+ncd, bin|(0b100<<21)|(i<<16), :ign_bo_at, :bfa, *argprops) - next if argprops.include? :ctr + # conditionnal + %w[lt gt eq so].each_with_index { |cd, i| + ncd = {'lt' => 'gte', 'gt' => 'lte', 'eq' => 'ne', 'so' => 'nso'}[cd] + addop_branch(nbase+cd, bin|(0b1100<<21)|(i<<16), :ign_bo_at, *argprops) + addop_branch(nbase+cd, bin|(0b1100<<21)|(i<<16), :ign_bo_at, :bfa, *argprops) + addop_branch(nbase+ncd, bin|(0b100<<21)|(i<<16), :ign_bo_at, *argprops) + addop_branch(nbase+ncd, bin|(0b100<<21)|(i<<16), :ign_bo_at, :bfa, *argprops) + next if argprops.include? :ctr - addop_branch(nbase+'dz'+cd, bin|(0b1010<<21)|(i<<16), :ign_bo_z, *argprops) - addop_branch(nbase+'dz'+cd, bin|(0b1010<<21)|(i<<16), :ign_bo_z, :bfa, *argprops) - addop_branch(nbase+'dnz'+cd, bin|(0b1000<<21)|(i<<16), :ign_bo_z, *argprops) - addop_branch(nbase+'dnz'+cd, bin|(0b1000<<21)|(i<<16), :ign_bo_z, :bfa, *argprops) - addop_branch(nbase+'dz'+ncd, bin|(0b010<<21)|(i<<16), :ign_bo_z, *argprops) - addop_branch(nbase+'dz'+ncd, bin|(0b010<<21)|(i<<16), :ign_bo_z, :bfa, *argprops) - addop_branch(nbase+'dnz'+ncd, bin|(0b000<<21)|(i<<16), :ign_bo_z, *argprops) - addop_branch(nbase+'dnz'+ncd, bin|(0b000<<21)|(i<<16), :ign_bo_z, :bfa, *argprops) - } - end + addop_branch(nbase+'dz'+cd, bin|(0b1010<<21)|(i<<16), :ign_bo_z, *argprops) + addop_branch(nbase+'dz'+cd, bin|(0b1010<<21)|(i<<16), :ign_bo_z, :bfa, *argprops) + addop_branch(nbase+'dnz'+cd, bin|(0b1000<<21)|(i<<16), :ign_bo_z, *argprops) + addop_branch(nbase+'dnz'+cd, bin|(0b1000<<21)|(i<<16), :ign_bo_z, :bfa, *argprops) + addop_branch(nbase+'dz'+ncd, bin|(0b010<<21)|(i<<16), 
:ign_bo_z, *argprops) + addop_branch(nbase+'dz'+ncd, bin|(0b010<<21)|(i<<16), :ign_bo_z, :bfa, *argprops) + addop_branch(nbase+'dnz'+ncd, bin|(0b000<<21)|(i<<16), :ign_bo_z, *argprops) + addop_branch(nbase+'dnz'+ncd, bin|(0b000<<21)|(i<<16), :ign_bo_z, :bfa, *argprops) + } + end - def addop_trap(nbase, bin, *argprops) - addop nbase+'trap', bin|(0b11111<<21), *argprops - addop nbase+'lt', bin|(0b10000<<21), *argprops - addop nbase+'le', bin|(0b10100<<21), *argprops - addop nbase+'eq', bin|(0b00100<<21), *argprops - addop nbase+'ge', bin|(0b01100<<21), *argprops - addop nbase+'gt', bin|(0b01000<<21), *argprops - addop nbase+'ne', bin|(0b11000<<21), *argprops - addop nbase+'llt', bin|(0b00010<<21), *argprops - addop nbase+'lle', bin|(0b00110<<21), *argprops - addop nbase+'lge', bin|(0b00101<<21), *argprops - addop nbase+'lgt', bin|(0b00001<<21), *argprops - end + def addop_trap(nbase, bin, *argprops) + addop nbase+'trap', bin|(0b11111<<21), *argprops + addop nbase+'lt', bin|(0b10000<<21), *argprops + addop nbase+'le', bin|(0b10100<<21), *argprops + addop nbase+'eq', bin|(0b00100<<21), *argprops + addop nbase+'ge', bin|(0b01100<<21), *argprops + addop nbase+'gt', bin|(0b01000<<21), *argprops + addop nbase+'ne', bin|(0b11000<<21), *argprops + addop nbase+'llt', bin|(0b00010<<21), *argprops + addop nbase+'lle', bin|(0b00110<<21), *argprops + addop nbase+'lge', bin|(0b00101<<21), *argprops + addop nbase+'lgt', bin|(0b00001<<21), *argprops + end - # generate cmp variations (default cr0, w/d) - def addop_cmp(nbase, bin, *argprops) - addop nbase.sub(/(cmpl?)/, '\\1w'), bin, *(argprops-[:bf]) - addop nbase.sub(/(cmpl?)/, '\\1w'), bin, *argprops - addop nbase.sub(/(cmpl?)/, '\\1d'), bin|(1<<@fields_shift[:l]), *(argprops-[:bf]) - addop nbase.sub(/(cmpl?)/, '\\1d'), bin|(1<<@fields_shift[:l]), *argprops - end + # generate cmp variations (default cr0, w/d) + def addop_cmp(nbase, bin, *argprops) + addop nbase.sub(/(cmpl?)/, '\\1w'), bin, *(argprops-[:bf]) + addop 
nbase.sub(/(cmpl?)/, '\\1w'), bin, *argprops + addop nbase.sub(/(cmpl?)/, '\\1d'), bin|(1<<@fields_shift[:l]), *(argprops-[:bf]) + addop nbase.sub(/(cmpl?)/, '\\1d'), bin|(1<<@fields_shift[:l]), *argprops + end - # adds op and 'op.' with last bit of bin set - def addop_(base, bin, *argprops) - addop(base, bin, *argprops) - addop(base+'.', bin|1, *argprops) - end + # adds op and 'op.' with last bit of bin set + def addop_(base, bin, *argprops) + addop(base, bin, *argprops) + addop(base+'.', bin|1, *argprops) + end - # adds op and 'opo' - def addop_o(base, bin, *argprops) - addop(base, bin, *argprops) - addop(base+'o', bin|0x400, *argprops) - end + # adds op and 'opo' + def addop_o(base, bin, *argprops) + addop(base, bin, *argprops) + addop(base+'o', bin|0x400, *argprops) + end - def init - @opcode_list = [] - @fields_shift.update :aa => 1, :ba => 16, :bb => 11, :bd => 2, :bf => 23, - :bfa => 18, :bh => 11, :bt => 21, :d => 0, :dq => 4, - :ds => 2, :flm => 17, :fra => 16, :frb => 11, :frc => 6, :frs => 21, - :frt => 21, :fxm => 12, :l => 21, :l_ => 21, :l__ => 16, :lev => 5, - :li => 2, :lk => 0, :mb => 5, :mb_ => 6, :me => 5, :me_ => 1, - :nb => 11, :oe => 10, :ra => 16, :rb => 11, :rc => 0, :rs => 21, - :rt => 21, :sh => 11, :sh_ => 1, :si => 0, :spr => 11, :sr => 16, - :tbr => 11, :th => 21, :to => 21, :u => 12, :ui => 0, - :ign_bo_zzz => 16, :ign_bo_z => 21, :ign_bo_at => 21, :ign_bo_at2 => 16 + def init + @opcode_list = [] + @fields_shift.update :aa => 1, :ba => 16, :bb => 11, :bd => 2, :bf => 23, + :bfa => 18, :bh => 11, :bt => 21, :d => 0, :dq => 4, + :ds => 2, :flm => 17, :fra => 16, :frb => 11, :frc => 6, :frs => 21, + :frt => 21, :fxm => 12, :l => 21, :l_ => 21, :l__ => 16, :lev => 5, + :li => 2, :lk => 0, :mb => 5, :mb_ => 6, :me => 5, :me_ => 1, + :nb => 11, :oe => 10, :ra => 16, :rb => 11, :rc => 0, :rs => 21, + :rt => 21, :sh => 11, :sh_ => 1, :si => 0, :spr => 11, :sr => 16, + :tbr => 11, :th => 21, :to => 21, :u => 12, :ui => 0, + :ign_bo_zzz => 16, 
:ign_bo_z => 21, :ign_bo_at => 21, :ign_bo_at2 => 16 - @fields_mask.update :aa => 1, :ba => 31, :bb => 31, :bd => 0x3FFF, :bf => 7, - :bfa => 7, :bh => 3, :bt => 31, :d => 0xFFFF, :dq => 0xFFF, - :ds => 0x3FFF, :flm => 255, :fra => 31, :frb => 31, :frc => 31, :frs => 31, - :frt => 31, :fxm => 255, :l => 1, :l_ => 3, :l__ => 1, :lev => 127, - :li => 0xFFFFFF, :lk => 1, :mb => 63, :mb_ => 31, :me => 63, :me_ => 31, - :nb => 31, :oe => 1, :ra => 31, :rb => 31, :rc => 1, :rs => 31, - :rt => 31, :sh => 31, :sh_ => 1, :si => 0xFFFF, :spr => 0x3FF, :sr => 15, - :tbr => 0x3FF, :th => 15, :to => 31, :u => 15, :ui => 0xFFFF, - :ign_bo_zzz => 0b101111111, :ign_bo_z => 1, :ign_bo_at => 3, :ign_bo_at2 => 0b100111111 + @fields_mask.update :aa => 1, :ba => 31, :bb => 31, :bd => 0x3FFF, :bf => 7, + :bfa => 7, :bh => 3, :bt => 31, :d => 0xFFFF, :dq => 0xFFF, + :ds => 0x3FFF, :flm => 255, :fra => 31, :frb => 31, :frc => 31, :frs => 31, + :frt => 31, :fxm => 255, :l => 1, :l_ => 3, :l__ => 1, :lev => 127, + :li => 0xFFFFFF, :lk => 1, :mb => 63, :mb_ => 31, :me => 63, :me_ => 31, + :nb => 31, :oe => 1, :ra => 31, :rb => 31, :rc => 1, :rs => 31, + :rt => 31, :sh => 31, :sh_ => 1, :si => 0xFFFF, :spr => 0x3FF, :sr => 15, + :tbr => 0x3FF, :th => 15, :to => 31, :u => 15, :ui => 0xFFFF, + :ign_bo_zzz => 0b101111111, :ign_bo_z => 1, :ign_bo_at => 3, :ign_bo_at2 => 0b100111111 - @valid_args = @fields_mask.dup - [:ign_bo_zzz, :ign_bo_z, :ign_bo_at, :ign_bo_at2, :aa, :lk, :oe, :rc, :l].each { |k| @valid_args.delete k } + @valid_args = @fields_mask.dup + [:ign_bo_zzz, :ign_bo_z, :ign_bo_at, :ign_bo_at2, :aa, :lk, :oe, :rc, :l].each { |k| @valid_args.delete k } - @fields_shift[:ra_i16] = @fields_shift[:ra_i16s] = @fields_shift[:ra_i16q] = 0 - @fields_mask[:ra_i16] = (@fields_mask[:d] << @fields_shift[:d]) | (@fields_mask[:ra] << @fields_shift[:ra]) - @fields_mask[:ra_i16s] = (@fields_mask[:ds] << @fields_shift[:d]) | (@fields_mask[:ra] << @fields_shift[:ra]) - @fields_mask[:ra_i16q] = 
(@fields_mask[:dq] << @fields_shift[:d]) | (@fields_mask[:ra] << @fields_shift[:ra]) + @fields_shift[:ra_i16] = @fields_shift[:ra_i16s] = @fields_shift[:ra_i16q] = 0 + @fields_mask[:ra_i16] = (@fields_mask[:d] << @fields_shift[:d]) | (@fields_mask[:ra] << @fields_shift[:ra]) + @fields_mask[:ra_i16s] = (@fields_mask[:ds] << @fields_shift[:d]) | (@fields_mask[:ra] << @fields_shift[:ra]) + @fields_mask[:ra_i16q] = (@fields_mask[:dq] << @fields_shift[:d]) | (@fields_mask[:ra] << @fields_shift[:ra]) - addop_branch 'b', 0x48000000, :li, :stopexec - addop_branchcond 'b', 0x40000000, :bd - addop_branchcond 'b', 0x4C000020, :lr - addop_branchcond 'b', 0x4C000420, :ctr + addop_branch 'b', 0x48000000, :li, :stopexec + addop_branchcond 'b', 0x40000000, :bd + addop_branchcond 'b', 0x4C000020, :lr + addop_branchcond 'b', 0x4C000420, :ctr - addop 'sc', 0x44000002, :lev - addop 'crand', 0x4C000202, :bt, :ba, :bb - addop 'crxor', 0x4C000182, :bt, :ba, :bb - # alias crclr bx -> crxor bx, bx, bx - addop 'cror', 0x4C000382, :bt, :ba, :bb - # alias crmove bx, by -> cror bx, by, by - addop 'crnand', 0x4C0001C2, :bt, :ba, :bb - addop 'crnor', 0x4C000042, :bt, :ba, :bb - # alias crnot bx, by -> crnor bx, by, by - addop 'crandc', 0x4C000102, :bt, :ba, :bb - addop 'creqv', 0x4C000242, :bt, :ba, :bb - # alias crset bx -> creqv bx, bx, bx - addop 'crorc', 0x4C000342, :bt, :ba, :bb - addop 'mcrf', 0x4C000000, :bf, :bfa - addop 'lbz', 0x88000000, :rt, :ra_i16 - addop 'lbzu', 0x8C000000, :rt, :ra_i16 - addop 'lbzx', 0x7C0000AE, :rt, :ra, :rb - addop 'lbzux', 0x7C0000EE, :rt, :ra, :rb - addop 'lhz', 0xA0000000, :rt, :ra_i16 - addop 'lhzu', 0xA4000000, :rt, :ra_i16 - addop 'lhzx', 0x7C00022E, :rt, :ra, :rb - addop 'lhzux', 0x7C00026E, :rt, :ra, :rb - addop 'lha', 0xA8000000, :rt, :ra_i16 - addop 'lhau', 0xAC000000, :rt, :ra_i16 - addop 'lhax', 0x7C0002AE, :rt, :ra, :rb - addop 'lhaux', 0x7C0002EE, :rt, :ra, :rb - addop 'lwz', 0x80000000, :rt, :ra_i16 - addop 'lwzu', 0x84000000, :rt, :ra_i16 - 
addop 'lwzx', 0x7C00002E, :rt, :ra, :rb - addop 'lwzux', 0x7C00006E, :rt, :ra, :rb - addop 'lwa', 0xE8000002, :rt, :ra_i16s - addop 'lwax', 0x7C0002AA, :rt, :ra, :rb - addop 'lwaux', 0x7C0002EA, :rt, :ra, :rb - addop 'ld', 0xE8000000, :rt, :ra_i16s - addop 'ldu', 0xE8000001, :rt, :ra_i16s - addop 'ldx', 0x7C00002A, :rt, :ra, :rb - addop 'ldux', 0x7C00006A, :rt, :ra, :rb - addop 'stb', 0x98000000, :rs, :ra_i16 - addop 'stbu', 0x9C000000, :rs, :ra_i16 - addop 'stbx', 0x7C0001AE, :rs, :ra, :rb - addop 'stbux', 0x7C0001EE, :rs, :ra, :rb - addop 'sth', 0xB0000000, :rs, :ra_i16 - addop 'sthu', 0xB4000000, :rs, :ra_i16 - addop 'sthx', 0x7C00032E, :rs, :ra, :rb - addop 'sthux', 0x7C00036E, :rs, :ra, :rb - addop 'stw', 0x90000000, :rs, :ra_i16 - addop 'stwu', 0x94000000, :rs, :ra_i16 - addop 'stwx', 0x7C00012E, :rs, :ra, :rb - addop 'stwux', 0x7C00016E, :rs, :ra, :rb - addop 'std', 0xF8000000, :rs, :ra_i16s - addop 'stdu', 0xF8000001, :rs, :ra_i16s - addop 'stdx', 0x7C00012A, :rs, :ra, :rb - addop 'stdux', 0x7C00016A, :rs, :ra, :rb - addop 'lhbrx', 0x7C00062C, :rt, :ra, :rb - addop 'lwbrx', 0x7C00042C, :rt, :ra, :rb - addop 'sthbrx', 0x7C00072C, :rs, :ra, :rb - addop 'stwbrx', 0x7C00052C, :rs, :ra, :rb - addop 'lmw', 0xB8000000, :rt, :ra_i16 - addop 'stmw', 0xBC000000, :rs, :ra_i16 - addop 'lswi', 0x7C0004AA, :rt, :ra, :nb - addop 'lswx', 0x7C00042A, :rt, :ra, :rb - addop 'stswi', 0x7C0005AA, :rs, :ra, :nb - addop 'stswx', 0x7C00052A, :rs, :ra, :rb - addop 'li', 0x38000000, :rt, :si # alias li rx, value -> addi rx, 0, value - addop 'addi', 0x38000000, :rt, :ra, :si - addop 'la', 0x38000000, :rt, :ra_i16 # alias la rx, disp(ry) -> addi rx, ry, disp - addop 'lis', 0x3C000000, :rt, :si # alias lis rx, value -> addis rx, 0, value - addop 'addis', 0x3C000000, :rt, :ra, :si - addop_o 'add', 0x7C000214, :rt, :ra, :rb - addop 'addic', 0x30000000, :rt, :ra, :si - addop_o 'sub', 0x7C000050, :rt, :rb, :ra # alias sub rx, ry, rz -> subf rx, rz, ry - addop_o 'subf', 0x7C000050, :rt, 
:ra, :rb - addop 'addic.', 0x34000000, :rt, :ra, :si - addop 'subfic', 0x20000000, :rt, :ra, :si - addop_o 'addc', 0x7C000014, :rt, :ra, :rb - addop_o 'subc', 0x7C000010, :rt, :rb, :ra # alias subc rx, ry, rz -> subfc rx, rz, ry - addop_o 'subfc',0x7C000010, :rt, :ra, :rb - addop_o 'adde', 0x7C000114, :rt, :ra, :rb - addop_o 'addme',0x7C0001D4, :rt, :ra - addop_o 'subfe',0x7C000110, :rt, :ra, :rb - addop_o 'subfme',0x7C0001D0,:rt, :ra - addop_o 'addze',0x7C000194, :rt, :ra - addop_o 'subfze',0x7C000190,:rt, :ra - addop_o 'neg', 0x7C0000D0, :rt, :ra - addop 'mulli', 0x1C000000, :rt, :ra, :si - addop_o 'mulld',0x7C0001D2, :rt, :ra, :rb - addop_o 'mullw',0x7C0001D6, :rt, :ra, :rb - addop_ 'mulhd', 0x7C000092, :rt, :ra, :rb - addop_ 'mulhdu',0x7C000012, :rt, :ra, :rb - addop_ 'mulhw', 0x7C000096, :rt, :ra, :rb - addop_ 'mulhwu',0x7C000016, :rt, :ra, :rb - addop_o 'divd', 0x7C0003D2, :rt, :ra, :rb - addop_o 'divw', 0x7C0003D6, :rt, :ra, :rb - addop_o 'divdu',0x7C000392, :rt, :ra, :rb - addop_o 'divwu',0x7C000396, :rt, :ra, :rb - addop_cmp 'cmpi', 0x2C000000, :bf, :ra, :si - addop_cmp 'cmp', 0x7C000000, :bf, :ra, :rb - addop_cmp 'cmpli', 0x28000000, :bf, :ra, :ui - addop_cmp 'cmpl', 0x7C000040, :bf, :ra, :rb - addop 'andi.', 0x70000000, :ra, :rs, :ui - addop 'andis.', 0x74000000, :ra, :rs, :ui - addop 'nop', 0x60000000 - addop 'ori', 0x60000000, :ra, :rs, :ui - addop 'oris', 0x64000000, :ra, :rs, :ui - addop 'xori', 0x68000000, :ra, :rs, :ui - addop 'xoris', 0x6C000000, :ra, :rs, :ui - addop_ 'and', 0x7C000038, :ra, :rs, :rb - addop_ 'xor', 0x7C000278, :ra, :rs, :rb - addop_ 'or', 0x7C000378, :ra, :rs, :rb - # alias mr rx, ry -> or rx, ry, ry - addop_ 'nand', 0x7C0003B8, :ra, :rs, :rb - addop_ 'nor', 0x7C0000F8, :ra, :rs, :rb - # alias not rx, ry -> nor rx, ry, ry - addop_ 'andc', 0x7C000078, :ra, :rs, :rb - addop_ 'eqv', 0x7C000238, :ra, :rs, :rb - addop_ 'orc', 0x7C000338, :ra, :rs, :rb - addop_ 'extsb', 0x7C000774, :ra, :rs - addop_ 'extsw', 0x7C0007B4, :ra, :rs - 
addop_ 'extsh', 0x7C000734, :ra, :rs - addop_ 'cntlzd',0x7C000074, :ra, :rs - addop_ 'cntlzw',0x7C000034, :ra, :rs - addop 'popcntb',0x7C0000F4, :ra, :rs - addop 'clrldi', 0x78000000, :ra, :rs, :mb # alias clrldi rx, ry, n -> rldicl rx, ry, 0, n - addop_ 'rldicl',0x78000000, :ra, :rs, :sh, :mb, :sh_ - # alias extrdi rx, ry, n, b -> rldicl rx, ry, b+n, 64 - n - # alias srdi rx, ry, n -> rldicl rx, ry, 64 - n, n - addop_ 'rldicr',0x78000004, :ra, :rs, :sh, :me, :sh_ - # alias extldi rx, ry, n, b -> rldicr rx, ry, b, n - 1 - # alias sldi rx, ry, n -> rldicr rx, ry, n, 63 - n - # alias clrrdi rx, ry, n -> rldicr rx, ry, 0, 63 - n - addop_ 'rldic', 0x78000008, :ra, :rs, :sh, :mb, :sh_ - # alias clrlsldi rx, ry, b, n -> rldic rx, ry, n, b - n - addop_ 'rlwinm',0x54000000, :ra, :rs, :sh, :mb_, :me_ - # alias extlwi rx, ry, n, b -> rlwinm rx, ry, b, 0, n - 1 - # alias srwi rx, ry, n -> rlwinm rx, ry, 32 - n, n, 31 - # alias clrrwi rx, ry, n -> rlwinm rx, ry, 0, 0, 31 - n - addop 'rotld', 0x78000010, :ra, :rs, :rb # alias rotld rx, ry, rz -> rldcl rx, ry, rz, 0 - addop_ 'rldcl', 0x78000010, :ra, :rs, :rb, :mb - addop_ 'rldcr', 0x78000012, :ra, :rs, :rb, :me - addop 'rotlw', 0x5C000000|(31<<@fields_shift[:me_]), :ra, :rs, :rb # alias rotlw rx, ry, rz -> rlwnm rx, ry, rz, 0, 31 - addop_ 'rlwnm', 0x5C000000, :ra, :rs, :rb, :mb_, :me_ - addop_ 'rldimi',0x7800000C, :ra, :rs, :sh, :mb, :sh_ - # alias insrdi rx, ry, n, b -> rldimi rx, ry, 64 - (b+n), b - addop_ 'rlwimi',0x50000000, :ra, :rs, :sh, :mb_, :me_ - # alias inslwi rx, ry, n, b -> rlwimi rx, ry, 32-b, b, b+n - 1 - addop_ 'sld', 0x7C000036, :ra, :rs, :rb - addop_ 'slw', 0x7C000030, :ra, :rs, :rb - addop_ 'srd', 0x7C000436, :ra, :rs, :rb - addop_ 'srw', 0x7C000430, :ra, :rs, :rb - addop_ 'sradi', 0x7C000674, :ra, :rs, :sh, :sh_ - addop_ 'srawi', 0x7C000670, :ra, :rs, :sh - addop_ 'srad', 0x7C000634, :ra, :rs, :rb - addop_ 'sraw', 0x7C000630, :ra, :rs, :rb - #addop 'mtspr', 0x7C0003A6, :spr, :rs - addop 'mtxer', 
0x7C0003A6|(1<<16), :rs - addop 'mtlr', 0x7C0003A6|(8<<16), :rs - addop 'mtctr', 0x7C0003A6|(9<<16), :rs - #addop 'mfspr', 0x7C0002A6, :rt, :spr - addop 'mfxer', 0x7C0002A6|(1<<16), :rt - addop 'mflr', 0x7C0002A6|(8<<16), :rt - addop 'mfctr', 0x7C0002A6|(9<<16), :rt - addop 'mtcrf', 0x7C000120, :fxm, :rs - # alias mtcr rx -> mtcrf 0xff, rx - addop 'mfcr', 0x7C000026, :rt - addop 'lfs', 0xC0000000, :frt, :ra_i16 - addop 'lfsu', 0xC4000000, :frt, :ra_i16 - addop 'lfsx', 0x7C00042E, :frt, :ra, :rb - addop 'lfsux', 0x7C00046E, :frt, :ra, :rb - addop 'lfd', 0xC8000000, :frt, :ra_i16 - addop 'lfdu', 0xCC000000, :frt, :ra_i16 - addop 'lfdx', 0x7C0004AE, :frt, :ra, :rb - addop 'lfdux', 0x7C0004EE, :frt, :ra, :rb - addop 'stfs', 0xD0000000, :frs, :ra_i16 - addop 'stfsu', 0xD4000000, :frs, :ra_i16 - addop 'stfsx', 0x7C00052E, :frs, :ra, :rb - addop 'stfsux', 0x7C00056E, :frs, :ra, :rb - addop 'stfd', 0xD8000000, :frs, :ra_i16 - addop 'stfdu', 0xDC000000, :frs, :ra_i16 - addop 'stfdx', 0x7C0005AE, :frs, :ra, :rb - addop 'stfdux', 0x7C0005EE, :frs, :ra, :rb - addop 'stfiwx', 0x7C0007AE, :frs, :ra, :rb - addop_ 'fmr', 0xFC000090, :frt, :frb - addop_ 'fabs', 0xFC000210, :frt, :frb - addop_ 'fneg', 0xFC000050, :frt, :frb - addop_ 'fnabs', 0xFC000110, :frt, :frb - addop_ 'fadd', 0xFC00002A, :frt, :fra, :frb - addop_ 'fadds', 0xEC00002A, :frt, :fra, :frb - addop_ 'fsub', 0xFC000028, :frt, :fra, :frb - addop_ 'fsubs', 0xEC000028, :frt, :fra, :frb - addop_ 'fmul', 0xFC000032, :frt, :fra, :frc - addop_ 'fmuls', 0xEC000032, :frt, :fra, :frc - addop_ 'fdiv', 0xFC000024, :frt, :fra, :frb - addop_ 'fdivs', 0xEC000024, :frt, :fra, :frb - addop_ 'fmadd', 0xFC00003A, :frt, :fra, :frc, :frb - addop_ 'fmadds',0xEC00003A, :frt, :fra, :frc, :frb - addop_ 'fmsub', 0xFC000038, :frt, :fra, :frc, :frb - addop_ 'fmsubs',0xEC000038, :frt, :fra, :frc, :frb - addop_ 'fnmadd',0xFC00003E, :frt, :fra, :frc, :frb - addop_ 'fnmadds',0xEC00003E,:frt, :fra, :frc, :frb - addop_ 'fnmsub',0xFC00003C, :frt, :fra, 
:frc, :frb - addop_ 'fnmsubs',0xEC00003C,:frt, :fra, :frc, :frb - addop_ 'frsp', 0xFC000018, :frt, :frb - addop_ 'fctid', 0xFC00065C, :frt, :frb - addop_ 'fctidz',0xFC00065E, :frt, :frb - addop_ 'fctiw', 0xFC00001C, :frt, :frb - addop_ 'fctiwz',0xFC00001E, :frt, :frb - addop_ 'fcfid', 0xFC00069C, :frt, :frb - addop 'fcmpu', 0xFC000000, :bf, :fra, :frb - addop 'fcmpo', 0xFC000040, :bf, :fra, :frb - addop_ 'mffs', 0xFC00048E, :frt - addop 'mcrfs', 0xFC000080, :bf, :bfa - addop_ 'mtfsfi',0xFC00010C, :bf, :u - addop_ 'mtfsf', 0xFC00058E, :flm, :frb - addop_ 'mtfsb0',0xFC00008C, :bt - addop_ 'mtfsb1',0xFC00004C, :bt - addop 'mtocrf', 0x7C100120, :fxm, :rs - addop_ 'fsqrt', 0xFC00002C, :frt, :frb - addop_ 'fsqrts',0xEC00002C, :frt, :frb - addop_ 'fre', 0xFC000030, :frt, :frb - addop_ 'fres', 0xEC000030, :frt, :frb - addop_ 'frsqrte',0xFC000034,:frt, :frb - addop_ 'frsqrtes',0xEC000034, :frt, :frb - addop_ 'fsel', 0xFC00002E, :frt, :fra, :frc, :frb - addop 'mcrxr', 0x7C000400, :bf - addop 'icbi', 0x7C0007AC, :ra, :rb - addop 'dcbt', 0x7C00022C, :ra, :rb - addop 'dcbtst', 0x7C0001EC, :ra, :rb - addop 'dcbz', 0x7C0007EC, :ra, :rb - addop 'dcbst', 0x7C00006C, :ra, :rb - addop 'dcbf', 0x7C0000AC, :ra, :rb - addop 'isync', 0x4C00012C - addop 'lwarx', 0x7C000028, :rt, :ra, :rb - addop 'ldarx', 0x7C0000A8, :rt, :ra, :rb - addop 'stwcx.', 0x7C00012D, :rs, :ra, :rb - addop 'stdcx.', 0x7C0001AD, :rs, :ra, :rb - addop 'sync', 0x7C0004AC, :l_ - addop 'eieio', 0x7C0006AC - addop 'mftb', 0x7C0002E6, :rt, :tbr - addop 'eciwx', 0x7C00026C, :rt, :ra, :rb - addop 'ecowx', 0x7C00036C, :rs, :ra, :rb - addop 'dcbt', 0x7C00022C, :ra, :rb, :th - addop 'dcbf', 0x7C0000AC, :ra, :rb - addop 'dcbf', 0x7C0000AC, :ra, :rb, :l - addop 'sc', 0x44000002, :lev - addop 'rfid', 0x4C000024 - addop 'hrfid', 0x4C000224 - addop 'mtmsrd', 0x7C000164, :rs, :l__ - addop 'mfmsr', 0x7C0000A6, :rt - addop 'slbie', 0x7C000364, :rb - addop 'slbmte', 0x7C000324, :rs, :rb - addop 'slbmfev',0x7C0006A6, :rt, :rb - addop 
'slbmfee',0x7C000726, :rt, :rb - addop 'tlbie', 0x7C000264, :rb, :l - addop 'tlbiel', 0x7C000224, :rb, :l - addop 'tlbia', 0x7C0002E4 - addop 'tlbsync',0x7C00046C - addop 'mtmsr', 0x7C000124, :rs, :l__ - addop 'lq', 0xE0000000, :rt, :ra_i16q - addop 'stq', 0xF8000002, :rs, :ra_i16s - addop 'mtsr', 0x7C0001A4, :sr, :rs - addop 'mtsrin', 0x7C0001E4, :rs, :rb - addop 'mfsr', 0x7C0004A6, :rt, :sr - addop 'mfsrin', 0x7C000526, :rt, :rb + addop 'sc', 0x44000002, :lev + addop 'crand', 0x4C000202, :bt, :ba, :bb + addop 'crxor', 0x4C000182, :bt, :ba, :bb + # alias crclr bx -> crxor bx, bx, bx + addop 'cror', 0x4C000382, :bt, :ba, :bb + # alias crmove bx, by -> cror bx, by, by + addop 'crnand', 0x4C0001C2, :bt, :ba, :bb + addop 'crnor', 0x4C000042, :bt, :ba, :bb + # alias crnot bx, by -> crnor bx, by, by + addop 'crandc', 0x4C000102, :bt, :ba, :bb + addop 'creqv', 0x4C000242, :bt, :ba, :bb + # alias crset bx -> creqv bx, bx, bx + addop 'crorc', 0x4C000342, :bt, :ba, :bb + addop 'mcrf', 0x4C000000, :bf, :bfa + addop 'lbz', 0x88000000, :rt, :ra_i16 + addop 'lbzu', 0x8C000000, :rt, :ra_i16 + addop 'lbzx', 0x7C0000AE, :rt, :ra, :rb + addop 'lbzux', 0x7C0000EE, :rt, :ra, :rb + addop 'lhz', 0xA0000000, :rt, :ra_i16 + addop 'lhzu', 0xA4000000, :rt, :ra_i16 + addop 'lhzx', 0x7C00022E, :rt, :ra, :rb + addop 'lhzux', 0x7C00026E, :rt, :ra, :rb + addop 'lha', 0xA8000000, :rt, :ra_i16 + addop 'lhau', 0xAC000000, :rt, :ra_i16 + addop 'lhax', 0x7C0002AE, :rt, :ra, :rb + addop 'lhaux', 0x7C0002EE, :rt, :ra, :rb + addop 'lwz', 0x80000000, :rt, :ra_i16 + addop 'lwzu', 0x84000000, :rt, :ra_i16 + addop 'lwzx', 0x7C00002E, :rt, :ra, :rb + addop 'lwzux', 0x7C00006E, :rt, :ra, :rb + addop 'lwa', 0xE8000002, :rt, :ra_i16s + addop 'lwax', 0x7C0002AA, :rt, :ra, :rb + addop 'lwaux', 0x7C0002EA, :rt, :ra, :rb + addop 'ld', 0xE8000000, :rt, :ra_i16s + addop 'ldu', 0xE8000001, :rt, :ra_i16s + addop 'ldx', 0x7C00002A, :rt, :ra, :rb + addop 'ldux', 0x7C00006A, :rt, :ra, :rb + addop 'stb', 0x98000000, :rs, 
:ra_i16 + addop 'stbu', 0x9C000000, :rs, :ra_i16 + addop 'stbx', 0x7C0001AE, :rs, :ra, :rb + addop 'stbux', 0x7C0001EE, :rs, :ra, :rb + addop 'sth', 0xB0000000, :rs, :ra_i16 + addop 'sthu', 0xB4000000, :rs, :ra_i16 + addop 'sthx', 0x7C00032E, :rs, :ra, :rb + addop 'sthux', 0x7C00036E, :rs, :ra, :rb + addop 'stw', 0x90000000, :rs, :ra_i16 + addop 'stwu', 0x94000000, :rs, :ra_i16 + addop 'stwx', 0x7C00012E, :rs, :ra, :rb + addop 'stwux', 0x7C00016E, :rs, :ra, :rb + addop 'std', 0xF8000000, :rs, :ra_i16s + addop 'stdu', 0xF8000001, :rs, :ra_i16s + addop 'stdx', 0x7C00012A, :rs, :ra, :rb + addop 'stdux', 0x7C00016A, :rs, :ra, :rb + addop 'lhbrx', 0x7C00062C, :rt, :ra, :rb + addop 'lwbrx', 0x7C00042C, :rt, :ra, :rb + addop 'sthbrx', 0x7C00072C, :rs, :ra, :rb + addop 'stwbrx', 0x7C00052C, :rs, :ra, :rb + addop 'lmw', 0xB8000000, :rt, :ra_i16 + addop 'stmw', 0xBC000000, :rs, :ra_i16 + addop 'lswi', 0x7C0004AA, :rt, :ra, :nb + addop 'lswx', 0x7C00042A, :rt, :ra, :rb + addop 'stswi', 0x7C0005AA, :rs, :ra, :nb + addop 'stswx', 0x7C00052A, :rs, :ra, :rb + addop 'li', 0x38000000, :rt, :si # alias li rx, value -> addi rx, 0, value + addop 'addi', 0x38000000, :rt, :ra, :si + addop 'la', 0x38000000, :rt, :ra_i16 # alias la rx, disp(ry) -> addi rx, ry, disp + addop 'lis', 0x3C000000, :rt, :si # alias lis rx, value -> addis rx, 0, value + addop 'addis', 0x3C000000, :rt, :ra, :si + addop_o 'add', 0x7C000214, :rt, :ra, :rb + addop 'addic', 0x30000000, :rt, :ra, :si + addop_o 'sub', 0x7C000050, :rt, :rb, :ra # alias sub rx, ry, rz -> subf rx, rz, ry + addop_o 'subf', 0x7C000050, :rt, :ra, :rb + addop 'addic.', 0x34000000, :rt, :ra, :si + addop 'subfic', 0x20000000, :rt, :ra, :si + addop_o 'addc', 0x7C000014, :rt, :ra, :rb + addop_o 'subc', 0x7C000010, :rt, :rb, :ra # alias subc rx, ry, rz -> subfc rx, rz, ry + addop_o 'subfc',0x7C000010, :rt, :ra, :rb + addop_o 'adde', 0x7C000114, :rt, :ra, :rb + addop_o 'addme',0x7C0001D4, :rt, :ra + addop_o 'subfe',0x7C000110, :rt, :ra, :rb + 
addop_o 'subfme',0x7C0001D0,:rt, :ra + addop_o 'addze',0x7C000194, :rt, :ra + addop_o 'subfze',0x7C000190,:rt, :ra + addop_o 'neg', 0x7C0000D0, :rt, :ra + addop 'mulli', 0x1C000000, :rt, :ra, :si + addop_o 'mulld',0x7C0001D2, :rt, :ra, :rb + addop_o 'mullw',0x7C0001D6, :rt, :ra, :rb + addop_ 'mulhd', 0x7C000092, :rt, :ra, :rb + addop_ 'mulhdu',0x7C000012, :rt, :ra, :rb + addop_ 'mulhw', 0x7C000096, :rt, :ra, :rb + addop_ 'mulhwu',0x7C000016, :rt, :ra, :rb + addop_o 'divd', 0x7C0003D2, :rt, :ra, :rb + addop_o 'divw', 0x7C0003D6, :rt, :ra, :rb + addop_o 'divdu',0x7C000392, :rt, :ra, :rb + addop_o 'divwu',0x7C000396, :rt, :ra, :rb + addop_cmp 'cmpi', 0x2C000000, :bf, :ra, :si + addop_cmp 'cmp', 0x7C000000, :bf, :ra, :rb + addop_cmp 'cmpli', 0x28000000, :bf, :ra, :ui + addop_cmp 'cmpl', 0x7C000040, :bf, :ra, :rb + addop 'andi.', 0x70000000, :ra, :rs, :ui + addop 'andis.', 0x74000000, :ra, :rs, :ui + addop 'nop', 0x60000000 + addop 'ori', 0x60000000, :ra, :rs, :ui + addop 'oris', 0x64000000, :ra, :rs, :ui + addop 'xori', 0x68000000, :ra, :rs, :ui + addop 'xoris', 0x6C000000, :ra, :rs, :ui + addop_ 'and', 0x7C000038, :ra, :rs, :rb + addop_ 'xor', 0x7C000278, :ra, :rs, :rb + addop_ 'or', 0x7C000378, :ra, :rs, :rb + # alias mr rx, ry -> or rx, ry, ry + addop_ 'nand', 0x7C0003B8, :ra, :rs, :rb + addop_ 'nor', 0x7C0000F8, :ra, :rs, :rb + # alias not rx, ry -> nor rx, ry, ry + addop_ 'andc', 0x7C000078, :ra, :rs, :rb + addop_ 'eqv', 0x7C000238, :ra, :rs, :rb + addop_ 'orc', 0x7C000338, :ra, :rs, :rb + addop_ 'extsb', 0x7C000774, :ra, :rs + addop_ 'extsw', 0x7C0007B4, :ra, :rs + addop_ 'extsh', 0x7C000734, :ra, :rs + addop_ 'cntlzd',0x7C000074, :ra, :rs + addop_ 'cntlzw',0x7C000034, :ra, :rs + addop 'popcntb',0x7C0000F4, :ra, :rs + addop 'clrldi', 0x78000000, :ra, :rs, :mb # alias clrldi rx, ry, n -> rldicl rx, ry, 0, n + addop_ 'rldicl',0x78000000, :ra, :rs, :sh, :mb, :sh_ + # alias extrdi rx, ry, n, b -> rldicl rx, ry, b+n, 64 - n + # alias srdi rx, ry, n -> rldicl rx, ry, 
64 - n, n + addop_ 'rldicr',0x78000004, :ra, :rs, :sh, :me, :sh_ + # alias extldi rx, ry, n, b -> rldicr rx, ry, b, n - 1 + # alias sldi rx, ry, n -> rldicr rx, ry, n, 63 - n + # alias clrrdi rx, ry, n -> rldicr rx, ry, 0, 63 - n + addop_ 'rldic', 0x78000008, :ra, :rs, :sh, :mb, :sh_ + # alias clrlsldi rx, ry, b, n -> rldic rx, ry, n, b - n + addop_ 'rlwinm',0x54000000, :ra, :rs, :sh, :mb_, :me_ + # alias extlwi rx, ry, n, b -> rlwinm rx, ry, b, 0, n - 1 + # alias srwi rx, ry, n -> rlwinm rx, ry, 32 - n, n, 31 + # alias clrrwi rx, ry, n -> rlwinm rx, ry, 0, 0, 31 - n + addop 'rotld', 0x78000010, :ra, :rs, :rb # alias rotld rx, ry, rz -> rldcl rx, ry, rz, 0 + addop_ 'rldcl', 0x78000010, :ra, :rs, :rb, :mb + addop_ 'rldcr', 0x78000012, :ra, :rs, :rb, :me + addop 'rotlw', 0x5C000000|(31<<@fields_shift[:me_]), :ra, :rs, :rb # alias rotlw rx, ry, rz -> rlwnm rx, ry, rz, 0, 31 + addop_ 'rlwnm', 0x5C000000, :ra, :rs, :rb, :mb_, :me_ + addop_ 'rldimi',0x7800000C, :ra, :rs, :sh, :mb, :sh_ + # alias insrdi rx, ry, n, b -> rldimi rx, ry, 64 - (b+n), b + addop_ 'rlwimi',0x50000000, :ra, :rs, :sh, :mb_, :me_ + # alias inslwi rx, ry, n, b -> rlwimi rx, ry, 32-b, b, b+n - 1 + addop_ 'sld', 0x7C000036, :ra, :rs, :rb + addop_ 'slw', 0x7C000030, :ra, :rs, :rb + addop_ 'srd', 0x7C000436, :ra, :rs, :rb + addop_ 'srw', 0x7C000430, :ra, :rs, :rb + addop_ 'sradi', 0x7C000674, :ra, :rs, :sh, :sh_ + addop_ 'srawi', 0x7C000670, :ra, :rs, :sh + addop_ 'srad', 0x7C000634, :ra, :rs, :rb + addop_ 'sraw', 0x7C000630, :ra, :rs, :rb + #addop 'mtspr', 0x7C0003A6, :spr, :rs + addop 'mtxer', 0x7C0003A6|(1<<16), :rs + addop 'mtlr', 0x7C0003A6|(8<<16), :rs + addop 'mtctr', 0x7C0003A6|(9<<16), :rs + #addop 'mfspr', 0x7C0002A6, :rt, :spr + addop 'mfxer', 0x7C0002A6|(1<<16), :rt + addop 'mflr', 0x7C0002A6|(8<<16), :rt + addop 'mfctr', 0x7C0002A6|(9<<16), :rt + addop 'mtcrf', 0x7C000120, :fxm, :rs + # alias mtcr rx -> mtcrf 0xff, rx + addop 'mfcr', 0x7C000026, :rt + addop 'lfs', 0xC0000000, :frt, :ra_i16 + 
addop 'lfsu', 0xC4000000, :frt, :ra_i16 + addop 'lfsx', 0x7C00042E, :frt, :ra, :rb + addop 'lfsux', 0x7C00046E, :frt, :ra, :rb + addop 'lfd', 0xC8000000, :frt, :ra_i16 + addop 'lfdu', 0xCC000000, :frt, :ra_i16 + addop 'lfdx', 0x7C0004AE, :frt, :ra, :rb + addop 'lfdux', 0x7C0004EE, :frt, :ra, :rb + addop 'stfs', 0xD0000000, :frs, :ra_i16 + addop 'stfsu', 0xD4000000, :frs, :ra_i16 + addop 'stfsx', 0x7C00052E, :frs, :ra, :rb + addop 'stfsux', 0x7C00056E, :frs, :ra, :rb + addop 'stfd', 0xD8000000, :frs, :ra_i16 + addop 'stfdu', 0xDC000000, :frs, :ra_i16 + addop 'stfdx', 0x7C0005AE, :frs, :ra, :rb + addop 'stfdux', 0x7C0005EE, :frs, :ra, :rb + addop 'stfiwx', 0x7C0007AE, :frs, :ra, :rb + addop_ 'fmr', 0xFC000090, :frt, :frb + addop_ 'fabs', 0xFC000210, :frt, :frb + addop_ 'fneg', 0xFC000050, :frt, :frb + addop_ 'fnabs', 0xFC000110, :frt, :frb + addop_ 'fadd', 0xFC00002A, :frt, :fra, :frb + addop_ 'fadds', 0xEC00002A, :frt, :fra, :frb + addop_ 'fsub', 0xFC000028, :frt, :fra, :frb + addop_ 'fsubs', 0xEC000028, :frt, :fra, :frb + addop_ 'fmul', 0xFC000032, :frt, :fra, :frc + addop_ 'fmuls', 0xEC000032, :frt, :fra, :frc + addop_ 'fdiv', 0xFC000024, :frt, :fra, :frb + addop_ 'fdivs', 0xEC000024, :frt, :fra, :frb + addop_ 'fmadd', 0xFC00003A, :frt, :fra, :frc, :frb + addop_ 'fmadds',0xEC00003A, :frt, :fra, :frc, :frb + addop_ 'fmsub', 0xFC000038, :frt, :fra, :frc, :frb + addop_ 'fmsubs',0xEC000038, :frt, :fra, :frc, :frb + addop_ 'fnmadd',0xFC00003E, :frt, :fra, :frc, :frb + addop_ 'fnmadds',0xEC00003E,:frt, :fra, :frc, :frb + addop_ 'fnmsub',0xFC00003C, :frt, :fra, :frc, :frb + addop_ 'fnmsubs',0xEC00003C,:frt, :fra, :frc, :frb + addop_ 'frsp', 0xFC000018, :frt, :frb + addop_ 'fctid', 0xFC00065C, :frt, :frb + addop_ 'fctidz',0xFC00065E, :frt, :frb + addop_ 'fctiw', 0xFC00001C, :frt, :frb + addop_ 'fctiwz',0xFC00001E, :frt, :frb + addop_ 'fcfid', 0xFC00069C, :frt, :frb + addop 'fcmpu', 0xFC000000, :bf, :fra, :frb + addop 'fcmpo', 0xFC000040, :bf, :fra, :frb + addop_ 'mffs', 
0xFC00048E, :frt + addop 'mcrfs', 0xFC000080, :bf, :bfa + addop_ 'mtfsfi',0xFC00010C, :bf, :u + addop_ 'mtfsf', 0xFC00058E, :flm, :frb + addop_ 'mtfsb0',0xFC00008C, :bt + addop_ 'mtfsb1',0xFC00004C, :bt + addop 'mtocrf', 0x7C100120, :fxm, :rs + addop_ 'fsqrt', 0xFC00002C, :frt, :frb + addop_ 'fsqrts',0xEC00002C, :frt, :frb + addop_ 'fre', 0xFC000030, :frt, :frb + addop_ 'fres', 0xEC000030, :frt, :frb + addop_ 'frsqrte',0xFC000034,:frt, :frb + addop_ 'frsqrtes',0xEC000034, :frt, :frb + addop_ 'fsel', 0xFC00002E, :frt, :fra, :frc, :frb + addop 'mcrxr', 0x7C000400, :bf + addop 'icbi', 0x7C0007AC, :ra, :rb + addop 'dcbt', 0x7C00022C, :ra, :rb + addop 'dcbtst', 0x7C0001EC, :ra, :rb + addop 'dcbz', 0x7C0007EC, :ra, :rb + addop 'dcbst', 0x7C00006C, :ra, :rb + addop 'dcbf', 0x7C0000AC, :ra, :rb + addop 'isync', 0x4C00012C + addop 'lwarx', 0x7C000028, :rt, :ra, :rb + addop 'ldarx', 0x7C0000A8, :rt, :ra, :rb + addop 'stwcx.', 0x7C00012D, :rs, :ra, :rb + addop 'stdcx.', 0x7C0001AD, :rs, :ra, :rb + addop 'sync', 0x7C0004AC, :l_ + addop 'eieio', 0x7C0006AC + addop 'mftb', 0x7C0002E6, :rt, :tbr + addop 'eciwx', 0x7C00026C, :rt, :ra, :rb + addop 'ecowx', 0x7C00036C, :rs, :ra, :rb + addop 'dcbt', 0x7C00022C, :ra, :rb, :th + addop 'dcbf', 0x7C0000AC, :ra, :rb + addop 'dcbf', 0x7C0000AC, :ra, :rb, :l + addop 'sc', 0x44000002, :lev + addop 'rfid', 0x4C000024 + addop 'hrfid', 0x4C000224 + addop 'mtmsrd', 0x7C000164, :rs, :l__ + addop 'mfmsr', 0x7C0000A6, :rt + addop 'slbie', 0x7C000364, :rb + addop 'slbmte', 0x7C000324, :rs, :rb + addop 'slbmfev',0x7C0006A6, :rt, :rb + addop 'slbmfee',0x7C000726, :rt, :rb + addop 'tlbie', 0x7C000264, :rb, :l + addop 'tlbiel', 0x7C000224, :rb, :l + addop 'tlbia', 0x7C0002E4 + addop 'tlbsync',0x7C00046C + addop 'mtmsr', 0x7C000124, :rs, :l__ + addop 'lq', 0xE0000000, :rt, :ra_i16q + addop 'stq', 0xF8000002, :rs, :ra_i16s + addop 'mtsr', 0x7C0001A4, :sr, :rs + addop 'mtsrin', 0x7C0001E4, :rs, :rb + addop 'mfsr', 0x7C0004A6, :rt, :sr + addop 'mfsrin', 
0x7C000526, :rt, :rb - addop_trap 'tw', 0x7C000008, :ra, :rb - addop_trap 'twi', 0xC0000000, :ra, :si - addop_trap 'td', 0x7C000088, :ra, :rb - addop_trap 'tdi', 0x08000000, :ra, :si + addop_trap 'tw', 0x7C000008, :ra, :rb + addop_trap 'twi', 0xC0000000, :ra, :si + addop_trap 'td', 0x7C000088, :ra, :rb + addop_trap 'tdi', 0x08000000, :ra, :si - # pseudo-instructions - addop 'mr', :pseudo, :ra, :rb - addop 'not', :pseudo, :ra - addop 'not', :pseudo, :ra, :rb - @opcode_list.each { |op| - if op.name =~ /^addi/ - addop op.name.sub('add', 'sub'), :pseudo, *op.args - end - if op.name =~ /^(add|sub|xor|and|or|div|mul|nand)/ and op.args.length == 3 - addop op.name, :pseudo, *op.args[1..-1] - end - } - end + # pseudo-instructions + addop 'mr', :pseudo, :ra, :rb + addop 'not', :pseudo, :ra + addop 'not', :pseudo, :ra, :rb + @opcode_list.each { |op| + if op.name =~ /^addi/ + addop op.name.sub('add', 'sub'), :pseudo, *op.args + end + if op.name =~ /^(add|sub|xor|and|or|div|mul|nand)/ and op.args.length == 3 + addop op.name, :pseudo, *op.args[1..-1] + end + } + end end end diff --git a/lib/metasm/metasm/cpu/ppc/parse.rb b/lib/metasm/metasm/cpu/ppc/parse.rb index e534a79c31..9053bc9ef3 100644 --- a/lib/metasm/metasm/cpu/ppc/parse.rb +++ b/lib/metasm/metasm/cpu/ppc/parse.rb @@ -10,46 +10,46 @@ require 'metasm/parse' module Metasm class PowerPC # TODO - def parse_arg_valid?(op, sym, arg) - case sym - when :ra, :rb, :rs, :rt; arg.kind_of?(GPR) - when :fra, :frb, :frc, :frs, :frt; arg.kind_of?(FPR) - when :ra_i16, :ra_i16s, :ra_i16q; arg.kind_of?(Memref) - when :bd, :d, :ds, :dq, :si, :ui, :li, :sh, :mb, :me, :mb_, :me_, :u; arg.kind_of?(Expression) - when :ba, :bf, :bfa, :bt; arg.kind_of?(CR) - when :ign_bo_zzz, :ign_bo_z, :ign_bo_at, :ign_bo_at2, :aa, :lk, :oe, :rc, :l; # ? 
- when :bb, :bh, :flm, :fxm, :l_, :l__, :lev, :nb, :sh_, :spr, :sr, :tbr, :th, :to - # TODO - else raise "internal error: mips arg #{sym.inspect}" - end - end + def parse_arg_valid?(op, sym, arg) + case sym + when :ra, :rb, :rs, :rt; arg.kind_of?(GPR) + when :fra, :frb, :frc, :frs, :frt; arg.kind_of?(FPR) + when :ra_i16, :ra_i16s, :ra_i16q; arg.kind_of?(Memref) + when :bd, :d, :ds, :dq, :si, :ui, :li, :sh, :mb, :me, :mb_, :me_, :u; arg.kind_of?(Expression) + when :ba, :bf, :bfa, :bt; arg.kind_of?(CR) + when :ign_bo_zzz, :ign_bo_z, :ign_bo_at, :ign_bo_at2, :aa, :lk, :oe, :rc, :l; # ? + when :bb, :bh, :flm, :fxm, :l_, :l__, :lev, :nb, :sh_, :spr, :sr, :tbr, :th, :to + # TODO + else raise "internal error: mips arg #{sym.inspect}" + end + end - def parse_argument(pgm) - pgm.skip_space - return if not tok = pgm.readtok - if tok.type == :string - return GPR.new(GPR.s_to_i[tok.raw]) if GPR.s_to_i[tok.raw] - return SPR.new(SPR.s_to_i[tok.raw]) if SPR.s_to_i[tok.raw] - return FPR.new(FPR.s_to_i[tok.raw]) if FPR.s_to_i[tok.raw] - return CR.new(CR.s_to_i[tok.raw]) if CR.s_to_i[tok.raw] - return MSR.new if tok.raw == 'msr' - end - pgm.unreadtok tok - arg = Expression.parse pgm - pgm.skip_space - # check memory indirection: 'off(base reg)' # XXX scaled index ? 
- if arg and pgm.nexttok and pgm.nexttok.type == :punct and pgm.nexttok.raw == '(' - pgm.readtok - pgm.skip_space_eol - ntok = pgm.readtok - raise tok, "Invalid base #{ntok}" unless ntok and ntok.type == :string and GPR.s_to_i[ntok.raw] - base = GPR.new GPR.s_to_i[ntok.raw] - pgm.skip_space_eol - ntok = pgm.readtok - raise tok, "Invalid memory reference, ')' expected" if not ntok or ntok.type != :punct or ntok.raw != ')' - arg = Memref.new base, arg - end - arg - end + def parse_argument(pgm) + pgm.skip_space + return if not tok = pgm.readtok + if tok.type == :string + return GPR.new(GPR.s_to_i[tok.raw]) if GPR.s_to_i[tok.raw] + return SPR.new(SPR.s_to_i[tok.raw]) if SPR.s_to_i[tok.raw] + return FPR.new(FPR.s_to_i[tok.raw]) if FPR.s_to_i[tok.raw] + return CR.new(CR.s_to_i[tok.raw]) if CR.s_to_i[tok.raw] + return MSR.new if tok.raw == 'msr' + end + pgm.unreadtok tok + arg = Expression.parse pgm + pgm.skip_space + # check memory indirection: 'off(base reg)' # XXX scaled index ? + if arg and pgm.nexttok and pgm.nexttok.type == :punct and pgm.nexttok.raw == '(' + pgm.readtok + pgm.skip_space_eol + ntok = pgm.readtok + raise tok, "Invalid base #{ntok}" unless ntok and ntok.type == :string and GPR.s_to_i[ntok.raw] + base = GPR.new GPR.s_to_i[ntok.raw] + pgm.skip_space_eol + ntok = pgm.readtok + raise tok, "Invalid memory reference, ')' expected" if not ntok or ntok.type != :punct or ntok.raw != ')' + arg = Memref.new base, arg + end + arg + end end end diff --git a/lib/metasm/metasm/cpu/python/decode.rb b/lib/metasm/metasm/cpu/python/decode.rb index 12bd7a2e0d..c76c43c2ce 100644 --- a/lib/metasm/metasm/cpu/python/decode.rb +++ b/lib/metasm/metasm/cpu/python/decode.rb @@ -8,129 +8,129 @@ require 'metasm/decode' module Metasm class Python - def build_bin_lookaside - opcode_list.inject({}) { |la, op| la.update op.bin => op } - end + def build_bin_lookaside + opcode_list.inject({}) { |la, op| la.update op.bin => op } + end - def decode_findopcode(edata) - di = 
DecodedInstruction.new(self) + def decode_findopcode(edata) + di = DecodedInstruction.new(self) - byte = edata.decode_imm(:u8, :little) + byte = edata.decode_imm(:u8, :little) - di if di.opcode = @bin_lookaside[byte] - end + di if di.opcode = @bin_lookaside[byte] + end - def decode_instr_op(edata, di) - di.bin_length = 1 + def decode_instr_op(edata, di) + di.bin_length = 1 - di.instruction.opname = di.opcode.name + di.instruction.opname = di.opcode.name - di.opcode.args.each { |a| - case a - when :cmp - di.bin_length += 2 - v = edata.decode_imm(:i16, @endianness) - di.instruction.args << (CMP_OP[v] || Expression[v]) - when :i16 - di.bin_length += 2 - di.instruction.args << Expression[edata.decode_imm(:i16, @endianness)] - when :u8 - di.bin_length += 1 - di.instruction.args << Expression[edata.decode_imm(:u8, @endianness)] - else - raise "unsupported arg #{a.inspect}" - end - } + di.opcode.args.each { |a| + case a + when :cmp + di.bin_length += 2 + v = edata.decode_imm(:i16, @endianness) + di.instruction.args << (CMP_OP[v] || Expression[v]) + when :i16 + di.bin_length += 2 + di.instruction.args << Expression[edata.decode_imm(:i16, @endianness)] + when :u8 + di.bin_length += 1 + di.instruction.args << Expression[edata.decode_imm(:u8, @endianness)] + else + raise "unsupported arg #{a.inspect}" + end + } - return if edata.ptr > edata.length + return if edata.ptr > edata.length - di - end + di + end - def decode_instr_interpret(di, addr) - case di.opcode.name - when 'LOAD_CONST' - if c = prog_code(addr) - cst = c[:consts][di.instruction.args.first.reduce] - if cst.kind_of? 
Hash and cst[:type] == :code - di.add_comment "lambda #{Expression[cst[:fileoff]]}" - else - di.add_comment cst.inspect - end - end - when 'LOAD_NAME', 'LOAD_ATTR', 'LOAD_GLOBAL', 'STORE_NAME', 'IMPORT_NAME', 'LOAD_FAST' - if c = prog_code(addr) - di.add_comment c[:names][di.instruction.args.first.reduce].inspect - end - end - di - end + def decode_instr_interpret(di, addr) + case di.opcode.name + when 'LOAD_CONST' + if c = prog_code(addr) + cst = c[:consts][di.instruction.args.first.reduce] + if cst.kind_of? Hash and cst[:type] == :code + di.add_comment "lambda #{Expression[cst[:fileoff]]}" + else + di.add_comment cst.inspect + end + end + when 'LOAD_NAME', 'LOAD_ATTR', 'LOAD_GLOBAL', 'STORE_NAME', 'IMPORT_NAME', 'LOAD_FAST' + if c = prog_code(addr) + di.add_comment c[:names][di.instruction.args.first.reduce].inspect + end + end + di + end - def backtrace_binding - @backtrace_binding ||= init_backtrace_binding - end + def backtrace_binding + @backtrace_binding ||= init_backtrace_binding + end - def init_backtrace_binding - @backtrace_binding ||= {} + def init_backtrace_binding + @backtrace_binding ||= {} - opcode_list.each { |op| - binding = case op - when 'nop'; lambda { |*a| {} } - end - @backtrace_binding[op] ||= binding if binding - } + opcode_list.each { |op| + binding = case op + when 'nop'; lambda { |*a| {} } + end + @backtrace_binding[op] ||= binding if binding + } - @backtrace_binding - end + @backtrace_binding + end - def get_backtrace_binding(di) - a = di.instruction.args.map { |arg| - case arg - when Var; arg.symbolic - else arg - end - } + def get_backtrace_binding(di) + a = di.instruction.args.map { |arg| + case arg + when Var; arg.symbolic + else arg + end + } - if binding = backtrace_binding[di.opcode.basename] - binding[di, *a] - else - puts "unhandled instruction to backtrace: #{di}" if $VERBOSE - { :incomplete_binding => Expression[1] } - end - end + if binding = backtrace_binding[di.opcode.basename] + binding[di, *a] + else + puts "unhandled 
instruction to backtrace: #{di}" if $VERBOSE + { :incomplete_binding => Expression[1] } + end + end - def get_xrefs_x(dasm, di) - return [] if not di.opcode.props[:setip] + def get_xrefs_x(dasm, di) + return [] if not di.opcode.props[:setip] - arg = case di.opcode.name - when 'JUMP_FORWARD', 'FOR_ITER' - # relative offset - di.instruction.args.last.reduce + di.next_addr - when 'CALL_FUNCTION_VAR' - 'lol' - when /CALL/ - :unknown - else - # absolute offset from :code start - off = di.instruction.args.last.reduce - if c = prog_code(di) - off += c[:fileoff] - end - off - end + arg = case di.opcode.name + when 'JUMP_FORWARD', 'FOR_ITER' + # relative offset + di.instruction.args.last.reduce + di.next_addr + when 'CALL_FUNCTION_VAR' + 'lol' + when /CALL/ + :unknown + else + # absolute offset from :code start + off = di.instruction.args.last.reduce + if c = prog_code(di) + off += c[:fileoff] + end + off + end - [Expression[(arg.kind_of?(Var) ? arg.symbolic : arg)]] - end + [Expression[(arg.kind_of?(Var) ? arg.symbolic : arg)]] + end - def prog_code(addr) - addr = addr.address if addr.kind_of? DecodedInstruction - @last_prog_code ||= nil - return @last_prog_code if @last_prog_code and @last_prog_code[:fileoff] <= addr and @last_prog_code[:fileoff] + @last_prog_code[:code].length > addr - @last_prog_code = @program.code_at_off(addr) if @program - end + def prog_code(addr) + addr = addr.address if addr.kind_of? 
DecodedInstruction + @last_prog_code ||= nil + return @last_prog_code if @last_prog_code and @last_prog_code[:fileoff] <= addr and @last_prog_code[:fileoff] + @last_prog_code[:code].length > addr + @last_prog_code = @program.code_at_off(addr) if @program + end - def backtrace_is_function_return(expr, di=nil) - #Expression[expr].reduce == Expression['wtf'] - end + def backtrace_is_function_return(expr, di=nil) + #Expression[expr].reduce == Expression['wtf'] + end end end diff --git a/lib/metasm/metasm/cpu/python/main.rb b/lib/metasm/metasm/cpu/python/main.rb index f5918a4233..1cd9341382 100644 --- a/lib/metasm/metasm/cpu/python/main.rb +++ b/lib/metasm/metasm/cpu/python/main.rb @@ -7,30 +7,30 @@ require 'metasm/main' module Metasm class Python < CPU - def initialize(prog = nil) - super() - @program = prog - @endianness = (prog.respond_to?(:endianness) ? prog.endianness : :little) - @size = (prog.respond_to?(:size) ? prog.size : 32) - end + def initialize(prog = nil) + super() + @program = prog + @endianness = (prog.respond_to?(:endianness) ? prog.endianness : :little) + @size = (prog.respond_to?(:size) ? 
prog.size : 32) + end - class Var - include Renderable + class Var + include Renderable - attr_accessor :i + attr_accessor :i - def initialize(i); @i = i end + def initialize(i); @i = i end - def ==(o) - o.class == self.class and o.i == i - end + def ==(o) + o.class == self.class and o.i == i + end - def symbolic; "var_#{@i}".to_sym end + def symbolic; "var_#{@i}".to_sym end - def render - ["var_#@i"] - end + def render + ["var_#@i"] + end - end + end end end diff --git a/lib/metasm/metasm/cpu/python/opcodes.rb b/lib/metasm/metasm/cpu/python/opcodes.rb index 1e55d2fb95..c9285cf28d 100644 --- a/lib/metasm/metasm/cpu/python/opcodes.rb +++ b/lib/metasm/metasm/cpu/python/opcodes.rb @@ -7,174 +7,174 @@ require 'metasm/cpu/python/main' module Metasm class Python - CMP_OP = %w[< <= == != > >= in not_in is is_not exch] + CMP_OP = %w[< <= == != > >= in not_in is is_not exch] - def addop(name, bin, *args) - o = Opcode.new(name) - o.bin = bin + def addop(name, bin, *args) + o = Opcode.new(name) + o.bin = bin - args.each { |a| - o.args << a if @valid_args[a] - o.props[a] = true if @valid_props[a] - } - o.args << :i16 if o.bin >= 90 and o.props.empty? # HAVE_ARGUMENT + args.each { |a| + o.args << a if @valid_args[a] + o.props[a] = true if @valid_props[a] + } + o.args << :i16 if o.bin >= 90 and o.props.empty? 
# HAVE_ARGUMENT - @opcode_list << o - end + @opcode_list << o + end - def init_opcode_list - @opcode_list = [] + def init_opcode_list + @opcode_list = [] - @valid_args[:u8] = true - @valid_args[:i16] = true - @valid_args[:cmp] = true + @valid_args[:u8] = true + @valid_args[:i16] = true + @valid_args[:cmp] = true - addop 'STOP_CODE', 0, :stopexec - addop 'POP_TOP', 1 - addop 'ROT_TWO', 2 - addop 'ROT_THREE', 3 - addop 'DUP_TOP', 4 - addop 'ROT_FOUR', 5 - addop 'NOP', 9 + addop 'STOP_CODE', 0, :stopexec + addop 'POP_TOP', 1 + addop 'ROT_TWO', 2 + addop 'ROT_THREE', 3 + addop 'DUP_TOP', 4 + addop 'ROT_FOUR', 5 + addop 'NOP', 9 - addop 'UNARY_POSITIVE', 10 - addop 'UNARY_NEGATIVE', 11 - addop 'UNARY_NOT', 12 - addop 'UNARY_CONVERT', 13 + addop 'UNARY_POSITIVE', 10 + addop 'UNARY_NEGATIVE', 11 + addop 'UNARY_NOT', 12 + addop 'UNARY_CONVERT', 13 - addop 'UNARY_INVERT', 15 + addop 'UNARY_INVERT', 15 - addop 'BINARY_POWER', 19 + addop 'BINARY_POWER', 19 - addop 'BINARY_MULTIPLY', 20 - addop 'BINARY_DIVIDE', 21 - addop 'BINARY_MODULO', 22 - addop 'BINARY_ADD', 23 - addop 'BINARY_SUBTRACT', 24 - addop 'BINARY_SUBSCR', 25 - addop 'BINARY_FLOOR_DIVIDE', 26 - addop 'BINARY_TRUE_DIVIDE', 27 - addop 'INPLACE_FLOOR_DIVIDE', 28 - addop 'INPLACE_TRUE_DIVIDE', 29 + addop 'BINARY_MULTIPLY', 20 + addop 'BINARY_DIVIDE', 21 + addop 'BINARY_MODULO', 22 + addop 'BINARY_ADD', 23 + addop 'BINARY_SUBTRACT', 24 + addop 'BINARY_SUBSCR', 25 + addop 'BINARY_FLOOR_DIVIDE', 26 + addop 'BINARY_TRUE_DIVIDE', 27 + addop 'INPLACE_FLOOR_DIVIDE', 28 + addop 'INPLACE_TRUE_DIVIDE', 29 - addop 'SLICE', 30 - addop 'SLICE_1', 31 - addop 'SLICE_2', 32 - addop 'SLICE_3', 33 + addop 'SLICE', 30 + addop 'SLICE_1', 31 + addop 'SLICE_2', 32 + addop 'SLICE_3', 33 - addop 'STORE_SLICE', 40 - addop 'STORE_SLICE_1', 41 - addop 'STORE_SLICE_2', 42 - addop 'STORE_SLICE_3', 43 + addop 'STORE_SLICE', 40 + addop 'STORE_SLICE_1', 41 + addop 'STORE_SLICE_2', 42 + addop 'STORE_SLICE_3', 43 - addop 'DELETE_SLICE', 50 - addop 
'DELETE_SLICE_1', 51 - addop 'DELETE_SLICE_2', 52 - addop 'DELETE_SLICE_3', 53 + addop 'DELETE_SLICE', 50 + addop 'DELETE_SLICE_1', 51 + addop 'DELETE_SLICE_2', 52 + addop 'DELETE_SLICE_3', 53 - addop 'STORE_MAP', 54 - addop 'INPLACE_ADD', 55 - addop 'INPLACE_SUBTRACT', 56 - addop 'INPLACE_MULTIPLY', 57 - addop 'INPLACE_DIVIDE', 58 - addop 'INPLACE_MODULO', 59 - addop 'STORE_SUBSCR', 60 - addop 'DELETE_SUBSCR', 61 + addop 'STORE_MAP', 54 + addop 'INPLACE_ADD', 55 + addop 'INPLACE_SUBTRACT', 56 + addop 'INPLACE_MULTIPLY', 57 + addop 'INPLACE_DIVIDE', 58 + addop 'INPLACE_MODULO', 59 + addop 'STORE_SUBSCR', 60 + addop 'DELETE_SUBSCR', 61 - addop 'BINARY_LSHIFT', 62 - addop 'BINARY_RSHIFT', 63 - addop 'BINARY_AND', 64 - addop 'BINARY_XOR', 65 - addop 'BINARY_OR', 66 - addop 'INPLACE_POWER', 67 - addop 'GET_ITER', 68 + addop 'BINARY_LSHIFT', 62 + addop 'BINARY_RSHIFT', 63 + addop 'BINARY_AND', 64 + addop 'BINARY_XOR', 65 + addop 'BINARY_OR', 66 + addop 'INPLACE_POWER', 67 + addop 'GET_ITER', 68 - addop 'PRINT_EXPR', 70 - addop 'PRINT_ITEM', 71 - addop 'PRINT_NEWLINE', 72 - addop 'PRINT_ITEM_TO', 73 - addop 'PRINT_NEWLINE_TO', 74 - addop 'INPLACE_LSHIFT', 75 - addop 'INPLACE_RSHIFT', 76 - addop 'INPLACE_AND', 77 - addop 'INPLACE_XOR', 78 - addop 'INPLACE_OR', 79 - addop 'BREAK_LOOP', 80 - addop 'WITH_CLEANUP', 81 - addop 'LOAD_LOCALS', 82 - addop 'RETURN_VALUE', 83 - addop 'IMPORT_STAR', 84 - addop 'EXEC_STMT', 85 - addop 'YIELD_VALUE', 86 - addop 'POP_BLOCK', 87 - addop 'END_FINALLY', 88 - addop 'BUILD_CLASS', 89 + addop 'PRINT_EXPR', 70 + addop 'PRINT_ITEM', 71 + addop 'PRINT_NEWLINE', 72 + addop 'PRINT_ITEM_TO', 73 + addop 'PRINT_NEWLINE_TO', 74 + addop 'INPLACE_LSHIFT', 75 + addop 'INPLACE_RSHIFT', 76 + addop 'INPLACE_AND', 77 + addop 'INPLACE_XOR', 78 + addop 'INPLACE_OR', 79 + addop 'BREAK_LOOP', 80 + addop 'WITH_CLEANUP', 81 + addop 'LOAD_LOCALS', 82 + addop 'RETURN_VALUE', 83 + addop 'IMPORT_STAR', 84 + addop 'EXEC_STMT', 85 + addop 'YIELD_VALUE', 86 + addop 
'POP_BLOCK', 87 + addop 'END_FINALLY', 88 + addop 'BUILD_CLASS', 89 - #addop 'HAVE_ARGUMENT', 90 #/* Opcodes from here have an argument: */ + #addop 'HAVE_ARGUMENT', 90 #/* Opcodes from here have an argument: */ - addop 'STORE_NAME', 90 #/* Index in name list */ - addop 'DELETE_NAME', 91 #/* "" */ - addop 'UNPACK_SEQUENCE', 92 #/* Number of sequence items */ - addop 'FOR_ITER', 93, :setip - addop 'LIST_APPEND', 94 + addop 'STORE_NAME', 90 #/* Index in name list */ + addop 'DELETE_NAME', 91 #/* "" */ + addop 'UNPACK_SEQUENCE', 92 #/* Number of sequence items */ + addop 'FOR_ITER', 93, :setip + addop 'LIST_APPEND', 94 - addop 'STORE_ATTR', 95 #/* Index in name list */ - addop 'DELETE_ATTR', 96 #/* "" */ - addop 'STORE_GLOBAL', 97 #/* "" */ - addop 'DELETE_GLOBAL', 98 #/* "" */ - addop 'DUP_TOPX', 99 #/* number of items to duplicate */ - addop 'LOAD_CONST', 100 #/* Index in const list */ - addop 'LOAD_NAME', 101 #/* Index in name list */ - addop 'BUILD_TUPLE', 102 #/* Number of tuple items */ - addop 'BUILD_LIST', 103 #/* Number of list items */ - addop 'BUILD_SET', 104 #/* Number of set items */ - addop 'BUILD_MAP', 105 #/* Always zero for now */ - addop 'LOAD_ATTR', 106 #/* Index in name list */ - addop 'COMPARE_OP', 107, :cmp #/* Comparison operator */ - addop 'IMPORT_NAME', 108 #/* Index in name list */ - addop 'IMPORT_FROM', 109 #/* Index in name list */ - addop 'JUMP_FORWARD', 110, :setip, :stopexec #/* Number of bytes to skip */ + addop 'STORE_ATTR', 95 #/* Index in name list */ + addop 'DELETE_ATTR', 96 #/* "" */ + addop 'STORE_GLOBAL', 97 #/* "" */ + addop 'DELETE_GLOBAL', 98 #/* "" */ + addop 'DUP_TOPX', 99 #/* number of items to duplicate */ + addop 'LOAD_CONST', 100 #/* Index in const list */ + addop 'LOAD_NAME', 101 #/* Index in name list */ + addop 'BUILD_TUPLE', 102 #/* Number of tuple items */ + addop 'BUILD_LIST', 103 #/* Number of list items */ + addop 'BUILD_SET', 104 #/* Number of set items */ + addop 'BUILD_MAP', 105 #/* Always zero for now */ + 
addop 'LOAD_ATTR', 106 #/* Index in name list */ + addop 'COMPARE_OP', 107, :cmp #/* Comparison operator */ + addop 'IMPORT_NAME', 108 #/* Index in name list */ + addop 'IMPORT_FROM', 109 #/* Index in name list */ + addop 'JUMP_FORWARD', 110, :setip, :stopexec #/* Number of bytes to skip */ - addop 'JUMP_IF_FALSE_OR_POP', 111, :setip #/* Target byte offset from beginning of code */ - addop 'JUMP_IF_TRUE_OR_POP', 112, :setip #/* "" */ - addop 'JUMP_ABSOLUTE', 113, :setip, :stopexec #/* "" */ - addop 'POP_JUMP_IF_FALSE', 114, :setip #/* "" */ - addop 'POP_JUMP_IF_TRUE', 115, :setip #/* "" */ + addop 'JUMP_IF_FALSE_OR_POP', 111, :setip #/* Target byte offset from beginning of code */ + addop 'JUMP_IF_TRUE_OR_POP', 112, :setip #/* "" */ + addop 'JUMP_ABSOLUTE', 113, :setip, :stopexec #/* "" */ + addop 'POP_JUMP_IF_FALSE', 114, :setip #/* "" */ + addop 'POP_JUMP_IF_TRUE', 115, :setip #/* "" */ - addop 'LOAD_GLOBAL', 116 #/* Index in name list */ + addop 'LOAD_GLOBAL', 116 #/* Index in name list */ - addop 'CONTINUE_LOOP', 119 #/* Start of loop (absolute) */ - addop 'SETUP_LOOP', 120 #/* Target address (relative) */ - addop 'SETUP_EXCEPT', 121 #/* "" */ - addop 'SETUP_FINALLY', 122 #/* "" */ + addop 'CONTINUE_LOOP', 119 #/* Start of loop (absolute) */ + addop 'SETUP_LOOP', 120 #/* Target address (relative) */ + addop 'SETUP_EXCEPT', 121 #/* "" */ + addop 'SETUP_FINALLY', 122 #/* "" */ - addop 'LOAD_FAST', 124 #/* Local variable number */ - addop 'STORE_FAST', 125 #/* Local variable number */ - addop 'DELETE_FAST', 126 #/* Local variable number */ + addop 'LOAD_FAST', 124 #/* Local variable number */ + addop 'STORE_FAST', 125 #/* Local variable number */ + addop 'DELETE_FAST', 126 #/* Local variable number */ - addop 'RAISE_VARARGS', 130 #/* Number of raise arguments (1, 2 or 3) */ - #/* CALL_FUNCTION_XXX opcodes defined below depend on this definition */ - addop 'CALL_FUNCTION', 131, :u8, :u8, :setip #/* #args + (#kwargs<<8) */ - addop 'MAKE_FUNCTION', 132 #/* #defaults 
*/ - addop 'BUILD_SLICE', 133 #/* Number of items */ + addop 'RAISE_VARARGS', 130 #/* Number of raise arguments (1, 2 or 3) */ + #/* CALL_FUNCTION_XXX opcodes defined below depend on this definition */ + addop 'CALL_FUNCTION', 131, :u8, :u8, :setip #/* #args + (#kwargs<<8) */ + addop 'MAKE_FUNCTION', 132 #/* #defaults */ + addop 'BUILD_SLICE', 133 #/* Number of items */ - addop 'MAKE_CLOSURE', 134 #/* #free vars */ - addop 'LOAD_CLOSURE', 135 #/* Load free variable from closure */ - addop 'LOAD_DEREF', 136 #/* Load and dereference from closure cell */ - addop 'STORE_DEREF', 137 #/* Store into cell */ + addop 'MAKE_CLOSURE', 134 #/* #free vars */ + addop 'LOAD_CLOSURE', 135 #/* Load free variable from closure */ + addop 'LOAD_DEREF', 136 #/* Load and dereference from closure cell */ + addop 'STORE_DEREF', 137 #/* Store into cell */ - #/* The next 3 opcodes must be contiguous and satisfy (CALL_FUNCTION_VAR - CALL_FUNCTION) & 3 == 1 */ - addop 'CALL_FUNCTION_VAR', 140, :u8, :u8, :setip #/* #args + (#kwargs<<8) */ - addop 'CALL_FUNCTION_KW', 141, :u8, :u8, :setip #/* #args + (#kwargs<<8) */ - addop 'CALL_FUNCTION_VAR_KW', 142, :u8, :u8, :setip #/* #args + (#kwargs<<8) */ + #/* The next 3 opcodes must be contiguous and satisfy (CALL_FUNCTION_VAR - CALL_FUNCTION) & 3 == 1 */ + addop 'CALL_FUNCTION_VAR', 140, :u8, :u8, :setip #/* #args + (#kwargs<<8) */ + addop 'CALL_FUNCTION_KW', 141, :u8, :u8, :setip #/* #args + (#kwargs<<8) */ + addop 'CALL_FUNCTION_VAR_KW', 142, :u8, :u8, :setip #/* #args + (#kwargs<<8) */ - addop 'SETUP_WITH', 143 + addop 'SETUP_WITH', 143 - #/* Support for opargs more than 16 bits long */ - addop 'EXTENDED_ARG', 145 + #/* Support for opargs more than 16 bits long */ + addop 'EXTENDED_ARG', 145 - addop 'SET_ADD', 146 - addop 'MAP_ADD', 147 - end + addop 'SET_ADD', 146 + addop 'MAP_ADD', 147 + end end end diff --git a/lib/metasm/metasm/cpu/sh4/decode.rb b/lib/metasm/metasm/cpu/sh4/decode.rb index 30c653ca14..9810e2084e 100644 --- 
a/lib/metasm/metasm/cpu/sh4/decode.rb +++ b/lib/metasm/metasm/cpu/sh4/decode.rb @@ -8,358 +8,360 @@ require 'metasm/decode' module Metasm class Sh4 - def build_opcode_bin_mask(op) - op.bin_mask = 0 - op.args.each { |f| - op.bin_mask |= @fields_mask[f] << @fields_shift[f] - } - op.bin_mask ^= 0xffff - end + def build_opcode_bin_mask(op) + op.bin_mask = 0 + op.args.each { |f| + op.bin_mask |= @fields_mask[f] << @fields_shift[f] + } + op.bin_mask ^= 0xffff + end - def build_bin_lookaside - lookaside = (0..0xf).inject({}) { |h, i| h.update i => [] } - opcode_list.each { |op| - build_opcode_bin_mask op - lookaside[(op.bin >> 12) & 0xf] << op - } - lookaside - end + def build_bin_lookaside + lookaside = (0..0xf).inject({}) { |h, i| h.update i => [] } + opcode_list.each { |op| + build_opcode_bin_mask op + lookaside[(op.bin >> 12) & 0xf] << op + } + lookaside + end - # depending on transfert size mode (sz flag), fmov instructions manipulate single ou double precision values - # instruction aliasing appears when sz is not handled - def transfer_size_mode(list) - return list if list.find { |op| not op.name.include? 'mov' } - @transfersz == 0 ? list.find_all { |op| op.name.include? 'fmov.s' } : list.reject { |op| op.name.include? 'fmov.s' } - end + # depending on transfert size mode (sz flag), fmov instructions manipulate single ou double precision values + # instruction aliasing appears when sz is not handled + def transfer_size_mode(list) + return list if list.find { |op| not op.name.include? 'mov' } + @transfersz == 0 ? list.find_all { |op| op.name.include? 'fmov.s' } : list.reject { |op| op.name.include? 'fmov.s' } + end - # when pr flag is set, floating point instructions are executed as double-precision operations - # thus register pair is used (DRn registers) - def precision_mode(list) - @fpprecision == 0 ? list.reject { |op| op.args.include? :drn } : list.find_all { |op| op.args.include? 
:frn } - end + # when pr flag is set, floating point instructions are executed as double-precision operations + # thus register pair is used (DRn registers) + def precision_mode(list) + @fpprecision == 0 ? list.reject { |op| op.args.include? :drn } : list.find_all { |op| op.args.include? :frn } + end - def decode_findopcode(edata) - di = DecodedInstruction.new(self) - val = edata.decode_imm(:u16, @endianness) - edata.ptr -= 2 - op = @bin_lookaside[(val >> 12) & 0xf].find_all { |opcode| (val & opcode.bin_mask) == opcode.bin } + def decode_findopcode(edata) + return if edata.ptr >= edata.length - op = transfer_size_mode(op) if op.length == 2 - op = precision_mode(op) if op.length == 2 + di = DecodedInstruction.new(self) + val = edata.decode_imm(:u16, @endianness) + edata.ptr -= 2 + op = @bin_lookaside[(val >> 12) & 0xf].find_all { |opcode| (val & opcode.bin_mask) == opcode.bin } - if op.length > 1 - puts "current value: #{Expression[val]}, ambiguous matches:", - op.map { |opcode| " #{opcode.name} - #{opcode.args.inspect} - #{Expression[opcode.bin]} - #{Expression[opcode.bin_mask]}" } - #raise "Sh4 - Internal error" - end + op = transfer_size_mode(op) if op.length == 2 + op = precision_mode(op) if op.length == 2 - if not op.empty? - di.opcode = op.first - di - end - end + if op.length > 1 + puts "current value: #{Expression[val]}, ambiguous matches:", + op.map { |opcode| " #{opcode.name} - #{opcode.args.inspect} - #{Expression[opcode.bin]} - #{Expression[opcode.bin_mask]}" } + #raise "Sh4 - Internal error" + end - def decode_instr_op(edata, di) - before_ptr = edata.ptr - op = di.opcode - di.instruction.opname = op.name - di.opcode.props[:memsz] = (op.name =~ /\.l|mova/ ? 32 : (op.name =~ /\.w/ ? 16 : 8)) - val = edata.decode_imm(:u16, @endianness) + if not op.empty? 
+ di.opcode = op.first + di + end + end - field_val = lambda{ |f| - r = (val >> @fields_shift[f]) & @fields_mask[f] - case f - when :@rm, :@rn ,:@_rm, :@_rn, :@rm_, :@rn_; GPR.new(r) - when :@disppc - # The effective address is formed by calculating PC+4, - # clearing the lowest 2 bits, and adding the zero-extended 8-bit immediate i - # multiplied by 4 (32-bit)/ 2 (16-bit) / 1 (8-bit). - curaddr = di.address+4 - curaddr = (curaddr & 0xffff_fffc) if di.opcode.props[:memsz] == 32 - curaddr+r*(di.opcode.props[:memsz]/8) - when :@disprm, :@dispr0rn; (r & 0xf) * (di.opcode.props[:memsz]/8) - when :@disprmrn; (r & 0xf) * 4 - when :@dispgbr; Expression.make_signed(r, 16) - when :disp8; di.address+4+2*Expression.make_signed(r, 8) - when :disp12; di.address+4+2*Expression.make_signed(r, 12) - when :s8; Expression.make_signed(r, 8) - else r - end - } + def decode_instr_op(edata, di) + before_ptr = edata.ptr + op = di.opcode + di.instruction.opname = op.name + di.opcode.props[:memsz] = (op.name =~ /\.l|mova/ ? 32 : (op.name =~ /\.w/ ? 
16 : 8)) + val = edata.decode_imm(:u16, @endianness) - op.args.each { |a| - di.instruction.args << case a - when :r0; GPR.new 0 - when :rm, :rn; GPR.new field_val[a] - when :rm_bank, :rn_bank; RBANK.new field_val[a] - when :drm, :drn; DR.new field_val[a] - when :frm, :frn; FR.new field_val[a] - when :xdm, :xdn; XDR.new field_val[a] - when :fvm, :fvn; FVR.new field_val[a] - when :vbr; VBR.new - when :gbr; GBR.new - when :sr; SR.new - when :ssr; SSR.new - when :spc; SPC.new - when :sgr; SGR.new - when :dbr; DBR.new - when :mach; MACH.new - when :macl; MACL.new - when :pr; PR.new - when :fpul; FPUL.new - when :fpscr; FPSCR.new - when :pc; PC.new + field_val = lambda{ |f| + r = (val >> @fields_shift[f]) & @fields_mask[f] + case f + when :@rm, :@rn ,:@_rm, :@_rn, :@rm_, :@rn_; GPR.new(r) + when :@disppc + # The effective address is formed by calculating PC+4, + # clearing the lowest 2 bits, and adding the zero-extended 8-bit immediate i + # multiplied by 4 (32-bit)/ 2 (16-bit) / 1 (8-bit). + curaddr = di.address+4 + curaddr = (curaddr & 0xffff_fffc) if di.opcode.props[:memsz] == 32 + curaddr+r*(di.opcode.props[:memsz]/8) + when :@disprm, :@dispr0rn; (r & 0xf) * (di.opcode.props[:memsz]/8) + when :@disprmrn; (r & 0xf) * 4 + when :@dispgbr; Expression.make_signed(r, 16) + when :disp8; di.address+4+2*Expression.make_signed(r, 8) + when :disp12; di.address+4+2*Expression.make_signed(r, 12) + when :s8; Expression.make_signed(r, 8) + else r + end + } - when :@rm, :@rn, :@disppc - Memref.new(field_val[a], nil) - when :@_rm, :@_rn - Memref.new(field_val[a], nil, :pre) - when :@rm_, :@rn_ - Memref.new(field_val[a], nil, :post) - when :@r0rm - Memref.new(GPR.new(0), GPR.new(field_val[:rm])) - when :@r0rn, :@dispr0rn - Memref.new(GPR.new(0), GPR.new(field_val[:rn])) - when :@disprm - Memref.new(field_val[a], GPR.new(field_val[:rm])) - when :@disprmrn - Memref.new(field_val[a], GPR.new(field_val[:rn])) + op.args.each { |a| + di.instruction.args << case a + when :r0; GPR.new 0 + 
when :rm, :rn; GPR.new field_val[a] + when :rm_bank, :rn_bank; RBANK.new field_val[a] + when :drm, :drn; DR.new field_val[a] + when :frm, :frn; FR.new field_val[a] + when :xdm, :xdn; XDR.new field_val[a] + when :fvm, :fvn; FVR.new field_val[a] + when :vbr; VBR.new + when :gbr; GBR.new + when :sr; SR.new + when :ssr; SSR.new + when :spc; SPC.new + when :sgr; SGR.new + when :dbr; DBR.new + when :mach; MACH.new + when :macl; MACL.new + when :pr; PR.new + when :fpul; FPUL.new + when :fpscr; FPSCR.new + when :pc; PC.new - when :disppc; Expression[field_val[:@disppc]] - when :s8, :disp8, :disp12; Expression[field_val[a]] - when :i16, :i8, :i5; Expression[field_val[a]] + when :@rm, :@rn, :@disppc + Memref.new(field_val[a], nil) + when :@_rm, :@_rn + Memref.new(field_val[a], nil, :pre) + when :@rm_, :@rn_ + Memref.new(field_val[a], nil, :post) + when :@r0rm + Memref.new(GPR.new(0), GPR.new(field_val[:rm])) + when :@r0rn, :@dispr0rn + Memref.new(GPR.new(0), GPR.new(field_val[:rn])) + when :@disprm + Memref.new(field_val[a], GPR.new(field_val[:rm])) + when :@disprmrn + Memref.new(field_val[a], GPR.new(field_val[:rn])) - else raise SyntaxError, "Internal error: invalid argument #{a} in #{op.name}" - end - } + when :disppc; Expression[field_val[:@disppc]] + when :s8, :disp8, :disp12; Expression[field_val[a]] + when :i16, :i8, :i5; Expression[field_val[a]] - di.bin_length += edata.ptr - before_ptr + else raise SyntaxError, "Internal error: invalid argument #{a} in #{op.name}" + end + } - return if edata.ptr > edata.length + di.bin_length += edata.ptr - before_ptr - di - end + return if edata.ptr > edata.length - def disassembler_default_func - df = DecodedFunction.new - df.backtrace_binding = {} - (0..7 ).each { |i| r = "r#{i}".to_sym ; df.backtrace_binding[r] = Expression::Unknown } - (8..15).each { |i| r = "r#{i}".to_sym ; df.backtrace_binding[r] = Expression[r] } - df.backtracked_for = [BacktraceTrace.new(Expression[:pr], :default, Expression[:pr], :x)] - df.btfor_callback = 
lambda { |dasm, btfor, funcaddr, calladdr| - if funcaddr != :default - btfor - elsif di = dasm.decoded[calladdr] and di.opcode.props[:saveip] - btfor - else - [] - end - } - df - end + di + end - def backtrace_update_function_binding(dasm, faddr, f, retaddrlist, *wantregs) - retaddrlist.map! { |retaddr| dasm.decoded[retaddr] ? dasm.decoded[retaddr].block.list.last.address : retaddr } if retaddrlist - b = f.backtrace_binding + def disassembler_default_func + df = DecodedFunction.new + df.backtrace_binding = {} + (0..7 ).each { |i| r = "r#{i}".to_sym ; df.backtrace_binding[r] = Expression::Unknown } + (8..15).each { |i| r = "r#{i}".to_sym ; df.backtrace_binding[r] = Expression[r] } + df.backtracked_for = [BacktraceTrace.new(Expression[:pr], :default, Expression[:pr], :x)] + df.btfor_callback = lambda { |dasm, btfor, funcaddr, calladdr| + if funcaddr != :default + btfor + elsif di = dasm.decoded[calladdr] and di.opcode.props[:saveip] + btfor + else + [] + end + } + df + end - bt_val = lambda { |r| - next if not retaddrlist - bt = [] - b[r] = Expression::Unknown # break recursive dep - retaddrlist.each { |retaddr| - bt |= dasm.backtrace(Expression[r], retaddr, - :include_start => true, :snapshot_addr => faddr, :origin => retaddr) - } - b[r] = ((bt.length == 1) ? bt.first : Expression::Unknown) - } - wantregs = GPR::Sym if wantregs.empty? - wantregs.map { |r| r.to_sym }.each(&bt_val) - end + def backtrace_update_function_binding(dasm, faddr, f, retaddrlist, *wantregs) + retaddrlist.map! { |retaddr| dasm.decoded[retaddr] ? dasm.decoded[retaddr].block.list.last.address : retaddr } if retaddrlist + b = f.backtrace_binding + + bt_val = lambda { |r| + next if not retaddrlist + bt = [] + b[r] = Expression::Unknown # break recursive dep + retaddrlist.each { |retaddr| + bt |= dasm.backtrace(Expression[r], retaddr, + :include_start => true, :snapshot_addr => faddr, :origin => retaddr) + } + b[r] = ((bt.length == 1) ? 
bt.first : Expression::Unknown) + } + wantregs = GPR::Sym if wantregs.empty? + wantregs.map { |r| r.to_sym }.each(&bt_val) + end - # interprets a condition code (in an opcode name) as an expression - def decode_cmp_expr(di, a0, a1) - case di.opcode.name - when 'cmp/eq'; Expression[a0, :'==', a1] - when 'cmp/ge'; Expression[a0, :'>=', a1] # signed - when 'cmp/gt'; Expression[a0, :'>', a1] # signed - when 'cmp/hi'; Expression[a0, :'>', a1] # unsigned - when 'cmp/hs'; Expression[a0, :'>=', a1] # unsigned - end - end + # interprets a condition code (in an opcode name) as an expression + def decode_cmp_expr(di, a0, a1) + case di.opcode.name + when 'cmp/eq'; Expression[a0, :'==', a1] + when 'cmp/ge'; Expression[a0, :'>=', a1] # signed + when 'cmp/gt'; Expression[a0, :'>', a1] # signed + when 'cmp/hi'; Expression[a0, :'>', a1] # unsigned + when 'cmp/hs'; Expression[a0, :'>=', a1] # unsigned + end + end - def decode_cmp_cst(di, a0) - case di.opcode.name - when 'cmp/pl'; Expression[a0, :'>', 0] # signed - when 'cmp/pz'; Expression[a0, :'>=', 0] # signed - end - end + def decode_cmp_cst(di, a0) + case di.opcode.name + when 'cmp/pl'; Expression[a0, :'>', 0] # signed + when 'cmp/pz'; Expression[a0, :'>=', 0] # signed + end + end - def backtrace_binding - @backtrace_binding ||= init_backtrace_binding - end + def backtrace_binding + @backtrace_binding ||= init_backtrace_binding + end - def opsz(di) - ret = @size - ret = 8 if di and di.opcode.name =~ /\.b/ - ret = 16 if di and di.opcode.name =~ /\.w/ - ret - end + def opsz(di) + ret = @size + ret = 8 if di and di.opcode.name =~ /\.b/ + ret = 16 if di and di.opcode.name =~ /\.w/ + ret + end - def init_backtrace_binding - @backtrace_binding ||= {} + def init_backtrace_binding + @backtrace_binding ||= {} - mask = lambda { |di| (1 << opsz(di)) - 1 } # 32bits => 0xffff_ffff + mask = lambda { |di| (1 << opsz(di)) - 1 } # 32bits => 0xffff_ffff - opcode_list.map { |ol| ol.name }.uniq.each { |op| - @backtrace_binding[op] ||= case op - 
when 'ldc', 'ldc.l', 'lds', 'lds.l', 'stc', 'stc.l', 'mov', 'mov.l', 'sts', 'sts.l' - lambda { |di, a0, a1| { a1 => Expression[a0] }} - when 'stc.w', 'stc.b', 'mov.w', 'mov.b' - lambda { |di, a0, a1| { a1 => Expression[a0, :&, mask[di]] }} - when 'movt'; lambda { |di, a0| { a0 => :t_bit }} - when 'mova'; lambda { |di, a0, a1| { a1 => Expression[a0] }} - when 'exts.b', 'exts.w', 'extu.w' - lambda { |di, a0, a1| { a1 => Expression[a0, :&, mask[di]] }} - when 'cmp/eq', 'cmp/ge', 'cmp/ge', 'cmp/gt', 'cmp/hi', 'cmp/hs' - lambda { |di, a0, a1| { :t_bit => decode_cmp_expr(di, a0, a1) }} - when 'cmp/pl', 'cmp/pz' - lambda { |di, a0| { :t_bit => decode_cmp_cst(di, a0) }} - when 'tst'; lambda { |di, a0, a1| { :t_bit => Expression[[a0, :&, mask[di]], :==, [a1, :&, mask[di]]] }} - when 'rte'; lambda { |di| { :pc => :spc , :sr => :ssr }} - when 'rts'; lambda { |di| { :pc => :pr }} - when 'sets'; lambda { |di| { :s_bit => 1 }} - when 'sett'; lambda { |di| { :t_bit => 1 }} - when 'clrs'; lambda { |di| { :s_bit => 0 }} - when 'clrt'; lambda { |di| { :t_bit => 0 }} - when 'clrmac'; lambda { |di| { :macl => 0, :mach => 0 }} - when 'jmp'; lambda { |di, a0| { :pc => a0 }} - when 'jsr', 'bsr', 'bsrf'; lambda { |di, a0| { :pc => Expression[a0], :pr => Expression[di.address, :+, 2*2] }} - when 'dt'; lambda { |di, a0| - res = Expression[a0, :-, 1] - { :a0 => res, :t_bit => Expression[res, :==, 0] } - } - when 'add' ; lambda { |di, a0, a1| { a1 => Expression[[a0, :+, a1], :&, 0xffff_ffff] }} - when 'addc' ; lambda { |di, a0, a1| - res = Expression[[a0, :&, mask[di]], :+, [[a1, :&, mask[di]], :+, :t_bit]] - { a1 => Expression[a0, :+, [a1, :+, :t_bit]], :t_bit => Expression[res, :>, mask[di]] } - } - when 'addv' ; lambda { |di, a0, a1| - res = Expression[[a0, :&, mask[di]], :+, [[a1, :&, mask[di]]]] - { a1 => Expression[a0, :+, [a1, :+, :t_bit]], :t_bit => Expression[res, :>, mask[di]] } - } - when 'shll16', 'shll8', 'shll2', 'shll' ; lambda { |di, a0| - shift = { 'shll16' => 16, 'shll8' => 
8, 'shll2' => 2, 'shll' => 1 }[op] - { a0 => Expression[[a0, :<<, shift], :&, 0xffff] } - } - when 'shlr16', 'shlr8', 'shlr2','shlr'; lambda { |di, a0| - shift = { 'shlr16' => 16, 'shlr8' => 8, 'shlr2' => 2, 'shlr' => 1 }[op] - { a0 => Expression[a0, :>>, shift] } - } - when 'rotcl'; lambda { |di, a0| { a0 => Expression[[a0, :<<, 1], :|, :t_bit], :t_bit => Expression[a0, :>>, [opsz[di], :-, 1]] }} - when 'rotcr'; lambda { |di, a0| { a0 => Expression[[a0, :>>, 1], :|, :t_bit], :t_bit => Expression[a0, :&, 1] }} - when 'rotl'; lambda { |di, a0| - shift_bit = [a0, :<<, [opsz[di], :-, 1]] - { a0 => Expression[[a0, :<<, 1], :|, shift_bit], :t_bit => shift_bit } - } - when 'rotr'; lambda { |di, a0| - shift_bit = [a0, :>>, [opsz[di], :-, 1]] - { a0 => Expression[[a0, :>>, 1], :|, shift_bit], :t_bit => shift_bit } - } - when 'shal'; lambda { |di, a0| - shift_bit = [a0, :<<, [opsz[di], :-, 1]] - { a0 => Expression[a0, :<<, 1], :t_bit => shift_bit } - } - when 'shar'; lambda { |di, a0| - shift_bit = Expression[a0, :&, 1] - { a0 => Expression[a0, :>>, 1], :t_bit => shift_bit } - } - when 'sub'; lambda { |di, a0, a1| { a1 => Expression[[a1, :-, a0], :&, 0xffff_ffff] }} - when 'subc'; lambda { |di, a0, a1| { a1 => Expression[a1, :-, [a0, :-, :t_bit]] }} - when 'and', 'and.b'; lambda { |di, a0, a1| { a1 => Expression[[a0, :&, mask[di]], :|, [[a1, :&, mask[di]]]] }} - when 'or', 'or.b'; lambda { |di, a0, a1| { a1 => Expression[[a0, :|, mask[di]], :|, [[a1, :&, mask[di]]]] }} - when 'xor', 'xor.b'; lambda { |di, a0, a1| { a1 => Expression[[a0, :|, mask[di]], :^, [[a1, :&, mask[di]]]] }} - when 'neg' ; lambda { |di, a0, a1| { a1 => Expression[mask[di], :-, a0] }} - when 'negc' ; lambda { |di, a0, a1| { a1 => Expression[[[mask[di], :-, a0], :-, :t_bit], :&, mask[di]] }} - when 'not'; lambda { |di, a0, a1| { a1 => Expression[a0, :^, mask[di]] }} - when 'nop'; lambda { |*a| {} } - when /^b/; lambda { |*a| {} } # branches - end - } + opcode_list.map { |ol| ol.name }.uniq.each { |op| + 
@backtrace_binding[op] ||= case op + when 'ldc', 'ldc.l', 'lds', 'lds.l', 'stc', 'stc.l', 'mov', 'mov.l', 'sts', 'sts.l' + lambda { |di, a0, a1| { a1 => Expression[a0] }} + when 'stc.w', 'stc.b', 'mov.w', 'mov.b' + lambda { |di, a0, a1| { a1 => Expression[a0, :&, mask[di]] }} + when 'movt'; lambda { |di, a0| { a0 => :t_bit }} + when 'mova'; lambda { |di, a0, a1| { a1 => Expression[a0] }} + when 'exts.b', 'exts.w', 'extu.w' + lambda { |di, a0, a1| { a1 => Expression[a0, :&, mask[di]] }} + when 'cmp/eq', 'cmp/ge', 'cmp/ge', 'cmp/gt', 'cmp/hi', 'cmp/hs' + lambda { |di, a0, a1| { :t_bit => decode_cmp_expr(di, a0, a1) }} + when 'cmp/pl', 'cmp/pz' + lambda { |di, a0| { :t_bit => decode_cmp_cst(di, a0) }} + when 'tst'; lambda { |di, a0, a1| { :t_bit => Expression[[a0, :&, mask[di]], :==, [a1, :&, mask[di]]] }} + when 'rte'; lambda { |di| { :pc => :spc , :sr => :ssr }} + when 'rts'; lambda { |di| { :pc => :pr }} + when 'sets'; lambda { |di| { :s_bit => 1 }} + when 'sett'; lambda { |di| { :t_bit => 1 }} + when 'clrs'; lambda { |di| { :s_bit => 0 }} + when 'clrt'; lambda { |di| { :t_bit => 0 }} + when 'clrmac'; lambda { |di| { :macl => 0, :mach => 0 }} + when 'jmp'; lambda { |di, a0| { :pc => a0 }} + when 'jsr', 'bsr', 'bsrf'; lambda { |di, a0| { :pc => Expression[a0], :pr => Expression[di.address, :+, 2*2] }} + when 'dt'; lambda { |di, a0| + res = Expression[a0, :-, 1] + { :a0 => res, :t_bit => Expression[res, :==, 0] } + } + when 'add' ; lambda { |di, a0, a1| { a1 => Expression[[a0, :+, a1], :&, 0xffff_ffff] }} + when 'addc' ; lambda { |di, a0, a1| + res = Expression[[a0, :&, mask[di]], :+, [[a1, :&, mask[di]], :+, :t_bit]] + { a1 => Expression[a0, :+, [a1, :+, :t_bit]], :t_bit => Expression[res, :>, mask[di]] } + } + when 'addv' ; lambda { |di, a0, a1| + res = Expression[[a0, :&, mask[di]], :+, [[a1, :&, mask[di]]]] + { a1 => Expression[a0, :+, [a1, :+, :t_bit]], :t_bit => Expression[res, :>, mask[di]] } + } + when 'shll16', 'shll8', 'shll2', 'shll' ; lambda { |di, a0| + 
shift = { 'shll16' => 16, 'shll8' => 8, 'shll2' => 2, 'shll' => 1 }[op] + { a0 => Expression[[a0, :<<, shift], :&, 0xffff] } + } + when 'shlr16', 'shlr8', 'shlr2','shlr'; lambda { |di, a0| + shift = { 'shlr16' => 16, 'shlr8' => 8, 'shlr2' => 2, 'shlr' => 1 }[op] + { a0 => Expression[a0, :>>, shift] } + } + when 'rotcl'; lambda { |di, a0| { a0 => Expression[[a0, :<<, 1], :|, :t_bit], :t_bit => Expression[a0, :>>, [opsz[di], :-, 1]] }} + when 'rotcr'; lambda { |di, a0| { a0 => Expression[[a0, :>>, 1], :|, :t_bit], :t_bit => Expression[a0, :&, 1] }} + when 'rotl'; lambda { |di, a0| + shift_bit = [a0, :<<, [opsz[di], :-, 1]] + { a0 => Expression[[a0, :<<, 1], :|, shift_bit], :t_bit => shift_bit } + } + when 'rotr'; lambda { |di, a0| + shift_bit = [a0, :>>, [opsz[di], :-, 1]] + { a0 => Expression[[a0, :>>, 1], :|, shift_bit], :t_bit => shift_bit } + } + when 'shal'; lambda { |di, a0| + shift_bit = [a0, :<<, [opsz[di], :-, 1]] + { a0 => Expression[a0, :<<, 1], :t_bit => shift_bit } + } + when 'shar'; lambda { |di, a0| + shift_bit = Expression[a0, :&, 1] + { a0 => Expression[a0, :>>, 1], :t_bit => shift_bit } + } + when 'sub'; lambda { |di, a0, a1| { a1 => Expression[[a1, :-, a0], :&, 0xffff_ffff] }} + when 'subc'; lambda { |di, a0, a1| { a1 => Expression[a1, :-, [a0, :-, :t_bit]] }} + when 'and', 'and.b'; lambda { |di, a0, a1| { a1 => Expression[[a0, :&, mask[di]], :|, [[a1, :&, mask[di]]]] }} + when 'or', 'or.b'; lambda { |di, a0, a1| { a1 => Expression[[a0, :|, mask[di]], :|, [[a1, :&, mask[di]]]] }} + when 'xor', 'xor.b'; lambda { |di, a0, a1| { a1 => Expression[[a0, :|, mask[di]], :^, [[a1, :&, mask[di]]]] }} + when 'neg' ; lambda { |di, a0, a1| { a1 => Expression[mask[di], :-, a0] }} + when 'negc' ; lambda { |di, a0, a1| { a1 => Expression[[[mask[di], :-, a0], :-, :t_bit], :&, mask[di]] }} + when 'not'; lambda { |di, a0, a1| { a1 => Expression[a0, :^, mask[di]] }} + when 'nop'; lambda { |*a| {} } + when /^b/; lambda { |*a| {} } # branches + end + } - 
@backtrace_binding - end + @backtrace_binding + end - def get_backtrace_binding(di) - a = di.instruction.args.map { |arg| - case arg - when GPR, XFR, XDR, FVR, DR, FR, XMTRX; arg.symbolic - when MACH, MACL, PR, FPUL, PC, FPSCR; arg.symbolic - when SR, SSR, SPC, GBR, VBR, SGR, DBR; arg.symbolic - when Memref; arg.symbolic(di.address, di.opcode.props[:memsz]/8) - else arg - end - } + def get_backtrace_binding(di) + a = di.instruction.args.map { |arg| + case arg + when GPR, XFR, XDR, FVR, DR, FR, XMTRX; arg.symbolic + when MACH, MACL, PR, FPUL, PC, FPSCR; arg.symbolic + when SR, SSR, SPC, GBR, VBR, SGR, DBR; arg.symbolic + when Memref; arg.symbolic(di.address, di.opcode.props[:memsz]/8) + else arg + end + } - if binding = backtrace_binding[di.opcode.basename] - bd = binding[di, *a] || {} - di.instruction.args.grep(Memref).each { |m| - next unless r = m.base and r.kind_of?(GPR) - r = r.symbolic - case m.action - when :post - bd[r] ||= Expression[r, :+, di.opcode.props[:memsz]/8] - when :pre - bd[r] ||= Expression[r, :-, di.opcode.props[:memsz]/8] - end - } - bd - else - puts "unhandled instruction to backtrace: #{di}" if $VERBOSE - {:incomplete_binding => Expression[1]} - end - end + if binding = backtrace_binding[di.opcode.basename] + bd = binding[di, *a] || {} + di.instruction.args.grep(Memref).each { |m| + next unless r = m.base and r.kind_of?(GPR) + r = r.symbolic + case m.action + when :post + bd[r] ||= Expression[r, :+, di.opcode.props[:memsz]/8] + when :pre + bd[r] ||= Expression[r, :-, di.opcode.props[:memsz]/8] + end + } + bd + else + puts "unhandled instruction to backtrace: #{di}" if $VERBOSE + {:incomplete_binding => Expression[1]} + end + end - def get_xrefs_x(dasm, di) - return [] if not di.opcode.props[:setip] + def get_xrefs_x(dasm, di) + return [] if not di.opcode.props[:setip] - val = case di.instruction.opname - when 'rts'; :pr - else di.instruction.args.last - end + val = case di.instruction.opname + when 'rts'; :pr + else di.instruction.args.last + 
end - val = case val - when Reg; val.symbolic - when Memref; arg.symbolic(di.address, 4) - else val - end + val = case val + when Reg; val.symbolic + when Memref; arg.symbolic(di.address, 4) + else val + end - val = case di.instruction.opname - when 'braf', 'bsrf'; Expression[[di.address, :+, 4], :+, val] - else val - end + val = case di.instruction.opname + when 'braf', 'bsrf'; Expression[[di.address, :+, 4], :+, val] + else val + end - [Expression[val]] - end + [Expression[val]] + end - def backtrace_is_function_return(expr, di=nil) - expr.reduce_rec == :pr - end + def backtrace_is_function_return(expr, di=nil) + expr.reduce_rec == :pr + end - def delay_slot(di=nil) - (di and di.opcode.props[:delay_slot]) ? 1 : 0 - end + def delay_slot(di=nil) + (di and di.opcode.props[:delay_slot]) ? 1 : 0 + end - def replace_instr_arg_immediate(i, old, new) - i.args.map! { |a| - case a - when Expression; a == old ? new : Expression[a.bind(old => new).reduce] - when Memref - a.base = (a.base == old ? new : Expression[a.base.bind(old => new).reduce]) if a.base.kind_of?(Expression) - a - else a - end - } - end + def replace_instr_arg_immediate(i, old, new) + i.args.map! { |a| + case a + when Expression; a == old ? new : Expression[a.bind(old => new).reduce] + when Memref + a.base = (a.base == old ? 
new : Expression[a.base.bind(old => new).reduce]) if a.base.kind_of?(Expression) + a + else a + end + } + end end end diff --git a/lib/metasm/metasm/cpu/sh4/main.rb b/lib/metasm/metasm/cpu/sh4/main.rb index 46ee31b242..4daa46e5fb 100644 --- a/lib/metasm/metasm/cpu/sh4/main.rb +++ b/lib/metasm/metasm/cpu/sh4/main.rb @@ -8,294 +8,294 @@ require 'metasm/main' module Metasm class Sh4 < CPU - def initialize(e = :little, transfersz = 0, fpprecision = 0) - super() - @endianness = e + def initialize(e = :little, transfersz = 0, fpprecision = 0) + super() + @endianness = e - # transfer size mode - # When SZ = 1 and big endian mode is selected, FMOV can - # be used for double-precision floating-point data load or - # store operations. In little endian mode, two 32-bit data size - # moves must be executed, with SZ = 0, to load or store a - # double-precision floating-point number. - transfersz = 0 if @endianness == :little - @transfersz = transfersz + # transfer size mode + # When SZ = 1 and big endian mode is selected, FMOV can + # be used for double-precision floating-point data load or + # store operations. In little endian mode, two 32-bit data size + # moves must be executed, with SZ = 0, to load or store a + # double-precision floating-point number. + transfersz = 0 if @endianness == :little + @transfersz = transfersz - # PR = 0 : Floating point instructions are executed as single - # precision operations. - # PR = 1 : Floating point instructions are executed as double- - # precision operations (the result of instructions for - # which double-precision is not supported is undefined). - # Setting [transfersz = fpprecision = 1] is reserved. - # FPU operations are undefined in this mode. - @fpprecision = fpprecision + # PR = 0 : Floating point instructions are executed as single + # precision operations. 
+ # PR = 1 : Floating point instructions are executed as double- + # precision operations (the result of instructions for + # which double-precision is not supported is undefined). + # Setting [transfersz = fpprecision = 1] is reserved. + # FPU operations are undefined in this mode. + @fpprecision = fpprecision - @size = 32 - end - - class Reg - include Renderable - - def ==(o) - o.class == self.class and (not respond_to?(:i) or o.i == i) - end - end + @size = 32 + end + + class Reg + include Renderable + + def ==(o) + o.class == self.class and (not respond_to?(:i) or o.i == i) + end + end - # general purpose reg - class GPR < Reg - attr_accessor :i + # general purpose reg + class GPR < Reg + attr_accessor :i - def initialize(i); @i = i end - Sym = (0..15).map { |i| "r#{i}".to_sym } - - def symbolic ; Sym[@i] end + def initialize(i); @i = i end + Sym = (0..15).map { |i| "r#{i}".to_sym } + + def symbolic ; Sym[@i] end - def render ; ["r#@i"] end - end + def render ; ["r#@i"] end + end - class RBANK < Reg - attr_accessor :i + class RBANK < Reg + attr_accessor :i - def initialize(i); @i = i end - Sym = (0..7).map { |i| "r#{i}_bank".to_sym } + def initialize(i); @i = i end + Sym = (0..7).map { |i| "r#{i}_bank".to_sym } - def symbolic ; Sym[@i] end + def symbolic ; Sym[@i] end - def render ; ["r#{@i}_bank"] end - end + def render ; ["r#{@i}_bank"] end + end - # floatting-point registers - class FR < Reg - attr_accessor :i + # floatting-point registers + class FR < Reg + attr_accessor :i - def initialize(i); @i = i end - Sym = (0..15).map { |i| "fr#{i}".to_sym } + def initialize(i); @i = i end + Sym = (0..15).map { |i| "fr#{i}".to_sym } - def symbolic ; Sym[@i] end + def symbolic ; Sym[@i] end - def render ; ["fr#@i"] end - end + def render ; ["fr#@i"] end + end - # DR registers: double-precision floating-point registers - # DR0 = {FR0, FR1} - # DR2 = {FR2, FR3} - # DR4 = {FR4, FR5} - # DR6 = {FR6, FR7} - # DR8 = {FR8, FR9} - # DR10 = {FR10, FR11} - # DR12 = {FR12, FR13} 
- # DR14 = {FR14, FR15} - class DR < Reg - attr_accessor :i + # DR registers: double-precision floating-point registers + # DR0 = {FR0, FR1} + # DR2 = {FR2, FR3} + # DR4 = {FR4, FR5} + # DR6 = {FR6, FR7} + # DR8 = {FR8, FR9} + # DR10 = {FR10, FR11} + # DR12 = {FR12, FR13} + # DR14 = {FR14, FR15} + class DR < Reg + attr_accessor :i - def initialize(i); @i = i end - Sym = (0..7).map { |i| "dr#{i*2}".to_sym } + def initialize(i); @i = i end + Sym = (0..7).map { |i| "dr#{i*2}".to_sym } - def symbolic ; Sym[@i/2] end + def symbolic ; Sym[@i/2] end - def render ; ["dr#@i"] end - end + def render ; ["dr#@i"] end + end - # Single-precision floating-point vector registers - # FV0 = {FR0, FR1, FR2, FR3} - # FV4 = {FR4, FR5, FR6, FR7}, - # FV8 = {FR8, FR9, FR10, FR11} - # FV12 = {FR12, FR13, FR14, FR15} - class FVR < Reg - attr_accessor :i + # Single-precision floating-point vector registers + # FV0 = {FR0, FR1, FR2, FR3} + # FV4 = {FR4, FR5, FR6, FR7}, + # FV8 = {FR8, FR9, FR10, FR11} + # FV12 = {FR12, FR13, FR14, FR15} + class FVR < Reg + attr_accessor :i - def initialize(i); @i = i end - Sym = (0..3).map { |i| "fv#{i*4}".to_sym } + def initialize(i); @i = i end + Sym = (0..3).map { |i| "fv#{i*4}".to_sym } - def symbolic ; Sym[@i/4] end + def symbolic ; Sym[@i/4] end - def render ; ["fv#@i"] end - end + def render ; ["fv#@i"] end + end - # Single-precision floating-point extended registers - class XFR < Reg - attr_accessor :i + # Single-precision floating-point extended registers + class XFR < Reg + attr_accessor :i - def initialize(i); @i = i end - Sym = (0..15).map { |i| "xf#{i}".to_sym } + def initialize(i); @i = i end + Sym = (0..15).map { |i| "xf#{i}".to_sym } - def symbolic ; Sym[@i] end + def symbolic ; Sym[@i] end - def render ; ["xf#@i"] end - end + def render ; ["xf#@i"] end + end - # XD registers: single-precision floating-point vector registers - # XD0 = {XF0, XF1} - # XD2 = {XF2, XF3} - # XD4 = {XF4, XF5} - # XD6 = {XF6, XF7} - # XD8 = {XF8, XF9} - # XD10 = 
{XF10, XF11} - # XD12 = {XF12, XF13} - # XD14 = {XF14, XF15} - class XDR < Reg - attr_accessor :i + # XD registers: single-precision floating-point vector registers + # XD0 = {XF0, XF1} + # XD2 = {XF2, XF3} + # XD4 = {XF4, XF5} + # XD6 = {XF6, XF7} + # XD8 = {XF8, XF9} + # XD10 = {XF10, XF11} + # XD12 = {XF12, XF13} + # XD14 = {XF14, XF15} + class XDR < Reg + attr_accessor :i - def initialize(i); @i = i end - Sym = (0..7).map { |i| "xd#{i*2}".to_sym } + def initialize(i); @i = i end + Sym = (0..7).map { |i| "xd#{i*2}".to_sym } - def symbolic ; Sym[@i/2] end + def symbolic ; Sym[@i/2] end - def render ; ["xd#@i"] end - end + def render ; ["xd#@i"] end + end - # Single-precision floating-point extended register matrix - class XMTRX < Reg - def symbolic ; :xmtrx ; end - def render ; ['xmtrx'] ; end - end + # Single-precision floating-point extended register matrix + class XMTRX < Reg + def symbolic ; :xmtrx ; end + def render ; ['xmtrx'] ; end + end - # Multiply-and-accumulate register high - class MACH < Reg + # Multiply-and-accumulate register high + class MACH < Reg - def symbolic ; :mach end - def render ; ['mach'] end - end + def symbolic ; :mach end + def render ; ['mach'] end + end - # Multiply-and-accumulate register low - class MACL < Reg + # Multiply-and-accumulate register low + class MACL < Reg - def symbolic ; :macl end - def render ; ['macl'] end - end + def symbolic ; :macl end + def render ; ['macl'] end + end - # Procedure register - class PR < Reg + # Procedure register + class PR < Reg - def symbolic ; :pr end - def render ; ['pr'] end - end + def symbolic ; :pr end + def render ; ['pr'] end + end - # Floating-point communication register - class FPUL < Reg + # Floating-point communication register + class FPUL < Reg - def symbolic ; :fpul end - def render ; ['fpul'] end - end + def symbolic ; :fpul end + def render ; ['fpul'] end + end - # Program counter - class PC < Reg + # Program counter + class PC < Reg - def symbolic ; :pc end - def render ; 
['pc'] end - end + def symbolic ; :pc end + def render ; ['pc'] end + end - # Floating-point status/control register - class FPSCR < Reg + # Floating-point status/control register + class FPSCR < Reg - def symbolic ; :fpscr end - def render ; ['fpscr'] end - end + def symbolic ; :fpscr end + def render ; ['fpscr'] end + end - #----------------------- Control registers ----------------------------- + #----------------------- Control registers ----------------------------- - # Status register - class SR < Reg + # Status register + class SR < Reg - def symbolic ; :sr end - def render ; ['sr'] end - end + def symbolic ; :sr end + def render ; ['sr'] end + end - # Saved status register - class SSR < Reg + # Saved status register + class SSR < Reg - def symbolic ; :ssr end - def render ; ['ssr'] end - end + def symbolic ; :ssr end + def render ; ['ssr'] end + end - # Saved program counter - class SPC < Reg + # Saved program counter + class SPC < Reg - def symbolic ; :spc end - def render ; ['spc'] end - end + def symbolic ; :spc end + def render ; ['spc'] end + end - # Global base register - class GBR < Reg + # Global base register + class GBR < Reg - def symbolic ; :spc end - def render ; ['gbr'] end - end + def symbolic ; :spc end + def render ; ['gbr'] end + end - # Vector base register - class VBR < Reg + # Vector base register + class VBR < Reg - def symbolic ; :spc end - def render ; ['vbr'] end - end + def symbolic ; :spc end + def render ; ['vbr'] end + end - # Saved general register - class SGR < Reg + # Saved general register + class SGR < Reg - def symbolic ; :sgr end - def render ; ['sgr'] end - end + def symbolic ; :sgr end + def render ; ['sgr'] end + end - # Debug base register - class DBR < Reg + # Debug base register + class DBR < Reg - def symbolic ; :dbr end - def render ; ['dbr'] end - end + def symbolic ; :dbr end + def render ; ['dbr'] end + end - class Memref - # action: pre/post (inc/dec)rement - attr_accessor :base, :disp, :action + class Memref 
+ # action: pre/post (inc/dec)rement + attr_accessor :base, :disp, :action - def initialize(base, offset, action = nil) - base = Expression[base] if base.kind_of? Integer - @base, @disp, @action = base, offset, action - end + def initialize(base, offset, action = nil) + base = Expression[base] if base.kind_of? Integer + @base, @disp, @action = base, offset, action + end - def symbolic(orig=nil, sz=32) - b = @base - b = b.symbolic if b.kind_of? Reg + def symbolic(orig=nil, sz=32) + b = @base + b = b.symbolic if b.kind_of? Reg - b = Expression[b, :-, sz/8] if @action == :pre + b = Expression[b, :-, sz/8] if @action == :pre - if disp - o = @disp - o = o.symbolic if o.kind_of? Reg - e = Expression[b, :+, o].reduce - else - e = Expression[b].reduce - end + if disp + o = @disp + o = o.symbolic if o.kind_of? Reg + e = Expression[b, :+, o].reduce + else + e = Expression[b].reduce + end - Indirection[e, sz, orig] - end + Indirection[e, sz, orig] + end - include Renderable + include Renderable - def render - if @disp - #['@(', @base, ',', @disp, ')'] - ['[', @base, '+', @disp, ']'] - else - case @action - when :pre then ['[--', @base, ']'] - when :post then ['[', @base, '++]'] - else ['[', @base, ']'] - #when :pre then ['@-', @base] - #when :post then ['@', @base, '+'] - #else ['@', @base] - end - end - end + def render + if @disp + #['@(', @base, ',', @disp, ')'] + ['[', @base, '+', @disp, ']'] + else + case @action + when :pre then ['[--', @base, ']'] + when :post then ['[', @base, '++]'] + else ['[', @base, ']'] + #when :pre then ['@-', @base] + #when :post then ['@', @base, '+'] + #else ['@', @base] + end + end + end - end - - def init_opcode_list - init - end - - def dbg_register_list - @dbg_register_list ||= GPR::Sym - end + end + + def init_opcode_list + init + end + + def dbg_register_list + @dbg_register_list ||= GPR::Sym + end end end diff --git a/lib/metasm/metasm/cpu/sh4/opcodes.rb b/lib/metasm/metasm/cpu/sh4/opcodes.rb index 6b4913b49d..3de07acb6b 100644 --- 
a/lib/metasm/metasm/cpu/sh4/opcodes.rb +++ b/lib/metasm/metasm/cpu/sh4/opcodes.rb @@ -7,373 +7,373 @@ require 'metasm/cpu/sh4/main' module Metasm class Sh4 - def addop(name, bin, *args) - o = Opcode.new name, bin - - args.each { |a| - o.args << a if @fields_mask[a] - o.props[a] = true if @valid_props[a] - o.fields[a] = [@fields_mask[a], @fields_shift[a]] if @fields_mask[a] - } - - @opcode_list << o - end - - def init - @opcode_list = [] - - # :@rm_ is used for @Rm+ - # :@_rn is used for @-Rn - # :@r0rm is used for @(R0, Rm) (same for r0rn) - # :@r0gbr is used for @(R0, GBR) - @fields_mask = { - :rm => 0xf, :rn => 0xf, - :@rm => 0xf, :@rn => 0xf, - :@rm_ => 0xf, :@rn_ => 0xf, - :@_rn => 0xf, - - :frm => 0xf, :frn => 0xf, - :xdm => 0x7, :xdn => 0x7, - :drm => 0x7, :drn => 0x7, - :fvm => 0x3, :fvn => 0x3, - - :@r0rm => 0xf, :@r0rn => 0xf, - :rm_bank => 0x7, :rn_bank => 0x7, - - :@disprm => 0xff, :@dispr0rn => 0xff, :@disprmrn => 0xf0f, - :@dispgbr => 0xff, :@disppc => 0xff, - :disp8 => 0xff, :disp12 => 0xfff, :disppc => 0xff, - - :i8 => 0xff, # zero-extendded 8-bit immediate - :s8 => 0xff, # 8-bit displacement s is sign-extended, doubled and added to PC+4 - } - - @fields_shift = { - :rm => 4, :rn => 8, - :@rm => 4, :@rn => 8, - :@rm_ => 4, :@rn_ => 8, - :@_rn => 8, - - :frm => 4, :frn => 8, - :xdm => 5, :xdn => 9, - :drm => 5, :drn => 9, - :fvm => 8, :fvn => 10, - - :@r0rm => 4, :@r0rn => 8, - :rm_bank => 7, :rn_bank => 4, - - :@disprm => 0, :@dispr0rn => 0, :@disprmrn => 0, - :@dispgbr => 0, :@disppc => 0, - :disp8 => 0, :disp12 => 0, :disppc => 0, - - :i8 => 0, - :s8 => 0, - } - - # implicit operands - [:vbr, :gbr, :sr, :ssr, :spc, :sgr, :dbr, :mach, :macl, :pr, :fpul, :fpscr, :dbr, :pc, :r0].each { |a| @fields_mask[a] = @fields_shift[a] = 0 } - - @valid_props[:delay_slot] = true - - addop 'add', 0b0011 << 12 | 0b1100, :rm, :rn - addop 'add', 0b0111 << 12, :s8, :rn - addop 'addc', 0b0011 << 12 | 0b1110, :rm, :rn - addop 'addv', 0b0011 << 12 | 0b1111, :rm, :rn - - 
addop 'and', 0b0010 << 12 | 0b1001, :rm, :rn - addop 'and', 0b11001001 << 8, :i8, :r0 - addop 'and.b', 0b11001101 << 8, :i8, :@r0gbr - - addop 'bf', 0b10001011 << 8, :disp8, :setip - addop 'bf/s', 0b10001111 << 8, :disp8, :setip, :delay_slot - addop 'bra', 0b1010 << 12, :disp12, :setip, :stopexec, :delay_slot - addop 'braf', 0b0000 << 12 | 0b00100011, :rn, :setip, :stopexec, :delay_slot - addop 'brk', 0b0000000000111011, :stopexec # causes a pre-execution BREAK exception - addop 'bsr', 0b1011 << 12, :disp12, :setip, :saveip, :stopexec, :delay_slot - addop 'bsrf', 0b0000 << 12 | 0b00000011, :rn, :setip, :saveip, :stopexec, :delay_slot - addop 'bt', 0b10001001 << 8, :disp8, :setip - addop 'bt/s', 0b10001101 << 8, :disp8, :setip, :delay_slot - - addop 'clrmac', 0b0000000000101000 - addop 'clrs', 0b0000000001001000 - addop 'clrt', 0b0000000000001000 - - addop 'cmp/eq', 0b0011 << 12 | 0b0000, :rm, :rn - addop 'cmp/eq', 0b10001000 << 8, :s8, :r0 - addop 'cmp/ge', 0b0011 << 12 | 0b0011, :rm, :rn - addop 'cmp/gt', 0b0011 << 12 | 0b0111, :rm, :rn - addop 'cmp/hi', 0b0011 << 12 | 0b0110, :rm, :rn - addop 'cmp/hs', 0b0011 << 12 | 0b0010, :rm, :rn - addop 'cmp/pl', 0b0100 << 12 | 0b00010101, :rn - addop 'cmp/pz', 0b0100 << 12 | 0b00010001, :rn - addop 'cmp/str', 0b0010 << 12 | 0b1100, :rm, :rn - - addop 'div0s', 0b0010 << 12 | 0b0111, :rm, :rn - addop 'div0u', 0b0000000000011001 - addop 'div1', 0b0011 << 12 | 0b0100, :rm, :rn - - addop 'dmuls.l', 0b0011 << 12 | 0b1101, :rm, :rn - addop 'dmulu.l', 0b0011 << 12 | 0b0101, :rm, :rn - - addop 'dt', 0b0100 << 12 | 0b00010000, :rn - - addop 'exts.b', 0b0110 << 12 | 0b1110, :rm, :rn - addop 'exts.w', 0b0110 << 12 | 0b1111, :rm, :rn - addop 'extu.b', 0b0110 << 12 | 0b1100, :rm, :rn - addop 'extu.w', 0b0110 << 12 | 0b1101, :rm, :rn - - # fpu instructions - addop 'fabs', 0b1111 << 12 | 0b001011101, :drn - addop 'fabs', 0b1111 << 12 | 0b01011101, :frn - - addop 'fadd', 0b1111 << 12 | 0b0 << 8 | 0b00000, :drm, :drn - addop 'fadd', 0b1111 
<< 12 | 0b0000, :frm, :frn - - addop 'fcmp/eq', 0b1111 << 12 | 0b0 << 8 | 0b00100, :drm, :drn - addop 'fcmp/eq', 0b1111 << 12 | 0b0100, :frm, :frn - - addop 'fcmp/gt', 0b1111 << 12 | 0b0 << 8 | 0b00101, :drm, :drn - addop 'fcmp/gt', 0b1111 << 12 | 0b0101, :frm, :frn - - addop 'fcnvds', 0b1111 << 12 | 0b010111101, :drn, :fpul - addop 'fcnvsd', 0b1111 << 12 | 0b010101101, :fpul, :drn - - addop 'fdiv', 0b1111 << 12 | 0b0 << 8 | 0b00011, :drm, :drn - addop 'fdiv', 0b1111 << 12 | 0b0011, :frm, :frn - addop 'fipr', 0b1111 << 12 | 0b11101101, :fvm, :fvn - - addop 'flds', 0b1111 << 12 | 0b00011101, :frn, :fpul - addop 'fldi0', 0b1111 << 12 | 0b10001101, :frn - addop 'fldi1', 0b1111 << 12 | 0b10011101, :frn - - addop 'float', 0b1111 << 12 | 0b000101101, :fpul, :drn - addop 'float', 0b1111 << 12 | 0b00101101, :fpul, :frn - - addop 'fmac', 0b1111 << 12 | 0b1110, :fr0, :frm, :frn - - addop 'fmov', 0b1111 << 12 | 0b0 << 8 | 0b01100, :drm, :drn - addop 'fmov', 0b1111 << 12 | 0b1 << 8 | 0b01100, :drm, :xdn - addop 'fmov', 0b1111 << 12 | 0b01010, :drm, :@rn - addop 'fmov', 0b1111 << 12 | 0b01011, :drm, :@_rn - addop 'fmov', 0b1111 << 12 | 0b00111, :drm, :@r0rn - - addop 'fmov.s', 0b1111 << 12 | 0b1100, :frm, :frn - addop 'fmov.s', 0b1111 << 12 | 0b1010, :frm, :@rn - addop 'fmov.s', 0b1111 << 12 | 0b1011, :frm, :@_rn - addop 'fmov.s', 0b1111 << 12 | 0b0111, :frm, :@r0rn - - addop 'fmov', 0b1111 << 12 | 0b0 << 8 | 0b11100, :xdm, :drn - addop 'fmov', 0b1111 << 12 | 0b1 << 8 | 0b11100, :xdm, :xdn - addop 'fmov', 0b1111 << 12 | 0b11010, :xdm, :@rn - addop 'fmov', 0b1111 << 12 | 0b11011, :xdm, :@_rn - addop 'fmov', 0b1111 << 12 | 0b10111, :xdm, :@r0rn - - addop 'fmov', 0b1111 << 12 | 0b0 << 8 | 0b1000, :@rm, :drn - addop 'fmov', 0b1111 << 12 | 0b0 << 8 | 0b1001, :@rm_, :drn - addop 'fmov', 0b1111 << 12 | 0b0 << 8 | 0b0110, :@r0rm, :drn - - addop 'fmov.s', 0b1111 << 12 | 0b1000, :@rm, :frn - addop 'fmov.s', 0b1111 << 12 | 0b1001, :@rm_, :frn - addop 'fmov.s', 0b1111 << 12 | 0b0110, 
:@r0rm, :frn - - addop 'fmov', 0b1111 << 12 | 0b1 << 8 | 0b1000, :@rm, :xdn - addop 'fmov', 0b1111 << 12 | 0b1 << 8 | 0b1001, :@rm_, :xdn - addop 'fmov', 0b1111 << 12 | 0b1 << 8 | 0b0110, :@r0rm, :xdn - - addop 'fmul', 0b1111 << 12 | 0b0 << 8 | 0b00010, :drm, :drn - addop 'fmul', 0b1111 << 12 | 0b0010, :frm, :frn - - addop 'fneg', 0b1111 << 12 | 0b001001101, :drn - addop 'fneg', 0b1111 << 12 | 0b01001101, :frn - - addop 'frchg', 0b1111101111111101 - addop 'fschg', 0b1111001111111101 - - addop 'fsqrt', 0b1111 << 12 | 0b001101101, :drn - addop 'fsqrt', 0b1111 << 12 | 0b01101101, :frn - addop 'fsts', 0b1111 << 12 | 0b00001101, :fpul, :frn - - addop 'fsub', 0b1111 << 12 | 0b0 << 8 | 0b00001, :@drm, :drn - addop 'fsub', 0b1111 << 12 | 0b0001, :frm, :frn - - addop 'ftrc', 0b1111 << 12 | 0b000111101, :drn, :fpul - addop 'ftrc', 0b1111 << 12 | 0b00111101, :frn, :fpul - addop 'ftrv', 0b1111 << 12 | 0b0111111101, :xmtrx, :fvn - - addop 'jmp', 0b0100 << 12 | 0b00101011, :rn, :setip, :stopexec, :delay_slot - addop 'jsr', 0b0100 << 12 | 0b00001011, :rn, :setip, :saveip, :stopexec, :delay_slot - - addop 'ldc', 0b0100 << 12 | 0b00011110, :rn, :gbr - addop 'ldc', 0b0100 << 12 | 0b00001110, :rn, :sr # privileged instruction - addop 'ldc', 0b0100 << 12 | 0b00101110, :rn, :vbr # privileged instruction - addop 'ldc', 0b0100 << 12 | 0b00111110, :rn, :ssr # privileged instruction - addop 'ldc', 0b0100 << 12 | 0b01001110, :rn, :spc # privileged instruction - addop 'ldc', 0b0100 << 12 | 0b11111010, :rn, :dbr # privileged instruction - addop 'ldc', 0b0100 << 12 | 0b1 << 7 | 0b1110, :rn, :rn_bank # privileged instruction - - addop 'ldc.l', 0b0100 << 12 | 0b00010111, :@rn_, :gbr - addop 'ldc.l', 0b0100 << 12 | 0b00000111, :@rn_, :sr # privileged instruction - addop 'ldc.l', 0b0100 << 12 | 0b00100111, :@rn_, :vbr # privileged instruction - addop 'ldc.l', 0b0100 << 12 | 0b00110111, :@rn_, :ssr # privileged instruction - addop 'ldc.l', 0b0100 << 12 | 0b01000111, :@rn_, :spc # privileged 
instruction - addop 'ldc.l', 0b0100 << 12 | 0b11110110, :@rn_, :dbr # privileged instruction - addop 'ldc.l', 0b0100 << 12 | 0b1 << 7 | 0b0111, :@rn_, :rn_bank # privileged instruction - - addop 'lds', 0b0100 << 12 | 0b01101010, :rn, :fpscr - addop 'lds.l', 0b0100 << 12 | 0b01100110, :@rn_, :fpscr - addop 'lds', 0b0100 << 12 | 0b01011010, :rn, :fpul - addop 'lds.l', 0b0100 << 12 | 0b01010110, :@rn_, :fpul - addop 'lds', 0b0100 << 12 | 0b00001010, :rn, :mach - addop 'lds.l', 0b0100 << 12 | 0b00000110, :@rn_, :mach - addop 'lds', 0b0100 << 12 | 0b00011010, :rn, :macl - addop 'lds.l', 0b0100 << 12 | 0b00010110, :@rn_, :macl - addop 'lds', 0b0100 << 12 | 0b00101010, :rn, :pr - addop 'lds.l', 0b0100 << 12 | 0b00100110, :@rn_, :pr - - addop 'ldtlb', 0b0000000000111000 - - addop 'mac.l', 0b0000 << 12 | 0b1111, :@rm_, :@rn_ - addop 'mac.w', 0b0100 << 12 | 0b1111, :@rm_, :@rn_ - - addop 'mov', 0b0110 << 12 | 0b0011, :rm, :rn - addop 'mov', 0b1110 << 12, :s8, :rn - - addop 'mov.b', 0b0010 << 12 | 0b0000, :rm, :@rn - addop 'mov.b', 0b0010 << 12 | 0b0100, :rm, :@_rn - addop 'mov.b', 0b0000 << 12 | 0b0100, :rm, :@r0rn - addop 'mov.b', 0b11000000 << 8, :r0, :@dispgbr - addop 'mov.b', 0b10000000 << 8, :r0, :@dispr0rn - addop 'mov.b', 0b0110 << 12 | 0b0000, :@rm, :rn - addop 'mov.b', 0b0110 << 12 | 0b0100, :@rm_, :rn - addop 'mov.b', 0b0000 << 12 | 0b1100, :@r0rm, :rn - addop 'mov.b', 0b11000100 << 8, :@dispgbr, :r0 - addop 'mov.b', 0b10000100 << 8, :@dispr0rn, :r0 - - addop 'mov.l', 0b0010 << 12 | 0b0010, :rm, :@rn - addop 'mov.l', 0b0010 << 12 | 0b0110, :rm, :@_rn - addop 'mov.l', 0b0000 << 12 | 0b0110, :rm, :@r0rn - addop 'mov.l', 0b11000010 << 8, :r0, :@dispgbr - addop 'mov.l', 0b0001 << 12, :rm, :@disprmrn - addop 'mov.l', 0b0110 << 12 | 0b0010, :@rm, :rn - addop 'mov.l', 0b0110 << 12 | 0b0110, :@rm_, :rn - addop 'mov.l', 0b0000 << 12 | 0b1110, :@r0rm, :rn - addop 'mov.l', 0b11000110 << 8, :@dispgbr, :r0 - addop 'mov.l', 0b1101 << 12, :@disppc, :rn - addop 'mov.l', 0b0101 << 
12, :@disprm, :rn - - addop 'mov.w', 0b0010 << 12 | 0b0001, :rm, :@rn - addop 'mov.w', 0b0010 << 12 | 0b0101, :rm, :@_rn - addop 'mov.w', 0b0000 << 12 | 0b0101, :rm, :@r0rn - addop 'mov.w', 0b11000001 << 8, :r0, :@dispgbr - addop 'mov.w', 0b10000001 << 8, :r0, :@dispr0rn - addop 'mov.w', 0b0110 << 12 | 0b0001, :@rm, :rn - addop 'mov.w', 0b0110 << 12 | 0b0101, :@rm_, :rn - addop 'mov.w', 0b0000 << 12 | 0b1101, :@r0rm, :rn - addop 'mov.w', 0b11000101 << 8, :@dispgbr, :r0 - addop 'mov.w', 0b1001 << 12, :@disppc, :rn - addop 'mov.w', 0b10000101 << 8, :@disprm, :r0 - - addop 'mova', 0b11000111 << 8, :disppc, :r0 # calculates an effective address using PC-relative with displacement addressing - addop 'movca.l', 0b0000 << 12 | 11000011, :r0, :@rn # stores the long-word in R0 to memory at the effective address specified in Rn. - - addop 'movt', 0b0000 << 12 | 0b00101001, :rn # copies the T-bit to Rn - - addop 'mul.l', 0b0000 << 12 | 0b0111, :rm, :rn - addop 'muls.w', 0b0010 << 12 | 0b1111, :rm, :rn - addop 'mulu.w', 0b0010 << 12 | 0b1110, :rm, :rn - - addop 'neg', 0b0110 << 12 | 0b1011, :rm, :rn - addop 'negc', 0b0110 << 12 | 0b1010, :rm, :rn - - addop 'nop', 0b0000000000001001 - - addop 'not', 0b0110 << 12 | 0b0111, :rm, :rn - - addop 'ocbi', 0b0000 << 12 | 0b10010011, :@rn # invalidates an operand cache block - addop 'ocbp', 0b0000 << 12 | 0b10100011, :@rn # purges an operand cache block - addop 'ocbwb', 0b0000 << 12 | 0b10110011, :@rn # write-backs an operand cache block - - addop 'or', 0b0010 << 12 | 0b1011, :rm, :rn - addop 'or', 0b11001011 << 8, :i8, :r0 - addop 'or.b', 0b11001111 << 8, :i8, :@r0gbr - - addop 'pref', 0b0000 | 0b10000011, :@rn # indicates a software-directed data prefetch - - addop 'rotcl', 0b0100 | 0b00100100, :rn - addop 'rotcr', 0b0100 | 0b00100101, :rn - addop 'rotl', 0b0100 | 0b00000100, :rn - addop 'rotr', 0b0100 | 0b00000101, :rn - - addop 'rte', 0b0000000000101011, :setip, :stopexec, :delay_slot # returns from an exception or interrupt 
handling routine, privileged instruction - addop 'rts', 0b0000000000001011, :setip, :stopexec, :delay_slot # returns from a subroutine - - addop 'sets', 0b0000000001011000 - addop 'sett', 0b0000000000011000 - - addop 'shad', 0b0100 << 12 | 0b1100, :rm, :rn - addop 'shal', 0b0100 << 12 | 0b00100000, :rn - addop 'shar', 0b0100 << 12 | 0b00100001, :rn - addop 'shld', 0b0100 << 12 | 0b1101, :rm, :rn - addop 'shll', 0b0100 << 12 | 0b00000000, :rn - addop 'shll2', 0b0100 << 12 | 0b00001000, :rn - addop 'shll8', 0b0100 << 12 | 0b00011000, :rn - addop 'shll16', 0b0100 << 12 | 0b00101000, :rn - addop 'shlr', 0b0100 << 12 | 0b00000001, :rn - addop 'shlr2', 0b0100 << 12 | 0b00001001, :rn - addop 'shlr8', 0b0100 << 12 | 0b00011001, :rn - addop 'shlr16', 0b0100 << 12 | 0b00101001, :rn - - addop 'sleep', 0b0000000000011011 # privileged instruction - - addop 'stc', 0b0000 << 12 | 0b00000010, :sr, :rn - addop 'stc', 0b0000 << 12 | 0b00100010, :vbr, :rn - addop 'stc', 0b0000 << 12 | 0b00110010, :ssr, :rn - addop 'stc', 0b0000 << 12 | 0b01000010, :spc, :rn - addop 'stc', 0b0000 << 12 | 0b00111010, :sgr, :rn - addop 'stc', 0b0000 << 12 | 0b11111010, :dbr, :rn - addop 'stc', 0b0000 << 12 | 0b1 << 7 | 0b0010, :rm_bank, :@_rn - addop 'stc', 0b0000 << 12 | 0b00010010, :gbr, :rn - - addop 'stc.l', 0b0100 << 12 | 0b00000011, :sr, :@_rn - addop 'stc.l', 0b0100 << 12 | 0b00100011, :vbr, :@_rn - addop 'stc.l', 0b0100 << 12 | 0b00110011, :ssr, :@_rn - addop 'stc.l', 0b0100 << 12 | 0b01000011, :spc, :@_rn - addop 'stc.l', 0b0100 << 12 | 0b00110010, :sgr, :@_rn - addop 'stc.l', 0b0100 << 12 | 0b11110010, :dbr, :@_rn - addop 'stc.l', 0b0100 << 12 | 0b1 << 7 | 0b0011, :rm_bank, :@_rn - addop 'stc.l', 0b0100 << 12 | 0b00010011, :gbr, :@_rn - - addop 'sts', 0b0000 << 12 | 0b01101010, :fpscr, :rn - addop 'sts.l', 0b0100 << 12 | 0b01100010, :fpscr, :@_rn - addop 'sts', 0b0000 << 12 | 0b01011010, :fpul, :rn - addop 'sts.l', 0b0100 << 12 | 0b01010010, :fpul, :@_rn - addop 'sts', 0b0000 << 12 | 
0b00001010, :mach, :rn - addop 'sts.l', 0b0100 << 12 | 0b00000010, :mach, :@_rn - addop 'sts', 0b0000 << 12 | 0b00011010, :macl, :rn - addop 'sts.l', 0b0100 << 12 | 0b00010010, :macl, :@_rn - addop 'sts', 0b0000 << 12 | 0b00101010, :pr, :rn - addop 'sts.l', 0b0100 << 12 | 0b00100010, :pr, :@_rn - - addop 'sub', 0b0011 << 12 | 0b1000, :rm, :rn - addop 'subc', 0b0011 << 12 | 0b1010, :rm, :rn - addop 'subv', 0b0011 << 12 | 0b1011, :rm, :rn - - addop 'swap.b', 0b0110 << 12 | 0b1000, :rm, :rn - addop 'swap.w', 0b0110 << 12 | 0b1001, :rm, :rn - - addop 'tas.b', 0b0100 << 12 | 0b00011011, :@rn - addop 'trapa', 0b11000011 << 8, :i8, :setip, :stopexec # This instruction causes a pre-execution trap. - - addop 'tst', 0b0010 << 12 | 0b1000, :rm, :rn - addop 'tst', 0b11001000 << 8, :i8, :r0 - addop 'tst.b', 0b11001100 << 8, :i8, :@r0gbr - - addop 'xor', 0b0010 << 12 | 0b1010, :rm, :rn - addop 'xor', 0b11001010 << 8, :i8, :r0 - addop 'xob.b', 0b11001110 << 8, :i8, :@r0gbr - - addop 'xtrct', 0b0010 << 12 | 0b1101, :rm, :rn - end + def addop(name, bin, *args) + o = Opcode.new name, bin + + args.each { |a| + o.args << a if @fields_mask[a] + o.props[a] = true if @valid_props[a] + o.fields[a] = [@fields_mask[a], @fields_shift[a]] if @fields_mask[a] + } + + @opcode_list << o + end + + def init + @opcode_list = [] + + # :@rm_ is used for @Rm+ + # :@_rn is used for @-Rn + # :@r0rm is used for @(R0, Rm) (same for r0rn) + # :@r0gbr is used for @(R0, GBR) + @fields_mask = { + :rm => 0xf, :rn => 0xf, + :@rm => 0xf, :@rn => 0xf, + :@rm_ => 0xf, :@rn_ => 0xf, + :@_rn => 0xf, + + :frm => 0xf, :frn => 0xf, + :xdm => 0x7, :xdn => 0x7, + :drm => 0x7, :drn => 0x7, + :fvm => 0x3, :fvn => 0x3, + + :@r0rm => 0xf, :@r0rn => 0xf, + :rm_bank => 0x7, :rn_bank => 0x7, + + :@disprm => 0xff, :@dispr0rn => 0xff, :@disprmrn => 0xf0f, + :@dispgbr => 0xff, :@disppc => 0xff, + :disp8 => 0xff, :disp12 => 0xfff, :disppc => 0xff, + + :i8 => 0xff, # zero-extendded 8-bit immediate + :s8 => 0xff, # 8-bit displacement 
s is sign-extended, doubled and added to PC+4 + } + + @fields_shift = { + :rm => 4, :rn => 8, + :@rm => 4, :@rn => 8, + :@rm_ => 4, :@rn_ => 8, + :@_rn => 8, + + :frm => 4, :frn => 8, + :xdm => 5, :xdn => 9, + :drm => 5, :drn => 9, + :fvm => 8, :fvn => 10, + + :@r0rm => 4, :@r0rn => 8, + :rm_bank => 7, :rn_bank => 4, + + :@disprm => 0, :@dispr0rn => 0, :@disprmrn => 0, + :@dispgbr => 0, :@disppc => 0, + :disp8 => 0, :disp12 => 0, :disppc => 0, + + :i8 => 0, + :s8 => 0, + } + + # implicit operands + [:vbr, :gbr, :sr, :ssr, :spc, :sgr, :dbr, :mach, :macl, :pr, :fpul, :fpscr, :dbr, :pc, :r0].each { |a| @fields_mask[a] = @fields_shift[a] = 0 } + + @valid_props[:delay_slot] = true + + addop 'add', 0b0011 << 12 | 0b1100, :rm, :rn + addop 'add', 0b0111 << 12, :s8, :rn + addop 'addc', 0b0011 << 12 | 0b1110, :rm, :rn + addop 'addv', 0b0011 << 12 | 0b1111, :rm, :rn + + addop 'and', 0b0010 << 12 | 0b1001, :rm, :rn + addop 'and', 0b11001001 << 8, :i8, :r0 + addop 'and.b', 0b11001101 << 8, :i8, :@r0gbr + + addop 'bf', 0b10001011 << 8, :disp8, :setip + addop 'bf/s', 0b10001111 << 8, :disp8, :setip, :delay_slot + addop 'bra', 0b1010 << 12, :disp12, :setip, :stopexec, :delay_slot + addop 'braf', 0b0000 << 12 | 0b00100011, :rn, :setip, :stopexec, :delay_slot + addop 'brk', 0b0000000000111011, :stopexec # causes a pre-execution BREAK exception + addop 'bsr', 0b1011 << 12, :disp12, :setip, :saveip, :stopexec, :delay_slot + addop 'bsrf', 0b0000 << 12 | 0b00000011, :rn, :setip, :saveip, :stopexec, :delay_slot + addop 'bt', 0b10001001 << 8, :disp8, :setip + addop 'bt/s', 0b10001101 << 8, :disp8, :setip, :delay_slot + + addop 'clrmac', 0b0000000000101000 + addop 'clrs', 0b0000000001001000 + addop 'clrt', 0b0000000000001000 + + addop 'cmp/eq', 0b0011 << 12 | 0b0000, :rm, :rn + addop 'cmp/eq', 0b10001000 << 8, :s8, :r0 + addop 'cmp/ge', 0b0011 << 12 | 0b0011, :rm, :rn + addop 'cmp/gt', 0b0011 << 12 | 0b0111, :rm, :rn + addop 'cmp/hi', 0b0011 << 12 | 0b0110, :rm, :rn + addop 'cmp/hs', 
0b0011 << 12 | 0b0010, :rm, :rn + addop 'cmp/pl', 0b0100 << 12 | 0b00010101, :rn + addop 'cmp/pz', 0b0100 << 12 | 0b00010001, :rn + addop 'cmp/str', 0b0010 << 12 | 0b1100, :rm, :rn + + addop 'div0s', 0b0010 << 12 | 0b0111, :rm, :rn + addop 'div0u', 0b0000000000011001 + addop 'div1', 0b0011 << 12 | 0b0100, :rm, :rn + + addop 'dmuls.l', 0b0011 << 12 | 0b1101, :rm, :rn + addop 'dmulu.l', 0b0011 << 12 | 0b0101, :rm, :rn + + addop 'dt', 0b0100 << 12 | 0b00010000, :rn + + addop 'exts.b', 0b0110 << 12 | 0b1110, :rm, :rn + addop 'exts.w', 0b0110 << 12 | 0b1111, :rm, :rn + addop 'extu.b', 0b0110 << 12 | 0b1100, :rm, :rn + addop 'extu.w', 0b0110 << 12 | 0b1101, :rm, :rn + + # fpu instructions + addop 'fabs', 0b1111 << 12 | 0b001011101, :drn + addop 'fabs', 0b1111 << 12 | 0b01011101, :frn + + addop 'fadd', 0b1111 << 12 | 0b0 << 8 | 0b00000, :drm, :drn + addop 'fadd', 0b1111 << 12 | 0b0000, :frm, :frn + + addop 'fcmp/eq', 0b1111 << 12 | 0b0 << 8 | 0b00100, :drm, :drn + addop 'fcmp/eq', 0b1111 << 12 | 0b0100, :frm, :frn + + addop 'fcmp/gt', 0b1111 << 12 | 0b0 << 8 | 0b00101, :drm, :drn + addop 'fcmp/gt', 0b1111 << 12 | 0b0101, :frm, :frn + + addop 'fcnvds', 0b1111 << 12 | 0b010111101, :drn, :fpul + addop 'fcnvsd', 0b1111 << 12 | 0b010101101, :fpul, :drn + + addop 'fdiv', 0b1111 << 12 | 0b0 << 8 | 0b00011, :drm, :drn + addop 'fdiv', 0b1111 << 12 | 0b0011, :frm, :frn + addop 'fipr', 0b1111 << 12 | 0b11101101, :fvm, :fvn + + addop 'flds', 0b1111 << 12 | 0b00011101, :frn, :fpul + addop 'fldi0', 0b1111 << 12 | 0b10001101, :frn + addop 'fldi1', 0b1111 << 12 | 0b10011101, :frn + + addop 'float', 0b1111 << 12 | 0b000101101, :fpul, :drn + addop 'float', 0b1111 << 12 | 0b00101101, :fpul, :frn + + addop 'fmac', 0b1111 << 12 | 0b1110, :fr0, :frm, :frn + + addop 'fmov', 0b1111 << 12 | 0b0 << 8 | 0b01100, :drm, :drn + addop 'fmov', 0b1111 << 12 | 0b1 << 8 | 0b01100, :drm, :xdn + addop 'fmov', 0b1111 << 12 | 0b01010, :drm, :@rn + addop 'fmov', 0b1111 << 12 | 0b01011, :drm, :@_rn + addop 
'fmov', 0b1111 << 12 | 0b00111, :drm, :@r0rn + + addop 'fmov.s', 0b1111 << 12 | 0b1100, :frm, :frn + addop 'fmov.s', 0b1111 << 12 | 0b1010, :frm, :@rn + addop 'fmov.s', 0b1111 << 12 | 0b1011, :frm, :@_rn + addop 'fmov.s', 0b1111 << 12 | 0b0111, :frm, :@r0rn + + addop 'fmov', 0b1111 << 12 | 0b0 << 8 | 0b11100, :xdm, :drn + addop 'fmov', 0b1111 << 12 | 0b1 << 8 | 0b11100, :xdm, :xdn + addop 'fmov', 0b1111 << 12 | 0b11010, :xdm, :@rn + addop 'fmov', 0b1111 << 12 | 0b11011, :xdm, :@_rn + addop 'fmov', 0b1111 << 12 | 0b10111, :xdm, :@r0rn + + addop 'fmov', 0b1111 << 12 | 0b0 << 8 | 0b1000, :@rm, :drn + addop 'fmov', 0b1111 << 12 | 0b0 << 8 | 0b1001, :@rm_, :drn + addop 'fmov', 0b1111 << 12 | 0b0 << 8 | 0b0110, :@r0rm, :drn + + addop 'fmov.s', 0b1111 << 12 | 0b1000, :@rm, :frn + addop 'fmov.s', 0b1111 << 12 | 0b1001, :@rm_, :frn + addop 'fmov.s', 0b1111 << 12 | 0b0110, :@r0rm, :frn + + addop 'fmov', 0b1111 << 12 | 0b1 << 8 | 0b1000, :@rm, :xdn + addop 'fmov', 0b1111 << 12 | 0b1 << 8 | 0b1001, :@rm_, :xdn + addop 'fmov', 0b1111 << 12 | 0b1 << 8 | 0b0110, :@r0rm, :xdn + + addop 'fmul', 0b1111 << 12 | 0b0 << 8 | 0b00010, :drm, :drn + addop 'fmul', 0b1111 << 12 | 0b0010, :frm, :frn + + addop 'fneg', 0b1111 << 12 | 0b001001101, :drn + addop 'fneg', 0b1111 << 12 | 0b01001101, :frn + + addop 'frchg', 0b1111101111111101 + addop 'fschg', 0b1111001111111101 + + addop 'fsqrt', 0b1111 << 12 | 0b001101101, :drn + addop 'fsqrt', 0b1111 << 12 | 0b01101101, :frn + addop 'fsts', 0b1111 << 12 | 0b00001101, :fpul, :frn + + addop 'fsub', 0b1111 << 12 | 0b0 << 8 | 0b00001, :@drm, :drn + addop 'fsub', 0b1111 << 12 | 0b0001, :frm, :frn + + addop 'ftrc', 0b1111 << 12 | 0b000111101, :drn, :fpul + addop 'ftrc', 0b1111 << 12 | 0b00111101, :frn, :fpul + addop 'ftrv', 0b1111 << 12 | 0b0111111101, :xmtrx, :fvn + + addop 'jmp', 0b0100 << 12 | 0b00101011, :rn, :setip, :stopexec, :delay_slot + addop 'jsr', 0b0100 << 12 | 0b00001011, :rn, :setip, :saveip, :stopexec, :delay_slot + + addop 'ldc', 0b0100 << 
12 | 0b00011110, :rn, :gbr + addop 'ldc', 0b0100 << 12 | 0b00001110, :rn, :sr # privileged instruction + addop 'ldc', 0b0100 << 12 | 0b00101110, :rn, :vbr # privileged instruction + addop 'ldc', 0b0100 << 12 | 0b00111110, :rn, :ssr # privileged instruction + addop 'ldc', 0b0100 << 12 | 0b01001110, :rn, :spc # privileged instruction + addop 'ldc', 0b0100 << 12 | 0b11111010, :rn, :dbr # privileged instruction + addop 'ldc', 0b0100 << 12 | 0b1 << 7 | 0b1110, :rn, :rn_bank # privileged instruction + + addop 'ldc.l', 0b0100 << 12 | 0b00010111, :@rn_, :gbr + addop 'ldc.l', 0b0100 << 12 | 0b00000111, :@rn_, :sr # privileged instruction + addop 'ldc.l', 0b0100 << 12 | 0b00100111, :@rn_, :vbr # privileged instruction + addop 'ldc.l', 0b0100 << 12 | 0b00110111, :@rn_, :ssr # privileged instruction + addop 'ldc.l', 0b0100 << 12 | 0b01000111, :@rn_, :spc # privileged instruction + addop 'ldc.l', 0b0100 << 12 | 0b11110110, :@rn_, :dbr # privileged instruction + addop 'ldc.l', 0b0100 << 12 | 0b1 << 7 | 0b0111, :@rn_, :rn_bank # privileged instruction + + addop 'lds', 0b0100 << 12 | 0b01101010, :rn, :fpscr + addop 'lds.l', 0b0100 << 12 | 0b01100110, :@rn_, :fpscr + addop 'lds', 0b0100 << 12 | 0b01011010, :rn, :fpul + addop 'lds.l', 0b0100 << 12 | 0b01010110, :@rn_, :fpul + addop 'lds', 0b0100 << 12 | 0b00001010, :rn, :mach + addop 'lds.l', 0b0100 << 12 | 0b00000110, :@rn_, :mach + addop 'lds', 0b0100 << 12 | 0b00011010, :rn, :macl + addop 'lds.l', 0b0100 << 12 | 0b00010110, :@rn_, :macl + addop 'lds', 0b0100 << 12 | 0b00101010, :rn, :pr + addop 'lds.l', 0b0100 << 12 | 0b00100110, :@rn_, :pr + + addop 'ldtlb', 0b0000000000111000 + + addop 'mac.l', 0b0000 << 12 | 0b1111, :@rm_, :@rn_ + addop 'mac.w', 0b0100 << 12 | 0b1111, :@rm_, :@rn_ + + addop 'mov', 0b0110 << 12 | 0b0011, :rm, :rn + addop 'mov', 0b1110 << 12, :s8, :rn + + addop 'mov.b', 0b0010 << 12 | 0b0000, :rm, :@rn + addop 'mov.b', 0b0010 << 12 | 0b0100, :rm, :@_rn + addop 'mov.b', 0b0000 << 12 | 0b0100, :rm, :@r0rn + addop 
'mov.b', 0b11000000 << 8, :r0, :@dispgbr + addop 'mov.b', 0b10000000 << 8, :r0, :@dispr0rn + addop 'mov.b', 0b0110 << 12 | 0b0000, :@rm, :rn + addop 'mov.b', 0b0110 << 12 | 0b0100, :@rm_, :rn + addop 'mov.b', 0b0000 << 12 | 0b1100, :@r0rm, :rn + addop 'mov.b', 0b11000100 << 8, :@dispgbr, :r0 + addop 'mov.b', 0b10000100 << 8, :@dispr0rn, :r0 + + addop 'mov.l', 0b0010 << 12 | 0b0010, :rm, :@rn + addop 'mov.l', 0b0010 << 12 | 0b0110, :rm, :@_rn + addop 'mov.l', 0b0000 << 12 | 0b0110, :rm, :@r0rn + addop 'mov.l', 0b11000010 << 8, :r0, :@dispgbr + addop 'mov.l', 0b0001 << 12, :rm, :@disprmrn + addop 'mov.l', 0b0110 << 12 | 0b0010, :@rm, :rn + addop 'mov.l', 0b0110 << 12 | 0b0110, :@rm_, :rn + addop 'mov.l', 0b0000 << 12 | 0b1110, :@r0rm, :rn + addop 'mov.l', 0b11000110 << 8, :@dispgbr, :r0 + addop 'mov.l', 0b1101 << 12, :@disppc, :rn + addop 'mov.l', 0b0101 << 12, :@disprm, :rn + + addop 'mov.w', 0b0010 << 12 | 0b0001, :rm, :@rn + addop 'mov.w', 0b0010 << 12 | 0b0101, :rm, :@_rn + addop 'mov.w', 0b0000 << 12 | 0b0101, :rm, :@r0rn + addop 'mov.w', 0b11000001 << 8, :r0, :@dispgbr + addop 'mov.w', 0b10000001 << 8, :r0, :@dispr0rn + addop 'mov.w', 0b0110 << 12 | 0b0001, :@rm, :rn + addop 'mov.w', 0b0110 << 12 | 0b0101, :@rm_, :rn + addop 'mov.w', 0b0000 << 12 | 0b1101, :@r0rm, :rn + addop 'mov.w', 0b11000101 << 8, :@dispgbr, :r0 + addop 'mov.w', 0b1001 << 12, :@disppc, :rn + addop 'mov.w', 0b10000101 << 8, :@disprm, :r0 + + addop 'mova', 0b11000111 << 8, :disppc, :r0 # calculates an effective address using PC-relative with displacement addressing + addop 'movca.l', 0b0000 << 12 | 11000011, :r0, :@rn # stores the long-word in R0 to memory at the effective address specified in Rn. 
+ + addop 'movt', 0b0000 << 12 | 0b00101001, :rn # copies the T-bit to Rn + + addop 'mul.l', 0b0000 << 12 | 0b0111, :rm, :rn + addop 'muls.w', 0b0010 << 12 | 0b1111, :rm, :rn + addop 'mulu.w', 0b0010 << 12 | 0b1110, :rm, :rn + + addop 'neg', 0b0110 << 12 | 0b1011, :rm, :rn + addop 'negc', 0b0110 << 12 | 0b1010, :rm, :rn + + addop 'nop', 0b0000000000001001 + + addop 'not', 0b0110 << 12 | 0b0111, :rm, :rn + + addop 'ocbi', 0b0000 << 12 | 0b10010011, :@rn # invalidates an operand cache block + addop 'ocbp', 0b0000 << 12 | 0b10100011, :@rn # purges an operand cache block + addop 'ocbwb', 0b0000 << 12 | 0b10110011, :@rn # write-backs an operand cache block + + addop 'or', 0b0010 << 12 | 0b1011, :rm, :rn + addop 'or', 0b11001011 << 8, :i8, :r0 + addop 'or.b', 0b11001111 << 8, :i8, :@r0gbr + + addop 'pref', 0b0000 | 0b10000011, :@rn # indicates a software-directed data prefetch + + addop 'rotcl', 0b0100 | 0b00100100, :rn + addop 'rotcr', 0b0100 | 0b00100101, :rn + addop 'rotl', 0b0100 | 0b00000100, :rn + addop 'rotr', 0b0100 | 0b00000101, :rn + + addop 'rte', 0b0000000000101011, :setip, :stopexec, :delay_slot # returns from an exception or interrupt handling routine, privileged instruction + addop 'rts', 0b0000000000001011, :setip, :stopexec, :delay_slot # returns from a subroutine + + addop 'sets', 0b0000000001011000 + addop 'sett', 0b0000000000011000 + + addop 'shad', 0b0100 << 12 | 0b1100, :rm, :rn + addop 'shal', 0b0100 << 12 | 0b00100000, :rn + addop 'shar', 0b0100 << 12 | 0b00100001, :rn + addop 'shld', 0b0100 << 12 | 0b1101, :rm, :rn + addop 'shll', 0b0100 << 12 | 0b00000000, :rn + addop 'shll2', 0b0100 << 12 | 0b00001000, :rn + addop 'shll8', 0b0100 << 12 | 0b00011000, :rn + addop 'shll16', 0b0100 << 12 | 0b00101000, :rn + addop 'shlr', 0b0100 << 12 | 0b00000001, :rn + addop 'shlr2', 0b0100 << 12 | 0b00001001, :rn + addop 'shlr8', 0b0100 << 12 | 0b00011001, :rn + addop 'shlr16', 0b0100 << 12 | 0b00101001, :rn + + addop 'sleep', 0b0000000000011011 # privileged 
instruction + + addop 'stc', 0b0000 << 12 | 0b00000010, :sr, :rn + addop 'stc', 0b0000 << 12 | 0b00100010, :vbr, :rn + addop 'stc', 0b0000 << 12 | 0b00110010, :ssr, :rn + addop 'stc', 0b0000 << 12 | 0b01000010, :spc, :rn + addop 'stc', 0b0000 << 12 | 0b00111010, :sgr, :rn + addop 'stc', 0b0000 << 12 | 0b11111010, :dbr, :rn + addop 'stc', 0b0000 << 12 | 0b1 << 7 | 0b0010, :rm_bank, :@_rn + addop 'stc', 0b0000 << 12 | 0b00010010, :gbr, :rn + + addop 'stc.l', 0b0100 << 12 | 0b00000011, :sr, :@_rn + addop 'stc.l', 0b0100 << 12 | 0b00100011, :vbr, :@_rn + addop 'stc.l', 0b0100 << 12 | 0b00110011, :ssr, :@_rn + addop 'stc.l', 0b0100 << 12 | 0b01000011, :spc, :@_rn + addop 'stc.l', 0b0100 << 12 | 0b00110010, :sgr, :@_rn + addop 'stc.l', 0b0100 << 12 | 0b11110010, :dbr, :@_rn + addop 'stc.l', 0b0100 << 12 | 0b1 << 7 | 0b0011, :rm_bank, :@_rn + addop 'stc.l', 0b0100 << 12 | 0b00010011, :gbr, :@_rn + + addop 'sts', 0b0000 << 12 | 0b01101010, :fpscr, :rn + addop 'sts.l', 0b0100 << 12 | 0b01100010, :fpscr, :@_rn + addop 'sts', 0b0000 << 12 | 0b01011010, :fpul, :rn + addop 'sts.l', 0b0100 << 12 | 0b01010010, :fpul, :@_rn + addop 'sts', 0b0000 << 12 | 0b00001010, :mach, :rn + addop 'sts.l', 0b0100 << 12 | 0b00000010, :mach, :@_rn + addop 'sts', 0b0000 << 12 | 0b00011010, :macl, :rn + addop 'sts.l', 0b0100 << 12 | 0b00010010, :macl, :@_rn + addop 'sts', 0b0000 << 12 | 0b00101010, :pr, :rn + addop 'sts.l', 0b0100 << 12 | 0b00100010, :pr, :@_rn + + addop 'sub', 0b0011 << 12 | 0b1000, :rm, :rn + addop 'subc', 0b0011 << 12 | 0b1010, :rm, :rn + addop 'subv', 0b0011 << 12 | 0b1011, :rm, :rn + + addop 'swap.b', 0b0110 << 12 | 0b1000, :rm, :rn + addop 'swap.w', 0b0110 << 12 | 0b1001, :rm, :rn + + addop 'tas.b', 0b0100 << 12 | 0b00011011, :@rn + addop 'trapa', 0b11000011 << 8, :i8, :setip, :stopexec # This instruction causes a pre-execution trap. 
+ + addop 'tst', 0b0010 << 12 | 0b1000, :rm, :rn + addop 'tst', 0b11001000 << 8, :i8, :r0 + addop 'tst.b', 0b11001100 << 8, :i8, :@r0gbr + + addop 'xor', 0b0010 << 12 | 0b1010, :rm, :rn + addop 'xor', 0b11001010 << 8, :i8, :r0 + addop 'xob.b', 0b11001110 << 8, :i8, :@r0gbr + + addop 'xtrct', 0b0010 << 12 | 0b1101, :rm, :rn + end end diff --git a/lib/metasm/metasm/cpu/x86_64/compile_c.rb b/lib/metasm/metasm/cpu/x86_64/compile_c.rb index a8581ba7a7..6ffb5964f7 100644 --- a/lib/metasm/metasm/cpu/x86_64/compile_c.rb +++ b/lib/metasm/metasm/cpu/x86_64/compile_c.rb @@ -10,1022 +10,1025 @@ require 'metasm/compile_c' module Metasm class X86_64 class CCompiler < C::Compiler - # holds compiler state information for a function - # registers are saved as register number (see Reg) - class State - # variable => offset from ebp (::Integer or CExpression) - attr_accessor :offset - # the current function - attr_accessor :func - # register => CExpression - attr_accessor :cache - # array of register values used in the function (to save/restore at prolog/epilog) - attr_accessor :dirty - # the array of register values currently not available - attr_accessor :used - # the array of args in use (reg/modrm) the reg dependencies are in +used+ - attr_accessor :inuse - # variable => register for current scope (variable never on the stack) - # bound registers are also in +used+ - attr_accessor :bound - # list of reg values that are used as func args in current ABI - attr_accessor :regargs - # stack space reserved for subfunction in ABI - attr_accessor :args_space - # list of reg values that are not kept across function call - attr_accessor :abi_flushregs_call - # list of regs we can trash without restoring them - attr_accessor :abi_trashregs + # holds compiler state information for a function + # registers are saved as register number (see Reg) + class State + # variable => offset from ebp (::Integer or CExpression) + attr_accessor :offset + # the current function + attr_accessor :func + # 
register => CExpression + attr_accessor :cache + # array of register values used in the function (to save/restore at prolog/epilog) + attr_accessor :dirty + # the array of register values currently not available + attr_accessor :used + # the array of args in use (reg/modrm) the reg dependencies are in +used+ + attr_accessor :inuse + # variable => register for current scope (variable never on the stack) + # bound registers are also in +used+ + attr_accessor :bound + # list of reg values that are used as func args in current ABI + attr_accessor :regargs + # stack space reserved for subfunction in ABI + attr_accessor :args_space + # list of reg values that are not kept across function call + attr_accessor :abi_flushregs_call + # list of regs we can trash without restoring them + attr_accessor :abi_trashregs - # +used+ includes ebp if true - # nil if ebp is not reserved for stack variable addressing - # Reg if used - attr_accessor :saved_rbp + # +used+ includes ebp if true + # nil if ebp is not reserved for stack variable addressing + # Reg if used + attr_accessor :saved_rbp - def initialize(func) - @func = func - @offset = {} - @cache = {} - @dirty = [] - @used = [4] # rsp is always in use - @inuse = [] - @bound = {} - @regargs = [] - @args_space = 0 - @abi_flushregs_call = [0, 1, 2, 6, 7, 8, 9, 10, 11] - @abi_trashregs = @abi_flushregs_call.dup - end - end + def initialize(func) + @func = func + @offset = {} + @cache = {} + @dirty = [] + @used = [4] # rsp is always in use + @inuse = [] + @bound = {} + @regargs = [] + @args_space = 0 + @abi_flushregs_call = [0, 1, 2, 6, 7, 8, 9, 10, 11] + @abi_trashregs = @abi_flushregs_call.dup + end + end - # some address - class Address - attr_accessor :modrm, :target - def initialize(modrm, target=nil) - @modrm, @target = modrm, target - end - def sz; @modrm.adsz end - def to_s; "#" end - end + # some address + class Address + attr_accessor :modrm, :target + def initialize(modrm, target=nil) + @modrm, @target = modrm, target + end 
+ def sz; @modrm.adsz end + def to_s; "#" end + end - def initialize(*a) - super(*a) - @cpusz = 64 - @regnummax = 15 - end + def initialize(*a) + super(*a) + @cpusz = 64 + @regnummax = 15 + end - # shortcut to add an instruction to the source - def instr(name, *args) - # XXX parse_postfix ? - @source << Instruction.new(@exeformat.cpu, name, args) - end + # shortcut to add an instruction to the source + def instr(name, *args) + # XXX parse_postfix ? + @source << Instruction.new(@exeformat.cpu, name, args) + end - # returns an available register, tries to find one not in @state.cache - # do not use with sz==8 (aliasing ah=>esp) - # does not put it in @state.inuse - def findreg(sz = @cpusz) - caching = @state.cache.keys.grep(Reg).map { |r| r.val } - if not regval = (@state.abi_trashregs - @state.used - caching).first || - ([*0..@regnummax] - @state.used).first - raise 'need more registers! (or a better compiler?)' - end - getreg(regval, sz) - end + # returns an available register, tries to find one not in @state.cache + # do not use with sz==8 (aliasing ah=>esp) + # does not put it in @state.inuse + def findreg(sz = @cpusz) + caching = @state.cache.keys.grep(Reg).map { |r| r.val } + if not regval = (@state.abi_trashregs - @state.used - caching).first || + ([*0..@regnummax] - @state.used).first + raise 'need more registers! 
(or a better compiler?)' + end + getreg(regval, sz) + end - # returns a Reg from a regval, mark it as dirty, flush old cache dependencies - def getreg(regval, sz=@cpusz) - flushcachereg(regval) - @state.dirty |= [regval] - Reg.new(regval, sz) - end + # returns a Reg from a regval, mark it as dirty, flush old cache dependencies + def getreg(regval, sz=@cpusz) + flushcachereg(regval) + @state.dirty |= [regval] + Reg.new(regval, sz) + end - # remove the cache keys that depends on the register - def flushcachereg(regval) - @state.cache.delete_if { |e, val| - case e - when Reg; e.val == regval - when Address; e = e.modrm ; redo - when ModRM; e.b && (e.b.val == regval) or e.i && (e.i.val == regval) - end - } - end + # remove the cache keys that depends on the register + def flushcachereg(regval) + @state.cache.delete_if { |e, val| + case e + when Reg; e.val == regval + when Address; e = e.modrm ; redo + when ModRM; e.b && (e.b.val == regval) or e.i && (e.i.val == regval) + end + } + end - # removes elements from @state.inuse, free @state.used if unreferenced - # must be the exact object present in inuse - def unuse(*vals) - vals.each { |val| - val = val.modrm if val.kind_of? Address - @state.inuse.delete val - } - # XXX cache exempt - exempt = @state.bound.values.map { |r| r.val } - exempt << 4 - exempt << 5 if @state.saved_rbp - @state.used.delete_if { |regval| - next if exempt.include? regval - not @state.inuse.find { |val| - case val - when Reg; val.val == regval - when ModRM; (val.b and val.b.val == regval) or (val.i and val.i.val == regval) - else raise 'internal error - inuse ' + val.inspect - end - } - } - end + # removes elements from @state.inuse, free @state.used if unreferenced + # must be the exact object present in inuse + def unuse(*vals) + vals.each { |val| + val = val.modrm if val.kind_of? 
Address + @state.inuse.delete val + } + # XXX cache exempt + exempt = @state.bound.values.map { |r| r.val } + exempt << 4 + exempt << 5 if @state.saved_rbp + @state.used.delete_if { |regval| + next if exempt.include? regval + not @state.inuse.find { |val| + case val + when Reg; val.val == regval + when ModRM; (val.b and val.b.val == regval) or (val.i and val.i.val == regval) + else raise 'internal error - inuse ' + val.inspect + end + } + } + end - # marks an arg as in use, returns the arg - def inuse(v) - case v - when Reg; @state.used |= [v.val] - when ModRM - @state.used |= [v.i.val] if v.i - @state.used |= [v.b.val] if v.b - when Address; inuse v.modrm ; return v - else return v - end - @state.inuse |= [v] - v - end + # marks an arg as in use, returns the arg + def inuse(v) + case v + when Reg; @state.used |= [v.val] + when ModRM + @state.used |= [v.i.val] if v.i + @state.used |= [v.b.val] if v.b + when Address; inuse v.modrm ; return v + else return v + end + @state.inuse |= [v] + v + end - # returns a variable storage (ModRM for stack/global, Reg/Composite for register-bound) - def findvar(var) - if ret = @state.bound[var] - return ret - end + # returns a variable storage (ModRM for stack/global, Reg/Composite for register-bound) + def findvar(var) + if ret = @state.bound[var] + return ret + end - if ret = @state.cache.index(var) - ret = ret.dup - inuse ret - return ret - end + if ret = @state.cache.index(var) + ret = ret.dup + inuse ret + return ret + end - sz = 8*sizeof(var) rescue nil # extern char foo[]; + sz = 8*sizeof(var) rescue nil # extern char foo[]; - case off = @state.offset[var] - when C::CExpression - # stack, dynamic address - # TODO - # no need to update state.cache here, never recursive - v = raise "find dynamic addr of #{var.name}" - when ::Integer - # stack - # TODO -fomit-frame-pointer ( => state.cache dependant on stack_offset... 
) - v = ModRM.new(@cpusz, sz, nil, nil, @state.saved_rbp, Expression[-off]) - when nil - # global - if @exeformat.cpu.generate_PIC - v = ModRM.new(@cpusz, sz, nil, nil, Reg.from_str('rip'), Expression[var.name, :-, '$_']) - else - v = ModRM.new(@cpusz, sz, nil, nil, nil, Expression[var.name]) - end - end + case off = @state.offset[var] + when C::CExpression + # stack, dynamic address + # TODO + # no need to update state.cache here, never recursive + v = raise "find dynamic addr of #{var.name}" + when ::Integer + # stack + # TODO -fomit-frame-pointer ( => state.cache dependant on stack_offset... ) + v = ModRM.new(@cpusz, sz, nil, nil, @state.saved_rbp, Expression[-off]) + when nil + # global + if @exeformat.cpu.generate_PIC + v = ModRM.new(@cpusz, sz, nil, nil, Reg.from_str('rip'), Expression[var.name, :-, '$_']) + else + v = ModRM.new(@cpusz, sz, nil, nil, nil, Expression[var.name]) + end + end - case var.type - when C::Array; inuse Address.new(v) - else inuse v - end - end + case var.type + when C::Array; inuse Address.new(v) + else inuse v + end + end - # resolves the Address to Reg/Expr (may encode an 'lea') - def resolve_address(e) - r = e.modrm - unuse e - if r.imm and not r.b and not r.i - reg = r.imm - elsif not r.imm and ((not r.b and r.s == 1) or not r.i) - reg = r.b || r.i - elsif reg = @state.cache.index(e) - reg = reg.dup - else - reg = findreg - r.sz = reg.sz - instr 'lea', reg, r - end - inuse reg - @state.cache[reg] = e - reg - end + # resolves the Address to Reg/Expr (may encode an 'lea') + def resolve_address(e) + r = e.modrm + unuse e + if r.imm and not r.b and not r.i + reg = r.imm + elsif not r.imm and ((not r.b and r.s == 1) or not r.i) + reg = r.b || r.i + elsif reg = @state.cache.index(e) + reg = reg.dup + else + reg = findreg + r.sz = reg.sz + instr 'lea', reg, r + end + inuse reg + @state.cache[reg] = e + reg + end - # copies the arg e to a volatile location (register/composite) if it is not already - # unuses the old storage - # may return 
a register bigger than the type size (eg __int8 are stored in full reg size) - def make_volatile(e, type, rsz=@cpusz) - if e.kind_of? ModRM or @state.bound.index(e) - if type.integral? or type.pointer? - oldval = @state.cache[e] - unuse e - sz = typesize[type.pointer? ? :ptr : type.name]*8 - if sz < @cpusz or sz < rsz or e.sz < rsz - e2 = inuse findreg(rsz) - op = ((type.specifier == :unsigned) ? 'movzx' : 'movsx') - op = 'mov' if e.sz == e2.sz - if e2.sz == 64 and e.sz == 32 - if op == 'movsx' - instr 'movsxd', e2, e - else - instr 'mov', Reg.new(e2.val, 32), e - end - else - instr op, e2, e - end - else - e2 = inuse findreg(sz) - instr 'mov', e2, e - end - @state.cache[e2] = oldval if oldval and e.kind_of? ModRM - e2 - elsif type.float? - raise 'float unhandled' - else raise "cannot cast #{e} to #{type}" - end - elsif e.kind_of? Address - make_volatile resolve_address(e), type, rsz - elsif e.kind_of? Expression - if type.integral? or type.pointer? - e2 = inuse findreg - instr 'mov', e2, e - e2 - elsif type.float? - raise 'float unhandled' - else raise "cannot cast #{e} to #{type}" - end - else - e - end - end + # copies the arg e to a volatile location (register/composite) if it is not already + # unuses the old storage + # may return a register bigger than the type size (eg __int8 are stored in full reg size) + def make_volatile(e, type, rsz=@cpusz) + if e.kind_of? ModRM or @state.bound.index(e) + if type.integral? or type.pointer? + oldval = @state.cache[e] + unuse e + sz = typesize[type.pointer? ? :ptr : type.name]*8 + if sz < @cpusz or sz < rsz or e.sz < rsz + e2 = inuse findreg(rsz) + op = ((type.specifier == :unsigned) ? 'movzx' : 'movsx') + op = 'mov' if e.sz == e2.sz + if e2.sz == 64 and e.sz == 32 + if op == 'movsx' + instr 'movsxd', e2, e + else + instr 'mov', Reg.new(e2.val, 32), e + end + else + instr op, e2, e + end + else + e2 = inuse findreg(sz) + instr 'mov', e2, e + end + @state.cache[e2] = oldval if oldval and e.kind_of? 
ModRM + e2 + elsif type.float? + raise 'float unhandled' + else raise "cannot cast #{e} to #{type}" + end + elsif e.kind_of? Address + make_volatile resolve_address(e), type, rsz + elsif e.kind_of? Expression + if type.integral? or type.pointer? + e2 = inuse findreg + instr 'mov', e2, e + e2 + elsif type.float? + raise 'float unhandled' + else raise "cannot cast #{e} to #{type}" + end + else + e + end + end - # takes an argument, if the argument is an integer that does not fit in an i32, moves it to a temp reg - # the reg is unused, so use this only right when generating the offending instr (eg cmp, add..) - def i_to_i32(v) - if v.kind_of? Expression and i = v.reduce and i.kind_of?(Integer) - i &= 0xffff_ffff_ffff_ffff - if i <= 0x7fff_ffff - elsif i >= (1<<64)-0x8000_0000 - v = Expression[Expression.make_signed(i, 64)] - else - v = make_volatile(v, C::BaseType.new(:int)) - unuse v - end - end - v - end + # takes an argument, if the argument is an integer that does not fit in an i32, moves it to a temp reg + # the reg is unused, so use this only right when generating the offending instr (eg cmp, add..) + def i_to_i32(v) + if v.kind_of? Expression and i = v.reduce and i.kind_of?(Integer) + i &= 0xffff_ffff_ffff_ffff + if i <= 0x7fff_ffff + elsif i >= (1<<64)-0x8000_0000 + v = Expression[Expression.make_signed(i, 64)] + else + v = make_volatile(v, C::BaseType.new(:int)) + unuse v + end + end + v + end - # returns the instruction suffix for a comparison operator - def getcc(op, type) - case op - when :'=='; 'z' - when :'!='; 'nz' - when :'<' ; 'b' - when :'>' ; 'a' - when :'<='; 'be' - when :'>='; 'ae' - else raise "bad comparison op #{op}" - end.tr((type.specifier == :unsigned ? 
'' : 'ab'), 'gl') - end + # returns the instruction suffix for a comparison operator + def getcc(op, type) + case op + when :'=='; 'z' + when :'!='; 'nz' + when :'<' ; 'b' + when :'>' ; 'a' + when :'<='; 'be' + when :'>='; 'ae' + else raise "bad comparison op #{op}" + end.tr((type.specifier == :unsigned ? '' : 'ab'), 'gl') + end - # compiles a c expression, returns an X64 instruction argument - def c_cexpr_inner(expr) - case expr - when ::Integer; Expression[expr] - when C::Variable; findvar(expr) - when C::CExpression - if not expr.lexpr or not expr.rexpr - inuse c_cexpr_inner_nol(expr) - else - inuse c_cexpr_inner_l(expr) - end - when C::Label; findvar(C::Variable.new(expr.name, C::Array.new(C::BaseType.new(:void), 1))) - else puts "c_ce_i: unsupported #{expr}" if $VERBOSE - end - end + # compiles a c expression, returns an X64 instruction argument + def c_cexpr_inner(expr) + case expr + when ::Integer; Expression[expr] + when C::Variable; findvar(expr) + when C::CExpression + if not expr.lexpr or not expr.rexpr + inuse c_cexpr_inner_nol(expr) + else + inuse c_cexpr_inner_l(expr) + end + when C::Label; findvar(C::Variable.new(expr.name, C::Array.new(C::BaseType.new(:void), 1))) + else puts "c_ce_i: unsupported #{expr}" if $VERBOSE + end + end - # compile a CExpression with no lexpr - def c_cexpr_inner_nol(expr) - case expr.op - when nil - r = c_cexpr_inner(expr.rexpr) - if (expr.rexpr.kind_of? C::CExpression or expr.rexpr.kind_of? C::Variable) and - expr.type.kind_of? C::BaseType and expr.rexpr.type.kind_of? C::BaseType - r = c_cexpr_inner_cast(expr, r) - end - r - when :+ - c_cexpr_inner(expr.rexpr) - when :- - r = c_cexpr_inner(expr.rexpr) - r = make_volatile(r, expr.type) - if expr.type.integral? or expr.type.pointer? - instr 'neg', r - elsif expr.type.float? - raise 'float unhandled' - else raise - end - r - when :'++', :'--' - r = c_cexpr_inner(expr.rexpr) - inc = true if expr.op == :'++' - if expr.type.integral? or expr.type.pointer? - op = (inc ? 
'inc' : 'dec') - instr op, r - elsif expr.type.float? - raise 'float unhandled' - end - r - when :& - raise 'bad precompiler ' + expr.to_s if not expr.rexpr.kind_of? C::Variable - @state.cache.each { |r_, c| - return inuse(r_) if c.kind_of? Address and c.target == expr.rexpr - } - r = c_cexpr_inner(expr.rexpr) - raise 'bad lvalue' if not r.kind_of? ModRM - unuse r - r = Address.new(r) - inuse r - r.target = expr.rexpr - r - when :* - expr.rexpr.type.name = :ptr if expr.rexpr.kind_of? C::CExpression and expr.rexpr.type.kind_of? C::BaseType and typesize[expr.rexpr.type.name] == typesize[:ptr] # hint to use Address - e = c_cexpr_inner(expr.rexpr) - sz = 8*sizeof(expr) - case e - when Address - unuse e - e = e.modrm.dup - e.sz = sz - inuse e - when ModRM; e = make_volatile(e, expr.rexpr.type) - end - case e - when Reg; unuse e ; e = inuse ModRM.new(@cpusz, sz, nil, nil, e, nil) - when Expression; e = inuse ModRM.new(@cpusz, sz, nil, nil, nil, e) - end - e - when :'!' - r = c_cexpr_inner(expr.rexpr) - r = make_volatile(r, expr.rexpr.type) - if expr.rexpr.type.integral? or expr.type.pointer? - r = make_volatile(r, expr.rexpr.type) - instr 'test', r, r - elsif expr.rexpr.type.float? - raise 'float unhandled' - else raise 'bad comparison ' + expr.to_s - end - instr 'setz', Reg.new(r.val, 8) - instr 'and', r, Expression[1] - r - else raise 'mmh ? ' + expr.to_s - end - end + # compile a CExpression with no lexpr + def c_cexpr_inner_nol(expr) + case expr.op + when nil + r = c_cexpr_inner(expr.rexpr) + if (expr.rexpr.kind_of? C::CExpression or expr.rexpr.kind_of? C::Variable) and + expr.type.kind_of? C::BaseType and expr.rexpr.type.kind_of? C::BaseType + r = c_cexpr_inner_cast(expr, r) + end + r + when :+ + c_cexpr_inner(expr.rexpr) + when :- + r = c_cexpr_inner(expr.rexpr) + r = make_volatile(r, expr.type) + if expr.type.integral? or expr.type.pointer? + instr 'neg', r + elsif expr.type.float? 
+ raise 'float unhandled' + else raise + end + r + when :'++', :'--' + r = c_cexpr_inner(expr.rexpr) + inc = true if expr.op == :'++' + if expr.type.integral? or expr.type.pointer? + op = (inc ? 'inc' : 'dec') + instr op, r + elsif expr.type.float? + raise 'float unhandled' + end + r + when :& + raise 'bad precompiler ' + expr.to_s if not expr.rexpr.kind_of? C::Variable + @state.cache.each { |r_, c| + return inuse(r_) if c.kind_of? Address and c.target == expr.rexpr + } + r = c_cexpr_inner(expr.rexpr) + raise 'bad lvalue' if not r.kind_of? ModRM + unuse r + r = Address.new(r) + inuse r + r.target = expr.rexpr + r + when :* + expr.rexpr.type.name = :ptr if expr.rexpr.kind_of? C::CExpression and expr.rexpr.type.kind_of? C::BaseType and typesize[expr.rexpr.type.name] == typesize[:ptr] # hint to use Address + e = c_cexpr_inner(expr.rexpr) + sz = 8*sizeof(expr) + case e + when Address + unuse e + e = e.modrm.dup + e.sz = sz + inuse e + when ModRM; e = make_volatile(e, expr.rexpr.type) + end + case e + when Reg; unuse e ; e = inuse ModRM.new(@cpusz, sz, nil, nil, e, nil) + when Expression; e = inuse ModRM.new(@cpusz, sz, nil, nil, nil, e) + end + e + when :'!' + r = c_cexpr_inner(expr.rexpr) + r = make_volatile(r, expr.rexpr.type) + if expr.rexpr.type.integral? or expr.type.pointer? + r = make_volatile(r, expr.rexpr.type) + instr 'test', r, r + elsif expr.rexpr.type.float? + raise 'float unhandled' + else raise 'bad comparison ' + expr.to_s + end + instr 'setz', Reg.new(r.val, 8) + instr 'and', r, Expression[1] + r + else raise 'mmh ? ' + expr.to_s + end + end - # compile a cast (BaseType to BaseType) - def c_cexpr_inner_cast(expr, r) - if expr.type.float? or expr.rexpr.type.float? - raise 'float unhandled' - elsif (expr.type.integral? or expr.type.pointer?) and (expr.rexpr.type.integral? or expr.rexpr.type.pointer?) - tto = typesize[expr.type.pointer? ? :ptr : expr.type.name]*8 - tfrom = typesize[expr.rexpr.type.pointer? ? 
:ptr : expr.rexpr.type.name]*8 - r = resolve_address r if r.kind_of? Address - if r.kind_of? Expression - r = make_volatile r, expr.type - elsif tfrom > tto - case r - when ModRM - unuse r - r = r.dup - r.sz = tto - inuse r - when Reg - if r.sz == 64 and tto == 32 - instr 'mov', Reg.new(r.val, tto), Reg.new(r.val, tto) - else - instr 'and', r, Expression[(1<<tto)-1] if r.sz > tto - end - end - elsif tto > tfrom - if not r.kind_of? Reg or r.sz != @cpusz - unuse r - reg = inuse findreg - op = (r.sz == reg.sz ? 'mov' : (expr.rexpr.type.specifier == :unsigned ? 'movzx' : 'movsx')) - if reg.sz == 64 and r.sz == 32 - if op == 'movsx' - instr 'movsxd', reg, r - else - instr 'mov', Reg.new(reg.val, 32), r - end - else - instr op, reg, r - end - r = reg - end - end - end - r - end + # compile a cast (BaseType to BaseType) + def c_cexpr_inner_cast(expr, r) + if expr.type.float? or expr.rexpr.type.float? + raise 'float unhandled' + elsif (expr.type.integral? or expr.type.pointer?) and (expr.rexpr.type.integral? or expr.rexpr.type.pointer?) + tto = typesize[expr.type.pointer? ? :ptr : expr.type.name]*8 + tfrom = typesize[expr.rexpr.type.pointer? ? :ptr : expr.rexpr.type.name]*8 + r = resolve_address r if r.kind_of? Address + if r.kind_of? Expression + r = make_volatile r, expr.type + elsif tfrom > tto + case r + when ModRM + unuse r + r = r.dup + r.sz = tto + inuse r + when Reg + if r.sz == 64 and tto == 32 + instr 'mov', Reg.new(r.val, tto), Reg.new(r.val, tto) + else + instr 'and', r, Expression[(1<<tto)-1] if r.sz > tto + end + end + elsif tto > tfrom + if not r.kind_of? Reg or r.sz != @cpusz + unuse r + reg = inuse findreg + op = (r.sz == reg.sz ? 'mov' : (expr.rexpr.type.specifier == :unsigned ? 
'movzx' : 'movsx')) + if reg.sz == 64 and r.sz == 32 + if op == 'movsx' + instr 'movsxd', reg, r + else + instr 'mov', Reg.new(reg.val, 32), r + end + else + instr op, reg, r + end + r = reg + end + end + end + r + end - # compiles a CExpression, not arithmetic (assignment, comparison etc) - def c_cexpr_inner_l(expr) - case expr.op - when :funcall - c_cexpr_inner_funcall(expr) - when :'+=', :'-=', :'*=', :'/=', :'%=', :'^=', :'&=', :'|=', :'<<=', :'>>=' - l = c_cexpr_inner(expr.lexpr) - raise 'bad lvalue' if not l.kind_of? ModRM and not @state.bound.index(l) - r = c_cexpr_inner(expr.rexpr) - op = expr.op.to_s.chop.to_sym - c_cexpr_inner_arith(l, op, r, expr.type) - l - when :'+', :'-', :'*', :'/', :'%', :'^', :'&', :'|', :'<<', :'>>' - # both sides are already cast to the same type by the precompiler - # XXX fptrs are not #integral? ... - if expr.type.integral? and expr.type.name == :ptr and expr.lexpr.type.kind_of? C::BaseType and - typesize[expr.lexpr.type.name] == typesize[:ptr] - expr.lexpr.type.name = :ptr - end - l = c_cexpr_inner(expr.lexpr) - l = make_volatile(l, expr.type) if not l.kind_of? Address - if expr.type.integral? and expr.type.name == :ptr and l.kind_of? Reg - unuse l - l = Address.new ModRM.new(l.sz, @cpusz, nil, nil, l, nil) - inuse l - end - if l.kind_of? Address and expr.type.integral? - l.modrm.imm = nil if l.modrm.imm and not l.modrm.imm.op and l.modrm.imm.rexpr == 0 - if l.modrm.b and l.modrm.i and l.modrm.s == 1 and l.modrm.b.val == l.modrm.i.val - unuse l.modrm.b if l.modrm.b != l.modrm.i - l.modrm.b = nil - l.modrm.s = 2 - end - case expr.op - when :+ - rexpr = expr.rexpr - rexpr = rexpr.rexpr while rexpr.kind_of? C::CExpression and not rexpr.op and rexpr.type.integral? and - rexpr.rexpr.kind_of? C::CExpression and rexpr.rexpr.type.integral? and - typesize[rexpr.type.name] == typesize[rexpr.rexpr.type.name] - if rexpr.kind_of? 
C::CExpression and rexpr.op == :* and rexpr.lexpr - r1 = c_cexpr_inner(rexpr.lexpr) - r2 = c_cexpr_inner(rexpr.rexpr) - r1, r2 = r2, r1 if r1.kind_of? Expression - if r2.kind_of? Expression and [1, 2, 4, 8].include?(rr2 = r2.reduce) - case r1 - when ModRM, Address, Reg - r1 = make_volatile(r1, rexpr.type) if not r1.kind_of? Reg - if not l.modrm.i or (l.modrm.i.val == r1.val and l.modrm.s == 1 and rr2 == 1) - unuse l, r1, r2 - l = Address.new(l.modrm.dup) - inuse l - l.modrm.i = r1 - l.modrm.s = (l.modrm.s || 0) + rr2 - return l - end - end - end - r = make_volatile(r1, rexpr.type) - c_cexpr_inner_arith(r, :*, r2, rexpr.type) - else - r = c_cexpr_inner(rexpr) - end - r = resolve_address r if r.kind_of? Address - r = make_volatile(r, rexpr.type) if r.kind_of? ModRM - case r - when Reg - unuse l - l = Address.new(l.modrm.dup) - inuse l - if l.modrm.b - if not l.modrm.i or (l.modrm.i.val == r.val and l.modrm.s == 1) - l.modrm.i = r - l.modrm.s = (l.modrm.s || 0) + 1 - unuse r - return l - end - else - l.modrm.b = r - unuse r - return l - end - when Expression - unuse l, r - l = Address.new(l.modrm.dup) - inuse l - l.modrm.imm = Expression[l.modrm.imm, :+, r] - return l - end - when :- - r = c_cexpr_inner(expr.rexpr) - r = resolve_address r if r.kind_of? Address - if r.kind_of? Expression - unuse l, r - l = Address.new(l.modrm.dup) - inuse l - l.modrm.imm = Expression[l.modrm.imm, :-, r] - return l - end - when :* - r = c_cexpr_inner(expr.rexpr) - if r.kind_of? Expression and [1, 2, 4, 8].includre?(rr = r.reduce) - if l.modrm.b and not l.modrm.i - if rr != 1 - l.modrm.i = l.modrm.b - l.modrm.s = rr - l.modrm.imm = Expression[l.modrm.imm, :*, rr] if l.modrm.imm - end - unuse r - return l - elsif l.modrm.i and not l.modrm.b and l.modrm.s*rr <= 8 - l.modrm.s *= rr - l.modrm.imm = Expression[l.modrm.imm, :*, rr] if l.modrm.imm and rr != 1 - unuse r - return l - end - end - end - end - l = make_volatile(l, expr.type) if l.kind_of? 
Address - r ||= c_cexpr_inner(expr.rexpr) - c_cexpr_inner_arith(l, expr.op, r, expr.type) - l - when :'=' - r = c_cexpr_inner(expr.rexpr) - l = c_cexpr_inner(expr.lexpr) - raise 'bad lvalue ' + l.inspect if not l.kind_of? ModRM and not @state.bound.index(l) - r = resolve_address r if r.kind_of? Address - r = make_volatile(r, expr.type) if l.kind_of? ModRM and r.kind_of? ModRM - unuse r - if expr.type.integral? or expr.type.pointer? - if r.kind_of? Address - m = r.modrm.dup - m.sz = l.sz - instr 'lea', l, m - else - if l.kind_of? ModRM and r.kind_of? Reg and l.sz != r.sz - raise if l.sz > r.sz - if l.sz == 8 and r.val >= 4 - reg = ([0, 1, 2, 3] - @state.used).first - if not reg - rax = Reg.new(0, r.sz) - instr 'push', rax - instr 'mov', rax, r - instr 'mov', l, Reg.new(rax.val, 8) - instr 'pop', rax - else - flushcachereg(reg) - instr 'mov', Reg.new(reg, r.sz), r - instr 'mov', l, Reg.new(reg, 8) - end - else - instr 'mov', l, Reg.new(r.val, l.sz) - end - elsif l.kind_of? ModRM and r.kind_of? Expression and l.sz == 64 - rval = r.reduce - if !rval.kind_of?(Integer) or rval > 0xffff_ffff or rval < -0x8000_0000 - r = make_volatile(r, expr.type) - unuse r - end - instr 'mov', l, r - else - instr 'mov', l, r - end - end - elsif expr.type.float? - raise 'float unhandled' - end - l - when :>, :<, :>=, :<=, :==, :'!=' - l = c_cexpr_inner(expr.lexpr) - l = make_volatile(l, expr.type) - r = c_cexpr_inner(expr.rexpr) - unuse r - if expr.lexpr.type.integral? or expr.lexpr.type.pointer? - instr 'cmp', l, i_to_i32(r) - elsif expr.lexpr.type.float? 
- raise 'float unhandled' - else raise 'bad comparison ' + expr.to_s - end - opcc = getcc(expr.op, expr.type) - instr 'set'+opcc, Reg.new(l.val, 8) - instr 'and', l, 1 - l - else - raise 'unhandled cexpr ' + expr.to_s - end - end + # compiles a CExpression, not arithmetic (assignment, comparison etc) + def c_cexpr_inner_l(expr) + case expr.op + when :funcall + c_cexpr_inner_funcall(expr) + when :'+=', :'-=', :'*=', :'/=', :'%=', :'^=', :'&=', :'|=', :'<<=', :'>>=' + l = c_cexpr_inner(expr.lexpr) + raise 'bad lvalue' if not l.kind_of? ModRM and not @state.bound.index(l) + r = c_cexpr_inner(expr.rexpr) + op = expr.op.to_s.chop.to_sym + c_cexpr_inner_arith(l, op, r, expr.type) + l + when :'+', :'-', :'*', :'/', :'%', :'^', :'&', :'|', :'<<', :'>>' + # both sides are already cast to the same type by the precompiler + # XXX fptrs are not #integral? ... + if expr.type.integral? and expr.type.name == :ptr and expr.lexpr.type.kind_of? C::BaseType and + typesize[expr.lexpr.type.name] == typesize[:ptr] + expr.lexpr.type.name = :ptr + end + l = c_cexpr_inner(expr.lexpr) + l = make_volatile(l, expr.type) if not l.kind_of? Address + if expr.type.integral? and expr.type.name == :ptr and l.kind_of? Reg + unuse l + l = Address.new ModRM.new(l.sz, @cpusz, nil, nil, l, nil) + inuse l + end + if l.kind_of? Address and expr.type.integral? + l.modrm.imm = nil if l.modrm.imm and not l.modrm.imm.op and l.modrm.imm.rexpr == 0 + if l.modrm.b and l.modrm.i and l.modrm.s == 1 and l.modrm.b.val == l.modrm.i.val + unuse l.modrm.b if l.modrm.b != l.modrm.i + l.modrm.b = nil + l.modrm.s = 2 + end + case expr.op + when :+ + rexpr = expr.rexpr + rexpr = rexpr.rexpr while rexpr.kind_of? C::CExpression and not rexpr.op and rexpr.type.integral? and + rexpr.rexpr.kind_of? C::CExpression and rexpr.rexpr.type.integral? and + typesize[rexpr.type.name] == typesize[rexpr.rexpr.type.name] + if rexpr.kind_of? 
C::CExpression and rexpr.op == :* and rexpr.lexpr + r1 = c_cexpr_inner(rexpr.lexpr) + r2 = c_cexpr_inner(rexpr.rexpr) + r1, r2 = r2, r1 if r1.kind_of? Expression + if r2.kind_of? Expression and [1, 2, 4, 8].include?(rr2 = r2.reduce) + case r1 + when ModRM, Address, Reg + r1 = make_volatile(r1, rexpr.type) if not r1.kind_of? Reg + if not l.modrm.i or (l.modrm.i.val == r1.val and l.modrm.s == 1 and rr2 == 1) + unuse l, r1, r2 + l = Address.new(l.modrm.dup) + inuse l + l.modrm.i = r1 + l.modrm.s = (l.modrm.s || 0) + rr2 + return l + end + end + end + r = make_volatile(r1, rexpr.type) + c_cexpr_inner_arith(r, :*, r2, rexpr.type) + else + r = c_cexpr_inner(rexpr) + end + r = resolve_address r if r.kind_of? Address + r = make_volatile(r, rexpr.type) if r.kind_of? ModRM + case r + when Reg + unuse l + l = Address.new(l.modrm.dup) + inuse l + if l.modrm.b + if not l.modrm.i or (l.modrm.i.val == r.val and l.modrm.s == 1) + l.modrm.i = r + l.modrm.s = (l.modrm.s || 0) + 1 + unuse r + return l + end + else + l.modrm.b = r + unuse r + return l + end + when Expression + unuse l, r + l = Address.new(l.modrm.dup) + inuse l + l.modrm.imm = Expression[l.modrm.imm, :+, r] + return l + end + when :- + r = c_cexpr_inner(expr.rexpr) + r = resolve_address r if r.kind_of? Address + if r.kind_of? Expression + unuse l, r + l = Address.new(l.modrm.dup) + inuse l + l.modrm.imm = Expression[l.modrm.imm, :-, r] + return l + end + when :* + r = c_cexpr_inner(expr.rexpr) + if r.kind_of? Expression and [1, 2, 4, 8].includre?(rr = r.reduce) + if l.modrm.b and not l.modrm.i + if rr != 1 + l.modrm.i = l.modrm.b + l.modrm.s = rr + l.modrm.imm = Expression[l.modrm.imm, :*, rr] if l.modrm.imm + end + unuse r + return l + elsif l.modrm.i and not l.modrm.b and l.modrm.s*rr <= 8 + l.modrm.s *= rr + l.modrm.imm = Expression[l.modrm.imm, :*, rr] if l.modrm.imm and rr != 1 + unuse r + return l + end + end + end + end + l = make_volatile(l, expr.type) if l.kind_of? 
Address + r ||= c_cexpr_inner(expr.rexpr) + c_cexpr_inner_arith(l, expr.op, r, expr.type) + l + when :'=' + r = c_cexpr_inner(expr.rexpr) + l = c_cexpr_inner(expr.lexpr) + raise 'bad lvalue ' + l.inspect if not l.kind_of? ModRM and not @state.bound.index(l) + r = resolve_address r if r.kind_of? Address + r = make_volatile(r, expr.type) if l.kind_of? ModRM and r.kind_of? ModRM + unuse r + if expr.type.integral? or expr.type.pointer? + if r.kind_of? Address + m = r.modrm.dup + m.sz = l.sz + instr 'lea', l, m + else + if l.kind_of? ModRM and r.kind_of? Reg and l.sz != r.sz + raise if l.sz > r.sz + if l.sz == 8 and r.val >= 4 + reg = ([0, 1, 2, 3] - @state.used).first + if not reg + rax = Reg.new(0, r.sz) + instr 'push', rax + instr 'mov', rax, r + instr 'mov', l, Reg.new(rax.val, 8) + instr 'pop', rax + else + flushcachereg(reg) + instr 'mov', Reg.new(reg, r.sz), r + instr 'mov', l, Reg.new(reg, 8) + end + else + instr 'mov', l, Reg.new(r.val, l.sz) + end + elsif l.kind_of? ModRM and r.kind_of? Expression and l.sz == 64 + rval = r.reduce + if !rval.kind_of?(Integer) or rval > 0xffff_ffff or rval < -0x8000_0000 + r = make_volatile(r, expr.type) + unuse r + end + instr 'mov', l, r + else + instr 'mov', l, r + end + end + elsif expr.type.float? + raise 'float unhandled' + end + l + when :>, :<, :>=, :<=, :==, :'!=' + l = c_cexpr_inner(expr.lexpr) + l = make_volatile(l, expr.type) + r = c_cexpr_inner(expr.rexpr) + r = make_volatile(r, expr.type) if r.kind_of?(ModRM) and r.sz != l.sz + unuse r + if expr.lexpr.type.integral? or expr.lexpr.type.pointer? + instr 'cmp', l, i_to_i32(r) + elsif expr.lexpr.type.float? 
+ raise 'float unhandled' + else raise 'bad comparison ' + expr.to_s + end + opcc = getcc(expr.op, expr.type) + instr 'set'+opcc, Reg.new(l.val, 8) + instr 'and', l, 1 + l + else + raise 'unhandled cexpr ' + expr.to_s + end + end - # compiles a subroutine call - def c_cexpr_inner_funcall(expr) - backup = [] - rax = Reg.new(0, 64) + # compiles a subroutine call + def c_cexpr_inner_funcall(expr) + backup = [] + rax = Reg.new(0, 64) - ft = expr.lexpr.type - ft = ft.pointed if ft.pointer? - ft = nil if not ft.kind_of? C::Function + ft = expr.lexpr.type + ft = ft.pointed if ft.pointer? + ft = nil if not ft.kind_of? C::Function - regargsmask = @state.regargs.dup - if ft - ft.args.each_with_index { |a, i| - if rn = a.has_attribute_var('register') - regargsmask.insert(i, Reg.from_str(rn).val) - end - } - end - regargsmask = regargsmask[0, expr.rexpr.length] + regargsmask = @state.regargs.dup + if ft + ft.args.each_with_index { |a, i| + if rn = a.has_attribute_var('register') + regargsmask.insert(i, Reg.from_str(rn).val) + end + } + end + regargsmask = regargsmask[0, expr.rexpr.length] - (@state.abi_flushregs_call | regargsmask.compact.uniq).each { |reg| - next if reg == 4 - next if reg == 5 and @state.saved_rbp - if not @state.used.include? reg - if not @state.abi_trashregs.include? reg - @state.dirty |= [reg] - end - next - end - backup << reg - instr 'push', Reg.new(reg, 64) - @state.used.delete reg - } + (@state.abi_flushregs_call | regargsmask.compact.uniq).each { |reg| + next if reg == 4 + next if reg == 5 and @state.saved_rbp + if not @state.used.include? reg + if not @state.abi_trashregs.include? 
reg + @state.dirty |= [reg] + end + next + end + backup << reg + instr 'push', Reg.new(reg, 64) + @state.used.delete reg + } - stackargs = expr.rexpr.zip(regargsmask).map { |a, r| a if not r }.compact + stackargs = expr.rexpr.zip(regargsmask).map { |a, r| a if not r }.compact - # preserve 16byte stack align under windows - stackalign = true if @state.args_space > 0 and (stackargs + backup).length & 1 == 1 - instr 'sub', Reg.new(4, @cpusz), Expression[8] if stackalign + # preserve 16byte stack align under windows + stackalign = true if @state.args_space > 0 and (stackargs + backup).length & 1 == 1 + instr 'sub', Reg.new(4, @cpusz), Expression[8] if stackalign - stackargs.reverse_each { |arg| - raise 'arg unhandled' if not arg.type.integral? or arg.type.pointer? - a = c_cexpr_inner(arg) - a = resolve_address a if a.kind_of? Address - a = make_volatile(a, arg.type) if a.kind_of? ModRM and arg.type.name != :__int64 - unuse a - instr 'push', a - } + stackargs.reverse_each { |arg| + raise 'arg unhandled' if not arg.type.integral? or arg.type.pointer? + a = c_cexpr_inner(arg) + a = resolve_address a if a.kind_of? Address + a = make_volatile(a, arg.type) if a.kind_of? ModRM and arg.type.name != :__int64 + unuse a + instr 'push', a + } - regargs_unuse = [] - regargsmask.zip(expr.rexpr).reverse_each { |ra, arg| - next if not arg or not ra - a = c_cexpr_inner(arg) - a = resolve_address a if a.kind_of? Address - r = Reg.new(ra, a.respond_to?(:sz) ? a.sz : 64) - instr 'mov', r, a if not a.kind_of? Reg or a.val != r.val - unuse a - regargs_unuse << r if not @state.inuse.include? ra - inuse r - } - instr 'sub', Reg.new(4, 64), Expression[@state.args_space] if @state.args_space > 0 # TODO prealloc that at func start + regargs_unuse = [] + regargsmask.zip(expr.rexpr).reverse_each { |ra, arg| + next if not arg or not ra + a = c_cexpr_inner(arg) + a = resolve_address a if a.kind_of? Address + r = Reg.new(ra, a.respond_to?(:sz) ? a.sz : 64) + instr 'mov', r, a if not a.kind_of? 
Reg or a.val != r.val + unuse a + regargs_unuse << r if not @state.inuse.include? ra + inuse r + } + instr 'sub', Reg.new(4, 64), Expression[@state.args_space] if @state.args_space > 0 # TODO prealloc that at func start - if ft.kind_of? C::Function and ft.varargs and @state.args_space == 0 - # gcc stores here the nr of xmm args passed, real args are passed the standard way - instr 'xor', rax, rax - inuse rax - regargs_unuse << rax - end + if ft.kind_of? C::Function and ft.varargs and @state.args_space == 0 + # gcc stores here the nr of xmm args passed, real args are passed the standard way + instr 'xor', rax, rax + inuse rax + regargs_unuse << rax + end - if expr.lexpr.kind_of? C::Variable and expr.lexpr.type.kind_of? C::Function - instr 'call', Expression[expr.lexpr.name] - else - ptr = c_cexpr_inner(expr.lexpr) - unuse ptr - ptr = make_volatile(ptr, expr.lexpr.type) if ptr.kind_of? Address - instr 'call', ptr - end - regargs_unuse.each { |r| unuse r } - argsz = @state.args_space + stackargs.length * 8 - argsz += 8 if stackalign - instr 'add', Reg.new(4, @cpusz), Expression[argsz] if argsz > 0 + if expr.lexpr.kind_of? C::Variable and expr.lexpr.type.kind_of? C::Function + instr 'call', Expression[expr.lexpr.name] + else + ptr = c_cexpr_inner(expr.lexpr) + unuse ptr + ptr = make_volatile(ptr, expr.lexpr.type) if ptr.kind_of? Address + instr 'call', ptr + end + regargs_unuse.each { |r| unuse r } + argsz = @state.args_space + stackargs.length * 8 + argsz += 8 if stackalign + instr 'add', Reg.new(4, @cpusz), Expression[argsz] if argsz > 0 - @state.abi_flushregs_call.each { |reg| flushcachereg reg } - @state.used |= backup - if @state.used.include?(0) - retreg = inuse findreg - else - retreg = inuse getreg(0) - end - backup.reverse_each { |reg| - if retreg.kind_of? 
Reg and reg == 0 - instr 'pop', Reg.new(retreg.val, 64) - instr 'xchg', Reg.new(reg, 64), Reg.new(retreg.val, 64) - else - instr 'pop', Reg.new(reg, 64) - end - inuse getreg(reg) - } - retreg - end + @state.abi_flushregs_call.each { |reg| flushcachereg reg } + @state.used |= backup + if @state.used.include?(0) + retreg = inuse findreg + else + retreg = inuse getreg(0) + end + backup.reverse_each { |reg| + if retreg.kind_of? Reg and reg == 0 + instr 'pop', Reg.new(retreg.val, 64) + instr 'xchg', Reg.new(reg, 64), Reg.new(retreg.val, 64) + else + instr 'pop', Reg.new(reg, 64) + end + inuse getreg(reg) + } + retreg + end - # compiles/optimizes arithmetic operations - def c_cexpr_inner_arith(l, op, r, type) - # optimizes *2 -> <<1 - if r.kind_of? Expression and (rr = r.reduce).kind_of? ::Integer - if type.integral? or type.pointer? - log2 = lambda { |v| - # TODO lol - i = 0 - i += 1 while (1 << i) < v - i if (1 << i) == v - } - if (lr = log2[rr]).kind_of? ::Integer - case op - when :*; return c_cexpr_inner_arith(l, :<<, Expression[lr], type) - when :/; return c_cexpr_inner_arith(l, :>>, Expression[lr], type) - when :%; return c_cexpr_inner_arith(l, :&, Expression[rr-1], type) - end - else - # TODO /r => *(r^(-1)), *3 => stuff with magic constants.. - end - end - end + # compiles/optimizes arithmetic operations + def c_cexpr_inner_arith(l, op, r, type) + # optimizes *2 -> <<1 + if r.kind_of? Expression and (rr = r.reduce).kind_of? ::Integer + if type.integral? or type.pointer? + log2 = lambda { |v| + # TODO lol + i = 0 + i += 1 while (1 << i) < v + i if (1 << i) == v + } + if (lr = log2[rr]).kind_of? ::Integer + case op + when :*; return c_cexpr_inner_arith(l, :<<, Expression[lr], type) + when :/; return c_cexpr_inner_arith(l, :>>, Expression[lr], type) + when :%; return c_cexpr_inner_arith(l, :&, Expression[rr-1], type) + end + else + # TODO /r => *(r^(-1)), *3 => stuff with magic constants.. + end + end + end - if type.float? 
- raise 'float unhandled' - else - c_cexpr_inner_arith_int(l, op, r, type) - end - end + if type.float? + raise 'float unhandled' + else + c_cexpr_inner_arith_int(l, op, r, type) + end + end - # compile an integral arithmetic expression, reg-sized - def c_cexpr_inner_arith_int(l, op, r, type) - op = case op - when :+; 'add' - when :-; 'sub' - when :&; 'and' - when :|; 'or' - when :^; 'xor' - when :>>; type.specifier == :unsigned ? 'shr' : 'sar' - when :<<; 'shl' - when :*; 'mul' - when :/; 'div' - when :%; 'mod' - end + # compile an integral arithmetic expression, reg-sized + def c_cexpr_inner_arith_int(l, op, r, type) + op = case op + when :+; 'add' + when :-; 'sub' + when :&; 'and' + when :|; 'or' + when :^; 'xor' + when :>>; type.specifier == :unsigned ? 'shr' : 'sar' + when :<<; 'shl' + when :*; 'mul' + when :/; 'div' + when :%; 'mod' + end - case op - when 'add', 'sub', 'and', 'or', 'xor' - r = make_volatile(r, type) if l.kind_of?(ModRM) and r.kind_of?(ModRM) - r = make_volatile(r, type) if r.kind_of?(ModRM) and r.sz != l.sz # add rax, word [moo] - unuse r - r = Reg.new(r.val, l.sz) if r.kind_of?(Reg) and l.kind_of?(ModRM) and l.sz and l.sz != r.sz # add byte ptr [rax], bl - instr op, l, i_to_i32(r) - when 'shr', 'sar', 'shl' - if r.kind_of? Expression - instr op, l, r - else - # XXX bouh - r = make_volatile(r, C::BaseType.new(:__int8, :unsigned)) - unuse r - if r.val != 1 - rcx = Reg.new(1, 64) - instr 'xchg', rcx, Reg.new(r.val, 64) - l = Reg.new(r.val, l.sz) if l.kind_of? Reg and l.val == 1 - end - instr op, l, Reg.new(1, 8) - instr 'xchg', rcx, Reg.new(r.val, 64) if r.val != 1 - end - when 'mul' - if l.kind_of? ModRM - if r.kind_of? Expression - ll = findreg - instr 'imul', ll, l, r - else - ll = make_volatile(l, type) - unuse ll - instr 'imul', ll, r - end - instr 'mov', l, ll - else - instr 'imul', l, r - end - unuse r - when 'div', 'mod' - lv = l.val if l.kind_of? Reg - rax = Reg.from_str 'rax' - rdx = Reg.from_str 'rdx' - if @state.used.include? 
rax.val and lv != rax.val - instr 'push', rax - saved_rax = true - end - if @state.used.include? rdx.val and lv != rdx.val - instr 'push', rdx - saved_rdx = true - end + case op + when 'add', 'sub', 'and', 'or', 'xor' + r = make_volatile(r, type) if l.kind_of?(ModRM) and r.kind_of?(ModRM) + r = make_volatile(r, type) if r.kind_of?(ModRM) and r.sz != l.sz # add rax, word [moo] + unuse r + r = Reg.new(r.val, l.sz) if r.kind_of?(Reg) and l.kind_of?(ModRM) and l.sz and l.sz != r.sz # add byte ptr [rax], bl + instr op, l, i_to_i32(r) + when 'shr', 'sar', 'shl' + if r.kind_of? Expression + instr op, l, r + else + # XXX bouh + r = make_volatile(r, C::BaseType.new(:__int8, :unsigned)) + unuse r + if r.val != 1 + rcx = Reg.new(1, 64) + instr 'xchg', rcx, Reg.new(r.val, 64) + l = Reg.new(r.val, l.sz) if l.kind_of? Reg and l.val == 1 + end + instr op, l, Reg.new(1, 8) + instr 'xchg', rcx, Reg.new(r.val, 64) if r.val != 1 + end + when 'mul' + if l.kind_of? ModRM + if r.kind_of? Expression + ll = findreg + instr 'imul', ll, l, r + else + ll = make_volatile(l, type) + unuse ll + instr 'imul', ll, r + end + instr 'mov', l, ll + else + r = make_volatile(r, type) if r.kind_of?(ModRM) and r.sz != l.sz + instr 'imul', l, r + end + unuse r + when 'div', 'mod' + lv = l.val if l.kind_of? Reg + rax = Reg.from_str 'rax' + rdx = Reg.from_str 'rdx' + if @state.used.include? rax.val and lv != rax.val + instr 'push', rax + saved_rax = true + end + if @state.used.include? rdx.val and lv != rdx.val + instr 'push', rdx + saved_rdx = true + end - instr 'mov', rax, l if lv != rax.val + instr 'mov', rax, l if lv != rax.val - if r.kind_of? Expression - instr 'push', r - rsp = Reg.from_str 'rsp' - r = ModRM.new(@cpusz, 64, nil, nil, rsp, nil) - need_pop = true - end + if r.kind_of? 
Expression + instr 'push', r + rsp = Reg.from_str 'rsp' + r = ModRM.new(@cpusz, 64, nil, nil, rsp, nil) + need_pop = true + end - if type.specifier == :unsigned - instr 'mov', rdx, Expression[0] - instr 'div', r - else - instr 'cdq' - instr 'idiv', r - end - unuse r + if type.specifier == :unsigned + instr 'mov', rdx, Expression[0] + instr 'div', r + else + instr 'cdq' + instr 'idiv', r + end + unuse r - instr 'add', rsp, 8 if need_pop + instr 'add', rsp, 8 if need_pop - if op == 'div' - instr 'mov', l, rax if lv != rax.val - else - instr 'mov', l, rdx if lv != rdx.val - end + if op == 'div' + instr 'mov', l, rax if lv != rax.val + else + instr 'mov', l, rdx if lv != rdx.val + end - instr 'pop', rdx if saved_rdx - instr 'pop', rax if saved_rax - end - end + instr 'pop', rdx if saved_rdx + instr 'pop', rax if saved_rax + end + end - def c_cexpr(expr) - case expr.op - when :+, :-, :*, :/, :&, :|, :^, :%, :[], nil, :'.', :'->', - :>, :<, :<=, :>=, :==, :'!=', :'!' - # skip no-ops - c_cexpr(expr.lexpr) if expr.lexpr.kind_of? C::CExpression - c_cexpr(expr.rexpr) if expr.rexpr.kind_of? C::CExpression - else unuse c_cexpr_inner(expr) - end - end + def c_cexpr(expr) + case expr.op + when :+, :-, :*, :/, :&, :|, :^, :%, :[], nil, :'.', :'->', + :>, :<, :<=, :>=, :==, :'!=', :'!' + # skip no-ops + c_cexpr(expr.lexpr) if expr.lexpr.kind_of? C::CExpression + c_cexpr(expr.rexpr) if expr.rexpr.kind_of? C::CExpression + else unuse c_cexpr_inner(expr) + end + end - def c_block_exit(block) - @state.cache.delete_if { |k, v| - case v - when C::Variable; block.symbol.index v - when Address; block.symbol.index v.target - end - } - block.symbol.each { |s| - unuse @state.bound.delete(s) - } - end + def c_block_exit(block) + @state.cache.delete_if { |k, v| + case v + when C::Variable; block.symbol.index v + when Address; block.symbol.index v.target + end + } + block.symbol.each { |s| + unuse @state.bound.delete(s) + } + end - def c_decl(var) - if var.type.kind_of? 
C::Array and - var.type.length.kind_of? C::CExpression - reg = c_cexpr_inner(var.type.length) - unuse reg - instr 'sub', Reg.new(4, @cpusz), reg - # TODO - end - end + def c_decl(var) + if var.type.kind_of? C::Array and + var.type.length.kind_of? C::CExpression + reg = c_cexpr_inner(var.type.length) + unuse reg + instr 'sub', Reg.new(4, @cpusz), reg + # TODO + end + end - def c_ifgoto(expr, target) - case o = expr.op - when :<, :>, :<=, :>=, :==, :'!=' - l = c_cexpr_inner(expr.lexpr) - r = c_cexpr_inner(expr.rexpr) - r = make_volatile(r, expr.type) if r.kind_of? ModRM and l.kind_of? ModRM - if l.kind_of? Expression - o = { :< => :>, :> => :<, :>= => :<=, :<= => :>= }[o] || o - l, r = r, l - end - unuse l, r - if expr.lexpr.type.integral? or expr.lexpr.type.pointer? - r = Reg.new(r.val, l.sz) if r.kind_of? Reg and r.sz != l.sz # XXX - instr 'cmp', l, i_to_i32(r) - elsif expr.lexpr.type.float? - raise 'float unhandled' - else raise 'bad comparison ' + expr.to_s - end - op = 'j' + getcc(o, expr.lexpr.type) - instr op, Expression[target] - when :'!' - r = c_cexpr_inner(expr.rexpr) - r = make_volatile(r, expr.rexpr.type) - unuse r - instr 'test', r, r - instr 'jz', Expression[target] - else - r = c_cexpr_inner(expr) - r = make_volatile(r, expr.type) - unuse r - instr 'test', r, r - instr 'jnz', Expression[target] - end - end + def c_ifgoto(expr, target) + case o = expr.op + when :<, :>, :<=, :>=, :==, :'!=' + l = c_cexpr_inner(expr.lexpr) + r = c_cexpr_inner(expr.rexpr) + r = make_volatile(r, expr.type) if r.kind_of? ModRM and l.kind_of? ModRM + r = make_volatile(r, expr.type) if r.kind_of?(ModRM) and r.sz != l.sz + if l.kind_of? Expression + o = { :< => :>, :> => :<, :>= => :<=, :<= => :>= }[o] || o + l, r = r, l + end + unuse l, r + if expr.lexpr.type.integral? or expr.lexpr.type.pointer? + r = Reg.new(r.val, l.sz) if r.kind_of? Reg and r.sz != l.sz # XXX + instr 'cmp', l, i_to_i32(r) + elsif expr.lexpr.type.float? 
+ raise 'float unhandled' + else raise 'bad comparison ' + expr.to_s + end + op = 'j' + getcc(o, expr.lexpr.type) + instr op, Expression[target] + when :'!' + r = c_cexpr_inner(expr.rexpr) + r = make_volatile(r, expr.rexpr.type) + unuse r + instr 'test', r, r + instr 'jz', Expression[target] + else + r = c_cexpr_inner(expr) + r = make_volatile(r, expr.type) + unuse r + instr 'test', r, r + instr 'jnz', Expression[target] + end + end - def c_goto(target) - instr 'jmp', Expression[target] - end + def c_goto(target) + instr 'jmp', Expression[target] + end - def c_label(name) - @state.cache.clear - @source << '' << Label.new(name) - end + def c_label(name) + @state.cache.clear + @source << '' << Label.new(name) + end - def c_return(expr) - return if not expr - @state.cache.delete_if { |r, v| r.kind_of? Reg and r.val == 0 and expr != v } - r = c_cexpr_inner(expr) - r = make_volatile(r, expr.type) - unuse r - instr 'mov', Reg.new(0, r.sz), r if r.val != 0 - end + def c_return(expr) + return if not expr + @state.cache.delete_if { |r, v| r.kind_of? 
Reg and r.val == 0 and expr != v } + r = c_cexpr_inner(expr) + r = make_volatile(r, expr.type) + unuse r + instr 'mov', Reg.new(0, r.sz), r if r.val != 0 + end - def c_asm(stmt) - if stmt.output or stmt.input or stmt.clobber - raise # TODO (handle %%0 => rax, gas, etc) - else - raise 'asm refering variables unhandled' if @state.func.initializer.symbol.keys.find { |sym| stmt.body =~ /\b#{Regexp.escape(sym)}\b/ } - @source << stmt.body - end - end + def c_asm(stmt) + if stmt.output or stmt.input or stmt.clobber + raise # TODO (handle %%0 => rax, gas, etc) + else + raise 'asm refering variables unhandled' if @state.func.initializer.symbol.keys.find { |sym| stmt.body =~ /\b#{Regexp.escape(sym)}\b/ } + @source << stmt.body + end + end - def c_init_state(func) - @state = State.new(func) - args = func.type.args.dup - if @parser.lexer.definition['__MS_X86_64_ABI__'] - @state.args_space = 32 - @state.regargs = [1, 2, 8, 9] - else - @state.args_space = 0 - @state.regargs = [7, 6, 2, 1, 8, 9] - end - c_reserve_stack(func.initializer) - off = @state.offset.values.max.to_i - off = 0 if off < 0 + def c_init_state(func) + @state = State.new(func) + args = func.type.args.dup + if @parser.lexer.definition['__MS_X86_64_ABI__'] + @state.args_space = 32 + @state.regargs = [1, 2, 8, 9] + else + @state.args_space = 0 + @state.regargs = [7, 6, 2, 1, 8, 9] + end + c_reserve_stack(func.initializer) + off = @state.offset.values.max.to_i + off = 0 if off < 0 - argoff = 2*8 + @state.args_space - rlist = @state.regargs.dup - args.each { |a| - if a.has_attribute_var('register') - off = c_reserve_stack_var(a, off) - @state.offset[a] = off - elsif r = rlist.shift - if @state.args_space > 0 - # use reserved space to spill regargs - off = -16-8*@state.regargs.index(r) - else - off = c_reserve_stack_var(a, off) - end - @state.offset[a] = off - else - @state.offset[a] = -argoff - argoff = (argoff + sizeof(a) + 7) / 8 * 8 - end - } - if not @state.offset.values.grep(::Integer).empty? 
- @state.saved_rbp = Reg.new(5, @cpusz) - @state.used << 5 - end - end + argoff = 2*8 + @state.args_space + rlist = @state.regargs.dup + args.each { |a| + if a.has_attribute_var('register') + off = c_reserve_stack_var(a, off) + @state.offset[a] = off + elsif r = rlist.shift + if @state.args_space > 0 + # use reserved space to spill regargs + off = -16-8*@state.regargs.index(r) + else + off = c_reserve_stack_var(a, off) + end + @state.offset[a] = off + else + @state.offset[a] = -argoff + argoff = (argoff + sizeof(a) + 7) / 8 * 8 + end + } + if not @state.offset.values.grep(::Integer).empty? + @state.saved_rbp = Reg.new(5, @cpusz) + @state.used << 5 + end + end - def c_prolog - localspc = @state.offset.values.grep(::Integer).max - return if @state.func.attributes.to_a.include? 'naked' - @state.dirty -= @state.abi_trashregs - if localspc - localspc = (localspc + 7) / 8 * 8 - if @state.args_space > 0 and (localspc/8 + @state.dirty.length) & 1 == 1 - # ensure 16-o stack align on windows - localspc += 8 - end - ebp = @state.saved_rbp - esp = Reg.new(4, ebp.sz) - instr 'push', ebp - instr 'mov', ebp, esp - instr 'sub', esp, Expression[localspc] if localspc > 0 + def c_prolog + localspc = @state.offset.values.grep(::Integer).max + return if @state.func.attributes.to_a.include? 
'naked' + @state.dirty -= @state.abi_trashregs + if localspc + localspc = (localspc + 7) / 8 * 8 + if @state.args_space > 0 and (localspc/8 + @state.dirty.length) & 1 == 1 + # ensure 16-o stack align on windows + localspc += 8 + end + ebp = @state.saved_rbp + esp = Reg.new(4, ebp.sz) + instr 'push', ebp + instr 'mov', ebp, esp + instr 'sub', esp, Expression[localspc] if localspc > 0 - rlist = @state.regargs.dup - @state.func.type.args.each { |a| - if rn = a.has_attribute_var('register') - r = Reg.from_str(rn).val - elsif r = rlist.shift - else next - end - v = findvar(a) - instr 'mov', v, Reg.new(r, v.sz) - } - elsif @state.args_space > 0 and @state.dirty.length & 1 == 0 - instr 'sub', Reg.new(4, @cpusz), Expression[8] - end - @state.dirty.each { |reg| - instr 'push', Reg.new(reg, @cpusz) - } - end + rlist = @state.regargs.dup + @state.func.type.args.each { |a| + if rn = a.has_attribute_var('register') + r = Reg.from_str(rn).val + elsif r = rlist.shift + else next + end + v = findvar(a) + instr 'mov', v, Reg.new(r, v.sz) + } + elsif @state.args_space > 0 and @state.dirty.length & 1 == 0 + instr 'sub', Reg.new(4, @cpusz), Expression[8] + end + @state.dirty.each { |reg| + instr 'push', Reg.new(reg, @cpusz) + } + end - def c_epilog - return if @state.func.attributes.to_a.include? 'naked' - @state.dirty.reverse_each { |reg| - instr 'pop', Reg.new(reg, @cpusz) - } - if ebp = @state.saved_rbp - instr 'mov', Reg.new(4, ebp.sz), ebp - instr 'pop', ebp - elsif @state.args_space > 0 and @state.dirty.length & 1 == 0 - instr 'add', Reg.new(4, @cpusz), Expression[8] - end - instr 'ret' - end + def c_epilog + return if @state.func.attributes.to_a.include? 
'naked' + @state.dirty.reverse_each { |reg| + instr 'pop', Reg.new(reg, @cpusz) + } + if ebp = @state.saved_rbp + instr 'mov', Reg.new(4, ebp.sz), ebp + instr 'pop', ebp + elsif @state.args_space > 0 and @state.dirty.length & 1 == 0 + instr 'add', Reg.new(4, @cpusz), Expression[8] + end + instr 'ret' + end - def c_program_epilog - end + def c_program_epilog + end end - def new_ccompiler(parser, exe=ExeFormat.new) - exe.cpu = self if not exe.instance_variable_get('@cpu') - CCompiler.new(parser, exe) - end + def new_ccompiler(parser, exe=ExeFormat.new) + exe.cpu = self if not exe.instance_variable_get('@cpu') + CCompiler.new(parser, exe) + end end end diff --git a/lib/metasm/metasm/cpu/x86_64/debug.rb b/lib/metasm/metasm/cpu/x86_64/debug.rb index 10d35e5507..006336b3f3 100644 --- a/lib/metasm/metasm/cpu/x86_64/debug.rb +++ b/lib/metasm/metasm/cpu/x86_64/debug.rb @@ -8,52 +8,52 @@ require 'metasm/cpu/x86_64/opcodes' module Metasm class X86_64 - def dbg_register_pc - @dbg_register_pc ||= :rip - end - def dbg_register_flags - @dbg_register_flags ||= :rflags - end + def dbg_register_pc + @dbg_register_pc ||= :rip + end + def dbg_register_flags + @dbg_register_flags ||= :rflags + end - def dbg_register_list - @dbg_register_list ||= [:rax, :rbx, :rcx, :rdx, :rsi, :rdi, :rbp, :rsp, :r8, :r9, :r10, :r11, :r12, :r13, :r14, :r15, :rip] - end + def dbg_register_list + @dbg_register_list ||= [:rax, :rbx, :rcx, :rdx, :rsi, :rdi, :rbp, :rsp, :r8, :r9, :r10, :r11, :r12, :r13, :r14, :r15, :rip] + end - def dbg_register_size - @dbg_register_size ||= Hash.new(64).update(:cs => 16, :ds => 16, :es => 16, :fs => 16, :gs => 16) - end + def dbg_register_size + @dbg_register_size ||= Hash.new(64).update(:cs => 16, :ds => 16, :es => 16, :fs => 16, :gs => 16) + end - def dbg_func_arg(dbg, argnr) - if dbg.class.name =~ /win/i - list = [:rcx, :rdx, :r8, :r9] - off = 0x20 - else - list = [:rdi, :rsi, :rdx, :rcx, :r8, :r9] - off = 0 - end - if r = list[argnr] - dbg.get_reg_value(r) - else - argnr 
-= list.length - dbg.memory_read_int(Expression[:esp, :+, off + 8 + 8*argnr]) - end - end - def dbg_func_arg_set(dbg, argnr, arg) - if dbg.class.name =~ /win/i - list = [:rcx, :rdx, :r8, :r9] - off = 0x20 - else - list = [:rdi, :rsi, :rdx, :rcx, :r8, :r9] - off = 0 - end - if r = list[argnr] - dbg.set_reg_value(r, arg) - else - argnr -= list.length - dbg.memory_write_int(Expression[:esp, :+, off + 8 + 8*argnr], arg) - end - end + def dbg_func_arg(dbg, argnr) + if dbg.class.name =~ /win/i + list = [:rcx, :rdx, :r8, :r9] + off = 0x20 + else + list = [:rdi, :rsi, :rdx, :rcx, :r8, :r9] + off = 0 + end + if r = list[argnr] + dbg.get_reg_value(r) + else + argnr -= list.length + dbg.memory_read_int(Expression[:esp, :+, off + 8 + 8*argnr]) + end + end + def dbg_func_arg_set(dbg, argnr, arg) + if dbg.class.name =~ /win/i + list = [:rcx, :rdx, :r8, :r9] + off = 0x20 + else + list = [:rdi, :rsi, :rdx, :rcx, :r8, :r9] + off = 0 + end + if r = list[argnr] + dbg.set_reg_value(r, arg) + else + argnr -= list.length + dbg.memory_write_int(Expression[:esp, :+, off + 8 + 8*argnr], arg) + end + end - # what's left is inherited from Ia32 + # what's left is inherited from Ia32 end end diff --git a/lib/metasm/metasm/cpu/x86_64/decode.rb b/lib/metasm/metasm/cpu/x86_64/decode.rb index 59729fa054..f6b2595964 100644 --- a/lib/metasm/metasm/cpu/x86_64/decode.rb +++ b/lib/metasm/metasm/cpu/x86_64/decode.rb @@ -9,303 +9,303 @@ require 'metasm/decode' module Metasm class X86_64 - class ModRM - def self.decode(edata, byte, endianness, adsz, opsz, seg=nil, regclass=Reg, pfx={}) - m = (byte >> 6) & 3 - rm = byte & 7 + class ModRM + def self.decode(edata, byte, endianness, adsz, opsz, seg=nil, regclass=Reg, pfx={}) + m = (byte >> 6) & 3 + rm = byte & 7 - if m == 3 - rm |= 8 if pfx[:rex_b] and (regclass != SimdReg or opsz != 64) # mm8 -> mm0 - return regclass.new(rm, opsz) - end + if m == 3 + rm |= 8 if pfx[:rex_b] and (regclass != SimdReg or opsz != 64) # mm8 -> mm0 + return regclass.new(rm, opsz) + 
end - adsz ||= 64 + adsz ||= 64 - # mod 0/1/2 m 4 => sib - # mod 0 m 5 => rip+imm - # sib: i 4 => no index, b 5 => no base + # mod 0/1/2 m 4 => sib + # mod 0 m 5 => rip+imm + # sib: i 4 => no index, b 5 => no base - s = i = b = imm = nil - if rm == 4 - sib = edata.get_byte.to_i + s = i = b = imm = nil + if rm == 4 + sib = edata.get_byte.to_i - ii = (sib >> 3) & 7 - ii |= 8 if pfx[:rex_x] - if ii != 4 - s = 1 << ((sib >> 6) & 3) - if pfx[:mrmvex] - i = SimdReg.new(ii, pfx[:mrmvex]) - else - i = Reg.new(ii, adsz) - end + ii = (sib >> 3) & 7 + ii |= 8 if pfx[:rex_x] + if ii != 4 + s = 1 << ((sib >> 6) & 3) + if pfx[:mrmvex] + i = SimdReg.new(ii, pfx[:mrmvex]) + else + i = Reg.new(ii, adsz) + end - end + end - bb = sib & 7 - if bb == 5 and m == 0 - m = 2 # :i32 follows - else - bb |= 8 if pfx[:rex_b] - b = Reg.new(bb, adsz) - end - elsif rm == 5 and m == 0 - b = Reg.new(16, adsz) - m = 2 # :i32 follows - else - rm |= 8 if pfx[:rex_b] - b = Reg.new(rm, adsz) - end + bb = sib & 7 + if bb == 5 and m == 0 + m = 2 # :i32 follows + else + bb |= 8 if pfx[:rex_b] + b = Reg.new(bb, adsz) + end + elsif rm == 5 and m == 0 + b = Reg.new(16, adsz) + m = 2 # :i32 follows + else + rm |= 8 if pfx[:rex_b] + b = Reg.new(rm, adsz) + end - case m - when 1; itype = :i8 - when 2; itype = :i32 - end - imm = Expression[edata.decode_imm(itype, endianness)] if itype + case m + when 1; itype = :i8 + when 2; itype = :i32 + end + imm = Expression[edata.decode_imm(itype, endianness)] if itype - if imm and imm.reduce.kind_of? Integer and imm.reduce < -0x100_0000 - # probably a base address -> unsigned - imm = Expression[imm.reduce & ((1 << adsz) - 1)] - end + if imm and imm.reduce.kind_of? 
Integer and imm.reduce < -0x100_0000 + # probably a base address -> unsigned + imm = Expression[imm.reduce & ((1 << adsz) - 1)] + end - opsz = pfx[:argsz] if pfx[:argsz] - new adsz, opsz, s, i, b, imm, seg - end - end + opsz = pfx[:argsz] if pfx[:argsz] + new adsz, opsz, s, i, b, imm, seg + end + end - def decode_prefix(instr, byte) - x = super(instr, byte) - if instr.prefix.delete :rex - # rex ignored if not last - instr.prefix.delete :rex_b - instr.prefix.delete :rex_x - instr.prefix.delete :rex_r - instr.prefix.delete :rex_w - end - if byte & 0xf0 == 0x40 - x = instr.prefix[:rex] = byte - instr.prefix[:rex_b] = 1 if byte & 1 > 0 - instr.prefix[:rex_x] = 1 if byte & 2 > 0 - instr.prefix[:rex_r] = 1 if byte & 4 > 0 - instr.prefix[:rex_w] = 1 if byte & 8 > 0 - end - x - end + def decode_prefix(instr, byte) + x = super(instr, byte) + if instr.prefix.delete :rex + # rex ignored if not last + instr.prefix.delete :rex_b + instr.prefix.delete :rex_x + instr.prefix.delete :rex_r + instr.prefix.delete :rex_w + end + if byte & 0xf0 == 0x40 + x = instr.prefix[:rex] = byte + instr.prefix[:rex_b] = 1 if byte & 1 > 0 + instr.prefix[:rex_x] = 1 if byte & 2 > 0 + instr.prefix[:rex_r] = 1 if byte & 4 > 0 + instr.prefix[:rex_w] = 1 if byte & 8 > 0 + end + x + end - def decode_instr_op(edata, di) - before_ptr = edata.ptr - op = di.opcode - di.instruction.opname = op.name - bseq = edata.read(op.bin.length).unpack('C*') # decode_findopcode ensures that data >= op.length - pfx = di.instruction.prefix || {} + def decode_instr_op(edata, di) + before_ptr = edata.ptr + op = di.opcode + di.instruction.opname = op.name + bseq = edata.read(op.bin.length).unpack('C*') # decode_findopcode ensures that data >= op.length + pfx = di.instruction.prefix || {} - field_val = lambda { |f| - if fld = op.fields[f] - (bseq[fld[0]] >> fld[1]) & @fields_mask[f] - end - } - field_val_r = lambda { |f| - v = field_val[f] - v |= 8 if v and (op.fields[f][1] == 3 ? pfx[:rex_r] : pfx[:rex_b]) # gruick ? 
- v - } + field_val = lambda { |f| + if fld = op.fields[f] + (bseq[fld[0]] >> fld[1]) & @fields_mask[f] + end + } + field_val_r = lambda { |f| + v = field_val[f] + v |= 8 if v and (op.fields[f][1] == 3 ? pfx[:rex_r] : pfx[:rex_b]) # gruick ? + v + } - pfx[:rex_r] = 1 if op.fields[:vex_r] and field_val[:vex_r] == 0 - pfx[:rex_b] = 1 if op.fields[:vex_b] and field_val[:vex_b] == 0 - pfx[:rex_x] = 1 if op.fields[:vex_x] and field_val[:vex_x] == 0 - pfx[:rex_w] = 1 if op.fields[:vex_w] and field_val[:vex_w] == 1 - di.instruction.prefix = pfx if not di.instruction.prefix and not pfx.empty? # for opsz(di) + vex_w + pfx[:rex_r] = 1 if op.fields[:vex_r] and field_val[:vex_r] == 0 + pfx[:rex_b] = 1 if op.fields[:vex_b] and field_val[:vex_b] == 0 + pfx[:rex_x] = 1 if op.fields[:vex_x] and field_val[:vex_x] == 0 + pfx[:rex_w] = 1 if op.fields[:vex_w] and field_val[:vex_w] == 1 + di.instruction.prefix = pfx if not di.instruction.prefix and not pfx.empty? # for opsz(di) + vex_w - case op.props[:needpfx] - when 0x66; pfx.delete :opsz - when 0x67; pfx.delete :adsz - when 0xF2, 0xF3; pfx.delete :rep - end + case op.props[:needpfx] + when 0x66; pfx.delete :opsz + when 0x67; pfx.delete :adsz + when 0xF2, 0xF3; pfx.delete :rep + end - if op.props[:setip] and not op.props[:stopexec] and pfx[:seg] - case pfx.delete(:seg).val - when 1; pfx[:jmphint] = 'hintnojmp' - when 3; pfx[:jmphint] = 'hintjmp' - end - end + if op.props[:setip] and not op.props[:stopexec] and pfx[:seg] + case pfx.delete(:seg).val + when 1; pfx[:jmphint] = 'hintnojmp' + when 3; pfx[:jmphint] = 'hintjmp' + end + end - opsz = op.props[:argsz] || opsz(di) - adsz = pfx[:adsz] ? 32 : 64 - mmxsz = (op.props[:xmmx] && pfx[:opsz]) ? 128 : 64 + opsz = op.props[:argsz] || opsz(di) + adsz = pfx[:adsz] ? 32 : 64 + mmxsz = (op.props[:xmmx] && pfx[:opsz]) ? 
128 : 64 - op.args.each { |a| - di.instruction.args << case a - when :reg; Reg.new field_val_r[a], opsz - when :eeec; CtrlReg.new field_val_r[a] - when :eeed; DbgReg.new field_val_r[a] - when :eeet; TstReg.new field_val_r[a] - when :seg2, :seg2A, :seg3, :seg3A; SegReg.new field_val[a] - when :regmmx; SimdReg.new field_val[a], mmxsz # rex_r ignored - when :regxmm; SimdReg.new field_val_r[a], 128 - when :regymm; SimdReg.new field_val_r[a], 256 + op.args.each { |a| + di.instruction.args << case a + when :reg; Reg.new field_val_r[a], opsz + when :eeec; CtrlReg.new field_val_r[a] + when :eeed; DbgReg.new field_val_r[a] + when :eeet; TstReg.new field_val_r[a] + when :seg2, :seg2A, :seg3, :seg3A; SegReg.new field_val[a] + when :regmmx; SimdReg.new field_val[a], mmxsz # rex_r ignored + when :regxmm; SimdReg.new field_val_r[a], 128 + when :regymm; SimdReg.new field_val_r[a], 256 - when :farptr; Farptr.decode edata, @endianness, opsz - when :i8, :u8, :i16, :u16, :i32, :u32, :i64, :u64; Expression[edata.decode_imm(a, @endianness)] - when :i # 64bit constants are sign-extended from :i32 - type = (opsz == 64 ? op.props[:imm64] ? :a64 : :i32 : "#{op.props[:unsigned_imm] ? 'a' : 'i'}#{opsz}".to_sym ) - v = edata.decode_imm(type, @endianness) - v &= 0xffff_ffff_ffff_ffff if opsz == 64 and op.props[:unsigned_imm] and v.kind_of? Integer - Expression[v] + when :farptr; Farptr.decode edata, @endianness, opsz + when :i8, :u8, :i16, :u16, :i32, :u32, :i64, :u64; Expression[edata.decode_imm(a, @endianness)] + when :i # 64bit constants are sign-extended from :i32 + type = (opsz == 64 ? op.props[:imm64] ? :a64 : :i32 : "#{op.props[:unsigned_imm] ? 'a' : 'i'}#{opsz}".to_sym ) + v = edata.decode_imm(type, @endianness) + v &= 0xffff_ffff_ffff_ffff if opsz == 64 and op.props[:unsigned_imm] and v.kind_of? 
Integer + Expression[v] - when :mrm_imm; ModRM.new(adsz, opsz, nil, nil, nil, Expression[edata.decode_imm("a#{adsz}".to_sym, @endianness)], pfx.delete(:seg)) - when :modrm; ModRM.decode edata, field_val[:modrm], @endianness, adsz, opsz, pfx.delete(:seg), Reg, pfx - when :modrmmmx; ModRM.decode edata, field_val[:modrm], @endianness, adsz, mmxsz, pfx.delete(:seg), SimdReg, pfx.merge(:argsz => op.props[:argsz]) - when :modrmxmm; ModRM.decode edata, field_val[:modrm], @endianness, adsz, 128, pfx.delete(:seg), SimdReg, pfx.merge(:argsz => op.props[:argsz], :mrmvex => op.props[:mrmvex]) - when :modrmymm; ModRM.decode edata, field_val[:modrm], @endianness, adsz, 256, pfx.delete(:seg), SimdReg, pfx.merge(:argsz => op.props[:argsz], :mrmvex => op.props[:mrmvex]) + when :mrm_imm; ModRM.new(adsz, opsz, nil, nil, nil, Expression[edata.decode_imm("a#{adsz}".to_sym, @endianness)], pfx.delete(:seg)) + when :modrm; ModRM.decode edata, field_val[:modrm], @endianness, adsz, opsz, pfx.delete(:seg), Reg, pfx + when :modrmmmx; ModRM.decode edata, field_val[:modrm], @endianness, adsz, mmxsz, pfx.delete(:seg), SimdReg, pfx.merge(:argsz => op.props[:argsz]) + when :modrmxmm; ModRM.decode edata, field_val[:modrm], @endianness, adsz, 128, pfx.delete(:seg), SimdReg, pfx.merge(:argsz => op.props[:argsz], :mrmvex => op.props[:mrmvex]) + when :modrmymm; ModRM.decode edata, field_val[:modrm], @endianness, adsz, 256, pfx.delete(:seg), SimdReg, pfx.merge(:argsz => op.props[:argsz], :mrmvex => op.props[:mrmvex]) - when :vexvreg; Reg.new((field_val[:vex_vvvv] ^ 0xf), opsz) - when :vexvxmm; SimdReg.new((field_val[:vex_vvvv] ^ 0xf), 128) - when :vexvymm; SimdReg.new((field_val[:vex_vvvv] ^ 0xf), 256) - when :i4xmm; SimdReg.new(edata.decode_imm(:u8, @endianness) >> 4, 128) - when :i4ymm; SimdReg.new(edata.decode_imm(:u8, @endianness) >> 4, 256) + when :vexvreg; Reg.new((field_val[:vex_vvvv] ^ 0xf), opsz) + when :vexvxmm; SimdReg.new((field_val[:vex_vvvv] ^ 0xf), 128) + when :vexvymm; 
SimdReg.new((field_val[:vex_vvvv] ^ 0xf), 256) + when :i4xmm; SimdReg.new(edata.decode_imm(:u8, @endianness) >> 4, 128) + when :i4ymm; SimdReg.new(edata.decode_imm(:u8, @endianness) >> 4, 256) - when :regfp; FpReg.new field_val[a] - when :imm_val1; Expression[1] - when :imm_val3; Expression[3] - when :reg_cl; Reg.new 1, 8 - when :reg_eax; Reg.new 0, opsz - when :reg_dx; Reg.new 2, 16 - when :regfp0; FpReg.new nil - else raise SyntaxError, "Internal error: invalid argument #{a} in #{op.name}" - end - } + when :regfp; FpReg.new field_val[a] + when :imm_val1; Expression[1] + when :imm_val3; Expression[3] + when :reg_cl; Reg.new 1, 8 + when :reg_eax; Reg.new 0, opsz + when :reg_dx; Reg.new 2, 16 + when :regfp0; FpReg.new nil + else raise SyntaxError, "Internal error: invalid argument #{a} in #{op.name}" + end + } - di.bin_length += edata.ptr - before_ptr + di.bin_length += edata.ptr - before_ptr - return if edata.ptr > edata.length + return if edata.ptr > edata.length - if op.name == 'movsx' or op.name == 'movzx' or op.name == 'movsxd' - if op.name == 'movsxd' - di.instruction.args[1].sz = 32 - elsif opsz == 8 - di.instruction.args[1].sz = 8 - else - di.instruction.args[1].sz = 16 - end - if pfx[:rex_w] - di.instruction.args[0].sz = 64 - elsif pfx[:opsz] - di.instruction.args[0].sz = 16 - else - di.instruction.args[0].sz = 32 - end - elsif op.name == 'crc32' - di.instruction.args[0].sz = 32 - end + if op.name == 'movsx' or op.name == 'movzx' or op.name == 'movsxd' + if op.name == 'movsxd' + di.instruction.args[1].sz = 32 + elsif opsz == 8 + di.instruction.args[1].sz = 8 + else + di.instruction.args[1].sz = 16 + end + if pfx[:rex_w] + di.instruction.args[0].sz = 64 + elsif pfx[:opsz] + di.instruction.args[0].sz = 16 + else + di.instruction.args[0].sz = 32 + end + elsif op.name == 'crc32' + di.instruction.args[0].sz = 32 + end - # sil => bh - di.instruction.args.each { |a| a.val += 12 if a.kind_of? 
Reg and a.sz == 8 and not pfx[:rex] and a.val >= 4 and a.val <= 8 } + # sil => bh + di.instruction.args.each { |a| a.val += 12 if a.kind_of? Reg and a.sz == 8 and not pfx[:rex] and a.val >= 4 and a.val <= 8 } - case pfx.delete(:rep) - when :nz - if di.opcode.props[:strop] - pfx[:rep] = 'rep' - elsif di.opcode.props[:stropz] - pfx[:rep] = 'repnz' - end - when :z - if di.opcode.props[:strop] - pfx[:rep] = 'rep' - elsif di.opcode.props[:stropz] - pfx[:rep] = 'repz' - end - end + case pfx.delete(:rep) + when :nz + if di.opcode.props[:strop] + pfx[:rep] = 'rep' + elsif di.opcode.props[:stropz] + pfx[:rep] = 'repnz' + end + when :z + if di.opcode.props[:strop] + pfx[:rep] = 'rep' + elsif di.opcode.props[:stropz] + pfx[:rep] = 'repz' + end + end - di - end + di + end - def decode_instr_interpret(di, addr) - super(di, addr) + def decode_instr_interpret(di, addr) + super(di, addr) - # [rip + 42] => [rip - addr + foo] - if m = di.instruction.args.grep(ModRM).first and - ((m.b and m.b.val == 16) or (m.i and m.i.val == 16)) and - m.imm and m.imm.reduce.kind_of?(Integer) - m.imm = Expression[[:-, di.address + di.bin_length], :+, di.address+di.bin_length+m.imm.reduce] - end + # [rip + 42] => [rip - addr + foo] + if m = di.instruction.args.grep(ModRM).first and + ((m.b and m.b.val == 16) or (m.i and m.i.val == 16)) and + m.imm and m.imm.reduce.kind_of?(Integer) + m.imm = Expression[[:-, di.address + di.bin_length], :+, di.address+di.bin_length+m.imm.reduce] + end - di - end + di + end - def opsz(di, op=nil) - if di and di.instruction.prefix and di.instruction.prefix[:rex_w]; 64 - elsif di and di.instruction.prefix and di.instruction.prefix[:opsz] and (op || di.opcode).props[:needpfx] != 0x66; 16 - elsif di and (op || di.opcode).props[:auto64]; 64 - else 32 - end - end + def opsz(di, op=nil) + if di and di.instruction.prefix and di.instruction.prefix[:rex_w]; 64 + elsif di and di.instruction.prefix and di.instruction.prefix[:opsz] and (op || di.opcode).props[:needpfx] != 0x66; 16 
+ elsif di and (op || di.opcode).props[:auto64]; 64 + else 32 + end + end - def adsz(di, op=nil) - if di and di.instruction.prefix and di.instruction.prefix[:adsz] and (op || di.opcode).props[:needpfx] != 0x67; 32 - else 64 - end - end + def adsz(di, op=nil) + if di and di.instruction.prefix and di.instruction.prefix[:adsz] and (op || di.opcode).props[:needpfx] != 0x67; 32 + else 64 + end + end - def register_symbols - [:rax, :rcx, :rdx, :rbx, :rsp, :rbp, :rsi, :rdi, :r8, :r9, :r10, :r11, :r12, :r13, :r14, :r15] - end + def register_symbols + [:rax, :rcx, :rdx, :rbx, :rsp, :rbp, :rsi, :rdi, :r8, :r9, :r10, :r11, :r12, :r13, :r14, :r15] + end - # returns a DecodedFunction from a parsed C function prototype - def decode_c_function_prototype(cp, sym, orig=nil) - sym = cp.toplevel.symbol[sym] if sym.kind_of?(::String) - df = DecodedFunction.new - orig ||= Expression[sym.name] + # returns a DecodedFunction from a parsed C function prototype + def decode_c_function_prototype(cp, sym, orig=nil) + sym = cp.toplevel.symbol[sym] if sym.kind_of?(::String) + df = DecodedFunction.new + orig ||= Expression[sym.name] - new_bt = lambda { |expr, rlen| - df.backtracked_for << BacktraceTrace.new(expr, orig, expr, rlen ? :r : :x, rlen) - } + new_bt = lambda { |expr, rlen| + df.backtracked_for << BacktraceTrace.new(expr, orig, expr, rlen ? 
:r : :x, rlen) + } - # return instr emulation - if sym.has_attribute 'noreturn' or sym.has_attribute '__noreturn__' - df.noreturn = true - else - new_bt[Indirection[:rsp, @size/8, orig], nil] - end + # return instr emulation + if sym.has_attribute 'noreturn' or sym.has_attribute '__noreturn__' + df.noreturn = true + else + new_bt[Indirection[:rsp, @size/8, orig], nil] + end - # register dirty (MS standard ABI) - [:rax, :rcx, :rdx, :r8, :r9, :r10, :r11].each { |r| - df.backtrace_binding.update r => Expression::Unknown - } + # register dirty (MS standard ABI) + [:rax, :rcx, :rdx, :r8, :r9, :r10, :r11].each { |r| + df.backtrace_binding.update r => Expression::Unknown + } - if cp.lexer.definition['__MS_X86_64_ABI__'] - reg_args = [:rcx, :rdx, :r8, :r9] - else - reg_args = [:rdi, :rsi, :rdx, :rcx, :r8, :r9] - end + if cp.lexer.definition['__MS_X86_64_ABI__'] + reg_args = [:rcx, :rdx, :r8, :r9] + else + reg_args = [:rdi, :rsi, :rdx, :rcx, :r8, :r9] + end - al = cp.typesize[:ptr] - df.backtrace_binding[:rsp] = Expression[:rsp, :+, al] + al = cp.typesize[:ptr] + df.backtrace_binding[:rsp] = Expression[:rsp, :+, al] - # scan args for function pointers - # TODO walk structs/unions.. - stackoff = al - sym.type.args.to_a.zip(reg_args).each { |a, r| - if not r - r = Indirection[[:rsp, :+, stackoff], al, orig] - stackoff += (cp.sizeof(a) + al - 1) / al * al - end - if a.type.untypedef.kind_of? C::Pointer - pt = a.type.untypedef.type.untypedef - if pt.kind_of? C::Function - new_bt[r, nil] - df.backtracked_for.last.detached = true - elsif pt.kind_of? C::Struct - new_bt[r, al] - else - new_bt[r, cp.sizeof(nil, pt)] - end - end - } + # scan args for function pointers + # TODO walk structs/unions.. + stackoff = al + sym.type.args.to_a.zip(reg_args).each { |a, r| + if not r + r = Indirection[[:rsp, :+, stackoff], al, orig] + stackoff += (cp.sizeof(a) + al - 1) / al * al + end + if a.type.untypedef.kind_of? C::Pointer + pt = a.type.untypedef.type.untypedef + if pt.kind_of? 
C::Function + new_bt[r, nil] + df.backtracked_for.last.detached = true + elsif pt.kind_of? C::Struct + new_bt[r, al] + else + new_bt[r, cp.sizeof(nil, pt)] + end + end + } - df - end + df + end - def backtrace_update_function_binding_check(dasm, faddr, f, b) - # TODO save regs according to ABI - end + def backtrace_update_function_binding_check(dasm, faddr, f, b) + # TODO save regs according to ABI + end end end diff --git a/lib/metasm/metasm/cpu/x86_64/encode.rb b/lib/metasm/metasm/cpu/x86_64/encode.rb index cba3e13c8f..d965197dac 100644 --- a/lib/metasm/metasm/cpu/x86_64/encode.rb +++ b/lib/metasm/metasm/cpu/x86_64/encode.rb @@ -9,285 +9,285 @@ require 'metasm/encode' module Metasm class X86_64 - class ModRM - def self.encode_reg(reg, mregval = 0) - v = reg.kind_of?(Reg) ? reg.val_enc : reg.val & 7 - 0xc0 | (mregval << 3) | v - end + class ModRM + def self.encode_reg(reg, mregval = 0) + v = reg.kind_of?(Reg) ? reg.val_enc : reg.val & 7 + 0xc0 | (mregval << 3) | v + end - def encode(reg = 0, endianness = :little) - reg = reg.val if reg.kind_of? Ia32::Argument + def encode(reg = 0, endianness = :little) + reg = reg.val if reg.kind_of? Ia32::Argument - ret = EncodedData.new << (reg << 3) + ret = EncodedData.new << (reg << 3) - # add bits in the first octet of ret.data (1.9 compatibility layer) - or_bits = lambda { |v| # rape me - if ret.data[0].kind_of? Integer - ret.data[0] |= v - else - ret.data[0] = (ret.data[0].unpack('C').first | v).chr - end - } + # add bits in the first octet of ret.data (1.9 compatibility layer) + or_bits = lambda { |v| # rape me + if ret.data[0].kind_of? 
Integer + ret.data[0] |= v + else + ret.data[0] = (ret.data[0].unpack('C').first | v).chr + end + } - if not self.b and not self.i - # imm only, use sib - or_bits[4] - imm = self.imm || Expression[0] - [ret << ((4 << 3) | 5) << imm.encode(:i32, endianness)] + if not self.b and not self.i + # imm only, use sib + or_bits[4] + imm = self.imm || Expression[0] + [ret << ((4 << 3) | 5) << imm.encode(:i32, endianness)] - elsif (self.b and self.b.val == 16) or (self.i and self.i.val == 16) # rip+imm (rip == addr of the octet after the current instr) - # should have been filtered by #parse, but just in case - raise "invalid rip addressing #{self}" if (self.i and self.b) or (self.s and self.s != 1) - or_bits[5] - imm = self.imm || Expression[0] - [ret << imm.encode(:i32, endianness)] + elsif (self.b and self.b.val == 16) or (self.i and self.i.val == 16) # rip+imm (rip == addr of the octet after the current instr) + # should have been filtered by #parse, but just in case + raise "invalid rip addressing #{self}" if (self.i and self.b) or (self.s and self.s != 1) + or_bits[5] + imm = self.imm || Expression[0] + [ret << imm.encode(:i32, endianness)] - elsif not self.b and self.s != 1 - # sib with no b - raise EncodeError, "Invalid ModRM #{self}" if @i.val == 4 # XXX 12 ? - or_bits[4] - s = {8=>3, 4=>2, 2=>1}[@s] - imm = self.imm || Expression[0] - fu = (s << 6) | (@i.val_enc << 3) | 5 - fu = fu.chr if s >= 2 # rb1.9 encoding fix - [ret << fu << imm.encode(:i32, endianness)] - else - imm = @imm.reduce if self.imm - imm = nil if imm == 0 + elsif not self.b and self.s != 1 + # sib with no b + raise EncodeError, "Invalid ModRM #{self}" if @i.val == 4 # XXX 12 ? 
+ or_bits[4] + s = {8=>3, 4=>2, 2=>1}[@s] + imm = self.imm || Expression[0] + fu = (s << 6) | (@i.val_enc << 3) | 5 + fu = fu.chr if s >= 2 # rb1.9 encoding fix + [ret << fu << imm.encode(:i32, endianness)] + else + imm = @imm.reduce if self.imm + imm = nil if imm == 0 - if not self.i or (not self.b and self.s == 1) - # no sib byte (except for [esp]) - @s, @i, @b = nil, nil, @s if not self.b - or_bits[@b.val_enc] - ret << 0x24 if @b.val_enc == 4 # XXX val_enc ? - else - # sib - or_bits[4] + if not self.i or (not self.b and self.s == 1) + # no sib byte (except for [esp]) + @s, @i, @b = nil, nil, @s if not self.b + or_bits[@b.val_enc] + ret << 0x24 if @b.val_enc == 4 # XXX val_enc ? + else + # sib + or_bits[4] - @b, @i = @i, @b if @s == 1 and @i.kind_of?(Reg) and (@i.val_enc == 4 or @b.val_enc == 5) + @b, @i = @i, @b if @s == 1 and @i.kind_of?(Reg) and (@i.val_enc == 4 or @b.val_enc == 5) - raise EncodeError, "Invalid ModRM #{self}" if @i.val == 4 + raise EncodeError, "Invalid ModRM #{self}" if @i.val == 4 - s = {8=>3, 4=>2, 2=>1, 1=>0}[@s] - fu = (s << 6) | (@i.val_enc << 3) | @b.val_enc - fu = fu.chr if s >= 2 # rb1.9 encoding fix - ret << fu - end + s = {8=>3, 4=>2, 2=>1, 1=>0}[@s] + fu = (s << 6) | (@i.val_enc << 3) | @b.val_enc + fu = fu.chr if s >= 2 # rb1.9 encoding fix + ret << fu + end - imm ||= 0 if @b.val_enc == 5 - if imm - case Expression.in_range?(imm, :i8) - when true - or_bits[1<<6] - [ret << Expression.encode_imm(imm, :i8, endianness)] - when false - or_bits[2<<6] - [ret << Expression.encode_imm(imm, :a32, endianness)] - when nil - rets = ret.dup - or_bits[1<<6] - ret << @imm.encode(:i8, endianness) - rets, ret = ret, rets # or_bits[] modifies ret directly - or_bits[2<<6] - ret << @imm.encode(:a32, endianness) - [ret, rets] - end - else - [ret] - end - end - end - end + imm ||= 0 if @b.val_enc == 5 + if imm + case Expression.in_range?(imm, :i8) + when true + or_bits[1<<6] + [ret << Expression.encode_imm(imm, :i8, endianness)] + when false + 
or_bits[2<<6] + [ret << Expression.encode_imm(imm, :a32, endianness)] + when nil + rets = ret.dup + or_bits[1<<6] + ret << @imm.encode(:i8, endianness) + rets, ret = ret, rets # or_bits[] modifies ret directly + or_bits[2<<6] + ret << @imm.encode(:a32, endianness) + [ret, rets] + end + else + [ret] + end + end + end + end - # returns all forms of the encoding of instruction i using opcode op - # program may be used to create a new label for relative jump/call - def encode_instr_op(program, i, op) - base = op.bin.dup - oi = op.args.zip(i.args) - set_field = lambda { |f, v| - fld = op.fields[f] - base[fld[0]] |= v << fld[1] - } + # returns all forms of the encoding of instruction i using opcode op + # program may be used to create a new label for relative jump/call + def encode_instr_op(program, i, op) + base = op.bin.dup + oi = op.args.zip(i.args) + set_field = lambda { |f, v| + fld = op.fields[f] + base[fld[0]] |= v << fld[1] + } - # - # handle prefixes and bit fields - # - pfx = i.prefix.map { |k, v| - case k - when :jmp; {:jmp => 0x3e, :nojmp => 0x2e}[v] - when :lock; 0xf0 - when :rep; {'repnz' => 0xf2, 'repz' => 0xf3, 'rep' => 0xf2}[v] - when :jmphint; {'hintjmp' => 0x3e, 'hintnojmp' => 0x2e}[v] - when :seg; [0x26, 0x2E, 0x36, 0x3E, 0x64, 0x65][v.val] - end - }.compact.pack 'C*' + # + # handle prefixes and bit fields + # + pfx = i.prefix.map { |k, v| + case k + when :jmp; {:jmp => 0x3e, :nojmp => 0x2e}[v] + when :lock; 0xf0 + when :rep; {'repnz' => 0xf2, 'repz' => 0xf3, 'rep' => 0xf2}[v] + when :jmphint; {'hintjmp' => 0x3e, 'hintnojmp' => 0x2e}[v] + when :seg; [0x26, 0x2E, 0x36, 0x3E, 0x64, 0x65][v.val] + end + }.compact.pack 'C*' - rex_w = rex_r = rex_x = rex_b = 0 - if op.name == 'movsx' or op.name == 'movzx' or op.name == 'movsxd' - case i.args[0].sz - when 64; rex_w = 1 - when 32 - when 16; pfx << 0x66 - end - elsif op.name == 'crc32' - case i.args[1].sz - when 64; rex_w = 1 - when 32; - when 16; pfx << 0x66 - end - else - opsz = op.props[:argsz] || 
i.prefix[:sz] - oi.each { |oa, ia| - case oa - when :reg, :reg_eax, :modrm, :mrm_imm - raise EncodeError, "Incompatible arg size in #{i}" if ia.sz and opsz and opsz != ia.sz - opsz = ia.sz - end - } - opsz ||= 64 if op.props[:auto64] - opsz = op.props[:opsz] if op.props[:opsz] # XXX ? - case opsz - when 64; rex_w = 1 if not op.props[:auto64] and (not op.props[:argsz] or op.props[:opsz] == 64) - when 32; raise EncodeError, "Incompatible arg size in #{i}" if op.props[:auto64] - when 16; pfx << 0x66 - end - end - opsz ||= @size + rex_w = rex_r = rex_x = rex_b = 0 + if op.name == 'movsx' or op.name == 'movzx' or op.name == 'movsxd' + case i.args[0].sz + when 64; rex_w = 1 + when 32 + when 16; pfx << 0x66 + end + elsif op.name == 'crc32' + case i.args[1].sz + when 64; rex_w = 1 + when 32; + when 16; pfx << 0x66 + end + else + opsz = op.props[:argsz] || i.prefix[:sz] + oi.each { |oa, ia| + case oa + when :reg, :reg_eax, :modrm, :mrm_imm + raise EncodeError, "Incompatible arg size in #{i}" if ia.sz and opsz and opsz != ia.sz + opsz = ia.sz + end + } + opsz ||= 64 if op.props[:auto64] + opsz = op.props[:opsz] if op.props[:opsz] # XXX ? 
+ case opsz + when 64; rex_w = 1 if not op.props[:auto64] and (not op.props[:argsz] or op.props[:opsz] == 64) + when 32; raise EncodeError, "Incompatible arg size in #{i}" if op.props[:auto64] + when 16; pfx << 0x66 + end + end + opsz ||= @size - # addrsize override / segment override / rex_bx - if mrm = i.args.grep(ModRM).first - mrm.encode(0, @endianness) if mrm.b or mrm.i # may reorder b/i, which must be correct for rex - rex_b = 1 if mrm.b and mrm.b.val_rex.to_i > 0 - rex_x = 1 if mrm.i and mrm.i.val_rex.to_i > 0 - pfx << 0x67 if (mrm.b and mrm.b.sz == 32) or (mrm.i and mrm.i.sz == 32) or op.props[:adsz] == 32 - pfx << [0x26, 0x2E, 0x36, 0x3E, 0x64, 0x65][mrm.seg.val] if mrm.seg - elsif op.props[:adsz] == 32 - pfx << 0x67 - end + # addrsize override / segment override / rex_bx + if mrm = i.args.grep(ModRM).first + mrm.encode(0, @endianness) if mrm.b or mrm.i # may reorder b/i, which must be correct for rex + rex_b = 1 if mrm.b and mrm.b.val_rex.to_i > 0 + rex_x = 1 if mrm.i and mrm.i.val_rex.to_i > 0 + pfx << 0x67 if (mrm.b and mrm.b.sz == 32) or (mrm.i and mrm.i.sz == 32) or op.props[:adsz] == 32 + pfx << [0x26, 0x2E, 0x36, 0x3E, 0x64, 0x65][mrm.seg.val] if mrm.seg + elsif op.props[:adsz] == 32 + pfx << 0x67 + end - # - # encode embedded arguments - # - postponed = [] - oi.each { |oa, ia| - case oa - when :reg - set_field[oa, ia.val_enc] - if op.fields[:reg][1] == 3 - rex_r = ia.val_rex || 0 - else - rex_b = ia.val_rex || 0 - end - when :seg3, :seg3A, :seg2, :seg2A, :eeec, :eeed, :eeet, :regfp, :regmmx, :regxmm, :regymm - set_field[oa, ia.val & 7] - rex_r = 1 if ia.val > 7 - pfx << 0x66 if oa == :regmmx and op.props[:xmmx] and ia.sz == 128 - when :vexvreg, :vexvxmm, :vexvymm - set_field[:vex_vvvv, ia.val ^ 0xf] - when :imm_val1, :imm_val3, :reg_cl, :reg_eax, :reg_dx, :regfp0 - # implicit - when :modrm, :modrmmmx, :modrmxmm, :modrmymm - # postpone, but we must set rex now - case ia - when ModRM - ia.encode(0, @endianness) # could swap b/i - rex_x = ia.i.val_rex 
|| 0 if ia.i - rex_b = ia.b.val_rex || 0 if ia.b - when Reg - rex_b = ia.val_rex || 0 - else - rex_b = ia.val >> 3 - end - postponed << [oa, ia] - else - postponed << [oa, ia] - end - } + # + # encode embedded arguments + # + postponed = [] + oi.each { |oa, ia| + case oa + when :reg + set_field[oa, ia.val_enc] + if op.fields[:reg][1] == 3 + rex_r = ia.val_rex || 0 + else + rex_b = ia.val_rex || 0 + end + when :seg3, :seg3A, :seg2, :seg2A, :eeec, :eeed, :eeet, :regfp, :regmmx, :regxmm, :regymm + set_field[oa, ia.val & 7] + rex_r = 1 if ia.val > 7 + pfx << 0x66 if oa == :regmmx and op.props[:xmmx] and ia.sz == 128 + when :vexvreg, :vexvxmm, :vexvymm + set_field[:vex_vvvv, ia.val ^ 0xf] + when :imm_val1, :imm_val3, :reg_cl, :reg_eax, :reg_dx, :regfp0 + # implicit + when :modrm, :modrmmmx, :modrmxmm, :modrmymm + # postpone, but we must set rex now + case ia + when ModRM + ia.encode(0, @endianness) # could swap b/i + rex_x = ia.i.val_rex || 0 if ia.i + rex_b = ia.b.val_rex || 0 if ia.b + when Reg + rex_b = ia.val_rex || 0 + else + rex_b = ia.val >> 3 + end + postponed << [oa, ia] + else + postponed << [oa, ia] + end + } - if !(op.args & [:modrm, :modrmmmx, :modrmxmm, :modrmymm]).empty? - # reg field of modrm - regval = (base[-1] >> 3) & 7 - base.pop - end + if !(op.args & [:modrm, :modrmmmx, :modrmxmm, :modrmymm]).empty? + # reg field of modrm + regval = (base[-1] >> 3) & 7 + base.pop + end - # convert label name for jmp/call/loop to relative offset - if op.props[:setip] and op.name[0, 3] != 'ret' and i.args.first.kind_of? Expression - postlabel = program.new_label('post'+op.name) - target = postponed.first[1] - target = target.rexpr if target.kind_of? Expression and target.op == :+ and not target.lexpr - postponed.first[1] = Expression[target, :-, postlabel] - end + # convert label name for jmp/call/loop to relative offset + if op.props[:setip] and op.name[0, 3] != 'ret' and i.args.first.kind_of? 
Expression + postlabel = program.new_label('post'+op.name) + target = postponed.first[1] + target = target.rexpr if target.kind_of? Expression and target.op == :+ and not target.lexpr + postponed.first[1] = Expression[target, :-, postlabel] + end - pfx << op.props[:needpfx] if op.props[:needpfx] + pfx << op.props[:needpfx] if op.props[:needpfx] - if op.fields[:vex_r] - set_field[:vex_r, rex_r ^ 1] - set_field[:vex_x, rex_x ^ 1] if op.fields[:vex_x] - set_field[:vex_b, rex_b ^ 1] if op.fields[:vex_b] - set_field[:vex_w, rex_w] if op.fields[:vex_w] - elsif rex_r + rex_x + rex_b + rex_w >= 1 or i.args.grep(Reg).find { |r| r.sz == 8 and r.val >= 4 and r.val < 8 } - rex = 0x40 - rex |= 1 if rex_b == 1 - rex |= 2 if rex_x == 1 - rex |= 4 if rex_r == 1 - rex |= 8 if rex_w == 1 - pfx << rex - end - ret = EncodedData.new(pfx + base.pack('C*')) + if op.fields[:vex_r] + set_field[:vex_r, rex_r ^ 1] + set_field[:vex_x, rex_x ^ 1] if op.fields[:vex_x] + set_field[:vex_b, rex_b ^ 1] if op.fields[:vex_b] + set_field[:vex_w, rex_w] if op.fields[:vex_w] + elsif rex_r + rex_x + rex_b + rex_w >= 1 or i.args.grep(Reg).find { |r| r.sz == 8 and r.val >= 4 and r.val < 8 } + rex = 0x40 + rex |= 1 if rex_b == 1 + rex |= 2 if rex_x == 1 + rex |= 4 if rex_r == 1 + rex |= 8 if rex_w == 1 + pfx << rex + end + ret = EncodedData.new(pfx + base.pack('C*')) - postponed.each { |oa, ia| - case oa - when :modrm, :modrmmmx, :modrmxmm, :modrmymm - if ia.kind_of? 
ModRM - ed = ia.encode(regval, @endianness) - if ed.kind_of?(::Array) - if ed.length > 1 - # we know that no opcode can have more than 1 modrm - ary = [] - ed.each { |m| ary << (ret.dup << m) } - ret = ary - next - else - ed = ed.first - end - end - else - ed = ModRM.encode_reg(ia, regval) - end - when :mrm_imm; ed = ia.imm.encode("a#{op.props[:adsz] || 64}".to_sym, @endianness) - when :i8, :u8, :i16, :u16, :i32, :u32, :i64, :u64; ed = ia.encode(oa, @endianness) - when :i - type = if opsz == 64 - if op.props[:imm64] - :a64 - else - if _ia = ia.reduce and _ia.kind_of?(Integer) and _ia > 0 and (_ia >> 63) == 1 - # handle 0xffffffff_ffffffff -> -1, which should fit in i32 - ia = Expression[_ia - (1 << 64)] - end - :i32 - end - else - "a#{opsz}".to_sym - end - ed = ia.encode(type, @endianness) - when :i4xmm, :i4ymm - ed = ia.val << 4 # u8 - else raise SyntaxError, "Internal error: want to encode field #{oa.inspect} as arg in #{i}" - end + postponed.each { |oa, ia| + case oa + when :modrm, :modrmmmx, :modrmxmm, :modrmymm + if ia.kind_of? 
ModRM + ed = ia.encode(regval, @endianness) + if ed.kind_of?(::Array) + if ed.length > 1 + # we know that no opcode can have more than 1 modrm + ary = [] + ed.each { |m| ary << (ret.dup << m) } + ret = ary + next + else + ed = ed.first + end + end + else + ed = ModRM.encode_reg(ia, regval) + end + when :mrm_imm; ed = ia.imm.encode("a#{op.props[:adsz] || 64}".to_sym, @endianness) + when :i8, :u8, :i16, :u16, :i32, :u32, :i64, :u64; ed = ia.encode(oa, @endianness) + when :i + type = if opsz == 64 + if op.props[:imm64] + :a64 + else + if _ia = ia.reduce and _ia.kind_of?(Integer) and _ia > 0 and (_ia >> 63) == 1 + # handle 0xffffffff_ffffffff -> -1, which should fit in i32 + ia = Expression[_ia - (1 << 64)] + end + :i32 + end + else + "a#{opsz}".to_sym + end + ed = ia.encode(type, @endianness) + when :i4xmm, :i4ymm + ed = ia.val << 4 # u8 + else raise SyntaxError, "Internal error: want to encode field #{oa.inspect} as arg in #{i}" + end - if ret.kind_of?(::Array) - ret.each { |e| e << ed } - else - ret << ed - end - } + if ret.kind_of?(::Array) + ret.each { |e| e << ed } + else + ret << ed + end + } - # we know that no opcode with setip accept both modrm and immediate arg, so ret is not an ::Array - ret.add_export(postlabel, ret.virtsize) if postlabel + # we know that no opcode with setip accept both modrm and immediate arg, so ret is not an ::Array + ret.add_export(postlabel, ret.virtsize) if postlabel - ret - end + ret + end end end diff --git a/lib/metasm/metasm/cpu/x86_64/main.rb b/lib/metasm/metasm/cpu/x86_64/main.rb index 7c9b74250c..1503d9e86b 100644 --- a/lib/metasm/metasm/cpu/x86_64/main.rb +++ b/lib/metasm/metasm/cpu/x86_64/main.rb @@ -11,134 +11,134 @@ module Metasm # The x86_64, 64-bit extension of the x86 CPU (x64, em64t, amd64...) 
class X86_64 < Ia32 - # FpReg, SegReg, Farptr unchanged + # FpReg, SegReg, Farptr unchanged - # XMM extended to 16 regs, YMM - class SimdReg < Ia32::SimdReg - double_map 64 => (0..7).map { |n| "mm#{n}" }, - 128 => (0..15).map { |n| "xmm#{n}" }, - 256 => (0..15).map { |n| "ymm#{n}" } + # XMM extended to 16 regs, YMM + class SimdReg < Ia32::SimdReg + double_map 64 => (0..7).map { |n| "mm#{n}" }, + 128 => (0..15).map { |n| "xmm#{n}" }, + 256 => (0..15).map { |n| "ymm#{n}" } - def val_enc - @val & 7 - end + def val_enc + @val & 7 + end - def val_rex - @val >> 3 - end - end + def val_rex + @val >> 3 + end + end - # general purpose registers, all sizes - # 8 new gprs (r8..r15), set bit R in the REX prefix to reference them (or X/B if in ModRM) - # aonethusaontehsanothe with 8bit subreg: with no rex prefix, refers to ah ch dh bh (as usual) - # but whenever the prefix is present, those become unavailable and encodie spl..dil (low byte of rsp/rdi) - class Reg < Ia32::Reg - double_map 8 => %w{ al cl dl bl spl bpl sil dil r8b r9b r10b r11b r12b r13b r14b r15b ah ch dh bh}, - 16 => %w{ ax cx dx bx sp bp si di r8w r9w r10w r11w r12w r13w r14w r15w}, - 32 => %w{eax ecx edx ebx esp ebp esi edi r8d r9d r10d r11d r12d r13d r14d r15d eip}, - 64 => %w{rax rcx rdx rbx rsp rbp rsi rdi r8 r9 r10 r11 r12 r13 r14 r15 rip} + # general purpose registers, all sizes + # 8 new gprs (r8..r15), set bit R in the REX prefix to reference them (or X/B if in ModRM) + # aonethusaontehsanothe with 8bit subreg: with no rex prefix, refers to ah ch dh bh (as usual) + # but whenever the prefix is present, those become unavailable and encodie spl..dil (low byte of rsp/rdi) + class Reg < Ia32::Reg + double_map 8 => %w{ al cl dl bl spl bpl sil dil r8b r9b r10b r11b r12b r13b r14b r15b ah ch dh bh}, + 16 => %w{ ax cx dx bx sp bp si di r8w r9w r10w r11w r12w r13w r14w r15w}, + 32 => %w{eax ecx edx ebx esp ebp esi edi r8d r9d r10d r11d r12d r13d r14d r15d eip}, + 64 => %w{rax rcx rdx rbx rsp rbp rsi rdi r8 r9 
r10 r11 r12 r13 r14 r15 rip} - Sym = @i_to_s[64].map { |s| s.to_sym } + Sym = @i_to_s[64].map { |s| s.to_sym } - # returns a symbolic representation of the register: - # cx => :rcx & 0xffff - # ah => (:rax >> 8) & 0xff - # XXX in x64, 32bits operations are zero-extended to 64bits (eg mov rax, 0x1234_ffff_ffff ; add eax, 1 => rax == 0 - def symbolic(di=nil) - s = Sym[@val] - s = di.next_addr if s == :rip and di - if @sz == 8 and to_s[-1] == ?h - Expression[[Sym[@val-16], :>>, 8], :&, 0xff] - elsif @sz == 8 - Expression[s, :&, 0xff] - elsif @sz == 16 - Expression[s, :&, 0xffff] - elsif @sz == 32 - Expression[s, :&, 0xffffffff] - else - s - end - end + # returns a symbolic representation of the register: + # cx => :rcx & 0xffff + # ah => (:rax >> 8) & 0xff + # XXX in x64, 32bits operations are zero-extended to 64bits (eg mov rax, 0x1234_ffff_ffff ; add eax, 1 => rax == 0 + def symbolic(di=nil) + s = Sym[@val] + s = di.next_addr if s == :rip and di + if @sz == 8 and to_s[-1] == ?h + Expression[[Sym[@val-16], :>>, 8], :&, 0xff] + elsif @sz == 8 + Expression[s, :&, 0xff] + elsif @sz == 16 + Expression[s, :&, 0xffff] + elsif @sz == 32 + Expression[s, :&, 0xffffffff] + else + s + end + end - # checks if two registers have bits in common - def share?(other) - raise 'TODO' - # XXX TODO wtf does formula this do ? - other.val % (other.sz >> 1) == @val % (@sz >> 1) and (other.sz != @sz or @sz != 8 or other.val == @val) - end + # checks if two registers have bits in common + def share?(other) + raise 'TODO' + # XXX TODO wtf does formula this do ? 
+ other.val % (other.sz >> 1) == @val % (@sz >> 1) and (other.sz != @sz or @sz != 8 or other.val == @val) + end - # returns the part of @val to encode in an instruction field - def val_enc - if @sz == 8 and @val >= 16; @val-12 # ah, bh, ch, dh - elsif @val >= 16 # rip - else @val & 7 # others - end - end + # returns the part of @val to encode in an instruction field + def val_enc + if @sz == 8 and @val >= 16; @val-12 # ah, bh, ch, dh + elsif @val >= 16 # rip + else @val & 7 # others + end + end - # returns the part of @val to encode in an instruction's rex prefix - def val_rex - if @sz == 8 and @val >= 16 # ah, bh, ch, dh: rex forbidden - elsif @val >= 16 # rip - else @val >> 3 # others - end - end - end + # returns the part of @val to encode in an instruction's rex prefix + def val_rex + if @sz == 8 and @val >= 16 # ah, bh, ch, dh: rex forbidden + elsif @val >= 16 # rip + else @val >> 3 # others + end + end + end - # ModRM represents indirections (eg dword ptr [eax+4*ebx+12h]) - # 16bit mode unavailable in x64 - # opcodes use 64bit addressing by default, use adsz override (67h) prefix to switch to 32 - # immediate values are encoded as :i32 sign-extended to 64bits - class ModRM < Ia32::ModRM - # mod 0/1/2 m 4 => sib - # mod 0 m 5 => rip+imm - # sib: i 4 => no index, b 5 => no base - end + # ModRM represents indirections (eg dword ptr [eax+4*ebx+12h]) + # 16bit mode unavailable in x64 + # opcodes use 64bit addressing by default, use adsz override (67h) prefix to switch to 32 + # immediate values are encoded as :i32 sign-extended to 64bits + class ModRM < Ia32::ModRM + # mod 0/1/2 m 4 => sib + # mod 0 m 5 => rip+imm + # sib: i 4 => no index, b 5 => no base + end - class DbgReg < Ia32::DbgReg - simple_map((0..15).map { |i| [i, "dr#{i}"] }) - end + class DbgReg < Ia32::DbgReg + simple_map((0..15).map { |i| [i, "dr#{i}"] }) + end - class CtrlReg < Ia32::CtrlReg - simple_map((0..15).map { |i| [i, "cr#{i}"] }) - end + class CtrlReg < Ia32::CtrlReg + 
simple_map((0..15).map { |i| [i, "cr#{i}"] }) + end - class TstReg < Ia32::TstReg - simple_map((0..15).map { |i| [i, "tr#{i}"] }) - end + class TstReg < Ia32::TstReg + simple_map((0..15).map { |i| [i, "tr#{i}"] }) + end - # Create a new instance of an X86 cpu - # arguments (any order) - # - instruction set (386, 486, sse2...) [latest] - # - endianness [:little] - def initialize(*a) - super(:latest) - @size = 64 - a.delete @size - @endianness = (a & [:big, :little]).first || :little - a.delete @endianness - @family = a.pop || :latest - raise "Invalid arguments #{a.inspect}" if not a.empty? - raise "Invalid X86_64 family #{@family.inspect}" if not respond_to?("init_#@family") - end + # Create a new instance of an X86 cpu + # arguments (any order) + # - instruction set (386, 486, sse2...) [latest] + # - endianness [:little] + def initialize(*a) + super(:latest) + @size = 64 + a.delete @size + @endianness = (a & [:big, :little]).first || :little + a.delete @endianness + @family = a.pop || :latest + raise "Invalid arguments #{a.inspect}" if not a.empty? + raise "Invalid X86_64 family #{@family.inspect}" if not respond_to?("init_#@family") + end - # defines some preprocessor macros to say who we are: - # TODO - def tune_prepro(pp) - super(pp, :itsmeX64) # ask Ia32's to just call super() - pp.define_weak('_M_AMD64') - pp.define_weak('_M_X64') - pp.define_weak('__amd64__') - pp.define_weak('__x86_64__') - end + # defines some preprocessor macros to say who we are: + # TODO + def tune_prepro(pp) + super(pp, :itsmeX64) # ask Ia32's to just call super() + pp.define_weak('_M_AMD64') + pp.define_weak('_M_X64') + pp.define_weak('__amd64__') + pp.define_weak('__x86_64__') + end - def str_to_reg(str) - # X86_64::Reg != Ia32::Reg - Reg.s_to_i.has_key?(str) ? Reg.from_str(str) : SimdReg.s_to_i.has_key?(str) ? SimdReg.from_str(str) : nil - end + def str_to_reg(str) + # X86_64::Reg != Ia32::Reg + Reg.s_to_i.has_key?(str) ? Reg.from_str(str) : SimdReg.s_to_i.has_key?(str) ? 
SimdReg.from_str(str) : nil + end - def shortname - "x64#{'_be' if @endianness == :big}" - end + def shortname + "x64#{'_be' if @endianness == :big}" + end end X64 = X86_64 diff --git a/lib/metasm/metasm/cpu/x86_64/opcodes.rb b/lib/metasm/metasm/cpu/x86_64/opcodes.rb index 149c090948..cb01f18fa6 100644 --- a/lib/metasm/metasm/cpu/x86_64/opcodes.rb +++ b/lib/metasm/metasm/cpu/x86_64/opcodes.rb @@ -9,128 +9,128 @@ require 'metasm/cpu/ia32/opcodes' module Metasm class X86_64 - def init_cpu_constants - super() - [:i32, :u32, :i64, :u64].each { |a| @valid_args[a] = true } - end + def init_cpu_constants + super() + [:i32, :u32, :i64, :u64].each { |a| @valid_args[a] = true } + end - def init_386_common_only - super() - # :imm64 => accept a real int64 as :i argument - # :auto64 => ignore rex_w, always 64-bit op - # :op32no64 => if write to a 32-bit reg, dont zero the top 32-bits of dest - [:imm64, :auto64, :op32no64].each { |a| @valid_props[a] = true } - @opcode_list.delete_if { |o| o.bin[0].to_i & 0xf0 == 0x40 } # now REX prefix - @opcode_list.each { |o| - o.props[:imm64] = true if o.bin == [0xB8] # mov reg, - o.props[:auto64] = true if o.name =~ /^(j.*|loop.*|call|enter|leave|push|pop|ret)$/ - } - addop 'movsxd', [0x63], :mrm - addop('cdqe', [0x98]) { |o| o.props[:opsz] = 64 } - addop('cqo', [0x99]) { |o| o.props[:opsz] = 64 } - end + def init_386_common_only + super() + # :imm64 => accept a real int64 as :i argument + # :auto64 => ignore rex_w, always 64-bit op + # :op32no64 => if write to a 32-bit reg, dont zero the top 32-bits of dest + [:imm64, :auto64, :op32no64].each { |a| @valid_props[a] = true } + @opcode_list.delete_if { |o| o.bin[0].to_i & 0xf0 == 0x40 } # now REX prefix + @opcode_list.each { |o| + o.props[:imm64] = true if o.bin == [0xB8] # mov reg, + o.props[:auto64] = true if o.name =~ /^(j.*|loop.*|call|enter|leave|push|pop|ret)$/ + } + addop 'movsxd', [0x63], :mrm + addop('cdqe', [0x98]) { |o| o.props[:opsz] = 64 } + addop('cqo', [0x99]) { |o| 
o.props[:opsz] = 64 } + end - # all x86_64 cpu understand <= sse2 instrs - def init_x8664_only - init_386_common_only - init_386_only - init_387_only - init_486_only - init_pentium_only - init_p6_only - init_sse_only - init_sse2_only + # all x86_64 cpu understand <= sse2 instrs + def init_x8664_only + init_386_common_only + init_386_only + init_387_only + init_486_only + init_pentium_only + init_p6_only + init_sse_only + init_sse2_only - @opcode_list.delete_if { |o| - o.args.include?(:seg2) or - o.args.include?(:seg2A) or - o.args.include?(:farptr) or - %w[aaa aad aam aas bound daa das into jcxz jecxz - lds les loadall arpl pusha pushad popa - popad].include?(o.name.split('.')[0]) - # split needed for lds.a32 - } + @opcode_list.delete_if { |o| + o.args.include?(:seg2) or + o.args.include?(:seg2A) or + o.args.include?(:farptr) or + %w[aaa aad aam aas bound daa das into jcxz jecxz + lds les loadall arpl pusha pushad popa + popad].include?(o.name.split('.')[0]) + # split needed for lds.a32 + } - @opcode_list.each { |o| - o.props[:auto64] = true if o.name =~ /^(enter|leave|[sl]gdt|[sl]idt|[sl]ldt|[sl]tr|push|pop|syscall)$/ - } + @opcode_list.each { |o| + o.props[:auto64] = true if o.name =~ /^(enter|leave|[sl]gdt|[sl]idt|[sl]ldt|[sl]tr|push|pop|syscall)$/ + } - addop('cmpxchg16b', [0x0F, 0xC7], 1) { |o| o.props[:opsz] = 64 ; o.props[:argsz] = 128 } - addop('iretq', [0xCF], nil, :stopexec, :setip) { |o| o.props[:opsz] = 64 } ; opcode_list.unshift opcode_list.pop - addop 'swapgs', [0x0F, 0x01, 0xF8] + addop('cmpxchg16b', [0x0F, 0xC7], 1) { |o| o.props[:opsz] = 64 ; o.props[:argsz] = 128 } + addop('iretq', [0xCF], nil, :stopexec, :setip) { |o| o.props[:opsz] = 64 } ; opcode_list.unshift opcode_list.pop + addop 'swapgs', [0x0F, 0x01, 0xF8] - addop('movq', [0x0F, 0x6E], :mrmmmx, {:d => [1, 4]}) { |o| o.args = [:modrm, :regmmx] ; o.props[:opsz] = o.props[:argsz] = 64 } - addop('movq', [0x0F, 0x6E], :mrmxmm, {:d => [1, 4]}) { |o| o.args = [:modrm, :regxmm] ; o.props[:opsz] = 
o.props[:argsz] = 64 ; o.props[:needpfx] = 0x66 } - addop('jcxz', [0xE3], nil, :setip, :i8) { |o| o.props[:adsz] = 32 } # actually 16 (cx), but x64 in general says pfx 0x67 => adsz = 32 - addop('jrcxz', [0xE3], nil, :setip, :i8) { |o| o.props[:adsz] = 64 } - end + addop('movq', [0x0F, 0x6E], :mrmmmx, {:d => [1, 4]}) { |o| o.args = [:modrm, :regmmx] ; o.props[:opsz] = o.props[:argsz] = 64 } + addop('movq', [0x0F, 0x6E], :mrmxmm, {:d => [1, 4]}) { |o| o.args = [:modrm, :regxmm] ; o.props[:opsz] = o.props[:argsz] = 64 ; o.props[:needpfx] = 0x66 } + addop('jcxz', [0xE3], nil, :setip, :i8) { |o| o.props[:adsz] = 32 } # actually 16 (cx), but x64 in general says pfx 0x67 => adsz = 32 + addop('jrcxz', [0xE3], nil, :setip, :i8) { |o| o.props[:adsz] = 64 } + end - def init_sse3 - init_x8664_only - init_sse3_only - end + def init_sse3 + init_x8664_only + init_sse3_only + end - def init_sse41_only - super() - addop('pextrq', [0x0F, 0x3A, 0x16], :mrmxmm, :u8) { |o| o.props[:needpfx] = 0x66; o.args[o.args.index(:modrmxmm)] = :modrm; o.props[:opsz] = o.props[:argsz] = 64 } - addop('pinsrq', [0x0F, 0x3A, 0x22], :mrmxmm, :u8) { |o| o.props[:needpfx] = 0x66; o.args[o.args.index(:modrmxmm)] = :modrm; o.props[:opsz] = o.props[:argsz] = 64 } - end + def init_sse41_only + super() + addop('pextrq', [0x0F, 0x3A, 0x16], :mrmxmm, :u8) { |o| o.props[:needpfx] = 0x66; o.args[o.args.index(:modrmxmm)] = :modrm; o.props[:opsz] = o.props[:argsz] = 64 } + addop('pinsrq', [0x0F, 0x3A, 0x22], :mrmxmm, :u8) { |o| o.props[:needpfx] = 0x66; o.args[o.args.index(:modrmxmm)] = :modrm; o.props[:opsz] = o.props[:argsz] = 64 } + end - def init_avx_only - super() - addop('rdfsbase', [0x0F, 0xAE], 0, :modrmR) { |o| o.props[:needpfx] = 0xF3 } - addop('rdgsbase', [0x0F, 0xAE], 1, :modrmR) { |o| o.props[:needpfx] = 0xF3 } - addop('wrfsbase', [0x0F, 0xAE], 2, :modrmR) { |o| o.props[:needpfx] = 0xF3 } - addop('wrgsbase', [0x0F, 0xAE], 3, :modrmR) { |o| o.props[:needpfx] = 0xF3 } - end + def init_avx_only + super() 
+ addop('rdfsbase', [0x0F, 0xAE], 0, :modrmR) { |o| o.props[:needpfx] = 0xF3 } + addop('rdgsbase', [0x0F, 0xAE], 1, :modrmR) { |o| o.props[:needpfx] = 0xF3 } + addop('wrfsbase', [0x0F, 0xAE], 2, :modrmR) { |o| o.props[:needpfx] = 0xF3 } + addop('wrgsbase', [0x0F, 0xAE], 3, :modrmR) { |o| o.props[:needpfx] = 0xF3 } + end - def addop_macrostr(name, bin, type) - super(name, bin, type) - bin = bin.dup - bin[0] |= 1 - addop(name+'q', bin) { |o| o.props[:opsz] = 64 ; o.props[type] = true } - end + def addop_macrostr(name, bin, type) + super(name, bin, type) + bin = bin.dup + bin[0] |= 1 + addop(name+'q', bin) { |o| o.props[:opsz] = 64 ; o.props[type] = true } + end - def addop_macroret(name, bin, *args) - addop(name + '.i64', bin, nil, :stopexec, :setip, *args) { |o| o.props[:opsz] = 64 } - super(name, bin, *args) - end + def addop_macroret(name, bin, *args) + addop(name + '.i64', bin, nil, :stopexec, :setip, *args) { |o| o.props[:opsz] = 64 } + super(name, bin, *args) + end - def addop_post(op) - if op.fields[:d] or op.fields[:w] or op.fields[:s] or op.args.first == :regfp0 - return super(op) - end + def addop_post(op) + if op.fields[:d] or op.fields[:w] or op.fields[:s] or op.args.first == :regfp0 + return super(op) + end - if op.props[:needpfx] - @opcode_list.unshift op - else - @opcode_list << op - end + if op.props[:needpfx] + @opcode_list.unshift op + else + @opcode_list << op + end - if op.args == [:i] or op.name == 'ret' - # define opsz-override version for ambiguous opcodes - op16 = op.dup - op16.name << '.i16' - op16.props[:opsz] = 16 - @opcode_list << op16 - # push call ret jz can't 32bit - op64 = op.dup - op64.name << '.i64' - op64.props[:opsz] = 64 - @opcode_list << op64 - elsif op.props[:strop] or op.props[:stropz] or op.args.include? :mrm_imm or - op.args.include? 
:modrm or op.name =~ /loop|xlat/ - # define adsz-override version for ambiguous opcodes (movsq) - # XXX loop pfx 67 = rip+ecx, 66/rex ignored - op32 = op.dup - op32.name << '.a32' - op32.props[:adsz] = 32 - @opcode_list << op32 - op64 = op.dup - op64.name << '.a64' - op64.props[:adsz] = 64 - @opcode_list << op64 - end - end + if op.args == [:i] or op.name == 'ret' + # define opsz-override version for ambiguous opcodes + op16 = op.dup + op16.name << '.i16' + op16.props[:opsz] = 16 + @opcode_list << op16 + # push call ret jz can't 32bit + op64 = op.dup + op64.name << '.i64' + op64.props[:opsz] = 64 + @opcode_list << op64 + elsif op.props[:strop] or op.props[:stropz] or op.args.include? :mrm_imm or + op.args.include? :modrm or op.name =~ /loop|xlat/ + # define adsz-override version for ambiguous opcodes (movsq) + # XXX loop pfx 67 = rip+ecx, 66/rex ignored + op32 = op.dup + op32.name << '.a32' + op32.props[:adsz] = 32 + @opcode_list << op32 + op64 = op.dup + op64.name << '.a64' + op64.props[:adsz] = 64 + @opcode_list << op64 + end + end end end diff --git a/lib/metasm/metasm/cpu/x86_64/parse.rb b/lib/metasm/metasm/cpu/x86_64/parse.rb index 8a558ba1df..abc0081225 100644 --- a/lib/metasm/metasm/cpu/x86_64/parse.rb +++ b/lib/metasm/metasm/cpu/x86_64/parse.rb @@ -10,67 +10,67 @@ require 'metasm/parse' module Metasm class X86_64 - def parse_parser_instruction(lexer, instr) - case instr.raw.downcase - when '.mode', '.bits' - if tok = lexer.readtok and tok.type == :string and tok.raw == '64' - lexer.skip_space - raise instr, 'syntax error' if ntok = lexer.nexttok and ntok.type != :eol - else - raise instr, 'invalid cpu mode, 64bit only' - end - else super(lexer, instr) - end - end + def parse_parser_instruction(lexer, instr) + case instr.raw.downcase + when '.mode', '.bits' + if tok = lexer.readtok and tok.type == :string and tok.raw == '64' + lexer.skip_space + raise instr, 'syntax error' if ntok = lexer.nexttok and ntok.type != :eol + else + raise instr, 'invalid cpu mode, 
64bit only' + end + else super(lexer, instr) + end + end - def parse_prefix(i, pfx) - super(i, pfx) or (i.prefix[:sz] = 64 if pfx == 'code64') - end + def parse_prefix(i, pfx) + super(i, pfx) or (i.prefix[:sz] = 64 if pfx == 'code64') + end - # needed due to how ruby inheritance works wrt constants - def parse_argregclasslist - [Reg, SimdReg, SegReg, DbgReg, TstReg, CtrlReg, FpReg] - end - # same inheritance sh*t - def parse_modrm(lex, tok, cpu) - ModRM.parse(lex, tok, cpu) - end + # needed due to how ruby inheritance works wrt constants + def parse_argregclasslist + [Reg, SimdReg, SegReg, DbgReg, TstReg, CtrlReg, FpReg] + end + # same inheritance sh*t + def parse_modrm(lex, tok, cpu) + ModRM.parse(lex, tok, cpu) + end - def parse_instruction_checkproto(i) - # check ah vs rex prefix - return if i.args.find { |a| a.kind_of? Reg and a.sz == 8 and a.val >= 16 and - op = opcode_list.find { |op_| op_.name == i.opname } and - ((not op.props[:auto64] and i.args.find { |aa| aa.respond_to? :sz and aa.sz == 64 }) or - i.args.find { |aa| aa.kind_of? Reg and aa.val >= 8 and aa.val < 16 } or # XXX mov ah, cr12... - i.args.grep(ModRM).find { |aa| (aa.b and aa.b.val >= 8 and aa.b.val < 16) or (aa.i and aa.i.val >= 8 and aa.i.val < 16) }) - } - super(i) - end + def parse_instruction_checkproto(i) + # check ah vs rex prefix + return if i.args.find { |a| a.kind_of? Reg and a.sz == 8 and a.val >= 16 and + op = opcode_list.find { |op_| op_.name == i.opname } and + ((not op.props[:auto64] and i.args.find { |aa| aa.respond_to? :sz and aa.sz == 64 }) or + i.args.find { |aa| aa.kind_of? Reg and aa.val >= 8 and aa.val < 16 } or # XXX mov ah, cr12... + i.args.grep(ModRM).find { |aa| (aa.b and aa.b.val >= 8 and aa.b.val < 16) or (aa.i and aa.i.val >= 8 and aa.i.val < 16) }) + } + super(i) + end - # check if the argument matches the opcode's argument spec - def parse_arg_valid?(o, spec, arg) - return if arg.kind_of? 
ModRM and ((arg.b and arg.b.val == 16 and arg.i) or (arg.i and arg.i.val == 16 and (arg.b or arg.s != 1))) - return if arg.kind_of? Reg and arg.sz >= 32 and arg.val == 16 # eip/rip only in modrm - return if o.props[:auto64] and arg.respond_to? :sz and arg.sz == 32 - # vex c4/c5 - return if o.fields[:vex_r] and not o.fields[:vex_b] and (spec == :modrm or spec == :modrmxmm or spec == :modrmymm) and (((arg.kind_of?(SimdReg) or arg.kind_of?(Reg)) and arg.val >= 8) or (arg.kind_of?(ModRM) and ((arg.b and arg.b.val >= 8) or (arg.i and arg.i.val >= 8)))) - if o.name == 'movsxd' - return if not arg.kind_of? Reg and not arg.kind_of? ModRM - arg.sz ||= 32 - if spec == :reg - return if not arg.kind_of? Reg - return arg.sz >= 32 - else - return arg.sz == 32 - end - end - return if o.name == 'xchg' and spec == :reg and o.args.include?(:reg_eax) and arg.kind_of?(Reg) and arg.sz == 32 and arg.val == 0 + # check if the argument matches the opcode's argument spec + def parse_arg_valid?(o, spec, arg) + return if arg.kind_of? ModRM and ((arg.b and arg.b.val == 16 and arg.i) or (arg.i and arg.i.val == 16 and (arg.b or arg.s != 1))) + return if arg.kind_of? Reg and arg.sz >= 32 and arg.val == 16 # eip/rip only in modrm + return if o.props[:auto64] and arg.respond_to? :sz and arg.sz == 32 + # vex c4/c5 + return if o.fields[:vex_r] and not o.fields[:vex_b] and (spec == :modrm or spec == :modrmxmm or spec == :modrmymm) and (((arg.kind_of?(SimdReg) or arg.kind_of?(Reg)) and arg.val >= 8) or (arg.kind_of?(ModRM) and ((arg.b and arg.b.val >= 8) or (arg.i and arg.i.val >= 8)))) + if o.name == 'movsxd' + return if not arg.kind_of? Reg and not arg.kind_of? ModRM + arg.sz ||= 32 + if spec == :reg + return if not arg.kind_of? 
Reg + return arg.sz >= 32 + else + return arg.sz == 32 + end + end + return if o.name == 'xchg' and spec == :reg and o.args.include?(:reg_eax) and arg.kind_of?(Reg) and arg.sz == 32 and arg.val == 0 - super(o, spec, arg) - end + super(o, spec, arg) + end - def check_reserved_name(name) - Reg.s_to_i[name] - end + def check_reserved_name(name) + Reg.s_to_i[name] + end end end diff --git a/lib/metasm/metasm/cpu/x86_64/render.rb b/lib/metasm/metasm/cpu/x86_64/render.rb index c14813f01c..aad901bc8a 100644 --- a/lib/metasm/metasm/cpu/x86_64/render.rb +++ b/lib/metasm/metasm/cpu/x86_64/render.rb @@ -9,27 +9,27 @@ require 'metasm/render' module Metasm class X86_64 - def gui_hilight_word_regexp_init - ret = {} + def gui_hilight_word_regexp_init + ret = {} - %w[a b c d].each { |r| - ret["#{r}l"] = "[re]?#{r}x|#{r}l" - ret["#{r}h"] = "[re]?#{r}x|#{r}h" - ret["#{r}x"] = ret["e#{r}x"] = ret["r#{r}x"] = "[re]?#{r}x|#{r}[hl]" - } + %w[a b c d].each { |r| + ret["#{r}l"] = "[re]?#{r}x|#{r}l" + ret["#{r}h"] = "[re]?#{r}x|#{r}h" + ret["#{r}x"] = ret["e#{r}x"] = ret["r#{r}x"] = "[re]?#{r}x|#{r}[hl]" + } - %w[sp bp si di].each { |r| - ret["#{r}l"] = ret[r] = ret["e#{r}"] = ret["r#{r}"] = "[re]?#{r}|#{r}l" - } + %w[sp bp si di].each { |r| + ret["#{r}l"] = ret[r] = ret["e#{r}"] = ret["r#{r}"] = "[re]?#{r}|#{r}l" + } - (8..15).each { |i| - r = "r#{i}" - ret[r+'b'] = ret[r+'w'] = ret[r+'d'] = ret[r] = "#{r}[bwd]?" - } + (8..15).each { |i| + r = "r#{i}" + ret[r+'b'] = ret[r+'w'] = ret[r+'d'] = ret[r] = "#{r}[bwd]?" 
+ } - ret['eip'] = ret['rip'] = '[re]ip' + ret['eip'] = ret['rip'] = '[re]ip' - ret - end + ret + end end end diff --git a/lib/metasm/metasm/cpu/z80/decode.rb b/lib/metasm/metasm/cpu/z80/decode.rb index f4f6898646..512e6be1b0 100644 --- a/lib/metasm/metasm/cpu/z80/decode.rb +++ b/lib/metasm/metasm/cpu/z80/decode.rb @@ -9,305 +9,305 @@ require 'metasm/decode' module Metasm class Z80 - def build_opcode_bin_mask(op) - # bit = 0 if can be mutated by an field value, 1 if fixed by opcode - op.bin_mask = Array.new(op.bin.length, 0) - op.fields.each { |f, (oct, off)| - op.bin_mask[oct] |= (@fields_mask[f] << off) - } - op.bin_mask.map! { |v| 255 ^ v } - end + def build_opcode_bin_mask(op) + # bit = 0 if can be mutated by an field value, 1 if fixed by opcode + op.bin_mask = Array.new(op.bin.length, 0) + op.fields.each { |f, (oct, off)| + op.bin_mask[oct] |= (@fields_mask[f] << off) + } + op.bin_mask.map! { |v| 255 ^ v } + end - def build_bin_lookaside - # sets up a hash byte value => list of opcodes that may match - # opcode.bin_mask is built here - lookaside = Array.new(256) { [] } - opcode_list.each { |op| - build_opcode_bin_mask op - b = op.bin[0] - msk = op.bin_mask[0] - next @unknown_opcode = op if not b - for i in b..(b | (255^msk)) - lookaside[i] << op if i & msk == b & msk - end - } - lookaside - end + def build_bin_lookaside + # sets up a hash byte value => list of opcodes that may match + # opcode.bin_mask is built here + lookaside = Array.new(256) { [] } + opcode_list.each { |op| + build_opcode_bin_mask op + b = op.bin[0] + msk = op.bin_mask[0] + next @unknown_opcode = op if not b + for i in b..(b | (255^msk)) + lookaside[i] << op if i & msk == b & msk + end + } + lookaside + end - def decode_prefix(instr, byte) - case byte - when 0xDD; instr.prefix = 0xDD - when 0xFD; instr.prefix = 0xFD - # implicit 'else return false' - end - end + def decode_prefix(instr, byte) + case byte + when 0xDD; instr.prefix = 0xDD + when 0xFD; instr.prefix = 0xFD + # implicit 'else 
return false' + end + end - # tries to find the opcode encoded at edata.ptr - # if no match, tries to match a prefix (update di.instruction.prefix) - # on match, edata.ptr points to the first byte of the opcode (after prefixes) - def decode_findopcode(edata) - di = DecodedInstruction.new self - while edata.ptr < edata.data.length - byte = edata.data[edata.ptr] - byte = byte.unpack('C').first if byte.kind_of?(::String) - return di if di.opcode = @bin_lookaside[byte].find { |op| - # fetch the relevant bytes from edata - bseq = edata.data[edata.ptr, op.bin.length].unpack('C*') - # check against full opcode mask - op.bin.zip(bseq, op.bin_mask).all? { |b1, b2, m| b2 and ((b1 & m) == (b2 & m)) } - } + # tries to find the opcode encoded at edata.ptr + # if no match, tries to match a prefix (update di.instruction.prefix) + # on match, edata.ptr points to the first byte of the opcode (after prefixes) + def decode_findopcode(edata) + di = DecodedInstruction.new self + while edata.ptr < edata.data.length + byte = edata.data[edata.ptr] + byte = byte.unpack('C').first if byte.kind_of?(::String) + return di if di.opcode = @bin_lookaside[byte].find { |op| + # fetch the relevant bytes from edata + bseq = edata.data[edata.ptr, op.bin.length].unpack('C*') + # check against full opcode mask + op.bin.zip(bseq, op.bin_mask).all? { |b1, b2, m| b2 and ((b1 & m) == (b2 & m)) } + } - if decode_prefix(di.instruction, edata.get_byte) - nb = edata.data[edata.ptr] - nb = nb.unpack('C').first if nb.kind_of?(::String) - case nb - when 0xCB - # DD CB [] - di.instruction.prefix |= edata.get_byte << 8 - di.bin_length += 2 - opc = edata.data[edata.ptr+1] - opc = opc.unpack('C').first if opc.kind_of?(::String) - bseq = [0xCB, opc] - # XXX in decode_instr_op, byte[0] is the immediate displacement instead of cb - return di if di.opcode = @bin_lookaside[nb].find { |op| - op.bin.zip(bseq, op.bin_mask).all? 
{ |b1, b2, m| b2 and ((b1 & m) == (b2 & m)) } - } - when 0xED - di.instruction.prefix = nil - end - else - di.opcode = @unknown_opcode - return di - end - di.bin_length += 1 - end - end + if decode_prefix(di.instruction, edata.get_byte) + nb = edata.data[edata.ptr] + nb = nb.unpack('C').first if nb.kind_of?(::String) + case nb + when 0xCB + # DD CB [] + di.instruction.prefix |= edata.get_byte << 8 + di.bin_length += 2 + opc = edata.data[edata.ptr+1] + opc = opc.unpack('C').first if opc.kind_of?(::String) + bseq = [0xCB, opc] + # XXX in decode_instr_op, byte[0] is the immediate displacement instead of cb + return di if di.opcode = @bin_lookaside[nb].find { |op| + op.bin.zip(bseq, op.bin_mask).all? { |b1, b2, m| b2 and ((b1 & m) == (b2 & m)) } + } + when 0xED + di.instruction.prefix = nil + end + else + di.opcode = @unknown_opcode + return di + end + di.bin_length += 1 + end + end - def decode_instr_op(edata, di) - before_ptr = edata.ptr - op = di.opcode - di.instruction.opname = op.name - bseq = edata.read(op.bin.length).unpack('C*') # decode_findopcode ensures that data >= op.length - pfx = di.instruction.prefix + def decode_instr_op(edata, di) + before_ptr = edata.ptr + op = di.opcode + di.instruction.opname = op.name + bseq = edata.read(op.bin.length).unpack('C*') # decode_findopcode ensures that data >= op.length + pfx = di.instruction.prefix - field_val = lambda { |f| - if fld = op.fields[f] - (bseq[fld[0]] >> fld[1]) & @fields_mask[f] - end - } + field_val = lambda { |f| + if fld = op.fields[f] + (bseq[fld[0]] >> fld[1]) & @fields_mask[f] + end + } - op.args.each { |a| - di.instruction.args << case a - when :i8, :u8, :i16, :u16; Expression[edata.decode_imm(a, @endianness)] - when :iy; Expression[field_val[a]] - when :iy8; Expression[field_val[a]*8] + op.args.each { |a| + di.instruction.args << case a + when :i8, :u8, :i16, :u16; Expression[edata.decode_imm(a, @endianness)] + when :iy; Expression[field_val[a]] + when :iy8; Expression[field_val[a]*8] - when :rp 
- v = field_val[a] - Reg.new(16, v) - when :rp2 - v = field_val[a] - v = 4 if v == 3 - Reg.new(16, v) - when :ry, :rz - v = field_val[a] - if v == 6 - Memref.new(Reg.from_str('HL'), nil, 1) - else - Reg.new(8, v) - end + when :rp + v = field_val[a] + Reg.new(16, v) + when :rp2 + v = field_val[a] + v = 4 if v == 3 + Reg.new(16, v) + when :ry, :rz + v = field_val[a] + if v == 6 + Memref.new(Reg.from_str('HL'), nil, 1) + else + Reg.new(8, v) + end - when :r_a; Reg.from_str('A') - when :r_af; Reg.from_str('AF') - when :r_hl; Reg.from_str('HL') - when :r_de; Reg.from_str('DE') - when :r_sp; Reg.from_str('SP') - when :r_i; Reg.from_str('I') + when :r_a; Reg.from_str('A') + when :r_af; Reg.from_str('AF') + when :r_hl; Reg.from_str('HL') + when :r_de; Reg.from_str('DE') + when :r_sp; Reg.from_str('SP') + when :r_i; Reg.from_str('I') - when :m16; Memref.new(nil, edata.decode_imm(:u16, @endianness), nil) - when :m_bc; Memref.new(Reg.from_str('BC'), nil, 1) - when :m_de; Memref.new(Reg.from_str('DE'), nil, 1) - when :m_sp; Memref.new(Reg.from_str('SP'), nil, 2) - when :m_hl; Memref.new(Reg.from_str('HL'), nil, 1) - when :mf8; Memref.new(nil, 0xff00 + edata.decode_imm(:u8, @endianness), 1) - when :mfc; Memref.new(Reg.from_str('C'), 0xff00, 1) + when :m16; Memref.new(nil, edata.decode_imm(:u16, @endianness), nil) + when :m_bc; Memref.new(Reg.from_str('BC'), nil, 1) + when :m_de; Memref.new(Reg.from_str('DE'), nil, 1) + when :m_sp; Memref.new(Reg.from_str('SP'), nil, 2) + when :m_hl; Memref.new(Reg.from_str('HL'), nil, 1) + when :mf8; Memref.new(nil, 0xff00 + edata.decode_imm(:u8, @endianness), 1) + when :mfc; Memref.new(Reg.from_str('C'), 0xff00, 1) - else raise SyntaxError, "Internal error: invalid argument #{a} in #{op.name}" - end - } + else raise SyntaxError, "Internal error: invalid argument #{a} in #{op.name}" + end + } - case pfx - when 0xDD - when 0xFD - when 0xCBDD - when 0xCBFD - end + case pfx + when 0xDD + when 0xFD + when 0xCBDD + when 0xCBFD + end - di.bin_length 
+= edata.ptr - before_ptr + di.bin_length += edata.ptr - before_ptr - return if edata.ptr > edata.length + return if edata.ptr > edata.length - di - end + di + end - # hash opcode_name => lambda { |dasm, di, *symbolic_args| instr_binding } - def backtrace_binding - @backtrace_binding ||= init_backtrace_binding - end - def backtrace_binding=(b) @backtrace_binding = b end + # hash opcode_name => lambda { |dasm, di, *symbolic_args| instr_binding } + def backtrace_binding + @backtrace_binding ||= init_backtrace_binding + end + def backtrace_binding=(b) @backtrace_binding = b end - # populate the @backtrace_binding hash with default values - def init_backtrace_binding - @backtrace_binding ||= {} + # populate the @backtrace_binding hash with default values + def init_backtrace_binding + @backtrace_binding ||= {} - mask = 0xffff + mask = 0xffff - opcode_list.map { |ol| ol.basename }.uniq.sort.each { |op| - binding = case op - when 'ld'; lambda { |di, a0, a1, *aa| a2 = aa[0] ; a2 ? { a0 => Expression[a1, :+, a2] } : { a0 => Expression[a1] } } - when 'ldi'; lambda { |di, a0, a1| hl = (a0 == :a ? a1 : a0) ; { a0 => Expression[a1], hl => Expression[hl, :+, 1] } } - when 'ldd'; lambda { |di, a0, a1| hl = (a0 == :a ? a1 : a0) ; { a0 => Expression[a1], hl => Expression[hl, :-, 1] } } - when 'add', 'adc', 'sub', 'sbc', 'and', 'xor', 'or' - lambda { |di, a0, a1| - e_op = { 'add' => :+, 'adc' => :+, 'sub' => :-, 'sbc' => :-, 'and' => :&, 'xor' => :^, 'or' => :| }[op] - ret = Expression[a0, e_op, a1] - ret = Expression[ret, e_op, :flag_c] if op == 'adc' or op == 'sbc' - ret = Expression[ret.reduce] if not a0.kind_of? 
Indirection - { a0 => ret } - } - when 'cp', 'cmp'; lambda { |di, *a| {} } - when 'inc'; lambda { |di, a0| { a0 => Expression[a0, :+, 1] } } - when 'dec'; lambda { |di, a0| { a0 => Expression[a0, :-, 1] } } - when 'not'; lambda { |di, a0| { a0 => Expression[a0, :^, mask] } } - when 'push' - lambda { |di, a0| { :sp => Expression[:sp, :-, 2], - Indirection[:sp, 2, di.address] => Expression[a0] } } - when 'pop' - lambda { |di, a0| { :sp => Expression[:sp, :+, 2], - a0 => Indirection[:sp, 2, di.address] } } - when 'call' - lambda { |di, a0| { :sp => Expression[:sp, :-, 2], - Indirection[:sp, 2, di.address] => Expression[di.next_addr] } - } - when 'ret', 'reti'; lambda { |di, *a| { :sp => Expression[:sp, :+, 2] } } - # TODO callCC, retCC ... - when 'bswap' - lambda { |di, a0| { a0 => Expression[ - [[a0, :&, 0xff00], :>>, 8], :|, - [[a0, :&, 0x00ff], :<<, 8]] } } - when 'nop', /^j/; lambda { |di, *a| {} } - end + opcode_list.map { |ol| ol.basename }.uniq.sort.each { |op| + binding = case op + when 'ld'; lambda { |di, a0, a1, *aa| a2 = aa[0] ; a2 ? { a0 => Expression[a1, :+, a2] } : { a0 => Expression[a1] } } + when 'ldi'; lambda { |di, a0, a1| hl = (a0 == :a ? a1 : a0) ; { a0 => Expression[a1], hl => Expression[hl, :+, 1] } } + when 'ldd'; lambda { |di, a0, a1| hl = (a0 == :a ? a1 : a0) ; { a0 => Expression[a1], hl => Expression[hl, :-, 1] } } + when 'add', 'adc', 'sub', 'sbc', 'and', 'xor', 'or' + lambda { |di, a0, a1| + e_op = { 'add' => :+, 'adc' => :+, 'sub' => :-, 'sbc' => :-, 'and' => :&, 'xor' => :^, 'or' => :| }[op] + ret = Expression[a0, e_op, a1] + ret = Expression[ret, e_op, :flag_c] if op == 'adc' or op == 'sbc' + ret = Expression[ret.reduce] if not a0.kind_of? 
Indirection + { a0 => ret } + } + when 'cp', 'cmp'; lambda { |di, *a| {} } + when 'inc'; lambda { |di, a0| { a0 => Expression[a0, :+, 1] } } + when 'dec'; lambda { |di, a0| { a0 => Expression[a0, :-, 1] } } + when 'not'; lambda { |di, a0| { a0 => Expression[a0, :^, mask] } } + when 'push' + lambda { |di, a0| { :sp => Expression[:sp, :-, 2], + Indirection[:sp, 2, di.address] => Expression[a0] } } + when 'pop' + lambda { |di, a0| { :sp => Expression[:sp, :+, 2], + a0 => Indirection[:sp, 2, di.address] } } + when 'call' + lambda { |di, a0| { :sp => Expression[:sp, :-, 2], + Indirection[:sp, 2, di.address] => Expression[di.next_addr] } + } + when 'ret', 'reti'; lambda { |di, *a| { :sp => Expression[:sp, :+, 2] } } + # TODO callCC, retCC ... + when 'bswap' + lambda { |di, a0| { a0 => Expression[ + [[a0, :&, 0xff00], :>>, 8], :|, + [[a0, :&, 0x00ff], :<<, 8]] } } + when 'nop', /^j/; lambda { |di, *a| {} } + end - # TODO flags ? + # TODO flags ? - @backtrace_binding[op] ||= binding if binding - } - @backtrace_binding - end + @backtrace_binding[op] ||= binding if binding + } + @backtrace_binding + end - def get_backtrace_binding(di) - a = di.instruction.args.map { |arg| - case arg - when Memref, Reg; arg.symbolic(di) - else arg - end - } + def get_backtrace_binding(di) + a = di.instruction.args.map { |arg| + case arg + when Memref, Reg; arg.symbolic(di) + else arg + end + } - if binding = backtrace_binding[di.opcode.basename] - binding[di, *a] - else - puts "unhandled instruction to backtrace: #{di}" if $VERBOSE - # assume nothing except the 1st arg is modified - case a[0] - when Indirection, Symbol; { a[0] => Expression::Unknown } - when Expression; (x = a[0].externals.first) ? 
{ x => Expression::Unknown } : {} - else {} - end.update(:incomplete_binding => Expression[1]) - end - end + if binding = backtrace_binding[di.opcode.basename] + binding[di, *a] + else + puts "unhandled instruction to backtrace: #{di}" if $VERBOSE + # assume nothing except the 1st arg is modified + case a[0] + when Indirection, Symbol; { a[0] => Expression::Unknown } + when Expression; (x = a[0].externals.first) ? { x => Expression::Unknown } : {} + else {} + end.update(:incomplete_binding => Expression[1]) + end + end - # patch a forward binding from the backtrace binding - def fix_fwdemu_binding(di, fbd) - case di.opcode.name - when 'push', 'call'; fbd[Indirection[[:sp, :-, 2], 2]] = fbd.delete(Indirection[:sp, 2]) - end - fbd - end + # patch a forward binding from the backtrace binding + def fix_fwdemu_binding(di, fbd) + case di.opcode.name + when 'push', 'call'; fbd[Indirection[[:sp, :-, 2], 2]] = fbd.delete(Indirection[:sp, 2]) + end + fbd + end - def get_xrefs_x(dasm, di) - return [] if not di.opcode.props[:setip] + def get_xrefs_x(dasm, di) + return [] if not di.opcode.props[:setip] - case di.opcode.basename - when 'ret', 'reti' - return [Indirection[:sp, 2, di.address]] - when /^jr|^djnz/ - # jmp/call are absolute addrs, only jr/djnz are relative - # also, the asm source should display the relative offset - return [Expression[[di.address, :+, di.bin_length], :+, di.instruction.args.first]] - end + case di.opcode.basename + when 'ret', 'reti' + return [Indirection[:sp, 2, di.address]] + when /^jr|^djnz/ + # jmp/call are absolute addrs, only jr/djnz are relative + # also, the asm source should display the relative offset + return [Expression[[di.address, :+, di.bin_length], :+, di.instruction.args.first]] + end - case tg = di.instruction.args.first - when Memref; [Expression[tg.symbolic(di)]] - when Reg; [Expression[tg.symbolic(di)]] - when Expression, ::Integer; [Expression[tg]] - else - puts "unhandled setip at #{di.address} #{di.instruction}" if $DEBUG - 
[] - end - end + case tg = di.instruction.args.first + when Memref; [Expression[tg.symbolic(di)]] + when Reg; [Expression[tg.symbolic(di)]] + when Expression, ::Integer; [Expression[tg]] + else + puts "unhandled setip at #{di.address} #{di.instruction}" if $DEBUG + [] + end + end - # checks if expr is a valid return expression matching the :saveip instruction - def backtrace_is_function_return(expr, di=nil) - expr = Expression[expr].reduce_rec - expr.kind_of?(Indirection) and expr.len == 2 and expr.target == Expression[:sp] - end + # checks if expr is a valid return expression matching the :saveip instruction + def backtrace_is_function_return(expr, di=nil) + expr = Expression[expr].reduce_rec + expr.kind_of?(Indirection) and expr.len == 2 and expr.target == Expression[:sp] + end - # updates the function backtrace_binding - # if the function is big and no specific register is given, do nothing (the binding will be lazily updated later, on demand) - def backtrace_update_function_binding(dasm, faddr, f, retaddrlist, *wantregs) - b = f.backtrace_binding + # updates the function backtrace_binding + # if the function is big and no specific register is given, do nothing (the binding will be lazily updated later, on demand) + def backtrace_update_function_binding(dasm, faddr, f, retaddrlist, *wantregs) + b = f.backtrace_binding - bt_val = lambda { |r| - next if not retaddrlist - b[r] = Expression::Unknown - bt = [] - retaddrlist.each { |retaddr| - bt |= dasm.backtrace(Expression[r], retaddr, :include_start => true, - :snapshot_addr => faddr, :origin => retaddr) - } - if bt.length != 1 - b[r] = Expression::Unknown - else - b[r] = bt.first - end - } + bt_val = lambda { |r| + next if not retaddrlist + b[r] = Expression::Unknown + bt = [] + retaddrlist.each { |retaddr| + bt |= dasm.backtrace(Expression[r], retaddr, :include_start => true, + :snapshot_addr => faddr, :origin => retaddr) + } + if bt.length != 1 + b[r] = Expression::Unknown + else + b[r] = bt.first + end + } - if 
not wantregs.empty? - wantregs.each(&bt_val) - else - bt_val[:sp] - end + if not wantregs.empty? + wantregs.each(&bt_val) + else + bt_val[:sp] + end - b - end + b + end - # returns true if the expression is an address on the stack - def backtrace_is_stack_address(expr) - Expression[expr].expr_externals.include?(:sp) - end + # returns true if the expression is an address on the stack + def backtrace_is_stack_address(expr) + Expression[expr].expr_externals.include?(:sp) + end - # updates an instruction's argument replacing an expression with another (eg label renamed) - def replace_instr_arg_immediate(i, old, new) - i.args.map! { |a| - case a - when Expression; a == old ? new : Expression[a.bind(old => new).reduce] - when Memref - a.offset = (a.offset == old ? new : Expression[a.offset.bind(old => new).reduce]) if a.offset - a - else a - end - } - end + # updates an instruction's argument replacing an expression with another (eg label renamed) + def replace_instr_arg_immediate(i, old, new) + i.args.map! { |a| + case a + when Expression; a == old ? new : Expression[a.bind(old => new).reduce] + when Memref + a.offset = (a.offset == old ? 
new : Expression[a.offset.bind(old => new).reduce]) if a.offset + a + else a + end + } + end end end diff --git a/lib/metasm/metasm/cpu/z80/main.rb b/lib/metasm/metasm/cpu/z80/main.rb index 516c919e8b..e6bc14b08b 100644 --- a/lib/metasm/metasm/cpu/z80/main.rb +++ b/lib/metasm/metasm/cpu/z80/main.rb @@ -8,60 +8,60 @@ require 'metasm/main' module Metasm class Z80 < CPU - class Reg - class << self - attr_accessor :s_to_i, :i_to_s - end - @i_to_s = { 8 => { 0 => 'B', 1 => 'C', 2 => 'D', 3 => 'E', - 4 => 'H', 5 => 'L', 7 => 'A' }, - 16 => { 0 => 'BC', 1 => 'DE', 2 => 'HL', 3 => 'SP', - 4 => 'AF' } } # AF is 3 too - @s_to_i = @i_to_s.inject({}) { |h, (sz, rh)| - h.update rh.inject({}) { |hh, (i, n)| - hh.update n => [sz, i] } } + class Reg + class << self + attr_accessor :s_to_i, :i_to_s + end + @i_to_s = { 8 => { 0 => 'B', 1 => 'C', 2 => 'D', 3 => 'E', + 4 => 'H', 5 => 'L', 7 => 'A' }, + 16 => { 0 => 'BC', 1 => 'DE', 2 => 'HL', 3 => 'SP', + 4 => 'AF' } } # AF is 3 too + @s_to_i = @i_to_s.inject({}) { |h, (sz, rh)| + h.update rh.inject({}) { |hh, (i, n)| + hh.update n => [sz, i] } } - attr_accessor :sz, :i - def initialize(sz, i) - @sz = sz - @i = i - end + attr_accessor :sz, :i + def initialize(sz, i) + @sz = sz + @i = i + end - def symbolic(orig=nil) ; to_s.to_sym ; end + def symbolic(orig=nil) ; to_s.to_sym ; end - def self.from_str(s) - raise "Bad name #{s.inspect}" if not x = @s_to_i[s] - new(*x) - end - end + def self.from_str(s) + raise "Bad name #{s.inspect}" if not x = @s_to_i[s] + new(*x) + end + end - class Memref - attr_accessor :base, :offset, :sz - def initialize(base, offset, sz=nil) - @base = base - offset = Expression[offset] if offset - @offset = offset - @sz = sz - end + class Memref + attr_accessor :base, :offset, :sz + def initialize(base, offset, sz=nil) + @base = base + offset = Expression[offset] if offset + @offset = offset + @sz = sz + end - def symbolic(orig) - p = nil - p = Expression[p, :+, @base.symbolic] if base - p = Expression[p, :+, 
@offset] if offset - Indirection[p.reduce, @sz, orig] - end - end + def symbolic(orig) + p = nil + p = Expression[p, :+, @base.symbolic] if base + p = Expression[p, :+, @offset] if offset + Indirection[p.reduce, @sz, orig] + end + end - def initialize(family = :latest) - super() - @endianness = :little - @size = 16 - @family = family - end + def initialize(family = :latest) + super() + @endianness = :little + @size = 16 + @family = family + end - def init_opcode_list - send("init_#@family") - @opcode_list - end + def init_opcode_list + send("init_#@family") + @opcode_list + end end end diff --git a/lib/metasm/metasm/cpu/z80/opcodes.rb b/lib/metasm/metasm/cpu/z80/opcodes.rb index 055de44249..9377aaba6d 100644 --- a/lib/metasm/metasm/cpu/z80/opcodes.rb +++ b/lib/metasm/metasm/cpu/z80/opcodes.rb @@ -8,217 +8,217 @@ require 'metasm/cpu/z80/main' module Metasm class Z80 - def addop(name, bin, *args) - o = Opcode.new name, bin - args.each { |a| - o.args << a if @fields_mask[a] or @valid_args[a] - o.props[a] = true if @valid_props[a] - o.fields[a] = [bin.length-1, @fields_shift[a]] if @fields_mask[a] - raise "wtf #{a.inspect}" unless @valid_args[a] or @valid_props[a] or @fields_mask[a] - } - @opcode_list << o - end + def addop(name, bin, *args) + o = Opcode.new name, bin + args.each { |a| + o.args << a if @fields_mask[a] or @valid_args[a] + o.props[a] = true if @valid_props[a] + o.fields[a] = [bin.length-1, @fields_shift[a]] if @fields_mask[a] + raise "wtf #{a.inspect}" unless @valid_args[a] or @valid_props[a] or @fields_mask[a] + } + @opcode_list << o + end - def addop_macrocc(name, bin, *args) - %w[nz z nc c po pe p m].each_with_index { |cc, i| - dbin = bin.dup - dbin[0] |= i << 3 - addop name + cc, dbin, *args - } - end + def addop_macrocc(name, bin, *args) + %w[nz z nc c po pe p m].each_with_index { |cc, i| + dbin = bin.dup + dbin[0] |= i << 3 + addop name + cc, dbin, *args + } + end - # data from http://www.z80.info/decoding.htm - def init_z80_common - @opcode_list = 
[] - @valid_args.update [:i8, :u8, :i16, :u16, :m16, - :r_a, :r_af, :r_hl, :r_de, :r_sp, :r_i, - :m_bc, :m_de, :m_sp, :m_hl, :mf8, :mfc - ].inject({}) { |h, v| h.update v => true } - @fields_mask.update :rz => 7, :ry => 7, :rp => 3, :rp2 => 3, :iy => 7, :iy8 => 7 - @fields_shift.update :rz => 0, :ry => 3, :rp => 4, :rp2 => 4, :iy => 3, :iy8 => 3 + # data from http://www.z80.info/decoding.htm + def init_z80_common + @opcode_list = [] + @valid_args.update [:i8, :u8, :i16, :u16, :m16, + :r_a, :r_af, :r_hl, :r_de, :r_sp, :r_i, + :m_bc, :m_de, :m_sp, :m_hl, :mf8, :mfc + ].inject({}) { |h, v| h.update v => true } + @fields_mask.update :rz => 7, :ry => 7, :rp => 3, :rp2 => 3, :iy => 7, :iy8 => 7 + @fields_shift.update :rz => 0, :ry => 3, :rp => 4, :rp2 => 4, :iy => 3, :iy8 => 3 - # some opcodes are in init_z80 when they are not part of the GB ABI - addop 'nop', [0b00_000_000] - addop 'jr', [0b00_011_000], :setip, :stopexec, :i8 - %w[nz z nc c].each_with_index { |cc, i| - addop 'jr' + cc, [0b00_100_000 | (i << 3)], :setip, :i8 - } - addop 'ld', [0b00_000_001], :rp, :i16 - addop 'add', [0b00_001_001], :r_hl, :rp + # some opcodes are in init_z80 when they are not part of the GB ABI + addop 'nop', [0b00_000_000] + addop 'jr', [0b00_011_000], :setip, :stopexec, :i8 + %w[nz z nc c].each_with_index { |cc, i| + addop 'jr' + cc, [0b00_100_000 | (i << 3)], :setip, :i8 + } + addop 'ld', [0b00_000_001], :rp, :i16 + addop 'add', [0b00_001_001], :r_hl, :rp - addop 'ld', [0b00_000_010], :m_bc, :r_a - addop 'ld', [0b00_001_010], :r_a, :m_bc - addop 'ld', [0b00_010_010], :m_de, :r_a - addop 'ld', [0b00_011_010], :r_a, :m_de + addop 'ld', [0b00_000_010], :m_bc, :r_a + addop 'ld', [0b00_001_010], :r_a, :m_bc + addop 'ld', [0b00_010_010], :m_de, :r_a + addop 'ld', [0b00_011_010], :r_a, :m_de - addop 'inc', [0b00_000_011], :rp - addop 'dec', [0b00_001_011], :rp - addop 'inc', [0b00_000_100], :ry - addop 'dec', [0b00_000_101], :ry - addop 'ld', [0b00_000_110], :ry, :i8 + addop 'inc', 
[0b00_000_011], :rp + addop 'dec', [0b00_001_011], :rp + addop 'inc', [0b00_000_100], :ry + addop 'dec', [0b00_000_101], :ry + addop 'ld', [0b00_000_110], :ry, :i8 - addop 'rlca', [0b00_000_111] # rotate - addop 'rrca', [0b00_001_111] - addop 'rla', [0b00_010_111] - addop 'rra', [0b00_011_111] + addop 'rlca', [0b00_000_111] # rotate + addop 'rrca', [0b00_001_111] + addop 'rla', [0b00_010_111] + addop 'rra', [0b00_011_111] - addop 'daa', [0b00_100_111] - addop 'cpl', [0b00_101_111] - addop 'scf', [0b00_110_111] - addop 'ccf', [0b00_111_111] + addop 'daa', [0b00_100_111] + addop 'cpl', [0b00_101_111] + addop 'scf', [0b00_110_111] + addop 'ccf', [0b00_111_111] - addop 'halt', [0b01_110_110] # ld (HL), (HL) - addop 'ld', [0b01_000_000], :ry, :rz + addop 'halt', [0b01_110_110] # ld (HL), (HL) + addop 'ld', [0b01_000_000], :ry, :rz - addop 'add', [0b10_000_000], :r_a, :rz - addop 'adc', [0b10_001_000], :r_a, :rz - addop 'sub', [0b10_010_000], :r_a, :rz - addop 'sbc', [0b10_011_000], :r_a, :rz - addop 'and', [0b10_100_000], :r_a, :rz - addop 'xor', [0b10_101_000], :r_a, :rz - addop 'or', [0b10_110_000], :r_a, :rz - addop 'cmp', [0b10_111_000], :r_a, :rz # alias cp - addop 'cp', [0b10_111_000], :r_a, :rz # compare + addop 'add', [0b10_000_000], :r_a, :rz + addop 'adc', [0b10_001_000], :r_a, :rz + addop 'sub', [0b10_010_000], :r_a, :rz + addop 'sbc', [0b10_011_000], :r_a, :rz + addop 'and', [0b10_100_000], :r_a, :rz + addop 'xor', [0b10_101_000], :r_a, :rz + addop 'or', [0b10_110_000], :r_a, :rz + addop 'cmp', [0b10_111_000], :r_a, :rz # alias cp + addop 'cp', [0b10_111_000], :r_a, :rz # compare - addop_macrocc 'ret', [0b11_000_000], :setip - addop 'pop', [0b11_000_001], :rp2 - addop 'ret', [0b11_001_001], :stopexec, :setip - addop 'jmp', [0b11_101_001], :r_hl, :setip, :stopexec # alias jp - addop 'jp', [0b11_101_001], :r_hl, :setip, :stopexec - addop 'ld', [0b11_111_001], :r_sp, :r_hl - addop_macrocc 'j', [0b11_000_010], :setip, :u16 # alias jp - addop_macrocc 'jp', 
[0b11_000_010], :setip, :u16 - addop 'jmp', [0b11_000_011], :setip, :stopexec, :u16 # alias jp - addop 'jp', [0b11_000_011], :setip, :stopexec, :u16 + addop_macrocc 'ret', [0b11_000_000], :setip + addop 'pop', [0b11_000_001], :rp2 + addop 'ret', [0b11_001_001], :stopexec, :setip + addop 'jmp', [0b11_101_001], :r_hl, :setip, :stopexec # alias jp + addop 'jp', [0b11_101_001], :r_hl, :setip, :stopexec + addop 'ld', [0b11_111_001], :r_sp, :r_hl + addop_macrocc 'j', [0b11_000_010], :setip, :u16 # alias jp + addop_macrocc 'jp', [0b11_000_010], :setip, :u16 + addop 'jmp', [0b11_000_011], :setip, :stopexec, :u16 # alias jp + addop 'jp', [0b11_000_011], :setip, :stopexec, :u16 - addop 'di', [0b11_110_011] # disable interrupts - addop 'ei', [0b11_111_011] - addop_macrocc 'call', [0b11_000_100], :u16, :setip, :saveip - addop 'push', [0b11_000_101], :rp2 - addop 'call', [0b11_001_101], :u16, :setip, :saveip, :stopexec + addop 'di', [0b11_110_011] # disable interrupts + addop 'ei', [0b11_111_011] + addop_macrocc 'call', [0b11_000_100], :u16, :setip, :saveip + addop 'push', [0b11_000_101], :rp2 + addop 'call', [0b11_001_101], :u16, :setip, :saveip, :stopexec - addop 'add', [0b11_000_110], :r_a, :i8 - addop 'adc', [0b11_001_110], :r_a, :i8 - addop 'sub', [0b11_010_110], :r_a, :i8 - addop 'sbc', [0b11_011_110], :r_a, :i8 - addop 'and', [0b11_100_110], :r_a, :i8 - addop 'xor', [0b11_101_110], :r_a, :i8 - addop 'or', [0b11_110_110], :r_a, :i8 - addop 'cp', [0b11_111_110], :r_a, :i8 + addop 'add', [0b11_000_110], :r_a, :i8 + addop 'adc', [0b11_001_110], :r_a, :i8 + addop 'sub', [0b11_010_110], :r_a, :i8 + addop 'sbc', [0b11_011_110], :r_a, :i8 + addop 'and', [0b11_100_110], :r_a, :i8 + addop 'xor', [0b11_101_110], :r_a, :i8 + addop 'or', [0b11_110_110], :r_a, :i8 + addop 'cp', [0b11_111_110], :r_a, :i8 - addop 'rst', [0b11_000_111], :iy8 # call off in page 0 + addop 'rst', [0b11_000_111], :iy8 # call off in page 0 - addop 'rlc', [0xCB, 0b00_000_000], :rz # rotate - addop 'rrc', 
[0xCB, 0b00_001_000], :rz - addop 'rl', [0xCB, 0b00_010_000], :rz - addop 'rr', [0xCB, 0b00_011_000], :rz - addop 'sla', [0xCB, 0b00_100_000], :rz # shift - addop 'sra', [0xCB, 0b00_101_000], :rz - addop 'srl', [0xCB, 0b00_111_000], :rz - addop 'bit', [0xCB, 0b01_000_000], :iy, :rz # bit test - addop 'res', [0xCB, 0b10_000_000], :iy, :rz # bit reset - addop 'set', [0xCB, 0b11_000_000], :iy, :rz # bit set - end + addop 'rlc', [0xCB, 0b00_000_000], :rz # rotate + addop 'rrc', [0xCB, 0b00_001_000], :rz + addop 'rl', [0xCB, 0b00_010_000], :rz + addop 'rr', [0xCB, 0b00_011_000], :rz + addop 'sla', [0xCB, 0b00_100_000], :rz # shift + addop 'sra', [0xCB, 0b00_101_000], :rz + addop 'srl', [0xCB, 0b00_111_000], :rz + addop 'bit', [0xCB, 0b01_000_000], :iy, :rz # bit test + addop 'res', [0xCB, 0b10_000_000], :iy, :rz # bit reset + addop 'set', [0xCB, 0b11_000_000], :iy, :rz # bit set + end - # standard z80 - def init_z80 - init_z80_common + # standard z80 + def init_z80 + init_z80_common - addop 'ex', [0b00_001_000], :r_af # XXX really ex AF, AF' ... - addop 'djnz', [0b00_010_000], :setip, :i8 + addop 'ex', [0b00_001_000], :r_af # XXX really ex AF, AF' ... 
+ addop 'djnz', [0b00_010_000], :setip, :i8 - addop 'ld', [0b00_100_010], :m16, :r_hl - addop 'ld', [0b00_101_010], :r_hl, :m16 - addop 'ld', [0b00_110_010], :m16, :r_a - addop 'ld', [0b00_111_010], :r_a, :m16 + addop 'ld', [0b00_100_010], :m16, :r_hl + addop 'ld', [0b00_101_010], :r_hl, :m16 + addop 'ld', [0b00_110_010], :m16, :r_a + addop 'ld', [0b00_111_010], :r_a, :m16 - addop 'exx', [0b11_011_001] - addop 'out', [0b11_010_011], :i8, :r_a - addop 'in', [0b11_011_011], :r_a, :i8 + addop 'exx', [0b11_011_001] + addop 'out', [0b11_010_011], :i8, :r_a + addop 'in', [0b11_011_011], :r_a, :i8 - addop 'ex', [0b11_100_011], :m_sp, :r_hl - addop 'ex', [0b11_101_011], :r_de, :r_hl + addop 'ex', [0b11_100_011], :m_sp, :r_hl + addop 'ex', [0b11_101_011], :r_de, :r_hl - addop 'sll', [0xCB, 0b00_110_000], :rz + addop 'sll', [0xCB, 0b00_110_000], :rz - addop 'in', [0xED, 0b01_110_000], :u16 - addop 'in', [0xED, 0b01_000_000], :ry, :u16 - addop 'out', [0xED, 0b01_110_001], :u16 - addop 'out', [0xED, 0b01_000_001], :u16, :ry - addop 'sbc', [0xED, 0b01_000_010], :r_hl, :rp - addop 'adc', [0xED, 0b01_001_010], :r_hl, :rp - addop 'ld', [0xED, 0b01_000_011], :m16, :rp - addop 'ld', [0xED, 0b01_001_011], :rp, :m16 - addop 'neg', [0xED, 0b01_000_100], :r_a, :iy # dummy int field - addop 'retn', [0xED, 0b01_000_101], :stopexec # dummy int != 1 ? 
(1 = reti) - addop 'reti', [0xED, 0b01_001_101], :stopexec, :setip - addop 'im', [0xED, 0b01_000_110], :iy - addop 'ld', [0xED, 0b01_000_111], :r_i, :r_a - addop 'ld', [0xED, 0b01_001_111], :r_r, :r_a - addop 'ld', [0xED, 0b01_010_111], :r_a, :r_i - addop 'ld', [0xED, 0b01_011_111], :r_a, :r_r - addop 'rrd', [0xED, 0b01_100_111] - addop 'rld', [0xED, 0b01_101_111] + addop 'in', [0xED, 0b01_110_000], :u16 + addop 'in', [0xED, 0b01_000_000], :ry, :u16 + addop 'out', [0xED, 0b01_110_001], :u16 + addop 'out', [0xED, 0b01_000_001], :u16, :ry + addop 'sbc', [0xED, 0b01_000_010], :r_hl, :rp + addop 'adc', [0xED, 0b01_001_010], :r_hl, :rp + addop 'ld', [0xED, 0b01_000_011], :m16, :rp + addop 'ld', [0xED, 0b01_001_011], :rp, :m16 + addop 'neg', [0xED, 0b01_000_100], :r_a, :iy # dummy int field + addop 'retn', [0xED, 0b01_000_101], :stopexec # dummy int != 1 ? (1 = reti) + addop 'reti', [0xED, 0b01_001_101], :stopexec, :setip + addop 'im', [0xED, 0b01_000_110], :iy + addop 'ld', [0xED, 0b01_000_111], :r_i, :r_a + addop 'ld', [0xED, 0b01_001_111], :r_r, :r_a + addop 'ld', [0xED, 0b01_010_111], :r_a, :r_i + addop 'ld', [0xED, 0b01_011_111], :r_a, :r_r + addop 'rrd', [0xED, 0b01_100_111] + addop 'rld', [0xED, 0b01_101_111] - addop 'ldi', [0xED, 0b10_100_000] - addop 'ldd', [0xED, 0b10_101_000] - addop 'ldir', [0xED, 0b10_110_000] - addop 'lddr', [0xED, 0b10_111_000] - addop 'cpi', [0xED, 0b10_100_001] - addop 'cpd', [0xED, 0b10_101_001] - addop 'cpir', [0xED, 0b10_110_001] - addop 'cpdr', [0xED, 0b10_111_001] - addop 'ini', [0xED, 0b10_100_010] - addop 'ind', [0xED, 0b10_101_010] - addop 'inir', [0xED, 0b10_110_010] - addop 'indr', [0xED, 0b10_111_010] - addop 'outi', [0xED, 0b10_100_011] - addop 'outd', [0xED, 0b10_101_011] - addop 'otir', [0xED, 0b10_110_011] - addop 'otdr', [0xED, 0b10_111_011] + addop 'ldi', [0xED, 0b10_100_000] + addop 'ldd', [0xED, 0b10_101_000] + addop 'ldir', [0xED, 0b10_110_000] + addop 'lddr', [0xED, 0b10_111_000] + addop 'cpi', [0xED, 0b10_100_001] + 
addop 'cpd', [0xED, 0b10_101_001] + addop 'cpir', [0xED, 0b10_110_001] + addop 'cpdr', [0xED, 0b10_111_001] + addop 'ini', [0xED, 0b10_100_010] + addop 'ind', [0xED, 0b10_101_010] + addop 'inir', [0xED, 0b10_110_010] + addop 'indr', [0xED, 0b10_111_010] + addop 'outi', [0xED, 0b10_100_011] + addop 'outd', [0xED, 0b10_101_011] + addop 'otir', [0xED, 0b10_110_011] + addop 'otdr', [0xED, 0b10_111_011] - addop 'unk_ed', [0xED], :i8 + addop 'unk_ed', [0xED], :i8 - addop 'unk_nop', [], :i8 # undefined opcode = nop - @unknown_opcode = @opcode_list.last - end + addop 'unk_nop', [], :i8 # undefined opcode = nop + @unknown_opcode = @opcode_list.last + end - # gameboy processor - # from http://nocash.emubase.de/pandocs.htm#cpucomparisionwithz80 - def init_gb - init_z80_common + # gameboy processor + # from http://nocash.emubase.de/pandocs.htm#cpucomparisionwithz80 + def init_gb + init_z80_common - addop 'ld', [0x08], :m16, :r_sp - addop 'stop', [0x10] + addop 'ld', [0x08], :m16, :r_sp + addop 'stop', [0x10] - addop 'ldi', [0x22], :m_hl, :r_a # (hl++) <- a - addop 'ldi', [0x2A], :r_a, :m_hl - addop 'ldd', [0x32], :m_hl, :r_a # (hl--) <- a - addop 'ldd', [0x3A], :r_a, :m_hl + addop 'ldi', [0x22], :m_hl, :r_a # (hl++) <- a + addop 'ldi', [0x2A], :r_a, :m_hl + addop 'ldd', [0x32], :m_hl, :r_a # (hl--) <- a + addop 'ldd', [0x3A], :r_a, :m_hl - addop 'reti', [0xD9], :setip, :stopexec + addop 'reti', [0xD9], :setip, :stopexec - # override retpo/jpo - @opcode_list.delete_if { |op| op.bin[0] & 0xE5 == 0xE0 } # rm E0 E2 E8 EA F0 F2 F8 FA - addop 'ld', [0xE0], :mf8, :r_a # (0xff00 + :i8) - addop 'ld', [0xE2], :mfc, :r_a # (0xff00 + :r_c) - addop 'add', [0xE8], :r_sp, :i8 - addop 'ld', [0xEA], :m16, :r_a - addop 'ld', [0xF0], :r_a, :mf8 - addop 'ld', [0xF2], :r_a, :mfc - addop 'ld', [0xF8], :r_hl, :r_sp, :i8 # hl <- sp+:i8 - addop 'ld', [0xFA], :r_a, :m16 + # override retpo/jpo + @opcode_list.delete_if { |op| op.bin[0] & 0xE5 == 0xE0 } # rm E0 E2 E8 EA F0 F2 F8 FA + addop 'ld', [0xE0], 
:mf8, :r_a # (0xff00 + :i8) + addop 'ld', [0xE2], :mfc, :r_a # (0xff00 + :r_c) + addop 'add', [0xE8], :r_sp, :i8 + addop 'ld', [0xEA], :m16, :r_a + addop 'ld', [0xF0], :r_a, :mf8 + addop 'ld', [0xF2], :r_a, :mfc + addop 'ld', [0xF8], :r_hl, :r_sp, :i8 # hl <- sp+:i8 + addop 'ld', [0xFA], :r_a, :m16 - addop 'swap', [0xCB, 0x30], :rz + addop 'swap', [0xCB, 0x30], :rz - addop 'inv_dd', [0xDD], :stopexec # invalid prefixes - addop 'inv_ed', [0xED], :stopexec - addop 'inv_fd', [0xFD], :stopexec + addop 'inv_dd', [0xDD], :stopexec # invalid prefixes + addop 'inv_ed', [0xED], :stopexec + addop 'inv_fd', [0xFD], :stopexec - addop 'unk_nop', [], :i8 # undefined opcode = nop - @unknown_opcode = @opcode_list.last - end + addop 'unk_nop', [], :i8 # undefined opcode = nop + @unknown_opcode = @opcode_list.last + end - alias init_latest init_z80 + alias init_latest init_z80 end end diff --git a/lib/metasm/metasm/cpu/z80/render.rb b/lib/metasm/metasm/cpu/z80/render.rb index 51a7b2e81b..8f788a0c02 100644 --- a/lib/metasm/metasm/cpu/z80/render.rb +++ b/lib/metasm/metasm/cpu/z80/render.rb @@ -9,51 +9,51 @@ require 'metasm/render' module Metasm class Z80 - class Reg - include Renderable - def render ; [self.class.i_to_s[@sz][@i]] end - end - class Memref - include Renderable - def render - r = ['('] - r << @base if @base - r << '+' if @base and @offset - r << @offset if @offset - r << ')' - end - end + class Reg + include Renderable + def render ; [self.class.i_to_s[@sz][@i]] end + end + class Memref + include Renderable + def render + r = ['('] + r << @base if @base + r << '+' if @base and @offset + r << @offset if @offset + r << ')' + end + end - def render_instruction(i) - r = [] - r << i.opname - if not i.args.empty? - r << ' ' - i.args.each { |a_| r << a_ << ', ' } - r.pop - end - r - end + def render_instruction(i) + r = [] + r << i.opname + if not i.args.empty? 
+ r << ' ' + i.args.each { |a_| r << a_ << ', ' } + r.pop + end + r + end - def gui_hilight_word_regexp_init - ret = {} + def gui_hilight_word_regexp_init + ret = {} - # { 'B' => 'B|BC', 'BC' => 'B|C|BC' } + # { 'B' => 'B|BC', 'BC' => 'B|C|BC' } - %w[BC DE HL].each { |w| - l0, l1 = w.split(//) - ret[l0] = "#{l0}#{l1}?" - ret[l1] = "#{l0}?#{l1}" - ret[w] = "#{l0}|#{l0}?#{l1}" - } + %w[BC DE HL].each { |w| + l0, l1 = w.split(//) + ret[l0] = "#{l0}#{l1}?" + ret[l1] = "#{l0}?#{l1}" + ret[w] = "#{l0}|#{l0}?#{l1}" + } - ret - end + ret + end - def gui_hilight_word_regexp(word) - @gui_hilight_word_hash ||= gui_hilight_word_regexp_init - @gui_hilight_word_hash[word] or super(word) - end + def gui_hilight_word_regexp(word) + @gui_hilight_word_hash ||= gui_hilight_word_regexp_init + @gui_hilight_word_hash[word] or super(word) + end end end diff --git a/lib/metasm/metasm/debug.rb b/lib/metasm/metasm/debug.rb index 26e88df103..753232646a 100644 --- a/lib/metasm/metasm/debug.rb +++ b/lib/metasm/metasm/debug.rb @@ -6,1440 +6,1440 @@ module Metasm # this class implements a high-level debugging API (abstract superclass) class Debugger - class Breakpoint - attr_accessor :address, - # context where the bp was defined - :pid, :tid, - # bool: oneshot ? 
- :oneshot, - # current bp state: :active, :inactive (internal use), :disabled (user-specified) - :state, - # type: type of breakpoint (:bpx = soft, :hwbp = hard, :bpm = memory) - :type, - # Expression if this is a conditionnal bp - # may be a Proc, String or Expression, evaluated every time the breakpoint hits - # if it returns 0 or false, the breakpoint is ignored - :condition, - # Proc to run if this bp has a callback - :action, - # Proc to run to emulate the overwritten instr behavior - # used to avoid unset/singlestep/re-set, more multithread friendly - # may be a DecodedInstruction for lazy initialization, see Debugger#init_bpx/has_emul_instr(bpx) - :emul_instr, - # internal data, cpu-specific (overwritten byte for a softbp, memory type/size for hwbp..) - :internal, - # reference breakpoints sharing a target implementation (same hw debug register, soft bp addr...) - # shared is an array of Breakpoints, the same Array object in all shared breakpoints - # owner is a hash key => shared (dbg.breakpoint) - # key is an identifier for the Bp class in owner (bp.address) - :hash_shared, :hash_owner, :hash_key, - # user-defined breakpoint-specific stuff - :userdata - - # append the breakpoint to hash_owner + hash_shared - def add(owner=@hash_owner) - @hash_owner = owner - @hash_key ||= @address - return add_bpm if @type == :bpm - if pv = owner[@hash_key] - @hash_shared = pv.hash_shared - @internal ||= pv.internal - @emul_instr ||= pv.emul_instr - else - owner[@hash_key] = self - @hash_shared = [] - end - @hash_shared << self - end - - # register a bpm: add references to all page start covered in @hash_owner - def add_bpm - m = @address + @internal[:len] - a = @address & -0x1000 - @hash_shared = [self] - - @internal ||= {} - @internal[:orig_prot] ||= {} - while a < m - if pv = @hash_owner[a] - if not pv.hash_shared.include?(self) - pv.hash_shared.concat @hash_shared-pv.hash_shared - @hash_shared.each { |bpm| bpm.hash_shared = pv.hash_shared } - end - 
@internal[:orig_prot][a] = pv.internal[:orig_prot][a] - else - @hash_owner[a] = self - end - a += 0x1000 - end - end - - # delete the breakpoint from hash_shared, and hash_owner if empty - def del - return del_bpm if @type == :bpm - @hash_shared.delete self - if @hash_shared.empty? - @hash_owner.delete @hash_key - elsif @hash_owner[@hash_key] == self - @hash_owner[@hash_key] = @hash_shared.first - end - end - - # unregister a bpm - def del_bpm - m = @address + @internal[:len] - a = @address & -0x1000 - @hash_shared.delete self - while a < m - pv = @hash_owner[a] - if pv == self - if opv = @hash_shared.find { |bpm| - bpm.address < a + 0x1000 and bpm.address + bpm.internal[:len] > a - } - @hash_owner[a] = opv - else - @hash_owner.delete a - - # split hash_shared on disjoint ranges - prev_shared = @hash_shared.find_all { |bpm| - bpm.address < a + 0x1000 and bpm.address + bpm.internal[:len] <= a - } - - prev_shared.each { |bpm| - bpm.hash_shared = prev_shared - @hash_shared.delete bpm - } - end - end - a += 0x1000 - end - end - end - - # per-process data - attr_accessor :memory, :cpu, :disassembler, :breakpoint, :breakpoint_memory, - :modulemap, :symbols, :symbols_len - # per-thread data - attr_accessor :state, :info, :breakpoint_thread, :singlestep_cb, :run_method, - :run_args, :breakpoint_cause - - # which/where per-process/thread stuff is stored - attr_accessor :pid_stuff, :tid_stuff, :pid_stuff_list, :tid_stuff_list - - # global debugger callbacks, called whenever such event occurs - attr_accessor :callback_singlestep, :callback_bpx, :callback_hwbp, :callback_bpm, - :callback_exception, :callback_newthread, :callback_endthread, - :callback_newprocess, :callback_endprocess, :callback_loadlibrary - - # global switches, specify wether to break on exception/thread event - # can be a Proc that is evaluated (arg = info parameter of the evt_func) - # trace_children is a bool to tell if we should debug subprocesses spawned - # by the target - attr_accessor 
:pass_all_exceptions, :ignore_newthread, :ignore_endthread, - :trace_children - - # link to the user-interface object if available - attr_accessor :gui - - # initializes the disassembler internal data - subclasses should call super() - def initialize - @pid_stuff = {} - @tid_stuff = {} - @log_proc = nil - @state = :dead - @info = '' - # stuff saved when we switch pids - @pid_stuff_list = [:memory, :cpu, :disassembler, :symbols, :symbols_len, - :modulemap, :breakpoint, :breakpoint_memory, :tid, :tid_stuff, - :dead_process] - @tid_stuff_list = [:state, :info, :breakpoint_thread, :singlestep_cb, - :run_method, :run_args, :breakpoint_cause, :dead_thread] - @callback_loadlibrary = lambda { |h| loadsyms(h[:address]) ; continue } - @callback_newprocess = lambda { |h| log "process #{@pid} attached" } - @callback_endprocess = lambda { |h| log "process #{@pid} died" } - initialize_newpid - initialize_newtid - end - - def dasm; disassembler; end - - def shortname; self.class.name.split('::').last.downcase; end - - attr_reader :pid - # change pid and associated cached data - # this will also re-load the previously selected tid for this process - def pid=(npid) - return if npid == pid - raise "invalid pid" if not check_pid(npid) - swapout_pid - @pid = npid - swapin_pid - end - alias set_pid pid= - - attr_reader :tid - def tid=(ntid) - return if ntid == tid - raise "invalid tid" if not check_tid(ntid) - swapout_tid - @tid = ntid - swapin_tid - end - alias set_tid tid= - - # creates stuff related to a new process being debugged - # includes disassembler, modulemap, symbols, breakpoints - # subclasses should check that @pid maps to a real process and raise() otherwise - # to be called with @pid/@tid set, calls initialize_memory+initialize_cpu - def initialize_newpid - return if not pid - @pid_stuff_list.each { |s| instance_variable_set("@#{s}", nil) } - - @symbols = {} - @symbols_len = {} - @modulemap = {} - @breakpoint = {} - @breakpoint_memory = {} - @tid_stuff = {} - 
initialize_cpu - initialize_memory - initialize_disassembler - end - - # subclasses should check that @tid maps to a real thread and raise() otherwise - def initialize_newtid - return if not tid - @tid_stuff_list.each { |s| instance_variable_set("@#{s}", nil) } - - @state = :stopped - @info = 'new' - @breakpoint_thread = {} - gui.swapin_tid if @disassembler and gui.respond_to?(:swapin_tid) - end - - # initialize the disassembler from @cpu/@memory - def initialize_disassembler - return if not @memory or not @cpu - @disassembler = Shellcode.decode(@memory, @cpu).disassembler - gui.swapin_pid if gui.respond_to?(:swapin_pid) - end - - # we're switching focus from one pid to another, save current pid data - def swapout_pid - return if not pid - swapout_tid - gui.swapout_pid if gui.respond_to?(:swapout_pid) - @pid_stuff[@pid] ||= {} - @pid_stuff_list.each { |fld| - @pid_stuff[@pid][fld] = instance_variable_get("@#{fld}") - } - end - - # we're switching focus from one tid to another, save current tid data - def swapout_tid - return if not tid - gui.swapout_tid if gui.respond_to?(:swapout_tid) - @tid_stuff[@tid] ||= {} - @tid_stuff_list.each { |fld| - @tid_stuff[@tid][fld] = instance_variable_get("@#{fld}") - } - end - - # we're switching focus from one pid to another, load current pid data - def swapin_pid - return initialize_newpid if not @pid_stuff[@pid] - - @pid_stuff_list.each { |fld| - instance_variable_set("@#{fld}", @pid_stuff[@pid][fld]) - } - swapin_tid - gui.swapin_pid if gui.respond_to?(:swapin_pid) - end - - # we're switching focus from one tid to another, load current tid data - def swapin_tid - return initialize_newtid if not @tid_stuff[@tid] - - @tid_stuff_list.each { |fld| - instance_variable_set("@#{fld}", @tid_stuff[@tid][fld]) - } - gui.swapin_tid if gui.respond_to?(:swapin_tid) - end - - # delete references to the current pid - # switch to another pid, set @state = :dead if none available - def del_pid - @pid_stuff.delete @pid - if @pid = 
@pid_stuff.keys.first - swapin_pid - else - @state = :dead - @info = '' - @tid = nil - end - end - - # delete references to the current thread - def del_tid - @tid_stuff.delete @tid - if @tid = @tid_stuff.keys.first - swapin_tid - else - del_tid_notid - end - end - - # wipe the whole process when no TID is left - # XXX we may have a pending evt_newthread... - def del_tid_notid - del_pid - end - - - # change the debugger to a specific pid/tid - # if given a block, run the block and then restore the original pid/tid - # pid may be an object that respond to #pid/#tid - def switch_context(npid, ntid=nil, &b) - if npid.respond_to?(:pid) - ntid ||= npid.tid - npid = npid.pid - end - oldpid = pid - oldtid = tid - set_pid npid - set_tid ntid if ntid - if b - # shortcut begin..ensure overhead - return b.call if oldpid == pid and oldtid == tid - - begin - b.call - ensure - set_pid oldpid - set_tid oldtid - end - end - end - alias set_context switch_context - - # iterate over all pids, yield in the context of this pid - def each_pid(&b) - # ensure @pid is last, so that we finish in the current context - lst = @pid_stuff.keys - [@pid] - lst << @pid - return lst if not b - lst.each { |p| - set_pid p - b.call - } - end - - # iterate over all tids of the current process, yield in its context - def each_tid(&b) - lst = @tid_stuff.keys - [@tid] - lst << @tid - return lst if not b - lst.each { |t| - set_tid t rescue next - b.call - } - end - - # iterate over all tids of all pids, yield in their context - def each_pid_tid(&b) - each_pid { each_tid { b.call } } - end - - - # create a thread/process breakpoint - # addr can be a numeric address, an Expression that is resolved, or - # a String that is parsed+resolved - # info's keys are set to the breakpoint - # standard keys are :type, :oneshot, :condition, :action - # returns the Breakpoint object - def add_bp(addr, info={}) - info[:pid] ||= @pid - # dont define :tid for bpx, otherwise on del_bp we may switch_context to this thread 
that may not be stopped -> cant ptrace_write - info[:tid] ||= @tid if info[:pid] == @pid and info[:type] == :hwbp - - b = Breakpoint.new - info.each { |k, v| - b.send("#{k}=", v) - } - - switch_context(b) { - addr = resolve_expr(addr) if not addr.kind_of? ::Integer - b.address = addr - - b.hash_owner ||= case b.type - when :bpm; @breakpoint_memory - when :hwbp; @breakpoint_thread - when :bpx; @breakpoint - end - # XXX bpm may hash_share with an :active, but be larger and still need enable() - b.add - - enable_bp(b) if not info[:state] - } - - b - end - - # remove a breakpoint - def del_bp(b) - disable_bp(b) - b.del - end - - # activate an inactive breakpoint - def enable_bp(b) - return if b.state == :active - if not b.hash_shared.find { |bb| bb.state == :active } - switch_context(b) { - if not b.internal - init_bpx(b) if b.type == :bpx - b.internal ||= {} - b.hash_shared.each { |bb| bb.internal ||= b.internal } - end - do_enable_bp(b) - } - end - b.state = :active - end - - # deactivate an active breakpoint - def disable_bp(b, newstate = :inactive) - return if b.state != :active - b.state = newstate - return if b.hash_shared.find { |bb| bb.state == :active } - switch_context(b) { - do_disable_bp(b) - } - end - - - # delete all breakpoints defined in the current thread - def del_all_breakpoints_thread - @breakpoint_thread.values.map { |b| b.hash_shared }.flatten.uniq.each { |b| del_bp(b) } - end - - # delete all breakpoints for the current process and all its threads - def del_all_breakpoints - each_tid { del_all_breakpoints_thread } - @breakpoint.values.map { |b| b.hash_shared }.flatten.uniq.each { |b| del_bp(b) } - @breakpoint_memory.values.uniq.map { |b| b.hash_shared }.flatten.uniq.each { |b| del_bp(b) } - end - - # calls do_enable_bpm for bpms, or @cpu.dbg_enable_bp - def do_enable_bp(b) - if b.type == :bpm; do_enable_bpm(b) - else @cpu.dbg_enable_bp(self, b) - end - end - - # calls do_disable_bpm for bpms, or @cpu.dbg_disable_bp - def do_disable_bp(b) - if 
b.type == :bpm; do_disable_bpm(b) - else @cpu.dbg_disable_bp(self, b) - end - end - - # called in the context of the target when a bpx is to be initialized - # may (lazily) initialize b.emul_instr for virtual singlestep - def init_bpx(b) - # dont bother setting stuff up if it is never to be used - return if b.oneshot and not b.condition - - # lazy setup of b.emul_instr: delay building emulating lambda to if/when actually needed - # we still need to disassemble now and update @disassembler, before we patch the memory for the bpx - di = init_bpx_disassemble(b.address) - b.hash_shared.each { |bb| bb.emul_instr = di } - end - - # retrieve the di at a given address, disassemble if needed - # TODO make it so this doesn't interfere with other 'real' disassembler later commands, eg disassemble() or disassemble_fast_deep() - # (right now, when they see the block already present they stop all processing) - def init_bpx_disassemble(addr) - @disassembler.disassemble_fast_block(addr) - @disassembler.di_at(addr) - end - - # checks if bp has an emul_instr - # do the lazy initialization if needed - def has_emul_instr(bp) - if bp.emul_instr.kind_of?(DecodedInstruction) - if di = bp.emul_instr and fdbd = @disassembler.get_fwdemu_binding(di, register_pc) and - fdbd.all? 
{ |k, v| (k.kind_of?(Symbol) or k.kind_of?(Indirection)) and - k != :incomplete_binding and v != Expression::Unknown } - # setup a lambda that will mimic, using the debugger primitives, the actual execution of the instruction - bp.emul_instr = lambda { - fdbd.map { |k, v| - k = Indirection[emulinstr_resv(k.pointer), k.len] if k.kind_of?(Indirection) - [k, emulinstr_resv(v)] - }.each { |k, v| - if k.to_s =~ /flags?_(.+)/i - f = $1.downcase.to_sym - set_flag_value(f, v) - elsif k.kind_of?(Symbol) - set_reg_value(k, v) - elsif k.kind_of?(Indirection) - memory_write_int(k.pointer, v, k.len) - end - } - } - bp.hash_shared.each { |bb| bb.emul_instr = bp.emul_instr } - else - bp.hash_shared.each { |bb| bb.emul_instr = nil } - end - end - - bp.emul_instr - end - - def emulinstr_resv(e) - r = e - flags = Expression[r].externals.uniq.find_all { |f| f.to_s =~ /flags?_(.+)/i } - if flags.first - bd = {} - flags.each { |f| - f.to_s =~ /flags?_(.+)/i - bd[f] = get_flag_value($1.downcase.to_sym) - } - r = r.bind(bd) - end - resolve(r) - end - - # sets a breakpoint on execution - def bpx(addr, oneshot=false, cond=nil, &action) - h = { :type => :bpx } - h[:oneshot] = true if oneshot - h[:condition] = cond if cond - h[:action] = action if action - add_bp(addr, h) - end - - # sets a hardware breakpoint - # mtype in :r :w :x - # mlen is the size of the memory zone to cover - # mlen may be constrained by the architecture - def hwbp(addr, mtype=:x, mlen=1, oneshot=false, cond=nil, &action) - h = { :type => :hwbp } - h[:hash_owner] = @breakpoint_thread - addr = resolve_expr(addr) if not addr.kind_of? 
::Integer - mtype = mtype.to_sym - h[:hash_key] = [addr, mtype, mlen] - h[:internal] = { :type => mtype, :len => mlen } - h[:oneshot] = true if oneshot - h[:condition] = cond if cond - h[:action] = action if action - add_bp(addr, h) - end - - # sets a memory breakpoint - # mtype is :r :w :rw or :x - # mlen is the size of the memory zone to cover - def bpm(addr, mtype=:r, mlen=4096, oneshot=false, cond=nil, &action) - h = { :type => :bpm } - addr = resolve_expr(addr) if not addr.kind_of? ::Integer - h[:hash_key] = addr & -4096 # XXX actually referenced at addr, addr+4096, ... addr+len - h[:internal] = { :type => mtype, :len => mlen } - h[:oneshot] = true if oneshot - h[:condition] = cond if cond - h[:action] = action if action - add_bp(addr, h) - end - - - # define the lambda to use to log stuff - def set_log_proc(l=nil, &b) - @log_proc = l || b - end - - # show information to the user, uses log_proc if defined - def log(*a) - if @log_proc - a.each { |aa| @log_proc[aa] } - else - puts(*a) if $VERBOSE - end - end - - - # marks the current cache of memory/regs invalid - def invalidate - @memory.invalidate if @memory - end - - # invalidates the EncodedData backend for the dasm sections - def dasm_invalidate - disassembler.sections.each_value { |s| s.data.invalidate if s.data.respond_to?(:invalidate) } if disassembler - end - - # return all breakpoints set on a specific address (or all bp) - def all_breakpoints(addr=nil) - ret = [] - if addr - if b = @breakpoint[addr] - ret |= b.hash_shared - end - else - @breakpoint.each_value { |bb| ret |= bb.hash_shared } - end - - @breakpoint_thread.each_value { |bb| - next if addr and bb.address != addr - ret |= bb.hash_shared - } - - @breakpoint_memory.each_value { |bb| - next if addr and (bb.address+bb.internal[:len] <= addr or bb.address > addr) - ret |= bb.hash_shared - } - - ret - end - - # return on of the breakpoints at address addr - def find_breakpoint(addr=nil, &b) - return @breakpoint[addr] if @breakpoint[addr] and (not 
b or b.call(@breakpoint[addr])) - all_breakpoints(addr).find { |bp| b.call bp } - end - - - # to be called right before resuming execution of the target - # run_m is the method that should be called if the execution is stopped - # due to a side-effect of the debugger (bpx with wrong condition etc) - # returns nil if the execution should be avoided (just deleted the dead thread/process) - def check_pre_run(run_m, *run_a) - if @dead_process - del_pid - return - elsif @dead_thread - del_tid - return - elsif @state == :running - return - end - @cpu.dbg_check_pre_run(self) if @cpu.respond_to?(:dbg_check_pre_run) - @breakpoint_cause = nil - @run_method = run_m - @run_args = run_a - @info = nil - true - end - - - # called when the target stops due to a singlestep exception - def evt_singlestep(b=nil) - b ||= find_singlestep - return evt_exception(:type => 'singlestep') if not b - - @state = :stopped - @info = 'singlestep' - @cpu.dbg_evt_singlestep(self) if @cpu.respond_to?(:dbg_evt_singlestep) - - callback_singlestep[] if callback_singlestep - - if cb = @singlestep_cb - @singlestep_cb = nil - cb.call # call last, as the cb may change singlestep_cb/state/etc - end - end - - # returns true if the singlestep is due to us - def find_singlestep - return @cpu.dbg_find_singlestep(self) if @cpu.respond_to?(:dbg_find_singlestep) - @run_method == :singlestep - end - - # called when the target stops due to a soft breakpoint exception - def evt_bpx(b=nil) - b ||= find_bp_bpx - # TODO handle race: - # bpx foo ; thread hits foo ; we bc foo ; os notify us of bp hit but we already cleared everything related to 'bpx foo' -> unhandled bp exception - return evt_exception(:type => 'breakpoint') if not b - - @state = :stopped - @info = 'breakpoint' - @cpu.dbg_evt_bpx(self, b) if @cpu.respond_to?(:dbg_evt_bpx) - - callback_bpx[b] if callback_bpx - - post_evt_bp(b) - end - - # return the breakpoint that is responsible for the evt_bpx - def find_bp_bpx - return @cpu.dbg_find_bpx(self) if 
@cpu.respond_to?(:dbg_find_bpx) - @breakpoint[pc] - end - - # called when the target stops due to a hwbp exception - def evt_hwbp(b=nil) - b ||= find_bp_hwbp - return evt_exception(:type => 'hwbp') if not b - - @state = :stopped - @info = 'hwbp' - @cpu.dbg_evt_hwbp(self, b) if @cpu.respond_to?(:dbg_evt_hwbp) - - callback_hwbp[b] if callback_hwbp - - post_evt_bp(b) - end - - # return the breakpoint that is responsible for the evt_hwbp - def find_bp_hwbp - return @cpu.dbg_find_hwbp(self) if @cpu.respond_to?(:dbg_find_hwbp) - @breakpoint_thread.find { |b| b.address == pc } - end - - # called for archs where the same interrupt is generated for hwbp and singlestep - # checks if a hwbp matches, then call evt_hwbp, else call evt_singlestep (which - # will forward to evt_exception if singlestep does not match either) - def evt_hwbp_singlestep - if b = find_bp_hwbp - evt_hwbp(b) - else - evt_singlestep - end - end - - # called when the target stops due to a memory exception caused by a memory bp - # called by evt_exception - def evt_bpm(b) - @state = :stopped - @info = 'bpm' - - callback_bpm[b] if callback_bpm - - post_evt_bp(b) - end - - # return a bpm whose page coverage includes the fault described in info - def find_bp_bpm(info) - @breakpoint_memory[info[:fault_addr] & -0x1000] - end - - # returns true if the fault described in info is valid to trigger b - def check_bpm_range(b, info) - return if b.address+b.internal[:len] <= info[:fault_addr] - return if b.address >= info[:fault_addr] + info[:fault_len] - case b.internal[:type] - when :r; info[:fault_access] == :r # or info[:fault_access] == :x - when :w; info[:fault_access] == :w - when :x; info[:fault_access] == :x # XXX non-NX cpu => check pc is in bpm range ? 
- when :rw; true - end - end - - # handles breakpoint conditions/callbacks etc - def post_evt_bp(b) - @breakpoint_cause = b - - found_valid_active = false - - pre_callback_pc = pc - - # XXX may have many active bps with callback that continue/singlestep/singlestep{}... - b.hash_shared.dup.find_all { |bb| - # ignore inactive bps - next if bb.state != :active - - # ignore out-of-range bpms - next if bb.type == :bpm and not check_bpm_range(bb, b.internal) - - # check condition - case bb.condition - when nil; cd = 1 - when Proc; cd = bb.condition.call - when String, Expression; cd = resolve_expr(bb.condition) - else raise "unknown bp condition #{bb.condition.inspect}" - end - next if not cd or cd == 0 - - found_valid_active = true - - # oneshot - del_bp(bb) if bb.oneshot - - bb.action - }.each { |bb| bb.action.call } - - # discard @breakpoint_cause if a bp callback did modify register_pc - @breakpoint_cause = nil if pc != pre_callback_pc - - # we did break due to a bp whose condition is not true: resume - # (unless a callback already resumed) - resume_badbreak(b) if not found_valid_active and @state == :stopped - end - - # called whenever the target stops due to an exception - # type may be: - # * 'access violation', :fault_addr, :fault_len, :fault_access (:r/:w/:x) - # anything else for other exceptions (access violation is special to handle bpm) - # ... - def evt_exception(info={}) - if info[:type] == 'access violation' and b = find_bp_bpm(info) - info[:fault_len] ||= 1 - b.internal.update info - return evt_bpm(b) - end - - @state = :stopped - @info = "exception #{info[:type]}" - - callback_exception[info] if callback_exception - - pass = pass_all_exceptions - pass = pass[info] if pass.kind_of? Proc - if pass - pass_current_exception - resume_badbreak - end - end - - def evt_newthread(info={}) - @state = :stopped - @info = 'new thread' - - callback_newthread[info] if callback_newthread - - ign = ignore_newthread - ign = ign[info] if ign.kind_of? 
Proc - if ign - continue - end - end - - def evt_endthread(info={}) - @state = :stopped - @info = 'end thread' - # mark the thread as to be deleted on next check_pre_run - @dead_thread = true - - callback_endthread[info] if callback_endthread - - ign = ignore_endthread - ign = ign[info] if ign.kind_of? Proc - if ign - continue - end - end - - def evt_newprocess(info={}) - @state = :stopped - @info = 'new process' - - callback_newprocess[info] if callback_newprocess - end - - def evt_endprocess(info={}) - @state = :stopped - @info = 'end process' - @dead_process = true - - callback_endprocess[info] if callback_endprocess - end - - def evt_loadlibrary(info={}) - @state = :stopped - @info = 'loadlibrary' - - callback_loadlibrary[info] if callback_loadlibrary - end - - # called when we did break due to a breakpoint whose condition is invalid - # resume execution as if we never stopped - # disable offending bp + singlestep if needed - def resume_badbreak(b=nil) - # ensure we didn't delete b - if b and b.hash_shared.find { |bb| bb.state == :active } - rm = @run_method - if rm == :singlestep - singlestep_bp(b) - else - ra = @run_args - singlestep_bp(b) { send rm, *ra } - end - else - send @run_method, *@run_args - end - end - - # singlesteps over an active breakpoint and run its block - # if the breakpoint provides an emulation stub, run that, otherwise - # disable the breakpoint, singlestep, and re-enable - def singlestep_bp(bp, &b) - if has_emul_instr(bp) - @state = :stopped - bp.emul_instr.call - b.call if b - else - bp.hash_shared.each { |bb| - disable_bp(bb, :temp_inactive) if bb.state == :active - } - # this *should* work with different bps stopping the current instr - prev_sscb = @singlestep_cb - singlestep { - bp.hash_shared.each { |bb| - enable_bp(bb) if bb.state == :temp_inactive - } - prev_sscb[] if prev_sscb - b.call if b - } - end - end - - # checks if @breakpoint_cause is valid, or was obsoleted by the user changing pc - def check_breakpoint_cause - if bp = 
@breakpoint_cause and - (bp.type == :bpx or (bp.type == :hwbp and bp.internal[:type] == :x)) and - pc != bp.address - bp = @breakpoint_cause = nil - end - bp - end - - # checks if the running target has stopped (nonblocking) - # returns false if no debug event happened - def check_target - do_check_target - end - - # waits until the running target stops (due to a breakpoint, fault, etc) - def wait_target - do_wait_target while @state == :running - end - - # resume execution of the target - # bypasses a software breakpoint on pc if needed - # thread breakpoints must be manually disabled before calling continue - def continue - if b = check_breakpoint_cause and b.hash_shared.find { |bb| bb.state == :active } - singlestep_bp(b) { - next if not check_pre_run(:continue) - do_continue - } - else - return if not check_pre_run(:continue) - do_continue - end - end - alias run continue - - # continue ; wait_target - def continue_wait - continue - wait_target - end - - # resume execution of the target one instruction at a time - def singlestep(&b) - @singlestep_cb = b - bp = check_breakpoint_cause - return if not check_pre_run(:singlestep) - if bp and bp.hash_shared.find { |bb| bb.state == :active } and has_emul_instr(bp) - @state = :stopped - bp.emul_instr.call - invalidate - evt_singlestep(true) - else - do_singlestep - end - end - - # singlestep ; wait_target - def singlestep_wait(&b) - singlestep(&b) - wait_target - end - - # tests if the specified instructions should be stepover() using singlestep or - # by putting a breakpoint at next_addr - def need_stepover(di = di_at(pc)) - di and @cpu.dbg_need_stepover(self, di.address, di) - end - - # stepover: singlesteps, but do not enter in subfunctions - def stepover - di = di_at(pc) - if need_stepover(di) - bpx di.next_addr, true, Expression[:tid, :==, @tid] - continue - else - singlestep - end - end - - # stepover ; wait_target - def stepover_wait - stepover - wait_target - end - - # checks if an instruction should stop the 
stepout() (eg it is a return instruction) - def end_stepout(di = di_at(pc)) - di and @cpu.dbg_end_stepout(self, di.address, di) - end - - # stepover until finding the last instruction of the function - def stepout - # TODO thread-local bps - while not end_stepout - stepover - wait_target - end - do_singlestep - end - - def stepout_wait - stepout - wait_target - end - - # set a singleshot breakpoint, run the process, and wait - def go(target, cond=nil) - bpx(target, true, cond) - continue_wait - end - - # continue_wait until @state == :dead - def run_forever - continue_wait until @state == :dead - end - - # decode the Instruction at the address, use the @disassembler cache if available - def di_at(addr) - @disassembler.di_at(addr) || @disassembler.disassemble_instruction(addr) - end - - # list the general purpose register names available for the target - def register_list - @cpu.dbg_register_list - end - - # hash { register_name => register_size_in_bits } - def register_size - @cpu.dbg_register_size - end - - # retrieves the name of the register holding the program counter (address of the next instruction) - def register_pc - @cpu.dbg_register_pc - end - - # retrieve the name of the register holding the stack pointer - def register_sp - @cpu.dbg_register_sp - end - - # then name of the register holding the cpu flags - def register_flags - @cpu.dbg_register_flags - end - - # list of flags available in the flag register - def flag_list - @cpu.dbg_flag_list - end - - # retreive the value of the program counter register (eip) - def pc - get_reg_value(register_pc) - end - alias ip pc - - # change the value of pc - def pc=(v) - set_reg_value(register_pc, v) - end - alias ip= pc= - - # retrieve the value of the stack pointer register - def sp - get_reg_value(register_sp) - end - - # update the stack pointer - def sp=(v) - set_reg_value(register_sp, v) - end - - # retrieve the value of a flag (0/1) - def get_flag_value(f) - @cpu.dbg_get_flag(self, f) - end - - # retrieve 
the value of a flag (true/false) - def get_flag(f) - get_flag_value(f) != 0 - end - - # change the value of a flag - def set_flag_value(f, v) - (v && v != 0) ? set_flag(f) : unset_flag(f) - end - - # switch the value of a flag (true->false, false->true) - def toggle_flag(f) - set_flag_value(f, 1-get_flag_value(f)) - end - - # set the value of the flag to true - def set_flag(f) - @cpu.dbg_set_flag(self, f) - end - - # set the value of the flag to false - def unset_flag(f) - @cpu.dbg_unset_flag(self, f) - end - - # returns the name of the module containing addr or nil - def addr2module(addr) - @modulemap.keys.find { |k| @modulemap[k][0] <= addr and @modulemap[k][1] > addr } - end - - # returns a string describing addr in term of symbol (eg 'libc.so.6!printf+2f') - def addrname(addr) - (addr2module(addr) || '???') + '!' + - if s = @symbols[addr] ? addr : @symbols_len.keys.find { |s_| s_ < addr and s_ + @symbols_len[s_] > addr } - @symbols[s] + (addr == s ? '' : ('+%x' % (addr-s))) - else '%08x' % addr - end - end - - # same as addrname, but scan preceding addresses if no symbol matches - def addrname!(addr) - (addr2module(addr) || '???') + '!' + - if s = @symbols[addr] ? addr : - @symbols_len.keys.find { |s_| s_ < addr and s_ + @symbols_len[s_] > addr } || - @symbols.keys.sort.find_all { |s_| s_ < addr and s_ + 0x10000 > addr }.max - @symbols[s] + (addr == s ? '' : ('+%x' % (addr-s))) - else '%08x' % addr - end - end - - # loads the symbols from a mapped module - def loadsyms(addr, name='%08x'%addr.to_i) - if addr.kind_of? String - modules.each { |m| - if m.path =~ /#{addr}/i - addr = m.addr - name = File.basename m.path - break - end - } - return if not addr.kind_of? 
Integer - end - return if not peek = @memory.get_page(addr, 4) - if peek == "\x7fELF" - cls = LoadedELF - elsif peek[0, 2] == "MZ" and @memory[addr+@memory[addr+0x3c,4].unpack('V').first, 4] == "PE\0\0" - cls = LoadedPE - else return - end - - begin - e = cls.load @memory[addr, 0x1000_0000] - e.load_address = addr - e.decode_header - e.decode_exports - rescue - # cache the error so that we dont hit it every time - @modulemap[addr.to_s(16)] ||= [addr, addr+0x1000] - return - end - - if n = e.module_name and n != name - name = n - end - - @modulemap[name] ||= [addr, addr+e.module_size] - - cnt = 0 - e.module_symbols.each { |n_, a, l| - cnt += 1 - a += addr - @disassembler.set_label_at(a, n_, false) - @symbols[a] = n_ # XXX store "lib!sym" ? - if l and l > 1; @symbols_len[a] = l - else @symbols_len.delete a # we may overwrite an existing symbol, keep len in sync - end - } - log "loaded #{cnt} symbols from #{name}" - - true - end - - # scan the target memory for loaded libraries, load their symbols - def scansyms(addr=0, max=@memory.length-0x1000-addr) - while addr <= max - loadsyms(addr) - addr += 0x1000 - end - end - - # load symbols from all libraries found by the OS module - def loadallsyms(&b) - modules.each { |m| - b.call(m.addr) if b - loadsyms(m.addr, m.path) - } - end - - # see Disassembler#load_map - def load_map(str, off=0) - str = File.read(str) if File.exist?(str) - sks = @disassembler.sections.keys.sort - str.each_line { |l| - case l.strip - when /^([0-9A-F]+)\s+(\w+)\s+(\w+)/i # kernel.map style - a = $1.to_i(16) + off - n = $3 - when /^([0-9A-F]+):([0-9A-F]+)\s+([a-z_]\w+)/i # IDA style - # see Disassembler for comments - a = sks[$1.to_i(16)] + $2.to_i(16) + off - n = $3 - else next - end - @disassembler.set_label_at(a, n, false) - @symbols[a] = n - } - - end - - # parses the expression contained in arg - def parse_expr(arg) - parse_expr!(arg.dup) - end - - # parses the expression contained in arg, updates arg to point after the expr - def 
parse_expr!(arg, &b) - return if not e = IndExpression.parse_string!(arg) { |s| - # handle 400000 -> 0x400000 - # XXX no way to override and force decimal interpretation.. - if s.length > 4 and not @disassembler.get_section_at(s.to_i) and @disassembler.get_section_at(s.to_i(16)) - s.to_i(16) - else - s.to_i - end - } - - # resolve ambiguous symbol names/hex values - bd = {} - e.externals.grep(::String).each { |ex| - if not v = register_list.find { |r| ex.downcase == r.to_s.downcase } || - (b && b.call(ex)) || symbols.index(ex) - lst = symbols.values.find_all { |s| s.downcase.include? ex.downcase } - case lst.length - when 0 - if ex =~ /^[0-9a-f]+$/i and @disassembler.get_section_at(ex.to_i(16)) - v = ex.to_i(16) - else - raise "unknown symbol name #{ex}" - end - when 1 - v = symbols.index(lst.first) - log "using #{lst.first} for #{ex}" - else - suggest = lst[0, 50].join(', ') - suggest = suggest[0, 125] + '...' if suggest.length > 128 - raise "ambiguous symbol name #{ex}: #{suggest} ?" - end - end - bd[ex] = v - } - e = e.bind(bd) - - e - end - - # resolves an expression involving register values and/or memory indirection using the current context - # uses #register_list, #get_reg_value, @mem, @cpu - # :tid/:pid resolve to current thread - def resolve_expr(e) - e = parse_expr(e) if e.kind_of? ::String - bd = { :tid => @tid, :pid => @pid } - Expression[e].externals.each { |ex| - next if bd[ex] - case ex - when ::Symbol; bd[ex] = get_reg_value(ex) - when ::String; bd[ex] = @symbols.index(ex) || @disassembler.prog_binding[ex] || 0 - end - } - Expression[e].bind(bd).reduce { |i| - if i.kind_of? Indirection and p = i.pointer.reduce and p.kind_of? 
::Integer - i.len ||= @cpu.size/8 - p &= (1 << @cpu.size) - 1 if p < 0 - Expression.decode_imm(@memory, i.len, @cpu, p) - end - } - end - alias resolve resolve_expr - - # return/yield an array of [addr, addr symbolic name] corresponding to the current stack trace - def stacktrace(maxdepth=500, &b) - @cpu.dbg_stacktrace(self, maxdepth, &b) - end - - # accepts a range or begin/end address to read memory, or a register name - def [](arg0, arg1=nil) - if arg1 - arg0 = resolve_expr(arg0) if not arg0.kind_of? ::Integer - arg1 = resolve_expr(arg1) if not arg1.kind_of? ::Integer - @memory[arg0, arg1].to_str - elsif arg0.kind_of? ::Range - arg0.begin = resolve_expr(arg0.begin) if not arg0.begin.kind_of? ::Integer # cannot happen, invalid ruby Range - arg0.end = resolve_expr(arg0.end) if not arg0.end.kind_of? ::Integer - @memory[arg0].to_str - else - get_reg_value(arg0) - end - end - - # accepts a range or begin/end address to write memory, or a register name - def []=(arg0, arg1, val=nil) - arg1, val = val, arg1 if not val - if arg1 - arg0 = resolve_expr(arg0) if not arg0.kind_of? ::Integer - arg1 = resolve_expr(arg1) if not arg1.kind_of? ::Integer - @memory[arg0, arg1] = val - elsif arg0.kind_of? ::Range - arg0.begin = resolve_expr(arg0.begin) if not arg0.begin.kind_of? ::Integer # cannot happen, invalid ruby Range - arg0.end = resolve_expr(arg0.end) if not arg0.end.kind_of? ::Integer - @memory[arg0] = val - else - set_reg_value(arg0, val) - end - end - - - # read an int from the target memory, int of sz bytes (defaults to cpu.size) - def memory_read_int(addr, sz=@cpu.size/8) - addr = resolve_expr(addr) if not addr.kind_of? ::Integer - Expression.decode_imm(@memory, sz, @cpu, addr) - end - - # write an int in the target memory - def memory_write_int(addr, val, sz=@cpu.size/8) - addr = resolve_expr(addr) if not addr.kind_of? ::Integer - val = resolve_expr(val) if not val.kind_of? 
::Integer - @memory[addr, sz] = Expression.encode_imm(val, sz, @cpu) - end - - # retrieve an argument (call at a function entrypoint) - def func_arg(nr) - @cpu.dbg_func_arg(self, nr) - end - def func_arg_set(nr, val) - @cpu.dbg_func_arg_set(self, nr, val) - end - - # retrieve a function returned value (call at func exitpoint) - def func_retval - @cpu.dbg_func_retval(self) - end - def func_retval_set(val) - @cpu.dbg_func_retval_set(self, val) - end - def func_retval=(val) - @cpu.dbg_func_retval_set(self, val) - end - - # retrieve a function return address (call at func entry/exit) - def func_retaddr - @cpu.dbg_func_retaddr(self) - end - def func_retaddr_set(addr) - @cpu.dbg_func_retaddr_set(self, addr) - end - def func_retaddr=(addr) - @cpu.dbg_func_retaddr_set(self, addr) - end - - def load_plugin(plugin_filename) - if not File.exist?(plugin_filename) and defined? Metasmdir - # try autocomplete - pf = File.join(Metasmdir, 'samples', 'dbg-plugins', plugin_filename) - if File.exist?(pf) - plugin_filename = pf - elsif File.exist?(pf + '.rb') - plugin_filename = pf + '.rb' - end - end - if (not File.exist?(plugin_filename) or File.directory?(plugin_filename)) and File.exist?(plugin_filename + '.rb') - plugin_filename += '.rb' - end - - instance_eval File.read(plugin_filename) - end - - # return the list of memory mappings of the current process - # array of [start, len, perms, infos] - def mappings - [[0, @memory.length]] - end - - # return a list of Process::Modules (with a #path, #addr) for the current process - def modules - [] - end - - # list debugged pids - def list_debug_pids - @pid_stuff.keys | [@pid].compact - end - - # return a list of OS::Process listing all alive processes (incl not debugged) - # default version only includes current debugged pids - def list_processes - list_debug_pids.map { |p| OS::Process.new(p) } - end - - # check if pid is valid - def check_pid(pid) - list_processes.find { |p| p.pid == pid } - end - - # list debugged tids - def 
list_debug_tids - @tid_stuff.keys | [@tid].compact - end - - # list of thread ids existing in the current process (incl not debugged) - # default version only lists debugged tids - alias list_threads list_debug_tids - - # check if tid is valid for the current process - def check_tid(tid) - list_threads.include?(tid) - end - - # see EData#pattern_scan - # scans only mapped areas of @memory, using os_process.mappings - def pattern_scan(pat, start=0, len=@memory.length-start, &b) - ret = [] - mappings.each { |maddr, mlen, perm, *o_| - next if perm !~ /r/i - mlen -= start-maddr if maddr < start - maddr = start if maddr < start - mlen = start+len-maddr if maddr+mlen > start+len - next if mlen <= 0 - EncodedData.new(read_mapped_range(maddr, mlen)).pattern_scan(pat) { |off| - off += maddr - ret << off if not b or b.call(off) - } - } - ret - end - - def read_mapped_range(addr, len) - # try to use a single get_page call - s = @memory.get_page(addr, len) || '' - s.length == len ? s : (s = @memory[addr, len] ? s.to_str : nil) - end + class Breakpoint + attr_accessor :address, + # context where the bp was defined + :pid, :tid, + # bool: oneshot ? + :oneshot, + # current bp state: :active, :inactive (internal use), :disabled (user-specified) + :state, + # type: type of breakpoint (:bpx = soft, :hwbp = hard, :bpm = memory) + :type, + # Expression if this is a conditionnal bp + # may be a Proc, String or Expression, evaluated every time the breakpoint hits + # if it returns 0 or false, the breakpoint is ignored + :condition, + # Proc to run if this bp has a callback + :action, + # Proc to run to emulate the overwritten instr behavior + # used to avoid unset/singlestep/re-set, more multithread friendly + # may be a DecodedInstruction for lazy initialization, see Debugger#init_bpx/has_emul_instr(bpx) + :emul_instr, + # internal data, cpu-specific (overwritten byte for a softbp, memory type/size for hwbp..) 
+ :internal, + # reference breakpoints sharing a target implementation (same hw debug register, soft bp addr...) + # shared is an array of Breakpoints, the same Array object in all shared breakpoints + # owner is a hash key => shared (dbg.breakpoint) + # key is an identifier for the Bp class in owner (bp.address) + :hash_shared, :hash_owner, :hash_key, + # user-defined breakpoint-specific stuff + :userdata + + # append the breakpoint to hash_owner + hash_shared + def add(owner=@hash_owner) + @hash_owner = owner + @hash_key ||= @address + return add_bpm if @type == :bpm + if pv = owner[@hash_key] + @hash_shared = pv.hash_shared + @internal ||= pv.internal + @emul_instr ||= pv.emul_instr + else + owner[@hash_key] = self + @hash_shared = [] + end + @hash_shared << self + end + + # register a bpm: add references to all page start covered in @hash_owner + def add_bpm + m = @address + @internal[:len] + a = @address & -0x1000 + @hash_shared = [self] + + @internal ||= {} + @internal[:orig_prot] ||= {} + while a < m + if pv = @hash_owner[a] + if not pv.hash_shared.include?(self) + pv.hash_shared.concat @hash_shared-pv.hash_shared + @hash_shared.each { |bpm| bpm.hash_shared = pv.hash_shared } + end + @internal[:orig_prot][a] = pv.internal[:orig_prot][a] + else + @hash_owner[a] = self + end + a += 0x1000 + end + end + + # delete the breakpoint from hash_shared, and hash_owner if empty + def del + return del_bpm if @type == :bpm + @hash_shared.delete self + if @hash_shared.empty? 
+ @hash_owner.delete @hash_key + elsif @hash_owner[@hash_key] == self + @hash_owner[@hash_key] = @hash_shared.first + end + end + + # unregister a bpm + def del_bpm + m = @address + @internal[:len] + a = @address & -0x1000 + @hash_shared.delete self + while a < m + pv = @hash_owner[a] + if pv == self + if opv = @hash_shared.find { |bpm| + bpm.address < a + 0x1000 and bpm.address + bpm.internal[:len] > a + } + @hash_owner[a] = opv + else + @hash_owner.delete a + + # split hash_shared on disjoint ranges + prev_shared = @hash_shared.find_all { |bpm| + bpm.address < a + 0x1000 and bpm.address + bpm.internal[:len] <= a + } + + prev_shared.each { |bpm| + bpm.hash_shared = prev_shared + @hash_shared.delete bpm + } + end + end + a += 0x1000 + end + end + end + + # per-process data + attr_accessor :memory, :cpu, :disassembler, :breakpoint, :breakpoint_memory, + :modulemap, :symbols, :symbols_len + # per-thread data + attr_accessor :state, :info, :breakpoint_thread, :singlestep_cb, :run_method, + :run_args, :breakpoint_cause + + # which/where per-process/thread stuff is stored + attr_accessor :pid_stuff, :tid_stuff, :pid_stuff_list, :tid_stuff_list + + # global debugger callbacks, called whenever such event occurs + attr_accessor :callback_singlestep, :callback_bpx, :callback_hwbp, :callback_bpm, + :callback_exception, :callback_newthread, :callback_endthread, + :callback_newprocess, :callback_endprocess, :callback_loadlibrary + + # global switches, specify wether to break on exception/thread event + # can be a Proc that is evaluated (arg = info parameter of the evt_func) + # trace_children is a bool to tell if we should debug subprocesses spawned + # by the target + attr_accessor :pass_all_exceptions, :ignore_newthread, :ignore_endthread, + :trace_children + + # link to the user-interface object if available + attr_accessor :gui + + # initializes the disassembler internal data - subclasses should call super() + def initialize + @pid_stuff = {} + @tid_stuff = {} + @log_proc 
= nil + @state = :dead + @info = '' + # stuff saved when we switch pids + @pid_stuff_list = [:memory, :cpu, :disassembler, :symbols, :symbols_len, + :modulemap, :breakpoint, :breakpoint_memory, :tid, :tid_stuff, + :dead_process] + @tid_stuff_list = [:state, :info, :breakpoint_thread, :singlestep_cb, + :run_method, :run_args, :breakpoint_cause, :dead_thread] + @callback_loadlibrary = lambda { |h| loadsyms(h[:address]) ; continue } + @callback_newprocess = lambda { |h| log "process #{@pid} attached" } + @callback_endprocess = lambda { |h| log "process #{@pid} died" } + initialize_newpid + initialize_newtid + end + + def dasm; disassembler; end + + def shortname; self.class.name.split('::').last.downcase; end + + attr_reader :pid + # change pid and associated cached data + # this will also re-load the previously selected tid for this process + def pid=(npid) + return if npid == pid + raise "invalid pid" if not check_pid(npid) + swapout_pid + @pid = npid + swapin_pid + end + alias set_pid pid= + + attr_reader :tid + def tid=(ntid) + return if ntid == tid + raise "invalid tid" if not check_tid(ntid) + swapout_tid + @tid = ntid + swapin_tid + end + alias set_tid tid= + + # creates stuff related to a new process being debugged + # includes disassembler, modulemap, symbols, breakpoints + # subclasses should check that @pid maps to a real process and raise() otherwise + # to be called with @pid/@tid set, calls initialize_memory+initialize_cpu + def initialize_newpid + return if not pid + @pid_stuff_list.each { |s| instance_variable_set("@#{s}", nil) } + + @symbols = {} + @symbols_len = {} + @modulemap = {} + @breakpoint = {} + @breakpoint_memory = {} + @tid_stuff = {} + initialize_cpu + initialize_memory + initialize_disassembler + end + + # subclasses should check that @tid maps to a real thread and raise() otherwise + def initialize_newtid + return if not tid + @tid_stuff_list.each { |s| instance_variable_set("@#{s}", nil) } + + @state = :stopped + @info = 'new' + 
@breakpoint_thread = {} + gui.swapin_tid if @disassembler and gui.respond_to?(:swapin_tid) + end + + # initialize the disassembler from @cpu/@memory + def initialize_disassembler + return if not @memory or not @cpu + @disassembler = Shellcode.decode(@memory, @cpu).disassembler + gui.swapin_pid if gui.respond_to?(:swapin_pid) + end + + # we're switching focus from one pid to another, save current pid data + def swapout_pid + return if not pid + swapout_tid + gui.swapout_pid if gui.respond_to?(:swapout_pid) + @pid_stuff[@pid] ||= {} + @pid_stuff_list.each { |fld| + @pid_stuff[@pid][fld] = instance_variable_get("@#{fld}") + } + end + + # we're switching focus from one tid to another, save current tid data + def swapout_tid + return if not tid + gui.swapout_tid if gui.respond_to?(:swapout_tid) + @tid_stuff[@tid] ||= {} + @tid_stuff_list.each { |fld| + @tid_stuff[@tid][fld] = instance_variable_get("@#{fld}") + } + end + + # we're switching focus from one pid to another, load current pid data + def swapin_pid + return initialize_newpid if not @pid_stuff[@pid] + + @pid_stuff_list.each { |fld| + instance_variable_set("@#{fld}", @pid_stuff[@pid][fld]) + } + swapin_tid + gui.swapin_pid if gui.respond_to?(:swapin_pid) + end + + # we're switching focus from one tid to another, load current tid data + def swapin_tid + return initialize_newtid if not @tid_stuff[@tid] + + @tid_stuff_list.each { |fld| + instance_variable_set("@#{fld}", @tid_stuff[@tid][fld]) + } + gui.swapin_tid if gui.respond_to?(:swapin_tid) + end + + # delete references to the current pid + # switch to another pid, set @state = :dead if none available + def del_pid + @pid_stuff.delete @pid + if @pid = @pid_stuff.keys.first + swapin_pid + else + @state = :dead + @info = '' + @tid = nil + end + end + + # delete references to the current thread + def del_tid + @tid_stuff.delete @tid + if @tid = @tid_stuff.keys.first + swapin_tid + else + del_tid_notid + end + end + + # wipe the whole process when no TID is left + 
# XXX we may have a pending evt_newthread... + def del_tid_notid + del_pid + end + + + # change the debugger to a specific pid/tid + # if given a block, run the block and then restore the original pid/tid + # pid may be an object that respond to #pid/#tid + def switch_context(npid, ntid=nil, &b) + if npid.respond_to?(:pid) + ntid ||= npid.tid + npid = npid.pid + end + oldpid = pid + oldtid = tid + set_pid npid + set_tid ntid if ntid + if b + # shortcut begin..ensure overhead + return b.call if oldpid == pid and oldtid == tid + + begin + b.call + ensure + set_pid oldpid + set_tid oldtid + end + end + end + alias set_context switch_context + + # iterate over all pids, yield in the context of this pid + def each_pid(&b) + # ensure @pid is last, so that we finish in the current context + lst = @pid_stuff.keys - [@pid] + lst << @pid + return lst if not b + lst.each { |p| + set_pid p + b.call + } + end + + # iterate over all tids of the current process, yield in its context + def each_tid(&b) + lst = @tid_stuff.keys - [@tid] + lst << @tid + return lst if not b + lst.each { |t| + set_tid t rescue next + b.call + } + end + + # iterate over all tids of all pids, yield in their context + def each_pid_tid(&b) + each_pid { each_tid { b.call } } + end + + + # create a thread/process breakpoint + # addr can be a numeric address, an Expression that is resolved, or + # a String that is parsed+resolved + # info's keys are set to the breakpoint + # standard keys are :type, :oneshot, :condition, :action + # returns the Breakpoint object + def add_bp(addr, info={}) + info[:pid] ||= @pid + # dont define :tid for bpx, otherwise on del_bp we may switch_context to this thread that may not be stopped -> cant ptrace_write + info[:tid] ||= @tid if info[:pid] == @pid and info[:type] == :hwbp + + b = Breakpoint.new + info.each { |k, v| + b.send("#{k}=", v) + } + + switch_context(b) { + addr = resolve_expr(addr) if not addr.kind_of? 
::Integer + b.address = addr + + b.hash_owner ||= case b.type + when :bpm; @breakpoint_memory + when :hwbp; @breakpoint_thread + when :bpx; @breakpoint + end + # XXX bpm may hash_share with an :active, but be larger and still need enable() + b.add + + enable_bp(b) if not info[:state] + } + + b + end + + # remove a breakpoint + def del_bp(b) + disable_bp(b) + b.del + end + + # activate an inactive breakpoint + def enable_bp(b) + return if b.state == :active + if not b.hash_shared.find { |bb| bb.state == :active } + switch_context(b) { + if not b.internal + init_bpx(b) if b.type == :bpx + b.internal ||= {} + b.hash_shared.each { |bb| bb.internal ||= b.internal } + end + do_enable_bp(b) + } + end + b.state = :active + end + + # deactivate an active breakpoint + def disable_bp(b, newstate = :inactive) + return if b.state != :active + b.state = newstate + return if b.hash_shared.find { |bb| bb.state == :active } + switch_context(b) { + do_disable_bp(b) + } + end + + + # delete all breakpoints defined in the current thread + def del_all_breakpoints_thread + @breakpoint_thread.values.map { |b| b.hash_shared }.flatten.uniq.each { |b| del_bp(b) } + end + + # delete all breakpoints for the current process and all its threads + def del_all_breakpoints + each_tid { del_all_breakpoints_thread } + @breakpoint.values.map { |b| b.hash_shared }.flatten.uniq.each { |b| del_bp(b) } + @breakpoint_memory.values.uniq.map { |b| b.hash_shared }.flatten.uniq.each { |b| del_bp(b) } + end + + # calls do_enable_bpm for bpms, or @cpu.dbg_enable_bp + def do_enable_bp(b) + if b.type == :bpm; do_enable_bpm(b) + else @cpu.dbg_enable_bp(self, b) + end + end + + # calls do_disable_bpm for bpms, or @cpu.dbg_disable_bp + def do_disable_bp(b) + if b.type == :bpm; do_disable_bpm(b) + else @cpu.dbg_disable_bp(self, b) + end + end + + # called in the context of the target when a bpx is to be initialized + # may (lazily) initialize b.emul_instr for virtual singlestep + def init_bpx(b) + # dont bother 
setting stuff up if it is never to be used + return if b.oneshot and not b.condition + + # lazy setup of b.emul_instr: delay building emulating lambda to if/when actually needed + # we still need to disassemble now and update @disassembler, before we patch the memory for the bpx + di = init_bpx_disassemble(b.address) + b.hash_shared.each { |bb| bb.emul_instr = di } + end + + # retrieve the di at a given address, disassemble if needed + # TODO make it so this doesn't interfere with other 'real' disassembler later commands, eg disassemble() or disassemble_fast_deep() + # (right now, when they see the block already present they stop all processing) + def init_bpx_disassemble(addr) + @disassembler.disassemble_fast_block(addr) + @disassembler.di_at(addr) + end + + # checks if bp has an emul_instr + # do the lazy initialization if needed + def has_emul_instr(bp) + if bp.emul_instr.kind_of?(DecodedInstruction) + if di = bp.emul_instr and fdbd = @disassembler.get_fwdemu_binding(di, register_pc) and + fdbd.all? 
{ |k, v| (k.kind_of?(Symbol) or k.kind_of?(Indirection)) and + k != :incomplete_binding and v != Expression::Unknown } + # setup a lambda that will mimic, using the debugger primitives, the actual execution of the instruction + bp.emul_instr = lambda { + fdbd.map { |k, v| + k = Indirection[emulinstr_resv(k.pointer), k.len] if k.kind_of?(Indirection) + [k, emulinstr_resv(v)] + }.each { |k, v| + if k.to_s =~ /flags?_(.+)/i + f = $1.downcase.to_sym + set_flag_value(f, v) + elsif k.kind_of?(Symbol) + set_reg_value(k, v) + elsif k.kind_of?(Indirection) + memory_write_int(k.pointer, v, k.len) + end + } + } + bp.hash_shared.each { |bb| bb.emul_instr = bp.emul_instr } + else + bp.hash_shared.each { |bb| bb.emul_instr = nil } + end + end + + bp.emul_instr + end + + def emulinstr_resv(e) + r = e + flags = Expression[r].externals.uniq.find_all { |f| f.to_s =~ /flags?_(.+)/i } + if flags.first + bd = {} + flags.each { |f| + f.to_s =~ /flags?_(.+)/i + bd[f] = get_flag_value($1.downcase.to_sym) + } + r = r.bind(bd) + end + resolve(r) + end + + # sets a breakpoint on execution + def bpx(addr, oneshot=false, cond=nil, &action) + h = { :type => :bpx } + h[:oneshot] = true if oneshot + h[:condition] = cond if cond + h[:action] = action if action + add_bp(addr, h) + end + + # sets a hardware breakpoint + # mtype in :r :w :x + # mlen is the size of the memory zone to cover + # mlen may be constrained by the architecture + def hwbp(addr, mtype=:x, mlen=1, oneshot=false, cond=nil, &action) + h = { :type => :hwbp } + h[:hash_owner] = @breakpoint_thread + addr = resolve_expr(addr) if not addr.kind_of? 
::Integer + mtype = mtype.to_sym + h[:hash_key] = [addr, mtype, mlen] + h[:internal] = { :type => mtype, :len => mlen } + h[:oneshot] = true if oneshot + h[:condition] = cond if cond + h[:action] = action if action + add_bp(addr, h) + end + + # sets a memory breakpoint + # mtype is :r :w :rw or :x + # mlen is the size of the memory zone to cover + def bpm(addr, mtype=:r, mlen=4096, oneshot=false, cond=nil, &action) + h = { :type => :bpm } + addr = resolve_expr(addr) if not addr.kind_of? ::Integer + h[:hash_key] = addr & -4096 # XXX actually referenced at addr, addr+4096, ... addr+len + h[:internal] = { :type => mtype, :len => mlen } + h[:oneshot] = true if oneshot + h[:condition] = cond if cond + h[:action] = action if action + add_bp(addr, h) + end + + + # define the lambda to use to log stuff + def set_log_proc(l=nil, &b) + @log_proc = l || b + end + + # show information to the user, uses log_proc if defined + def log(*a) + if @log_proc + a.each { |aa| @log_proc[aa] } + else + puts(*a) if $VERBOSE + end + end + + + # marks the current cache of memory/regs invalid + def invalidate + @memory.invalidate if @memory + end + + # invalidates the EncodedData backend for the dasm sections + def dasm_invalidate + disassembler.sections.each_value { |s| s.data.invalidate if s.data.respond_to?(:invalidate) } if disassembler + end + + # return all breakpoints set on a specific address (or all bp) + def all_breakpoints(addr=nil) + ret = [] + if addr + if b = @breakpoint[addr] + ret |= b.hash_shared + end + else + @breakpoint.each_value { |bb| ret |= bb.hash_shared } + end + + @breakpoint_thread.each_value { |bb| + next if addr and bb.address != addr + ret |= bb.hash_shared + } + + @breakpoint_memory.each_value { |bb| + next if addr and (bb.address+bb.internal[:len] <= addr or bb.address > addr) + ret |= bb.hash_shared + } + + ret + end + + # return on of the breakpoints at address addr + def find_breakpoint(addr=nil, &b) + return @breakpoint[addr] if @breakpoint[addr] and (not 
b or b.call(@breakpoint[addr])) + all_breakpoints(addr).find { |bp| b.call bp } + end + + + # to be called right before resuming execution of the target + # run_m is the method that should be called if the execution is stopped + # due to a side-effect of the debugger (bpx with wrong condition etc) + # returns nil if the execution should be avoided (just deleted the dead thread/process) + def check_pre_run(run_m, *run_a) + if @dead_process + del_pid + return + elsif @dead_thread + del_tid + return + elsif @state == :running + return + end + @cpu.dbg_check_pre_run(self) if @cpu.respond_to?(:dbg_check_pre_run) + @breakpoint_cause = nil + @run_method = run_m + @run_args = run_a + @info = nil + true + end + + + # called when the target stops due to a singlestep exception + def evt_singlestep(b=nil) + b ||= find_singlestep + return evt_exception(:type => 'singlestep') if not b + + @state = :stopped + @info = 'singlestep' + @cpu.dbg_evt_singlestep(self) if @cpu.respond_to?(:dbg_evt_singlestep) + + callback_singlestep[] if callback_singlestep + + if cb = @singlestep_cb + @singlestep_cb = nil + cb.call # call last, as the cb may change singlestep_cb/state/etc + end + end + + # returns true if the singlestep is due to us + def find_singlestep + return @cpu.dbg_find_singlestep(self) if @cpu.respond_to?(:dbg_find_singlestep) + @run_method == :singlestep + end + + # called when the target stops due to a soft breakpoint exception + def evt_bpx(b=nil) + b ||= find_bp_bpx + # TODO handle race: + # bpx foo ; thread hits foo ; we bc foo ; os notify us of bp hit but we already cleared everything related to 'bpx foo' -> unhandled bp exception + return evt_exception(:type => 'breakpoint') if not b + + @state = :stopped + @info = 'breakpoint' + @cpu.dbg_evt_bpx(self, b) if @cpu.respond_to?(:dbg_evt_bpx) + + callback_bpx[b] if callback_bpx + + post_evt_bp(b) + end + + # return the breakpoint that is responsible for the evt_bpx + def find_bp_bpx + return @cpu.dbg_find_bpx(self) if 
@cpu.respond_to?(:dbg_find_bpx) + @breakpoint[pc] + end + + # called when the target stops due to a hwbp exception + def evt_hwbp(b=nil) + b ||= find_bp_hwbp + return evt_exception(:type => 'hwbp') if not b + + @state = :stopped + @info = 'hwbp' + @cpu.dbg_evt_hwbp(self, b) if @cpu.respond_to?(:dbg_evt_hwbp) + + callback_hwbp[b] if callback_hwbp + + post_evt_bp(b) + end + + # return the breakpoint that is responsible for the evt_hwbp + def find_bp_hwbp + return @cpu.dbg_find_hwbp(self) if @cpu.respond_to?(:dbg_find_hwbp) + @breakpoint_thread.find { |b| b.address == pc } + end + + # called for archs where the same interrupt is generated for hwbp and singlestep + # checks if a hwbp matches, then call evt_hwbp, else call evt_singlestep (which + # will forward to evt_exception if singlestep does not match either) + def evt_hwbp_singlestep + if b = find_bp_hwbp + evt_hwbp(b) + else + evt_singlestep + end + end + + # called when the target stops due to a memory exception caused by a memory bp + # called by evt_exception + def evt_bpm(b) + @state = :stopped + @info = 'bpm' + + callback_bpm[b] if callback_bpm + + post_evt_bp(b) + end + + # return a bpm whose page coverage includes the fault described in info + def find_bp_bpm(info) + @breakpoint_memory[info[:fault_addr] & -0x1000] + end + + # returns true if the fault described in info is valid to trigger b + def check_bpm_range(b, info) + return if b.address+b.internal[:len] <= info[:fault_addr] + return if b.address >= info[:fault_addr] + info[:fault_len] + case b.internal[:type] + when :r; info[:fault_access] == :r # or info[:fault_access] == :x + when :w; info[:fault_access] == :w + when :x; info[:fault_access] == :x # XXX non-NX cpu => check pc is in bpm range ? 
+ when :rw; true + end + end + + # handles breakpoint conditions/callbacks etc + def post_evt_bp(b) + @breakpoint_cause = b + + found_valid_active = false + + pre_callback_pc = pc + + # XXX may have many active bps with callback that continue/singlestep/singlestep{}... + b.hash_shared.dup.find_all { |bb| + # ignore inactive bps + next if bb.state != :active + + # ignore out-of-range bpms + next if bb.type == :bpm and not check_bpm_range(bb, b.internal) + + # check condition + case bb.condition + when nil; cd = 1 + when Proc; cd = bb.condition.call + when String, Expression; cd = resolve_expr(bb.condition) + else raise "unknown bp condition #{bb.condition.inspect}" + end + next if not cd or cd == 0 + + found_valid_active = true + + # oneshot + del_bp(bb) if bb.oneshot + + bb.action + }.each { |bb| bb.action.call } + + # discard @breakpoint_cause if a bp callback did modify register_pc + @breakpoint_cause = nil if pc != pre_callback_pc + + # we did break due to a bp whose condition is not true: resume + # (unless a callback already resumed) + resume_badbreak(b) if not found_valid_active and @state == :stopped + end + + # called whenever the target stops due to an exception + # type may be: + # * 'access violation', :fault_addr, :fault_len, :fault_access (:r/:w/:x) + # anything else for other exceptions (access violation is special to handle bpm) + # ... + def evt_exception(info={}) + if info[:type] == 'access violation' and b = find_bp_bpm(info) + info[:fault_len] ||= 1 + b.internal.update info + return evt_bpm(b) + end + + @state = :stopped + @info = "exception #{info[:type]}" + + callback_exception[info] if callback_exception + + pass = pass_all_exceptions + pass = pass[info] if pass.kind_of? Proc + if pass + pass_current_exception + resume_badbreak + end + end + + def evt_newthread(info={}) + @state = :stopped + @info = 'new thread' + + callback_newthread[info] if callback_newthread + + ign = ignore_newthread + ign = ign[info] if ign.kind_of? 
Proc + if ign + continue + end + end + + def evt_endthread(info={}) + @state = :stopped + @info = 'end thread' + # mark the thread as to be deleted on next check_pre_run + @dead_thread = true + + callback_endthread[info] if callback_endthread + + ign = ignore_endthread + ign = ign[info] if ign.kind_of? Proc + if ign + continue + end + end + + def evt_newprocess(info={}) + @state = :stopped + @info = 'new process' + + callback_newprocess[info] if callback_newprocess + end + + def evt_endprocess(info={}) + @state = :stopped + @info = 'end process' + @dead_process = true + + callback_endprocess[info] if callback_endprocess + end + + def evt_loadlibrary(info={}) + @state = :stopped + @info = 'loadlibrary' + + callback_loadlibrary[info] if callback_loadlibrary + end + + # called when we did break due to a breakpoint whose condition is invalid + # resume execution as if we never stopped + # disable offending bp + singlestep if needed + def resume_badbreak(b=nil) + # ensure we didn't delete b + if b and b.hash_shared.find { |bb| bb.state == :active } + rm = @run_method + if rm == :singlestep + singlestep_bp(b) + else + ra = @run_args + singlestep_bp(b) { send rm, *ra } + end + else + send @run_method, *@run_args + end + end + + # singlesteps over an active breakpoint and run its block + # if the breakpoint provides an emulation stub, run that, otherwise + # disable the breakpoint, singlestep, and re-enable + def singlestep_bp(bp, &b) + if has_emul_instr(bp) + @state = :stopped + bp.emul_instr.call + b.call if b + else + bp.hash_shared.each { |bb| + disable_bp(bb, :temp_inactive) if bb.state == :active + } + # this *should* work with different bps stopping the current instr + prev_sscb = @singlestep_cb + singlestep { + bp.hash_shared.each { |bb| + enable_bp(bb) if bb.state == :temp_inactive + } + prev_sscb[] if prev_sscb + b.call if b + } + end + end + + # checks if @breakpoint_cause is valid, or was obsoleted by the user changing pc + def check_breakpoint_cause + if bp = 
@breakpoint_cause and + (bp.type == :bpx or (bp.type == :hwbp and bp.internal[:type] == :x)) and + pc != bp.address + bp = @breakpoint_cause = nil + end + bp + end + + # checks if the running target has stopped (nonblocking) + # returns false if no debug event happened + def check_target + do_check_target + end + + # waits until the running target stops (due to a breakpoint, fault, etc) + def wait_target + do_wait_target while @state == :running + end + + # resume execution of the target + # bypasses a software breakpoint on pc if needed + # thread breakpoints must be manually disabled before calling continue + def continue + if b = check_breakpoint_cause and b.hash_shared.find { |bb| bb.state == :active } + singlestep_bp(b) { + next if not check_pre_run(:continue) + do_continue + } + else + return if not check_pre_run(:continue) + do_continue + end + end + alias run continue + + # continue ; wait_target + def continue_wait + continue + wait_target + end + + # resume execution of the target one instruction at a time + def singlestep(&b) + @singlestep_cb = b + bp = check_breakpoint_cause + return if not check_pre_run(:singlestep) + if bp and bp.hash_shared.find { |bb| bb.state == :active } and has_emul_instr(bp) + @state = :stopped + bp.emul_instr.call + invalidate + evt_singlestep(true) + else + do_singlestep + end + end + + # singlestep ; wait_target + def singlestep_wait(&b) + singlestep(&b) + wait_target + end + + # tests if the specified instructions should be stepover() using singlestep or + # by putting a breakpoint at next_addr + def need_stepover(di = di_at(pc)) + di and @cpu.dbg_need_stepover(self, di.address, di) + end + + # stepover: singlesteps, but do not enter in subfunctions + def stepover + di = di_at(pc) + if need_stepover(di) + bpx di.next_addr, true, Expression[:tid, :==, @tid] + continue + else + singlestep + end + end + + # stepover ; wait_target + def stepover_wait + stepover + wait_target + end + + # checks if an instruction should stop the 
stepout() (eg it is a return instruction) + def end_stepout(di = di_at(pc)) + di and @cpu.dbg_end_stepout(self, di.address, di) + end + + # stepover until finding the last instruction of the function + def stepout + # TODO thread-local bps + while not end_stepout + stepover + wait_target + end + do_singlestep + end + + def stepout_wait + stepout + wait_target + end + + # set a singleshot breakpoint, run the process, and wait + def go(target, cond=nil) + bpx(target, true, cond) + continue_wait + end + + # continue_wait until @state == :dead + def run_forever + continue_wait until @state == :dead + end + + # decode the Instruction at the address, use the @disassembler cache if available + def di_at(addr) + @disassembler.di_at(addr) || @disassembler.disassemble_instruction(addr) + end + + # list the general purpose register names available for the target + def register_list + @cpu.dbg_register_list + end + + # hash { register_name => register_size_in_bits } + def register_size + @cpu.dbg_register_size + end + + # retrieves the name of the register holding the program counter (address of the next instruction) + def register_pc + @cpu.dbg_register_pc + end + + # retrieve the name of the register holding the stack pointer + def register_sp + @cpu.dbg_register_sp + end + + # then name of the register holding the cpu flags + def register_flags + @cpu.dbg_register_flags + end + + # list of flags available in the flag register + def flag_list + @cpu.dbg_flag_list + end + + # retreive the value of the program counter register (eip) + def pc + get_reg_value(register_pc) + end + alias ip pc + + # change the value of pc + def pc=(v) + set_reg_value(register_pc, v) + end + alias ip= pc= + + # retrieve the value of the stack pointer register + def sp + get_reg_value(register_sp) + end + + # update the stack pointer + def sp=(v) + set_reg_value(register_sp, v) + end + + # retrieve the value of a flag (0/1) + def get_flag_value(f) + @cpu.dbg_get_flag(self, f) + end + + # retrieve 
the value of a flag (true/false) + def get_flag(f) + get_flag_value(f) != 0 + end + + # change the value of a flag + def set_flag_value(f, v) + (v && v != 0) ? set_flag(f) : unset_flag(f) + end + + # switch the value of a flag (true->false, false->true) + def toggle_flag(f) + set_flag_value(f, 1-get_flag_value(f)) + end + + # set the value of the flag to true + def set_flag(f) + @cpu.dbg_set_flag(self, f) + end + + # set the value of the flag to false + def unset_flag(f) + @cpu.dbg_unset_flag(self, f) + end + + # returns the name of the module containing addr or nil + def addr2module(addr) + @modulemap.keys.find { |k| @modulemap[k][0] <= addr and @modulemap[k][1] > addr } + end + + # returns a string describing addr in term of symbol (eg 'libc.so.6!printf+2f') + def addrname(addr) + (addr2module(addr) || '???') + '!' + + if s = @symbols[addr] ? addr : @symbols_len.keys.find { |s_| s_ < addr and s_ + @symbols_len[s_] > addr } + @symbols[s] + (addr == s ? '' : ('+%x' % (addr-s))) + else '%08x' % addr + end + end + + # same as addrname, but scan preceding addresses if no symbol matches + def addrname!(addr) + (addr2module(addr) || '???') + '!' + + if s = @symbols[addr] ? addr : + @symbols_len.keys.find { |s_| s_ < addr and s_ + @symbols_len[s_] > addr } || + @symbols.keys.sort.find_all { |s_| s_ < addr and s_ + 0x10000 > addr }.max + @symbols[s] + (addr == s ? '' : ('+%x' % (addr-s))) + else '%08x' % addr + end + end + + # loads the symbols from a mapped module + def loadsyms(addr, name='%08x'%addr.to_i) + if addr.kind_of? String + modules.each { |m| + if m.path =~ /#{addr}/i + addr = m.addr + name = File.basename m.path + break + end + } + return if not addr.kind_of? 
Integer + end + return if not peek = @memory.get_page(addr, 4) + if peek == "\x7fELF" + cls = LoadedELF + elsif peek[0, 2] == "MZ" and @memory[addr+@memory[addr+0x3c,4].unpack('V').first, 4] == "PE\0\0" + cls = LoadedPE + else return + end + + begin + e = cls.load @memory[addr, 0x1000_0000] + e.load_address = addr + e.decode_header + e.decode_exports + rescue + # cache the error so that we dont hit it every time + @modulemap[addr.to_s(16)] ||= [addr, addr+0x1000] + return + end + + if n = e.module_name and n != name + name = n + end + + @modulemap[name] ||= [addr, addr+e.module_size] + + cnt = 0 + e.module_symbols.each { |n_, a, l| + cnt += 1 + a += addr + @disassembler.set_label_at(a, n_, false) + @symbols[a] = n_ # XXX store "lib!sym" ? + if l and l > 1; @symbols_len[a] = l + else @symbols_len.delete a # we may overwrite an existing symbol, keep len in sync + end + } + log "loaded #{cnt} symbols from #{name}" + + true + end + + # scan the target memory for loaded libraries, load their symbols + def scansyms(addr=0, max=@memory.length-0x1000-addr) + while addr <= max + loadsyms(addr) + addr += 0x1000 + end + end + + # load symbols from all libraries found by the OS module + def loadallsyms(&b) + modules.each { |m| + b.call(m.addr) if b + loadsyms(m.addr, m.path) + } + end + + # see Disassembler#load_map + def load_map(str, off=0) + str = File.read(str) if File.exist?(str) + sks = @disassembler.sections.keys.sort + str.each_line { |l| + case l.strip + when /^([0-9A-F]+)\s+(\w+)\s+(\w+)/i # kernel.map style + a = $1.to_i(16) + off + n = $3 + when /^([0-9A-F]+):([0-9A-F]+)\s+([a-z_]\w+)/i # IDA style + # see Disassembler for comments + a = sks[$1.to_i(16)] + $2.to_i(16) + off + n = $3 + else next + end + @disassembler.set_label_at(a, n, false) + @symbols[a] = n + } + + end + + # parses the expression contained in arg + def parse_expr(arg) + parse_expr!(arg.dup) + end + + # parses the expression contained in arg, updates arg to point after the expr + def 
parse_expr!(arg, &b) + return if not e = IndExpression.parse_string!(arg) { |s| + # handle 400000 -> 0x400000 + # XXX no way to override and force decimal interpretation.. + if s.length > 4 and not @disassembler.get_section_at(s.to_i) and @disassembler.get_section_at(s.to_i(16)) + s.to_i(16) + else + s.to_i + end + } + + # resolve ambiguous symbol names/hex values + bd = {} + e.externals.grep(::String).each { |ex| + if not v = register_list.find { |r| ex.downcase == r.to_s.downcase } || + (b && b.call(ex)) || symbols.index(ex) + lst = symbols.values.find_all { |s| s.downcase.include? ex.downcase } + case lst.length + when 0 + if ex =~ /^[0-9a-f]+$/i and @disassembler.get_section_at(ex.to_i(16)) + v = ex.to_i(16) + else + raise "unknown symbol name #{ex}" + end + when 1 + v = symbols.index(lst.first) + log "using #{lst.first} for #{ex}" + else + suggest = lst[0, 50].join(', ') + suggest = suggest[0, 125] + '...' if suggest.length > 128 + raise "ambiguous symbol name #{ex}: #{suggest} ?" + end + end + bd[ex] = v + } + e = e.bind(bd) + + e + end + + # resolves an expression involving register values and/or memory indirection using the current context + # uses #register_list, #get_reg_value, @mem, @cpu + # :tid/:pid resolve to current thread + def resolve_expr(e) + e = parse_expr(e) if e.kind_of? ::String + bd = { :tid => @tid, :pid => @pid } + Expression[e].externals.each { |ex| + next if bd[ex] + case ex + when ::Symbol; bd[ex] = get_reg_value(ex) + when ::String; bd[ex] = @symbols.index(ex) || @disassembler.prog_binding[ex] || 0 + end + } + Expression[e].bind(bd).reduce { |i| + if i.kind_of? Indirection and p = i.pointer.reduce and p.kind_of? 
::Integer + i.len ||= @cpu.size/8 + p &= (1 << @cpu.size) - 1 if p < 0 + Expression.decode_imm(@memory, i.len, @cpu, p) + end + } + end + alias resolve resolve_expr + + # return/yield an array of [addr, addr symbolic name] corresponding to the current stack trace + def stacktrace(maxdepth=500, &b) + @cpu.dbg_stacktrace(self, maxdepth, &b) + end + + # accepts a range or begin/end address to read memory, or a register name + def [](arg0, arg1=nil) + if arg1 + arg0 = resolve_expr(arg0) if not arg0.kind_of? ::Integer + arg1 = resolve_expr(arg1) if not arg1.kind_of? ::Integer + @memory[arg0, arg1].to_str + elsif arg0.kind_of? ::Range + arg0.begin = resolve_expr(arg0.begin) if not arg0.begin.kind_of? ::Integer # cannot happen, invalid ruby Range + arg0.end = resolve_expr(arg0.end) if not arg0.end.kind_of? ::Integer + @memory[arg0].to_str + else + get_reg_value(arg0) + end + end + + # accepts a range or begin/end address to write memory, or a register name + def []=(arg0, arg1, val=nil) + arg1, val = val, arg1 if not val + if arg1 + arg0 = resolve_expr(arg0) if not arg0.kind_of? ::Integer + arg1 = resolve_expr(arg1) if not arg1.kind_of? ::Integer + @memory[arg0, arg1] = val + elsif arg0.kind_of? ::Range + arg0.begin = resolve_expr(arg0.begin) if not arg0.begin.kind_of? ::Integer # cannot happen, invalid ruby Range + arg0.end = resolve_expr(arg0.end) if not arg0.end.kind_of? ::Integer + @memory[arg0] = val + else + set_reg_value(arg0, val) + end + end + + + # read an int from the target memory, int of sz bytes (defaults to cpu.size) + def memory_read_int(addr, sz=@cpu.size/8) + addr = resolve_expr(addr) if not addr.kind_of? ::Integer + Expression.decode_imm(@memory, sz, @cpu, addr) + end + + # write an int in the target memory + def memory_write_int(addr, val, sz=@cpu.size/8) + addr = resolve_expr(addr) if not addr.kind_of? ::Integer + val = resolve_expr(val) if not val.kind_of? 
::Integer + @memory[addr, sz] = Expression.encode_imm(val, sz, @cpu) + end + + # retrieve an argument (call at a function entrypoint) + def func_arg(nr) + @cpu.dbg_func_arg(self, nr) + end + def func_arg_set(nr, val) + @cpu.dbg_func_arg_set(self, nr, val) + end + + # retrieve a function returned value (call at func exitpoint) + def func_retval + @cpu.dbg_func_retval(self) + end + def func_retval_set(val) + @cpu.dbg_func_retval_set(self, val) + end + def func_retval=(val) + @cpu.dbg_func_retval_set(self, val) + end + + # retrieve a function return address (call at func entry/exit) + def func_retaddr + @cpu.dbg_func_retaddr(self) + end + def func_retaddr_set(addr) + @cpu.dbg_func_retaddr_set(self, addr) + end + def func_retaddr=(addr) + @cpu.dbg_func_retaddr_set(self, addr) + end + + def load_plugin(plugin_filename) + if not File.exist?(plugin_filename) and defined? Metasmdir + # try autocomplete + pf = File.join(Metasmdir, 'samples', 'dbg-plugins', plugin_filename) + if File.exist?(pf) + plugin_filename = pf + elsif File.exist?(pf + '.rb') + plugin_filename = pf + '.rb' + end + end + if (not File.exist?(plugin_filename) or File.directory?(plugin_filename)) and File.exist?(plugin_filename + '.rb') + plugin_filename += '.rb' + end + + instance_eval File.read(plugin_filename) + end + + # return the list of memory mappings of the current process + # array of [start, len, perms, infos] + def mappings + [[0, @memory.length]] + end + + # return a list of Process::Modules (with a #path, #addr) for the current process + def modules + [] + end + + # list debugged pids + def list_debug_pids + @pid_stuff.keys | [@pid].compact + end + + # return a list of OS::Process listing all alive processes (incl not debugged) + # default version only includes current debugged pids + def list_processes + list_debug_pids.map { |p| OS::Process.new(p) } + end + + # check if pid is valid + def check_pid(pid) + list_processes.find { |p| p.pid == pid } + end + + # list debugged tids + def 
list_debug_tids + @tid_stuff.keys | [@tid].compact + end + + # list of thread ids existing in the current process (incl not debugged) + # default version only lists debugged tids + alias list_threads list_debug_tids + + # check if tid is valid for the current process + def check_tid(tid) + list_threads.include?(tid) + end + + # see EData#pattern_scan + # scans only mapped areas of @memory, using os_process.mappings + def pattern_scan(pat, start=0, len=@memory.length-start, &b) + ret = [] + mappings.each { |maddr, mlen, perm, *o_| + next if perm !~ /r/i + mlen -= start-maddr if maddr < start + maddr = start if maddr < start + mlen = start+len-maddr if maddr+mlen > start+len + next if mlen <= 0 + EncodedData.new(read_mapped_range(maddr, mlen)).pattern_scan(pat) { |off| + off += maddr + ret << off if not b or b.call(off) + } + } + ret + end + + def read_mapped_range(addr, len) + # try to use a single get_page call + s = @memory.get_page(addr, len) || '' + s.length == len ? s : (s = @memory[addr, len] ? 
s.to_str : nil) + end end end diff --git a/lib/metasm/metasm/decode.rb b/lib/metasm/metasm/decode.rb index 7c0b1af609..caaca6c81d 100644 --- a/lib/metasm/metasm/decode.rb +++ b/lib/metasm/metasm/decode.rb @@ -12,233 +12,233 @@ module Metasm # symbolic pointer dereference # API similar to Expression class Indirection < ExpressionType - # Expression (the pointer) - attr_accessor :target - alias pointer target - alias pointer= target= - # length in bytes of data referenced - attr_accessor :len - # address of the instruction who generated the indirection - attr_accessor :origin + # Expression (the pointer) + attr_accessor :target + alias pointer target + alias pointer= target= + # length in bytes of data referenced + attr_accessor :len + # address of the instruction who generated the indirection + attr_accessor :origin - def initialize(target, len, origin) - @target, @len, @origin = target, len, origin - end + def initialize(target, len, origin) + @target, @len, @origin = target, len, origin + end - def reduce_rec - ptr = Expression[@target.reduce] - (ptr == Expression::Unknown) ? ptr : Indirection.new(ptr, @len, @origin) - end + def reduce_rec + ptr = Expression[@target.reduce] + (ptr == Expression::Unknown) ? ptr : Indirection.new(ptr, @len, @origin) + end - def bind(h) - h[self] || Indirection.new(@target.bind(h), @len, @origin) - end + def bind(h) + h[self] || Indirection.new(@target.bind(h), @len, @origin) + end - def hash ; @target.hash^@len.to_i end - def eql?(o) o.class == self.class and [o.target, o.len] == [@target, @len] end - alias == eql? + def hash ; @target.hash^@len.to_i end + def eql?(o) o.class == self.class and [o.target, o.len] == [@target, @len] end + alias == eql? 
- include Renderable - def render - ret = [] - qual = {1 => 'byte', 2 => 'word', 4 => 'dword', 8 => 'qword'}[len] || "_#{len*8}bits" if len - ret << "#{qual} ptr " if qual - ret << '[' << @target << ']' - end + include Renderable + def render + ret = [] + qual = {1 => 'byte', 2 => 'word', 4 => 'dword', 8 => 'qword'}[len] || "_#{len*8}bits" if len + ret << "#{qual} ptr " if qual + ret << '[' << @target << ']' + end - # returns the complexity of the expression (number of externals +1 per indirection) - def complexity - 1+@target.complexity - end + # returns the complexity of the expression (number of externals +1 per indirection) + def complexity + 1+@target.complexity + end - def self.[](t, l, o=nil) - new(Expression[*t], l, o) - end + def self.[](t, l, o=nil) + new(Expression[*t], l, o) + end - def inspect - "Indirection[#{@target.inspect.sub(/^Expression/, '')}, #{@len.inspect}#{', '+@origin.inspect if @origin}]" - end + def inspect + "Indirection[#{@target.inspect.sub(/^Expression/, '')}, #{@len.inspect}#{', '+@origin.inspect if @origin}]" + end - def externals - @target.externals - end + def externals + @target.externals + end - def match_rec(target, vars) - return false if not target.kind_of? Indirection - t = target.target - if vars[t] - return false if @target != vars[t] - elsif vars.has_key? t - vars[t] = @target - elsif t.kind_of? ExpressionType - return false if not @target.match_rec(t, vars) - else - return false if targ != @target - end - if vars[target.len] - return false if @len != vars[target.len] - elsif vars.has_key? target.len - vars[target.len] = @len - else - return false if target.len != @len - end - vars - end + def match_rec(target, vars) + return false if not target.kind_of? Indirection + t = target.target + if vars[t] + return false if @target != vars[t] + elsif vars.has_key? t + vars[t] = @target + elsif t.kind_of? 
ExpressionType + return false if not @target.match_rec(t, vars) + else + return false if targ != @target + end + if vars[target.len] + return false if @len != vars[target.len] + elsif vars.has_key? target.len + vars[target.len] = @len + else + return false if target.len != @len + end + vars + end end class Expression - # returns the complexity of the expression (number of externals +1 per indirection) - def complexity - case @lexpr - when ExpressionType; @lexpr.complexity - when nil, ::Numeric; 0 - else 1 - end + - case @rexpr - when ExpressionType; @rexpr.complexity - when nil, ::Numeric; 0 - else 1 - end - end + # returns the complexity of the expression (number of externals +1 per indirection) + def complexity + case @lexpr + when ExpressionType; @lexpr.complexity + when nil, ::Numeric; 0 + else 1 + end + + case @rexpr + when ExpressionType; @rexpr.complexity + when nil, ::Numeric; 0 + else 1 + end + end - def expr_indirections - ret = case @lexpr - when Indirection; [@lexpr] - when ExpressionType; @lexpr.expr_indirections - else [] - end - case @rexpr - when Indirection; ret << @rexpr - when ExpressionType; ret.concat @rexpr.expr_indirections - else ret - end - end + def expr_indirections + ret = case @lexpr + when Indirection; [@lexpr] + when ExpressionType; @lexpr.expr_indirections + else [] + end + case @rexpr + when Indirection; ret << @rexpr + when ExpressionType; ret.concat @rexpr.expr_indirections + else ret + end + end end class EncodedData - # returns an ::Integer from self.ptr, advances ptr - # bytes from rawsize to virtsize = 0 - # ignores self.relocations - def get_byte - @ptr += 1 - if @ptr <= @data.length - b = @data[ptr-1] - b = b.unpack('C').first if b.kind_of? 
::String # 1.9 - b - elsif @ptr <= @virtsize - 0 - end - end + # returns an ::Integer from self.ptr, advances ptr + # bytes from rawsize to virtsize = 0 + # ignores self.relocations + def get_byte + @ptr += 1 + if @ptr <= @data.length + b = @data[ptr-1] + b = b.unpack('C').first if b.kind_of? ::String # 1.9 + b + elsif @ptr <= @virtsize + 0 + end + end - # reads len bytes from self.data, advances ptr - # bytes from rawsize to virtsize are returned as zeroes - # ignores self.relocations - def read(len=@virtsize-@ptr) - vlen = len - vlen = @virtsize-@ptr if len > @virtsize-@ptr - str = (@ptr < @data.length) ? @data[@ptr, vlen] : '' - str = str.to_str.ljust(vlen, "\0") if str.length < vlen - @ptr += len - str - end + # reads len bytes from self.data, advances ptr + # bytes from rawsize to virtsize are returned as zeroes + # ignores self.relocations + def read(len=@virtsize-@ptr) + vlen = len + vlen = @virtsize-@ptr if len > @virtsize-@ptr + str = (@ptr < @data.length) ? @data[@ptr, vlen] : '' + str = str.to_str.ljust(vlen, "\0") if str.length < vlen + @ptr += len + str + end - # decodes an immediate value from self.ptr, advances ptr - # returns an Expression on relocation, or an ::Integer - # if ptr has a relocation but the type/endianness does not match, the reloc is ignored and a warning is issued - # TODO arg type => sign+len - def decode_imm(type, endianness) - raise "invalid imm type #{type.inspect}" if not isz = Expression::INT_SIZE[type] - if rel = @reloc[@ptr] - if Expression::INT_SIZE[rel.type] == isz and rel.endianness == endianness - @ptr += rel.length - return rel.target - end - puts "W: Immediate type/endianness mismatch, ignoring relocation #{rel.target.inspect} (wanted #{type.inspect})" if $DEBUG - end - Expression.decode_imm(read(isz/8), type, endianness) - end - alias decode_immediate decode_imm + # decodes an immediate value from self.ptr, advances ptr + # returns an Expression on relocation, or an ::Integer + # if ptr has a relocation but the 
type/endianness does not match, the reloc is ignored and a warning is issued + # TODO arg type => sign+len + def decode_imm(type, endianness) + raise "invalid imm type #{type.inspect}" if not isz = Expression::INT_SIZE[type] + if rel = @reloc[@ptr] + if Expression::INT_SIZE[rel.type] == isz and rel.endianness == endianness + @ptr += rel.length + return rel.target + end + puts "W: Immediate type/endianness mismatch, ignoring relocation #{rel.target.inspect} (wanted #{type.inspect})" if $DEBUG + end + Expression.decode_imm(read(isz/8), type, endianness) + end + alias decode_immediate decode_imm end class Expression - # decodes an immediate from a raw binary string - # type may be a length in bytes, interpreted as unsigned, or an expression type (eg :u32) - # endianness is either an endianness or an object than responds to endianness - def self.decode_imm(str, type, endianness, off=0) - type = INT_SIZE.keys.find { |k| k.to_s[0] == ?a and INT_SIZE[k] == 8*type } if type.kind_of? ::Integer - endianness = endianness.endianness if not endianness.kind_of? ::Symbol - str = str[off, INT_SIZE[type]/8].to_s - str = str.reverse if endianness == :little - val = str.unpack('C*').inject(0) { |val_, b| (val_ << 8) | b } - val = make_signed(val, INT_SIZE[type]) if type.to_s[0] == ?i - val - end - class << self - alias decode_immediate decode_imm - end + # decodes an immediate from a raw binary string + # type may be a length in bytes, interpreted as unsigned, or an expression type (eg :u32) + # endianness is either an endianness or an object than responds to endianness + def self.decode_imm(str, type, endianness, off=0) + type = INT_SIZE.keys.find { |k| k.to_s[0] == ?a and INT_SIZE[k] == 8*type } if type.kind_of? ::Integer + endianness = endianness.endianness if not endianness.kind_of? 
::Symbol + str = str[off, INT_SIZE[type]/8].to_s + str = str.reverse if endianness == :little + val = str.unpack('C*').inject(0) { |val_, b| (val_ << 8) | b } + val = make_signed(val, INT_SIZE[type]) if type.to_s[0] == ?i + val + end + class << self + alias decode_immediate decode_imm + end end class CPU - # decodes the instruction at edata.ptr, mapped at virtual address off - # returns a DecodedInstruction or nil - def decode_instruction(edata, addr) - @bin_lookaside ||= build_bin_lookaside - di = decode_findopcode edata if edata.ptr <= edata.length - di.address = addr if di - di = decode_instr_op(edata, di) if di - decode_instr_interpret(di, addr) if di - end + # decodes the instruction at edata.ptr, mapped at virtual address off + # returns a DecodedInstruction or nil + def decode_instruction(edata, addr) + @bin_lookaside ||= build_bin_lookaside + di = decode_findopcode edata if edata.ptr <= edata.length + di.address = addr if di + di = decode_instr_op(edata, di) if di + decode_instr_interpret(di, addr) if di + end - # matches the binary opcode at edata.ptr - # returns di or nil - def decode_findopcode(edata) - DecodedInstruction.new self - end + # matches the binary opcode at edata.ptr + # returns di or nil + def decode_findopcode(edata) + DecodedInstruction.new self + end - # decodes di.instruction - # returns di or nil - def decode_instr_op(edata, di) - end + # decodes di.instruction + # returns di or nil + def decode_instr_op(edata, di) + end - # may modify di.instruction.args for eg jump offset => absolute address - # returns di or nil - def decode_instr_interpret(di, addr) - di - end + # may modify di.instruction.args for eg jump offset => absolute address + # returns di or nil + def decode_instr_interpret(di, addr) + di + end - # number of instructions following a jump that are still executed - def delay_slot(di=nil) - 0 - end + # number of instructions following a jump that are still executed + def delay_slot(di=nil) + 0 + end - def 
disassembler_default_func - DecodedFunction.new - end + def disassembler_default_func + DecodedFunction.new + end - # return something like backtrace_binding in the forward direction - # set pc_reg to some reg name (eg :pc) to include effects on the instruction pointer - def get_fwdemu_binding(di, pc_reg=nil) - fdi = di.backtrace_binding ||= get_backtrace_binding(di) - fdi = fix_fwdemu_binding(di, fdi) - if pc_reg - if di.opcode.props[:setip] - xr = get_xrefs_x(nil, di) - if xr and xr.length == 1 - fdi[pc_reg] = xr[0] - else - fdi[:incomplete_binding] = Expression[1] - end - else - fdi[pc_reg] = Expression[pc_reg, :+, di.bin_length] - end - end - fdi - end + # return something like backtrace_binding in the forward direction + # set pc_reg to some reg name (eg :pc) to include effects on the instruction pointer + def get_fwdemu_binding(di, pc_reg=nil) + fdi = di.backtrace_binding ||= get_backtrace_binding(di) + fdi = fix_fwdemu_binding(di, fdi) + if pc_reg + if di.opcode.props[:setip] + xr = get_xrefs_x(nil, di) + if xr and xr.length == 1 + fdi[pc_reg] = xr[0] + else + fdi[:incomplete_binding] = Expression[1] + end + else + fdi[pc_reg] = Expression[pc_reg, :+, di.bin_length] + end + end + fdi + end - # patch a forward binding from the backtrace binding - # useful only on specific instructions that update a register *and* dereference that register (eg push) - def fix_fwdemu_binding(di, fbd) - fbd - end + # patch a forward binding from the backtrace binding + # useful only on specific instructions that update a register *and* dereference that register (eg push) + def fix_fwdemu_binding(di, fbd) + fbd + end end end diff --git a/lib/metasm/metasm/decompile.rb b/lib/metasm/metasm/decompile.rb index 6dc7bda9a3..9de18061cb 100644 --- a/lib/metasm/metasm/decompile.rb +++ b/lib/metasm/metasm/decompile.rb @@ -14,2645 +14,2645 @@ class C::Block; attr_accessor :decompdata; end class DecodedFunction; attr_accessor :decompdata; end class CPU - def decompile_check_abi(dcmp, entry, 
func) - end + def decompile_check_abi(dcmp, entry, func) + end end class Decompiler - # TODO add methods to C::CExpr - AssignOp = [:'=', :'+=', :'-=', :'*=', :'/=', :'%=', :'^=', :'&=', :'|=', :'>>=', :'<<=', :'++', :'--'] - - attr_accessor :dasm, :c_parser - attr_accessor :forbid_optimize_dataflow, :forbid_optimize_code, :forbid_decompile_ifwhile, :forbid_decompile_types, :forbid_optimize_labels - # recursive flag: for each subfunction, recurse is decremented, when 0 only the prototype is decompiled, when <0 nothing is done - attr_accessor :recurse - - def initialize(dasm, cp = dasm.c_parser) - @dasm = dasm - @recurse = 1/0.0 # Infinity - @c_parser = cp || @dasm.cpu.new_cparser - end - - # decompile recursively function from an entrypoint, then perform global optimisation (static vars, ...) - # should be called once after everything is decompiled (global optimizations may bring bad results otherwise) - # use decompile_func for incremental decompilation - # returns the c_parser - def decompile(*entry) - entry.each { |f| decompile_func(f) } - finalize - @c_parser - end - - # decompile a function, decompiling subfunctions as needed - # may return :restart, which means that the decompilation should restart from the entrypoint (and bubble up) (eg a new codepath is found which may changes dependency in blocks etc) - def decompile_func(entry) - return if @recurse < 0 - entry = @dasm.normalize entry - return if not @dasm.decoded[entry] - - # create a new toplevel function to hold our code - func = C::Variable.new - func.name = @dasm.auto_label_at(entry, 'func') - if f = @dasm.function[entry] and f.decompdata and f.decompdata[:return_type] - rettype = f.decompdata[:return_type] - else - rettype = C::BaseType.new(:int) - end - func.type = C::Function.new rettype, [] - if @c_parser.toplevel.symbol[func.name] - return if @recurse == 0 - if not @c_parser.toplevel.statements.grep(C::Declaration).find { |decl| decl.var.name == func.name } - # recursive dependency: declare 
prototype - puts "function #{func.name} is recursive: predecompiling for prototype" if $VERBOSE - pre_recurse = @recurse - @recurse = 0 - @c_parser.toplevel.symbol.delete func.name - decompile_func(entry) - @recurse = pre_recurse - if not @c_parser.toplevel.statements.grep(C::Declaration).find { |decl| decl.var.name == func.name } - @c_parser.toplevel.statements << C::Declaration.new(func) - end - end - return - end - @c_parser.toplevel.symbol[func.name] = func - puts "decompiling #{func.name}" if $VERBOSE - - while catch(:restart) { do_decompile_func(entry, func) } == :restart - retval = :restart - end - - @c_parser.toplevel.symbol[func.name] = func # recursive func prototype could have overwritten us - @c_parser.toplevel.statements << C::Declaration.new(func) - - puts " decompiled #{func.name}" if $VERBOSE - - retval - end - - # calls decompile_func with recurse -= 1 (internal use) - def decompile_func_rec(entry) - @recurse -= 1 - decompile_func(entry) - ensure - @recurse += 1 - end - - def do_decompile_func(entry, func) - # find decodedinstruction graph of the function, decompile subfuncs - myblocks = listblocks_func(entry) - - # [esp+8] => [:frameptr-12] - makestackvars entry, myblocks.map { |b, to| @dasm.decoded[b].block } - - # find registry dependencies between blocks - deps = @dasm.cpu.decompile_func_finddeps(self, myblocks, func) - - scope = func.initializer = C::Block.new(@c_parser.toplevel) - if df = @dasm.function[entry] - scope.decompdata = df.decompdata ||= {:stackoff_type => {}, :stackoff_name => {}} - else - scope.decompdata ||= {:stackoff_type => {}, :stackoff_name => {}} - end - - # di blocks => raw c statements, declare variables - @dasm.cpu.decompile_blocks(self, myblocks, deps, func) - - simplify_goto(scope) - namestackvars(scope) - unalias_vars(scope, func) - decompile_c_types(scope) - optimize(scope) - remove_unreferenced_vars(scope) - cleanup_var_decl(scope, func) - if @recurse > 0 - decompile_controlseq(scope) - optimize_vars(scope) - 
optimize_ctrl(scope) - optimize_vars(scope) - remove_unreferenced_vars(scope) - simplify_varname_noalias(scope) - rename_variables(scope) - end - @dasm.cpu.decompile_check_abi(self, entry, func) - - case ret = scope.statements.last - when C::CExpression; puts "no return at end of func" if $VERBOSE - when C::Return - if not ret.value - scope.statements.pop - else - v = ret.value - v = v.rexpr if v.kind_of? C::CExpression and not v.op and v.rexpr.kind_of? C::Typed - func.type.type = v.type - end - end - - if @recurse == 0 - # we need only the prototype - func.initializer = nil - end - end - - # redecompile a function, redecompiles functions calling it if its prototype changed - def redecompile(name) - @c_parser.toplevel.statements.delete_if { |st| st.kind_of? C::Declaration and st.var.name == name } - oldvar = @c_parser.toplevel.symbol.delete name - - decompile_func(name) - - if oldvar and newvar = @c_parser.toplevel.symbol[name] and oldvar.type.kind_of? C::Function and newvar.type.kind_of? C::Function - o, n = oldvar.type, newvar.type - if o.type != n.type or o.args.to_a.length != n.args.to_a.length or o.args.to_a.zip(n.args.to_a).find { |oa, na| oa.type != na.type } - # XXX a may depend on b and c, and b may depend on c -> redecompile c twice - # XXX if the dcmp is unstable, may also infinite loop on mutually recursive funcs.. - @c_parser.toplevel.statements.dup.each { |st| - next if not st.kind_of? C::Declaration - next if not st.var.initializer - next if st.var.name == name - next if not walk_ce(st) { |ce| break true if ce.op == :funcall and ce.lexpr.kind_of? C::Variable and ce.lexpr.name == name } - redecompile(st.var.name) - } - end - end - end - - def new_global_var(addr, type, scope=nil) - addr = @dasm.normalize(addr) - - # (almost) NULL ptr - return if addr.kind_of? 
Fixnum and addr >= 0 and addr < 32 - - # check preceding structure we're hitting - # TODO check what we step over when defining a new static struct - 0x100.times { |i_| - next if not n = @dasm.get_label_at(addr-i_) - next if not v = @c_parser.toplevel.symbol[n] - next if not v.type.pointer? or not v.type.pointed.untypedef.kind_of? C::Union - break if i_ == 0 # XXX it crashes later if we dont break here - next if sizeof(v.type.pointed) <= i_ - return structoffset(v.type.pointed.untypedef, C::CExpression[v], i_, nil) - } - - ptype = type.pointed.untypedef if type.pointer? - if ptype.kind_of? C::Function - name = @dasm.auto_label_at(addr, 'sub', 'xref', 'byte', 'word', 'dword', 'unk') - if @dasm.get_section_at(addr) and @recurse > 0 - puts "found function pointer to #{name}" if $VERBOSE - @dasm.disassemble(addr) if not @dasm.decoded[addr] # TODO disassemble_fast ? - f = @dasm.function[addr] ||= DecodedFunction.new - # TODO detect thunks (__noreturn) - f.decompdata ||= { :stackoff_type => {}, :stackoff_name => {} } - if not s = @c_parser.toplevel.symbol[name] or not s.initializer or not s.type.untypedef.kind_of? C::Function - os = @c_parser.toplevel.symbol.delete name - @c_parser.toplevel.statements.delete_if { |ts| ts.kind_of? C::Declaration and ts.var.name == name } - aoff = 1 - ptype.args.to_a.each { |a| - aoff = (aoff + @c_parser.typesize[:ptr] - 1) / @c_parser.typesize[:ptr] * @c_parser.typesize[:ptr] - f.decompdata[:stackoff_type][aoff] ||= a.type - f.decompdata[:stackoff_name][aoff] ||= a.name if a.name - aoff += sizeof(a) # ary ? - } - decompile_func_rec(addr) - s = @c_parser.toplevel.symbol[name] - walk_ce([@c_parser.toplevel, scope]) { |ce| - ce.lexpr = s if ce.lexpr == os - ce.rexpr = s if ce.rexpr == os - } if os and s # update existing references to old instance - # else redecompile with new prototye ? - end - end - end - - name = case (type.pointer? 
&& tsz = sizeof(nil, ptype)) - when 1; 'byte' - when 2; 'word' - when 4; 'dword' - else 'unk' - end - name = 'stru' if ptype.kind_of? C::Union - name = @dasm.auto_label_at(addr, name, 'xref', 'byte', 'word', 'dword', 'unk', 'stru') - - if not var = @c_parser.toplevel.symbol[name] - var = C::Variable.new - var.name = name - var.type = type.pointer? ? C::Array.new(ptype) : type - @c_parser.toplevel.symbol[var.name] = var - @c_parser.toplevel.statements << C::Declaration.new(var) - end - if ptype.kind_of? C::Union and type.pointer? and s = @dasm.get_section_at(name) and s[0].ptr < s[0].length - # TODO struct init, array, fptrs.. - elsif type.pointer? and not type.pointed.untypedef.kind_of? C::Function and s = @dasm.get_section_at(name) and s[0].ptr < s[0].length and - [1, 2, 4].include? tsz and (not var.type.pointer? or sizeof(var.type.pointed) != sizeof(type.pointed) or not var.initializer) - # TODO do not overlap other statics (but labels may refer to elements of the array...) - data = (0..256).map { - v = s[0].decode_imm("u#{tsz*8}".to_sym, @dasm.cpu.endianness) - v = decompile_cexpr(v, @c_parser.toplevel) if v.kind_of? Expression # relocation - v - } - var.initializer = data.map { |v| C::CExpression[v, C::BaseType.new(:int)] } unless (data - [0]).empty? - if (tsz == 1 or tsz == 2) and eos = data.index(0) and (0..3).all? { |i| data[i] >= 0x20 and data[i] < 0x7f } # printable str - # XXX 0x80 with ruby1.9... - var.initializer = C::CExpression[data[0, eos].pack('C*'), C::Pointer.new(ptype)] rescue nil - end - if var.initializer.kind_of? ::Array and i = var.initializer.first and i.kind_of? C::CExpression and not i.op and i.rexpr.kind_of? C::Variable and - i.rexpr.type.kind_of? C::Function and not @dasm.get_section_at(@dasm.normalize(i.rexpr.name)) # iat_ExternalFunc - i.type = i.rexpr.type - type = var.type = C::Array.new(C::Pointer.new(i.type)) - var.initializer = [i] - end - var.initializer = nil if var.initializer.kind_of? ::Array and not type.untypedef.kind_of? 
C::Array - end - - # TODO patch existing references to addr ? (or would they have already triggered new_global_var?) - - # return the object to use to replace the raw addr - var - end - - # return an array of [address of block start, list of block to]] - # decompile subfunctions - def listblocks_func(entry) - @autofuncs ||= [] - blocks = [] - entry = dasm.normalize entry - todo = [entry] - while a = todo.pop - next if blocks.find { |aa, at| aa == a } - next if not di = @dasm.di_at(a) - blocks << [a, []] - di.block.each_to { |ta, type| - next if type == :indirect - ta = dasm.normalize ta - if type != :subfuncret and not @dasm.function[ta] and - (not @dasm.function[entry] or @autofuncs.include? entry) and - di.block.list.last.opcode.props[:saveip] - # possible noreturn function - # XXX call $+5; pop eax - @autofuncs << ta - @dasm.function[ta] = DecodedFunction.new - puts "autofunc #{Expression[ta]}" if $VERBOSE - end - - if @dasm.function[ta] and type != :subfuncret - f = dasm.auto_label_at(ta, 'func') - ta = dasm.normalize($1) if f =~ /^thunk_(.*)/ - ret = decompile_func_rec(ta) if (ta != entry or di.block.to_subfuncret) - throw :restart, :restart if ret == :restart - else - @dasm.auto_label_at(ta, 'label') if blocks.find { |aa, at| aa == ta } - blocks.last[1] |= [ta] - todo << ta - end - } - end - blocks - end - - # backtraces an expression from addr - # returns an integer, a label name, or an Expression - # XXX '(GetProcAddr("foo"))()' should not decompile to 'foo()' - def backtrace_target(expr, addr) - if n = @dasm.backtrace(expr, addr).first - return expr if n == Expression::Unknown - n = Expression[n].reduce_rec - n = @dasm.get_label_at(n) || n - n = $1 if n.kind_of? 
::String and n =~ /^thunk_(.*)/ - n - else - expr - end - end - - # patches instruction's backtrace_binding to replace things referring to a static stack offset from func start by :frameptr+off - def makestackvars(funcstart, blocks) - blockstart = nil - cache_di = nil - cache = {} # [i_s, e, type] => backtrace - tovar = lambda { |di, e, i_s| - case e - when Expression; Expression[tovar[di, e.lexpr, i_s], e.op, tovar[di, e.rexpr, i_s]].reduce - when Indirection; Indirection[tovar[di, e.target, i_s], e.len, e.origin] - when :frameptr; e - when ::Symbol - cache.clear if cache_di != di ; cache_di = di - vals = cache[[e, i_s, 0]] ||= @dasm.backtrace(e, di.address, :snapshot_addr => blockstart, - :include_start => i_s, :no_check => true, :terminals => [:frameptr]) - # backtrace only to blockstart first - if vals.length == 1 and ee = vals.first and ee.kind_of? Expression and (ee == Expression[:frameptr] or - (ee.lexpr == :frameptr and ee.op == :+ and ee.rexpr.kind_of? ::Integer) or - (not ee.lexpr and ee.op == :+ and ee.rexpr.kind_of? Indirection and eep = ee.rexpr.pointer and - (eep == Expression[:frameptr] or (eep.lexpr == :frameptr and eep.op == :+ and eep.rexpr.kind_of? ::Integer)))) - ee - else - # fallback on full run (could restart from blockstart with ee, but may reevaluate addr_binding.. - vals = cache[[e, i_s, 1]] ||= @dasm.backtrace(e, di.address, :snapshot_addr => funcstart, - :include_start => i_s, :no_check => true, :terminals => [:frameptr]) - if vals.length == 1 and ee = vals.first and (ee.kind_of? Expression and (ee == Expression[:frameptr] or - (ee.lexpr == :frameptr and ee.op == :+ and ee.rexpr.kind_of? 
::Integer))) - ee - else e - end - end - else e - end - } - - # must not change bt_bindings until everything is backtracked - repl_bind = {} # di => bt_bd - - @dasm.cpu.decompile_makestackvars(@dasm, funcstart, blocks) { |block| - block.list.each { |di| - bd = di.backtrace_binding ||= @dasm.cpu.get_backtrace_binding(di) - newbd = repl_bind[di] = {} - bd.each { |k, v| - k = tovar[di, k, true] if k.kind_of? Indirection - next if k == Expression[:frameptr] or (k.kind_of? Expression and k.lexpr == :frameptr and k.op == :+ and k.rexpr.kind_of? ::Integer) - newbd[k] = tovar[di, v, false] - } - } - } - - repl_bind.each { |di, bd| di.backtrace_binding = bd } - end - - # give a name to a stackoffset (relative to start of func) - # 4 => :arg_0, -8 => :var_4 etc - def stackoff_to_varname(off) - if off >= @c_parser.typesize[:ptr]; 'arg_%X' % ( off-@c_parser.typesize[:ptr]) # 4 => arg_0, 8 => arg_4.. - elsif off > 0; 'arg_0%X' % off - elsif off == 0; 'retaddr' - elsif off <= -@dasm.cpu.size/8; 'var_%X' % (-off-@dasm.cpu.size/8) # -4 => var_0, -8 => var_4.. - else 'var_0%X' % -off - end - end - - # turns an Expression to a CExpression, create+declares needed variables in scope - def decompile_cexpr(e, scope, itype=nil) - case e - when Expression - if e.op == :'=' and e.lexpr.kind_of? ::String and e.lexpr =~ /^dummy_metasm_/ - decompile_cexpr(e.rexpr, scope, itype) - elsif e.op == :+ and e.rexpr.kind_of? 
::Integer and e.rexpr < 0 - decompile_cexpr(Expression[e.lexpr, :-, -e.rexpr], scope, itype) - elsif e.lexpr - a = decompile_cexpr(e.lexpr, scope, itype) - C::CExpression[a, e.op, decompile_cexpr(e.rexpr, scope, itype)] - elsif e.op == :+ - decompile_cexpr(e.rexpr, scope, itype) - else - a = decompile_cexpr(e.rexpr, scope, itype) - C::CExpression[e.op, a] - end - when Indirection - case e.len - when 1, 2, 4, 8 - bt = C::BaseType.new("__int#{e.len*8}".to_sym) - else - bt = C::Struct.new - bt.members = [C::Variable.new('data', C::Array.new(C::BaseType.new(:__int8), e.len))] - end - itype = C::Pointer.new(bt) - p = decompile_cexpr(e.target, scope, itype) - p = C::CExpression[[p], itype] if not p.type.kind_of? C::Pointer - C::CExpression[:*, p] - when ::Integer - C::CExpression[e] - when C::CExpression - e - else - name = e.to_s - if not s = scope.symbol_ancestors[name] - s = C::Variable.new - s.type = C::BaseType.new(:__int32) - case e - when ::String # edata relocation (rel.length = size of pointer) - return @c_parser.toplevel.symbol[e] || new_global_var(e, itype || C::BaseType.new(:int), scope) - when ::Symbol; s.storage = :register ; s.add_attribute("register(#{name})") - else s.type.qualifier = [:volatile] - puts "decompile_cexpr unhandled #{e.inspect}, using #{e.to_s.inspect}" if $VERBOSE - end - s.name = name - scope.symbol[s.name] = s - scope.statements << C::Declaration.new(s) - end - s - end - end - - # simplify goto -> goto / goto -> return - def simplify_goto(scope, keepret = false) - if not keepret and scope.statements[-1].kind_of? C::Return and not scope.statements[-2].kind_of? C::Label - scope.statements.insert(-2, C::Label.new("ret_label")) - end - - jumpto = {} - walk(scope) { |s| - next if not s.kind_of? C::Block - s.statements.each_with_index { |ss, i| - case ss - when C::Goto, C::Return - while l = s.statements[i -= 1] and l.kind_of? 
C::Label - jumpto[l.name] = ss - end - end - } - } - - simpler = lambda { |s| - case s - when C::Goto - if jumpto[s.target] - r = jumpto[s.target].dup - r.value = r.value.deep_dup if r.kind_of? C::Return and r.value.kind_of? C::CExpression - r - end - when C::Return - if not keepret and scope.statements[-1].kind_of? C::Return and s.value == scope.statements[-1].value and s != scope.statements[-1] - C::Goto.new(scope.statements[-2].name) - end - end - } - - walk(scope) { |s| - case s - when C::Block - s.statements.each_with_index { |ss, i| - if sp = simpler[ss] - ss = s.statements[i] = sp - end - } - when C::If - if sp = simpler[s.bthen] - s.bthen = sp - end - end - } - - # remove unreferenced labels - remove_labels(scope) - - walk(scope) { |s| - next if not s.kind_of? C::Block - del = false - # remove dead code goto a; goto b; if (0) { z: bla; } => rm goto b - s.statements.delete_if { |st| - case st - when C::Goto, C::Return - olddel = del - del = true - olddel - else - del = false - end - } - # if () { goto x; } x: - s.statements.each_with_index { |ss, i| - if ss.kind_of? C::If - t = ss.bthen - t = t.statements.first if t.kind_of? C::Block - if t.kind_of? C::Goto and s.statements[i+1].kind_of? C::Label and s.statements[i+1].name == t.target - ss.bthen = C::Block.new(scope) - end - end - } - } - - remove_labels(scope) - end - - # changes ifgoto, goto to while/ifelse.. 
- def decompile_controlseq(scope) - # TODO replace all this crap by a method using the graph representation - scope.statements = decompile_cseq_if(scope.statements, scope) - remove_labels(scope) - scope.statements = decompile_cseq_if(scope.statements, scope) - remove_labels(scope) - # TODO harmonize _if/_while api (if returns a replacement, while patches) - decompile_cseq_while(scope.statements, scope) - decompile_cseq_switch(scope) - end - - # optimize if() { a; } to if() a; - def optimize_ctrl(scope) - simplify_goto(scope, true) - - # break/continue - # XXX if (foo) while (bar) goto bla; bla: should => break - walk = lambda { |e, brk, cnt| - case e - when C::Block - walk[e.statements, brk, cnt] - e - when ::Array - e.each_with_index { |st, i| - case st - when C::While, C::DoWhile - l1 = (e[i+1].name if e[i+1].kind_of? C::Label) - l2 = (e[i-1].name if e[i-1].kind_of? C::Label) - e[i].body = walk[st.body, l1, l2] - else - e[i] = walk[st, brk, cnt] - end - } - e - when C::If - e.bthen = walk[e.bthen, brk, cnt] if e.bthen - e.belse = walk[e.belse, brk, cnt] if e.belse - e - when C::While, C::DoWhile - e.body = walk[e.body, nil, nil] - e - when C::Goto - if e.target == brk - C::Break.new - elsif e.target == cnt - C::Continue.new - else e - end - else e - end - } - walk[scope, nil, nil] - - remove_labels(scope) - - # while (1) { a; if(b) { c; return; }; d; } => while (1) { a; if (b) break; d; } c; - while st = scope.statements.last and st.kind_of? C::While and st.test.kind_of? C::CExpression and - not st.test.op and st.test.rexpr == 1 and st.body.kind_of? C::Block - break if not i = st.body.statements.find { |ist| - ist.kind_of? C::If and not ist.belse and ist.bthen.kind_of? C::Block and ist.bthen.statements.last.kind_of? C::Return - } - walk(i.bthen.statements) { |sst| sst.outer = i.bthen.outer if sst.kind_of? 
C::Block and sst.outer == i.bthen } - scope.statements.concat i.bthen.statements - i.bthen = C::Break.new - end - - patch_test = lambda { |ce| - ce = ce.rexpr if ce.kind_of? C::CExpression and ce.op == :'!' - # if (a+1) => if (a != -1) - if ce.kind_of? C::CExpression and (ce.op == :+ or ce.op == :-) and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr.kind_of? ::Integer and ce.lexpr - ce.rexpr.rexpr = -ce.rexpr.rexpr if ce.op == :+ - ce.op = :'!=' - end - } - - walk(scope) { |ce| - case ce - when C::If - patch_test[ce.test] - if ce.bthen.kind_of? C::Block - case ce.bthen.statements.length - when 1 - walk(ce.bthen.statements) { |sst| sst.outer = ce.bthen.outer if sst.kind_of? C::Block and sst.outer == ce.bthen } - ce.bthen = ce.bthen.statements.first - when 0 - if not ce.belse and i = ce.bthen.outer.statements.index(ce) - ce.bthen.outer.statements[i] = ce.test # TODO remove sideeffectless parts - end - end - end - if ce.belse.kind_of? C::Block and ce.belse.statements.length == 1 - walk(ce.belse.statements) { |sst| sst.outer = ce.belse.outer if sst.kind_of? C::Block and sst.outer == ce.belse } - ce.belse = ce.belse.statements.first - end - when C::While, C::DoWhile - patch_test[ce.test] - if ce.body.kind_of? C::Block - case ce.body.statements.length - when 1 - walk(ce.body.statements) { |sst| sst.outer = ce.body.outer if sst.kind_of? C::Block and sst.outer == ce.body } - ce.body = ce.body.statements.first - when 0 - if ce.kind_of? C::DoWhile and i = ce.body.outer.statements.index(ce) - ce = ce.body.outer.statements[i] = C::While.new(ce.test, ce.body) - end - ce.body = nil - end - end - end - } - walk(scope) { |ce| - next if not ce.kind_of? C::Block - st = ce.statements - st.length.times { |n| - while st[n].kind_of? C::If and st[n+1].kind_of? C::If and not st[n].belse and not st[n+1].belse and ( - (st[n].bthen.kind_of? C::Return and st[n+1].bthen.kind_of? C::Return and st[n].bthen.value == st[n+1].bthen.value) or - (st[n].bthen.kind_of? 
C::Break and st[n+1].bthen.kind_of? C::Break) or - (st[n].bthen.kind_of? C::Continue and st[n+1].bthen.kind_of? C::Continue)) - # if (a) return x; if (b) return x; => if (a || b) return x; - st[n].test = C::CExpression[st[n].test, :'||', st[n+1].test] - st.delete_at(n+1) - end - } - } - end - - # ifgoto => ifthen - # ary is an array of statements where we try to find if () {} [else {}] - # recurses to then/else content - def decompile_cseq_if(ary, scope) - return ary if forbid_decompile_ifwhile - # the array of decompiled statements to use as replacement - ret = [] - # list of labels appearing in ary - inner_labels = ary.grep(C::Label).map { |l| l.name } - while s = ary.shift - # recurse if it's not the first run - if s.kind_of? C::If - s.bthen.statements = decompile_cseq_if(s.bthen.statements, s.bthen) if s.bthen.kind_of? C::Block - s.belse.statements = decompile_cseq_if(s.belse.statements, s.belse) if s.belse.kind_of? C::Block - end - - # if (a) goto x; if (b) goto x; => if (a || b) goto x; - while s.kind_of? C::If and s.bthen.kind_of? C::Goto and not s.belse and ary.first.kind_of? C::If and ary.first.bthen.kind_of? C::Goto and - not ary.first.belse and s.bthen.target == ary.first.bthen.target - s.test = C::CExpression[s.test, :'||', ary.shift.test] - end - - # if (a) goto x; b; x: => if (!a) { b; } - if s.kind_of? C::If and s.bthen.kind_of? C::Goto and l = ary.grep(C::Label).find { |l_| l_.name == s.bthen.target } - # if {goto l;} a; l: => if (!) {a;} - s.test = C::CExpression.negate s.test - s.bthen = C::Block.new(scope) - s.bthen.statements = decompile_cseq_if(ary[0..ary.index(l)], s.bthen) - s.bthen.statements.pop # remove l: from bthen, it is in ary (was needed in bthen for inner ifs) - ary[0...ary.index(l)] = [] - end - - if s.kind_of? C::If and (s.bthen.kind_of? C::Block or s.bthen.kind_of? C::Goto) - s.bthen = C::Block.new(scope, [s.bthen]) if s.bthen.kind_of? 
C::Goto - - bts = s.bthen.statements - - # if (a) if (b) { c; } => if (a && b) { c; } - if bts.length == 1 and bts.first.kind_of? C::If and not bts.first.belse - s.test = C::CExpression[s.test, :'&&', bts.first.test] - bts = bts.first.bthen - bts = s.bthen.statements = bts.kind_of?(C::Block) ? bts.statements : [bts] - end - - # if (a) { if (b) goto c; d; } c: => if (a && !b) { d; } - if bts.first.kind_of? C::If and l = bts.first.bthen and (l = l.kind_of?(C::Block) ? l.statements.first : l) and l.kind_of? C::Goto and ary[0].kind_of? C::Label and l.target == ary[0].name - s.test = C::CExpression[s.test, :'&&', C::CExpression.negate(bts.first.test)] - if e = bts.shift.belse - bts.unshift e - end - end - - # if () { goto a; } a: - if bts.last.kind_of? C::Goto and ary[0].kind_of? C::Label and bts.last.target == ary[0].name - bts.pop - end - - # if { a; goto outer; } b; return; => if (!) { b; return; } a; goto outer; - if bts.last.kind_of? C::Goto and not inner_labels.include? bts.last.target and g = ary.find { |ss| ss.kind_of? C::Goto or ss.kind_of? C::Return } and g.kind_of? C::Return - s.test = C::CExpression.negate s.test - ary[0..ary.index(g)], bts[0..-1] = bts, ary[0..ary.index(g)] - end - - # if { a; goto l; } b; l: => if {a;} else {b;} - if bts.last.kind_of? C::Goto and l = ary.grep(C::Label).find { |l_| l_.name == bts.last.target } - s.belse = C::Block.new(scope) - s.belse.statements = decompile_cseq_if(ary[0...ary.index(l)], s.belse) - ary[0...ary.index(l)] = [] - bts.pop - end - - # if { a; l: b; goto any;} c; goto l; => if { a; } else { c; } b; goto any; - if not s.belse and (bts.last.kind_of? C::Goto or bts.last.kind_of? 
C::Return) and g = ary.grep(C::Goto).first and l = bts.grep(C::Label).find { |l_| l_.name == g.target } - s.belse = C::Block.new(scope) - s.belse.statements = decompile_cseq_if(ary[0...ary.index(g)], s.belse) - ary[0..ary.index(g)], bts[bts.index(l)..-1] = bts[bts.index(l)..-1], [] - end - - # if { a; b; c; } else { d; b; c; } => if {a;} else {d;} b; c; - if s.belse - bes = s.belse.statements - while not bts.empty? - if bts.last.kind_of? C::Label; ary.unshift bts.pop - elsif bes.last.kind_of? C::Label; ary.unshift bes.pop - elsif bts.last.to_s == bes.last.to_s; ary.unshift bes.pop ; bts.pop - else break - end - end - - # if () { a; } else { b; } => if () { a; } else b; - # if () { a; } else {} => if () { a; } - case bes.length - when 0; s.belse = nil - #when 1; s.belse = bes.first - end - end - - # if () {} else { a; } => if (!) { a; } - # if () { a; } => if () a; - case bts.length - when 0; s.test, s.bthen, s.belse = C::CExpression.negate(s.test), s.belse, nil if s.belse - #when 1; s.bthen = bts.first # later (allows simpler handling in _while) - end - end - - # l1: l2: if () goto l1; goto l2; => if(!) goto l2; goto l1; - if s.kind_of? C::If - ls = s.bthen - ls = ls.statements.last if ls.kind_of? C::Block - if ls.kind_of? C::Goto - if li = inner_labels.index(ls.target) - table = inner_labels - else - table = ary.map { |st| st.name if st.kind_of? C::Label }.compact.reverse - li = table.index(ls.target) || table.length - end - g = ary.find { |ss| - break if ss.kind_of? C::Return - next if not ss.kind_of? C::Goto - table.index(ss.target).to_i > li - } - if g - s.test = C::CExpression.negate s.test - if not s.bthen.kind_of? 
C::Block - ls = C::Block.new(scope) - ls.statements << s.bthen - s.bthen = ls - end - ary[0..ary.index(g)], s.bthen.statements = s.bthen.statements, decompile_cseq_if(ary[0..ary.index(g)], scope) - end - end - end - - ret << s - end - ret - end - - def decompile_cseq_while(ary, scope) - return if forbid_decompile_ifwhile - - # find the next instruction that is not a label - ni = lambda { |l| ary[ary.index(l)..-1].find { |s| not s.kind_of? C::Label } } - - # TODO XXX get rid of #index - finished = false ; while not finished ; finished = true # 1.9 does not support 'retry' - ary.each { |s| - case s - when C::Label - if ss = ni[s] and ss.kind_of? C::If and not ss.belse and ss.bthen.kind_of? C::Block - if ss.bthen.statements.last.kind_of? C::Goto and ss.bthen.statements.last.target == s.name - ss.bthen.statements.pop - if l = ary[ary.index(ss)+1] and l.kind_of? C::Label - ss.bthen.statements.grep(C::If).each { |i| - i.bthen = C::Break.new if i.bthen.kind_of? C::Goto and i.bthen.target == l.name - } - end - ary[ary.index(ss)] = C::While.new(ss.test, ss.bthen) - elsif ss.bthen.statements.last.kind_of? C::Return and g = ary[ary.index(s)+1..-1].reverse.find { |_s| _s.kind_of? C::Goto and _s.target == s.name } - wb = C::Block.new(scope) - wb.statements = decompile_cseq_while(ary[ary.index(ss)+1...ary.index(g)], wb) - w = C::While.new(C::CExpression.negate(ss.test), wb) - ary[ary.index(ss)..ary.index(g)] = [w, *ss.bthen.statements] - finished = false ; break #retry - end - end - if g = ary[ary.index(s)..-1].reverse.find { |_s| _s.kind_of? C::Goto and _s.target == s.name } - wb = C::Block.new(scope) - wb.statements = decompile_cseq_while(ary[ary.index(s)...ary.index(g)], wb) - w = C::While.new(C::CExpression[1], wb) - ary[ary.index(s)..ary.index(g)] = [w] - finished = false ; break #retry - end - if g = ary[ary.index(s)..-1].reverse.find { |_s| _s.kind_of? C::If and not _s.belse and gt = _s.bthen and - (gt = gt.kind_of?(C::Block) && gt.statements.length == 1 ? 
gt.statements.first : gt) and gt.kind_of? C::Goto and gt.target == s.name } - wb = C::Block.new(scope) - wb.statements = decompile_cseq_while(ary[ary.index(s)...ary.index(g)], wb) - w = C::DoWhile.new(g.test, wb) - ary[ary.index(s)..ary.index(g)] = [w] - finished = false ; break #retry - end - when C::If - decompile_cseq_while(s.bthen.statements, s.bthen) if s.bthen.kind_of? C::Block - decompile_cseq_while(s.belse.statements, s.belse) if s.belse.kind_of? C::Block - when C::While, C::DoWhile - decompile_cseq_while(s.body.statements, s.body) if s.body.kind_of? C::Block - end - } - end - ary - end - - # TODO - def decompile_cseq_switch(scope) - uncast = lambda { |e| e = e.rexpr while e.kind_of? C::CExpression and not e.op ; e } - walk(scope) { |s| - # XXX pfff... - next if not s.kind_of? C::If - # if (v < 12) return ((void(*)())(tableaddr+4*v))(); - t = s.bthen - t = t.statements.first if t.kind_of? C::Block and t.statements.length == 1 - next if not t.kind_of? C::Return or not t.respond_to? :from_instr - next if t.from_instr.comment.to_a.include? 'switch' - next if not t.value.kind_of? C::CExpression or t.value.op != :funcall or t.value.rexpr != [] or not t.value.lexpr.kind_of? C::CExpression or t.value.lexpr.op - p = uncast[t.value.lexpr.rexpr] - next if not p.kind_of? C::CExpression or p.op != :* or p.lexpr - p = uncast[p.rexpr] - next if not p.kind_of? C::CExpression or p.op != :+ - r, l = uncast[p.rexpr], uncast[p.lexpr] - r, l = l, r if r.kind_of? C::CExpression - next if not r.kind_of? ::Integer or not l.kind_of? C::CExpression or l.op != :* or not l.lexpr - lr, ll = uncast[l.rexpr], uncast[l.lexpr] - lr, ll = ll, lr if not ll.kind_of? ::Integer - next if ll != sizeof(nil, C::Pointer.new(C::BaseType.new(:void))) - base, index = r, lr - if s.test.kind_of? C::CExpression and (s.test.op == :<= or s.test.op == :<) and s.test.lexpr == index and - s.test.rexpr.kind_of? C::CExpression and not s.test.rexpr.op and s.test.rexpr.rexpr.kind_of? 
::Integer - t.from_instr.add_comment 'switch' - sup = s.test.rexpr.rexpr - rng = ((s.test.op == :<) ? (0...sup) : (0..sup)) - from = t.from_instr.address - rng.map { |i| @dasm.backtrace(Indirection[base+ll*i, ll, from], from, :type => :x, :origin => from, :maxdepth => 0) } - @dasm.disassemble - throw :restart, :restart - end - puts "unhandled switch() at #{t.from_instr}" if $VERBOSE - } - end - - # remove unused labels - def remove_labels(scope) - return if forbid_optimize_labels - - used = [] - walk(scope) { |ss| - used |= [ss.target] if ss.kind_of? C::Goto - } - walk(scope) { |s| - next if not s.kind_of? C::Block - s.statements.delete_if { |l| - l.kind_of? C::Label and not used.include? l.name - } - } - - # remove implicit continue; at end of loop - walk(scope) { |s| - next if not s.kind_of? C::While - if s.body.kind_of? C::Block and s.body.statements.last.kind_of? C::Continue - s.body.statements.pop - end - } - end - - # checks if expr is a var (var or *&var) - def isvar(ce, var) - if var.stackoff and ce.kind_of? C::CExpression - return unless ce.op == :* and not ce.lexpr - ce = ce.rexpr - ce = ce.rexpr while ce.kind_of? C::CExpression and not ce.op - return unless ce.kind_of? 
C::CExpression and ce.op == :& and not ce.lexpr - ce = ce.rexpr - end - ce == var - end - - # checks if expr reads var - def ce_read(ce_, var) - isvar(ce_, var) or - walk_ce(ce_) { |ce| - case ce.op - when :funcall; break true if isvar(ce.lexpr, var) or ce.rexpr.find { |a| isvar(a, var) } - when :'='; break true if isvar(ce.rexpr, var) - break ce_read(ce.rexpr, var) if isvar(ce.lexpr, var) # *&var = 2 - else break true if isvar(ce.lexpr, var) or isvar(ce.rexpr, var) - end - } - end - - # checks if expr writes var - def ce_write(ce_, var) - walk_ce(ce_) { |ce| - break true if AssignOp.include?(ce.op) and (isvar(ce.lexpr, var) or - (((ce.op == :'++' or ce.op == :'--') and isvar(ce.rexpr, var)))) - } - end - - # patches a set of exprs, replacing oldce by newce - def ce_patch(exprs, oldce, newce) - walk_ce(exprs) { |ce| - case ce.op - when :funcall - ce.lexpr = newce if ce.lexpr == oldce - ce.rexpr.each_with_index { |a, i| ce.rexpr[i] = newce if a == oldce } - else - ce.lexpr = newce if ce.lexpr == oldce - ce.rexpr = newce if ce.rexpr == oldce - end - } - end - - - # duplicate vars per domain value - # eg eax = 1; foo(eax); eax = 2; bar(eax); => eax = 1; foo(eax) eax_1 = 2; bar(eax_1); - # eax = 1; if (bla) eax = 2; foo(eax); => no change - def unalias_vars(scope, func) - g = c_to_graph(scope) - - # unalias func args first, they may include __attr__((out)) needed by the others - funcalls = [] - walk_ce(scope) { |ce| funcalls << ce if ce.op == :funcall } - vars = scope.symbol.values.sort_by { |v| walk_ce(funcalls) { |ce| break true if ce.rexpr == v } ? 
0 : 1 } - - # find the domains of var aliases - vars.each { |var| unalias_var(var, scope, g) } - end - - # duplicates a var per domain value - def unalias_var(var, scope, g = c_to_graph(scope)) - # [label, index] of references to var (reading it, writing it, ro/wo it (eg eax = *eax => eax_0 = *eax_1)) - read = {} - write = {} - ro = {} - wo = {} - - # list of [l, i] for which domain is not known - unchecked = [] - - # mark all exprs of the graph - # TODO handle var_14 __attribute__((out)) = &curvar <=> curvar write - r = var.has_attribute_var('register') - g.exprs.each { |label, exprs| - exprs.each_with_index { |ce, i| - if ce_read(ce, var) - if (ce.op == :'=' and isvar(ce.lexpr, var) and not ce_write(ce.rexpr, var)) or - (ce.op == :funcall and r and not ce_write(ce.lexpr, var) and not ce_write(ce.rexpr, var) and @dasm.cpu.abi_funcall[:changed].include?(r.to_sym)) - (ro[label] ||= []) << i - (wo[label] ||= []) << i - unchecked << [label, i, :up] << [label, i, :down] - else - (read[label] ||= []) << i - unchecked << [label, i] - end - elsif ce_write(ce, var) - (write[label] ||= []) << i - unchecked << [label, i] - end - } - } - - # stuff when filling the domain (flood algorithm) - dom = dom_ro = dom_wo = todo_up = todo_down = func_top = nil - - # flood by walking the graph up from [l, i] (excluded) - # marks stuff do walk down - walk_up = lambda { |l, i| - todo_w = [[l, i-1]] - done_w = [] - while o = todo_w.pop - next if done_w.include? o - done_w << o - l, i = o - loop do - if read[l].to_a.include? i - # XXX not optimal (should mark only the uppest read) - todo_down |= [[l, i]] if not dom.include? [l, i] - dom |= [[l, i]] - elsif write[l].to_a.include? i - todo_down |= [[l, i]] if not dom.include? [l, i] - dom |= [[l, i]] - break - elsif wo[l].to_a.include? i - todo_down |= [[l, i]] if not dom_wo.include? 
[l, i, :down] - dom_wo |= [[l, i, :down]] - break - end - i -= 1 - if i < 0 - g.from_optim[l].to_a.each { |ll| - todo_w << [ll, g.exprs[ll].to_a.length-1] - } - func_top = true if g.from_optim[l].to_a.empty? - break - end - end - end - } - - # flood by walking the graph down from [l, i] (excluded) - # malks stuff to walk up - walk_down = lambda { |l, i| - todo_w = [[l, i+1]] - done_w = [] - while o = todo_w.pop - next if done_w.include? o - done_w << o - l, i = o - loop do - if read[l].to_a.include? i - todo_up |= [[l, i]] if not dom.include? [l, i] - dom |= [[l, i]] - elsif write[l].to_a.include? i - break - elsif ro[l].to_a.include? i - todo_up |= [[l, i]] if not dom_ro.include? [l, i, :up] - dom_ro |= [[l, i, :up]] - break - end - i += 1 - if i >= g.exprs[l].to_a.length - g.to_optim[l].to_a.each { |ll| - todo_w << [ll, 0] - } - break - end - end - end - } - - # check it out - while o = unchecked.shift - dom = [] - dom_ro = [] - dom_wo = [] - func_top = false - - todo_up = [] - todo_down = [] - - # init - if read[o[0]].to_a.include? o[1] - todo_up << o - todo_down << o - dom << o - elsif write[o[0]].to_a.include? 
o[1] - todo_down << o - dom << o - elsif o[2] == :up - todo_up << o - dom_ro << o - elsif o[2] == :down - todo_down << o - dom_wo << o - else raise - end - - # loop - while todo_up.first or todo_down.first - todo_up.each { |oo| walk_up[oo[0], oo[1]] } - todo_up.clear - - todo_down.each { |oo| walk_down[oo[0], oo[1]] } - todo_down.clear - end - - unchecked -= dom + dom_wo + dom_ro - - next if func_top - - # patch - n_i = 0 - n_i += 1 while scope.symbol_ancestors[newvarname = "#{var.name}_a#{n_i}"] - - nv = var.dup - nv.storage = :register if nv.has_attribute_var('register') - nv.attributes = nv.attributes.dup if nv.attributes - nv.name = newvarname - scope.statements << C::Declaration.new(nv) - scope.symbol[nv.name] = nv - - dom.each { |oo| ce_patch(g.exprs[oo[0]][oo[1]], var, nv) } - dom_ro.each { |oo| - ce = g.exprs[oo[0]][oo[1]] - if ce.op == :funcall or ce.rexpr.kind_of? C::CExpression - ce_patch(ce.rexpr, var, nv) - else - ce.rexpr = nv - end - } - dom_wo.each { |oo| - ce = g.exprs[oo[0]][oo[1]] - if ce.op == :funcall - elsif ce.lexpr.kind_of? C::CExpression - ce_patch(ce.lexpr, var, nv) - else - ce.lexpr = nv - end - } - - # check if the var is only used as an __out__ parameter - if false and dom_ro.empty? and dom_wo.empty? 
and dom.length == 2 and # TODO - arg.has_attribute('out') and not arg.has_attribute('in') - # *(int32*)&var_10 = &var_4; - # set_pointed_value(*(int32*)&var_10); => writeonly var_4, may start a new domain - nv.add_attribute('out') - end - end - end - - # revert the unaliasing namechange of vars where no alias subsists - def simplify_varname_noalias(scope) - names = scope.symbol.keys - names.delete_if { |k| - next if not b = k[/^(.*)_a\d+$/, 1] - next if scope.symbol[k].stackoff.to_i > 0 - if not names.find { |n| n != k and (n == b or n[/^(.*)_a\d+$/, 1] == b) } - scope.symbol[b] = scope.symbol.delete(k) - scope.symbol[b].name = b - end - } - end - - # patch scope to transform :frameoff-x into &var_x - def namestackvars(scope) - off2var = {} - newvar = lambda { |o, n| - if not v = off2var[o] - v = off2var[o] = C::Variable.new - v.type = C::BaseType.new(:void) - v.name = n - v.stackoff = o - scope.symbol[v.name] = v - scope.statements << C::Declaration.new(v) - end - v - } - - scope.decompdata[:stackoff_name].each { |o, n| newvar[o, n] } - scope.decompdata[:stackoff_type].each { |o, t| newvar[o, stackoff_to_varname(o)] } - - walk_ce(scope) { |e| - next if e.op != :+ and e.op != :- - next if not e.lexpr.kind_of? C::Variable or e.lexpr.name != 'frameptr' - next if not e.rexpr.kind_of? C::CExpression or e.rexpr.op or not e.rexpr.rexpr.kind_of? ::Integer - off = e.rexpr.rexpr - off = -off if e.op == :- - v = newvar[off, stackoff_to_varname(off)] - e.replace C::CExpression[:&, v] - } - end - - # assign type to vars (regs, stack & global) - # types are found by subfunction argument types & indirections, and propagated through assignments etc - # TODO when updating the type of a var, update the type of all cexprs where it appears - def decompile_c_types(scope) - return if forbid_decompile_types - - # TODO *(int8*)(ptr+8); *(int32*)(ptr+12) => automatic struct - - # name => type - types = {} - - pscopevar = lambda { |e| - e = e.rexpr while e.kind_of? 
C::CExpression and not e.op and e.rexpr.kind_of? C::CExpression - if e.kind_of? C::CExpression and e.op == :& and not e.lexpr and e.rexpr.kind_of? C::Variable - e.rexpr.name if scope.symbol[e.rexpr.name] - end - } - scopevar = lambda { |e| - e = e.rexpr if e.kind_of? C::CExpression and not e.op - if e.kind_of? C::Variable and scope.symbol[e.name] - e.name - elsif e.kind_of? C::CExpression and e.op == :* and not e.lexpr - pscopevar[e.rexpr] - end - } - globalvar = lambda { |e| - e = e.rexpr if e.kind_of? C::CExpression and not e.op - if e.kind_of? ::Integer and @dasm.get_section_at(e) - e - elsif e.kind_of? C::Variable and not scope.symbol[e.name] and @c_parser.toplevel.symbol[e.name] and @dasm.get_section_at(e.name) - e.name - end - } - - # check if a newly found type for o is better than current type - # order: foo* > void* > foo - better_type = lambda { |t0, t1| - t1 == C::BaseType.new(:void) or (t0.pointer? and t1.kind_of? C::BaseType) or t0.untypedef.kind_of? C::Union or - (t0.kind_of? C::BaseType and t1.kind_of? C::BaseType and (@c_parser.typesize[t0.name] > @c_parser.typesize[t1.name] or (t0.name == t1.name and t0.qualifier))) or - (t0.pointer? and t1.pointer? and better_type[t0.pointed, t1.pointed]) - } - - update_global_type = lambda { |e, t| - if ne = new_global_var(e, t, scope) - ne.type = t if better_type[t, ne.type] # TODO patch existing scopes using ne - # TODO rename (dword_xx -> byte_xx etc) - e = scope.symbol_ancestors[e] || e if e.kind_of? String # exe reloc - walk_ce(scope) { |ce| - ce.lexpr = ne if ce.lexpr == e - ce.rexpr = ne if ce.rexpr == e - if ce.op == :* and not ce.lexpr and ce.rexpr == ne and ne.type.pointer? and ne.type.pointed.untypedef.kind_of? 
C::Union - # *struct -> struct->bla - ce.rexpr = structoffset(ne.type.pointed.untypedef, ce.rexpr, 0, sizeof(ce.type)) - elsif ce.lexpr == ne or ce.rexpr == ne - # set ce type according to l/r - # TODO set ce.parent type etc - ce.type = C::CExpression[ce.lexpr, ce.op, ce.rexpr].type - end - } - end - } - - propagate_type = nil # fwd declaration - propagating = [] # recursion guard (x = &x) - # check if need to change the type of a var - # propagate_type if type is updated - update_type = lambda { |n, t| - next if propagating.include? n - o = scope.symbol[n].stackoff - next if not o and t.untypedef.kind_of? C::Union - next if o and scope.decompdata[:stackoff_type][o] and t != scope.decompdata[:stackoff_type][o] - next if t0 = types[n] and not better_type[t, t0] - next if o and (t.integral? or t.pointer?) and o % sizeof(t) != 0 # keep vars aligned - types[n] = t - next if t == t0 - propagating << n - propagate_type[n, t] - propagating.delete n - next if not o - t = t.untypedef - if t.kind_of? C::Struct - t.members.to_a.each { |m| - mo = t.offsetof(@c_parser, m.name) - next if mo == 0 - scope.symbol.each { |vn, vv| - update_type[vn, m.type] if vv.stackoff == o+mo - } - } - end - } - - # try to update the type of a var from knowing the type of an expr (through dereferences etc) - known_type = lambda { |e, t| - loop do - e = e.rexpr while e.kind_of? C::CExpression and not e.op and e.type == t - if o = scopevar[e] - update_type[o, t] - elsif o = globalvar[e] - update_global_type[o, t] - elsif not e.kind_of? C::CExpression - elsif o = pscopevar[e] and t.pointer? - update_type[o, t.pointed] - elsif e.op == :* and not e.lexpr - e = e.rexpr - t = C::Pointer.new(t) - next - elsif t.pointer? and e.op == :+ and e.lexpr.kind_of? C::CExpression and e.lexpr.type.integral? and e.rexpr.kind_of? C::Variable - e.lexpr, e.rexpr = e.rexpr, e.lexpr - next - elsif e.op == :+ and e.lexpr and e.rexpr.kind_of? C::CExpression - if not e.rexpr.op and e.rexpr.rexpr.kind_of? 
::Integer - if t.pointer? and e.rexpr.rexpr < 0x1000 and (e.rexpr.rexpr % sizeof(t.pointed)) == 0 # XXX relocatable + base=0.. - e = e.lexpr # (int)*(x+2) === (int) *x - next - elsif globalvar[e.rexpr.rexpr] - known_type[e.lexpr, C::BaseType.new(:int)] - e = e.rexpr - next - end - elsif t.pointer? and (e.lexpr.kind_of? C::CExpression and e.lexpr.lexpr and [:<<, :>>, :*, :&].include? e.lexpr.op) or - (o = scopevar[e.lexpr] and types[o] and types[o].integral? and - !(o = scopevar[e.rexpr] and types[o] and types[o].integral?)) - e.lexpr, e.rexpr = e.rexpr, e.lexpr # swap - e = e.lexpr - next - elsif t.pointer? and ((e.rexpr.kind_of? C::CExpression and e.rexpr.lexpr and [:<<, :>>, :*, :&].include? e.rexpr.op) or - (o = scopevar[e.rexpr] and types[o] and types[o].integral? and - !(o = scopevar[e.lexpr] and types[o] and types[o].integral?))) - e = e.lexpr - next - end - end - break - end - } - - # we found a type for a var, propagate it through affectations - propagate_type = lambda { |var, type| - walk_ce(scope) { |ce| - next if ce.op != :'=' - - if ce.lexpr.kind_of? C::Variable and ce.lexpr.name == var - known_type[ce.rexpr, type] - next - end - if ce.rexpr.kind_of? C::Variable and ce.rexpr.name == var - known_type[ce.lexpr, type] - next - end - - # int **x; y = **x => int y - t = type - l = ce.lexpr - while l.kind_of? C::CExpression and l.op == :* and not l.lexpr - if var == pscopevar[l.rexpr] - known_type[ce.rexpr, t] - break - elsif t.pointer? - l = l.rexpr - t = t.pointed - else break - end - end - - # int **x; **x = y => int y - t = type - r = ce.rexpr - while r.kind_of? C::CExpression and r.op == :* and not r.lexpr - if var == pscopevar[r.rexpr] - known_type[ce.lexpr, t] - break - elsif t.pointer? - r = r.rexpr - t = t.pointed - else break - end - end - - # TODO int *x; *x = *y; ? - } - } - - # put all those macros in use - # use user-defined types first - scope.symbol.each_value { |v| - next if not v.kind_of? 
C::Variable or not v.stackoff or not t = scope.decompdata[:stackoff_type][v.stackoff] - known_type[v, t] - } - - # try to infer types from C semantics - later = [] - walk_ce(scope) { |ce| - if ce.op == :'=' and ce.rexpr.kind_of? C::CExpression and (ce.rexpr.op == :funcall or (ce.rexpr.op == nil and ce.rexpr.rexpr.kind_of? ::Integer and - ce.rexpr.rexpr.abs < 0x10000 and (not ce.lexpr.kind_of? C::CExpression or ce.lexpr.op != :'*' or ce.lexpr.lexpr))) - # var = int - known_type[ce.lexpr, ce.rexpr.type] - elsif ce.op == :funcall - f = ce.lexpr.type - f = f.pointed if f.pointer? - next if not f.kind_of? C::Function - # cast func args to arg prototypes - f.args.to_a.zip(ce.rexpr).each_with_index { |(proto, arg), i| ce.rexpr[i] = C::CExpression[arg, proto.type] ; known_type[arg, proto.type] } - elsif ce.op == :* and not ce.lexpr - if e = ce.rexpr and e.kind_of? C::CExpression and not e.op and e = e.rexpr and e.kind_of? C::CExpression and - e.op == :& and not e.lexpr and e.rexpr.kind_of? C::Variable and e.rexpr.stackoff - # skip *(__int32*)&var_12 for now, avoid saying var12 is an int if it may be a ptr or anything - later << [ce.rexpr, C::Pointer.new(ce.type)] - next - end - known_type[ce.rexpr, C::Pointer.new(ce.type)] - elsif not ce.op and ce.type.pointer? and ce.type.pointed.kind_of? C::Function - # cast to fptr: must be a fptr - known_type[ce.rexpr, ce.type] - end - } - - later.each { |ce, t| known_type[ce, t] } - - # offsets have types now - types.each { |v, t| - # keep var type qualifiers - q = scope.symbol[v].type.qualifier - scope.symbol[v].type = t - t.qualifier = q if q - } - - - # remove offsets to struct members - # XXX this defeats antialiasing - # off => [structoff, membername, membertype] - memb = {} - types.dup.each { |n, t| - v = scope.symbol[n] - next if not o = v.stackoff - t = t.untypedef - if t.kind_of? 
C::Struct - t.members.to_a.each { |tm| - moff = t.offsetof(@c_parser, tm.name) - next if moff == 0 - types.delete_if { |vv, tt| scope.symbol[vv].stackoff == o+moff } - memb[o+moff] = [v, tm.name, tm.type] - } - end - } - - # patch local variables into the CExprs, incl unknown offsets - varat = lambda { |n| - v = scope.symbol[n] - if s = memb[v.stackoff] - v = C::CExpression[s[0], :'.', s[1], s[2]] - else - v.type = types[n] || C::BaseType.new(:int) - end - v - } - - maycast = lambda { |v, e| - if sizeof(v) != sizeof(e) - v = C::CExpression[:*, [[:&, v], C::Pointer.new(e.type)]] - end - v - } - maycast_p = lambda { |v, e| - if not e.type.pointer? or sizeof(v) != sizeof(nil, e.type.pointed) - C::CExpression[[:&, v], e.type] - else - C::CExpression[:&, v] - end - } - - walk_ce(scope, true) { |ce| - case - when ce.op == :funcall - ce.rexpr.map! { |re| - if o = scopevar[re]; C::CExpression[maycast[varat[o], re]] - elsif o = pscopevar[re]; C::CExpression[maycast_p[varat[o], re]] - else re - end - } - when o = scopevar[ce.lexpr]; ce.lexpr = maycast[varat[o], ce.lexpr] - when o = scopevar[ce.rexpr]; ce.rexpr = maycast[varat[o], ce.rexpr] - ce.rexpr = C::CExpression[ce.rexpr] if not ce.op and ce.rexpr.kind_of? C::Variable - when o = pscopevar[ce.lexpr]; ce.lexpr = maycast_p[varat[o], ce.lexpr] - when o = pscopevar[ce.rexpr]; ce.rexpr = maycast_p[varat[o], ce.rexpr] - when o = scopevar[ce]; ce.replace C::CExpression[maycast[varat[o], ce]] - when o = pscopevar[ce]; ce.replace C::CExpression[maycast_p[varat[o], ce]] - end - } - - fix_type_overlap(scope) - fix_pointer_arithmetic(scope) - - # if int32 var_4 is always var_4 & 255, change type to int8 - varuse = Hash.new(0) - varandff = Hash.new(0) - varandffff = Hash.new(0) - walk_ce(scope) { |ce| - if ce.op == :& and ce.lexpr.kind_of? C::Variable and ce.lexpr.type.integral? and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr.kind_of? 
::Integer - case ce.rexpr.rexpr - when 0xff; varandff[ce.lexpr.name] += 1 - when 0xffff; varandffff[ce.lexpr.name] += 1 - end - end - varuse[ce.lexpr.name] += 1 if ce.lexpr.kind_of? C::Variable - varuse[ce.rexpr.name] += 1 if ce.rexpr.kind_of? C::Variable - } - varandff.each { |k, v| - scope.symbol[k].type = C::BaseType.new(:__int8, :unsigned) if varuse[k] == v - } - varandffff.each { |k, v| - scope.symbol[k].type = C::BaseType.new(:__int16, :unsigned) if varuse[k] == v - } - - # propagate types to cexprs - walk_ce(scope, true) { |ce| - if ce.op - ce.type = C::CExpression[ce.lexpr, ce.op, ce.rexpr].type rescue next - if ce.op == :'=' and ce.rexpr.kind_of? C::Typed and ce.rexpr.type != ce.type and (not ce.rexpr.type.integral? or not ce.type.integral?) - known_type[ce.rexpr, ce.type] if ce.type.pointer? and ce.type.pointed.untypedef.kind_of? C::Function # localvar = &struct with fptr - ce.rexpr = C::CExpression[[ce.rexpr], ce.type] - end - elsif ce.type.pointer? and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :& and not ce.rexpr.lexpr and sizeof(ce.rexpr.rexpr.type) == sizeof(ce.type.pointed) - ce.type = ce.rexpr.type - end - } - end - - # struct foo { int i; int j; struct { int k; int l; } m; }; bla+12 => &bla->m.l - # st is a struct, ptr is an expr pointing to a struct, off is a numeric offset from ptr, msz is the size of the pointed member (nil ignored) - def structoffset(st, ptr, off, msz) - tabidx = off / sizeof(st) - off -= tabidx * sizeof(st) - ptr = C::CExpression[:&, [ptr, :'[]', [tabidx]]] if tabidx != 0 or ptr.type.untypedef.kind_of? C::Array - return ptr if off == 0 and (not msz or # avoid infinite recursion with eg chained list - (ptr.kind_of? C::CExpression and ((ptr.op == :& and not ptr.lexpr and s=ptr.rexpr) or (ptr.op == :'.' and s=ptr)) and - not s.type.untypedef.kind_of? C::Union)) - - m_ptr = lambda { |m| - if ptr.kind_of? 
C::CExpression and ptr.op == :& and not ptr.lexpr - C::CExpression[ptr.rexpr, :'.', m.name] - else - C::CExpression[ptr, :'->', m.name] - end - } - - # recursive proc to list all named members, including in anonymous substructs - submemb = lambda { |sm| sm.name ? sm : sm.type.kind_of?(C::Union) ? sm.type.members.to_a.map { |ssm| submemb[ssm] } : nil } - mbs = st.members.to_a.map { |m| submemb[m] }.flatten.compact - mo = mbs.inject({}) { |h, m| h.update m => st.offsetof(@c_parser, m.name) } - - if sm = mbs.find { |m| mo[m] == off and (not msz or sizeof(m) == msz) } || - mbs.find { |m| mo[m] <= off and mo[m]+sizeof(m) > off } - off -= mo[sm] - sst = sm.type.untypedef - #return ptr if mo[sm] == 0 and sst.pointer? and sst.type.untypedef == st # TODO fix infinite recursion on mutually recursive ptrs - ptr = C::CExpression[:&, m_ptr[sm]] - if sst.kind_of? C::Union - return structoffset(sst, ptr, off, msz) - end - end - - if off != 0 - C::CExpression[[[ptr], C::Pointer.new(C::BaseType.new(:__int8))], :+, [off]] - else - ptr - end - end - - # fix pointer arithmetic (eg int foo += 4 => int* foo += 1) - # use struct member access (eg *(structptr+8) => structptr->bla) - # must be run only once, right after type setting - def fix_pointer_arithmetic(scope) - walk_ce(scope, true) { |ce| - if ce.lexpr and ce.lexpr.type.pointer? and [:&, :>>, :<<].include? ce.op - ce.lexpr = C::CExpression[[ce.lexpr], C::BaseType.new(:int)] - end - - if ce.op == :+ and ce.lexpr and ((ce.lexpr.type.integral? and ce.rexpr.type.pointer?) or (ce.rexpr.type.pointer? and ce.rexpr.type.pointed.untypedef.kind_of? C::Union)) - ce.rexpr, ce.lexpr = ce.lexpr, ce.rexpr - end - - if ce.op == :* and not ce.lexpr and ce.rexpr.type.pointer? and ce.rexpr.type.pointed.untypedef.kind_of? 
C::Struct - s = ce.rexpr.type.pointed.untypedef - m = s.members.to_a.find { |m_| s.offsetof(@c_parser, m_.name) == 0 } - if sizeof(m) != sizeof(ce) - ce.rexpr = C::CExpression[[ce.rexpr, C::Pointer.new(s)], C::Pointer.new(ce.type)] - next - end - # *structptr => structptr->member - ce.lexpr = ce.rexpr - ce.op = :'->' - ce.rexpr = m.name - ce.type = m.type - next - elsif ce.op == :'=' and ce.lexpr.type.untypedef.kind_of? C::Struct - s = ce.lexpr.type.untypedef - m = s.members.to_a.find { |m_| s.offsetof(@c_parser, m_.name) == 0 } - ce.lexpr = C::CExpression.new(ce.lexpr, :'.', m.name, m.type) - ce.type = m.type - next - end - - if ce.op == :+ and ce.lexpr and ce.lexpr.type.pointer? and not ce.type.pointer? - ce.type = ce.lexpr.type - end - - if ce.op == :& and not ce.lexpr and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :* and not ce.rexpr.lexpr - ce.replace C::CExpression[ce.rexpr.rexpr] - end - - next if not ce.lexpr or not ce.lexpr.type.pointer? - if ce.op == :+ and (s = ce.lexpr.type.pointed.untypedef).kind_of? C::Union and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and - ce.rexpr.rexpr.kind_of? ::Integer and o = ce.rexpr.rexpr - # structptr + 4 => &structptr->member - ce.replace structoffset(s, ce.lexpr, o, nil) - elsif [:+, :-, :'+=', :'-='].include? ce.op and ce.rexpr.kind_of? C::CExpression and ((not ce.rexpr.op and i = ce.rexpr.rexpr) or - (ce.rexpr.op == :* and i = ce.rexpr.lexpr and ((i.kind_of? C::CExpression and not i.op and i = i.rexpr) or true))) and - i.kind_of? ::Integer and psz = sizeof(nil, ce.lexpr.type.pointed) and i % psz == 0 - # ptr += 4 => ptr += 1 - if not ce.rexpr.op - ce.rexpr.rexpr /= psz - else - ce.rexpr.lexpr.rexpr /= psz - if ce.rexpr.lexpr.rexpr == 1 - ce.rexpr = ce.rexpr.rexpr - end - end - ce.type = ce.lexpr.type - - elsif (ce.op == :+ or ce.op == :-) and sizeof(nil, ce.lexpr.type.pointed) != 1 - # ptr+x => (ptrtype*)(((__int8*)ptr)+x) - # XXX create struct ? 
- ce.rexpr = C::CExpression[ce.rexpr, C::BaseType.new(:int)] if not ce.rexpr.type.integral? - if sizeof(nil, ce.lexpr.type.pointed) != 1 - ptype = ce.lexpr.type - p = C::CExpression[[ce.lexpr], C::Pointer.new(C::BaseType.new(:__int8))] - ce.replace C::CExpression[[p, ce.op, ce.rexpr, p.type], ptype] - end - end - } - end - - # handling of var overlapping (eg __int32 var_10; __int8 var_F => replace all var_F by *(&var_10 + 1)) - # must be done before fix_pointer_arithmetic - def fix_type_overlap(scope) - varinfo = {} - scope.symbol.each_value { |var| - next if not off = var.stackoff - len = sizeof(var) - varinfo[var] = [off, len] - } - - varinfo.each { |v1, (o1, l1)| - next if not v1.type.integral? - varinfo.each { |v2, (o2, l2)| - # XXX o1 may overlap o2 AND another (int32 v_10; int32 v_E; int32 v_C;) - # TODO should check stuff with aliasing domains - next if v1.name == v2.name or o1 >= o2+l2 or o1+l1 <= o2 or l1 > l2 or (l2 == l1 and o2 >= o1) - # v1 => *(&v2+delta) - p = C::CExpression[:&, v2] - p = C::CExpression[p, :+, [o1-o2]] - p = C::CExpression[p, C::Pointer.new(v1.type)] if v1.type != p.type.type - p = C::CExpression[:*, p] - walk_ce(scope) { |ce| - ce.lexpr = p if ce.lexpr == v1 - ce.rexpr = p if ce.rexpr == v1 - } - } - } - end - - # to be run with scope = function body with only CExpr/Decl/Label/Goto/IfGoto/Return, with correct variables types - # will transform += 1 to ++, inline them to prev/next statement ('++x; if (x)..' 
=> 'if (++x)..') - # remove useless variables ('int i;', i never used or 'i = 1; j = i;', i never read after => 'j = 1;') - # remove useless casts ('(int)i' with 'int i;' => 'i') - def optimize(scope) - optimize_code(scope) - optimize_vars(scope) - optimize_vars(scope) # 1st run may transform i = i+1 into i++ which second run may coalesce into if(i) - end - - # simplify cexpressions (char & 255, redundant casts, etc) - def optimize_code(scope) - return if forbid_optimize_code - - sametype = lambda { |t1, t2| - t1 = t1.untypedef - t2 = t2.untypedef - t1 = t1.pointed.untypedef if t1.pointer? and t1.pointed.untypedef.kind_of? C::Function - t2 = t2.pointed.untypedef if t2.pointer? and t2.pointed.untypedef.kind_of? C::Function - t1 == t2 or - (t1.kind_of? C::Function and t2.kind_of? C::Function and sametype[t1.type, t2.type] and t1.args.to_a.length == t2.args.to_a.length and - t1.args.to_a.zip(t2.args.to_a).all? { |st1, st2| sametype[st1.type, st2.type] }) or - (t1.kind_of? C::BaseType and t1.integral? and t2.kind_of? C::BaseType and t2.integral? and sizeof(nil, t1) == sizeof(nil, t2)) or - (t1.pointer? and t2.pointer? and sametype[t1.type, t2.type]) - } - - # most of this is a CExpr#reduce - future_array = [] - walk_ce(scope, true) { |ce| - # (whatever)0 => 0 - if not ce.op and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 0 - ce.replace ce.rexpr - end - - # *&bla => bla if types ok - if ce.op == :* and not ce.lexpr and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :& and not ce.rexpr.lexpr and sametype[ce.rexpr.type.pointed, ce.rexpr.rexpr.type] - ce.replace C::CExpression[ce.rexpr.rexpr] - end - - # int x + 0xffffffff -> x-1 - if ce.lexpr and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and [:+, :-, :'+=', :'-=', :'!=', :==, :>, :<, :>=, :<=].include? 
ce.op and - ce.rexpr.rexpr == (1 << (8*sizeof(ce.lexpr)))-1 - ce.op = {:+ => :-, :- => :+, :'+=' => :'-=', :'-=' => :'+='}[ce.op] - ce.rexpr.rexpr = 1 - end - - # int *ptr; *(ptr + 4) => ptr[4] - if ce.op == :* and not ce.lexpr and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :+ and var = ce.rexpr.lexpr and var.kind_of? C::Variable and var.type.pointer? - ce.lexpr, ce.op, ce.rexpr = ce.rexpr.lexpr, :'[]', ce.rexpr.rexpr - future_array << var.name - end - - # char x; x & 255 => x - if ce.op == :& and ce.lexpr and (ce.lexpr.type.integral? or ce.lexpr.type.pointer?) and ce.rexpr.kind_of? C::CExpression and - not ce.rexpr.op and ce.rexpr.rexpr.kind_of? ::Integer and m = (1 << (8*sizeof(ce.lexpr))) - 1 and - ce.rexpr.rexpr & m == m - ce.replace C::CExpression[ce.lexpr] - end - - # a + -b => a - b - if ce.op == :+ and ce.lexpr and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :- and not ce.rexpr.lexpr - ce.op, ce.rexpr = :-, ce.rexpr.rexpr - end - - # (((int) i >> 31) & 1) => i < 0 - if ce.op == :& and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 1 and - ce.lexpr.kind_of? C::CExpression and ce.lexpr.op == :>> and ce.lexpr.rexpr.kind_of? C::CExpression and - not ce.lexpr.rexpr.op and ce.lexpr.rexpr.rexpr == sizeof(ce.lexpr.lexpr) * 8 - 1 - ce.replace C::CExpression[ce.lexpr.lexpr, :<, [0]] - end - - # a-b == 0 => a == b - if ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 0 and [:==, :'!=', :<, :>, :<=, :>=].include? ce.op and - ce.lexpr.kind_of? C::CExpression and ce.lexpr.op == :- and ce.lexpr.lexpr - ce.lexpr, ce.rexpr = ce.lexpr.lexpr, ce.lexpr.rexpr - end - - # (a > 0) != 0 - if ce.op == :'!=' and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 0 and ce.lexpr.kind_of? C::CExpression and - [:<, :<=, :>, :>=, :'==', :'!=', :'!'].include? 
ce.lexpr.op - ce.replace ce.lexpr - end - - # (a < b) != ( [(a < 0) == !(b < 0)] && [(a < 0) != (a < b)] ) => jl - # a true if !r => a<0 == b<0 or a>=0 => a>=0 or b>=0 - # a>=b => true if r => a<0 == b>=0 and a<0 => a<0 and b>=0 - - # x != (a && (b != x)) => [x && (!a || b)] || [!x && !(!a || b)] - if ce.op == :'!=' and ce.lexpr.kind_of? C::CExpression and ce.lexpr.op == :< and ce.rexpr.kind_of? C::CExpression and - ce.rexpr.op == :'&&' and ce.rexpr.rexpr.kind_of? C::CExpression and ce.rexpr.rexpr.op == :'!=' and - ce.rexpr.rexpr.rexpr == ce.lexpr and not walk_ce(ce) { |ce_| break true if ce_.op == :funcall } - x, a, b = ce.lexpr, ce.rexpr.lexpr, ce.rexpr.rexpr.lexpr - ce.replace C::CExpression[ [x, :'&&', [[:'!',a],:'||',b]] , :'||', [[:'!', x], :'&&', [:'!', [[:'!',a],:'||',b]]] ] - optimize_code(ce) - end - # (a != b) || a => a || b - if ce.op == :'||' and ce.lexpr.kind_of? C::CExpression and ce.lexpr.op == :'!=' and ce.lexpr.lexpr == ce.rexpr and not walk_ce(ce) { |ce_| break true if ce_.op == :funcall } - ce.lexpr, ce.rexpr = ce.rexpr, ce.lexpr.rexpr - optimize_code(ce) - end - # (a=0 && b<0) || (a>=b) && (a>=0 && b<0) => (signed)a < (signed)b - if ce.op == :'||' and ce.lexpr.kind_of? C::CExpression and ce.rexpr.kind_of? C::CExpression and ce.lexpr.op == :'&&' and ce.rexpr.op == :'&&' and - ce.lexpr.lexpr.kind_of? C::CExpression and ce.lexpr.lexpr.op == :< - a, b = ce.lexpr.lexpr.lexpr, ce.lexpr.lexpr.rexpr - if ce.lexpr.rexpr === C::CExpression[[a, :'>=', [0]], :'&&', [b, :'<', [0]]].negate and - ce.rexpr.lexpr === ce.lexpr.lexpr.negate and ce.rexpr.rexpr === ce.lexpr.rexpr.negate - ce.replace C::CExpression[a, :'<', b] - end - end - # a && 1 - if (ce.op == :'||' or ce.op == :'&&') and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr.kind_of? 
::Integer - if ((ce.op == :'||' and ce.rexpr.rexpr == 0) or (ce.op == :'&&' and ce.rexpr.rexpr != 0)) - ce.replace C::CExpression[ce.lexpr] - elsif not walk_ce(ce) { |ce_| break true if ce.op == :funcall } # cannot wipe if sideeffect - ce.replace C::CExpression[[ce.op == :'||' ? 1 : 0]] - end - end - # (b < c || b >= c) - if (ce.op == :'||' or ce.op == :'&&') and C::CExpression.negate(ce.lexpr) == C::CExpression[ce.rexpr] - ce.replace C::CExpression[[(ce.op == :'||') ? 1 : 0]] - end - - # (a < b) | (a == b) => a <= b - if ce.op == :| and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :== and ce.lexpr.kind_of? C::CExpression and - (ce.lexpr.op == :< or ce.lexpr.op == :>) and ce.lexpr.lexpr == ce.rexpr.lexpr and ce.lexpr.rexpr == ce.rexpr.rexpr - ce.op = {:< => :<=, :> => :>=}[ce.lexpr.op] - ce.lexpr, ce.rexpr = ce.lexpr.lexpr, ce.lexpr.rexpr - end - - # a == 0 => !a - if ce.op == :== and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 0 - ce.lexpr, ce.op, ce.rexpr = nil, :'!', ce.lexpr - end - - if ce.op == :'!' and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr.kind_of? ::Integer - ce.replace C::CExpression[[ce.rexpr.rexpr == 0 ? 1 : 0]] - end - - # !(bool) => bool - if ce.op == :'!' and ce.rexpr.kind_of? C::CExpression and [:'==', :'!=', :<, :>, :<=, :>=, :'||', :'&&', :'!'].include? ce.rexpr.op - ce.replace ce.rexpr.negate - end - - # (foo)(bar)x => (foo)x - if not ce.op and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr.kind_of? C::CExpression - ce.rexpr = ce.rexpr.rexpr - end - - # &struct.1stmember => &struct - if ce.op == :& and not ce.lexpr and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :'.' and s = ce.rexpr.lexpr.type and - s.kind_of? C::Union and s.offsetof(@c_parser, ce.rexpr.rexpr) == 0 - ce.rexpr = ce.rexpr.lexpr - ce.type = C::Pointer.new(ce.rexpr.type) - end - - # (1stmember*)structptr => &structptr->1stmember - if not ce.op and ce.type.pointer? 
and not ce.type.pointed.void? and ce.rexpr.kind_of? C::Typed and ce.rexpr.type.pointer? and - s = ce.rexpr.type.pointed.untypedef and s.kind_of? C::Union and ce.type.pointed.untypedef != s - ce.rexpr = C::CExpression[structoffset(s, ce.rexpr, 0, sizeof(ce.type.pointed))] - #ce.replace ce.rexpr if not ce.type.pointed.untypedef.kind_of? C::Function or (ce.rexpr.type.pointer? and - #ce.rexpr.type.pointed.untypedef.kind_of? C::Function) # XXX ugly - # int32* v1 = (int32*)pstruct; - # z = v1+4 if v1 is not cast, the + is invalid (sizeof pointed changes) - # TODO when finding type of pstruct, set type of v1 accordingly - end - - # (&foo)->bar => foo.bar - if ce.op == :'->' and ce.lexpr.kind_of? C::CExpression and ce.lexpr.op == :& and not ce.lexpr.lexpr - ce.lexpr = ce.lexpr.rexpr - ce.op = :'.' - end - - # (foo)bla => bla if bla of type foo - if not ce.op and ce.rexpr.kind_of? C::Typed and sametype[ce.type, ce.rexpr.type] - ce.replace C::CExpression[ce.rexpr] - end - if ce.lexpr.kind_of? C::CExpression and not ce.lexpr.op and ce.lexpr.rexpr.kind_of? C::Variable and ce.lexpr.type == ce.lexpr.rexpr.type - ce.lexpr = ce.lexpr.rexpr - end - - if ce.op == :'=' and ce.lexpr.kind_of? C::CExpression and ce.lexpr.op == :* and not ce.lexpr.lexpr and ce.lexpr.rexpr.kind_of? C::CExpression and - not ce.lexpr.rexpr.op and ce.lexpr.rexpr.type.pointer? and ce.lexpr.rexpr.type.pointed != ce.rexpr.type - ce.lexpr.rexpr.type = C::Pointer.new(ce.rexpr.type) - optimize_code(ce.lexpr) - end - } - - # if there is a ptr[4], change all *ptr to ptr[0] for consistency - # do this after the first pass, which may change &*ptr to ptr - walk_ce(scope) { |ce| - if ce.op == :* and not ce.lexpr and ce.rexpr.kind_of? C::Variable and future_array.include? ce.rexpr.name - ce.lexpr, ce.op, ce.rexpr = ce.rexpr, :'[]', C::CExpression[0] - end - } if not future_array.empty? - - # if (x != 0) => if (x) - walk(scope) { |st| - if st.kind_of? C::If and st.test.kind_of? 
C::CExpression and st.test.op == :'!=' and - st.test.rexpr.kind_of? C::CExpression and not st.test.rexpr.op and st.test.rexpr.rexpr == 0 - st.test = C::CExpression[st.test.lexpr] - end - } - end - - # checks if an expr has sideeffects (funcall, var assignment, mem dereference, use var out of scope if specified) - def sideeffect(exp, scope=nil) - case exp - when nil, ::Numeric, ::String; false - when ::Array; exp.any? { |_e| sideeffect _e, scope } - when C::Variable; (scope and not scope.symbol[exp.name]) or exp.type.qualifier.to_a.include? :volatile - when C::CExpression; (exp.op == :* and not exp.lexpr) or exp.op == :funcall or AssignOp.include?(exp.op) or - sideeffect(exp.lexpr, scope) or sideeffect(exp.rexpr, scope) - else true # failsafe - end - end - - # converts C code to a graph of cexprs (nodes = cexprs, edges = codepaths) - # returns a CGraph - class CGraph - # exprs: label => [exprs], to: label => [labels], block: label => are exprs standalone (vs If#test), start: 1st label - attr_accessor :exprs, :to, :block, :start, :to_optim, :from_optim - end - def c_to_graph(st) - g = CGraph.new - g.exprs = {} # label => [exprs] - g.to = {} # label => [labels] - g.block = {} # label => is label in a block? 
(vs If#test) - anon_label = 0 # when no label is there, use anon_label++ - # converts C code to a graph of codepath of cexprs - to_graph = lambda { |stmt, l_cur, l_after, l_cont, l_break| - case stmt - when C::Label; g.to[l_cur] = [stmt.name] ; g.to[stmt.name] = [l_after] - when C::Goto; g.to[l_cur] = [stmt.target] - when C::Continue; g.to[l_cur] = [l_cont] - when C::Break; g.to[l_cur] = [l_break] - when C::CExpression - g.exprs[l_cur] = [stmt] - g.to[l_cur] = [l_after] - when C::Return - g.exprs[l_cur] = [stmt.value] if stmt.value - g.to[l_cur] = [] - when C::Block - to_graph[stmt.statements, l_cur, l_after, l_cont, l_break] - when ::Array - g.exprs[l_cur] = [] - g.block[l_cur] = true - stmt.each_with_index { |s, i| - case s - when C::Declaration - when C::CExpression - g.exprs[l_cur] << s - else - l = anon_label += 1 - ll = anon_label += 1 - g.to[l_cur] = [l] - g.block[l_cur] = true - to_graph[stmt[i], l, ll, l_cont, l_break] - l_cur = ll - g.exprs[l_cur] = [] - end - } - g.to[l_cur] = [l_after].compact - when C::If - g.exprs[l_cur] = [stmt.test] - lt = anon_label += 1 - to_graph[stmt.bthen, lt, l_after, l_cont, l_break] - le = anon_label += 1 - to_graph[stmt.belse, le, l_after, l_cont, l_break] - g.to[l_cur] = [lt, le] - when C::While, C::DoWhile - la = anon_label += 1 - if stmt.kind_of? 
C::DoWhile - lt, lb = la, l_cur - else - lt, lb = l_cur, la - end - g.exprs[lt] = [stmt.test] - g.to[lt] = [lb, l_after] - to_graph[stmt.body, lb, lt, lt, l_after] - when C::Asm, nil; g.to[l_cur] = [l_after] - else puts "to_graph unhandled #{stmt.class}: #{stmt}" if $VERBOSE - end - } - - g.start = anon_label - to_graph[st, g.start, nil, nil, nil] - - # optimize graph - g.to_optim = {} - g.to.each { |k, v| g.to_optim[k] = v.uniq } - g.exprs.delete_if { |k, v| v == [] } - g.to_optim.delete_if { |k, v| - if v.length == 1 and not g.exprs[k] and v != [k] - g.to_optim.each_value { |t| if i = t.index(k) ; t[i] = v.first ; end } - true - elsif v.length == 0 and not g.exprs[k] - g.to_optim.each_value { |t| t.delete k } - true - end - } - - g.from_optim = {} - g.to_optim.each { |k, v| v.each { |t| (g.from_optim[t] ||= []) << k } } - - g - end - - # dataflow optimization - # condenses expressions (++x; if (x) => if (++x)) - # remove local var assignment (x = 1; f(x); x = 2; g(x); => f(1); g(2); etc) - def optimize_vars(scope) - return if forbid_optimize_dataflow - - g = c_to_graph(scope) - - # walks a cexpr in evaluation order (not strictly, but this is not strictly defined anyway..) - # returns the first subexpr to read var in ce - # returns :write if var is rewritten - # returns nil if var not read - # may return a cexpr var += 2 - find_next_read_ce = lambda { |ce_, var| - walk_ce(ce_, true) { |ce| - case ce.op - when :funcall - break ce if ce.lexpr == var or ce.rexpr.find { |a| a == var } - when :'=' - # a=a / a=a+1 => yield a, not :write - break ce if ce.rexpr == var - break :write if ce.lexpr == var - else - break ce if ce.lexpr == var or ce.rexpr == var - end - } - } - - # badlabels is a list of labels that may be reached without passing through the first invocation block - find_next_read_rec = lambda { |label, idx, var, done, badlabels| - next if done.include? 
label - done << label if idx == 0 - - idx += 1 while ce = g.exprs[label].to_a[idx] and not ret = find_next_read_ce[ce, var] - next ret if ret - - to = g.to_optim[label].to_a.map { |t| - break [:split] if badlabels.include? t - find_next_read_rec[t, 0, var, done, badlabels] - }.compact - - tw = to - [:write] - if to.include? :split or tw.length > 1 - :split - elsif tw.length == 1 - tw.first - elsif to.include? :write - :write - end - } - # return the previous subexpr reading var with no fwd path to another reading (otherwise split), see loop comment for reason - find_next_read = nil - find_prev_read_rec = lambda { |label, idx, var, done| - next if done.include? label - done << label if idx == g.exprs[label].length-1 - - idx -= 1 while idx >= 0 and ce = g.exprs[label].to_a[idx] and not ret = find_next_read_ce[ce, var] - if ret.kind_of? C::CExpression - fwchk = find_next_read[label, idx+1, var] - ret = fwchk if not fwchk.kind_of? C::CExpression - end - next ret if ret - - from = g.from_optim[label].to_a.map { |f| - find_prev_read_rec[f, g.exprs[f].to_a.length-1, var, done] - }.compact - - next :split if from.include? :split - fw = from - [:write] - if fw.length == 1 - fw.first - elsif fw.length > 1 - :split - elsif from.include? :write - :write - end - } - - # list of labels reachable without using a label - badlab = {} - build_badlabel = lambda { |label| - next if badlab[label] - badlab[label] = [] - todo = [g.start] - while l = todo.pop - next if l == label or badlab[label].include? 
l - badlab[label] << l - todo.concat g.to_optim[l].to_a - end - } - - # returns the next subexpr where var is read - # returns :write if var is written before being read - # returns :split if the codepath splits with both subpath reading or codepath merges with another - # returns nil if var is never read - # idx is the index of the first cexpr at g.exprs[label] to look at - find_next_read = lambda { |label, idx, var| - find_next_read_rec[label, idx, var, [], []] - } - find_prev_read = lambda { |label, idx, var| - find_prev_read_rec[label, idx, var, []] - } - # same as find_next_read, but returns :split if there exist a path from g.start to the read without passing through label - find_next_read_bl = lambda { |label, idx, var| - build_badlabel[label] - find_next_read_rec[label, idx, var, [], badlab[label]] - } - - # walk each node, optimize data accesses there - # replace no longer useful exprs with CExpr[nil, nil, nil], those are wiped later. - g.exprs.each { |label, exprs| - next if not g.block[label] - i = 0 - while i < exprs.length - e = exprs[i] - i += 1 - - # TODO x = x + 1 => x += 1 => ++x here, move all other optimizations after (in optim_code) - # needs also int & 0xffffffff -> int, *&var etc (decomp_type? optim_type?) - if (e.op == :'++' or e.op == :'--') and v = (e.lexpr || e.rexpr) and v.kind_of? C::Variable and - scope.symbol[v.name] and not v.type.qualifier.to_a.include? :volatile - next if !((pos = :post.to_sym) and (oe = find_next_read_bl[label, i, v]) and oe.kind_of? C::CExpression) and - !((pos = :prev.to_sym) and (oe = find_prev_read[label, i-2, v]) and oe.kind_of? 
C::CExpression) - next if oe.op == :& and not oe.lexpr # no &(++eax) - - # merge pre/postincrement into next/prev var usage - # find_prev_read must fwd check when it finds something, to avoid - # while(x) x++; return x; to be converted to while(x++); return x; (return wrong value) - case oe.op - when e.op - # bla(i--); --i bla(--i); --i ++i; bla(i++) => ignore - next if pos == :pre or oe.lexpr - # ++i; bla(++i) => bla(i += 2) - oe.lexpr = oe.rexpr - oe.op = ((oe.op == :'++') ? :'+=' : :'-=') - oe.rexpr = C::CExpression[2] - - when :'++', :'--' # opposite of e.op - if (pos == :post and not oe.lexpr) or (pos == :pre and not oe.rexpr) - # ++i; bla(--i) => bla(i) - # bla(i--); ++i => bla(i) - oe.op = nil - elsif pos == :post - # ++i; bla(i--) => bla(i+1) - oe.op = ((oe.op == :'++') ? :- : :+) - oe.rexpr = C::CExpression[1] - elsif pos == :pre - # bla(--i); ++i => bla(i-1) - oe.lexpr = oe.rexpr - oe.op = ((oe.op == :'++') ? :+ : :-) - oe.rexpr = C::CExpression[1] - end - when :'+=', :'-=' - # TODO i++; i += 4 => i += 5 - next - when *AssignOp - next # ++i; i |= 4 => ignore - else - if pos == :post and v == oe.lexpr; oe.lexpr = C::CExpression[e.op, v] - elsif pos == :post and v == oe.rexpr; oe.rexpr = C::CExpression[e.op, v] - elsif pos == :prev and v == oe.rexpr; oe.rexpr = C::CExpression[v, e.op] - elsif pos == :prev and v == oe.lexpr; oe.lexpr = C::CExpression[v, e.op] - else raise 'foobar' # find_dir_read failed - end - end - - i -= 1 - exprs.delete_at(i) - e.lexpr = e.op = e.rexpr = nil - - - elsif e.op == :'=' and v = e.lexpr and v.kind_of? C::Variable and scope.symbol[v.name] and - not v.type.qualifier.to_a.include? :volatile and not find_next_read_ce[e.rexpr, v] - - # reduce trivial static assignments - if (e.rexpr.kind_of? C::CExpression and iv = e.rexpr.reduce(@c_parser) and iv.kind_of? ::Integer) or - (e.rexpr.kind_of? C::CExpression and e.rexpr.op == :& and not e.rexpr.lexpr and e.rexpr.lexpr.kind_of? C::Variable) or - (e.rexpr.kind_of? 
C::Variable and e.rexpr.type.kind_of? C::Array) - rewritten = false - readers = [] - discard = [e] - g.exprs.each { |l, el| - el.each_with_index { |ce, ci| - if ce_write(ce, v) and [label, i-1] != [l, ci] - if ce == e - discard << ce - else - rewritten = true - break - end - elsif ce_read(ce, v) - if walk_ce(ce) { |_ce| break true if _ce.op == :& and not _ce.lexpr and _ce.rexpr == v } - # i = 2 ; j = &i =!> j = &2 - rewritten = true - break - end - readers << ce - end - } if not rewritten - } - if not rewritten - ce_patch(readers, v, C::CExpression[iv || e.rexpr]) - discard.each { |d| d.lexpr = d.op = d.rexpr = nil } - next - end - end - - case nr = find_next_read[label, i, v] - when C::CExpression - # read in one place only, try to patch rexpr in there - r = e.rexpr - - # must check for conflicts (x = y; y += 1; foo(x) =!> foo(y)) - # XXX x = a[1]; *(a+1) = 28; foo(x)... - isfunc = false - depend_vars = [] - walk_ce(C::CExpression[r]) { |ce| - isfunc = true if ce.op == :func and (not ce.lexpr.kind_of? C::Variable or - not ce.lexpr.has_attribute('pure')) # XXX is there a C attr for func depending only on staticvars+param ? - depend_vars << ce.lexpr if ce.lexpr.kind_of? C::Variable - depend_vars << ce.rexpr if ce.rexpr.kind_of? C::Variable and (ce.lexpr or ce.op != :&) # a = &v; v = 12; func(a) => func(&v) - depend_vars << ce if ce.lvalue? - depend_vars.concat(ce.rexpr.grep(C::Variable)) if ce.rexpr.kind_of? ::Array - } - depend_vars.uniq! 
- - # XXX x = 1; if () { x = 2; } foo(x) =!> foo(1) (find_next_read will return this) - # we'll just redo a find_next_read like - # XXX b = &a; a = 1; *b = 2; foo(a) unhandled & generate bad C - l_l = label - l_i = i - while g.exprs[l_l].to_a.each_with_index { |ce_, n_i| - next if n_i < l_i - # count occurences of read v in ce_ - cnt = 0 - bad = false - walk_ce(ce_) { |ce| - case ce.op - when :funcall - bad = true if isfunc - ce.rexpr.each { |a| cnt += 1 if a == v } - cnt += 1 if ce.lexpr == v - when :'=' - bad = true if depend_vars.include? ce.lexpr - cnt += 1 if ce.rexpr == v - else - bad = true if (ce.op == :'++' or ce.op == :'--') and depend_vars.include? ce.rexpr - bad = true if AssignOp.include? ce.op and depend_vars.include? ce.lexpr - cnt += 1 if ce.lexpr == v - cnt += 1 if ce.rexpr == v - end - } - case cnt - when 0 - break if bad - next - when 1 # good - break if e.complexity > 10 and ce_.complexity > 3 # try to keep the C readable - # x = 1; y = x; z = x; => cannot suppress x - nr = find_next_read[l_l, n_i+1, v] - break if (nr.kind_of? C::CExpression or nr == :split) and not walk_ce(ce_) { |ce| break true if ce.op == :'=' and ce.lexpr == v } - else break # a = 1; b = a + a => fail - end - - # TODO XXX x = 1; y = x; z = x; - res = walk_ce(ce_, true) { |ce| - case ce.op - when :funcall - if ce.rexpr.to_a.each_with_index { |a,i_| - next if a != v - ce.rexpr[i_] = r - break :done - } == :done - break :done - elsif ce.lexpr == v - ce.lexpr = r - break :done - elsif isfunc - break :fail - end - when *AssignOp - break :fail if not ce.lexpr and depend_vars.include? ce.rexpr # ++depend - if ce.rexpr == v - ce.rexpr = r - break :done - elsif ce.lexpr == v or depend_vars.include? 
ce.lexpr - break :fail - end - else - break :fail if ce.op == :& and not ce.lexpr and ce.rexpr == v - if ce.lexpr == v - ce.lexpr = r - break :done - elsif ce.rexpr == v - ce_.type = r.type if not ce_.op and ce_.rexpr == v # return (int32)eax - ce.rexpr = r - break :done - end - end - } - case res - when :done - i -= 1 - exprs.delete_at(i) - e.lexpr = e.op = e.rexpr = nil - break - when :fail - break - end - } - # ignore branches that will never reuse v - may_to = g.to_optim[l_l].find_all { |to| find_next_read[to, 0, v].kind_of? C::CExpression } - if may_to.length == 1 and to = may_to.first and to != l_l and g.from_optim[to] == [l_l] - l_i = 0 - l_l = to - else break - end - end - - when nil, :write - # useless assignment (value never read later) - # XXX foo = &bar; bar = 12; baz(*foo) - e.replace(C::CExpression[e.rexpr]) - # remove sideeffectless subexprs - loop do - case e.op - when :funcall, *AssignOp - else - l = (e.lexpr.kind_of? C::CExpression and sideeffect(e.lexpr)) - r = (e.rexpr.kind_of? C::CExpression and sideeffect(e.rexpr)) - if l and r # could split... - elsif l - e.replace(e.lexpr) - next - elsif r - e.replace(e.rexpr) - next - else # remove the assignment altogether - i -= 1 - exprs.delete_at(i) - e.lexpr = e.op = e.rexpr = nil - end - end - break - end - end - end - end - } - - # wipe cexprs marked in the previous step - walk(scope) { |st| - next if not st.kind_of? C::Block - st.statements.delete_if { |e| e.kind_of? C::CExpression and not e.lexpr and not e.op and not e.rexpr } - } - - # reoptimize cexprs - walk_ce(scope, true) { |ce| - # redo some simplification that may become available after variable propagation - # int8 & 255 => int8 - if ce.op == :& and ce.lexpr and ce.lexpr.type.integral? and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == (1 << (8*sizeof(ce.lexpr))) - 1 - ce.replace C::CExpression[ce.lexpr] - end - - # int *ptr; *(ptr + 4) => ptr[4] - if ce.op == :* and not ce.lexpr and ce.rexpr.kind_of? 
C::CExpression and ce.rexpr.op == :+ and var = ce.rexpr.lexpr and var.kind_of? C::Variable and var.type.pointer? - ce.lexpr, ce.op, ce.rexpr = ce.rexpr.lexpr, :'[]', ce.rexpr.rexpr - end - - # useless casts - if not ce.op and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and (ce.rexpr.rexpr.kind_of? C::CExpression or - (ce.type.pointer? and ce.rexpr.rexpr == 0 and not ce.type.pointed.untypedef.kind_of? C::Union)) # keep ((struct*)0)->memb - ce.rexpr = ce.rexpr.rexpr - end - if not ce.op and ce.rexpr.kind_of? C::CExpression and (ce.type == ce.rexpr.type or (ce.type.integral? and ce.rexpr.type.integral?)) - ce.replace ce.rexpr - end - # useless casts (type)*((oeua)Ptype) - if not ce.op and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :* and not ce.rexpr.lexpr and ce.rexpr.rexpr.kind_of? C::CExpression and not ce.rexpr.rexpr.op and - p = ce.rexpr.rexpr.rexpr and p.kind_of? C::Typed and p.type.pointer? and ce.type == p.type.pointed - ce.op = ce.rexpr.op - ce.rexpr = ce.rexpr.rexpr.rexpr - end - # (a > 0) != 0 - if ce.op == :'!=' and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 0 and ce.lexpr.kind_of? C::CExpression and - [:<, :<=, :>, :>=, :'==', :'!=', :'!'].include? ce.lexpr.op - ce.replace ce.lexpr - end - # a == 0 => !a - if ce.op == :== and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 0 - ce.replace C::CExpression[:'!', ce.lexpr] - end - # !(int)a => !a - if ce.op == :'!' and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr.kind_of? C::CExpression - ce.rexpr = ce.rexpr.rexpr - end - # (int)a < (int)b => a < b TODO uint <-> int - if [:<, :<=, :>, :>=].include? ce.op and ce.rexpr.kind_of? C::CExpression and ce.lexpr.kind_of? C::CExpression and not ce.rexpr.op and not ce.lexpr.op and - ce.rexpr.rexpr.kind_of? C::CExpression and ce.rexpr.rexpr.type.pointer? and ce.lexpr.rexpr.kind_of? C::CExpression and ce.lexpr.rexpr.type.pointer? 
- ce.rexpr = ce.rexpr.rexpr - ce.lexpr = ce.lexpr.rexpr - end - - # a & 3 & 1 - while (ce.op == :& or ce.op == :|) and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr.kind_of? ::Integer and - ce.lexpr.kind_of? C::CExpression and ce.lexpr.op == ce.op and ce.lexpr.lexpr and - ce.lexpr.rexpr.kind_of? C::CExpression and ce.lexpr.rexpr.rexpr.kind_of? ::Integer - ce.lexpr, ce.rexpr.rexpr = ce.lexpr.lexpr, ce.lexpr.rexpr.rexpr.send(ce.op, ce.rexpr.rexpr) - end - - # x = x | 4 => x |= 4 - if ce.op == :'=' and ce.rexpr.kind_of? C::CExpression and [:+, :-, :*, :/, :|, :&, :^, :>>, :<<].include? ce.rexpr.op and ce.rexpr.lexpr == ce.lexpr - ce.op = (ce.rexpr.op.to_s + '=').to_sym - ce.rexpr = ce.rexpr.rexpr - end - - # x += 1 => ++x - if (ce.op == :'+=' or ce.op == :'-=') and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 1 - ce.lexpr, ce.op, ce.rexpr = nil, {:'+=' => :'++', :'-=' => :'--'}[ce.op], ce.lexpr - end - - # --x+1 => x-- - if (ce.op == :+ or ce.op == :-) and ce.lexpr.kind_of? C::CExpression and ce.lexpr.op == {:+ => :'--', :- => :'++'}[ce.op] and - ce.lexpr.rexpr and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 1 - ce.lexpr, ce.op, ce.rexpr = ce.lexpr.rexpr, ce.lexpr.op, nil - end - } - end - - def remove_unreferenced_vars(scope) - used = {} - walk_ce(scope) { |ce| - # remove unreferenced local vars - used[ce.rexpr.name] = true if ce.rexpr.kind_of? C::Variable - used[ce.lexpr.name] = true if ce.lexpr.kind_of? C::Variable - ce.rexpr.each { |v| used[v.name] = true if v.kind_of? C::Variable } if ce.rexpr.kind_of?(::Array) - } - unused = scope.symbol.keys.find_all { |n| not used[n] } - unused.each { |v| scope.symbol[v].add_attribute 'unused' } # fastcall args need it - scope.statements.delete_if { |sm| sm.kind_of? C::Declaration and unused.include? sm.var.name } - scope.symbol.delete_if { |n, v| unused.include? 
n } - end - - def finalize - optimize_global - true - end - - def optimize_global - # check all global vars (pointers to global data) - tl = @c_parser.toplevel - vars = tl.symbol.keys.find_all { |k| tl.symbol[k].kind_of? C::Variable and not tl.symbol[k].type.kind_of? C::Function } - countref = Hash.new(0) - - walk_ce(tl) { |ce| - # XXX int foo; void bar() { int foo; } => false negative - countref[ce.rexpr.name] += 1 if ce.rexpr.kind_of? C::Variable - countref[ce.lexpr.name] += 1 if ce.lexpr.kind_of? C::Variable - } - - vars.delete_if { |v| countref[v] == 0 } - countref.delete_if { |k, v| not vars.include? k } - - # by default globals are C::Arrays - # if all references are *foo, dereference the var type - # TODO allow foo to appear (change to &foo) (but still disallow casts/foo+12 etc) - countderef = Hash.new(0) - walk_ce(tl) { |ce| - if ce.op == :* and not ce.lexpr - r = ce.rexpr - elsif ce.op == :'->' - r = C::CExpression[ce.lexpr] - else next - end - # compare type.type cause var is an Array and the cast is a Pointer - countderef[r.rexpr.name] += 1 if r.kind_of? C::CExpression and not r.op and r.rexpr.kind_of? C::Variable and - sizeof(nil, r.type.type) == sizeof(nil, r.rexpr.type.type) rescue nil - } - vars.each { |n| - if countref[n] == countderef[n] - v = tl.symbol[n] - target = C::CExpression[:*, [v]] - v.type = v.type.type - v.initializer = v.initializer.first if v.initializer.kind_of? ::Array - walk_ce(tl) { |ce| - if ce.op == :'->' and C::CExpression[ce.lexpr] == C::CExpression[v] - ce.op = :'.' - elsif ce.lexpr == target - ce.lexpr = v - end - ce.rexpr = v if ce.rexpr == target - ce.lexpr, ce.op, ce.rexpr = nil, nil, v if ce == target - } - end - } - - # if a global var appears only in one function, make it a static variable - tl.statements.each { |st| - next if not st.kind_of? C::Declaration or not st.var.type.kind_of? 
C::Function or not scope = st.var.initializer - localcountref = Hash.new(0) - walk_ce(scope) { |ce| - localcountref[ce.rexpr.name] += 1 if ce.rexpr.kind_of? C::Variable - localcountref[ce.lexpr.name] += 1 if ce.lexpr.kind_of? C::Variable - } - - vars.delete_if { |n| - next if scope.symbol[n] - next if localcountref[n] != countref[n] - v = tl.symbol.delete(n) - tl.statements.delete_if { |d| d.kind_of? C::Declaration and d.var.name == n } - - if countref[n] == 1 and v.initializer.kind_of? C::CExpression and v.initializer.rexpr.kind_of? String - walk_ce(scope) { |ce| - if ce.rexpr.kind_of? C::Variable and ce.rexpr.name == n - if not ce.op - ce.replace v.initializer - else - ce.rexpr = v.initializer - end - elsif ce.lexpr.kind_of? C::Variable and ce.lexpr.name == n - ce.lexpr = v.initializer - end - } - else - v.storage = :static - scope.symbol[v.name] = v - scope.statements.unshift C::Declaration.new(v) - end - - true - } - } - end - - # reorder statements to put decl first, move assignments to decl, move args to func prototype - def cleanup_var_decl(scope, func) - scope.symbol.each_value { |v| v.type = C::BaseType.new(:int) if v.type.void? } - - args = func.type.args - decl = [] - scope.statements.delete_if { |sm| - next if not sm.kind_of? C::Declaration - if sm.var.stackoff.to_i > 0 and sm.var.name !~ /_a(\d+)$/ # aliased vars: use 1st domain only - args << sm.var - else - decl << sm - end - true - } - - # move trivial affectations to initialiser - # XXX a = 1 ; b = a ; a = 2 - go = true # break from delete_if does not delete.. - scope.statements.delete_if { |st| - if go and st.kind_of? C::CExpression and st.op == :'=' and st.rexpr.kind_of? C::CExpression and not st.rexpr.op and - st.rexpr.rexpr.kind_of? ::Integer and st.lexpr.kind_of? 
C::Variable and scope.symbol[st.lexpr.name] - st.lexpr.initializer = st.rexpr - else - go = false - end - } - - # reorder declarations - scope.statements[0, 0] = decl.sort_by { |sm| [-sm.var.stackoff.to_i, sm.var.name] } - - # ensure arglist has no hole (create&add unreferenced args) - func.type.args = [] - argoff = @c_parser.typesize[:ptr] - args.sort_by { |sm| sm.stackoff.to_i }.each { |a| - # XXX misalignment ? - if not curoff = a.stackoff - func.type.args << a # __fastcall - next - end - while curoff > argoff - wantarg = C::Variable.new - wantarg.name = scope.decompdata[:stackoff_name][argoff] || stackoff_to_varname(argoff) - wantarg.type = C::BaseType.new(:int) - wantarg.attributes = ['unused'] - func.type.args << wantarg - scope.symbol[wantarg.name] = wantarg - argoff += @c_parser.typesize[:ptr] - end - func.type.args << a - argoff += @c_parser.typesize[:ptr] - } - end - - # rename local variables from subfunc arg names - def rename_variables(scope) - funcs = [] - cntrs = [] - cmpi = [] - - walk_ce(scope) { |ce| - funcs << ce if ce.op == :funcall - cntrs << (ce.lexpr || ce.rexpr) if ce.op == :'++' - cmpi << ce.lexpr if [:<, :>, :<=, :>=, :==, :'!='].include? ce.op and ce.rexpr.kind_of? C::CExpression and ce.rexpr.rexpr.kind_of? ::Integer - } - - rename = lambda { |var, name| - var = var.rexpr if var.kind_of? C::CExpression and not var.op - next if not var.kind_of? C::Variable or not scope.symbol[var.name] or not name - next if (var.name !~ /^(var|arg)_/ and not var.storage == :register) or not scope.symbol[var.name] or name =~ /^(var|arg)_/ - s = scope.symbol_ancestors - n = name - i = 0 - n = name + "#{i+=1}" while s[n] - scope.symbol[n] = scope.symbol.delete(var.name) - var.name = n - } - - funcs.each { |ce| - next if not ce.lexpr.kind_of? C::Variable or not ce.lexpr.type.kind_of? C::Function - ce.rexpr.to_a.zip(ce.lexpr.type.args.to_a).each { |a, fa| rename[a, fa.name] if fa } - } - funcs.each { |ce| - next if not ce.lexpr.kind_of? 
C::Variable or not ce.lexpr.type.kind_of? C::Function - ce.rexpr.to_a.zip(ce.lexpr.type.args.to_a).each { |a, fa| - next if not a.kind_of? C::CExpression or a.op != :& or a.lexpr - next if not fa or not fa.name - rename[a.rexpr, fa.name.sub(/^l?p/, '')] - } - } - (cntrs & cmpi).each { |v| rename[v, 'cntr'] } - end - - # yield each CExpr member (recursive, allows arrays, order: self(!post), lexpr, rexpr, self(post)) - # if given a non-CExpr, walks it until it finds a CExpr to yield - def walk_ce(ce, post=false, &b) - case ce - when C::CExpression - yield ce if not post - walk_ce(ce.lexpr, post, &b) - walk_ce(ce.rexpr, post, &b) - yield ce if post - when ::Array - ce.each { |ce_| walk_ce(ce_, post, &b) } - when C::Statement - case ce - when C::Block; walk_ce(ce.statements, post, &b) - when C::If - walk_ce(ce.test, post, &b) - walk_ce(ce.bthen, post, &b) - walk_ce(ce.belse, post, &b) if ce.belse - when C::While, C::DoWhile - walk_ce(ce.test, post, &b) - walk_ce(ce.body, post, &b) - when C::Return - walk_ce(ce.value, post, &b) if ce.value - end - when C::Declaration - walk_ce(ce.var.initializer, post, &b) if ce.var.initializer - end - nil - end - - # yields each statement (recursive) - def walk(scope, post=false, &b) - case scope - when ::Array; scope.each { |s| walk(s, post, &b) } - when C::Statement - yield scope if not post - case scope - when C::Block; walk(scope.statements, post, &b) - when C::If - yield scope.test - walk(scope.bthen, post, &b) - walk(scope.belse, post, &b) if scope.belse - when C::While, C::DoWhile - yield scope.test - walk(scope.body, post, &b) - when C::Return - yield scope.value - end - yield scope if post - when C::Declaration - walk(scope.var.initializer, post, &b) if scope.var.initializer - end - end - - # forwards to @c_parser, handles cast to Array (these should not happen btw...) - def sizeof(var, type=nil) - var, type = nil, var if var.kind_of? C::Type and not type - type ||= var.type - return @c_parser.typesize[:ptr] if type.kind_of? 
C::Array and not var.kind_of? C::Variable - @c_parser.sizeof(var, type) rescue -1 - end + # TODO add methods to C::CExpr + AssignOp = [:'=', :'+=', :'-=', :'*=', :'/=', :'%=', :'^=', :'&=', :'|=', :'>>=', :'<<=', :'++', :'--'] + + attr_accessor :dasm, :c_parser + attr_accessor :forbid_optimize_dataflow, :forbid_optimize_code, :forbid_decompile_ifwhile, :forbid_decompile_types, :forbid_optimize_labels + # recursive flag: for each subfunction, recurse is decremented, when 0 only the prototype is decompiled, when <0 nothing is done + attr_accessor :recurse + + def initialize(dasm, cp = dasm.c_parser) + @dasm = dasm + @recurse = 1/0.0 # Infinity + @c_parser = cp || @dasm.cpu.new_cparser + end + + # decompile recursively function from an entrypoint, then perform global optimisation (static vars, ...) + # should be called once after everything is decompiled (global optimizations may bring bad results otherwise) + # use decompile_func for incremental decompilation + # returns the c_parser + def decompile(*entry) + entry.each { |f| decompile_func(f) } + finalize + @c_parser + end + + # decompile a function, decompiling subfunctions as needed + # may return :restart, which means that the decompilation should restart from the entrypoint (and bubble up) (eg a new codepath is found which may changes dependency in blocks etc) + def decompile_func(entry) + return if @recurse < 0 + entry = @dasm.normalize entry + return if not @dasm.decoded[entry] + + # create a new toplevel function to hold our code + func = C::Variable.new + func.name = @dasm.auto_label_at(entry, 'func') + if f = @dasm.function[entry] and f.decompdata and f.decompdata[:return_type] + rettype = f.decompdata[:return_type] + else + rettype = C::BaseType.new(:int) + end + func.type = C::Function.new rettype, [] + if @c_parser.toplevel.symbol[func.name] + return if @recurse == 0 + if not @c_parser.toplevel.statements.grep(C::Declaration).find { |decl| decl.var.name == func.name } + # recursive dependency: declare 
prototype + puts "function #{func.name} is recursive: predecompiling for prototype" if $VERBOSE + pre_recurse = @recurse + @recurse = 0 + @c_parser.toplevel.symbol.delete func.name + decompile_func(entry) + @recurse = pre_recurse + if not @c_parser.toplevel.statements.grep(C::Declaration).find { |decl| decl.var.name == func.name } + @c_parser.toplevel.statements << C::Declaration.new(func) + end + end + return + end + @c_parser.toplevel.symbol[func.name] = func + puts "decompiling #{func.name}" if $VERBOSE + + while catch(:restart) { do_decompile_func(entry, func) } == :restart + retval = :restart + end + + @c_parser.toplevel.symbol[func.name] = func # recursive func prototype could have overwritten us + @c_parser.toplevel.statements << C::Declaration.new(func) + + puts " decompiled #{func.name}" if $VERBOSE + + retval + end + + # calls decompile_func with recurse -= 1 (internal use) + def decompile_func_rec(entry) + @recurse -= 1 + decompile_func(entry) + ensure + @recurse += 1 + end + + def do_decompile_func(entry, func) + # find decodedinstruction graph of the function, decompile subfuncs + myblocks = listblocks_func(entry) + + # [esp+8] => [:frameptr-12] + makestackvars entry, myblocks.map { |b, to| @dasm.decoded[b].block } + + # find registry dependencies between blocks + deps = @dasm.cpu.decompile_func_finddeps(self, myblocks, func) + + scope = func.initializer = C::Block.new(@c_parser.toplevel) + if df = @dasm.function[entry] + scope.decompdata = df.decompdata ||= {:stackoff_type => {}, :stackoff_name => {}} + else + scope.decompdata ||= {:stackoff_type => {}, :stackoff_name => {}} + end + + # di blocks => raw c statements, declare variables + @dasm.cpu.decompile_blocks(self, myblocks, deps, func) + + simplify_goto(scope) + namestackvars(scope) + unalias_vars(scope, func) + decompile_c_types(scope) + optimize(scope) + remove_unreferenced_vars(scope) + cleanup_var_decl(scope, func) + if @recurse > 0 + decompile_controlseq(scope) + optimize_vars(scope) + 
optimize_ctrl(scope) + optimize_vars(scope) + remove_unreferenced_vars(scope) + simplify_varname_noalias(scope) + rename_variables(scope) + end + @dasm.cpu.decompile_check_abi(self, entry, func) + + case ret = scope.statements.last + when C::CExpression; puts "no return at end of func" if $VERBOSE + when C::Return + if not ret.value + scope.statements.pop + else + v = ret.value + v = v.rexpr if v.kind_of? C::CExpression and not v.op and v.rexpr.kind_of? C::Typed + func.type.type = v.type + end + end + + if @recurse == 0 + # we need only the prototype + func.initializer = nil + end + end + + # redecompile a function, redecompiles functions calling it if its prototype changed + def redecompile(name) + @c_parser.toplevel.statements.delete_if { |st| st.kind_of? C::Declaration and st.var.name == name } + oldvar = @c_parser.toplevel.symbol.delete name + + decompile_func(name) + + if oldvar and newvar = @c_parser.toplevel.symbol[name] and oldvar.type.kind_of? C::Function and newvar.type.kind_of? C::Function + o, n = oldvar.type, newvar.type + if o.type != n.type or o.args.to_a.length != n.args.to_a.length or o.args.to_a.zip(n.args.to_a).find { |oa, na| oa.type != na.type } + # XXX a may depend on b and c, and b may depend on c -> redecompile c twice + # XXX if the dcmp is unstable, may also infinite loop on mutually recursive funcs.. + @c_parser.toplevel.statements.dup.each { |st| + next if not st.kind_of? C::Declaration + next if not st.var.initializer + next if st.var.name == name + next if not walk_ce(st) { |ce| break true if ce.op == :funcall and ce.lexpr.kind_of? C::Variable and ce.lexpr.name == name } + redecompile(st.var.name) + } + end + end + end + + def new_global_var(addr, type, scope=nil) + addr = @dasm.normalize(addr) + + # (almost) NULL ptr + return if addr.kind_of? 
Fixnum and addr >= 0 and addr < 32 + + # check preceding structure we're hitting + # TODO check what we step over when defining a new static struct + 0x100.times { |i_| + next if not n = @dasm.get_label_at(addr-i_) + next if not v = @c_parser.toplevel.symbol[n] + next if not v.type.pointer? or not v.type.pointed.untypedef.kind_of? C::Union + break if i_ == 0 # XXX it crashes later if we dont break here + next if sizeof(v.type.pointed) <= i_ + return structoffset(v.type.pointed.untypedef, C::CExpression[v], i_, nil) + } + + ptype = type.pointed.untypedef if type.pointer? + if ptype.kind_of? C::Function + name = @dasm.auto_label_at(addr, 'sub', 'xref', 'byte', 'word', 'dword', 'unk') + if @dasm.get_section_at(addr) and @recurse > 0 + puts "found function pointer to #{name}" if $VERBOSE + @dasm.disassemble(addr) if not @dasm.decoded[addr] # TODO disassemble_fast ? + f = @dasm.function[addr] ||= DecodedFunction.new + # TODO detect thunks (__noreturn) + f.decompdata ||= { :stackoff_type => {}, :stackoff_name => {} } + if not s = @c_parser.toplevel.symbol[name] or not s.initializer or not s.type.untypedef.kind_of? C::Function + os = @c_parser.toplevel.symbol.delete name + @c_parser.toplevel.statements.delete_if { |ts| ts.kind_of? C::Declaration and ts.var.name == name } + aoff = 1 + ptype.args.to_a.each { |a| + aoff = (aoff + @c_parser.typesize[:ptr] - 1) / @c_parser.typesize[:ptr] * @c_parser.typesize[:ptr] + f.decompdata[:stackoff_type][aoff] ||= a.type + f.decompdata[:stackoff_name][aoff] ||= a.name if a.name + aoff += sizeof(a) # ary ? + } + decompile_func_rec(addr) + s = @c_parser.toplevel.symbol[name] + walk_ce([@c_parser.toplevel, scope]) { |ce| + ce.lexpr = s if ce.lexpr == os + ce.rexpr = s if ce.rexpr == os + } if os and s # update existing references to old instance + # else redecompile with new prototye ? + end + end + end + + name = case (type.pointer? 
&& tsz = sizeof(nil, ptype)) + when 1; 'byte' + when 2; 'word' + when 4; 'dword' + else 'unk' + end + name = 'stru' if ptype.kind_of? C::Union + name = @dasm.auto_label_at(addr, name, 'xref', 'byte', 'word', 'dword', 'unk', 'stru') + + if not var = @c_parser.toplevel.symbol[name] + var = C::Variable.new + var.name = name + var.type = type.pointer? ? C::Array.new(ptype) : type + @c_parser.toplevel.symbol[var.name] = var + @c_parser.toplevel.statements << C::Declaration.new(var) + end + if ptype.kind_of? C::Union and type.pointer? and s = @dasm.get_section_at(name) and s[0].ptr < s[0].length + # TODO struct init, array, fptrs.. + elsif type.pointer? and not type.pointed.untypedef.kind_of? C::Function and s = @dasm.get_section_at(name) and s[0].ptr < s[0].length and + [1, 2, 4].include? tsz and (not var.type.pointer? or sizeof(var.type.pointed) != sizeof(type.pointed) or not var.initializer) + # TODO do not overlap other statics (but labels may refer to elements of the array...) + data = (0..256).map { + v = s[0].decode_imm("u#{tsz*8}".to_sym, @dasm.cpu.endianness) + v = decompile_cexpr(v, @c_parser.toplevel) if v.kind_of? Expression # relocation + v + } + var.initializer = data.map { |v| C::CExpression[v, C::BaseType.new(:int)] } unless (data - [0]).empty? + if (tsz == 1 or tsz == 2) and eos = data.index(0) and (0..3).all? { |i| data[i] >= 0x20 and data[i] < 0x7f } # printable str + # XXX 0x80 with ruby1.9... + var.initializer = C::CExpression[data[0, eos].pack('C*'), C::Pointer.new(ptype)] rescue nil + end + if var.initializer.kind_of? ::Array and i = var.initializer.first and i.kind_of? C::CExpression and not i.op and i.rexpr.kind_of? C::Variable and + i.rexpr.type.kind_of? C::Function and not @dasm.get_section_at(@dasm.normalize(i.rexpr.name)) # iat_ExternalFunc + i.type = i.rexpr.type + type = var.type = C::Array.new(C::Pointer.new(i.type)) + var.initializer = [i] + end + var.initializer = nil if var.initializer.kind_of? ::Array and not type.untypedef.kind_of? 
C::Array + end + + # TODO patch existing references to addr ? (or would they have already triggered new_global_var?) + + # return the object to use to replace the raw addr + var + end + + # return an array of [address of block start, list of block to]] + # decompile subfunctions + def listblocks_func(entry) + @autofuncs ||= [] + blocks = [] + entry = dasm.normalize entry + todo = [entry] + while a = todo.pop + next if blocks.find { |aa, at| aa == a } + next if not di = @dasm.di_at(a) + blocks << [a, []] + di.block.each_to { |ta, type| + next if type == :indirect + ta = dasm.normalize ta + if type != :subfuncret and not @dasm.function[ta] and + (not @dasm.function[entry] or @autofuncs.include? entry) and + di.block.list.last.opcode.props[:saveip] + # possible noreturn function + # XXX call $+5; pop eax + @autofuncs << ta + @dasm.function[ta] = DecodedFunction.new + puts "autofunc #{Expression[ta]}" if $VERBOSE + end + + if @dasm.function[ta] and type != :subfuncret + f = dasm.auto_label_at(ta, 'func') + ta = dasm.normalize($1) if f =~ /^thunk_(.*)/ + ret = decompile_func_rec(ta) if (ta != entry or di.block.to_subfuncret) + throw :restart, :restart if ret == :restart + else + @dasm.auto_label_at(ta, 'label') if blocks.find { |aa, at| aa == ta } + blocks.last[1] |= [ta] + todo << ta + end + } + end + blocks + end + + # backtraces an expression from addr + # returns an integer, a label name, or an Expression + # XXX '(GetProcAddr("foo"))()' should not decompile to 'foo()' + def backtrace_target(expr, addr) + if n = @dasm.backtrace(expr, addr).first + return expr if n == Expression::Unknown + n = Expression[n].reduce_rec + n = @dasm.get_label_at(n) || n + n = $1 if n.kind_of? 
::String and n =~ /^thunk_(.*)/ + n + else + expr + end + end + + # patches instruction's backtrace_binding to replace things referring to a static stack offset from func start by :frameptr+off + def makestackvars(funcstart, blocks) + blockstart = nil + cache_di = nil + cache = {} # [i_s, e, type] => backtrace + tovar = lambda { |di, e, i_s| + case e + when Expression; Expression[tovar[di, e.lexpr, i_s], e.op, tovar[di, e.rexpr, i_s]].reduce + when Indirection; Indirection[tovar[di, e.target, i_s], e.len, e.origin] + when :frameptr; e + when ::Symbol + cache.clear if cache_di != di ; cache_di = di + vals = cache[[e, i_s, 0]] ||= @dasm.backtrace(e, di.address, :snapshot_addr => blockstart, + :include_start => i_s, :no_check => true, :terminals => [:frameptr]) + # backtrace only to blockstart first + if vals.length == 1 and ee = vals.first and ee.kind_of? Expression and (ee == Expression[:frameptr] or + (ee.lexpr == :frameptr and ee.op == :+ and ee.rexpr.kind_of? ::Integer) or + (not ee.lexpr and ee.op == :+ and ee.rexpr.kind_of? Indirection and eep = ee.rexpr.pointer and + (eep == Expression[:frameptr] or (eep.lexpr == :frameptr and eep.op == :+ and eep.rexpr.kind_of? ::Integer)))) + ee + else + # fallback on full run (could restart from blockstart with ee, but may reevaluate addr_binding.. + vals = cache[[e, i_s, 1]] ||= @dasm.backtrace(e, di.address, :snapshot_addr => funcstart, + :include_start => i_s, :no_check => true, :terminals => [:frameptr]) + if vals.length == 1 and ee = vals.first and (ee.kind_of? Expression and (ee == Expression[:frameptr] or + (ee.lexpr == :frameptr and ee.op == :+ and ee.rexpr.kind_of? 
::Integer))) + ee + else e + end + end + else e + end + } + + # must not change bt_bindings until everything is backtracked + repl_bind = {} # di => bt_bd + + @dasm.cpu.decompile_makestackvars(@dasm, funcstart, blocks) { |block| + block.list.each { |di| + bd = di.backtrace_binding ||= @dasm.cpu.get_backtrace_binding(di) + newbd = repl_bind[di] = {} + bd.each { |k, v| + k = tovar[di, k, true] if k.kind_of? Indirection + next if k == Expression[:frameptr] or (k.kind_of? Expression and k.lexpr == :frameptr and k.op == :+ and k.rexpr.kind_of? ::Integer) + newbd[k] = tovar[di, v, false] + } + } + } + + repl_bind.each { |di, bd| di.backtrace_binding = bd } + end + + # give a name to a stackoffset (relative to start of func) + # 4 => :arg_0, -8 => :var_4 etc + def stackoff_to_varname(off) + if off >= @c_parser.typesize[:ptr]; 'arg_%X' % ( off-@c_parser.typesize[:ptr]) # 4 => arg_0, 8 => arg_4.. + elsif off > 0; 'arg_0%X' % off + elsif off == 0; 'retaddr' + elsif off <= -@dasm.cpu.size/8; 'var_%X' % (-off-@dasm.cpu.size/8) # -4 => var_0, -8 => var_4.. + else 'var_0%X' % -off + end + end + + # turns an Expression to a CExpression, create+declares needed variables in scope + def decompile_cexpr(e, scope, itype=nil) + case e + when Expression + if e.op == :'=' and e.lexpr.kind_of? ::String and e.lexpr =~ /^dummy_metasm_/ + decompile_cexpr(e.rexpr, scope, itype) + elsif e.op == :+ and e.rexpr.kind_of? 
::Integer and e.rexpr < 0 + decompile_cexpr(Expression[e.lexpr, :-, -e.rexpr], scope, itype) + elsif e.lexpr + a = decompile_cexpr(e.lexpr, scope, itype) + C::CExpression[a, e.op, decompile_cexpr(e.rexpr, scope, itype)] + elsif e.op == :+ + decompile_cexpr(e.rexpr, scope, itype) + else + a = decompile_cexpr(e.rexpr, scope, itype) + C::CExpression[e.op, a] + end + when Indirection + case e.len + when 1, 2, 4, 8 + bt = C::BaseType.new("__int#{e.len*8}".to_sym) + else + bt = C::Struct.new + bt.members = [C::Variable.new('data', C::Array.new(C::BaseType.new(:__int8), e.len))] + end + itype = C::Pointer.new(bt) + p = decompile_cexpr(e.target, scope, itype) + p = C::CExpression[[p], itype] if not p.type.kind_of? C::Pointer + C::CExpression[:*, p] + when ::Integer + C::CExpression[e] + when C::CExpression + e + else + name = e.to_s + if not s = scope.symbol_ancestors[name] + s = C::Variable.new + s.type = C::BaseType.new(:__int32) + case e + when ::String # edata relocation (rel.length = size of pointer) + return @c_parser.toplevel.symbol[e] || new_global_var(e, itype || C::BaseType.new(:int), scope) + when ::Symbol; s.storage = :register ; s.add_attribute("register(#{name})") + else s.type.qualifier = [:volatile] + puts "decompile_cexpr unhandled #{e.inspect}, using #{e.to_s.inspect}" if $VERBOSE + end + s.name = name + scope.symbol[s.name] = s + scope.statements << C::Declaration.new(s) + end + s + end + end + + # simplify goto -> goto / goto -> return + def simplify_goto(scope, keepret = false) + if not keepret and scope.statements[-1].kind_of? C::Return and not scope.statements[-2].kind_of? C::Label + scope.statements.insert(-2, C::Label.new("ret_label")) + end + + jumpto = {} + walk(scope) { |s| + next if not s.kind_of? C::Block + s.statements.each_with_index { |ss, i| + case ss + when C::Goto, C::Return + while l = s.statements[i -= 1] and l.kind_of? 
C::Label + jumpto[l.name] = ss + end + end + } + } + + simpler = lambda { |s| + case s + when C::Goto + if jumpto[s.target] + r = jumpto[s.target].dup + r.value = r.value.deep_dup if r.kind_of? C::Return and r.value.kind_of? C::CExpression + r + end + when C::Return + if not keepret and scope.statements[-1].kind_of? C::Return and s.value == scope.statements[-1].value and s != scope.statements[-1] + C::Goto.new(scope.statements[-2].name) + end + end + } + + walk(scope) { |s| + case s + when C::Block + s.statements.each_with_index { |ss, i| + if sp = simpler[ss] + ss = s.statements[i] = sp + end + } + when C::If + if sp = simpler[s.bthen] + s.bthen = sp + end + end + } + + # remove unreferenced labels + remove_labels(scope) + + walk(scope) { |s| + next if not s.kind_of? C::Block + del = false + # remove dead code goto a; goto b; if (0) { z: bla; } => rm goto b + s.statements.delete_if { |st| + case st + when C::Goto, C::Return + olddel = del + del = true + olddel + else + del = false + end + } + # if () { goto x; } x: + s.statements.each_with_index { |ss, i| + if ss.kind_of? C::If + t = ss.bthen + t = t.statements.first if t.kind_of? C::Block + if t.kind_of? C::Goto and s.statements[i+1].kind_of? C::Label and s.statements[i+1].name == t.target + ss.bthen = C::Block.new(scope) + end + end + } + } + + remove_labels(scope) + end + + # changes ifgoto, goto to while/ifelse.. 
+ def decompile_controlseq(scope) + # TODO replace all this crap by a method using the graph representation + scope.statements = decompile_cseq_if(scope.statements, scope) + remove_labels(scope) + scope.statements = decompile_cseq_if(scope.statements, scope) + remove_labels(scope) + # TODO harmonize _if/_while api (if returns a replacement, while patches) + decompile_cseq_while(scope.statements, scope) + decompile_cseq_switch(scope) + end + + # optimize if() { a; } to if() a; + def optimize_ctrl(scope) + simplify_goto(scope, true) + + # break/continue + # XXX if (foo) while (bar) goto bla; bla: should => break + walk = lambda { |e, brk, cnt| + case e + when C::Block + walk[e.statements, brk, cnt] + e + when ::Array + e.each_with_index { |st, i| + case st + when C::While, C::DoWhile + l1 = (e[i+1].name if e[i+1].kind_of? C::Label) + l2 = (e[i-1].name if e[i-1].kind_of? C::Label) + e[i].body = walk[st.body, l1, l2] + else + e[i] = walk[st, brk, cnt] + end + } + e + when C::If + e.bthen = walk[e.bthen, brk, cnt] if e.bthen + e.belse = walk[e.belse, brk, cnt] if e.belse + e + when C::While, C::DoWhile + e.body = walk[e.body, nil, nil] + e + when C::Goto + if e.target == brk + C::Break.new + elsif e.target == cnt + C::Continue.new + else e + end + else e + end + } + walk[scope, nil, nil] + + remove_labels(scope) + + # while (1) { a; if(b) { c; return; }; d; } => while (1) { a; if (b) break; d; } c; + while st = scope.statements.last and st.kind_of? C::While and st.test.kind_of? C::CExpression and + not st.test.op and st.test.rexpr == 1 and st.body.kind_of? C::Block + break if not i = st.body.statements.find { |ist| + ist.kind_of? C::If and not ist.belse and ist.bthen.kind_of? C::Block and ist.bthen.statements.last.kind_of? C::Return + } + walk(i.bthen.statements) { |sst| sst.outer = i.bthen.outer if sst.kind_of? 
C::Block and sst.outer == i.bthen } + scope.statements.concat i.bthen.statements + i.bthen = C::Break.new + end + + patch_test = lambda { |ce| + ce = ce.rexpr if ce.kind_of? C::CExpression and ce.op == :'!' + # if (a+1) => if (a != -1) + if ce.kind_of? C::CExpression and (ce.op == :+ or ce.op == :-) and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr.kind_of? ::Integer and ce.lexpr + ce.rexpr.rexpr = -ce.rexpr.rexpr if ce.op == :+ + ce.op = :'!=' + end + } + + walk(scope) { |ce| + case ce + when C::If + patch_test[ce.test] + if ce.bthen.kind_of? C::Block + case ce.bthen.statements.length + when 1 + walk(ce.bthen.statements) { |sst| sst.outer = ce.bthen.outer if sst.kind_of? C::Block and sst.outer == ce.bthen } + ce.bthen = ce.bthen.statements.first + when 0 + if not ce.belse and i = ce.bthen.outer.statements.index(ce) + ce.bthen.outer.statements[i] = ce.test # TODO remove sideeffectless parts + end + end + end + if ce.belse.kind_of? C::Block and ce.belse.statements.length == 1 + walk(ce.belse.statements) { |sst| sst.outer = ce.belse.outer if sst.kind_of? C::Block and sst.outer == ce.belse } + ce.belse = ce.belse.statements.first + end + when C::While, C::DoWhile + patch_test[ce.test] + if ce.body.kind_of? C::Block + case ce.body.statements.length + when 1 + walk(ce.body.statements) { |sst| sst.outer = ce.body.outer if sst.kind_of? C::Block and sst.outer == ce.body } + ce.body = ce.body.statements.first + when 0 + if ce.kind_of? C::DoWhile and i = ce.body.outer.statements.index(ce) + ce = ce.body.outer.statements[i] = C::While.new(ce.test, ce.body) + end + ce.body = nil + end + end + end + } + walk(scope) { |ce| + next if not ce.kind_of? C::Block + st = ce.statements + st.length.times { |n| + while st[n].kind_of? C::If and st[n+1].kind_of? C::If and not st[n].belse and not st[n+1].belse and ( + (st[n].bthen.kind_of? C::Return and st[n+1].bthen.kind_of? C::Return and st[n].bthen.value == st[n+1].bthen.value) or + (st[n].bthen.kind_of? 
C::Break and st[n+1].bthen.kind_of? C::Break) or + (st[n].bthen.kind_of? C::Continue and st[n+1].bthen.kind_of? C::Continue)) + # if (a) return x; if (b) return x; => if (a || b) return x; + st[n].test = C::CExpression[st[n].test, :'||', st[n+1].test] + st.delete_at(n+1) + end + } + } + end + + # ifgoto => ifthen + # ary is an array of statements where we try to find if () {} [else {}] + # recurses to then/else content + def decompile_cseq_if(ary, scope) + return ary if forbid_decompile_ifwhile + # the array of decompiled statements to use as replacement + ret = [] + # list of labels appearing in ary + inner_labels = ary.grep(C::Label).map { |l| l.name } + while s = ary.shift + # recurse if it's not the first run + if s.kind_of? C::If + s.bthen.statements = decompile_cseq_if(s.bthen.statements, s.bthen) if s.bthen.kind_of? C::Block + s.belse.statements = decompile_cseq_if(s.belse.statements, s.belse) if s.belse.kind_of? C::Block + end + + # if (a) goto x; if (b) goto x; => if (a || b) goto x; + while s.kind_of? C::If and s.bthen.kind_of? C::Goto and not s.belse and ary.first.kind_of? C::If and ary.first.bthen.kind_of? C::Goto and + not ary.first.belse and s.bthen.target == ary.first.bthen.target + s.test = C::CExpression[s.test, :'||', ary.shift.test] + end + + # if (a) goto x; b; x: => if (!a) { b; } + if s.kind_of? C::If and s.bthen.kind_of? C::Goto and l = ary.grep(C::Label).find { |l_| l_.name == s.bthen.target } + # if {goto l;} a; l: => if (!) {a;} + s.test = C::CExpression.negate s.test + s.bthen = C::Block.new(scope) + s.bthen.statements = decompile_cseq_if(ary[0..ary.index(l)], s.bthen) + s.bthen.statements.pop # remove l: from bthen, it is in ary (was needed in bthen for inner ifs) + ary[0...ary.index(l)] = [] + end + + if s.kind_of? C::If and (s.bthen.kind_of? C::Block or s.bthen.kind_of? C::Goto) + s.bthen = C::Block.new(scope, [s.bthen]) if s.bthen.kind_of? 
C::Goto + + bts = s.bthen.statements + + # if (a) if (b) { c; } => if (a && b) { c; } + if bts.length == 1 and bts.first.kind_of? C::If and not bts.first.belse + s.test = C::CExpression[s.test, :'&&', bts.first.test] + bts = bts.first.bthen + bts = s.bthen.statements = bts.kind_of?(C::Block) ? bts.statements : [bts] + end + + # if (a) { if (b) goto c; d; } c: => if (a && !b) { d; } + if bts.first.kind_of? C::If and l = bts.first.bthen and (l = l.kind_of?(C::Block) ? l.statements.first : l) and l.kind_of? C::Goto and ary[0].kind_of? C::Label and l.target == ary[0].name + s.test = C::CExpression[s.test, :'&&', C::CExpression.negate(bts.first.test)] + if e = bts.shift.belse + bts.unshift e + end + end + + # if () { goto a; } a: + if bts.last.kind_of? C::Goto and ary[0].kind_of? C::Label and bts.last.target == ary[0].name + bts.pop + end + + # if { a; goto outer; } b; return; => if (!) { b; return; } a; goto outer; + if bts.last.kind_of? C::Goto and not inner_labels.include? bts.last.target and g = ary.find { |ss| ss.kind_of? C::Goto or ss.kind_of? C::Return } and g.kind_of? C::Return + s.test = C::CExpression.negate s.test + ary[0..ary.index(g)], bts[0..-1] = bts, ary[0..ary.index(g)] + end + + # if { a; goto l; } b; l: => if {a;} else {b;} + if bts.last.kind_of? C::Goto and l = ary.grep(C::Label).find { |l_| l_.name == bts.last.target } + s.belse = C::Block.new(scope) + s.belse.statements = decompile_cseq_if(ary[0...ary.index(l)], s.belse) + ary[0...ary.index(l)] = [] + bts.pop + end + + # if { a; l: b; goto any;} c; goto l; => if { a; } else { c; } b; goto any; + if not s.belse and (bts.last.kind_of? C::Goto or bts.last.kind_of? 
C::Return) and g = ary.grep(C::Goto).first and l = bts.grep(C::Label).find { |l_| l_.name == g.target } + s.belse = C::Block.new(scope) + s.belse.statements = decompile_cseq_if(ary[0...ary.index(g)], s.belse) + ary[0..ary.index(g)], bts[bts.index(l)..-1] = bts[bts.index(l)..-1], [] + end + + # if { a; b; c; } else { d; b; c; } => if {a;} else {d;} b; c; + if s.belse + bes = s.belse.statements + while not bts.empty? + if bts.last.kind_of? C::Label; ary.unshift bts.pop + elsif bes.last.kind_of? C::Label; ary.unshift bes.pop + elsif bts.last.to_s == bes.last.to_s; ary.unshift bes.pop ; bts.pop + else break + end + end + + # if () { a; } else { b; } => if () { a; } else b; + # if () { a; } else {} => if () { a; } + case bes.length + when 0; s.belse = nil + #when 1; s.belse = bes.first + end + end + + # if () {} else { a; } => if (!) { a; } + # if () { a; } => if () a; + case bts.length + when 0; s.test, s.bthen, s.belse = C::CExpression.negate(s.test), s.belse, nil if s.belse + #when 1; s.bthen = bts.first # later (allows simpler handling in _while) + end + end + + # l1: l2: if () goto l1; goto l2; => if(!) goto l2; goto l1; + if s.kind_of? C::If + ls = s.bthen + ls = ls.statements.last if ls.kind_of? C::Block + if ls.kind_of? C::Goto + if li = inner_labels.index(ls.target) + table = inner_labels + else + table = ary.map { |st| st.name if st.kind_of? C::Label }.compact.reverse + li = table.index(ls.target) || table.length + end + g = ary.find { |ss| + break if ss.kind_of? C::Return + next if not ss.kind_of? C::Goto + table.index(ss.target).to_i > li + } + if g + s.test = C::CExpression.negate s.test + if not s.bthen.kind_of? 
C::Block + ls = C::Block.new(scope) + ls.statements << s.bthen + s.bthen = ls + end + ary[0..ary.index(g)], s.bthen.statements = s.bthen.statements, decompile_cseq_if(ary[0..ary.index(g)], scope) + end + end + end + + ret << s + end + ret + end + + def decompile_cseq_while(ary, scope) + return if forbid_decompile_ifwhile + + # find the next instruction that is not a label + ni = lambda { |l| ary[ary.index(l)..-1].find { |s| not s.kind_of? C::Label } } + + # TODO XXX get rid of #index + finished = false ; while not finished ; finished = true # 1.9 does not support 'retry' + ary.each { |s| + case s + when C::Label + if ss = ni[s] and ss.kind_of? C::If and not ss.belse and ss.bthen.kind_of? C::Block + if ss.bthen.statements.last.kind_of? C::Goto and ss.bthen.statements.last.target == s.name + ss.bthen.statements.pop + if l = ary[ary.index(ss)+1] and l.kind_of? C::Label + ss.bthen.statements.grep(C::If).each { |i| + i.bthen = C::Break.new if i.bthen.kind_of? C::Goto and i.bthen.target == l.name + } + end + ary[ary.index(ss)] = C::While.new(ss.test, ss.bthen) + elsif ss.bthen.statements.last.kind_of? C::Return and g = ary[ary.index(s)+1..-1].reverse.find { |_s| _s.kind_of? C::Goto and _s.target == s.name } + wb = C::Block.new(scope) + wb.statements = decompile_cseq_while(ary[ary.index(ss)+1...ary.index(g)], wb) + w = C::While.new(C::CExpression.negate(ss.test), wb) + ary[ary.index(ss)..ary.index(g)] = [w, *ss.bthen.statements] + finished = false ; break #retry + end + end + if g = ary[ary.index(s)..-1].reverse.find { |_s| _s.kind_of? C::Goto and _s.target == s.name } + wb = C::Block.new(scope) + wb.statements = decompile_cseq_while(ary[ary.index(s)...ary.index(g)], wb) + w = C::While.new(C::CExpression[1], wb) + ary[ary.index(s)..ary.index(g)] = [w] + finished = false ; break #retry + end + if g = ary[ary.index(s)..-1].reverse.find { |_s| _s.kind_of? C::If and not _s.belse and gt = _s.bthen and + (gt = gt.kind_of?(C::Block) && gt.statements.length == 1 ? 
gt.statements.first : gt) and gt.kind_of? C::Goto and gt.target == s.name } + wb = C::Block.new(scope) + wb.statements = decompile_cseq_while(ary[ary.index(s)...ary.index(g)], wb) + w = C::DoWhile.new(g.test, wb) + ary[ary.index(s)..ary.index(g)] = [w] + finished = false ; break #retry + end + when C::If + decompile_cseq_while(s.bthen.statements, s.bthen) if s.bthen.kind_of? C::Block + decompile_cseq_while(s.belse.statements, s.belse) if s.belse.kind_of? C::Block + when C::While, C::DoWhile + decompile_cseq_while(s.body.statements, s.body) if s.body.kind_of? C::Block + end + } + end + ary + end + + # TODO + def decompile_cseq_switch(scope) + uncast = lambda { |e| e = e.rexpr while e.kind_of? C::CExpression and not e.op ; e } + walk(scope) { |s| + # XXX pfff... + next if not s.kind_of? C::If + # if (v < 12) return ((void(*)())(tableaddr+4*v))(); + t = s.bthen + t = t.statements.first if t.kind_of? C::Block and t.statements.length == 1 + next if not t.kind_of? C::Return or not t.respond_to? :from_instr + next if t.from_instr.comment.to_a.include? 'switch' + next if not t.value.kind_of? C::CExpression or t.value.op != :funcall or t.value.rexpr != [] or not t.value.lexpr.kind_of? C::CExpression or t.value.lexpr.op + p = uncast[t.value.lexpr.rexpr] + next if not p.kind_of? C::CExpression or p.op != :* or p.lexpr + p = uncast[p.rexpr] + next if not p.kind_of? C::CExpression or p.op != :+ + r, l = uncast[p.rexpr], uncast[p.lexpr] + r, l = l, r if r.kind_of? C::CExpression + next if not r.kind_of? ::Integer or not l.kind_of? C::CExpression or l.op != :* or not l.lexpr + lr, ll = uncast[l.rexpr], uncast[l.lexpr] + lr, ll = ll, lr if not ll.kind_of? ::Integer + next if ll != sizeof(nil, C::Pointer.new(C::BaseType.new(:void))) + base, index = r, lr + if s.test.kind_of? C::CExpression and (s.test.op == :<= or s.test.op == :<) and s.test.lexpr == index and + s.test.rexpr.kind_of? C::CExpression and not s.test.rexpr.op and s.test.rexpr.rexpr.kind_of? 
::Integer + t.from_instr.add_comment 'switch' + sup = s.test.rexpr.rexpr + rng = ((s.test.op == :<) ? (0...sup) : (0..sup)) + from = t.from_instr.address + rng.map { |i| @dasm.backtrace(Indirection[base+ll*i, ll, from], from, :type => :x, :origin => from, :maxdepth => 0) } + @dasm.disassemble + throw :restart, :restart + end + puts "unhandled switch() at #{t.from_instr}" if $VERBOSE + } + end + + # remove unused labels + def remove_labels(scope) + return if forbid_optimize_labels + + used = [] + walk(scope) { |ss| + used |= [ss.target] if ss.kind_of? C::Goto + } + walk(scope) { |s| + next if not s.kind_of? C::Block + s.statements.delete_if { |l| + l.kind_of? C::Label and not used.include? l.name + } + } + + # remove implicit continue; at end of loop + walk(scope) { |s| + next if not s.kind_of? C::While + if s.body.kind_of? C::Block and s.body.statements.last.kind_of? C::Continue + s.body.statements.pop + end + } + end + + # checks if expr is a var (var or *&var) + def isvar(ce, var) + if var.stackoff and ce.kind_of? C::CExpression + return unless ce.op == :* and not ce.lexpr + ce = ce.rexpr + ce = ce.rexpr while ce.kind_of? C::CExpression and not ce.op + return unless ce.kind_of? 
C::CExpression and ce.op == :& and not ce.lexpr + ce = ce.rexpr + end + ce == var + end + + # checks if expr reads var + def ce_read(ce_, var) + isvar(ce_, var) or + walk_ce(ce_) { |ce| + case ce.op + when :funcall; break true if isvar(ce.lexpr, var) or ce.rexpr.find { |a| isvar(a, var) } + when :'='; break true if isvar(ce.rexpr, var) + break ce_read(ce.rexpr, var) if isvar(ce.lexpr, var) # *&var = 2 + else break true if isvar(ce.lexpr, var) or isvar(ce.rexpr, var) + end + } + end + + # checks if expr writes var + def ce_write(ce_, var) + walk_ce(ce_) { |ce| + break true if AssignOp.include?(ce.op) and (isvar(ce.lexpr, var) or + (((ce.op == :'++' or ce.op == :'--') and isvar(ce.rexpr, var)))) + } + end + + # patches a set of exprs, replacing oldce by newce + def ce_patch(exprs, oldce, newce) + walk_ce(exprs) { |ce| + case ce.op + when :funcall + ce.lexpr = newce if ce.lexpr == oldce + ce.rexpr.each_with_index { |a, i| ce.rexpr[i] = newce if a == oldce } + else + ce.lexpr = newce if ce.lexpr == oldce + ce.rexpr = newce if ce.rexpr == oldce + end + } + end + + + # duplicate vars per domain value + # eg eax = 1; foo(eax); eax = 2; bar(eax); => eax = 1; foo(eax) eax_1 = 2; bar(eax_1); + # eax = 1; if (bla) eax = 2; foo(eax); => no change + def unalias_vars(scope, func) + g = c_to_graph(scope) + + # unalias func args first, they may include __attr__((out)) needed by the others + funcalls = [] + walk_ce(scope) { |ce| funcalls << ce if ce.op == :funcall } + vars = scope.symbol.values.sort_by { |v| walk_ce(funcalls) { |ce| break true if ce.rexpr == v } ? 
0 : 1 } + + # find the domains of var aliases + vars.each { |var| unalias_var(var, scope, g) } + end + + # duplicates a var per domain value + def unalias_var(var, scope, g = c_to_graph(scope)) + # [label, index] of references to var (reading it, writing it, ro/wo it (eg eax = *eax => eax_0 = *eax_1)) + read = {} + write = {} + ro = {} + wo = {} + + # list of [l, i] for which domain is not known + unchecked = [] + + # mark all exprs of the graph + # TODO handle var_14 __attribute__((out)) = &curvar <=> curvar write + r = var.has_attribute_var('register') + g.exprs.each { |label, exprs| + exprs.each_with_index { |ce, i| + if ce_read(ce, var) + if (ce.op == :'=' and isvar(ce.lexpr, var) and not ce_write(ce.rexpr, var)) or + (ce.op == :funcall and r and not ce_write(ce.lexpr, var) and not ce_write(ce.rexpr, var) and @dasm.cpu.abi_funcall[:changed].include?(r.to_sym)) + (ro[label] ||= []) << i + (wo[label] ||= []) << i + unchecked << [label, i, :up] << [label, i, :down] + else + (read[label] ||= []) << i + unchecked << [label, i] + end + elsif ce_write(ce, var) + (write[label] ||= []) << i + unchecked << [label, i] + end + } + } + + # stuff when filling the domain (flood algorithm) + dom = dom_ro = dom_wo = todo_up = todo_down = func_top = nil + + # flood by walking the graph up from [l, i] (excluded) + # marks stuff do walk down + walk_up = lambda { |l, i| + todo_w = [[l, i-1]] + done_w = [] + while o = todo_w.pop + next if done_w.include? o + done_w << o + l, i = o + loop do + if read[l].to_a.include? i + # XXX not optimal (should mark only the uppest read) + todo_down |= [[l, i]] if not dom.include? [l, i] + dom |= [[l, i]] + elsif write[l].to_a.include? i + todo_down |= [[l, i]] if not dom.include? [l, i] + dom |= [[l, i]] + break + elsif wo[l].to_a.include? i + todo_down |= [[l, i]] if not dom_wo.include? 
[l, i, :down] + dom_wo |= [[l, i, :down]] + break + end + i -= 1 + if i < 0 + g.from_optim[l].to_a.each { |ll| + todo_w << [ll, g.exprs[ll].to_a.length-1] + } + func_top = true if g.from_optim[l].to_a.empty? + break + end + end + end + } + + # flood by walking the graph down from [l, i] (excluded) + # malks stuff to walk up + walk_down = lambda { |l, i| + todo_w = [[l, i+1]] + done_w = [] + while o = todo_w.pop + next if done_w.include? o + done_w << o + l, i = o + loop do + if read[l].to_a.include? i + todo_up |= [[l, i]] if not dom.include? [l, i] + dom |= [[l, i]] + elsif write[l].to_a.include? i + break + elsif ro[l].to_a.include? i + todo_up |= [[l, i]] if not dom_ro.include? [l, i, :up] + dom_ro |= [[l, i, :up]] + break + end + i += 1 + if i >= g.exprs[l].to_a.length + g.to_optim[l].to_a.each { |ll| + todo_w << [ll, 0] + } + break + end + end + end + } + + # check it out + while o = unchecked.shift + dom = [] + dom_ro = [] + dom_wo = [] + func_top = false + + todo_up = [] + todo_down = [] + + # init + if read[o[0]].to_a.include? o[1] + todo_up << o + todo_down << o + dom << o + elsif write[o[0]].to_a.include? 
o[1] + todo_down << o + dom << o + elsif o[2] == :up + todo_up << o + dom_ro << o + elsif o[2] == :down + todo_down << o + dom_wo << o + else raise + end + + # loop + while todo_up.first or todo_down.first + todo_up.each { |oo| walk_up[oo[0], oo[1]] } + todo_up.clear + + todo_down.each { |oo| walk_down[oo[0], oo[1]] } + todo_down.clear + end + + unchecked -= dom + dom_wo + dom_ro + + next if func_top + + # patch + n_i = 0 + n_i += 1 while scope.symbol_ancestors[newvarname = "#{var.name}_a#{n_i}"] + + nv = var.dup + nv.storage = :register if nv.has_attribute_var('register') + nv.attributes = nv.attributes.dup if nv.attributes + nv.name = newvarname + scope.statements << C::Declaration.new(nv) + scope.symbol[nv.name] = nv + + dom.each { |oo| ce_patch(g.exprs[oo[0]][oo[1]], var, nv) } + dom_ro.each { |oo| + ce = g.exprs[oo[0]][oo[1]] + if ce.op == :funcall or ce.rexpr.kind_of? C::CExpression + ce_patch(ce.rexpr, var, nv) + else + ce.rexpr = nv + end + } + dom_wo.each { |oo| + ce = g.exprs[oo[0]][oo[1]] + if ce.op == :funcall + elsif ce.lexpr.kind_of? C::CExpression + ce_patch(ce.lexpr, var, nv) + else + ce.lexpr = nv + end + } + + # check if the var is only used as an __out__ parameter + if false and dom_ro.empty? and dom_wo.empty? 
and dom.length == 2 and # TODO + arg.has_attribute('out') and not arg.has_attribute('in') + # *(int32*)&var_10 = &var_4; + # set_pointed_value(*(int32*)&var_10); => writeonly var_4, may start a new domain + nv.add_attribute('out') + end + end + end + + # revert the unaliasing namechange of vars where no alias subsists + def simplify_varname_noalias(scope) + names = scope.symbol.keys + names.delete_if { |k| + next if not b = k[/^(.*)_a\d+$/, 1] + next if scope.symbol[k].stackoff.to_i > 0 + if not names.find { |n| n != k and (n == b or n[/^(.*)_a\d+$/, 1] == b) } + scope.symbol[b] = scope.symbol.delete(k) + scope.symbol[b].name = b + end + } + end + + # patch scope to transform :frameoff-x into &var_x + def namestackvars(scope) + off2var = {} + newvar = lambda { |o, n| + if not v = off2var[o] + v = off2var[o] = C::Variable.new + v.type = C::BaseType.new(:void) + v.name = n + v.stackoff = o + scope.symbol[v.name] = v + scope.statements << C::Declaration.new(v) + end + v + } + + scope.decompdata[:stackoff_name].each { |o, n| newvar[o, n] } + scope.decompdata[:stackoff_type].each { |o, t| newvar[o, stackoff_to_varname(o)] } + + walk_ce(scope) { |e| + next if e.op != :+ and e.op != :- + next if not e.lexpr.kind_of? C::Variable or e.lexpr.name != 'frameptr' + next if not e.rexpr.kind_of? C::CExpression or e.rexpr.op or not e.rexpr.rexpr.kind_of? ::Integer + off = e.rexpr.rexpr + off = -off if e.op == :- + v = newvar[off, stackoff_to_varname(off)] + e.replace C::CExpression[:&, v] + } + end + + # assign type to vars (regs, stack & global) + # types are found by subfunction argument types & indirections, and propagated through assignments etc + # TODO when updating the type of a var, update the type of all cexprs where it appears + def decompile_c_types(scope) + return if forbid_decompile_types + + # TODO *(int8*)(ptr+8); *(int32*)(ptr+12) => automatic struct + + # name => type + types = {} + + pscopevar = lambda { |e| + e = e.rexpr while e.kind_of? 
C::CExpression and not e.op and e.rexpr.kind_of? C::CExpression + if e.kind_of? C::CExpression and e.op == :& and not e.lexpr and e.rexpr.kind_of? C::Variable + e.rexpr.name if scope.symbol[e.rexpr.name] + end + } + scopevar = lambda { |e| + e = e.rexpr if e.kind_of? C::CExpression and not e.op + if e.kind_of? C::Variable and scope.symbol[e.name] + e.name + elsif e.kind_of? C::CExpression and e.op == :* and not e.lexpr + pscopevar[e.rexpr] + end + } + globalvar = lambda { |e| + e = e.rexpr if e.kind_of? C::CExpression and not e.op + if e.kind_of? ::Integer and @dasm.get_section_at(e) + e + elsif e.kind_of? C::Variable and not scope.symbol[e.name] and @c_parser.toplevel.symbol[e.name] and @dasm.get_section_at(e.name) + e.name + end + } + + # check if a newly found type for o is better than current type + # order: foo* > void* > foo + better_type = lambda { |t0, t1| + t1 == C::BaseType.new(:void) or (t0.pointer? and t1.kind_of? C::BaseType) or t0.untypedef.kind_of? C::Union or + (t0.kind_of? C::BaseType and t1.kind_of? C::BaseType and (@c_parser.typesize[t0.name] > @c_parser.typesize[t1.name] or (t0.name == t1.name and t0.qualifier))) or + (t0.pointer? and t1.pointer? and better_type[t0.pointed, t1.pointed]) + } + + update_global_type = lambda { |e, t| + if ne = new_global_var(e, t, scope) + ne.type = t if better_type[t, ne.type] # TODO patch existing scopes using ne + # TODO rename (dword_xx -> byte_xx etc) + e = scope.symbol_ancestors[e] || e if e.kind_of? String # exe reloc + walk_ce(scope) { |ce| + ce.lexpr = ne if ce.lexpr == e + ce.rexpr = ne if ce.rexpr == e + if ce.op == :* and not ce.lexpr and ce.rexpr == ne and ne.type.pointer? and ne.type.pointed.untypedef.kind_of? 
C::Union + # *struct -> struct->bla + ce.rexpr = structoffset(ne.type.pointed.untypedef, ce.rexpr, 0, sizeof(ce.type)) + elsif ce.lexpr == ne or ce.rexpr == ne + # set ce type according to l/r + # TODO set ce.parent type etc + ce.type = C::CExpression[ce.lexpr, ce.op, ce.rexpr].type + end + } + end + } + + propagate_type = nil # fwd declaration + propagating = [] # recursion guard (x = &x) + # check if need to change the type of a var + # propagate_type if type is updated + update_type = lambda { |n, t| + next if propagating.include? n + o = scope.symbol[n].stackoff + next if not o and t.untypedef.kind_of? C::Union + next if o and scope.decompdata[:stackoff_type][o] and t != scope.decompdata[:stackoff_type][o] + next if t0 = types[n] and not better_type[t, t0] + next if o and (t.integral? or t.pointer?) and o % sizeof(t) != 0 # keep vars aligned + types[n] = t + next if t == t0 + propagating << n + propagate_type[n, t] + propagating.delete n + next if not o + t = t.untypedef + if t.kind_of? C::Struct + t.members.to_a.each { |m| + mo = t.offsetof(@c_parser, m.name) + next if mo == 0 + scope.symbol.each { |vn, vv| + update_type[vn, m.type] if vv.stackoff == o+mo + } + } + end + } + + # try to update the type of a var from knowing the type of an expr (through dereferences etc) + known_type = lambda { |e, t| + loop do + e = e.rexpr while e.kind_of? C::CExpression and not e.op and e.type == t + if o = scopevar[e] + update_type[o, t] + elsif o = globalvar[e] + update_global_type[o, t] + elsif not e.kind_of? C::CExpression + elsif o = pscopevar[e] and t.pointer? + update_type[o, t.pointed] + elsif e.op == :* and not e.lexpr + e = e.rexpr + t = C::Pointer.new(t) + next + elsif t.pointer? and e.op == :+ and e.lexpr.kind_of? C::CExpression and e.lexpr.type.integral? and e.rexpr.kind_of? C::Variable + e.lexpr, e.rexpr = e.rexpr, e.lexpr + next + elsif e.op == :+ and e.lexpr and e.rexpr.kind_of? C::CExpression + if not e.rexpr.op and e.rexpr.rexpr.kind_of? 
::Integer + if t.pointer? and e.rexpr.rexpr < 0x1000 and (e.rexpr.rexpr % sizeof(t.pointed)) == 0 # XXX relocatable + base=0.. + e = e.lexpr # (int)*(x+2) === (int) *x + next + elsif globalvar[e.rexpr.rexpr] + known_type[e.lexpr, C::BaseType.new(:int)] + e = e.rexpr + next + end + elsif t.pointer? and (e.lexpr.kind_of? C::CExpression and e.lexpr.lexpr and [:<<, :>>, :*, :&].include? e.lexpr.op) or + (o = scopevar[e.lexpr] and types[o] and types[o].integral? and + !(o = scopevar[e.rexpr] and types[o] and types[o].integral?)) + e.lexpr, e.rexpr = e.rexpr, e.lexpr # swap + e = e.lexpr + next + elsif t.pointer? and ((e.rexpr.kind_of? C::CExpression and e.rexpr.lexpr and [:<<, :>>, :*, :&].include? e.rexpr.op) or + (o = scopevar[e.rexpr] and types[o] and types[o].integral? and + !(o = scopevar[e.lexpr] and types[o] and types[o].integral?))) + e = e.lexpr + next + end + end + break + end + } + + # we found a type for a var, propagate it through affectations + propagate_type = lambda { |var, type| + walk_ce(scope) { |ce| + next if ce.op != :'=' + + if ce.lexpr.kind_of? C::Variable and ce.lexpr.name == var + known_type[ce.rexpr, type] + next + end + if ce.rexpr.kind_of? C::Variable and ce.rexpr.name == var + known_type[ce.lexpr, type] + next + end + + # int **x; y = **x => int y + t = type + l = ce.lexpr + while l.kind_of? C::CExpression and l.op == :* and not l.lexpr + if var == pscopevar[l.rexpr] + known_type[ce.rexpr, t] + break + elsif t.pointer? + l = l.rexpr + t = t.pointed + else break + end + end + + # int **x; **x = y => int y + t = type + r = ce.rexpr + while r.kind_of? C::CExpression and r.op == :* and not r.lexpr + if var == pscopevar[r.rexpr] + known_type[ce.lexpr, t] + break + elsif t.pointer? + r = r.rexpr + t = t.pointed + else break + end + end + + # TODO int *x; *x = *y; ? + } + } + + # put all those macros in use + # use user-defined types first + scope.symbol.each_value { |v| + next if not v.kind_of? 
C::Variable or not v.stackoff or not t = scope.decompdata[:stackoff_type][v.stackoff] + known_type[v, t] + } + + # try to infer types from C semantics + later = [] + walk_ce(scope) { |ce| + if ce.op == :'=' and ce.rexpr.kind_of? C::CExpression and (ce.rexpr.op == :funcall or (ce.rexpr.op == nil and ce.rexpr.rexpr.kind_of? ::Integer and + ce.rexpr.rexpr.abs < 0x10000 and (not ce.lexpr.kind_of? C::CExpression or ce.lexpr.op != :'*' or ce.lexpr.lexpr))) + # var = int + known_type[ce.lexpr, ce.rexpr.type] + elsif ce.op == :funcall + f = ce.lexpr.type + f = f.pointed if f.pointer? + next if not f.kind_of? C::Function + # cast func args to arg prototypes + f.args.to_a.zip(ce.rexpr).each_with_index { |(proto, arg), i| ce.rexpr[i] = C::CExpression[arg, proto.type] ; known_type[arg, proto.type] } + elsif ce.op == :* and not ce.lexpr + if e = ce.rexpr and e.kind_of? C::CExpression and not e.op and e = e.rexpr and e.kind_of? C::CExpression and + e.op == :& and not e.lexpr and e.rexpr.kind_of? C::Variable and e.rexpr.stackoff + # skip *(__int32*)&var_12 for now, avoid saying var12 is an int if it may be a ptr or anything + later << [ce.rexpr, C::Pointer.new(ce.type)] + next + end + known_type[ce.rexpr, C::Pointer.new(ce.type)] + elsif not ce.op and ce.type.pointer? and ce.type.pointed.kind_of? C::Function + # cast to fptr: must be a fptr + known_type[ce.rexpr, ce.type] + end + } + + later.each { |ce, t| known_type[ce, t] } + + # offsets have types now + types.each { |v, t| + # keep var type qualifiers + q = scope.symbol[v].type.qualifier + scope.symbol[v].type = t + t.qualifier = q if q + } + + + # remove offsets to struct members + # XXX this defeats antialiasing + # off => [structoff, membername, membertype] + memb = {} + types.dup.each { |n, t| + v = scope.symbol[n] + next if not o = v.stackoff + t = t.untypedef + if t.kind_of? 
C::Struct + t.members.to_a.each { |tm| + moff = t.offsetof(@c_parser, tm.name) + next if moff == 0 + types.delete_if { |vv, tt| scope.symbol[vv].stackoff == o+moff } + memb[o+moff] = [v, tm.name, tm.type] + } + end + } + + # patch local variables into the CExprs, incl unknown offsets + varat = lambda { |n| + v = scope.symbol[n] + if s = memb[v.stackoff] + v = C::CExpression[s[0], :'.', s[1], s[2]] + else + v.type = types[n] || C::BaseType.new(:int) + end + v + } + + maycast = lambda { |v, e| + if sizeof(v) != sizeof(e) + v = C::CExpression[:*, [[:&, v], C::Pointer.new(e.type)]] + end + v + } + maycast_p = lambda { |v, e| + if not e.type.pointer? or sizeof(v) != sizeof(nil, e.type.pointed) + C::CExpression[[:&, v], e.type] + else + C::CExpression[:&, v] + end + } + + walk_ce(scope, true) { |ce| + case + when ce.op == :funcall + ce.rexpr.map! { |re| + if o = scopevar[re]; C::CExpression[maycast[varat[o], re]] + elsif o = pscopevar[re]; C::CExpression[maycast_p[varat[o], re]] + else re + end + } + when o = scopevar[ce.lexpr]; ce.lexpr = maycast[varat[o], ce.lexpr] + when o = scopevar[ce.rexpr]; ce.rexpr = maycast[varat[o], ce.rexpr] + ce.rexpr = C::CExpression[ce.rexpr] if not ce.op and ce.rexpr.kind_of? C::Variable + when o = pscopevar[ce.lexpr]; ce.lexpr = maycast_p[varat[o], ce.lexpr] + when o = pscopevar[ce.rexpr]; ce.rexpr = maycast_p[varat[o], ce.rexpr] + when o = scopevar[ce]; ce.replace C::CExpression[maycast[varat[o], ce]] + when o = pscopevar[ce]; ce.replace C::CExpression[maycast_p[varat[o], ce]] + end + } + + fix_type_overlap(scope) + fix_pointer_arithmetic(scope) + + # if int32 var_4 is always var_4 & 255, change type to int8 + varuse = Hash.new(0) + varandff = Hash.new(0) + varandffff = Hash.new(0) + walk_ce(scope) { |ce| + if ce.op == :& and ce.lexpr.kind_of? C::Variable and ce.lexpr.type.integral? and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr.kind_of? 
::Integer + case ce.rexpr.rexpr + when 0xff; varandff[ce.lexpr.name] += 1 + when 0xffff; varandffff[ce.lexpr.name] += 1 + end + end + varuse[ce.lexpr.name] += 1 if ce.lexpr.kind_of? C::Variable + varuse[ce.rexpr.name] += 1 if ce.rexpr.kind_of? C::Variable + } + varandff.each { |k, v| + scope.symbol[k].type = C::BaseType.new(:__int8, :unsigned) if varuse[k] == v + } + varandffff.each { |k, v| + scope.symbol[k].type = C::BaseType.new(:__int16, :unsigned) if varuse[k] == v + } + + # propagate types to cexprs + walk_ce(scope, true) { |ce| + if ce.op + ce.type = C::CExpression[ce.lexpr, ce.op, ce.rexpr].type rescue next + if ce.op == :'=' and ce.rexpr.kind_of? C::Typed and ce.rexpr.type != ce.type and (not ce.rexpr.type.integral? or not ce.type.integral?) + known_type[ce.rexpr, ce.type] if ce.type.pointer? and ce.type.pointed.untypedef.kind_of? C::Function # localvar = &struct with fptr + ce.rexpr = C::CExpression[[ce.rexpr], ce.type] + end + elsif ce.type.pointer? and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :& and not ce.rexpr.lexpr and sizeof(ce.rexpr.rexpr.type) == sizeof(ce.type.pointed) + ce.type = ce.rexpr.type + end + } + end + + # struct foo { int i; int j; struct { int k; int l; } m; }; bla+12 => &bla->m.l + # st is a struct, ptr is an expr pointing to a struct, off is a numeric offset from ptr, msz is the size of the pointed member (nil ignored) + def structoffset(st, ptr, off, msz) + tabidx = off / sizeof(st) + off -= tabidx * sizeof(st) + ptr = C::CExpression[:&, [ptr, :'[]', [tabidx]]] if tabidx != 0 or ptr.type.untypedef.kind_of? C::Array + return ptr if off == 0 and (not msz or # avoid infinite recursion with eg chained list + (ptr.kind_of? C::CExpression and ((ptr.op == :& and not ptr.lexpr and s=ptr.rexpr) or (ptr.op == :'.' and s=ptr)) and + not s.type.untypedef.kind_of? C::Union)) + + m_ptr = lambda { |m| + if ptr.kind_of? 
C::CExpression and ptr.op == :& and not ptr.lexpr + C::CExpression[ptr.rexpr, :'.', m.name] + else + C::CExpression[ptr, :'->', m.name] + end + } + + # recursive proc to list all named members, including in anonymous substructs + submemb = lambda { |sm| sm.name ? sm : sm.type.kind_of?(C::Union) ? sm.type.members.to_a.map { |ssm| submemb[ssm] } : nil } + mbs = st.members.to_a.map { |m| submemb[m] }.flatten.compact + mo = mbs.inject({}) { |h, m| h.update m => st.offsetof(@c_parser, m.name) } + + if sm = mbs.find { |m| mo[m] == off and (not msz or sizeof(m) == msz) } || + mbs.find { |m| mo[m] <= off and mo[m]+sizeof(m) > off } + off -= mo[sm] + sst = sm.type.untypedef + #return ptr if mo[sm] == 0 and sst.pointer? and sst.type.untypedef == st # TODO fix infinite recursion on mutually recursive ptrs + ptr = C::CExpression[:&, m_ptr[sm]] + if sst.kind_of? C::Union + return structoffset(sst, ptr, off, msz) + end + end + + if off != 0 + C::CExpression[[[ptr], C::Pointer.new(C::BaseType.new(:__int8))], :+, [off]] + else + ptr + end + end + + # fix pointer arithmetic (eg int foo += 4 => int* foo += 1) + # use struct member access (eg *(structptr+8) => structptr->bla) + # must be run only once, right after type setting + def fix_pointer_arithmetic(scope) + walk_ce(scope, true) { |ce| + if ce.lexpr and ce.lexpr.type.pointer? and [:&, :>>, :<<].include? ce.op + ce.lexpr = C::CExpression[[ce.lexpr], C::BaseType.new(:int)] + end + + if ce.op == :+ and ce.lexpr and ((ce.lexpr.type.integral? and ce.rexpr.type.pointer?) or (ce.rexpr.type.pointer? and ce.rexpr.type.pointed.untypedef.kind_of? C::Union)) + ce.rexpr, ce.lexpr = ce.lexpr, ce.rexpr + end + + if ce.op == :* and not ce.lexpr and ce.rexpr.type.pointer? and ce.rexpr.type.pointed.untypedef.kind_of? 
C::Struct + s = ce.rexpr.type.pointed.untypedef + m = s.members.to_a.find { |m_| s.offsetof(@c_parser, m_.name) == 0 } + if sizeof(m) != sizeof(ce) + ce.rexpr = C::CExpression[[ce.rexpr, C::Pointer.new(s)], C::Pointer.new(ce.type)] + next + end + # *structptr => structptr->member + ce.lexpr = ce.rexpr + ce.op = :'->' + ce.rexpr = m.name + ce.type = m.type + next + elsif ce.op == :'=' and ce.lexpr.type.untypedef.kind_of? C::Struct + s = ce.lexpr.type.untypedef + m = s.members.to_a.find { |m_| s.offsetof(@c_parser, m_.name) == 0 } + ce.lexpr = C::CExpression.new(ce.lexpr, :'.', m.name, m.type) + ce.type = m.type + next + end + + if ce.op == :+ and ce.lexpr and ce.lexpr.type.pointer? and not ce.type.pointer? + ce.type = ce.lexpr.type + end + + if ce.op == :& and not ce.lexpr and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :* and not ce.rexpr.lexpr + ce.replace C::CExpression[ce.rexpr.rexpr] + end + + next if not ce.lexpr or not ce.lexpr.type.pointer? + if ce.op == :+ and (s = ce.lexpr.type.pointed.untypedef).kind_of? C::Union and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and + ce.rexpr.rexpr.kind_of? ::Integer and o = ce.rexpr.rexpr + # structptr + 4 => &structptr->member + ce.replace structoffset(s, ce.lexpr, o, nil) + elsif [:+, :-, :'+=', :'-='].include? ce.op and ce.rexpr.kind_of? C::CExpression and ((not ce.rexpr.op and i = ce.rexpr.rexpr) or + (ce.rexpr.op == :* and i = ce.rexpr.lexpr and ((i.kind_of? C::CExpression and not i.op and i = i.rexpr) or true))) and + i.kind_of? ::Integer and psz = sizeof(nil, ce.lexpr.type.pointed) and i % psz == 0 + # ptr += 4 => ptr += 1 + if not ce.rexpr.op + ce.rexpr.rexpr /= psz + else + ce.rexpr.lexpr.rexpr /= psz + if ce.rexpr.lexpr.rexpr == 1 + ce.rexpr = ce.rexpr.rexpr + end + end + ce.type = ce.lexpr.type + + elsif (ce.op == :+ or ce.op == :-) and sizeof(nil, ce.lexpr.type.pointed) != 1 + # ptr+x => (ptrtype*)(((__int8*)ptr)+x) + # XXX create struct ? 
+ ce.rexpr = C::CExpression[ce.rexpr, C::BaseType.new(:int)] if not ce.rexpr.type.integral? + if sizeof(nil, ce.lexpr.type.pointed) != 1 + ptype = ce.lexpr.type + p = C::CExpression[[ce.lexpr], C::Pointer.new(C::BaseType.new(:__int8))] + ce.replace C::CExpression[[p, ce.op, ce.rexpr, p.type], ptype] + end + end + } + end + + # handling of var overlapping (eg __int32 var_10; __int8 var_F => replace all var_F by *(&var_10 + 1)) + # must be done before fix_pointer_arithmetic + def fix_type_overlap(scope) + varinfo = {} + scope.symbol.each_value { |var| + next if not off = var.stackoff + len = sizeof(var) + varinfo[var] = [off, len] + } + + varinfo.each { |v1, (o1, l1)| + next if not v1.type.integral? + varinfo.each { |v2, (o2, l2)| + # XXX o1 may overlap o2 AND another (int32 v_10; int32 v_E; int32 v_C;) + # TODO should check stuff with aliasing domains + next if v1.name == v2.name or o1 >= o2+l2 or o1+l1 <= o2 or l1 > l2 or (l2 == l1 and o2 >= o1) + # v1 => *(&v2+delta) + p = C::CExpression[:&, v2] + p = C::CExpression[p, :+, [o1-o2]] + p = C::CExpression[p, C::Pointer.new(v1.type)] if v1.type != p.type.type + p = C::CExpression[:*, p] + walk_ce(scope) { |ce| + ce.lexpr = p if ce.lexpr == v1 + ce.rexpr = p if ce.rexpr == v1 + } + } + } + end + + # to be run with scope = function body with only CExpr/Decl/Label/Goto/IfGoto/Return, with correct variables types + # will transform += 1 to ++, inline them to prev/next statement ('++x; if (x)..' 
=> 'if (++x)..') + # remove useless variables ('int i;', i never used or 'i = 1; j = i;', i never read after => 'j = 1;') + # remove useless casts ('(int)i' with 'int i;' => 'i') + def optimize(scope) + optimize_code(scope) + optimize_vars(scope) + optimize_vars(scope) # 1st run may transform i = i+1 into i++ which second run may coalesce into if(i) + end + + # simplify cexpressions (char & 255, redundant casts, etc) + def optimize_code(scope) + return if forbid_optimize_code + + sametype = lambda { |t1, t2| + t1 = t1.untypedef + t2 = t2.untypedef + t1 = t1.pointed.untypedef if t1.pointer? and t1.pointed.untypedef.kind_of? C::Function + t2 = t2.pointed.untypedef if t2.pointer? and t2.pointed.untypedef.kind_of? C::Function + t1 == t2 or + (t1.kind_of? C::Function and t2.kind_of? C::Function and sametype[t1.type, t2.type] and t1.args.to_a.length == t2.args.to_a.length and + t1.args.to_a.zip(t2.args.to_a).all? { |st1, st2| sametype[st1.type, st2.type] }) or + (t1.kind_of? C::BaseType and t1.integral? and t2.kind_of? C::BaseType and t2.integral? and sizeof(nil, t1) == sizeof(nil, t2)) or + (t1.pointer? and t2.pointer? and sametype[t1.type, t2.type]) + } + + # most of this is a CExpr#reduce + future_array = [] + walk_ce(scope, true) { |ce| + # (whatever)0 => 0 + if not ce.op and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 0 + ce.replace ce.rexpr + end + + # *&bla => bla if types ok + if ce.op == :* and not ce.lexpr and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :& and not ce.rexpr.lexpr and sametype[ce.rexpr.type.pointed, ce.rexpr.rexpr.type] + ce.replace C::CExpression[ce.rexpr.rexpr] + end + + # int x + 0xffffffff -> x-1 + if ce.lexpr and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and [:+, :-, :'+=', :'-=', :'!=', :==, :>, :<, :>=, :<=].include? 
ce.op and + ce.rexpr.rexpr == (1 << (8*sizeof(ce.lexpr)))-1 + ce.op = {:+ => :-, :- => :+, :'+=' => :'-=', :'-=' => :'+='}[ce.op] + ce.rexpr.rexpr = 1 + end + + # int *ptr; *(ptr + 4) => ptr[4] + if ce.op == :* and not ce.lexpr and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :+ and var = ce.rexpr.lexpr and var.kind_of? C::Variable and var.type.pointer? + ce.lexpr, ce.op, ce.rexpr = ce.rexpr.lexpr, :'[]', ce.rexpr.rexpr + future_array << var.name + end + + # char x; x & 255 => x + if ce.op == :& and ce.lexpr and (ce.lexpr.type.integral? or ce.lexpr.type.pointer?) and ce.rexpr.kind_of? C::CExpression and + not ce.rexpr.op and ce.rexpr.rexpr.kind_of? ::Integer and m = (1 << (8*sizeof(ce.lexpr))) - 1 and + ce.rexpr.rexpr & m == m + ce.replace C::CExpression[ce.lexpr] + end + + # a + -b => a - b + if ce.op == :+ and ce.lexpr and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :- and not ce.rexpr.lexpr + ce.op, ce.rexpr = :-, ce.rexpr.rexpr + end + + # (((int) i >> 31) & 1) => i < 0 + if ce.op == :& and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 1 and + ce.lexpr.kind_of? C::CExpression and ce.lexpr.op == :>> and ce.lexpr.rexpr.kind_of? C::CExpression and + not ce.lexpr.rexpr.op and ce.lexpr.rexpr.rexpr == sizeof(ce.lexpr.lexpr) * 8 - 1 + ce.replace C::CExpression[ce.lexpr.lexpr, :<, [0]] + end + + # a-b == 0 => a == b + if ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 0 and [:==, :'!=', :<, :>, :<=, :>=].include? ce.op and + ce.lexpr.kind_of? C::CExpression and ce.lexpr.op == :- and ce.lexpr.lexpr + ce.lexpr, ce.rexpr = ce.lexpr.lexpr, ce.lexpr.rexpr + end + + # (a > 0) != 0 + if ce.op == :'!=' and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 0 and ce.lexpr.kind_of? C::CExpression and + [:<, :<=, :>, :>=, :'==', :'!=', :'!'].include? 
ce.lexpr.op + ce.replace ce.lexpr + end + + # (a < b) != ( [(a < 0) == !(b < 0)] && [(a < 0) != (a < b)] ) => jl + # a true if !r => a<0 == b<0 or a>=0 => a>=0 or b>=0 + # a>=b => true if r => a<0 == b>=0 and a<0 => a<0 and b>=0 + + # x != (a && (b != x)) => [x && (!a || b)] || [!x && !(!a || b)] + if ce.op == :'!=' and ce.lexpr.kind_of? C::CExpression and ce.lexpr.op == :< and ce.rexpr.kind_of? C::CExpression and + ce.rexpr.op == :'&&' and ce.rexpr.rexpr.kind_of? C::CExpression and ce.rexpr.rexpr.op == :'!=' and + ce.rexpr.rexpr.rexpr == ce.lexpr and not walk_ce(ce) { |ce_| break true if ce_.op == :funcall } + x, a, b = ce.lexpr, ce.rexpr.lexpr, ce.rexpr.rexpr.lexpr + ce.replace C::CExpression[ [x, :'&&', [[:'!',a],:'||',b]] , :'||', [[:'!', x], :'&&', [:'!', [[:'!',a],:'||',b]]] ] + optimize_code(ce) + end + # (a != b) || a => a || b + if ce.op == :'||' and ce.lexpr.kind_of? C::CExpression and ce.lexpr.op == :'!=' and ce.lexpr.lexpr == ce.rexpr and not walk_ce(ce) { |ce_| break true if ce_.op == :funcall } + ce.lexpr, ce.rexpr = ce.rexpr, ce.lexpr.rexpr + optimize_code(ce) + end + # (a=0 && b<0) || (a>=b) && (a>=0 && b<0) => (signed)a < (signed)b + if ce.op == :'||' and ce.lexpr.kind_of? C::CExpression and ce.rexpr.kind_of? C::CExpression and ce.lexpr.op == :'&&' and ce.rexpr.op == :'&&' and + ce.lexpr.lexpr.kind_of? C::CExpression and ce.lexpr.lexpr.op == :< + a, b = ce.lexpr.lexpr.lexpr, ce.lexpr.lexpr.rexpr + if ce.lexpr.rexpr === C::CExpression[[a, :'>=', [0]], :'&&', [b, :'<', [0]]].negate and + ce.rexpr.lexpr === ce.lexpr.lexpr.negate and ce.rexpr.rexpr === ce.lexpr.rexpr.negate + ce.replace C::CExpression[a, :'<', b] + end + end + # a && 1 + if (ce.op == :'||' or ce.op == :'&&') and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr.kind_of? 
::Integer + if ((ce.op == :'||' and ce.rexpr.rexpr == 0) or (ce.op == :'&&' and ce.rexpr.rexpr != 0)) + ce.replace C::CExpression[ce.lexpr] + elsif not walk_ce(ce) { |ce_| break true if ce.op == :funcall } # cannot wipe if sideeffect + ce.replace C::CExpression[[ce.op == :'||' ? 1 : 0]] + end + end + # (b < c || b >= c) + if (ce.op == :'||' or ce.op == :'&&') and C::CExpression.negate(ce.lexpr) == C::CExpression[ce.rexpr] + ce.replace C::CExpression[[(ce.op == :'||') ? 1 : 0]] + end + + # (a < b) | (a == b) => a <= b + if ce.op == :| and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :== and ce.lexpr.kind_of? C::CExpression and + (ce.lexpr.op == :< or ce.lexpr.op == :>) and ce.lexpr.lexpr == ce.rexpr.lexpr and ce.lexpr.rexpr == ce.rexpr.rexpr + ce.op = {:< => :<=, :> => :>=}[ce.lexpr.op] + ce.lexpr, ce.rexpr = ce.lexpr.lexpr, ce.lexpr.rexpr + end + + # a == 0 => !a + if ce.op == :== and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 0 + ce.lexpr, ce.op, ce.rexpr = nil, :'!', ce.lexpr + end + + if ce.op == :'!' and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr.kind_of? ::Integer + ce.replace C::CExpression[[ce.rexpr.rexpr == 0 ? 1 : 0]] + end + + # !(bool) => bool + if ce.op == :'!' and ce.rexpr.kind_of? C::CExpression and [:'==', :'!=', :<, :>, :<=, :>=, :'||', :'&&', :'!'].include? ce.rexpr.op + ce.replace ce.rexpr.negate + end + + # (foo)(bar)x => (foo)x + if not ce.op and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr.kind_of? C::CExpression + ce.rexpr = ce.rexpr.rexpr + end + + # &struct.1stmember => &struct + if ce.op == :& and not ce.lexpr and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :'.' and s = ce.rexpr.lexpr.type and + s.kind_of? C::Union and s.offsetof(@c_parser, ce.rexpr.rexpr) == 0 + ce.rexpr = ce.rexpr.lexpr + ce.type = C::Pointer.new(ce.rexpr.type) + end + + # (1stmember*)structptr => &structptr->1stmember + if not ce.op and ce.type.pointer? 
and not ce.type.pointed.void? and ce.rexpr.kind_of? C::Typed and ce.rexpr.type.pointer? and + s = ce.rexpr.type.pointed.untypedef and s.kind_of? C::Union and ce.type.pointed.untypedef != s + ce.rexpr = C::CExpression[structoffset(s, ce.rexpr, 0, sizeof(ce.type.pointed))] + #ce.replace ce.rexpr if not ce.type.pointed.untypedef.kind_of? C::Function or (ce.rexpr.type.pointer? and + #ce.rexpr.type.pointed.untypedef.kind_of? C::Function) # XXX ugly + # int32* v1 = (int32*)pstruct; + # z = v1+4 if v1 is not cast, the + is invalid (sizeof pointed changes) + # TODO when finding type of pstruct, set type of v1 accordingly + end + + # (&foo)->bar => foo.bar + if ce.op == :'->' and ce.lexpr.kind_of? C::CExpression and ce.lexpr.op == :& and not ce.lexpr.lexpr + ce.lexpr = ce.lexpr.rexpr + ce.op = :'.' + end + + # (foo)bla => bla if bla of type foo + if not ce.op and ce.rexpr.kind_of? C::Typed and sametype[ce.type, ce.rexpr.type] + ce.replace C::CExpression[ce.rexpr] + end + if ce.lexpr.kind_of? C::CExpression and not ce.lexpr.op and ce.lexpr.rexpr.kind_of? C::Variable and ce.lexpr.type == ce.lexpr.rexpr.type + ce.lexpr = ce.lexpr.rexpr + end + + if ce.op == :'=' and ce.lexpr.kind_of? C::CExpression and ce.lexpr.op == :* and not ce.lexpr.lexpr and ce.lexpr.rexpr.kind_of? C::CExpression and + not ce.lexpr.rexpr.op and ce.lexpr.rexpr.type.pointer? and ce.lexpr.rexpr.type.pointed != ce.rexpr.type + ce.lexpr.rexpr.type = C::Pointer.new(ce.rexpr.type) + optimize_code(ce.lexpr) + end + } + + # if there is a ptr[4], change all *ptr to ptr[0] for consistency + # do this after the first pass, which may change &*ptr to ptr + walk_ce(scope) { |ce| + if ce.op == :* and not ce.lexpr and ce.rexpr.kind_of? C::Variable and future_array.include? ce.rexpr.name + ce.lexpr, ce.op, ce.rexpr = ce.rexpr, :'[]', C::CExpression[0] + end + } if not future_array.empty? + + # if (x != 0) => if (x) + walk(scope) { |st| + if st.kind_of? C::If and st.test.kind_of? 
C::CExpression and st.test.op == :'!=' and + st.test.rexpr.kind_of? C::CExpression and not st.test.rexpr.op and st.test.rexpr.rexpr == 0 + st.test = C::CExpression[st.test.lexpr] + end + } + end + + # checks if an expr has sideeffects (funcall, var assignment, mem dereference, use var out of scope if specified) + def sideeffect(exp, scope=nil) + case exp + when nil, ::Numeric, ::String; false + when ::Array; exp.any? { |_e| sideeffect _e, scope } + when C::Variable; (scope and not scope.symbol[exp.name]) or exp.type.qualifier.to_a.include? :volatile + when C::CExpression; (exp.op == :* and not exp.lexpr) or exp.op == :funcall or AssignOp.include?(exp.op) or + sideeffect(exp.lexpr, scope) or sideeffect(exp.rexpr, scope) + else true # failsafe + end + end + + # converts C code to a graph of cexprs (nodes = cexprs, edges = codepaths) + # returns a CGraph + class CGraph + # exprs: label => [exprs], to: label => [labels], block: label => are exprs standalone (vs If#test), start: 1st label + attr_accessor :exprs, :to, :block, :start, :to_optim, :from_optim + end + def c_to_graph(st) + g = CGraph.new + g.exprs = {} # label => [exprs] + g.to = {} # label => [labels] + g.block = {} # label => is label in a block? 
(vs If#test) + anon_label = 0 # when no label is there, use anon_label++ + # converts C code to a graph of codepath of cexprs + to_graph = lambda { |stmt, l_cur, l_after, l_cont, l_break| + case stmt + when C::Label; g.to[l_cur] = [stmt.name] ; g.to[stmt.name] = [l_after] + when C::Goto; g.to[l_cur] = [stmt.target] + when C::Continue; g.to[l_cur] = [l_cont] + when C::Break; g.to[l_cur] = [l_break] + when C::CExpression + g.exprs[l_cur] = [stmt] + g.to[l_cur] = [l_after] + when C::Return + g.exprs[l_cur] = [stmt.value] if stmt.value + g.to[l_cur] = [] + when C::Block + to_graph[stmt.statements, l_cur, l_after, l_cont, l_break] + when ::Array + g.exprs[l_cur] = [] + g.block[l_cur] = true + stmt.each_with_index { |s, i| + case s + when C::Declaration + when C::CExpression + g.exprs[l_cur] << s + else + l = anon_label += 1 + ll = anon_label += 1 + g.to[l_cur] = [l] + g.block[l_cur] = true + to_graph[stmt[i], l, ll, l_cont, l_break] + l_cur = ll + g.exprs[l_cur] = [] + end + } + g.to[l_cur] = [l_after].compact + when C::If + g.exprs[l_cur] = [stmt.test] + lt = anon_label += 1 + to_graph[stmt.bthen, lt, l_after, l_cont, l_break] + le = anon_label += 1 + to_graph[stmt.belse, le, l_after, l_cont, l_break] + g.to[l_cur] = [lt, le] + when C::While, C::DoWhile + la = anon_label += 1 + if stmt.kind_of? 
C::DoWhile + lt, lb = la, l_cur + else + lt, lb = l_cur, la + end + g.exprs[lt] = [stmt.test] + g.to[lt] = [lb, l_after] + to_graph[stmt.body, lb, lt, lt, l_after] + when C::Asm, nil; g.to[l_cur] = [l_after] + else puts "to_graph unhandled #{stmt.class}: #{stmt}" if $VERBOSE + end + } + + g.start = anon_label + to_graph[st, g.start, nil, nil, nil] + + # optimize graph + g.to_optim = {} + g.to.each { |k, v| g.to_optim[k] = v.uniq } + g.exprs.delete_if { |k, v| v == [] } + g.to_optim.delete_if { |k, v| + if v.length == 1 and not g.exprs[k] and v != [k] + g.to_optim.each_value { |t| if i = t.index(k) ; t[i] = v.first ; end } + true + elsif v.length == 0 and not g.exprs[k] + g.to_optim.each_value { |t| t.delete k } + true + end + } + + g.from_optim = {} + g.to_optim.each { |k, v| v.each { |t| (g.from_optim[t] ||= []) << k } } + + g + end + + # dataflow optimization + # condenses expressions (++x; if (x) => if (++x)) + # remove local var assignment (x = 1; f(x); x = 2; g(x); => f(1); g(2); etc) + def optimize_vars(scope) + return if forbid_optimize_dataflow + + g = c_to_graph(scope) + + # walks a cexpr in evaluation order (not strictly, but this is not strictly defined anyway..) + # returns the first subexpr to read var in ce + # returns :write if var is rewritten + # returns nil if var not read + # may return a cexpr var += 2 + find_next_read_ce = lambda { |ce_, var| + walk_ce(ce_, true) { |ce| + case ce.op + when :funcall + break ce if ce.lexpr == var or ce.rexpr.find { |a| a == var } + when :'=' + # a=a / a=a+1 => yield a, not :write + break ce if ce.rexpr == var + break :write if ce.lexpr == var + else + break ce if ce.lexpr == var or ce.rexpr == var + end + } + } + + # badlabels is a list of labels that may be reached without passing through the first invocation block + find_next_read_rec = lambda { |label, idx, var, done, badlabels| + next if done.include? 
label + done << label if idx == 0 + + idx += 1 while ce = g.exprs[label].to_a[idx] and not ret = find_next_read_ce[ce, var] + next ret if ret + + to = g.to_optim[label].to_a.map { |t| + break [:split] if badlabels.include? t + find_next_read_rec[t, 0, var, done, badlabels] + }.compact + + tw = to - [:write] + if to.include? :split or tw.length > 1 + :split + elsif tw.length == 1 + tw.first + elsif to.include? :write + :write + end + } + # return the previous subexpr reading var with no fwd path to another reading (otherwise split), see loop comment for reason + find_next_read = nil + find_prev_read_rec = lambda { |label, idx, var, done| + next if done.include? label + done << label if idx == g.exprs[label].length-1 + + idx -= 1 while idx >= 0 and ce = g.exprs[label].to_a[idx] and not ret = find_next_read_ce[ce, var] + if ret.kind_of? C::CExpression + fwchk = find_next_read[label, idx+1, var] + ret = fwchk if not fwchk.kind_of? C::CExpression + end + next ret if ret + + from = g.from_optim[label].to_a.map { |f| + find_prev_read_rec[f, g.exprs[f].to_a.length-1, var, done] + }.compact + + next :split if from.include? :split + fw = from - [:write] + if fw.length == 1 + fw.first + elsif fw.length > 1 + :split + elsif from.include? :write + :write + end + } + + # list of labels reachable without using a label + badlab = {} + build_badlabel = lambda { |label| + next if badlab[label] + badlab[label] = [] + todo = [g.start] + while l = todo.pop + next if l == label or badlab[label].include? 
l + badlab[label] << l + todo.concat g.to_optim[l].to_a + end + } + + # returns the next subexpr where var is read + # returns :write if var is written before being read + # returns :split if the codepath splits with both subpath reading or codepath merges with another + # returns nil if var is never read + # idx is the index of the first cexpr at g.exprs[label] to look at + find_next_read = lambda { |label, idx, var| + find_next_read_rec[label, idx, var, [], []] + } + find_prev_read = lambda { |label, idx, var| + find_prev_read_rec[label, idx, var, []] + } + # same as find_next_read, but returns :split if there exist a path from g.start to the read without passing through label + find_next_read_bl = lambda { |label, idx, var| + build_badlabel[label] + find_next_read_rec[label, idx, var, [], badlab[label]] + } + + # walk each node, optimize data accesses there + # replace no longer useful exprs with CExpr[nil, nil, nil], those are wiped later. + g.exprs.each { |label, exprs| + next if not g.block[label] + i = 0 + while i < exprs.length + e = exprs[i] + i += 1 + + # TODO x = x + 1 => x += 1 => ++x here, move all other optimizations after (in optim_code) + # needs also int & 0xffffffff -> int, *&var etc (decomp_type? optim_type?) + if (e.op == :'++' or e.op == :'--') and v = (e.lexpr || e.rexpr) and v.kind_of? C::Variable and + scope.symbol[v.name] and not v.type.qualifier.to_a.include? :volatile + next if !((pos = :post.to_sym) and (oe = find_next_read_bl[label, i, v]) and oe.kind_of? C::CExpression) and + !((pos = :prev.to_sym) and (oe = find_prev_read[label, i-2, v]) and oe.kind_of? 
C::CExpression) + next if oe.op == :& and not oe.lexpr # no &(++eax) + + # merge pre/postincrement into next/prev var usage + # find_prev_read must fwd check when it finds something, to avoid + # while(x) x++; return x; to be converted to while(x++); return x; (return wrong value) + case oe.op + when e.op + # bla(i--); --i bla(--i); --i ++i; bla(i++) => ignore + next if pos == :pre or oe.lexpr + # ++i; bla(++i) => bla(i += 2) + oe.lexpr = oe.rexpr + oe.op = ((oe.op == :'++') ? :'+=' : :'-=') + oe.rexpr = C::CExpression[2] + + when :'++', :'--' # opposite of e.op + if (pos == :post and not oe.lexpr) or (pos == :pre and not oe.rexpr) + # ++i; bla(--i) => bla(i) + # bla(i--); ++i => bla(i) + oe.op = nil + elsif pos == :post + # ++i; bla(i--) => bla(i+1) + oe.op = ((oe.op == :'++') ? :- : :+) + oe.rexpr = C::CExpression[1] + elsif pos == :pre + # bla(--i); ++i => bla(i-1) + oe.lexpr = oe.rexpr + oe.op = ((oe.op == :'++') ? :+ : :-) + oe.rexpr = C::CExpression[1] + end + when :'+=', :'-=' + # TODO i++; i += 4 => i += 5 + next + when *AssignOp + next # ++i; i |= 4 => ignore + else + if pos == :post and v == oe.lexpr; oe.lexpr = C::CExpression[e.op, v] + elsif pos == :post and v == oe.rexpr; oe.rexpr = C::CExpression[e.op, v] + elsif pos == :prev and v == oe.rexpr; oe.rexpr = C::CExpression[v, e.op] + elsif pos == :prev and v == oe.lexpr; oe.lexpr = C::CExpression[v, e.op] + else raise 'foobar' # find_dir_read failed + end + end + + i -= 1 + exprs.delete_at(i) + e.lexpr = e.op = e.rexpr = nil + + + elsif e.op == :'=' and v = e.lexpr and v.kind_of? C::Variable and scope.symbol[v.name] and + not v.type.qualifier.to_a.include? :volatile and not find_next_read_ce[e.rexpr, v] + + # reduce trivial static assignments + if (e.rexpr.kind_of? C::CExpression and iv = e.rexpr.reduce(@c_parser) and iv.kind_of? ::Integer) or + (e.rexpr.kind_of? C::CExpression and e.rexpr.op == :& and not e.rexpr.lexpr and e.rexpr.lexpr.kind_of? C::Variable) or + (e.rexpr.kind_of? 
C::Variable and e.rexpr.type.kind_of? C::Array) + rewritten = false + readers = [] + discard = [e] + g.exprs.each { |l, el| + el.each_with_index { |ce, ci| + if ce_write(ce, v) and [label, i-1] != [l, ci] + if ce == e + discard << ce + else + rewritten = true + break + end + elsif ce_read(ce, v) + if walk_ce(ce) { |_ce| break true if _ce.op == :& and not _ce.lexpr and _ce.rexpr == v } + # i = 2 ; j = &i =!> j = &2 + rewritten = true + break + end + readers << ce + end + } if not rewritten + } + if not rewritten + ce_patch(readers, v, C::CExpression[iv || e.rexpr]) + discard.each { |d| d.lexpr = d.op = d.rexpr = nil } + next + end + end + + case nr = find_next_read[label, i, v] + when C::CExpression + # read in one place only, try to patch rexpr in there + r = e.rexpr + + # must check for conflicts (x = y; y += 1; foo(x) =!> foo(y)) + # XXX x = a[1]; *(a+1) = 28; foo(x)... + isfunc = false + depend_vars = [] + walk_ce(C::CExpression[r]) { |ce| + isfunc = true if ce.op == :func and (not ce.lexpr.kind_of? C::Variable or + not ce.lexpr.has_attribute('pure')) # XXX is there a C attr for func depending only on staticvars+param ? + depend_vars << ce.lexpr if ce.lexpr.kind_of? C::Variable + depend_vars << ce.rexpr if ce.rexpr.kind_of? C::Variable and (ce.lexpr or ce.op != :&) # a = &v; v = 12; func(a) => func(&v) + depend_vars << ce if ce.lvalue? + depend_vars.concat(ce.rexpr.grep(C::Variable)) if ce.rexpr.kind_of? ::Array + } + depend_vars.uniq! 
+ + # XXX x = 1; if () { x = 2; } foo(x) =!> foo(1) (find_next_read will return this) + # we'll just redo a find_next_read like + # XXX b = &a; a = 1; *b = 2; foo(a) unhandled & generate bad C + l_l = label + l_i = i + while g.exprs[l_l].to_a.each_with_index { |ce_, n_i| + next if n_i < l_i + # count occurences of read v in ce_ + cnt = 0 + bad = false + walk_ce(ce_) { |ce| + case ce.op + when :funcall + bad = true if isfunc + ce.rexpr.each { |a| cnt += 1 if a == v } + cnt += 1 if ce.lexpr == v + when :'=' + bad = true if depend_vars.include? ce.lexpr + cnt += 1 if ce.rexpr == v + else + bad = true if (ce.op == :'++' or ce.op == :'--') and depend_vars.include? ce.rexpr + bad = true if AssignOp.include? ce.op and depend_vars.include? ce.lexpr + cnt += 1 if ce.lexpr == v + cnt += 1 if ce.rexpr == v + end + } + case cnt + when 0 + break if bad + next + when 1 # good + break if e.complexity > 10 and ce_.complexity > 3 # try to keep the C readable + # x = 1; y = x; z = x; => cannot suppress x + nr = find_next_read[l_l, n_i+1, v] + break if (nr.kind_of? C::CExpression or nr == :split) and not walk_ce(ce_) { |ce| break true if ce.op == :'=' and ce.lexpr == v } + else break # a = 1; b = a + a => fail + end + + # TODO XXX x = 1; y = x; z = x; + res = walk_ce(ce_, true) { |ce| + case ce.op + when :funcall + if ce.rexpr.to_a.each_with_index { |a,i_| + next if a != v + ce.rexpr[i_] = r + break :done + } == :done + break :done + elsif ce.lexpr == v + ce.lexpr = r + break :done + elsif isfunc + break :fail + end + when *AssignOp + break :fail if not ce.lexpr and depend_vars.include? ce.rexpr # ++depend + if ce.rexpr == v + ce.rexpr = r + break :done + elsif ce.lexpr == v or depend_vars.include? 
ce.lexpr + break :fail + end + else + break :fail if ce.op == :& and not ce.lexpr and ce.rexpr == v + if ce.lexpr == v + ce.lexpr = r + break :done + elsif ce.rexpr == v + ce_.type = r.type if not ce_.op and ce_.rexpr == v # return (int32)eax + ce.rexpr = r + break :done + end + end + } + case res + when :done + i -= 1 + exprs.delete_at(i) + e.lexpr = e.op = e.rexpr = nil + break + when :fail + break + end + } + # ignore branches that will never reuse v + may_to = g.to_optim[l_l].find_all { |to| find_next_read[to, 0, v].kind_of? C::CExpression } + if may_to.length == 1 and to = may_to.first and to != l_l and g.from_optim[to] == [l_l] + l_i = 0 + l_l = to + else break + end + end + + when nil, :write + # useless assignment (value never read later) + # XXX foo = &bar; bar = 12; baz(*foo) + e.replace(C::CExpression[e.rexpr]) + # remove sideeffectless subexprs + loop do + case e.op + when :funcall, *AssignOp + else + l = (e.lexpr.kind_of? C::CExpression and sideeffect(e.lexpr)) + r = (e.rexpr.kind_of? C::CExpression and sideeffect(e.rexpr)) + if l and r # could split... + elsif l + e.replace(e.lexpr) + next + elsif r + e.replace(e.rexpr) + next + else # remove the assignment altogether + i -= 1 + exprs.delete_at(i) + e.lexpr = e.op = e.rexpr = nil + end + end + break + end + end + end + end + } + + # wipe cexprs marked in the previous step + walk(scope) { |st| + next if not st.kind_of? C::Block + st.statements.delete_if { |e| e.kind_of? C::CExpression and not e.lexpr and not e.op and not e.rexpr } + } + + # reoptimize cexprs + walk_ce(scope, true) { |ce| + # redo some simplification that may become available after variable propagation + # int8 & 255 => int8 + if ce.op == :& and ce.lexpr and ce.lexpr.type.integral? and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == (1 << (8*sizeof(ce.lexpr))) - 1 + ce.replace C::CExpression[ce.lexpr] + end + + # int *ptr; *(ptr + 4) => ptr[4] + if ce.op == :* and not ce.lexpr and ce.rexpr.kind_of? 
C::CExpression and ce.rexpr.op == :+ and var = ce.rexpr.lexpr and var.kind_of? C::Variable and var.type.pointer? + ce.lexpr, ce.op, ce.rexpr = ce.rexpr.lexpr, :'[]', ce.rexpr.rexpr + end + + # useless casts + if not ce.op and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and (ce.rexpr.rexpr.kind_of? C::CExpression or + (ce.type.pointer? and ce.rexpr.rexpr == 0 and not ce.type.pointed.untypedef.kind_of? C::Union)) # keep ((struct*)0)->memb + ce.rexpr = ce.rexpr.rexpr + end + if not ce.op and ce.rexpr.kind_of? C::CExpression and (ce.type == ce.rexpr.type or (ce.type.integral? and ce.rexpr.type.integral?)) + ce.replace ce.rexpr + end + # useless casts (type)*((oeua)Ptype) + if not ce.op and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :* and not ce.rexpr.lexpr and ce.rexpr.rexpr.kind_of? C::CExpression and not ce.rexpr.rexpr.op and + p = ce.rexpr.rexpr.rexpr and p.kind_of? C::Typed and p.type.pointer? and ce.type == p.type.pointed + ce.op = ce.rexpr.op + ce.rexpr = ce.rexpr.rexpr.rexpr + end + # (a > 0) != 0 + if ce.op == :'!=' and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 0 and ce.lexpr.kind_of? C::CExpression and + [:<, :<=, :>, :>=, :'==', :'!=', :'!'].include? ce.lexpr.op + ce.replace ce.lexpr + end + # a == 0 => !a + if ce.op == :== and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 0 + ce.replace C::CExpression[:'!', ce.lexpr] + end + # !(int)a => !a + if ce.op == :'!' and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr.kind_of? C::CExpression + ce.rexpr = ce.rexpr.rexpr + end + # (int)a < (int)b => a < b TODO uint <-> int + if [:<, :<=, :>, :>=].include? ce.op and ce.rexpr.kind_of? C::CExpression and ce.lexpr.kind_of? C::CExpression and not ce.rexpr.op and not ce.lexpr.op and + ce.rexpr.rexpr.kind_of? C::CExpression and ce.rexpr.rexpr.type.pointer? and ce.lexpr.rexpr.kind_of? C::CExpression and ce.lexpr.rexpr.type.pointer? 
# Removes from +scope+ the local variables that no expression references.
#
# A variable counts as referenced when it appears directly as the lexpr or rexpr of a
# CExpression yielded by walk_ce, or as a member of an Array rexpr (the form used for
# funcall arguments elsewhere in this file).
#
# Every symbol of scope.symbol that is not referenced:
# * gets the 'unused' attribute (the Variable object may still be held elsewhere)
# * loses its C::Declaration in scope.statements
# * is deleted from scope.symbol
#
# Declarations whose variable is not registered in scope.symbol are left alone.
# Returns scope.symbol.
def remove_unreferenced_vars(scope)
  used = {}
  walk_ce(scope) { |ce|
    used[ce.lexpr.name] = true if ce.lexpr.kind_of? C::Variable
    case ce.rexpr
    when C::Variable
      used[ce.rexpr.name] = true
    when ::Array
      ce.rexpr.each { |arg| used[arg.name] = true if arg.kind_of? C::Variable }
    end
  }

  # name => true lookup table: membership is tested once per statement and once per
  # symbol below, an Array#include? there would rescan the whole list every time
  unused = {}
  scope.symbol.each_key { |name| unused[name] = true if not used[name] }

  unused.each_key { |name| scope.symbol[name].add_attribute 'unused' }	# fastcall args need it
  scope.statements.delete_if { |sm| sm.kind_of? C::Declaration and unused[sm.var.name] }
  scope.symbol.delete_if { |name, _var| unused[name] }
end
# reorder statements to put decl first, move assignments to decl, move args to func prototype
#
# scope is the function body, func the function variable (its type.args is rebuilt here).
# Steps, in order:
# 1. every symbol typed void is retyped int
# 2. all C::Declaration are pulled out of scope.statements: those with a positive stack
#    offset (and a name not ending in _a<digits>) become arguments, the others stay locals
# 3. the leading run of 'var = <integer>' statements is folded into the variables' initializers
# 4. the local declarations are reinserted at the top, sorted by decreasing stack offset then name
# 5. func.type.args is rebuilt sorted by stack offset, inserting 'unused' int arguments
#    wherever a pointer-sized slot has no known argument
def cleanup_var_decl(scope, func)
  scope.symbol.each_value { |sym|
    next if not sym.type.void?
    sym.type = C::BaseType.new(:int)
  }

  arglist = func.type.args
  locals = []
  scope.statements.delete_if { |stmt|
    next if not stmt.kind_of? C::Declaration
    var = stmt.var
    is_arg = (var.stackoff.to_i > 0 and var.name !~ /_a(\d+)$/)	# aliased vars: use 1st domain only
    if is_arg
      arglist << var
    else
      locals << stmt
    end
    true
  }

  # move trivial affectations to initialiser
  # XXX a = 1 ; b = a ; a = 2
  trivial = lambda { |st|
    st.kind_of? C::CExpression and st.op == :'=' and st.rexpr.kind_of? C::CExpression and not st.rexpr.op and
      st.rexpr.rexpr.kind_of? ::Integer and st.lexpr.kind_of? C::Variable and scope.symbol[st.lexpr.name]
  }
  # only the statements preceding the first non-trivial one are consumed
  while st = scope.statements.first and trivial[st]
    st.lexpr.initializer = st.rexpr
    scope.statements.shift
  end

  # reorder declarations
  sorted_locals = locals.sort_by { |dc| [-dc.var.stackoff.to_i, dc.var.name] }
  scope.statements.replace(sorted_locals + scope.statements)

  # ensure arglist has no hole (create&add unreferenced args)
  func.type.args = []
  next_off = @c_parser.typesize[:ptr]
  arglist.sort_by { |arg| arg.stackoff.to_i }.each { |arg|
    # XXX misalignment ?
    curoff = arg.stackoff
    if curoff
      while curoff > next_off
        filler = C::Variable.new
        filler.name = scope.decompdata[:stackoff_name][next_off] || stackoff_to_varname(next_off)
        filler.type = C::BaseType.new(:int)
        filler.attributes = ['unused']
        func.type.args << filler
        scope.symbol[filler.name] = filler
        next_off += @c_parser.typesize[:ptr]
      end
      func.type.args << arg
      next_off += @c_parser.typesize[:ptr]
    else
      func.type.args << arg	# __fastcall
    end
  }
end
# yields each statement (recursive)
#
# scope may be a statement, an Array of statements or a C::Declaration (whose initializer
# is walked when present); anything else is ignored.
# With post false (default) a statement is yielded before its children, with post true
# it is yielded after them.
# The test of a C::If / C::While / C::DoWhile and the value of a C::Return are yielded
# directly, they are not walked recursively.
def walk(scope, post=false, &b)
  case scope
  when ::Array
    scope.each { |s| walk(s, post, &b) }
  when C::Statement
    yield scope if not post
    case scope
    when C::Block
      walk(scope.statements, post, &b)
    when C::If
      yield scope.test
      walk(scope.bthen, post, &b)
      walk(scope.belse, post, &b) if scope.belse
    when C::While, C::DoWhile
      yield scope.test
      walk(scope.body, post, &b)
    when C::Return
      # a Return may carry no value: do not hand nil to the block
      yield scope.value if scope.value
    end
    yield scope if post
  when C::Declaration
    walk(scope.var.initializer, post, &b) if scope.var.initializer
  end
end
C::Array and not var.kind_of? C::Variable + @c_parser.sizeof(var, type) rescue -1 + end end end diff --git a/lib/metasm/metasm/disassemble.rb b/lib/metasm/metasm/disassemble.rb index 01a91eb803..3b0a13d6ef 100644 --- a/lib/metasm/metasm/disassemble.rb +++ b/lib/metasm/metasm/disassemble.rb @@ -10,2210 +10,2210 @@ require 'metasm/decode' module Metasm # holds information for decoded instructions: the original opcode, a pointer to the InstructionBlock, etc class DecodedInstruction - # the instance of InstructionBlock this di is into - attr_accessor :block - # our offset (in bytes) from the start of the block, used only for hexdump - attr_accessor :block_offset - # the address of the instruction's first byte in memory - attr_accessor :address - # the disassembled data - attr_accessor :instruction, :opcode - # our, length in bytes - attr_accessor :bin_length - # array of arbitrary strings - attr_accessor :comment - # a cache of the binding used by the backtracker to emulate this instruction - attr_accessor :backtrace_binding + # the instance of InstructionBlock this di is into + attr_accessor :block + # our offset (in bytes) from the start of the block, used only for hexdump + attr_accessor :block_offset + # the address of the instruction's first byte in memory + attr_accessor :address + # the disassembled data + attr_accessor :instruction, :opcode + # our, length in bytes + attr_accessor :bin_length + # array of arbitrary strings + attr_accessor :comment + # a cache of the binding used by the backtracker to emulate this instruction + attr_accessor :backtrace_binding - # create a new DecodedInstruction with an Instruction whose cpu is the argument - # can take an existing Instruction as argument - def initialize(arg, addr=nil) - case arg - when Instruction - @instruction = arg - @opcode = @instruction.cpu.opcode_list.find { |op| op.name == @instruction.opname } if @instruction.cpu - else @instruction = Instruction.new(arg) - end - @bin_length = 0 - @address = addr if 
addr - end + # create a new DecodedInstruction with an Instruction whose cpu is the argument + # can take an existing Instruction as argument + def initialize(arg, addr=nil) + case arg + when Instruction + @instruction = arg + @opcode = @instruction.cpu.opcode_list.find { |op| op.name == @instruction.opname } if @instruction.cpu + else @instruction = Instruction.new(arg) + end + @bin_length = 0 + @address = addr if addr + end - def next_addr=(a) @next_addr = a end - def next_addr - (@next_addr ||= nil) || (address + @bin_length) if address - end + def next_addr=(a) @next_addr = a end + def next_addr + (@next_addr ||= nil) || (address + @bin_length) if address + end - def show - if block - bin = @block.edata.data[@block.edata_ptr+@block_offset, @bin_length].unpack('C*').map { |c| '%02x' % c }.join - if @bin_length > 12 - bin = bin[0, 20] + "..<+#{@bin_length-10}>" - end - " #{@instruction.to_s.ljust(44)} ; @#{Expression[address]} #{bin} #{@comment.sort[0,6].join(' ') if comment}" - else - "#{@instruction}#{' ; ' + @comment.join(' ') if comment}" - end - end + def show + if block + bin = @block.edata.data[@block.edata_ptr+@block_offset, @bin_length].unpack('C*').map { |c| '%02x' % c }.join + if @bin_length > 12 + bin = bin[0, 20] + "..<+#{@bin_length-10}>" + end + " #{@instruction.to_s.ljust(44)} ; @#{Expression[address]} #{bin} #{@comment.sort[0,6].join(' ') if comment}" + else + "#{@instruction}#{' ; ' + @comment.join(' ') if comment}" + end + end - include Renderable - def render - ret = [] - ret << Expression[address] << ' ' if address - ret << @instruction - ret << ' ; ' << @comment if comment - ret - end + include Renderable + def render + ret = [] + ret << Expression[address] << ' ' if address + ret << @instruction + ret << ' ; ' << @comment if comment + ret + end - def add_comment(c) - @comment ||= [] - @comment |= [c] - end + def add_comment(c) + @comment ||= [] + @comment |= [c] + end - # returns a copy of the DecInstr, with duplicated #instruction 
# holds information on a backtracked expression near begin and end of instruction blocks (#backtracked_for)
class BacktraceTrace
  # where the rebacktrace should restart:
  # address is the address of the instruction in the block (use with the from_subfuncret bool),
  # nil if the backtrace is from block start
  # exclude_instr is a bool saying if the backtrace should start at address or at the preceding instruction
  # these are optional: if absent, expr is to be rebacktracked when a new codepath arrives at the beginning of the block
  attr_accessor :address, :from_subfuncret, :exclude_instr
  # address of the instruction that initiated the backtrace
  attr_accessor :origin
  # the Expression to backtrace at this point
  attr_accessor :expr
  # the original backtracked Expression
  attr_accessor :orig_expr
  # length of r/w xref (in bytes)
  attr_accessor :len
  # :r/:w/:x
  attr_accessor :type
  # bool: true if this maps to a :x that should not have a from when resolved
  attr_accessor :detached
  # maxdepth at the point of the object creation
  attr_accessor :maxdepth

  # len and maxdepth are stored only when given (non-nil, non-false)
  def initialize(expr, origin, orig_expr, type, len=nil, maxdepth=nil)
    @expr = expr
    @origin = origin
    @orig_expr = orig_expr
    @type = type
    @len = len if len
    @maxdepth = maxdepth if maxdepth
  end

  # NOTE(review): the hash depends on expr, which eql? does not compare, so two traces that
  # are eql? may still hash differently - confirm whether hash-based dedup relies on this
  def hash
    [origin, expr].hash
  end

  # two traces are equal when they have the same class and the same restart point,
  # origin, original expression, length, type and detached flag
  # (expr and maxdepth are not part of the comparison)
  def eql?(o)
    return false if o.class != self.class
    mine   = [  address,   from_subfuncret,   exclude_instr,   origin,   orig_expr,   len,   type,   detached]
    theirs = [o.address, o.from_subfuncret, o.exclude_instr, o.origin, o.orig_expr, o.len, o.type, o.detached]
    mine == theirs
  end
  alias == eql?
end
# holds a list of contiguous decoded instructions, forming an uninterrupted block (except for eg CPU exceptions)
# most attributes are either a value or an array of values, use the associated iterator.
class InstructionBlock
  # address of the first instruction
  attr_accessor :address
  # pointer to raw data
  attr_accessor :edata, :edata_ptr
  # list of DecodedInstructions
  attr_accessor :list
  # address of instructions giving control directly to us
  # includes addr of normal instruction when call flow continues to us past the end of the preceding block
  # does not include addresses of subfunction return instructions
  # may be nil or an array
  attr_accessor :from_normal
  # address of instructions called/jumped to
  attr_accessor :to_normal
  # address of an instruction that calls a subfunction which returns to us
  attr_accessor :from_subfuncret
  # address of instruction executed after a called subfunction returns
  attr_accessor :to_subfuncret
  # address of instructions executed indirectly through us (callback in a subfunction, SEH...)
  # XXX from_indirect is not populated for now
  attr_accessor :from_indirect, :to_indirect
  # array of BacktraceTrace
  # when a new code path comes to us, it should be backtracked for the values of :r/:w/:x using btt with no address
  # for internal use only (block splitting): btt with an address
  attr_accessor :backtracked_for

  # create a new InstructionBlock based at address
  # also accepts a DecodedInstruction or an Array of them to initialize from
  # (the block address is then the address of the first instruction, nil for an empty Array)
  # edata_ptr defaults to edata.ptr, or 0 when there is no edata
  def initialize(arg0, edata=nil, edata_ptr=nil)
    @list = []
    case arg0
    when DecodedInstruction
      @address = arg0.address
      add_di(arg0)
    when Array
      @address = arg0.first.address if not arg0.empty?
      arg0.each { |di| add_di(di) }
    else
      @address = arg0
    end
    edata_ptr ||= edata ? edata.ptr : 0
    @edata, @edata_ptr = edata, edata_ptr
    @backtracked_for = []
  end

  # total length in bytes: end offset of the last instruction, 0 for an empty block
  def bin_length
    (di = @list.last) ? di.block_offset + di.bin_length : 0
  end

  # splits the current block into a new one with all di from address addr to end
  # caller is responsible for rebacktracing new.bt_for to regenerate correct old.btt/new.btt
  # raises if addr is not the address of an instruction of the block, or is its first instruction
  # returns the new block
  def split(addr)
    # single scan: the block form gives the position directly
    idx = @list.index { |di| di.address == addr }
    raise "invalid split @#{Expression[addr]}" if not idx or idx == 0

    off = @list[idx].block_offset
    new_b = self.class.new(addr, @edata, @edata_ptr + off)
    # cut the tail in one operation, add_di rebases block/block_offset on the new block
    @list.slice!(idx..-1).each { |di| new_b.add_di di }

    # the outgoing links now belong to the tail, we only flow into the new block
    new_b.to_normal, @to_normal = to_normal, new_b.to_normal
    new_b.to_subfuncret, @to_subfuncret = to_subfuncret, new_b.to_subfuncret
    new_b.add_from @list.last.address
    add_to new_b.address

    # traces anchored on a moved instruction follow it
    @backtracked_for.delete_if { |btt|
      if btt.address and new_b.list.find { |di| di.address == btt.address }
        new_b.backtracked_for << btt
        true
      end
    }
    new_b
  end

  # adds a decodedinstruction to the block list, updates di.block and di.block_offset
  # di.address is derived from the block address when not already set
  def add_di(di)
    di.block = self
    di.block_offset = bin_length
    di.address ||= @address + di.block_offset
    @list << di
  end
end
:localvars_xrefs + # when backtracking an instruction that calls us, use this binding and then the instruction's + # the binding is lazily filled up for non-external functions, register by register, when + # a backtraced expression depends on it + attr_accessor :backtrace_binding + # same as InstructionBlock#backtracked_for + # includes the expression responsible of the function return (eg [esp] on ia32) + attr_accessor :backtracked_for + # addresses of instruction causing the function to return + attr_accessor :return_address + # a lambda called for dynamic backtrace_binding generation + attr_accessor :btbind_callback + # a lambda called for dynamic backtracked_for + attr_accessor :btfor_callback + # bool, if false the function is actually being disassembled + attr_accessor :finalized + # bool, if true the function does not return (eg exit() or ExitProcess()) + attr_accessor :noreturn + # hash stackoff => varname + # varname is a single String object shared by all ExpressionStrings (to allow renames) + attr_accessor :localvars + # hash stack offset => di address + attr_accessor :localvars_xrefs - # if btbind_callback is defined, calls it with args [dasm, binding, funcaddr, calladdr, expr, origin, maxdepth] - # else update lazily the binding from expr.externals, and return backtrace_binding - def get_backtrace_binding(dasm, funcaddr, calladdr, expr, origin, maxdepth) - if btbind_callback - @btbind_callback[dasm, @backtrace_binding, funcaddr, calladdr, expr, origin, maxdepth] - elsif backtrace_binding and dest = @backtrace_binding[:thunk] and target = dasm.function[dest] - target.get_backtrace_binding(dasm, funcaddr, calladdr, expr, origin, maxdepth) - else - unk_regs = expr.externals.grep(Symbol).uniq - @backtrace_binding.keys - [:unknown] - dasm.cpu.backtrace_update_function_binding(dasm, funcaddr, self, return_address, *unk_regs) if not unk_regs.empty? 
- @backtrace_binding - end - end + # if btbind_callback is defined, calls it with args [dasm, binding, funcaddr, calladdr, expr, origin, maxdepth] + # else update lazily the binding from expr.externals, and return backtrace_binding + def get_backtrace_binding(dasm, funcaddr, calladdr, expr, origin, maxdepth) + if btbind_callback + @btbind_callback[dasm, @backtrace_binding, funcaddr, calladdr, expr, origin, maxdepth] + elsif backtrace_binding and dest = @backtrace_binding[:thunk] and target = dasm.function[dest] + target.get_backtrace_binding(dasm, funcaddr, calladdr, expr, origin, maxdepth) + else + unk_regs = expr.externals.grep(Symbol).uniq - @backtrace_binding.keys - [:unknown] + dasm.cpu.backtrace_update_function_binding(dasm, funcaddr, self, return_address, *unk_regs) if not unk_regs.empty? + @backtrace_binding + end + end - # if btfor_callback is defined, calls it with args [dasm, bt_for, funcaddr, calladdr] - # else return backtracked_for - def get_backtracked_for(dasm, funcaddr, calladdr) - if btfor_callback - @btfor_callback[dasm, @backtracked_for, funcaddr, calladdr] - elsif backtrace_binding and dest = @backtrace_binding[:thunk] and target = dasm.function[dest] - target.get_backtracked_for(dasm, funcaddr, calladdr) - else - @backtracked_for - end - end + # if btfor_callback is defined, calls it with args [dasm, bt_for, funcaddr, calladdr] + # else return backtracked_for + def get_backtracked_for(dasm, funcaddr, calladdr) + if btfor_callback + @btfor_callback[dasm, @backtracked_for, funcaddr, calladdr] + elsif backtrace_binding and dest = @backtrace_binding[:thunk] and target = dasm.function[dest] + target.get_backtracked_for(dasm, funcaddr, calladdr) + else + @backtracked_for + end + end - def initialize - @backtracked_for = [] - @backtrace_binding = {} - end + def initialize + @backtracked_for = [] + @backtrace_binding = {} + end - def get_localvar_stackoff(off, di=nil, str=nil) - if di - @localvars_xrefs ||= {} - @localvars_xrefs[off] ||= [] - 
@localvars_xrefs[off] |= [di.address] - end - @localvars ||= {} - @localvars[off] ||= (str || (off > 0 ? 'arg_%X' % off : 'var_%X' % -off)) - end + def get_localvar_stackoff(off, di=nil, str=nil) + if di + @localvars_xrefs ||= {} + @localvars_xrefs[off] ||= [] + @localvars_xrefs[off] |= [di.address] + end + @localvars ||= {} + @localvars[off] ||= (str || (off > 0 ? 'arg_%X' % off : 'var_%X' % -off)) + end end class CPU - # return the thing to backtrace to find +value+ before the execution of this instruction - # eg backtrace_emu('inc eax', Expression[:eax]) => Expression[:eax + 1] - # (the value of :eax after 'inc eax' is the value of :eax before plus 1) - # may return Expression::Unknown - def backtrace_emu(di, value) - Expression[Expression[value].bind(di.backtrace_binding ||= get_backtrace_binding(di)).reduce] - end + # return the thing to backtrace to find +value+ before the execution of this instruction + # eg backtrace_emu('inc eax', Expression[:eax]) => Expression[:eax + 1] + # (the value of :eax after 'inc eax' is the value of :eax before plus 1) + # may return Expression::Unknown + def backtrace_emu(di, value) + Expression[Expression[value].bind(di.backtrace_binding ||= get_backtrace_binding(di)).reduce] + end - # returns a list of Expressions/Integer to backtrace to find an execution target - def get_xrefs_x(dasm, di) - end + # returns a list of Expressions/Integer to backtrace to find an execution target + def get_xrefs_x(dasm, di) + end - # returns a list of [type, address, len] - def get_xrefs_rw(dasm, di) - get_xrefs_r(dasm, di).map { |addr, len| [:r, addr, len] } + get_xrefs_w(dasm, di).map { |addr, len| [:w, addr, len] } - end + # returns a list of [type, address, len] + def get_xrefs_rw(dasm, di) + get_xrefs_r(dasm, di).map { |addr, len| [:r, addr, len] } + get_xrefs_w(dasm, di).map { |addr, len| [:w, addr, len] } + end - # returns a list [addr, len] - def get_xrefs_r(dasm, di) - b = di.backtrace_binding ||= get_backtrace_binding(di) - r = b.values 
- x = get_xrefs_x(dasm, di) - r |= x if x - (r.grep(Indirection) + r.grep(Expression).map { |e| e.expr_indirections }.flatten).map { |e| [e.target, e.len] } - end + # returns a list [addr, len] + def get_xrefs_r(dasm, di) + b = di.backtrace_binding ||= get_backtrace_binding(di) + r = b.values + x = get_xrefs_x(dasm, di) + r |= x if x + (r.grep(Indirection) + r.grep(Expression).map { |e| e.expr_indirections }.flatten).map { |e| [e.target, e.len] } + end - # returns a list [addr, len] - def get_xrefs_w(dasm, di) - b = di.backtrace_binding ||= get_backtrace_binding(di) - w = b.keys - (w.grep(Indirection) + w.grep(Expression).map { |e| e.expr_indirections }.flatten).map { |e| [e.target, e.len] } - end + # returns a list [addr, len] + def get_xrefs_w(dasm, di) + b = di.backtrace_binding ||= get_backtrace_binding(di) + w = b.keys + (w.grep(Indirection) + w.grep(Expression).map { |e| e.expr_indirections }.flatten).map { |e| [e.target, e.len] } + end - # checks if the expression corresponds to a function return value with the instruction - # (eg di == 'call something' and expr == [esp]) - def backtrace_is_function_return(expr, di=nil) - end + # checks if the expression corresponds to a function return value with the instruction + # (eg di == 'call something' and expr == [esp]) + def backtrace_is_function_return(expr, di=nil) + end - # updates f.backtrace_binding when a new return address has been found - # TODO update also when anything changes inside the function (new loop found etc) - use backtracked_for ? - def backtrace_update_function_binding(dasm, faddr, f, retaddrlist, *wantregs) - end + # updates f.backtrace_binding when a new return address has been found + # TODO update also when anything changes inside the function (new loop found etc) - use backtracked_for ? 
+ def backtrace_update_function_binding(dasm, faddr, f, retaddrlist, *wantregs) + end - # returns if the expression is an address on the stack - # (to avoid trying to backtrace its absolute address until we found function boundaries) - def backtrace_is_stack_address(expr) - end + # returns if the expression is an address on the stack + # (to avoid trying to backtrace its absolute address until we found function boundaries) + def backtrace_is_stack_address(expr) + end - # updates the instruction arguments: replace an expression with another (eg when a label is renamed) - def replace_instr_arg_immediate(i, old, new) - i.args.map! { |a| - case a - when Expression; Expression[a.bind(old => new).reduce] - else a - end - } - end + # updates the instruction arguments: replace an expression with another (eg when a label is renamed) + def replace_instr_arg_immediate(i, old, new) + i.args.map! { |a| + case a + when Expression; Expression[a.bind(old => new).reduce] + else a + end + } + end - # a callback called whenever a backtrace is successful - # di is the decodedinstruction at the backtrace's origin - def backtrace_found_result(dasm, di, expr, type, len) - end + # a callback called whenever a backtrace is successful + # di is the decodedinstruction at the backtrace's origin + def backtrace_found_result(dasm, di, expr, type, len) + end end class ExeFormat - # returns a string containing asm-style section declaration - def dump_section_header(addr, edata) - "\n// section at #{Expression[addr]}" - end + # returns a string containing asm-style section declaration + def dump_section_header(addr, edata) + "\n// section at #{Expression[addr]}" + end - # returns an array of expressions that may be executed by this instruction - def get_xrefs_x(dasm, di) @cpu.get_xrefs_x(dasm, di) end + # returns an array of expressions that may be executed by this instruction + def get_xrefs_x(dasm, di) @cpu.get_xrefs_x(dasm, di) end - # returns an array of [type, expression, length] that may be 
accessed by this instruction (type is :r/:w, len is in bytes) - def get_xrefs_rw(dasm, di) @cpu.get_xrefs_rw(dasm, di) end + # returns an array of [type, expression, length] that may be accessed by this instruction (type is :r/:w, len is in bytes) + def get_xrefs_rw(dasm, di) @cpu.get_xrefs_rw(dasm, di) end end # a disassembler class # holds a copy of a program sections, a list of decoded instructions, xrefs # is able to backtrace an expression from an address following the call flow (backwards) class Disassembler - attr_accessor :program, :cpu - # binding (jointure of @sections.values.exports) - attr_accessor :prog_binding - # hash addr => edata - attr_accessor :sections - # hash addr => DecodedInstruction - attr_accessor :decoded - # hash addr => DecodedFunction (includes 'imported' functions) - attr_accessor :function - # hash addr => (array of) xrefs - access with +add_xref+/+each_xref+ - attr_accessor :xrefs - # bool, true to check write xrefs on each instr disasm (default true) - attr_accessor :check_smc - # list of [addr to disassemble, (optional)who jumped to it, (optional)got there by a subfunction return] - attr_accessor :addrs_todo - # hash address => binding - attr_accessor :address_binding - # number of blocks to backtrace before aborting if no result is found (defaults to class.backtrace_maxblocks, 50 by default) - attr_accessor :backtrace_maxblocks - # maximum backtrace length for :r/:w, defaults to backtrace_maxblocks - attr_accessor :backtrace_maxblocks_data - # max bt length for backtrace_fast blocks, default=0 - attr_accessor :backtrace_maxblocks_fast - # max complexity for an Expr during backtrace before abort - attr_accessor :backtrace_maxcomplexity, :backtrace_maxcomplexity_data - # maximum number of instructions inside a basic block, split past this limit - attr_accessor :disassemble_maxblocklength - # a cparser that parsed some C header files, prototypes are converted to DecodedFunction when jumped to - attr_accessor :c_parser - # hash 
address => array of strings - # default dasm dump will only show comments at beginning of code blocks - attr_accessor :comment - # bool, set to true (default) if functions with undetermined binding should be assumed to return with ABI-conforming binding (conserve frame ptr) - attr_accessor :funcs_stdabi - # callback called whenever an instruction will backtrace :x (before the backtrace is started) - # arguments: |addr of origin, array of exprs to backtrace| - # must return the replacement array, nil == [] - attr_accessor :callback_newaddr - # called whenever an instruction is decoded and added to an instruction block. arg: the new decoded instruction - # returns the new di to consider (nil to end block) - attr_accessor :callback_newinstr - # called whenever the disassembler tries to disassemble an addresse that has been written to. arg: the address - attr_accessor :callback_selfmodifying - # called when the disassembler stops (stopexec/undecodable instruction) - attr_accessor :callback_stopaddr - # callback called before each backtrace that may take some time - attr_accessor :callback_prebacktrace - # callback called once all addresses have been disassembled - attr_accessor :callback_finished - # pointer to the gui widget we're displayed in - attr_accessor :gui + attr_accessor :program, :cpu + # binding (jointure of @sections.values.exports) + attr_accessor :prog_binding + # hash addr => edata + attr_accessor :sections + # hash addr => DecodedInstruction + attr_accessor :decoded + # hash addr => DecodedFunction (includes 'imported' functions) + attr_accessor :function + # hash addr => (array of) xrefs - access with +add_xref+/+each_xref+ + attr_accessor :xrefs + # bool, true to check write xrefs on each instr disasm (default true) + attr_accessor :check_smc + # list of [addr to disassemble, (optional)who jumped to it, (optional)got there by a subfunction return] + attr_accessor :addrs_todo + # hash address => binding + attr_accessor :address_binding + # number of 
blocks to backtrace before aborting if no result is found (defaults to class.backtrace_maxblocks, 50 by default) + attr_accessor :backtrace_maxblocks + # maximum backtrace length for :r/:w, defaults to backtrace_maxblocks + attr_accessor :backtrace_maxblocks_data + # max bt length for backtrace_fast blocks, default=0 + attr_accessor :backtrace_maxblocks_fast + # max complexity for an Expr during backtrace before abort + attr_accessor :backtrace_maxcomplexity, :backtrace_maxcomplexity_data + # maximum number of instructions inside a basic block, split past this limit + attr_accessor :disassemble_maxblocklength + # a cparser that parsed some C header files, prototypes are converted to DecodedFunction when jumped to + attr_accessor :c_parser + # hash address => array of strings + # default dasm dump will only show comments at beginning of code blocks + attr_accessor :comment + # bool, set to true (default) if functions with undetermined binding should be assumed to return with ABI-conforming binding (conserve frame ptr) + attr_accessor :funcs_stdabi + # callback called whenever an instruction will backtrace :x (before the backtrace is started) + # arguments: |addr of origin, array of exprs to backtrace| + # must return the replacement array, nil == [] + attr_accessor :callback_newaddr + # called whenever an instruction is decoded and added to an instruction block. arg: the new decoded instruction + # returns the new di to consider (nil to end block) + attr_accessor :callback_newinstr + # called whenever the disassembler tries to disassemble an addresse that has been written to. 
arg: the address + attr_accessor :callback_selfmodifying + # called when the disassembler stops (stopexec/undecodable instruction) + attr_accessor :callback_stopaddr + # callback called before each backtrace that may take some time + attr_accessor :callback_prebacktrace + # callback called once all addresses have been disassembled + attr_accessor :callback_finished + # pointer to the gui widget we're displayed in + attr_accessor :gui - @@backtrace_maxblocks = 50 + @@backtrace_maxblocks = 50 - # creates a new disassembler - def initialize(program, cpu=program.cpu) - reinitialize(program, cpu) - end + # creates a new disassembler + def initialize(program, cpu=program.cpu) + reinitialize(program, cpu) + end - # resets the program - def reinitialize(program, cpu=program.cpu) - @program = program - @cpu = cpu - @sections = {} - @decoded = {} - @xrefs = {} - @function = {} - @check_smc = true - @prog_binding = {} - @old_prog_binding = {} # same as prog_binding, but keep old var names - @addrs_todo = [] - @addrs_done = [] - @address_binding = {} - @backtrace_maxblocks = @@backtrace_maxblocks - @backtrace_maxblocks_fast = 0 - @backtrace_maxcomplexity = 40 - @backtrace_maxcomplexity_data = 5 - @disassemble_maxblocklength = 100 - @comment = {} - @funcs_stdabi = true - end + # resets the program + def reinitialize(program, cpu=program.cpu) + @program = program + @cpu = cpu + @sections = {} + @decoded = {} + @xrefs = {} + @function = {} + @check_smc = true + @prog_binding = {} + @old_prog_binding = {} # same as prog_binding, but keep old var names + @addrs_todo = [] + @addrs_done = [] + @address_binding = {} + @backtrace_maxblocks = @@backtrace_maxblocks + @backtrace_maxblocks_fast = 0 + @backtrace_maxcomplexity = 40 + @backtrace_maxcomplexity_data = 5 + @disassemble_maxblocklength = 100 + @comment = {} + @funcs_stdabi = true + end - # adds a section, updates prog_binding - # base addr is an Integer or a String (label name for offset 0) - def add_section(encoded, base) - 
encoded, base = base, encoded if base.kind_of? EncodedData - case base - when ::Integer - when ::String - raise "invalid section base #{base.inspect} - not at section start" if encoded.export[base] and encoded.export[base] != 0 - if ed = get_edata_at(base) - ed.del_export(base) - end - encoded.add_export base, 0 - else raise "invalid section base #{base.inspect} - expected string or integer" - end + # adds a section, updates prog_binding + # base addr is an Integer or a String (label name for offset 0) + def add_section(encoded, base) + encoded, base = base, encoded if base.kind_of? EncodedData + case base + when ::Integer + when ::String + raise "invalid section base #{base.inspect} - not at section start" if encoded.export[base] and encoded.export[base] != 0 + if ed = get_edata_at(base) + ed.del_export(base) + end + encoded.add_export base, 0 + else raise "invalid section base #{base.inspect} - expected string or integer" + end - @sections[base] = encoded - @label_alias_cache = nil - encoded.binding(base).each { |k, v| - @old_prog_binding[k] = @prog_binding[k] = v.reduce - } + @sections[base] = encoded + @label_alias_cache = nil + encoded.binding(base).each { |k, v| + @old_prog_binding[k] = @prog_binding[k] = v.reduce + } - # update section_edata.reloc - # label -> list of relocs that refers to it - @inv_section_reloc ||= {} - @sections.each { |b, e| - e.reloc.each { |o, r| - r.target.externals.grep(::String).each { |ext| (@inv_section_reloc[ext] ||= []) << [b, e, o, r] } - } - } + # update section_edata.reloc + # label -> list of relocs that refers to it + @inv_section_reloc ||= {} + @sections.each { |b, e| + e.reloc.each { |o, r| + r.target.externals.grep(::String).each { |ext| (@inv_section_reloc[ext] ||= []) << [b, e, o, r] } + } + } - self - end + self + end - def add_xref(addr, x) - case @xrefs[addr] - when nil; @xrefs[addr] = x - when x - when ::Array; @xrefs[addr] |= [x] - else @xrefs[addr] = [@xrefs[addr], x] - end - end + def add_xref(addr, x) + case 
@xrefs[addr] + when nil; @xrefs[addr] = x + when x + when ::Array; @xrefs[addr] |= [x] + else @xrefs[addr] = [@xrefs[addr], x] + end + end - # yields each xref to a given address, optionnaly restricted to a type - def each_xref(addr, type=nil) - addr = normalize addr + # yields each xref to a given address, optionnaly restricted to a type + def each_xref(addr, type=nil) + addr = normalize addr - x = @xrefs[addr] - x = case x - when nil; [] - when ::Array; x.dup - else [x] - end + x = @xrefs[addr] + x = case x + when nil; [] + when ::Array; x.dup + else [x] + end - x.delete_if { |x_| x_.type != type } if type + x.delete_if { |x_| x_.type != type } if type - # add pseudo-xrefs for exe relocs - if (not type or type == :reloc) and l = get_label_at(addr) and a = @inv_section_reloc[l] - a.each { |b, e, o, r| - addr = Expression[b]+o - # ignore relocs embedded in an already-listed instr - x << Xref.new(:reloc, addr) if not x.find { |x_| - next if not x_.origin or not di_at(x_.origin) - (addr - x_.origin) < @decoded[x_.origin].bin_length rescue false - } - } - end + # add pseudo-xrefs for exe relocs + if (not type or type == :reloc) and l = get_label_at(addr) and a = @inv_section_reloc[l] + a.each { |b, e, o, r| + addr = Expression[b]+o + # ignore relocs embedded in an already-listed instr + x << Xref.new(:reloc, addr) if not x.find { |x_| + next if not x_.origin or not di_at(x_.origin) + (addr - x_.origin) < @decoded[x_.origin].bin_length rescue false + } + } + end - x.each { |x_| yield x_ } - end + x.each { |x_| yield x_ } + end - # parses a C header file, from which function prototypes will be converted to DecodedFunction when found in the code flow - def parse_c_file(file) - parse_c File.read(file), file - end + # parses a C header file, from which function prototypes will be converted to DecodedFunction when found in the code flow + def parse_c_file(file) + parse_c File.read(file), file + end - # parses a C string for function prototypes - def parse_c(str, 
filename=nil, lineno=1) - @c_parser_constcache = nil - @c_parser ||= @cpu.new_cparser - @c_parser.lexer.define_weak('__METASM__DECODE__') - @c_parser.parse(str, filename, lineno) - rescue ParseError - @c_parser.lexer.feed! '' - raise - end + # parses a C string for function prototypes + def parse_c(str, filename=nil, lineno=1) + @c_parser_constcache = nil + @c_parser ||= @cpu.new_cparser + @c_parser.lexer.define_weak('__METASM__DECODE__') + @c_parser.parse(str, filename, lineno) + rescue ParseError + @c_parser.lexer.feed! '' + raise + end - # list the constants ([name, integer value]) defined in the C code (#define / enums) - def c_constants - @c_parser_constcache ||= @c_parser.numeric_constants - end + # list the constants ([name, integer value]) defined in the C code (#define / enums) + def c_constants + @c_parser_constcache ||= @c_parser.numeric_constants + end - # returns the canonical form of addr (absolute address integer or label of start of section + section offset) - def normalize(addr) - return addr if not addr or addr == :default - addr = Expression[addr].bind(@old_prog_binding).reduce if not addr.kind_of? Integer - addr %= 1 << [@cpu.size, 32].max if @cpu and addr.kind_of? Integer - addr - end + # returns the canonical form of addr (absolute address integer or label of start of section + section offset) + def normalize(addr) + return addr if not addr or addr == :default + addr = Expression[addr].bind(@old_prog_binding).reduce if not addr.kind_of? Integer + addr %= 1 << [@cpu.size, 32].max if @cpu and addr.kind_of? Integer + addr + end - # returns [edata, edata_base] or nil - # edata.ptr points to addr - def get_section_at(addr, memcheck=true) - case addr = normalize(addr) - when ::Integer - if s = @sections.find { |b, e| b.kind_of? ::Integer and addr >= b and addr < b + e.length } || - @sections.find { |b, e| b.kind_of? 
::Integer and addr == b + e.length } # end label - s[1].ptr = addr - s[0] - return if memcheck and s[1].data.respond_to?(:page_invalid?) and s[1].data.page_invalid?(s[1].ptr) - [s[1], s[0]] - end - when Expression - if addr.op == :+ and addr.rexpr.kind_of? ::Integer and addr.rexpr >= 0 and addr.lexpr.kind_of? ::String and e = @sections[addr.lexpr] - e.ptr = addr.rexpr - return if memcheck and e.data.respond_to?(:page_invalid?) and e.data.page_invalid?(e.ptr) - [e, Expression[addr.lexpr]] - elsif addr.op == :+ and addr.rexpr.kind_of? ::String and not addr.lexpr and e = @sections[addr.rexpr] - e.ptr = 0 - return if memcheck and e.data.respond_to?(:page_invalid?) and e.data.page_invalid?(e.ptr) - [e, addr.rexpr] - end - end - end + # returns [edata, edata_base] or nil + # edata.ptr points to addr + def get_section_at(addr, memcheck=true) + case addr = normalize(addr) + when ::Integer + if s = @sections.find { |b, e| b.kind_of? ::Integer and addr >= b and addr < b + e.length } || + @sections.find { |b, e| b.kind_of? ::Integer and addr == b + e.length } # end label + s[1].ptr = addr - s[0] + return if memcheck and s[1].data.respond_to?(:page_invalid?) and s[1].data.page_invalid?(s[1].ptr) + [s[1], s[0]] + end + when Expression + if addr.op == :+ and addr.rexpr.kind_of? ::Integer and addr.rexpr >= 0 and addr.lexpr.kind_of? ::String and e = @sections[addr.lexpr] + e.ptr = addr.rexpr + return if memcheck and e.data.respond_to?(:page_invalid?) and e.data.page_invalid?(e.ptr) + [e, Expression[addr.lexpr]] + elsif addr.op == :+ and addr.rexpr.kind_of? ::String and not addr.lexpr and e = @sections[addr.rexpr] + e.ptr = 0 + return if memcheck and e.data.respond_to?(:page_invalid?) 
and e.data.page_invalid?(e.ptr) + [e, addr.rexpr] + end + end + end - # returns the label at the specified address, creates it if needed using "prefix_addr" - # renames the existing label if it is in the form rewritepfx_addr - # returns nil if the address is not known and is not a string - def auto_label_at(addr, base='xref', *rewritepfx) - addr = Expression[addr].reduce - addrstr = "#{base}_#{Expression[addr]}" - return if addrstr !~ /^\w+$/ - e, b = get_section_at(addr) - if not e - l = Expression[addr].reduce_rec if Expression[addr].reduce_rec.kind_of? ::String - l ||= addrstr if addr.kind_of? Expression and addr.externals.grep(::Symbol).empty? - elsif not l = e.inv_export[e.ptr] - l = @program.new_label(addrstr) - e.add_export l, e.ptr - @label_alias_cache = nil - @old_prog_binding[l] = @prog_binding[l] = b + e.ptr - elsif rewritepfx.find { |p| base != p and addrstr.sub(base, p) == l } - newl = addrstr - newl = @program.new_label(newl) unless @old_prog_binding[newl] and @old_prog_binding[newl] == @prog_binding[l] # avoid _uuid when a -> b -> a - rename_label l, newl - l = newl - end - l - end + # returns the label at the specified address, creates it if needed using "prefix_addr" + # renames the existing label if it is in the form rewritepfx_addr + # returns nil if the address is not known and is not a string + def auto_label_at(addr, base='xref', *rewritepfx) + addr = Expression[addr].reduce + addrstr = "#{base}_#{Expression[addr]}" + return if addrstr !~ /^\w+$/ + e, b = get_section_at(addr) + if not e + l = Expression[addr].reduce_rec if Expression[addr].reduce_rec.kind_of? ::String + l ||= addrstr if addr.kind_of? Expression and addr.externals.grep(::Symbol).empty? 
+ elsif not l = e.inv_export[e.ptr] + l = @program.new_label(addrstr) + e.add_export l, e.ptr + @label_alias_cache = nil + @old_prog_binding[l] = @prog_binding[l] = b + e.ptr + elsif rewritepfx.find { |p| base != p and addrstr.sub(base, p) == l } + newl = addrstr + newl = @program.new_label(newl) unless @old_prog_binding[newl] and @old_prog_binding[newl] == @prog_binding[l] # avoid _uuid when a -> b -> a + rename_label l, newl + l = newl + end + l + end - # returns a hash associating addr => list of labels at this addr - # label_alias[a] may be nil if a new label is created elsewhere in the edata with the same name - def label_alias - if not @label_alias_cache - @label_alias_cache = {} - @prog_binding.each { |k, v| - (@label_alias_cache[v] ||= []) << k - } - end - @label_alias_cache - end + # returns a hash associating addr => list of labels at this addr + # label_alias[a] may be nil if a new label is created elsewhere in the edata with the same name + def label_alias + if not @label_alias_cache + @label_alias_cache = {} + @prog_binding.each { |k, v| + (@label_alias_cache[v] ||= []) << k + } + end + @label_alias_cache + end - # decodes instructions from an entrypoint, (tries to) follows code flow - def disassemble(*entrypoints) - nil while disassemble_mainiter(entrypoints) - self - end + # decodes instructions from an entrypoint, (tries to) follows code flow + def disassemble(*entrypoints) + nil while disassemble_mainiter(entrypoints) + self + end - attr_accessor :entrypoints + attr_accessor :entrypoints - # do one operation relevant to disassembling - # returns nil once done - def disassemble_mainiter(entrypoints=[]) - @entrypoints ||= [] - if @addrs_todo.empty? and entrypoints.empty? - post_disassemble - puts 'disassembly finished' if $VERBOSE - @callback_finished[] if callback_finished - return false - elsif @addrs_todo.empty? 
- ep = entrypoints.shift - l = auto_label_at(normalize(ep), 'entrypoint') - puts "start disassemble from #{l} (#{entrypoints.length})" if $VERBOSE and not entrypoints.empty? - @entrypoints << l - @addrs_todo << [ep] - else - disassemble_step - end - true - end + # do one operation relevant to disassembling + # returns nil once done + def disassemble_mainiter(entrypoints=[]) + @entrypoints ||= [] + if @addrs_todo.empty? and entrypoints.empty? + post_disassemble + puts 'disassembly finished' if $VERBOSE + @callback_finished[] if callback_finished + return false + elsif @addrs_todo.empty? + ep = entrypoints.shift + l = auto_label_at(normalize(ep), 'entrypoint') + puts "start disassemble from #{l} (#{entrypoints.length})" if $VERBOSE and not entrypoints.empty? + @entrypoints << l + @addrs_todo << [ep] + else + disassemble_step + end + true + end - def post_disassemble - @decoded.each_value { |di| - next if not di.kind_of? DecodedInstruction - next if not di.opcode or not di.opcode.props[:saveip] - if not di.block.to_subfuncret - di.add_comment 'noreturn' - # there is no need to re-loop on all :saveip as check_noret is transitive - di.block.each_to_normal { |fa| check_noreturn_function(fa) } - end - } - @function.each { |addr, f| - next if not @decoded[addr] - if not f.finalized - f.finalized = true + def post_disassemble + @decoded.each_value { |di| + next if not di.kind_of? 
DecodedInstruction + next if not di.opcode or not di.opcode.props[:saveip] + if not di.block.to_subfuncret + di.add_comment 'noreturn' + # there is no need to re-loop on all :saveip as check_noret is transitive + di.block.each_to_normal { |fa| check_noreturn_function(fa) } + end + } + @function.each { |addr, f| + next if not @decoded[addr] + if not f.finalized + f.finalized = true puts " finalize subfunc #{Expression[addr]}" if debug_backtrace - backtrace_update_function_binding(addr, f) - if not f.return_address - detect_function_thunk(addr) - end - end - bd = f.backtrace_binding.reject { |k, v| Expression[k] == Expression[v] or Expression[v] == Expression::Unknown } - unk = f.backtrace_binding.map { |k, v| k if v == Expression::Unknown }.compact - bd[unk.map { |u| Expression[u].to_s }.sort.join(',')] = Expression::Unknown if not unk.empty? - add_comment(addr, "function binding: " + bd.map { |k, v| "#{k} -> #{v}" }.sort.join(', ')) - add_comment(addr, "function ends at " + f.return_address.map { |ra| Expression[ra] }.join(', ')) if f.return_address - } - end + backtrace_update_function_binding(addr, f) + if not f.return_address + detect_function_thunk(addr) + end + end + bd = f.backtrace_binding.reject { |k, v| Expression[k] == Expression[v] or Expression[v] == Expression::Unknown } + unk = f.backtrace_binding.map { |k, v| k if v == Expression::Unknown }.compact + bd[unk.map { |u| Expression[u].to_s }.sort.join(',')] = Expression::Unknown if not unk.empty? + add_comment(addr, "function binding: " + bd.map { |k, v| "#{k} -> #{v}" }.sort.join(', ')) + add_comment(addr, "function ends at " + f.return_address.map { |ra| Expression[ra] }.join(', ')) if f.return_address + } + end - # disassembles one block from addrs_todo - # adds next addresses to handle to addrs_todo - # if @function[:default] exists, jumps to unknows locations are interpreted as to @function[:default] - def disassemble_step - return if not todo = @addrs_todo.pop or @addrs_done.include? 
todo - @addrs_done << todo if todo[1] + # disassembles one block from addrs_todo + # adds next addresses to handle to addrs_todo + # if @function[:default] exists, jumps to unknows locations are interpreted as to @function[:default] + def disassemble_step + return if not todo = @addrs_todo.pop or @addrs_done.include? todo + @addrs_done << todo if todo[1] - # from_sfret is true if from is the address of a function call that returns to addr - addr, from, from_subfuncret = todo + # from_sfret is true if from is the address of a function call that returns to addr + addr, from, from_subfuncret = todo - return if from == Expression::Unknown + return if from == Expression::Unknown - puts "disassemble_step #{Expression[addr]} #{Expression[from] if from} #{from_subfuncret} (/#{@addrs_todo.length})" if $DEBUG + puts "disassemble_step #{Expression[addr]} #{Expression[from] if from} #{from_subfuncret} (/#{@addrs_todo.length})" if $DEBUG - addr = normalize(addr) + addr = normalize(addr) - if from and from_subfuncret and di_at(from) - @decoded[from].block.each_to_normal { |subfunc| - subfunc = normalize(subfunc) - next if not f = @function[subfunc] or f.finalized - f.finalized = true + if from and from_subfuncret and di_at(from) + @decoded[from].block.each_to_normal { |subfunc| + subfunc = normalize(subfunc) + next if not f = @function[subfunc] or f.finalized + f.finalized = true puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace - backtrace_update_function_binding(subfunc, f) - if not f.return_address - detect_function_thunk(subfunc) - end - } - end - - if di = @decoded[addr] - if di.kind_of? DecodedInstruction - split_block(di.block, di.address, true) if not di.block_head? # this updates di.block - di.block.add_from(from, from_subfuncret ? 
:subfuncret : :normal) if from and from != :default - bf = di.block - elsif di == true - bf = @function[addr] - end - elsif bf = @function[addr] - detect_function_thunk_noreturn(from) if bf.noreturn - elsif s = get_section_at(addr) - block = InstructionBlock.new(normalize(addr), s[0]) - block.add_from(from, from_subfuncret ? :subfuncret : :normal) if from and from != :default - disassemble_block(block) - elsif from and c_parser and name = Expression[addr].reduce_rec and name.kind_of? ::String and - s = c_parser.toplevel.symbol[name] and s.type.untypedef.kind_of? C::Function - bf = @function[addr] = @cpu.decode_c_function_prototype(@c_parser, s) - detect_function_thunk_noreturn(from) if bf.noreturn - elsif from - if bf = @function[:default] - puts "using default function for #{Expression[addr]} from #{Expression[from]}" if $DEBUG - if name = Expression[addr].reduce_rec and name.kind_of? ::String - @function[addr] = @function[:default].dup - else - addr = :default - end - if @decoded[from] - @decoded[from].block.add_to addr - end - else - puts "not disassembling unknown address #{Expression[addr]} from #{Expression[from]}" if $DEBUG - end - if from != :default - add_xref(addr, Xref.new(:x, from)) - add_xref(Expression::Unknown, Xref.new(:x, from)) - end - else - puts "not disassembling unknown address #{Expression[addr]}" if $VERBOSE - end - - if bf and from and from != :default - if bf.kind_of? DecodedFunction - bff = bf.get_backtracked_for(self, addr, from) - else - bff = bf.backtracked_for - end - end - bff.each { |btt| - next if btt.address - if @decoded[from].kind_of? 
DecodedInstruction and @decoded[from].opcode.props[:saveip] and not from_subfuncret and not @function[addr] - backtrace_check_found(btt.expr, @decoded[addr], btt.origin, btt.type, btt.len, btt.maxdepth, btt.detached) - end - next if backtrace_check_funcret(btt, addr, from) - backtrace(btt.expr, from, - :include_start => true, :from_subfuncret => from_subfuncret, - :origin => btt.origin, :orig_expr => btt.orig_expr, :type => btt.type, - :len => btt.len, :detached => btt.detached, :maxdepth => btt.maxdepth) - } if bff - end - - # splits an InstructionBlock, updates the blocks backtracked_for - def split_block(block, address=nil, rebacktrace=false) - if not address # invoked as split_block(0x401012) - return if not @decoded[block].kind_of? DecodedInstruction - block, address = @decoded[block].block, block - end - return block if address == block.address - new_b = block.split address - if rebacktrace - new_b.backtracked_for.dup.each { |btt| - backtrace(btt.expr, btt.address, - :only_upto => block.list.last.address, - :include_start => !btt.exclude_instr, :from_subfuncret => btt.from_subfuncret, - :origin => btt.origin, :orig_expr => btt.orig_expr, :type => btt.type, :len => btt.len, - :detached => btt.detached, :maxdepth => btt.maxdepth) - } - end - new_b - end - - # disassembles a new instruction block at block.address (must be normalized) - def disassemble_block(block) - raise if not block.list.empty? 
- di_addr = block.address - delay_slot = nil - di = nil - - # try not to run for too long - # loop usage: break if the block continues to the following instruction, else return - @disassemble_maxblocklength.times { - # check collision into a known block - break if @decoded[di_addr] - - # check self-modifying code - if @check_smc - #(-7...di.bin_length).each { |off| # uncomment to check for unaligned rewrites - waddr = di_addr #di_addr + off - each_xref(waddr, :w) { |x| - #next if off + x.len < 0 - puts "W: disasm: self-modifying code at #{Expression[waddr]}" if $VERBOSE - add_comment(di_addr, "overwritten by #{@decoded[x.origin]}") - @callback_selfmodifying[di_addr] if callback_selfmodifying - return - } - #} - end - - # decode instruction - block.edata.ptr = di_addr - block.address + block.edata_ptr - if not di = @cpu.decode_instruction(block.edata, di_addr) - ed = block.edata - break if ed.ptr >= ed.length and get_section_at(di_addr) and di = block.list.last - puts "#{ed.ptr >= ed.length ? "end of section reached" : "unknown instruction #{ed.data[di_addr-block.address+block.edata_ptr, 4].to_s.unpack('H*')}"} at #{Expression[di_addr]}" if $VERBOSE - return - end - - @decoded[di_addr] = di - block.add_di di - puts di if $DEBUG - - if callback_newinstr - ndi = @callback_newinstr[di] - if not ndi or not ndi.block - block.list.delete di - if ndi - block.add_di ndi - ndi.bin_length = di.bin_length if ndi.bin_length == 0 - @decoded[di_addr] = ndi - end - end - di = ndi - end - return if not di - block = di.block - - di_addr = di.next_addr - - backtrace_xrefs_di_rw(di) - - if not di_addr or di.opcode.props[:stopexec] or not @program.get_xrefs_x(self, di).empty? 
- # do not backtrace until delay slot is finished (eg MIPS: di is a - # ret and the delay slot holds stack fixup needed to calc func_binding) - # XXX if the delay slot is also xref_x or :stopexec it is ignored - delay_slot ||= [di, @cpu.delay_slot(di)] - end - - if delay_slot - di, delay = delay_slot - if delay == 0 or not di_addr - backtrace_xrefs_di_x(di) - if di.opcode.props[:stopexec] or not di_addr; return - else break - end - end - delay_slot[1] = delay - 1 - end - } - - ar = [di_addr] - ar = @callback_newaddr[block.list.last.address, ar] || ar if callback_newaddr - ar.each { |di_addr_| backtrace(di_addr_, di.address, :origin => di.address, :type => :x) } - - block - end - - # retrieve the list of execution crossrefs due to the decodedinstruction - # returns a list of symbolic expressions - def get_xrefs_x(di) - @program.get_xrefs_x(self, di) - end - - # retrieve the list of data r/w crossrefs due to the decodedinstruction - # returns a list of [type, symbolic expression, length] - def get_xrefs_rw(di) - @program.get_xrefs_rw(self, di) - end - - # disassembles_fast from a list of entrypoints, also dasm subfunctions - def disassemble_fast_deep(*entrypoints) - @entrypoints ||= [] - @entrypoints |= entrypoints - - entrypoints.each { |ep| do_disassemble_fast_deep(normalize(ep)) } - - @callback_finished[] if callback_finished - end - - def do_disassemble_fast_deep(ep) - disassemble_fast(ep) { |fa, di| - fa = normalize(fa) - do_disassemble_fast_deep(fa) - if di and ndi = di_at(fa) - ndi.block.add_from_normal(di.address) - end - } - end - - # disassembles fast from a list of entrypoints - # see disassemble_fast_step - def disassemble_fast(entrypoint, maxdepth=-1, &b) - ep = [entrypoint] - until ep.empty? 
- disassemble_fast_step(ep, &b) - maxdepth -= 1 - ep.delete_if { |a| not @decoded[normalize(a[0])] } if maxdepth == 0 - end - check_noreturn_function(entrypoint) - end - - # disassembles one block from the ary, see disassemble_fast_block - def disassemble_fast_step(todo, &b) - return if not x = todo.pop - addr, from, from_subfuncret = x - - addr = normalize(addr) - - if di = @decoded[addr] - if di.kind_of? DecodedInstruction - split_block(di.block, di.address) if not di.block_head? - di.block.add_from(from, from_subfuncret ? :subfuncret : :normal) if from and from != :default - end - elsif s = get_section_at(addr) - block = InstructionBlock.new(normalize(addr), s[0]) - block.add_from(from, from_subfuncret ? :subfuncret : :normal) if from and from != :default - todo.concat disassemble_fast_block(block, &b) - elsif name = Expression[addr].reduce_rec and name.kind_of? ::String and not @function[addr] - if c_parser and s = c_parser.toplevel.symbol[name] and s.type.untypedef.kind_of? C::Function - @function[addr] = @cpu.decode_c_function_prototype(@c_parser, s) - detect_function_thunk_noreturn(from) if @function[addr].noreturn - elsif @function[:default] - @function[addr] = @function[:default].dup - end - end - - disassemble_fast_checkfunc(addr) - end - - # check if an addr has an xref :x from a :saveip, if so mark as Function - def disassemble_fast_checkfunc(addr) - if @decoded[addr].kind_of? 
DecodedInstruction and not @function[addr] - func = false - each_xref(addr, :x) { |x_| - func = true if odi = di_at(x_.origin) and odi.opcode.props[:saveip] - } - if func - auto_label_at(addr, 'sub', 'loc', 'xref') - @function[addr] = (@function[:default] || DecodedFunction.new).dup - @function[addr].finalized = true - detect_function_thunk(addr) - puts "found new function #{get_label_at(addr)} at #{Expression[addr]}" if $VERBOSE - end - end - end - - # disassembles fast a new instruction block at block.address (must be normalized) - # does not recurse into subfunctions - # assumes all :saveip returns, except those pointing to a subfunc with noreturn - # yields subfunction addresses (targets of :saveip) - # no backtrace for :x (change with backtrace_maxblocks_fast) - # returns a todo-style ary - # assumes @addrs_todo is empty - def disassemble_fast_block(block, &b) - block = InstructionBlock.new(normalize(block), get_section_at(block)[0]) if not block.kind_of? InstructionBlock - di_addr = block.address - delay_slot = nil - di = nil - ret = [] - - return ret if @decoded[di_addr] - - @disassemble_maxblocklength.times { - break if @decoded[di_addr] - - # decode instruction - block.edata.ptr = di_addr - block.address + block.edata_ptr - if not di = @cpu.decode_instruction(block.edata, di_addr) - break if block.edata.ptr >= block.edata.length and get_section_at(di_addr) and di = block.list.last - return ret - end - - @decoded[di_addr] = di - block.add_di di - puts di if $DEBUG - - if callback_newinstr - ndi = @callback_newinstr[di] - if not ndi or not ndi.block - block.list.delete di - if ndi - block.add_di ndi - ndi.bin_length = di.bin_length if ndi.bin_length == 0 - @decoded[di_addr] = ndi - end - end - di = ndi - end - return ret if not di - - di_addr = di.next_addr - - if di.opcode.props[:stopexec] or di.opcode.props[:setip] - if di.opcode.props[:setip] - @addrs_todo = [] - ar = @program.get_xrefs_x(self, di) - ar = @callback_newaddr[di.address, ar] || ar if 
callback_newaddr - ar.each { |expr| - backtrace(expr, di.address, :origin => di.address, :type => :x, :maxdepth => @backtrace_maxblocks_fast) - } - end - if di.opcode.props[:saveip] - @addrs_todo = [] - ret.concat disassemble_fast_block_subfunc(di, &b) - else - ret.concat @addrs_todo - @addrs_todo = [] - end - delay_slot ||= [di, @cpu.delay_slot(di)] - end - - if delay_slot - if delay_slot[1] <= 0 - return ret if delay_slot[0].opcode.props[:stopexec] - break - end - delay_slot[1] -= 1 - end - } - - ar = [di_addr] - ar = @callback_newaddr[block.list.last.address, ar] || ar if callback_newaddr - ar.each { |a| - di.block.add_to_normal(a) - ret << [a, di.address] - } - ret - end - - # handles when disassemble_fast encounters a call to a subfunction - def disassemble_fast_block_subfunc(di) - funcs = di.block.to_normal.to_a - do_ret = funcs.empty? - ret = [] - na = di.next_addr + di.bin_length * @cpu.delay_slot(di) - funcs.each { |fa| - fa = normalize(fa) - disassemble_fast_checkfunc(fa) - yield fa, di if block_given? - if f = @function[fa] and bf = f.get_backtracked_for(self, fa, di.address) and not bf.empty? 
- # this includes retaddr unless f is noreturn - bf.each { |btt| - next if btt.type != :x - bt = backtrace(btt.expr, di.address, :include_start => true, :origin => btt.origin, :maxdepth => [@backtrace_maxblocks_fast, 1].max) - if btt.detached - ret.concat bt # callback argument - elsif bt.find { |a| normalize(a) == na } - do_ret = true - end - } - elsif not f or not f.noreturn - do_ret = true - end - } - if do_ret - di.block.add_to_subfuncret(na) - ret << [na, di.address, true] - di.block.add_to_normal :default if not di.block.to_normal and @function[:default] - end - ret - end - - # trace whose xrefs this di is responsible of - def backtrace_xrefs_di_rw(di) - get_xrefs_rw(di).each { |type, ptr, len| - backtrace(ptr, di.address, :origin => di.address, :type => type, :len => len).each { |xaddr| - next if xaddr == Expression::Unknown - if @check_smc and type == :w - #len.times { |off| # check unaligned ? - waddr = xaddr #+ off - if wdi = di_at(waddr) - puts "W: disasm: #{di} overwrites #{wdi}" if $VERBOSE - wdi.add_comment "overwritten by #{di}" - end - #} - end - } - } - end - - # trace xrefs for execution - def backtrace_xrefs_di_x(di) - ar = @program.get_xrefs_x(self, di) - ar = @callback_newaddr[di.address, ar] || ar if callback_newaddr - ar.each { |expr| backtrace(expr, di.address, :origin => di.address, :type => :x) } - end - - # checks if the function starting at funcaddr is an external function thunk (eg jmp [SomeExtFunc]) - # the argument must be the address of a decodedinstruction that is the first of a function, - # which must not have return_addresses - # returns the new thunk name if it was changed - def detect_function_thunk(funcaddr) - # check thunk linearity (no conditionnal branch etc) - addr = funcaddr - count = 0 - while b = block_at(addr) - count += 1 - return if count > 5 or b.list.length > 5 - if b.to_subfuncret and not b.to_subfuncret.empty? 
- return if b.to_subfuncret.length != 1 - addr = normalize(b.to_subfuncret.first) - return if not b.to_normal or b.to_normal.length != 1 - # check that the subfunction is simple (eg get_eip) - return if not sf = @function[normalize(b.to_normal.first)] - return if not btb = sf.backtrace_binding - btb = btb.dup - btb.delete_if { |k, v| Expression[k] == Expression[v] } - return if btb.length > 2 or btb.values.include? Expression::Unknown - else - return if not bt = b.to_normal - if bt.include? :default - addr = :default - break - elsif bt.length != 1 - return - end - addr = normalize(bt.first) - end - end - fname = Expression[addr].reduce_rec - if funcaddr != addr and f = @function[funcaddr] - # forward get_backtrace_binding to target - f.backtrace_binding = { :thunk => addr } - f.noreturn = true if @function[addr] and @function[addr].noreturn - end - return if not fname.kind_of? ::String - l = auto_label_at(funcaddr, 'sub', 'loc') - return if l[0, 4] != 'sub_' - puts "found thunk for #{fname} at #{Expression[funcaddr]}" if $DEBUG - rename_label(l, @program.new_label("thunk_#{fname}")) - end - - # this is called when reaching a noreturn function call, with the call address - # it is responsible for detecting the actual 'call' instruction leading to this - # noreturn function, and eventually mark the call target as a thunk - def detect_function_thunk_noreturn(addr) - 5.times { - return if not di = di_at(addr) - if di.opcode.props[:saveip] and not di.block.to_subfuncret - if di.block.to_normal.to_a.length == 1 - taddr = normalize(di.block.to_normal.first) - if di_at(taddr) - @function[taddr] ||= DecodedFunction.new - return detect_function_thunk(taddr) - end - end - break - else - from = di.block.from_normal.to_a + di.block.from_subfuncret.to_a - if from.length == 1 - addr = from.first - else break - end - end - } - end - - # given an address, detect if it may be a noreturn fuction - # it is if all its end blocks are calls to noreturn functions - # if it is, create a 
@function[fa] with noreturn = true - # should only be called with fa = target of a call - def check_noreturn_function(fa) - fb = function_blocks(fa, false, false) - lasts = fb.keys.find_all { |k| fb[k] == [] } - return if lasts.empty? - if lasts.all? { |la| - b = block_at(la) - next if not di = b.list.last - (di.opcode.props[:saveip] and b.to_normal.to_a.all? { |tfa| - tf = function_at(tfa) and tf.noreturn - }) or (di.opcode.props[:stopexec] and not di.opcode.props[:setip]) - } - # yay - @function[fa] ||= DecodedFunction.new - @function[fa].noreturn = true - end - end - - - # walks the backtrace tree from an address, passing along an object - # - # the steps are (1st = event, followed by hash keys) - # - # for each decoded instruction encountered: - # :di :di - # - # when backtracking to a block through a decodedfunction: - # (yield for each of the block's subfunctions) - # (the decodedinstruction responsible for the call will be yield next) - # :func :func, :funcaddr, :addr, :depth - # - # when jumping from one block to another (excluding :loop): # XXX include :loops ? 
- # :up :from, :to, :sfret - # - # when the backtrack has nothing to backtrack to (eg program entrypoint): - # :end :addr - # - # when the backtrack stops by taking too long to complete: - # :maxdepth :addr - # - # when the backtrack stops for encountering the specified stop address: - # :stopaddr :addr - # - # when rebacktracking a block already seen in the current branch: - # (looptrace is an array of [obj, block end addr, from_subfuncret], from oldest to newest) - # :loop :looptrace - # - # when the address does not match a known instruction/function: - # :unknown_addr :addr - # - # the block return value is used as follow for :di, :func, :up and :loop: - # false => the backtrace stops for the branch - # nil => the backtrace continues with the current object - # anything else => the backtrace continues with this object - # - # method arguments: - # obj is the initial value of the object - # addr is the address where the backtrace starts - # include_start is a bool specifying if the backtrace should start at addr or just before - # from_subfuncret is a bool specifying if addr points to a decodedinstruction that calls a subfunction - # stopaddr is an [array of] address of instruction, the backtrace will stop just after executing it - # maxdepth is the maximum depth (in blocks) for each backtrace branch. - # (defaults to dasm.backtrace_maxblocks, which defaults do Dasm.backtrace_maxblocks) - def backtrace_walk(obj, addr, include_start, from_subfuncret, stopaddr, maxdepth) - start_addr = normalize(addr) - stopaddr = [stopaddr] if stopaddr and not stopaddr.kind_of? 
::Array - - # array of [obj, addr, from_subfuncret, loopdetect] - # loopdetect is an array of [obj, addr, from_type] of each end of block encountered - todo = [] - - # array of [obj, blockaddr] - # avoids rewalking the same value - done = [] - - # updates todo with the addresses to backtrace next - walk_up = lambda { |w_obj, w_addr, w_loopdetect| - if w_loopdetect.length > maxdepth - yield :maxdepth, w_obj, :addr => w_addr, :loopdetect => w_loopdetect - elsif stopaddr and stopaddr.include?(w_addr) - yield :stopaddr, w_obj, :addr => w_addr, :loopdetect => w_loopdetect - elsif w_di = @decoded[w_addr] and w_di != w_di.block.list.first and w_di.address != w_di.block.address - prevdi = w_di.block.list[w_di.block.list.index(w_di)-1] - todo << [w_obj, prevdi.address, :normal, w_loopdetect] - elsif w_di - next if done.include? [w_obj, w_addr] - done << [w_obj, w_addr] - hadsomething = false - w_di.block.each_from { |f_addr, f_type| - next if f_type == :indirect - hadsomething = true - o_f_addr = f_addr - f_addr = @decoded[f_addr].block.list.last.address if @decoded[f_addr].kind_of? DecodedInstruction # delay slot - if l = w_loopdetect.find { |l_obj, l_addr, l_type| l_addr == f_addr and l_type == f_type } - f_obj = yield(:loop, w_obj, :looptrace => w_loopdetect[w_loopdetect.index(l)..-1], :loopdetect => w_loopdetect) - if f_obj and f_obj != w_obj # should avoid infinite loops - f_loopdetect = w_loopdetect[0...w_loopdetect.index(l)] - end - else - f_obj = yield(:up, w_obj, :from => w_addr, :to => f_addr, :sfret => f_type, :loopdetect => w_loopdetect, :real_to => o_f_addr) - end - next if f_obj == false - f_obj ||= w_obj - f_loopdetect ||= w_loopdetect - # only count non-trivial paths in loopdetect (ignore linear links) - add_detect = [[f_obj, f_addr, f_type]] - add_detect = [] if @decoded[f_addr].kind_of? 
DecodedInstruction and tmp = @decoded[f_addr].block and - ((w_di.block.from_subfuncret.to_a == [] and w_di.block.from_normal == [f_addr] and - tmp.to_normal == [w_di.address] and tmp.to_subfuncret.to_a == []) or - (w_di.block.from_subfuncret == [f_addr] and tmp.to_subfuncret == [w_di.address])) - todo << [f_obj, f_addr, f_type, f_loopdetect + add_detect ] - } - yield :end, w_obj, :addr => w_addr, :loopdetect => w_loopdetect if not hadsomething - elsif @function[w_addr] and w_addr != :default and w_addr != Expression::Unknown - next if done.include? [w_obj, w_addr] - oldlen = todo.length - each_xref(w_addr, :x) { |x| - f_addr = x.origin - o_f_addr = f_addr - f_addr = @decoded[f_addr].block.list.last.address if @decoded[f_addr].kind_of? DecodedInstruction # delay slot - if l = w_loopdetect.find { |l_obj, l_addr, l_type| l_addr == w_addr } - f_obj = yield(:loop, w_obj, :looptrace => w_loopdetect[w_loopdetect.index(l)..-1], :loopdetect => w_loopdetect) - if f_obj and f_obj != w_obj - f_loopdetect = w_loopdetect[0...w_loopdetect.index(l)] - end - else - f_obj = yield(:up, w_obj, :from => w_addr, :to => f_addr, :sfret => :normal, :loopdetect => w_loopdetect, :real_to => o_f_addr) - end - next if f_obj == false - f_obj ||= w_obj - f_loopdetect ||= w_loopdetect - todo << [f_obj, f_addr, :normal, f_loopdetect + [[f_obj, f_addr, :normal]] ] - } - yield :end, w_obj, :addr => w_addr, :loopdetect => w_loopdetect if todo.length == oldlen - else - yield :unknown_addr, w_obj, :addr => w_addr, :loopdetect => w_loopdetect - end - } - - if include_start - todo << [obj, start_addr, from_subfuncret ? :subfuncret : :normal, []] - else - walk_up[obj, start_addr, []] - end - - while not todo.empty? 
- obj, addr, type, loopdetect = todo.pop - di = @decoded[addr] - if di and type == :subfuncret - di.block.each_to_normal { |sf| - next if not f = @function[normalize(sf)] - s_obj = yield(:func, obj, :func => f, :funcaddr => sf, :addr => addr, :loopdetect => loopdetect) - next if s_obj == false - s_obj ||= obj - if l = loopdetect.find { |l_obj, l_addr, l_type| addr == l_addr and l_type == :normal } - l_obj = yield(:loop, s_obj, :looptrace => loopdetect[loopdetect.index(l)..-1], :loopdetect => loopdetect) - if l_obj and l_obj != s_obj - s_loopdetect = loopdetect[0...loopdetect.index(l)] - end - next if l_obj == false - s_obj = l_obj if l_obj - end - s_loopdetect ||= loopdetect - todo << [s_obj, addr, :normal, s_loopdetect + [[s_obj, addr, :normal]] ] - } - elsif di - # XXX should interpolate index if di is not in block.list, but what if the addresses are not Comparable ? - di.block.list[0..(di.block.list.index(di) || -1)].reverse_each { |di_| - di = di_ # XXX not sure.. - if stopaddr and ea = di.next_addr and stopaddr.include?(ea) - yield :stopaddr, obj, :addr => ea, :loopdetect => loopdetect - break - end - ex_obj = obj - obj = yield(:di, obj, :di => di, :loopdetect => loopdetect) - break if obj == false - obj ||= ex_obj - } - walk_up[obj, di.block.address, loopdetect] if obj - elsif @function[addr] and addr != :default and addr != Expression::Unknown - ex_obj = obj - obj = yield(:func, obj, :func => @function[addr], :funcaddr => addr, :addr => addr, :loopdetect => loopdetect) - next if obj == false - obj ||= ex_obj - walk_up[obj, addr, loopdetect] - else - yield :unknown_addr, obj, :addr => addr, :loopdetect => loopdetect - end - end - end - - # iterates over all instructions of a function from a given entrypoint - # carries an object while walking, the object is yielded every instruction - # every block is walked only once, after all previous blocks are done (if possible) - # on a 'jz', a [:clone] event is yielded for every path beside the first - # on a juction 
(eg a -> b -> d, a -> c -> d), a [:merge] event occurs if froms have different objs - # event list: - # [:di, , , ] - # [:clone, , , ] - # [:merge, , { => , => , ...}, ] - # [:subfunc, , , ] - # all events should return an object - # :merge has a copy of object1 at the end so that uninterested callers can always return args[-1] - # if an event returns false, the trace stops for the current branch - def function_walk(addr_start, obj_start) - # addresses of instrs already seen => obj - done = {} - todo = [[addr_start, obj_start]] - - while hop = todo.pop - addr, obj = hop - next if done.has_key?(done) - - di = di_at(addr) - next if not di - - if done.empty? - dilist = di.block.list[di.block.list.index(di)..-1] - else - # new block, check all 'from' have been seen - if not hop[2] - # may retry later - all_ok = true - di.block.each_from_samefunc(self) { |fa| all_ok = false unless done.has_key?(fa) } - if not all_ok - todo.unshift([addr, obj, true]) - next - end - end - - froms = {} - di.block.each_from_samefunc(self) { |fa| froms[fa] = done[fa] if done[fa] } - if froms.values.uniq.length > 1 - obj = yield([:merge, addr, froms, froms.values.first]) - next if obj == false - end - - dilist = di.block.list - end - - if dilist.each { |_di| - break if done.has_key?(_di.address) # looped back into addr_start - done[_di.address] = obj - obj = yield([:di, _di.address, _di, obj]) - break if obj == false # also return false for the previous 'if' - } - - from = dilist.last.address - - if di.block.to_normal and di.block.to_normal[0] and - di.block.to_subfuncret and di.block.to_subfuncret[0] - # current instruction block calls into a subfunction - obj = di.block.to_normal.map { |subf| - yield([:subfunc, subf, from, obj]) - }.first # propagate 1st subfunc result - next if obj == false - end - - wantclone = false - di.block.each_to_samefunc(self) { |ta| - if wantclone - nobj = yield([:clone, ta, from, obj]) - next if obj == false - todo << [ta, nobj] - else - todo << [ta, obj] - 
wantclone = true - end - } - end - end - end - - # holds a backtrace result until a snapshot_addr is encountered - class StoppedExpr - attr_accessor :exprs - def initialize(e) @exprs = e end - end - - - attr_accessor :debug_backtrace - - # backtraces the value of an expression from start_addr - # updates blocks backtracked_for if type is set - # uses backtrace_walk - # all values returned are from backtrace_check_found (which may generate xrefs, labels, addrs to dasm) unless :no_check is specified - # options: - # :include_start => start backtracking including start_addr - # :from_subfuncret => - # :origin => origin to set for xrefs when resolution is successful - # :orig_expr => initial expression - # :type => xref type (:r, :w, :x, :addr) when :x, the results are added to #addrs_todo - # :len => xref len (for :r/:w) - # :snapshot_addr => addr (or array of) where the backtracker should stop - # if a snapshot_addr is given, values found are ignored if continuing the backtrace does not get to it (eg maxdepth/unk_addr/end) - # :maxdepth => maximum number of blocks to backtrace - # :detached => true if backtracking type :x and the result should not have from = origin set in @addrs_todo - # :max_complexity{_data} => maximum complexity of the expression before aborting its backtrace - # :log => Array, will be updated with the backtrace evolution - # :only_upto => backtrace only to update bt_for for current block & previous ending at only_upto - # :no_check => don't use backtrace_check_found (will not backtrace indirection static values) - # :terminals => array of symbols with constant value (stop backtracking if all symbols in the expr are terminals) (only supported with no_check) - def backtrace(expr, start_addr, nargs={}) - include_start = nargs.delete :include_start - from_subfuncret = nargs.delete :from_subfuncret - origin = nargs.delete :origin - origexpr = nargs.delete :orig_expr - type = nargs.delete :type - len = nargs.delete :len - snapshot_addr = 
nargs.delete(:snapshot_addr) || nargs.delete(:stopaddr) - maxdepth = nargs.delete(:maxdepth) || @backtrace_maxblocks - detached = nargs.delete :detached - max_complexity = nargs.delete(:max_complexity) || @backtrace_maxcomplexity - max_complexity_data = nargs.delete(:max_complexity) || @backtrace_maxcomplexity_data - bt_log = nargs.delete :log # array to receive the ongoing backtrace info - only_upto = nargs.delete :only_upto - no_check = nargs.delete :no_check - terminals = nargs.delete(:terminals) || [] - raise ArgumentError, "invalid argument to backtrace #{nargs.keys.inspect}" if not nargs.empty? - - expr = Expression[expr] - - origexpr = expr if origin == start_addr - - start_addr = normalize(start_addr) - di = @decoded[start_addr] - - if not snapshot_addr and @cpu.backtrace_is_stack_address(expr) + backtrace_update_function_binding(subfunc, f) + if not f.return_address + detect_function_thunk(subfunc) + end + } + end + + if di = @decoded[addr] + if di.kind_of? DecodedInstruction + split_block(di.block, di.address, true) if not di.block_head? # this updates di.block + di.block.add_from(from, from_subfuncret ? :subfuncret : :normal) if from and from != :default + bf = di.block + elsif di == true + bf = @function[addr] + end + elsif bf = @function[addr] + detect_function_thunk_noreturn(from) if bf.noreturn + elsif s = get_section_at(addr) + block = InstructionBlock.new(normalize(addr), s[0]) + block.add_from(from, from_subfuncret ? :subfuncret : :normal) if from and from != :default + disassemble_block(block) + elsif from and c_parser and name = Expression[addr].reduce_rec and name.kind_of? ::String and + s = c_parser.toplevel.symbol[name] and s.type.untypedef.kind_of? 
C::Function + bf = @function[addr] = @cpu.decode_c_function_prototype(@c_parser, s) + detect_function_thunk_noreturn(from) if bf.noreturn + elsif from + if bf = @function[:default] + puts "using default function for #{Expression[addr]} from #{Expression[from]}" if $DEBUG + if name = Expression[addr].reduce_rec and name.kind_of? ::String + @function[addr] = @function[:default].dup + else + addr = :default + end + if @decoded[from] + @decoded[from].block.add_to addr + end + else + puts "not disassembling unknown address #{Expression[addr]} from #{Expression[from]}" if $DEBUG + end + if from != :default + add_xref(addr, Xref.new(:x, from)) + add_xref(Expression::Unknown, Xref.new(:x, from)) + end + else + puts "not disassembling unknown address #{Expression[addr]}" if $VERBOSE + end + + if bf and from and from != :default + if bf.kind_of? DecodedFunction + bff = bf.get_backtracked_for(self, addr, from) + else + bff = bf.backtracked_for + end + end + bff.each { |btt| + next if btt.address + if @decoded[from].kind_of? DecodedInstruction and @decoded[from].opcode.props[:saveip] and not from_subfuncret and not @function[addr] + backtrace_check_found(btt.expr, @decoded[addr], btt.origin, btt.type, btt.len, btt.maxdepth, btt.detached) + end + next if backtrace_check_funcret(btt, addr, from) + backtrace(btt.expr, from, + :include_start => true, :from_subfuncret => from_subfuncret, + :origin => btt.origin, :orig_expr => btt.orig_expr, :type => btt.type, + :len => btt.len, :detached => btt.detached, :maxdepth => btt.maxdepth) + } if bff + end + + # splits an InstructionBlock, updates the blocks backtracked_for + def split_block(block, address=nil, rebacktrace=false) + if not address # invoked as split_block(0x401012) + return if not @decoded[block].kind_of? 
DecodedInstruction + block, address = @decoded[block].block, block + end + return block if address == block.address + new_b = block.split address + if rebacktrace + new_b.backtracked_for.dup.each { |btt| + backtrace(btt.expr, btt.address, + :only_upto => block.list.last.address, + :include_start => !btt.exclude_instr, :from_subfuncret => btt.from_subfuncret, + :origin => btt.origin, :orig_expr => btt.orig_expr, :type => btt.type, :len => btt.len, + :detached => btt.detached, :maxdepth => btt.maxdepth) + } + end + new_b + end + + # disassembles a new instruction block at block.address (must be normalized) + def disassemble_block(block) + raise if not block.list.empty? + di_addr = block.address + delay_slot = nil + di = nil + + # try not to run for too long + # loop usage: break if the block continues to the following instruction, else return + @disassemble_maxblocklength.times { + # check collision into a known block + break if @decoded[di_addr] + + # check self-modifying code + if @check_smc + #(-7...di.bin_length).each { |off| # uncomment to check for unaligned rewrites + waddr = di_addr #di_addr + off + each_xref(waddr, :w) { |x| + #next if off + x.len < 0 + puts "W: disasm: self-modifying code at #{Expression[waddr]}" if $VERBOSE + add_comment(di_addr, "overwritten by #{@decoded[x.origin]}") + @callback_selfmodifying[di_addr] if callback_selfmodifying + return + } + #} + end + + # decode instruction + block.edata.ptr = di_addr - block.address + block.edata_ptr + if not di = @cpu.decode_instruction(block.edata, di_addr) + ed = block.edata + break if ed.ptr >= ed.length and get_section_at(di_addr) and di = block.list.last + puts "#{ed.ptr >= ed.length ? 
"end of section reached" : "unknown instruction #{ed.data[di_addr-block.address+block.edata_ptr, 4].to_s.unpack('H*')}"} at #{Expression[di_addr]}" if $VERBOSE + return + end + + @decoded[di_addr] = di + block.add_di di + puts di if $DEBUG + + if callback_newinstr + ndi = @callback_newinstr[di] + if not ndi or not ndi.block + block.list.delete di + if ndi + block.add_di ndi + ndi.bin_length = di.bin_length if ndi.bin_length == 0 + @decoded[di_addr] = ndi + end + end + di = ndi + end + return if not di + block = di.block + + di_addr = di.next_addr + + backtrace_xrefs_di_rw(di) + + if not di_addr or di.opcode.props[:stopexec] or not @program.get_xrefs_x(self, di).empty? + # do not backtrace until delay slot is finished (eg MIPS: di is a + # ret and the delay slot holds stack fixup needed to calc func_binding) + # XXX if the delay slot is also xref_x or :stopexec it is ignored + delay_slot ||= [di, @cpu.delay_slot(di)] + end + + if delay_slot + di, delay = delay_slot + if delay == 0 or not di_addr + backtrace_xrefs_di_x(di) + if di.opcode.props[:stopexec] or not di_addr; return + else break + end + end + delay_slot[1] = delay - 1 + end + } + + ar = [di_addr] + ar = @callback_newaddr[block.list.last.address, ar] || ar if callback_newaddr + ar.each { |di_addr_| backtrace(di_addr_, di.address, :origin => di.address, :type => :x) } + + block + end + + # retrieve the list of execution crossrefs due to the decodedinstruction + # returns a list of symbolic expressions + def get_xrefs_x(di) + @program.get_xrefs_x(self, di) + end + + # retrieve the list of data r/w crossrefs due to the decodedinstruction + # returns a list of [type, symbolic expression, length] + def get_xrefs_rw(di) + @program.get_xrefs_rw(self, di) + end + + # disassembles_fast from a list of entrypoints, also dasm subfunctions + def disassemble_fast_deep(*entrypoints) + @entrypoints ||= [] + @entrypoints |= entrypoints + + entrypoints.each { |ep| do_disassemble_fast_deep(normalize(ep)) } + + 
@callback_finished[] if callback_finished + end + + def do_disassemble_fast_deep(ep) + disassemble_fast(ep) { |fa, di| + fa = normalize(fa) + do_disassemble_fast_deep(fa) + if di and ndi = di_at(fa) + ndi.block.add_from_normal(di.address) + end + } + end + + # disassembles fast from a list of entrypoints + # see disassemble_fast_step + def disassemble_fast(entrypoint, maxdepth=-1, &b) + ep = [entrypoint] + until ep.empty? + disassemble_fast_step(ep, &b) + maxdepth -= 1 + ep.delete_if { |a| not @decoded[normalize(a[0])] } if maxdepth == 0 + end + check_noreturn_function(entrypoint) + end + + # disassembles one block from the ary, see disassemble_fast_block + def disassemble_fast_step(todo, &b) + return if not x = todo.pop + addr, from, from_subfuncret = x + + addr = normalize(addr) + + if di = @decoded[addr] + if di.kind_of? DecodedInstruction + split_block(di.block, di.address) if not di.block_head? + di.block.add_from(from, from_subfuncret ? :subfuncret : :normal) if from and from != :default + end + elsif s = get_section_at(addr) + block = InstructionBlock.new(normalize(addr), s[0]) + block.add_from(from, from_subfuncret ? :subfuncret : :normal) if from and from != :default + todo.concat disassemble_fast_block(block, &b) + elsif name = Expression[addr].reduce_rec and name.kind_of? ::String and not @function[addr] + if c_parser and s = c_parser.toplevel.symbol[name] and s.type.untypedef.kind_of? C::Function + @function[addr] = @cpu.decode_c_function_prototype(@c_parser, s) + detect_function_thunk_noreturn(from) if @function[addr].noreturn + elsif @function[:default] + @function[addr] = @function[:default].dup + end + end + + disassemble_fast_checkfunc(addr) + end + + # check if an addr has an xref :x from a :saveip, if so mark as Function + def disassemble_fast_checkfunc(addr) + if @decoded[addr].kind_of? 
DecodedInstruction and not @function[addr] + func = false + each_xref(addr, :x) { |x_| + func = true if odi = di_at(x_.origin) and odi.opcode.props[:saveip] + } + if func + auto_label_at(addr, 'sub', 'loc', 'xref') + @function[addr] = (@function[:default] || DecodedFunction.new).dup + @function[addr].finalized = true + detect_function_thunk(addr) + puts "found new function #{get_label_at(addr)} at #{Expression[addr]}" if $VERBOSE + end + end + end + + # disassembles fast a new instruction block at block.address (must be normalized) + # does not recurse into subfunctions + # assumes all :saveip returns, except those pointing to a subfunc with noreturn + # yields subfunction addresses (targets of :saveip) + # no backtrace for :x (change with backtrace_maxblocks_fast) + # returns a todo-style ary + # assumes @addrs_todo is empty + def disassemble_fast_block(block, &b) + block = InstructionBlock.new(normalize(block), get_section_at(block)[0]) if not block.kind_of? InstructionBlock + di_addr = block.address + delay_slot = nil + di = nil + ret = [] + + return ret if @decoded[di_addr] + + @disassemble_maxblocklength.times { + break if @decoded[di_addr] + + # decode instruction + block.edata.ptr = di_addr - block.address + block.edata_ptr + if not di = @cpu.decode_instruction(block.edata, di_addr) + break if block.edata.ptr >= block.edata.length and get_section_at(di_addr) and di = block.list.last + return ret + end + + @decoded[di_addr] = di + block.add_di di + puts di if $DEBUG + + if callback_newinstr + ndi = @callback_newinstr[di] + if not ndi or not ndi.block + block.list.delete di + if ndi + block.add_di ndi + ndi.bin_length = di.bin_length if ndi.bin_length == 0 + @decoded[di_addr] = ndi + end + end + di = ndi + end + return ret if not di + + di_addr = di.next_addr + + if di.opcode.props[:stopexec] or di.opcode.props[:setip] + if di.opcode.props[:setip] + @addrs_todo = [] + ar = @program.get_xrefs_x(self, di) + ar = @callback_newaddr[di.address, ar] || ar if 
callback_newaddr + ar.each { |expr| + backtrace(expr, di.address, :origin => di.address, :type => :x, :maxdepth => @backtrace_maxblocks_fast) + } + end + if di.opcode.props[:saveip] + @addrs_todo = [] + ret.concat disassemble_fast_block_subfunc(di, &b) + else + ret.concat @addrs_todo + @addrs_todo = [] + end + delay_slot ||= [di, @cpu.delay_slot(di)] + end + + if delay_slot + if delay_slot[1] <= 0 + return ret if delay_slot[0].opcode.props[:stopexec] + break + end + delay_slot[1] -= 1 + end + } + + ar = [di_addr] + ar = @callback_newaddr[block.list.last.address, ar] || ar if callback_newaddr + ar.each { |a| + di.block.add_to_normal(a) + ret << [a, di.address] + } + ret + end + + # handles when disassemble_fast encounters a call to a subfunction + def disassemble_fast_block_subfunc(di) + funcs = di.block.to_normal.to_a + do_ret = funcs.empty? + ret = [] + na = di.next_addr + di.bin_length * @cpu.delay_slot(di) + funcs.each { |fa| + fa = normalize(fa) + disassemble_fast_checkfunc(fa) + yield fa, di if block_given? + if f = @function[fa] and bf = f.get_backtracked_for(self, fa, di.address) and not bf.empty? 
+ # this includes retaddr unless f is noreturn + bf.each { |btt| + next if btt.type != :x + bt = backtrace(btt.expr, di.address, :include_start => true, :origin => btt.origin, :maxdepth => [@backtrace_maxblocks_fast, 1].max) + if btt.detached + ret.concat bt # callback argument + elsif bt.find { |a| normalize(a) == na } + do_ret = true + end + } + elsif not f or not f.noreturn + do_ret = true + end + } + if do_ret + di.block.add_to_subfuncret(na) + ret << [na, di.address, true] + di.block.add_to_normal :default if not di.block.to_normal and @function[:default] + end + ret + end + + # trace whose xrefs this di is responsible of + def backtrace_xrefs_di_rw(di) + get_xrefs_rw(di).each { |type, ptr, len| + backtrace(ptr, di.address, :origin => di.address, :type => type, :len => len).each { |xaddr| + next if xaddr == Expression::Unknown + if @check_smc and type == :w + #len.times { |off| # check unaligned ? + waddr = xaddr #+ off + if wdi = di_at(waddr) + puts "W: disasm: #{di} overwrites #{wdi}" if $VERBOSE + wdi.add_comment "overwritten by #{di}" + end + #} + end + } + } + end + + # trace xrefs for execution + def backtrace_xrefs_di_x(di) + ar = @program.get_xrefs_x(self, di) + ar = @callback_newaddr[di.address, ar] || ar if callback_newaddr + ar.each { |expr| backtrace(expr, di.address, :origin => di.address, :type => :x) } + end + + # checks if the function starting at funcaddr is an external function thunk (eg jmp [SomeExtFunc]) + # the argument must be the address of a decodedinstruction that is the first of a function, + # which must not have return_addresses + # returns the new thunk name if it was changed + def detect_function_thunk(funcaddr) + # check thunk linearity (no conditionnal branch etc) + addr = funcaddr + count = 0 + while b = block_at(addr) + count += 1 + return if count > 5 or b.list.length > 5 + if b.to_subfuncret and not b.to_subfuncret.empty? 
+ return if b.to_subfuncret.length != 1 + addr = normalize(b.to_subfuncret.first) + return if not b.to_normal or b.to_normal.length != 1 + # check that the subfunction is simple (eg get_eip) + return if not sf = @function[normalize(b.to_normal.first)] + return if not btb = sf.backtrace_binding + btb = btb.dup + btb.delete_if { |k, v| Expression[k] == Expression[v] } + return if btb.length > 2 or btb.values.include? Expression::Unknown + else + return if not bt = b.to_normal + if bt.include? :default + addr = :default + break + elsif bt.length != 1 + return + end + addr = normalize(bt.first) + end + end + fname = Expression[addr].reduce_rec + if funcaddr != addr and f = @function[funcaddr] + # forward get_backtrace_binding to target + f.backtrace_binding = { :thunk => addr } + f.noreturn = true if @function[addr] and @function[addr].noreturn + end + return if not fname.kind_of? ::String + l = auto_label_at(funcaddr, 'sub', 'loc') + return if l[0, 4] != 'sub_' + puts "found thunk for #{fname} at #{Expression[funcaddr]}" if $DEBUG + rename_label(l, @program.new_label("thunk_#{fname}")) + end + + # this is called when reaching a noreturn function call, with the call address + # it is responsible for detecting the actual 'call' instruction leading to this + # noreturn function, and eventually mark the call target as a thunk + def detect_function_thunk_noreturn(addr) + 5.times { + return if not di = di_at(addr) + if di.opcode.props[:saveip] and not di.block.to_subfuncret + if di.block.to_normal.to_a.length == 1 + taddr = normalize(di.block.to_normal.first) + if di_at(taddr) + @function[taddr] ||= DecodedFunction.new + return detect_function_thunk(taddr) + end + end + break + else + from = di.block.from_normal.to_a + di.block.from_subfuncret.to_a + if from.length == 1 + addr = from.first + else break + end + end + } + end + + # given an address, detect if it may be a noreturn fuction + # it is if all its end blocks are calls to noreturn functions + # if it is, create a 
@function[fa] with noreturn = true + # should only be called with fa = target of a call + def check_noreturn_function(fa) + fb = function_blocks(fa, false, false) + lasts = fb.keys.find_all { |k| fb[k] == [] } + return if lasts.empty? + if lasts.all? { |la| + b = block_at(la) + next if not di = b.list.last + (di.opcode.props[:saveip] and b.to_normal.to_a.all? { |tfa| + tf = function_at(tfa) and tf.noreturn + }) or (di.opcode.props[:stopexec] and not di.opcode.props[:setip]) + } + # yay + @function[fa] ||= DecodedFunction.new + @function[fa].noreturn = true + end + end + + + # walks the backtrace tree from an address, passing along an object + # + # the steps are (1st = event, followed by hash keys) + # + # for each decoded instruction encountered: + # :di :di + # + # when backtracking to a block through a decodedfunction: + # (yield for each of the block's subfunctions) + # (the decodedinstruction responsible for the call will be yield next) + # :func :func, :funcaddr, :addr, :depth + # + # when jumping from one block to another (excluding :loop): # XXX include :loops ? 
+ # :up :from, :to, :sfret + # + # when the backtrack has nothing to backtrack to (eg program entrypoint): + # :end :addr + # + # when the backtrack stops by taking too long to complete: + # :maxdepth :addr + # + # when the backtrack stops for encountering the specified stop address: + # :stopaddr :addr + # + # when rebacktracking a block already seen in the current branch: + # (looptrace is an array of [obj, block end addr, from_subfuncret], from oldest to newest) + # :loop :looptrace + # + # when the address does not match a known instruction/function: + # :unknown_addr :addr + # + # the block return value is used as follow for :di, :func, :up and :loop: + # false => the backtrace stops for the branch + # nil => the backtrace continues with the current object + # anything else => the backtrace continues with this object + # + # method arguments: + # obj is the initial value of the object + # addr is the address where the backtrace starts + # include_start is a bool specifying if the backtrace should start at addr or just before + # from_subfuncret is a bool specifying if addr points to a decodedinstruction that calls a subfunction + # stopaddr is an [array of] address of instruction, the backtrace will stop just after executing it + # maxdepth is the maximum depth (in blocks) for each backtrace branch. + # (defaults to dasm.backtrace_maxblocks, which defaults do Dasm.backtrace_maxblocks) + def backtrace_walk(obj, addr, include_start, from_subfuncret, stopaddr, maxdepth) + start_addr = normalize(addr) + stopaddr = [stopaddr] if stopaddr and not stopaddr.kind_of? 
::Array + + # array of [obj, addr, from_subfuncret, loopdetect] + # loopdetect is an array of [obj, addr, from_type] of each end of block encountered + todo = [] + + # array of [obj, blockaddr] + # avoids rewalking the same value + done = [] + + # updates todo with the addresses to backtrace next + walk_up = lambda { |w_obj, w_addr, w_loopdetect| + if w_loopdetect.length > maxdepth + yield :maxdepth, w_obj, :addr => w_addr, :loopdetect => w_loopdetect + elsif stopaddr and stopaddr.include?(w_addr) + yield :stopaddr, w_obj, :addr => w_addr, :loopdetect => w_loopdetect + elsif w_di = @decoded[w_addr] and w_di != w_di.block.list.first and w_di.address != w_di.block.address + prevdi = w_di.block.list[w_di.block.list.index(w_di)-1] + todo << [w_obj, prevdi.address, :normal, w_loopdetect] + elsif w_di + next if done.include? [w_obj, w_addr] + done << [w_obj, w_addr] + hadsomething = false + w_di.block.each_from { |f_addr, f_type| + next if f_type == :indirect + hadsomething = true + o_f_addr = f_addr + f_addr = @decoded[f_addr].block.list.last.address if @decoded[f_addr].kind_of? DecodedInstruction # delay slot + if l = w_loopdetect.find { |l_obj, l_addr, l_type| l_addr == f_addr and l_type == f_type } + f_obj = yield(:loop, w_obj, :looptrace => w_loopdetect[w_loopdetect.index(l)..-1], :loopdetect => w_loopdetect) + if f_obj and f_obj != w_obj # should avoid infinite loops + f_loopdetect = w_loopdetect[0...w_loopdetect.index(l)] + end + else + f_obj = yield(:up, w_obj, :from => w_addr, :to => f_addr, :sfret => f_type, :loopdetect => w_loopdetect, :real_to => o_f_addr) + end + next if f_obj == false + f_obj ||= w_obj + f_loopdetect ||= w_loopdetect + # only count non-trivial paths in loopdetect (ignore linear links) + add_detect = [[f_obj, f_addr, f_type]] + add_detect = [] if @decoded[f_addr].kind_of? 
DecodedInstruction and tmp = @decoded[f_addr].block and + ((w_di.block.from_subfuncret.to_a == [] and w_di.block.from_normal == [f_addr] and + tmp.to_normal == [w_di.address] and tmp.to_subfuncret.to_a == []) or + (w_di.block.from_subfuncret == [f_addr] and tmp.to_subfuncret == [w_di.address])) + todo << [f_obj, f_addr, f_type, f_loopdetect + add_detect ] + } + yield :end, w_obj, :addr => w_addr, :loopdetect => w_loopdetect if not hadsomething + elsif @function[w_addr] and w_addr != :default and w_addr != Expression::Unknown + next if done.include? [w_obj, w_addr] + oldlen = todo.length + each_xref(w_addr, :x) { |x| + f_addr = x.origin + o_f_addr = f_addr + f_addr = @decoded[f_addr].block.list.last.address if @decoded[f_addr].kind_of? DecodedInstruction # delay slot + if l = w_loopdetect.find { |l_obj, l_addr, l_type| l_addr == w_addr } + f_obj = yield(:loop, w_obj, :looptrace => w_loopdetect[w_loopdetect.index(l)..-1], :loopdetect => w_loopdetect) + if f_obj and f_obj != w_obj + f_loopdetect = w_loopdetect[0...w_loopdetect.index(l)] + end + else + f_obj = yield(:up, w_obj, :from => w_addr, :to => f_addr, :sfret => :normal, :loopdetect => w_loopdetect, :real_to => o_f_addr) + end + next if f_obj == false + f_obj ||= w_obj + f_loopdetect ||= w_loopdetect + todo << [f_obj, f_addr, :normal, f_loopdetect + [[f_obj, f_addr, :normal]] ] + } + yield :end, w_obj, :addr => w_addr, :loopdetect => w_loopdetect if todo.length == oldlen + else + yield :unknown_addr, w_obj, :addr => w_addr, :loopdetect => w_loopdetect + end + } + + if include_start + todo << [obj, start_addr, from_subfuncret ? :subfuncret : :normal, []] + else + walk_up[obj, start_addr, []] + end + + while not todo.empty? 
+ obj, addr, type, loopdetect = todo.pop + di = @decoded[addr] + if di and type == :subfuncret + di.block.each_to_normal { |sf| + next if not f = @function[normalize(sf)] + s_obj = yield(:func, obj, :func => f, :funcaddr => sf, :addr => addr, :loopdetect => loopdetect) + next if s_obj == false + s_obj ||= obj + if l = loopdetect.find { |l_obj, l_addr, l_type| addr == l_addr and l_type == :normal } + l_obj = yield(:loop, s_obj, :looptrace => loopdetect[loopdetect.index(l)..-1], :loopdetect => loopdetect) + if l_obj and l_obj != s_obj + s_loopdetect = loopdetect[0...loopdetect.index(l)] + end + next if l_obj == false + s_obj = l_obj if l_obj + end + s_loopdetect ||= loopdetect + todo << [s_obj, addr, :normal, s_loopdetect + [[s_obj, addr, :normal]] ] + } + elsif di + # XXX should interpolate index if di is not in block.list, but what if the addresses are not Comparable ? + di.block.list[0..(di.block.list.index(di) || -1)].reverse_each { |di_| + di = di_ # XXX not sure.. + if stopaddr and ea = di.next_addr and stopaddr.include?(ea) + yield :stopaddr, obj, :addr => ea, :loopdetect => loopdetect + break + end + ex_obj = obj + obj = yield(:di, obj, :di => di, :loopdetect => loopdetect) + break if obj == false + obj ||= ex_obj + } + walk_up[obj, di.block.address, loopdetect] if obj + elsif @function[addr] and addr != :default and addr != Expression::Unknown + ex_obj = obj + obj = yield(:func, obj, :func => @function[addr], :funcaddr => addr, :addr => addr, :loopdetect => loopdetect) + next if obj == false + obj ||= ex_obj + walk_up[obj, addr, loopdetect] + else + yield :unknown_addr, obj, :addr => addr, :loopdetect => loopdetect + end + end + end + + # iterates over all instructions of a function from a given entrypoint + # carries an object while walking, the object is yielded every instruction + # every block is walked only once, after all previous blocks are done (if possible) + # on a 'jz', a [:clone] event is yielded for every path beside the first + # on a juction 
(eg a -> b -> d, a -> c -> d), a [:merge] event occurs if froms have different objs + # event list: + # [:di, , , ] + # [:clone, , , ] + # [:merge, , { => , => , ...}, ] + # [:subfunc, , , ] + # all events should return an object + # :merge has a copy of object1 at the end so that uninterested callers can always return args[-1] + # if an event returns false, the trace stops for the current branch + def function_walk(addr_start, obj_start) + # addresses of instrs already seen => obj + done = {} + todo = [[addr_start, obj_start]] + + while hop = todo.pop + addr, obj = hop + next if done.has_key?(done) + + di = di_at(addr) + next if not di + + if done.empty? + dilist = di.block.list[di.block.list.index(di)..-1] + else + # new block, check all 'from' have been seen + if not hop[2] + # may retry later + all_ok = true + di.block.each_from_samefunc(self) { |fa| all_ok = false unless done.has_key?(fa) } + if not all_ok + todo.unshift([addr, obj, true]) + next + end + end + + froms = {} + di.block.each_from_samefunc(self) { |fa| froms[fa] = done[fa] if done[fa] } + if froms.values.uniq.length > 1 + obj = yield([:merge, addr, froms, froms.values.first]) + next if obj == false + end + + dilist = di.block.list + end + + if dilist.each { |_di| + break if done.has_key?(_di.address) # looped back into addr_start + done[_di.address] = obj + obj = yield([:di, _di.address, _di, obj]) + break if obj == false # also return false for the previous 'if' + } + + from = dilist.last.address + + if di.block.to_normal and di.block.to_normal[0] and + di.block.to_subfuncret and di.block.to_subfuncret[0] + # current instruction block calls into a subfunction + obj = di.block.to_normal.map { |subf| + yield([:subfunc, subf, from, obj]) + }.first # propagate 1st subfunc result + next if obj == false + end + + wantclone = false + di.block.each_to_samefunc(self) { |ta| + if wantclone + nobj = yield([:clone, ta, from, obj]) + next if obj == false + todo << [ta, nobj] + else + todo << [ta, obj] + 
wantclone = true + end + } + end + end + end + + # holds a backtrace result until a snapshot_addr is encountered + class StoppedExpr + attr_accessor :exprs + def initialize(e) @exprs = e end + end + + + attr_accessor :debug_backtrace + + # backtraces the value of an expression from start_addr + # updates blocks backtracked_for if type is set + # uses backtrace_walk + # all values returned are from backtrace_check_found (which may generate xrefs, labels, addrs to dasm) unless :no_check is specified + # options: + # :include_start => start backtracking including start_addr + # :from_subfuncret => + # :origin => origin to set for xrefs when resolution is successful + # :orig_expr => initial expression + # :type => xref type (:r, :w, :x, :addr) when :x, the results are added to #addrs_todo + # :len => xref len (for :r/:w) + # :snapshot_addr => addr (or array of) where the backtracker should stop + # if a snapshot_addr is given, values found are ignored if continuing the backtrace does not get to it (eg maxdepth/unk_addr/end) + # :maxdepth => maximum number of blocks to backtrace + # :detached => true if backtracking type :x and the result should not have from = origin set in @addrs_todo + # :max_complexity{_data} => maximum complexity of the expression before aborting its backtrace + # :log => Array, will be updated with the backtrace evolution + # :only_upto => backtrace only to update bt_for for current block & previous ending at only_upto + # :no_check => don't use backtrace_check_found (will not backtrace indirection static values) + # :terminals => array of symbols with constant value (stop backtracking if all symbols in the expr are terminals) (only supported with no_check) + def backtrace(expr, start_addr, nargs={}) + include_start = nargs.delete :include_start + from_subfuncret = nargs.delete :from_subfuncret + origin = nargs.delete :origin + origexpr = nargs.delete :orig_expr + type = nargs.delete :type + len = nargs.delete :len + snapshot_addr = 
nargs.delete(:snapshot_addr) || nargs.delete(:stopaddr) + maxdepth = nargs.delete(:maxdepth) || @backtrace_maxblocks + detached = nargs.delete :detached + max_complexity = nargs.delete(:max_complexity) || @backtrace_maxcomplexity + max_complexity_data = nargs.delete(:max_complexity) || @backtrace_maxcomplexity_data + bt_log = nargs.delete :log # array to receive the ongoing backtrace info + only_upto = nargs.delete :only_upto + no_check = nargs.delete :no_check + terminals = nargs.delete(:terminals) || [] + raise ArgumentError, "invalid argument to backtrace #{nargs.keys.inspect}" if not nargs.empty? + + expr = Expression[expr] + + origexpr = expr if origin == start_addr + + start_addr = normalize(start_addr) + di = @decoded[start_addr] + + if not snapshot_addr and @cpu.backtrace_is_stack_address(expr) puts " not backtracking stack address #{expr}" if debug_backtrace - return [] - end + return [] + end - if type == :r or type == :w - max_complexity = max_complexity_data - maxdepth = @backtrace_maxblocks_data if backtrace_maxblocks_data and maxdepth > @backtrace_maxblocks_data - end + if type == :r or type == :w + max_complexity = max_complexity_data + maxdepth = @backtrace_maxblocks_data if backtrace_maxblocks_data and maxdepth > @backtrace_maxblocks_data + end - if vals = (no_check ? (!need_backtrace(expr, terminals) and [expr]) : backtrace_check_found(expr, - di, origin, type, len, maxdepth, detached, snapshot_addr)) - # no need to update backtracked_for - return vals - elsif maxdepth <= 0 - return [Expression::Unknown] - end + if vals = (no_check ? 
(!need_backtrace(expr, terminals) and [expr]) : backtrace_check_found(expr, + di, origin, type, len, maxdepth, detached, snapshot_addr)) + # no need to update backtracked_for + return vals + elsif maxdepth <= 0 + return [Expression::Unknown] + end - # create initial backtracked_for - if type and origin == start_addr and di - btt = BacktraceTrace.new(expr, origin, origexpr, type, len, maxdepth-1) - btt.address = di.address - btt.exclude_instr = true if not include_start - btt.from_subfuncret = true if from_subfuncret and include_start - btt.detached = true if detached - di.block.backtracked_for |= [btt] - end + # create initial backtracked_for + if type and origin == start_addr and di + btt = BacktraceTrace.new(expr, origin, origexpr, type, len, maxdepth-1) + btt.address = di.address + btt.exclude_instr = true if not include_start + btt.from_subfuncret = true if from_subfuncret and include_start + btt.detached = true if detached + di.block.backtracked_for |= [btt] + end - @callback_prebacktrace[] if callback_prebacktrace + @callback_prebacktrace[] if callback_prebacktrace - # list of Expression/Integer - result = [] + # list of Expression/Integer + result = [] puts "backtracking #{type} #{expr} from #{di || Expression[start_addr || 0]} for #{@decoded[origin]}" if debug_backtrace or $DEBUG - bt_log << [:start, expr, start_addr] if bt_log - backtrace_walk(expr, start_addr, include_start, from_subfuncret, snapshot_addr, maxdepth) { |ev, expr_, h| - expr = expr_ - case ev - when :unknown_addr, :maxdepth + bt_log << [:start, expr, start_addr] if bt_log + backtrace_walk(expr, start_addr, include_start, from_subfuncret, snapshot_addr, maxdepth) { |ev, expr_, h| + expr = expr_ + case ev + when :unknown_addr, :maxdepth puts " backtrace end #{ev} #{expr}" if debug_backtrace - result |= [expr] if not snapshot_addr - @addrs_todo << [expr, (detached ? nil : origin)] if not snapshot_addr and type == :x and origin - when :end - if not expr.kind_of? 
StoppedExpr - oldexpr = expr - expr = backtrace_emu_blockup(h[:addr], expr) + result |= [expr] if not snapshot_addr + @addrs_todo << [expr, (detached ? nil : origin)] if not snapshot_addr and type == :x and origin + when :end + if not expr.kind_of? StoppedExpr + oldexpr = expr + expr = backtrace_emu_blockup(h[:addr], expr) puts " backtrace up #{Expression[h[:addr]]} #{oldexpr}#{" => #{expr}" if expr != oldexpr}" if debug_backtrace - bt_log << [:up, expr, oldexpr, h[:addr], :end] if bt_log and expr != oldexpr - if expr != oldexpr and not snapshot_addr and vals = (no_check ? - (!need_backtrace(expr, terminals) and [expr]) : - backtrace_check_found(expr, nil, origin, type, len, - maxdepth-h[:loopdetect].length, detached, snapshot_addr)) - result |= vals - next - end - end + bt_log << [:up, expr, oldexpr, h[:addr], :end] if bt_log and expr != oldexpr + if expr != oldexpr and not snapshot_addr and vals = (no_check ? + (!need_backtrace(expr, terminals) and [expr]) : + backtrace_check_found(expr, nil, origin, type, len, + maxdepth-h[:loopdetect].length, detached, snapshot_addr)) + result |= vals + next + end + end puts " backtrace end #{ev} #{expr}" if debug_backtrace - if not snapshot_addr - result |= [expr] + if not snapshot_addr + result |= [expr] - btt = BacktraceTrace.new(expr, origin, origexpr, type, len, maxdepth-h[:loopdetect].length-1) - btt.detached = true if detached - @decoded[h[:addr]].block.backtracked_for |= [btt] if @decoded[h[:addr]] - @function[h[:addr]].backtracked_for |= [btt] if @function[h[:addr]] and h[:addr] != :default - @addrs_todo << [expr, (detached ? nil : origin)] if type == :x and origin - end - when :stopaddr - if not expr.kind_of? 
StoppedExpr - oldexpr = expr - expr = backtrace_emu_blockup(h[:addr], expr) + btt = BacktraceTrace.new(expr, origin, origexpr, type, len, maxdepth-h[:loopdetect].length-1) + btt.detached = true if detached + @decoded[h[:addr]].block.backtracked_for |= [btt] if @decoded[h[:addr]] + @function[h[:addr]].backtracked_for |= [btt] if @function[h[:addr]] and h[:addr] != :default + @addrs_todo << [expr, (detached ? nil : origin)] if type == :x and origin + end + when :stopaddr + if not expr.kind_of? StoppedExpr + oldexpr = expr + expr = backtrace_emu_blockup(h[:addr], expr) puts " backtrace up #{Expression[h[:addr]]} #{oldexpr}#{" => #{expr}" if expr != oldexpr}" if debug_backtrace - bt_log << [:up, expr, oldexpr, h[:addr], :end] if bt_log and expr != oldexpr - end + bt_log << [:up, expr, oldexpr, h[:addr], :end] if bt_log and expr != oldexpr + end puts " backtrace end #{ev} #{expr}" if debug_backtrace - result |= ((expr.kind_of?(StoppedExpr)) ? expr.exprs : [expr]) - when :loop - next false if expr.kind_of? StoppedExpr - t = h[:looptrace] - oldexpr = t[0][0] - next false if expr == oldexpr # unmodifying loop + result |= ((expr.kind_of?(StoppedExpr)) ? expr.exprs : [expr]) + when :loop + next false if expr.kind_of? StoppedExpr + t = h[:looptrace] + oldexpr = t[0][0] + next false if expr == oldexpr # unmodifying loop puts " bt loop at #{Expression[t[0][1]]}: #{oldexpr} => #{expr} (#{t.map { |z| Expression[z[1]] }.join(' <- ')})" if debug_backtrace - false - when :up - next false if only_upto and h[:to] != only_upto - next expr if expr.kind_of? StoppedExpr - oldexpr = expr - expr = backtrace_emu_blockup(h[:from], expr) + false + when :up + next false if only_upto and h[:to] != only_upto + next expr if expr.kind_of? 
StoppedExpr + oldexpr = expr + expr = backtrace_emu_blockup(h[:from], expr) puts " backtrace up #{Expression[h[:from]]}->#{Expression[h[:to]]} #{oldexpr}#{" => #{expr}" if expr != oldexpr}" if debug_backtrace - bt_log << [:up, expr, oldexpr, h[:from], h[:to]] if bt_log + bt_log << [:up, expr, oldexpr, h[:from], h[:to]] if bt_log - if expr != oldexpr and vals = (no_check ? (!need_backtrace(expr, terminals) and [expr]) : - backtrace_check_found(expr, @decoded[h[:from]], origin, type, len, - maxdepth-h[:loopdetect].length, detached, snapshot_addr)) - if snapshot_addr - expr = StoppedExpr.new vals - next expr - else - result |= vals - bt_log << [:found, vals, h[:from]] if bt_log - next false - end - end + if expr != oldexpr and vals = (no_check ? (!need_backtrace(expr, terminals) and [expr]) : + backtrace_check_found(expr, @decoded[h[:from]], origin, type, len, + maxdepth-h[:loopdetect].length, detached, snapshot_addr)) + if snapshot_addr + expr = StoppedExpr.new vals + next expr + else + result |= vals + bt_log << [:found, vals, h[:from]] if bt_log + next false + end + end - if origin and type - # update backtracked_for - update_btf = lambda { |btf, new_btt| - # returns true if btf was modified - if i = btf.index(new_btt) - btf[i] = new_btt if btf[i].maxdepth < new_btt.maxdepth - else - btf << new_btt - end - } + if origin and type + # update backtracked_for + update_btf = lambda { |btf, new_btt| + # returns true if btf was modified + if i = btf.index(new_btt) + btf[i] = new_btt if btf[i].maxdepth < new_btt.maxdepth + else + btf << new_btt + end + } - btt = BacktraceTrace.new(expr, origin, origexpr, type, len, maxdepth-h[:loopdetect].length-1) - btt.detached = true if detached - if x = di_at(h[:from]) - update_btf[x.block.backtracked_for, btt] - end - if x = @function[h[:from]] and h[:from] != :default - update_btf[x.backtracked_for, btt] - end - if x = di_at(h[:to]) - btt = btt.dup - btt.address = x.address - btt.from_subfuncret = true if h[:sfret] == :subfuncret - 
if backtrace_check_funcret(btt, h[:from], h[:real_to] || h[:to]) + btt = BacktraceTrace.new(expr, origin, origexpr, type, len, maxdepth-h[:loopdetect].length-1) + btt.detached = true if detached + if x = di_at(h[:from]) + update_btf[x.block.backtracked_for, btt] + end + if x = @function[h[:from]] and h[:from] != :default + update_btf[x.backtracked_for, btt] + end + if x = di_at(h[:to]) + btt = btt.dup + btt.address = x.address + btt.from_subfuncret = true if h[:sfret] == :subfuncret + if backtrace_check_funcret(btt, h[:from], h[:real_to] || h[:to]) puts " function returns to caller" if debug_backtrace - next false - end - if not update_btf[x.block.backtracked_for, btt] + next false + end + if not update_btf[x.block.backtracked_for, btt] puts " already backtraced" if debug_backtrace - next false - end - end - end - expr - when :di, :func - next if expr.kind_of? StoppedExpr - if not snapshot_addr and @cpu.backtrace_is_stack_address(expr) + next false + end + end + end + expr + when :di, :func + next if expr.kind_of? 
StoppedExpr + if not snapshot_addr and @cpu.backtrace_is_stack_address(expr) puts " not backtracking stack address #{expr}" if debug_backtrace - next false - end + next false + end oldexpr = expr - case ev - when :di - h[:addr] = h[:di].address - expr = backtrace_emu_instr(h[:di], expr) - bt_log << [ev, expr, oldexpr, h[:di], h[:addr]] if bt_log and expr != oldexpr - when :func - expr = backtrace_emu_subfunc(h[:func], h[:funcaddr], h[:addr], expr, origin, maxdepth-h[:loopdetect].length) - if snapshot_addr and snapshot_addr == h[:funcaddr] - # XXX recursiveness detection needs to be fixed + case ev + when :di + h[:addr] = h[:di].address + expr = backtrace_emu_instr(h[:di], expr) + bt_log << [ev, expr, oldexpr, h[:di], h[:addr]] if bt_log and expr != oldexpr + when :func + expr = backtrace_emu_subfunc(h[:func], h[:funcaddr], h[:addr], expr, origin, maxdepth-h[:loopdetect].length) + if snapshot_addr and snapshot_addr == h[:funcaddr] + # XXX recursiveness detection needs to be fixed puts " backtrace: recursive function #{Expression[h[:funcaddr]]}" if debug_backtrace - next false - end - bt_log << [ev, expr, oldexpr, h[:funcaddr], h[:addr]] if bt_log and expr != oldexpr - end + next false + end + bt_log << [ev, expr, oldexpr, h[:funcaddr], h[:addr]] if bt_log and expr != oldexpr + end puts " backtrace #{h[:di] || Expression[h[:funcaddr]]} #{oldexpr} => #{expr}" if debug_backtrace and expr != oldexpr - if vals = (no_check ? (!need_backtrace(expr, terminals) and [expr]) : backtrace_check_found(expr, - h[:di], origin, type, len, maxdepth-h[:loopdetect].length, detached, snapshot_addr)) - if snapshot_addr - expr = StoppedExpr.new vals - else - result |= vals - bt_log << [:found, vals, h[:addr]] if bt_log - next false - end - elsif expr.complexity > max_complexity + if vals = (no_check ? 
(!need_backtrace(expr, terminals) and [expr]) : backtrace_check_found(expr, + h[:di], origin, type, len, maxdepth-h[:loopdetect].length, detached, snapshot_addr)) + if snapshot_addr + expr = StoppedExpr.new vals + else + result |= vals + bt_log << [:found, vals, h[:addr]] if bt_log + next false + end + elsif expr.complexity > max_complexity puts " backtrace aborting, expr too complex" if debug_backtrace - next false - end - expr - else raise ev.inspect - end - } + next false + end + expr + else raise ev.inspect + end + } puts ' backtrace result: ' + result.map { |r| Expression[r] }.join(', ') if debug_backtrace - result - end + result + end - # checks if the BacktraceTrace is a call to a known subfunction - # returns true and updates self.addrs_todo - def backtrace_check_funcret(btt, funcaddr, instraddr) - if di = @decoded[instraddr] and @function[funcaddr] and btt.type == :x and - not btt.from_subfuncret and - @cpu.backtrace_is_function_return(btt.expr, @decoded[btt.origin]) and - retaddr = backtrace_emu_instr(di, btt.expr) and - not need_backtrace(retaddr) + # checks if the BacktraceTrace is a call to a known subfunction + # returns true and updates self.addrs_todo + def backtrace_check_funcret(btt, funcaddr, instraddr) + if di = @decoded[instraddr] and @function[funcaddr] and btt.type == :x and + not btt.from_subfuncret and + @cpu.backtrace_is_function_return(btt.expr, @decoded[btt.origin]) and + retaddr = backtrace_emu_instr(di, btt.expr) and + not need_backtrace(retaddr) puts " backtrace addrs_todo << #{Expression[retaddr]} from #{di} (funcret)" if debug_backtrace - di.block.add_to_subfuncret normalize(retaddr) - if @decoded[funcaddr].kind_of? 
DecodedInstruction - # check that all callers :saveip returns (eg recursive call that was resolved - # before we found funcaddr was a function) - @decoded[funcaddr].block.each_from_normal { |fm| - if fdi = di_at(fm) and fdi.opcode.props[:saveip] and not fdi.block.to_subfuncret - backtrace_check_funcret(btt, funcaddr, fm) - end - } - end - if not @function[funcaddr].finalized - # the function is not fully disassembled: arrange for the retaddr to be - # disassembled only after the subfunction is finished - # for that we walk the code from the call, mark each block start, and insert the sfret - # just before the 1st function block address in @addrs_todo (which is pop()ed by dasm_step) - faddrlist = [] - todo = [] - di.block.each_to_normal { |t| todo << normalize(t) } - while a = todo.pop - next if faddrlist.include? a or not get_section_at(a) - faddrlist << a - if @decoded[a].kind_of? DecodedInstruction - @decoded[a].block.each_to_samefunc(self) { |t| todo << normalize(t) } - end - end + di.block.add_to_subfuncret normalize(retaddr) + if @decoded[funcaddr].kind_of? DecodedInstruction + # check that all callers :saveip returns (eg recursive call that was resolved + # before we found funcaddr was a function) + @decoded[funcaddr].block.each_from_normal { |fm| + if fdi = di_at(fm) and fdi.opcode.props[:saveip] and not fdi.block.to_subfuncret + backtrace_check_funcret(btt, funcaddr, fm) + end + } + end + if not @function[funcaddr].finalized + # the function is not fully disassembled: arrange for the retaddr to be + # disassembled only after the subfunction is finished + # for that we walk the code from the call, mark each block start, and insert the sfret + # just before the 1st function block address in @addrs_todo (which is pop()ed by dasm_step) + faddrlist = [] + todo = [] + di.block.each_to_normal { |t| todo << normalize(t) } + while a = todo.pop + next if faddrlist.include? a or not get_section_at(a) + faddrlist << a + if @decoded[a].kind_of? 
DecodedInstruction + @decoded[a].block.each_to_samefunc(self) { |t| todo << normalize(t) } + end + end - idx = @addrs_todo.index(@addrs_todo.find { |r, i, sfr| faddrlist.include? normalize(r) }) || -1 - @addrs_todo.insert(idx, [retaddr, instraddr, true]) - else - @addrs_todo << [retaddr, instraddr, true] - end - true - end - end + idx = @addrs_todo.index(@addrs_todo.find { |r, i, sfr| faddrlist.include? normalize(r) }) || -1 + @addrs_todo.insert(idx, [retaddr, instraddr, true]) + else + @addrs_todo << [retaddr, instraddr, true] + end + true + end + end - # applies one decodedinstruction to an expression - def backtrace_emu_instr(di, expr) - @cpu.backtrace_emu(di, expr) - end + # applies one decodedinstruction to an expression + def backtrace_emu_instr(di, expr) + @cpu.backtrace_emu(di, expr) + end - # applies one subfunction to an expression - def backtrace_emu_subfunc(func, funcaddr, calladdr, expr, origin, maxdepth) - bind = func.get_backtrace_binding(self, funcaddr, calladdr, expr, origin, maxdepth) - Expression[expr.bind(bind).reduce] - end + # applies one subfunction to an expression + def backtrace_emu_subfunc(func, funcaddr, calladdr, expr, origin, maxdepth) + bind = func.get_backtrace_binding(self, funcaddr, calladdr, expr, origin, maxdepth) + Expression[expr.bind(bind).reduce] + end - # applies a location binding - def backtrace_emu_blockup(addr, expr) - (ab = @address_binding[addr]) ? Expression[expr.bind(ab).reduce] : expr - end + # applies a location binding + def backtrace_emu_blockup(addr, expr) + (ab = @address_binding[addr]) ? 
Expression[expr.bind(ab).reduce] : expr + end - def backtrace_update_function_binding(addr, func=@function[addr], retaddrs=func.return_address) - @cpu.backtrace_update_function_binding(self, addr, func, retaddrs) - end + def backtrace_update_function_binding(addr, func=@function[addr], retaddrs=func.return_address) + @cpu.backtrace_update_function_binding(self, addr, func, retaddrs) + end - # static resolution of indirections - def resolve(expr) - binding = Expression[expr].expr_indirections.inject(@old_prog_binding) { |binding_, ind| - e = get_edata_at(resolve(ind.target)) - return expr if not e - binding_.merge ind => Expression[ e.decode_imm("u#{8*ind.len}".to_sym, @cpu.endianness) ] - } - Expression[expr].bind(binding).reduce - end + # static resolution of indirections + def resolve(expr) + binding = Expression[expr].expr_indirections.inject(@old_prog_binding) { |binding_, ind| + e = get_edata_at(resolve(ind.target)) + return expr if not e + binding_.merge ind => Expression[ e.decode_imm("u#{8*ind.len}".to_sym, @cpu.endianness) ] + } + Expression[expr].bind(binding).reduce + end - # returns true if the expression needs more backtrace - # it checks for the presence of a symbol (not :unknown), which means it depends on some register value - def need_backtrace(expr, terminals=[]) - return if expr.kind_of? ::Integer - !(expr.externals.grep(::Symbol) - [:unknown] - terminals).empty? - end + # returns true if the expression needs more backtrace + # it checks for the presence of a symbol (not :unknown), which means it depends on some register value + def need_backtrace(expr, terminals=[]) + return if expr.kind_of? ::Integer + !(expr.externals.grep(::Symbol) - [:unknown] - terminals).empty? 
+ end - # returns an array of expressions, or nil if expr needs more backtrace - # it needs more backtrace if expr.externals include a Symbol != :unknown (symbol == register value) - # if it need no more backtrace, expr's indirections are recursively resolved - # xrefs are created, and di args are updated (immediate => label) - # if type is :x, addrs_todo is updated, and if di starts a block, expr is checked to see if it may be a subfunction return value - # - # expr indirection are solved by first finding the value of the pointer, and then rebacktracking for write-type access - # detached is true if type is :x and from should not be set in addrs_todo (indirect call flow, eg external function callback) - # if the backtrace ends pre entrypoint, returns the value encoded in the raw binary - # XXX global variable (modified by another function), exported data, multithreaded app.. - # TODO handle memory aliasing (mov ebx, eax ; write [ebx] ; read [eax]) - # TODO trace expr evolution through backtrace, to modify immediates to an expr involving label names - # TODO mov [ptr], imm ; <...> ; jmp [ptr] => rename imm as loc_XX - # eg. mov eax, 42 ; add eax, 4 ; jmp eax => mov eax, some_label-4 - def backtrace_check_found(expr, di, origin, type, len, maxdepth, detached, snapshot_addr=nil) - # only entrypoints or block starts called by a :saveip are checked for being a function - # want to execute [esp] from a block start - if type == :x and di and di == di.block.list.first and @cpu.backtrace_is_function_return(expr, @decoded[origin]) and ( - # which is an entrypoint.. 
- (not di.block.from_normal and not di.block.from_subfuncret) or - # ..or called from a saveip - (bool = false ; di.block.each_from_normal { |fn| bool = true if @decoded[fn] and @decoded[fn].opcode.props[:saveip] } ; bool)) + # returns an array of expressions, or nil if expr needs more backtrace + # it needs more backtrace if expr.externals include a Symbol != :unknown (symbol == register value) + # if it need no more backtrace, expr's indirections are recursively resolved + # xrefs are created, and di args are updated (immediate => label) + # if type is :x, addrs_todo is updated, and if di starts a block, expr is checked to see if it may be a subfunction return value + # + # expr indirection are solved by first finding the value of the pointer, and then rebacktracking for write-type access + # detached is true if type is :x and from should not be set in addrs_todo (indirect call flow, eg external function callback) + # if the backtrace ends pre entrypoint, returns the value encoded in the raw binary + # XXX global variable (modified by another function), exported data, multithreaded app.. + # TODO handle memory aliasing (mov ebx, eax ; write [ebx] ; read [eax]) + # TODO trace expr evolution through backtrace, to modify immediates to an expr involving label names + # TODO mov [ptr], imm ; <...> ; jmp [ptr] => rename imm as loc_XX + # eg. mov eax, 42 ; add eax, 4 ; jmp eax => mov eax, some_label-4 + def backtrace_check_found(expr, di, origin, type, len, maxdepth, detached, snapshot_addr=nil) + # only entrypoints or block starts called by a :saveip are checked for being a function + # want to execute [esp] from a block start + if type == :x and di and di == di.block.list.first and @cpu.backtrace_is_function_return(expr, @decoded[origin]) and ( + # which is an entrypoint.. 
+ (not di.block.from_normal and not di.block.from_subfuncret) or + # ..or called from a saveip + (bool = false ; di.block.each_from_normal { |fn| bool = true if @decoded[fn] and @decoded[fn].opcode.props[:saveip] } ; bool)) - # now we can mark the current address a function start - # the actual return address will be found later (we tell the caller to continue the backtrace) - addr = di.address - l = auto_label_at(addr, 'sub', 'loc', 'xref') - if not f = @function[addr] - f = @function[addr] = DecodedFunction.new - puts "found new function #{l} at #{Expression[addr]}" if $VERBOSE - end - f.finalized = false + # now we can mark the current address a function start + # the actual return address will be found later (we tell the caller to continue the backtrace) + addr = di.address + l = auto_label_at(addr, 'sub', 'loc', 'xref') + if not f = @function[addr] + f = @function[addr] = DecodedFunction.new + puts "found new function #{l} at #{Expression[addr]}" if $VERBOSE + end + f.finalized = false - if @decoded[origin] - f.return_address ||= [] - f.return_address |= [origin] - @decoded[origin].add_comment "endsub #{l}" - # TODO add_xref (to update the comment on rename_label) - end + if @decoded[origin] + f.return_address ||= [] + f.return_address |= [origin] + @decoded[origin].add_comment "endsub #{l}" + # TODO add_xref (to update the comment on rename_label) + end - f.backtracked_for |= @decoded[addr].block.backtracked_for.find_all { |btt| not btt.address } - end + f.backtracked_for |= @decoded[addr].block.backtracked_for.find_all { |btt| not btt.address } + end - return if need_backtrace(expr) - if snapshot_addr - return if expr.expr_externals(true).find { |ee| ee.kind_of?(Indirection) } - end + return if need_backtrace(expr) + if snapshot_addr + return if expr.expr_externals(true).find { |ee| ee.kind_of?(Indirection) } + end puts "backtrace #{type} found #{expr} from #{di} orig #{@decoded[origin] || Expression[origin] if origin}" if debug_backtrace - result = 
backtrace_value(expr, maxdepth) - # keep the ori pointer in the results to emulate volatile memory (eg decompiler prefers this) - #result << expr if not type # XXX returning multiple values for nothing is too confusing, TODO fix decompiler - result.uniq! + result = backtrace_value(expr, maxdepth) + # keep the ori pointer in the results to emulate volatile memory (eg decompiler prefers this) + #result << expr if not type # XXX returning multiple values for nothing is too confusing, TODO fix decompiler + result.uniq! - # create xrefs/labels - result.each { |e| - backtrace_found_result(e, di, type, origin, len, detached) - } if type and origin + # create xrefs/labels + result.each { |e| + backtrace_found_result(e, di, type, origin, len, detached) + } if type and origin - result - end + result + end - # returns an array of expressions with Indirections resolved (recursive with backtrace_indirection) - def backtrace_value(expr, maxdepth) - # array of expression with all indirections resolved - result = [Expression[expr.reduce]] + # returns an array of expressions with Indirections resolved (recursive with backtrace_indirection) + def backtrace_value(expr, maxdepth) + # array of expression with all indirections resolved + result = [Expression[expr.reduce]] - # solve each indirection sequentially, clone expr for each value (aka cross-product) - result.first.expr_indirections.uniq.each { |i| - next_result = [] - backtrace_indirection(i, maxdepth).each { |rr| - next_result |= result.map { |e| Expression[e.bind(i => rr).reduce] } - } - result = next_result - } + # solve each indirection sequentially, clone expr for each value (aka cross-product) + result.first.expr_indirections.uniq.each { |i| + next_result = [] + backtrace_indirection(i, maxdepth).each { |rr| + next_result |= result.map { |e| Expression[e.bind(i => rr).reduce] } + } + result = next_result + } - result.uniq - end + result.uniq + end - # returns the array of values pointed by the indirection at its invocation 
(ind.origin) - # first resolves the pointer using backtrace_value, if it does not point in edata keep the original pointer - # then backtraces from ind.origin until it finds an :w xref origin - # if no :w access is found, returns the value encoded in the raw section data - # TODO handle unaligned (partial?) writes - def backtrace_indirection(ind, maxdepth) - if not ind.origin - puts "backtrace_ind: no origin for #{ind}" if $VERBOSE - return [ind] - end + # returns the array of values pointed by the indirection at its invocation (ind.origin) + # first resolves the pointer using backtrace_value, if it does not point in edata keep the original pointer + # then backtraces from ind.origin until it finds an :w xref origin + # if no :w access is found, returns the value encoded in the raw section data + # TODO handle unaligned (partial?) writes + def backtrace_indirection(ind, maxdepth) + if not ind.origin + puts "backtrace_ind: no origin for #{ind}" if $VERBOSE + return [ind] + end - ret = [] + ret = [] - decode_imm = lambda { |addr, len| - edata = get_edata_at(addr) - if edata - Expression[ edata.decode_imm("u#{8*len}".to_sym, @cpu.endianness) ] - else - Expression::Unknown - end - } + decode_imm = lambda { |addr, len| + edata = get_edata_at(addr) + if edata + Expression[ edata.decode_imm("u#{8*len}".to_sym, @cpu.endianness) ] + else + Expression::Unknown + end + } - # resolve pointers (they may include Indirections) - backtrace_value(ind.target, maxdepth).each { |ptr| - # find write xrefs to the ptr - refs = [] - each_xref(ptr, :w) { |x| - # XXX should be rebacktracked on new xref - next if not @decoded[x.origin] - refs |= [x.origin] - } if ptr != Expression::Unknown + # resolve pointers (they may include Indirections) + backtrace_value(ind.target, maxdepth).each { |ptr| + # find write xrefs to the ptr + refs = [] + each_xref(ptr, :w) { |x| + # XXX should be rebacktracked on new xref + next if not @decoded[x.origin] + refs |= [x.origin] + } if ptr != 
Expression::Unknown - if refs.empty? - if get_section_at(ptr) - # static data, newer written : return encoded value - ret |= [decode_imm[ptr, ind.len]] - next - else - # unknown pointer : backtrace the indirection, hope it solves itself - initval = ind - end - else - # wait until we find a write xref, then backtrace the written value - initval = true - end + if refs.empty? + if get_section_at(ptr) + # static data, newer written : return encoded value + ret |= [decode_imm[ptr, ind.len]] + next + else + # unknown pointer : backtrace the indirection, hope it solves itself + initval = ind + end + else + # wait until we find a write xref, then backtrace the written value + initval = true + end - # wait until we arrive at an xref'ing instruction, then backtrace the written value - backtrace_walk(initval, ind.origin, true, false, nil, maxdepth-1) { |ev, expr, h| - case ev - when :unknown_addr, :maxdepth, :stopaddr + # wait until we arrive at an xref'ing instruction, then backtrace the written value + backtrace_walk(initval, ind.origin, true, false, nil, maxdepth-1) { |ev, expr, h| + case ev + when :unknown_addr, :maxdepth, :stopaddr puts " backtrace_indirection for #{ind.target} failed: #{ev}" if debug_backtrace - ret |= [Expression::Unknown] - when :end - if not refs.empty? and (expr == true or not need_backtrace(expr)) - if expr == true - # found a path avoiding the :w xrefs, read the encoded initial value - ret |= [decode_imm[ptr, ind.len]] - else - bd = expr.expr_indirections.inject({}) { |h_, i| h_.update i => decode_imm[i.target, i.len] } - ret |= [Expression[expr.bind(bd).reduce]] - end - else - # unknown pointer, backtrace did not resolve... - ret |= [Expression::Unknown] - end - when :di - di = h[:di] - if expr == true - next true if not refs.include? 
di.address - # find the expression to backtrace: assume this is the :w xref from this di - writes = get_xrefs_rw(di) - writes = writes.find_all { |x_type, x_ptr, x_len| x_type == :w and x_len == ind.len } - if writes.length != 1 - puts "backtrace_ind: incompatible xrefs to #{ptr} from #{di}" if $DEBUG - ret |= [Expression::Unknown] - next false - end - expr = Indirection.new(writes[0][1], ind.len, di.address) - end - expr = backtrace_emu_instr(di, expr) - # may have new indirections... recall bt_value ? - #if not need_backtrace(expr) - if expr.expr_externals.all? { |e| @prog_binding[e] or @function[normalize(e)] } and expr.expr_indirections.empty? - ret |= backtrace_value(expr, maxdepth-1-h[:loopdetect].length) - false - else - expr - end - when :func - next true if expr == true # XXX - expr = backtrace_emu_subfunc(h[:func], h[:funcaddr], h[:addr], expr, ind.origin, maxdepth-h[:loopdetect].length) - #if not need_backtrace(expr) - if expr.expr_externals.all? { |e| @prog_binding[e] or @function[normalize(e)] } and expr.expr_indirections.empty? - ret |= backtrace_value(expr, maxdepth-1-h[:loopdetect].length) - false - else - expr - end - end - } - } + ret |= [Expression::Unknown] + when :end + if not refs.empty? and (expr == true or not need_backtrace(expr)) + if expr == true + # found a path avoiding the :w xrefs, read the encoded initial value + ret |= [decode_imm[ptr, ind.len]] + else + bd = expr.expr_indirections.inject({}) { |h_, i| h_.update i => decode_imm[i.target, i.len] } + ret |= [Expression[expr.bind(bd).reduce]] + end + else + # unknown pointer, backtrace did not resolve... + ret |= [Expression::Unknown] + end + when :di + di = h[:di] + if expr == true + next true if not refs.include? 
di.address + # find the expression to backtrace: assume this is the :w xref from this di + writes = get_xrefs_rw(di) + writes = writes.find_all { |x_type, x_ptr, x_len| x_type == :w and x_len == ind.len } + if writes.length != 1 + puts "backtrace_ind: incompatible xrefs to #{ptr} from #{di}" if $DEBUG + ret |= [Expression::Unknown] + next false + end + expr = Indirection.new(writes[0][1], ind.len, di.address) + end + expr = backtrace_emu_instr(di, expr) + # may have new indirections... recall bt_value ? + #if not need_backtrace(expr) + if expr.expr_externals.all? { |e| @prog_binding[e] or @function[normalize(e)] } and expr.expr_indirections.empty? + ret |= backtrace_value(expr, maxdepth-1-h[:loopdetect].length) + false + else + expr + end + when :func + next true if expr == true # XXX + expr = backtrace_emu_subfunc(h[:func], h[:funcaddr], h[:addr], expr, ind.origin, maxdepth-h[:loopdetect].length) + #if not need_backtrace(expr) + if expr.expr_externals.all? { |e| @prog_binding[e] or @function[normalize(e)] } and expr.expr_indirections.empty? 
+ ret |= backtrace_value(expr, maxdepth-1-h[:loopdetect].length) + false + else + expr + end + end + } + } - ret - end + ret + end - # creates xrefs, updates addrs_todo, updates instr args - def backtrace_found_result(expr, di, type, origin, len, detached) - n = normalize(expr) - fallthrough = true if type == :x and o = di_at(origin) and not o.opcode.props[:stopexec] and n == o.block.list.last.next_addr # delay_slot - add_xref(n, Xref.new(type, origin, len)) if origin != :default and origin != Expression::Unknown and not fallthrough - unk = true if n == Expression::Unknown + # creates xrefs, updates addrs_todo, updates instr args + def backtrace_found_result(expr, di, type, origin, len, detached) + n = normalize(expr) + fallthrough = true if type == :x and o = di_at(origin) and not o.opcode.props[:stopexec] and n == o.block.list.last.next_addr # delay_slot + add_xref(n, Xref.new(type, origin, len)) if origin != :default and origin != Expression::Unknown and not fallthrough + unk = true if n == Expression::Unknown - add_xref(n, Xref.new(:addr, di.address)) if di and di.address != origin and not unk - base = { nil => 'loc', 1 => 'byte', 2 => 'word', 4 => 'dword', 8 => 'qword' }[len] || 'xref' - base = 'sub' if @function[n] - n = Expression[auto_label_at(n, base, 'xref') || n] if not fallthrough - n = Expression[n] + add_xref(n, Xref.new(:addr, di.address)) if di and di.address != origin and not unk + base = { nil => 'loc', 1 => 'byte', 2 => 'word', 4 => 'dword', 8 => 'qword' }[len] || 'xref' + base = 'sub' if @function[n] + n = Expression[auto_label_at(n, base, 'xref') || n] if not fallthrough + n = Expression[n] - # update instr args - # TODO trace expression evolution to allow handling of - # mov eax, 28 ; add eax, 4 ; jmp eax - # => mov eax, (loc_xx-4) - if di and not unk and expr != n # and di.address == origin - @cpu.replace_instr_arg_immediate(di.instruction, expr, n) - end - if @decoded[origin] and not unk - @cpu.backtrace_found_result(self, @decoded[origin], 
expr, type, len) - end + # update instr args + # TODO trace expression evolution to allow handling of + # mov eax, 28 ; add eax, 4 ; jmp eax + # => mov eax, (loc_xx-4) + if di and not unk and expr != n # and di.address == origin + @cpu.replace_instr_arg_immediate(di.instruction, expr, n) + end + if @decoded[origin] and not unk + @cpu.backtrace_found_result(self, @decoded[origin], expr, type, len) + end - # add comment - if type and @decoded[origin] # and not @decoded[origin].instruction.args.include? n - @decoded[origin].add_comment "#{type}#{len}:#{n}" if not fallthrough - end + # add comment + if type and @decoded[origin] # and not @decoded[origin].instruction.args.include? n + @decoded[origin].add_comment "#{type}#{len}:#{n}" if not fallthrough + end - # check if target is a string - if di and type == :r and (len == 1 or len == 2) and s = get_section_at(n) - l = s[0].inv_export[s[0].ptr] - case len - when 1; str = s[0].read(32).unpack('C*') - when 2; str = s[0].read(64).unpack('v*') - end - str = str.inject('') { |str_, c| - case c - when 0x20..0x7e, ?\n, ?\r, ?\t; str_ << c - else break str_ - end - } - if str.length >= 4 - di.add_comment "#{'L' if len == 2}#{str.inspect}" - str = 'a_' + str.downcase.delete('^a-z0-9')[0, 12] - if str.length >= 8 and l[0, 5] == 'byte_' - rename_label(l, @program.new_label(str)) - end - end - end + # check if target is a string + if di and type == :r and (len == 1 or len == 2) and s = get_section_at(n) + l = s[0].inv_export[s[0].ptr] + case len + when 1; str = s[0].read(32).unpack('C*') + when 2; str = s[0].read(64).unpack('v*') + end + str = str.inject('') { |str_, c| + case c + when 0x20..0x7e, ?\n, ?\r, ?\t; str_ << c + else break str_ + end + } + if str.length >= 4 + di.add_comment "#{'L' if len == 2}#{str.inspect}" + str = 'a_' + str.downcase.delete('^a-z0-9')[0, 12] + if str.length >= 8 and l[0, 5] == 'byte_' + rename_label(l, @program.new_label(str)) + end + end + end - # XXX all this should be done in backtrace() { } - if 
type == :x and origin - if detached - o = @decoded[origin] ? origin : di ? di.address : nil # lib function callback have origin == libfuncname, so we must find a block somewhere else - origin = nil - @decoded[o].block.add_to_indirect(normalize(n)) if @decoded[o] and not unk - else - @decoded[origin].block.add_to_normal(normalize(n)) if @decoded[origin] and not unk - end - @addrs_todo << [n, origin] - end - end + # XXX all this should be done in backtrace() { } + if type == :x and origin + if detached + o = @decoded[origin] ? origin : di ? di.address : nil # lib function callback have origin == libfuncname, so we must find a block somewhere else + origin = nil + @decoded[o].block.add_to_indirect(normalize(n)) if @decoded[o] and not unk + else + @decoded[origin].block.add_to_normal(normalize(n)) if @decoded[origin] and not unk + end + @addrs_todo << [n, origin] + end + end - def inspect - "" % object_id - end + def inspect + "" % object_id + end - def to_s - a = '' - dump { |l| a << l << "\n" } - a - end + def to_s + a = '' + dump { |l| a << l << "\n" } + a + end - # dumps the source, optionnally including data - # yields (defaults puts) each line - def dump(dump_data=true, &b) - b ||= lambda { |l| puts l } - @sections.sort_by { |addr, edata| addr.kind_of?(::Integer) ? addr : 0 }.each { |addr, edata| - addr = Expression[addr] if addr.kind_of? ::String - blockoffs = @decoded.values.grep(DecodedInstruction).map { |di| Expression[di.block.address, :-, addr].reduce if di.block_head? }.grep(::Integer).sort.reject { |o| o < 0 or o >= edata.length } - b[@program.dump_section_header(addr, edata)] - if not dump_data and edata.length > 16*1024 and blockoffs.empty? 
- b["// [#{edata.length} data bytes]"] - next - end - unk_off = 0 # last off displayed - # blocks.sort_by { |b| b.addr }.each { |b| - while unk_off < edata.length - if unk_off == blockoffs.first - blockoffs.shift - di = @decoded[addr+unk_off] - if unk_off != di.block.edata_ptr - b["\n// ------ overlap (#{unk_off-di.block.edata_ptr}) ------"] - elsif di.block.from_normal.kind_of? ::Array - b["\n"] - end - dump_block(di.block, &b) - unk_off += [di.block.bin_length, 1].max - unk_off = blockoffs.first if blockoffs.first and unk_off > blockoffs.first - else - next_off = blockoffs.first || edata.length - if dump_data or next_off - unk_off < 16 - unk_off = dump_data(addr + unk_off, edata, unk_off, &b) - else - b["// [#{next_off - unk_off} data bytes]"] - unk_off = next_off - end - end - end - } - end + # dumps the source, optionnally including data + # yields (defaults puts) each line + def dump(dump_data=true, &b) + b ||= lambda { |l| puts l } + @sections.sort_by { |addr, edata| addr.kind_of?(::Integer) ? addr : 0 }.each { |addr, edata| + addr = Expression[addr] if addr.kind_of? ::String + blockoffs = @decoded.values.grep(DecodedInstruction).map { |di| Expression[di.block.address, :-, addr].reduce if di.block_head? }.grep(::Integer).sort.reject { |o| o < 0 or o >= edata.length } + b[@program.dump_section_header(addr, edata)] + if not dump_data and edata.length > 16*1024 and blockoffs.empty? + b["// [#{edata.length} data bytes]"] + next + end + unk_off = 0 # last off displayed + # blocks.sort_by { |b| b.addr }.each { |b| + while unk_off < edata.length + if unk_off == blockoffs.first + blockoffs.shift + di = @decoded[addr+unk_off] + if unk_off != di.block.edata_ptr + b["\n// ------ overlap (#{unk_off-di.block.edata_ptr}) ------"] + elsif di.block.from_normal.kind_of? 
::Array + b["\n"] + end + dump_block(di.block, &b) + unk_off += [di.block.bin_length, 1].max + unk_off = blockoffs.first if blockoffs.first and unk_off > blockoffs.first + else + next_off = blockoffs.first || edata.length + if dump_data or next_off - unk_off < 16 + unk_off = dump_data(addr + unk_off, edata, unk_off, &b) + else + b["// [#{next_off - unk_off} data bytes]"] + unk_off = next_off + end + end + end + } + end - # dumps a block of decoded instructions - def dump_block(block, &b) - b ||= lambda { |l| puts l } - block = @decoded[block].block if @decoded[block] - dump_block_header(block, &b) - block.list.each { |di| b[di.show] } - end + # dumps a block of decoded instructions + def dump_block(block, &b) + b ||= lambda { |l| puts l } + block = @decoded[block].block if @decoded[block] + dump_block_header(block, &b) + block.list.each { |di| b[di.show] } + end - # shows the xrefs/labels at block start - def dump_block_header(block, &b) - b ||= lambda { |l| puts l } - xr = [] - each_xref(block.address) { |x| - case x.type - when :x; xr << Expression[x.origin] - when :r, :w; xr << "#{x.type}#{x.len}:#{Expression[x.origin]}" - end - } - if not xr.empty? - b["\n// Xrefs: #{xr[0, 8].join(' ')}#{' ...' if xr.length > 8}"] - end - if block.edata.inv_export[block.edata_ptr] and label_alias[block.address] - b["\n"] if xr.empty? - label_alias[block.address].each { |name| b["#{name}:"] } - end - if c = @comment[block.address] - c = c.join("\n") if c.kind_of? ::Array - c.each_line { |l| b["// #{l}"] } - end - end + # shows the xrefs/labels at block start + def dump_block_header(block, &b) + b ||= lambda { |l| puts l } + xr = [] + each_xref(block.address) { |x| + case x.type + when :x; xr << Expression[x.origin] + when :r, :w; xr << "#{x.type}#{x.len}:#{Expression[x.origin]}" + end + } + if not xr.empty? + b["\n// Xrefs: #{xr[0, 8].join(' ')}#{' ...' if xr.length > 8}"] + end + if block.edata.inv_export[block.edata_ptr] and label_alias[block.address] + b["\n"] if xr.empty? 
+ label_alias[block.address].each { |name| b["#{name}:"] } + end + if c = @comment[block.address] + c = c.join("\n") if c.kind_of? ::Array + c.each_line { |l| b["// #{l}"] } + end + end - # dumps data/labels, honours @xrefs.len if exists - # dumps one line only - # stops on end of edata/@decoded/@xref - # returns the next offset to display - # TODO array-style data access - def dump_data(addr, edata, off, &b) - b ||= lambda { |l| puts l } - if l = edata.inv_export[off] and label_alias[addr] - l_list = label_alias[addr].sort - l = l_list.pop || l - l_list.each { |ll| - b["#{ll}:"] - } - l = (l + ' ').ljust(16) - else l = '' - end - elemlen = 1 # size of each element we dump (db by default) - dumplen = -off % 16 # number of octets to dump - dumplen = 16 if dumplen == 0 - cmt = [] - each_xref(addr) { |x| - dumplen = elemlen = x.len if x.len == 2 or x.len == 4 - cmt << " #{x.type}#{x.len}:#{Expression[x.origin]}" - } - cmt = " ; @#{Expression[addr]}" + cmt.sort[0, 6].join - if r = edata.reloc[off] - dumplen = elemlen = r.type.to_s[1..-1].to_i/8 - end - dataspec = { 1 => 'db ', 2 => 'dw ', 4 => 'dd ', 8 => 'dq ' }[elemlen] - if not dataspec - dataspec = 'db ' - elemlen = 1 - end - l << dataspec + # dumps data/labels, honours @xrefs.len if exists + # dumps one line only + # stops on end of edata/@decoded/@xref + # returns the next offset to display + # TODO array-style data access + def dump_data(addr, edata, off, &b) + b ||= lambda { |l| puts l } + if l = edata.inv_export[off] and label_alias[addr] + l_list = label_alias[addr].sort + l = l_list.pop || l + l_list.each { |ll| + b["#{ll}:"] + } + l = (l + ' ').ljust(16) + else l = '' + end + elemlen = 1 # size of each element we dump (db by default) + dumplen = -off % 16 # number of octets to dump + dumplen = 16 if dumplen == 0 + cmt = [] + each_xref(addr) { |x| + dumplen = elemlen = x.len if x.len == 2 or x.len == 4 + cmt << " #{x.type}#{x.len}:#{Expression[x.origin]}" + } + cmt = " ; @#{Expression[addr]}" + cmt.sort[0, 
6].join + if r = edata.reloc[off] + dumplen = elemlen = r.type.to_s[1..-1].to_i/8 + end + dataspec = { 1 => 'db ', 2 => 'dw ', 4 => 'dd ', 8 => 'dq ' }[elemlen] + if not dataspec + dataspec = 'db ' + elemlen = 1 + end + l << dataspec - # dup(?) - if off >= edata.data.length - dups = edata.virtsize - off - @prog_binding.each_value { |a| - tmp = Expression[a, :-, addr].reduce - dups = tmp if tmp.kind_of? ::Integer and tmp > 0 and tmp < dups - } - @xrefs.each_key { |a| - tmp = Expression[a, :-, addr].reduce - dups = tmp if tmp.kind_of? ::Integer and tmp > 0 and tmp < dups - } - dups /= elemlen - dups = 1 if dups < 1 - b[(l + "#{dups} dup(?)").ljust(48) << cmt] - return off + dups*elemlen - end + # dup(?) + if off >= edata.data.length + dups = edata.virtsize - off + @prog_binding.each_value { |a| + tmp = Expression[a, :-, addr].reduce + dups = tmp if tmp.kind_of? ::Integer and tmp > 0 and tmp < dups + } + @xrefs.each_key { |a| + tmp = Expression[a, :-, addr].reduce + dups = tmp if tmp.kind_of? 
::Integer and tmp > 0 and tmp < dups + } + dups /= elemlen + dups = 1 if dups < 1 + b[(l + "#{dups} dup(?)").ljust(48) << cmt] + return off + dups*elemlen + end - vals = [] - edata.ptr = off - dups = dumplen/elemlen - elemsym = "u#{elemlen*8}".to_sym - while edata.ptr < edata.data.length - if vals.length > dups and vals.last != vals.first - # we have a dup(), unread the last element which is different - vals.pop - addr = Expression[addr, :-, elemlen].reduce - edata.ptr -= elemlen - break - end - break if vals.length == dups and vals.uniq.length > 1 - vals << edata.decode_imm(elemsym, @cpu.endianness) - addr += elemlen - if i = (1-elemlen..0).find { |i_| - t = addr + i_ - @xrefs[t] or @decoded[t] or edata.reloc[edata.ptr+i_] or edata.inv_export[edata.ptr+i_] - } - # i < 0 - edata.ptr += i - addr += i - break - end - break if edata.reloc[edata.ptr-elemlen] - end + vals = [] + edata.ptr = off + dups = dumplen/elemlen + elemsym = "u#{elemlen*8}".to_sym + while edata.ptr < edata.data.length + if vals.length > dups and vals.last != vals.first + # we have a dup(), unread the last element which is different + vals.pop + addr = Expression[addr, :-, elemlen].reduce + edata.ptr -= elemlen + break + end + break if vals.length == dups and vals.uniq.length > 1 + vals << edata.decode_imm(elemsym, @cpu.endianness) + addr += elemlen + if i = (1-elemlen..0).find { |i_| + t = addr + i_ + @xrefs[t] or @decoded[t] or edata.reloc[edata.ptr+i_] or edata.inv_export[edata.ptr+i_] + } + # i < 0 + edata.ptr += i + addr += i + break + end + break if edata.reloc[edata.ptr-elemlen] + end - # line of repeated value => dup() - if vals.length > 8 and vals.uniq.length == 1 - b[(l << "#{vals.length} dup(#{Expression[vals.first]})").ljust(48) << cmt] - return edata.ptr - end + # line of repeated value => dup() + if vals.length > 8 and vals.uniq.length == 1 + b[(l << "#{vals.length} dup(#{Expression[vals.first]})").ljust(48) << cmt] + return edata.ptr + end - # recognize strings - vals = 
vals.inject([]) { |vals_, value| - if (elemlen == 1 or elemlen == 2) - case value - when 0x20..0x7e, 0x0a, 0x0d - if vals_.last.kind_of? ::String; vals_.last << value ; vals_ - else vals_ << value.chr - end - else vals_ << value - end - else vals_ << value - end - } + # recognize strings + vals = vals.inject([]) { |vals_, value| + if (elemlen == 1 or elemlen == 2) + case value + when 0x20..0x7e, 0x0a, 0x0d + if vals_.last.kind_of? ::String; vals_.last << value ; vals_ + else vals_ << value.chr + end + else vals_ << value + end + else vals_ << value + end + } - vals.map! { |value| - if value.kind_of? ::String - if value.length > 2 # or value == vals.first or value == vals.last # if there is no xref, don't care - value.inspect - else - value.unpack('C*').map { |c| Expression[c] } - end - else - Expression[value] - end - } - vals.flatten! + vals.map! { |value| + if value.kind_of? ::String + if value.length > 2 # or value == vals.first or value == vals.last # if there is no xref, don't care + value.inspect + else + value.unpack('C*').map { |c| Expression[c] } + end + else + Expression[value] + end + } + vals.flatten! 
- b[(l << vals.join(', ')).ljust(48) << cmt] + b[(l << vals.join(', ')).ljust(48) << cmt] - edata.ptr - end + edata.ptr + end - def decompiler - parse_c '' if not c_parser - @decompiler ||= Decompiler.new(self) - end - def decompiler=(dc) - @decompiler = dc - end - def decompile(*addr) - decompiler.decompile(*addr) - end - def decompile_func(addr) - decompiler.decompile_func(addr) - end + def decompiler + parse_c '' if not c_parser + @decompiler ||= Decompiler.new(self) + end + def decompiler=(dc) + @decompiler = dc + end + def decompile(*addr) + decompiler.decompile(*addr) + end + def decompile_func(addr) + decompiler.decompile_func(addr) + end - # allows us to be AutoExe.loaded - def self.autoexe_load(f, &b) - d = load(f, &b) - d.program - end + # allows us to be AutoExe.loaded + def self.autoexe_load(f, &b) + d = load(f, &b) + d.program + end end end diff --git a/lib/metasm/metasm/disassemble_api.rb b/lib/metasm/metasm/disassemble_api.rb index 416988a806..70c1788f54 100644 --- a/lib/metasm/metasm/disassemble_api.rb +++ b/lib/metasm/metasm/disassemble_api.rb @@ -7,1837 +7,1837 @@ module Metasm class InstructionBlock - # adds an address to the from_normal/from_subfuncret list - def add_from(addr, type=:normal) - send "add_from_#{type}", addr - end - def add_from_normal(addr) - @from_normal ||= [] - @from_normal |= [addr] - end - def add_from_subfuncret(addr) - @from_subfuncret ||= [] - @from_subfuncret |= [addr] - end - def add_from_indirect(addr) - @from_indirect ||= [] - @from_indirect |= [addr] - end - # iterates over every from address, yields [address, type in [:normal, :subfuncret, :indirect]] - def each_from - each_from_normal { |a| yield a, :normal } - each_from_subfuncret { |a| yield a, :subfuncret } - each_from_indirect { |a| yield a, :indirect } - end - def each_from_normal(&b) - @from_normal.each(&b) if from_normal - end - def each_from_subfuncret(&b) - @from_subfuncret.each(&b) if from_subfuncret - end - def each_from_indirect(&b) - 
@from_indirect.each(&b) if from_indirect - end + # adds an address to the from_normal/from_subfuncret list + def add_from(addr, type=:normal) + send "add_from_#{type}", addr + end + def add_from_normal(addr) + @from_normal ||= [] + @from_normal |= [addr] + end + def add_from_subfuncret(addr) + @from_subfuncret ||= [] + @from_subfuncret |= [addr] + end + def add_from_indirect(addr) + @from_indirect ||= [] + @from_indirect |= [addr] + end + # iterates over every from address, yields [address, type in [:normal, :subfuncret, :indirect]] + def each_from + each_from_normal { |a| yield a, :normal } + each_from_subfuncret { |a| yield a, :subfuncret } + each_from_indirect { |a| yield a, :indirect } + end + def each_from_normal(&b) + @from_normal.each(&b) if from_normal + end + def each_from_subfuncret(&b) + @from_subfuncret.each(&b) if from_subfuncret + end + def each_from_indirect(&b) + @from_indirect.each(&b) if from_indirect + end - def add_to(addr, type=:normal) - send "add_to_#{type}", addr - end - def add_to_normal(addr) - @to_normal ||= [] - @to_normal |= [addr] - end - def add_to_subfuncret(addr) - @to_subfuncret ||= [] - @to_subfuncret |= [addr] - end - def add_to_indirect(addr) - @to_indirect ||= [] - @to_indirect |= [addr] - end - def each_to - each_to_normal { |a| yield a, :normal } - each_to_subfuncret { |a| yield a, :subfuncret } - each_to_indirect { |a| yield a, :indirect } - end - def each_to_normal(&b) - @to_normal.each(&b) if to_normal - end - def each_to_subfuncret(&b) - @to_subfuncret.each(&b) if to_subfuncret - end - def each_to_indirect(&b) - @to_indirect.each(&b) if to_indirect - end + def add_to(addr, type=:normal) + send "add_to_#{type}", addr + end + def add_to_normal(addr) + @to_normal ||= [] + @to_normal |= [addr] + end + def add_to_subfuncret(addr) + @to_subfuncret ||= [] + @to_subfuncret |= [addr] + end + def add_to_indirect(addr) + @to_indirect ||= [] + @to_indirect |= [addr] + end + def each_to + each_to_normal { |a| yield a, :normal } + 
each_to_subfuncret { |a| yield a, :subfuncret } + each_to_indirect { |a| yield a, :indirect } + end + def each_to_normal(&b) + @to_normal.each(&b) if to_normal + end + def each_to_subfuncret(&b) + @to_subfuncret.each(&b) if to_subfuncret + end + def each_to_indirect(&b) + @to_indirect.each(&b) if to_indirect + end - # yields all from that are from the same function - def each_from_samefunc(dasm, &b) - return if dasm.function[address] - @from_subfuncret.each(&b) if from_subfuncret - @from_normal.each(&b) if from_normal - end + # yields all from that are from the same function + def each_from_samefunc(dasm, &b) + return if dasm.function[address] + @from_subfuncret.each(&b) if from_subfuncret + @from_normal.each(&b) if from_normal + end - # yields all from that are not in the same subfunction as this block - def each_from_otherfunc(dasm, &b) - @from_normal.each(&b) if from_normal and dasm.function[address] - @from_subfuncret.each(&b) if from_subfuncret and dasm.function[address] - @from_indirect.each(&b) if from_indirect - end + # yields all from that are not in the same subfunction as this block + def each_from_otherfunc(dasm, &b) + @from_normal.each(&b) if from_normal and dasm.function[address] + @from_subfuncret.each(&b) if from_subfuncret and dasm.function[address] + @from_indirect.each(&b) if from_indirect + end - # yields all to that are in the same subfunction as this block - def each_to_samefunc(dasm) - each_to { |to, type| - next if type != :normal and type != :subfuncret - to = dasm.normalize(to) - yield to if not dasm.function[to] - } - end + # yields all to that are in the same subfunction as this block + def each_to_samefunc(dasm) + each_to { |to, type| + next if type != :normal and type != :subfuncret + to = dasm.normalize(to) + yield to if not dasm.function[to] + } + end - # yields all to that are not in the same subfunction as this block - def each_to_otherfunc(dasm) - each_to { |to, type| - to = dasm.normalize(to) - yield to if type == :indirect or 
dasm.function[to] or not dasm.decoded[to] - } - end + # yields all to that are not in the same subfunction as this block + def each_to_otherfunc(dasm) + each_to { |to, type| + to = dasm.normalize(to) + yield to if type == :indirect or dasm.function[to] or not dasm.decoded[to] + } + end - # returns the array used in each_from_samefunc - def from_samefunc(dasm) - ary = [] - each_from_samefunc(dasm) { |a| ary << a } - ary - end - def from_otherfunc(dasm) - ary = [] - each_from_otherfunc(dasm) { |a| ary << a } - ary - end - def to_samefunc(dasm) - ary = [] - each_to_samefunc(dasm) { |a| ary << a } - ary - end - def to_otherfunc(dasm) - ary = [] - each_to_otherfunc(dasm) { |a| ary << a } - ary - end + # returns the array used in each_from_samefunc + def from_samefunc(dasm) + ary = [] + each_from_samefunc(dasm) { |a| ary << a } + ary + end + def from_otherfunc(dasm) + ary = [] + each_from_otherfunc(dasm) { |a| ary << a } + ary + end + def to_samefunc(dasm) + ary = [] + each_to_samefunc(dasm) { |a| ary << a } + ary + end + def to_otherfunc(dasm) + ary = [] + each_to_otherfunc(dasm) { |a| ary << a } + ary + end end class DecodedInstruction - # checks if this instruction is the first of its IBlock - def block_head? - self == @block.list.first - end + # checks if this instruction is the first of its IBlock + def block_head? 
+ self == @block.list.first + end end class CPU - # compat alias, for scripts using older version of metasm - def get_backtrace_binding(di) backtrace_binding(di) end + # compat alias, for scripts using older version of metasm + def get_backtrace_binding(di) backtrace_binding(di) end end class Disassembler - # access the default value for @@backtrace_maxblocks for newly created Disassemblers - def self.backtrace_maxblocks ; @@backtrace_maxblocks ; end - def self.backtrace_maxblocks=(b) ; @@backtrace_maxblocks = b ; end + # access the default value for @@backtrace_maxblocks for newly created Disassemblers + def self.backtrace_maxblocks ; @@backtrace_maxblocks ; end + def self.backtrace_maxblocks=(b) ; @@backtrace_maxblocks = b ; end - # adds a commentary at the given address - # comments are found in the array @comment: {addr => [list of strings]} - def add_comment(addr, cmt) - @comment[addr] ||= [] - @comment[addr] |= [cmt] - end + # adds a commentary at the given address + # comments are found in the array @comment: {addr => [list of strings]} + def add_comment(addr, cmt) + @comment[addr] ||= [] + @comment[addr] |= [cmt] + end - # returns the 1st element of #get_section_at (ie the edata at a given address) or nil - def get_edata_at(*a) - if s = get_section_at(*a) - s[0] - end - end + # returns the 1st element of #get_section_at (ie the edata at a given address) or nil + def get_edata_at(*a) + if s = get_section_at(*a) + s[0] + end + end - # returns the DecodedInstruction at addr if it exists - def di_at(addr) - di = @decoded[addr] || @decoded[normalize(addr)] if addr - di if di.kind_of? DecodedInstruction - end + # returns the DecodedInstruction at addr if it exists + def di_at(addr) + di = @decoded[addr] || @decoded[normalize(addr)] if addr + di if di.kind_of? 
DecodedInstruction + end - # returns the InstructionBlock containing the address at addr - def block_at(addr) - di = di_at(addr) - di.block if di - end + # returns the InstructionBlock containing the address at addr + def block_at(addr) + di = di_at(addr) + di.block if di + end - # returns the DecodedFunction at addr if it exists - def function_at(addr) - f = @function[addr] || @function[normalize(addr)] if addr - f if f.kind_of? DecodedFunction - end + # returns the DecodedFunction at addr if it exists + def function_at(addr) + f = @function[addr] || @function[normalize(addr)] if addr + f if f.kind_of? DecodedFunction + end - # returns the DecodedInstruction covering addr - # returns one at starting nearest addr if multiple are available (overlapping instrs) - def di_including(addr) - return if not addr - addr = normalize(addr) - if off = (0...16).find { |o| @decoded[addr-o].kind_of? DecodedInstruction and @decoded[addr-o].bin_length > o } - @decoded[addr-off] - end - end + # returns the DecodedInstruction covering addr + # returns one at starting nearest addr if multiple are available (overlapping instrs) + def di_including(addr) + return if not addr + addr = normalize(addr) + if off = (0...16).find { |o| @decoded[addr-o].kind_of? 
DecodedInstruction and @decoded[addr-o].bin_length > o } + @decoded[addr-off] + end + end - # returns the InstructionBlock containing the byte at addr - # returns the one of di_including() on multiple matches (overlapping instrs) - def block_including(addr) - di = di_including(addr) - di.block if di - end + # returns the InstructionBlock containing the byte at addr + # returns the one of di_including() on multiple matches (overlapping instrs) + def block_including(addr) + di = di_including(addr) + di.block if di + end - # returns the DecodedFunction including this byte - # return the one of find_function_start() if multiple are possible (block shared by multiple funcs) - def function_including(addr) - return if not di = di_including(addr) - function_at(find_function_start(di.address)) - end + # returns the DecodedFunction including this byte + # return the one of find_function_start() if multiple are possible (block shared by multiple funcs) + def function_including(addr) + return if not di = di_including(addr) + function_at(find_function_start(di.address)) + end - # yields every InstructionBlock - # returns the list of IBlocks - def each_instructionblock(&b) - ret = [] - @decoded.each { |addr, di| - next if not di.kind_of? DecodedInstruction or not di.block_head? - ret << di.block - b.call(di.block) if b - } - ret - end - alias instructionblocks each_instructionblock + # yields every InstructionBlock + # returns the list of IBlocks + def each_instructionblock(&b) + ret = [] + @decoded.each { |addr, di| + next if not di.kind_of? DecodedInstruction or not di.block_head? 
+ ret << di.block + b.call(di.block) if b + } + ret + end + alias instructionblocks each_instructionblock - # return a backtrace_binding reversed (akin to code emulation) (but not really) - def get_fwdemu_binding(di, pc=nil) - @cpu.get_fwdemu_binding(di, pc) - end + # return a backtrace_binding reversed (akin to code emulation) (but not really) + def get_fwdemu_binding(di, pc=nil) + @cpu.get_fwdemu_binding(di, pc) + end - # reads len raw bytes from the mmaped address space - def read_raw_data(addr, len) - if e = get_section_at(addr) - e[0].read(len) - end - end + # reads len raw bytes from the mmaped address space + def read_raw_data(addr, len) + if e = get_section_at(addr) + e[0].read(len) + end + end - # read an int of arbitrary type (:u8, :i32, ...) - def decode_int(addr, type) - type = "u#{type*8}".to_sym if type.kind_of? Integer - if e = get_section_at(addr) - e[0].decode_imm(type, @cpu.endianness) - end - end + # read an int of arbitrary type (:u8, :i32, ...) + def decode_int(addr, type) + type = "u#{type*8}".to_sym if type.kind_of? 
Integer + if e = get_section_at(addr) + e[0].decode_imm(type, @cpu.endianness) + end + end - # read a byte at address addr - def decode_byte(addr) - decode_int(addr, :u8) - end + # read a byte at address addr + def decode_byte(addr) + decode_int(addr, :u8) + end - # read a dword at address addr - # the dword is cpu-sized (eg 32 or 64bits) - def decode_dword(addr) - decode_int(addr, @cpu.size/8) - end + # read a dword at address addr + # the dword is cpu-sized (eg 32 or 64bits) + def decode_dword(addr) + decode_int(addr, @cpu.size/8) + end - # read a zero-terminated string from addr - # if no terminal 0 is found, return nil - def decode_strz(addr, maxsz=4096) - if e = get_section_at(addr) - str = e[0].read(maxsz).to_s - return if not len = str.index(?\0) - str[0, len] - end - end + # read a zero-terminated string from addr + # if no terminal 0 is found, return nil + def decode_strz(addr, maxsz=4096) + if e = get_section_at(addr) + str = e[0].read(maxsz).to_s + return if not len = str.index(?\0) + str[0, len] + end + end - # read a zero-terminated wide string from addr - # return nil if no terminal found - def decode_wstrz(addr, maxsz=4096) - if e = get_section_at(addr) - str = e[0].read(maxsz).to_s - return if not len = str.unpack('v*').index(0) - str[0, 2*len] - end - end + # read a zero-terminated wide string from addr + # return nil if no terminal found + def decode_wstrz(addr, maxsz=4096) + if e = get_section_at(addr) + str = e[0].read(maxsz).to_s + return if not len = str.unpack('v*').index(0) + str[0, 2*len] + end + end - # disassembles one instruction at address - # returns nil if no instruction can be decoded there - # does not update any internal state of the disassembler, nor reuse the @decoded cache - def disassemble_instruction(addr) - if e = get_section_at(addr) - @cpu.decode_instruction(e[0], normalize(addr)) - end - end + # disassembles one instruction at address + # returns nil if no instruction can be decoded there + # does not update any internal 
state of the disassembler, nor reuse the @decoded cache + def disassemble_instruction(addr) + if e = get_section_at(addr) + @cpu.decode_instruction(e[0], normalize(addr)) + end + end - # disassemble addr as if the code flow came from from_addr - def disassemble_from(addr, from_addr) - from_addr = from_addr.address if from_addr.kind_of? DecodedInstruction - from_addr = normalize(from_addr) - if b = block_at(from_addr) - b.add_to_normal(addr) - end - @addrs_todo << [addr, from_addr] - disassemble - end + # disassemble addr as if the code flow came from from_addr + def disassemble_from(addr, from_addr) + from_addr = from_addr.address if from_addr.kind_of? DecodedInstruction + from_addr = normalize(from_addr) + if b = block_at(from_addr) + b.add_to_normal(addr) + end + @addrs_todo << [addr, from_addr] + disassemble + end - # returns the label associated to an addr, or nil if none exist - def get_label_at(addr) - e = get_edata_at(addr, false) - e.inv_export[e.ptr] if e - end + # returns the label associated to an addr, or nil if none exist + def get_label_at(addr) + e = get_edata_at(addr, false) + e.inv_export[e.ptr] if e + end - # sets the label for the specified address - # returns nil if the address is not mapped - # memcheck is passed to get_section_at to validate that the address is mapped - # keep existing label if 'overwrite' is false - def set_label_at(addr, name, memcheck=true, overwrite=true) - addr = Expression[addr].reduce - e, b = get_section_at(addr, memcheck) - if not e - elsif not l = e.inv_export[e.ptr] or (!overwrite and l != name) - l = @program.new_label(name) - e.add_export l, e.ptr - @label_alias_cache = nil - @old_prog_binding[l] = @prog_binding[l] = b + e.ptr - elsif l != name - l = rename_label l, @program.new_label(name) - end - l - end + # sets the label for the specified address + # returns nil if the address is not mapped + # memcheck is passed to get_section_at to validate that the address is mapped + # keep existing label if 'overwrite' is 
false + def set_label_at(addr, name, memcheck=true, overwrite=true) + addr = Expression[addr].reduce + e, b = get_section_at(addr, memcheck) + if not e + elsif not l = e.inv_export[e.ptr] or (!overwrite and l != name) + l = @program.new_label(name) + e.add_export l, e.ptr + @label_alias_cache = nil + @old_prog_binding[l] = @prog_binding[l] = b + e.ptr + elsif l != name + l = rename_label l, @program.new_label(name) + end + l + end - # remove a label at address addr - def del_label_at(addr, name=get_label_at(addr)) - ed = get_edata_at(addr) - if ed and ed.inv_export[ed.ptr] - ed.del_export name, ed.ptr - @label_alias_cache = nil - end - each_xref(addr) { |xr| - next if not xr.origin or not o = @decoded[xr.origin] or not o.kind_of? Renderable - o.each_expr { |e| - next unless e.kind_of?(Expression) - e.lexpr = addr if e.lexpr == name - e.rexpr = addr if e.rexpr == name - } - } - @old_prog_binding.delete name - @prog_binding.delete name - end + # remove a label at address addr + def del_label_at(addr, name=get_label_at(addr)) + ed = get_edata_at(addr) + if ed and ed.inv_export[ed.ptr] + ed.del_export name, ed.ptr + @label_alias_cache = nil + end + each_xref(addr) { |xr| + next if not xr.origin or not o = @decoded[xr.origin] or not o.kind_of? 
Renderable + o.each_expr { |e| + next unless e.kind_of?(Expression) + e.lexpr = addr if e.lexpr == name + e.rexpr = addr if e.rexpr == name + } + } + @old_prog_binding.delete name + @prog_binding.delete name + end - # changes a label to another, updates referring instructions etc - # returns the new label - # the new label must be program-uniq (see @program.new_label) - def rename_label(old, new) - return new if old == new - raise "label #{new.inspect} exists" if @prog_binding[new] - each_xref(normalize(old)) { |x| - next if not di = @decoded[x.origin] - @cpu.replace_instr_arg_immediate(di.instruction, old, new) - di.comment.to_a.each { |c| c.gsub!(old, new) } - } - e = get_edata_at(old, false) - if e - e.add_export new, e.export.delete(old), true - end - raise "cant rename nonexisting label #{old}" if not @prog_binding[old] - @label_alias_cache = nil - @old_prog_binding[new] = @prog_binding[new] = @prog_binding.delete(old) - @addrs_todo.each { |at| - case at[0] - when old; at[0] = new - when Expression; at[0] = at[0].bind(old => new) - end - } + # changes a label to another, updates referring instructions etc + # returns the new label + # the new label must be program-uniq (see @program.new_label) + def rename_label(old, new) + return new if old == new + raise "label #{new.inspect} exists" if @prog_binding[new] + each_xref(normalize(old)) { |x| + next if not di = @decoded[x.origin] + @cpu.replace_instr_arg_immediate(di.instruction, old, new) + di.comment.to_a.each { |c| c.gsub!(old, new) } + } + e = get_edata_at(old, false) + if e + e.add_export new, e.export.delete(old), true + end + raise "cant rename nonexisting label #{old}" if not @prog_binding[old] + @label_alias_cache = nil + @old_prog_binding[new] = @prog_binding[new] = @prog_binding.delete(old) + @addrs_todo.each { |at| + case at[0] + when old; at[0] = new + when Expression; at[0] = at[0].bind(old => new) + end + } - if @inv_section_reloc[old] - @inv_section_reloc[old].each { |b, e_, o, r| - (0..16).each 
{ |off| - if di = @decoded[Expression[b]+o-off] and di.bin_length > off - @cpu.replace_instr_arg_immediate(di.instruction, old, new) - end - } - r.target = r.target.bind(old => new) - } - @inv_section_reloc[new] = @inv_section_reloc.delete(old) - end + if @inv_section_reloc[old] + @inv_section_reloc[old].each { |b, e_, o, r| + (0..16).each { |off| + if di = @decoded[Expression[b]+o-off] and di.bin_length > off + @cpu.replace_instr_arg_immediate(di.instruction, old, new) + end + } + r.target = r.target.bind(old => new) + } + @inv_section_reloc[new] = @inv_section_reloc.delete(old) + end - if c_parser and @c_parser.toplevel.symbol[old] - @c_parser.toplevel.symbol[new] = @c_parser.toplevel.symbol.delete(old) - @c_parser.toplevel.symbol[new].name = new - end + if c_parser and @c_parser.toplevel.symbol[old] + @c_parser.toplevel.symbol[new] = @c_parser.toplevel.symbol.delete(old) + @c_parser.toplevel.symbol[new].name = new + end - new - end + new + end - # finds the start of a function from the address of an instruction - def find_function_start(addr) - addr = addr.address if addr.kind_of? DecodedInstruction - todo = [addr] - done = [] - while a = todo.pop - a = normalize(a) - di = @decoded[a] - next if done.include? a or not di.kind_of? DecodedInstruction - done << a - a = di.block.address - break a if @function[a] - l = [] - di.block.each_from_samefunc(self) { |f| l << f } - break a if l.empty? - todo.concat l - end - end + # finds the start of a function from the address of an instruction + def find_function_start(addr) + addr = addr.address if addr.kind_of? DecodedInstruction + todo = [addr] + done = [] + while a = todo.pop + a = normalize(a) + di = @decoded[a] + next if done.include? a or not di.kind_of? DecodedInstruction + done << a + a = di.block.address + break a if @function[a] + l = [] + di.block.each_from_samefunc(self) { |f| l << f } + break a if l.empty? 
+ todo.concat l + end + end - # iterates over the blocks of a function, yields each func block address - # returns the graph of blocks (block address => [list of samefunc blocks]) - def each_function_block(addr, incl_subfuncs = false, find_func_start = true) - addr = @function.index(addr) if addr.kind_of? DecodedFunction - addr = addr.address if addr.kind_of? DecodedInstruction - addr = find_function_start(addr) if not @function[addr] and find_func_start - todo = [addr] - ret = {} - while a = todo.pop - next if not di = di_at(a) - a = di.block.address - next if ret[a] - ret[a] = [] - yield a if block_given? - di.block.each_to_samefunc(self) { |f| ret[a] << f ; todo << f } - di.block.each_to_otherfunc(self) { |f| ret[a] << f ; todo << f } if incl_subfuncs - end - ret - end - alias function_blocks each_function_block + # iterates over the blocks of a function, yields each func block address + # returns the graph of blocks (block address => [list of samefunc blocks]) + def each_function_block(addr, incl_subfuncs = false, find_func_start = true) + addr = @function.index(addr) if addr.kind_of? DecodedFunction + addr = addr.address if addr.kind_of? DecodedInstruction + addr = find_function_start(addr) if not @function[addr] and find_func_start + todo = [addr] + ret = {} + while a = todo.pop + next if not di = di_at(a) + a = di.block.address + next if ret[a] + ret[a] = [] + yield a if block_given? 
+ di.block.each_to_samefunc(self) { |f| ret[a] << f ; todo << f } + di.block.each_to_otherfunc(self) { |f| ret[a] << f ; todo << f } if incl_subfuncs + end + ret + end + alias function_blocks each_function_block - # returns a graph of function calls - # for each func passed as arg (default: all), update the 'ret' hash - # associating func => [list of direct subfuncs called] - def function_graph(funcs = @function.keys + @entrypoints.to_a, ret={}) - funcs = funcs.map { |f| normalize(f) }.uniq.find_all { |f| @decoded[f] } - funcs.each { |f| - next if ret[f] - ret[f] = [] - each_function_block(f) { |b| - @decoded[b].block.each_to_otherfunc(self) { |sf| - ret[f] |= [sf] - } - } - } - ret - end + # returns a graph of function calls + # for each func passed as arg (default: all), update the 'ret' hash + # associating func => [list of direct subfuncs called] + def function_graph(funcs = @function.keys + @entrypoints.to_a, ret={}) + funcs = funcs.map { |f| normalize(f) }.uniq.find_all { |f| @decoded[f] } + funcs.each { |f| + next if ret[f] + ret[f] = [] + each_function_block(f) { |b| + @decoded[b].block.each_to_otherfunc(self) { |sf| + ret[f] |= [sf] + } + } + } + ret + end - # return the graph of function => subfunction list - # recurses from an entrypoint - def function_graph_from(addr) - addr = normalize(addr) - addr = find_function_start(addr) || addr - ret = {} - osz = ret.length-1 - while ret.length != osz - osz = ret.length - function_graph(ret.values.flatten + [addr], ret) - end - ret - end + # return the graph of function => subfunction list + # recurses from an entrypoint + def function_graph_from(addr) + addr = normalize(addr) + addr = find_function_start(addr) || addr + ret = {} + osz = ret.length-1 + while ret.length != osz + osz = ret.length + function_graph(ret.values.flatten + [addr], ret) + end + ret + end - # return the graph of function => subfunction list - # for which a (sub-sub)function includes addr - def function_graph_to(addr) - addr = 
normalize(addr) - addr = find_function_start(addr) || addr - full = function_graph - ret = {} - todo = [addr] - done = [] - while a = todo.pop - next if done.include? a - done << a - full.each { |f, sf| - next if not sf.include? a - ret[f] ||= [] - ret[f] |= [a] - todo << f - } - end - ret - end + # return the graph of function => subfunction list + # for which a (sub-sub)function includes addr + def function_graph_to(addr) + addr = normalize(addr) + addr = find_function_start(addr) || addr + full = function_graph + ret = {} + todo = [addr] + done = [] + while a = todo.pop + next if done.include? a + done << a + full.each { |f, sf| + next if not sf.include? a + ret[f] ||= [] + ret[f] |= [a] + todo << f + } + end + ret + end - # returns info on sections, from @program if supported - # returns an array of [name, addr, length, info] - def section_info - if @program.respond_to? :section_info - @program.section_info - else - list = [] - @sections.each { |k, v| - list << [get_label_at(k), normalize(k), v.length, nil] - } - list - end - end + # returns info on sections, from @program if supported + # returns an array of [name, addr, length, info] + def section_info + if @program.respond_to? 
:section_info + @program.section_info + else + list = [] + @sections.each { |k, v| + list << [get_label_at(k), normalize(k), v.length, nil] + } + list + end + end - # transform an address into a file offset - def addr_to_fileoff(addr) - addr = normalize(addr) - @program.addr_to_fileoff(addr) - end + # transform an address into a file offset + def addr_to_fileoff(addr) + addr = normalize(addr) + @program.addr_to_fileoff(addr) + end - # transform a file offset into an address - def fileoff_to_addr(foff) - @program.fileoff_to_addr(foff) - end + # transform a file offset into an address + def fileoff_to_addr(foff) + @program.fileoff_to_addr(foff) + end - # remove the decodedinstruction from..to, replace them by the new Instructions in 'by' - # this updates the block list structure, old di will still be visible in @decoded, except from original block (those are deleted) - # if from..to spans multiple blocks - # to.block is splitted after to - # all path from from are replaced by a single link to after 'to', be careful ! - # (eg a->b->... & a->c ; from in a, to in c => a->b is lost) - # all instructions are stuffed in the first block - # paths are only walked using from/to_normal - # 'by' may be empty - # returns the block containing the new instrs (nil if empty) - def replace_instrs(from, to, by, patch_by=false) - raise 'bad from' if not fdi = di_at(from) or not fdi.block.list.index(fdi) - raise 'bad to' if not tdi = di_at(to) or not tdi.block.list.index(tdi) + # remove the decodedinstruction from..to, replace them by the new Instructions in 'by' + # this updates the block list structure, old di will still be visible in @decoded, except from original block (those are deleted) + # if from..to spans multiple blocks + # to.block is splitted after to + # all path from from are replaced by a single link to after 'to', be careful ! + # (eg a->b->... 
& a->c ; from in a, to in c => a->b is lost) + # all instructions are stuffed in the first block + # paths are only walked using from/to_normal + # 'by' may be empty + # returns the block containing the new instrs (nil if empty) + def replace_instrs(from, to, by, patch_by=false) + raise 'bad from' if not fdi = di_at(from) or not fdi.block.list.index(fdi) + raise 'bad to' if not tdi = di_at(to) or not tdi.block.list.index(tdi) - # create DecodedInstruction from Instructions in 'by' if needed - split_block(fdi.block, fdi.address) - split_block(tdi.block, tdi.block.list[tdi.block.list.index(tdi)+1].address) if tdi != tdi.block.list.last - fb = fdi.block - tb = tdi.block + # create DecodedInstruction from Instructions in 'by' if needed + split_block(fdi.block, fdi.address) + split_block(tdi.block, tdi.block.list[tdi.block.list.index(tdi)+1].address) if tdi != tdi.block.list.last + fb = fdi.block + tb = tdi.block - # generate DecodedInstr from Instrs - # try to keep the bin_length of original block - wantlen = tdi.address + tdi.bin_length - fb.address - wantlen -= by.grep(DecodedInstruction).inject(0) { |len, di| len + di.bin_length } - ldi = by.last - ldi = DecodedInstruction.new(ldi) if ldi.kind_of? Instruction - nb_i = by.grep(Instruction).length - wantlen = nb_i if wantlen < 0 or (ldi and ldi.opcode.props[:setip]) - if patch_by - by.map! { |di| - if di.kind_of? Instruction - di = DecodedInstruction.new(di) - wantlen -= di.bin_length = wantlen / by.grep(Instruction).length - nb_i -= 1 - end - di - } - else - by = by.map { |di| - if di.kind_of? Instruction - di = DecodedInstruction.new(di) - wantlen -= (di.bin_length = wantlen / nb_i) - nb_i -= 1 - end - di - } - end + # generate DecodedInstr from Instrs + # try to keep the bin_length of original block + wantlen = tdi.address + tdi.bin_length - fb.address + wantlen -= by.grep(DecodedInstruction).inject(0) { |len, di| len + di.bin_length } + ldi = by.last + ldi = DecodedInstruction.new(ldi) if ldi.kind_of? 
Instruction + nb_i = by.grep(Instruction).length + wantlen = nb_i if wantlen < 0 or (ldi and ldi.opcode.props[:setip]) + if patch_by + by.map! { |di| + if di.kind_of? Instruction + di = DecodedInstruction.new(di) + wantlen -= di.bin_length = wantlen / by.grep(Instruction).length + nb_i -= 1 + end + di + } + else + by = by.map { |di| + if di.kind_of? Instruction + di = DecodedInstruction.new(di) + wantlen -= (di.bin_length = wantlen / nb_i) + nb_i -= 1 + end + di + } + end #puts " ** patch next_addr to #{Expression[tb.list.last.next_addr]}" if not by.empty? and by.last.opcode.props[:saveip] - by.last.next_addr = tb.list.last.next_addr if not by.empty? and by.last.opcode.props[:saveip] - fb.list.each { |di| @decoded.delete di.address } - fb.list.clear - tb.list.each { |di| @decoded.delete di.address } - tb.list.clear - by.each { |di| fb.add_di di } - by.each_with_index { |di, i| - if odi = di_at(di.address) - # collision, hopefully with another deobfuscation run ? - if by[i..-1].all? { |mydi| mydi.to_s == @decoded[mydi.address].to_s } - puts "replace_instrs: merge at #{di}" if $DEBUG - by[i..-1] = by[i..-1].map { |xdi| @decoded[xdi.address] } - by[i..-1].each { fb.list.pop } - split_block(odi.block, odi.address) - tb.to_normal = [di.address] - (odi.block.from_normal ||= []) << to - odi.block.from_normal.uniq! - break - else - #raise "replace_instrs: collision #{di} vs #{odi}" - puts "replace_instrs: collision #{di} vs #{odi}" if $VERBOSE - while @decoded[di.address].kind_of? DecodedInstruction # find free space.. raise ? - di.address += 1 # XXX use floats ? - di.bin_length -= 1 - end - end - end - @decoded[di.address] = di - } - @addrs_done.delete_if { |ad| normalize(ad[0]) == tb.address or ad[1] == tb.address } - @addrs_done.delete_if { |ad| normalize(ad[0]) == fb.address or ad[1] == fb.address } if by.empty? 
and tb.address != fb.address - - # update to_normal/from_normal - fb.to_normal = tb.to_normal - fb.to_normal.to_a.each { |newto| - # other paths may already point to newto, we must only update the relevant entry - if ndi = di_at(newto) and idx = ndi.block.from_normal.to_a.index(to) - if by.empty? - ndi.block.from_normal[idx,1] = fb.from_normal.to_a - else - ndi.block.from_normal[idx] = fb.list.last.address - end - end - } - - fb.to_subfuncret = tb.to_subfuncret - fb.to_subfuncret.to_a.each { |newto| - if ndi = di_at(newto) and idx = ndi.block.from_subfuncret.to_a.index(to) - if by.empty? - ndi.block.from_subfuncret[idx,1] = fb.from_subfuncret.to_a - else - ndi.block.from_subfuncret[idx] = fb.list.last.address - end - end - } - - if by.empty? - tb.to_subfuncret = nil if tb.to_subfuncret == [] - tolist = tb.to_subfuncret || tb.to_normal.to_a - if lfrom = get_label_at(fb.address) and tolist.length == 1 - lto = auto_label_at(tolist.first) - each_xref(fb.address, :x) { |x| - next if not di = @decoded[x.origin] - @cpu.replace_instr_arg_immediate(di.instruction, lfrom, lto) - di.comment.to_a.each { |c| c.gsub!(lfrom, lto) } - } - end - fb.from_normal.to_a.each { |newfrom| - if ndi = di_at(newfrom) and idx = ndi.block.to_normal.to_a.index(from) - ndi.block.to_normal[idx..idx] = tolist - end - } - fb.from_subfuncret.to_a.each { |newfrom| - if ndi = di_at(newfrom) and idx = ndi.block.to_subfuncret.to_a.index(from) - ndi.block.to_subfuncret[idx..idx] = tolist - end - } - else - # merge with adjacent blocks - merge_blocks(fb, fb.to_normal.first) if fb.to_normal.to_a.length == 1 and di_at(fb.to_normal.first) - merge_blocks(fb.from_normal.first, fb) if fb.from_normal.to_a.length == 1 and di_at(fb.from_normal.first) - end - - fb if not by.empty? 
- end - - # undefine a sequence of decodedinstructions from an address - # stops at first non-linear branch - # removes @decoded, @comments, @xrefs, @addrs_done - # does not update @prog_binding (does not undefine labels) - def undefine_from(addr) - return if not di_at(addr) - @comment.delete addr if @function.delete addr - split_block(addr) - addrs = [] - while di = di_at(addr) - di.block.list.each { |ddi| addrs << ddi.address } - break if di.block.to_subfuncret.to_a != [] or di.block.to_normal.to_a.length != 1 - addr = di.block.to_normal.first - break if ndi = di_at(addr) and ndi.block.from_normal.to_a.length != 1 - end - addrs.each { |a| @decoded.delete a } - @xrefs.delete_if { |a, x| - if not x.kind_of? Array - true if x and addrs.include? x.origin - else - x.delete_if { |xx| addrs.include? xx.origin } - true if x.empty? - end - } - @addrs_done.delete_if { |ad| !(addrs & [normalize(ad[0]), normalize(ad[1])]).empty? } - end - - # merge two instruction blocks if they form a simple chain and are adjacent - # returns true if merged - def merge_blocks(b1, b2, allow_nonadjacent = false) - if b1 and not b1.kind_of? InstructionBlock - return if not b1 = block_at(b1) - end - if b2 and not b2.kind_of? 
InstructionBlock - return if not b2 = block_at(b2) - end - if b1 and b2 and (allow_nonadjacent or b1.list.last.next_addr == b2.address) and - b1.to_normal.to_a == [b2.address] and b2.from_normal.to_a.length == 1 and # that handles delay_slot - b1.to_subfuncret.to_a == [] and b2.from_subfuncret.to_a == [] and - b1.to_indirect.to_a == [] and b2.from_indirect.to_a == [] - b2.list.each { |di| b1.add_di di } - b1.to_normal = b2.to_normal - b2.list.clear - @addrs_done.delete_if { |ad| normalize(ad[0]) == b2.address } - true - end - end - - # computes the binding of a code sequence - # just a forwarder to CPU#code_binding - def code_binding(*a) - @cpu.code_binding(self, *a) - end - - # returns an array of instructions/label that, once parsed and assembled, should - # give something equivalent to the code accessible from the (list of) entrypoints given - # from the @decoded dasm graph - # assume all jump targets have a matching label in @prog_binding - # may add inconditionnal jumps in the listing to preserve the code flow - def flatten_graph(entry, include_subfunc=true) - ret = [] - entry = [entry] if not entry.kind_of? Array - todo = entry.map { |a| normalize(a) } - done = [] - inv_binding = @prog_binding.invert - while addr = todo.pop - next if done.include? addr or not di_at(addr) - done << addr - b = @decoded[addr].block - - ret << Label.new(inv_binding[addr]) if inv_binding[addr] - ret.concat b.list.map { |di| di.instruction } - - b.each_to_otherfunc(self) { |to| - to = normalize to - todo.unshift to if include_subfunc - } - b.each_to_samefunc(self) { |to| - to = normalize to - todo << to - } - - if not di = b.list[-1-@cpu.delay_slot] or not di.opcode.props[:stopexec] or di.opcode.props[:saveip] - to = b.list.last.next_addr - if todo.include? to - if done.include? to or not di_at(to) - if not to_l = inv_binding[to] - to_l = auto_label_at(to, 'loc') - if done.include? 
to and idx = ret.index(@decoded[to].block.list.first.instruction) - ret.insert(idx, Label.new(to_l)) - end - end - ret << @cpu.instr_uncond_jump_to(to_l) - else - todo << to # ensure it's next in the listing - end - end - end - end - - ret - end - - # returns a demangled C++ name - def demangle_cppname(name) - case name[0] - when ?? # MSVC - name = name[1..-1] - demangle_msvc(name[1..-1]) if name[0] == ?? - when ?_ - name = name.sub(/_GLOBAL__[ID]_/, '') - demangle_gcc(name[2..-1][/\S*/]) if name[0, 2] == '_Z' - end - end - - # from wgcc-2.2.2/undecorate.cpp - # TODO - def demangle_msvc(name) - op = name[0, 1] - op = name[0, 2] if op == '_' - if op = { - '2' => "new", '3' => "delete", '4' => "=", '5' => ">>", '6' => "<<", '7' => "!", '8' => "==", '9' => "!=", - 'A' => "[]", 'C' => "->", 'D' => "*", 'E' => "++", 'F' => "--", 'G' => "-", 'H' => "+", 'I' => "&", - 'J' => "->*", 'K' => "/", 'L' => "%", 'M' => "<", 'N' => "<=", 'O' => ">", 'P' => ">=", 'Q' => ",", - 'R' => "()", 'S' => "~", 'T' => "^", 'U' => "|", 'V' => "&&", 'W' => "||", 'X' => "*=", 'Y' => "+=", - 'Z' => "-=", '_0' => "/=", '_1' => "%=", '_2' => ">>=", '_3' => "<<=", '_4' => "&=", '_5' => "|=", '_6' => "^=", - '_7' => "`vftable'", '_8' => "`vbtable'", '_9' => "`vcall'", '_A' => "`typeof'", '_B' => "`local static guard'", - '_C' => "`string'", '_D' => "`vbase destructor'", '_E' => "`vector deleting destructor'", '_F' => "`default constructor closure'", - '_G' => "`scalar deleting destructor'", '_H' => "`vector constructor iterator'", '_I' => "`vector destructor iterator'", - '_J' => "`vector vbase constructor iterator'", '_K' => "`virtual displacement map'", '_L' => "`eh vector constructor iterator'", - '_M' => "`eh vector destructor iterator'", '_N' => "`eh vector vbase constructor iterator'", '_O' => "`copy constructor closure'", - '_S' => "`local vftable'", '_T' => "`local vftable constructor closure'", '_U' => "new[]", '_V' => "delete[]", - '_X' => "`placement delete closure'", '_Y' => "`placement 
delete[] closure'"}[op] - op[0] == ?` ? op[1..-2] : "op_#{op}" - end - end - - # from http://www.codesourcery.com/public/cxx-abi/abi.html - def demangle_gcc(name) - subs = [] - ret = '' - decode_tok = lambda { - name ||= '' - case name[0] - when nil - ret = nil - when ?N - name = name[1..-1] - decode_tok[] - until name[0] == ?E - break if not ret - ret << '::' - decode_tok[] - end - name = name[1..-1] - when ?I - name = name[1..-1] - ret = ret[0..-3] if ret[-2, 2] == '::' - ret << '<' - decode_tok[] - until name[0] == ?E - break if not ret - ret << ', ' - decode_tok[] - end - ret << ' ' if ret and ret[-1] == ?> - ret << '>' if ret - name = name[1..-1] - when ?T - case name[1] - when ?T; ret << 'vtti(' - when ?V; ret << 'vtable(' - when ?I; ret << 'typeinfo(' - when ?S; ret << 'typename(' - else ret = nil - end - name = name[2..-1].to_s - decode_tok[] if ret - ret << ')' if ret - name = name[1..-1] if name[0] == ?E - when ?C - name = name[2..-1] - base = ret[/([^:]*)(<.*|::)?$/, 1] - ret << base - when ?D - name = name[2..-1] - base = ret[/([^:]*)(<.*|::)?$/, 1] - ret << '~' << base - when ?0..?9 - nr = name[/^[0-9]+/] - name = name[nr.length..-1].to_s - ret << name[0, nr.to_i] - name = name[nr.to_i..-1] - subs << ret[/[\w:]*$/] - when ?S - name = name[1..-1] - case name[0] - when ?_, ?0..?9, ?A..?Z - case name[0] - when ?_; idx = 0 ; name = name[1..-1] - when ?0..?9; idx = name[0, 1].unpack('C')[0] - 0x30 + 1 ; name = name[2..-1] - when ?A..?Z; idx = name[0, 1].unpack('C')[0] - 0x41 + 11 ; name = name[2..-1] - end - if not subs[idx] - ret = nil - else - ret << subs[idx] - end - when ?t - ret << 'std::' - name = name[1..-1] - decode_tok[] - else - std = { ?a => 'std::allocator', - ?b => 'std::basic_string', - ?s => 'std::string', # 'std::basic_string < char, std::char_traits, std::allocator >', - ?i => 'std::istream', # 'std::basic_istream >', - ?o => 'std::ostream', # 'std::basic_ostream >', - ?d => 'std::iostream', # 'std::basic_iostream >' - }[name[0]] - if not 
std - ret = nil - else - ret << std - end - name = name[1..-1] - end - when ?P, ?R, ?r, ?V, ?K - attr = { ?P => '*', ?R => '&', ?r => ' restrict', ?V => ' volatile', ?K => ' const' }[name[0]] - name = name[1..-1] - rl = ret.length - decode_tok[] - if ret - ret << attr - subs << ret[rl..-1] - end - else - if ret =~ /[(<]/ and ty = { - ?v => 'void', ?w => 'wchar_t', ?b => 'bool', ?c => 'char', ?a => 'signed char', - ?h => 'unsigned char', ?s => 'short', ?t => 'unsigned short', ?i => 'int', - ?j => 'unsigned int', ?l => 'long', ?m => 'unsigned long', ?x => '__int64', - ?y => 'unsigned __int64', ?n => '__int128', ?o => 'unsigned __int128', ?f => 'float', - ?d => 'double', ?e => 'long double', ?g => '__float128', ?z => '...' - }[name[0]] - name = name[1..-1] - ret << ty - else - fu = name[0, 2] - name = name[2..-1] - if op = { - 'nw' => ' new', 'na' => ' new[]', 'dl' => ' delete', 'da' => ' delete[]', - 'ps' => '+', 'ng' => '-', 'ad' => '&', 'de' => '*', 'co' => '~', 'pl' => '+', - 'mi' => '-', 'ml' => '*', 'dv' => '/', 'rm' => '%', 'an' => '&', 'or' => '|', - 'eo' => '^', 'aS' => '=', 'pL' => '+=', 'mI' => '-=', 'mL' => '*=', 'dV' => '/=', - 'rM' => '%=', 'aN' => '&=', 'oR' => '|=', 'eO' => '^=', 'ls' => '<<', 'rs' => '>>', - 'lS' => '<<=', 'rS' => '>>=', 'eq' => '==', 'ne' => '!=', 'lt' => '<', 'gt' => '>', - 'le' => '<=', 'ge' => '>=', 'nt' => '!', 'aa' => '&&', 'oo' => '||', 'pp' => '++', - 'mm' => '--', 'cm' => ',', 'pm' => '->*', 'pt' => '->', 'cl' => '()', 'ix' => '[]', - 'qu' => '?', 'st' => ' sizeof', 'sz' => ' sizeof', 'at' => ' alignof', 'az' => ' alignof' - }[fu] - ret << "operator#{op}" - elsif fu == 'cv' - ret << "cast<" - decode_tok[] - ret << ">" if ret - else - ret = nil - end - end - end - name ||= '' - } - - decode_tok[] - subs.pop - if ret and name != '' - ret << '(' - decode_tok[] - while ret and name != '' - ret << ', ' - decode_tok[] - end - ret << ')' if ret - end - ret - end - - # scans all the sections raw for a given regexp - # return/yields 
all the addresses matching - # if yield returns nil/false, do not include the addr in the final result - # sections are scanned MB by MB, so this should work (slowly) on 4GB sections (eg debugger VM) - # with addr_start/length, symbol-based section are skipped - def pattern_scan(pat, addr_start=nil, length=nil, chunksz=nil, margin=nil, &b) - chunksz ||= 4*1024*1024 # scan 4MB at a time - margin ||= 65536 # add this much bytes at each chunk to find /pat/ over chunk boundaries - - pat = Regexp.new(Regexp.escape(pat)) if pat.kind_of? ::String - - found = [] - @sections.each { |sec_addr, e| - if addr_start - length ||= 0x1000_0000 - begin - if sec_addr < addr_start - next if sec_addr+e.length <= addr_start - e = e[addr_start-sec_addr, e.length] - sec_addr = addr_start - end - if sec_addr+e.length > addr_start+length - next if sec_addr > addr_start+length - e = e[0, sec_addr+e.length-(addr_start+length)] - end - rescue - puts $!, $!.message, $!.backtrace if $DEBUG - # catch arithmetic error with symbol-based section - next - end - end - e.pattern_scan(pat, chunksz, margin) { |eo| - match_addr = sec_addr + eo - found << match_addr if not b or b.call(match_addr) - false - } - } - found - end - - # returns/yields [addr, string] found using pattern_scan /[\x20-\x7e]/ - def strings_scan(minlen=6, &b) - ret = [] - nexto = 0 - pattern_scan(/[\x20-\x7e]{#{minlen},}/m, nil, 1024) { |o| - if o - nexto > 0 - next unless e = get_edata_at(o) - str = e.data[e.ptr, 1024][/[\x20-\x7e]{#{minlen},}/m] - ret << [o, str] if not b or b.call(o, str) - nexto = o + str.length - end - } - ret - end - - # exports the addr => symbol map (see load_map) - def save_map - @prog_binding.map { |l, o| - type = di_at(o) ? 'c' : 'd' # XXX - o = o.to_s(16).rjust(8, '0') if o.kind_of? 
::Integer - "#{o} #{type} #{l}" - } - end - - # loads a map file (addr => symbol) - # off is an optionnal offset to add to every address found (for eg rebased binaries) - # understands: - # standard map files (eg linux-kernel.map: , e.g. 'c01001ba t setup_idt') - # ida map files (: ) - # arg is either the map itself or the filename of the map (if it contains no newline) - def load_map(str, off=0) - str = File.read(str) rescue nil if not str.index("\n") - sks = @sections.keys.sort - seen = {} - str.each_line { |l| - case l.strip - when /^([0-9A-F]+)\s+(\w+)\s+(\w+)/i # kernel.map style - addr = $1.to_i(16)+off - set_label_at(addr, $3, false, !seen[addr]) - seen[addr] = true - when /^([0-9A-F]+):([0-9A-F]+)\s+([a-z_]\w+)/i # IDA style - # we do not have section load order, let's just hope that the addresses are sorted (and sortable..) - # could check the 1st part of the file, with section sizes, but it is not very convenient - # the regexp is so that we skip the 1st part with section descriptions - # in the file, section 1 is the 1st section ; we have an additionnal section (exe header) which fixes the 0-index - addr = sks[$1.to_i(16)] + $2.to_i(16) + off - set_label_at(addr, $3, false, !seen[addr]) - seen[addr] = true - end - } - end - - # saves the dasm state in a file - def save_file(file) - tmpfile = file + '.tmp' - File.open(tmpfile, 'wb') { |fd| save_io(fd) } - File.rename tmpfile, file - end - - # saves the dasm state to an IO - def save_io(fd) - fd.puts 'Metasm.dasm' - - if @program.filename and not @program.kind_of?(Shellcode) - t = @program.filename.to_s - fd.puts "binarypath #{t.length}", t - else - t = "#{@cpu.class.name.sub(/.*::/, '')} #{@cpu.size} #{@cpu.endianness}" - fd.puts "cpu #{t.length}", t - # XXX will be reloaded as a Shellcode with this CPU, but it may be a custom EXE - # do not output binarypath, we'll be loaded as a Shellcode, 'section' will suffice - end - - @sections.each { |a, e| - # forget edata exports/relocs - # dump at most 16Mo per 
section - t = "#{Expression[a]} #{e.length}\n" + - [e.data[0, 2**24].to_str].pack('m*') - fd.puts "section #{t.length}", t - } - - t = save_map.join("\n") - fd.puts "map #{t.length}", t - - t = @decoded.map { |a, d| - next if not d.kind_of? DecodedInstruction - "#{Expression[a]},#{d.bin_length} #{d.instruction}#{" ; #{d.comment.join(' ')}" if d.comment}" - }.compact.sort.join("\n") - fd.puts "decoded #{t.length}", t - - t = @comment.map { |a, c| - c.map { |l| l.chomp }.join("\n").split("\n").map { |lc| "#{Expression[a]} #{lc.chomp}" } - }.join("\n") - fd.puts "comment #{t.length}", t - - bl = @decoded.values.map { |d| - d.block if d.kind_of? DecodedInstruction and d.block_head? - }.compact - t = bl.map { |b| - [Expression[b.address], - b.list.map { |d| Expression[d.address] }.join(','), - b.to_normal.to_a.map { |t_| Expression[t_] }.join(','), - b.to_subfuncret.to_a.map { |t_| Expression[t_] }.join(','), - b.to_indirect.to_a.map { |t_| Expression[t_] }.join(','), - b.from_normal.to_a.map { |t_| Expression[t_] }.join(','), - b.from_subfuncret.to_a.map { |t_| Expression[t_] }.join(','), - b.from_indirect.to_a.map { |t_| Expression[t_] }.join(','), - ].join(';') - }.sort.join("\n") - fd.puts "blocks #{t.length}", t - - t = @function.map { |a, f| - next if not @decoded[a] - [a, *f.return_address.to_a].map { |e| Expression[e] }.join(',') - }.compact.sort.join("\n") - # TODO binding ? 
- fd.puts "funcs #{t.length}", t - - t = @xrefs.map { |a, x| - a = ':default' if a == :default - a = ':unknown' if a == Expression::Unknown - # XXX origin - case x - when nil - when Xref - [Expression[a], x.type, x.len, (Expression[x.origin] if x.origin)].join(',') - when Array - x.map { |x_| [Expression[a], x_.type, x_.len, (Expression[x_.origin] if x_.origin)].join(',') } - end - }.compact.join("\n") - fd.puts "xrefs #{t.length}", t - - t = @c_parser.to_s - fd.puts "c #{t.length}", t - - #t = bl.map { |b| b.backtracked_for } - #fd.puts "trace #{t.length}" , t - end - - # loads a disassembler from a saved file - def self.load(str, &b) - d = new(nil, nil) - d.load(str, &b) - d - end - - # loads the dasm state from a savefile content - # will yield unknown segments / binarypath notfound - def load(str) - raise 'Not a metasm save file' if str[0, 12].chomp != 'Metasm.dasm' - off = 12 - pp = Preprocessor.new - app = AsmPreprocessor.new - while off < str.length - i = str.index("\n", off) || str.length - type, len = str[off..i].chomp.split - off = i+1 - data = str[off, len.to_i] - off += len.to_i - case type - when nil, '' - when 'binarypath' - data = yield(type, data) if not File.exist? data and block_given? 
- reinitialize AutoExe.decode_file(data) - @program.disassembler = self - @program.init_disassembler - when 'cpu' - cpuname, size, endianness = data.split - cpu = Metasm.const_get(cpuname) - raise 'invalid cpu' if not cpu < CPU - cpu = cpu.new - cpu.size = size.to_i - cpu.endianness = endianness.to_sym - reinitialize Shellcode.new(cpu) - @program.disassembler = self - @program.init_disassembler - @sections.delete(0) # rm empty section at 0, other real 'section' follow - when 'section' - info = data[0, data.index("\n") || data.length] - data = data[info.length, data.length] - pp.feed!(info) - addr = Expression.parse(pp).reduce - len = Expression.parse(pp).reduce - edata = EncodedData.new(data.unpack('m*').first, :virtsize => len) - add_section(addr, edata) - when 'map' - load_map data - when 'decoded' - data.each_line { |l| - begin - next if l !~ /^([^,]*),(\d*) ([^;]*)(?:; (.*))?/ - a, len, instr, cmt = $1, $2, $3, $4 - a = Expression.parse(pp.feed!(a)).reduce - instr = @cpu.parse_instruction(app.feed!(instr)) - di = DecodedInstruction.new(instr, a) - di.bin_length = len.to_i - di.add_comment cmt if cmt - @decoded[a] = di - rescue - puts "load: bad di #{l.inspect}" if $VERBOSE - end - } - when 'blocks' - data.each_line { |l| - bla = l.chomp.split(';').map { |sl| sl.split(',') } - begin - a = Expression.parse(pp.feed!(bla.shift[0])).reduce - b = InstructionBlock.new(a, get_section_at(a).to_a[0]) - bla.shift.each { |e| - a = Expression.parse(pp.feed!(e)).reduce - b.add_di(@decoded[a]) - } - bla.zip([:to_normal, :to_subfuncret, :to_indirect, :from_normal, :from_subfuncret, :from_indirect]).each { |l_, s| - b.send("#{s}=", l_.map { |e| Expression.parse(pp.feed!(e)).reduce }) if not l_.empty? - } - rescue - puts "load: bad block #{l.inspect}" if $VERBOSE - end - } - when 'funcs' - data.each_line { |l| - begin - a, *r = l.split(',').map { |e| Expression.parse(pp.feed!(e)).reduce } - @function[a] = DecodedFunction.new - @function[a].return_address = r if not r.empty? 
- @function[a].finalized = true - # TODO - rescue - puts "load: bad function #{l.inspect} #$!" if $VERBOSE - end - } - when 'comment' - data.each_line { |l| - begin - a, c = l.split(' ', 2) - a = Expression.parse(pp.feed!(a)).reduce - @comment[a] ||= [] - @comment[a] |= [c] - rescue - puts "load: bad comment #{l.inspect} #$!" if $VERBOSE - end - } - when 'c' - begin - # TODO parse_invalid_c, split per function, whatever - parse_c('') - @c_parser.allow_bad_c = true - parse_c(data, 'savefile#c') - rescue - puts "load: bad C: #$!", $!.backtrace if $VERBOSE - end - @c_parser.readtok until @c_parser.eos? if @c_parser - when 'xrefs' - data.each_line { |l| - begin - a, t, len, o = l.chomp.split(',') - case a - when ':default'; a = :default - when ':unknown'; a = Expression::Unknown - else a = Expression.parse(pp.feed!(a)).reduce - end - t = (t.empty? ? nil : t.to_sym) - len = (len != '' ? len.to_i : nil) - o = (o.to_s != '' ? Expression.parse(pp.feed!(o)).reduce : nil) # :default/:unknown ? - add_xref(a, Xref.new(t, o, len)) - rescue - puts "load: bad xref #{l.inspect} #$!" if $VERBOSE - end - } - #when 'trace' - else - if block_given? - yield(type, data) - else - puts "load: unsupported section #{type.inspect}" if $VERBOSE - end - end - end - end - - # change the base address of the loaded binary - # better done early (before disassembling anything) - # returns the delta - def rebase(newaddr) - rebase_delta(newaddr - @sections.keys.min) - end - - def rebase_delta(delta) - fix = lambda { |a| - case a - when Array - a.map! 
{ |e| fix[e] } - when Hash - tmp = {} - a.each { |k, v| tmp[fix[k]] = v } - a.replace tmp - when Integer - a += delta - when BacktraceTrace - a.origin = fix[a.origin] - a.address = fix[a.address] - end - a - } - - fix[@sections] - fix[@decoded] - fix[@xrefs] - fix[@function] - fix[@addrs_todo] - fix[@addrs_done] - fix[@comment] - @prog_binding.each_key { |k| @prog_binding[k] = fix[@prog_binding[k]] } - @old_prog_binding.each_key { |k| @old_prog_binding[k] = fix[@old_prog_binding[k]] } - @label_alias_cache = nil - - @decoded.values.grep(DecodedInstruction).each { |di| - if di.block_head? - b = di.block - b.address += delta - fix[b.to_normal] - fix[b.to_subfuncret] - fix[b.to_indirect] - fix[b.from_normal] - fix[b.from_subfuncret] - fix[b.from_indirect] - fix[b.backtracked_for] - end - di.address = fix[di.address] - di.next_addr = fix[di.next_addr] - } - @function.each_value { |f| - f.return_address = fix[f.return_address] - fix[f.backtracked_for] - } - @xrefs.values.flatten.compact.each { |x| x.origin = fix[x.origin] } - delta - end - - # dataflow method - # walks a function, starting at addr - # follows the usage of registers, computing the evolution from the value they had at start_addr - # whenever an instruction references the register (or anything derived from it), - # yield [di, used_register, reg_value, trace_state] where reg_value is the Expression holding the value of - # the register wrt the initial value at start_addr, and trace_state the value of all registers (reg_value - # not yet applied) - # reg_value may be nil if used_register is not modified by the function (eg call [eax]) - # the yield return value is propagated, unless it is nil/false - # init_state is a hash { :reg => initial value } - def trace_function_register(start_addr, init_state) - function_walk(start_addr, init_state) { |args| - trace_state = args.last - case args.first - when :di - di = args[2] - update = {} - get_fwdemu_binding(di).each { |r, v| - if v.kind_of?(Expression) and 
v.externals.find { |e| trace_state[e] } - # XXX may mix old (from trace) and current (from v) registers - newv = v.bind(trace_state) - update[r] = yield(di, r, newv, trace_state) - elsif r.kind_of?(ExpressionType) and rr = r.externals.find { |e| trace_state[e] } - # reg dereferenced in a write (eg mov [esp], 42) - next if update.has_key?(rr) # already yielded - if yield(di, rr, trace_state[rr], trace_state) == false - update[rr] = false - end - elsif trace_state[r] - # started on mov reg, foo - next if di.address == start_addr - update[r] = false - end - } - - # directly walk the instruction argument list for registers not appearing in the binding - @cpu.instr_args_memoryptr(di).each { |ind| - b = @cpu.instr_args_memoryptr_getbase(ind) - if b and b = b.symbolic and not update.has_key?(b) - yield(di, b, nil, trace_state) - end - } - @cpu.instr_args_regs(di).each { |r| - r = r.symbolic - if not update.has_key?(r) - yield(di, r, nil, trace_state) - end - } - - update.each { |r, v| - trace_state = trace_state.dup - if v - # cannot follow non-registers, or we would have to emulate every single - # instruction (try following [esp+4] across a __stdcall..) - trace_state[r] = v if r.kind_of?(::Symbol) - else - trace_state.delete r - end - } - when :subfunc - faddr = args[1] - f = @function[faddr] - f = @function[f.backtrace_binding[:thunk]] if f and f.backtrace_binding[:thunk] - if f - binding = f.backtrace_binding - if binding.empty? - backtrace_update_function_binding(faddr) - binding = f.backtrace_binding - end - # XXX fwdemu_binding ? 
- binding.each { |r, v| - if v.externals.find { |e| trace_state[e] } - if r.kind_of?(::Symbol) - trace_state = trace_state.dup - trace_state[r] = Expression[v.bind(trace_state)].reduce - end - elsif trace_state[r] - trace_state = trace_state.dup - trace_state.delete r - end - } - end - when :merge - # when merging paths, keep the smallest common state subset - # XXX may have unexplored froms - conflicts = args[2] - trace_state = trace_state.dup - conflicts.each { |addr, st| - trace_state.delete_if { |k, v| st[k] != v } - } - end - trace_state = false if trace_state.empty? - trace_state - } - end - - # define a register as a pointer to a structure - # rename all [reg+off] as [reg+struct.member] in current function - # also trace assignments of pointer members - def trace_update_reg_structptr(addr, reg, structname, structoff=0) - sname = soff = ctx = nil - expr_to_sname = lambda { |expr| - if not expr.kind_of?(Expression) or expr.op != :+ - sname = nil - next - end - - sname = expr.lexpr || expr.rexpr - soff = (expr.lexpr ? expr.rexpr : 0) - - if soff.kind_of?(Expression) - # ignore index in ptr array - if soff.op == :* and soff.lexpr == @cpu.size/8 - soff = 0 - elsif soff.rexpr.kind_of?(Expression) and soff.rexpr.op == :* and soff.rexpr.lexpr == @cpu.size/8 - soff = soff.lexpr - elsif soff.lexpr.kind_of?(Expression) and soff.lexpr.op == :* and soff.lexpr.lexpr == @cpu.size/8 - soff = soff.rexpr - end - elsif soff.kind_of?(::Symbol) - # array with 1 byte elements / pre-scaled idx? - if not ctx[soff] - soff = 0 - end - end - } - - lastdi = nil - trace_function_register(addr, reg => Expression[structname, :+, structoff]) { |di, r, val, trace| - - next if r.to_s =~ /flag/ # XXX maybe too ia32-specific? 
- - ctx = trace - @cpu.instr_args_memoryptr(di).each { |ind| - # find the structure dereference in di - b = @cpu.instr_args_memoryptr_getbase(ind) - b = b.symbolic if b - next unless trace[b] - imm = @cpu.instr_args_memoryptr_getoffset(ind) || 0 - - # check expr has the form 'traced_struct_reg + off' - expr_to_sname[trace[b] + imm] # Expr#+ calls Expr#reduce - next unless sname.kind_of?(::String) and soff.kind_of?(::Integer) - next if not st = c_parser.toplevel.struct[sname] or not st.kind_of?(C::Union) - - # ignore lea esi, [esi+0] - next if soff == 0 and not di.backtrace_binding.find { |k, v| v-k != 0 } - - # TODO if trace[b] offset != 0, we had a lea reg, [struct+substruct_off], tweak str accordingly - - # resolve struct + off into struct.membername - str = st.name.dup - mb = st.expand_member_offset(c_parser, soff, str) - # patch di - imm = imm.rexpr if imm.kind_of?(Expression) and not imm.lexpr and imm.rexpr.kind_of?(ExpressionString) - imm = imm.expr if imm.kind_of?(ExpressionString) - @cpu.instr_args_memoryptr_setoffset(ind, ExpressionString.new(imm, str, :structoff)) - - # check if the type is an enum/bitfield, patch instruction immediates - trace_update_reg_structptr_arg_enum(di, ind, mb, str) if mb - } if lastdi != di.address - lastdi = di.address - - next Expression[structname, :+, structoff] if di.address == addr and r == reg - - # check if we need to trace 'r' further - val = val.reduce_rec if val.kind_of?(Expression) - val = Expression[val] if val.kind_of?(::String) - case val - when Expression - # only trace trivial structptr+off expressions - expr_to_sname[val] - if sname.kind_of?(::String) and soff.kind_of?(::Integer) - Expression[sname, :+, soff] - end - - when Indirection - # di is mov reg, [ptr+struct.offset] - # check if the target member is a pointer to a struct, if so, trace it - expr_to_sname[val.pointer.reduce] - - next unless sname.kind_of?(::String) and soff.kind_of?(::Integer) - - if st = c_parser.toplevel.struct[sname] and 
st.kind_of?(C::Union) - pt = st.expand_member_offset(c_parser, soff, '') - pt = pt.untypedef if pt - if pt.kind_of?(C::Pointer) - tt = pt.type.untypedef - stars = '' - while tt.kind_of?(C::Pointer) - stars << '*' - tt = tt.type.untypedef - end - if tt.kind_of?(C::Union) and tt.name - Expression[tt.name + stars] - end - end - - elsif soff == 0 and sname[-1] == ?* - # XXX pointer to pointer to struct - # full C type support would be better, but harder to fit in an Expr - Expression[sname[0...-1]] - end - # in other cases, stop trace - end - } - end - - # found a special member of a struct, check if we can apply - # bitfield/enum name to other constants in the di - def trace_update_reg_structptr_arg_enum(di, ind, mb, str) - if ename = mb.has_attribute_var('enum') and enum = c_parser.toplevel.struct[ename] and enum.kind_of?(C::Enum) - # handle enums: struct moo { int __attribute__((enum(bla))) fld; }; - doit = lambda { |_di| - if num = _di.instruction.args.grep(Expression).first and num_i = num.reduce and num_i.kind_of?(::Integer) - # handle enum values on tagged structs - if enum.members and name = enum.members.index(num_i) - num.lexpr = nil - num.op = :+ - num.rexpr = ExpressionString.new(Expression[num_i], name, :enum) - _di.add_comment "enum::#{ename}" if _di.address != di.address - end - end - } - - doit[di] - - # mov eax, [ptr+struct.enumfield] => trace eax - if reg = @cpu.instr_args_regs(di).find { |r| v = di.backtrace_binding[r.symbolic] and (v - ind.symbolic) == 0 } - reg = reg.symbolic - trace_function_register(di.address, reg => Expression[0]) { |_di, r, val, trace| - next if r != reg and val != Expression[reg] - doit[_di] - val - } - end - - elsif mb.untypedef.kind_of?(C::Struct) - # handle bitfields - - byte_off = 0 - if str =~ /\+(\d+)$/ - # test byte [bitfield+1], 0x1 => test dword [bitfield], 0x100 - # XXX little-endian only - byte_off = $1.to_i - str[/\+\d+$/] = '' - end - cmt = str.split('.')[-2, 2].join('.') if str.count('.') > 1 - - doit = lambda { 
|_di, add| - if num = _di.instruction.args.grep(Expression).first and num_i = num.reduce and num_i.kind_of?(::Integer) - # TODO handle ~num_i - num_left = num_i << add - s_or = [] - mb.untypedef.members.each { |mm| - if bo = mb.bitoffsetof(c_parser, mm) - boff, blen = bo - if mm.name && blen == 1 && ((num_left >> boff) & 1) > 0 - s_or << mm.name - num_left &= ~(1 << boff) - end - end - } - if s_or.first - if num_left != 0 - s_or << ('0x%X' % num_left) - end - s = s_or.join('|') - num.lexpr = nil - num.op = :+ - num.rexpr = ExpressionString.new(Expression[num_i], s, :bitfield) - _di.add_comment cmt if _di.address != di.address - end - end - } - - doit[di, byte_off*8] - - if reg = @cpu.instr_args_regs(di).find { |r| v = di.backtrace_binding[r.symbolic] and (v - ind.symbolic) == 0 } - reg = reg.symbolic - trace_function_register(di.address, reg => Expression[0]) { |_di, r, val, trace| - if r.kind_of?(Expression) and r.op == :& - if r.lexpr == reg - # test al, 42 - doit[_di, byte_off*8] - elsif r.lexpr.kind_of?(Expression) and r.lexpr.op == :>> and r.lexpr.lexpr == reg - # test ah, 42 - doit[_di, byte_off*8+r.lexpr.rexpr] - end - end - next if r != reg and val != Expression[reg] - doit[_di, byte_off*8] - _di.address == di.address && r == reg ? Expression[0] : val - } - end - end - end - - # change Expression display mode for current object o to display integers as char constants - def toggle_expr_char(o) - return if not o.kind_of?(Renderable) - tochars = lambda { |v| - if v.kind_of?(::Integer) - a = [] - vv = v.abs - a << (vv & 0xff) - vv >>= 8 - while vv > 0 - a << (vv & 0xff) - vv >>= 8 - end - if a.all? { |b| b < 0x7f } - s = a.pack('C*').inspect.gsub("'") { '\\\'' }[1...-1] - ExpressionString.new(v, (v > 0 ? 
"'#{s}'" : "-'#{s}'"), :char) - end - end - } - o.each_expr { |e| - if e.kind_of?(Expression) - if nr = tochars[e.rexpr] - e.rexpr = nr - elsif e.rexpr.kind_of?(ExpressionString) and e.rexpr.type == :char - e.rexpr = e.rexpr.expr - end - if nl = tochars[e.lexpr] - e.lexpr = nl - elsif e.lexpr.kind_of?(ExpressionString) and e.lexpr.type == :char - e.lexpr = e.lexpr.expr - end - end - } - end - - def toggle_expr_dec(o) - return if not o.kind_of?(Renderable) - o.each_expr { |e| - if e.kind_of?(Expression) - if e.rexpr.kind_of?(::Integer) - e.rexpr = ExpressionString.new(Expression[e.rexpr], e.rexpr.to_s, :decimal) - elsif e.rexpr.kind_of?(ExpressionString) and e.rexpr.type == :decimal - e.rexpr = e.rexpr.reduce - end - if e.lexpr.kind_of?(::Integer) - e.lexpr = ExpressionString.new(Expression[e.lexpr], e.lexpr.to_s, :decimal) - elsif e.lexpr.kind_of?(ExpressionString) and e.lexpr.type == :decimal - e.lexpr = e.lexpr.reduce - end - end - } - end - - # patch Expressions in current object to include label names when available - # XXX should we also create labels ? - def toggle_expr_offset(o) - return if not o.kind_of? Renderable - o.each_expr { |e| - next unless e.kind_of?(Expression) - if n = @prog_binding[e.lexpr] - e.lexpr = n - elsif e.lexpr.kind_of? ::Integer and n = get_label_at(e.lexpr) - add_xref(normalize(e.lexpr), Xref.new(:addr, o.address)) if o.respond_to? :address - e.lexpr = n - end - if n = @prog_binding[e.rexpr] - e.rexpr = n - elsif e.rexpr.kind_of? ::Integer and n = get_label_at(e.rexpr) - add_xref(normalize(e.rexpr), Xref.new(:addr, o.address)) if o.respond_to? 
:address - e.rexpr = n - end - } - end - - # toggle all ExpressionStrings - def toggle_expr_str(o) - return if not o.kind_of?(Renderable) - o.each_expr { |e| - next unless e.kind_of?(ExpressionString) - e.hide_str = !e.hide_str - } - end - - # call this function on a function entrypoint if the function is in fact a __noreturn - # will cut the to_subfuncret of callers - def fix_noreturn(o) - each_xref(o, :x) { |a| - a = normalize(a.origin) - next if not di = di_at(a) or not di.opcode.props[:saveip] - # XXX should check if caller also becomes __noreturn - di.block.each_to_subfuncret { |to| - next if not tdi = di_at(to) or not tdi.block.from_subfuncret - tdi.block.from_subfuncret.delete_if { |aa| normalize(aa) == di.address } - tdi.block.from_subfuncret = nil if tdi.block.from_subfuncret.empty? - } - di.block.to_subfuncret = nil - } - end - - # find the addresses of calls calling the address, handles thunks - def call_sites(funcaddr) - find_call_site = proc { |a| - until not di = di_at(a) - if di.opcode.props[:saveip] - cs = di.address - break - end - if di.block.from_subfuncret.to_a.first - while di.block.from_subfuncret.to_a.length == 1 - a = di.block.from_subfuncret[0] - break if not di_at(a) - a = @decoded[a].block.list.first.address - di = @decoded[a] - end - end - break if di.block.from_subfuncret.to_a.first - break if di.block.from_normal.to_a.length != 1 - a = di.block.from_normal.first - end - cs - } - ret = [] - each_xref(normalize(funcaddr), :x) { |a| - ret << find_call_site[a.origin] - } - ret.compact.uniq - end - - # loads a disassembler plugin script - # this is simply a ruby script instance_eval() in the disassembler - # the filename argument is autocompleted with '.rb' suffix, and also - # searched for in the Metasmdir/samples/dasm-plugins subdirectory if not found in cwd - def load_plugin(plugin_filename) - if not File.exist?(plugin_filename) - if File.exist?(plugin_filename+'.rb') - plugin_filename += '.rb' - elsif defined? 
Metasmdir - # try autocomplete - pf = File.join(Metasmdir, 'samples', 'dasm-plugins', plugin_filename) - if File.exist? pf - plugin_filename = pf - elsif File.exist? pf + '.rb' - plugin_filename = pf + '.rb' - end - end - end - - instance_eval File.read(plugin_filename) - end - - # same as load_plugin, but hides the @gui attribute while loading, preventing the plugin do popup stuff - # this is useful when you want to load a plugin from another plugin to enhance the plugin's functionnality - # XXX this also prevents setting up kbd_callbacks etc.. - def load_plugin_nogui(plugin_filename) - oldgui = gui - @gui = nil - load_plugin(plugin_filename) - ensure - @gui = oldgui - end - - # compose two code/instruction's backtrace_binding - # assumes bd1 is followed by bd2 in the code flow - # eg inc edi + push edi => - # { Ind[:esp, 4] => Expr[:edi + 1], :esp => Expr[:esp - 4], :edi => Expr[:edi + 1] } - # XXX if bd1 writes to memory with a pointer that is reused in bd2, this function has to - # revert the change made by bd2, which only works with simple ptr addition now - # XXX unhandled situations may be resolved using :unknown, or by returning incorrect values - def compose_bt_binding(bd1, bd2) - if bd1.kind_of? DecodedInstruction - bd1 = bd1.backtrace_binding ||= cpu.get_backtrace_binding(bd1) - end - if bd2.kind_of? DecodedInstruction - bd2 = bd2.backtrace_binding ||= cpu.get_backtrace_binding(bd2) - end - - reduce = lambda { |e| Expression[Expression[e].reduce] } - - bd = {} - - bd2.each { |k, v| - bd[k] = reduce[v.bind(bd1)] - } - - # for each pointer appearing in keys of bd1, we must infer from bd2 what final - # pointers should appear in bd - # eg 'mov [eax], 0 mov ebx, eax' => { [eax] <- 0, [ebx] <- 0, ebx <- eax } - bd1.each { |k, v| - if k.kind_of? 
Indirection - done = false - k.pointer.externals.each { |e| - # XXX this will break on nontrivial pointers or bd2 - bd2.each { |k2, v2| - # we dont want to invert computation of flag_zero/carry etc (booh) - next if k2.to_s =~ /flag/ - - # discard indirection etc, result would be too complex / not useful - next if not Expression[v2].expr_externals.include? e - - done = true - - # try to reverse the computation made upon 'e' - # only simple addition handled here - ptr = reduce[k.pointer.bind(e => Expression[[k2, :-, v2], :+, e])] - - # if bd2 does not rewrite e, duplicate the original pointer - if not bd2[e] - bd[k] ||= reduce[v] - - # here we should not see 'e' in ptr anymore - ptr = Expression::Unknown if ptr.externals.include? e - else - # cant check if add reversion was successful.. - end - - bd[Indirection[reduce[ptr], k.len]] ||= reduce[v] - } - } - bd[k] ||= reduce[v] if not done - else - bd[k] ||= reduce[v] - end - } - - bd - end - - def gui_hilight_word_regexp(word) - @cpu.gui_hilight_word_regexp(word) - end - - # return a C::AllocCStruct from c_parser - # TODO handle program.class::Header.to_c_struct - def decode_c_struct(structname, addr) - if c_parser and edata = get_edata_at(addr) - c_parser.decode_c_struct(structname, edata.data, edata.ptr) - end - end - - def decode_c_ary(structname, addr, len) - if c_parser and edata = get_edata_at(addr) - c_parser.decode_c_ary(structname, len, edata.data, edata.ptr) - end - end - - # find the function containing addr, and find & rename stack vars in it - def name_local_vars(addr) - if @cpu.respond_to?(:name_local_vars) and faddr = find_function_start(addr) - @function[faddr] ||= DecodedFunction.new # XXX - @cpu.name_local_vars(self, faddr) - end - end + by.last.next_addr = tb.list.last.next_addr if not by.empty? 
and by.last.opcode.props[:saveip] + fb.list.each { |di| @decoded.delete di.address } + fb.list.clear + tb.list.each { |di| @decoded.delete di.address } + tb.list.clear + by.each { |di| fb.add_di di } + by.each_with_index { |di, i| + if odi = di_at(di.address) + # collision, hopefully with another deobfuscation run ? + if by[i..-1].all? { |mydi| mydi.to_s == @decoded[mydi.address].to_s } + puts "replace_instrs: merge at #{di}" if $DEBUG + by[i..-1] = by[i..-1].map { |xdi| @decoded[xdi.address] } + by[i..-1].each { fb.list.pop } + split_block(odi.block, odi.address) + tb.to_normal = [di.address] + (odi.block.from_normal ||= []) << to + odi.block.from_normal.uniq! + break + else + #raise "replace_instrs: collision #{di} vs #{odi}" + puts "replace_instrs: collision #{di} vs #{odi}" if $VERBOSE + while @decoded[di.address].kind_of? DecodedInstruction # find free space.. raise ? + di.address += 1 # XXX use floats ? + di.bin_length -= 1 + end + end + end + @decoded[di.address] = di + } + @addrs_done.delete_if { |ad| normalize(ad[0]) == tb.address or ad[1] == tb.address } + @addrs_done.delete_if { |ad| normalize(ad[0]) == fb.address or ad[1] == fb.address } if by.empty? and tb.address != fb.address + + # update to_normal/from_normal + fb.to_normal = tb.to_normal + fb.to_normal.to_a.each { |newto| + # other paths may already point to newto, we must only update the relevant entry + if ndi = di_at(newto) and idx = ndi.block.from_normal.to_a.index(to) + if by.empty? + ndi.block.from_normal[idx,1] = fb.from_normal.to_a + else + ndi.block.from_normal[idx] = fb.list.last.address + end + end + } + + fb.to_subfuncret = tb.to_subfuncret + fb.to_subfuncret.to_a.each { |newto| + if ndi = di_at(newto) and idx = ndi.block.from_subfuncret.to_a.index(to) + if by.empty? + ndi.block.from_subfuncret[idx,1] = fb.from_subfuncret.to_a + else + ndi.block.from_subfuncret[idx] = fb.list.last.address + end + end + } + + if by.empty? 
+ tb.to_subfuncret = nil if tb.to_subfuncret == [] + tolist = tb.to_subfuncret || tb.to_normal.to_a + if lfrom = get_label_at(fb.address) and tolist.length == 1 + lto = auto_label_at(tolist.first) + each_xref(fb.address, :x) { |x| + next if not di = @decoded[x.origin] + @cpu.replace_instr_arg_immediate(di.instruction, lfrom, lto) + di.comment.to_a.each { |c| c.gsub!(lfrom, lto) } + } + end + fb.from_normal.to_a.each { |newfrom| + if ndi = di_at(newfrom) and idx = ndi.block.to_normal.to_a.index(from) + ndi.block.to_normal[idx..idx] = tolist + end + } + fb.from_subfuncret.to_a.each { |newfrom| + if ndi = di_at(newfrom) and idx = ndi.block.to_subfuncret.to_a.index(from) + ndi.block.to_subfuncret[idx..idx] = tolist + end + } + else + # merge with adjacent blocks + merge_blocks(fb, fb.to_normal.first) if fb.to_normal.to_a.length == 1 and di_at(fb.to_normal.first) + merge_blocks(fb.from_normal.first, fb) if fb.from_normal.to_a.length == 1 and di_at(fb.from_normal.first) + end + + fb if not by.empty? + end + + # undefine a sequence of decodedinstructions from an address + # stops at first non-linear branch + # removes @decoded, @comments, @xrefs, @addrs_done + # does not update @prog_binding (does not undefine labels) + def undefine_from(addr) + return if not di_at(addr) + @comment.delete addr if @function.delete addr + split_block(addr) + addrs = [] + while di = di_at(addr) + di.block.list.each { |ddi| addrs << ddi.address } + break if di.block.to_subfuncret.to_a != [] or di.block.to_normal.to_a.length != 1 + addr = di.block.to_normal.first + break if ndi = di_at(addr) and ndi.block.from_normal.to_a.length != 1 + end + addrs.each { |a| @decoded.delete a } + @xrefs.delete_if { |a, x| + if not x.kind_of? Array + true if x and addrs.include? x.origin + else + x.delete_if { |xx| addrs.include? xx.origin } + true if x.empty? + end + } + @addrs_done.delete_if { |ad| !(addrs & [normalize(ad[0]), normalize(ad[1])]).empty? 
} + end + + # merge two instruction blocks if they form a simple chain and are adjacent + # returns true if merged + def merge_blocks(b1, b2, allow_nonadjacent = false) + if b1 and not b1.kind_of? InstructionBlock + return if not b1 = block_at(b1) + end + if b2 and not b2.kind_of? InstructionBlock + return if not b2 = block_at(b2) + end + if b1 and b2 and (allow_nonadjacent or b1.list.last.next_addr == b2.address) and + b1.to_normal.to_a == [b2.address] and b2.from_normal.to_a.length == 1 and # that handles delay_slot + b1.to_subfuncret.to_a == [] and b2.from_subfuncret.to_a == [] and + b1.to_indirect.to_a == [] and b2.from_indirect.to_a == [] + b2.list.each { |di| b1.add_di di } + b1.to_normal = b2.to_normal + b2.list.clear + @addrs_done.delete_if { |ad| normalize(ad[0]) == b2.address } + true + end + end + + # computes the binding of a code sequence + # just a forwarder to CPU#code_binding + def code_binding(*a) + @cpu.code_binding(self, *a) + end + + # returns an array of instructions/label that, once parsed and assembled, should + # give something equivalent to the code accessible from the (list of) entrypoints given + # from the @decoded dasm graph + # assume all jump targets have a matching label in @prog_binding + # may add inconditionnal jumps in the listing to preserve the code flow + def flatten_graph(entry, include_subfunc=true) + ret = [] + entry = [entry] if not entry.kind_of? Array + todo = entry.map { |a| normalize(a) } + done = [] + inv_binding = @prog_binding.invert + while addr = todo.pop + next if done.include? 
addr or not di_at(addr) + done << addr + b = @decoded[addr].block + + ret << Label.new(inv_binding[addr]) if inv_binding[addr] + ret.concat b.list.map { |di| di.instruction } + + b.each_to_otherfunc(self) { |to| + to = normalize to + todo.unshift to if include_subfunc + } + b.each_to_samefunc(self) { |to| + to = normalize to + todo << to + } + + if not di = b.list[-1-@cpu.delay_slot] or not di.opcode.props[:stopexec] or di.opcode.props[:saveip] + to = b.list.last.next_addr + if todo.include? to + if done.include? to or not di_at(to) + if not to_l = inv_binding[to] + to_l = auto_label_at(to, 'loc') + if done.include? to and idx = ret.index(@decoded[to].block.list.first.instruction) + ret.insert(idx, Label.new(to_l)) + end + end + ret << @cpu.instr_uncond_jump_to(to_l) + else + todo << to # ensure it's next in the listing + end + end + end + end + + ret + end + + # returns a demangled C++ name + def demangle_cppname(name) + case name[0] + when ?? # MSVC + name = name[1..-1] + demangle_msvc(name[1..-1]) if name[0] == ?? 
+ when ?_ + name = name.sub(/_GLOBAL__[ID]_/, '') + demangle_gcc(name[2..-1][/\S*/]) if name[0, 2] == '_Z' + end + end + + # from wgcc-2.2.2/undecorate.cpp + # TODO + def demangle_msvc(name) + op = name[0, 1] + op = name[0, 2] if op == '_' + if op = { + '2' => "new", '3' => "delete", '4' => "=", '5' => ">>", '6' => "<<", '7' => "!", '8' => "==", '9' => "!=", + 'A' => "[]", 'C' => "->", 'D' => "*", 'E' => "++", 'F' => "--", 'G' => "-", 'H' => "+", 'I' => "&", + 'J' => "->*", 'K' => "/", 'L' => "%", 'M' => "<", 'N' => "<=", 'O' => ">", 'P' => ">=", 'Q' => ",", + 'R' => "()", 'S' => "~", 'T' => "^", 'U' => "|", 'V' => "&&", 'W' => "||", 'X' => "*=", 'Y' => "+=", + 'Z' => "-=", '_0' => "/=", '_1' => "%=", '_2' => ">>=", '_3' => "<<=", '_4' => "&=", '_5' => "|=", '_6' => "^=", + '_7' => "`vftable'", '_8' => "`vbtable'", '_9' => "`vcall'", '_A' => "`typeof'", '_B' => "`local static guard'", + '_C' => "`string'", '_D' => "`vbase destructor'", '_E' => "`vector deleting destructor'", '_F' => "`default constructor closure'", + '_G' => "`scalar deleting destructor'", '_H' => "`vector constructor iterator'", '_I' => "`vector destructor iterator'", + '_J' => "`vector vbase constructor iterator'", '_K' => "`virtual displacement map'", '_L' => "`eh vector constructor iterator'", + '_M' => "`eh vector destructor iterator'", '_N' => "`eh vector vbase constructor iterator'", '_O' => "`copy constructor closure'", + '_S' => "`local vftable'", '_T' => "`local vftable constructor closure'", '_U' => "new[]", '_V' => "delete[]", + '_X' => "`placement delete closure'", '_Y' => "`placement delete[] closure'"}[op] + op[0] == ?` ? 
op[1..-2] : "op_#{op}" + end + end + + # from http://www.codesourcery.com/public/cxx-abi/abi.html + def demangle_gcc(name) + subs = [] + ret = '' + decode_tok = lambda { + name ||= '' + case name[0] + when nil + ret = nil + when ?N + name = name[1..-1] + decode_tok[] + until name[0] == ?E + break if not ret + ret << '::' + decode_tok[] + end + name = name[1..-1] + when ?I + name = name[1..-1] + ret = ret[0..-3] if ret[-2, 2] == '::' + ret << '<' + decode_tok[] + until name[0] == ?E + break if not ret + ret << ', ' + decode_tok[] + end + ret << ' ' if ret and ret[-1] == ?> + ret << '>' if ret + name = name[1..-1] + when ?T + case name[1] + when ?T; ret << 'vtti(' + when ?V; ret << 'vtable(' + when ?I; ret << 'typeinfo(' + when ?S; ret << 'typename(' + else ret = nil + end + name = name[2..-1].to_s + decode_tok[] if ret + ret << ')' if ret + name = name[1..-1] if name[0] == ?E + when ?C + name = name[2..-1] + base = ret[/([^:]*)(<.*|::)?$/, 1] + ret << base + when ?D + name = name[2..-1] + base = ret[/([^:]*)(<.*|::)?$/, 1] + ret << '~' << base + when ?0..?9 + nr = name[/^[0-9]+/] + name = name[nr.length..-1].to_s + ret << name[0, nr.to_i] + name = name[nr.to_i..-1] + subs << ret[/[\w:]*$/] + when ?S + name = name[1..-1] + case name[0] + when ?_, ?0..?9, ?A..?Z + case name[0] + when ?_; idx = 0 ; name = name[1..-1] + when ?0..?9; idx = name[0, 1].unpack('C')[0] - 0x30 + 1 ; name = name[2..-1] + when ?A..?Z; idx = name[0, 1].unpack('C')[0] - 0x41 + 11 ; name = name[2..-1] + end + if not subs[idx] + ret = nil + else + ret << subs[idx] + end + when ?t + ret << 'std::' + name = name[1..-1] + decode_tok[] + else + std = { ?a => 'std::allocator', + ?b => 'std::basic_string', + ?s => 'std::string', # 'std::basic_string < char, std::char_traits, std::allocator >', + ?i => 'std::istream', # 'std::basic_istream >', + ?o => 'std::ostream', # 'std::basic_ostream >', + ?d => 'std::iostream', # 'std::basic_iostream >' + }[name[0]] + if not std + ret = nil + else + ret << std + end 
+ name = name[1..-1] + end + when ?P, ?R, ?r, ?V, ?K + attr = { ?P => '*', ?R => '&', ?r => ' restrict', ?V => ' volatile', ?K => ' const' }[name[0]] + name = name[1..-1] + rl = ret.length + decode_tok[] + if ret + ret << attr + subs << ret[rl..-1] + end + else + if ret =~ /[(<]/ and ty = { + ?v => 'void', ?w => 'wchar_t', ?b => 'bool', ?c => 'char', ?a => 'signed char', + ?h => 'unsigned char', ?s => 'short', ?t => 'unsigned short', ?i => 'int', + ?j => 'unsigned int', ?l => 'long', ?m => 'unsigned long', ?x => '__int64', + ?y => 'unsigned __int64', ?n => '__int128', ?o => 'unsigned __int128', ?f => 'float', + ?d => 'double', ?e => 'long double', ?g => '__float128', ?z => '...' + }[name[0]] + name = name[1..-1] + ret << ty + else + fu = name[0, 2] + name = name[2..-1] + if op = { + 'nw' => ' new', 'na' => ' new[]', 'dl' => ' delete', 'da' => ' delete[]', + 'ps' => '+', 'ng' => '-', 'ad' => '&', 'de' => '*', 'co' => '~', 'pl' => '+', + 'mi' => '-', 'ml' => '*', 'dv' => '/', 'rm' => '%', 'an' => '&', 'or' => '|', + 'eo' => '^', 'aS' => '=', 'pL' => '+=', 'mI' => '-=', 'mL' => '*=', 'dV' => '/=', + 'rM' => '%=', 'aN' => '&=', 'oR' => '|=', 'eO' => '^=', 'ls' => '<<', 'rs' => '>>', + 'lS' => '<<=', 'rS' => '>>=', 'eq' => '==', 'ne' => '!=', 'lt' => '<', 'gt' => '>', + 'le' => '<=', 'ge' => '>=', 'nt' => '!', 'aa' => '&&', 'oo' => '||', 'pp' => '++', + 'mm' => '--', 'cm' => ',', 'pm' => '->*', 'pt' => '->', 'cl' => '()', 'ix' => '[]', + 'qu' => '?', 'st' => ' sizeof', 'sz' => ' sizeof', 'at' => ' alignof', 'az' => ' alignof' + }[fu] + ret << "operator#{op}" + elsif fu == 'cv' + ret << "cast<" + decode_tok[] + ret << ">" if ret + else + ret = nil + end + end + end + name ||= '' + } + + decode_tok[] + subs.pop + if ret and name != '' + ret << '(' + decode_tok[] + while ret and name != '' + ret << ', ' + decode_tok[] + end + ret << ')' if ret + end + ret + end + + # scans all the sections raw for a given regexp + # return/yields all the addresses matching + # if yield 
returns nil/false, do not include the addr in the final result + # sections are scanned MB by MB, so this should work (slowly) on 4GB sections (eg debugger VM) + # with addr_start/length, symbol-based section are skipped + def pattern_scan(pat, addr_start=nil, length=nil, chunksz=nil, margin=nil, &b) + chunksz ||= 4*1024*1024 # scan 4MB at a time + margin ||= 65536 # add this much bytes at each chunk to find /pat/ over chunk boundaries + + pat = Regexp.new(Regexp.escape(pat)) if pat.kind_of? ::String + + found = [] + @sections.each { |sec_addr, e| + if addr_start + length ||= 0x1000_0000 + begin + if sec_addr < addr_start + next if sec_addr+e.length <= addr_start + e = e[addr_start-sec_addr, e.length] + sec_addr = addr_start + end + if sec_addr+e.length > addr_start+length + next if sec_addr > addr_start+length + e = e[0, sec_addr+e.length-(addr_start+length)] + end + rescue + puts $!, $!.message, $!.backtrace if $DEBUG + # catch arithmetic error with symbol-based section + next + end + end + e.pattern_scan(pat, chunksz, margin) { |eo| + match_addr = sec_addr + eo + found << match_addr if not b or b.call(match_addr) + false + } + } + found + end + + # returns/yields [addr, string] found using pattern_scan /[\x20-\x7e]/ + def strings_scan(minlen=6, &b) + ret = [] + nexto = 0 + pattern_scan(/[\x20-\x7e]{#{minlen},}/m, nil, 1024) { |o| + if o - nexto > 0 + next unless e = get_edata_at(o) + str = e.data[e.ptr, 1024][/[\x20-\x7e]{#{minlen},}/m] + ret << [o, str] if not b or b.call(o, str) + nexto = o + str.length + end + } + ret + end + + # exports the addr => symbol map (see load_map) + def save_map + @prog_binding.map { |l, o| + type = di_at(o) ? 'c' : 'd' # XXX + o = o.to_s(16).rjust(8, '0') if o.kind_of? ::Integer + "#{o} #{type} #{l}" + } + end + + # loads a map file (addr => symbol) + # off is an optionnal offset to add to every address found (for eg rebased binaries) + # understands: + # standard map files (eg linux-kernel.map: , e.g. 
'c01001ba t setup_idt') + # ida map files (: ) + # arg is either the map itself or the filename of the map (if it contains no newline) + def load_map(str, off=0) + str = File.read(str) rescue nil if not str.index("\n") + sks = @sections.keys.sort + seen = {} + str.each_line { |l| + case l.strip + when /^([0-9A-F]+)\s+(\w+)\s+(\w+)/i # kernel.map style + addr = $1.to_i(16)+off + set_label_at(addr, $3, false, !seen[addr]) + seen[addr] = true + when /^([0-9A-F]+):([0-9A-F]+)\s+([a-z_]\w+)/i # IDA style + # we do not have section load order, let's just hope that the addresses are sorted (and sortable..) + # could check the 1st part of the file, with section sizes, but it is not very convenient + # the regexp is so that we skip the 1st part with section descriptions + # in the file, section 1 is the 1st section ; we have an additionnal section (exe header) which fixes the 0-index + addr = sks[$1.to_i(16)] + $2.to_i(16) + off + set_label_at(addr, $3, false, !seen[addr]) + seen[addr] = true + end + } + end + + # saves the dasm state in a file + def save_file(file) + tmpfile = file + '.tmp' + File.open(tmpfile, 'wb') { |fd| save_io(fd) } + File.rename tmpfile, file + end + + # saves the dasm state to an IO + def save_io(fd) + fd.puts 'Metasm.dasm' + + if @program.filename and not @program.kind_of?(Shellcode) + t = @program.filename.to_s + fd.puts "binarypath #{t.length}", t + else + t = "#{@cpu.class.name.sub(/.*::/, '')} #{@cpu.size} #{@cpu.endianness}" + fd.puts "cpu #{t.length}", t + # XXX will be reloaded as a Shellcode with this CPU, but it may be a custom EXE + # do not output binarypath, we'll be loaded as a Shellcode, 'section' will suffice + end + + @sections.each { |a, e| + # forget edata exports/relocs + # dump at most 16Mo per section + t = "#{Expression[a]} #{e.length}\n" + + [e.data[0, 2**24].to_str].pack('m*') + fd.puts "section #{t.length}", t + } + + t = save_map.join("\n") + fd.puts "map #{t.length}", t + + t = @decoded.map { |a, d| + next if not 
d.kind_of? DecodedInstruction + "#{Expression[a]},#{d.bin_length} #{d.instruction}#{" ; #{d.comment.join(' ')}" if d.comment}" + }.compact.sort.join("\n") + fd.puts "decoded #{t.length}", t + + t = @comment.map { |a, c| + c.map { |l| l.chomp }.join("\n").split("\n").map { |lc| "#{Expression[a]} #{lc.chomp}" } + }.join("\n") + fd.puts "comment #{t.length}", t + + bl = @decoded.values.map { |d| + d.block if d.kind_of? DecodedInstruction and d.block_head? + }.compact + t = bl.map { |b| + [Expression[b.address], + b.list.map { |d| Expression[d.address] }.join(','), + b.to_normal.to_a.map { |t_| Expression[t_] }.join(','), + b.to_subfuncret.to_a.map { |t_| Expression[t_] }.join(','), + b.to_indirect.to_a.map { |t_| Expression[t_] }.join(','), + b.from_normal.to_a.map { |t_| Expression[t_] }.join(','), + b.from_subfuncret.to_a.map { |t_| Expression[t_] }.join(','), + b.from_indirect.to_a.map { |t_| Expression[t_] }.join(','), + ].join(';') + }.sort.join("\n") + fd.puts "blocks #{t.length}", t + + t = @function.map { |a, f| + next if not @decoded[a] + [a, *f.return_address.to_a].map { |e| Expression[e] }.join(',') + }.compact.sort.join("\n") + # TODO binding ? 
+ fd.puts "funcs #{t.length}", t + + t = @xrefs.map { |a, x| + a = ':default' if a == :default + a = ':unknown' if a == Expression::Unknown + # XXX origin + case x + when nil + when Xref + [Expression[a], x.type, x.len, (Expression[x.origin] if x.origin)].join(',') + when Array + x.map { |x_| [Expression[a], x_.type, x_.len, (Expression[x_.origin] if x_.origin)].join(',') } + end + }.compact.join("\n") + fd.puts "xrefs #{t.length}", t + + t = @c_parser.to_s + fd.puts "c #{t.length}", t + + #t = bl.map { |b| b.backtracked_for } + #fd.puts "trace #{t.length}" , t + end + + # loads a disassembler from a saved file + def self.load(str, &b) + d = new(nil, nil) + d.load(str, &b) + d + end + + # loads the dasm state from a savefile content + # will yield unknown segments / binarypath notfound + def load(str) + raise 'Not a metasm save file' if str[0, 12].chomp != 'Metasm.dasm' + off = 12 + pp = Preprocessor.new + app = AsmPreprocessor.new + while off < str.length + i = str.index("\n", off) || str.length + type, len = str[off..i].chomp.split + off = i+1 + data = str[off, len.to_i] + off += len.to_i + case type + when nil, '' + when 'binarypath' + data = yield(type, data) if not File.exist? data and block_given? 
+ reinitialize AutoExe.decode_file(data) + @program.disassembler = self + @program.init_disassembler + when 'cpu' + cpuname, size, endianness = data.split + cpu = Metasm.const_get(cpuname) + raise 'invalid cpu' if not cpu < CPU + cpu = cpu.new + cpu.size = size.to_i + cpu.endianness = endianness.to_sym + reinitialize Shellcode.new(cpu) + @program.disassembler = self + @program.init_disassembler + @sections.delete(0) # rm empty section at 0, other real 'section' follow + when 'section' + info = data[0, data.index("\n") || data.length] + data = data[info.length, data.length] + pp.feed!(info) + addr = Expression.parse(pp).reduce + len = Expression.parse(pp).reduce + edata = EncodedData.new(data.unpack('m*').first, :virtsize => len) + add_section(addr, edata) + when 'map' + load_map data + when 'decoded' + data.each_line { |l| + begin + next if l !~ /^([^,]*),(\d*) ([^;]*)(?:; (.*))?/ + a, len, instr, cmt = $1, $2, $3, $4 + a = Expression.parse(pp.feed!(a)).reduce + instr = @cpu.parse_instruction(app.feed!(instr)) + di = DecodedInstruction.new(instr, a) + di.bin_length = len.to_i + di.add_comment cmt if cmt + @decoded[a] = di + rescue + puts "load: bad di #{l.inspect}" if $VERBOSE + end + } + when 'blocks' + data.each_line { |l| + bla = l.chomp.split(';').map { |sl| sl.split(',') } + begin + a = Expression.parse(pp.feed!(bla.shift[0])).reduce + b = InstructionBlock.new(a, get_section_at(a).to_a[0]) + bla.shift.each { |e| + a = Expression.parse(pp.feed!(e)).reduce + b.add_di(@decoded[a]) + } + bla.zip([:to_normal, :to_subfuncret, :to_indirect, :from_normal, :from_subfuncret, :from_indirect]).each { |l_, s| + b.send("#{s}=", l_.map { |e| Expression.parse(pp.feed!(e)).reduce }) if not l_.empty? + } + rescue + puts "load: bad block #{l.inspect}" if $VERBOSE + end + } + when 'funcs' + data.each_line { |l| + begin + a, *r = l.split(',').map { |e| Expression.parse(pp.feed!(e)).reduce } + @function[a] = DecodedFunction.new + @function[a].return_address = r if not r.empty? 
+ @function[a].finalized = true + # TODO + rescue + puts "load: bad function #{l.inspect} #$!" if $VERBOSE + end + } + when 'comment' + data.each_line { |l| + begin + a, c = l.split(' ', 2) + a = Expression.parse(pp.feed!(a)).reduce + @comment[a] ||= [] + @comment[a] |= [c] + rescue + puts "load: bad comment #{l.inspect} #$!" if $VERBOSE + end + } + when 'c' + begin + # TODO parse_invalid_c, split per function, whatever + parse_c('') + @c_parser.allow_bad_c = true + parse_c(data, 'savefile#c') + rescue + puts "load: bad C: #$!", $!.backtrace if $VERBOSE + end + @c_parser.readtok until @c_parser.eos? if @c_parser + when 'xrefs' + data.each_line { |l| + begin + a, t, len, o = l.chomp.split(',') + case a + when ':default'; a = :default + when ':unknown'; a = Expression::Unknown + else a = Expression.parse(pp.feed!(a)).reduce + end + t = (t.empty? ? nil : t.to_sym) + len = (len != '' ? len.to_i : nil) + o = (o.to_s != '' ? Expression.parse(pp.feed!(o)).reduce : nil) # :default/:unknown ? + add_xref(a, Xref.new(t, o, len)) + rescue + puts "load: bad xref #{l.inspect} #$!" if $VERBOSE + end + } + #when 'trace' + else + if block_given? + yield(type, data) + else + puts "load: unsupported section #{type.inspect}" if $VERBOSE + end + end + end + end + + # change the base address of the loaded binary + # better done early (before disassembling anything) + # returns the delta + def rebase(newaddr) + rebase_delta(newaddr - @sections.keys.min) + end + + def rebase_delta(delta) + fix = lambda { |a| + case a + when Array + a.map! 
{ |e| fix[e] } + when Hash + tmp = {} + a.each { |k, v| tmp[fix[k]] = v } + a.replace tmp + when Integer + a += delta + when BacktraceTrace + a.origin = fix[a.origin] + a.address = fix[a.address] + end + a + } + + fix[@sections] + fix[@decoded] + fix[@xrefs] + fix[@function] + fix[@addrs_todo] + fix[@addrs_done] + fix[@comment] + @prog_binding.each_key { |k| @prog_binding[k] = fix[@prog_binding[k]] } + @old_prog_binding.each_key { |k| @old_prog_binding[k] = fix[@old_prog_binding[k]] } + @label_alias_cache = nil + + @decoded.values.grep(DecodedInstruction).each { |di| + if di.block_head? + b = di.block + b.address += delta + fix[b.to_normal] + fix[b.to_subfuncret] + fix[b.to_indirect] + fix[b.from_normal] + fix[b.from_subfuncret] + fix[b.from_indirect] + fix[b.backtracked_for] + end + di.address = fix[di.address] + di.next_addr = fix[di.next_addr] + } + @function.each_value { |f| + f.return_address = fix[f.return_address] + fix[f.backtracked_for] + } + @xrefs.values.flatten.compact.each { |x| x.origin = fix[x.origin] } + delta + end + + # dataflow method + # walks a function, starting at addr + # follows the usage of registers, computing the evolution from the value they had at start_addr + # whenever an instruction references the register (or anything derived from it), + # yield [di, used_register, reg_value, trace_state] where reg_value is the Expression holding the value of + # the register wrt the initial value at start_addr, and trace_state the value of all registers (reg_value + # not yet applied) + # reg_value may be nil if used_register is not modified by the function (eg call [eax]) + # the yield return value is propagated, unless it is nil/false + # init_state is a hash { :reg => initial value } + def trace_function_register(start_addr, init_state) + function_walk(start_addr, init_state) { |args| + trace_state = args.last + case args.first + when :di + di = args[2] + update = {} + get_fwdemu_binding(di).each { |r, v| + if v.kind_of?(Expression) and 
v.externals.find { |e| trace_state[e] } + # XXX may mix old (from trace) and current (from v) registers + newv = v.bind(trace_state) + update[r] = yield(di, r, newv, trace_state) + elsif r.kind_of?(ExpressionType) and rr = r.externals.find { |e| trace_state[e] } + # reg dereferenced in a write (eg mov [esp], 42) + next if update.has_key?(rr) # already yielded + if yield(di, rr, trace_state[rr], trace_state) == false + update[rr] = false + end + elsif trace_state[r] + # started on mov reg, foo + next if di.address == start_addr + update[r] = false + end + } + + # directly walk the instruction argument list for registers not appearing in the binding + @cpu.instr_args_memoryptr(di).each { |ind| + b = @cpu.instr_args_memoryptr_getbase(ind) + if b and b = b.symbolic and not update.has_key?(b) + yield(di, b, nil, trace_state) + end + } + @cpu.instr_args_regs(di).each { |r| + r = r.symbolic + if not update.has_key?(r) + yield(di, r, nil, trace_state) + end + } + + update.each { |r, v| + trace_state = trace_state.dup + if v + # cannot follow non-registers, or we would have to emulate every single + # instruction (try following [esp+4] across a __stdcall..) + trace_state[r] = v if r.kind_of?(::Symbol) + else + trace_state.delete r + end + } + when :subfunc + faddr = args[1] + f = @function[faddr] + f = @function[f.backtrace_binding[:thunk]] if f and f.backtrace_binding[:thunk] + if f + binding = f.backtrace_binding + if binding.empty? + backtrace_update_function_binding(faddr) + binding = f.backtrace_binding + end + # XXX fwdemu_binding ? 
+ binding.each { |r, v| + if v.externals.find { |e| trace_state[e] } + if r.kind_of?(::Symbol) + trace_state = trace_state.dup + trace_state[r] = Expression[v.bind(trace_state)].reduce + end + elsif trace_state[r] + trace_state = trace_state.dup + trace_state.delete r + end + } + end + when :merge + # when merging paths, keep the smallest common state subset + # XXX may have unexplored froms + conflicts = args[2] + trace_state = trace_state.dup + conflicts.each { |addr, st| + trace_state.delete_if { |k, v| st[k] != v } + } + end + trace_state = false if trace_state.empty? + trace_state + } + end + + # define a register as a pointer to a structure + # rename all [reg+off] as [reg+struct.member] in current function + # also trace assignments of pointer members + def trace_update_reg_structptr(addr, reg, structname, structoff=0) + sname = soff = ctx = nil + expr_to_sname = lambda { |expr| + if not expr.kind_of?(Expression) or expr.op != :+ + sname = nil + next + end + + sname = expr.lexpr || expr.rexpr + soff = (expr.lexpr ? expr.rexpr : 0) + + if soff.kind_of?(Expression) + # ignore index in ptr array + if soff.op == :* and soff.lexpr == @cpu.size/8 + soff = 0 + elsif soff.rexpr.kind_of?(Expression) and soff.rexpr.op == :* and soff.rexpr.lexpr == @cpu.size/8 + soff = soff.lexpr + elsif soff.lexpr.kind_of?(Expression) and soff.lexpr.op == :* and soff.lexpr.lexpr == @cpu.size/8 + soff = soff.rexpr + end + elsif soff.kind_of?(::Symbol) + # array with 1 byte elements / pre-scaled idx? + if not ctx[soff] + soff = 0 + end + end + } + + lastdi = nil + trace_function_register(addr, reg => Expression[structname, :+, structoff]) { |di, r, val, trace| + + next if r.to_s =~ /flag/ # XXX maybe too ia32-specific? 
+ + ctx = trace + @cpu.instr_args_memoryptr(di).each { |ind| + # find the structure dereference in di + b = @cpu.instr_args_memoryptr_getbase(ind) + b = b.symbolic if b + next unless trace[b] + imm = @cpu.instr_args_memoryptr_getoffset(ind) || 0 + + # check expr has the form 'traced_struct_reg + off' + expr_to_sname[trace[b] + imm] # Expr#+ calls Expr#reduce + next unless sname.kind_of?(::String) and soff.kind_of?(::Integer) + next if not st = c_parser.toplevel.struct[sname] or not st.kind_of?(C::Union) + + # ignore lea esi, [esi+0] + next if soff == 0 and not di.backtrace_binding.find { |k, v| v-k != 0 } + + # TODO if trace[b] offset != 0, we had a lea reg, [struct+substruct_off], tweak str accordingly + + # resolve struct + off into struct.membername + str = st.name.dup + mb = st.expand_member_offset(c_parser, soff, str) + # patch di + imm = imm.rexpr if imm.kind_of?(Expression) and not imm.lexpr and imm.rexpr.kind_of?(ExpressionString) + imm = imm.expr if imm.kind_of?(ExpressionString) + @cpu.instr_args_memoryptr_setoffset(ind, ExpressionString.new(imm, str, :structoff)) + + # check if the type is an enum/bitfield, patch instruction immediates + trace_update_reg_structptr_arg_enum(di, ind, mb, str) if mb + } if lastdi != di.address + lastdi = di.address + + next Expression[structname, :+, structoff] if di.address == addr and r == reg + + # check if we need to trace 'r' further + val = val.reduce_rec if val.kind_of?(Expression) + val = Expression[val] if val.kind_of?(::String) + case val + when Expression + # only trace trivial structptr+off expressions + expr_to_sname[val] + if sname.kind_of?(::String) and soff.kind_of?(::Integer) + Expression[sname, :+, soff] + end + + when Indirection + # di is mov reg, [ptr+struct.offset] + # check if the target member is a pointer to a struct, if so, trace it + expr_to_sname[val.pointer.reduce] + + next unless sname.kind_of?(::String) and soff.kind_of?(::Integer) + + if st = c_parser.toplevel.struct[sname] and 
st.kind_of?(C::Union) + pt = st.expand_member_offset(c_parser, soff, '') + pt = pt.untypedef if pt + if pt.kind_of?(C::Pointer) + tt = pt.type.untypedef + stars = '' + while tt.kind_of?(C::Pointer) + stars << '*' + tt = tt.type.untypedef + end + if tt.kind_of?(C::Union) and tt.name + Expression[tt.name + stars] + end + end + + elsif soff == 0 and sname[-1] == ?* + # XXX pointer to pointer to struct + # full C type support would be better, but harder to fit in an Expr + Expression[sname[0...-1]] + end + # in other cases, stop trace + end + } + end + + # found a special member of a struct, check if we can apply + # bitfield/enum name to other constants in the di + def trace_update_reg_structptr_arg_enum(di, ind, mb, str) + if ename = mb.has_attribute_var('enum') and enum = c_parser.toplevel.struct[ename] and enum.kind_of?(C::Enum) + # handle enums: struct moo { int __attribute__((enum(bla))) fld; }; + doit = lambda { |_di| + if num = _di.instruction.args.grep(Expression).first and num_i = num.reduce and num_i.kind_of?(::Integer) + # handle enum values on tagged structs + if enum.members and name = enum.members.index(num_i) + num.lexpr = nil + num.op = :+ + num.rexpr = ExpressionString.new(Expression[num_i], name, :enum) + _di.add_comment "enum::#{ename}" if _di.address != di.address + end + end + } + + doit[di] + + # mov eax, [ptr+struct.enumfield] => trace eax + if reg = @cpu.instr_args_regs(di).find { |r| v = di.backtrace_binding[r.symbolic] and (v - ind.symbolic) == 0 } + reg = reg.symbolic + trace_function_register(di.address, reg => Expression[0]) { |_di, r, val, trace| + next if r != reg and val != Expression[reg] + doit[_di] + val + } + end + + elsif mb.untypedef.kind_of?(C::Struct) + # handle bitfields + + byte_off = 0 + if str =~ /\+(\d+)$/ + # test byte [bitfield+1], 0x1 => test dword [bitfield], 0x100 + # XXX little-endian only + byte_off = $1.to_i + str[/\+\d+$/] = '' + end + cmt = str.split('.')[-2, 2].join('.') if str.count('.') > 1 + + doit = lambda { 
|_di, add| + if num = _di.instruction.args.grep(Expression).first and num_i = num.reduce and num_i.kind_of?(::Integer) + # TODO handle ~num_i + num_left = num_i << add + s_or = [] + mb.untypedef.members.each { |mm| + if bo = mb.bitoffsetof(c_parser, mm) + boff, blen = bo + if mm.name && blen == 1 && ((num_left >> boff) & 1) > 0 + s_or << mm.name + num_left &= ~(1 << boff) + end + end + } + if s_or.first + if num_left != 0 + s_or << ('0x%X' % num_left) + end + s = s_or.join('|') + num.lexpr = nil + num.op = :+ + num.rexpr = ExpressionString.new(Expression[num_i], s, :bitfield) + _di.add_comment cmt if _di.address != di.address + end + end + } + + doit[di, byte_off*8] + + if reg = @cpu.instr_args_regs(di).find { |r| v = di.backtrace_binding[r.symbolic] and (v - ind.symbolic) == 0 } + reg = reg.symbolic + trace_function_register(di.address, reg => Expression[0]) { |_di, r, val, trace| + if r.kind_of?(Expression) and r.op == :& + if r.lexpr == reg + # test al, 42 + doit[_di, byte_off*8] + elsif r.lexpr.kind_of?(Expression) and r.lexpr.op == :>> and r.lexpr.lexpr == reg + # test ah, 42 + doit[_di, byte_off*8+r.lexpr.rexpr] + end + end + next if r != reg and val != Expression[reg] + doit[_di, byte_off*8] + _di.address == di.address && r == reg ? Expression[0] : val + } + end + end + end + + # change Expression display mode for current object o to display integers as char constants + def toggle_expr_char(o) + return if not o.kind_of?(Renderable) + tochars = lambda { |v| + if v.kind_of?(::Integer) + a = [] + vv = v.abs + a << (vv & 0xff) + vv >>= 8 + while vv > 0 + a << (vv & 0xff) + vv >>= 8 + end + if a.all? { |b| b < 0x7f } + s = a.pack('C*').inspect.gsub("'") { '\\\'' }[1...-1] + ExpressionString.new(v, (v > 0 ? 
"'#{s}'" : "-'#{s}'"), :char) + end + end + } + o.each_expr { |e| + if e.kind_of?(Expression) + if nr = tochars[e.rexpr] + e.rexpr = nr + elsif e.rexpr.kind_of?(ExpressionString) and e.rexpr.type == :char + e.rexpr = e.rexpr.expr + end + if nl = tochars[e.lexpr] + e.lexpr = nl + elsif e.lexpr.kind_of?(ExpressionString) and e.lexpr.type == :char + e.lexpr = e.lexpr.expr + end + end + } + end + + def toggle_expr_dec(o) + return if not o.kind_of?(Renderable) + o.each_expr { |e| + if e.kind_of?(Expression) + if e.rexpr.kind_of?(::Integer) + e.rexpr = ExpressionString.new(Expression[e.rexpr], e.rexpr.to_s, :decimal) + elsif e.rexpr.kind_of?(ExpressionString) and e.rexpr.type == :decimal + e.rexpr = e.rexpr.reduce + end + if e.lexpr.kind_of?(::Integer) + e.lexpr = ExpressionString.new(Expression[e.lexpr], e.lexpr.to_s, :decimal) + elsif e.lexpr.kind_of?(ExpressionString) and e.lexpr.type == :decimal + e.lexpr = e.lexpr.reduce + end + end + } + end + + # patch Expressions in current object to include label names when available + # XXX should we also create labels ? + def toggle_expr_offset(o) + return if not o.kind_of? Renderable + o.each_expr { |e| + next unless e.kind_of?(Expression) + if n = @prog_binding[e.lexpr] + e.lexpr = n + elsif e.lexpr.kind_of? ::Integer and n = get_label_at(e.lexpr) + add_xref(normalize(e.lexpr), Xref.new(:addr, o.address)) if o.respond_to? :address + e.lexpr = n + end + if n = @prog_binding[e.rexpr] + e.rexpr = n + elsif e.rexpr.kind_of? ::Integer and n = get_label_at(e.rexpr) + add_xref(normalize(e.rexpr), Xref.new(:addr, o.address)) if o.respond_to? 
:address + e.rexpr = n + end + } + end + + # toggle all ExpressionStrings + def toggle_expr_str(o) + return if not o.kind_of?(Renderable) + o.each_expr { |e| + next unless e.kind_of?(ExpressionString) + e.hide_str = !e.hide_str + } + end + + # call this function on a function entrypoint if the function is in fact a __noreturn + # will cut the to_subfuncret of callers + def fix_noreturn(o) + each_xref(o, :x) { |a| + a = normalize(a.origin) + next if not di = di_at(a) or not di.opcode.props[:saveip] + # XXX should check if caller also becomes __noreturn + di.block.each_to_subfuncret { |to| + next if not tdi = di_at(to) or not tdi.block.from_subfuncret + tdi.block.from_subfuncret.delete_if { |aa| normalize(aa) == di.address } + tdi.block.from_subfuncret = nil if tdi.block.from_subfuncret.empty? + } + di.block.to_subfuncret = nil + } + end + + # find the addresses of calls calling the address, handles thunks + def call_sites(funcaddr) + find_call_site = proc { |a| + until not di = di_at(a) + if di.opcode.props[:saveip] + cs = di.address + break + end + if di.block.from_subfuncret.to_a.first + while di.block.from_subfuncret.to_a.length == 1 + a = di.block.from_subfuncret[0] + break if not di_at(a) + a = @decoded[a].block.list.first.address + di = @decoded[a] + end + end + break if di.block.from_subfuncret.to_a.first + break if di.block.from_normal.to_a.length != 1 + a = di.block.from_normal.first + end + cs + } + ret = [] + each_xref(normalize(funcaddr), :x) { |a| + ret << find_call_site[a.origin] + } + ret.compact.uniq + end + + # loads a disassembler plugin script + # this is simply a ruby script instance_eval() in the disassembler + # the filename argument is autocompleted with '.rb' suffix, and also + # searched for in the Metasmdir/samples/dasm-plugins subdirectory if not found in cwd + def load_plugin(plugin_filename) + if not File.exist?(plugin_filename) + if File.exist?(plugin_filename+'.rb') + plugin_filename += '.rb' + elsif defined? 
Metasmdir + # try autocomplete + pf = File.join(Metasmdir, 'samples', 'dasm-plugins', plugin_filename) + if File.exist? pf + plugin_filename = pf + elsif File.exist? pf + '.rb' + plugin_filename = pf + '.rb' + end + end + end + + instance_eval File.read(plugin_filename) + end + + # same as load_plugin, but hides the @gui attribute while loading, preventing the plugin do popup stuff + # this is useful when you want to load a plugin from another plugin to enhance the plugin's functionnality + # XXX this also prevents setting up kbd_callbacks etc.. + def load_plugin_nogui(plugin_filename) + oldgui = gui + @gui = nil + load_plugin(plugin_filename) + ensure + @gui = oldgui + end + + # compose two code/instruction's backtrace_binding + # assumes bd1 is followed by bd2 in the code flow + # eg inc edi + push edi => + # { Ind[:esp, 4] => Expr[:edi + 1], :esp => Expr[:esp - 4], :edi => Expr[:edi + 1] } + # XXX if bd1 writes to memory with a pointer that is reused in bd2, this function has to + # revert the change made by bd2, which only works with simple ptr addition now + # XXX unhandled situations may be resolved using :unknown, or by returning incorrect values + def compose_bt_binding(bd1, bd2) + if bd1.kind_of? DecodedInstruction + bd1 = bd1.backtrace_binding ||= cpu.get_backtrace_binding(bd1) + end + if bd2.kind_of? DecodedInstruction + bd2 = bd2.backtrace_binding ||= cpu.get_backtrace_binding(bd2) + end + + reduce = lambda { |e| Expression[Expression[e].reduce] } + + bd = {} + + bd2.each { |k, v| + bd[k] = reduce[v.bind(bd1)] + } + + # for each pointer appearing in keys of bd1, we must infer from bd2 what final + # pointers should appear in bd + # eg 'mov [eax], 0 mov ebx, eax' => { [eax] <- 0, [ebx] <- 0, ebx <- eax } + bd1.each { |k, v| + if k.kind_of? 
Indirection + done = false + k.pointer.externals.each { |e| + # XXX this will break on nontrivial pointers or bd2 + bd2.each { |k2, v2| + # we dont want to invert computation of flag_zero/carry etc (booh) + next if k2.to_s =~ /flag/ + + # discard indirection etc, result would be too complex / not useful + next if not Expression[v2].expr_externals.include? e + + done = true + + # try to reverse the computation made upon 'e' + # only simple addition handled here + ptr = reduce[k.pointer.bind(e => Expression[[k2, :-, v2], :+, e])] + + # if bd2 does not rewrite e, duplicate the original pointer + if not bd2[e] + bd[k] ||= reduce[v] + + # here we should not see 'e' in ptr anymore + ptr = Expression::Unknown if ptr.externals.include? e + else + # cant check if add reversion was successful.. + end + + bd[Indirection[reduce[ptr], k.len]] ||= reduce[v] + } + } + bd[k] ||= reduce[v] if not done + else + bd[k] ||= reduce[v] + end + } + + bd + end + + def gui_hilight_word_regexp(word) + @cpu.gui_hilight_word_regexp(word) + end + + # return a C::AllocCStruct from c_parser + # TODO handle program.class::Header.to_c_struct + def decode_c_struct(structname, addr) + if c_parser and edata = get_edata_at(addr) + c_parser.decode_c_struct(structname, edata.data, edata.ptr) + end + end + + def decode_c_ary(structname, addr, len) + if c_parser and edata = get_edata_at(addr) + c_parser.decode_c_ary(structname, len, edata.data, edata.ptr) + end + end + + # find the function containing addr, and find & rename stack vars in it + def name_local_vars(addr) + if @cpu.respond_to?(:name_local_vars) and faddr = find_function_start(addr) + @function[faddr] ||= DecodedFunction.new # XXX + @cpu.name_local_vars(self, faddr) + end + end end end diff --git a/lib/metasm/metasm/dynldr.rb b/lib/metasm/metasm/dynldr.rb index 67ef531509..780d1f60c8 100644 --- a/lib/metasm/metasm/dynldr.rb +++ b/lib/metasm/metasm/dynldr.rb @@ -9,8 +9,8 @@ module Metasm class DynLdr - # basic C defs for ruby internals - 1.8 
and 1.9 compat - x86/x64 - RUBY_H = < 64) - rb_raise(*rb_eArgError, "bad args"); + if (TYPE(args) != T_ARRAY || ARY_LEN(args) > 64) + rb_raise(*rb_eArgError, "bad args"); - uintptr_t flags_v = VAL2INT(flags); - uintptr_t ptr_v = VAL2INT(ptr); - unsigned i, argsz; - uintptr_t args_c[64]; - __int64 ret; + uintptr_t flags_v = VAL2INT(flags); + uintptr_t ptr_v = VAL2INT(ptr); + unsigned i, argsz; + uintptr_t args_c[64]; + __int64 ret; - argsz = ARY_LEN(args); - for (i=0U ; ilen = 10U; // len == 10, no need to ARY_LEN/EMBED stuff + // copy our args to a ruby-accessible buffer + for (i=2U ; i<10U ; ++i) + ARY_PTR(args)[i] = INT2VAL(*addr++); + RArray(args)->len = 10U; // len == 10, no need to ARY_LEN/EMBED stuff - ret = rb_funcall(dynldr, rb_intern("callback_run"), 2, INT2VAL(caller_id), args); + ret = rb_funcall(dynldr, rb_intern("callback_run"), 2, INT2VAL(caller_id), args); - // dynldr.callback will give us the arity (in bytes) of the callback in args[0] - // we just put the stack lifting offset in caller_id for the asm stub to use - caller_id = VAL2INT(ARY_PTR(args)[0]); + // dynldr.callback will give us the arity (in bytes) of the callback in args[0] + // we just put the stack lifting offset in caller_id for the asm stub to use + caller_id = VAL2INT(ARY_PTR(args)[0]); - return VAL2INT(ret); + return VAL2INT(ret); } #elif defined __amd64__ @@ -294,88 +294,88 @@ double fake_float(void); // TODO float args static VALUE invoke(VALUE self, VALUE ptr, VALUE args, VALUE flags) { - if (TYPE(args) != T_ARRAY || ARY_LEN(args) > 16) - rb_raise(*rb_eArgError, "bad args"); + if (TYPE(args) != T_ARRAY || ARY_LEN(args) > 16) + rb_raise(*rb_eArgError, "bad args"); - uintptr_t flags_v = VAL2INT(flags); - uintptr_t ptr_v = VAL2INT(ptr); - int i, argsz; - uintptr_t args_c[16]; - uintptr_t ret; - uintptr_t (*ptr_f)(uintptr_t, ...) 
= (void*)ptr_v; + uintptr_t flags_v = VAL2INT(flags); + uintptr_t ptr_v = VAL2INT(ptr); + int i, argsz; + uintptr_t args_c[16]; + uintptr_t ret; + uintptr_t (*ptr_f)(uintptr_t, ...) = (void*)ptr_v; - argsz = (int)ARY_LEN(args); - for (i=0 ; ilen = 8; - ptr[0] = INT2VAL(arg0); - ptr[1] = INT2VAL(arg1); - ptr[2] = INT2VAL(arg2); - ptr[3] = INT2VAL(arg3); - ptr[4] = INT2VAL(arg4); - ptr[5] = INT2VAL(arg5); - ptr[6] = INT2VAL(arg6); - ptr[7] = INT2VAL(arg7); + RArray(args)->len = 8; + ptr[0] = INT2VAL(arg0); + ptr[1] = INT2VAL(arg1); + ptr[2] = INT2VAL(arg2); + ptr[3] = INT2VAL(arg3); + ptr[4] = INT2VAL(arg4); + ptr[5] = INT2VAL(arg5); + ptr[6] = INT2VAL(arg6); + ptr[7] = INT2VAL(arg7); - ret = rb_funcall(dynldr, rb_intern("callback_run"), 2, INT2VAL(cb_id), args); + ret = rb_funcall(dynldr, rb_intern("callback_run"), 2, INT2VAL(cb_id), args); - return VAL2INT(ret); + return VAL2INT(ret); } #endif int Init_dynldr(void) __attribute__((export_as(Init_))) // to patch before parsing to match the .so name { - dynldr = rb_const_get(rb_const_get(*rb_cObject, rb_intern("Metasm")), rb_intern("DynLdr")); - rb_define_singleton_method(dynldr, "memory_read", memory_read, 2); - rb_define_singleton_method(dynldr, "memory_read_int", memory_read_int, 1); - rb_define_singleton_method(dynldr, "memory_write", memory_write, 2); - rb_define_singleton_method(dynldr, "memory_write_int", memory_write_int, 2); - rb_define_singleton_method(dynldr, "str_ptr", str_ptr, 1); - rb_define_singleton_method(dynldr, "rb_obj_to_value", rb_obj_to_value, 1); - rb_define_singleton_method(dynldr, "rb_value_to_obj", rb_value_to_obj, 1); - rb_define_singleton_method(dynldr, "sym_addr", sym_addr, 2); - rb_define_singleton_method(dynldr, "raw_invoke", invoke, 3); - rb_define_const(dynldr, "CALLBACK_TARGET", + dynldr = rb_const_get(rb_const_get(*rb_cObject, rb_intern("Metasm")), rb_intern("DynLdr")); + rb_define_singleton_method(dynldr, "memory_read", memory_read, 2); + rb_define_singleton_method(dynldr, 
"memory_read_int", memory_read_int, 1); + rb_define_singleton_method(dynldr, "memory_write", memory_write, 2); + rb_define_singleton_method(dynldr, "memory_write_int", memory_write_int, 2); + rb_define_singleton_method(dynldr, "str_ptr", str_ptr, 1); + rb_define_singleton_method(dynldr, "rb_obj_to_value", rb_obj_to_value, 1); + rb_define_singleton_method(dynldr, "rb_value_to_obj", rb_value_to_obj, 1); + rb_define_singleton_method(dynldr, "sym_addr", sym_addr, 2); + rb_define_singleton_method(dynldr, "raw_invoke", invoke, 3); + rb_define_const(dynldr, "CALLBACK_TARGET", #ifdef __i386__ - INT2VAL((VALUE)&callback_handler)); + INT2VAL((VALUE)&callback_handler)); #elif defined __amd64__ - INT2VAL((VALUE)&do_callback_handler)); + INT2VAL((VALUE)&do_callback_handler)); #endif - rb_define_const(dynldr, "CALLBACK_ID_0", INT2VAL((VALUE)&callback_id_0)); - rb_define_const(dynldr, "CALLBACK_ID_1", INT2VAL((VALUE)&callback_id_1)); - return 0; + rb_define_const(dynldr, "CALLBACK_ID_0", INT2VAL((VALUE)&callback_id_0)); + rb_define_const(dynldr, "CALLBACK_ID_1", INT2VAL((VALUE)&callback_id_1)); + return 0; } EOS - # see the note in compile_bin_module - # this is a dynamic resolver for the ruby symbols we use - DYNLDR_C_PE_HACK = <ldr->inloadorder; - ptr = ((struct _lmodule *)base)->next; - ptr = ptr->next; // skip the first entry = ruby.exe - while (ptr != base) { - if (wstrcaseruby(ptr->basename, ptr->len/2)) - return ptr->base; - ptr = ptr->next; - } + base = &peb->ldr->inloadorder; + ptr = ((struct _lmodule *)base)->next; + ptr = ptr->next; // skip the first entry = ruby.exe + while (ptr != base) { + if (wstrcaseruby(ptr->basename, ptr->len/2)) + return ptr->base; + ptr = ptr->next; + } - return 0; + return 0; } // find the ruby library from an address in the ruby module (Init_dynldr retaddr) static uintptr_t find_ruby_module_mem(uintptr_t someaddr) { - // could __try{}, but with no imports we're useless anyway. 
- uintptr_t ptr = someaddr & (-0x10000); - while (*((unsigned __int16 *)ptr) != 'ZM') // XXX too weak? - ptr -= 0x10000; - return ptr; + // could __try{}, but with no imports we're useless anyway. + uintptr_t ptr = someaddr & (-0x10000); + while (*((unsigned __int16 *)ptr) != 'ZM') // XXX too weak? + ptr -= 0x10000; + return ptr; } // a table of string offsets, base = the table itself @@ -467,111 +467,111 @@ __stdcall uintptr_t GetProcAddress(uintptr_t, char *); // resolve the ruby imports found by offset in ruby_import_table int load_ruby_imports(uintptr_t rbaddr) { - uintptr_t ruby_module; - uintptr_t *ptr; - char *table; + uintptr_t ruby_module; + uintptr_t *ptr; + char *table; - static int loaded_ruby_imports = 0; - if (loaded_ruby_imports) - return 0; - loaded_ruby_imports = 1; + static int loaded_ruby_imports = 0; + if (loaded_ruby_imports) + return 0; + loaded_ruby_imports = 1; - if (rbaddr) - ruby_module = find_ruby_module_mem(rbaddr); - else - ruby_module = find_ruby_module_peb(); + if (rbaddr) + ruby_module = find_ruby_module_mem(rbaddr); + else + ruby_module = find_ruby_module_peb(); - if (!ruby_module) - return 0; + if (!ruby_module) + return 0; - ptr = &ruby_import_table; - table = (char*)ptr; + ptr = &ruby_import_table; + table = (char*)ptr; - while (*ptr) { - if (!(*ptr = GetProcAddress(ruby_module, table+*ptr))) - // TODO warning or something - return 0; - ptr++; - } + while (*ptr) { + if (!(*ptr = GetProcAddress(ruby_module, table+*ptr))) + // TODO warning or something + return 0; + ptr++; + } - return 1; + return 1; } #ifdef __x86_64__ #define DLL_PROCESS_ATTACH 1 int DllMain(void *handle, int reason, void *res) { - if (reason == DLL_PROCESS_ATTACH) - return load_ruby_imports(0); - return 1; + if (reason == DLL_PROCESS_ATTACH) + return load_ruby_imports(0); + return 1; } #endif EOS - # ia32 asm source for the native component: handles ABI stuff - DYNLDR_ASM_IA32 = < cb structure (inuse only) + # initialization + # load (build if needed) the binary 
module + def self.start + # callbacks are really just a list of asm 'call', so we share them among subclasses of DynLdr + @@callback_addrs = [] # list of all allocated callback addrs (in use or not) + @@callback_table = {} # addr -> cb structure (inuse only) - binmodule = find_bin_path + binmodule = find_bin_path - if not File.exists?(binmodule) or File.stat(binmodule).mtime < File.stat(__FILE__).mtime - compile_binary_module(host_exe, host_cpu, binmodule) - end + if not File.exists?(binmodule) or File.stat(binmodule).mtime < File.stat(__FILE__).mtime + compile_binary_module(host_exe, host_cpu, binmodule) + end - require binmodule + require binmodule - @@callback_addrs << CALLBACK_ID_0 << CALLBACK_ID_1 - end + @@callback_addrs << CALLBACK_ID_0 << CALLBACK_ID_1 + end - # compile the dynldr binary ruby module for a specific arch/cpu/modulename - def self.compile_binary_module(exe, cpu, modulename) - bin = exe.new(cpu) - # compile the C code, but patch the Init_ export name, which must match the string used in 'require' - module_c_src = DYNLDR_C.gsub('', File.basename(modulename, '.so')) - bin.compile_c module_c_src - # compile the Asm stuff according to the target architecture - bin.assemble case cpu.shortname - when 'ia32'; DYNLDR_ASM_IA32 - when 'x64'; DYNLDR_ASM_X86_64 - end + # compile the dynldr binary ruby module for a specific arch/cpu/modulename + def self.compile_binary_module(exe, cpu, modulename) + bin = exe.new(cpu) + # compile the C code, but patch the Init_ export name, which must match the string used in 'require' + module_c_src = DYNLDR_C.gsub('', File.basename(modulename, '.so')) + bin.compile_c module_c_src + # compile the Asm stuff according to the target architecture + bin.assemble case cpu.shortname + when 'ia32'; DYNLDR_ASM_IA32 + when 'x64'; DYNLDR_ASM_X86_64 + end - # tweak the resulting binary linkage procedures if needed - compile_binary_module_hack(bin) + # tweak the resulting binary linkage procedures if needed + 
compile_binary_module_hack(bin) - # save the shared library - bin.encode_file(modulename, :lib) - end + # save the shared library + bin.encode_file(modulename, :lib) + end - def self.compile_binary_module_hack(bin) - # this is a hack - # we need the module to use ruby symbols - # but we don't know the actual ruby lib filename (depends on ruby version, # platform, ...) - case bin.shortname - when 'elf' - # we know the lib is already loaded by the main ruby executable, no DT_NEEDED needed - class << bin - def automagic_symbols(*a) - # do the plt generation - super(*a) - # but remove the specific lib names - @tag.delete 'NEEDED' - end - end - return - when 'coff' - # the hard part, see below - else - # unhandled arch, dont tweak - return - end + def self.compile_binary_module_hack(bin) + # this is a hack + # we need the module to use ruby symbols + # but we don't know the actual ruby lib filename (depends on ruby version, # platform, ...) + case bin.shortname + when 'elf' + # we know the lib is already loaded by the main ruby executable, no DT_NEEDED needed + class << bin + def automagic_symbols(*a) + # do the plt generation + super(*a) + # but remove the specific lib names + @tag.delete 'NEEDED' + end + end + return + when 'coff' + # the hard part, see below + else + # unhandled arch, dont tweak + return + end - # we remove the PE IAT section related to ruby symbols, and make - # a manual symbol resolution on module loading. + # we remove the PE IAT section related to ruby symbols, and make + # a manual symbol resolution on module loading. 
- # populate the ruby import table ourselves on module loading - bin.imports.delete_if { |id| id.libname =~ /ruby/ } + # populate the ruby import table ourselves on module loading + bin.imports.delete_if { |id| id.libname =~ /ruby/ } - # we generate something like: - # .data - # ruby_import_table: - # rb_cObject dd str_rb_cObject - ruby_import_table - # riat_rb_intern dd str_rb_intern - ruby_import_table - # dd 0 - # - # .rodata - # str_rb_cObject db "rb_cObject", 0 - # str_rb_intern db "rb_intern", 0 - # - # .text - # rb_intern: jmp [riat_rb_intern] - # - # the PE_HACK code will parse ruby_import_table and make the symbol resolution on startup + # we generate something like: + # .data + # ruby_import_table: + # rb_cObject dd str_rb_cObject - ruby_import_table + # riat_rb_intern dd str_rb_intern - ruby_import_table + # dd 0 + # + # .rodata + # str_rb_cObject db "rb_cObject", 0 + # str_rb_intern db "rb_intern", 0 + # + # .text + # rb_intern: jmp [riat_rb_intern] + # + # the PE_HACK code will parse ruby_import_table and make the symbol resolution on startup - # setup the string table and the thunks - text = bin.sections.find { |s| s.name == '.text' }.encoded - rb_syms = text.reloc_externals.grep(/^rb_/) + # setup the string table and the thunks + text = bin.sections.find { |s| s.name == '.text' }.encoded + rb_syms = text.reloc_externals.grep(/^rb_/) - dd = (bin.cpu.size == 64 ? 'dq' : 'dd') + dd = (bin.cpu.size == 64 ? 
'dq' : 'dd') - init_symbol = text.export.keys.grep(/^Init_/).first - raise 'no Init_mname symbol found' if not init_symbol - if bin.cpu.size == 32 - # hax to find the base of libruby under Win98 (peb sux) - text.export[init_symbol + '_real'] = text.export.delete(init_symbol) - bin.unique_labels_cache.delete(init_symbol) - end + init_symbol = text.export.keys.grep(/^Init_/).first + raise 'no Init_mname symbol found' if not init_symbol + if bin.cpu.size == 32 + # hax to find the base of libruby under Win98 (peb sux) + text.export[init_symbol + '_real'] = text.export.delete(init_symbol) + bin.unique_labels_cache.delete(init_symbol) + end - # the C glue: getprocaddress etc - bin.compile_c DYNLDR_C_PE_HACK.gsub('Init_dynldr', init_symbol) + # the C glue: getprocaddress etc + bin.compile_c DYNLDR_C_PE_HACK.gsub('Init_dynldr', init_symbol) - # the IAT, initialized with relative offsets to symbol names - asm_table = ['.data', '.align 8', 'ruby_import_table:'] - # strings will be in .rodata - bin.parse('.rodata') - rb_syms.each { |sym| - # raw symbol name - str_label = bin.parse_new_label('str', "db #{sym.inspect}, 0") + # the IAT, initialized with relative offsets to symbol names + asm_table = ['.data', '.align 8', 'ruby_import_table:'] + # strings will be in .rodata + bin.parse('.rodata') + rb_syms.each { |sym| + # raw symbol name + str_label = bin.parse_new_label('str', "db #{sym.inspect}, 0") - if sym !~ /^rb_[ce][A-Z]/ - # if we dont reference a data import (rb_cClass / rb_eException), - # then create a function thunk - i = PE::ImportDirectory::Import.new - i.thunk = sym - sym = i.target = 'riat_' + str_label - bin.arch_encode_thunk(text, i) # encode a jmp [importtable] - end + if sym !~ /^rb_[ce][A-Z]/ + # if we dont reference a data import (rb_cClass / rb_eException), + # then create a function thunk + i = PE::ImportDirectory::Import.new + i.thunk = sym + sym = i.target = 'riat_' + str_label + bin.arch_encode_thunk(text, i) # encode a jmp [importtable] + end - # 
update the IAT - asm_table << "#{sym} #{dd} #{str_label} - ruby_import_table" - } - # IAT null-terminated - asm_table << "#{dd} 0" + # update the IAT + asm_table << "#{sym} #{dd} #{str_label} - ruby_import_table" + } + # IAT null-terminated + asm_table << "#{dd} 0" - # now parse & assemble the IAT in .data - bin.assemble asm_table.join("\n") - end + # now parse & assemble the IAT in .data + bin.assemble asm_table.join("\n") + end - # find the path of the binary module - # if none exists, create a path writeable by the current user - def self.find_bin_path - fname = ['dynldr', host_arch, host_cpu.shortname, - ('19' if RUBY_VERSION >= '1.9')].compact.join('-') + '.so' - dir = File.dirname(__FILE__) - binmodule = File.join(dir, fname) - if not File.exists? binmodule or File.stat(binmodule).mtime < File.stat(__FILE__).mtime - if not dir = find_write_dir - raise LoadError, "no writable dir to put the DynLdr ruby module, try to run as root" - end - binmodule = File.join(dir, fname) - end - binmodule - end + # find the path of the binary module + # if none exists, create a path writeable by the current user + def self.find_bin_path + fname = ['dynldr', host_arch, host_cpu.shortname, + ('19' if RUBY_VERSION >= '1.9')].compact.join('-') + '.so' + dir = File.dirname(__FILE__) + binmodule = File.join(dir, fname) + if not File.exists? 
binmodule or File.stat(binmodule).mtime < File.stat(__FILE__).mtime + if not dir = find_write_dir + raise LoadError, "no writable dir to put the DynLdr ruby module, try to run as root" + end + binmodule = File.join(dir, fname) + end + binmodule + end - # find a writeable directory - # searches this script directory, $HOME / %APPDATA% / %USERPROFILE%, or $TMP - def self.find_write_dir - writable = lambda { |d| - begin - foo = '/_test_write_' + rand(1<<32).to_s - true if File.writable?(d) and - File.open(d+foo, 'w') { true } and - File.unlink(d+foo) - rescue - end - } - dir = File.dirname(__FILE__) - return dir if writable[dir] - dir = ENV['HOME'] || ENV['APPDATA'] || ENV['USERPROFILE'] - if writable[dir] - dir = File.join(dir, '.metasm') - Dir.mkdir dir if not File.directory? dir - return dir - end - ENV['TMP'] || ENV['TEMP'] || '.' - end + # find a writeable directory + # searches this script directory, $HOME / %APPDATA% / %USERPROFILE%, or $TMP + def self.find_write_dir + writable = lambda { |d| + begin + foo = '/_test_write_' + rand(1<<32).to_s + true if File.writable?(d) and + File.open(d+foo, 'w') { true } and + File.unlink(d+foo) + rescue + end + } + dir = File.dirname(__FILE__) + return dir if writable[dir] + dir = ENV['HOME'] || ENV['APPDATA'] || ENV['USERPROFILE'] + if writable[dir] + dir = File.join(dir, '.metasm') + Dir.mkdir dir if not File.directory? dir + return dir + end + ENV['TMP'] || ENV['TEMP'] || '.' 
+ end - # CPU suitable for compiling code for the current running host - def self.host_cpu - @cpu ||= - case RUBY_PLATFORM - when /i[3-6]86/; Ia32.new - when /x86_64|x64/i; X86_64.new - else raise LoadError, "Unsupported host platform #{RUBY_PLATFORM}" - end - end + # CPU suitable for compiling code for the current running host + def self.host_cpu + @cpu ||= + case RUBY_PLATFORM + when /i[3-6]86/; Ia32.new + when /x86_64|x64/i; X86_64.new + else raise LoadError, "Unsupported host platform #{RUBY_PLATFORM}" + end + end - # returns whether we run on linux or windows - def self.host_arch - case RUBY_PLATFORM - when /linux/i; :linux - when /mswin|mingw|cygwin/i; :windows - else raise LoadError, "Unsupported host platform #{RUBY_PLATFORM}" - end - end + # returns whether we run on linux or windows + def self.host_arch + case RUBY_PLATFORM + when /linux/i; :linux + when /mswin|mingw|cygwin/i; :windows + else raise LoadError, "Unsupported host platform #{RUBY_PLATFORM}" + end + end - # ExeFormat suitable as current running host native module - def self.host_exe - case host_arch - when :linux; ELF - when :windows; PE - end - end + # ExeFormat suitable as current running host native module + def self.host_exe + case host_arch + when :linux; ELF + when :windows; PE + end + end - # parse a C string into the @cp parser, create it if needed - def self.parse_c(src) - cp.parse(src) - end + # parse a C string into the @cp parser, create it if needed + def self.parse_c(src) + cp.parse(src) + end - # compile a C fragment into a Shellcode_RWX, honors the host ABI - def self.compile_c(src) - # XXX could we reuse self.cp ? (for its macros etc) - cp = C::Parser.new(host_exe.new(host_cpu)) - cp.parse(src) - sc = Shellcode_RWX.new(host_cpu) - asm = host_cpu.new_ccompiler(cp, sc).compile - sc.assemble(asm) - end + # compile a C fragment into a Shellcode_RWX, honors the host ABI + def self.compile_c(src) + # XXX could we reuse self.cp ? 
(for its macros etc) + cp = C::Parser.new(host_exe.new(host_cpu)) + cp.parse(src) + sc = Shellcode_RWX.new(host_cpu) + asm = host_cpu.new_ccompiler(cp, sc).compile + sc.assemble(asm) + end - # maps a Shellcode_RWX in memory, fixup stdlib relocations - # returns the Shellcode_RWX, with the base_r/w/x initialized to the allocated memory - def self.sc_map_resolve(sc) - sc_map_resolve_addthunks(sc) + # maps a Shellcode_RWX in memory, fixup stdlib relocations + # returns the Shellcode_RWX, with the base_r/w/x initialized to the allocated memory + def self.sc_map_resolve(sc) + sc_map_resolve_addthunks(sc) - sc.base_r = memory_alloc(sc.encoded_r.length) if sc.encoded_r.length > 0 - sc.base_w = memory_alloc(sc.encoded_w.length) if sc.encoded_w.length > 0 - sc.base_x = memory_alloc(sc.encoded_x.length) if sc.encoded_x.length > 0 + sc.base_r = memory_alloc(sc.encoded_r.length) if sc.encoded_r.length > 0 + sc.base_w = memory_alloc(sc.encoded_w.length) if sc.encoded_w.length > 0 + sc.base_x = memory_alloc(sc.encoded_x.length) if sc.encoded_x.length > 0 - locals = sc.encoded_r.export.keys | sc.encoded_w.export.keys | sc.encoded_x.export.keys - exts = sc.encoded_r.reloc_externals(locals) | sc.encoded_w.reloc_externals(locals) | sc.encoded_x.reloc_externals(locals) - bd = {} - exts.uniq.each { |ext| bd[ext] = sym_addr(lib_from_sym(ext), ext) or raise rescue raise "unknown symbol #{ext.inspect}" } - sc.fixup_check(bd) + locals = sc.encoded_r.export.keys | sc.encoded_w.export.keys | sc.encoded_x.export.keys + exts = sc.encoded_r.reloc_externals(locals) | sc.encoded_w.reloc_externals(locals) | sc.encoded_x.reloc_externals(locals) + bd = {} + exts.uniq.each { |ext| bd[ext] = sym_addr(lib_from_sym(ext), ext) or raise rescue raise "unknown symbol #{ext.inspect}" } + sc.fixup_check(bd) - memory_write sc.base_r, sc.encoded_r.data if sc.encoded_r.length > 0 - memory_write sc.base_w, sc.encoded_w.data if sc.encoded_w.length > 0 - memory_write sc.base_x, sc.encoded_x.data if 
sc.encoded_x.length > 0 + memory_write sc.base_r, sc.encoded_r.data if sc.encoded_r.length > 0 + memory_write sc.base_w, sc.encoded_w.data if sc.encoded_w.length > 0 + memory_write sc.base_x, sc.encoded_x.data if sc.encoded_x.length > 0 - memory_perm sc.base_r, sc.encoded_r.length, 'r' if sc.encoded_r.length > 0 - memory_perm sc.base_w, sc.encoded_w.length, 'rw' if sc.encoded_w.length > 0 - memory_perm sc.base_x, sc.encoded_x.length, 'rx' if sc.encoded_x.length > 0 + memory_perm sc.base_r, sc.encoded_r.length, 'r' if sc.encoded_r.length > 0 + memory_perm sc.base_w, sc.encoded_w.length, 'rw' if sc.encoded_w.length > 0 + memory_perm sc.base_x, sc.encoded_x.length, 'rx' if sc.encoded_x.length > 0 - sc - end + sc + end - def self.sc_map_resolve_addthunks(sc) - case host_cpu.shortname - when 'x64' - # patch 'call moo' into 'call thunk; thunk: jmp qword [moo_ptr]' - # this is similar to ELF PLT section, allowing code to call - # into a library mapped more than 4G away - # XXX handles only 'call extern', not 'lea reg, extern' or anything else - # in this case, the linker will still raise an 'immediate overflow' - # during fixup_check in sc_map_resolve - [sc.encoded_r, sc.encoded_w, sc.encoded_x].each { |edata| - edata.reloc.dup.each { |off, rel| - # target only call extern / jmp.i32 extern - next if rel.type != :i32 - next if rel.target.op != :- - next if edata.export[rel.target.rexpr] != off+4 - next if edata.export[rel.target.lexpr] - opc = edata.data[off-1, 1].unpack('C')[0] - next if opc != 0xe8 and opc != 0xe9 + def self.sc_map_resolve_addthunks(sc) + case host_cpu.shortname + when 'x64' + # patch 'call moo' into 'call thunk; thunk: jmp qword [moo_ptr]' + # this is similar to ELF PLT section, allowing code to call + # into a library mapped more than 4G away + # XXX handles only 'call extern', not 'lea reg, extern' or anything else + # in this case, the linker will still raise an 'immediate overflow' + # during fixup_check in sc_map_resolve + [sc.encoded_r, 
sc.encoded_w, sc.encoded_x].each { |edata| + edata.reloc.dup.each { |off, rel| + # target only call extern / jmp.i32 extern + next if rel.type != :i32 + next if rel.target.op != :- + next if edata.export[rel.target.rexpr] != off+4 + next if edata.export[rel.target.lexpr] + opc = edata.data[off-1, 1].unpack('C')[0] + next if opc != 0xe8 and opc != 0xe9 - thunk_sc = Shellcode.new(host_cpu).share_namespace(sc) - thunk = thunk_sc.assemble(<' - parse_c(proto) + # reads a bunch of C code, creates binding for those according to the prototypes + # handles enum/defines to define constants + # For each toplevel method prototype, it generates a ruby method in this module, the name is lowercased + # For each numeric macro/enum, it also generates an uppercase named constant + # When such a function is called with a lambda as argument, a callback is created for the duration of the call + # and destroyed afterwards ; use callback_alloc_c to get a callback id with longer life span + def self.new_api_c(proto, fromlib=nil) + proto += "\n;" # allow 'int foo()' and '#include ' + parse_c(proto) - cp.toplevel.symbol.dup.each_value { |v| - next if not v.kind_of? C::Variable # enums - cp.toplevel.symbol.delete v.name - lib = fromlib || lib_from_sym(v.name) - addr = sym_addr(lib, v.name) - if addr == 0 or addr == -1 or addr == 0xffff_ffff or addr == 0xffffffff_ffffffff - api_not_found(lib, v) - next - end + cp.toplevel.symbol.dup.each_value { |v| + next if not v.kind_of? C::Variable # enums + cp.toplevel.symbol.delete v.name + lib = fromlib || lib_from_sym(v.name) + addr = sym_addr(lib, v.name) + if addr == 0 or addr == -1 or addr == 0xffff_ffff or addr == 0xffffffff_ffffffff + api_not_found(lib, v) + next + end - rbname = c_func_name_to_rb(v.name) - if not v.type.kind_of? C::Function - # not a function, simply return the symbol address - # TODO struct/table access through hash/array ? 
- class << self ; self ; end.send(:define_method, rbname) { addr } - next - end - next if v.initializer # inline & stuff - puts "new_api_c: load method #{rbname} from #{lib}" if $DEBUG + rbname = c_func_name_to_rb(v.name) + if not v.type.kind_of? C::Function + # not a function, simply return the symbol address + # TODO struct/table access through hash/array ? + class << self ; self ; end.send(:define_method, rbname) { addr } + next + end + next if v.initializer # inline & stuff + puts "new_api_c: load method #{rbname} from #{lib}" if $DEBUG - new_caller_for(v, rbname, addr) - } + new_caller_for(v, rbname, addr) + } - # predeclare constants from enums - # macros are handled in const_missing (too slow to (re)do here everytime) - # TODO #define FOO(v) (v<<1)|1 => create ruby counterpart - cexist = constants.inject({}) { |h, c| h.update c.to_s => true } - cp.toplevel.symbol.each { |k, v| - if v.kind_of? ::Integer - n = c_const_name_to_rb(k) - const_set(n, v) if v.kind_of? Integer and not cexist[n] - end - } + # predeclare constants from enums + # macros are handled in const_missing (too slow to (re)do here everytime) + # TODO #define FOO(v) (v<<1)|1 => create ruby counterpart + cexist = constants.inject({}) { |h, c| h.update c.to_s => true } + cp.toplevel.symbol.each { |k, v| + if v.kind_of? ::Integer + n = c_const_name_to_rb(k) + const_set(n, v) if v.kind_of? 
Integer and not cexist[n] + end + } - # avoid WTF rb warning: toplevel const TRUE referenced by WinAPI::TRUE - cp.lexer.definition.each_key { |k| - n = c_const_name_to_rb(k) - if not cexist[n] and Object.const_defined?(n) and v = @cp.macro_numeric(n) - const_set(n, v) - end - } - end + # avoid WTF rb warning: toplevel const TRUE referenced by WinAPI::TRUE + cp.lexer.definition.each_key { |k| + n = c_const_name_to_rb(k) + if not cexist[n] and Object.const_defined?(n) and v = @cp.macro_numeric(n) + const_set(n, v) + end + } + end - # const_missing handler: will try to find a matching #define - def self.const_missing(c) - # infinite loop on autorequire C.. - return super(c) if not defined? @cp or not @cp + # const_missing handler: will try to find a matching #define + def self.const_missing(c) + # infinite loop on autorequire C.. + return super(c) if not defined? @cp or not @cp - cs = c.to_s - if @cp.lexer.definition[cs] - m = cs - else - m = @cp.lexer.definition.keys.find { |k| c_const_name_to_rb(k) == cs } - end + cs = c.to_s + if @cp.lexer.definition[cs] + m = cs + else + m = @cp.lexer.definition.keys.find { |k| c_const_name_to_rb(k) == cs } + end - if m and v = @cp.macro_numeric(m) - const_set(c, v) - v - else - super(c) - end - end + if m and v = @cp.macro_numeric(m) + const_set(c, v) + v + else + super(c) + end + end - # when defining ruby wrapper for C methods, the ruby method name is the string returned by this function from the C name - def self.c_func_name_to_rb(name) - n = name.to_s.gsub(/[^a-z0-9_]/i) { |c| c.unpack('H*')[0] }.downcase - n = "m#{n}" if n !~ /^[a-z]/ - n - end + # when defining ruby wrapper for C methods, the ruby method name is the string returned by this function from the C name + def self.c_func_name_to_rb(name) + n = name.to_s.gsub(/[^a-z0-9_]/i) { |c| c.unpack('H*')[0] }.downcase + n = "m#{n}" if n !~ /^[a-z]/ + n + end - # when defining ruby wrapper for C constants (numeric define/enum), the ruby const name is - # the string returned 
by this function from the C name. It should follow ruby standards (1st letter upcase) - def self.c_const_name_to_rb(name) - n = name.to_s.gsub(/[^a-z0-9_]/i) { |c| c.unpack('H*')[0] }.upcase - n = "C#{n}" if n !~ /^[A-Z]/ - n - end + # when defining ruby wrapper for C constants (numeric define/enum), the ruby const name is + # the string returned by this function from the C name. It should follow ruby standards (1st letter upcase) + def self.c_const_name_to_rb(name) + n = name.to_s.gsub(/[^a-z0-9_]/i) { |c| c.unpack('H*')[0] }.upcase + n = "C#{n}" if n !~ /^[A-Z]/ + n + end - def self.api_not_found(lib, func) - raise "could not find symbol #{func.name.inspect} in #{lib.inspect}" - end + def self.api_not_found(lib, func) + raise "could not find symbol #{func.name.inspect} in #{lib.inspect}" + end - # called whenever a native API is called through new_api_c/new_func_c/etc - def self.trace_invoke(api, args) - #p api - end + # called whenever a native API is called through new_api_c/new_func_c/etc + def self.trace_invoke(api, args) + #p api + end - # define a new method 'name' in the current module to invoke the raw method at addr addr - # translates ruby args to raw args using the specified prototype - def self.new_caller_for(proto, name, addr) - flags = 0 - flags |= 1 if proto.has_attribute('stdcall') - flags |= 2 if proto.has_attribute('fastcall') - flags |= 4 if proto.type.type.integral? and cp.sizeof(nil, proto.type.type) == 8 - flags |= 8 if proto.type.type.float? 
- class << self ; self ; end.send(:define_method, name) { |*a| - raise ArgumentError, "bad arg count for #{name}: #{a.length} for #{proto.type.args.to_a.length}" if a.length != proto.type.args.to_a.length and not proto.type.varargs + # define a new method 'name' in the current module to invoke the raw method at addr addr + # translates ruby args to raw args using the specified prototype + def self.new_caller_for(proto, name, addr) + flags = 0 + flags |= 1 if proto.has_attribute('stdcall') + flags |= 2 if proto.has_attribute('fastcall') + flags |= 4 if proto.type.type.integral? and cp.sizeof(nil, proto.type.type) == 8 + flags |= 8 if proto.type.type.float? + class << self ; self ; end.send(:define_method, name) { |*a| + raise ArgumentError, "bad arg count for #{name}: #{a.length} for #{proto.type.args.to_a.length}" if a.length != proto.type.args.to_a.length and not proto.type.varargs - # convert the arglist suitably for raw_invoke - auto_cb = [] # list of automatic C callbacks generated from lambdas - a = a.zip(proto.type.args.to_a).map { |ra, fa| - aa = convert_rb2c(fa, ra, :cb_list => auto_cb) - if fa and fa.type.integral? and cp.sizeof(fa) == 8 and host_cpu.size == 32 - aa = [aa & 0xffff_ffff, (aa >> 32) & 0xffff_ffff] - aa.reverse! if host_cpu.endianness != :little - end - aa - }.flatten + # convert the arglist suitably for raw_invoke + auto_cb = [] # list of automatic C callbacks generated from lambdas + a = a.zip(proto.type.args.to_a).map { |ra, fa| + aa = convert_rb2c(fa, ra, :cb_list => auto_cb) + if fa and fa.type.integral? and cp.sizeof(fa) == 8 and host_cpu.size == 32 + aa = [aa & 0xffff_ffff, (aa >> 32) & 0xffff_ffff] + aa.reverse! 
if host_cpu.endianness != :little + end + aa + }.flatten - trace_invoke(name, a) - # do it - ret = raw_invoke(addr, a, flags) + trace_invoke(name, a) + # do it + ret = raw_invoke(addr, a, flags) - # cleanup autogenerated callbacks - auto_cb.each { |cb| callback_free(cb) } + # cleanup autogenerated callbacks + auto_cb.each { |cb| callback_free(cb) } - # interpret return value - ret = convert_ret_c2rb(proto, ret) - } - end + # interpret return value + ret = convert_ret_c2rb(proto, ret) + } + end - # ruby object -> integer suitable as arg for raw_invoke - def self.convert_rb2c(formal, val, opts=nil) - case val - when String; str_ptr(val) - when Proc; cb = callback_alloc_cobj(formal, val) ; (opts[:cb_list] << cb if opts and opts[:cb_list]) ; cb - when C::AllocCStruct; str_ptr(val.str) + val.stroff - when Hash - if not formal.type.pointed.kind_of?(C::Struct) - raise "invalid argument #{val.inspect} for #{formal}, need a struct*" - end - buf = cp.alloc_c_struct(formal, val) - val.instance_variable_set('@rb2c', buf) # GC trick: lifetime(buf) >= lifetime(hash) (XXX or until next call to convert_rb2c) - str_ptr(buf.str) - #when Float; val # TODO handle that in raw_invoke C code - else - v = val.to_i rescue 0 # NaN, Infinity, etc - v = -v if v == -(1<<(cp.typesize[:ptr]*8-1)) # ruby bug... 
raise -0x8000_0000: out of ulong range - v - end - end + # ruby object -> integer suitable as arg for raw_invoke + def self.convert_rb2c(formal, val, opts=nil) + case val + when String; str_ptr(val) + when Proc; cb = callback_alloc_cobj(formal, val) ; (opts[:cb_list] << cb if opts and opts[:cb_list]) ; cb + when C::AllocCStruct; str_ptr(val.str) + val.stroff + when Hash + if not formal.type.pointed.kind_of?(C::Struct) + raise "invalid argument #{val.inspect} for #{formal}, need a struct*" + end + buf = cp.alloc_c_struct(formal, val) + val.instance_variable_set('@rb2c', buf) # GC trick: lifetime(buf) >= lifetime(hash) (XXX or until next call to convert_rb2c) + str_ptr(buf.str) + #when Float; val # TODO handle that in raw_invoke C code + else + v = val.to_i rescue 0 # NaN, Infinity, etc + v = -v if v == -(1<<(cp.typesize[:ptr]*8-1)) # ruby bug... raise -0x8000_0000: out of ulong range + v + end + end - # this method is called from the C part to run the ruby code corresponding to - # a given C callback allocated by callback_alloc_c - def self.callback_run(id, args) - cb = @@callback_table[id] - raise "invalid callback #{'%x' % id} not in #{@@callback_table.keys.map { |c| c.to_s(16) }}" if not cb + # this method is called from the C part to run the ruby code corresponding to + # a given C callback allocated by callback_alloc_c + def self.callback_run(id, args) + cb = @@callback_table[id] + raise "invalid callback #{'%x' % id} not in #{@@callback_table.keys.map { |c| c.to_s(16) }}" if not cb - rawargs = args.dup - if host_cpu.shortname == 'ia32' and (not cb[:proto_ori] or not cb[:proto_ori].has_attribute('fastcall')) - rawargs.shift - rawargs.shift - end - ra = cb[:proto] ? cb[:proto].args.map { |fa| convert_cbargs_c2rb(fa, rawargs) } : [] + rawargs = args.dup + if host_cpu.shortname == 'ia32' and (not cb[:proto_ori] or not cb[:proto_ori].has_attribute('fastcall')) + rawargs.shift + rawargs.shift + end + ra = cb[:proto] ? 
cb[:proto].args.map { |fa| convert_cbargs_c2rb(fa, rawargs) } : [] - # run it - ret = cb[:proc].call(*ra) + # run it + ret = cb[:proc].call(*ra) - # the C code expects to find in args[0] the amount of stack fixing needed for __stdcall callbacks - args[0] = cb[:abi_stackfix] || 0 - ret - end + # the C code expects to find in args[0] the amount of stack fixing needed for __stdcall callbacks + args[0] = cb[:abi_stackfix] || 0 + ret + end - # C raw cb arg -> ruby object - # will combine 2 32bit values for 1 64bit arg - def self.convert_cbargs_c2rb(formal, rawargs) - val = rawargs.shift - if formal.type.integral? and cp.sizeof(formal) == 8 and host_cpu.size == 32 - if host.cpu.endianness == :little - val |= rawargs.shift << 32 - else - val = (val << 32) | rawargs.shift - end - end + # C raw cb arg -> ruby object + # will combine 2 32bit values for 1 64bit arg + def self.convert_cbargs_c2rb(formal, rawargs) + val = rawargs.shift + if formal.type.integral? and cp.sizeof(formal) == 8 and host_cpu.size == 32 + if host.cpu.endianness == :little + val |= rawargs.shift << 32 + else + val = (val << 32) | rawargs.shift + end + end - convert_c2rb(formal, val) - end + convert_c2rb(formal, val) + end - # interpret a raw decoded C value to a ruby value according to the C prototype - # handles signedness etc - # XXX val is an integer, how to decode Floats etc ? raw binary ptr ? - def self.convert_c2rb(formal, val) - formal = formal.type if formal.kind_of? C::Variable - val &= (1 << 8*cp.sizeof(formal))-1 if formal.integral? - val = Expression.make_signed(val, 8*cp.sizeof(formal)) if formal.integral? and formal.signed? - val = nil if formal.pointer? and val == 0 - val - end + # interpret a raw decoded C value to a ruby value according to the C prototype + # handles signedness etc + # XXX val is an integer, how to decode Floats etc ? raw binary ptr ? + def self.convert_c2rb(formal, val) + formal = formal.type if formal.kind_of? 
C::Variable + val &= (1 << 8*cp.sizeof(formal))-1 if formal.integral? + val = Expression.make_signed(val, 8*cp.sizeof(formal)) if formal.integral? and formal.signed? + val = nil if formal.pointer? and val == 0 + val + end - # C raw ret -> ruby obj - # can be overridden for system-specific calling convention (eg return 0/-1 => raise an error) - def self.convert_ret_c2rb(fproto, ret) - fproto = fproto.type if fproto.kind_of? C::Variable - convert_c2rb(fproto.untypedef.type, ret) - end + # C raw ret -> ruby obj + # can be overridden for system-specific calling convention (eg return 0/-1 => raise an error) + def self.convert_ret_c2rb(fproto, ret) + fproto = fproto.type if fproto.kind_of? C::Variable + convert_c2rb(fproto.untypedef.type, ret) + end - def self.cp ; @cp ||= C::Parser.new(host_exe.new(host_cpu)) ; end - def self.cp=(c); @cp = c ; end + def self.cp ; @cp ||= C::Parser.new(host_exe.new(host_cpu)) ; end + def self.cp=(c); @cp = c ; end - # allocate a callback for a given C prototype (string) - # accepts full C functions (with body) (only 1 at a time) or toplevel 'asm' statement - def self.callback_alloc_c(proto, &b) - proto += ';' # allow 'int foo()' - parse_c(proto) - v = cp.toplevel.symbol.values.find_all { |v_| v_.kind_of? C::Variable and v_.type.kind_of? C::Function }.first - if (v and v.initializer) or cp.toplevel.statements.find { |st| st.kind_of? C::Asm } - cp.toplevel.statements.delete_if { |st| st.kind_of? C::Asm } - cp.toplevel.symbol.delete v.name if v - sc = sc_map_resolve(compile_c(proto)) - sc.base_x - elsif not v - raise 'empty prototype' - else - cp.toplevel.symbol.delete v.name - callback_alloc_cobj(v, b) - end - end + # allocate a callback for a given C prototype (string) + # accepts full C functions (with body) (only 1 at a time) or toplevel 'asm' statement + def self.callback_alloc_c(proto, &b) + proto += ';' # allow 'int foo()' + parse_c(proto) + v = cp.toplevel.symbol.values.find_all { |v_| v_.kind_of? C::Variable and v_.type.kind_of? 
C::Function }.first + if (v and v.initializer) or cp.toplevel.statements.find { |st| st.kind_of? C::Asm } + cp.toplevel.statements.delete_if { |st| st.kind_of? C::Asm } + cp.toplevel.symbol.delete v.name if v + sc = sc_map_resolve(compile_c(proto)) + sc.base_x + elsif not v + raise 'empty prototype' + else + cp.toplevel.symbol.delete v.name + callback_alloc_cobj(v, b) + end + end - # allocates a callback for a given C prototype (C variable, pointer to func accepted) - def self.callback_alloc_cobj(proto, b) - ori = proto - proto = proto.type if proto and proto.kind_of? C::Variable - proto = proto.pointed while proto and proto.pointer? - id = callback_find_id - cb = {} - cb[:id] = id - cb[:proc] = b - cb[:proto] = proto - cb[:proto_ori] = ori - cb[:abi_stackfix] = proto.args.inject(0) { |s, a| s + [cp.sizeof(a), cp.typesize[:ptr]].max } if ori and ori.has_attribute('stdcall') - cb[:abi_stackfix] = proto.args[2..-1].to_a.inject(0) { |s, a| s + [cp.sizeof(a), cp.typesize[:ptr]].max } if ori and ori.has_attribute('fastcall') # supercedes stdcall - @@callback_table[id] = cb - id - end + # allocates a callback for a given C prototype (C variable, pointer to func accepted) + def self.callback_alloc_cobj(proto, b) + ori = proto + proto = proto.type if proto and proto.kind_of? C::Variable + proto = proto.pointed while proto and proto.pointer? 
+ id = callback_find_id + cb = {} + cb[:id] = id + cb[:proc] = b + cb[:proto] = proto + cb[:proto_ori] = ori + cb[:abi_stackfix] = proto.args.inject(0) { |s, a| s + [cp.sizeof(a), cp.typesize[:ptr]].max } if ori and ori.has_attribute('stdcall') + cb[:abi_stackfix] = proto.args[2..-1].to_a.inject(0) { |s, a| s + [cp.sizeof(a), cp.typesize[:ptr]].max } if ori and ori.has_attribute('fastcall') # supercedes stdcall + @@callback_table[id] = cb + id + end - # releases a callback id, so that it may be reused by a later callback_alloc - def self.callback_free(id) - @@callback_table.delete id - end + # releases a callback id, so that it may be reused by a later callback_alloc + def self.callback_free(id) + @@callback_table.delete id + end - # finds a free callback id, allocates a new page if needed - def self.callback_find_id - if not id = @@callback_addrs.find { |a| not @@callback_table[a] } - page_size = 4096 - cb_page = memory_alloc(page_size) - sc = Shellcode.new(host_cpu, cb_page) - case sc.cpu.shortname - when 'ia32' - asm = "call #{CALLBACK_TARGET}" - when 'x64' - if (cb_page - CALLBACK_TARGET).abs >= 0x7fff_f000 - # cannot directly 'jmp CB_T' - asm = "1: mov rax, #{CALLBACK_TARGET} push rax lea rax, [rip-$_+1b] ret" - else - asm = "1: lea rax, [rip-$_+1b] jmp #{CALLBACK_TARGET}" - end - else - raise 'Who are you?' - end + # finds a free callback id, allocates a new page if needed + def self.callback_find_id + if not id = @@callback_addrs.find { |a| not @@callback_table[a] } + page_size = 4096 + cb_page = memory_alloc(page_size) + sc = Shellcode.new(host_cpu, cb_page) + case sc.cpu.shortname + when 'ia32' + asm = "call #{CALLBACK_TARGET}" + when 'x64' + if (cb_page - CALLBACK_TARGET).abs >= 0x7fff_f000 + # cannot directly 'jmp CB_T' + asm = "1: mov rax, #{CALLBACK_TARGET} push rax lea rax, [rip-$_+1b] ret" + else + asm = "1: lea rax, [rip-$_+1b] jmp #{CALLBACK_TARGET}" + end + else + raise 'Who are you?' 
+ end - # fill the page with valid callbacks - loop do - off = sc.encoded.length - sc.assemble asm - break if sc.encoded.length > page_size - @@callback_addrs << (cb_page + off) - end + # fill the page with valid callbacks + loop do + off = sc.encoded.length + sc.assemble asm + break if sc.encoded.length > page_size + @@callback_addrs << (cb_page + off) + end - memory_write cb_page, sc.encode_string[0, page_size] - memory_perm cb_page, page_size, 'rx' + memory_write cb_page, sc.encode_string[0, page_size] + memory_perm cb_page, page_size, 'rx' - raise 'callback_alloc bouh' if not id = @@callback_addrs.find { |a| not @@callback_table[a] } - end - id - end + raise 'callback_alloc bouh' if not id = @@callback_addrs.find { |a| not @@callback_table[a] } + end + id + end - # compile a bunch of C functions, defines methods in this module to call them - # returns the raw pointer to the code page - # if given a block, run the block and then undefine all the C functions & free memory - def self.new_func_c(src) - sc = sc_map_resolve(compile_c(src)) + # compile a bunch of C functions, defines methods in this module to call them + # returns the raw pointer to the code page + # if given a block, run the block and then undefine all the C functions & free memory + def self.new_func_c(src) + sc = sc_map_resolve(compile_c(src)) - parse_c(src) # XXX the Shellcode parser may have defined stuff / interpreted C another way... - defs = [] - cp.toplevel.symbol.dup.each_value { |v| - next if not v.kind_of? C::Variable - cp.toplevel.symbol.delete v.name - next if not v.type.kind_of? C::Function or not v.initializer - next if not off = sc.encoded_x.export[v.name] - rbname = c_func_name_to_rb(v.name) - new_caller_for(v, rbname, sc.base_x+off) - defs << rbname - } - if block_given? 
- begin - yield - ensure - defs.each { |d| class << self ; self ; end.send(:remove_method, d) } - memory_free sc.base_r if sc.base_r - memory_free sc.base_w if sc.base_w - memory_free sc.base_x if sc.base_x - end - else - sc.base_x - end - end + parse_c(src) # XXX the Shellcode parser may have defined stuff / interpreted C another way... + defs = [] + cp.toplevel.symbol.dup.each_value { |v| + next if not v.kind_of? C::Variable + cp.toplevel.symbol.delete v.name + next if not v.type.kind_of? C::Function or not v.initializer + next if not off = sc.encoded_x.export[v.name] + rbname = c_func_name_to_rb(v.name) + new_caller_for(v, rbname, sc.base_x+off) + defs << rbname + } + if block_given? + begin + yield + ensure + defs.each { |d| class << self ; self ; end.send(:remove_method, d) } + memory_free sc.base_r if sc.base_r + memory_free sc.base_w if sc.base_w + memory_free sc.base_x if sc.base_x + end + else + sc.base_x + end + end - # compile an asm sequence, callable with the ABI of the C prototype given - # function name comes from the prototype - # the shellcode is mapped in read-only memory unless selfmodifyingcode is true - # note that you can use a .data section for simple writable non-executable memory - def self.new_func_asm(proto, asm, selfmodifyingcode=false) - proto += "\n;" - old = cp.toplevel.symbol.keys - parse_c(proto) - news = cp.toplevel.symbol.keys - old - raise "invalid proto #{proto}" if news.length != 1 - f = cp.toplevel.symbol[news.first] - raise "invalid func proto #{proto}" if not f.name or not f.type.kind_of? 
C::Function or f.initializer - cp.toplevel.symbol.delete f.name + # compile an asm sequence, callable with the ABI of the C prototype given + # function name comes from the prototype + # the shellcode is mapped in read-only memory unless selfmodifyingcode is true + # note that you can use a .data section for simple writable non-executable memory + def self.new_func_asm(proto, asm, selfmodifyingcode=false) + proto += "\n;" + old = cp.toplevel.symbol.keys + parse_c(proto) + news = cp.toplevel.symbol.keys - old + raise "invalid proto #{proto}" if news.length != 1 + f = cp.toplevel.symbol[news.first] + raise "invalid func proto #{proto}" if not f.name or not f.type.kind_of? C::Function or f.initializer + cp.toplevel.symbol.delete f.name - sc = Shellcode_RWX.assemble(host_cpu, asm) - sc = sc_map_resolve(sc) - if selfmodifyingcode - memory_perm sc.base_x, sc.encoded_x.length, 'rwx' - end - rbname = c_func_name_to_rb(f.name) - new_caller_for(f, rbname, sc.base_x) - if block_given? - begin - yield - ensure - class << self ; self ; end.send(:remove_method, rbname) - memory_free sc.base_r if sc.base_r - memory_free sc.base_w if sc.base_w - memory_free sc.base_x - end - else - sc.base_x - end - end + sc = Shellcode_RWX.assemble(host_cpu, asm) + sc = sc_map_resolve(sc) + if selfmodifyingcode + memory_perm sc.base_x, sc.encoded_x.length, 'rwx' + end + rbname = c_func_name_to_rb(f.name) + new_caller_for(f, rbname, sc.base_x) + if block_given? 
+ begin + yield + ensure + class << self ; self ; end.send(:remove_method, rbname) + memory_free sc.base_r if sc.base_r + memory_free sc.base_w if sc.base_w + memory_free sc.base_x + end + else + sc.base_x + end + end - # allocate a C::AllocCStruct to hold a specific struct defined in a previous new_api_c - def self.alloc_c_struct(structname, values={}) - cp.alloc_c_struct(structname, values) - end + # allocate a C::AllocCStruct to hold a specific struct defined in a previous new_api_c + def self.alloc_c_struct(structname, values={}) + cp.alloc_c_struct(structname, values) + end - # return a C::AllocCStruct mapped over the string (with optionnal offset) - # str may be an EncodedData - def self.decode_c_struct(structname, str, off=0) - str = str.data if str.kind_of? EncodedData - cp.decode_c_struct(structname, str, off) - end + # return a C::AllocCStruct mapped over the string (with optionnal offset) + # str may be an EncodedData + def self.decode_c_struct(structname, str, off=0) + str = str.data if str.kind_of? 
EncodedData + cp.decode_c_struct(structname, str, off) + end - # allocate a C::AllocCStruct holding an Array of typename variables - # if len is an int, it holds the ary length, or it can be an array of initialisers - # eg alloc_c_ary("int", [4, 5, 28]) - def self.alloc_c_ary(typename, len) - cp.alloc_c_ary(typename, len) - end + # allocate a C::AllocCStruct holding an Array of typename variables + # if len is an int, it holds the ary length, or it can be an array of initialisers + # eg alloc_c_ary("int", [4, 5, 28]) + def self.alloc_c_ary(typename, len) + cp.alloc_c_ary(typename, len) + end - # return a C::AllocCStruct holding an array of type typename mapped over str - def self.decode_c_ary(typename, len, str, off=0) - cp.decode_c_ary(typename, len, str, off) - end + # return a C::AllocCStruct holding an array of type typename mapped over str + def self.decode_c_ary(typename, len, str, off=0) + cp.decode_c_ary(typename, len, str, off) + end - # return an AllocCStruct holding an array of 1 element of type typename - # access its value with obj[0] - # useful when you need a pointer to an int that will be filled by an API: use alloc_c_ptr('int') - def self.alloc_c_ptr(typename, init=nil) - cp.alloc_c_ary(typename, (init ? [init] : 1)) - end + # return an AllocCStruct holding an array of 1 element of type typename + # access its value with obj[0] + # useful when you need a pointer to an int that will be filled by an API: use alloc_c_ptr('int') + def self.alloc_c_ptr(typename, init=nil) + cp.alloc_c_ary(typename, (init ? 
[init] : 1)) + end - # return the binary version of a ruby value encoded as a C variable - # only integral types handled for now - def self.encode_c_value(var, val) - cp.encode_c_value(var, val) - end + # return the binary version of a ruby value encoded as a C variable + # only integral types handled for now + def self.encode_c_value(var, val) + cp.encode_c_value(var, val) + end - # decode a C variable - # only integral types handled for now - def self.decode_c_value(str, var, off=0) - cp.decode_c_value(str, var, off) - end + # decode a C variable + # only integral types handled for now + def self.decode_c_value(str, var, off=0) + cp.decode_c_value(str, var, off) + end - # read a 0-terminated string from memory - def self.memory_read_strz(ptr, szmax=4096) - # read up to the end of the ptr memory page - pglim = (ptr + 0x1000) & ~0xfff - sz = [pglim-ptr, szmax].min - data = memory_read(ptr, sz) - return data[0, data.index(?\0)] if data.index(?\0) - if sz < szmax - data = memory_read(ptr, szmax) - data = data[0, data.index(?\0)] if data.index(?\0) - end - data - end + # read a 0-terminated string from memory + def self.memory_read_strz(ptr, szmax=4096) + # read up to the end of the ptr memory page + pglim = (ptr + 0x1000) & ~0xfff + sz = [pglim-ptr, szmax].min + data = memory_read(ptr, sz) + return data[0, data.index(?\0)] if data.index(?\0) + if sz < szmax + data = memory_read(ptr, szmax) + data = data[0, data.index(?\0)] if data.index(?\0) + end + data + end - # read a 0-terminated wide string from memory - def self.memory_read_wstrz(ptr, szmax=4096) - # read up to the end of the ptr memory page - pglim = (ptr + 0x1000) & ~0xfff - sz = [pglim-ptr, szmax].min - data = memory_read(ptr, sz) - if i = data.unpack('v*').index(0) - return data[0, 2*i] - end - if sz < szmax - data = memory_read(ptr, szmax) - data = data[0, 2*i] if i = data.unpack('v*').index(0) - end - data - end + # read a 0-terminated wide string from memory + def self.memory_read_wstrz(ptr, szmax=4096) 
+ # read up to the end of the ptr memory page + pglim = (ptr + 0x1000) & ~0xfff + sz = [pglim-ptr, szmax].min + data = memory_read(ptr, sz) + if i = data.unpack('v*').index(0) + return data[0, 2*i] + end + if sz < szmax + data = memory_read(ptr, szmax) + data = data[0, 2*i] if i = data.unpack('v*').index(0) + end + data + end - # automatically build/load the bin module - start + # automatically build/load the bin module + start - case host_arch - when :windows + case host_arch + when :windows - new_api_c < PAGE_READONLY, 'rw' => PAGE_READWRITE, 'rx' => PAGE_EXECUTE_READ, - 'rwx' => PAGE_EXECUTE_READWRITE }[perm.to_s.downcase] - virtualprotect(addr, len, perm, str_ptr([0].pack('C')*8)) - end + # change memory permissions - perm in [r rw rx rwx] + def self.memory_perm(addr, len, perm) + perm = { 'r' => PAGE_READONLY, 'rw' => PAGE_READWRITE, 'rx' => PAGE_EXECUTE_READ, + 'rwx' => PAGE_EXECUTE_READWRITE }[perm.to_s.downcase] + virtualprotect(addr, len, perm, str_ptr([0].pack('C')*8)) + end - when :linux + when :linux - new_api_c < 0 and @memory_perm_wd ||= find_write_dir - # We are on a PaX-mprotected system. Try to use a file mapping to work aroud. - Dir.chdir(@memory_perm_wd) { - fname = 'tmp_mprot_%d_%x' % [Process.pid, addr] - data = memory_read(addr, len) - begin - File.open(fname, 'w') { |fd| fd.write data } - # reopen to ensure filesystem flush - rret = File.open(fname, 'r') { |fd| mmap(addr, len, p, MAP_FIXED|MAP_PRIVATE, fd.fileno, 0) } - raise 'hax' if data != memory_read(addr, len) - ret = 0 if rret == addr - ensure - File.unlink(fname) rescue nil - end - } - end + if ret != 0 and perm.include?('x') and not perm.include?('w') and len > 0 and @memory_perm_wd ||= find_write_dir + # We are on a PaX-mprotected system. Try to use a file mapping to work aroud. 
+ Dir.chdir(@memory_perm_wd) { + fname = 'tmp_mprot_%d_%x' % [Process.pid, addr] + data = memory_read(addr, len) + begin + File.open(fname, 'w') { |fd| fd.write data } + # reopen to ensure filesystem flush + rret = File.open(fname, 'r') { |fd| mmap(addr, len, p, MAP_FIXED|MAP_PRIVATE, fd.fileno, 0) } + raise 'hax' if data != memory_read(addr, len) + ret = 0 if rret == addr + ensure + File.unlink(fname) rescue nil + end + } + end - ret - end + ret + end - end + end end end diff --git a/lib/metasm/metasm/encode.rb b/lib/metasm/metasm/encode.rb index ef6fa74f7e..cdd79b0eb1 100644 --- a/lib/metasm/metasm/encode.rb +++ b/lib/metasm/metasm/encode.rb @@ -8,335 +8,335 @@ require 'metasm/main' module Metasm class ExeFormat - # encodes an Array of source (Label/Data/Instruction etc) to an EncodedData - # resolves ambiguities using +encode_resolve+ - def assemble_sequence(seq, cpu) - # an array of edata or sub-array of ambiguous edata - # its last element is always an edata - ary = [EncodedData.new] + # encodes an Array of source (Label/Data/Instruction etc) to an EncodedData + # resolves ambiguities using +encode_resolve+ + def assemble_sequence(seq, cpu) + # an array of edata or sub-array of ambiguous edata + # its last element is always an edata + ary = [EncodedData.new] - seq.each { |e| - case e - when Label; ary.last.add_export(e.name, ary.last.virtsize) - when Data; ary.last << e.encode(cpu.endianness) - when Align, Padding - e.fillwith = e.fillwith.encode(cpu.endianness) if e.fillwith and not e.fillwith.kind_of? 
EncodedData - ary << e << EncodedData.new - when Offset; ary << e << EncodedData.new - when Instruction - case i = cpu.encode_instruction(self, e) - when Array - case i.length - when 0; raise EncodeError, "failed to encode #{e}" - when 1; ary.last << i.first - else ary << i << EncodedData.new # to solve later - end - else - ary.last << i - end - end - } + seq.each { |e| + case e + when Label; ary.last.add_export(e.name, ary.last.virtsize) + when Data; ary.last << e.encode(cpu.endianness) + when Align, Padding + e.fillwith = e.fillwith.encode(cpu.endianness) if e.fillwith and not e.fillwith.kind_of? EncodedData + ary << e << EncodedData.new + when Offset; ary << e << EncodedData.new + when Instruction + case i = cpu.encode_instruction(self, e) + when Array + case i.length + when 0; raise EncodeError, "failed to encode #{e}" + when 1; ary.last << i.first + else ary << i << EncodedData.new # to solve later + end + else + ary.last << i + end + end + } - edata = (ary.length > 1) ? assemble_resolve(ary) : ary.shift - edata.fixup edata.binding - edata - end + edata = (ary.length > 1) ? assemble_resolve(ary) : ary.shift + edata.fixup edata.binding + edata + end - # chose among multiple possible sub-EncodedData - # assumes all ambiguous edata have the equivallent relocations in the same order - def assemble_resolve(ary) - startlabel = new_label('section_start') + # chose among multiple possible sub-EncodedData + # assumes all ambiguous edata have the equivallent relocations in the same order + def assemble_resolve(ary) + startlabel = new_label('section_start') - # create two bindings where all elements are the shortest/longest possible - minbinding = {} - minoff = 0 - maxbinding = {} - maxoff = 0 + # create two bindings where all elements are the shortest/longest possible + minbinding = {} + minoff = 0 + maxbinding = {} + maxoff = 0 - ary.each { |elem| - case elem - when Array - if elem.all? { |ed| ed.kind_of? EncodedData and ed.reloc.empty? 
} - elem = [elem.sort_by { |ed| ed.length }.first] - end - elem.each { |e| - e.export.each { |label, off| - minbinding[label] = Expression[startlabel, :+, minoff + off] - maxbinding[label] = Expression[startlabel, :+, maxoff + off] - } - } - minoff += elem.map { |e| e.virtsize }.min - maxoff += elem.map { |e| e.virtsize }.max + ary.each { |elem| + case elem + when Array + if elem.all? { |ed| ed.kind_of? EncodedData and ed.reloc.empty? } + elem = [elem.sort_by { |ed| ed.length }.first] + end + elem.each { |e| + e.export.each { |label, off| + minbinding[label] = Expression[startlabel, :+, minoff + off] + maxbinding[label] = Expression[startlabel, :+, maxoff + off] + } + } + minoff += elem.map { |e| e.virtsize }.min + maxoff += elem.map { |e| e.virtsize }.max - when EncodedData - elem.export.each { |label, off| - minbinding[label] = Expression[startlabel, :+, minoff + off] - maxbinding[label] = Expression[startlabel, :+, maxoff + off] - } - minoff += elem.virtsize - maxoff += elem.virtsize + when EncodedData + elem.export.each { |label, off| + minbinding[label] = Expression[startlabel, :+, minoff + off] + maxbinding[label] = Expression[startlabel, :+, maxoff + off] + } + minoff += elem.virtsize + maxoff += elem.virtsize - when Align - minoff += 0 - maxoff += elem.val - 1 + when Align + minoff += 0 + maxoff += elem.val - 1 - when Padding - # find the surrounding Offsets and compute the largest/shortest edata sizes to determine min/max length for the padding - prevoff = ary[0..ary.index(elem)].grep(Offset).last - nextoff = ary[ary.index(elem)..-1].grep(Offset).first - raise elem, 'need .offset after .pad' if not nextoff + when Padding + # find the surrounding Offsets and compute the largest/shortest edata sizes to determine min/max length for the padding + prevoff = ary[0..ary.index(elem)].grep(Offset).last + nextoff = ary[ary.index(elem)..-1].grep(Offset).first + raise elem, 'need .offset after .pad' if not nextoff - # find all elements between the surrounding Offsets 
- previdx = prevoff ? ary.index(prevoff) + 1 : 0 - surround = ary[previdx..ary.index(nextoff)-1] - surround.delete elem - if surround.find { |nelem| nelem.kind_of? Padding } - raise elem, 'need .offset beetween two .pad' - end - if surround.find { |nelem| nelem.kind_of? Align and ary.index(nelem) > ary.index(elem) } - raise elem, 'cannot .align after a .pad' # XXX really ? - end + # find all elements between the surrounding Offsets + previdx = prevoff ? ary.index(prevoff) + 1 : 0 + surround = ary[previdx..ary.index(nextoff)-1] + surround.delete elem + if surround.find { |nelem| nelem.kind_of? Padding } + raise elem, 'need .offset beetween two .pad' + end + if surround.find { |nelem| nelem.kind_of? Align and ary.index(nelem) > ary.index(elem) } + raise elem, 'cannot .align after a .pad' # XXX really ? + end - # lenmin/lenmax are the extrem length of the Padding - nxt = Expression[nextoff.val] - ext = nxt.externals - raise elem, "bad offset #{nxt}" if ext.length > 1 or (ext.length == 1 and not minbinding[ext.first]) - nxt = Expression[nxt, :-, startlabel] if not nxt.bind(minbinding).reduce.kind_of? ::Integer - prv = Expression[prevoff ? prevoff.val : 0] - ext = prv.externals - raise elem, "bad offset #{prv}" if ext.length > 1 or (ext.length == 1 and not minbinding[ext.first]) - prv = Expression[prv, :-, startlabel] if not prv.bind(minbinding).reduce.kind_of? ::Integer + # lenmin/lenmax are the extrem length of the Padding + nxt = Expression[nextoff.val] + ext = nxt.externals + raise elem, "bad offset #{nxt}" if ext.length > 1 or (ext.length == 1 and not minbinding[ext.first]) + nxt = Expression[nxt, :-, startlabel] if not nxt.bind(minbinding).reduce.kind_of? ::Integer + prv = Expression[prevoff ? prevoff.val : 0] + ext = prv.externals + raise elem, "bad offset #{prv}" if ext.length > 1 or (ext.length == 1 and not minbinding[ext.first]) + prv = Expression[prv, :-, startlabel] if not prv.bind(minbinding).reduce.kind_of? 
::Integer - lenmin = Expression[nxt.bind(minbinding), :-, prv.bind(maxbinding)].reduce - lenmax = Expression[nxt.bind(maxbinding), :-, prv.bind(minbinding)].reduce - raise elem, "bad labels: #{lenmin}" if not lenmin.kind_of? ::Integer or not lenmax.kind_of? ::Integer - surround.each { |nelem| - case nelem - when Array - lenmin -= nelem.map { |e| e.virtsize }.max - lenmax -= nelem.map { |e| e.virtsize }.min - when EncodedData - lenmin -= nelem.virtsize - lenmax -= nelem.virtsize - when Align - lenmin -= nelem.val - 1 - lenmax -= 0 - end - } - raise elem, "no room for .pad before '.offset #{nextoff.val}' at #{Backtrace.backtrace_str(nextoff.backtrace)}, need at least #{-lenmax} more bytes" if lenmax < 0 - minoff += [lenmin, 0].max - maxoff += lenmax + lenmin = Expression[nxt.bind(minbinding), :-, prv.bind(maxbinding)].reduce + lenmax = Expression[nxt.bind(maxbinding), :-, prv.bind(minbinding)].reduce + raise elem, "bad labels: #{lenmin}" if not lenmin.kind_of? ::Integer or not lenmax.kind_of? ::Integer + surround.each { |nelem| + case nelem + when Array + lenmin -= nelem.map { |e| e.virtsize }.max + lenmax -= nelem.map { |e| e.virtsize }.min + when EncodedData + lenmin -= nelem.virtsize + lenmax -= nelem.virtsize + when Align + lenmin -= nelem.val - 1 + lenmax -= 0 + end + } + raise elem, "no room for .pad before '.offset #{nextoff.val}' at #{Backtrace.backtrace_str(nextoff.backtrace)}, need at least #{-lenmax} more bytes" if lenmax < 0 + minoff += [lenmin, 0].max + maxoff += lenmax - when Offset - # nothing to do for now - else - raise "Internal error: bad object #{elem.inspect} in encode_resolve" - end - } + when Offset + # nothing to do for now + else + raise "Internal error: bad object #{elem.inspect} in encode_resolve" + end + } - # checks an expression linearity - check_linear = lambda { |expr| - expr = expr.reduce if expr.kind_of? Expression - while expr.kind_of? Expression - case expr.op - when :* - if expr.lexpr.kind_of? 
Numeric; expr = expr.rexpr - elsif expr.rexpr.kind_of? Numeric; expr = expr.lexpr - else break - end - when :/, :>>, :<< - if expr.rexpr.kind_of? Numeric; expr = expr.lexpr - else break - end - when :+, :- - if not expr.lexpr; expr = expr.rexpr - elsif expr.lexpr.kind_of? Numeric; expr = expr.rexpr - elsif expr.rexpr.kind_of? Numeric; expr = expr.lexpr - else - break if not check_linear[expr.rexpr] - expr = expr.lexpr - end - else break - end - end + # checks an expression linearity + check_linear = lambda { |expr| + expr = expr.reduce if expr.kind_of? Expression + while expr.kind_of? Expression + case expr.op + when :* + if expr.lexpr.kind_of? Numeric; expr = expr.rexpr + elsif expr.rexpr.kind_of? Numeric; expr = expr.lexpr + else break + end + when :/, :>>, :<< + if expr.rexpr.kind_of? Numeric; expr = expr.lexpr + else break + end + when :+, :- + if not expr.lexpr; expr = expr.rexpr + elsif expr.lexpr.kind_of? Numeric; expr = expr.rexpr + elsif expr.rexpr.kind_of? Numeric; expr = expr.lexpr + else + break if not check_linear[expr.rexpr] + expr = expr.lexpr + end + else break + end + end - not expr.kind_of? Expression - } + not expr.kind_of? Expression + } - # now we can resolve all relocations - # for linear expressions of internal variables (ie differences of labels from the ary): - # - calc target numeric bounds, and reject relocs not accepting worst case value - # - else reject all but largest place available - # then chose the shortest overall EData left - ary.map! { |elem| - case elem - when Array - # for each external, compute numeric target values using minbinding[external] and maxbinding[external] - # this gives us all extrem values for linear expressions - target_bounds = {} - rec_checkminmax = lambda { |idx, target, binding, extlist| - if extlist.empty? 
- (target_bounds[idx] ||= []) << target.bind(binding).reduce - else - rec_checkminmax[idx, target, binding.merge(extlist.last => minbinding[extlist.last]), extlist[0...-1]] - rec_checkminmax[idx, target, binding.merge(extlist.last => maxbinding[extlist.last]), extlist[0...-1]] - end - } - # biggest size disponible for this relocation (for non-linear/external) - wantsize = {} + # now we can resolve all relocations + # for linear expressions of internal variables (ie differences of labels from the ary): + # - calc target numeric bounds, and reject relocs not accepting worst case value + # - else reject all but largest place available + # then chose the shortest overall EData left + ary.map! { |elem| + case elem + when Array + # for each external, compute numeric target values using minbinding[external] and maxbinding[external] + # this gives us all extrem values for linear expressions + target_bounds = {} + rec_checkminmax = lambda { |idx, target, binding, extlist| + if extlist.empty? + (target_bounds[idx] ||= []) << target.bind(binding).reduce + else + rec_checkminmax[idx, target, binding.merge(extlist.last => minbinding[extlist.last]), extlist[0...-1]] + rec_checkminmax[idx, target, binding.merge(extlist.last => maxbinding[extlist.last]), extlist[0...-1]] + end + } + # biggest size disponible for this relocation (for non-linear/external) + wantsize = {} - elem.each { |e| - e.reloc.sort.each_with_index { |r_, i| - r = r_[1] - # has external ref - if not r.target.bind(minbinding).reduce.kind_of?(Numeric) or not check_linear[r.target] - # find the biggest relocation type for the current target - wantsize[i] = elem.map { |edata| - edata.reloc.sort[i][1].type - }.sort_by { |type| Expression::INT_SIZE[type] }.last # XXX do not use rel.length - else - rec_checkminmax[i, r.target, {}, r.target.externals] - end - } - } + elem.each { |e| + e.reloc.sort.each_with_index { |r_, i| + r = r_[1] + # has external ref + if not r.target.bind(minbinding).reduce.kind_of?(Numeric) or 
not check_linear[r.target] + # find the biggest relocation type for the current target + wantsize[i] = elem.map { |edata| + edata.reloc.sort[i][1].type + }.sort_by { |type| Expression::INT_SIZE[type] }.last # XXX do not use rel.length + else + rec_checkminmax[i, r.target, {}, r.target.externals] + end + } + } - # reject candidates with reloc type too small - acceptable = elem.find_all { |edata| - r = edata.reloc.sort - (0...r.length).all? { |i| - if wantsize[i] - r[i][1].type == wantsize[i] - else - target_bounds[i].all? { |b| Expression.in_range?(b, r[i][1].type) } - end - } - } + # reject candidates with reloc type too small + acceptable = elem.find_all { |edata| + r = edata.reloc.sort + (0...r.length).all? { |i| + if wantsize[i] + r[i][1].type == wantsize[i] + else + target_bounds[i].all? { |b| Expression.in_range?(b, r[i][1].type) } + end + } + } - raise EncodeError, "cannot find candidate in #{elem.inspect}, immediate too big #{wantsize.inspect} #{target_bounds.inspect}" if acceptable.empty? + raise EncodeError, "cannot find candidate in #{elem.inspect}, immediate too big #{wantsize.inspect} #{target_bounds.inspect}" if acceptable.empty? - # keep the shortest - acceptable.sort_by { |edata| edata.virtsize }.first - else - elem - end - } + # keep the shortest + acceptable.sort_by { |edata| edata.virtsize }.first + else + elem + end + } - # assemble all parts, resolve padding sizes, check offset directives - edata = EncodedData.new + # assemble all parts, resolve padding sizes, check offset directives + edata = EncodedData.new - # fills edata with repetitions of data until targetsize - fillwith = lambda { |targetsize, data| - if data - if data.reloc.empty? and not data.data.empty? 
# avoid useless iterations - nr = (targetsize-edata.virtsize) / data.length - 1 - if nr > 0 - dat = data.data.ljust(data.virtsize, 0.chr) - edata << (dat * nr) - end - end - while edata.virtsize + data.virtsize <= targetsize - edata << data - end - if edata.virtsize < targetsize - edata << data[0, targetsize - edata.virtsize] - end - else - edata.virtsize = targetsize - end - } + # fills edata with repetitions of data until targetsize + fillwith = lambda { |targetsize, data| + if data + if data.reloc.empty? and not data.data.empty? # avoid useless iterations + nr = (targetsize-edata.virtsize) / data.length - 1 + if nr > 0 + dat = data.data.ljust(data.virtsize, 0.chr) + edata << (dat * nr) + end + end + while edata.virtsize + data.virtsize <= targetsize + edata << data + end + if edata.virtsize < targetsize + edata << data[0, targetsize - edata.virtsize] + end + else + edata.virtsize = targetsize + end + } - ary.each { |elem| - case elem - when EncodedData - edata << elem - when Align - fillwith[EncodedData.align_size(edata.virtsize, elem.val), elem.fillwith] - when Offset - raise EncodeError, "could not enforce .offset #{elem.val} #{elem.backtrace}: offset now #{edata.virtsize}" if edata.virtsize != Expression[elem.val].bind(edata.binding(0)).reduce - when Padding - nextoff = ary[ary.index(elem)..-1].grep(Offset).first - targetsize = Expression[nextoff.val].bind(edata.binding(0)).reduce - ary[ary.index(elem)+1..ary.index(nextoff)-1].each { |nelem| targetsize -= nelem.virtsize } - raise EncodeError, "no room for .pad #{elem.backtrace_str} before .offset #{nextoff.val}, would be #{targetsize-edata.length} bytes long" if targetsize < edata.length - fillwith[targetsize, elem.fillwith] - else raise "Internal error: #{elem.inspect}" - end - } + ary.each { |elem| + case elem + when EncodedData + edata << elem + when Align + fillwith[EncodedData.align_size(edata.virtsize, elem.val), elem.fillwith] + when Offset + raise EncodeError, "could not enforce .offset #{elem.val} 
#{elem.backtrace}: offset now #{edata.virtsize}" if edata.virtsize != Expression[elem.val].bind(edata.binding(0)).reduce + when Padding + nextoff = ary[ary.index(elem)..-1].grep(Offset).first + targetsize = Expression[nextoff.val].bind(edata.binding(0)).reduce + ary[ary.index(elem)+1..ary.index(nextoff)-1].each { |nelem| targetsize -= nelem.virtsize } + raise EncodeError, "no room for .pad #{elem.backtrace_str} before .offset #{nextoff.val}, would be #{targetsize-edata.length} bytes long" if targetsize < edata.length + fillwith[targetsize, elem.fillwith] + else raise "Internal error: #{elem.inspect}" + end + } - edata - end + edata + end end class Expression - def encode(type, endianness, backtrace=nil) - case val = reduce - when Integer; EncodedData.new Expression.encode_imm(val, type, endianness, backtrace) - else - str = case INT_SIZE[type] - when 8; "\0" - when 16; "\0\0" - when 32; "\0\0\0\0" - when 64; "\0\0\0\0\0\0\0\0" - else [0].pack('C')*(INT_SIZE[type]/8) - end - str = str.force_encoding('BINARY') if str.respond_to?(:force_encoding) - EncodedData.new(str, :reloc => {0 => Relocation.new(self, type, endianness, backtrace)}) - end - end + def encode(type, endianness, backtrace=nil) + case val = reduce + when Integer; EncodedData.new Expression.encode_imm(val, type, endianness, backtrace) + else + str = case INT_SIZE[type] + when 8; "\0" + when 16; "\0\0" + when 32; "\0\0\0\0" + when 64; "\0\0\0\0\0\0\0\0" + else [0].pack('C')*(INT_SIZE[type]/8) + end + str = str.force_encoding('BINARY') if str.respond_to?(:force_encoding) + EncodedData.new(str, :reloc => {0 => Relocation.new(self, type, endianness, backtrace)}) + end + end - class << self - def encode_imm(val, type, endianness, backtrace=nil) - type = INT_SIZE.keys.find { |k| k.to_s[0] == ?a and INT_SIZE[k] == 8*type } if type.kind_of? ::Integer - endianness = endianness.endianness if not endianness.kind_of? ::Symbol - raise "unsupported endianness #{endianness.inspect}" unless [:big, :little].include? 
endianness - raise(EncodeError, "immediate overflow #{type.inspect} #{Expression[val]} #{(Backtrace::backtrace_str(backtrace) if backtrace)}") if not in_range?(val, type) - s = (0...INT_SIZE[type]/8).map { |i| (val >> (8*i)) & 0xff }.pack('C*') - endianness != :little ? s.reverse : s - end - alias encode_immediate encode_imm - end + class << self + def encode_imm(val, type, endianness, backtrace=nil) + type = INT_SIZE.keys.find { |k| k.to_s[0] == ?a and INT_SIZE[k] == 8*type } if type.kind_of? ::Integer + endianness = endianness.endianness if not endianness.kind_of? ::Symbol + raise "unsupported endianness #{endianness.inspect}" unless [:big, :little].include? endianness + raise(EncodeError, "immediate overflow #{type.inspect} #{Expression[val]} #{(Backtrace::backtrace_str(backtrace) if backtrace)}") if not in_range?(val, type) + s = (0...INT_SIZE[type]/8).map { |i| (val >> (8*i)) & 0xff }.pack('C*') + endianness != :little ? s.reverse : s + end + alias encode_immediate encode_imm + end end class Data - def encode(endianness) - edata = case @data - when :uninitialized - EncodedData.new('', :virtsize => Expression::INT_SIZE[INT_TYPE[@type]]/8) - when String - # db 'foo' => 'foo' # XXX could be optimised, but should not be significant - # dw 'foo' => "f\0o\0o\0" / "\0f\0o\0o" - @data.unpack('C*').inject(EncodedData.new) { |ed, chr| ed << Expression.encode_imm(chr, INT_TYPE[@type], endianness, @backtrace) } - when Expression - @data.encode INT_TYPE[@type], endianness, @backtrace - when Array - @data.inject(EncodedData.new) { |ed, d| ed << d.encode(endianness) } - end + def encode(endianness) + edata = case @data + when :uninitialized + EncodedData.new('', :virtsize => Expression::INT_SIZE[INT_TYPE[@type]]/8) + when String + # db 'foo' => 'foo' # XXX could be optimised, but should not be significant + # dw 'foo' => "f\0o\0o\0" / "\0f\0o\0o" + @data.unpack('C*').inject(EncodedData.new) { |ed, chr| ed << Expression.encode_imm(chr, INT_TYPE[@type], endianness, @backtrace) 
} + when Expression + @data.encode INT_TYPE[@type], endianness, @backtrace + when Array + @data.inject(EncodedData.new) { |ed, d| ed << d.encode(endianness) } + end - # n times - (0...@count).inject(EncodedData.new) { |ed, cnt| ed << edata } - end + # n times + (0...@count).inject(EncodedData.new) { |ed, cnt| ed << edata } + end end class CPU - # returns an EncodedData or an ary of them - # uses +#parse_arg_valid?+ to find the opcode whose signature matches with the instruction - # uses +encode_instr_op+ (arch-specific) - def encode_instruction(program, i) - errmsg = '' - oplist = opcode_list_byname[i.opname].to_a.find_all { |o| - o.args.length == i.args.length and - o.args.zip(i.args).all? { |f, a| parse_arg_valid?(o, f, a) } - }.map { |op| - begin - encode_instr_op(program, i, op) - rescue EncodeError - errmsg = " (#{$!.message})" - nil - end - }.compact.flatten - raise EncodeError, "no matching opcode found for #{i}#{errmsg}" if oplist.empty? - oplist.each { |ed| ed.reloc.each_value { |v| v.backtrace = i.backtrace } } - oplist - end + # returns an EncodedData or an ary of them + # uses +#parse_arg_valid?+ to find the opcode whose signature matches with the instruction + # uses +encode_instr_op+ (arch-specific) + def encode_instruction(program, i) + errmsg = '' + oplist = opcode_list_byname[i.opname].to_a.find_all { |o| + o.args.length == i.args.length and + o.args.zip(i.args).all? { |f, a| parse_arg_valid?(o, f, a) } + }.map { |op| + begin + encode_instr_op(program, i, op) + rescue EncodeError + errmsg = " (#{$!.message})" + nil + end + }.compact.flatten + raise EncodeError, "no matching opcode found for #{i}#{errmsg}" if oplist.empty? 
+ oplist.each { |ed| ed.reloc.each_value { |v| v.backtrace = i.backtrace } } + oplist + end end end diff --git a/lib/metasm/metasm/exe_format/a_out.rb b/lib/metasm/metasm/exe_format/a_out.rb index 49ec2795f3..8972e74598 100644 --- a/lib/metasm/metasm/exe_format/a_out.rb +++ b/lib/metasm/metasm/exe_format/a_out.rb @@ -9,186 +9,189 @@ require 'metasm/decode' module Metasm class AOut < ExeFormat - MAGIC = { 0407 => 'OMAGIC', 0410 => 'NMAGIC', 0413 => 'ZMAGIC', - 0314 => 'QMAGIC', 0421 => 'CMAGIC' - } - MACHINE_TYPE = { 0 => 'OLDSUN2', 1 => '68010', 2 => '68020', - 3 => 'SPARC', 100 => 'PC386', 134 => 'I386', 135 => 'M68K', - 136 => 'M68K4K', 137 => 'NS32532', 138 => 'SPARC', - 139 => 'PMAX', 140 => 'VAX', 141 => 'ALPHA', 142 => 'MIPS', - 143 => 'ARM6', 151 => 'MIPS1', 152 => 'MIPS2', 300 => 'HP300', - 0x20B => 'HPUX800', 0x20C => 'HPUX' - } - FLAGS = { 0x10 => 'PIC', 0x20 => 'DYNAMIC' } - SYMBOL_TYPE = { 0 => 'UNDF', 1 => 'ABS', 2 => 'TEXT', - 3 => 'DATA', 4 => 'BSS', 5 => 'INDR', 6 => 'SIZE', - 9 => 'COMM', 10=> 'SETA', 11=> 'SETT', 12=> 'SETD', - 13=> 'SETB', 14=> 'SETV', 15=> 'FN' - } + MAGIC = { 0407 => 'OMAGIC', 0410 => 'NMAGIC', 0413 => 'ZMAGIC', + 0314 => 'QMAGIC', 0421 => 'CMAGIC' + } + MACHINE_TYPE = { 0 => 'OLDSUN2', 1 => '68010', 2 => '68020', + 3 => 'SPARC', 100 => 'PC386', 134 => 'I386', 135 => 'M68K', + 136 => 'M68K4K', 137 => 'NS32532', 138 => 'SPARC', + 139 => 'PMAX', 140 => 'VAX', 141 => 'ALPHA', 142 => 'MIPS', + 143 => 'ARM6', 151 => 'MIPS1', 152 => 'MIPS2', 300 => 'HP300', + 0x20B => 'HPUX800', 0x20C => 'HPUX' + } + FLAGS = { 0x10 => 'PIC', 0x20 => 'DYNAMIC' } + SYMBOL_TYPE = { 0 => 'UNDF', 1 => 'ABS', 2 => 'TEXT', + 3 => 'DATA', 4 => 'BSS', 5 => 'INDR', 6 => 'SIZE', + 9 => 'COMM', 10=> 'SETA', 11=> 'SETT', 12=> 'SETD', + 13=> 'SETB', 14=> 'SETV', 15=> 'FN' + } - attr_accessor :endianness, :header, :text, :data, :symbols, :textrel, :datarel + attr_accessor :endianness, :header, :text, :data, :symbols, :textrel, :datarel - class Header < SerialStruct 
- bitfield :word, 0 => :magic, 16 => :machtype, 24 => :flags - fld_enum(:magic, MAGIC) - fld_enum(:machtype, MACHINE_TYPE) - fld_bits(:flags, FLAGS) - words :text, :data, :bss, :syms, :entry, :trsz, :drsz + class Header < SerialStruct + bitfield :word, 0 => :magic, 16 => :machtype, 24 => :flags + fld_enum(:magic, MAGIC) + fld_enum(:machtype, MACHINE_TYPE) + fld_bits(:flags, FLAGS) + words :text, :data, :bss, :syms, :entry, :trsz, :drsz - def decode(aout) - super(aout) + def decode(aout) + super(aout) - case @magic - when 'OMAGIC', 'NMAGIC', 'ZMAGIC', 'QMAGIC' - else raise InvalidExeFormat, "Bad A.OUT signature #@magic" - end - end + case @magic + when 'OMAGIC', 'NMAGIC', 'ZMAGIC', 'QMAGIC' + else raise InvalidExeFormat, "Bad A.OUT signature #@magic" + end + end - def set_default_values(aout) - @magic ||= 'QMAGIC' - @machtype ||= 'PC386' - @flags ||= [] - @text ||= aout.text.length + (@magic == 'QMAGIC' ? 32 : 0) if aout.text - @data ||= aout.data.length if aout.data + def set_default_values(aout) + @magic ||= 'QMAGIC' + @machtype ||= 'PC386' + @flags ||= [] + @text ||= aout.text.length + (@magic == 'QMAGIC' ? 
32 : 0) if aout.text + @data ||= aout.data.length if aout.data - super(aout) - end - end + super(aout) + end + end - class Relocation < SerialStruct - word :address - bitfield :word, 0 => :symbolnum, 24 => :pcrel, 25 => :length, - 27 => :extern, 28 => :baserel, 29 => :jmptable, 30 => :relative, 31 => :rtcopy - fld_enum :length, 0 => 1, 1 => 2, 2 => 4, 3 => 8 - fld_default :length, 4 - end + class Relocation < SerialStruct + word :address + bitfield :word, 0 => :symbolnum, 24 => :pcrel, 25 => :length, + 27 => :extern, 28 => :baserel, 29 => :jmptable, 30 => :relative, 31 => :rtcopy + fld_enum :length, 0 => 1, 1 => 2, 2 => 4, 3 => 8 + fld_default :length, 4 + end - class Symbol < SerialStruct - word :name_p - bitfield :byte, 0 => :extern, 1 => :type, 5 => :stab - byte :other - half :desc - word :value - attr_accessor :name + class Symbol < SerialStruct + word :name_p + bitfield :byte, 0 => :extern, 1 => :type, 5 => :stab + byte :other + half :desc + word :value + attr_accessor :name - def decode(aout, strings=nil) - super(aout) - @name = strings[@name_p...(strings.index(?\0, @name_p))] if strings - end + def decode(aout, strings=nil) + super(aout) + @name = strings[@name_p...(strings.index(?\0, @name_p))] if strings + end - def set_default_values(aout, strings=nil) - if strings and name and @name != '' - if not @name_p or strings[@name_p, @name.length] != @name - @name_p = strings.length - strings << @name << 0 - end - end - super(aout, strings) - end - end + def set_default_values(aout, strings=nil) + if strings and name and @name != '' + if not @name_p or strings[@name_p, @name.length] != @name + @name_p = strings.length + strings << @name << 0 + end + end + super(aout, strings) + end + end - def decode_byte(edata = @encoded) edata.decode_imm(:u8 , @endianness) end - def decode_half(edata = @encoded) edata.decode_imm(:u16, @endianness) end - def decode_word(edata = @encoded) edata.decode_imm(:u32, @endianness) end - def encode_byte(w) Expression[w].encode(:u8 , 
@endianness) end - def encode_half(w) Expression[w].encode(:u16, @endianness) end - def encode_word(w) Expression[w].encode(:u32, @endianness) end + def decode_byte(edata = @encoded) edata.decode_imm(:u8 , @endianness) end + def decode_half(edata = @encoded) edata.decode_imm(:u16, @endianness) end + def decode_word(edata = @encoded) edata.decode_imm(:u32, @endianness) end + def encode_byte(w) Expression[w].encode(:u8 , @endianness) end + def encode_half(w) Expression[w].encode(:u16, @endianness) end + def encode_word(w) Expression[w].encode(:u32, @endianness) end + def sizeof_byte ; 1 ; end + def sizeof_half ; 2 ; end + def sizeof_word ; 4 ; end - def initialize(cpu = nil) - @endianness = cpu ? cpu.endianness : :little - @header = Header.new - @text = EncodedData.new - @data = EncodedData.new - super(cpu) - end + def initialize(cpu = nil) + @endianness = cpu ? cpu.endianness : :little + @header = Header.new + @text = EncodedData.new + @data = EncodedData.new + super(cpu) + end - def decode_header - @encoded.ptr = 0 - @header.decode(self) - end + def decode_header + @encoded.ptr = 0 + @header.decode(self) + end - def decode - decode_header + def decode + decode_header - tlen = @header.text - case @header.magic - when 'ZMAGIC'; @encoded.ptr = 1024 - when 'QMAGIC'; tlen -= 32 # header is included in .text - end - @text = EncodedData.new << @encoded.read(tlen) + tlen = @header.text + case @header.magic + when 'ZMAGIC'; @encoded.ptr = 1024 + when 'QMAGIC'; tlen -= 32 # header is included in .text + end + @text = EncodedData.new << @encoded.read(tlen) - @data = EncodedData.new << @encoded.read(@header.data) + @data = EncodedData.new << @encoded.read(@header.data) - # TODO - #textrel = @encoded.read @header.trsz - #datarel = @encoded.read @header.drsz - #syms = @encoded.read @header.syms - #strings = @encoded.read - end + # TODO + #textrel = @encoded.read @header.trsz + #datarel = @encoded.read @header.drsz + #syms = @encoded.read @header.syms + #strings = @encoded.read + 
end - def encode - # non mmapable on linux anyway - # could support OMAGIC.. - raise EncodeError, 'cannot encode non-QMAGIC a.out' if @header.magic and @header.magic != 'QMAGIC' + def encode + # non mmapable on linux anyway + # could support OMAGIC.. + raise EncodeError, 'cannot encode non-QMAGIC a.out' if @header.magic and @header.magic != 'QMAGIC' - # data must be 4096-aligned - # 32 bytes of header included in .text - @text.virtsize = (@text.virtsize + 32 + 4096 - 1) / 4096 * 4096 - 32 - if @data.rawsize % 4096 != 0 - @data[(@data.rawsize + 4096 - 1) / 4096 * 4096 - 1] = 0 - end + # data must be 4096-aligned + # 32 bytes of header included in .text + @text.virtsize = (@text.virtsize + 32 + 4096 - 1) / 4096 * 4096 - 32 + if @data.rawsize % 4096 != 0 + @data[(@data.rawsize + 4096 - 1) / 4096 * 4096 - 1] = 0 + end - @header.text = @text.length+32 - @header.data = @data.rawsize - @header.bss = @data.virtsize - @data.rawsize + @header.text = @text.length+32 + @header.data = @data.rawsize + @header.bss = @data.virtsize - @data.rawsize - @encoded = EncodedData.new - @encoded << @header.encode(self) - binding = @text.binding(4096+32).merge @data.binding(4096 + @header.text) - @encoded << @text << @data - @encoded.fixup! binding - @encoded.data - end + @encoded = EncodedData.new + @encoded << @header.encode(self) + binding = @text.binding(4096+32).merge @data.binding(4096 + @header.text) + @encoded << @text << @data + @encoded.fixup! 
binding + @encoded.data + end - def parse_init - @textsrc ||= [] - @datasrc ||= [] - @cursource ||= @textsrc - super() - end + def parse_init + @textsrc ||= [] + @datasrc ||= [] + @cursource ||= @textsrc + super() + end - def parse_parser_instruction(instr) - case instr.raw.downcase - when '.text'; @cursource = @textsrc - when '.data'; @cursource = @datasrc - when '.entrypoint' - # ".entrypoint " or ".entrypoint" (here) - @lexer.skip_space - if tok = @lexer.nexttok and tok.type == :string - raise instr if not entrypoint = Expression.parse(@lexer) - else - entrypoint = new_label('entrypoint') - @cursource << Label.new(entrypoint, instr.backtrace.dup) - end - @header.entry = entrypoint - else super(instr) - end - end + def parse_parser_instruction(instr) + case instr.raw.downcase + when '.text'; @cursource = @textsrc + when '.data'; @cursource = @datasrc + when '.entrypoint' + # ".entrypoint " or ".entrypoint" (here) + @lexer.skip_space + if tok = @lexer.nexttok and tok.type == :string + raise instr if not entrypoint = Expression.parse(@lexer) + else + entrypoint = new_label('entrypoint') + @cursource << Label.new(entrypoint, instr.backtrace.dup) + end + @header.entry = entrypoint + else super(instr) + end + end - def assemble(*a) - parse(*a) if not a.empty? - @text << assemble_sequence(@textsrc, @cpu) - @textsrc.clear - @data << assemble_sequence(@datasrc, @cpu) - @datasrc.clear - self - end + def assemble(*a) + parse(*a) if not a.empty? 
+ @text << assemble_sequence(@textsrc, @cpu) + @textsrc.clear + @data << assemble_sequence(@datasrc, @cpu) + @datasrc.clear + self + end - def each_section - tva = 0 - tva = 4096+32 if @header.magic == 'QMAGIC' - yield @text, tva - yield @data, tva + @text.virtsize - end + def each_section + tva = 0 + tva = 4096+32 if @header.magic == 'QMAGIC' + yield @text, tva + yield @data, tva + @text.virtsize + end end end diff --git a/lib/metasm/metasm/exe_format/autoexe.rb b/lib/metasm/metasm/exe_format/autoexe.rb index 3bd94d953c..77fd6b3fad 100644 --- a/lib/metasm/metasm/exe_format/autoexe.rb +++ b/lib/metasm/metasm/exe_format/autoexe.rb @@ -13,40 +13,40 @@ class UnknownSignature < InvalidExeFormat ; end # actually calls autoexe_load for the detected filetype from #execlass_from_signature def self.load(str, *a, &b) - s = str - s = str.data if s.kind_of? EncodedData - execlass_from_signature(s).autoexe_load(str, *a, &b) + s = str + s = str.data if s.kind_of? EncodedData + execlass_from_signature(s).autoexe_load(str, *a, &b) end # match the actual exe class from the raw file inspection using the registered signature list # calls #unknown_signature if nothing matches def self.execlass_from_signature(raw) - m = @signatures.find { |sig, exe| - case sig - when String; raw[0, sig.length] == sig - when Proc; sig[raw] - end - } - e = m ? m[1] : unknown_signature(raw) - case e - when String; Metasm.const_get(e) - when Proc; e.call - else e - end + m = @signatures.find { |sig, exe| + case sig + when String; raw[0, sig.length] == sig + when Proc; sig[raw] + end + } + e = m ? 
m[1] : unknown_signature(raw) + case e + when String; Metasm.const_get(e) + when Proc; e.call + else e + end end # register a new binary file signature def self.register_signature(sig, exe=nil, &b) - (@signatures ||= []) << [sig, exe || b] + (@signatures ||= []) << [sig, exe || b] end def self.init_signatures(sig=[]) - @signatures = sig + @signatures = sig end # this function is called when no signature matches def self.unknown_signature(raw) - raise UnknownSignature, "unrecognized executable file format #{raw[0, 4].unpack('H*').first.inspect}" + raise UnknownSignature, "unrecognized executable file format #{raw[0, 4].unpack('H*').first.inspect}" end # raw signature copies (avoid triggering exefmt autorequire) @@ -63,14 +63,14 @@ register_signature('Metasm.dasm') { Disassembler } # replacement for AutoExe where #load defaults to a Shellcode of the specified CPU def self.orshellcode(cpu=nil, &b) - # here we create an anonymous subclass of AutoExe whose #unknown_sig is patched to return a Shellcode instead of raise()ing - c = ::Class.new(self) - # yeeehaa - class << c ; self ; end.send(:define_method, :unknown_signature) { |raw| - Shellcode.withcpu(cpu || b[raw]) - } - c.init_signatures @signatures - c + # here we create an anonymous subclass of AutoExe whose #unknown_sig is patched to return a Shellcode instead of raise()ing + c = ::Class.new(self) + # yeeehaa + class << c ; self ; end.send(:define_method, :unknown_signature) { |raw| + Shellcode.withcpu(cpu || b[raw]) + } + c.init_signatures @signatures + c end end diff --git a/lib/metasm/metasm/exe_format/bflt.rb b/lib/metasm/metasm/exe_format/bflt.rb index 5c10a664e0..099b4f3629 100644 --- a/lib/metasm/metasm/exe_format/bflt.rb +++ b/lib/metasm/metasm/exe_format/bflt.rb @@ -11,208 +11,209 @@ module Metasm # BFLT is the binary flat format used by the uClinux # from examining a v4 binary, it looks like the header is discarded and the file is mapped from 0x40 to memory address 0 (wrt relocations) class Bflt < 
ExeFormat - MAGIC = 'bFLT' - FLAGS = { 1 => 'RAM', 2 => 'GOTPIC', 4 => 'GZIP' } + MAGIC = 'bFLT' + FLAGS = { 1 => 'RAM', 2 => 'GOTPIC', 4 => 'GZIP' } - attr_accessor :header, :text, :data, :reloc, :got + attr_accessor :header, :text, :data, :reloc, :got - class Header < SerialStruct - mem :magic, 4 - words :rev, :entry, :data_start, :data_end, :bss_end, :stack_size, - :reloc_start, :reloc_count, :flags - mem :pad, 6*4 - fld_bits(:flags, FLAGS) + class Header < SerialStruct + mem :magic, 4 + words :rev, :entry, :data_start, :data_end, :bss_end, :stack_size, + :reloc_start, :reloc_count, :flags + mem :pad, 6*4 + fld_bits(:flags, FLAGS) - def decode(exe) - super(exe) + def decode(exe) + super(exe) - case @magic - when MAGIC - else raise InvalidExeFormat, "Bad bFLT signature #@magic" - end + case @magic + when MAGIC + else raise InvalidExeFormat, "Bad bFLT signature #@magic" + end - if @rev >= 0x01000000 and (@rev & 0x00f0ffff) == 0 - puts "Bflt: probable wrong endianness, retrying" if $VERBOSE - exe.endianness = { :big => :little, :little => :big }[exe.endianness] - exe.encoded.ptr -= 4*16 - super(exe) - end - end + if @rev >= 0x01000000 and (@rev & 0x00f0ffff) == 0 + puts "Bflt: probable wrong endianness, retrying" if $VERBOSE + exe.endianness = { :big => :little, :little => :big }[exe.endianness] + exe.encoded.ptr -= 4*16 + super(exe) + end + end - def set_default_values(exe) - @magic ||= MAGIC - @rev ||= 4 - @entry ||= 0x40 - @data_start ||= 0x40 + exe.text.length if exe.text - @data_end ||= @data_start + exe.data.data.length if exe.data - @bss_end ||= @data_start + exe.data.length if exe.data - @stack_size ||= 0x1000 - @reloc_start ||= @data_end - @reloc_count ||= exe.reloc.length - @flags ||= [] + def set_default_values(exe) + @magic ||= MAGIC + @rev ||= 4 + @entry ||= 0x40 + @data_start ||= 0x40 + exe.text.length if exe.text + @data_end ||= @data_start + exe.data.data.length if exe.data + @bss_end ||= @data_start + exe.data.length if exe.data + @stack_size ||= 
0x1000 + @reloc_start ||= @data_end + @reloc_count ||= exe.reloc.length + @flags ||= [] - super(exe) - end - end + super(exe) + end + end - def decode_word(edata = @encoded) edata.decode_imm(:u32, @endianness) end - def encode_word(w) Expression[w].encode(:u32, @endianness) end + def decode_word(edata = @encoded) edata.decode_imm(:u32, @endianness) end + def encode_word(w) Expression[w].encode(:u32, @endianness) end + def sizeof_word ; 4 ; end - attr_accessor :endianness - def initialize(cpu = nil) - @endianness = cpu ? cpu.endianness : :little - @header = Header.new - @text = EncodedData.new - @data = EncodedData.new - super(cpu) - end + attr_accessor :endianness + def initialize(cpu = nil) + @endianness = cpu ? cpu.endianness : :little + @header = Header.new + @text = EncodedData.new + @data = EncodedData.new + super(cpu) + end - def decode_header - @encoded.ptr = 0 - @header.decode(self) - @encoded.add_export(new_label('entrypoint'), @header.entry) - end + def decode_header + @encoded.ptr = 0 + @header.decode(self) + @encoded.add_export(new_label('entrypoint'), @header.entry) + end - def decode - decode_header + def decode + decode_header - @text = @encoded[0x40...@header.data_start] - @data = @encoded[@header.data_start...@header.data_end] - @data.virtsize += @header.bss_end - @header.data_end + @text = @encoded[0x40...@header.data_start] + @data = @encoded[@header.data_start...@header.data_end] + @data.virtsize += @header.bss_end - @header.data_end - if @header.flags.include?('GZIP') - # TODO gzip - raise 'bFLT decoder: gzip format not supported' - end + if @header.flags.include?('GZIP') + # TODO gzip + raise 'bFLT decoder: gzip format not supported' + end - @reloc = [] - @encoded.ptr = @header.reloc_start - @header.reloc_count.times { @reloc << decode_word } - if @header.rev == 2 - @reloc.map! 
{ |r| r & 0x3fff_ffff } - end + @reloc = [] + @encoded.ptr = @header.reloc_start + @header.reloc_count.times { @reloc << decode_word } + if @header.rev == 2 + @reloc.map! { |r| r & 0x3fff_ffff } + end - decode_interpret_relocs - end + decode_interpret_relocs + end - def decode_interpret_relocs - textsz = @header.data_start-0x40 - @reloc.each { |r| - # where the reloc is - if r < textsz - section = @text - off = section.ptr = r - else - section = @data - off = section.ptr = r-textsz - end + def decode_interpret_relocs + textsz = @header.data_start-0x40 + @reloc.each { |r| + # where the reloc is + if r < textsz + section = @text + off = section.ptr = r + else + section = @data + off = section.ptr = r-textsz + end - # what it points to - target = decode_word(section) - if target < textsz - target = label_at(@text, target, "xref_#{Expression[target]}") - elsif target < @header.bss_end-0x40 - target = label_at(@data, target-textsz, "xref_#{Expression[target]}") - else - puts "out of bounds reloc target #{Expression[target]} at #{Expression[r]}" if $VERBOSE - next - end + # what it points to + target = decode_word(section) + if target < textsz + target = label_at(@text, target, "xref_#{Expression[target]}") + elsif target < @header.bss_end-0x40 + target = label_at(@data, target-textsz, "xref_#{Expression[target]}") + else + puts "out of bounds reloc target #{Expression[target]} at #{Expression[r]}" if $VERBOSE + next + end - section.reloc[off] = Relocation.new(Expression[target], :u32, @endianness) - } - end + section.reloc[off] = Relocation.new(Expression[target], :u32, @endianness) + } + end - def encode - create_relocation_table + def encode + create_relocation_table - # TODO got, gzip - if @header.flags.include? 'GZIP' - puts "W: bFLT: clearing gzip flag" if $VERBOSE - @header.flags.delete 'GZIP' - end + # TODO got, gzip + if @header.flags.include? 
'GZIP' + puts "W: bFLT: clearing gzip flag" if $VERBOSE + @header.flags.delete 'GZIP' + end - @encoded = EncodedData.new - @encoded << @header.encode(self) + @encoded = EncodedData.new + @encoded << @header.encode(self) - binding = @text.binding(0x40).merge(@data.binding(@header.data_start)) - @encoded << @text << @data.data - @encoded.fixup! binding - @encoded.reloc.clear + binding = @text.binding(0x40).merge(@data.binding(@header.data_start)) + @encoded << @text << @data.data + @encoded.fixup! binding + @encoded.reloc.clear - @relocs.each { |r| @encoded << encode_word(r) } + @relocs.each { |r| @encoded << encode_word(r) } - @encoded.data - end + @encoded.data + end - def create_relocation_table - @reloc = [] - mapaddr = new_label('mapaddr') - binding = @text.binding(mapaddr).merge(@data.binding(mapaddr)) - [@text, @data].each { |section| - base = 0x40 # XXX maybe 0 ? - base = @header.data_start || base+@text.length if section == @data - section.reloc.each { |o, r| - if r.endianness == @endianness and [:u32, :a32, :i32].include? r.type and - Expression[r.target.bind(binding), :-, mapaddr].reduce.kind_of? ::Integer - @reloc << (base+o) - else - puts "bFLT: ignoring unsupported reloc #{r.inspect} at #{Expression[o]}" if $VERBOSE - end - } - } - end + def create_relocation_table + @reloc = [] + mapaddr = new_label('mapaddr') + binding = @text.binding(mapaddr).merge(@data.binding(mapaddr)) + [@text, @data].each { |section| + base = 0x40 # XXX maybe 0 ? + base = @header.data_start || base+@text.length if section == @data + section.reloc.each { |o, r| + if r.endianness == @endianness and [:u32, :a32, :i32].include? r.type and + Expression[r.target.bind(binding), :-, mapaddr].reduce.kind_of? 
::Integer + @reloc << (base+o) + else + puts "bFLT: ignoring unsupported reloc #{r.inspect} at #{Expression[o]}" if $VERBOSE + end + } + } + end - def parse_init - @textsrc ||= [] - @datasrc ||= [] - @cursource ||= @textsrc - super() - end + def parse_init + @textsrc ||= [] + @datasrc ||= [] + @cursource ||= @textsrc + super() + end - def parse_parser_instruction(instr) - case instr.raw.downcase - when '.text'; @cursource = @textsrc - when '.data'; @cursource = @datasrc - when '.entrypoint' - # ".entrypoint " or ".entrypoint" (here) - @lexer.skip_space - if tok = @lexer.nexttok and tok.type == :string - raise instr if not entrypoint = Expression.parse(@lexer) - else - entrypoint = new_label('entrypoint') - @cursource << Label.new(entrypoint, instr.backtrace.dup) - end - @header.entry = entrypoint - else super(instr) - end - end + def parse_parser_instruction(instr) + case instr.raw.downcase + when '.text'; @cursource = @textsrc + when '.data'; @cursource = @datasrc + when '.entrypoint' + # ".entrypoint " or ".entrypoint" (here) + @lexer.skip_space + if tok = @lexer.nexttok and tok.type == :string + raise instr if not entrypoint = Expression.parse(@lexer) + else + entrypoint = new_label('entrypoint') + @cursource << Label.new(entrypoint, instr.backtrace.dup) + end + @header.entry = entrypoint + else super(instr) + end + end - def assemble(*a) - parse(*a) if not a.empty? - @text << assemble_sequence(@textsrc, @cpu) - @textsrc.clear - @data << assemble_sequence(@datasrc, @cpu) - @datasrc.clear - self - end + def assemble(*a) + parse(*a) if not a.empty? 
+ @text << assemble_sequence(@textsrc, @cpu) + @textsrc.clear + @data << assemble_sequence(@datasrc, @cpu) + @datasrc.clear + self + end - def get_default_entrypoints - ['entrypoint'] - end + def get_default_entrypoints + ['entrypoint'] + end - def each_section - yield @text, 0 - yield @data, @header.data_start - @header.entry - end + def each_section + yield @text, 0 + yield @data, @header.data_start - @header.entry + end - def section_info - [['.text', 0, @text.length, 'rx'], - ['.data', @header.data_addr-0x40, @data.data.length, 'rw'], - ['.bss', @header.data_end-0x40, @data.length-@data.data.length, 'rw']] - end + def section_info + [['.text', 0, @text.length, 'rx'], + ['.data', @header.data_addr-0x40, @data.data.length, 'rw'], + ['.bss', @header.data_end-0x40, @data.length-@data.data.length, 'rw']] + end - def module_symbols - ['entrypoint', @header.entry-0x40] - end + def module_symbols + ['entrypoint', @header.entry-0x40] + end end end diff --git a/lib/metasm/metasm/exe_format/coff.rb b/lib/metasm/metasm/exe_format/coff.rb index d0c4027596..2f659b1a9b 100644 --- a/lib/metasm/metasm/exe_format/coff.rb +++ b/lib/metasm/metasm/exe_format/coff.rb @@ -10,444 +10,452 @@ module Metasm # the COFF object file format # mostly used on windows (PE/COFF) class COFF < ExeFormat - CHARACTERISTIC_BITS = { - 0x0001 => 'RELOCS_STRIPPED', 0x0002 => 'EXECUTABLE_IMAGE', - 0x0004 => 'LINE_NUMS_STRIPPED', 0x0008 => 'LOCAL_SYMS_STRIPPED', - 0x0010 => 'AGGRESSIVE_WS_TRIM', 0x0020 => 'LARGE_ADDRESS_AWARE', - 0x0040 => 'x16BIT_MACHINE', 0x0080 => 'BYTES_REVERSED_LO', - 0x0100 => 'x32BIT_MACHINE', 0x0200 => 'DEBUG_STRIPPED', - 0x0400 => 'REMOVABLE_RUN_FROM_SWAP', 0x0800 => 'NET_RUN_FROM_SWAP', - 0x1000 => 'SYSTEM', 0x2000 => 'DLL', - 0x4000 => 'UP_SYSTEM_ONLY', 0x8000 => 'BYTES_REVERSED_HI' - } + CHARACTERISTIC_BITS = { + 0x0001 => 'RELOCS_STRIPPED', 0x0002 => 'EXECUTABLE_IMAGE', + 0x0004 => 'LINE_NUMS_STRIPPED', 0x0008 => 'LOCAL_SYMS_STRIPPED', + 0x0010 => 'AGGRESSIVE_WS_TRIM', 0x0020 
=> 'LARGE_ADDRESS_AWARE', + 0x0040 => 'x16BIT_MACHINE', 0x0080 => 'BYTES_REVERSED_LO', + 0x0100 => 'x32BIT_MACHINE', 0x0200 => 'DEBUG_STRIPPED', + 0x0400 => 'REMOVABLE_RUN_FROM_SWAP', 0x0800 => 'NET_RUN_FROM_SWAP', + 0x1000 => 'SYSTEM', 0x2000 => 'DLL', + 0x4000 => 'UP_SYSTEM_ONLY', 0x8000 => 'BYTES_REVERSED_HI' + } - MACHINE = { - 0x0 => 'UNKNOWN', 0x184 => 'ALPHA', 0x1c0 => 'ARM', - 0x1d3 => 'AM33', 0x8664=> 'AMD64', 0xebc => 'EBC', - 0x9041=> 'M32R', 0x1f1 => 'POWERPCFP', - 0x284 => 'ALPHA64', 0x14c => 'I386', 0x200 => 'IA64', - 0x268 => 'M68K', 0x266 => 'MIPS16', 0x366 => 'MIPSFPU', - 0x466 => 'MIPSFPU16', 0x1f0 => 'POWERPC', 0x162 => 'R3000', - 0x166 => 'R4000', 0x168 => 'R10000', 0x1a2 => 'SH3', - 0x1a3 => 'SH3DSP', 0x1a6 => 'SH4', 0x1a8 => 'SH5', - 0x1c2 => 'THUMB', 0x169 => 'WCEMIPSV2' - } + MACHINE = { + 0x0 => 'UNKNOWN', 0x184 => 'ALPHA', 0x1c0 => 'ARM', + 0x1d3 => 'AM33', 0x8664=> 'AMD64', 0xebc => 'EBC', + 0x9041=> 'M32R', 0x1f1 => 'POWERPCFP', + 0x284 => 'ALPHA64', 0x14c => 'I386', 0x200 => 'IA64', + 0x268 => 'M68K', 0x266 => 'MIPS16', 0x366 => 'MIPSFPU', + 0x466 => 'MIPSFPU16', 0x1f0 => 'POWERPC', 0x162 => 'R3000', + 0x166 => 'R4000', 0x168 => 'R10000', 0x1a2 => 'SH3', + 0x1a3 => 'SH3DSP', 0x1a6 => 'SH4', 0x1a8 => 'SH5', + 0x1c2 => 'THUMB', 0x169 => 'WCEMIPSV2' + } - # PE+ is for 64bits address spaces - SIGNATURE = { 0x10b => 'PE', 0x20b => 'PE+', 0x107 => 'ROM' } + # PE+ is for 64bits address spaces + SIGNATURE = { 0x10b => 'PE', 0x20b => 'PE+', 0x107 => 'ROM' } - SUBSYSTEM = { - 0 => 'UNKNOWN', 1 => 'NATIVE', 2 => 'WINDOWS_GUI', - 3 => 'WINDOWS_CUI', 5 => 'OS/2_CUI', 7 => 'POSIX_CUI', - 8 => 'WIN9X_DRIVER', 9 => 'WINDOWS_CE_GUI', - 10 => 'EFI_APPLICATION', - 11 => 'EFI_BOOT_SERVICE_DRIVER', 12 => 'EFI_RUNTIME_DRIVER', - 13 => 'EFI_ROM', 14 => 'XBOX' - } + SUBSYSTEM = { + 0 => 'UNKNOWN', 1 => 'NATIVE', 2 => 'WINDOWS_GUI', + 3 => 'WINDOWS_CUI', 5 => 'OS/2_CUI', 7 => 'POSIX_CUI', + 8 => 'WIN9X_DRIVER', 9 => 'WINDOWS_CE_GUI', + 10 => 'EFI_APPLICATION', 
+ 11 => 'EFI_BOOT_SERVICE_DRIVER', 12 => 'EFI_RUNTIME_DRIVER', + 13 => 'EFI_ROM', 14 => 'XBOX' + } - DLL_CHARACTERISTIC_BITS = { - 0x40 => 'DYNAMIC_BASE', 0x80 => 'FORCE_INTEGRITY', 0x100 => 'NX_COMPAT', - 0x200 => 'NO_ISOLATION', 0x400 => 'NO_SEH', 0x800 => 'NO_BIND', - 0x2000 => 'WDM_DRIVER', 0x8000 => 'TERMINAL_SERVER_AWARE' - } + DLL_CHARACTERISTIC_BITS = { + 0x40 => 'DYNAMIC_BASE', 0x80 => 'FORCE_INTEGRITY', 0x100 => 'NX_COMPAT', + 0x200 => 'NO_ISOLATION', 0x400 => 'NO_SEH', 0x800 => 'NO_BIND', + 0x2000 => 'WDM_DRIVER', 0x8000 => 'TERMINAL_SERVER_AWARE' + } - BASE_RELOCATION_TYPE = { 0 => 'ABSOLUTE', 1 => 'HIGH', 2 => 'LOW', 3 => 'HIGHLOW', - 4 => 'HIGHADJ', 5 => 'MIPS_JMPADDR', 9 => 'MIPS_JMPADDR16', 10 => 'DIR64' - } + BASE_RELOCATION_TYPE = { 0 => 'ABSOLUTE', 1 => 'HIGH', 2 => 'LOW', 3 => 'HIGHLOW', + 4 => 'HIGHADJ', 5 => 'MIPS_JMPADDR', 9 => 'MIPS_JMPADDR16', 10 => 'DIR64' + } - RELOCATION_TYPE = Hash.new({}).merge( - 'AMD64' => { 0 => 'ABSOLUTE', 1 => 'ADDR64', 2 => 'ADDR32', 3 => 'ADDR32NB', - 4 => 'REL32', 5 => 'REL32_1', 6 => 'REL32_2', 7 => 'REL32_3', - 8 => 'REL32_4', 9 => 'REL32_5', 10 => 'SECTION', 11 => 'SECREL', - 12 => 'SECREL7', 13 => 'TOKEN', 14 => 'SREL32', 15 => 'PAIR', - 16 => 'SSPAN32' }, - 'ARM' => { 0 => 'ABSOLUTE', 1 => 'ADDR32', 2 => 'ADDR32NB', 3 => 'BRANCH24', - 4 => 'BRANCH11', 14 => 'SECTION', 15 => 'SECREL' }, - 'I386' => { 0 => 'ABSOLUTE', 1 => 'DIR16', 2 => 'REL16', 6 => 'DIR32', - 7 => 'DIR32NB', 9 => 'SEG12', 10 => 'SECTION', 11 => 'SECREL', - 12 => 'TOKEN', 13 => 'SECREL7', 20 => 'REL32' } - ) + RELOCATION_TYPE = Hash.new({}).merge( + 'AMD64' => { 0 => 'ABSOLUTE', 1 => 'ADDR64', 2 => 'ADDR32', 3 => 'ADDR32NB', + 4 => 'REL32', 5 => 'REL32_1', 6 => 'REL32_2', 7 => 'REL32_3', + 8 => 'REL32_4', 9 => 'REL32_5', 10 => 'SECTION', 11 => 'SECREL', + 12 => 'SECREL7', 13 => 'TOKEN', 14 => 'SREL32', 15 => 'PAIR', + 16 => 'SSPAN32' }, + 'ARM' => { 0 => 'ABSOLUTE', 1 => 'ADDR32', 2 => 'ADDR32NB', 3 => 'BRANCH24', + 4 => 'BRANCH11', 14 => 
'SECTION', 15 => 'SECREL' }, + 'I386' => { 0 => 'ABSOLUTE', 1 => 'DIR16', 2 => 'REL16', 6 => 'DIR32', + 7 => 'DIR32NB', 9 => 'SEG12', 10 => 'SECTION', 11 => 'SECREL', + 12 => 'TOKEN', 13 => 'SECREL7', 20 => 'REL32' } + ) - # lsb of symbol type, unused - SYMBOL_BTYPE = { 0 => 'NULL', 1 => 'VOID', 2 => 'CHAR', 3 => 'SHORT', - 4 => 'INT', 5 => 'LONG', 6 => 'FLOAT', 7 => 'DOUBLE', 8 => 'STRUCT', - 9 => 'UNION', 10 => 'ENUM', 11 => 'MOE', 12 => 'BYTE', 13 => 'WORD', - 14 => 'UINT', 15 => 'DWORD'} - SYMBOL_TYPE = { 0 => 'NULL', 1 => 'POINTER', 2 => 'FUNCTION', 3 => 'ARRAY' } - SYMBOL_SECTION = { 0 => 'UNDEF', 0xffff => 'ABS', 0xfffe => 'DEBUG' } - SYMBOL_STORAGE = { 0xff => 'EOF', 0 => 'NULL', 1 => 'AUTO', 2 => 'EXTERNAL', - 3 => 'STATIC', 4 => 'REGISTER', 5 => 'EXT_DEF', 6 => 'LABEL', - 7 => 'UNDEF_LABEL', 8 => 'STRUCT_MEMBER', 9 => 'ARGUMENT', 10 => 'STRUCT_TAG', - 11 => 'UNION_MEMBER', 12 => 'UNION_TAG', 13 => 'TYPEDEF', 14 => 'UNDEF_STATIC', - 15 => 'ENUM_TAG', 16 => 'ENUM_MEMBER', 17 => 'REG_PARAM', 18 => 'BIT_FIELD', - 100 => 'BLOCK', 101 => 'FUNCTION', 102 => 'END_STRUCT', - 103 => 'FILE', 104 => 'SECTION', 105 => 'WEAK_EXT', - } + # lsb of symbol type, unused + SYMBOL_BTYPE = { 0 => 'NULL', 1 => 'VOID', 2 => 'CHAR', 3 => 'SHORT', + 4 => 'INT', 5 => 'LONG', 6 => 'FLOAT', 7 => 'DOUBLE', 8 => 'STRUCT', + 9 => 'UNION', 10 => 'ENUM', 11 => 'MOE', 12 => 'BYTE', 13 => 'WORD', + 14 => 'UINT', 15 => 'DWORD'} + SYMBOL_TYPE = { 0 => 'NULL', 1 => 'POINTER', 2 => 'FUNCTION', 3 => 'ARRAY' } + SYMBOL_SECTION = { 0 => 'UNDEF', 0xffff => 'ABS', 0xfffe => 'DEBUG' } + SYMBOL_STORAGE = { 0xff => 'EOF', 0 => 'NULL', 1 => 'AUTO', 2 => 'EXTERNAL', + 3 => 'STATIC', 4 => 'REGISTER', 5 => 'EXT_DEF', 6 => 'LABEL', + 7 => 'UNDEF_LABEL', 8 => 'STRUCT_MEMBER', 9 => 'ARGUMENT', 10 => 'STRUCT_TAG', + 11 => 'UNION_MEMBER', 12 => 'UNION_TAG', 13 => 'TYPEDEF', 14 => 'UNDEF_STATIC', + 15 => 'ENUM_TAG', 16 => 'ENUM_MEMBER', 17 => 'REG_PARAM', 18 => 'BIT_FIELD', + 100 => 'BLOCK', 101 => 'FUNCTION', 
102 => 'END_STRUCT', + 103 => 'FILE', 104 => 'SECTION', 105 => 'WEAK_EXT', + } - DEBUG_TYPE = { 0 => 'UNKNOWN', 1 => 'COFF', 2 => 'CODEVIEW', 3 => 'FPO', 4 => 'MISC', - 5 => 'EXCEPTION', 6 => 'FIXUP', 7 => 'OMAP_TO_SRC', 8 => 'OMAP_FROM_SRC', - 9 => 'BORLAND', 10 => 'RESERVED10', 11 => 'CLSID' } + DEBUG_TYPE = { 0 => 'UNKNOWN', 1 => 'COFF', 2 => 'CODEVIEW', 3 => 'FPO', 4 => 'MISC', + 5 => 'EXCEPTION', 6 => 'FIXUP', 7 => 'OMAP_TO_SRC', 8 => 'OMAP_FROM_SRC', + 9 => 'BORLAND', 10 => 'RESERVED10', 11 => 'CLSID' } - DIRECTORIES = %w[export_table import_table resource_table exception_table certificate_table - base_relocation_table debug architecture global_ptr tls_table load_config - bound_import iat delay_import com_runtime reserved] + DIRECTORIES = %w[export_table import_table resource_table exception_table certificate_table + base_relocation_table debug architecture global_ptr tls_table load_config + bound_import iat delay_import com_runtime reserved] - SECTION_CHARACTERISTIC_BITS = { - 0x20 => 'CONTAINS_CODE', 0x40 => 'CONTAINS_DATA', 0x80 => 'CONTAINS_UDATA', - 0x100 => 'LNK_OTHER', 0x200 => 'LNK_INFO', 0x800 => 'LNK_REMOVE', - 0x1000 => 'LNK_COMDAT', 0x8000 => 'GPREL', - 0x20000 => 'MEM_PURGEABLE|16BIT', 0x40000 => 'MEM_LOCKED', 0x80000 => 'MEM_PRELOAD', - 0x100000 => 'ALIGN_1BYTES', 0x200000 => 'ALIGN_2BYTES', - 0x300000 => 'ALIGN_4BYTES', 0x400000 => 'ALIGN_8BYTES', - 0x500000 => 'ALIGN_16BYTES', 0x600000 => 'ALIGN_32BYTES', - 0x700000 => 'ALIGN_64BYTES', 0x800000 => 'ALIGN_128BYTES', - 0x900000 => 'ALIGN_256BYTES', 0xA00000 => 'ALIGN_512BYTES', - 0xB00000 => 'ALIGN_1024BYTES', 0xC00000 => 'ALIGN_2048BYTES', - 0xD00000 => 'ALIGN_4096BYTES', 0xE00000 => 'ALIGN_8192BYTES', - 0x01000000 => 'LNK_NRELOC_OVFL', 0x02000000 => 'MEM_DISCARDABLE', - 0x04000000 => 'MEM_NOT_CACHED', 0x08000000 => 'MEM_NOT_PAGED', - 0x10000000 => 'MEM_SHARED', 0x20000000 => 'MEM_EXECUTE', - 0x40000000 => 'MEM_READ', 0x80000000 => 'MEM_WRITE' - } - # NRELOC_OVFL means there are more than 
0xffff reloc - # the reloc count must be set to 0xffff, and the real reloc count - # is the VA of the first relocation + SECTION_CHARACTERISTIC_BITS = { + 0x20 => 'CONTAINS_CODE', 0x40 => 'CONTAINS_DATA', 0x80 => 'CONTAINS_UDATA', + 0x100 => 'LNK_OTHER', 0x200 => 'LNK_INFO', 0x800 => 'LNK_REMOVE', + 0x1000 => 'LNK_COMDAT', 0x8000 => 'GPREL', + 0x20000 => 'MEM_PURGEABLE|16BIT', 0x40000 => 'MEM_LOCKED', 0x80000 => 'MEM_PRELOAD', + 0x100000 => 'ALIGN_1BYTES', 0x200000 => 'ALIGN_2BYTES', + 0x300000 => 'ALIGN_4BYTES', 0x400000 => 'ALIGN_8BYTES', + 0x500000 => 'ALIGN_16BYTES', 0x600000 => 'ALIGN_32BYTES', + 0x700000 => 'ALIGN_64BYTES', 0x800000 => 'ALIGN_128BYTES', + 0x900000 => 'ALIGN_256BYTES', 0xA00000 => 'ALIGN_512BYTES', + 0xB00000 => 'ALIGN_1024BYTES', 0xC00000 => 'ALIGN_2048BYTES', + 0xD00000 => 'ALIGN_4096BYTES', 0xE00000 => 'ALIGN_8192BYTES', + 0x01000000 => 'LNK_NRELOC_OVFL', 0x02000000 => 'MEM_DISCARDABLE', + 0x04000000 => 'MEM_NOT_CACHED', 0x08000000 => 'MEM_NOT_PAGED', + 0x10000000 => 'MEM_SHARED', 0x20000000 => 'MEM_EXECUTE', + 0x40000000 => 'MEM_READ', 0x80000000 => 'MEM_WRITE' + } + # NRELOC_OVFL means there are more than 0xffff reloc + # the reloc count must be set to 0xffff, and the real reloc count + # is the VA of the first relocation - ORDINAL_REGEX = /^Ordinal_(\d+)$/ + ORDINAL_REGEX = /^Ordinal_(\d+)$/ - COMIMAGE_FLAGS = { - 1 => 'ILONLY', 2 => '32BITREQUIRED', 4 => 'IL_LIBRARY', - 8 => 'STRONGNAMESIGNED', 16 => 'NATIVE_ENTRYPOINT', - 0x10000 => 'TRACKDEBUGDATA' - } + COMIMAGE_FLAGS = { + 1 => 'ILONLY', 2 => '32BITREQUIRED', 4 => 'IL_LIBRARY', + 8 => 'STRONGNAMESIGNED', 16 => 'NATIVE_ENTRYPOINT', + 0x10000 => 'TRACKDEBUGDATA' + } - class SerialStruct < Metasm::SerialStruct - new_int_field :xword - end + class SerialStruct < Metasm::SerialStruct + new_int_field :xword + end - class Header < SerialStruct - half :machine, 'I386', MACHINE - half :num_sect - words :time, :ptr_sym, :num_sym - half :size_opthdr - half :characteristics - fld_bits 
:characteristics, CHARACTERISTIC_BITS - end + class Header < SerialStruct + half :machine, 'I386', MACHINE + half :num_sect + words :time, :ptr_sym, :num_sym + half :size_opthdr + half :characteristics + fld_bits :characteristics, CHARACTERISTIC_BITS + end - # present in linked files (exe/dll/kmod) - class OptionalHeader < SerialStruct - half :signature, 'PE', SIGNATURE - bytes :link_ver_maj, :link_ver_min - words :code_size, :data_size, :udata_size, :entrypoint, :base_of_code - # base_of_data does not exist in 64-bit - new_field(:base_of_data, lambda { |exe, hdr| exe.decode_word if exe.bitsize != 64 }, lambda { |exe, hdr, val| exe.encode_word(val) if exe.bitsize != 64 }, 0) - # NT-specific fields - xword :image_base - words :sect_align, :file_align - halfs :os_ver_maj, :os_ver_min, :img_ver_maj, :img_ver_min, :subsys_maj, :subsys_min - words :reserved, :image_size, :headers_size, :checksum - half :subsystem, 0, SUBSYSTEM - half :dll_characts - fld_bits :dll_characts, DLL_CHARACTERISTIC_BITS - xwords :stack_reserve, :stack_commit, :heap_reserve, :heap_commit - words :ldrflags, :numrva - end + # present in linked files (exe/dll/kmod) + class OptionalHeader < SerialStruct + half :signature, 'PE', SIGNATURE + bytes :link_ver_maj, :link_ver_min + words :code_size, :data_size, :udata_size, :entrypoint, :base_of_code + # base_of_data does not exist in 64-bit + new_field(:base_of_data, lambda { |exe, hdr| exe.decode_word if exe.bitsize != 64 }, lambda { |exe, hdr, val| exe.encode_word(val) if exe.bitsize != 64 }, lambda { |exe, hdr| exe.bitsize != 64 ? 
4 : 0 }, 0) + # NT-specific fields + xword :image_base + words :sect_align, :file_align + halfs :os_ver_maj, :os_ver_min, :img_ver_maj, :img_ver_min, :subsys_maj, :subsys_min + words :reserved, :image_size, :headers_size, :checksum + half :subsystem, 0, SUBSYSTEM + half :dll_characts + fld_bits :dll_characts, DLL_CHARACTERISTIC_BITS + xwords :stack_reserve, :stack_commit, :heap_reserve, :heap_commit + words :ldrflags, :numrva + end - # COFF relocatable object symbol (table offset found in the Header.ptr_sym) - class Symbol < SerialStruct - str :name, 8 # if the 1st 4 bytes are 0, the word at 4...8 is the name index in the string table - word :value - half :sec_nr - fld_enum :sec_nr, SYMBOL_SECTION - bitfield :half, 0 => :type_base, 4 => :type - fld_enum :type_base, SYMBOL_BTYPE - fld_enum :type, SYMBOL_TYPE - bytes :storage, :nr_aux - fld_enum :storage, SYMBOL_STORAGE + # COFF relocatable object symbol (table offset found in the Header.ptr_sym) + class Symbol < SerialStruct + str :name, 8 # if the 1st 4 bytes are 0, the word at 4...8 is the name index in the string table + word :value + half :sec_nr + fld_enum :sec_nr, SYMBOL_SECTION + bitfield :half, 0 => :type_base, 4 => :type + fld_enum :type_base, SYMBOL_BTYPE + fld_enum :type, SYMBOL_TYPE + bytes :storage, :nr_aux + fld_enum :storage, SYMBOL_STORAGE - attr_accessor :aux - end + attr_accessor :aux + end - class Section < SerialStruct - str :name, 8 - words :virtsize, :virtaddr, :rawsize, :rawaddr, :relocaddr, :linenoaddr - halfs :relocnr, :linenonr - word :characteristics - fld_bits :characteristics, SECTION_CHARACTERISTIC_BITS + class Section < SerialStruct + str :name, 8 + words :virtsize, :virtaddr, :rawsize, :rawaddr, :relocaddr, :linenoaddr + halfs :relocnr, :linenonr + word :characteristics + fld_bits :characteristics, SECTION_CHARACTERISTIC_BITS - attr_accessor :encoded, :relocs - end + attr_accessor :encoded, :relocs + end - # COFF relocatable object relocation (per section, see relocaddr/relocnr) - 
class RelocObj < SerialStruct - word :va - word :symidx - half :type - fld_enum(:type) { |coff, rel| RELOCATION_TYPE[coff.header.machine] || {} } - attr_accessor :sym - end + # COFF relocatable object relocation (per section, see relocaddr/relocnr) + class RelocObj < SerialStruct + word :va + word :symidx + half :type + fld_enum(:type) { |coff, rel| RELOCATION_TYPE[coff.header.machine] || {} } + attr_accessor :sym + end - # lists the functions/addresses exported to the OS (pendant of ImportDirectory) - class ExportDirectory < SerialStruct - words :reserved, :timestamp - halfs :version_major, :version_minor - words :libname_p, :ordinal_base, :num_exports, :num_names, :func_p, :names_p, :ord_p - attr_accessor :libname, :exports + # lists the functions/addresses exported to the OS (pendant of ImportDirectory) + class ExportDirectory < SerialStruct + words :reserved, :timestamp + halfs :version_major, :version_minor + words :libname_p, :ordinal_base, :num_exports, :num_names, :func_p, :names_p, :ord_p + attr_accessor :libname, :exports - class Export - attr_accessor :forwarder_lib, :forwarder_ordinal, :forwarder_name, :target, :target_rva, :name_p, :name, :ordinal - end - end + class Export + attr_accessor :forwarder_lib, :forwarder_ordinal, :forwarder_name, :target, :target_rva, :name_p, :name, :ordinal + end + end - # contains the name of dynamic libraries required by the program, and the function to import from them - class ImportDirectory < SerialStruct - words :ilt_p, :timestamp, :firstforwarder, :libname_p, :iat_p - fld_default :firstforwarder, 0xffff_ffff - attr_accessor :libname, :imports, :iat + # contains the name of dynamic libraries required by the program, and the function to import from them + class ImportDirectory < SerialStruct + words :ilt_p, :timestamp, :firstforwarder, :libname_p, :iat_p + fld_default :firstforwarder, 0xffff_ffff + attr_accessor :libname, :imports, :iat - class Import - attr_accessor :ordinal, :hint, :hintname_p, :name, :target, 
:thunk - end - end + class Import + attr_accessor :ordinal, :hint, :hintname_p, :name, :target, :thunk + end + end - # tree-like structure, holds all misc data the program might need (icons, cursors, version information) - # conventionnally structured in a 3-level depth structure: - # I resource type (icon/cursor/etc, see +TYPES+) - # II resource id (icon n1, icon 'toto', ...) - # III language-specific version (icon n1 en, icon n1 en-dvorak...) - class ResourceDirectory < SerialStruct - words :characteristics, :timestamp - halfs :major_version, :minor_version, :nr_names, :nr_id - attr_accessor :entries - attr_accessor :curoff_label # internal use, in encoder + # tree-like structure, holds all misc data the program might need (icons, cursors, version information) + # conventionnally structured in a 3-level depth structure: + # I resource type (icon/cursor/etc, see +TYPES+) + # II resource id (icon n1, icon 'toto', ...) + # III language-specific version (icon n1 en, icon n1 en-dvorak...) + class ResourceDirectory < SerialStruct + words :characteristics, :timestamp + halfs :major_version, :minor_version, :nr_names, :nr_id + attr_accessor :entries + attr_accessor :curoff_label # internal use, in encoder - class Entry - attr_accessor :name_p, :name, :name_w, - :id, :subdir_p, :subdir, :dataentry_p, - :data_p, :data, :codepage, :reserved - end - end + class Entry + attr_accessor :name_p, :name, :name_w, + :id, :subdir_p, :subdir, :dataentry_p, + :data_p, :data, :codepage, :reserved + end + end - # array of relocations to apply to an executable file - # when it is loaded at an address that is not its preferred_base_address - class RelocationTable < SerialStruct - word :base_addr - attr_accessor :relocs + # array of relocations to apply to an executable file + # when it is loaded at an address that is not its preferred_base_address + class RelocationTable < SerialStruct + word :base_addr + attr_accessor :relocs - class Relocation < SerialStruct - bitfield :half, 0 => 
:offset, 12 => :type - fld_enum :type, BASE_RELOCATION_TYPE - end - end + class Relocation < SerialStruct + bitfield :half, 0 => :offset, 12 => :type + fld_enum :type, BASE_RELOCATION_TYPE + end + end - class DebugDirectory < SerialStruct - words :characteristics, :timestamp - halfs :major_version, :minor_version - words :type, :size_of_data, :addr, :pointer - fld_enum :type, DEBUG_TYPE + class DebugDirectory < SerialStruct + words :characteristics, :timestamp + halfs :major_version, :minor_version + words :type, :size_of_data, :addr, :pointer + fld_enum :type, DEBUG_TYPE - attr_accessor :data + attr_accessor :data - class NB10 < SerialStruct - word :offset - word :signature - word :age - strz :pdbfilename - end + class NB10 < SerialStruct + word :offset + word :signature + word :age + strz :pdbfilename + end - class RSDS < SerialStruct - mem :guid, 16 - word :age - strz :pdbfilename - end - end + class RSDS < SerialStruct + mem :guid, 16 + word :age + strz :pdbfilename + end + end - class TLSDirectory < SerialStruct - xwords :start_va, :end_va, :index_addr, :callback_p - words :zerofill_sz, :characteristics + class TLSDirectory < SerialStruct + xwords :start_va, :end_va, :index_addr, :callback_p + words :zerofill_sz, :characteristics - attr_accessor :callbacks - end + attr_accessor :callbacks + end - # the 'load configuration' directory (used for SafeSEH) - class LoadConfig < SerialStruct - words :signature, :timestamp - halfs :major_version, :minor_version - words :globalflags_clear, :globalflags_set, :critsec_timeout - # lockpfxtable is an array of VA of LOCK prefixes, to be nopped on singleproc machines (!) 
- xwords :decommitblock, :decommittotal, :lockpfxtable, :maxalloc, :maxvirtmem, :process_affinity_mask - word :process_heap_flags - halfs :service_pack_id, :reserved - xwords :editlist, :security_cookie, :sehtable_p, :sehcount + # the 'load configuration' directory (used for SafeSEH) + class LoadConfig < SerialStruct + words :signature, :timestamp + halfs :major_version, :minor_version + words :globalflags_clear, :globalflags_set, :critsec_timeout + # lockpfxtable is an array of VA of LOCK prefixes, to be nopped on singleproc machines (!) + xwords :decommitblock, :decommittotal, :lockpfxtable, :maxalloc, :maxvirtmem, :process_affinity_mask + word :process_heap_flags + halfs :service_pack_id, :reserved + xwords :editlist, :security_cookie, :sehtable_p, :sehcount - attr_accessor :safeseh - end + attr_accessor :safeseh + end - class DelayImportDirectory < SerialStruct - words :attributes, :libname_p, :handle_p, :iat_p, :int_p, :biat_p, :uiat_p, :timestamp + class DelayImportDirectory < SerialStruct + words :attributes, :libname_p, :handle_p, :iat_p, :int_p, :biat_p, :uiat_p, :timestamp - attr_accessor :libname - end + attr_accessor :libname + end - # structure defining entrypoints and stuff for .net binaries - class Cor20Header < SerialStruct - word :size - halfs :major_version, :minor_version # runtime version - words :metadata_rva, :metadata_sz - word :flags - fld_bits :flags, COMIMAGE_FLAGS - word :entrypoint # RVA to native or managed ep, depending on flags - words :resources_rva, :resources_sz - words :strongnamesig_rva, :strongnamesig_sz - words :codemgr_rva, :codemgr_sz - words :vtfixup_rva, :vtfixup_sz - words :eatjumps_rva, :eatjumps_sz - words :managednativehdr_rva, :managednativehdr_sz + # structure defining entrypoints and stuff for .net binaries + class Cor20Header < SerialStruct + word :size + halfs :major_version, :minor_version # runtime version + words :metadata_rva, :metadata_sz + word :flags + fld_bits :flags, COMIMAGE_FLAGS + word :entrypoint # RVA 
to native or managed ep, depending on flags + words :resources_rva, :resources_sz + words :strongnamesig_rva, :strongnamesig_sz + words :codemgr_rva, :codemgr_sz + words :vtfixup_rva, :vtfixup_sz + words :eatjumps_rva, :eatjumps_sz + words :managednativehdr_rva, :managednativehdr_sz - attr_accessor :metadata, :resources, :strongnamesig, :codemgr, :vtfixup, :eatjumps, :managednativehdr - end + attr_accessor :metadata, :resources, :strongnamesig, :codemgr, :vtfixup, :eatjumps, :managednativehdr + end - # for the icon, the one that appears in the explorer is - # (NT) the one with the lowest ID - # (98) the first to appear in the table - class ResourceDirectory - def to_hash(depth=0) - map = case depth - when 0; TYPE - when 1; {} # resource-id - when 2; {} # lang - else {} - end - @entries.inject({}) { |h, e| - k = e.id ? map.fetch(e.id, e.id) : e.name ? e.name : e.name_w - v = e.subdir ? e.subdir.to_hash(depth+1) : e.data - h.update k => v - } - end + # for the icon, the one that appears in the explorer is + # (NT) the one with the lowest ID + # (98) the first to appear in the table + class ResourceDirectory + def to_hash(depth=0) + map = case depth + when 0; TYPE + when 1; {} # resource-id + when 2; {} # lang + else {} + end + @entries.inject({}) { |h, e| + k = e.id ? map.fetch(e.id, e.id) : e.name ? e.name : e.name_w + v = e.subdir ? e.subdir.to_hash(depth+1) : e.data + h.update k => v + } + end - def self.from_hash(h, depth=0) - map = case depth - when 0; TYPE - when 1; {} # resource-id - when 2; {} # lang - else {} - end - ret = new - ret.entries = h.map { |k, v| - e = Entry.new - k.kind_of?(Integer) ? (e.id = k) : map.index(k) ? (e.id = map.index(k)) : (e.name = k) # name_w ? - v.kind_of?(Hash) ? 
(e.subdir = from_hash(v, depth+1)) : (e.data = v) - e - } - ret - end + def self.from_hash(h, depth=0) + map = case depth + when 0; TYPE + when 1; {} # resource-id + when 2; {} # lang + else {} + end + ret = new + ret.entries = h.map { |k, v| + e = Entry.new + k.kind_of?(Integer) ? (e.id = k) : map.index(k) ? (e.id = map.index(k)) : (e.name = k) # name_w ? + v.kind_of?(Hash) ? (e.subdir = from_hash(v, depth+1)) : (e.data = v) + e + } + ret + end - # returns a string with the to_hash key tree - def to_s - to_s_a(0).join("\n") - end + # returns a string with the to_hash key tree + def to_s + to_s_a(0).join("\n") + end - def to_s_a(depth) - @entries.map { |e| - ar = [] - ar << if e.id - if depth == 0 and TYPE.has_key?(e.id); "#{e.id.to_s} (#{TYPE[e.id]})".ljust(18) - else e.id.to_s.ljust(5) - end - else (e.name || e.name_w).inspect - end - if e.subdir - sa = e.subdir.to_s_a(depth+1) - if sa.length == 1 - ar.last << " | #{sa.first}" - else - ar << sa.map { |s| ' ' + s } - end - elsif e.data.length > 16 - ar.last << " #{e.data[0, 8].inspect}... <#{e.data.length} bytes>" - else - ar.last << ' ' << e.data.inspect - end - ar - }.flatten - end + def to_s_a(depth) + @entries.map { |e| + ar = [] + ar << if e.id + if depth == 0 and TYPE.has_key?(e.id); "#{e.id.to_s} (#{TYPE[e.id]})".ljust(18) + else e.id.to_s.ljust(5) + end + else (e.name || e.name_w).inspect + end + if e.subdir + sa = e.subdir.to_s_a(depth+1) + if sa.length == 1 + ar.last << " | #{sa.first}" + else + ar << sa.map { |s| ' ' + s } + end + elsif e.data.length > 16 + ar.last << " #{e.data[0, 8].inspect}... 
<#{e.data.length} bytes>" + else + ar.last << ' ' << e.data.inspect + end + ar + }.flatten + end - TYPE = { - 1 => 'CURSOR', 2 => 'BITMAP', 3 => 'ICON', 4 => 'MENU', - 5 => 'DIALOG', 6 => 'STRING', 7 => 'FONTDIR', 8 => 'FONT', - 9 => 'ACCELERATOR', 10 => 'RCADATA', 11 => 'MESSAGETABLE', - 12 => 'GROUP_CURSOR', 14 => 'GROUP_ICON', 16 => 'VERSION', - 17 => 'DLGINCLUDE', 19 => 'PLUGPLAY', 20 => 'VXD', - 21 => 'ANICURSOR', 22 => 'ANIICON', 23 => 'HTML', - 24 => 'MANIFEST' - } + TYPE = { + 1 => 'CURSOR', 2 => 'BITMAP', 3 => 'ICON', 4 => 'MENU', + 5 => 'DIALOG', 6 => 'STRING', 7 => 'FONTDIR', 8 => 'FONT', + 9 => 'ACCELERATOR', 10 => 'RCADATA', 11 => 'MESSAGETABLE', + 12 => 'GROUP_CURSOR', 14 => 'GROUP_ICON', 16 => 'VERSION', + 17 => 'DLGINCLUDE', 19 => 'PLUGPLAY', 20 => 'VXD', + 21 => 'ANICURSOR', 22 => 'ANIICON', 23 => 'HTML', + 24 => 'MANIFEST' + } - ACCELERATOR_BITS = { - 1 => 'VIRTKEY', 2 => 'NOINVERT', 4 => 'SHIFT', 8 => 'CTRL', - 16 => 'ALT', 128 => 'LAST' - } + ACCELERATOR_BITS = { + 1 => 'VIRTKEY', 2 => 'NOINVERT', 4 => 'SHIFT', 8 => 'CTRL', + 16 => 'ALT', 128 => 'LAST' + } - # cursor = raw data, cursor_group = header , pareil pour les icons - class Cursor - attr_accessor :xhotspot, :yhotspot, :data - end - end + # cursor = raw data, cursor_group = header , pareil pour les icons + class Cursor + attr_accessor :xhotspot, :yhotspot, :data + end + end - attr_accessor :header, :optheader, :directory, :sections, :endianness, :symbols, :bitsize, - :export, :imports, :resource, :certificates, :relocations, :debug, :tls, :loadconfig, :delayimports, :com_header + attr_accessor :header, :optheader, :directory, :sections, :endianness, :symbols, :bitsize, + :export, :imports, :resource, :certificates, :relocations, :debug, :tls, :loadconfig, :delayimports, :com_header - # boolean, set to true to have #decode() ignore the base_relocs directory - attr_accessor :nodecode_relocs + # boolean, set to true to have #decode() ignore the base_relocs directory + attr_accessor 
:nodecode_relocs - def initialize(*a) - cpu = a.grep(CPU).first - @nodecode_relocs = true if a.include? :nodecode_relocs + def initialize(*a) + cpu = a.grep(CPU).first + @nodecode_relocs = true if a.include? :nodecode_relocs - @directory = {} # DIRECTORIES.key => [rva, size] - @sections = [] - @endianness = cpu ? cpu.endianness : :little - @bitsize = cpu ? cpu.size : 32 - @header = Header.new - @optheader = OptionalHeader.new - super(cpu) - end + @directory = {} # DIRECTORIES.key => [rva, size] + @sections = [] + @endianness = cpu ? cpu.endianness : :little + @bitsize = cpu ? cpu.size : 32 + @header = Header.new + @optheader = OptionalHeader.new + super(cpu) + end - def shortname; 'coff'; end + def shortname; 'coff'; end + + def sizeof_byte ; 1 ; end + def sizeof_half ; 2 ; end + def sizeof_word ; 4 ; end + def sizeof_xword ; @bitsize == 32 ? 4 : 8 ; end end # the COFF archive file format # maybe used in .lib files (they hold binary import information for libraries) # used for unix .a static library files (with no 2nd linker and newline-separated longnames) class COFFArchive < ExeFormat - class Member < SerialStruct - mem :name, 16 - mem :date, 12 - mem :uid, 6 - mem :gid, 6 - mem :mode, 8 - mem :size, 10 - mem :eoh, 2 + class Member < SerialStruct + mem :name, 16 + mem :date, 12 + mem :uid, 6 + mem :gid, 6 + mem :mode, 8 + mem :size, 10 + mem :eoh, 2 - attr_accessor :offset, :encoded - end + attr_accessor :offset, :encoded + end - class ImportHeader < SerialStruct - halfs :sig1, :sig2, :version, :machine - words :timestamp, :size_of_data - half :hint - bitfield :half, 0 => :reserved, 11 => :name_type, 14 => :type - #fld_enum :type, IMPORT_TYPE - #fld_enum :name_type, NAME_TYPE - strz :symname - strz :libname - end + class ImportHeader < SerialStruct + halfs :sig1, :sig2, :version, :machine + words :timestamp, :size_of_data + half :hint + bitfield :half, 0 => :reserved, 11 => :name_type, 14 => :type + #fld_enum :type, IMPORT_TYPE + #fld_enum :name_type, NAME_TYPE + 
strz :symname + strz :libname + end - attr_accessor :members, :signature, :first_linker, :second_linker, :longnames + attr_accessor :members, :signature, :first_linker, :second_linker, :longnames - # return the 1st member whose name is name - def member(name) - @members.find { |m| m.name == name } - end + # return the 1st member whose name is name + def member(name) + @members.find { |m| m.name == name } + end + + def sizeof_half ; 2 ; end + def sizeof_word ; 4 ; end end end diff --git a/lib/metasm/metasm/exe_format/coff_decode.rb b/lib/metasm/metasm/exe_format/coff_decode.rb index 979fca8b0b..1dc388bafd 100644 --- a/lib/metasm/metasm/exe_format/coff_decode.rb +++ b/lib/metasm/metasm/exe_format/coff_decode.rb @@ -9,925 +9,926 @@ require 'metasm/exe_format/coff' unless defined? Metasm::COFF module Metasm class COFF - class OptionalHeader - decode_hook(:entrypoint) { |coff, ohdr| - coff.bitsize = (ohdr.signature == 'PE+' ? 64 : 32) - } - - # decodes a COFF optional header from coff.cursection - # also decodes directories in coff.directory - def decode(coff) - return set_default_values(coff) if coff.header.size_opthdr == 0 and not coff.header.characteristics.include?('EXECUTABLE_IMAGE') - off = coff.curencoded.ptr - super(coff) - nrva = (coff.header.size_opthdr - (coff.curencoded.ptr - off)) / 8 - nrva = @numrva if nrva < 0 - - if nrva > DIRECTORIES.length or nrva != @numrva - puts "W: COFF: Weird directories count #{@numrva}" if $VERBOSE - nrva = DIRECTORIES.length if nrva > DIRECTORIES.length - end - - coff.directory = {} - DIRECTORIES[0, nrva].each { |dir| - rva = coff.decode_word - sz = coff.decode_word - if rva != 0 or sz != 0 - coff.directory[dir] = [rva, sz] - end - } - end - end - - class Symbol - def decode(coff, strtab='') - n0, n1 = coff.decode_word, coff.decode_word - coff.encoded.ptr -= 8 - - super(coff) - - if n0 == 0 and ne = strtab.index(?\0, n1) - @name = strtab[n1...ne] - end - return if @nr_aux == 0 - - @aux = [] - @nr_aux.times { @aux << 
coff.encoded.read(18) } - end - end - - class Section - def decode(coff) - super(coff) - coff.decode_section_body(self) - end - end - - class RelocObj - def decode(coff) - super(coff) - @sym = coff.symbols[@symidx] - end - end - - class ExportDirectory - # decodes a COFF export table from coff.cursection - def decode(coff) - super(coff) - - if coff.sect_at_rva(@libname_p) - @libname = coff.decode_strz - end - - if coff.sect_at_rva(@func_p) - @exports = [] - addrs = [] - @num_exports.times { addrs << coff.decode_word } - @num_exports.times { |i| - e = Export.new - e.ordinal = i + @ordinal_base - addr = addrs[i] - if addr >= coff.directory['export_table'][0] and addr < coff.directory['export_table'][0] + coff.directory['export_table'][1] and coff.sect_at_rva(addr) - name = coff.decode_strz - e.forwarder_lib, name = name.split('.', 2) - if name[0] == ?# - e.forwarder_ordinal = name[1..-1].to_i - else - e.forwarder_name = name - end - else - e.target = e.target_rva = addr - end - @exports << e - } - end - if coff.sect_at_rva(@names_p) - namep = [] - num_names.times { namep << coff.decode_word } - end - if coff.sect_at_rva(@ord_p) - ords = [] - num_names.times { ords << coff.decode_half } - end - if namep and ords - namep.zip(ords).each { |np, oi| - @exports[oi].name_p = np - if coff.sect_at_rva(np) - @exports[oi].name = coff.decode_strz - end - } - end - end - end - - class ImportDirectory - # decodes all COFF import directories from coff.cursection - def self.decode_all(coff) - ret = [] - loop do - idata = decode(coff) - break if [idata.ilt_p, idata.libname_p].uniq == [0] - ret << idata - end - ret.each { |idata| idata.decode_inner(coff) } - ret - end - - # decode the tables referenced - def decode_inner(coff) - if coff.sect_at_rva(@libname_p) - @libname = coff.decode_strz - end - - if coff.sect_at_rva(@ilt_p) || coff.sect_at_rva(@iat_p) - addrs = [] - while (a_ = coff.decode_xword) != 0 - addrs << a_ - end - - @imports = [] - - ord_mask = 1 << (coff.bitsize-1) - 
addrs.each { |a| - i = Import.new - if (a & ord_mask) != 0 - i.ordinal = a & (~ord_mask) - else - i.hintname_p = a - if coff.sect_at_rva(a) - i.hint = coff.decode_half - i.name = coff.decode_strz - end - end - @imports << i - } - end - - if coff.sect_at_rva(@iat_p) - @iat = [] - while (a = coff.decode_xword) != 0 - @iat << a - end - end - end - end - - class ResourceDirectory - def decode(coff, edata = coff.curencoded, startptr = edata.ptr, maxdepth=3) - super(coff, edata) - - @entries = [] - - nrnames = @nr_names if $DEBUG - (@nr_names+@nr_id).times { - e = Entry.new - - e_id = coff.decode_word(edata) - e_ptr = coff.decode_word(edata) - - if not e_id.kind_of? Integer or not e_ptr.kind_of? Integer - puts 'W: COFF: relocs in the rsrc directory?' if $VERBOSE - next - end - - tmp = edata.ptr - - if (e_id >> 31) == 1 - if $DEBUG - nrnames -= 1 - puts "W: COFF: rsrc has invalid id #{e_id}" if nrnames < 0 - end - e.name_p = e_id & 0x7fff_ffff - edata.ptr = startptr + e.name_p - namelen = coff.decode_half(edata) - e.name_w = edata.read(2*namelen) - if (chrs = e.name_w.unpack('v*')).all? 
{ |c| c >= 0 and c <= 255 } - e.name = chrs.pack('C*') - end - else - if $DEBUG - puts "W: COFF: rsrc has invalid id #{e_id}" if nrnames > 0 - end - e.id = e_id - end - - if (e_ptr >> 31) == 1 # subdir - e.subdir_p = e_ptr & 0x7fff_ffff - if startptr + e.subdir_p >= edata.length - puts 'W: COFF: invalid resource structure: directory too far' if $VERBOSE - elsif maxdepth > 0 - edata.ptr = startptr + e.subdir_p - e.subdir = ResourceDirectory.new - e.subdir.decode coff, edata, startptr, maxdepth-1 - else - puts 'W: COFF: recursive resource section' if $VERBOSE - end - else - e.dataentry_p = e_ptr - edata.ptr = startptr + e.dataentry_p - e.data_p = coff.decode_word(edata) - sz = coff.decode_word(edata) - e.codepage = coff.decode_word(edata) - e.reserved = coff.decode_word(edata) - - if coff.sect_at_rva(e.data_p) - e.data = coff.curencoded.read(sz) - else - puts 'W: COFF: invalid resource body offset' if $VERBOSE - break - end - end - - edata.ptr = tmp - @entries << e - } - end - - def decode_version(coff, lang=nil) - vers = {} - - decode_tllv = lambda { |ed, state| - sptr = ed.ptr - len, vlen = coff.decode_half(ed), coff.decode_half(ed) - coff.decode_half(ed) # type - tagname = '' - while c = coff.decode_half(ed) and c != 0 - tagname << (c&255) - end - ed.ptr = (ed.ptr + 3) / 4 * 4 - - case state - when 0 - raise if tagname != 'VS_VERSION_INFO' - dat = ed.read(vlen) - dat.unpack('V*').zip([:signature, :strucversion, :fileversionm, :fileversionl, :prodversionm, :prodversionl, :fileflagsmask, :fileflags, :fileos, :filetype, :filesubtype, :filedatem, :filedatel]) { |v, k| vers[k] = v } - raise if vers[:signature] != 0xfeef04bd - vers.delete :signature - vers[:fileversion] = (vers.delete(:fileversionm) << 32) | vers.delete(:fileversionl) - vers[:prodversion] = (vers.delete(:prodversionm) << 32) | vers.delete(:prodversionl) - vers[:filedate] = (vers.delete(:filedatem) << 32) | vers.delete(:filedatel) - nstate = 1 - when 1 - nstate = case tagname - when 'StringFileInfo'; 
:strtable - when 'VarFileInfo'; :var - else raise - end - when :strtable - nstate = :str - when :str - val = ed.read(vlen*2).unpack('v*') - val.pop if val[-1] == 0 - val = val.pack('C*') if val.all? { |c_| c_ > 0 and c_ < 256 } - vers[tagname] = val - when :var - val = ed.read(vlen).unpack('V*') - vers[tagname] = val - end - - ed.ptr = (ed.ptr + 3) / 4 * 4 - len = ed.length-sptr if len > ed.length-sptr - while ed.ptr < sptr+len - decode_tllv[ed, nstate] - ed.ptr = (ed.ptr + 3) / 4 * 4 - end - } - - return if not e = @entries.find { |e_| e_.id == TYPE.index('VERSION') } - e = e.subdir.entries.first.subdir - e = e.entries.find { |e_| e_.id == lang } || e.entries.first - ed = EncodedData.new(e.data) - decode_tllv[ed, 0] - - vers - #rescue - end - end - - class RelocationTable - # decodes a relocation table from coff.encoded.ptr - def decode(coff) - super(coff) - len = coff.decode_word - len -= 8 - if len < 0 or len % 2 != 0 - puts "W: COFF: Invalid relocation table length #{len+8}" if $VERBOSE - coff.curencoded.read(len) if len > 0 - @relocs = [] - return - end - - @relocs = coff.curencoded.read(len).unpack(coff.endianness == :big ? 
'n*' : 'v*').map { |r| Relocation.new(r&0xfff, r>>12) } - #(len/2).times { @relocs << Relocation.decode(coff) } # tables may be big, this is too slow - end - end - - class TLSDirectory - def decode(coff) - super(coff) - - if coff.sect_at_va(@callback_p) - @callbacks = [] - while (ptr = coff.decode_xword) != 0 - # __stdcall void (*ptr)(void* dllhandle, dword reason, void* reserved) - # (same as dll entrypoint) - @callbacks << (ptr - coff.optheader.image_base) - end - end - end - end - - class LoadConfig - def decode(coff) - super(coff) - - if @sehcount >= 0 and @sehcount < 100 and (@signature == 0x40 or @signature == 0x48) and coff.sect_at_va(@sehtable_p) - @safeseh = [] - @sehcount.times { @safeseh << coff.decode_xword } - end - end - end - - class DelayImportDirectory - def self.decode_all(coff) - ret = [] - loop do - didata = decode(coff) - break if [didata.libname_p, didata.handle_p, didata.iat_p].uniq == [0] - ret << didata - end - ret.each { |didata| didata.decode_inner(coff) } - ret - end - - def decode_inner(coff) - if coff.sect_at_rva(@libname_p) - @libname = coff.decode_strz - end - # TODO - end - end - - class Cor20Header - def decode_all(coff) - if coff.sect_at_rva(@metadata_rva) - @metadata = coff.curencoded.read(@metadata_sz) - end - if coff.sect_at_rva(@resources_rva) - @resources = coff.curencoded.read(@resources_sz) - end - if coff.sect_at_rva(@strongnamesig_rva) - @strongnamesig = coff.curencoded.read(@strongnamesig_sz) - end - if coff.sect_at_rva(@codemgr_rva) - @codemgr = coff.curencoded.read(@codemgr_sz) - end - if coff.sect_at_rva(@vtfixup_rva) - @vtfixup = coff.curencoded.read(@vtfixup_sz) - end - if coff.sect_at_rva(@eatjumps_rva) - @eatjumps = coff.curencoded.read(@eatjumps_sz) - end - if coff.sect_at_rva(@managednativehdr_rva) - @managednativehdr = coff.curencoded.read(@managednativehdr_sz) - end - end - end - - class DebugDirectory - def decode_inner(coff) - case @type - when 'CODEVIEW' - # XXX what is @pointer? 
- return if not coff.sect_at_rva(@addr) - sig = coff.curencoded.read(4) - case sig - when 'NB09' # CodeView 4.10 - when 'NB10' # external pdb2.0 - @data = NB10.decode(coff) - when 'NB11' # CodeView 5.0 - when 'RSDS' # external pdb7.0 - @data = RSDS.decode(coff) - end - end - end - end - - attr_accessor :cursection - def curencoded - @cursection.encoded - end - - def decode_byte( edata = curencoded) ; edata.decode_imm(:u8, @endianness) end - def decode_half( edata = curencoded) ; edata.decode_imm(:u16, @endianness) end - def decode_word( edata = curencoded) ; edata.decode_imm(:u32, @endianness) end - def decode_xword(edata = curencoded) ; edata.decode_imm((@bitsize == 32 ? :u32 : :u64), @endianness) end - def decode_strz( edata = curencoded) ; super(edata) ; end - - # converts an RVA (offset from base address of file when loaded in memory) to the section containing it using the section table - # updates @cursection and @cursection.encoded.ptr to point to the specified address - # may return self when rva points to the coff header - # returns nil if none match, 0 never matches - def sect_at_rva(rva) - return if not rva or rva <= 0 - if sections and not @sections.empty? - if s = @sections.find { |s_| s_.virtaddr <= rva and s_.virtaddr + EncodedData.align_size((s_.virtsize == 0 ? s_.rawsize : s_.virtsize), @optheader.sect_align) > rva } - s.encoded.ptr = rva - s.virtaddr - @cursection = s - elsif rva < @sections.map { |s_| s_.virtaddr }.min - @encoded.ptr = rva - @cursection = self - end - elsif rva <= @encoded.length - @encoded.ptr = rva - @cursection = self - end - end - - def sect_at_va(va) - sect_at_rva(va - @optheader.image_base) - end - - def label_rva(name) - if name.kind_of? 
Integer - name - elsif s = @sections.find { |s_| s_.encoded.export[name] } - s.virtaddr + s.encoded.export[name] - else - @encoded.export[name] - end - end - - # address -> file offset - # handles LoadedPE - def addr_to_fileoff(addr) - addr -= @load_address ||= @optheader.image_base - return 0 if addr == 0 # sect_at_rva specialcases 0 - if s = sect_at_rva(addr) - if s.respond_to? :virtaddr - addr - s.virtaddr + s.rawaddr - else # header - addr - end - end - end - - # file offset -> memory address - # handles LoadedPE - def fileoff_to_addr(foff) - if s = @sections.find { |s_| s_.rawaddr <= foff and s_.rawaddr + s_.rawsize > foff } - s.virtaddr + foff - s.rawaddr + (@load_address ||= @optheader.image_base) - elsif foff >= 0 and foff < @optheader.headers_size - foff + (@load_address ||= @optheader.image_base) - end - end - - def each_section - if @header.size_opthdr == 0 and not @header.characteristics.include?('EXECUTABLE_IMAGE') - @sections.each { |s| - next if not s.encoded - l = new_label(s.name) - s.encoded.add_export(l, 0) - yield s.encoded, l - } - return - end - base = @optheader.image_base - base = 0 if not base.kind_of? Integer - sz = @optheader.headers_size - sz = EncodedData.align_size(@optheader.image_size, 4096) if @sections.empty? - yield @encoded[0, sz], base - @sections.each { |s| yield s.encoded, base + s.virtaddr } - end - - # decodes the COFF header, optional header, section headers - # marks entrypoint and directories as edata.expord - def decode_header - @cursection ||= self - @encoded.ptr ||= 0 - @sections = [] - @header.decode(self) - optoff = @encoded.ptr - @optheader.decode(self) - decode_symbols if @header.num_sym != 0 and not @header.characteristics.include? 
'DEBUG_STRIPPED' - curencoded.ptr = optoff + @header.size_opthdr - decode_sections - if sect_at_rva(@optheader.entrypoint) - curencoded.add_export new_label('entrypoint') - end - (DIRECTORIES - ['certificate_table']).each { |d| - if @directory[d] and sect_at_rva(@directory[d][0]) - curencoded.add_export new_label(d) - end - } - end - - # decode the COFF symbol table (obj only) - def decode_symbols - endptr = @encoded.ptr = @header.ptr_sym + 18*@header.num_sym - strlen = decode_word - @encoded.ptr = endptr - strtab = @encoded.read(strlen) - @encoded.ptr = @header.ptr_sym - @symbols = [] - @header.num_sym.times { - break if @encoded.ptr >= endptr or @encoded.ptr >= @encoded.length - @symbols << Symbol.decode(self, strtab) - # keep the reloc.sym_idx accurate - @symbols.last.nr_aux.times { @symbols << nil } - } - end - - # decode the COFF sections - def decode_sections - @header.num_sect.times { - @sections << Section.decode(self) - } - # now decode COFF object relocations - @sections.each { |s| - next if s.relocnr == 0 - curencoded.ptr = s.relocaddr - s.relocs = [] - s.relocnr.times { s.relocs << RelocObj.decode(self) } - new_label 'pcrel' - s.relocs.each { |r| - case r.type - when 'DIR32' - s.encoded.reloc[r.va] = Metasm::Relocation.new(Expression[r.sym.name], :u32, @endianness) - when 'REL32' - l = new_label('pcrel') - s.encoded.add_export(l, r.va+4) - s.encoded.reloc[r.va] = Metasm::Relocation.new(Expression[r.sym.name, :-, l], :u32, @endianness) - end - } - } if not @header.characteristics.include?('RELOCS_STRIPPED') - symbols.to_a.compact.each { |sym| - next if not sym.sec_nr.kind_of? 
Integer - next if sym.storage != 'EXTERNAL' and (sym.storage != 'STATIC' or sym.value == 0) - next if not s = @sections[sym.sec_nr-1] - s.encoded.add_export new_label(sym.name), sym.value - } - end - - # decodes a section content (allows simpler LoadedPE override) - def decode_section_body(s) - raw = EncodedData.align_size(s.rawsize, @optheader.file_align) - virt = s.virtsize - virt = raw = s.rawsize if @header.size_opthdr == 0 - virt = raw if virt == 0 - virt = EncodedData.align_size(virt, @optheader.sect_align) - s.encoded = @encoded[s.rawaddr, [raw, virt].min] || EncodedData.new - s.encoded.virtsize = virt - end - - # decodes COFF export table from directory - # mark exported names as encoded.export - def decode_exports - if @directory['export_table'] and sect_at_rva(@directory['export_table'][0]) - @export = ExportDirectory.decode(self) - @export.exports.to_a.each { |e| - if e.name and sect_at_rva(e.target) - name = e.name - elsif e.ordinal and sect_at_rva(e.target) - name = "ord_#{@export.libname}_#{e.ordinal}" - end - e.target = curencoded.add_export new_label(name) if name - } - end - end - - # decodes COFF import tables from directory - # mark iat entries as encoded.export - def decode_imports - if @directory['import_table'] and sect_at_rva(@directory['import_table'][0]) - @imports = ImportDirectory.decode_all(self) - iatlen = @bitsize/8 - @imports.each { |id| - if sect_at_rva(id.iat_p) - ptr = curencoded.ptr - id.imports.each { |i| - if i.name - name = new_label i.name - elsif i.ordinal - name = new_label "ord_#{id.libname}_#{i.ordinal}" - end - if name - i.target ||= name - r = Metasm::Relocation.new(Expression[name], "u#@bitsize".to_sym, @endianness) - curencoded.reloc[ptr] = r - curencoded.add_export new_label('iat_'+name), ptr, true - end - ptr += iatlen - } - end - } - end - end - - # decodes resources from directory - def decode_resources - if @directory['resource_table'] and sect_at_rva(@directory['resource_table'][0]) - @resource = 
ResourceDirectory.decode(self) - end - end - - # decode the VERSION information from the resources (file version, os, copyright etc) - def decode_version(lang=0x409) - decode_resources if not resource - resource.decode_version(self, lang) - end - - # decodes certificate table - def decode_certificates - if ct = @directory['certificate_table'] - @certificates = [] - @cursection = self - if ct[0] > @encoded.length or ct[1] > @encoded.length - ct[0] - puts "W: COFF: invalid certificate_table #{'0x%X+0x%0X' % ct}" if $VERBOSE - ct = [ct[0], 1] - end - @encoded.ptr = ct[0] - off_end = ct[0]+ct[1] - off_end = @encoded.length if off_end > @encoded.length - while @encoded.ptr < off_end - certlen = decode_word - certrev = decode_half - certtype = decode_half - certdat = @encoded.read(certlen) - @certificates << [certrev, certtype, certdat] - end - end - end - - # decode the COM Cor20 header - def decode_com - if @directory['com_runtime'] and sect_at_rva(@directory['com_runtime'][0]) - @com_header = Cor20Header.decode(self) - if sect_at_rva(@com_header.entrypoint) - curencoded.add_export new_label('com_entrypoint') - end - @com_header.decode_all(self) - end - end - - # decode COFF relocation tables from directory - def decode_relocs - if @directory['base_relocation_table'] and sect_at_rva(@directory['base_relocation_table'][0]) - end_ptr = curencoded.ptr + @directory['base_relocation_table'][1] - @relocations = [] - while curencoded.ptr < end_ptr - @relocations << RelocationTable.decode(self) - end - - # interpret as EncodedData relocations - relocfunc = ('decode_reloc_' << @header.machine.downcase).to_sym - if not respond_to? 
relocfunc - puts "W: COFF: unsupported relocs for architecture #{@header.machine}" if $VERBOSE - return - end - @relocations.each { |rt| - rt.relocs.each { |r| - if s = sect_at_rva(rt.base_addr + r.offset) - e, p = s.encoded, s.encoded.ptr - rel = send(relocfunc, r) - e.reloc[p] = rel if rel - end - } - } - end - end - - # decodes an I386 COFF relocation pointing to encoded.ptr - def decode_reloc_i386(r) - case r.type - when 'ABSOLUTE' - when 'HIGHLOW' - addr = decode_word - if s = sect_at_va(addr) - label = label_at(s.encoded, s.encoded.ptr, "xref_#{Expression[addr]}") - Metasm::Relocation.new(Expression[label], :u32, @endianness) - end - when 'DIR64' - addr = decode_xword - if s = sect_at_va(addr) - label = label_at(s.encoded, s.encoded.ptr, "xref_#{Expression[addr]}") - Metasm::Relocation.new(Expression[label], :u64, @endianness) - end - else puts "W: COFF: Unsupported i386 relocation #{r.inspect}" if $VERBOSE - end - end - - def decode_reloc_amd64(r) - case r.type - when 'ABSOLUTE' - when 'HIGHLOW' - addr = decode_word - if s = sect_at_va(addr) - label = label_at(s.encoded, s.encoded.ptr, "xref_#{Expression[addr]}") - Metasm::Relocation.new(Expression[label], :u32, @endianness) - end - when 'DIR64' - addr = decode_xword - if s = sect_at_va(addr) - label = label_at(s.encoded, s.encoded.ptr, "xref_#{Expression[addr]}") - Metasm::Relocation.new(Expression[label], :u64, @endianness) - end - else puts "W: COFF: Unsupported amd64 relocation #{r.inspect}" if $VERBOSE - end - end - - def decode_debug - if dd = @directory['debug'] and sect_at_rva(dd[0]) - @debug = [] - p0 = curencoded.ptr - while curencoded.ptr < p0 + dd[1] - @debug << DebugDirectory.decode(self) - end - @debug.each { |dbg| dbg.decode_inner(self) } - end - end - - # decode TLS directory, including tls callback table - def decode_tls - if @directory['tls_table'] and sect_at_rva(@directory['tls_table'][0]) - @tls = TLSDirectory.decode(self) - if s = sect_at_va(@tls.callback_p) - s.encoded.add_export 
'tls_callback_table' - @tls.callbacks.each_with_index { |cb, i| - @tls.callbacks[i] = curencoded.add_export "tls_callback_#{i}" if sect_at_rva(cb) - } - end - end - end - - def decode_loadconfig - if lc = @directory['load_config'] and sect_at_rva(lc[0]) - @loadconfig = LoadConfig.decode(self) - end - end - - def decode_delayimports - if di = @directory['delay_import_table'] and sect_at_rva(di[0]) - @delayimports = DelayImportDirectory.decode_all(self) - end - end - - - # decodes a COFF file (headers/exports/imports/relocs/sections) - # starts at encoded.ptr - def decode - decode_header - decode_exports - decode_imports - decode_resources - decode_certificates - decode_debug - decode_tls - decode_loadconfig - decode_delayimports - decode_com - decode_relocs unless nodecode_relocs or ENV['METASM_NODECODE_RELOCS'] # decode relocs last - end - - # returns a metasm CPU object corresponding to +header.machine+ - def cpu_from_headers - case @header.machine - when 'I386'; Ia32.new - when 'AMD64'; X86_64.new - when 'R4000'; MIPS.new(:little) - else raise "unknown cpu #{@header.machine}" - end - end - - # returns an array including the PE entrypoint and the exported functions entrypoints - # TODO filter out exported data, include safeseh ? - def get_default_entrypoints - ep = [] - ep.concat @tls.callbacks.to_a if tls - ep << (@optheader.image_base + label_rva(@optheader.entrypoint)) - @export.exports.to_a.each { |e| - next if e.forwarder_lib or not e.target - ep << (@optheader.image_base + label_rva(e.target)) - } if export - ep - end - - def dump_section_header(addr, edata) - s = @sections.find { |s_| s_.virtaddr == addr-@optheader.image_base } - s ? "\n.section #{s.name.inspect} base=#{Expression[addr]}" : - addr == @optheader.image_base ? 
"// exe header at #{Expression[addr]}" : super(addr, edata) - end - - # returns an array of [name, addr, length, info] - def section_info - [['header', @optheader.image_base, @optheader.headers_size, nil]] + - @sections.map { |s| - [s.name, @optheader.image_base + s.virtaddr, s.virtsize, s.characteristics.join(',')] - } - end + class OptionalHeader + decode_hook(:entrypoint) { |coff, ohdr| + coff.bitsize = (ohdr.signature == 'PE+' ? 64 : 32) + } + + # decodes a COFF optional header from coff.cursection + # also decodes directories in coff.directory + def decode(coff) + return set_default_values(coff) if coff.header.size_opthdr == 0 and not coff.header.characteristics.include?('EXECUTABLE_IMAGE') + off = coff.curencoded.ptr + super(coff) + nrva = (coff.header.size_opthdr - (coff.curencoded.ptr - off)) / 8 + nrva = @numrva if nrva < 0 + + if nrva > DIRECTORIES.length or nrva != @numrva + puts "W: COFF: Weird directories count #{@numrva}" if $VERBOSE + nrva = DIRECTORIES.length if nrva > DIRECTORIES.length + end + + coff.directory = {} + DIRECTORIES[0, nrva].each { |dir| + rva = coff.decode_word + sz = coff.decode_word + if rva != 0 or sz != 0 + coff.directory[dir] = [rva, sz] + end + } + end + end + + class Symbol + def decode(coff, strtab='') + n0, n1 = coff.decode_word, coff.decode_word + coff.encoded.ptr -= 8 + + super(coff) + + if n0 == 0 and ne = strtab.index(?\0, n1) + @name = strtab[n1...ne] + end + return if @nr_aux == 0 + + @aux = [] + @nr_aux.times { @aux << coff.encoded.read(18) } + end + end + + class Section + def decode(coff) + super(coff) + coff.decode_section_body(self) + end + end + + class RelocObj + def decode(coff) + super(coff) + @sym = coff.symbols[@symidx] + end + end + + class ExportDirectory + # decodes a COFF export table from coff.cursection + def decode(coff) + super(coff) + + if coff.sect_at_rva(@libname_p) + @libname = coff.decode_strz + end + + if coff.sect_at_rva(@func_p) + @exports = [] + addrs = [] + @num_exports.times { addrs << 
coff.decode_word } + @num_exports.times { |i| + e = Export.new + e.ordinal = i + @ordinal_base + addr = addrs[i] + if addr >= coff.directory['export_table'][0] and addr < coff.directory['export_table'][0] + coff.directory['export_table'][1] and coff.sect_at_rva(addr) + name = coff.decode_strz + e.forwarder_lib, name = name.split('.', 2) + if name[0] == ?# + e.forwarder_ordinal = name[1..-1].to_i + else + e.forwarder_name = name + end + else + e.target = e.target_rva = addr + end + @exports << e + } + end + if coff.sect_at_rva(@names_p) + namep = [] + num_names.times { namep << coff.decode_word } + end + if coff.sect_at_rva(@ord_p) + ords = [] + num_names.times { ords << coff.decode_half } + end + if namep and ords + namep.zip(ords).each { |np, oi| + @exports[oi].name_p = np + if coff.sect_at_rva(np) + @exports[oi].name = coff.decode_strz + end + } + end + end + end + + class ImportDirectory + # decodes all COFF import directories from coff.cursection + def self.decode_all(coff) + ret = [] + loop do + idata = decode(coff) + break if [idata.ilt_p, idata.libname_p].uniq == [0] + ret << idata + end + ret.each { |idata| idata.decode_inner(coff) } + ret + end + + # decode the tables referenced + def decode_inner(coff) + if coff.sect_at_rva(@libname_p) + @libname = coff.decode_strz + end + + if coff.sect_at_rva(@ilt_p) || coff.sect_at_rva(@iat_p) + addrs = [] + while (a_ = coff.decode_xword) != 0 + addrs << a_ + end + + @imports = [] + + ord_mask = 1 << (coff.bitsize-1) + addrs.each { |a| + i = Import.new + if (a & ord_mask) != 0 + i.ordinal = a & (~ord_mask) + else + i.hintname_p = a + if coff.sect_at_rva(a) + i.hint = coff.decode_half + i.name = coff.decode_strz + end + end + @imports << i + } + end + + if coff.sect_at_rva(@iat_p) + @iat = [] + while (a = coff.decode_xword) != 0 + @iat << a + end + end + end + end + + class ResourceDirectory + def decode(coff, edata = coff.curencoded, startptr = edata.ptr, maxdepth=3) + super(coff, edata) + + @entries = [] + + nrnames = 
@nr_names if $DEBUG + (@nr_names+@nr_id).times { + e = Entry.new + + e_id = coff.decode_word(edata) + e_ptr = coff.decode_word(edata) + + if not e_id.kind_of? Integer or not e_ptr.kind_of? Integer + puts 'W: COFF: relocs in the rsrc directory?' if $VERBOSE + next + end + + tmp = edata.ptr + + if (e_id >> 31) == 1 + if $DEBUG + nrnames -= 1 + puts "W: COFF: rsrc has invalid id #{e_id}" if nrnames < 0 + end + e.name_p = e_id & 0x7fff_ffff + edata.ptr = startptr + e.name_p + namelen = coff.decode_half(edata) + e.name_w = edata.read(2*namelen) + if (chrs = e.name_w.unpack('v*')).all? { |c| c >= 0 and c <= 255 } + e.name = chrs.pack('C*') + end + else + if $DEBUG + puts "W: COFF: rsrc has invalid id #{e_id}" if nrnames > 0 + end + e.id = e_id + end + + if (e_ptr >> 31) == 1 # subdir + e.subdir_p = e_ptr & 0x7fff_ffff + if startptr + e.subdir_p >= edata.length + puts 'W: COFF: invalid resource structure: directory too far' if $VERBOSE + elsif maxdepth > 0 + edata.ptr = startptr + e.subdir_p + e.subdir = ResourceDirectory.new + e.subdir.decode coff, edata, startptr, maxdepth-1 + else + puts 'W: COFF: recursive resource section' if $VERBOSE + end + else + e.dataentry_p = e_ptr + edata.ptr = startptr + e.dataentry_p + e.data_p = coff.decode_word(edata) + sz = coff.decode_word(edata) + e.codepage = coff.decode_word(edata) + e.reserved = coff.decode_word(edata) + + if coff.sect_at_rva(e.data_p) + e.data = coff.curencoded.read(sz) + else + puts 'W: COFF: invalid resource body offset' if $VERBOSE + break + end + end + + edata.ptr = tmp + @entries << e + } + end + + def decode_version(coff, lang=nil) + vers = {} + + decode_tllv = lambda { |ed, state| + sptr = ed.ptr + len, vlen = coff.decode_half(ed), coff.decode_half(ed) + coff.decode_half(ed) # type + tagname = '' + while c = coff.decode_half(ed) and c != 0 + tagname << (c&255) + end + ed.ptr = (ed.ptr + 3) / 4 * 4 + + case state + when 0 + raise if tagname != 'VS_VERSION_INFO' + dat = ed.read(vlen) + 
dat.unpack('V*').zip([:signature, :strucversion, :fileversionm, :fileversionl, :prodversionm, :prodversionl, :fileflagsmask, :fileflags, :fileos, :filetype, :filesubtype, :filedatem, :filedatel]) { |v, k| vers[k] = v } + raise if vers[:signature] != 0xfeef04bd + vers.delete :signature + vers[:fileversion] = (vers.delete(:fileversionm) << 32) | vers.delete(:fileversionl) + vers[:prodversion] = (vers.delete(:prodversionm) << 32) | vers.delete(:prodversionl) + vers[:filedate] = (vers.delete(:filedatem) << 32) | vers.delete(:filedatel) + nstate = 1 + when 1 + nstate = case tagname + when 'StringFileInfo'; :strtable + when 'VarFileInfo'; :var + else raise + end + when :strtable + nstate = :str + when :str + val = ed.read(vlen*2).unpack('v*') + val.pop if val[-1] == 0 + val = val.pack('C*') if val.all? { |c_| c_ > 0 and c_ < 256 } + vers[tagname] = val + when :var + val = ed.read(vlen).unpack('V*') + vers[tagname] = val + end + + ed.ptr = (ed.ptr + 3) / 4 * 4 + len = ed.length-sptr if len > ed.length-sptr + while ed.ptr < sptr+len + decode_tllv[ed, nstate] + ed.ptr = (ed.ptr + 3) / 4 * 4 + end + } + + return if not e = @entries.find { |e_| e_.id == TYPE.index('VERSION') } + e = e.subdir.entries.first.subdir + e = e.entries.find { |e_| e_.id == lang } || e.entries.first + ed = EncodedData.new(e.data) + decode_tllv[ed, 0] + + vers + #rescue + end + end + + class RelocationTable + # decodes a relocation table from coff.encoded.ptr + def decode(coff) + super(coff) + len = coff.decode_word + len -= 8 + if len < 0 or len % 2 != 0 + puts "W: COFF: Invalid relocation table length #{len+8}" if $VERBOSE + coff.curencoded.read(len) if len > 0 + @relocs = [] + return + end + + @relocs = coff.curencoded.read(len).unpack(coff.endianness == :big ? 
'n*' : 'v*').map { |r| Relocation.new(r&0xfff, r>>12) } + #(len/2).times { @relocs << Relocation.decode(coff) } # tables may be big, this is too slow + end + end + + class TLSDirectory + def decode(coff) + super(coff) + + if coff.sect_at_va(@callback_p) + @callbacks = [] + while (ptr = coff.decode_xword) != 0 + # __stdcall void (*ptr)(void* dllhandle, dword reason, void* reserved) + # (same as dll entrypoint) + @callbacks << (ptr - coff.optheader.image_base) + end + end + end + end + + class LoadConfig + def decode(coff) + super(coff) + + if @sehcount >= 0 and @sehcount < 100 and (@signature == 0x40 or @signature == 0x48) and coff.sect_at_va(@sehtable_p) + @safeseh = [] + @sehcount.times { @safeseh << coff.decode_xword } + end + end + end + + class DelayImportDirectory + def self.decode_all(coff) + ret = [] + loop do + didata = decode(coff) + break if [didata.libname_p, didata.handle_p, didata.iat_p].uniq == [0] + ret << didata + end + ret.each { |didata| didata.decode_inner(coff) } + ret + end + + def decode_inner(coff) + if coff.sect_at_rva(@libname_p) + @libname = coff.decode_strz + end + # TODO + end + end + + class Cor20Header + def decode_all(coff) + if coff.sect_at_rva(@metadata_rva) + @metadata = coff.curencoded.read(@metadata_sz) + end + if coff.sect_at_rva(@resources_rva) + @resources = coff.curencoded.read(@resources_sz) + end + if coff.sect_at_rva(@strongnamesig_rva) + @strongnamesig = coff.curencoded.read(@strongnamesig_sz) + end + if coff.sect_at_rva(@codemgr_rva) + @codemgr = coff.curencoded.read(@codemgr_sz) + end + if coff.sect_at_rva(@vtfixup_rva) + @vtfixup = coff.curencoded.read(@vtfixup_sz) + end + if coff.sect_at_rva(@eatjumps_rva) + @eatjumps = coff.curencoded.read(@eatjumps_sz) + end + if coff.sect_at_rva(@managednativehdr_rva) + @managednativehdr = coff.curencoded.read(@managednativehdr_sz) + end + end + end + + class DebugDirectory + def decode_inner(coff) + case @type + when 'CODEVIEW' + # XXX what is @pointer? 
+ return if not coff.sect_at_rva(@addr) + sig = coff.curencoded.read(4) + case sig + when 'NB09' # CodeView 4.10 + when 'NB10' # external pdb2.0 + @data = NB10.decode(coff) + when 'NB11' # CodeView 5.0 + when 'RSDS' # external pdb7.0 + @data = RSDS.decode(coff) + end + end + end + end + + attr_accessor :cursection + def curencoded + @cursection.encoded + end + + def decode_byte( edata = curencoded) ; edata.decode_imm(:u8, @endianness) end + def decode_half( edata = curencoded) ; edata.decode_imm(:u16, @endianness) end + def decode_word( edata = curencoded) ; edata.decode_imm(:u32, @endianness) end + def decode_xword(edata = curencoded) ; edata.decode_imm((@bitsize == 32 ? :u32 : :u64), @endianness) end + def decode_strz( edata = curencoded) ; super(edata) ; end + + # converts an RVA (offset from base address of file when loaded in memory) to the section containing it using the section table + # updates @cursection and @cursection.encoded.ptr to point to the specified address + # may return self when rva points to the coff header + # returns nil if none match, 0 never matches + def sect_at_rva(rva) + return if not rva or rva <= 0 + if sections and not @sections.empty? + if s = @sections.find { |s_| s_.virtaddr <= rva and s_.virtaddr + EncodedData.align_size((s_.virtsize == 0 ? s_.rawsize : s_.virtsize), @optheader.sect_align) > rva } + s.encoded.ptr = rva - s.virtaddr + @cursection = s + elsif rva < @sections.map { |s_| s_.virtaddr }.min + @encoded.ptr = rva + @cursection = self + end + elsif rva <= @encoded.length + @encoded.ptr = rva + @cursection = self + end + end + + def sect_at_va(va) + sect_at_rva(va - @optheader.image_base) + end + + def label_rva(name) + if name.kind_of? 
Integer + name + elsif s = @sections.find { |s_| s_.encoded.export[name] } + s.virtaddr + s.encoded.export[name] + else + @encoded.export[name] + end + end + + # address -> file offset + # handles LoadedPE + def addr_to_fileoff(addr) + addr -= @load_address ||= @optheader.image_base + return 0 if addr == 0 # sect_at_rva specialcases 0 + if s = sect_at_rva(addr) + if s.respond_to? :virtaddr + addr - s.virtaddr + s.rawaddr + else # header + addr + end + end + end + + # file offset -> memory address + # handles LoadedPE + def fileoff_to_addr(foff) + if s = @sections.find { |s_| s_.rawaddr <= foff and s_.rawaddr + s_.rawsize > foff } + s.virtaddr + foff - s.rawaddr + (@load_address ||= @optheader.image_base) + elsif foff >= 0 and foff < @optheader.headers_size + foff + (@load_address ||= @optheader.image_base) + end + end + + def each_section + if @header.size_opthdr == 0 and not @header.characteristics.include?('EXECUTABLE_IMAGE') + @sections.each { |s| + next if not s.encoded + l = new_label(s.name) + s.encoded.add_export(l, 0) + yield s.encoded, l + } + return + end + base = @optheader.image_base + base = 0 if not base.kind_of? Integer + sz = @optheader.headers_size + sz = EncodedData.align_size(@optheader.image_size, 4096) if @sections.empty? + yield @encoded[0, sz], base + @sections.each { |s| yield s.encoded, base + s.virtaddr } + end + + # decodes the COFF header, optional header, section headers + # marks entrypoint and directories as edata.expord + def decode_header + @cursection ||= self + @encoded.ptr ||= 0 + @sections = [] + @header.decode(self) + optoff = @encoded.ptr + @optheader.decode(self) + decode_symbols if @header.num_sym != 0 and not @header.characteristics.include? 
'DEBUG_STRIPPED' + curencoded.ptr = optoff + @header.size_opthdr + decode_sections + if sect_at_rva(@optheader.entrypoint) + curencoded.add_export new_label('entrypoint') + end + (DIRECTORIES - ['certificate_table']).each { |d| + if @directory[d] and sect_at_rva(@directory[d][0]) + curencoded.add_export new_label(d) + end + } + end + + # decode the COFF symbol table (obj only) + def decode_symbols + endptr = @encoded.ptr = @header.ptr_sym + 18*@header.num_sym + strlen = decode_word + @encoded.ptr = endptr + strtab = @encoded.read(strlen) + @encoded.ptr = @header.ptr_sym + @symbols = [] + @header.num_sym.times { + break if @encoded.ptr >= endptr or @encoded.ptr >= @encoded.length + @symbols << Symbol.decode(self, strtab) + # keep the reloc.sym_idx accurate + @symbols.last.nr_aux.times { @symbols << nil } + } + end + + # decode the COFF sections + def decode_sections + @header.num_sect.times { + @sections << Section.decode(self) + } + # now decode COFF object relocations + @sections.each { |s| + next if s.relocnr == 0 + curencoded.ptr = s.relocaddr + s.relocs = [] + s.relocnr.times { s.relocs << RelocObj.decode(self) } + new_label 'pcrel' + s.relocs.each { |r| + case r.type + when 'DIR32' + s.encoded.reloc[r.va] = Metasm::Relocation.new(Expression[r.sym.name], :u32, @endianness) + when 'REL32' + l = new_label('pcrel') + s.encoded.add_export(l, r.va+4) + s.encoded.reloc[r.va] = Metasm::Relocation.new(Expression[r.sym.name, :-, l], :u32, @endianness) + end + } + } if not @header.characteristics.include?('RELOCS_STRIPPED') + symbols.to_a.compact.each { |sym| + next if not sym.sec_nr.kind_of? 
Integer + next if sym.storage != 'EXTERNAL' and (sym.storage != 'STATIC' or sym.value == 0) + next if not s = @sections[sym.sec_nr-1] + s.encoded.add_export new_label(sym.name), sym.value + } + end + + # decodes a section content (allows simpler LoadedPE override) + def decode_section_body(s) + raw = EncodedData.align_size(s.rawsize, @optheader.file_align) + virt = s.virtsize + virt = raw = s.rawsize if @header.size_opthdr == 0 + virt = raw if virt == 0 + virt = EncodedData.align_size(virt, @optheader.sect_align) + s.encoded = @encoded[s.rawaddr, [raw, virt].min] || EncodedData.new + s.encoded.virtsize = virt + end + + # decodes COFF export table from directory + # mark exported names as encoded.export + def decode_exports + if @directory['export_table'] and sect_at_rva(@directory['export_table'][0]) + @export = ExportDirectory.decode(self) + @export.exports.to_a.each { |e| + if e.name and sect_at_rva(e.target) + name = e.name + elsif e.ordinal and sect_at_rva(e.target) + name = "ord_#{@export.libname}_#{e.ordinal}" + end + e.target = curencoded.add_export new_label(name) if name + } + end + end + + # decodes COFF import tables from directory + # mark iat entries as encoded.export + def decode_imports + if @directory['import_table'] and sect_at_rva(@directory['import_table'][0]) + @imports = ImportDirectory.decode_all(self) + iatlen = @bitsize/8 + @imports.each { |id| + if sect_at_rva(id.iat_p) + ptr = curencoded.ptr + id.imports.each { |i| + if i.name + name = new_label i.name + elsif i.ordinal + name = new_label "ord_#{id.libname}_#{i.ordinal}" + end + if name + i.target ||= name + r = Metasm::Relocation.new(Expression[name], "u#@bitsize".to_sym, @endianness) + curencoded.reloc[ptr] = r + curencoded.add_export new_label('iat_'+name), ptr, true + end + ptr += iatlen + } + end + } + end + end + + # decodes resources from directory + def decode_resources + if @directory['resource_table'] and sect_at_rva(@directory['resource_table'][0]) + @resource = 
ResourceDirectory.decode(self) + end + end + + # decode the VERSION information from the resources (file version, os, copyright etc) + def decode_version(lang=0x409) + decode_resources if not resource + resource.decode_version(self, lang) + end + + # decodes certificate table + def decode_certificates + if ct = @directory['certificate_table'] + @certificates = [] + @cursection = self + if ct[0] > @encoded.length or ct[1] > @encoded.length - ct[0] + puts "W: COFF: invalid certificate_table #{'0x%X+0x%0X' % ct}" if $VERBOSE + ct = [ct[0], 1] + end + @encoded.ptr = ct[0] + off_end = ct[0]+ct[1] + off_end = @encoded.length if off_end > @encoded.length + while @encoded.ptr < off_end + certlen = decode_word + certrev = decode_half + certtype = decode_half + certdat = @encoded.read(certlen) + @certificates << [certrev, certtype, certdat] + end + end + end + + # decode the COM Cor20 header + def decode_com + if @directory['com_runtime'] and sect_at_rva(@directory['com_runtime'][0]) + @com_header = Cor20Header.decode(self) + if sect_at_rva(@com_header.entrypoint) + curencoded.add_export new_label('com_entrypoint') + end + @com_header.decode_all(self) + end + end + + # decode COFF relocation tables from directory + def decode_relocs + if @directory['base_relocation_table'] and sect_at_rva(@directory['base_relocation_table'][0]) + end_ptr = curencoded.ptr + @directory['base_relocation_table'][1] + @relocations = [] + while curencoded.ptr < end_ptr + @relocations << RelocationTable.decode(self) + end + + # interpret as EncodedData relocations + relocfunc = ('decode_reloc_' << @header.machine.downcase).to_sym + if not respond_to? 
relocfunc + puts "W: COFF: unsupported relocs for architecture #{@header.machine}" if $VERBOSE + return + end + @relocations.each { |rt| + rt.relocs.each { |r| + if s = sect_at_rva(rt.base_addr + r.offset) + e, p = s.encoded, s.encoded.ptr + rel = send(relocfunc, r) + e.reloc[p] = rel if rel + end + } + } + end + end + + # decodes an I386 COFF relocation pointing to encoded.ptr + def decode_reloc_i386(r) + case r.type + when 'ABSOLUTE' + when 'HIGHLOW' + addr = decode_word + if s = sect_at_va(addr) + label = label_at(s.encoded, s.encoded.ptr, "xref_#{Expression[addr]}") + Metasm::Relocation.new(Expression[label], :u32, @endianness) + end + when 'DIR64' + addr = decode_xword + if s = sect_at_va(addr) + label = label_at(s.encoded, s.encoded.ptr, "xref_#{Expression[addr]}") + Metasm::Relocation.new(Expression[label], :u64, @endianness) + end + else puts "W: COFF: Unsupported i386 relocation #{r.inspect}" if $VERBOSE + end + end + + def decode_reloc_amd64(r) + case r.type + when 'ABSOLUTE' + when 'HIGHLOW' + addr = decode_word + if s = sect_at_va(addr) + label = label_at(s.encoded, s.encoded.ptr, "xref_#{Expression[addr]}") + Metasm::Relocation.new(Expression[label], :u32, @endianness) + end + when 'DIR64' + addr = decode_xword + if s = sect_at_va(addr) + label = label_at(s.encoded, s.encoded.ptr, "xref_#{Expression[addr]}") + Metasm::Relocation.new(Expression[label], :u64, @endianness) + end + else puts "W: COFF: Unsupported amd64 relocation #{r.inspect}" if $VERBOSE + end + end + + def decode_debug + if dd = @directory['debug'] and sect_at_rva(dd[0]) + @debug = [] + p0 = curencoded.ptr + while curencoded.ptr < p0 + dd[1] + @debug << DebugDirectory.decode(self) + end + @debug.each { |dbg| dbg.decode_inner(self) } + end + end + + # decode TLS directory, including tls callback table + def decode_tls + if @directory['tls_table'] and sect_at_rva(@directory['tls_table'][0]) + @tls = TLSDirectory.decode(self) + if s = sect_at_va(@tls.callback_p) + s.encoded.add_export 
'tls_callback_table' + @tls.callbacks.each_with_index { |cb, i| + @tls.callbacks[i] = curencoded.add_export "tls_callback_#{i}" if sect_at_rva(cb) + } + end + end + end + + def decode_loadconfig + if lc = @directory['load_config'] and sect_at_rva(lc[0]) + @loadconfig = LoadConfig.decode(self) + end + end + + def decode_delayimports + if di = @directory['delay_import_table'] and sect_at_rva(di[0]) + @delayimports = DelayImportDirectory.decode_all(self) + end + end + + + # decodes a COFF file (headers/exports/imports/relocs/sections) + # starts at encoded.ptr + def decode + decode_header + decode_exports + decode_imports + decode_resources + decode_certificates + decode_debug + decode_tls + decode_loadconfig + decode_delayimports + decode_com + decode_relocs unless nodecode_relocs or ENV['METASM_NODECODE_RELOCS'] # decode relocs last + end + + # returns a metasm CPU object corresponding to +header.machine+ + def cpu_from_headers + case @header.machine + when 'I386'; Ia32.new + when 'AMD64'; X86_64.new + when 'R4000'; MIPS.new(:little) + else raise "unknown cpu #{@header.machine}" + end + end + + # returns an array including the PE entrypoint and the exported functions entrypoints + # TODO filter out exported data, include safeseh ? + def get_default_entrypoints + ep = [] + ep.concat @tls.callbacks.to_a if tls + ep << (@optheader.image_base + label_rva(@optheader.entrypoint)) + @export.exports.to_a.each { |e| + next if e.forwarder_lib or not e.target + ep << (@optheader.image_base + label_rva(e.target)) + } if export + ep + end + + def dump_section_header(addr, edata) + s = @sections.find { |s_| s_.virtaddr == addr-@optheader.image_base } + s ? "\n.section #{s.name.inspect} base=#{Expression[addr]}" : + addr == @optheader.image_base ? 
"// exe header at #{Expression[addr]}" : super(addr, edata) + end + + # returns an array of [name, addr, length, info] + def section_info + [['header', @optheader.image_base, @optheader.headers_size, nil]] + + @sections.map { |s| + [s.name, @optheader.image_base + s.virtaddr, s.virtsize, s.characteristics.join(',')] + } + end end class COFFArchive - class Member - def decode(ar) - @offset = ar.encoded.ptr + class Member + def decode(ar) + @offset = ar.encoded.ptr - super(ar) - raise 'bad member header' + self.inspect if @eoh != "`\n" + super(ar) + raise 'bad member header' + self.inspect if @eoh != "`\n" - @name.strip! - @date = @date.to_i - @uid = @uid.to_i - @gid = @gid.to_i - @mode = @mode.to_i(8) - @size = @size.to_i + @name.strip! + @date = @date.to_i + @uid = @uid.to_i + @gid = @gid.to_i + @mode = @mode.to_i(8) + @size = @size.to_i - @encoded = ar.encoded[ar.encoded.ptr, @size] - ar.encoded.ptr += @size - ar.encoded.ptr += 1 if @size & 1 == 1 - end + @encoded = ar.encoded[ar.encoded.ptr, @size] + ar.encoded.ptr += @size + ar.encoded.ptr += 1 if @size & 1 == 1 + end - def decode_half ; @encoded.decode_imm(:u16, :big) end - def decode_word ; @encoded.decode_imm(:u32, :big) end + # TODO XXX are those actually used ? 
+ def decode_half ; @encoded.decode_imm(:u16, :big) end + def decode_word ; @encoded.decode_imm(:u32, :big) end - def exe; AutoExe.decode(@encoded) ; end - end + def exe; AutoExe.decode(@encoded) ; end + end - def decode_half(edata = @encoded) ; edata.decode_imm(:u16, :little) end - def decode_word(edata = @encoded) ; edata.decode_imm(:u32, :little) end - def decode_strz(edata = @encoded) - i = edata.data.index(?\0, edata.ptr) || edata.data.index(?\n, edata.ptr) || (edata.length+1) - edata.read(i+1-edata.ptr).chop - end + def decode_half(edata = @encoded) ; edata.decode_imm(:u16, :little) end + def decode_word(edata = @encoded) ; edata.decode_imm(:u32, :little) end + def decode_strz(edata = @encoded) + i = edata.data.index(?\0, edata.ptr) || edata.data.index(?\n, edata.ptr) || (edata.length+1) + edata.read(i+1-edata.ptr).chop + end - def decode_first_linker(m) - offsets = [] - names = [] - m.encoded.ptr = 0 - numsym = m.decode_word - numsym.times { offsets << m.decode_word } - numsym.times { names << decode_strz(m.encoded) } + def decode_first_linker(m) + offsets = [] + names = [] + m.encoded.ptr = 0 + numsym = m.decode_word + numsym.times { offsets << m.decode_word } + numsym.times { names << decode_strz(m.encoded) } - # names[42] is found in object at file offset offsets[42] - # offsets are sorted by object index (all syms from 1st object, then 2nd etc) + # names[42] is found in object at file offset offsets[42] + # offsets are sorted by object index (all syms from 1st object, then 2nd etc) - @first_linker = names.zip(offsets) #.inject({}) { |h, (n, o)| h.update n => o } - end + @first_linker = names.zip(offsets) #.inject({}) { |h, (n, o)| h.update n => o } + end - def decode_second_linker(m) - names = [] - mboffsets = [] - indices = [] - m = @members[1] - m.encoded.ptr = 0 - nummb = decode_word(m.encoded) - nummb.times { mboffsets << decode_word(m.encoded) } - numsym = decode_word(m.encoded) - numsym.times { indices << decode_half(m.encoded) } - numsym.times { 
names << decode_strz(m.encoded) } + def decode_second_linker(m) + names = [] + mboffsets = [] + indices = [] + m = @members[1] + m.encoded.ptr = 0 + nummb = decode_word(m.encoded) + nummb.times { mboffsets << decode_word(m.encoded) } + numsym = decode_word(m.encoded) + numsym.times { indices << decode_half(m.encoded) } + numsym.times { names << decode_strz(m.encoded) } - # names[42] is found in object at file offset mboffsets[indices[42]] - # symbols sorted by symbol name (supposed to be more efficient, but no index into string table...) + # names[42] is found in object at file offset mboffsets[indices[42]] + # symbols sorted by symbol name (supposed to be more efficient, but no index into string table...) - #names.zip(indices).inject({}) { |h, (n, i)| h.update n => mboffsets[i] } - @second_linker = [names, mboffsets, indices] - end + #names.zip(indices).inject({}) { |h, (n, i)| h.update n => mboffsets[i] } + @second_linker = [names, mboffsets, indices] + end - def decode_longnames(m) - @longnames = m.encoded - end + def decode_longnames(m) + @longnames = m.encoded + end - # set real name to archive members - # look it up in the name table member if needed, or just remove the trailing / - def fixup_names - @members.each { |m| - case m.name - when '/' - when '//' - when /^\/(\d+)/ - @longnames.ptr = $1.to_i - m.name = decode_strz(@longnames).chomp("/") - else m.name.chomp! "/" - end - } - end + # set real name to archive members + # look it up in the name table member if needed, or just remove the trailing / + def fixup_names + @members.each { |m| + case m.name + when '/' + when '//' + when /^\/(\d+)/ + @longnames.ptr = $1.to_i + m.name = decode_strz(@longnames).chomp("/") + else m.name.chomp! 
"/" + end + } + end - def decode - @encoded.ptr = 0 - @signature = @encoded.read(8) - raise InvalidExeFormat, "Invalid COFF Archive signature #{@signature.inspect}" if @signature != "!\n" - @members = [] - while @encoded.ptr < @encoded.virtsize - @members << Member.decode(self) - end - @members.each { |m| - case m.name - when '/'; @first_linker ? decode_second_linker(m) : decode_first_linker(m) - when '//'; decode_longnames(m) - else break - end - } - fixup_names - end + def decode + @encoded.ptr = 0 + @signature = @encoded.read(8) + raise InvalidExeFormat, "Invalid COFF Archive signature #{@signature.inspect}" if @signature != "!\n" + @members = [] + while @encoded.ptr < @encoded.virtsize + @members << Member.decode(self) + end + @members.each { |m| + case m.name + when '/'; @first_linker ? decode_second_linker(m) : decode_first_linker(m) + when '//'; decode_longnames(m) + else break + end + } + fixup_names + end end end diff --git a/lib/metasm/metasm/exe_format/coff_encode.rb b/lib/metasm/metasm/exe_format/coff_encode.rb index 2f449f9e6d..eb8ab20b0f 100644 --- a/lib/metasm/metasm/exe_format/coff_encode.rb +++ b/lib/metasm/metasm/exe_format/coff_encode.rb @@ -9,1068 +9,1068 @@ require 'metasm/exe_format/coff' unless defined? Metasm::COFF module Metasm class COFF - class OptionalHeader - # encodes an Optional header and the directories - def encode(coff) - opth = super(coff) - - DIRECTORIES[0, @numrva].each { |d| - if d = coff.directory[d] - d = d.dup - d[0] = Expression[d[0], :-, coff.label_at(coff.encoded, 0)] if d[0].kind_of?(::String) - else - d = [0, 0] - end - opth << coff.encode_word(d[0]) << coff.encode_word(d[1]) - } - - opth - end - - # find good default values for optheader members, based on coff.sections - def set_default_values(coff) - @signature ||= (coff.bitsize == 64 ? 
'PE+' : 'PE') - @link_ver_maj ||= 1 - @link_ver_min ||= 0 - @sect_align ||= 0x1000 - align = lambda { |sz| EncodedData.align_size(sz, @sect_align) } - @code_size ||= coff.sections.find_all { |s| s.characteristics.include? 'CONTAINS_CODE' }.inject(0) { |sum, s| sum + align[s.virtsize] } - @data_size ||= coff.sections.find_all { |s| s.characteristics.include? 'CONTAINS_DATA' }.inject(0) { |sum, s| sum + align[s.virtsize] } - @udata_size ||= coff.sections.find_all { |s| s.characteristics.include? 'CONTAINS_UDATA' }.inject(0) { |sum, s| sum + align[s.virtsize] } - @entrypoint = Expression[@entrypoint, :-, coff.label_at(coff.encoded, 0)] if entrypoint and not @entrypoint.kind_of?(::Integer) - tmp = coff.sections.find { |s| s.characteristics.include? 'CONTAINS_CODE' } - @base_of_code ||= (tmp ? Expression[coff.label_at(tmp.encoded, 0), :-, coff.label_at(coff.encoded, 0)] : 0) - tmp = coff.sections.find { |s| s.characteristics.include? 'CONTAINS_DATA' } - @base_of_data ||= (tmp ? Expression[coff.label_at(tmp.encoded, 0), :-, coff.label_at(coff.encoded, 0)] : 0) - @file_align ||= 0x200 - @os_ver_maj ||= 4 - @subsys_maj ||= 4 - @stack_reserve||= 0x100000 - @stack_commit ||= 0x1000 - @heap_reserve ||= 0x100000 - @heap_commit ||= 0x1000 - @numrva ||= DIRECTORIES.length - - super(coff) - end - end - - class Section - # find good default values for section header members, defines rawaddr/rawsize as new_label for later fixup - def set_default_values(coff) - @name ||= '' - @virtsize ||= @encoded.virtsize - @virtaddr ||= Expression[coff.label_at(@encoded, 0, 'sect_start'), :-, coff.label_at(coff.encoded, 0)] - @rawsize ||= coff.new_label('sect_rawsize') - @rawaddr ||= coff.new_label('sect_rawaddr') - - super(coff) - end - end - - class ExportDirectory - # encodes an export directory - def encode(coff) - edata = {} - %w[edata addrtable namptable ord_table libname nametable].each { |name| - edata[name] = EncodedData.new - } - label = lambda { |n| coff.label_at(edata[n], 0, n) } - 
rva = lambda { |n| Expression[label[n], :-, coff.label_at(coff.encoded, 0)] } - rva_end = lambda { |n| Expression[[label[n], :-, coff.label_at(coff.encoded, 0)], :+, edata[n].virtsize] } - - # ordinal base: smallest number > 1 to honor ordinals, minimize gaps - olist = @exports.map { |e| e.ordinal }.compact - # start with lowest ordinal, substract all exports unused to fill ordinal sequence gaps - omin = olist.min.to_i - gaps = olist.empty? ? 0 : olist.max+1 - olist.min - olist.length - noord = @exports.length - olist.length - @ordinal_base ||= [omin - (noord - gaps), 1].max - - @libname_p = rva['libname'] - @num_exports = [@exports.length, @exports.map { |e| e.ordinal }.compact.max.to_i - @ordinal_base].max - @num_names = @exports.find_all { |e| e.name }.length - @func_p = rva['addrtable'] - @names_p = rva['namptable'] - @ord_p = rva['ord_table'] - - edata['edata'] << super(coff) - - edata['libname'] << @libname << 0 - - elist = @exports.find_all { |e| e.name and not e.ordinal }.sort_by { |e| e.name } - @exports.find_all { |e| e.ordinal }.sort_by { |e| e.ordinal }.each { |e| elist.insert(e.ordinal-@ordinal_base, e) } - elist.each { |e| - if not e - # export by ordinal with gaps - # XXX test this value with the windows loader - edata['addrtable'] << coff.encode_word(0xffff_ffff) - next - end - if e.forwarder_lib - edata['addrtable'] << coff.encode_word(rva_end['nametable']) - edata['nametable'] << e.forwarder_lib << ?. 
<< - if not e.forwarder_name - "##{e.forwarder_ordinal}" - else - e.forwarder_name - end << 0 - else - edata['addrtable'] << coff.encode_word(Expression[e.target, :-, coff.label_at(coff.encoded, 0)]) - end - if e.name - edata['ord_table'] << coff.encode_half(edata['addrtable'].virtsize/4 - 1) - edata['namptable'] << coff.encode_word(rva_end['nametable']) - edata['nametable'] << e.name << 0 - end - } - - # sorted by alignment directives - %w[edata addrtable namptable ord_table libname nametable].inject(EncodedData.new) { |ed, name| ed << edata[name] } - end - - def set_default_values(coff) - @timestamp ||= Time.now.to_i - @libname ||= 'metalib' - @ordinal_base ||= 1 - - super(coff) - end - end - - class ImportDirectory - # encode all import directories + iat - def self.encode(coff, ary) - edata = { 'iat' => [] } - %w[idata ilt nametable].each { |name| edata[name] = EncodedData.new } - - ary.each { |i| i.encode(coff, edata) } - - it = edata['idata'] << - coff.encode_word(0) << - coff.encode_word(0) << - coff.encode_word(0) << - coff.encode_word(0) << - coff.encode_word(0) << - edata['ilt'] << - edata['nametable'] - - iat = edata['iat'] # why not fragmented ? 
- - [it, iat] - end - - # encode one import directory + iat + names in the edata hash received as arg - def encode(coff, edata) - edata['iat'] << EncodedData.new - # edata['ilt'] = edata['iat'] - label = lambda { |n| coff.label_at(edata[n], 0, n) } - rva_end = lambda { |n| Expression[[label[n], :-, coff.label_at(coff.encoded, 0)], :+, edata[n].virtsize] } - - @libname_p = rva_end['nametable'] - @ilt_p = rva_end['ilt'] - @iat_p ||= Expression[coff.label_at(edata['iat'].last, 0, 'iat'), :-, coff.label_at(coff.encoded, 0)] - edata['idata'] << super(coff) - - edata['nametable'] << @libname << 0 - - ord_mask = 1 << (coff.bitsize - 1) - @imports.each { |i| - edata['iat'].last.add_export i.target, edata['iat'].last.virtsize if i.target - if i.ordinal - ptr = coff.encode_xword(Expression[i.ordinal, :|, ord_mask]) - else - edata['nametable'].align 2 - ptr = coff.encode_xword(rva_end['nametable']) - edata['nametable'] << coff.encode_half(i.hint || 0) << i.name << 0 - end - edata['ilt'] << ptr - edata['iat'].last << ptr - } - edata['ilt'] << coff.encode_xword(0) - edata['iat'].last << coff.encode_xword(0) - end - end - - class TLSDirectory - def encode(coff) - cblist = EncodedData.new - @callback_p = coff.label_at(cblist, 0, 'callback_p') - @callbacks.to_a.each { |cb| - cblist << coff.encode_xword(cb) - } - cblist << coff.encode_xword(0) - - dir = super(coff) - - [dir, cblist] - end - - def set_default_values(coff) - @start_va ||= 0 - @end_va ||= @start_va - - super(coff) - end - end - - class RelocationTable - # encodes a COFF relocation table - def encode(coff) - rel = super(coff) << coff.encode_word(8 + 2*@relocs.length) - @relocs.each { |r| rel << r.encode(coff) } - rel - end - - def set_default_values(coff) - # @base_addr is an rva - @base_addr = Expression[@base_addr, :-, coff.label_at(coff.encoded, 0)] if @base_addr.kind_of?(::String) - - # align relocation table size - if @relocs.length % 2 != 0 - r = Relocation.new - r.type = 0 - r.offset = 0 - @relocs << r - end - - 
super(coff) - end - end - - class ResourceDirectory - # compiles ressource directories - def encode(coff, edata = nil) - if not edata - # init recursion - edata = {} - subtables = %w[table names dataentries data] - subtables.each { |n| edata[n] = EncodedData.new } - encode(coff, edata) - return subtables.inject(EncodedData.new) { |sum, n| sum << edata[n] } - end - - label = lambda { |n| coff.label_at(edata[n], 0, n) } - # data 'rva' are real rvas (from start of COFF) - rva_end = lambda { |n| Expression[[label[n], :-, coff.label_at(coff.encoded, 0)], :+, edata[n].virtsize] } - # names and table 'rva' are relative to the beginning of the resource directory - off_end = lambda { |n| Expression[[label[n], :-, coff.label_at(edata['table'], 0)], :+, edata[n].virtsize] } - - # build name_w if needed - @entries.each { |e| e.name_w = e.name.unpack('C*').pack('v*') if e.name and not e.name_w } - - # fixup forward references to us, as subdir - edata['table'].fixup @curoff_label => edata['table'].virtsize if defined? @curoff_label - - @nr_names = @entries.find_all { |e| e.name_w }.length - @nr_id = @entries.find_all { |e| e.id }.length - edata['table'] << super(coff) - - # encode entries, sorted by names nocase, then id - @entries.sort_by { |e| e.name_w ? 
[0, e.name_w.downcase] : [1, e.id] }.each { |e| - if e.name_w - edata['table'] << coff.encode_word(Expression[off_end['names'], :|, 1 << 31]) - edata['names'] << coff.encode_half(e.name_w.length/2) << e.name_w - else - edata['table'] << coff.encode_word(e.id) - end - - if e.subdir - e.subdir.curoff_label = coff.new_label('rsrc_curoff') - edata['table'] << coff.encode_word(Expression[e.subdir.curoff_label, :|, 1 << 31]) - else # data entry - edata['table'] << coff.encode_word(off_end['dataentries']) - - edata['dataentries'] << - coff.encode_word(rva_end['data']) << - coff.encode_word(e.data.length) << - coff.encode_word(e.codepage || 0) << - coff.encode_word(e.reserved || 0) - - edata['data'] << e.data - end - } - - # recurse - @entries.find_all { |e| e.subdir }.each { |e| e.subdir.encode(coff, edata) } - end - end - - - # computes the checksum for a given COFF file - # may not work with overlapping sections - def self.checksum(str, endianness = :little) - coff = load str - coff.endianness = endianness - coff.decode_header - coff.encoded.ptr = 0 - - flen = 0 - csum = 0 - # negate old checksum - oldcs = coff.encode_word(coff.optheader.checksum) - oldcs.ptr = 0 - csum -= coff.decode_half(oldcs) - csum -= coff.decode_half(oldcs) - - # checksum header - raw = coff.encoded.read(coff.optheader.headers_size) - flen += coff.optheader.headers_size - - coff.sections.each { |s| - coff.encoded.ptr = s.rawaddr - raw << coff.encoded.read(s.rawsize) - flen += s.rawsize - } - raw.unpack(endianness == :little ? 'v*' : 'n*').each { |s| - csum += s - csum = (csum & 0xffff) + (csum >> 16) if (csum >> 16) > 0 - } - csum + flen - end - - - def encode_byte(w) Expression[w].encode(:u8, @endianness, (caller if $DEBUG)) end - def encode_half(w) Expression[w].encode(:u16, @endianness, (caller if $DEBUG)) end - def encode_word(w) Expression[w].encode(:u32, @endianness, (caller if $DEBUG)) end - def encode_xword(w) Expression[w].encode((@bitsize == 32 ? 
:u32 : :u64), @endianness, (caller if $DEBUG)) end - - - # adds a new compiler-generated section - def encode_append_section(s) - if (s.virtsize || s.encoded.virtsize) < 4096 - # find section to merge with - # XXX check following sections for hardcoded base address ? - - char = s.characteristics.dup - secs = @sections.dup - # do not merge non-discardable in discardable - if not char.delete 'MEM_DISCARDABLE' - secs.delete_if { |ss| ss.characteristics.include? 'MEM_DISCARDABLE' } - end - # do not merge shared w/ non-shared - if char.delete 'MEM_SHARED' - secs.delete_if { |ss| not ss.characteristics.include? 'MEM_SHARED' } - else - secs.delete_if { |ss| ss.characteristics.include? 'MEM_SHARED' } - end - secs.delete_if { |ss| ss.virtsize.kind_of?(::Integer) or ss.rawsize.kind_of?(::Integer) or secs[secs.index(ss)+1..-1].find { |ss_| ss_.virtaddr.kind_of?(::Integer) } } - - # try to find superset of characteristics - if target = secs.find { |ss| (ss.characteristics & char) == char } - target.encoded.align 8 - puts "PE: merging #{s.name} in #{target.name} (#{target.encoded.virtsize})" if $DEBUG - s.encoded = target.encoded << s.encoded - else - @sections << s - end - else - @sections << s - end - end - - # encodes the export table as a new section, updates directory['export_table'] - def encode_exports - edata = @export.encode self - - # must include name tables (for forwarders) - @directory['export_table'] = [label_at(edata, 0, 'export_table'), edata.virtsize] - - s = Section.new - s.name = '.edata' - s.encoded = edata - s.characteristics = %w[MEM_READ] - encode_append_section s - end - - # encodes the import tables as a new section, updates directory['import_table'] and directory['iat'] - def encode_imports - idata, iat = ImportDirectory.encode(self, @imports) - - @directory['import_table'] = [label_at(idata, 0, 'idata'), idata.virtsize] - - s = Section.new - s.name = '.idata' - s.encoded = idata - s.characteristics = %w[MEM_READ MEM_WRITE MEM_DISCARDABLE] - 
encode_append_section s - - if @imports.first and @imports.first.iat_p.kind_of?(Integer) - # ordiat = iat.sort_by { @import[x].iat_p } - ordiat = @imports.zip(iat).sort_by { |id, it| id.iat_p.kind_of?(Integer) ? id.iat_p : 1<<65 }.map { |id, it| it } - else - ordiat = iat - end - - @directory['iat'] = [label_at(ordiat.first, 0, 'iat'), - Expression[label_at(ordiat.last, ordiat.last.virtsize, 'iat_end'), :-, label_at(ordiat.first, 0)]] if not ordiat.empty? - - iat_s = nil - - plt = Section.new - plt.name = '.plt' - plt.encoded = EncodedData.new - plt.characteristics = %w[MEM_READ MEM_EXECUTE] - - @imports.zip(iat) { |id, it| - if id.iat_p.kind_of?(Integer) and @sections.find { |s_| s_.virtaddr <= id.iat_p and s_.virtaddr + (s_.virtsize || s_.encoded.virtsize) > id.iat_p } - id.iat = it # will be fixed up after encode_section - else - # XXX should not be mixed (for @directory['iat'][1]) - if not iat_s - iat_s = Section.new - iat_s.name = '.iat' - iat_s.encoded = EncodedData.new - iat_s.characteristics = %w[MEM_READ MEM_WRITE] - encode_append_section iat_s - end - iat_s.encoded << it - end - - id.imports.each { |i| - if i.thunk - arch_encode_thunk(plt.encoded, i) - end - } - } - - encode_append_section plt if not plt.encoded.empty? - end - - # encodes a thunk to imported function - def arch_encode_thunk(edata, import) - case @cpu.shortname - when 'ia32', 'x64' - shellcode = lambda { |c| Shellcode.new(@cpu).share_namespace(self).assemble(c).encoded } - if @cpu.generate_PIC - if @cpu.shortname == 'x64' - edata << shellcode["#{import.thunk}: jmp [rip-$_+#{import.target}]"] - return - end - # sections starts with a helper function that returns the address of metasm_intern_geteip in eax (PIC) - if not @sections.find { |s| s.encoded and s.encoded.export['metasm_intern_geteip'] } and edata.empty? 
- edata << shellcode["metasm_intern_geteip: call 42f\n42:\npop eax\nsub eax, 42b-metasm_intern_geteip\nret"] - end - edata << shellcode["#{import.thunk}: call metasm_intern_geteip\njmp [eax+#{import.target}-metasm_intern_geteip]"] - else - edata << shellcode["#{import.thunk}: jmp [#{import.target}]"] - end - else raise EncodeError, 'E: COFF: encode import thunk: unsupported architecture' - end - end - - def encode_tls - dir, cbtable = @tls.encode(self) - @directory['tls_table'] = [label_at(dir, 0, 'tls_table'), dir.virtsize] - - s = Section.new - s.name = '.tls' - s.encoded = EncodedData.new << dir << cbtable - s.characteristics = %w[MEM_READ MEM_WRITE] - encode_append_section s - end - - # encodes relocation tables in a new section .reloc, updates @directory['base_relocation_table'] - def encode_relocs - if @relocations.empty? - rt = RelocationTable.new - rt.base_addr = 0 - rt.relocs = [] - @relocations << rt - end - relocs = @relocations.inject(EncodedData.new) { |edata, rt_| edata << rt_.encode(self) } - - @directory['base_relocation_table'] = [label_at(relocs, 0, 'reloc_table'), relocs.virtsize] - - s = Section.new - s.name = '.reloc' - s.encoded = relocs - s.characteristics = %w[MEM_READ MEM_DISCARDABLE] - encode_append_section s - end - - # creates the @relocations from sections.encoded.reloc - def create_relocation_tables - @relocations = [] - - # create a fake binding with all exports, to find only-image_base-dependant relocs targets - # not foolproof, but works in standard cases - startaddr = curaddr = label_at(@encoded, 0, 'coff_start') - binding = {} - @sections.each { |s| - binding.update s.encoded.binding(curaddr) - curaddr = Expression[curaddr, :+, s.encoded.virtsize] - } - - # for each section.encoded, make as many RelocationTables as needed - @sections.each { |s| - - # rt.base_addr temporarily holds the offset from section_start, and is fixed up to rva before '@reloc << rt' - rt = RelocationTable.new - - s.encoded.reloc.each { |off, rel| - # check 
that the relocation looks like "program_start + integer" when bound using the fake binding - # XXX allow :i32 etc - if rel.endianness == @endianness and [:u32, :a32, :u64, :a64].include?(rel.type) and - rel.target.bind(binding).reduce.kind_of?(Expression) and - Expression[rel.target, :-, startaddr].bind(binding).reduce.kind_of?(::Integer) - # winner ! - - # build relocation - r = RelocationTable::Relocation.new - r.offset = off & 0xfff - r.type = { :u32 => 'HIGHLOW', :u64 => 'DIR64', :a32 => 'HIGHLOW', :a64 => 'DIR64' }[rel.type] - - # check if we need to start a new relocation table - if rt.base_addr and (rt.base_addr & ~0xfff) != (off & ~0xfff) - rt.base_addr = Expression[[label_at(s.encoded, 0, 'sect_start'), :-, startaddr], :+, rt.base_addr] - @relocations << rt - rt = RelocationTable.new - end - - # initialize reloc table base address if needed - rt.base_addr ||= off & ~0xfff - - (rt.relocs ||= []) << r - elsif $DEBUG and not rel.target.bind(binding).reduce.kind_of?(Integer) - puts "W: COFF: Ignoring weird relocation #{rel.inspect} when building relocation tables" - end - } - - if rt and rt.relocs - rt.base_addr = Expression[[label_at(s.encoded, 0, 'sect_start'), :-, startaddr], :+, rt.base_addr] - @relocations << rt - end - } - end - - def encode_resource - res = @resource.encode self - - @directory['resource_table'] = [label_at(res, 0, 'resource_table'), res.virtsize] - - s = Section.new - s.name = '.rsrc' - s.encoded = res - s.characteristics = %w[MEM_READ] - encode_append_section s - end - - # initialize the header from target/cpu/etc, target in ['exe' 'dll' 'kmod' 'obj'] - def pre_encode_header(target='exe', want_relocs=true) - target = {:bin => 'exe', :lib => 'dll', :obj => 'obj', 'sys' => 'kmod', 'drv' => 'kmod'}.fetch(target, target) - - @header.machine ||= case @cpu.shortname - when 'x64'; 'AMD64' - when 'ia32'; 'I386' - end - @optheader.signature ||= case @cpu.size - when 32; 'PE' - when 64; 'PE+' - end - @bitsize = (@optheader.signature == 'PE+' ? 
64 : 32) - - # setup header flags - tmp = %w[LINE_NUMS_STRIPPED LOCAL_SYMS_STRIPPED DEBUG_STRIPPED] + - case target - when 'exe'; %w[EXECUTABLE_IMAGE] - when 'dll'; %w[EXECUTABLE_IMAGE DLL] - when 'kmod'; %w[EXECUTABLE_IMAGE] - when 'obj'; [] - end - if @cpu.size == 32 - tmp << 'x32BIT_MACHINE' - else - tmp << 'LARGE_ADDRESS_AWARE' - end - tmp << 'RELOCS_STRIPPED' if not want_relocs - @header.characteristics ||= tmp - - @optheader.subsystem ||= case target - when 'exe', 'dll'; 'WINDOWS_GUI' - when 'kmod'; 'NATIVE' - end - - tmp = [] - tmp << 'NX_COMPAT' - tmp << 'DYNAMIC_BASE' if want_relocs - @optheader.dll_characts ||= tmp - end - - # resets the values in the header that may have been - # modified by your script (eg section count, size, imagesize, etc) - # call this whenever you decode a file, modify it, and want to reencode it later - def invalidate_header - # set those values to nil, they will be - # recomputed during encode_header - [:code_size, :data_size, :udata_size, :base_of_code, :base_of_data, - :sect_align, :file_align, :image_size, :headers_size, :checksum].each { |m| @optheader.send("#{m}=", nil) } - [:num_sect, :ptr_sym, :num_sym, :size_opthdr].each { |m| @header.send("#{m}=", nil) } - end - - # appends the header/optheader/directories/section table to @encoded - def encode_header - # encode section table, add CONTAINS_* flags from other characteristics flags - s_table = EncodedData.new - - @sections.each { |s| - if s.characteristics.kind_of? Array and s.characteristics.include? 'MEM_READ' - if s.characteristics.include? 'MEM_EXECUTE' - s.characteristics |= ['CONTAINS_CODE'] - elsif s.encoded - if s.encoded.rawsize == 0 - s.characteristics |= ['CONTAINS_UDATA'] - else - s.characteristics |= ['CONTAINS_DATA'] - end - end - end - s.rawaddr = nil if s.rawaddr.kind_of?(::Integer) # XXX allow to force rawaddr ? 
- s_table << s.encode(self) - } - - # encode optional header - @optheader.image_size ||= new_label('image_size') - @optheader.image_base ||= label_at(@encoded, 0) - @optheader.headers_size ||= new_label('headers_size') - @optheader.checksum ||= new_label('checksum') - @optheader.subsystem ||= 'WINDOWS_GUI' - @optheader.numrva = nil - opth = @optheader.encode(self) - - # encode header - @header.machine ||= 'UNKNOWN' - @header.num_sect ||= sections.length - @header.time ||= Time.now.to_i & -255 - @header.size_opthdr ||= opth.virtsize - @encoded << @header.encode(self) << opth << s_table - end - - # append the section bodies to @encoded, and link the resulting binary - def encode_sections_fixup - if @optheader.headers_size.kind_of?(::String) - @encoded.fixup! @optheader.headers_size => @encoded.virtsize - @optheader.headers_size = @encoded.virtsize - end - @encoded.align @optheader.file_align - - baseaddr = @optheader.image_base.kind_of?(::Integer) ? @optheader.image_base : 0x400000 - binding = @encoded.binding(baseaddr) - - curaddr = baseaddr + @optheader.headers_size - @sections.each { |s| - # align - curaddr = EncodedData.align_size(curaddr, @optheader.sect_align) - if s.rawaddr.kind_of?(::String) - @encoded.fixup! s.rawaddr => @encoded.virtsize - s.rawaddr = @encoded.virtsize - end - if s.virtaddr.kind_of?(::Integer) - raise "E: COFF: cannot encode section #{s.name}: hardcoded address too short" if curaddr > baseaddr + s.virtaddr - curaddr = baseaddr + s.virtaddr - end - binding.update s.encoded.binding(curaddr) - curaddr += s.virtsize - - pre_sz = @encoded.virtsize - @encoded << s.encoded[0, s.encoded.rawsize] - @encoded.align @optheader.file_align - if s.rawsize.kind_of?(::String) - @encoded.fixup! s.rawsize => (@encoded.virtsize - pre_sz) - s.rawsize = @encoded.virtsize - pre_sz - end - } - - # not aligned ? 
spec says it is, visual studio does not - binding[@optheader.image_size] = curaddr - baseaddr if @optheader.image_size.kind_of?(::String) - - # patch the iat where iat_p was defined - # sort to ensure a 0-terminated will not overwrite an entry - # (try to dump notepad.exe, which has a forwarder;) - @imports.find_all { |id| id.iat_p.kind_of?(Integer) }.sort_by { |id| id.iat_p }.each { |id| - s = sect_at_rva(id.iat_p) - @encoded[s.rawaddr + s.encoded.ptr, id.iat.virtsize] = id.iat - binding.update id.iat.binding(baseaddr + id.iat_p) - } if imports - - @encoded.fill - @encoded.fixup! binding - - if @optheader.checksum.kind_of?(::String) and @encoded.reloc.length == 1 - # won't work if there are other unresolved relocs - checksum = self.class.checksum(@encoded.data, @endianness) - @encoded.fixup @optheader.checksum => checksum - @optheader.checksum = checksum - end - end - - # encode a COFF file, building export/import/reloc tables if needed - # creates the base relocation tables (need for references to IAT not known before) - # defaults to generating relocatable files, eg ALSR-aware - # pass want_relocs=false to avoid the file overhead induced by this - def encode(target='exe', want_relocs=true) - @encoded = EncodedData.new - label_at(@encoded, 0, 'coff_start') - pre_encode_header(target, want_relocs) - autoimport - encode_exports if export - encode_imports if imports - encode_resource if resource - encode_tls if tls - create_relocation_tables if want_relocs - encode_relocs if relocations - encode_header - encode_sections_fixup - @encoded.data - end - - def parse_init - # ahem... - # a fake object, which when appended makes us parse '.text', which creates a real default section - # forwards to it this first appendage. - # allows the user to specify its own section if he wishes, and to use .text if he doesn't - if not defined? 
@cursource or not @cursource - @cursource = ::Object.new - class << @cursource - attr_accessor :coff - def <<(*a) - t = Preprocessor::Token.new(nil) - t.raw = '.text' - coff.parse_parser_instruction t - coff.cursource.send(:<<, *a) - end - end - @cursource.coff = self - end - @source ||= {} - super() - end - - # handles compiler meta-instructions - # - # syntax: - # .section "
" - # section name is a string (may be quoted) - # perms are in 'r' 'w' 'x' 'shared' 'discard', may be concatenated (in this order), may be prefixed by 'no' to remove the attribute for an existing section - # base is the token 'base', the token '=' and an immediate expression - # default sections: - # .text = .section '.text' rx - # .data = .section '.data' rw - # .rodata = .section '.rodata' r - # .bss = .section '.bss' rw - # .entrypoint | .entrypoint