315 lines
11 KiB
Ruby
315 lines
11 KiB
Ruby
##
|
|
# This module requires Metasploit: https://metasploit.com/download
|
|
# Current source: https://github.com/rapid7/metasploit-framework
|
|
##
|
|
|
|
require 'rex/poly'
|
|
|
|
class MetasploitModule < Msf::Encoder::XorAdditiveFeedback
|
|
|
|
# The shikata encoder has an excellent ranking because it is polymorphic.
|
|
# Party time, excellent!
|
|
Rank = ExcellentRanking
|
|
|
|
# Hands this encoder's metadata to the parent class constructor. The decoder
# is declared with a 4-byte key and a 4-byte block size.
def initialize
  decoder_settings = {
    'KeySize' => 4,
    'BlockSize' => 4
  }

  super(
    'Name' => 'Polymorphic XOR Additive Feedback Encoder',
    'Description' => %q{
      This encoder implements a polymorphic XOR additive feedback encoder.
      The decoder stub is generated based on dynamic instruction
      substitution and dynamic block ordering. Registers are also
      selected dynamically.
    },
    'Author' => 'spoonm',
    'Arch' => ARCH_X86,
    'License' => MSF_LICENSE,
    'Decoder' => decoder_settings
  )
end
|
|
|
|
#
|
|
# Generates the shikata decoder stub.
|
|
#
|
|
# Builds the decoder stub for +state+ on first use and returns the cached
# copy on every later call.
#
# Side effects on the first call: sets state.decoder_key_offset, prepends the
# tail of the stub to state.buf, and stores the stub in state.decoder_stub.
#
# Raises BadGenerateError when a register is listed both as modified and as
# saved, or when no stub could be generated.
def decoder_stub(state)
  # The same stub has to be returned every time for a given encoder state,
  # so an already generated one is reused as-is.
  return state.decoder_stub unless state.decoder_stub.nil?

  # Sanity check: a register cannot be both modified and saved.
  clashing = modified_registers & saved_registers
  raise BadGenerateError unless clashing.empty?

  # Shikata only cuts off the last 1-4 bytes of its own end, depending on
  # the alignment of the original buffer.
  tail_len = 4 - (state.buf.length & 3)
  stub = generate_shikata_block(state, state.buf.length + tail_len, tail_len)
  raise BadGenerateError unless stub

  # Remember where the XORK placeholder ended up inside the stub.
  state.decoder_key_offset = stub.index('XORK')

  # Move the last 1-4 stub bytes to the front of the buffer to be encoded so
  # that it lands on a 4-byte boundary.
  tail = stub.slice!(stub.length - tail_len, tail_len)
  state.buf = tail + state.buf

  state.decoder_stub = stub
  state.decoder_stub
end
|
|
|
|
# Indicate that this module can preserve some registers
|
|
# Capability flag: always reports that this encoder is able to leave some
# registers untouched.
def can_preserve_registers?
  true
end
|
|
|
|
# A list of registers always touched by this encoder
|
|
# Returns the x86 register constants this encoder always touches, in the
# order ECX, EAX, EDX. ESP is assumed and is handled through preserves_stack?
def modified_registers
  x86 = Rex::Arch::X86

  # The counter register is hardcoded
  counter = [x86::ECX]
  # These are modified by div and mul operations
  div_mul = [x86::EAX, x86::EDX]

  counter + div_mul
end
|
|
|
|
# Always blacklist these registers in our block generation
|
|
# Returns the registers the block generator must never hand out: ESP and ECX
# plus whatever saved_registers reports, without duplicates.
def block_generator_register_blacklist
  always_reserved = [Rex::Arch::X86::ESP, Rex::Arch::X86::ECX]
  always_reserved | saved_registers
end
|
|
|
|
protected
|
|
|
|
#
|
|
# Returns the set of FPU instructions that can be used for the FPU block of
|
|
# the decoder stub.
|
|
#
|
|
# Returns an Array of two-byte FPU instruction encodings (binary Strings)
# that may be used for the FPU block of the decoder stub.
#
# The strings are assembled with Array#pack so the result is always binary,
# regardless of the encoding the source file is evaluated with. Joining a
# "\xNN" literal to Integer#chr raises Encoding::CompatibilityError when the
# literal is not binary.
def fpu_instructions
  # [first opcode byte, inclusive range of second bytes], in emission order.
  ranged = [
    [0xd9, 0xe8..0xee],
    [0xd9, 0xc0..0xcf],
    [0xda, 0xc0..0xdf],
    [0xdb, 0xc0..0xdf],
    [0xdd, 0xc0..0xc7]
  ]

  fpus = ranged.flat_map do |opcode, second_bytes|
    second_bytes.map { |second| [opcode, second].pack('CC') }
  end

  # Individual 0xd9-prefixed instructions.
  [0xd0, 0xe1, 0xf6, 0xf7, 0xe5].each do |second|
    fpus << [0xd9, second].pack('CC')
  end

  # This FPU instruction seems to fail consistently on Linux
  # fpus << "\xdb\xe1"

  fpus
end
|
|
|
|
#
|
|
# Returns a polymorphic decoder stub that is capable of decoding a buffer
|
|
# of the supplied length and encodes the last cutoff bytes of itself.
|
|
#
|
|
# Builds the polymorphic decoder stub out of Rex::Poly logical blocks and
# returns whatever the final block's #generate call returns.
#
# state  - encoder state; context_encoding and badchars are read from it
# length - number of bytes the stub has to decode
# cutoff - number of trailing stub bytes that are encoded with the payload
#
# Reads datastore["BufferRegister"] / datastore["BufferOffset"]: when a
# buffer register is set the payload address is derived from it, otherwise
# the fnstenv technique is used to obtain EIP.
#
# Raises EncodingError when a NULL-free decoder is requested together with a
# BufferOffset outside -255..255, or when block generation fails.
def generate_shikata_block(state, length, cutoff)
  # Declare logical registers
  count_reg = Rex::Poly::LogicalRegister::X86.new('count', 'ecx')
  addr_reg = Rex::Poly::LogicalRegister::X86.new('addr')

  # With context encoding the key register is pinned to 'eax'.
  key_reg = if state.context_encoding
              Rex::Poly::LogicalRegister::X86.new('key', 'eax')
            else
              Rex::Poly::LogicalRegister::X86.new('key')
            end

  # Declare individual blocks
  endb = Rex::Poly::SymbolicBlock::End.new

  # Clear the counter register
  clear_register = Rex::Poly::LogicalBlock.new('clear_register',
    "\x31\xc9", # xor ecx,ecx
    "\x29\xc9", # sub ecx,ecx
    "\x33\xc9", # xor ecx,ecx
    "\x2b\xc9") # sub ecx,ecx

  # Initialize the counter after zeroing it
  init_counter = Rex::Poly::LogicalBlock.new('init_counter')

  # Round the length up to the next multiple of the 4-byte block size, then
  # convert it to a block count. The parentheses spell out how Ruby already
  # parsed the original expression ('+' binds tighter than '&').
  length += (4 + (4 - (length & 3))) & 3
  length /= 4

  # Pick the smallest mov that can hold the block count. The 16-bit form
  # tops out at 0xffff; 0x10000 would be truncated to zero by pack('v').
  if length <= 0xff
    init_counter.add_perm([0xb1, length].pack('CC'))        # mov cl, imm8
  elsif length <= 0xffff
    init_counter.add_perm([0x66, 0xb9, length].pack('CCv')) # mov cx, imm16
  else
    init_counter.add_perm([0xb9, length].pack('CV'))        # mov ecx, imm32
  end

  # Key initialization block. 'XORK' is a placeholder for the key.
  init_key = if state.context_encoding
               # If using context encoding, we use a mov reg, [addr]
               Rex::Poly::LogicalBlock.new('init_key',
                 Proc.new { |b| (0xa1 + b.regnum_of(key_reg)).chr + 'XORK' })
             else
               # Otherwise, we do a direct mov reg, val
               Rex::Poly::LogicalBlock.new('init_key',
                 Proc.new { |b| (0xb8 + b.regnum_of(key_reg)).chr + 'XORK' })
             end

  # Opcode + ModRM for "xor [addr_reg + disp8], key_reg" and
  # "add key_reg, [addr_reg + disp8]"; callers append the disp8 byte.
  xor = Proc.new { |b| "\x31" + (0x40 + b.regnum_of(addr_reg) + (8 * b.regnum_of(key_reg))).chr }
  add = Proc.new { |b| "\x03" + (0x40 + b.regnum_of(addr_reg) + (8 * b.regnum_of(key_reg))).chr }

  # Two interchangeable ways of advancing addr_reg by four bytes.
  sub4 = Proc.new { |b| sub_immediate(b.regnum_of(addr_reg), -4) }
  add4 = Proc.new { |b| add_immediate(b.regnum_of(addr_reg), 4) }

  if datastore["BufferRegister"]
    buff_reg = Rex::Poly::LogicalRegister::X86.new('buff', datastore["BufferRegister"])
    offset = (datastore["BufferOffset"] ? datastore["BufferOffset"].to_i : 0)
    if (offset < -255 || offset > 255) && state.badchars.include?("\x00")
      raise EncodingError.new("Can't generate NULL-free decoder with a BufferOffset bigger than one byte")
    end

    mov = Proc.new { |b|
      # mov <buff_reg>, <addr_reg>
      "\x89" + (0xc0 + b.regnum_of(addr_reg) + (8 * b.regnum_of(buff_reg))).chr
    }
    add_offset = Proc.new { |b| add_immediate(b.regnum_of(addr_reg), offset) }
    sub_offset = Proc.new { |b| sub_immediate(b.regnum_of(addr_reg), -offset) }

    getpc = Rex::Poly::LogicalBlock.new('getpc')
    getpc.add_perm(Proc.new { |b| mov.call(b) + add_offset.call(b) })
    getpc.add_perm(Proc.new { |b| mov.call(b) + sub_offset.call(b) })

    # With an offset of less than four, inc is smaller than or the same size as add
    if offset > 0 && offset < 4
      getpc.add_perm(Proc.new { |b| mov.call(b) + inc(b.regnum_of(addr_reg)) * offset })
    elsif offset < 0 && offset > -4
      getpc.add_perm(Proc.new { |b| mov.call(b) + dec(b.regnum_of(addr_reg)) * (-offset) })
    end

    # NOTE: Adding a perm with possibly different sizes is normally
    # wrong since it will change the SymbolicBlock::End offset during
    # various stages of generation. In this case, though, offset is
    # constant throughout the whole process, so it isn't a problem.
    #
    # A disp8 is a signed byte, so it is only used for -128..127; every other
    # non-zero offset needs the disp32 form.
    # NOTE(review): none of these forms emits a SIB byte, so they presumably
    # rely on BufferRegister not being ESP (nor EBP for the zero-offset
    # form) - confirm.
    getpc.add_perm(Proc.new { |b|
      buff = b.regnum_of(buff_reg)
      addr = b.regnum_of(addr_reg)
      if offset == 0
        # lea addr_reg, [buff_reg]
        [0x8d, buff + (8 * addr)].pack('CC')
      elsif offset.between?(-128, 127)
        # lea addr_reg, [buff_reg + byte offset]
        [0x8d, 0x40 + buff + (8 * addr), offset].pack('CCc')
      else
        # lea addr_reg, [buff_reg + DWORD offset]
        # NOTE: This will generate NULL bytes for small positive offsets!
        [0x8d, 0x80 + buff + (8 * addr), offset].pack('CCV')
      end
    })

    # BufferReg+BufferOffset points right at the beginning of our
    # buffer, so in contrast to the fnstenv technique, we don't have to
    # sub off any other offsets.
    xor1 = Proc.new { |b| xor.call(b) + [ (b.offset_of(endb) - cutoff) ].pack('c') }
    xor2 = Proc.new { |b| xor.call(b) + [ (b.offset_of(endb) - 4 - cutoff) ].pack('c') }
    add1 = Proc.new { |b| add.call(b) + [ (b.offset_of(endb) - cutoff) ].pack('c') }
    add2 = Proc.new { |b| add.call(b) + [ (b.offset_of(endb) - 4 - cutoff) ].pack('c') }
  else
    # FPU blocks
    fpu = Rex::Poly::LogicalBlock.new('fpu',
      *fpu_instructions)

    fnstenv = Rex::Poly::LogicalBlock.new('fnstenv',
      "\xd9\x74\x24\xf4")
    fnstenv.depends_on(fpu)

    # Get EIP off the stack
    getpc = Rex::Poly::LogicalBlock.new('getpc',
      Proc.new { |b| (0x58 + b.regnum_of(addr_reg)).chr })
    getpc.depends_on(fnstenv)

    # Subtract the offset of the fpu instruction since that's where eip points after fnstenv
    xor1 = Proc.new { |b| xor.call(b) + [ (b.offset_of(endb) - b.offset_of(fpu) - cutoff) ].pack('c') }
    xor2 = Proc.new { |b| xor.call(b) + [ (b.offset_of(endb) - b.offset_of(fpu) - 4 - cutoff) ].pack('c') }
    add1 = Proc.new { |b| add.call(b) + [ (b.offset_of(endb) - b.offset_of(fpu) - cutoff) ].pack('c') }
    add2 = Proc.new { |b| add.call(b) + [ (b.offset_of(endb) - b.offset_of(fpu) - 4 - cutoff) ].pack('c') }
  end

  # Decoder loop block. The *2 variants use a displacement that is four
  # smaller because addr_reg has already been advanced when they run.
  loop_block = Rex::Poly::LogicalBlock.new('loop_block')

  loop_block.add_perm(
    Proc.new { |b| xor1.call(b) + add1.call(b) + sub4.call(b) },
    Proc.new { |b| xor1.call(b) + sub4.call(b) + add2.call(b) },
    Proc.new { |b| sub4.call(b) + xor2.call(b) + add2.call(b) },
    Proc.new { |b| xor1.call(b) + add1.call(b) + add4.call(b) },
    Proc.new { |b| xor1.call(b) + add4.call(b) + add2.call(b) },
    Proc.new { |b| add4.call(b) + xor2.call(b) + add2.call(b) })

  # Loop instruction block
  loop_inst = Rex::Poly::LogicalBlock.new('loop_inst',
    "\xe2\xf5")
    # In the current implementation the loop block is a constant size,
    # so really no need for a fancy calculation. Nevertheless, here's
    # one way to do it:
    #Proc.new { |b|
    #  # loop <loop_block label>
    #  # -2 to account for the size of this instruction
    #  "\xe2" + [ -2 - b.size_of(loop_block) ].pack('c')
    #})

  # Define block dependencies
  clear_register.depends_on(getpc)
  init_counter.depends_on(clear_register)
  loop_block.depends_on(init_counter, init_key)
  loop_inst.depends_on(loop_block)

  begin
    # Generate a permutation saving the ECX, ESP, and user defined registers
    loop_inst.generate(block_generator_register_blacklist, nil, state.badchars)
  rescue RuntimeError, EncodingError => e
    # The Rex::Poly block generator can raise RuntimeError variants
    raise EncodingError, e.to_s
  end
end
|
|
|
|
# Convert the SaveRegisters to an array of x86 register constants
|
|
# Looks up the 'SaveRegisters' datastore option and returns whatever
# Rex::Arch::X86.register_names_to_ids makes of it.
def saved_registers
  register_names = datastore['SaveRegisters']
  Rex::Arch::X86.register_names_to_ids(register_names)
end
|
|
|
|
# Encodes "sub <reg>, <imm>" for a 32-bit x86 register.
#
# regnum - register number, added to the 0xe8 ModRM base (mod=11, /5)
# imm    - Integer immediate; nil or 0 means there is nothing to encode
#
# Returns the instruction bytes as a binary String, or "" for nil/0.
#
# The short form (opcode 0x83) carries a sign-extended one-byte immediate,
# so it can only represent -128..127. Everything else uses the imm32 form
# (opcode 0x81); packing e.g. 200 into a signed byte would silently encode
# -56 instead.
def sub_immediate(regnum, imm)
  return "" if imm.nil? || imm == 0

  modrm = 0xe8 + regnum
  if imm.between?(-128, 127)
    [0x83, modrm, imm].pack('CCc')
  else
    [0x81, modrm, imm].pack('CCV')
  end
end
|
|
# Encodes "add <reg>, <imm>" for a 32-bit x86 register.
#
# regnum - register number, added to the 0xc0 ModRM base (mod=11, /0)
# imm    - Integer immediate; nil or 0 means there is nothing to encode
#
# Returns the instruction bytes as a binary String, or "" for nil/0.
#
# The short form (opcode 0x83) carries a sign-extended one-byte immediate,
# so it can only represent -128..127. Everything else uses the imm32 form
# (opcode 0x81); packing e.g. 200 into a signed byte would silently encode
# -56 instead.
def add_immediate(regnum, imm)
  return "" if imm.nil? || imm == 0

  modrm = 0xc0 + regnum
  if imm.between?(-128, 127)
    [0x83, modrm, imm].pack('CCc')
  else
    [0x81, modrm, imm].pack('CCV')
  end
end
|
|
# Returns the single byte 0x40 + regnum (the one-byte "inc <reg>" encoding)
# as a binary String.
def inc(regnum)
  opcode = 0x40 + regnum
  [opcode].pack('C')
end
|
|
# Returns the single byte 0x48 + regnum (the one-byte "dec <reg>" encoding)
# as a binary String.
def dec(regnum)
  opcode = 0x48 + regnum
  [opcode].pack('C')
end
|
|
end
|