Fixes #807. Make the html encoded output standards compliant

git-svn-id: file:///home/svn/framework3/trunk@8462 4d416f70-5f16-0410-b530-b9f4589650da
unstable
HD Moore 2010-02-12 05:08:16 +00:00
parent 070d6e20cd
commit 5dff9c4efc
1 changed files with 69 additions and 68 deletions

View File

@ -17,7 +17,7 @@ module Rex
###
module Text
@@codepage_map_cache = nil
##
#
# Constants
@ -39,7 +39,7 @@ module Text
AllChars = [*(0x00 .. 0xff)].pack("C*")
DefaultPatternSets = [ Rex::Text::UpperAlpha, Rex::Text::LowerAlpha, Rex::Text::Numerals ]
##
#
# Serialization
@ -73,14 +73,14 @@ module Text
def self.to_c_comment(str, wrap = DefaultWrap)
  # Word-wrap the text with every line prefixed by ' * ', then enclose the
  # result between the C block-comment opener and closer.
  wrapped = wordwrap(str, 0, wrap, '', ' * ')
  "/*\n" + wrapped + " */\n"
end
#
# Creates a javascript-style comment
#
def self.to_js_comment(str, wrap = DefaultWrap)
  # Each wrapped line is prefixed with the line-comment marker.
  line_prefix = '// '
  wordwrap(str, 0, wrap, '', line_prefix)
end
#
# Converts a raw string into a perl buffer
#
@ -100,17 +100,17 @@ module Text
buff << "\t" if max == 0
buff << sprintf('(byte) 0x%.2x', c)
max +=1
cnt +=1
if (max > 7)
buff << ",\n" if cnt != str.length
cnt +=1
if (max > 7)
buff << ",\n" if cnt != str.length
max = 0
end
end
buff << "\n};\n"
return buff
return buff
end
#
# Creates a perl-style comment
#
@ -149,7 +149,7 @@ module Text
end
end
#
#
# Converts EBCDIC to ASCII
#
def self.from_ebcdic(str)
@ -161,7 +161,7 @@ module Text
raise ::RuntimeError, "Your installation does not support iconv (needed for EBCDIC conversion)"
end
end
#
# Returns a unicode escaped string for Javascript
#
@ -174,14 +174,14 @@ module Text
dptr += 1
c2 = data[dptr,1].unpack("C*")[0]
dptr += 1
if (endian == ENDIAN_LITTLE)
buff << sprintf('%%u%.2x%.2x', c2, c1)
else
buff << sprintf('%%u%.2x%.2x', c1, c2)
end
end
return buff
return buff
end
#
@ -198,15 +198,15 @@ module Text
end
#
# Converts standard ASCII text to a unicode string.
# Converts standard ASCII text to a unicode string.
#
# Supported unicode types include: utf-16le, utf16-be, utf32-le, utf32-be, utf-7, and utf-8
#
#
# Providing 'mode' provides hints to the actual encoder as to how it should encode the string. Only UTF-7 and UTF-8 use "mode".
#
#
# utf-7 by default does not encode alphanumeric and a few other characters. By specifying the mode of "all", then all of the characters are encoded, not just the non-alphanumeric set.
# to_unicode(str, 'utf-7', 'all')
#
#
# utf-8 specifies that alphanumeric characters are used directly, eg "a" is just "a". However, there exist 6 different overlong encodings of "a" that are technically not valid, but parse just fine in most utf-8 parsers. (0xC1A1, 0xE081A1, 0xF08081A1, 0xF8808081A1, 0xFC80808081A1, 0xFE8080808081A1). How many bytes to use for the overlong encoding is specified by providing 'size'.
# to_unicode(str, 'utf-8', 'overlong', 2)
#
@ -215,10 +215,10 @@ module Text
#
# utf-7 defaults to 'normal' utf-7 encoding
# utf-8 defaults to 2 byte 'normal' encoding
#
#
def self.to_unicode(str='', type = 'utf-16le', mode = '', size = '')
return '' if not str
case type
case type
when 'utf-16le'
return str.unpack('C*').pack('v*')
when 'utf-16be'
@ -238,7 +238,7 @@ module Text
'+' + out + '-'
}
else
return str.gsub(/[^\n\r\t\ A-Za-z0-9\'\(\),-.\/\:\?]/){ |a|
return str.gsub(/[^\n\r\t\ A-Za-z0-9\'\(\),-.\/\:\?]/){ |a|
out = ''
if a != '+'
out = encode_base64(to_unicode(a, 'utf-16be')).gsub(/[=\r\n]/, '')
@ -274,7 +274,7 @@ module Text
if i < 6
mod = (((size * 8) - 1) - byte * 8) - i
out[mod] = bit
else
else
byte = byte + 1
i = 0
redo
@ -312,7 +312,7 @@ module Text
end
}
return string
else
else
raise TypeError, 'invalid utf-8 size'
end
when 'uhwtfms' # suggested name from HD :P
@ -364,23 +364,23 @@ module Text
end
}
return string
else
else
raise TypeError, 'invalid utf type'
end
end
#
# Encode a string in a manner useful for HTTP URIs and URI Parameters.
#
# Encode a string in a manner useful for HTTP URIs and URI Parameters.
#
def self.uri_encode(str, mode = 'hex-normal')
return "" if str == nil
return "" if str == nil
return str if mode == 'none' # fast track no encoding
all = /[^\/\\]+/
normal = /[^a-zA-Z0-9\/\\\.\-]+/
normal_na = /[a-zA-Z0-9\/\\\.\-]/
case mode
when 'hex-normal'
return str.gsub(normal) { |s| Rex::Text.to_hex(s, '%') }
@ -390,7 +390,7 @@ module Text
res = ''
str.each_byte do |c|
b = c.chr
res << ((rand(2) == 0) ?
res << ((rand(2) == 0) ?
b.gsub(all) { |s| Rex::Text.to_hex(s, '%') } :
b.gsub(normal){ |s| Rex::Text.to_hex(s, '%') } )
end
@ -403,11 +403,11 @@ module Text
res = ''
str.each_byte do |c|
b = c.chr
res << ((rand(2) == 0) ?
res << ((rand(2) == 0) ?
b.gsub(all) { |s| Rex::Text.to_hex(Rex::Text.to_unicode(s, 'uhwtfms'), '%u', 2) } :
b.gsub(normal){ |s| Rex::Text.to_hex(Rex::Text.to_unicode(s, 'uhwtfms'), '%u', 2) } )
end
return res
return res
when 'u-half'
return str.gsub(all) { |s| Rex::Text.to_hex(Rex::Text.to_unicode(s, 'uhwtfms-half'), '%u', 2) }
else
@ -415,29 +415,29 @@ module Text
end
end
# Encode a string in a manner useful for HTTP URIs and URI Parameters.
#
# a = "javascript".gsub(/./) {|i| "(" + [ Rex::Text.html_encode(i, 'hex'), Rex::Text.html_encode(i, 'int'), Rex::Text.html_encode(i, 'int-wide')].join('|') +')[\s\x00]*' }
#
# Encode a string in a manner useful for HTTP URIs and URI Parameters.
#
def self.html_encode(str, mode = 'hex')
  # Encodes every byte of +str+ as an HTML numeric character reference.
  # Each reference is terminated with ';' so the output is standards
  # compliant.
  #
  # Supported modes:
  #   'hex'      - two-digit lowercase hex reference, e.g. "A" => "&#x41;"
  #   'int'      - decimal reference,                 e.g. "A" => "&#65;"
  #   'int-wide' - decimal zero-padded to 7 digits,   e.g. "A" => "&#0000065;"
  #
  # Raises TypeError for any other mode.
  template =
    case mode
    when 'hex'      then '&#x%.2x;'
    when 'int'      then '&#%d;'
    when 'int-wide' then '&#%07d;'
    else raise TypeError, 'invalid mode'
    end

  # Work on raw bytes (not a /./ regex) so newlines are encoded as well.
  str.unpack('C*').map { |byte| template % byte }.join
end
#
#
# Decode a URI encoded string
#
def self.uri_decode(str)
  # Replace each %XX escape with the byte it encodes. Only pairs of
  # hexadecimal digits are treated as escapes; anything else (e.g. "%zz" or
  # a trailing "%") is left unchanged instead of being passed to pack("H*"),
  # which would turn non-hex characters into garbage bytes.
  str.gsub(/%[0-9a-f]{2}/i) { |escape| [escape[1, 2]].pack("H*") }
end
#
# Converts a string to random case
#
@ -458,12 +458,12 @@ module Text
cnt = 0
snl = false
lst = 0
while (idx < str.length)
chunk = str[idx, width]
line = chunk.unpack("H*")[0].scan(/../).join(" ")
buf << line
buf << line
if (lst == 0)
lst = line.length
@ -471,7 +471,7 @@ module Text
else
buf << " " * ((lst - line.length) + 4).abs
end
chunk.unpack("C*").each do |c|
if (c > 0x1f and c < 0x7f)
buf << c.chr
@ -479,15 +479,15 @@ module Text
buf << "."
end
end
buf << "\n"
idx += width
end
buf << "\n"
end
#
# Converts a hex string to a raw string
#
@ -549,7 +549,7 @@ module Text
# If we were in the middle of a line, finish the buffer at this point
if (new_line == false)
output << buf_end + "\n"
end
end
return output
end
@ -578,7 +578,7 @@ module Text
# Raw MD5 digest of the supplied string
#
def self.md5_raw(str)
  # Returns the raw (binary, not hex-encoded) MD5 digest of +str+.
  # The digest is computed once.
  Digest::MD5.digest(str)
end
#
@ -598,9 +598,9 @@ module Text
# Generates a random character.
def self.rand_char(bad, chars = AllChars)
  # Delegates to rand_text with a length of 1, passing +bad+ and +chars+
  # through unchanged. The call is made exactly once so no extra random
  # data is generated and thrown away.
  rand_text(1, bad, chars)
end
# Base text generator method
def self.rand_base(len, bad, *foo)
# Remove restricted characters
@ -608,13 +608,13 @@ module Text
# Return nil if all bytes are restricted
return nil if foo.length == 0
buff = ""
# Generate a buffer from the remaining bytes
if foo.length >= 256
len.times { buff << Kernel.rand(256) }
else
else
len.times { buff << foo[ rand(foo.length) ] }
end
@ -667,14 +667,14 @@ module Text
foo = ('0' .. '9').to_a
rand_base(len, bad, *foo )
end
# Generate random bytes of english-like data
def self.rand_text_english(len, bad='')
  # Candidate set: the single-character strings for byte values 0x21
  # through 0x7e, handed to rand_base as individual arguments.
  candidates = (0x21 .. 0x7e).map { |code| code.chr }
  rand_base(len, bad, *candidates)
end
# Generate random bytes of high ascii data
def self.rand_text_highascii(len, bad='')
foo = []
@ -702,7 +702,7 @@ module Text
break
end
end
# Maximum permutations reached, but we need more data
if (buf.length < length)
buf = buf * (length / buf.length.to_f).ceil
@ -743,7 +743,7 @@ module Text
while (buf.length < len)
buf << set[rand(set.length),1]
end
buf
}
end
@ -757,7 +757,7 @@ module Text
return false
end
end
# backwards compat for just a bit...
def self.gzip_present?
self.zlib_present?
@ -772,7 +772,7 @@ module Text
dst = z.deflate(str, Zlib::FINISH)
z.close
return dst
else
else
raise RuntimeError, "Gzip support is not present."
end
end
@ -805,7 +805,7 @@ module Text
gz.close
return s
end
#
# Uncompresses a string using gzip
#
@ -818,7 +818,7 @@ module Text
gz.close
return s
end
#
# Return the index of the first badchar in data, otherwise return
# nil if there wasn't any badchar occurrences.
@ -878,10 +878,10 @@ module Text
word_ucase = word.dup
word_ucase[idx, 1] = word[idx, 1].upcase
word_lcase = word.dup
word_lcase[idx, 1] = word[idx, 1].downcase
if (idx == word.length)
return [word]
else
@ -891,7 +891,7 @@ module Text
else
res << permute_case(word, idx+1)
end
res.flatten
end
@ -931,7 +931,7 @@ module Text
end
[bits.join].pack("B32").unpack("N")[0]
end
#
# Rotate a 32-bit value to the left by cnt bits
#
@ -968,7 +968,7 @@ protected
buf
end
def self.load_codepage()
return if (!@@codepage_map_cache.nil?)
file = File.join(File.dirname(__FILE__),'codepage.map')
@ -1003,3 +1003,4 @@ protected
end
end