require 'base64'
require 'digest/md5'
require 'stringio'

begin
  # Only present on Ruby 1.8; Digest::MD5 (loaded above) is what the code uses.
  require 'md5'
rescue LoadError
end

begin
  require 'zlib'
rescue LoadError
end
module Rex
###
#
# This module formats text in various fashions and also provides
# a mechanism for wrapping text at a given column.
#
###
module Text
2005-09-30 06:40:35 +00:00
##
#
# Constants
#
##
2006-02-22 23:29:34 +00:00
2005-09-30 06:40:35 +00:00
# Character sets used by the random text generators and pattern routines.
UpperAlpha   = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
LowerAlpha   = "abcdefghijklmnopqrstuvwxyz"
Numerals     = "0123456789"
Alpha        = UpperAlpha + LowerAlpha
AlphaNumeric = Alpha + Numerals

# Default column at which serialized buffers and comments are wrapped.
DefaultWrap  = 60

# Every byte value exactly once, in the order 0xff, 0x00, 0x01 ... 0xfe.
# Built with pack so the result is a string of 256 raw bytes rather than
# text interpreted in the source file's encoding.
AllChars = ([0xff] + (0x00..0xfe).to_a).pack('C*')

# Default sets handed to pattern generation: one slot per set.
DefaultPatternSets = [ UpperAlpha, LowerAlpha, Numerals ]
2006-02-22 23:29:34 +00:00
2005-09-30 06:40:35 +00:00
##
#
# Serialization
#
##
2005-07-10 19:21:40 +00:00
2005-07-10 07:15:20 +00:00
#
# Converts a raw string into a ruby buffer: each line is a double-quoted
# run of \xNN escapes, lines are joined with " +", and the buffer is wrapped
# at +wrap+ columns.
#
def self.to_ruby(str, wrap = DefaultWrap)
  hexify(str, wrap, '"', '" +', '', '"')
end
2005-07-10 19:21:40 +00:00
#
# Creates a ruby-style comment: +str+ is word-wrapped at +wrap+ columns
# and every resulting line is prefixed with "# ".
#
def self.to_ruby_comment(str, wrap = DefaultWrap)
  wordwrap(str, 0, wrap, '', '# ')
end
2005-07-10 07:15:20 +00:00
#
# Converts a raw string into a C buffer declared as
# "unsigned char <name>[] =", wrapped at +wrap+ columns. Each line is a
# quoted run of \xNN escapes and the final line is terminated with '";'.
#
def self.to_c(str, wrap = DefaultWrap, name = "buf")
  hexify(str, wrap, '"', '"', "unsigned char #{name}[] = \n", '";')
end
#
# Creates a c-style comment: +str+ is word-wrapped at +wrap+ columns, each
# line is prefixed with " * ", and the whole text is enclosed in /* ... */.
#
def self.to_c_comment(str, wrap = DefaultWrap)
  "/*\n" + wordwrap(str, 0, wrap, '', ' * ') + " */\n"
end
#
# Converts a raw string into a perl buffer: each line is a double-quoted
# run of \xNN escapes, lines are joined with " ." and the buffer ends
# with '";'.
#
def self.to_perl(str, wrap = DefaultWrap)
  hexify(str, wrap, '"', '" .', '', '";')
end
2005-07-10 19:21:40 +00:00
#
# Creates a perl-style comment: +str+ is word-wrapped at +wrap+ columns
# and every resulting line is prefixed with "# ".
#
def self.to_perl_comment(str, wrap = DefaultWrap)
  wordwrap(str, 0, wrap, '', '# ')
end
2005-07-10 07:15:20 +00:00
#
# Identity serializer: hands back the very string it was given, with no
# copy and no transformation.
#
def self.to_raw(str)
  str
end
#
# Returns the hex version of the supplied string.
#
# str::    raw bytes to encode
# prefix:: text emitted in front of every chunk (default "\x")
# count::  number of bytes per chunk
#
# Raises RuntimeError when the number of bytes in +str+ is not a multiple
# of +count+.
#
def self.to_hex(str, prefix = "\\x", count = 1)
  hex = str.unpack('H*')[0]

  # Two hex digits per byte, so checking the digit count measures bytes
  # rather than characters (which differ for multi-byte encodings).
  raise RuntimeError, "unable to chunk into #{count} byte chunks" if ((hex.length % (count * 2)) > 0)

  # XXX: Regexp.new is used here since using /.{#{count}}/o would compile
  # the regex the first time it is used and never check again. Since we
  # want to know how many to capture on every instance, we do it this
  # way.
  hex.gsub(Regexp.new(".{#{count * 2}}")) { |s| prefix + s }
end
2005-07-13 23:01:34 +00:00
#
# Converts a string, byte by byte, to one of several unicode-style
# encodings.
#
# Supported types: utf-16le, utf-16be, utf-32le, utf-32be, utf-7, utf-8 and
# 'uhwtfms' (codepage-based wide characters, see below).
#
# 'mode' gives the encoder a hint as to how it should encode the string.
#
# utf-7 by default does not encode alphanumerics and a few other
# characters. With mode 'all' every character matched by /./ is encoded.
# A literal '+' is always emitted as '+-'.
#   to_unicode(str, 'utf-7', 'all')
#
# utf-8 passes bytes 21..0x7f through untouched unless a mode is given.
# Mode 'overlong' encodes every byte with 'size' bytes (2..7), e.g. "a"
# becomes 0xC1A1 for size 2 or 0xE081A1 for size 3. These are technically
# invalid but are accepted by many utf-8 parsers.
#   to_unicode(str, 'utf-8', 'overlong', 2)
#
# Mode 'invalid' starts from the overlong form and then randomly flips the
# marker bits of the trailing bytes, producing encodings such as 0xC1E1,
# 0xC161 or 0xC121 for "a", which many parsers still accept.
#   to_unicode(str, 'utf-8', 'invalid', 2)
#
# 'uhwtfms' maps each byte through the codepage table loaded by
# load_codepage, picking one of the possible wide encodings at random.
# 'mode' is then the codepage number (default 1252).
#
# utf-7 defaults to 'normal' utf-7 encoding
# utf-8 defaults to 2 byte 'normal' encoding
#
# Raises TypeError for an unknown type, utf-8 size, utf-8 mode, codepage,
# or a byte the codepage cannot encode.
#
def self.to_unicode(str = '', type = 'utf-16le', mode = '', size = '')
  case type
  when 'utf-16le'
    return str.unpack('C*').pack('v*')
  when 'utf-16be'
    return str.unpack('C*').pack('n*')
  when 'utf-32le'
    return str.unpack('C*').pack('V*')
  when 'utf-32be'
    return str.unpack('C*').pack('N*')
  when 'utf-7'
    pattern =
      if mode == 'all'
        /./
      else
        /[^\n\r\t\ A-Za-z0-9\'\(\),-.\/\:\?]/
      end

    return str.gsub(pattern) { |a|
      out = ''
      # '+' is the shift character; its encoded form is the empty shift '+-'
      if a != '+'
        out = encode_base64(to_unicode(a, 'utf-16be')).gsub(/[=\r\n]/, '')
      end
      '+' + out + '-'
    }
  when 'utf-8'
    size = 2 if size == ''

    if size >= 2 and size <= 7
      string = String.new
      str.each_byte { |a|
        if (a < 21 || a > 0x7f) || mode != ''
          # the eight bits of the byte, most significant first
          bin = [a].pack('C').unpack('B8')[0].split(//).collect { |digit| digit.to_i }

          # leading byte carries +size+ high marker bits, every byte has
          # its top bit set
          out = Array.new(8 * size, 0)
          0.upto(size - 1) { |i|
            out[i] = 1
            out[i * 8] = 1
          }

          # pour the payload in from the least significant end, six bits
          # per output byte
          bin.reverse.each_with_index { |bit, pos|
            out[((size * 8) - 1) - ((pos / 6) * 8) - (pos % 6)] = bit
          }

          if mode != ''
            case mode
            when 'overlong'
              # do nothing, since we already handle this as above...
            when 'invalid'
              done = 0
              while done == 0
                # offsets (from the end) of the two marker bits of each
                # trailing byte; keep flipping until at least one changed
                [ 7, 8, 15, 16, 23, 24, 31, 32, 41 ].each { |offset|
                  target = (size * 8) - offset
                  if target > 1
                    set = rand(2)
                    if out[target] != set
                      out[target] = set
                      done = 1
                    end
                  end
                }
              end
            else
              raise TypeError, 'Invalid mode.  Only "overlong" and "invalid" are acceptable modes for utf-8'
            end
          end
          string << [out.join('')].pack('B*')
        else
          string << [a].pack('C')
        end
      }
      return string
    else
      raise TypeError, 'invalid utf-8 size'
    end
  when 'uhwtfms' # suggested name from HD :P
    load_codepage()

    string = ''
    # overloading mode as codepage
    if mode == ''
      mode = 1252 # ANSI - Latin 1, default for US installs of MS products
    else
      mode = mode.to_i
    end

    if $codepage_map_cache[mode].nil?
      raise TypeError, "Invalid codepage #{mode}"
    end

    str.each_byte { |byte|
      char = [byte].pack('C*')
      possible = $codepage_map_cache[mode]['data'][char]
      if possible.nil?
        raise TypeError, "codepage #{mode} does not provide an encoding for 0x#{char.unpack('H*')[0]}"
      end
      string += possible[rand(possible.length)]
    }
    return string
  else
    raise TypeError, 'invalid utf type'
  end
end
2006-02-27 19:51:17 +00:00
#
# Encode a string in a manner useful for HTTP URIs and URI Parameters.
#
# Modes:
# none::       return +str+ untouched
# hex-normal:: %XX-encode every run of non-alphanumeric characters
# hex-all::    %XX-encode everything except '/' and '\'
# u-normal::   like hex-normal, but as %uXXXX codepage wide characters
# u-all::      like hex-all, but as %uXXXX codepage wide characters
#
# Raises TypeError for any other mode.
#
def self.uri_encode(str, mode = 'hex-normal')
  return str if mode == 'none' # fast track no encoding

  all    = /[^\/\\]+/
  # alphanumerics pass through untouched (including the digit 0)
  normal = /[^a-zA-Z0-9]+/

  case mode
  when 'hex-normal'
    return str.gsub(normal) { |s| to_hex(s, '%') }
  when 'hex-all'
    return str.gsub(all) { |s| to_hex(s, '%') }
  when 'u-normal'
    return str.gsub(normal) { |s| to_hex(to_unicode(s, 'uhwtfms'), '%u', 2) }
  when 'u-all'
    return str.gsub(all) { |s| to_hex(to_unicode(s, 'uhwtfms'), '%u', 2) }
  else
    raise TypeError, 'invalid mode'
  end
end
2005-11-26 02:34:39 +00:00
#
# Converts a hex string to a raw string. The input is lower-cased, single
# quotes are dropped, and every "\xNN" / "xNN" escape is reduced to its two
# hex digits before the digits are packed into bytes.
#
def self.hex_to_raw(str)
  digits = str.downcase.gsub(/'/, '').gsub(/\\?x([a-f0-9][a-f0-9])/, '\1')
  [ digits ].pack("H*")
end
2005-07-10 07:15:20 +00:00
#
# Wraps text at a given column using a supplied indentation.
#
# str::     text to wrap
# indent::  number of spaces placed in front of every line; also
#           subtracted from +col+ to get the usable width
# col::     column to wrap at
# append::  text added to the end of every line
# prepend:: text added to the start of every line (after the indent)
#
# Every produced line ends in a newline.
#
def self.wordwrap(str, indent = 0, col = DefaultWrap, append = '', prepend = '')
  lead = (" " * indent) + prepend

  str.gsub(/.{1,#{col - indent}}(?:\s|\Z)/) { |chunk|
    # 0x05 marks the end of the line so that a chunk already ending in a
    # newline does not receive a second one.
    (lead + chunk + append + 5.chr).gsub(/\n\005/, "\n").gsub(/\005/, "\n")
  }
end
#
# Converts a string to a hex version with wrapping support.
#
# str::        raw bytes to encode as \xNN escapes
# col::        column at which a line is finished
# line_start:: text placed at the start of every line
# line_end::   text placed at the end of every line but the last
# buf_start::  text placed in front of the whole buffer
# buf_end::    text placed at the end of the last line
#
# An empty +str+ yields +buf_start+ alone.
#
def self.hexify(str, col = DefaultWrap, line_start = '', line_end = '', buf_start = '', buf_end = '')
  output   = buf_start.dup
  cur      = 0
  new_line = true

  # Work on the byte values so "is this the last one?" is decided by byte
  # position, not by the character count of the string.
  bytes = str.unpack('C*')
  last  = bytes.length - 1

  bytes.each_with_index { |byte, idx|
    # Hexified version of the byte, preceded by the line start text when
    # this byte opens a new line
    append = sprintf("\\x%.2x", byte)
    if new_line
      append = line_start + append
      new_line = false
    end

    cur += append.length

    # If we're about to hit the column or have gone past it,
    # time to finish up this line
    if ((cur + line_end.length >= col) or (cur + buf_end.length >= col))
      new_line = true
      cur      = 0

      # If this is the last byte, use the buf_end instead of line_end
      append += ((idx == last) ? buf_end : line_end) + "\n"
    end

    output << append
  }

  # If we were in the middle of a line, finish the buffer at this point
  output << buf_end + "\n" unless new_line

  output
end
2005-09-30 06:40:35 +00:00
2005-09-30 06:48:52 +00:00
##
#
# Transforms
#
##
#
# Base64 encoder. Returns the Base64 form of +str+ with the trailing
# newline removed; line breaks that Base64.encode64 inserts inside long
# output are left in place.
#
def self.encode_base64(str)
  Base64.encode64(str).chomp
end
#
# Base64 decoder. Turns Base64 text back into the bytes it represents.
#
def self.decode_base64(str)
  decoded = Base64.decode64(str)
  return decoded
end
#
# Raw MD5 digest of the supplied string (16 bytes).
#
def self.md5_raw(str)
  # Digest::MD5 is used instead of the MD5 constant, which only the
  # Ruby 1.8 'md5' library defines.
  Digest::MD5.digest(str)
end
#
# Hexadecimal MD5 digest of the supplied string (32 lowercase hex digits).
#
def self.md5(str)
  # Digest::MD5 is used instead of the MD5 constant, which only the
  # Ruby 1.8 'md5' library defines.
  Digest::MD5.hexdigest(str)
end
2005-09-30 06:40:35 +00:00
##
#
# Generators
#
##
2005-07-17 10:30:11 +00:00
2005-07-18 01:47:18 +00:00
#
# Base text generator method.
#
# len:: number of characters to generate
# bad:: string of characters that must not appear (nil is treated as '')
# foo:: the candidate characters, one string per argument
#
# Returns nil if every candidate is restricted, otherwise a string built
# from +len+ randomly chosen candidates.
#
def self.rand_base(len, bad, *foo)
  # Remove restricted characters
  (bad || '').split('').each { |c| foo.delete(c) }

  # Return nil if all bytes are restricted
  return nil if foo.length == 0

  buff = String.new

  # Generate a buffer from the remaining bytes
  if foo.length >= 256
    # Every byte value is allowed. Append a one-byte string rather than
    # the integer itself, which would be appended as a codepoint.
    len.times { buff << Kernel.rand(256).chr }
  else
    len.times { buff << foo[rand(foo.length)] }
  end

  buff
end
2005-07-18 01:47:18 +00:00
#
# Generate random bytes of data.
#
# len::   number of characters to generate
# bad::   characters that must not appear
# chars:: string holding the candidate characters (default AllChars)
#
# Returns nil when +bad+ rules out every candidate.
#
def self.rand_text(len, bad = '', chars = AllChars)
  foo = chars.split('')
  rand_base(len, bad, *foo)
end
#
# Generate random bytes of alpha data (A-Z and a-z), skipping any
# character listed in +bad+.
#
def self.rand_text_alpha(len, bad = '')
  foo = ('A'..'Z').to_a + ('a'..'z').to_a
  rand_base(len, bad, *foo)
end
#
# Generate random bytes of lowercase alpha data (a-z), skipping any
# character listed in +bad+.
#
def self.rand_text_alpha_lower(len, bad = '')
  rand_base(len, bad, *('a'..'z').to_a)
end
#
# Generate random bytes of uppercase alpha data (A-Z), skipping any
# character listed in +bad+.
#
def self.rand_text_alpha_upper(len, bad = '')
  rand_base(len, bad, *('A'..'Z').to_a)
end
#
# Generate random bytes of alphanumeric data (A-Z, a-z and 0-9), skipping
# any character listed in +bad+.
#
def self.rand_text_alphanumeric(len, bad = '')
  foo = ('A'..'Z').to_a + ('a'..'z').to_a + ('0'..'9').to_a
  rand_base(len, bad, *foo)
end
2005-09-30 06:40:35 +00:00
2005-11-24 03:16:10 +00:00
#
# Generate random bytes of english-like data: the candidates are the bytes
# 0x21 through 0x7e, minus any character listed in +bad+.
#
def self.rand_text_english(len, bad = '')
  foo = (0x21..0x7e).map { |c| c.chr }
  rand_base(len, bad, *foo)
end
2005-09-30 06:40:35 +00:00
#
# Creates a pattern that can be used for offset calculation purposes.  This
# routine is capable of generating patterns using a supplied set and a
# supplied number of identifiable characters (slots).  The supplied sets
# should not contain any duplicate characters or the logic will fail.
#
# length:: number of characters wanted
# sets::   one string of candidate characters per slot
#
# Returns at most +length+ characters; the result is shorter when the sets
# run out of permutations first.
#
def self.pattern_create(length, sets = [ UpperAlpha, LowerAlpha, Numerals ])
  buf     = ''
  offsets = Array.new(sets.length, 0)

  until buf.length >= length
    begin
      buf += converge_sets(sets, 0, offsets, length)
    rescue RuntimeError
      # every permutation of the sets has been used
      break
    end
  end

  # Trim to exactly +length+ characters (start, count), not an inclusive range
  buf[0, length] || ''
end
#
# Calculate the offset to a pattern.
#
# pattern:: the pattern text to search
# value::   either the substring to look for, or an integer whose
#           little-endian 32-bit representation is looked for
#
# Returns the index of the first occurrence, or nil when not found.
# Raises ArgumentError when +value+ is neither a String nor an Integer.
#
def self.pattern_offset(pattern, value)
  if value.kind_of?(String)
    pattern.index(value)
  elsif value.kind_of?(Integer)
    # Integer covers the former Fixnum and Bignum classes
    pattern.index([ value ].pack('V'))
  else
    raise ArgumentError, "Invalid class for value: #{value.class}"
  end
end
2005-10-01 06:15:39 +00:00
#
# Compresses a string, eliminating all superfluous whitespace before and
# after lines and eliminating all line breaks: newlines become spaces,
# runs of whitespace collapse to a single space, and leading/trailing
# whitespace is removed.
#
def self.compress(str)
  str.gsub(/\n/m, ' ').gsub(/\s+/, ' ').gsub(/^\s+/, '').gsub(/\s+$/, '')
end
2006-01-02 07:49:52 +00:00
2006-01-19 15:12:22 +00:00
#
# Returns true if zlib can be used, i.e. the Zlib constant is defined,
# and false otherwise.
#
def self.zlib_present?
  # defined? answers the question directly instead of provoking and
  # rescuing a NameError
  defined?(Zlib) ? true : false
end
2006-01-27 05:33:08 +00:00
# Older name kept for callers that have not moved to zlib_present? yet;
# simply forwards to it.
def self.gzip_present?
  return zlib_present?
end
2006-01-19 15:12:22 +00:00
2006-01-27 05:33:08 +00:00
#
# Compresses a string using zlib.
#
# Raises RuntimeError when zlib is not available.
#
def self.zlib_deflate(str)
  raise RuntimeError, "Gzip support is not present." if (!zlib_present?)
  Zlib::Deflate.deflate(str)
end
2006-01-19 15:12:22 +00:00
2006-01-27 05:33:08 +00:00
#
# Uncompresses a string using zlib.
#
# Raises RuntimeError when zlib is not available.
#
def self.zlib_inflate(str)
  raise RuntimeError, "Gzip support is not present." if (!zlib_present?)
  Zlib::Inflate.inflate(str)
end
2006-01-03 04:07:20 +00:00
2006-01-02 07:49:52 +00:00
#
# Compresses a string using gzip.
#
# str::   data to compress
# level:: compression level, 1 through 9 (default 9)
#
# Raises RuntimeError when zlib is not available or the level is out of
# range.
#
def self.gzip(str, level = 9)
  raise RuntimeError, "Gzip support is not present." if (!zlib_present?)
  raise RuntimeError, "Invalid gzip compression level" if (level < 1 or level > 9)

  s = String.new
  # wrap closes the writer (flushing the gzip trailer into s) even when
  # writing raises
  Zlib::GzipWriter.wrap(StringIO.new(s), level) { |gz| gz << str }
  s
end
2006-01-27 05:33:08 +00:00
#
# Uncompresses a string using gzip.
#
# Raises RuntimeError when zlib is not available.
#
def self.ungzip(str)
  raise RuntimeError, "Gzip support is not present." if (!zlib_present?)

  # wrap closes the reader even when decompression raises
  Zlib::GzipReader.wrap(StringIO.new(str)) { |gz| gz.read }
end
2005-07-17 10:30:11 +00:00
2005-11-09 04:18:08 +00:00
#
# Return the index of the first badchar in data, otherwise return
# nil if there wasn't any badchar occurrences.
#
# The bad characters are tried in the order they appear in +badchars+, and
# the byte offset of the first one that occurs in +data+ is returned.
#
def self.badchar_index(data, badchars = '')
  # Compare byte values; String#index does not accept an Integer
  haystack = data.unpack('C*')

  badchars.each_byte { |badchar|
    pos = haystack.index(badchar)
    return pos if pos
  }

  nil
end
#
# This method removes bad characters from a string.
#
# Every byte of +badchars+ is taken literally. String#delete is not used
# because it treats '^', '-' and '\' in its argument as set syntax.
# Returns a new string; +data+ is not modified.
#
def self.remove_badchars(data, badchars = '')
  bad = Array.new(256, false)
  badchars.each_byte { |b| bad[b] = true }

  kept = data.unpack('C*').reject { |b| bad[b] }.pack('C*')
  # pack yields raw bytes; hand back the caller's encoding where supported
  kept.force_encoding(data.encoding) if kept.respond_to?(:force_encoding)
  kept
end
2005-11-27 18:42:44 +00:00
#
# This method returns all chars but the supplied set: a string holding
# every byte value 0..255, in ascending order, except the bytes that occur
# in +keepers+. Each byte of +keepers+ is taken literally.
#
def self.charset_exclude(keepers)
  listed = Array.new(256, false)
  keepers.each_byte { |b| listed[b] = true }

  (0..255).reject { |b| listed[b] }.pack('C*')
end
2006-01-17 04:09:40 +00:00
#
# Shuffles a byte stream: the string is split into its byte values, the
# values are shuffled, and the result is packed back into a string.
#
def self.shuffle_s(str)
  shuffle_a(str.unpack("C*")).pack("C*")
end
#
# Shuffles an array in place, Fisher-Yates style, and returns that same
# array. Walking forward, each position is swapped with a randomly chosen
# position at or before it.
#
def self.shuffle_a(arr)
  arr.each_index do |cur|
    pick = rand(cur + 1)
    next if pick == cur

    arr[cur], arr[pick] = arr[pick], arr[cur]
  end

  arr
end
2005-12-09 00:03:52 +00:00
protected
#
# Builds the next pattern chunk: one character from every set, starting at
# set +idx+, chosen by the current +offsets+. After the last set has
# contributed, the offsets are advanced like an odometer (last set
# fastest). +offsets+ is modified in place; +length+ is not used here.
#
# Raises RuntimeError once every offset has wrapped back to zero, i.e. all
# permutations have been produced.
#
def self.converge_sets(sets, idx, offsets, length) # :nodoc:
  buf = sets[idx][offsets[idx]].chr

  # If there are more sets after us, converge with them.
  if (sets[idx + 1])
    buf += converge_sets(sets, idx + 1, offsets, length)
  else
    # Increment the current set offset as well as previous ones if we
    # wrap back to zero.
    while (idx >= 0 and ((offsets[idx] = (offsets[idx] + 1) % sets[idx].length)) == 0)
      idx -= 1
    end

    # If we reached the point where the idx fell below zero, then that
    # means we've reached the maximum threshold for permutations.
    if (idx < 0)
      raise RuntimeError, "Maximum permutations reached"
    end
  end

  buf
end
2006-02-22 23:29:34 +00:00
#
# Loads 'codepage.map' from this file's directory into the global
# $codepage_map_cache, unless the cache is already populated.
#
# Lines starting with '#' and blank lines are skipped. A line whose second
# field starts with '(' opens a new codepage: "<number> (<name>)". Every
# other line holds whitespace-separated "wide:char" entries in hex for the
# current codepage.
#
# The cache maps codepage number => { 'name' => String,
# 'data' => { char bytes => [wide bytes, ...] } }.
#
def self.load_codepage()
  return if (!$codepage_map_cache.nil?)

  file = File.join(File.dirname(__FILE__), 'codepage.map')
  page = ''
  name = ''
  map  = {}

  # Block form so the file handle is closed once parsing is done
  File.open(file) { |fd|
    fd.each_line { |line|
      next if line =~ /^#/
      next if line =~ /^\s*$/

      data = line.split
      if data[1] =~ /^\(/
        page = data.shift.to_i
        name = data.join(' ').sub(/^\(/, '').sub(/\)$/, '')
        map[page] = {}
        map[page]['name'] = name
        map[page]['data'] = {}
      else
        data.each { |entry|
          wide, char = entry.split(':')
          char = [ char ].pack('H*')
          wide = [ wide ].pack('H*')
          # several wide encodings may map to the same character
          (map[page]['data'][char] ||= []).push(wide)
        }
      end
    }
  }

  $codepage_map_cache = map
end
2005-12-09 00:03:52 +00:00
2005-07-10 07:15:20 +00:00
end
end