metasploit-framework/lib/ole/io_helpers.rb


# move to support?
class IO
	# Drains +src+ into +dst+: reads at most 4096 bytes at a time from +src+ and
	# writes each chunk to +dst+, stopping as soon as +src+ reports eof.
	# Both arguments only need to respond to the methods used here
	# (+eof?+ / +read+ on +src+, +write+ on +dst+). Returns +nil+.
	def self.copy src, dst
		dst.write(src.read(4096)) until src.eof?
	end
end

#
# = Introduction
#
# +RangesIO+ is a basic class for wrapping another IO object allowing you to arbitrarily reorder
# slices of the input file by providing a list of ranges. Intended as an initial measure to curb
# inefficiencies in the Dirent#data method just reading all of a file's data in one hit, with
# no method to stream it.
#
# This class will encapsulate the ranges (corresponding to big or small blocks) of any ole file
# and thus allow reading/writing directly to the source bytes, in a streamed fashion (so just
# getting 16 bytes doesn't read the whole thing).
#
# In the simplest case it can be used with a single range to provide a limited io to a section of
# a file.
#
# = Limitations
#
# * No buffering. by design at the moment. Intended for large reads
#
# = TODO
#
# On further reflection, this class is something of a joining/optimization of
# two separate IO classes. a SubfileIO, for providing access to a range within
# a File as a separate IO object, and a ConcatIO, allowing the presentation of
# a bunch of io objects as a single unified whole.
#
# I will need such a ConcatIO if I'm to provide Mime#to_io, a method that will
# convert a whole mime message into an IO stream, that can be read from.
# It will just be the concatenation of a series of IO objects, corresponding to
# headers and boundaries, as StringIO's, and SubfileIO objects, coming from the
# original message proper, or RangesIO as provided by the Attachment#data, that
# will then get wrapped by Mime in a Base64IO or similar, to get encoded on-the-
# fly. Thus the attachment, in its plain or encoded form, and the message as a
# whole never exists as a single string in memory, as it does now. This is a
# fair bit of work to achieve, but generally useful I believe.
#
# This class isn't ole specific, maybe move it to my general ruby stream project.
#
class RangesIO
	attr_reader :io, :ranges, :size, :pos
	# +io+ is the parent io object that we are wrapping.
	#
	# +ranges+ are byte offsets, either
	# 1. an array of ranges [1..2, 4..5, 6..8] or
	# 2. an array of arrays, where the second is length [[1, 1], [4, 1], [6, 2]] for the above
	#    (think the way String indexing works)
	# The +ranges+ provide sequential slices of the file that will be read. they can overlap,
	# and the same slice may appear more than once.
	#
	# +opts+ currently understands <tt>:close_parent</tt> (default +false+); when true,
	# #close also closes +io+.
	def initialize io, ranges, opts={}
		@opts = {:close_parent => false}.merge opts
		@io = io
		# normalise everything to [offset, length] pairs. note that a Range's end is used
		# as an exclusive bound here (length = end - begin). negative ranges are not checked.
		@ranges = ranges.map { |r| Range === r ? [r.begin, r.end - r.begin] : r }
		# total number of bytes exposed by this io
		@size = @ranges.inject(0) { |total, (_pos, len)| total + len }
		# initial position in the (virtual) file
		@pos = 0
	end

	# Sets the current position, measured in bytes from the start of the concatenated
	# ranges. Only IO::SEEK_SET is supported; any other +whence+ raises NotImplementedError.
	# No bounds checking is performed on +pos+.
	def pos= pos, whence=IO::SEEK_SET
		# FIXME support other whence values
		raise NotImplementedError, "#{whence.inspect} not supported" unless whence == IO::SEEK_SET
		# just a simple pos calculation. invalidate buffers if we had them
		@pos = pos
	end

	alias seek :pos=
	alias tell :pos

	# Closes the parent io, but only if the :close_parent option was given.
	def close
		@io.close if @opts[:close_parent]
	end

	# Maps the virtual position +pos+ to <tt>[range, offset]</tt>, where +range+ is the
	# <tt>[offset, length]</tt> pair from #ranges containing +pos+, and +offset+ is the
	# distance of +pos+ from the start of that range.
	#
	# A position sitting exactly on the end of a range is reported against that range
	# (with offset == length), which is what lets pos == size resolve to the last range.
	# Raises a RuntimeError if no range covers +pos+.
	def range_and_offset pos
		i, off = range_index_and_offset pos
		[ranges[i], off]
	end

	# True once the position has reached the total size of the ranges.
	def eof?
		@pos == @size
	end

	# read bytes from file, to a maximum of +limit+, or all available if unspecified.
	#
	# Returns an empty string at eof. If the parent io yields fewer bytes than a range
	# asks for (eg the range lies beyond the end of the parent), whatever was available is
	# returned, and the position still advances by the requested amount so that
	# loops of the form <tt>read until eof?</tt> terminate.
	def read limit=nil
		# dup so that the buffer is mutable even when string literals are frozen
		data = ''.dup
		limit ||= size
		# special case eof
		return data if eof?
		remaining_slices(@pos).each do |pos, len|
			want = limit < len ? limit : len
			@io.seek pos
			# the parent returns nil when asked to read past its end; treat that as no data
			data << @io.read(want).to_s
			# only move on once the parent read has returned, so a raised error
			# doesn't leave us with a bogus position
			@pos += want
			limit -= want
			break if limit.zero?
		end
		data
	end

	# you may override this call to update @ranges and @size, if applicable. then write
	# support can grow below
	def truncate size
		raise NotImplementedError, 'truncate not supported'
	end
	# why not? :)
	alias size= :truncate

	# Writes +data+ over the underlying ranges, starting at the current position, and
	# returns the number of bytes written.
	#
	# If +data+ doesn't fit in the space left, #truncate is called to ask for more room;
	# when that isn't supported a RuntimeError is raised and nothing is written.
	def write data
		# everything below counts bytes, as the parent io does, so work on a binary view
		# of the string. dup first to avoid changing the caller's object.
		if data.respond_to?(:encoding) && data.encoding != Encoding::BINARY
			data = data.dup.force_encoding(Encoding::BINARY)
		end
		# short cut. needed because truncate 0 may return no ranges, instead of empty range,
		# thus range_and_offset fails.
		return 0 if data.empty?
		# if we don't have room, we can use the truncate hook to make more space.
		if data.length > @size - @pos
			begin
				truncate @pos + data.length
			rescue NotImplementedError
				# FIXME maybe warn instead, then just truncate the data?
				raise "unable to satisfy write of #{data.length} bytes"
			end
		end
		written = 0
		remaining_slices(@pos).each do |pos, len|
			# at most +len+ bytes fit in this slice; the last chunk may be shorter
			chunk = data[written, len]
			@io.seek pos
			@io.write chunk
			@pos += chunk.length
			written += chunk.length
			break if written >= data.length
		end
		written
	end

	# this will be generalised to a module later
	#
	# Yields successive chunks of at most +blocksize+ bytes until eof.
	def each_read blocksize=4096
		yield read(blocksize) until eof?
	end

	def inspect
		# there is no range to report for empty files, hence the rescue
		pos, len = begin
			range_and_offset(@pos)[0]
		rescue StandardError
			[nil, nil]
		end
		range_str = pos ? "#{pos}..#{pos+len}" : 'nil'
		"#<#{self.class} io=#{io.inspect} size=#@size pos=#@pos "\
			"current_range=#{range_str}>"
	end

	private

	# Finds the range covering virtual position +pos+ and returns
	# <tt>[index into #ranges, offset within that range]</tt>.
	#
	# Working with the index (rather than looking the range up again by value) matters
	# when the same [offset, length] pair appears more than once in #ranges.
	def range_index_and_offset pos
		total = 0
		ranges.each_with_index do |(_start, len), i|
			to = total + len
			return [i, pos - total] if pos <= to
			total = to
		end
		# should be impossible for any valid pos, (0...size) === pos
		raise "unable to find range for pos #{pos.inspect}"
	end

	# Returns the [offset, length] slices of the parent io that remain from virtual
	# position +pos+ onwards: the unread tail of the current range, followed by all
	# later ranges.
	#
	# this may be conceptually nice (create sub-range starting where we are), but
	# for a large range array its pretty wasteful.
	def remaining_slices pos
		i, off = range_index_and_offset pos
		start, len = ranges[i]
		[[start + off, len - off]] + ranges[i + 1..-1]
	end
end