#! /usr/bin/ruby -w require 'iconv' require 'date' require 'stringio' require 'tempfile' require 'ole/base' require 'ole/types' require 'ole/io_helpers' module Ole # :nodoc: # # = Introduction # # Ole::Storage is a simple class intended to abstract away details of the # access to OLE2 structured storage files, such as those produced by # Microsoft Office, eg *.doc, *.msg etc. # # Initially based on chicago's libole, source available at # http://prdownloads.sf.net/chicago/ole.tgz # Later augmented with some corrections by inspecting pole, and (purely # for header definitions) gsf. # # = Usage # # Usage should be fairly straight forward: # # # get the parent ole storage object # ole = Ole::Storage.open 'myfile.msg', 'r+' # # => # root=#> # # read some data # ole.root[1].read 4 # # => "\001\000\376\377" # # get the top level root object and output a tree structure for # # debugging # puts ole.root.to_tree # # => # - # # |- # # | |- # # ... # |- # # |- # # \- # # |- # # ... # # write some data, and finish up (note that open is 'r+', so this overwrites # # but doesn't truncate) # ole.root["\001CompObj"].open { |f| f.write "blah blah" } # ole.close # # = TODO # # 1. tests. lock down how things work at the moment - mostly good. # create from scratch works now, as does copying in a subtree of another doc, so # ole embedded attachment serialization works now. i can save embedded xls in an msg # into a separate file, and open it. this was a goal. now i would want to implemenet # to_mime conversion for embedded attachments, that serializes them to ole, but handles # some separately like various meta file types as plain .wmf attachments perhaps. this # will give pretty good .eml's from emails with embedded attachments. # the other todo is .rtf output, with full support for embedded ole objects... # 2. lots of tidying up # - main FIXME's in this regard are: # * the custom header cruft for Header and Dirent needs some love. 
# * i have a number of classes doing load/save combos: Header, AllocationTable, Dirent,
#   and, in a manner of speaking, but arguably different, Storage itself.
#   they have differing api's which would be nice to clean.
#   AllocationTable::Big must be created aot now, as it is used for all subsequent reads.
# * ole types need work, can't serialize datetime at the moment.
# 3. need to fix META_BAT support in #flush.
#
class Storage
  VERSION = '1.1.1'

  # The top of the ole tree structure
  attr_reader :root
  # The tree structure in its original flattened form. only valid after #load, or #flush.
  attr_reader :dirents
  # The underlying io object to/from which the ole object is serialized, whether we
  # should close it, and whether it is writeable
  attr_reader :io, :close_parent, :writeable
  # Low level internals, you probably shouldn't need to mess with these
  attr_reader :header, :bbat, :sbat, :sb_file

  # maybe include an option hash, and allow :close_parent => true, to be more general.
  # +arg+ should be either a file, or an +IO+ object, and needs to be seekable.
  def initialize arg, mode=nil
    # get the io object
    @close_parent, @io = if String === arg
      [true, open(arg, mode || 'rb')]
    else
      raise 'unable to specify mode string with io object' if mode
      [false, arg]
    end
    # do we have this file opened for writing? don't know of a better way to tell
    # (an IO opened read-only raises IOError on flush)
    @writeable = begin
      @io.flush
      true
    rescue IOError
      false
    end
    # silence undefined warning in clear
    @sb_file = nil
    # if the io object has data, we should load it, otherwise start afresh
    if @io.size > 0; load
    else clear
    end
  end

  # Class-level constructor. When given a block, yields the storage object and
  # guarantees #close (and hence #flush for writeable io) on the way out.
  def self.new arg, mode=nil
    ole = super
    if block_given?
      begin   yield ole
      ensure; ole.close
      end
    else ole
    end
  end

  class << self
    # encouraged
    alias open :new
    # deprecated
    alias load :new
  end

  # load document from file.
  def load
    # we always read 512 for the header block. if the block size ends up being different,
    # what happens to the 109 fat entries. are there more/less entries?
    @io.rewind
    header_block = @io.read 512
    @header = Header.load header_block

    # create an empty bbat
    @bbat = AllocationTable::Big.new self
    # extra mbat blocks (the meta-bat continues the 109 bat-block pointers that live
    # in the header block itself)
    mbat_blocks = (0...@header.num_mbat).map { |i| i + @header.mbat_start }
    bbat_chain = (header_block[Header::SIZE..-1] + @bbat.read(mbat_blocks)).unpack 'L*'
    # am i using num_bat in the right way?
    @bbat.load @bbat.read(bbat_chain[0, @header.num_bat])

    # get block chain for directories, read it, then split it into chunks and load the
    # directory entries. semantics changed - used to cut at first dir where dir.type == 0
    @dirents = @bbat.read(@header.dirent_start).scan(/.{#{Dirent::SIZE}}/mo).
      map { |str| Dirent.load self, str }.reject { |d| d.type_id == 0 }

    # now reorder from flat into a tree
    # links are stored in some kind of balanced binary tree
    # check that everything is visited at least, and at most once
    # similarly with the blocks of the file.
    # was thinking of moving this to Dirent.to_tree instead.
    class << @dirents
      def to_tree idx=0
        return [] if idx == Dirent::EOT
        d = self[idx]
        d.children = to_tree d.child
        raise "directory #{d.inspect} used twice" if d.idx
        d.idx = idx
        to_tree(d.prev) + [d] + to_tree(d.next)
      end
    end
    @root = @dirents.to_tree.first
    Log.warn "root name was #{@root.name.inspect}" unless @root.name == 'Root Entry'
    unused = @dirents.reject(&:idx).length
    Log.warn "* #{unused} unused directories" if unused > 0

    # FIXME i don't currently use @header.num_sbat which i should
    # hmm. nor do i write it. it means what exactly again?
    # the root dirent's data is the backing store for all the small files
    @sb_file = RangesIOResizeable.new @bbat, @root.first_block, @root.size
    @sbat = AllocationTable::Small.new self
    @sbat.load @bbat.read(@header.sbat_start)
  end

  def close
    flush if @writeable
    @sb_file.close
    @io.close if @close_parent
  end

  # should have a #open_dirent i think. and use it in load and flush. neater.
  # also was thinking about Dirent#open_padding. then i can more easily clean up the padding
  # to be 0.chr
=begin
thoughts on fixes:
1. reterminate any chain not ending in EOC.
2. pass through all chain heads looking for collisions, and making sure nothing points to
   them (ie they are really heads).
3. we know the locations of the bbat data, and mbat data. ensure that there are
   placeholder blocks in the bat for them.
this stuff will ensure reliability of input better. otherwise, its actually worth doing
a repack directly after read, to ensure the above is probably acounted for, before
subsequent writes possibly destroy things.
=end
  # Serialize the current state back to @io: dirent stream, sbat, bbat, then header.
  def flush
    # recreate dirs from our tree, split into dirs and big and small files
    @root.type = :root # for now.
    @root.name = 'Root Entry'
    @root.first_block = @sb_file.first_block
    @root.size = @sb_file.size
    @dirents = @root.flatten

    #dirs, files = @dirents.partition(&:dir?)
    #big_files, small_files = files.partition { |file| file.size > @header.threshold }

    # maybe i should move the block form up to RangesIO, and get it for free at all levels.
    # Dirent#open gets block form for free then
    io = RangesIOResizeable.new @bbat, @header.dirent_start
    io.truncate 0
    @dirents.each { |dirent| io.write dirent.save }
    # pad the dirent stream out to a whole number of blocks
    padding = (io.size / @bbat.block_size.to_f).ceil * @bbat.block_size - io.size
    #p [:padding, padding]
    io.write 0.chr * padding
    @header.dirent_start = io.first_block
    io.close

    # similarly for the sbat data.
    io = RangesIOResizeable.new @bbat, @header.sbat_start
    io.truncate 0
    io.write @sbat.save
    @header.sbat_start = io.first_block
    @header.num_sbat = @bbat.chain(@header.sbat_start).length
    io.close

    # what follows will be slightly more complex for the bat fiddling.
    # create RangesIOResizeable hooked up to the bbat. use that to claim bbat blocks using
    # truncate. then when its time to write, convert that chain and some chunk of blocks at
    # the end, into META_BAT blocks. write out the chain, and those meta bat blocks, and its
    # done.
    # first, release any blocks previously marked as BAT/META_BAT - they get reclaimed below
    @bbat.table.map! do |b|
      b == AllocationTable::BAT || b == AllocationTable::META_BAT ?
        AllocationTable::AVAIL : b
    end
    io = RangesIOResizeable.new @bbat, AllocationTable::EOC

    # use crappy loop for now:
    # growing the io may grow the bbat itself, which grows the data to store, hence the loop
    while true
      bbat_data = @bbat.save
      #mbat_data = bbat_data.length / @bbat.block_size * 4
      mbat_chain = @bbat.chain io.first_block
      raise NotImplementedError, "don't handle writing out extra META_BAT blocks yet" if mbat_chain.length > 109
      # so we can ignore meta blocks in this calculation:
      break if io.size >= bbat_data.length # it shouldn't be bigger right?
      # this may grow the bbat, depending on existing available blocks
      io.truncate bbat_data.length
    end

    # now extract the info we want:
    ranges = io.ranges
    mbat_chain = @bbat.chain io.first_block
    io.close
    mbat_chain.each { |b| @bbat.table[b] = AllocationTable::BAT }
    @header.num_bat = mbat_chain.length
    #p @bbat.truncated_table
    #p ranges
    #p mbat_chain

    # not resizeable! the blocks are already claimed, just write the raw table data
    io = RangesIO.new @io, ranges
    io.write @bbat.save
    io.close
    # pad the header's 109-slot mbat out with AVAIL
    mbat_chain += [AllocationTable::AVAIL] * (109 - mbat_chain.length)
    @header.mbat_start = AllocationTable::EOC
    @header.num_mbat = 0

=begin
    bbat_data = new_bbat.save
    # must exist as linear chain stored in header.
    @header.num_bat = (bbat_data.length / new_bbat.block_size.to_f).ceil
    base = io.pos / new_bbat.block_size - 1
    io.write bbat_data
    # now that spanned a number of blocks:
    mbat = (0...@header.num_bat).map { |i| i + base }
    mbat += [AllocationTable::AVAIL] * (109 - mbat.length) if mbat.length < 109
    header_mbat = mbat[0...109]
    other_mbat_data = mbat[109..-1].pack 'L*'
    @header.mbat_start = base + @header.num_bat
    @header.num_mbat = (other_mbat_data.length / new_bbat.block_size.to_f).ceil
    io.write other_mbat_data
=end

    @root.type = :dir

    # now seek back and write the header out
    @io.seek 0
    @io.write @header.save + mbat_chain.pack('L*')
    @io.flush
  end

  # Reset to the state of an empty ole document. Used by #initialize for empty io,
  # and by #repack.
  def clear
    # first step though is to support modifying pre-existing and saving, then this
    # missing gap will be fairly straight forward - essentially initialize to
    # equivalent of loading an empty ole document.
    #raise NotImplementedError, 'unable to create new ole objects from scratch as yet'
    Log.warn 'creating new ole storage object on non-writable io' unless @writeable
    @header = Header.new
    @bbat = AllocationTable::Big.new self
    @root = Dirent.new self, :dir
    @root.name = 'Root Entry'
    @dirents = [@root]
    @root.idx = 0
    @root.children = []
    # size shouldn't display for non-files
    @root.size = 0
    @sb_file.close if @sb_file
    @sb_file = RangesIOResizeable.new @bbat, AllocationTable::EOC
    @sbat = AllocationTable::Small.new self
    # throw everything else the hell away
    @io.truncate 0
  end

  # could be useful with mis-behaving ole documents. or to just clean them up.
  # +temp+ selects the scratch backing: :file (Tempfile) or :mem (StringIO).
  def repack temp=:file
    case temp
    when :file; Tempfile.open 'w+', &method(:repack_using_io)
    when :mem;  StringIO.open(&method(:repack_using_io))
    else raise "unknown temp backing #{temp.inspect}"
    end
  end

  # copy current contents to +temp_io+, wipe self, then deep-copy the tree back in.
  def repack_using_io temp_io
    @io.rewind
    IO.copy @io, temp_io
    clear
    Storage.open temp_io do |temp_ole|
      temp_ole.root.type = :dir
      Dirent.copy temp_ole.root, root
    end
  end

  # pick the allocation table appropriate for a stream of +size+ bytes
  def bat_for_size size
    # note >=, not > previously.
    size >= @header.threshold ? @bbat : @sbat
  end

  def inspect
    "#<#{self.class} io=#{@io.inspect} root=#{@root.inspect}>"
  end

  # A class which wraps the ole header
  class Header < Struct.new(
      :magic, :clsid, :minor_ver, :major_ver, :byte_order, :b_shift, :s_shift,
      :reserved, :csectdir, :num_bat, :dirent_start, :transacting_signature,
      :threshold, :sbat_start, :num_sbat, :mbat_start, :num_mbat
    )
    PACK = 'a8 a16 S2 a2 S2 a6 L3 a4 L5'
    SIZE = 0x4c
    # i have seen it pointed out that the first 4 bytes of hex,
    # 0xd0cf11e0, is supposed to spell out docfile. hmmm :)
    MAGIC = "\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1" # expected value of Header#magic
    # what you get if creating new header from scratch.
    # AllocationTable::EOC isn't available yet. meh.
    EOC = 0xfffffffe
    DEFAULT = [
      MAGIC, 0.chr * 16, 59, 3, "\xfe\xff", 9, 6,
      0.chr * 6, 0, 1, EOC, 0.chr * 4, 4096,
      EOC, 0, EOC, 0
    ]

    # 2 basic initializations, from scratch, or from a data string.
    # from scratch will be geared towards creating a new ole object
    def initialize *values
      super(*(values.empty? ? DEFAULT : values))
      validate!
    end

    def self.load str
      Header.new(*str.unpack(PACK))
    end

    def save
      to_a.pack PACK
    end

    # raise on structurally-invalid headers; merely Log.warn on suspicious but
    # workable ones. returns true.
    def validate!
      raise "OLE2 signature is invalid" unless magic == MAGIC
      if num_bat == 0 or # is that valid for a completely empty file?
         # not sure about this one. basically to do max possible bat given size of mbat
         num_bat > 109 && num_bat > 109 + num_mbat * (1 << b_shift - 2) or
         # shouldn't need to use the mbat as there is enough space in the header block
         num_bat < 109 && num_mbat != 0 or
         # given the size of the header is 76, if b_shift <= 6, blocks address the header.
         s_shift > b_shift or b_shift <= 6 or b_shift >= 31 or
         # we only handle little endian
         byte_order != "\xfe\xff"
        raise "not valid OLE2 structured storage file"
      end
      # relaxed this, due to test-msg/qwerty_[1-3]*.msg they all had
      # 3 for this value.
      # transacting_signature != "\x00" * 4 or
      if threshold != 4096 or
         num_mbat == 0 && mbat_start != AllocationTable::EOC or
         reserved != "\x00" * 6
        Log.warn "may not be a valid OLE2 structured storage file"
      end
      true
    end
  end

  #
  # +AllocationTable+'s hold the chains corresponding to files. Given
  # an initial index, AllocationTable#chain follows the chain, returning
  # the blocks that make up that file.
  #
  # There are 2 allocation tables, the bbat, and sbat, for big and small
  # blocks respectively. The block chain should be loaded using either
  # Storage#read_big_blocks or Storage#read_small_blocks
  # as appropriate.
  #
  # Whether or not big or small blocks are used for a file depends on
  # whether its size is over the Header#threshold level.
  #
  # An Ole::Storage document is serialized as a series of directory objects,
  # which are stored in blocks throughout the file. The blocks are either
  # big or small, and are accessed using the AllocationTable.
  #
  # The bbat allocation table's data is stored in the spare room in the header
  # block, and in extra blocks throughout the file as referenced by the meta
  # bat. That chain is linear, as there is no higher level table.
  #
  class AllocationTable
    # a free block (I don't currently leave any blocks free), although I do pad out
    # the allocation table with AVAIL to the block size.
    AVAIL = 0xffffffff
    EOC = 0xfffffffe # end of a chain
    # these blocks correspond to the bat, and aren't part of a file, nor available.
    # (I don't currently output these)
    BAT = 0xfffffffd
    META_BAT = 0xfffffffc

    attr_reader :ole, :io, :table, :block_size
    def initialize ole
      @ole = ole
      @table = []
    end

    # parse raw table data - one little-endian uint32 per block entry
    def load data
      @table = data.unpack('L*')
    end

    def truncated_table
      # this strips trailing AVAILs. come to think of it, this has the potential to break
      # bogus ole. if you terminate using AVAIL instead of EOC, like I did before. but that is
      # very broken. however, if a chain ends with AVAIL, it should probably be fixed to EOC
      # at load time.
      temp = @table.reverse
      not_avail = temp.find { |b| b != AVAIL } and temp = temp[temp.index(not_avail)..-1]
      temp.reverse
    end

    def save
      table = truncated_table #@table
      # pad it out some
      num = @ole.bbat.block_size / 4
      # do you really use AVAIL? they probably extend past end of file, and may shortly
      # be used for the bat. not really good.
      table += [AVAIL] * (num - (table.length % num)) if (table.length % num) != 0
      table.pack 'L*'
    end

    # rewriting this to be non-recursive. it broke on a large attachment
    # building up the chain, causing a stack error. need tail-call elimination...
    # follows the linked-list starting at block +start+, returning the array of
    # block indices. any value >= META_BAT acts as a terminator.
    def chain start
      a = []
      idx = start
      until idx >= META_BAT
        # NOTE(review): `idx > @table.length` allows idx == length, where @table[idx]
        # is nil - presumably never hit in practice, but looks off-by-one. confirm.
        raise "broken allocationtable chain" if idx < 0 || idx > @table.length
        a << idx
        idx = @table[idx]
      end
      Log.warn "invalid chain terminator #{idx}" unless idx == EOC
      a
    end

    # accepts either a chain head or an already-expanded chain array
    def ranges chain, size=nil
      chain = self.chain(chain) unless Array === chain
      blocks_to_ranges chain, size
    end

    # Turn a chain (an array given by +chain+) of big blocks, optionally
    # truncated to +size+, into an array of arrays describing the stretches of
    # bytes in the file that it belongs to.
    #
    # Big blocks are of size Ole::Storage::Header#b_size, and are stored
    # directly in the parent file.
    def blocks_to_ranges chain, size=nil
      # truncate the chain if required
      chain = chain[0...(size.to_f / block_size).ceil] if size
      # convert chain to ranges of the block size
      ranges = chain.map { |i| [block_size * i, block_size] }
      # truncate final range if required
      ranges.last[1] -= (ranges.length * block_size - size) if ranges.last and size
      ranges
    end

    # quick shortcut. chain can be either a head (in which case the table is used to
    # turn it into a chain), or a chain. it is converted to ranges, then to rangesio.
    # its not resizeable or migrateable. it probably could be resizeable though, using
    # self as the bat. but what would the first_block be?
    def open chain, size=nil
      io = RangesIO.new @io, ranges(chain, size)
      if block_given?
        begin   yield io
        ensure; io.close
        end
      else io
      end
    end

    def read chain, size=nil
      open chain, size, &:read
    end

    # ----------------------

    # index of the first AVAIL entry, appending a new one if the table is full
    def get_free_block
      @table.each_index { |i| return i if @table[i] == AVAIL }
      @table.push AVAIL
      @table.length - 1
    end

    # must return first_block
    # grows or shrinks the chain starting at +first_block+ to hold +size+ bytes,
    # allocating/releasing table entries as needed.
    def resize_chain first_block, size
      new_num_blocks = (size / block_size.to_f).ceil
      blocks = chain first_block
      old_num_blocks = blocks.length
      if new_num_blocks < old_num_blocks
        # de-allocate some of our old blocks. TODO maybe zero them out in the file???
        (new_num_blocks...old_num_blocks).each { |i| @table[blocks[i]] = AVAIL }
        # if we have a chain, terminate it and return head, otherwise return EOC
        if new_num_blocks > 0
          @table[blocks[new_num_blocks-1]] = EOC
          first_block
        else EOC
        end
      elsif new_num_blocks > old_num_blocks
        # need some more blocks.
        last_block = blocks.last
        (new_num_blocks - old_num_blocks).times do
          block = get_free_block
          # connect the chain. handle corner case of blocks being [] initially
          if last_block
            @table[last_block] = block
          else
            first_block = block
          end
          last_block = block
          # this is just to inhibit the problem where it gets picked as being a free block
          # again next time around.
          @table[last_block] = EOC
        end
        first_block
      else first_block
      end
    end

    class Big < AllocationTable
      def initialize(*args)
        super
        @block_size = 1 << @ole.header.b_shift
        @io = @ole.io
      end

      # Big blocks are kind of -1 based, in order to not clash with the header.
      def blocks_to_ranges blocks, size
        super blocks.map { |b| b + 1 }, size
      end
    end

    class Small < AllocationTable
      def initialize(*args)
        super
        @block_size = 1 << @ole.header.s_shift
        # small blocks live inside the root dirent's data, not the parent file
        @io = @ole.sb_file
      end
    end
  end

  # like normal RangesIO, but Ole::Storage specific. the ranges are backed by an
  # AllocationTable, and can be resized. used for read/write to 2 streams:
  # 1. serialized dirent data
  # 2. sbat table data
  # 3. all dirents but through RangesIOMigrateable below
  #
  # Note that all internal access to first_block is through accessors, as it is sometimes
  # useful to redirect it.
  class RangesIOResizeable < RangesIO
    attr_reader   :bat
    attr_accessor :first_block
    def initialize bat, first_block, size=nil
      @bat = bat
      self.first_block = first_block
      super @bat.io, @bat.ranges(first_block, size)
    end

    def truncate size
      # note that old_blocks is != @ranges.length necessarily. i'm planning to write a
      # merge_ranges function that merges sequential ranges into one as an optimization.
      self.first_block = @bat.resize_chain first_block, size
      @ranges = @bat.ranges first_block, size
      @pos = @size if @pos > size

      # don't know if this is required, but we explicitly request our @io to grow if necessary
      # we never shrink it though. maybe this belongs in allocationtable, where smarter decisions
      # can be made.
      # maybe its ok to just seek out there later??
      max = @ranges.map { |pos, len| pos + len }.max || 0
      @io.truncate max if max > @io.size

      @size = size
    end
  end

  # like RangesIOResizeable, but Ole::Storage::Dirent specific. provides for migration
  # between bats based on size, and updating the dirent, instead of the ole copy back
  # on close.
  class RangesIOMigrateable < RangesIOResizeable
    attr_reader :dirent
    def initialize dirent
      @dirent = dirent
      super @dirent.ole.bat_for_size(@dirent.size), @dirent.first_block, @dirent.size
    end

    def truncate size
      bat = @dirent.ole.bat_for_size size
      if bat != @bat
        # bat migration needed! we need to backup some data. the amount of data
        # should be <= @ole.header.threshold, so we can just hold it all in one buffer.
        # backup this
        pos = @pos
        @pos = 0
        keep = read [@size, size].min
        # this does a normal truncate to 0, removing our presence from the old bat, and
        # rewrite the dirent's first_block
        super 0
        @bat = bat
        # just change the underlying io from right under everyone :)
        @io = bat.io
        # important to do this now, before the write. as the below write will always
        # migrate us back to sbat! this will now allocate us +size+ in the new bat.
        super
        @pos = 0
        write keep
        @pos = pos
      else
        super
      end
      # now just update the file
      @dirent.size = size
    end

    # forward this to the dirent
    def first_block
      @dirent.first_block
    end

    def first_block= val
      @dirent.first_block = val
    end
  end

  #
  # A class which wraps an ole directory entry. Can be either a directory
  # (Dirent#dir?) or a file (Dirent#file?)
  #
  # Most interaction with Ole::Storage is through this class.
  # The 2 most important functions are Dirent#children, and
  # Dirent#data.
# # was considering separate classes for dirs and files. some methods/attrs only # applicable to one or the other. class Dirent MEMBERS = [ :name_utf16, :name_len, :type_id, :colour, :prev, :next, :child, :clsid, :flags, # dirs only :create_time_str, :modify_time_str, # files only :first_block, :size, :reserved ] PACK = 'a64 S C C L3 a16 L a8 a8 L2 a4' SIZE = 128 EPOCH = DateTime.parse '1601-01-01' TYPE_MAP = { # this is temporary 0 => :empty, 1 => :dir, 2 => :file, 5 => :root } COLOUR_MAP = { 0 => :red, 1 => :black } # used in the next / prev / child stuff to show that the tree ends here. # also used for first_block for directory. EOT = 0xffffffff # All +Dirent+ names are in UTF16, which we convert FROM_UTF16 = Iconv.new 'utf-8', 'utf-16le' TO_UTF16 = Iconv.new 'utf-16le', 'utf-8' include Enumerable attr_accessor :values # Dirent's should be created in 1 of 2 ways, either Dirent.new ole, [:dir/:file/:root], # or Dirent.load '... dirent data ...' # its a bit clunky, but thats how it is at the moment. you can assign to type, but # shouldn't. attr_accessor :idx # This returns all the children of this +Dirent+. It is filled in # when the tree structure is recreated. attr_accessor :children attr_reader :ole, :type, :create_time, :modify_time, :name def initialize ole, type @ole = ole # this isn't really good enough. need default values put in there. @values = [ 0.chr * 2, 2, 0, # will get overwritten 1, EOT, EOT, EOT, 0.chr * 16, 0, nil, nil, AllocationTable::EOC, 0, 0.chr * 4] # maybe check types here. @type = type @create_time = @modify_time = nil @children = [] if file? @create_time = Time.now @modify_time = Time.now end end def self.load ole, str # load should function without the need for the initializer. dirent = Dirent.allocate dirent.load ole, str dirent end def load ole, str @ole = ole @values = str.unpack PACK @name = FROM_UTF16.iconv name_utf16[0...name_len].sub(/\x00\x00$/, '') @type = TYPE_MAP[type_id] or raise "unknown type #{type_id.inspect}" if file? 
@create_time = Types.load_time create_time_str @modify_time = Types.load_time modify_time_str end end # only defined for files really. and the above children stuff is only for children. # maybe i should have some sort of File and Dir class, that subclass Dirents? a dirent # is just a data holder. # this can be used for write support if the underlying io object was opened for writing. # maybe take a mode string argument, and do truncation, append etc stuff. def open return nil unless file? io = RangesIOMigrateable.new self if block_given? begin yield io ensure; io.close end else io end end def read limit=nil open { |io| io.read limit } end def dir? # to count root as a dir. type != :file end def file? type == :file end def time # time is nil for streams, otherwise try to parse either of the time pairse (not # sure of their meaning - created / modified?) #@time ||= file? ? nil : (Dirent.parse_time(secs1, days1) || Dirent.parse_time(secs2, days2)) create_time || modify_time end def each(&block) @children.each(&block) end def [] idx return children[idx] if Integer === idx # path style look up. # maybe take another arg to allow creation? or leave that to the filesystem # add on. # not sure if '/' is a valid char in an Dirent#name, so no splitting etc at # this level. # also what about warning about multiple hits for the same name? children.find { |child| idx === child.name } end # solution for the above '/' thing for now. def / path self[path] end def to_tree if children and !children.empty? str = "- #{inspect}\n" children.each_with_index do |child, i| last = i == children.length - 1 child.to_tree.split(/\n/).each_with_index do |line, j| str << " #{last ? (j == 0 ? "\\" : ' ') : '|'}#{line}\n" end end str else "- #{inspect}\n" end end MEMBERS.each_with_index do |sym, i| define_method(sym) { @values[i] } define_method(sym.to_s + '=') { |val| @values[i] = val } end def to_a @values end # flattens the tree starting from here into +dirents+. note it modifies its argument. 
def flatten dirents=[] @idx = dirents.length dirents << self children.each { |child| child.flatten dirents } self.child = Dirent.flatten_helper children dirents end # i think making the tree structure optimized is actually more complex than this, and # requires some intelligent ordering of the children based on names, but as long as # it is valid its ok. # actually, i think its ok. gsf for example only outputs a singly-linked-list, where # prev is always EOT. def self.flatten_helper children return EOT if children.empty? i = children.length / 2 this = children[i] this.prev, this.next = [(0...i), (i+1..-1)].map { |r| flatten_helper children[r] } this.idx end attr_accessor :name, :type def save tmp = TO_UTF16.iconv(name) tmp = tmp[0, 62] if tmp.length > 62 tmp += 0.chr * 2 self.name_len = tmp.length self.name_utf16 = tmp + 0.chr * (64 - tmp.length) begin self.type_id = TYPE_MAP.to_a.find { |id, name| @type == name }.first rescue raise "unknown type #{type.inspect}" end # for the case of files, it is assumed that that was handled already # note not dir?, so as not to override root's first_block self.first_block = Dirent::EOT if type == :dir if 0 #file? #self.create_time_str = ?? #Types.load_time create_time_str #self.modify_time_str = ?? #Types.load_time modify_time_str else self.create_time_str = 0.chr * 8 self.modify_time_str = 0.chr * 8 end @values.pack PACK end def inspect str = "#' end # -------- # and for creation of a dirent. don't like the name. is it a file or a directory? # assign to type later? io will be empty. def new_child type child = Dirent.new ole, type children << child yield child if block_given? child end def delete child # remove from our child array, so that on reflatten and re-creation of @dirents, it will be gone raise "#{child.inspect} not a child of #{self.inspect}" unless @children.delete child # free our blocks child.open { |io| io.truncate 0 } end def self.copy src, dst # copies the contents of src to dst. must be the same type. 
this will throw an # error on copying to root. maybe this will recurse too much for big documents?? raise unless src.type == dst.type dst.name = src.name if src.dir? src.children.each do |src_child| dst.new_child(src_child.type) { |dst_child| Dirent.copy src_child, dst_child } end else src.open do |src_io| dst.open { |dst_io| IO.copy src_io, dst_io } end end end end end end if $0 == __FILE__ puts Ole::Storage.open(ARGV[0]) { |ole| ole.root.to_tree } end