require 'rexml/document'

module Rex
  module Parser
    #
    # Stream parser for nmap -oX xml output
    #
    # Yields a hash representing each host found in the xml stream. Each host
    # will look something like the following:
    #   {
    #     "status" => "up",
    #     "addrs"  => { "ipv4" => "192.168.0.1", "mac" => "00:0d:87:a1:df:72" },
    #     "ports"  => [
    #       { "portid" => "22", "state" => "closed", ... },
    #       { "portid" => "80", "state" => "open", ... },
    #       ...
    #     ]
    #   }
    #
    # Usage:
    #
    #   parser = NmapXMLStreamParser.new { |host|
    #     # do stuff with the host
    #   }
    #   REXML::Document.parse_stream(File.new(nmap_xml), parser)
    #
    # -- or --
    #
    #   parser = NmapXMLStreamParser.new
    #   parser.on_found_host = Proc.new { |host|
    #     # do stuff with the host
    #   }
    #   REXML::Document.parse_stream(File.new(nmap_xml), parser)
    #
    # This parser does not maintain state as well as a tree parser, so
    # malformed xml will trip it up. Nmap shouldn't ever output malformed xml,
    # so it's not a big deal.
    #
    class NmapXMLStreamParser
      # Callable invoked with the accumulated host hash every time a closing
      # "host" tag is seen. May be nil, in which case hosts are discarded.
      attr_accessor :on_found_host

      # Creates a parser with an empty host record.
      #
      # block:: optional callback stored as +on_found_host+.
      def initialize(&block)
        reset_state
        # The explicit receiver is required: a bare `on_found_host = block`
        # would only create a local variable and never reach the accessor.
        self.on_found_host = block if block
      end

      # Replaces the host record being accumulated with a fresh, empty one.
      def reset_state
        @host = { "status" => nil, "addrs" => {}, "ports" => [] }
      end

      # Stream callback for an opening tag. Copies the interesting attributes
      # of the tag into the host record currently being accumulated.
      #
      # name::       the element name
      # attributes:: hash of the element's attributes (string keys)
      def tag_start(name, attributes)
        case name
        when "address"
          @host["addrs"][attributes["addrtype"]] = attributes["addr"]
          # Keep the IP address (v4 or v6) as the host's primary address.
          @host["addr"] = attributes["addr"] if attributes["addrtype"] =~ /ipv[46]/
        when "osclass"
          @host["os_vendor"]   = attributes["vendor"]
          @host["os_family"]   = attributes["osfamily"]
          @host["os_version"]  = attributes["osgen"]
          @host["os_accuracy"] = attributes["accuracy"]
        when "osmatch"
          # Only a match reported with 100 accuracy is recorded.
          @host["os_match"] = attributes["name"] if attributes["accuracy"].to_i == 100
        when "uptime"
          @host["last_boot"] = attributes["lastboot"]
        when "hostname"
          @host["reverse_dns"] = attributes["name"] if attributes["type"] == "PTR"
        when "status"
          # refers to the liveness of the host; values are "up" or "down"
          @host["status"]        = attributes["state"]
          @host["status_reason"] = attributes["reason"]
        when "port"
          @host["ports"].push(attributes)
        when "state"
          # refers to the state of a port; values are "open", "closed", or
          # "filtered"
          port = current_port
          port["state"] = attributes["state"] if port
        when "service"
          # Store any service and script info with the associated port. There
          # shouldn't be any collisions on attribute names here, so just merge
          # them.
          port = current_port
          port.merge!(attributes) if port
        when "script"
          # Script output is attached to the most recently seen port. Skipped
          # when no port has been seen yet for this host, instead of raising
          # on nil.
          port = current_port
          if port
            port["scripts"] ||= {}
            port["scripts"][attributes["id"]] = attributes["output"]
          end
        when "trace"
          @host["trace"] = {
            "port"  => attributes["port"],
            "proto" => attributes["proto"],
            "hops"  => []
          }
        when "hop"
          @host["trace"]["hops"].push(attributes) if @host["trace"]
        end
      end

      # Stream callback for a closing tag. When a "host" element closes, the
      # accumulated record is handed to +on_found_host+ (if set) and the state
      # is reset for the next host.
      #
      # name:: the element name
      def tag_end(name)
        case name
        when "host"
          on_found_host.call(@host) if on_found_host
          reset_state
        end
      end

      # We don't need these methods, but they're necessary to keep REXML happy.
      # They accept any number of arguments so that whatever the stream parser
      # passes along with the event is swallowed instead of raising
      # ArgumentError.
      def text(*); end
      def xmldecl(*); end
      def cdata(*); end
      def comment(*); end
      def instruction(*); end
      def attlist(*); end
      def attlistdecl(*); end
      def doctype(*); end
      def doctype_end(*); end
      def elementdecl(*); end
      def entitydecl(*); end
      def notationdecl(*); end
      def entity(*); end

      private

      # Returns the most recently pushed port hash, or nil when no "port" tag
      # has been seen for the current host.
      def current_port
        @host["ports"].last
      end
    end
  end
end