#!/usr/bin/env ruby
# Copyright (C) 2008 Rapid7, Inc.
#
# This script extracts the forms from the main page of each
# web site in a list. The output of this can be used with
# Metasploit (and other tools) to obtain the saved form data
# of these domains.
#
# Usage: <script> [site list] [output-dir]
#
# The site list is read line by line; blank lines and lines starting
# with "#" are skipped. For every remaining entry, "<site>.txt" is
# written into the output directory, and removed again if nothing was
# extracted.
#

require 'rubygems' # install rubygems
require 'hpricot'  # gem install hpricot
require 'uri'
require 'open-uri' # defines URI::HTTP#open, used below to fetch each page
require 'timeout'

# Seconds allowed for fetching, parsing and writing a single URL.
FETCH_TIMEOUT = 10

# Prints the command-line synopsis to stderr and terminates the script.
def usage
  $stderr.puts "#{$0} [site list] [output-dir]"
  exit(0)
end

# Serializes the <input> elements found under +form+.
#
# Before serializing, every attribute named "value" (any case) and every
# attribute whose content matches /^http/i is blanked, so no pre-filled
# data or absolute URLs end up in the output. The inputs are modified in
# place on the parsed document.
#
# NOTE(review): only the inputs (prefixed by a single space) are emitted;
# the form element's own opening/closing tags are not written. Confirm
# this is what consumers of the output expect.
#
# @param form [Object] an element returned by Hpricot's search for //form
# @return [String] a space followed by the HTML of each sanitized input
def blanked_inputs_html(form)
  html = String.new(" ")
  form.search("//input") do |inp|
    inp.attributes.keys.each do |ikey|
      if ikey.downcase == "value" || inp.attributes[ikey] =~ /^http/i
        inp[ikey] = ""
      end
    end
    html << inp.to_html
  end
  html
end

sitelist = ARGV.shift || usage
output   = ARGV.shift || usage

File.readlines(sitelist).each do |site|
  site.strip!
  next if site.empty? || site.start_with?("#")

  out = File.join(output, "#{site}.txt")
  File.unlink(out) if File.exist?(out)

  # Block form guarantees the output file is closed even when an
  # exception (e.g. Ctrl-C) propagates out of the loop below.
  File.open(out, "a") do |fd|
    # Try the bare host name first, then the "www." variant; stop at the
    # first one that is fetched and processed without an error.
    ["", "www."].each do |prefix|
      begin
        Timeout.timeout(FETCH_TIMEOUT) do
          # Block form of open closes the response handle after parsing.
          doc = URI.parse("http://#{prefix}#{site}/").open { |page| Hpricot(page) }
          doc.search("//form").each do |form|
            fd.write(blanked_inputs_html(form))
          end
        end
        break
      rescue ::Timeout::Error
        $stderr.puts "#{prefix}#{site} timed out"
      rescue ::SignalException, ::SystemExit, ::NoMemoryError
        # Never swallow Ctrl-C, termination signals, exit requests or
        # out-of-memory conditions.
        raise
      rescue ::Exception => e
        # Deliberately broad: one misbehaving site must not abort the run.
        $stderr.puts "#{prefix}#{site} #{e.class} #{e}"
      end
    end
  end

  # Do not leave empty result files behind.
  File.unlink(out) if File.zero?(out)
end