#!/usr/bin/env ruby
#
# $Id$
#
# Web Crawler.
#
# Author:  Efrain Torres   et [at] metasploit.com 2010
#
#

# openssl before rubygems mac os
require 'msf/core'
require 'openssl'
require 'rubygems'
require 'rinda/tuplespace'
require 'pathname'
require 'uri'

#
# Modular web crawler auxiliary module.
#
# Requests are described by plain hashes with the keys 'rhost', 'rport',
# 'uri', 'method', 'ctype', 'ssl', 'query' and 'data'. Pending requests live
# in a Rinda tuple space (@NotViewedQueue); processed ones are remembered in
# @ViewedQueue, keyed by hashsig. Response parsing is delegated to the
# Crawler* classes loaded from the CrawlerModulesDir directory.
#
class Metasploit3 < Msf::Auxiliary

  include Msf::Auxiliary::Scanner
  include Msf::Auxiliary::Report

  # Registers the module metadata plus its standard and advanced options.
  def initialize(info = {})
    super(update_info(info,
      'Name'        => 'Metasploit Web Crawler',
      'Version'     => '$Revision$',
      'Description' => 'This auxiliary module is a modular web crawler, to be used in conjuntion with wmap (someday) or standalone.',
      'Author'      => 'et',
      'License'     => MSF_LICENSE
    ))

    register_options([
      OptString.new('PATH', [true, "Starting crawling path", '/']),
      OptBool.new('VERBOSE', [ true, "Verbose output", false ]),
      OptInt.new('RPORT', [true, "Remote port", 80 ]),
    ], self.class)

    register_advanced_options([
      OptPath.new('CrawlerModulesDir', [true, 'The base directory containing the crawler modules',
        File.join(Msf::Config.install_root, "data", "msfcrawler")
      ]),
      OptBool.new('EnableUl', [ false, "Enable maximum number of request per URI", true ]),
      OptBool.new('StoreDB', [ false, "Store requests in database", false ]),
      OptInt.new('MaxUriLimit', [ true, "Number max. request per URI", 10]),
      OptInt.new('SleepTime', [ true, "Sleep time (secs) between requests", 0]),
      OptInt.new('TakeTimeout', [ true, "Timeout for loop ending", 15]),
      OptInt.new('ReadTimeout', [ true, "Read timeout (-1 forever)", 3]),
      OptInt.new('ThreadNum', [ true, "Threads number", 20]),
      OptString.new('DontCrawl', [true, "Filestypes not to crawl", '.exe,.zip,.tar,.bz2,.run,.asc,.gz']),
    ], self.class)
  end

  # Crawl target host, port, SSL flag and initial path. cinipath is exposed
  # because BaseParser#targetinipath reads it from the crawler.
  attr_accessor :ctarget, :cport, :cssl, :cinipath

  #
  # Main crawl loop: seeds the queue with the starting path, loads the parser
  # modules, then keeps taking requests from the queue until no request
  # arrives within TakeTimeout seconds.
  #
  def run
    self.ctarget = datastore['RHOSTS']
    self.cport = datastore['RPORT']
    self.cssl = datastore['SSL']

    inipath = datastore['PATH']
    # Stored on the instance (not a local) so parser modules can query it.
    self.cinipath = (inipath.nil? or inipath.empty?) ? '/' : inipath

    inireq = {
      'rhost'  => ctarget,
      'rport'  => cport,
      'uri'    => cinipath,
      'method' => 'GET',
      'ctype'  => 'text/plain',
      'ssl'    => cssl,
      'query'  => nil,
      'data'   => nil
    }

    @NotViewedQueue = Rinda::TupleSpace.new
    @ViewedQueue = Hash.new
    @UriLimits = Hash.new
    # Read back by storedb as the :web_site value.
    @current_site = self.ctarget

    insertnewpath(inireq)

    print_status("Loading modules: #{datastore['CrawlerModulesDir']}")
    load_modules(datastore['CrawlerModulesDir'])
    print_status("OK")

    if datastore['EnableUl']
      print_status("URI LIMITS ENABLED: #{datastore['MaxUriLimit']} (Maximum number of requests per uri)")
    end

    print_status("Target: #{self.ctarget} Port: #{self.cport} Path: #{cinipath} SSL: #{self.cssl}")

    # DontCrawl is a comma separated list of extensions; compare whole
    # extensions so that e.g. ".as" is not skipped just because it is a
    # substring of ".asc".
    skipped_exts = datastore['DontCrawl'].to_s.split(',').map { |e| e.strip }

    begin
      reqfilter = reqtemplate(self.ctarget,self.cport,self.cssl)

      loop do
        ####
        # Threading scaffold, currently disabled (ThreadNum is not used).
        # Re-enabling it needs `i, a = 0, []` before the loop.
        #if i <= datastore['ThreadNum']
        #	a.push(Thread.new {
        ####

        # Raises Rinda::RequestExpiredError when nothing matching the
        # template shows up within TakeTimeout seconds; that ends the crawl.
        hashreq = @NotViewedQueue.take(reqfilter, datastore['TakeTimeout'])

        ul = false
        if @UriLimits.include?(hashreq['uri']) and datastore['EnableUl']
          #puts "Request #{@UriLimits[hashreq['uri']]}/#{$maxurilimit} #{hashreq['uri']}"
          if @UriLimits[hashreq['uri']] >= datastore['MaxUriLimit']
            #puts "URI LIMIT Reached: #{$maxurilimit} for uri #{hashreq['uri']}"
            ul = true
          end
        else
          @UriLimits[hashreq['uri']] = 0
        end

        if !@ViewedQueue.include?(hashsig(hashreq)) and !ul
          @ViewedQueue[hashsig(hashreq)] = Time.now
          @UriLimits[hashreq['uri']] += 1

          ext = File.extname(hashreq['uri'])
          if !ext.empty? and skipped_exts.include?(ext)
            if datastore['VERBOSE']
              print_status "URI not crawled #{hashreq['uri']}"
            end
          else
            prx = nil
            #if self.useproxy
            #	prx = "HTTP:"+self.proxyhost.to_s+":"+self.proxyport.to_s
            #end

            c = Rex::Proto::Http::Client.new(
              self.ctarget,
              self.cport.to_i,
              {},
              self.cssl,
              nil,
              prx
            )

            sendreq(c,hashreq)
          end
        else
          if datastore['VERBOSE']
            puts "#{hashreq['uri']} already visited. "
          end
        end

        ####
        #})
        #i += 1
        #else
        #	sleep(0.01) and a.delete_if {|x| not x.alive?} while not a.empty?
        #	i = 0
        #end
        ####
      end
    rescue Rinda::RequestExpiredError
      # Queue drained: fall through to the final status line. (An early
      # return here used to make that line unreachable.)
      puts "END."
    end

    print_status("Finished crawling")
  end

  #
  # Build the tuple template used to take requests for one target from the
  # queue. Only rhost, rport and ssl are fixed; the remaining fields are nil.
  #
  def reqtemplate(target,port,ssl)
    hreq = {
      'rhost'  => target,
      'rport'  => port,
      'uri'    => nil,
      'method' => nil,
      'ctype'  => nil,
      'ssl'    => ssl,
      'query'  => nil,
      'data'   => nil
    }

    return hreq
  end

  #
  # Report a request/response pair through report_web_page.
  #
  # hashreq:  request hash (uri, query and data are read)
  # response: response object returned by the HTTP client
  # dbpath:   unused
  #
  # NOTE(review): sendreq reads the status with resp.code and headers with
  # resp['Name'], so the ['code'] / ['body'] / ['headers'] lookups below look
  # like header lookups rather than the status, body and header set. Confirm
  # against the response class and report_web_page before relying on StoreDB.
  #
  def storedb(hashreq,response,dbpath)
    info = {
      :web_site => @current_site,
      :path     => hashreq['uri'],
      :query    => hashreq['query'],
      :data     => hashreq['data'],
      :code     => response['code'],
      :body     => response['body'],
      :headers  => response['headers']
    }

    #if response['content-type']
    #	info[:ctype] = response['content-type'][0]
    #end

    #if response['set-cookie']
    #	info[:cookie] = page.headers['set-cookie'].join("\n")
    #end

    #if page.headers['authorization']
    #	info[:auth] = page.headers['authorization'].join("\n")
    #end

    #if page.headers['location']
    #	info[:location] = page.headers['location'][0]
    #end

    #if page.headers['last-modified']
    #	info[:mtime] = page.headers['last-modified'][0]
    #end

    # Report the web page to the database
    report_web_page(info)
  end

  #
  # Modified version of load_protocols from psnuffle by Max Moser
  #
  # Evaluates every *.rb file in crawlermodulesdir inside a fresh anonymous
  # module and instantiates each constant named Crawler<Name> with this
  # crawler as argument, storing it in @crawlermodules under the downcased
  # <Name>. Raises RuntimeError when the directory does not exist; a module
  # that fails to load is reported and skipped.
  #
  def load_modules(crawlermodulesdir)
    base = crawlermodulesdir
    if (not File.directory?(base))
      raise RuntimeError,"The Crawler modules parameter is set to an invalid directory"
    end

    @crawlermodules = {}
    cmodules = Dir.new(base).entries.grep(/\.rb$/).sort
    cmodules.each do |n|
      f = File.join(base, n)
      m = ::Module.new
      begin
        m.module_eval(File.read(f, File.size(f)))
        m.constants.grep(/^Crawler(.*)/) do
          cmod = $1
          klass = m.const_get("Crawler#{cmod}")
          @crawlermodules[cmod.downcase] = klass.new(self)

          print_status("Loaded crawler module #{cmod} from #{f}...")
        end
      rescue ::Exception => e
        # Deliberately broad: evaluating a module file can raise errors that
        # are not StandardError, and one bad file must not stop the others.
        print_error("Crawler module #{n} failed to load: #{e.class} #{e} #{e.backtrace}")
      end
    end
  end

  #
  # Send one request and dispatch on the response code:
  #   200      -> every loaded crawler module parses the response
  #   301..303 -> the Location target is queued as a new GET request
  #   404      -> reported as an invalid link
  # Any error raised while sending or processing is reported and swallowed
  # so that a single failing request does not end the crawl.
  #
  def sendreq(nclient,reqopts={})
    begin
      r = nclient.request_raw(reqopts)
      resp = nclient.send_recv(r, datastore['ReadTimeout'])

      if resp
        #
        # Quickfix for bug packet.rb to_s line: 190
        # In case modules or crawler calls to_s on de-chunked responses
        #
        resp.transfer_chunked = false

        if resp['Set-Cookie']
          #puts "Set Cookie: #{resp['Set-Cookie']}"
          #puts "Storing in cookie jar for host:port #{reqopts['rhost']}:#{reqopts['rport']}"
          #$cookiejar["#{reqopts['rhost']}:#{reqopts['rport']}"] = resp['Set-Cookie']
        end

        if datastore['StoreDB']
          storedb(reqopts,resp,$dbpathmsf)
        end

        print_status ">> [#{resp.code}] #{reqopts['uri']}"

        if reqopts['query'] and !reqopts['query'].empty?
          print_status ">>> [Q] #{reqopts['query']}"
        end

        if reqopts['data']
          print_status ">>> [D] #{reqopts['data']}"
        end

        case resp.code
        when 200
          @crawlermodules.each_key do |k|
            @crawlermodules[k].parse(reqopts,resp)
          end
        when 301..303
          puts "[#{resp.code}] Redirection to: #{resp['Location']}"
          # This used to test an undefined `advopts`; the resulting NameError
          # was swallowed by the rescue below and redirects were never queued.
          if datastore['VERBOSE']
            print_status urltohash('GET',resp['Location'],reqopts['uri'],nil)
          end
          insertnewpath(urltohash('GET',resp['Location'],reqopts['uri'],nil))
        when 404
          print_status "[404] Invalid link #{reqopts['uri']}"
        else
          print_status "Unhandled #{resp.code}"
        end
      else
        print_status "No response"
      end

      sleep(datastore['SleepTime'])
    rescue
      print_status "ERROR"
      if datastore['VERBOSE']
        print_status "#{$!}: #{$!.backtrace}"
      end
    end
  end

  #
  # Add new path (uri) to test non-viewed queue
  #
  # The uri is canonicalized in place. Requests for a host/port other than
  # the configured RHOSTS/RPORT are silently dropped, as are requests that
  # were already viewed or are already waiting in the queue.
  #
  def insertnewpath(hashreq)
    hashreq['uri'] = canonicalize(hashreq['uri'])

    if hashreq['rhost'] == datastore['RHOSTS'] and hashreq['rport'] == datastore['RPORT']
      if !@ViewedQueue.include?(hashsig(hashreq))
        if @NotViewedQueue.read_all(hashreq).size > 0
          if datastore['VERBOSE']
            print_status "Already in queue to be viewed: #{hashreq['uri']}"
          end
        else
          if datastore['VERBOSE']
            print_status "Inserted: #{hashreq['uri']}"
          end

          @NotViewedQueue.write(hashreq)
        end
      else
        if datastore['VERBOSE']
          print_status "#{hashreq['uri']} already visited at #{@ViewedQueue[hashsig(hashreq)]}"
        end
      end
    end
  end

  #
  # Build a new hash for a local path
  #
  def urltohash(m,url,basepath,dat)
    # m:   method
    # url: uri?[query]
    # basepath: base path/uri to determine absolute path when relative
    # data: body data, nil if GET and query = uri.query

    uri = URI.parse(url)
    uritargetssl = (uri.scheme == "https") ? true : false

    # A url without host inherits the host and SSL setting of the crawl.
    uritargethost = uri.host
    if (uri.host.nil? or uri.host.empty?)
      uritargethost = self.ctarget
      uritargetssl = self.cssl
    end

    uritargetport = uri.port
    if (uri.port.nil?)
      uritargetport = self.cport
    end

    uritargetpath = uri.path
    if (uri.path.nil? or uri.path.empty?)
      uritargetpath = "/"
    end

    # Resolve a relative path against basepath: directly below it when
    # basepath ends in '/', otherwise next to it (below its dirname).
    newp = Pathname.new(uritargetpath)
    oldp = Pathname.new(basepath)
    if !newp.absolute?
      if oldp.to_s[-1,1] == '/'
        newp = oldp+newp
      else
        if !newp.to_s.empty?
          newp = File.join(oldp.dirname,newp)
        end
      end
    end

    hashreq = {
      'rhost'  => uritargethost,
      'rport'  => uritargetport,
      'uri'    => newp.to_s,
      'method' => m,
      'ctype'  => 'text/plain',
      'ssl'    => uritargetssl,
      'query'  => uri.query,
      'data'   => nil
    }

    # For GET, explicit data replaces the query string; otherwise it is the body.
    if m == 'GET' and !dat.nil?
      hashreq['query'] = dat
    else
      hashreq['data'] = dat
    end

    return hashreq
  end

  # Taken from http://www.ruby-forum.com/topic/140101 by Rob Biedenharn
  #
  # Normalizes the uri and collapses "segment/.." and "/./" sequences in its
  # path. Returns the result as a string.
  def canonicalize(uri)
    u = uri.kind_of?(URI) ? uri : URI.parse(uri.to_s)
    u.normalize!
    newpath = u.path
    # gsub! returns nil once nothing is left to replace, which ends the loop.
    while newpath.gsub!(%r{([^/]+)/\.\./?}) { |match| $1 == '..' ? match : '' } do end
    newpath = newpath.gsub(%r{/\./}, '/').sub(%r{/\.\z}, '/')
    u.path = newpath
    # Ugly fix
    u.path = u.path.gsub("\/..\/","\/")
    u.to_s
  end

  # Signature used as the @ViewedQueue key: the request hash rendered as a string.
  def hashsig(hashreq)
    hashreq.to_s
  end

end

#
# Base class for crawler parser modules. A parser is constructed with the
# crawler instance and forwards queue and url helpers to it; subclasses
# override parse.
#
class BaseParser
  attr_accessor :crawler

  def initialize(c)
    self.crawler = c
  end

  # Called with the request hash and the response; the default does nothing.
  def parse(request,result)
    nil
  end

  #
  # Add new path (uri) to test hash queue
  #
  def insertnewpath(hashreq)
    self.crawler.insertnewpath(hashreq)
  end

  def hashsig(hashreq)
    self.crawler.hashsig(hashreq)
  end

  def urltohash(m,url,basepath,dat)
    self.crawler.urltohash(m,url,basepath,dat)
  end

  def targetssl
    self.crawler.cssl
  end

  def targetport
    self.crawler.cport
  end

  def targethost
    self.crawler.ctarget
  end

  # Initial crawl path, read from the crawler's cinipath accessor.
  def targetinipath
    self.crawler.cinipath
  end
end