metasploit-framework/modules/auxiliary/crawler/msfcrawler.rb

#!/usr/bin/env ruby
#
# $Id: msfcrawler.rb 9525 2010-06-15 07:18:08Z et $
#
# Web Crawler.
#
# Author:  Efrain Torres   et [at] metasploit.com 2010
#
#

# Load openssl before rubygems (Mac OS)
require 'msf/core'
require 'openssl'
require 'rubygems'
require 'rinda/tuplespace'
require 'pathname'
require 'uri'

class Metasploit3 < Msf::Auxiliary

	include Msf::Auxiliary::Scanner
	include Msf::Auxiliary::Report

	#
	# Declare the module metadata and register the basic and advanced
	# datastore options used by the crawler.
	#
	def initialize
		super(
			'Name'        => 'Metasploit Web Crawler',
			'Version'     => '$Revision: 9929 $',
			'Description' => 'This auxiliary module is a modular web crawler, to be used in conjuntion with wmap (someday) or standalone.',
			'Author'      => 'et',
			'License'     => MSF_LICENSE
		)

		# Options shown with the regular "show options" listing
		basic_options = [
			OptString.new('PATH', [true, "Starting crawling path", '/']),
			OptBool.new('VERBOSE', [true, "Verbose output", false]),
			OptInt.new('RPORT', [true, "Remote port", 80])
		]
		register_options(basic_options, self.class)

		# Tuning knobs: module directory, per-URI limits, timeouts and filters
		advanced_options = [
			OptPath.new('CrawlerModulesDir', [
				true,
				'The base directory containing the crawler modules',
				File.join(Msf::Config.install_root, "data", "msfcrawler")
			]),
			OptBool.new('EnableUl', [false, "Enable maximum number of request per URI", true]),
			OptBool.new('StoreDB', [false, "Store requests in database", false]),
			OptInt.new('MaxUriLimit', [true, "Number max. request per URI", 10]),
			OptInt.new('SleepTime', [true, "Sleep time (secs) between requests", 0]),
			OptInt.new('TakeTimeout', [true, "Timeout for loop ending", 15]),
			OptInt.new('ReadTimeout', [true, "Read timeout (-1 forever)", 3]),
			OptInt.new('ThreadNum', [true, "Threads number", 20]),
			OptString.new('DontCrawl', [true, "Filestypes not to crawl", '.exe,.zip,.tar,.bz2,.run,.asc,.gz'])
		]
		register_advanced_options(advanced_options, self.class)
	end

	attr_accessor :ctarget, :cport, :cssl

	#
	# Crawl the configured target, one request at a time.
	#
	# Seeds the not-viewed queue with PATH, loads the crawler modules and then
	# keeps taking pending requests from the queue. The crawl ends when take()
	# raises Rinda::RequestExpiredError (no matching request arrived within
	# TakeTimeout).
	#
	def run
		self.ctarget = datastore['RHOSTS']
		self.cport = datastore['RPORT']
		self.cssl = datastore['SSL']
		inipath = datastore['PATH']

		# Kept in an instance variable (see #cinipath) so parser modules can
		# read the starting path through BaseParser#targetinipath.
		@cinipath = (inipath.nil? or inipath.empty?) ? '/' : inipath

		inireq = {
				'rhost'		=> ctarget,
				'rport'		=> cport,
				'uri' 		=> cinipath,
				'method'   	=> 'GET',
				'ctype'		=> 'text/plain',
				'ssl'		=> cssl,
				'query'		=> nil,
				'data'		=> nil
		}

		@NotViewedQueue = Rinda::TupleSpace.new
		@ViewedQueue = Hash.new
		@UriLimits = Hash.new
		# storedb reports pages against @current_site, so the name must match
		@current_site = self.ctarget

		insertnewpath(inireq)

		print_status("Loading modules: #{datastore['CrawlerModulesDir']}")
		load_modules(datastore['CrawlerModulesDir'])
		print_status("OK")

		if datastore['EnableUl']
			print_status("URI LIMITS ENABLED: #{datastore['MaxUriLimit']} (Maximum number of requests per uri)")
		end

		print_status("Target: #{self.ctarget} Port: #{self.cport} Path: #{cinipath} SSL: #{self.cssl}")

		# Parse the extension blacklist once. Comparing against the list (not
		# the raw comma-separated string) avoids partial hits such as ".g"
		# matching ".gz"; the comparison is case-insensitive.
		skipexts = datastore['DontCrawl'].to_s.split(',').map { |e| e.strip.downcase }

		begin
			reqfilter = reqtemplate(self.ctarget,self.cport,self.cssl)

			# NOTE: ThreadNum is registered but not used yet; requests are
			# processed sequentially in this loop.
			loop do
				hashreq = @NotViewedQueue.take(reqfilter, datastore['TakeTimeout'])

				# Per-URI request limit: ul becomes true once the URI has
				# already been requested MaxUriLimit times.
				ul = false
				if @UriLimits.include?(hashreq['uri']) and datastore['EnableUl']
					if @UriLimits[hashreq['uri']] >= datastore['MaxUriLimit']
						ul = true
					end
				else
					@UriLimits[hashreq['uri']] = 0
				end

				if !@ViewedQueue.include?(hashsig(hashreq)) and !ul

					@ViewedQueue[hashsig(hashreq)] = Time.now
					@UriLimits[hashreq['uri']] += 1

					ext = File.extname(hashreq['uri']).downcase
					if !ext.empty? and skipexts.include?(ext)
						if datastore['VERBOSE']
							print_status "URI not crawled #{hashreq['uri']}"
						end
					else
						# Proxy support is not wired in; nil means direct connection
						prx = nil

						# NOTE(review): the client is never explicitly closed after
						# the request - confirm whether it should be.
						c = Rex::Proto::Http::Client.new(
							self.ctarget,
							self.cport.to_i,
							{},
							self.cssl,
							nil,
							prx
						)

						sendreq(c,hashreq)
					end
				else
					if datastore['VERBOSE']
						puts "#{hashreq['uri']} already visited. "
					end
				end
			end
		rescue Rinda::RequestExpiredError
			# The queue stayed empty for TakeTimeout: nothing left to crawl.
			# Fall through so the final status line is actually printed.
			puts "END."
		end

		print_status("Finished crawling")
	end

	#
	# Initial crawling path chosen by run ('/' when PATH is unset or empty).
	# Parser modules read it through BaseParser#targetinipath.
	#
	def cinipath
		@cinipath
	end
		
	#
	# Build the request template that run passes to the not-viewed queue's
	# take(): host, port and ssl are fixed, every other field is nil.
	# (Presumably nil fields act as wildcards for the tuple match - confirm
	# against the Rinda documentation.)
	#
	def reqtemplate(target,port,ssl)
		template = Hash.new

		template['rhost']  = target
		template['rport']  = port
		template['uri']    = nil
		template['method'] = nil
		template['ctype']  = nil
		template['ssl']    = ssl
		template['query']  = nil
		template['data']   = nil

		template
	end

	#
	# Report a crawled page to the database through report_web_page.
	#
	# hashreq  - request hash ('uri', 'query' and 'data' are stored)
	# response - HTTP response for that request; its status code, body and
	#            headers are stored
	# dbpath   - unused, kept so existing callers keep working
	#
	def storedb(hashreq,response,dbpath)

		# response[...] looks up a header by name (sendreq uses it for
		# 'Location' and 'Set-Cookie'), so the status code, body and header
		# set must come from the accessors instead.
		info = {
			:web_site => @current_site,
			:path     => hashreq['uri'],
			:query    => hashreq['query'],
			:data     => hashreq['data'],
			:code     => response.code,
			:body     => response.body,
			:headers  => response.headers
		}

		# TODO: also record content-type, set-cookie, authorization, location
		# and last-modified headers (:ctype, :cookie, :auth, :location, :mtime)

		# Report the web page to the database
		report_web_page(info)
	end

	#
	# Modified version of load_protocols from psnuffle by Max Moser  <mmo@remote-exploit.org>
	#

	#
	# Load every *.rb file found in crawlermodulesdir, in sorted order.
	#
	# Each file is evaluated inside its own anonymous module. Every constant
	# named Crawler<Name> it defines is instantiated with this crawler as the
	# only argument and stored in @crawlermodules under the downcased <Name>.
	# A file that fails to load is reported with print_error and skipped.
	#
	# Raises RuntimeError when crawlermodulesdir is not a directory.
	#
	def load_modules(crawlermodulesdir)

		base = crawlermodulesdir
		if (base.nil? or not File.directory?(base))
			raise RuntimeError,"The Crawler modules parameter is set to an invalid directory"
		end

		@crawlermodules = {}
		# Dir.entries does not leave a directory handle open
		cmodules = Dir.entries(base).grep(/\.rb$/).sort
		cmodules.each do |n|
			f = File.join(base, n)
			m = ::Module.new
			begin
				# Passing the file name makes errors and backtraces point at it
				m.module_eval(File.read(f), f)
				m.constants.each do |cname|
					next unless cname.to_s =~ /^Crawler(.*)/
					cmod = $1
					klass = m.const_get(cname)
					@crawlermodules[cmod.downcase] = klass.new(self)

					print_status("Loaded crawler module #{cmod} from #{f}...")
				end
			rescue ::StandardError, ::ScriptError => e
				# ScriptError covers SyntaxError/LoadError raised by a broken
				# module file; Interrupt and SystemExit are left to propagate.
				print_error("Crawler module #{n} failed to load: #{e.class} #{e} #{e.backtrace}")
			end
		end
	end

	#
	# Send one request and dispatch on the response code.
	#
	# nclient - HTTP client; must respond to request_raw, send_recv and
	#           reread_response (run passes a Rex::Proto::Http::Client)
	# reqopts - request hash as stored in the not-viewed queue
	#
	# 200 responses are handed to every loaded crawler module, 301-303
	# redirects are queued as new GET requests, anything else is only logged.
	# Any error raised while handling the request is reported and swallowed so
	# that a single bad request does not stop the crawl.
	#
	def sendreq(nclient,reqopts={})

		begin
			r = nclient.request_raw(reqopts)
			resp = nclient.send_recv(r, datastore['ReadTimeout'])
			# Skip interim "100 Continue" responses
			while(resp and resp.code == 100)
				resp = nclient.reread_response(resp, datastore['ReadTimeout'])
			end

			if resp
				#
				# Quickfix for bug packet.rb to_s line: 190
				# In case modules or crawler calls to_s on de-chunked responses
				#
				resp.transfer_chunked = false

				# TODO: keep Set-Cookie values in a per host:port cookie jar

				if datastore['StoreDB']
					# NOTE(review): $dbpathmsf is not set anywhere in this file and
					# storedb ignores the argument - confirm it can be dropped.
					storedb(reqopts,resp,$dbpathmsf)
				end

				print_status ">> [#{resp.code}] #{reqopts['uri']}"

				if reqopts['query'] and !reqopts['query'].empty?
					print_status ">>> [Q] #{reqopts['query']}"
				end

				if reqopts['data']
					print_status ">>> [D] #{reqopts['data']}"
				end

				case resp.code
				when 200
					@crawlermodules.each_key do |k|
						@crawlermodules[k].parse(reqopts,resp)
					end
				when 301..303
					location = resp['Location']
					puts "[#{resp.code}] Redirection to: #{location}"
					if location.nil? or location.empty?
						# Nothing to follow without a Location header
						print_status "Redirection without Location header for #{reqopts['uri']}"
					else
						newreq = urltohash('GET',location,reqopts['uri'],nil)
						# VERBOSE lives in the datastore like every other option
						if datastore['VERBOSE']
							print_status newreq.inspect
						end
						insertnewpath(newreq)
					end
				when 404
					print_status "[404] Invalid link #{reqopts['uri']}"
				else
					print_status "Unhandled #{resp.code}"
				end

			else
				print_status "No response"
			end
			sleep(datastore['SleepTime'])
		rescue
			print_status "ERROR"
			if datastore['VERBOSE']
				print_status "#{$!}: #{$!.backtrace}"
			end
		end
	end

	#
	# Add new path (uri) to test non-viewed queue
	#

	#
	# Queue a request hash for crawling unless it targets another host/port,
	# was already visited, or is already waiting in the not-viewed queue.
	# The 'uri' entry of hashreq is replaced by its canonical form first.
	#
	def insertnewpath(hashreq)

		hashreq['uri'] = canonicalize(hashreq['uri'])

		# Requests for any other host or port are silently dropped
		return nil unless hashreq['rhost'] == datastore['RHOSTS']
		return nil unless hashreq['rport'] == datastore['RPORT']

		sig = hashsig(hashreq)

		if @ViewedQueue.include?(sig)
			return datastore['VERBOSE'] ? print_status("#{hashreq['uri']} already visited at #{@ViewedQueue[sig]}") : nil
		end

		unless @NotViewedQueue.read_all(hashreq).empty?
			return datastore['VERBOSE'] ? print_status("Already in queue to be viewed: #{hashreq['uri']}") : nil
		end

		print_status("Inserted: #{hashreq['uri']}") if datastore['VERBOSE']
		@NotViewedQueue.write(hashreq)
	end

	#
	# Build a new hash for a local path
	#

	#
	# Turn a (possibly relative) URL into a request hash.
	#
	# m        - HTTP method
	# url      - uri?[query]
	# basepath - path of the page the url was found on; used to resolve
	#            relative paths
	# dat      - extra data: becomes the query for GET, the body otherwise
	#
	# Host and ssl fall back to the crawler target when the url has no host,
	# and the port falls back to the crawler port when the url carries none.
	#
	def urltohash(m,url,basepath,dat)
		parsed = URI.parse(url)

		if parsed.host.nil? or parsed.host.empty?
			host = self.ctarget
			ssl = self.cssl
		else
			host = parsed.host
			ssl = (parsed.scheme == "https")
		end

		port = parsed.port.nil? ? self.cport : parsed.port

		rawpath = (parsed.path.nil? or parsed.path.empty?) ? "/" : parsed.path

		target = Pathname.new(rawpath)
		base = Pathname.new(basepath)
		unless target.absolute?
			if base.to_s[-1,1] == '/'
				# base is a directory: resolve directly under it
				target = base + target
			elsif !target.to_s.empty?
				# base is a file: resolve next to it
				target = File.join(base.dirname, target)
			end
		end

		# For GET the extra data travels in the query string, not the body
		as_query = (m == 'GET' and !dat.nil?)

		{
			'rhost'  => host,
			'rport'  => port,
			'uri'    => target.to_s,
			'method' => m,
			'ctype'  => 'text/plain',
			'ssl'    => ssl,
			'query'  => as_query ? dat : parsed.query,
			'data'   => as_query ? nil : dat
		}
	end

	# Taken from http://www.ruby-forum.com/topic/140101 by  Rob Biedenharn
	#
	# Return uri (a String or URI object) as a string with a cleaned-up path:
	# "segment/.." pairs are collapsed and "/./" plus a trailing "/." are
	# removed. A URI object passed in is normalized in place.
	#
	def canonicalize(uri)

		u = uri.kind_of?(URI) ? uri : URI.parse(uri.to_s)
		u.normalize!
		newpath = u.path

		# Collapse "segment/.." pairs until a pass removes nothing. A "../.."
		# run also matches the pattern but is deliberately left untouched, so
		# "did the pattern match" cannot be the loop condition: a path such
		# as "/../../x" would then never terminate.
		updir = %r{([^/]+)/\.\./?}
		loop do
			removed = false
			newpath = newpath.gsub(updir) do |match|
				if $1 == '..'
					match
				else
					removed = true
					''
				end
			end
			break unless removed
		end

		newpath = newpath.gsub(%r{/\./}, '/').sub(%r{/\.\z}, '/')
		u.path = newpath
		# Ugly fix
		u.path = u.path.gsub("/../","/")
		u.to_s
	end

	#
	# Signature of a request hash: its string rendering. Used as the key of
	# the viewed-requests table.
	#
	def hashsig(hashreq)
		"#{hashreq}"
	end
		
end

#
# Base class for crawler parser modules. It keeps a reference to the crawler
# and forwards queueing/URL helpers and target information to it.
#
class BaseParser
	attr_accessor :crawler

	# c - the crawler instance this parser reports back to
	def initialize(c)
		@crawler = c
	end

	# Hook for subclasses; the base implementation does nothing and returns nil.
	def parse(request,result)
	end

	#
	# Add new path (uri) to test hash queue
	#
	def insertnewpath(hashreq)
		crawler.insertnewpath(hashreq)
	end

	# Signature of a request hash, as computed by the crawler
	def hashsig(hashreq)
		crawler.hashsig(hashreq)
	end

	# Build a request hash from a url, delegating to the crawler
	def urltohash(m,url,basepath,dat)
		crawler.urltohash(m,url,basepath,dat)
	end

	# SSL flag of the crawl target
	def targetssl
		crawler.cssl
	end

	# Port of the crawl target
	def targetport
		crawler.cport
	end

	# Host of the crawl target
	def targethost
		crawler.ctarget
	end

	# Initial crawling path.
	# NOTE(review): requires the crawler to respond to cinipath - confirm it does.
	def targetinipath
		crawler.cinipath
	end
end
first try on modular crawling git-svn-id: file:///home/svn/framework3/trunk@10915 4d416f70-5f16-0410-b530-b9f4589650da 2010-11-05 04:00:49 +00:00			`#!/usr/bin/env ruby`
			`#`
			`# $Id: msfcrawler.rb 9525 2010-06-15 07:18:08Z et $`
			`#`
			`# Web Crawler.`
			`#`
			`# Author: Efrain Torres et [at] metasploit.com 2010`
			`#`
			`#`

			`# openssl before rubygems mac os`
			`require 'msf/core'`
			`require 'openssl'`
			`require 'rubygems'`
			`require 'rinda/tuplespace'`
			`require 'pathname'`
			`require 'uri'`

			`class Metasploit3 < Msf::Auxiliary`

			`include Msf::Auxiliary::Scanner`
			`include Msf::Auxiliary::Report`

			`def initialize`
			`super(`
			`'Name' => 'Metasploit Web Crawler',`
			`'Version' => '$Revision: 9929 $',`
			`'Description' => 'This auxiliary module is a modular web crawler, to be used in conjuntion with wmap (someday) or standalone.',`
			`'Author' => 'et',`
			`'License' => MSF_LICENSE,`
			`)`

			`register_options([`
			`OptString.new('PATH', [true, "Starting crawling path", '/']),`
			`OptBool.new('VERBOSE', [ true, "Verbose output", false ]),`
			`OptInt.new('RPORT', [true, "Remote port", 80 ]),`
			`], self.class)`

			`register_advanced_options([`
			`OptPath.new('CrawlerModulesDir', [true, 'The base directory containing the crawler modules',`
			`File.join(Msf::Config.install_root, "data", "msfcrawler")`
			`]),`
			`OptBool.new('EnableUl', [ false, "Enable maximum number of request per URI", true ]),`
			`OptBool.new('StoreDB', [ false, "Store requests in database", false ]),`
			`OptInt.new('MaxUriLimit', [ true, "Number max. request per URI", 10]),`
			`OptInt.new('SleepTime', [ true, "Sleep time (secs) between requests", 0]),`
			`OptInt.new('TakeTimeout', [ true, "Timeout for loop ending", 15]),`
			`OptInt.new('ReadTimeout', [ true, "Read timeout (-1 forever)", 3]),`
			`OptInt.new('ThreadNum', [ true, "Threads number", 20]),`
			`OptString.new('DontCrawl', [true, "Filestypes not to crawl", '.exe,.zip,.tar,.bz2,.run,.asc,.gz']),`
			`], self.class)`
			`end`

			`attr_accessor :ctarget, :cport, :cssl`

			`def run`
			`i, a = 0, []`

			`self.ctarget = datastore['RHOSTS']`
			`self.cport = datastore['RPORT']`
			`self.cssl = datastore['SSL']`
			`inipath = datastore['PATH']`

			`cinipath = (inipath.nil? or inipath.empty?) ? '/' : inipath`

			`inireq = {`
			`'rhost' => ctarget,`
			`'rport' => cport,`
			`'uri' => cinipath,`
			`'method' => 'GET',`
			`'ctype' => 'text/plain',`
			`'ssl' => cssl,`
			`'query' => nil,`
			`'data' => nil`
			`}`

			`@NotViewedQueue = Rinda::TupleSpace.new`
			`@ViewedQueue = Hash.new`
			`@UriLimits = Hash.new`
			`@curent_site = self.ctarget`

			`insertnewpath(inireq)`

			`print_status("Loading modules: #{datastore['CrawlerModulesDir']}")`
			`load_modules(datastore['CrawlerModulesDir'])`
			`print_status("OK")`

			`if datastore['EnableUl']`
			`print_status("URI LIMITS ENABLED: #{datastore['MaxUriLimit']} (Maximum number of requests per uri)")`
			`end`

			`print_status("Target: #{self.ctarget} Port: #{self.cport} Path: #{cinipath} SSL: #{self.cssl}")`


			`begin`
			`reqfilter = reqtemplate(self.ctarget,self.cport,self.cssl)`

			`i =0`

			`loop do`

			`####`
			`#if i <= datastore['ThreadNum']`
			`# a.push(Thread.new {`
			`####`

			`hashreq = @NotViewedQueue.take(reqfilter, datastore['TakeTimeout'])`

			`ul = false`
			`if @UriLimits.include?(hashreq['uri']) and datastore['EnableUl']`
			`#puts "Request #{@UriLimits[hashreq['uri']]}/#{$maxurilimit} #{hashreq['uri']}"`
			`if @UriLimits[hashreq['uri']] >= datastore['MaxUriLimit']`
			`#puts "URI LIMIT Reached: #{$maxurilimit} for uri #{hashreq['uri']}"`
			`ul = true`
			`end`
			`else`
			`@UriLimits[hashreq['uri']] = 0`
			`end`

			`if !@ViewedQueue.include?(hashsig(hashreq)) and !ul`

			`@ViewedQueue[hashsig(hashreq)] = Time.now`
			`@UriLimits[hashreq['uri']] += 1`

			`if !File.extname(hashreq['uri']).empty? and datastore['DontCrawl'].include? File.extname(hashreq['uri'])`
			`if datastore['VERBOSE']`
			`print_status "URI not crawled #{hashreq['uri']}"`
			`end`
			`else`
			`prx = nil`
			`#if self.useproxy`
			`# prx = "HTTP:"+self.proxyhost.to_s+":"+self.proxyport.to_s`
			`#end`

			`c = Rex::Proto::Http::Client.new(`
			`self.ctarget,`
			`self.cport.to_i,`
			`{},`
			`self.cssl,`
			`nil,`
			`prx`
			`)`

			`sendreq(c,hashreq)`
			`end`
			`else`
			`if datastore['VERBOSE']`
			`puts "#{hashreq['uri']} already visited. "`
			`end`
			`end`

			`####`
			`#})`

			`#i += 1`
			`#else`
			`# sleep(0.01) and a.delete_if {\|x\| not x.alive?} while not a.empty?`
			`# i = 0`
			`#end`
			`####`

			`end`
			`rescue Rinda::RequestExpiredError`
			`puts "END."`
			`return`
			`end`

			`print_status("Finished crawling")`
			`end`

			`def reqtemplate(target,port,ssl)`
			`hreq = {`
			`'rhost' => target,`
			`'rport' => port,`
			`'uri' => nil,`
			`'method' => nil,`
			`'ctype' => nil,`
			`'ssl' => ssl,`
			`'query' => nil,`
			`'data' => nil`
			`}`

			`return hreq`
			`end`

			`def storedb(hashreq,response,dbpath)`

			`info = {`
			`:web_site => @current_site,`
			`:path => hashreq['uri'],`
			`:query => hashreq['query'],`
			`:data => hashreq['data'],`
			`:code => response['code'],`
			`:body => response['body'],`
			`:headers => response['headers']`
			`}`

			`#if response['content-type']`
			`# info[:ctype] = response['content-type'][0]`
			`#end`

			`#if response['set-cookie']`
			`# info[:cookie] = page.headers['set-cookie'].join("\n")`
			`#end`

			`#if page.headers['authorization']`
			`# info[:auth] = page.headers['authorization'].join("\n")`
			`#end`

			`#if page.headers['location']`
			`# info[:location] = page.headers['location'][0]`
			`#end`

			`#if page.headers['last-modified']`
			`# info[:mtime] = page.headers['last-modified'][0]`
			`#end`

			`# Report the web page to the database`
			`report_web_page(info)`
			`end`

			`#`
			`# Modified version of load_protocols from psnuffle by Max Moser <mmo@remote-exploit.org>`
			`#`

			`def load_modules(crawlermodulesdir)`

			`base = crawlermodulesdir`
			`if (not File.directory?(base))`
			`raise RuntimeError,"The Crawler modules parameter is set to an invalid directory"`
			`end`

			`@crawlermodules = {}`
			`cmodules = Dir.new(base).entries.grep(/\.rb$/).sort`
			`cmodules.each do \|n\|`
			`f = File.join(base, n)`
			`m = ::Module.new`
			`begin`
			`m.module_eval(File.read(f, File.size(f)))`
			`m.constants.grep(/^Crawler(.*)/) do`
			`cmod = $1`
			`klass = m.const_get("Crawler#{cmod}")`
			`@crawlermodules[cmod.downcase] = klass.new(self)`

			`print_status("Loaded crawler module #{cmod} from #{f}...")`
			`end`
			`rescue ::Exception => e`
			`print_error("Crawler module #{n} failed to load: #{e.class} #{e} #{e.backtrace}")`
			`end`
			`end`
			`end`

			`def sendreq(nclient,reqopts={})`

			`begin`
			`r = nclient.request_raw(reqopts)`
			`resp = nclient.send_recv(r, datastore['ReadTimeout'])`
			`while(resp and resp.code == 100)`
			`resp = nclient.reread_response(resp, datastore['ReadTimeout'])`
			`end`

			`if resp`
			`#`
			`# Quickfix for bug packet.rb to_s line: 190`
			`# In case modules or crawler calls to_s on de-chunked responses`
			`#`
			`resp.transfer_chunked = false`
			`if resp['Set-Cookie']`
			`#puts "Set Cookie: #{resp['Set-Cookie']}"`
			`#puts "Storing in cookie jar for host:port #{reqopts['rhost']}:#{reqopts['rport']}"`
			`#$cookiejar["#{reqopts['rhost']}:#{reqopts['rport']}"] = resp['Set-Cookie']`
			`end`

			`if datastore['StoreDB']`
			`storedb(reqopts,resp,$dbpathmsf)`
			`end`

			`print_status ">> [#{resp.code}] #{reqopts['uri']}"`

			`if reqopts['query'] and !reqopts['query'].empty?`
			`print_status ">>> [Q] #{reqopts['query']}"`
			`end`

			`if reqopts['data']`
			`print_status ">>> [D] #{reqopts['data']}"`
			`end`

			`case resp.code`
			`when 200`
			`@crawlermodules.each_key do \|k\|`
			`@crawlermodules[k].parse(reqopts,resp)`
			`end`
			`when 301..303`
			`puts "[#{resp.code}] Redirection to: #{resp['Location']}"`
			`if advopts['VERBOSE']`
			`print_status urltohash('GET',resp['Location'],reqopts['uri'],nil)`
			`end`
			`insertnewpath(urltohash('GET',resp['Location'],reqopts['uri'],nil))`
			`when 404`
			`print_status "[404] Invalid link #{reqopts['uri']}"`
			`else`
			`print_status "Unhandled #{resp.code}"`
			`end`

			`else`
			`print_status "No response"`
			`end`
			`sleep(datastore['SleepTime'])`
			`rescue`
			`print_status "ERROR"`
			`if datastore['VERBOSE']`
			`print_status "#{$!}: #{$!.backtrace}"`
			`end`
			`end`
			`end`

			`#`
			`# Add new path (uri) to test non-viewed queue`
			`#`

			`def insertnewpath(hashreq)`

			`hashreq['uri'] = canonicalize(hashreq['uri'])`

			`if hashreq['rhost'] == datastore['RHOSTS'] and hashreq['rport'] == datastore['RPORT']`
			`if !@ViewedQueue.include?(hashsig(hashreq))`
			`if @NotViewedQueue.read_all(hashreq).size > 0`
			`if datastore['VERBOSE']`
			`print_status "Already in queue to be viewed: #{hashreq['uri']}"`
			`end`
			`else`
			`if datastore['VERBOSE']`
			`print_status "Inserted: #{hashreq['uri']}"`
			`end`

			`@NotViewedQueue.write(hashreq)`
			`end`
			`else`
			`if datastore['VERBOSE']`
			`print_status "#{hashreq['uri']} already visited at #{@ViewedQueue[hashsig(hashreq)]}"`
			`end`
			`end`
			`end`
			`end`

			`#`
			`# Build a new hash for a local path`
			`#`

			`def urltohash(m,url,basepath,dat)`

			`# m: method`
			`# url: uri?[query]`
			`# basepath: base path/uri to determine absolute path when relative`
			`# data: body data, nil if GET and query = uri.query`

			`uri = URI.parse(url)`
			`uritargetssl = (uri.scheme == "https") ? true : false`

			`uritargethost = uri.host`
			`if (uri.host.nil? or uri.host.empty?)`
			`uritargethost = self.ctarget`
			`uritargetssl = self.cssl`
			`end`

			`uritargetport = uri.port`
			`if (uri.port.nil?)`
			`uritargetport = self.cport`
			`end`

			`uritargetpath = uri.path`
			`if (uri.path.nil? or uri.path.empty?)`
			`uritargetpath = "/"`
			`end`

			`newp = Pathname.new(uritargetpath)`
			`oldp = Pathname.new(basepath)`
			`if !newp.absolute?`
			`if oldp.to_s[-1,1] == '/'`
			`newp = oldp+newp`
			`else`
			`if !newp.to_s.empty?`
			`newp = File.join(oldp.dirname,newp)`
			`end`
			`end`
			`end`

			`hashreq = {`
			`'rhost' => uritargethost,`
			`'rport' => uritargetport,`
			`'uri' => newp.to_s,`
			`'method' => m,`
			`'ctype' => 'text/plain',`
			`'ssl' => uritargetssl,`
			`'query' => uri.query,`
			`'data' => nil`
			`}`

			`if m == 'GET' and !dat.nil?`
			`hashreq['query'] = dat`
			`else`
			`hashreq['data'] = dat`
			`end`

			`return hashreq`
			`end`

			`# Taken from http://www.ruby-forum.com/topic/140101 by Rob Biedenharn`
			`def canonicalize(uri)`

			`u = uri.kind_of?(URI) ? uri : URI.parse(uri.to_s)`
			`u.normalize!`
			`newpath = u.path`
			`while newpath.gsub!(%r{([^/]+)/\.\./?}) { \|match\|`
			`$1 == '..' ? match : ''`
			`} do end`
			`newpath = newpath.gsub(%r{/\./}, '/').sub(%r{/\.\z}, '/')`
			`u.path = newpath`
			`# Ugly fix`
			`u.path = u.path.gsub("\/..\/","\/")`
			`u.to_s`
			`end`

			`def hashsig(hashreq)`
			`hashreq.to_s`
			`end`

			`end`

			`class BaseParser`
			`attr_accessor :crawler`

			`def initialize(c)`
			`self.crawler = c`
			`end`

			`def parse(request,result)`
			`nil`
			`end`

			`#`
			`# Add new path (uri) to test hash queue`
			`#`
			`def insertnewpath(hashreq)`
			`self.crawler.insertnewpath(hashreq)`
			`end`

			`def hashsig(hashreq)`
			`self.crawler.hashsig(hashreq)`
			`end`

			`def urltohash(m,url,basepath,dat)`
			`self.crawler.urltohash(m,url,basepath,dat)`
			`end`

			`def targetssl`
			`self.crawler.cssl`
			`end`

			`def targetport`
			`self.crawler.cport`
			`end`

			`def targethost`
			`self.crawler.ctarget`
			`end`

			`def targetinipath`
			`self.crawler.cinipath`
			`end`
			`end`