metasploit-framework/data/msfcrawler/basic.rb

##
# $Id$
##

##
# This file is part of the Metasploit Framework and may be subject to
# redistribution and commercial restrictions. Please see the Metasploit
# Framework web site for more information on licensing and terms of use.
# http://metasploit.com/framework/
##

# $Revision$

require 'rubygems'
require 'pathname'
require 'hpricot'
require 'uri'

# Crawler parser that extracts the targets of <a href="..."> links from HTML
# responses and hands each one to insertnewpath.
#
# NOTE(review): urltohash and insertnewpath are not defined in this file;
# presumably they are inherited from BaseParser -- confirm.
class CrawlerSimple < BaseParser

	# Scan one response for anchor links and register each usable target.
	#
	# request - object indexed with 'uri'; that value is passed to urltohash
	#           together with each href (presumably as the base for resolving
	#           relative links -- confirm against urltohash).
	# result  - response object; must answer ['Content-Type'] and #body.
	#
	# Returns nil without doing anything when the response is not declared
	# as text/html.
	def parse(request,result)
		# The header lookup may return nil (e.g. the server sent no
		# Content-Type). to_s turns that into "" so such a response is
		# skipped instead of raising NoMethodError on nil.
		return unless result['Content-Type'].to_s.include?("text/html")

		doc = Hpricot(result.body.to_s)
		doc.search('a').each do |link|
			hr = link.attributes['href']

			# Anchors without an href have nothing to follow.
			next unless hr

			# Skip in-page fragments and javascript: pseudo-links. URI
			# schemes are case-insensitive, so "JavaScript:" must be
			# filtered too; \A anchors at the start of the href itself
			# rather than at the start of any line inside it.
			next if hr.match(/\A(\#|javascript\:)/i)

			begin
				hreq = urltohash('GET',hr,request['uri'],nil)

				insertnewpath(hreq)
			rescue URI::InvalidURIError
				# Deliberately best-effort: a malformed href is ignored so
				# that one bad link does not stop the remaining links on
				# the page from being processed.
			end
		end
	end
end
more cleanups git-svn-id: file:///home/svn/framework3/trunk@9212 4d416f70-5f16-0410-b530-b9f4589650da 2010-05-03 17:13:09 +00:00			`##`
			`# $Id$`
			`##`

			`##`
			`# This file is part of the Metasploit Framework and may be subject to`
			`# redistribution and commercial restrictions. Please see the Metasploit`
			`# Framework web site for more information on licensing and terms of use.`
			`# http://metasploit.com/framework/`
			`##`

			`# $Revision$`

In progress. crawling git-svn-id: file:///home/svn/framework3/trunk@8236 4d416f70-5f16-0410-b530-b9f4589650da 2010-01-26 04:21:07 +00:00			`require 'rubygems'`
Fix crawl module example git-svn-id: file:///home/svn/framework3/trunk@8378 4d416f70-5f16-0410-b530-b9f4589650da 2010-02-06 05:16:29 +00:00			`require 'pathname'`
Using hpricot git-svn-id: file:///home/svn/framework3/trunk@8862 4d416f70-5f16-0410-b530-b9f4589650da 2010-03-21 00:13:12 +00:00			`require 'hpricot'`
In progress. crawling git-svn-id: file:///home/svn/framework3/trunk@8236 4d416f70-5f16-0410-b530-b9f4589650da 2010-01-26 04:21:07 +00:00			`require 'uri'`

Using hpricot git-svn-id: file:///home/svn/framework3/trunk@8862 4d416f70-5f16-0410-b530-b9f4589650da 2010-03-21 00:13:12 +00:00			`class CrawlerSimple < BaseParser`
In progress. crawling git-svn-id: file:///home/svn/framework3/trunk@8236 4d416f70-5f16-0410-b530-b9f4589650da 2010-01-26 04:21:07 +00:00
			`def parse(request,result)`
more cleanups git-svn-id: file:///home/svn/framework3/trunk@9212 4d416f70-5f16-0410-b530-b9f4589650da 2010-05-03 17:13:09 +00:00
Using hpricot git-svn-id: file:///home/svn/framework3/trunk@8862 4d416f70-5f16-0410-b530-b9f4589650da 2010-03-21 00:13:12 +00:00			`if !result['Content-Type'].include? "text/html"`
			`return`
			`end`
more cleanups git-svn-id: file:///home/svn/framework3/trunk@9212 4d416f70-5f16-0410-b530-b9f4589650da 2010-05-03 17:13:09 +00:00
Using hpricot git-svn-id: file:///home/svn/framework3/trunk@8862 4d416f70-5f16-0410-b530-b9f4589650da 2010-03-21 00:13:12 +00:00			`doc = Hpricot(result.body.to_s)`
			`doc.search('a').each do \|link\|`
more cleanups git-svn-id: file:///home/svn/framework3/trunk@9212 4d416f70-5f16-0410-b530-b9f4589650da 2010-05-03 17:13:09 +00:00
Using hpricot git-svn-id: file:///home/svn/framework3/trunk@8862 4d416f70-5f16-0410-b530-b9f4589650da 2010-03-21 00:13:12 +00:00			`hr = link.attributes['href']`
more cleanups git-svn-id: file:///home/svn/framework3/trunk@9212 4d416f70-5f16-0410-b530-b9f4589650da 2010-05-03 17:13:09 +00:00
			`if hr and !hr.match(/^(\#\|javascript\:)/)`
In progress. crawling git-svn-id: file:///home/svn/framework3/trunk@8236 4d416f70-5f16-0410-b530-b9f4589650da 2010-01-26 04:21:07 +00:00			`begin`
more cleanups git-svn-id: file:///home/svn/framework3/trunk@9212 4d416f70-5f16-0410-b530-b9f4589650da 2010-05-03 17:13:09 +00:00			`hreq = urltohash('GET',hr,request['uri'],nil)`

In progress. crawling git-svn-id: file:///home/svn/framework3/trunk@8236 4d416f70-5f16-0410-b530-b9f4589650da 2010-01-26 04:21:07 +00:00			`insertnewpath(hreq)`
more cleanups git-svn-id: file:///home/svn/framework3/trunk@9212 4d416f70-5f16-0410-b530-b9f4589650da 2010-05-03 17:13:09 +00:00
Using hpricot git-svn-id: file:///home/svn/framework3/trunk@8862 4d416f70-5f16-0410-b530-b9f4589650da 2010-03-21 00:13:12 +00:00			`rescue URI::InvalidURIError`
In progress. crawling git-svn-id: file:///home/svn/framework3/trunk@8236 4d416f70-5f16-0410-b530-b9f4589650da 2010-01-26 04:21:07 +00:00			`#puts "Parse error"`
			`#puts "Error: #{link[0]}"`
			`end`
			`end`
Using hpricot git-svn-id: file:///home/svn/framework3/trunk@8862 4d416f70-5f16-0410-b530-b9f4589650da 2010-03-21 00:13:12 +00:00			`end`
more cleanups git-svn-id: file:///home/svn/framework3/trunk@9212 4d416f70-5f16-0410-b530-b9f4589650da 2010-05-03 17:13:09 +00:00			`end`
In progress. crawling git-svn-id: file:///home/svn/framework3/trunk@8236 4d416f70-5f16-0410-b530-b9f4589650da 2010-01-26 04:21:07 +00:00			`end`