2010-05-03 17:13:09 +00:00
|
|
|
##
|
2017-07-24 13:26:21 +00:00
|
|
|
# This module requires Metasploit: https://metasploit.com/download
|
2017-03-13 16:36:21 +00:00
|
|
|
# Current source: https://github.com/rapid7/metasploit-framework
|
2010-05-03 17:13:09 +00:00
|
|
|
##
|
|
|
|
|
2010-02-06 05:16:29 +00:00
|
|
|
require 'pathname'
|
2014-07-16 22:14:07 +00:00
|
|
|
require 'nokogiri'
|
2010-01-26 04:21:07 +00:00
|
|
|
require 'uri'
|
|
|
|
|
2010-03-21 00:13:12 +00:00
|
|
|
# Crawler parser that harvests links from the anchor tags of HTML responses.
class CrawlerSimple < BaseParser
  # Scans an HTML response for <a href="..."> links and hands each usable
  # one to the crawler via insertnewpath.
  #
  # request - the request that produced +result+; only request['uri'] is
  #           read here and forwarded to urltohash together with each href
  #           (presumably as the base for relative links -- confirm against
  #           urltohash).
  # result  - the response; its 'Content-Type' header and body are read.
  #
  # Returns nil when the response is not HTML; otherwise the value of the
  # anchor iteration. Callers are not expected to rely on the return value.
  def parse(request, result)
    # The header lookup yields nil when the server sent no Content-Type;
    # coerce to a string so such responses are skipped instead of raising
    # NoMethodError.
    return unless result['Content-Type'].to_s.include?('text/html')

    doc = Nokogiri::HTML(result.body.to_s)

    doc.css('a').each do |anchor_tag|
      hr = anchor_tag['href']

      # Ignore anchors without an href, in-page fragments and javascript:
      # pseudo-links -- none of them name a new resource to fetch.
      next unless hr
      next if hr.match(/^(\#|javascript\:)/)

      begin
        hreq = urltohash('GET', hr, request['uri'], nil)
        insertnewpath(hreq)
      rescue URI::InvalidURIError
        # Best effort: a malformed href is skipped so the remaining links
        # on the page are still processed.
      end
    end
  end
end
|
|
|
|
|