Add the crawler mixin and a sample form extractor crawler
git-svn-id: file:///home/svn/framework3/trunk@11025 4d416f70-5f16-0410-b530-b9f4589650da

parent bc2d43d86d
commit f457ccb8f7
@@ -1,6 +1,5 @@
require 'rubygems'

# Load the Anemone core
require 'anemone/core'

# Overload the HTTP class
# Overload the HTTP class with a variant that uses Rex::Proto::HTTP
require 'anemone/rex_http'

@@ -0,0 +1,300 @@
module Msf

###
#
# This module provides methods for implementing a web crawler
#
###
module Exploit::Remote::HttpCrawler
  include Msf::Auxiliary::Report

  def initialize(info = {})
    super

    register_options(
      [
        Opt::RHOST,
        Opt::RPORT(80),
        OptString.new('VHOST', [ false, "HTTP server virtual host" ]),
        OptString.new('URI', [ true, "The starting page to crawl", "/"]),
        Opt::Proxies,
        OptInt.new('MAX_PAGES', [ true, 'The maximum number of pages to crawl per URL', 500]),
        OptInt.new('MAX_MINUTES', [ true, 'The maximum number of minutes to spend on each URL', 5]),
        OptInt.new('MAX_THREADS', [ true, 'The maximum number of concurrent requests', 4])
      ], self.class
    )

    register_advanced_options(
      [
        OptInt.new('RequestTimeout', [false, 'The maximum number of seconds to wait for a reply', 15]),
        OptInt.new('RedirectLimit', [false, 'The maximum number of redirects for a single request', 5]),
        OptInt.new('RetryLimit', [false, 'The maximum number of attempts for a single request', 5]),
        OptString.new('UserAgent', [true, 'The User-Agent header to use for all requests',
          "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
        ]),
        OptString.new('BasicAuthUser', [false, 'The HTTP username to specify for basic authentication']),
        OptString.new('BasicAuthPass', [false, 'The HTTP password to specify for basic authentication']),
        OptBool.new('SSL', [ false, 'Negotiate SSL for outgoing connections', false]),
        OptEnum.new('SSLVersion', [ false, 'Specify the version of SSL that should be used', 'SSL3', ['SSL2', 'SSL23', 'SSL3', 'TLS1']]),
      ], self.class
    )

    register_autofilter_ports([ 80, 8080, 443, 8000, 8888, 8880, 8008, 3000, 8443 ])
    register_autofilter_services(%W{ http https })

    begin
      require 'anemone'
      @anemone_loaded = true
    rescue ::Exception => e
      @anemone_loaded = false
      @anemone_error  = e
    end
  end

  def setup
    raise RuntimeError, "Could not load Anemone/Nokogiri: #{@anemone_error}" if not @anemone_loaded
    super
  end

  def cleanup
    if @crawler
      @crawler.shutdown rescue nil
      @crawler = nil
    end
    super
  end

  ##
  #
  # Crawler methods and accessors
  #
  ##

  # A target object for tracking URLs
  class WebTarget < ::Hash
    def to_url
      proto = self[:ssl] ? "https" : "http"
      "#{proto}://#{self[:host]}:#{self[:port]}#{self[:path]}"
    end
  end

  # A custom error to signify we hit the page request cap
  class MaximumPageCount < ::RuntimeError
  end

  # Some accessors for stat tracking
  attr_accessor :targets
  attr_accessor :url_count, :url_total, :form_count, :request_count


  # Entry point for the crawler code
  def run_host(ip)

    print_status("Testing #{ip}...")

    self.request_count = 0
    self.form_count    = 0
    self.url_count     = 0
    self.url_total     = 1

    path, query = datastore['URI'].split('?', 2)
    query ||= ""

    t = WebTarget.new

    t.merge!({
      :vhost => vhost,
      :host  => rhost,
      :port  => rport,
      :ssl   => ssl,
      :path  => path,
      :query => query,
      :user  => datastore['BasicAuthUser'],
      :pass  => datastore['BasicAuthPass'],
      :info  => ""
    })

    t[:site] = report_web_site(:wait => true, :host => t[:host], :port => t[:port], :vhost => t[:vhost], :ssl => t[:ssl])

    print_status("Crawling #{t.to_url}...")
    begin
      @current_vhost = t[:vhost]
      @current_site  = t[:site]
      ::Timeout.timeout(max_crawl_time) { crawl_target(t) }
    rescue ::Timeout::Error
      print_error("Crawl of #{t.to_url} has reached the configured timeout")
    ensure
      @current_vhost = nil
    end
    print_status("Crawl of #{t.to_url} complete")
  end

  def get_connection_timeout
    datastore['RequestTimeout']
  end

  def max_page_count
    datastore['MAX_PAGES']
  end

  def max_crawl_time
    datastore['MAX_MINUTES'] * 60.0
  end

  def max_crawl_threads
    datastore['MAX_THREADS']
  end

  def get_link_filter
    /\.(js|png|jpe?g|bmp|gif|swf|jar|zip|gz|bz2|rar|pdf|docx?|pptx?)$/i
  end

  def focus_crawl(page)
    page.links
  end

  def crawl_target(t)
    cnt  = 0
    opts = crawler_options(t)
    url  = t.to_url

    @crawler = ::Anemone::Core.new([url], opts)
    @crawler.on_every_page do |page|
      cnt += 1

      self.request_count += 1

      # Extract any interesting data from the page
      crawler_process_page(t, page, cnt)

      # Sync the database every 100 items
      if cnt % 100 == 0
        framework.db.sync
      end

      # Blow up if we hit our maximum page count
      if cnt >= max_page_count
        print_error("Maximum page count reached for #{url}")
        raise MaximumPageCount, "Maximum page count reached"
      end
    end

    # Skip link processing based on a regular expression
    @crawler.skip_links_like(
      get_link_filter
    )

    # Focus our crawling on interesting, but not over-crawled links
    @crawler.focus_crawl do |page|
      focus_crawl(page)
    end

    begin
      @crawler.run
    rescue MaximumPageCount
      # No need to print anything else
    rescue ::Timeout::Error
      # Bubble this up to the top-level handler
      raise $!
    rescue ::Exception => e
      print_error("Crawler Exception: #{url} #{e} #{e.backtrace}")
    ensure
      @crawler.shutdown rescue nil
      @crawler = nil
    end
  end

  def crawler_process_page(t, page, cnt)
    msg = "[#{"%.5d" % cnt}/#{"%.5d" % max_page_count}] #{page.code || "ERR"} - #{@current_site.vhost} - #{page.url}"
    case page.code
    when 301, 302
      if page.headers and page.headers["location"]
        print_status(msg + " -> " + page.headers["location"].to_s)
      else
        print_status(msg)
      end
    when 500...599
      # XXX: Log the fact that we hit an error page
      print_good(msg)
    when 401, 403
      print_good(msg)
    when 200
      print_status(msg)
    when 404
      print_error(msg)
    else
      print_error(msg)
    end
  end

  def crawler_options(t)
    opts = {}
    opts[:user_agent]          = datastore['UserAgent']
    opts[:verbose]             = false
    opts[:threads]             = max_crawl_threads
    opts[:obey_robots_txt]     = false
    opts[:redirect_limit]      = datastore['RedirectLimit']
    opts[:retry_limit]         = datastore['RetryLimit']
    opts[:accept_cookies]      = true
    opts[:depth_limit]         = false
    opts[:skip_query_strings]  = false
    opts[:discard_page_bodies] = true
    opts[:framework]           = framework
    opts[:module]              = self
    opts[:timeout]             = get_connection_timeout
    opts
  end


  ##
  #
  # Wrappers for getters
  #
  ##

  #
  # Returns the target host
  #
  def rhost
    datastore['RHOST']
  end

  #
  # Returns the remote port
  #
  def rport
    datastore['RPORT']
  end

  #
  # Returns the VHOST of the HTTP server.
  #
  def vhost
    datastore['VHOST'] || datastore['RHOST']
  end

  #
  # Returns the boolean indicating SSL
  #
  def ssl
    ((datastore.default?('SSL') and rport.to_i == 443) or datastore['SSL'])
  end

  #
  # Returns the string indicating SSL version
  #
  def ssl_version
    datastore['SSLVersion']
  end

  #
  # Returns the configured proxy list
  #
  def proxies
    datastore['Proxies']
  end


end

end
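A minimal sketch of a consumer of this mixin (a hypothetical module, distinct from the sample crawler added below): include Msf::Exploit::Remote::HttpCrawler together with Msf::Auxiliary::Scanner, let the mixin's run_host() drive the crawl, and override crawler_process_page() to act on each fetched page. Module name and author are placeholders.

require 'msf/core'

class Metasploit3 < Msf::Auxiliary

  # Pull in the crawler mixin defined above, plus the scanner mixin
  # that invokes run_host() once per RHOST
  include Msf::Exploit::Remote::HttpCrawler
  include Msf::Auxiliary::Scanner

  def initialize
    super(
      'Name'        => 'Minimal Crawler Example',   # hypothetical name
      'Description' => 'Print every URL visited by the crawler',
      'Author'      => 'example',                   # placeholder author
      'License'     => MSF_LICENSE
    )
  end

  # Callback invoked by crawl_target() for every page Anemone fetches;
  # overriding it is how a module hooks into the crawl
  def crawler_process_page(t, page, cnt)
    print_status("#{page.code || 'ERR'} #{page.url}")
  end
end
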
@@ -34,6 +34,7 @@ require 'msf/core/exploit/telnet'
require 'msf/core/exploit/ftpserver'
require 'msf/core/exploit/http/client'
require 'msf/core/exploit/http/server'
require 'msf/core/exploit/http/crawler'
require 'msf/core/exploit/smtp'
require 'msf/core/exploit/dcerpc'
require 'msf/core/exploit/sunrpc'

@@ -0,0 +1,222 @@
##
# $Id$
##

##
# This file is part of the Metasploit Framework and may be subject to
# redistribution and commercial restrictions. Please see the Metasploit
# Framework web site for more information on licensing and terms of use.
# http://metasploit.com/framework/
##

require 'rex/proto/http'
require 'msf/core'


class Metasploit3 < Msf::Auxiliary

  include Msf::Exploit::Remote::HttpCrawler
  include Msf::Auxiliary::Scanner

  def initialize
    super(
      'Name'        => 'Web Site Crawler',
      'Version'     => '$Revision$',
      'Description' => 'Crawl one or more web sites and store information about what was found',
      'Author'      => 'hdm',
      'License'     => MSF_LICENSE
    )
  end

  # Scrub links that end in these extensions
  def get_link_filter
    /\.(js|png|jpe?g|bmp|gif|swf|jar|zip|gz|bz2|rar|pdf|docx?|pptx?)$/i
  end

=begin
  # Prefer dynamic content over non-dynamic
  def focus_crawl(page)
    page.links
  end
=end

  #
  # The main callback from the crawler
  #
  # Data we will report:
  # - The path of any URL found by the crawler (web.uri, :path => page.path)
  # - The occurrence of any form (web.form :path, :type (get|post|path_info), :params)
  #
  def crawler_process_page(t, page, cnt)
    msg = "[#{"%.5d" % cnt}/#{"%.5d" % max_page_count}] #{page.code || "ERR"} - #{@current_site.vhost} - #{page.url}"
    case page.code
    when 301, 302
      if page.headers and page.headers["location"]
        print_status(msg + " -> " + page.headers["location"].to_s)
      else
        print_status(msg)
      end
    when 500...599
      # XXX: Log the fact that we hit an error page
      print_good(msg)
    when 401, 403
      print_good(msg)
    when 200
      print_status(msg)
    when 404
      print_error(msg)
    else
      print_error(msg)
    end

    #
    # Process the web page
    #

    info = {
      :web_site => @current_site,
      :path     => page.url.path,
      :query    => page.url.query,
      :code     => page.code,
      :body     => page.body,
      :headers  => page.headers
    }

    if page.headers['content-type']
      info[:ctype] = page.headers['content-type']
    end

    if page.headers['set-cookie']
      info[:cookie] = page.headers['set-cookie']
    end

    if page.headers['authorization']
      info[:auth] = page.headers['authorization']
    end

    if page.headers['location']
      info[:location] = page.headers['location']
    end

    if page.headers['last-modified']
      info[:mtime] = page.headers['last-modified']
    end

    # Report the web page to the database
    report_web_page(info)

    # Only process interesting response codes
    return if not [302, 301, 200, 500, 401, 403, 404].include?(page.code)

    #
    # Skip certain types of forms right off the bat
    #

    # Apache multiview directories
    return if page.url.query =~ /^C=[A-Z];O=/ # Apache

    # Scrub out the jsessionid appends
    page.url.path = page.url.path.sub(/;jsessionid=[a-zA-Z0-9]+/, '')

    #
    # Continue processing forms
    #
    forms = []
    form_template = { :web_site => @current_site }
    form = {}.merge(form_template)

    # This page has a query parameter we can test with GET parameters
    # ex: /test.php?a=b&c=d
    if page.url.query and not page.url.query.empty?
      form[:method] = 'GET'
      form[:path]   = page.url.path
      vars = page.url.query.split('&').map{ |x| x.split("=", 2) }
      form[:params] = vars
    end

    # This is a REST-ish application with numeric parameters
    # ex: /customers/343
    if not form[:path] and page.url.path.to_s =~ /(.*)\/(\d+)$/
      path_base = $1
      path_info = $2
      form[:method] = 'PATH'
      form[:path]   = path_base
      form[:params] = [['PATH', path_info]]
      form[:query]  = page.url.query.to_s
    end

    # This is an application that uses PATH_INFO for parameters:
    # ex: /index.php/Main_Page/Article01
    if not form[:path] and page.url.path.to_s =~ /(.*\/[a-z0-9A-Z]{3,256}\.[a-z0-9A-Z]{2,8})(\/.*)/
      path_base = $1
      path_info = $2
      form[:method] = 'PATH'
      form[:path]   = path_base
      form[:params] = [['PATH', path_info]]
      form[:query]  = page.url.query.to_s
    end

    # Done processing URI-based forms
    forms << form

    if page.doc
      page.doc.css("form").each do |f|

        target = page.url

        if f['action'] and not f['action'].strip.empty?
          action = f['action']

          # Prepend relative URLs with the current directory
          if action[0,1] != "/" and action !~ /\:\/\//
            # Extract the base href first
            base = target.path.gsub(/(.*\/)[^\/]+$/, "\\1")
            page.doc.css("base").each do |bref|
              if bref['href']
                base = bref['href']
              end
            end
            action = (base + "/").sub(/\/\/$/, '/') + action
          end

          target = page.to_absolute(URI( action )) rescue next

          if not page.in_domain?(target)
            # Replace 127.0.0.1 and non-qualified hostnames with our page.host
            # ex: http://localhost/url OR http://www01/url
            target_uri = URI(target.to_s)
            if (target_uri.host.index(".").nil? or target_uri.host == "127.0.0.1")
              target_uri.host = page.url.host
              target = target_uri
            else
              next
            end
          end
        end

        form = {}.merge!(form_template)
        form[:method] = (f['method'] || 'GET').upcase
        form[:query]  = target.query.to_s if form[:method] != "GET"
        form[:path]   = target.path
        form[:params] = []
        f.css('input', 'textarea').each do |inp|
          form[:params] << [inp['name'].to_s, inp['value'] || inp.content || '', { :type => inp['type'].to_s }]
        end

        # XXX: handle SELECT elements

        forms << form
      end
    end

    # Report each of the discovered forms
    forms.each do |form|
      next if not form[:method]
      print_status((" " * 24) + "FORM: #{form[:method]} #{form[:path]}")
      report_web_form(form)
      self.form_count += 1
    end
  end
end
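For reference, a standalone sketch (hypothetical URLs against a hypothetical example.local host, not part of the commit) showing how the three URI-based form rules above classify a query-string page, a REST-style numeric path, and a PATH_INFO-style path:

require 'uri'

# Hypothetical URLs illustrating the three URI-based form rules
[ '/test.php?a=b&c=d', '/customers/343', '/index.php/Main_Page/Article01' ].each do |raw|
  url = URI("http://example.local" + raw)

  if url.query and not url.query.empty?
    # Query-string form: split into [name, value] pairs
    params = url.query.split('&').map { |x| x.split('=', 2) }
    puts "GET  #{url.path} params=#{params.inspect}"
  elsif url.path =~ /(.*)\/(\d+)$/
    # REST-ish numeric parameter, reported as a PATH form
    puts "PATH #{$1} params=[[\"PATH\", \"#{$2}\"]]"
  elsif url.path =~ /(.*\/[a-z0-9A-Z]{3,256}\.[a-z0-9A-Z]{2,8})(\/.*)/
    # PATH_INFO-style parameters following a script name
    puts "PATH #{$1} params=[[\"PATH\", \"#{$2}\"]]"
  end
end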