diff --git a/lib/anemone.rb b/lib/anemone.rb
index 34a12e4c33..f1bfde22d5 100644
--- a/lib/anemone.rb
+++ b/lib/anemone.rb
@@ -1,6 +1,5 @@
-require 'rubygems'
+# Load the Anemone core
 require 'anemone/core'
 
-# Overload the HTTP class
+# Overload the HTTP class with a variant that uses Rex::Proto::HTTP
 require 'anemone/rex_http'
-
diff --git a/lib/msf/core/exploit/http/crawler.rb b/lib/msf/core/exploit/http/crawler.rb
new file mode 100644
index 0000000000..90cad38c72
--- /dev/null
+++ b/lib/msf/core/exploit/http/crawler.rb
@@ -0,0 +1,300 @@
+module Msf
+
+###
+#
+# This module provides methods for implementing a web crawler
+#
+###
+module Exploit::Remote::HttpCrawler
+  include Msf::Auxiliary::Report
+
+  def initialize(info = {})
+    super
+
+    register_options(
+      [
+        Opt::RHOST,
+        Opt::RPORT(80),
+        OptString.new('VHOST', [ false, "HTTP server virtual host" ]),
+        OptString.new('URI', [ true, "The starting page to crawl", "/"]),
+        Opt::Proxies,
+        OptInt.new('MAX_PAGES', [ true, 'The maximum number of pages to crawl per URL', 500]),
+        OptInt.new('MAX_MINUTES', [ true, 'The maximum number of minutes to spend on each URL', 5]),
+        OptInt.new('MAX_THREADS', [ true, 'The maximum number of concurrent requests', 4])
+      ], self.class
+    )
+
+    register_advanced_options(
+      [
+        OptInt.new('RequestTimeout', [false, 'The maximum number of seconds to wait for a reply', 15]),
+        OptInt.new('RedirectLimit', [false, 'The maximum number of redirects for a single request', 5]),
+        OptInt.new('RetryLimit', [false, 'The maximum number of attempts for a single request', 5]),
+        OptString.new('UserAgent', [true, 'The User-Agent header to use for all requests',
+          "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
+        ]),
+        OptString.new('BasicAuthUser', [false, 'The HTTP username to specify for basic authentication']),
+        OptString.new('BasicAuthPass', [false, 'The HTTP password to specify for basic authentication']),
+        OptBool.new('SSL', [ false, 'Negotiate SSL for outgoing connections', false]),
+        OptEnum.new('SSLVersion', [ false, 'Specify the version of SSL that should be used', 'SSL3', ['SSL2', 'SSL23', 'SSL3', 'TLS1']]),
+      ], self.class
+    )
+
+    register_autofilter_ports([ 80, 8080, 443, 8000, 8888, 8880, 8008, 3000, 8443 ])
+    register_autofilter_services(%W{ http https })
+
+    begin
+      require 'anemone'
+      @anemone_loaded = true
+    rescue ::Exception => e
+      @anemone_loaded = false
+      @anemone_error = e
+    end
+  end
+
+  def setup
+    raise RuntimeError, "Could not load Anemone/Nokogiri: #{@anemone_error}" if not @anemone_loaded
+    super
+  end
+
+  def cleanup
+    if @crawler
+      @crawler.shutdown rescue nil
+      @crawler = nil
+    end
+    super
+  end
+
+  ##
+  #
+  # Crawler methods and accessors
+  #
+  ##
+
+  # A target object for tracking URLs
+  class WebTarget < ::Hash
+    def to_url
+      proto = self[:ssl] ? "https" : "http"
+      "#{proto}://#{self[:host]}:#{self[:port]}#{self[:path]}"
+    end
+  end
+
+  # A custom error to signify we hit the page request cap
+  class MaximumPageCount < ::RuntimeError
+  end
+
+  # Some accessors for stat tracking
+  attr_accessor :targets
+  attr_accessor :url_count, :url_total, :form_count, :request_count
+
+  # Entry point for the crawler code
+  def run_host(ip)
+
+    print_status("Testing #{ip}...")
+
+    self.request_count = 0
+    self.form_count = 0
+    self.url_count = 0
+    self.url_total = 1
+
+    path, query = datastore['URI'].split('?', 2)
+    query ||= ""
+
+    t = WebTarget.new
+
+    t.merge!({
+      :vhost => vhost,
+      :host  => rhost,
+      :port  => rport,
+      :ssl   => ssl,
+      :path  => path,
+      :query => query,
+      :user  => datastore['BasicAuthUser'],
+      :pass  => datastore['BasicAuthPass'],
+      :info  => ""
+    })
+
+    t[:site] = report_web_site(:wait => true, :host => t[:host], :port => t[:port], :vhost => t[:vhost], :ssl => t[:ssl])
+
+    print_status("Crawling #{t.to_url}...")
+    begin
+      @current_vhost = t[:vhost]
+      @current_site  = t[:site]
+      ::Timeout.timeout(max_crawl_time) { crawl_target(t) }
+    rescue ::Timeout::Error
+      print_error("Crawl of #{t.to_url} has reached the configured timeout")
+    ensure
+      @current_vhost = nil
+    end
+    print_status("Crawl of #{t.to_url} complete")
+  end
+
+  def get_connection_timeout
+    datastore['RequestTimeout']
+  end
+
+  def max_page_count
+    datastore['MAX_PAGES']
+  end
+
+  def max_crawl_time
+    datastore['MAX_MINUTES'] * 60.0
+  end
+
+  def max_crawl_threads
+    datastore['MAX_THREADS']
+  end
+
+  def get_link_filter
+    /\.(js|png|jpe?g|bmp|gif|swf|jar|zip|gz|bz2|rar|pdf|docx?|pptx?)$/i
+  end
+
+  def focus_crawl(page)
+    page.links
+  end
+
+  def crawl_target(t)
+    cnt  = 0
+    opts = crawler_options(t)
+    url  = t.to_url
+
+    @crawler = ::Anemone::Core.new([url], opts)
+    @crawler.on_every_page do |page|
+      cnt += 1
+
+      self.request_count += 1
+
+      # Extract any interesting data from the page
+      crawler_process_page(t, page, cnt)
+
+      # Sync the database every 100 items
+      if cnt % 100 == 0
+        framework.db.sync
+      end
+
+      # Blow up if we hit our maximum page count
+      if cnt >= max_page_count
+        print_error("Maximum page count reached for #{url}")
+        raise MaximumPageCount, "Maximum page count reached"
+      end
+    end
+
+    # Skip link processing based on a regular expression
+    @crawler.skip_links_like(
+      get_link_filter
+    )
+
+    # Focus our crawling on interesting, but not over-crawled links
+    @crawler.focus_crawl do |page|
+      focus_crawl(page)
+    end
+
+    begin
+      @crawler.run
+    rescue MaximumPageCount
+      # No need to print anything else
+    rescue ::Timeout::Error
+      # Bubble this up to the top-level handler
+      raise $!
+    rescue ::Exception => e
+      print_error("Crawler Exception: #{url} #{e} #{e.backtrace}")
+    ensure
+      @crawler.shutdown rescue nil
+      @crawler = nil
+    end
+  end
+
+  def crawler_process_page(t, page, cnt)
+    msg = "[#{"%.5d" % cnt}/#{"%.5d" % max_page_count}] #{page.code || "ERR"} - #{@current_site.vhost} - #{page.url}"
+    case page.code
+    when 301, 302
+      if page.headers and page.headers["location"]
+        print_status(msg + " -> " + page.headers["location"].to_s)
+      else
+        print_status(msg)
+      end
+    when 500...599
+      # XXX: Log the fact that we hit an error page
+      print_good(msg)
+    when 401, 403
+      print_good(msg)
+    when 200
+      print_status(msg)
+    when 404
+      print_error(msg)
+    else
+      print_error(msg)
+    end
+  end
+
+  def crawler_options(t)
+    opts = {}
+    opts[:user_agent]          = datastore['UserAgent']
+    opts[:verbose]             = false
+    opts[:threads]             = max_crawl_threads
+    opts[:obey_robots_txt]     = false
+    opts[:redirect_limit]      = datastore['RedirectLimit']
+    opts[:retry_limit]         = datastore['RetryLimit']
+    opts[:accept_cookies]      = true
+    opts[:depth_limit]         = false
+    opts[:skip_query_strings]  = false
+    opts[:discard_page_bodies] = true
+    opts[:framework]           = framework
+    opts[:module]              = self
+    opts[:timeout]             = get_connection_timeout
+    opts
+  end
+
+  ##
+  #
+  # Wrappers for getters
+  #
+  ##
+
+  #
+  # Returns the target host
+  #
+  def rhost
+    datastore['RHOST']
+  end
+
+  #
+  # Returns the remote port
+  #
+  def rport
+    datastore['RPORT']
+  end
+
+  #
+  # Returns the VHOST of the HTTP server.
+  #
+  def vhost
+    datastore['VHOST'] || datastore['RHOST']
+  end
+
+  #
+  # Returns the boolean indicating SSL
+  #
+  def ssl
+    ((datastore.default?('SSL') and rport.to_i == 443) or datastore['SSL'])
+  end
+
+  #
+  # Returns the string indicating SSL version
+  #
+  def ssl_version
+    datastore['SSLVersion']
+  end
+
+  #
+  # Returns the configured proxy list
+  #
+  def proxies
+    datastore['Proxies']
+  end
+
+end
+
+end
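A consumer of the new mixin only needs to include it alongside Msf::Auxiliary::Scanner (which schedules run_host) and override the hooks it cares about. The sketch below is illustrative; the module name and messages are made up, and the complete, real consumer is the scanner module added at the end of this patch.

    require 'msf/core'

    class Metasploit3 < Msf::Auxiliary
      include Msf::Exploit::Remote::HttpCrawler
      include Msf::Auxiliary::Scanner

      def initialize
        super('Name' => 'Minimal Crawler Example', 'License' => MSF_LICENSE)
      end

      # Called once per fetched page by crawl_target()
      def crawler_process_page(t, page, cnt)
        print_status("#{page.code} #{t.to_url} -> #{page.url}")
      end

      # Links matching this expression are never requested
      def get_link_filter
        /\.(css|png|jpe?g|gif)$/i
      end
    end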
diff --git a/lib/msf/core/exploit/mixins.rb b/lib/msf/core/exploit/mixins.rb
index 05d6cce99a..b53fd7063f 100644
--- a/lib/msf/core/exploit/mixins.rb
+++ b/lib/msf/core/exploit/mixins.rb
@@ -34,6 +34,7 @@ require 'msf/core/exploit/telnet'
 require 'msf/core/exploit/ftpserver'
 require 'msf/core/exploit/http/client'
 require 'msf/core/exploit/http/server'
+require 'msf/core/exploit/http/crawler'
 require 'msf/core/exploit/smtp'
 require 'msf/core/exploit/dcerpc'
 require 'msf/core/exploit/sunrpc'
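Both the mixin above and the scanner module that follows install the same default link filter. A quick standalone illustration of what gets skipped (sample paths are made up):

    filter = /\.(js|png|jpe?g|bmp|gif|swf|jar|zip|gz|bz2|rar|pdf|docx?|pptx?)$/i

    %w[ /index.php /images/logo.PNG /docs/manual.pdf /archive.tar.gz /login ].each do |path|
      puts "#{path} => #{path =~ filter ? 'skipped' : 'crawled'}"
    end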
diff --git a/modules/auxiliary/scanner/http/crawler.rb b/modules/auxiliary/scanner/http/crawler.rb
new file mode 100644
index 0000000000..cacb50434b
--- /dev/null
+++ b/modules/auxiliary/scanner/http/crawler.rb
@@ -0,0 +1,222 @@
+##
+# $Id$
+##
+
+##
+# This file is part of the Metasploit Framework and may be subject to
+# redistribution and commercial restrictions. Please see the Metasploit
+# Framework web site for more information on licensing and terms of use.
+# http://metasploit.com/framework/
+##
+
+require 'rex/proto/http'
+require 'msf/core'
+
+
+class Metasploit3 < Msf::Auxiliary
+
+  include Msf::Exploit::Remote::HttpCrawler
+  include Msf::Auxiliary::Scanner
+
+  def initialize
+    super(
+      'Name'        => 'Web Site Crawler',
+      'Version'     => '$Revision$',
+      'Description' => 'Crawl one or more web sites and store information about what was found',
+      'Author'      => 'hdm',
+      'License'     => MSF_LICENSE
+    )
+
+  end
+
+  # Scrub links that end in these extensions
+  def get_link_filter
+    /\.(js|png|jpe?g|bmp|gif|swf|jar|zip|gz|bz2|rar|pdf|docx?|pptx?)$/i
+  end
+
+=begin
+  # Prefer dynamic content over non-dynamic
+  def focus_crawl(page)
+    page.links
+  end
+=end
+
+  #
+  # The main callback from the crawler
+  #
+  # Data we will report:
+  # - The path of any URL found by the crawler (web.uri, :path => page.path)
+  # - The occurrence of any form (web.form :path, :type (get|post|path_info), :params)
+  #
+  def crawler_process_page(t, page, cnt)
+    msg = "[#{"%.5d" % cnt}/#{"%.5d" % max_page_count}] #{page.code || "ERR"} - #{@current_site.vhost} - #{page.url}"
+    case page.code
+    when 301, 302
+      if page.headers and page.headers["location"]
+        print_status(msg + " -> " + page.headers["location"].to_s)
+      else
+        print_status(msg)
+      end
+    when 500...599
+      # XXX: Log the fact that we hit an error page
+      print_good(msg)
+    when 401, 403
+      print_good(msg)
+    when 200
+      print_status(msg)
+    when 404
+      print_error(msg)
+    else
+      print_error(msg)
+    end
+
+    #
+    # Process the web page
+    #
+
+    info = {
+      :web_site => @current_site,
+      :path     => page.url.path,
+      :query    => page.url.query,
+      :code     => page.code,
+      :body     => page.body,
+      :headers  => page.headers
+    }
+
+    if page.headers['content-type']
+      info[:ctype] = page.headers['content-type']
+    end
+
+    if page.headers['set-cookie']
+      info[:cookie] = page.headers['set-cookie']
+    end
+
+    if page.headers['authorization']
+      info[:auth] = page.headers['authorization']
+    end
+
+    if page.headers['location']
+      info[:location] = page.headers['location']
+    end
+
+    if page.headers['last-modified']
+      info[:mtime] = page.headers['last-modified']
+    end
+
+    # Report the web page to the database
+    report_web_page(info)
+
+    # Only process interesting response codes
+    return if not [302, 301, 200, 500, 401, 403, 404].include?(page.code)
+
+    #
+    # Skip certain types of forms right off the bat
+    #
+
+    # Apache multiview directories
+    return if page.url.query =~ /^C=[A-Z];O=/ # Apache
+
+    # Scrub out the jsessionid appends
+    page.url.path = page.url.path.sub(/;jsessionid=[a-zA-Z0-9]+/, '')
+
+    #
+    # Continue processing forms
+    #
+    forms = []
+    form_template = { :web_site => @current_site }
+    form = {}.merge(form_template)
+
+    # This page has a query parameter we can test with GET parameters
+    # ex: /test.php?a=b&c=d
+    if page.url.query and not page.url.query.empty?
+      form[:method] = 'GET'
+      form[:path]   = page.url.path
+      vars = page.url.query.split('&').map{ |x| x.split("=", 2) }
+      form[:params] = vars
+    end
+
+    # This is a REST-ish application with numeric parameters
+    # ex: /customers/343
+    if not form[:path] and page.url.path.to_s =~ /(.*)\/(\d+)$/
+      path_base = $1
+      path_info = $2
+      form[:method] = 'PATH'
+      form[:path]   = path_base
+      form[:params] = [['PATH', path_info]]
+      form[:query]  = page.url.query.to_s
+    end
+
+    # This is an application that uses PATH_INFO for parameters:
+    # ex: /index.php/Main_Page/Article01
+    if not form[:path] and page.url.path.to_s =~ /(.*\/[a-z0-9A-Z]{3,256}\.[a-z0-9A-Z]{2,8})(\/.*)/
+      path_base = $1
+      path_info = $2
+      form[:method] = 'PATH'
+      form[:path]   = path_base
+      form[:params] = [['PATH', path_info]]
+      form[:query]  = page.url.query.to_s
+    end
+
+    # Done processing URI-based forms
+    forms << form
+
+    if page.doc
+      page.doc.css("form").each do |f|
+
+        target = page.url
+
+        if f['action'] and not f['action'].strip.empty?
+          action = f['action']
+
+          # Prepend relative URLs with the current directory
+          if action[0,1] != "/" and action !~ /\:\/\//
+            # Extract the base href first
+            base = target.path.gsub(/(.*\/)[^\/]+$/, "\\1")
+            page.doc.css("base").each do |bref|
+              if bref['href']
+                base = bref['href']
+              end
+            end
+            action = (base + "/").sub(/\/\/$/, '/') + action
+          end
+
+          target = page.to_absolute(URI( action )) rescue next
+
+          if not page.in_domain?(target)
+            # Replace 127.0.0.1 and non-qualified hostnames with our page.host
+            # ex: http://localhost/url OR http://www01/url
+            target_uri = URI(target.to_s)
+            if (target_uri.host.index(".").nil? or target_uri.host == "127.0.0.1")
+              target_uri.host = page.url.host
+              target = target_uri
+            else
+              next
+            end
+          end
+        end
+
+        form = {}.merge!(form_template)
+        form[:method] = (f['method'] || 'GET').upcase
+        form[:query]  = target.query.to_s if form[:method] != "GET"
+        form[:path]   = target.path
+        form[:params] = []
+        f.css('input', 'textarea').each do |inp|
+          form[:params] << [inp['name'].to_s, inp['value'] || inp.content || '', { :type => inp['type'].to_s }]
+        end
+
+        # XXX: handle SELECT elements
+
+        forms << form
+      end
+    end
+
+    # Report each of the discovered forms
+    forms.each do |form|
+      next if not form[:method]
+      print_status((" " * 24) + "FORM: #{form[:method]} #{form[:path]}")
+      report_web_form(form)
+      self.form_count += 1
+    end
+  end
+end
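To make the URI-based form detection above concrete, the snippet below applies the same three checks to a few sample URLs (a standalone illustration with made-up URLs; it mirrors the regexes but is not code from the module):

    require 'uri'

    [
      "http://www.example.com/test.php?a=b&c=d",      # query string    -> GET form
      "http://www.example.com/customers/343",         # numeric tail    -> PATH form
      "http://www.example.com/index.php/Main_Page/X", # PATH_INFO style -> PATH form
    ].each do |sample|
      u = URI(sample)
      if u.query and not u.query.empty?
        p :method => 'GET',  :path => u.path, :params => u.query.split('&').map{ |x| x.split("=", 2) }
      elsif u.path =~ /(.*)\/(\d+)$/
        p :method => 'PATH', :path => $1, :params => [['PATH', $2]]
      elsif u.path =~ /(.*\/[a-z0-9A-Z]{3,256}\.[a-z0-9A-Z]{2,8})(\/.*)/
        p :method => 'PATH', :path => $1, :params => [['PATH', $2]]
      end
    end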