##
# This file is part of the Metasploit Framework and may be subject to
# redistribution and commercial restrictions. Please see the Metasploit
# web site for more information on licensing and terms of use.
#   http://metasploit.com/
##

require 'rex/proto/http'
require 'msf/core'

class Metasploit3 < Msf::Auxiliary

  include Msf::Auxiliary::HttpCrawler

  def initialize
    super(
      'Name'        => 'Web Site Crawler',
      'Description' => 'Crawl a web site and store information about what was found',
      'Author'      => %w(hdm tasos),
      'License'     => MSF_LICENSE
    )

    register_advanced_options([
      OptString.new('ExcludePathPatterns', [false, 'Newline-separated list of path patterns to ignore (\'*\' is a wildcard)']),
    ])

    @for_each_page_blocks = []
  end

=begin
  # Prefer dynamic content over non-dynamic
  def focus_crawl(page)
    page.links
  end
=end

  # Overrides Msf::Auxiliary::HttpCrawler#get_link_filter to add the patterns
  # from datastore['ExcludePathPatterns'] to the link exclusion filter.
  def get_link_filter
    return super if datastore['ExcludePathPatterns'].to_s.empty?

    patterns = opt_patterns_to_regexps( datastore['ExcludePathPatterns'].to_s )
    patterns = patterns.map { |r| "(#{r.source})" }

    Regexp.new( ( ["(#{super.source})"] | patterns ).join( '|' ) )
  end

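  # Runs the crawl (via the HttpCrawler mixin), then records the starting URI
  # itself as a form if it carries testable parameters.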
  def run
    super

    if form = form_from_url( @current_site, datastore['URI'] )
      print_status((" " * 24) + "FORM: #{form[:method]} #{form[:path]}")
      report_web_form( form )
      self.form_count += 1
    end
  end

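  # Registers a callback block that will be invoked with every crawled page
  # at the end of crawler_process_page.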
  def for_each_page( &block )
    @for_each_page_blocks << block if block_given?
  end

  #
  # The main callback from the crawler; overrides crawler_process_page() as
  # defined by Msf::Auxiliary::HttpCrawler.
  #
  # Data we will report:
  # - The path of any URL found by the crawler (web.uri, :path => page.path)
  # - The occurrence of any form (web.form :path, :type (get|post|path_info), :params)
  #
  def crawler_process_page(t, page, cnt)
    msg = "[#{"%.5d" % cnt}/#{"%.5d" % max_page_count}] #{page.code || "ERR"} - #{t[:vhost]} - #{page.url}"

    case page.code
    when 301, 302
      if page.headers and page.headers["location"]
        print_status(msg + " -> " + page.headers["location"].to_s)
      else
        print_status(msg)
      end
    when 500...599
      # XXX: Log the fact that we hit an error page
      print_good(msg)
    when 401, 403
      print_good(msg)
    when 200
      print_status(msg)
    when 404
      print_error(msg)
    else
      print_error(msg)
    end

    #
    # Process the web page
    #

    info = {
      :web_site => t[:site],
      :path     => page.url.path,
      :query    => page.url.query,
      :code     => page.code,
      :body     => page.body,
      :headers  => page.headers
    }

    if page.headers['content-type']
      info[:ctype] = page.headers['content-type']
    end

    if page.headers['set-cookie']
      info[:cookie] = page.headers['set-cookie']
    end

    if page.headers['authorization']
      info[:auth] = page.headers['authorization']
    end

    if page.headers['location']
      info[:location] = page.headers['location']
    end

    if page.headers['last-modified']
      info[:mtime] = page.headers['last-modified']
    end

    # Report the web page to the database
    report_web_page(info)

    # Only process interesting response codes
    return if not [302, 301, 200, 500, 401, 403, 404].include?(page.code)

    #
    # Skip certain types of forms right off the bat
    #

    # Apache multiview directories
    return if page.url.query =~ /^C=[A-Z];O=/ # Apache

    forms = []
    form_template = { :web_site => t[:site] }

    if form = form_from_url( t[:site], page.url )
      forms << form
    end

    if page.doc
      page.doc.css("form").each do |f|

        target = page.url

        if f['action'] and not f['action'].strip.empty?
          action = f['action']

          # Prepend relative URLs with the current directory
          if action[0,1] != "/" and action !~ /\:\/\//
            # Extract the base href first
            base = target.path.gsub(/(.*\/)[^\/]+$/, "\\1")
            page.doc.css("base").each do |bref|
              if bref['href']
                base = bref['href']
              end
            end
            action = (base + "/").sub(/\/\/$/, '/') + action
          end

          target = page.to_absolute(URI( action )) rescue next

          if not page.in_domain?(target)
            # Replace 127.0.0.1 and non-qualified hostnames with our page.host
            # ex: http://localhost/url OR http://www01/url
            target_uri = URI(target.to_s)
            if (target_uri.host.index(".").nil? or target_uri.host == "127.0.0.1")
              target_uri.host = page.url.host
              target = target_uri
            else
              next
            end
          end
        end

        # Skip this form if it matches the exclusion criteria
        if !(target.to_s =~ get_link_filter)
          form = {}.merge!(form_template)
          form[:method] = (f['method'] || 'GET').upcase
          form[:query]  = target.query.to_s if form[:method] != "GET"
          form[:path]   = target.path
          form[:params] = []

          f.css('input', 'textarea').each do |inp|
            form[:params] << [inp['name'].to_s, inp['value'] || inp.content || '', { :type => inp['type'].to_s }]
          end

          f.css( 'select' ).each do |s|
            value = nil

            # Iterate over each option to find the default value (if there is a selected one)
            s.children.each do |opt|
              ov = opt['value'] || opt.content
              value = ov if opt['selected']
            end

            # Set the first one as the default value if we don't already have one
            value ||= s.children.first['value'] || s.children.first.content rescue ''

            form[:params] << [ s['name'].to_s, value.to_s, { :type => 'select' } ]
          end

          forms << form
        end
      end
    end

    # Report each of the discovered forms
    forms.each do |form|
      next if not form[:method]
      print_status((" " * 24) + "FORM: #{form[:method]} #{form[:path]}")
      report_web_form(form)
      self.form_count += 1
    end

    @for_each_page_blocks.each { |p| p.call( page ) }
  end

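  # Builds a form definition (method, path, params) from a URL alone, covering
  # GET query strings, trailing numeric path segments, and PATH_INFO-style URLs.
  # Returns nil if the URL does not suggest any testable parameters.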
  def form_from_url( website, url )
    url = URI( url.to_s ) if !url.is_a?( URI )

    begin
      # Scrub out the jsessionid appends
      url.path = url.path.sub(/;jsessionid=[a-zA-Z0-9]+/, '')
    rescue URI::Error
    end

    #
    # Continue processing forms
    #
    forms = []
    form_template = { :web_site => website }
    form = {}.merge(form_template)

    # This page has a query parameter we can test with GET parameters
    # ex: /test.php?a=b&c=d
    if url.query and not url.query.empty?
      form[:method] = 'GET'
      form[:path]   = url.path
      vars = url.query.split('&').map{ |x| x.split("=", 2) }
      form[:params] = vars
    end

    # This is a REST-ish application with numeric parameters
    # ex: /customers/343
    if not form[:path] and url.path.to_s =~ /(.*)\/(\d+)$/
      path_base = $1
      path_info = $2
      form[:method] = 'PATH'
      form[:path]   = path_base
      form[:params] = [['PATH', path_info]]
      form[:query]  = url.query.to_s
    end

    # This is an application that uses PATH_INFO for parameters:
    # ex: /index.php/Main_Page/Article01
    if not form[:path] and url.path.to_s =~ /(.*\/[a-z0-9A-Z]{3,256}\.[a-z0-9A-Z]{2,8})(\/.*)/
      path_base = $1
      path_info = $2
      form[:method] = 'PATH'
      form[:path]   = path_base
      form[:params] = [['PATH', path_info]]
      form[:query]  = url.query.to_s
    end

    form[:method] ? form : nil
  end

  private

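  # Converts the newline-separated wildcard patterns from ExcludePathPatterns
  # into anchored Regexps, treating '*' as a wildcard for any run of characters.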
  def opt_patterns_to_regexps( patterns )
    magic_wildcard_replacement = Rex::Text.rand_text_alphanumeric( 10 )

    patterns.to_s.split( /[\r\n]+/ ).map do |p|
      Regexp.new '^' + Regexp.escape( p.gsub( '*', magic_wildcard_replacement ) ).
        gsub( magic_wildcard_replacement, '.*' ) + '$'
    end
  end

end
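
# Example usage from msfconsole (module path and option names assumed from the
# usual framework layout; verify with 'show options'):
#   use auxiliary/scanner/http/crawler
#   set RHOSTS 192.0.2.10
#   set URI /
#   run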