metasploit-framework/lib/anemone/extractors/generic.rb

require 'uri'

#
# Generic extractor: scans the string form of the document for anything
# that looks like an absolute http/https URL, wherever it appears.
#
# Relies on a +doc+ method that is not defined in this file (presumably
# provided by the base class -- confirm against Anemone::Extractors::Base).
#
class Anemone::Extractors::Generic < Anemone::Extractors::Base

	#
	# Extracts http/https URLs from the text of the document.
	#
	# @return [Array<String>] the URLs found, with trailing quote garbage
	#   removed where it can be detected; an empty Array if anything raises.
	#
	def run
		# Serialise the document once. The very same text is used both for
		# the URL scan and for the quote-prefix checks below, so the checks
		# always look at the string the URLs were actually taken from.
		# (The checks previously referenced an +html+ name that this file
		# never defined.)
		html = doc.to_s

		URI.extract( html, %w(http https) ).map do |u|
			#
			# This extractor needs to be a tiny bit intelligent because
			# due to its generic nature it'll inevitably match some garbage.
			#
			# For example, if some JS code contains:
			#
			#	var = 'http://blah.com?id=1'
			#
			# or
			#
			#	var = { 'http://blah.com?id=1', 1 }
			#
			#
			# The URI.extract call will match:
			#
			#	http://blah.com?id=1'
			#
			# and
			#
			#	http://blah.com?id=1',
			#
			# respectively.
			#
			next u if !includes_quotes?( u )

			# Only trim when the match was opened by the same kind of quote
			# in the source text; otherwise the quote may legitimately be
			# part of the URL, so it is returned untouched.
			if html.include?( "'#{u}" )
				u.split( '\'' ).first
			elsif html.include?( "\"#{u}" )
				u.split( '"' ).first
			else
				u
			end
		end
	rescue
		# Best-effort: any StandardError raised while extracting yields an
		# empty result instead of propagating to the caller.
		[]
	end

	#
	# @param url [String] candidate URL
	# @return [Boolean] true if +url+ contains a single or a double quote
	#
	def includes_quotes?( url )
		url.include?( '\'' ) || url.include?( '"' )
	end

end