metasploit-framework/lib/anemone/extractors/generic.rb

require 'uri'

#
# Generic, catch-all path extractor: scans the string form of the document
# for anything that looks like an http/https URL.
#
class Anemone::Extractors::Generic < Anemone::Extractors::Base

  #
  # Extracts http(s) URLs from the serialized document.
  #
  # +doc+ is not defined in this file; it is expected to be supplied by the
  # base class.
  #
  # @return [Array<String>] the extracted URLs (with trailing quote garbage
  #   trimmed where it can be detected), or an empty array if any
  #   StandardError is raised while extracting (best-effort behaviour).
  #
  def run
    # Serialize once and reuse the same text for both the extraction and the
    # quote-context checks below, so they are guaranteed to look at the same
    # string (the checks previously referenced an +html+ name that this
    # file never defined).
    html = doc.to_s

    URI.extract( html, %w(http https) ).map do |u|
      #
      # This extractor needs to be a tiny bit intelligent because
      # due to its generic nature it'll inevitably match some garbage.
      #
      # For example, if some JS code contains:
      #
      #	var = 'http://blah.com?id=1'
      #
      # or
      #
      #	var = { 'http://blah.com?id=1', 1 }
      #
      #
      # The URI.extract call will match:
      #
      #	http://blah.com?id=1'
      #
      # and
      #
      #	http://blah.com?id=1',
      #
      # respectively.
      #
      next u unless includes_quotes?( u )

      # If the match is immediately preceded by a quote character in the
      # document, treat it as a quoted string literal and keep only the part
      # before the first occurrence of that quote character.
      if html.include?( "'#{u}" )
        u.split( '\'' ).first
      elsif html.include?( "\"#{u}" )
        u.split( '"' ).first
      else
        u
      end
    end
  rescue
    # Deliberately best-effort: a failure here must not abort the caller,
    # so report "no URLs found" instead.
    []
  end

  #
  # @param url [String] candidate URL as matched by URI.extract
  # @return [Boolean] true if +url+ contains a single or a double quote
  #
  def includes_quotes?( url )
    url.include?( '\'' ) || url.include?( '"' )
  end

end