2010-03-26 02:39:19 +00:00
|
|
|
require 'rubygems'
|
|
|
|
require 'pathname'
|
|
|
|
require 'hpricot'
|
|
|
|
require 'uri'
|
|
|
|
|
|
|
|
class CrawlerForms < BaseParser
|
|
|
|
|
|
|
|
def parse(request,result)
|
|
|
|
|
|
|
|
if !result['Content-Type'].include? "text/html"
|
|
|
|
return
|
|
|
|
end
|
2010-03-26 03:15:00 +00:00
|
|
|
|
|
|
|
hr = ''
|
|
|
|
m = ''
|
2010-03-26 02:39:19 +00:00
|
|
|
|
|
|
|
doc = Hpricot(result.body.to_s)
|
|
|
|
doc.search('form').each do |f|
|
2010-03-26 03:15:00 +00:00
|
|
|
hr = f.attributes['action']
|
|
|
|
puts "Parsing form: #{f.attributes['name']}"
|
|
|
|
|
|
|
|
m = "GET"
|
|
|
|
if f.attributes['method']
|
|
|
|
m = f.attributes['method']
|
|
|
|
end
|
2010-03-26 02:39:19 +00:00
|
|
|
|
|
|
|
htmlform = Hpricot(f.inner_html)
|
2010-03-26 03:15:00 +00:00
|
|
|
|
|
|
|
arrdata = []
|
|
|
|
|
2010-03-26 02:39:19 +00:00
|
|
|
htmlform.search('input').each do |p|
|
|
|
|
#puts p.attributes['name']
|
|
|
|
#puts p.attributes['type']
|
|
|
|
#puts p.attributes['value']
|
2010-03-26 03:15:00 +00:00
|
|
|
arrdata << (p.attributes['name'] + "=" + p.attributes['value'])
|
2010-03-26 02:39:19 +00:00
|
|
|
end
|
2010-03-26 03:15:00 +00:00
|
|
|
|
|
|
|
data = arrdata.join("&").to_s
|
|
|
|
|
2010-03-26 02:39:19 +00:00
|
|
|
|
2010-03-26 03:15:00 +00:00
|
|
|
begin
|
|
|
|
uri = URI.parse(hr)
|
|
|
|
|
|
|
|
tssl = false
|
|
|
|
if uri.scheme == "https"
|
|
|
|
tssl = true
|
|
|
|
else
|
|
|
|
tssl = false
|
|
|
|
end
|
|
|
|
|
|
|
|
if !uri.host or uri.host == nil
|
|
|
|
thost = request['rhost']
|
|
|
|
tssl = self.targetssl
|
|
|
|
else
|
|
|
|
thost = uri.host
|
|
|
|
end
|
|
|
|
|
|
|
|
if !uri.port or uri.port == nil
|
|
|
|
tport = request['rport']
|
|
|
|
else
|
|
|
|
tport = uri.port
|
|
|
|
end
|
|
|
|
|
|
|
|
if !uri.path or uri.path == nil
|
|
|
|
tpath = "/"
|
|
|
|
else
|
|
|
|
tpath = uri.path
|
|
|
|
end
|
|
|
|
|
|
|
|
newp = Pathname.new(tpath)
|
|
|
|
oldp = Pathname.new(request['uri'])
|
|
|
|
if !oldp.absolute?
|
|
|
|
if !newp.absolute?
|
|
|
|
newp = oldp + newp.cleanpath
|
|
|
|
end
|
|
|
|
end
|
|
|
|
|
|
|
|
hreq = {
|
|
|
|
'rhost' => thost,
|
|
|
|
'rport' => tport,
|
|
|
|
'uri' => newp.to_s,
|
|
|
|
'method' => m,
|
|
|
|
'ctype' => 'text/plain',
|
|
|
|
'ssl' => tssl,
|
|
|
|
'query' => uri.query,
|
|
|
|
'data' => data
|
|
|
|
|
|
|
|
}
|
|
|
|
#puts hreq
|
|
|
|
insertnewpath(hreq)
|
|
|
|
|
|
|
|
|
|
|
|
rescue URI::InvalidURIError
|
|
|
|
#puts "Parse error"
|
|
|
|
#puts "Error: #{link[0]}"
|
|
|
|
end
|
|
|
|
end
|
2010-03-26 02:39:19 +00:00
|
|
|
end
|
|
|
|
end
|
|
|
|
|