Replace hpricot with nokogiri

bug/bundler_fix
Sam 2014-07-17 00:14:07 +02:00
parent f8e47a5c61
commit 8cabc753a9
9 changed files with 76 additions and 136 deletions

1
.gitignore vendored
View File

@ -3,6 +3,7 @@ Gemfile.local
Gemfile.local.lock
# Rubymine project directory
.idea
.ruby-version
# Sublime Text project directory (not created by ST by default)
.sublime-project
# RVM control file, keep this to avoid backdooring Metasploit

View File

@ -1 +1 @@
1.9.3-p547
1.9.3-p545

View File

@ -13,7 +13,7 @@
require 'rubygems'
require 'pathname'
require 'hpricot'
require 'nokogiri'
require 'uri'
class CrawlerSimple < BaseParser
@ -24,17 +24,14 @@ class CrawlerSimple < BaseParser
return
end
doc = Hpricot(result.body.to_s)
doc.search('a').each do |link|
hr = link.attributes['href']
if hr and !hr.match(/^(\#|javascript\:)/)
# doc = Hpricot(result.body.to_s)
doc = Nokogiri::HTML(result.body.to_s)
doc.css('a').each do |anchor_tag|
hr = anchor_tag['href']
if hr && !hr.match(/^(\#|javascript\:)/)
begin
hreq = urltohash('GET', hr, request['uri'], nil)
insertnewpath(hreq)
rescue URI::InvalidURIError
#puts "Parse error"
#puts "Error: #{link[0]}"

View File

@ -13,7 +13,7 @@
require 'rubygems'
require 'pathname'
require 'hpricot'
require 'nokogiri'
require 'uri'
class CrawlerForms < BaseParser
@ -27,49 +27,30 @@ class CrawlerForms < BaseParser
hr = ''
m = ''
doc = Hpricot(result.body.to_s)
doc.search('form').each do |f|
hr = f.attributes['action']
doc = Nokogiri::HTML(result.body.to_s)
doc.css('form').each do |f|
hr = f['action']
fname = f.attributes['name']
if fname.empty?
fname = "NONE"
end
fname = f['name']
fname = "NONE" if fname.empty?
m = "GET"
if !f.attributes['method'].empty?
m = f.attributes['method'].upcase
end
m = f['method'].empty? ? 'GET' : f['method'].upcase
#puts "Parsing form name: #{fname} (#{m})"
htmlform = Hpricot(f.inner_html)
htmlform = Nokogiri::HTML(f.inner_html)
arrdata = []
htmlform.search('input').each do |p|
#puts p.attributes['name']
#puts p.attributes['type']
#puts p.attributes['value']
#raw_request has uri_encoding disabled as it encodes '='.
arrdata << (p.attributes['name'] + "=" + Rex::Text.uri_encode(p.attributes['value']))
htmlform.css('input').each do |p|
arrdata << "#{p['name']}=#{Rex::Text.uri_encode(p['value'])}"
end
data = arrdata.join("&").to_s
begin
hreq = urltohash(m, hr, request['uri'], data)
hreq['ctype'] = 'application/x-www-form-urlencoded'
insertnewpath(hreq)
rescue URI::InvalidURIError
#puts "Parse error"
#puts "Error: #{link[0]}"
end
end
end

View File

@ -9,33 +9,29 @@
require 'rubygems'
require 'pathname'
require 'hpricot'
require 'nokogiri'
require 'uri'
class CrawlerFrames < BaseParser
def parse(request,result)
if !result['Content-Type'].include? "text/html"
return
end
return unless result['Content-Type'].include?('text/html')
doc = Hpricot(result.body.to_s)
doc.search('iframe').each do |ifra|
doc = Nokogiri::HTML(result.body.to_s)
doc.css('iframe').each do |ifra|
ir = ifra['src']
ir = ifra.attributes['src']
if ir and !ir.match(/^(\#|javascript\:)/)
if ir && !ir.match(/^(\#|javascript\:)/)
begin
hreq = urltohash('GET', ir, request['uri'], nil)
insertnewpath(hreq)
rescue URI::InvalidURIError
#puts "Error"
end
end
end
end
end
end
end
end

View File

@ -10,33 +10,26 @@
require 'rubygems'
require 'pathname'
require 'hpricot'
require 'nokogiri'
require 'uri'
class CrawlerImage < BaseParser
def parse(request,result)
if !result['Content-Type'].include? "text/html"
return
end
return unless result['Content-Type'].include?('text/html')
doc = Hpricot(result.body.to_s)
doc.search('img').each do |i|
im = i.attributes['src']
if im and !im.match(/^(\#|javascript\:)/)
doc = Nokogiri::HTML(result.body.to_s)
doc.css('img').each do |i|
im = i['src']
if im && !im.match(/^(\#|javascript\:)/)
begin
hreq = urltohash('GET', im, request['uri'], nil)
insertnewpath(hreq)
rescue URI::InvalidURIError
#puts "Parse error"
#puts "Error: #{i[0]}"
end
end
end
end
end

View File

@ -10,33 +10,25 @@
require 'rubygems'
require 'pathname'
require 'hpricot'
require 'nokogiri'
require 'uri'
class CrawlerLink < BaseParser
def parse(request,result)
return unless result['Content-Type'].include?('text/html')
if !result['Content-Type'].include? "text/html"
return
end
doc = Hpricot(result.body.to_s)
doc.search('link').each do |link|
hr = link.attributes['href']
if hr and !hr.match(/^(\#|javascript\:)/)
doc = Nokogiri::HTML(result.body.to_s)
doc.css('link').each do |link|
hr = link['href']
if hr && !hr.match(/^(\#|javascript\:)/)
begin
hreq = urltohash('GET', hr, request['uri'], nil)
insertnewpath(hreq)
rescue URI::InvalidURIError
#puts "Parse error"
#puts "Error: #{link[0]}"
end
end
end
end
end

View File

@ -13,36 +13,25 @@
require 'rubygems'
require 'pathname'
require 'hpricot'
require 'nokogiri'
require 'uri'
class CrawlerObjects < BaseParser
def parse(request,result)
if !result['Content-Type'].include? "text/html"
return
end
return unless result['Content-Type'].include?('text/html') # TODO: use MIXIN
hr = ''
m = ''
doc = Hpricot(result.body.to_s)
doc.search("//object/embed").each do |obj|
doc = Nokogiri::HTML(result.body.to_s)
doc.xpath("//object/embed").each do |obj|
s = obj['src']
begin
hreq = urltohash('GET', s, request['uri'], nil)
insertnewpath(hreq)
rescue URI::InvalidURIError
#puts "Parse error"
#puts "Error: #{link[0]}"
end
end
end
end
end

View File

@ -13,36 +13,27 @@
require 'rubygems'
require 'pathname'
require 'hpricot'
require 'nokogiri'
require 'uri'
class CrawlerScripts < BaseParser
def parse(request,result)
if !result['Content-Type'].include? "text/html"
return
end
return unless result['Content-Type'].include? "text/html"
hr = ''
m = ''
doc = Hpricot(result.body.to_s)
doc.search("//script").each do |obj|
doc = Nokogiri::HTML(result.body.to_s)
doc.xpath("//script").each do |obj|
s = obj['src']
begin
hreq = urltohash('GET', s, request['uri'], nil)
insertnewpath(hreq)
rescue URI::InvalidURIError
#puts "Parse error"
#puts "Error: #{link[0]}"
end
end
end
end
end
end