From 62ba505f10f70bb4aa44f67cbf41b6e0dc6c16c6 Mon Sep 17 00:00:00 2001 From: et <> Date: Wed, 7 Apr 2010 03:32:50 +0000 Subject: [PATCH] Extra parsing modules git-svn-id: file:///home/svn/framework3/trunk@9030 4d416f70-5f16-0410-b530-b9f4589650da --- data/msfcrawler/flash.rb | 28 +++++++------ data/msfcrawler/objects.rb | 85 ++++++++++++++++++++++++++++++++++++++ data/msfcrawler/scripts.rb | 85 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 186 insertions(+), 12 deletions(-) create mode 100644 data/msfcrawler/objects.rb create mode 100644 data/msfcrawler/scripts.rb diff --git a/data/msfcrawler/flash.rb b/data/msfcrawler/flash.rb index 9c6385f89b..2ae549d90f 100644 --- a/data/msfcrawler/flash.rb +++ b/data/msfcrawler/flash.rb @@ -3,14 +3,18 @@ require 'pathname' require 'uri' -$flarebinary = "/Users/et/Downloads/flare" -$flareoutdir = "/Users/et/Downloads/temp/" +$flarebinary = "/home/et/Downloads/flare" +$flareoutdir = "/home/et/Downloads/" class CrawlerFlash < BaseParser def parse(request,result) - + rexp = ['loadMovieNum\(\'(.*?)\'', + 'loadMovie\(\'(.*?)\'', + 'getURL\(\'(.*?)\'' + ] + if !result['Content-Type'].include? "application/x-shockwave-flash" return @@ -22,7 +26,8 @@ class CrawlerFlash < BaseParser ffile = File.new(outswf, "wb") ffile.puts(result.body) - + ffile.close + system("#{$flarebinary} #{outswf}") outflr = outswf.gsub('.swf','.flr') @@ -36,14 +41,13 @@ class CrawlerFlash < BaseParser File.open(outflr, "r") do |infile| while (line = infile.gets) - - links = line.to_s.scan(/\b(?:(?:https?|ftp):\/\/|www\.)[-a-z0-9+&@#\/%?=~_|!:,.;]*[-a-z0-9+&@#\/%=~_|]/i) #" + + rexp.each do |r| + links = line.to_s.scan(Regexp.new(r,true)) #" links.each do |link| - puts "SWF: #{link}" - begin - uri = URI.parse(link) + uri = URI.parse(link[0]) tssl = false if uri.scheme == "https" @@ -94,6 +98,7 @@ class CrawlerFlash < BaseParser 'data' => nil } + insertnewpath(hreq) rescue URI::InvalidURIError @@ -102,9 +107,8 @@ class CrawlerFlash < BaseParser end end end - end - - puts "Done. " + end + end end end diff --git a/data/msfcrawler/objects.rb b/data/msfcrawler/objects.rb new file mode 100644 index 0000000000..ca9b706b9f --- /dev/null +++ b/data/msfcrawler/objects.rb @@ -0,0 +1,85 @@ +require 'rubygems' +require 'pathname' +require 'hpricot' +require 'uri' + +class CrawlerObjects < BaseParser + + def parse(request,result) + + if !result['Content-Type'].include? "text/html" + return + end + + hr = '' + m = '' + + doc = Hpricot(result.body.to_s) + doc.search("//object/embed").each do |obj| + + s = obj['src'] + + begin + uri = URI.parse(s) + + tssl = false + if uri.scheme == "https" + tssl = true + else + tssl = false + end + + if !uri.host or uri.host == nil + thost = request['rhost'] + tssl = self.targetssl + else + thost = uri.host + end + + if !uri.port or uri.port == nil + tport = request['rport'] + else + tport = uri.port + end + + if !uri.path or uri.path == nil + tpath = "/" + else + tpath = uri.path + end + + newp = Pathname.new(tpath) + oldp = Pathname.new(request['uri']) + if !newp.absolute? + if oldp.to_s[-1,1] == '/' + newp = oldp+newp + else + if !newp.to_s.empty? + newp = File.join(oldp.dirname,newp) + end + end + end + + hreq = { + 'rhost' => thost, + 'rport' => tport, + 'uri' => newp.to_s, + 'method' => 'GET', + 'ctype' => 'text/html', + 'ssl' => tssl, + 'query' => uri.query, + 'data' => nil + + } + #puts hreq + insertnewpath(hreq) + + + rescue URI::InvalidURIError + #puts "Parse error" + #puts "Error: #{link[0]}" + end + end + end +end + diff --git a/data/msfcrawler/scripts.rb b/data/msfcrawler/scripts.rb new file mode 100644 index 0000000000..bfcbb5fe0b --- /dev/null +++ b/data/msfcrawler/scripts.rb @@ -0,0 +1,85 @@ +require 'rubygems' +require 'pathname' +require 'hpricot' +require 'uri' + +class CrawlerScripts < BaseParser + + def parse(request,result) + + if !result['Content-Type'].include? "text/html" + return + end + + hr = '' + m = '' + + doc = Hpricot(result.body.to_s) + doc.search("//script").each do |obj| + + s = obj['src'] + + begin + uri = URI.parse(s) + + tssl = false + if uri.scheme == "https" + tssl = true + else + tssl = false + end + + if !uri.host or uri.host == nil + thost = request['rhost'] + tssl = self.targetssl + else + thost = uri.host + end + + if !uri.port or uri.port == nil + tport = request['rport'] + else + tport = uri.port + end + + if !uri.path or uri.path == nil + tpath = "/" + else + tpath = uri.path + end + + newp = Pathname.new(tpath) + oldp = Pathname.new(request['uri']) + if !newp.absolute? + if oldp.to_s[-1,1] == '/' + newp = oldp+newp + else + if !newp.to_s.empty? + newp = File.join(oldp.dirname,newp) + end + end + end + + hreq = { + 'rhost' => thost, + 'rport' => tport, + 'uri' => newp.to_s, + 'method' => 'GET', + 'ctype' => 'text/html', + 'ssl' => tssl, + 'query' => uri.query, + 'data' => nil + + } + #puts hreq + insertnewpath(hreq) + + + rescue URI::InvalidURIError + #puts "Parse error" + #puts "Error: #{link[0]}" + end + end + end +end +