added http client 'download' method and updates to pdf author module from @bcoles

bug/bundler_fix
Brent Cook 2017-08-14 01:08:53 -04:00
parent 7e487ec745
commit 5d05ca154a
2 changed files with 149 additions and 71 deletions

View File

@@ -511,7 +511,16 @@ module Exploit::Remote::HttpClient
   #
   # Returns a hash of request opts from a URL string
   def request_opts_from_url(url)
-    tgt = URI.parse(url)
+    # verify and extract components from the URL
+    begin
+      tgt = URI.parse(url)
+      raise 'Invalid URL' unless tgt.scheme =~ %r{https?}
+      raise 'Invalid URL' if tgt.host.to_s.eql? ''
+    rescue => e
+      print_error "Could not parse URL: #{e}"
+      return nil
+    end
     opts = { 'rhost' => tgt.host, 'rport' => tgt.port, 'uri' => tgt.request_uri }
     opts['SSL'] = true if tgt.scheme == 'https'
     if tgt.query and tgt.query.size > 13
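With the stricter parsing above, a caller now gets nil back for a malformed or non-HTTP URL instead of an unhandled exception. A minimal sketch of the expected behaviour (the URLs are illustrative, not from the commit):

opts = request_opts_from_url('https://example.com/docs/report.pdf')
# => { 'rhost' => 'example.com', 'rport' => 443,
#      'uri' => '/docs/report.pdf', 'SSL' => true }

request_opts_from_url('ftp://example.com/report.pdf')
# prints "Could not parse URL: Invalid URL" and returns nil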
@@ -528,11 +537,39 @@ module Exploit::Remote::HttpClient
   #
   # Returns response from a simple URL call
   def request_url(url, keepalive = false)
-    res = send_request_raw(request_opts_from_url(url))
+    opts = request_opts_from_url(url)
+    return nil if opts.nil?
+    res = send_request_raw(opts)
     disconnect unless keepalive
     return res
   end
+
+  #
+  # Downloads a URL
+  def download(url)
+    print_status "Downloading '#{url}'"
+    begin
+      target = URI.parse url
+      raise 'Invalid URL' unless target.scheme =~ /https?/
+      raise 'Invalid URL' if target.host.to_s.eql? ''
+    rescue => e
+      print_error "Could not parse URL: #{e}"
+      return nil
+    end
+
+    res = request_url(url)
+    unless res
+      print_error 'Connection failed'
+      return nil
+    end
+
+    print_status "- HTTP #{res.code} - #{res.body.length} bytes"
+
+    res.code == 200 ? res.body : nil
+  end
+
   # removes HTML tags from a provided string.
   # The string is html-unescaped before the tags are removed
   # Leading whitespaces and double linebreaks are removed too
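A module that includes Msf::Exploit::Remote::HttpClient can then fetch a file in one call and simply test the return value. A minimal usage sketch (the URL and the surrounding run method are illustrative):

def run
  # download returns the response body on HTTP 200, otherwise nil
  body = download 'https://example.com/files/report.pdf'
  if body.nil?
    print_error 'Download failed'
    return
  end
  print_good "Retrieved #{body.length} bytes"
end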

View File

@@ -1,36 +1,47 @@
 ##
-# This module requires Metasploit: http://metasploit.com/download
+# This module requires Metasploit: https://metasploit.com/download
 # Current source: https://github.com/rapid7/metasploit-framework
 ##

 require 'pdf-reader'

 class MetasploitModule < Msf::Auxiliary
   include Msf::Exploit::Remote::HttpClient
+  include Msf::Auxiliary::Report

   def initialize(info = {})
     super(update_info(info,
       'Name' => 'Gather PDF Authors',
       'Description' => %q{
-        This module downloads PDF files and extracts the author's
+        This module downloads PDF documents and extracts the author's
         name from the document metadata.
+
+        This module expects a URL to be provided using the URL option.
+        Alternatively, multiple URLs can be provided by supplying the
+        path to a file containing a list of URLs in the URL_LIST option.
+
+        The URL_TYPE option is used to specify the type of URLs supplied.
+
+        By specifying 'pdf' for the URL_TYPE, the module will treat
+        the specified URL(s) as PDF documents. The module will
+        download the documents and extract the authors' names from the
+        document metadata.
+
+        By specifying 'html' for the URL_TYPE, the module will treat
+        the specified URL(s) as HTML pages. The module will scrape the
+        pages for links to PDF documents, download the PDF documents,
+        and extract the author's name from the document metadata.
       },
       'License' => MSF_LICENSE,
       'Author' => 'Brendan Coles <bcoles[at]gmail.com>'))
     register_options(
       [
-        OptString.new('URL', [ false, 'The URL of a PDF to analyse', '' ]),
-        OptString.new('URL_LIST', [ false, 'File containing a list of PDF URLs to analyze', '' ]),
-        OptString.new('OUTFILE', [ false, 'File to store output', '' ])
-      ])
-    register_advanced_options(
-      [
-        OptString.new('SSL_VERIFY', [ true, 'Verify SSL certificate', true ]),
-        OptString.new('PROXY', [ false, 'Proxy server to route connection. <host>:<port>', nil ]),
-        OptString.new('PROXY_USER', [ false, 'Proxy Server User', nil ]),
-        OptString.new('PROXY_PASS', [ false, 'Proxy Server Password', nil ])
+        OptString.new('URL', [ false, 'The target URL', '' ]),
+        OptString.new('URL_LIST', [ false, 'File containing a list of target URLs', '' ]),
+        OptEnum.new('URL_TYPE', [ true, 'The type of URL(s) specified', 'html', [ 'pdf', 'html' ] ]),
+        OptBool.new('STORE_LOOT', [ false, 'Store authors in loot', true ])
       ])
+
+    deregister_options 'RHOST', 'RPORT', 'VHOST'
   end

   def progress(current, total)
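Given the expanded description, driving the updated options from the console might look like this (a hypothetical session; the target URL is illustrative):

msf > use auxiliary/gather/pdf_authors
msf auxiliary(pdf_authors) > set URL http://example.com/docs/
URL => http://example.com/docs/
msf auxiliary(pdf_authors) > set URL_TYPE html
URL_TYPE => html
msf auxiliary(pdf_authors) > run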
@ -47,79 +58,117 @@ class MetasploitModule < Msf::Auxiliary
end end
unless File.file? datastore['URL_LIST'].to_s unless File.file? datastore['URL_LIST'].to_s
fail_with Failure::BadConfig, "File '#{datastore['URL_LIST']}' does not exit" fail_with Failure::BadConfig, "File '#{datastore['URL_LIST']}' does not exist"
end end
File.open(datastore['URL_LIST'], 'rb') {|f| f.read}.split(/\r?\n/) File.open(datastore['URL_LIST'], 'rb') { |f| f.read }.split(/\r?\n/)
end end
def read(data) def read(data)
begin Timeout.timeout(10) do
reader = PDF::Reader.new data reader = PDF::Reader.new data
return parse reader return parse reader
rescue PDF::Reader::MalformedPDFError
print_error "Could not parse PDF: PDF is malformed"
return
rescue PDF::Reader::UnsupportedFeatureError
print_error "Could not parse PDF: PDF::Reader::UnsupportedFeatureError"
return
rescue => e
print_error "Could not parse PDF: Unhandled exception: #{e}"
return
end end
rescue PDF::Reader::MalformedPDFError
print_error "Could not parse PDF: PDF is malformed (MalformedPDFError)"
return
rescue PDF::Reader::UnsupportedFeatureError
print_error "Could not parse PDF: PDF contains unsupported features (UnsupportedFeatureError)"
return
rescue SystemStackError
print_error "Could not parse PDF: PDF is malformed (SystemStackError)"
return
rescue SyntaxError
print_error "Could not parse PDF: PDF is malformed (SyntaxError)"
return
rescue Timeout::Error
print_error "Could not parse PDF: PDF is malformed (Timeout)"
return
rescue => e
print_error "Could not parse PDF: Unhandled exception: #{e}"
return
end end
def parse(reader) def parse(reader)
# PDF # PDF
#print_status "PDF Version: #{reader.pdf_version}" # print_status "PDF Version: #{reader.pdf_version}"
#print_status "PDF Title: #{reader.info['title']}" # print_status "PDF Title: #{reader.info['title']}"
#print_status "PDF Info: #{reader.info}" # print_status "PDF Info: #{reader.info}"
#print_status "PDF Metadata: #{reader.metadata}" # print_status "PDF Metadata: #{reader.metadata}"
#print_status "PDF Pages: #{reader.page_count}" # print_status "PDF Pages: #{reader.page_count}"
# Software # Software
#print_status "PDF Creator: #{reader.info[:Creator]}" # print_status "PDF Creator: #{reader.info[:Creator]}"
#print_status "PDF Producer: #{reader.info[:Producer]}" # print_status "PDF Producer: #{reader.info[:Producer]}"
# Author # Author
reader.info[:Author].class == String ? reader.info[:Author].split(/\r?\n/).first : '' reader.info[:Author].class == String ? reader.info[:Author].split(/\r?\n/).first : ''
end end
def download(url)
print_status "Downloading PDF from '#{url}'"
res = request_url(url)
print_status "HTTP #{res.code} -- Downloaded PDF (#{res.body.length} bytes)"
return res.code == 200 ? StringIO.new(res.body) : StringIO.new
end
def write_output(data)
return if datastore['OUTFILE'].to_s.eql? ''
print_status "Writing data to #{datastore['OUTFILE']}..."
file_name = datastore['OUTFILE']
if FileTest::exist?(file_name)
print_status 'OUTFILE already exists, appending..'
end
File.open(file_name, 'ab') do |fd|
fd.write(data)
end
end
def run def run
urls = load_urls urls = load_urls
if datastore['URL_TYPE'].eql? 'html'
urls = extract_pdf_links urls
if urls.empty?
print_error 'Found no links to PDF files'
return
end
print_line
print_good "Found links to #{urls.size} PDF files:"
print_line urls.join "\n"
print_line
end
authors = extract_authors urls
print_line
if authors.empty?
print_status 'Found no authors'
return
end
print_good "Found #{authors.size} authors: #{authors.join ', '}"
return unless datastore['STORE_LOOT']
p = store_loot 'pdf.authors', 'text/plain', nil, authors.join("\n"), 'pdf.authors.txt', 'PDF authors'
print_good "File saved in: #{p}"
end
def extract_pdf_links(urls)
print_status "Processing #{urls.size} URLs..." print_status "Processing #{urls.size} URLs..."
pdf_urls = []
urls.each_with_index do |url, index|
next if url.blank?
html = download url
next if html.blank?
doc = Nokogiri::HTML html
doc.search('a[href]').select { |n| n['href'][/(\.pdf$|\.pdf\?)/] }.map do |n|
pdf_urls << URI.join(url, n['href']).to_s
end
progress(index + 1, urls.size)
end
pdf_urls.uniq
end
def extract_authors(urls)
print_status "Processing #{urls.size} URLs..."
authors = [] authors = []
max_len = 256 max_len = 256
urls.each_with_index do |url, index| urls.each_with_index do |url, index|
next if url.blank? next if url.blank?
contents = download url file = download url
next if contents.blank? next if file.blank?
author = read contents pdf = StringIO.new
pdf.puts file
author = read pdf
unless author.blank? unless author.blank?
print_good "PDF Author: #{author}" print_good "PDF Author: #{author}"
if author.length > max_len if author.length > max_len
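The new extract_pdf_links helper leans on Nokogiri's CSS selectors; the same technique works standalone. A small sketch assuming the nokogiri gem is installed (the HTML and base URL are illustrative):

require 'nokogiri'
require 'uri'

html = '<a href="/a.pdf">A</a> <a href="/b.pdf?v=2">B</a> <a href="/about">C</a>'
base = 'http://example.com/'

# keep only hrefs ending in .pdf (or .pdf followed by a query string),
# then resolve each relative link against the page URL
pdf_urls = Nokogiri::HTML(html).search('a[href]').select { |n|
  n['href'][/(\.pdf$|\.pdf\?)/]
}.map { |n| URI.join(base, n['href']).to_s }
# => ["http://example.com/a.pdf", "http://example.com/b.pdf?v=2"]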
@@ -132,14 +181,6 @@ class MetasploitModule < Msf::Auxiliary
       progress(index + 1, urls.size)
     end

-    print_line
-
-    if authors.empty?
-      print_status 'Found no authors'
-      return
-    end
-
-    print_good "Found #{authors.size} authors: #{authors.join ', '}"
-    write_output authors.join "\n"
+    authors.uniq
   end
 end
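For completeness, the author extraction itself reduces to a few lines of pdf-reader outside the framework. A standalone sketch (assumes the pdf-reader gem; the file name is illustrative):

require 'pdf-reader'

# PDF::Reader accepts a filename or an IO object
reader = PDF::Reader.new 'report.pdf'
author = reader.info[:Author]
# take the first line only, as the module does
puts author.split(/\r?\n/).first if author.is_a? String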