Added HTTP client 'download' method and updates to the PDF author module (from @bcoles)
parent 7e487ec745
commit 5d05ca154a
@@ -511,7 +511,16 @@ module Exploit::Remote::HttpClient
   #
   # Returns a hash of request opts from a URL string
   def request_opts_from_url(url)
-    tgt = URI.parse(url)
+    # verify and extract components from the URL
+    begin
+      tgt = URI.parse(url)
+      raise 'Invalid URL' unless tgt.scheme =~ %r{https?}
+      raise 'Invalid URL' if tgt.host.to_s.eql? ''
+    rescue => e
+      print_error "Could not parse URL: #{e}"
+      return nil
+    end
+
     opts = { 'rhost' => tgt.host, 'rport' => tgt.port, 'uri' => tgt.request_uri }
     opts['SSL'] = true if tgt.scheme == 'https'
     if tgt.query and tgt.query.size > 13
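With this change, `request_opts_from_url` returns `nil` instead of raising when the URL fails validation. A minimal sketch of how a caller might handle that; the method name and URL below are invented for illustration:

```ruby
# Hypothetical caller inside a module that mixes in Msf::Exploit::Remote::HttpClient;
# the URL is a placeholder. request_opts_from_url now returns nil on an
# unparseable URL, so callers should check before passing the opts along.
def fetch_index
  opts = request_opts_from_url('http://203.0.113.5/app/')
  return if opts.nil? # parse failure was already printed

  res = send_request_raw(opts)
  print_status("Got HTTP #{res.code}") if res
end
```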
@@ -528,11 +537,39 @@ module Exploit::Remote::HttpClient
   #
   # Returns response from a simple URL call
   def request_url(url, keepalive = false)
-    res = send_request_raw(request_opts_from_url(url))
+    opts = request_opts_from_url(url)
+    return nil if opts.nil?
+    res = send_request_raw(opts)
     disconnect unless keepalive
     return res
   end
 
+  #
+  # Downloads a URL
+  def download(url)
+    print_status "Downloading '#{url}'"
+
+    begin
+      target = URI.parse url
+      raise 'Invalid URL' unless target.scheme =~ /https?/
+      raise 'Invalid URL' if target.host.to_s.eql? ''
+    rescue => e
+      print_error "Could not parse URL: #{e}"
+      return nil
+    end
+
+    res = request_url(url)
+
+    unless res
+      print_error 'Connection failed'
+      return nil
+    end
+
+    print_status "- HTTP #{res.code} - #{res.body.length} bytes"
+
+    res.code == 200 ? res.body : nil
+  end
+
   # removes HTML tags from a provided string.
   # The string is html-unescaped before the tags are removed
   # Leading whitespaces and double linebreaks are removed too
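The new `download` helper wraps URL validation, the request, and the status check, returning the body only for an HTTP 200 response. A hedged usage sketch; the run method and URL are invented for illustration:

```ruby
# Hypothetical run method in a module using the mixin; the URL is a placeholder.
# download returns the response body String on HTTP 200, and nil on a parse
# failure, a connection failure, or any non-200 response.
def run
  data = download('http://203.0.113.5/files/report.pdf')
  if data.nil?
    print_error('Download failed')
    return
  end
  print_good("Fetched #{data.length} bytes")
end
```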
@@ -1,36 +1,47 @@
 ##
-# This module requires Metasploit: http://metasploit.com/download
+# This module requires Metasploit: https://metasploit.com/download
 # Current source: https://github.com/rapid7/metasploit-framework
 ##
 
 require 'pdf-reader'
 
 class MetasploitModule < Msf::Auxiliary
 
   include Msf::Exploit::Remote::HttpClient
   include Msf::Auxiliary::Report
 
   def initialize(info = {})
     super(update_info(info,
       'Name' => 'Gather PDF Authors',
       'Description' => %q{
-        This module downloads PDF files and extracts the author's
+        This module downloads PDF documents and extracts the author's
         name from the document metadata.
+
+        This module expects a URL to be provided using the URL option.
+        Alternatively, multiple URLs can be provided by supplying the
+        path to a file containing a list of URLs in the URL_LIST option.
+
+        The URL_TYPE option is used to specify the type of URLs supplied.
+
+        By specifying 'pdf' for the URL_TYPE, the module will treat
+        the specified URL(s) as PDF documents. The module will
+        download the documents and extract the authors' names from the
+        document metadata.
+
+        By specifying 'html' for the URL_TYPE, the module will treat
+        the specified URL(s) as HTML pages. The module will scrape the
+        pages for links to PDF documents, download the PDF documents,
+        and extract the author's name from the document metadata.
       },
       'License' => MSF_LICENSE,
       'Author' => 'Brendan Coles <bcoles[at]gmail.com>'))
     register_options(
       [
-        OptString.new('URL', [ false, 'The URL of a PDF to analyse', '' ]),
-        OptString.new('URL_LIST', [ false, 'File containing a list of PDF URLs to analyze', '' ]),
-        OptString.new('OUTFILE', [ false, 'File to store output', '' ])
-      ])
-    register_advanced_options(
-      [
-        OptString.new('SSL_VERIFY', [ true, 'Verify SSL certificate', true ]),
-        OptString.new('PROXY', [ false, 'Proxy server to route connection. <host>:<port>', nil ]),
-        OptString.new('PROXY_USER', [ false, 'Proxy Server User', nil ]),
-        OptString.new('PROXY_PASS', [ false, 'Proxy Server Password', nil ])
+        OptString.new('URL', [ false, 'The target URL', '' ]),
+        OptString.new('URL_LIST', [ false, 'File containing a list of target URLs', '' ]),
+        OptEnum.new('URL_TYPE', [ true, 'The type of URL(s) specified', 'html', [ 'pdf', 'html' ] ]),
+        OptBool.new('STORE_LOOT', [ false, 'Store authors in loot', true ])
       ])
+    deregister_options 'RHOST', 'RPORT', 'VHOST'
   end
 
   def progress(current, total)
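For context on the reworked options: URL_LIST points at a plain text file with one URL per line, which `load_urls` (next hunk) splits on CR/LF. A standalone sketch of that input format; the file name and URLs are invented for this example:

```ruby
# Standalone illustration of the URL_LIST file format: one URL per line,
# matching the split(/\r?\n/) in load_urls. 'urls.txt' is invented.
File.write('urls.txt', "http://example.com/a.pdf\r\nhttp://example.com/docs/\n")
urls = File.open('urls.txt', 'rb') { |f| f.read }.split(/\r?\n/)
p urls # => ["http://example.com/a.pdf", "http://example.com/docs/"]
```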
@@ -47,27 +58,36 @@ class MetasploitModule < Msf::Auxiliary
     end
 
     unless File.file? datastore['URL_LIST'].to_s
-      fail_with Failure::BadConfig, "File '#{datastore['URL_LIST']}' does not exit"
+      fail_with Failure::BadConfig, "File '#{datastore['URL_LIST']}' does not exist"
     end
 
     File.open(datastore['URL_LIST'], 'rb') { |f| f.read }.split(/\r?\n/)
   end
 
   def read(data)
     begin
+      Timeout.timeout(10) do
         reader = PDF::Reader.new data
         return parse reader
+      end
     rescue PDF::Reader::MalformedPDFError
-      print_error "Could not parse PDF: PDF is malformed"
+      print_error "Could not parse PDF: PDF is malformed (MalformedPDFError)"
       return
     rescue PDF::Reader::UnsupportedFeatureError
-      print_error "Could not parse PDF: PDF::Reader::UnsupportedFeatureError"
+      print_error "Could not parse PDF: PDF contains unsupported features (UnsupportedFeatureError)"
       return
+    rescue SystemStackError
+      print_error "Could not parse PDF: PDF is malformed (SystemStackError)"
+      return
+    rescue SyntaxError
+      print_error "Could not parse PDF: PDF is malformed (SyntaxError)"
+      return
+    rescue Timeout::Error
+      print_error "Could not parse PDF: PDF is malformed (Timeout)"
+      return
     rescue => e
       print_error "Could not parse PDF: Unhandled exception: #{e}"
       return
     end
   end
 
   def parse(reader)
     # PDF
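The hardened `read` method bounds parsing with a 10-second timeout and rescues pdf-reader's failure modes individually. A minimal standalone sketch of the same guarded parse, assuming an invented local file in place of a downloaded document:

```ruby
require 'pdf-reader'
require 'stringio'
require 'timeout'

# Standalone sketch of the guarded parse: 'example.pdf' is an invented
# stand-in for a document already downloaded into memory.
data = StringIO.new(File.binread('example.pdf'))
begin
  Timeout.timeout(10) do
    reader = PDF::Reader.new(data)
    puts reader.info[:Author] # info holds the PDF metadata dictionary
  end
rescue PDF::Reader::MalformedPDFError, Timeout::Error => e
  warn "Could not parse PDF: #{e.class}"
end
```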
@@ -85,41 +105,70 @@ class MetasploitModule < Msf::Auxiliary
     reader.info[:Author].class == String ? reader.info[:Author].split(/\r?\n/).first : ''
   end
 
-  def download(url)
-    print_status "Downloading PDF from '#{url}'"
-
-    res = request_url(url)
-    print_status "HTTP #{res.code} -- Downloaded PDF (#{res.body.length} bytes)"
-
-    return res.code == 200 ? StringIO.new(res.body) : StringIO.new
-  end
-
-  def write_output(data)
-    return if datastore['OUTFILE'].to_s.eql? ''
-
-    print_status "Writing data to #{datastore['OUTFILE']}..."
-    file_name = datastore['OUTFILE']
-
-    if FileTest::exist?(file_name)
-      print_status 'OUTFILE already exists, appending..'
-    end
-
-    File.open(file_name, 'ab') do |fd|
-      fd.write(data)
-    end
-  end
-
   def run
 
     urls = load_urls
+
+    if datastore['URL_TYPE'].eql? 'html'
+      urls = extract_pdf_links urls
+
+      if urls.empty?
+        print_error 'Found no links to PDF files'
+        return
+      end
+
+      print_line
+      print_good "Found links to #{urls.size} PDF files:"
+      print_line urls.join "\n"
+      print_line
+    end
+
+    authors = extract_authors urls
+
+    print_line
+
+    if authors.empty?
+      print_status 'Found no authors'
+      return
+    end
+
+    print_good "Found #{authors.size} authors: #{authors.join ', '}"
+
+    return unless datastore['STORE_LOOT']
+
+    p = store_loot 'pdf.authors', 'text/plain', nil, authors.join("\n"), 'pdf.authors.txt', 'PDF authors'
+    print_good "File saved in: #{p}"
+  end
+
+  def extract_pdf_links(urls)
+    print_status "Processing #{urls.size} URLs..."
+
+    pdf_urls = []
+    urls.each_with_index do |url, index|
+      next if url.blank?
+      html = download url
+      next if html.blank?
+      doc = Nokogiri::HTML html
+      doc.search('a[href]').select { |n| n['href'][/(\.pdf$|\.pdf\?)/] }.map do |n|
+        pdf_urls << URI.join(url, n['href']).to_s
+      end
+      progress(index + 1, urls.size)
+    end
+
+    pdf_urls.uniq
+  end
+
+  def extract_authors(urls)
     print_status "Processing #{urls.size} URLs..."
 
     authors = []
     max_len = 256
     urls.each_with_index do |url, index|
       next if url.blank?
-      contents = download url
-      next if contents.blank?
-      author = read contents
+      file = download url
+      next if file.blank?
+      pdf = StringIO.new
+      pdf.puts file
+      author = read pdf
       unless author.blank?
         print_good "PDF Author: #{author}"
         if author.length > max_len
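The new `extract_pdf_links` helper scrapes anchors whose href ends in `.pdf` (or carries a `.pdf?` query) and resolves them against the page URL. A standalone sketch of that selection logic; the HTML sample and base URL are invented:

```ruby
require 'nokogiri'
require 'uri'

# Standalone sketch of the link-extraction step: keep only PDF hrefs and
# resolve relative paths against the page URL. Inputs are invented.
html = '<a href="/docs/report.pdf">Report</a><a href="/about">About</a>'
base = 'http://example.com/index.html'
pdf_urls = Nokogiri::HTML(html)
                   .search('a[href]')
                   .select { |n| n['href'][/(\.pdf$|\.pdf\?)/] }
                   .map { |n| URI.join(base, n['href']).to_s }
p pdf_urls # => ["http://example.com/docs/report.pdf"]
```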
@@ -132,14 +181,6 @@ class MetasploitModule < Msf::Auxiliary
       progress(index + 1, urls.size)
     end
 
     print_line
-
-    if authors.empty?
-      print_status 'Found no authors'
-      return
-    end
-
-    print_good "Found #{authors.size} authors: #{authors.join ', '}"
-    write_output authors.join "\n"
     authors.uniq
   end
 end