added http client 'download' method and updates to pdf author module from @bcoles
parent
7e487ec745
commit
5d05ca154a
|
@ -511,7 +511,16 @@ module Exploit::Remote::HttpClient
|
||||||
#
|
#
|
||||||
# Returns a hash of request opts from a URL string
|
# Returns a hash of request opts from a URL string
|
||||||
def request_opts_from_url(url)
|
def request_opts_from_url(url)
|
||||||
tgt = URI.parse(url)
|
# verify and extract components from the URL
|
||||||
|
begin
|
||||||
|
tgt = URI.parse(url)
|
||||||
|
raise 'Invalid URL' unless tgt.scheme =~ %r{https?}
|
||||||
|
raise 'Invalid URL' if tgt.host.to_s.eql? ''
|
||||||
|
rescue => e
|
||||||
|
print_error "Could not parse URL: #{e}"
|
||||||
|
return nil
|
||||||
|
end
|
||||||
|
|
||||||
opts = { 'rhost' => tgt.host, 'rport' => tgt.port, 'uri' => tgt.request_uri }
|
opts = { 'rhost' => tgt.host, 'rport' => tgt.port, 'uri' => tgt.request_uri }
|
||||||
opts['SSL'] = true if tgt.scheme == 'https'
|
opts['SSL'] = true if tgt.scheme == 'https'
|
||||||
if tgt.query and tgt.query.size > 13
|
if tgt.query and tgt.query.size > 13
|
||||||
|
@ -528,11 +537,39 @@ module Exploit::Remote::HttpClient
|
||||||
#
|
#
|
||||||
# Returns response from a simple URL call
|
# Returns response from a simple URL call
|
||||||
def request_url(url, keepalive = false)
|
def request_url(url, keepalive = false)
|
||||||
res = send_request_raw(request_opts_from_url(url))
|
opts = request_opts_from_url(url)
|
||||||
|
return nil if opts.nil?
|
||||||
|
res = send_request_raw(opts)
|
||||||
disconnect unless keepalive
|
disconnect unless keepalive
|
||||||
return res
|
return res
|
||||||
end
|
end
|
||||||
|
|
||||||
|
#
|
||||||
|
# Downloads a URL
|
||||||
|
# Fetches +url+ over HTTP(S) and returns the response body.
#
# The URL is validated before any request is made: it must parse, use the
# http or https scheme, and carry a non-empty host. Validation failures and
# connection failures are reported with print_error.
#
# @param url [String] absolute http:// or https:// URL
# @return [String, nil] the response body when the server answers 200;
#   nil when the URL is rejected, the request yields no response, or the
#   status code is anything other than 200
def download(url)
  print_status "Downloading '#{url}'"

  begin
    target = URI.parse url
    # Anchor the scheme match. The previous unanchored /https?/ accepted any
    # scheme that merely contained "http" (for example "xhttp" or "httpx").
    raise 'Invalid URL' unless target.scheme.to_s =~ /\Ahttps?\z/i
    raise 'Invalid URL' if target.host.to_s.empty?
  rescue => e
    # Covers both the explicit raises above and any StandardError raised
    # while parsing (e.g. URI::InvalidURIError).
    print_error "Could not parse URL: #{e}"
    return nil
  end

  res = request_url(url)

  unless res
    print_error 'Connection failed'
    return nil
  end

  # to_s keeps the status line from raising if a response carries no body.
  print_status "- HTTP #{res.code} - #{res.body.to_s.length} bytes"

  res.code == 200 ? res.body : nil
end
|
||||||
|
|
||||||
# removes HTML tags from a provided string.
|
# removes HTML tags from a provided string.
|
||||||
# The string is html-unescaped before the tags are removed
|
# The string is html-unescaped before the tags are removed
|
||||||
# Leading whitespaces and double linebreaks are removed too
|
# Leading whitespaces and double linebreaks are removed too
|
||||||
|
|
|
@ -1,36 +1,47 @@
|
||||||
##
|
##
|
||||||
# This module requires Metasploit: http://metasploit.com/download
|
# This module requires Metasploit: https://metasploit.com/download
|
||||||
# Current source: https://github.com/rapid7/metasploit-framework
|
# Current source: https://github.com/rapid7/metasploit-framework
|
||||||
##
|
##
|
||||||
|
|
||||||
require 'pdf-reader'
|
require 'pdf-reader'
|
||||||
|
|
||||||
class MetasploitModule < Msf::Auxiliary
|
class MetasploitModule < Msf::Auxiliary
|
||||||
|
|
||||||
include Msf::Exploit::Remote::HttpClient
|
include Msf::Exploit::Remote::HttpClient
|
||||||
|
include Msf::Auxiliary::Report
|
||||||
|
|
||||||
def initialize(info = {})
|
def initialize(info = {})
|
||||||
super(update_info(info,
|
super(update_info(info,
|
||||||
'Name' => 'Gather PDF Authors',
|
'Name' => 'Gather PDF Authors',
|
||||||
'Description' => %q{
|
'Description' => %q{
|
||||||
This module downloads PDF files and extracts the author's
|
This module downloads PDF documents and extracts the author's
|
||||||
name from the document metadata.
|
name from the document metadata.
|
||||||
|
|
||||||
|
This module expects a URL to be provided using the URL option.
|
||||||
|
Alternatively, multiple URLs can be provided by supplying the
|
||||||
|
path to a file containing a list of URLs in the URL_LIST option.
|
||||||
|
|
||||||
|
The URL_TYPE option is used to specify the type of URLs supplied.
|
||||||
|
|
||||||
|
By specifying 'pdf' for the URL_TYPE, the module will treat
|
||||||
|
the specified URL(s) as PDF documents. The module will
|
||||||
|
download the documents and extract the authors' names from the
|
||||||
|
document metadata.
|
||||||
|
|
||||||
|
By specifying 'html' for the URL_TYPE, the module will treat
|
||||||
|
the specified URL(s) as HTML pages. The module will scrape the
|
||||||
|
pages for links to PDF documents, download the PDF documents,
|
||||||
|
and extract the author's name from the document metadata.
|
||||||
},
|
},
|
||||||
'License' => MSF_LICENSE,
|
'License' => MSF_LICENSE,
|
||||||
'Author' => 'Brendan Coles <bcoles[at]gmail.com>'))
|
'Author' => 'Brendan Coles <bcoles[at]gmail.com>'))
|
||||||
register_options(
|
register_options(
|
||||||
[
|
[
|
||||||
OptString.new('URL', [ false, 'The URL of a PDF to analyse', '' ]),
|
OptString.new('URL', [ false, 'The target URL', '' ]),
|
||||||
OptString.new('URL_LIST', [ false, 'File containing a list of PDF URLs to analyze', '' ]),
|
OptString.new('URL_LIST', [ false, 'File containing a list of target URLs', '' ]),
|
||||||
OptString.new('OUTFILE', [ false, 'File to store output', '' ])
|
OptEnum.new('URL_TYPE', [ true, 'The type of URL(s) specified', 'html', [ 'pdf', 'html' ] ]),
|
||||||
])
|
OptBool.new('STORE_LOOT', [ false, 'Store authors in loot', true ])
|
||||||
register_advanced_options(
|
|
||||||
[
|
|
||||||
OptString.new('SSL_VERIFY', [ true, 'Verify SSL certificate', true ]),
|
|
||||||
OptString.new('PROXY', [ false, 'Proxy server to route connection. <host>:<port>', nil ]),
|
|
||||||
OptString.new('PROXY_USER', [ false, 'Proxy Server User', nil ]),
|
|
||||||
OptString.new('PROXY_PASS', [ false, 'Proxy Server Password', nil ])
|
|
||||||
])
|
])
|
||||||
|
deregister_options 'RHOST', 'RPORT', 'VHOST'
|
||||||
end
|
end
|
||||||
|
|
||||||
def progress(current, total)
|
def progress(current, total)
|
||||||
|
@ -47,79 +58,117 @@ class MetasploitModule < Msf::Auxiliary
|
||||||
end
|
end
|
||||||
|
|
||||||
unless File.file? datastore['URL_LIST'].to_s
|
unless File.file? datastore['URL_LIST'].to_s
|
||||||
fail_with Failure::BadConfig, "File '#{datastore['URL_LIST']}' does not exit"
|
fail_with Failure::BadConfig, "File '#{datastore['URL_LIST']}' does not exist"
|
||||||
end
|
end
|
||||||
|
|
||||||
File.open(datastore['URL_LIST'], 'rb') {|f| f.read}.split(/\r?\n/)
|
File.open(datastore['URL_LIST'], 'rb') { |f| f.read }.split(/\r?\n/)
|
||||||
end
|
end
|
||||||
|
|
||||||
def read(data)
|
def read(data)
|
||||||
begin
|
Timeout.timeout(10) do
|
||||||
reader = PDF::Reader.new data
|
reader = PDF::Reader.new data
|
||||||
return parse reader
|
return parse reader
|
||||||
rescue PDF::Reader::MalformedPDFError
|
|
||||||
print_error "Could not parse PDF: PDF is malformed"
|
|
||||||
return
|
|
||||||
rescue PDF::Reader::UnsupportedFeatureError
|
|
||||||
print_error "Could not parse PDF: PDF::Reader::UnsupportedFeatureError"
|
|
||||||
return
|
|
||||||
rescue => e
|
|
||||||
print_error "Could not parse PDF: Unhandled exception: #{e}"
|
|
||||||
return
|
|
||||||
end
|
end
|
||||||
|
rescue PDF::Reader::MalformedPDFError
|
||||||
|
print_error "Could not parse PDF: PDF is malformed (MalformedPDFError)"
|
||||||
|
return
|
||||||
|
rescue PDF::Reader::UnsupportedFeatureError
|
||||||
|
print_error "Could not parse PDF: PDF contains unsupported features (UnsupportedFeatureError)"
|
||||||
|
return
|
||||||
|
rescue SystemStackError
|
||||||
|
print_error "Could not parse PDF: PDF is malformed (SystemStackError)"
|
||||||
|
return
|
||||||
|
rescue SyntaxError
|
||||||
|
print_error "Could not parse PDF: PDF is malformed (SyntaxError)"
|
||||||
|
return
|
||||||
|
rescue Timeout::Error
|
||||||
|
print_error "Could not parse PDF: PDF is malformed (Timeout)"
|
||||||
|
return
|
||||||
|
rescue => e
|
||||||
|
print_error "Could not parse PDF: Unhandled exception: #{e}"
|
||||||
|
return
|
||||||
end
|
end
|
||||||
|
|
||||||
def parse(reader)
|
def parse(reader)
|
||||||
# PDF
|
# PDF
|
||||||
#print_status "PDF Version: #{reader.pdf_version}"
|
# print_status "PDF Version: #{reader.pdf_version}"
|
||||||
#print_status "PDF Title: #{reader.info['title']}"
|
# print_status "PDF Title: #{reader.info['title']}"
|
||||||
#print_status "PDF Info: #{reader.info}"
|
# print_status "PDF Info: #{reader.info}"
|
||||||
#print_status "PDF Metadata: #{reader.metadata}"
|
# print_status "PDF Metadata: #{reader.metadata}"
|
||||||
#print_status "PDF Pages: #{reader.page_count}"
|
# print_status "PDF Pages: #{reader.page_count}"
|
||||||
|
|
||||||
# Software
|
# Software
|
||||||
#print_status "PDF Creator: #{reader.info[:Creator]}"
|
# print_status "PDF Creator: #{reader.info[:Creator]}"
|
||||||
#print_status "PDF Producer: #{reader.info[:Producer]}"
|
# print_status "PDF Producer: #{reader.info[:Producer]}"
|
||||||
|
|
||||||
# Author
|
# Author
|
||||||
reader.info[:Author].class == String ? reader.info[:Author].split(/\r?\n/).first : ''
|
reader.info[:Author].class == String ? reader.info[:Author].split(/\r?\n/).first : ''
|
||||||
end
|
end
|
||||||
|
|
||||||
def download(url)
|
|
||||||
print_status "Downloading PDF from '#{url}'"
|
|
||||||
|
|
||||||
res = request_url(url)
|
|
||||||
print_status "HTTP #{res.code} -- Downloaded PDF (#{res.body.length} bytes)"
|
|
||||||
|
|
||||||
return res.code == 200 ? StringIO.new(res.body) : StringIO.new
|
|
||||||
end
|
|
||||||
|
|
||||||
def write_output(data)
|
|
||||||
return if datastore['OUTFILE'].to_s.eql? ''
|
|
||||||
|
|
||||||
print_status "Writing data to #{datastore['OUTFILE']}..."
|
|
||||||
file_name = datastore['OUTFILE']
|
|
||||||
|
|
||||||
if FileTest::exist?(file_name)
|
|
||||||
print_status 'OUTFILE already exists, appending..'
|
|
||||||
end
|
|
||||||
|
|
||||||
File.open(file_name, 'ab') do |fd|
|
|
||||||
fd.write(data)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
def run
|
def run
|
||||||
|
|
||||||
urls = load_urls
|
urls = load_urls
|
||||||
|
|
||||||
|
if datastore['URL_TYPE'].eql? 'html'
|
||||||
|
urls = extract_pdf_links urls
|
||||||
|
|
||||||
|
if urls.empty?
|
||||||
|
print_error 'Found no links to PDF files'
|
||||||
|
return
|
||||||
|
end
|
||||||
|
|
||||||
|
print_line
|
||||||
|
print_good "Found links to #{urls.size} PDF files:"
|
||||||
|
print_line urls.join "\n"
|
||||||
|
print_line
|
||||||
|
end
|
||||||
|
|
||||||
|
authors = extract_authors urls
|
||||||
|
|
||||||
|
print_line
|
||||||
|
|
||||||
|
if authors.empty?
|
||||||
|
print_status 'Found no authors'
|
||||||
|
return
|
||||||
|
end
|
||||||
|
|
||||||
|
print_good "Found #{authors.size} authors: #{authors.join ', '}"
|
||||||
|
|
||||||
|
return unless datastore['STORE_LOOT']
|
||||||
|
|
||||||
|
p = store_loot 'pdf.authors', 'text/plain', nil, authors.join("\n"), 'pdf.authors.txt', 'PDF authors'
|
||||||
|
print_good "File saved in: #{p}"
|
||||||
|
end
|
||||||
|
|
||||||
|
# Scrapes each HTML page in +urls+ for anchors that link to PDF documents.
#
# Every non-blank URL is fetched with +download+; blank URLs and pages that
# come back blank are skipped (progress is only reported for pages that were
# actually parsed). Anchor targets are resolved against the page URL so that
# relative links become absolute.
#
# @param urls [Array<String>] URLs of HTML pages to scrape
# @return [Array<String>] unique absolute URLs of the PDF links found
def extract_pdf_links(urls)
  print_status "Processing #{urls.size} URLs..."

  pdf_urls = []
  urls.each_with_index do |url, index|
    next if url.blank?

    html = download url
    next if html.blank?

    Nokogiri::HTML(html).search('a[href]').each do |anchor|
      href = anchor['href'].to_s
      # Match ".pdf" at the very end of the link or right before a query
      # string / fragment, regardless of letter case.
      next unless href =~ /\.pdf(?:\z|[?#])/i

      begin
        pdf_urls << URI.join(url, href).to_s
      rescue URI::Error => e
        # One malformed href must not abort processing of the remaining links.
        print_error "Skipping malformed link '#{href}': #{e.message}"
      end
    end

    progress(index + 1, urls.size)
  end

  pdf_urls.uniq
end
|
||||||
|
|
||||||
|
def extract_authors(urls)
|
||||||
|
print_status "Processing #{urls.size} URLs..."
|
||||||
|
|
||||||
authors = []
|
authors = []
|
||||||
max_len = 256
|
max_len = 256
|
||||||
urls.each_with_index do |url, index|
|
urls.each_with_index do |url, index|
|
||||||
next if url.blank?
|
next if url.blank?
|
||||||
contents = download url
|
file = download url
|
||||||
next if contents.blank?
|
next if file.blank?
|
||||||
author = read contents
|
pdf = StringIO.new
|
||||||
|
pdf.puts file
|
||||||
|
author = read pdf
|
||||||
unless author.blank?
|
unless author.blank?
|
||||||
print_good "PDF Author: #{author}"
|
print_good "PDF Author: #{author}"
|
||||||
if author.length > max_len
|
if author.length > max_len
|
||||||
|
@ -132,14 +181,6 @@ class MetasploitModule < Msf::Auxiliary
|
||||||
progress(index + 1, urls.size)
|
progress(index + 1, urls.size)
|
||||||
end
|
end
|
||||||
|
|
||||||
print_line
|
authors.uniq
|
||||||
|
|
||||||
if authors.empty?
|
|
||||||
print_status 'Found no authors'
|
|
||||||
return
|
|
||||||
end
|
|
||||||
|
|
||||||
print_good "Found #{authors.size} authors: #{authors.join ', '}"
|
|
||||||
write_output authors.join "\n"
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in New Issue