metasploit-framework/modules/auxiliary/gather/http_pdf_authors.rb

##
# This module requires Metasploit: https://metasploit.com/download
# Current source: https://github.com/rapid7/metasploit-framework
##
require 'pdf-reader'

class MetasploitModule < Msf::Auxiliary
  include Msf::Exploit::Remote::HttpClient
  include Msf::Auxiliary::Report

  def initialize(info = {})
    super(update_info(info,
      'Name'        => 'Gather PDF Authors',
      'Description' => %q{
        This module downloads PDF documents and extracts the author's
        name from the document metadata.

        This module expects a URL to be provided using the URL option.
        Alternatively, multiple URLs can be provided by supplying the
        path to a file containing a list of URLs in the URL_LIST option.

        The URL_TYPE option is used to specify the type of URLs supplied.

        By specifying 'pdf' for the URL_TYPE, the module will treat
        the specified URL(s) as PDF documents. The module will
        download the documents and extract the authors' names from the
        document metadata.

        By specifying 'html' for the URL_TYPE, the module will treat
        the specified URL(s) as HTML pages. The module will scrape the
        pages for links to PDF documents, download the PDF documents,
        and extract the author's name from the document metadata.
      },
      'License'     => MSF_LICENSE,
      'Author'      => 'Brendan Coles <bcoles[at]gmail.com>'))
    register_options(
      [
        OptString.new('URL', [ false, 'The target URL', '' ]),
        OptString.new('URL_LIST', [ false, 'File containing a list of target URLs', '' ]),
        OptEnum.new('URL_TYPE', [ true, 'The type of URL(s) specified', 'html', [ 'pdf', 'html' ] ]),
        OptBool.new('STORE_LOOT', [ false, 'Store authors in loot', true ])
      ])
    deregister_options 'RHOST', 'RHOSTS', 'RPORT', 'VHOST', 'SSL'
  end
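
  # Example msfconsole usage (illustrative only; the prompt format and the
  # target URL below are placeholders):
  #
  #   msf > use auxiliary/gather/http_pdf_authors
  #   msf auxiliary(gather/http_pdf_authors) > set URL_TYPE html
  #   msf auxiliary(gather/http_pdf_authors) > set URL http://example.com/documents/
  #   msf auxiliary(gather/http_pdf_authors) > run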

  def progress(current, total)
    done = (current.to_f / total.to_f) * 100
    percent = "%3.2f%%" % done.to_f
    print_status "%7s done (%d/%d files)" % [percent, current, total]
  end

  def load_urls
    return [ datastore['URL'] ] unless datastore['URL'].to_s.eql? ''

    if datastore['URL_LIST'].to_s.eql? ''
      fail_with Failure::BadConfig, 'No URL(s) specified'
    end

    unless File.file? datastore['URL_LIST'].to_s
      fail_with Failure::BadConfig, "File '#{datastore['URL_LIST']}' does not exist"
    end

    File.open(datastore['URL_LIST'], 'rb') { |f| f.read }.split(/\r?\n/)
  end

  def read(data)
    Timeout.timeout(10) do
      reader = PDF::Reader.new data
      return parse reader
    end
  rescue PDF::Reader::MalformedPDFError
    print_error "Could not parse PDF: PDF is malformed (MalformedPDFError)"
    return
  rescue PDF::Reader::UnsupportedFeatureError
    print_error "Could not parse PDF: PDF contains unsupported features (UnsupportedFeatureError)"
    return
  rescue SystemStackError
    print_error "Could not parse PDF: PDF is malformed (SystemStackError)"
    return
  rescue SyntaxError
    print_error "Could not parse PDF: PDF is malformed (SyntaxError)"
    return
  rescue Timeout::Error
    print_error "Could not parse PDF: PDF is malformed (Timeout)"
    return
  rescue => e
    print_error "Could not parse PDF: Unhandled exception: #{e}"
    return
  end
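
  # For reference, PDF::Reader exposes document metadata via #info as a hash
  # with symbol keys, e.g. { :Creator => "Writer", :Producer => "LibreOffice",
  # :Author => "Jane Doe" } (values here are illustrative). Only :Author is
  # used below, truncated to its first line.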
  def parse(reader)
    # PDF
    # print_status "PDF Version: #{reader.pdf_version}"
    # print_status "PDF Title: #{reader.info['title']}"
    # print_status "PDF Info: #{reader.info}"
    # print_status "PDF Metadata: #{reader.metadata}"
    # print_status "PDF Pages: #{reader.page_count}"

    # Software
    # print_status "PDF Creator: #{reader.info[:Creator]}"
    # print_status "PDF Producer: #{reader.info[:Producer]}"

    # Author
    reader.info[:Author].class == String ? reader.info[:Author].split(/\r?\n/).first : ''
  end

  def run
    urls = load_urls

    if datastore['URL_TYPE'].eql? 'html'
      urls = extract_pdf_links urls

      if urls.empty?
        print_error 'Found no links to PDF files'
        return
      end

      print_line
      print_good "Found links to #{urls.size} PDF files:"
      print_line urls.join "\n"
      print_line
    end

    authors = extract_authors urls

    print_line

    if authors.empty?
      print_status 'Found no authors'
      return
    end

    print_good "Found #{authors.size} authors: #{authors.join ', '}"

    return unless datastore['STORE_LOOT']

    p = store_loot 'pdf.authors', 'text/plain', nil, authors.join("\n"), 'pdf.authors.txt', 'PDF authors'
    print_good "File saved in: #{p}"
  end
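
  # NOTE: extract_pdf_links and extract_authors call a `download` helper that
  # is not shown in this excerpt. The sketch below is a minimal illustration,
  # assuming the Msf::Exploit::Remote::HttpClient mixin's send_request_cgi is
  # used to fetch each URL; the upstream module may implement this differently
  # (query strings, redirects and connection reuse are not handled here).
  def download(url)
    print_status "Downloading '#{url}'"

    begin
      target = URI.parse url
      raise 'URL scheme must be http or https' unless target.scheme =~ /\Ahttps?\z/
      raise 'URL host must not be empty' if target.host.to_s.eql? ''
    rescue => e
      print_error "Could not parse URL: #{e}"
      return
    end

    # Point the HttpClient mixin at the parsed target before sending the request
    datastore['RHOST'] = target.host
    datastore['RPORT'] = target.port
    datastore['SSL']   = target.scheme.eql?('https')

    uri = target.path.blank? ? '/' : target.path
    res = send_request_cgi 'uri' => uri

    unless res
      print_error 'Connection failed'
      return
    end

    unless res.code == 200
      print_error "Unexpected HTTP response code: #{res.code}"
      return
    end

    res.body
  end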

  def extract_pdf_links(urls)
    print_status "Processing #{urls.size} URLs..."

    pdf_urls = []
    urls.each_with_index do |url, index|
      next if url.blank?

      html = download url
      next if html.blank?

      doc = Nokogiri::HTML html
      doc.search('a[href]').select { |n| n['href'][/(\.pdf$|\.pdf\?)/] }.map do |n|
        pdf_urls << URI.join(url, n['href']).to_s
      end

      progress(index + 1, urls.size)
    end

    pdf_urls.uniq
  end

  def extract_authors(urls)
    print_status "Processing #{urls.size} URLs..."

    authors = []
    max_len = 256

    urls.each_with_index do |url, index|
      next if url.blank?

      file = download url
      next if file.blank?

      pdf = StringIO.new
      pdf.puts file

      author = read pdf

      unless author.blank?
        print_good "PDF Author: #{author}"

        if author.length > max_len
          print_warning "Warning: Truncated author's name at #{max_len} characters"
          authors << author[0...max_len]
        else
          authors << author
        end
      end

      progress(index + 1, urls.size)
    end

    authors.uniq
  end
end