From 5d05ca154abca8cbca754295242f87dd52b1d8a3 Mon Sep 17 00:00:00 2001
From: Brent Cook
Date: Mon, 14 Aug 2017 01:08:53 -0400
Subject: [PATCH] added http client 'download' method and updates to pdf
 author module from @bcoles

---
 lib/msf/core/exploit/http/client.rb          |  41 ++++-
 modules/auxiliary/gather/http_pdf_authors.rb | 179 ++++++++++++-------
 2 files changed, 149 insertions(+), 71 deletions(-)

diff --git a/lib/msf/core/exploit/http/client.rb b/lib/msf/core/exploit/http/client.rb
index 5944a5c822..d7bcc0c46e 100644
--- a/lib/msf/core/exploit/http/client.rb
+++ b/lib/msf/core/exploit/http/client.rb
@@ -511,7 +511,16 @@ module Exploit::Remote::HttpClient
   #
   # Returns a hash of request opts from a URL string
   def request_opts_from_url(url)
-    tgt = URI.parse(url)
+    # verify and extract components from the URL
+    begin
+      tgt = URI.parse(url)
+      raise 'Invalid URL' unless tgt.scheme =~ %r{https?}
+      raise 'Invalid URL' if tgt.host.to_s.eql? ''
+    rescue => e
+      print_error "Could not parse URL: #{e}"
+      return nil
+    end
+
     opts = { 'rhost' => tgt.host, 'rport' => tgt.port, 'uri' => tgt.request_uri }
     opts['SSL'] = true if tgt.scheme == 'https'
     if tgt.query and tgt.query.size > 13
@@ -528,11 +537,39 @@
   #
   # Returns response from a simple URL call
   def request_url(url, keepalive = false)
-    res = send_request_raw(request_opts_from_url(url))
+    opts = request_opts_from_url(url)
+    return nil if opts.nil?
+    res = send_request_raw(opts)
     disconnect unless keepalive
     return res
   end
 
+  #
+  # Downloads a URL
+  def download(url)
+    print_status "Downloading '#{url}'"
+
+    begin
+      target = URI.parse url
+      raise 'Invalid URL' unless target.scheme =~ /https?/
+      raise 'Invalid URL' if target.host.to_s.eql? ''
+    rescue => e
+      print_error "Could not parse URL: #{e}"
+      return nil
+    end
+
+    res = request_url(url)
+
+    unless res
+      print_error 'Connection failed'
+      return nil
+    end
+
+    print_status "- HTTP #{res.code} - #{res.body.length} bytes"
+
+    res.code == 200 ? res.body : nil
+  end
+
   # removes HTML tags from a provided string.
   # The string is html-unescaped before the tags are removed
   # Leading whitespaces and double linebreaks are removed too
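[Not part of the patch: a minimal sketch of how a module that mixes in Msf::Exploit::Remote::HttpClient could call the new helper. The URL and messages are illustrative only; per the hunk above, download returns the response body for an HTTP 200 and nil for an invalid URL, a failed connection, or any other status code.]

# Illustrative sketch only; assumes it runs inside a module that
# includes Msf::Exploit::Remote::HttpClient.
pdf = download('http://example.com/files/report.pdf')
if pdf.nil?
  print_error 'Download failed'   # invalid URL, connection error, or non-200 response
else
  print_good "Retrieved #{pdf.length} bytes"
end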
diff --git a/modules/auxiliary/gather/http_pdf_authors.rb b/modules/auxiliary/gather/http_pdf_authors.rb
index b49281b501..3217609ca2 100644
--- a/modules/auxiliary/gather/http_pdf_authors.rb
+++ b/modules/auxiliary/gather/http_pdf_authors.rb
@@ -1,36 +1,47 @@
 ##
-# This module requires Metasploit: http://metasploit.com/download
+# This module requires Metasploit: https://metasploit.com/download
 # Current source: https://github.com/rapid7/metasploit-framework
 ##
 
 require 'pdf-reader'
 
 class MetasploitModule < Msf::Auxiliary
-
   include Msf::Exploit::Remote::HttpClient
+  include Msf::Auxiliary::Report
 
   def initialize(info = {})
     super(update_info(info,
       'Name'           => 'Gather PDF Authors',
       'Description'    => %q{
-        This module downloads PDF files and extracts the author's
+        This module downloads PDF documents and extracts the author's
         name from the document metadata.
+
+        This module expects a URL to be provided using the URL option.
+        Alternatively, multiple URLs can be provided by supplying the
+        path to a file containing a list of URLs in the URL_LIST option.
+
+        The URL_TYPE option is used to specify the type of URLs supplied.
+
+        By specifying 'pdf' for the URL_TYPE, the module will treat
+        the specified URL(s) as PDF documents. The module will
+        download the documents and extract the authors' names from the
+        document metadata.
+
+        By specifying 'html' for the URL_TYPE, the module will treat
+        the specified URL(s) as HTML pages. The module will scrape the
+        pages for links to PDF documents, download the PDF documents,
+        and extract the author's name from the document metadata.
       },
       'License'        => MSF_LICENSE,
       'Author'         => 'Brendan Coles <bcoles[at]gmail.com>'))
     register_options(
       [
-        OptString.new('URL', [ false, 'The URL of a PDF to analyse', '' ]),
-        OptString.new('URL_LIST', [ false, 'File containing a list of PDF URLs to analyze', '' ]),
-        OptString.new('OUTFILE', [ false, 'File to store output', '' ])
-      ])
-    register_advanced_options(
-      [
-        OptString.new('SSL_VERIFY', [ true, 'Verify SSL certificate', true ]),
-        OptString.new('PROXY', [ false, 'Proxy server to route connection. <host>:<port>', nil ]),
-        OptString.new('PROXY_USER', [ false, 'Proxy Server User', nil ]),
-        OptString.new('PROXY_PASS', [ false, 'Proxy Server Password', nil ])
+        OptString.new('URL', [ false, 'The target URL', '' ]),
+        OptString.new('URL_LIST', [ false, 'File containing a list of target URLs', '' ]),
+        OptEnum.new('URL_TYPE', [ true, 'The type of URL(s) specified', 'html', [ 'pdf', 'html' ] ]),
+        OptBool.new('STORE_LOOT', [ false, 'Store authors in loot', true ])
       ])
+    deregister_options 'RHOST', 'RPORT', 'VHOST'
   end
 
   def progress(current, total)
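[Not part of the patch: a standalone sketch of the pdf-reader call this module relies on for the metadata extraction described above. The file name is illustrative; reader.info returns the document metadata hash and :Author may be absent or not a String.]

# Illustrative sketch only; requires the pdf-reader gem.
require 'pdf-reader'

reader = PDF::Reader.new('example.pdf')   # also accepts an IO object
author = reader.info[:Author]             # document metadata hash
puts author.is_a?(String) ? author.split(/\r?\n/).first : '(no author recorded)'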
#{reader.info[:Producer]}" # Author reader.info[:Author].class == String ? reader.info[:Author].split(/\r?\n/).first : '' end - def download(url) - print_status "Downloading PDF from '#{url}'" - - res = request_url(url) - print_status "HTTP #{res.code} -- Downloaded PDF (#{res.body.length} bytes)" - - return res.code == 200 ? StringIO.new(res.body) : StringIO.new - end - - def write_output(data) - return if datastore['OUTFILE'].to_s.eql? '' - - print_status "Writing data to #{datastore['OUTFILE']}..." - file_name = datastore['OUTFILE'] - - if FileTest::exist?(file_name) - print_status 'OUTFILE already exists, appending..' - end - - File.open(file_name, 'ab') do |fd| - fd.write(data) - end - end - def run - urls = load_urls + + if datastore['URL_TYPE'].eql? 'html' + urls = extract_pdf_links urls + + if urls.empty? + print_error 'Found no links to PDF files' + return + end + + print_line + print_good "Found links to #{urls.size} PDF files:" + print_line urls.join "\n" + print_line + end + + authors = extract_authors urls + + print_line + + if authors.empty? + print_status 'Found no authors' + return + end + + print_good "Found #{authors.size} authors: #{authors.join ', '}" + + return unless datastore['STORE_LOOT'] + + p = store_loot 'pdf.authors', 'text/plain', nil, authors.join("\n"), 'pdf.authors.txt', 'PDF authors' + print_good "File saved in: #{p}" + end + + def extract_pdf_links(urls) print_status "Processing #{urls.size} URLs..." + + pdf_urls = [] + urls.each_with_index do |url, index| + next if url.blank? + html = download url + next if html.blank? + doc = Nokogiri::HTML html + doc.search('a[href]').select { |n| n['href'][/(\.pdf$|\.pdf\?)/] }.map do |n| + pdf_urls << URI.join(url, n['href']).to_s + end + progress(index + 1, urls.size) + end + + pdf_urls.uniq + end + + def extract_authors(urls) + print_status "Processing #{urls.size} URLs..." + authors = [] max_len = 256 urls.each_with_index do |url, index| next if url.blank? - contents = download url - next if contents.blank? - author = read contents + file = download url + next if file.blank? + pdf = StringIO.new + pdf.puts file + author = read pdf unless author.blank? print_good "PDF Author: #{author}" if author.length > max_len @@ -132,14 +181,6 @@ class MetasploitModule < Msf::Auxiliary progress(index + 1, urls.size) end - print_line - - if authors.empty? - print_status 'Found no authors' - return - end - - print_good "Found #{authors.size} authors: #{authors.join ', '}" - write_output authors.join "\n" + authors.uniq end end