added http client 'download' method and updates to pdf author module from @bcoles

2017-08-14 01:08:53 -04:00 · 2017-08-14 01:08:53 -04:00 · 5d05ca154a
parent 7e487ec745
commit 5d05ca154a
2 changed files with 149 additions and 71 deletions
--- a/lib/msf/core/exploit/http/client.rb
+++ b/lib/msf/core/exploit/http/client.rb
@ -511,7 +511,16 @@ module Exploit::Remote::HttpClient
  #
  # Returns a hash of request opts from a URL string
  def request_opts_from_url(url)
-    tgt = URI.parse(url)
+    # verify and extract components from the URL
    begin
      tgt = URI.parse(url)
      raise 'Invalid URL' unless tgt.scheme =~ %r{https?}
      raise 'Invalid URL' if tgt.host.to_s.eql? ''
    rescue => e
      print_error "Could not parse URL: #{e}"
      return nil
    end
    opts = { 'rhost' => tgt.host, 'rport' => tgt.port, 'uri' => tgt.request_uri }
    opts['SSL'] = true if tgt.scheme == 'https'
    if tgt.query and tgt.query.size > 13
@ -528,11 +537,39 @@ module Exploit::Remote::HttpClient
  #
  # Returns response from a simple URL call
  #
  # The request options are derived from the URL with request_opts_from_url
  # and handed to send_request_raw. nil is returned when no options could be
  # derived from the URL. disconnect is called afterwards unless keepalive
  # is truthy.
  def request_url(url, keepalive = false)
-    res = send_request_raw(request_opts_from_url(url))
+    opts = request_opts_from_url(url)
    return nil if opts.nil?
    res = send_request_raw(opts)
    disconnect unless keepalive
    return res
  end
  #
  # Downloads a URL and returns the response body.
  #
  # The URL must parse with an http or https scheme and a non-empty host;
  # anything else is rejected before a request is made.
  #
  # @param url [String] the URL to fetch
  # @return [String, nil] the response body when the response code is 200;
  #   nil when the URL is rejected, request_url returns nothing, or the
  #   response code is anything other than 200
  def download(url)
    print_status "Downloading '#{url}'"

    begin
      target = URI.parse url
      # Anchor the match: an unanchored /https?/ also accepts any scheme that
      # merely contains "http" (for example "xhttp" or "httpx").
      raise 'Invalid URL' unless target.scheme =~ /\Ahttps?\z/
      raise 'Invalid URL' if target.host.to_s.eql? ''
    rescue => e
      print_error "Could not parse URL: #{e}"
      return nil
    end

    res = request_url(url)
    unless res
      print_error 'Connection failed'
      return nil
    end

    print_status "- HTTP #{res.code} - #{res.body.length} bytes"
    res.code == 200 ? res.body : nil
  end
  # removes HTML tags from a provided string.
  # The string is html-unescaped before the tags are removed
  # Leading whitespaces and double linebreaks are removed too
--- a/modules/auxiliary/gather/http_pdf_authors.rb
+++ b/modules/auxiliary/gather/http_pdf_authors.rb
@ -1,36 +1,47 @@
 ##
-# This module requires Metasploit: http://metasploit.com/download
+# This module requires Metasploit: https://metasploit.com/download
 # Current source: https://github.com/rapid7/metasploit-framework
 ##
 require 'pdf-reader'
 class MetasploitModule < Msf::Auxiliary
  include Msf::Exploit::Remote::HttpClient
  include Msf::Auxiliary::Report
  # Sets up the module: passes the metadata (name, description, license,
  # author) to the parent initializer through update_info, registers the
  # URL-related options and the advanced SSL/proxy options, and deregisters
  # RHOST, RPORT and VHOST.
  #
  # @param info [Hash] metadata merged with this module's own via update_info
  def initialize(info = {})
    super(update_info(info,
      'Name'        => 'Gather PDF Authors',
      'Description' => %q{
-        This module downloads PDF files and extracts the author's
+        This module downloads PDF documents and extracts the author's
        name from the document metadata.
        This module expects a URL to be provided using the URL option.
        Alternatively, multiple URLs can be provided by supplying the
        path to a file containing a list of URLs in the URL_LIST option.
        The URL_TYPE option is used to specify the type of URLs supplied.
        By specifying 'pdf' for the URL_TYPE, the module will treat
        the specified URL(s) as PDF documents. The module will
        download the documents and extract the authors' names from the
        document metadata.
        By specifying 'html' for the URL_TYPE, the module will treat
        the specified URL(s) as HTML pages. The module will scrape the
        pages for links to PDF documents, download the PDF documents,
        and extract the author's name from the document metadata.
      },
      'License'     => MSF_LICENSE,
      'Author'      => 'Brendan Coles <bcoles[at]gmail.com>'))
    register_options(
      [
-        OptString.new('URL', [ false, 'The URL of a PDF to analyse', '' ]),
+        OptString.new('URL', [ false, 'The target URL', '' ]),
-        OptString.new('URL_LIST', [ false, 'File containing a list of PDF URLs to analyze', '' ]),
+        OptString.new('URL_LIST', [ false, 'File containing a list of target URLs', '' ]),
-        OptString.new('OUTFILE', [ false, 'File to store output', '' ])
+        OptEnum.new('URL_TYPE', [ true, 'The type of URL(s) specified', 'html', [ 'pdf', 'html' ] ]),
-      ])
+        OptBool.new('STORE_LOOT', [ false, 'Store authors in loot', true ])
    register_advanced_options(
      [
        OptString.new('SSL_VERIFY', [ true, 'Verify SSL certificate', true ]),
        OptString.new('PROXY', [ false, 'Proxy server to route connection. <host>:<port>', nil ]),
        OptString.new('PROXY_USER', [ false, 'Proxy Server User', nil ]),
        OptString.new('PROXY_PASS', [ false, 'Proxy Server Password', nil ])
      ])
    deregister_options 'RHOST', 'RPORT', 'VHOST'
  end
  def progress(current, total)
@ -47,79 +58,117 @@ class MetasploitModule < Msf::Auxiliary
    end
    unless File.file? datastore['URL_LIST'].to_s
-      fail_with Failure::BadConfig, "File '#{datastore['URL_LIST']}' does not exit"
+      fail_with Failure::BadConfig, "File '#{datastore['URL_LIST']}' does not exist"
    end
-    File.open(datastore['URL_LIST'], 'rb') {|f| f.read}.split(/\r?\n/)
+    File.open(datastore['URL_LIST'], 'rb') { |f| f.read }.split(/\r?\n/)
  end
  # Builds a PDF::Reader from +data+ and returns whatever #parse returns
  # for it. Every rescue branch prints a "Could not parse PDF" error and
  # returns nil.
  #
  # @param data the PDF input handed to PDF::Reader.new
  def read(data)
-    begin
+    Timeout.timeout(10) do
      reader = PDF::Reader.new data
      return parse reader
    rescue PDF::Reader::MalformedPDFError
      print_error "Could not parse PDF: PDF is malformed"
      return
    rescue PDF::Reader::UnsupportedFeatureError
      print_error "Could not parse PDF: PDF::Reader::UnsupportedFeatureError"
      return
    rescue => e
      print_error "Could not parse PDF: Unhandled exception: #{e}"
      return
    end
  rescue PDF::Reader::MalformedPDFError
    print_error "Could not parse PDF: PDF is malformed (MalformedPDFError)"
    return
  rescue PDF::Reader::UnsupportedFeatureError
    print_error "Could not parse PDF: PDF contains unsupported features (UnsupportedFeatureError)"
    return
  rescue SystemStackError
    print_error "Could not parse PDF: PDF is malformed (SystemStackError)"
    return
  rescue SyntaxError
    print_error "Could not parse PDF: PDF is malformed (SyntaxError)"
    return
  rescue Timeout::Error
    print_error "Could not parse PDF: PDF is malformed (Timeout)"
    return
  rescue => e
    print_error "Could not parse PDF: Unhandled exception: #{e}"
    return
  end
  # Extracts the author from a PDF reader's info dictionary.
  #
  # @param reader an object whose #info responds to [] with :Author
  # @return [String, nil] the first line of the :Author entry when that
  #   entry is a String (nil if the entry is an empty string), otherwise
  #   an empty string
  def parse(reader)
    # PDF
-    #print_status "PDF Version: #{reader.pdf_version}"
+    # print_status "PDF Version: #{reader.pdf_version}"
-    #print_status "PDF Title: #{reader.info['title']}"
+    # print_status "PDF Title: #{reader.info['title']}"
-    #print_status "PDF Info: #{reader.info}"
+    # print_status "PDF Info: #{reader.info}"
-    #print_status "PDF Metadata: #{reader.metadata}"
+    # print_status "PDF Metadata: #{reader.metadata}"
-    #print_status "PDF Pages: #{reader.page_count}"
+    # print_status "PDF Pages: #{reader.page_count}"
    # Software
-    #print_status "PDF Creator: #{reader.info[:Creator]}"
+    # print_status "PDF Creator: #{reader.info[:Creator]}"
-    #print_status "PDF Producer: #{reader.info[:Producer]}"
+    # print_status "PDF Producer: #{reader.info[:Producer]}"
    # Author
    # Only the first line of the author string is kept.
    reader.info[:Author].class == String ? reader.info[:Author].split(/\r?\n/).first : ''
  end
  # Downloads a PDF and wraps the response body in a StringIO.
  #
  # @param url [String] URL of the PDF document
  # @return [StringIO] the response body when the response code is 200; an
  #   empty StringIO when request_url returns nothing or the code is not 200
  def download(url)
    print_status "Downloading PDF from '#{url}'"
    res = request_url(url)

    # request_url may return nil; without this guard res.code would raise
    # NoMethodError and abort the caller's loop.
    unless res
      print_error 'Connection failed'
      return StringIO.new
    end

    print_status "HTTP #{res.code} -- Downloaded PDF (#{res.body.length} bytes)"
    res.code == 200 ? StringIO.new(res.body) : StringIO.new
  end
  # Appends +data+ to the file named by the OUTFILE datastore option.
  # Does nothing when OUTFILE is unset or empty. A notice is printed when
  # the file is already present, since the data is appended to it.
  #
  # @param data [String] the content to append
  def write_output(data)
    out_path = datastore['OUTFILE']
    return if out_path.to_s.eql? ''

    print_status "Writing data to #{out_path}..."
    print_status 'OUTFILE already exists, appending..' if FileTest.exist?(out_path)

    # 'ab' opens for binary append, creating the file when it is missing
    File.open(out_path, 'ab') { |fd| fd.write(data) }
  end
  # Module entry point. Loads the target URLs, expands them to PDF links
  # first when URL_TYPE is 'html', extracts the authors from the resulting
  # URLs, prints them, and stores them as loot when STORE_LOOT is set.
  def run
    targets = load_urls

    if datastore['URL_TYPE'].eql?('html')
      # HTML pages are scraped for PDF links; those links become the targets
      targets = extract_pdf_links(targets)
      if targets.empty?
        print_error('Found no links to PDF files')
        return
      end

      print_line
      print_good("Found links to #{targets.size} PDF files:")
      print_line(targets.join("\n"))
      print_line
    end

    names = extract_authors(targets)
    print_line
    if names.empty?
      print_status('Found no authors')
      return
    end

    print_good("Found #{names.size} authors: #{names.join(', ')}")
    return unless datastore['STORE_LOOT']

    loot_path = store_loot('pdf.authors', 'text/plain', nil, names.join("\n"), 'pdf.authors.txt', 'PDF authors')
    print_good("File saved in: #{loot_path}")
  end
  # Scrapes each page in +urls+ for anchors that link to PDF documents.
  #
  # Blank URLs and pages whose download comes back blank are skipped.
  # Hrefs are resolved against the URL of the page they were found on.
  #
  # @param urls [Array<String>] URLs of the pages to download and scan
  # @return [Array<String>] the unique, resolved URLs of the PDF links found
  def extract_pdf_links(urls)
    print_status "Processing #{urls.size} URLs..."

    pdf_urls = []
    urls.each_with_index do |url, index|
      next if url.blank?
      html = download url
      next if html.blank?

      doc = Nokogiri::HTML(html)
      doc.search('a[href]').each do |node|
        href = node['href']
        # Keep hrefs that end in ".pdf" or carry a query string after ".pdf"
        next unless href[/(\.pdf$|\.pdf\?)/]

        begin
          pdf_urls << URI.join(url, href).to_s
        rescue URI::Error => e
          # Hrefs come from scraped pages; one malformed link must not abort
          # processing of the remaining links and URLs.
          print_error "Skipping invalid link '#{href}': #{e}"
        end
      end

      progress(index + 1, urls.size)
    end

    pdf_urls.uniq
  end
  def extract_authors(urls)
    print_status "Processing #{urls.size} URLs..."
    authors = []
    max_len = 256
    urls.each_with_index do |url, index|
      next if url.blank?
-      contents = download url
+      file = download url
-      next if contents.blank?
+      next if file.blank?
-      author = read contents
+      pdf = StringIO.new
      pdf.puts file
      author = read pdf
      unless author.blank?
        print_good "PDF Author: #{author}"
        if author.length > max_len
@ -132,14 +181,6 @@ class MetasploitModule < Msf::Auxiliary
      progress(index + 1, urls.size)
    end
-    print_line
+    authors.uniq
    if authors.empty?
      print_status 'Found no authors'
      return
    end
    print_good "Found #{authors.size} authors: #{authors.join ', '}"
    write_output authors.join "\n"
  end
 end