42 lines
813 B
Ruby
42 lines
813 B
Ruby
require 'anemone'
require 'optparse'
require 'ostruct'
require 'uri'

# Command-line script for `anemone url-list`.
#
# Crawls a site starting at the URL given as the last positional argument and
# prints the URL of each page as it is encountered. With -r/--relative only
# the path component of each page URL is printed.
#
# Prints the usage text and exits when the arguments cannot be used: no URL,
# an unparsable URL, a URL that is not http(s) or has no host, or an
# unrecognised option.

usage = <<-USAGE
Usage:
anemone url-list [options] <url>

Synopsis:
Crawls a site starting at the given URL, and outputs the URL of each page
in the domain as they are encountered.

Options:
-r, --relative Output relative URLs (rather than absolute)
USAGE

options = OpenStruct.new
options.relative = false

# parse command-line options
opts = OptionParser.new
opts.on('-r', '--relative') { options.relative = true }

root = begin
  # Flags are consumed first so that only positional arguments remain in
  # ARGV; otherwise a trailing flag such as "-r" would itself be taken as
  # the URL to crawl.
  opts.parse!(ARGV)

  # make sure that the last remaining argument is a URL we can crawl
  ARGV.last && URI(ARGV.last)
rescue OptionParser::ParseError, URI::InvalidURIError
  nil
end

# URI() accepts strings like "example.com" (a scheme-less generic URI), which
# cannot be fetched over HTTP, so require an http/https URI with a host.
unless root.is_a?(URI::HTTP) && root.host
  puts usage
  # Exit status 0 is kept deliberately: it is what this script has always
  # returned on the usage path.
  exit(0)
end

Anemone.crawl(root, :discard_page_bodies => true) do |anemone|
  anemone.on_every_page do |page|
    puts(options.relative ? page.url.path : page.url)
  end
end