45 lines
1.3 KiB
Ruby
45 lines
1.3 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
require 'mechanize'
|
|
# Single Mechanize agent, passed to every check_for_links call in this script.
agent = Mechanize.new
|
|
|
|
# Collect the same-host links found on a single page.
#
# Fetches +address+ through +agent+, resolves every link on the page to an
# absolute URI, strips its fragment and query string, and appends it to
# +indexed_pages+ when it stays on the host of +address+ and is not already
# in the list.
#
# @param address [URI] page to fetch; its host decides which links are kept
# @param agent [#get] Mechanize agent (or compatible object) used to fetch
# @param indexed_pages [Array<URI>] links collected so far; mutated in place
# @return [Array<URI>] +indexed_pages+ in every case, including when the
#   response is not a Mechanize::Page or the request fails with an HTTP
#   error status, so callers can always treat the result as a list
def check_for_links(address, agent, indexed_pages = [])
  page = agent.get(address)
  # Responses that are not Mechanize::Page objects are not scanned for links.
  return indexed_pages unless page.is_a?(Mechanize::Page)

  page.links.each do |link|
    href = link.href
    # Anchors without an href and javascript: pseudo-links lead nowhere.
    next if href.nil? || href.start_with?('javascript:')

    # Normalise a copy so the link's own URI object is never altered.
    uri = link.resolved_uri.dup
    # Drop the fragment and query so variants of one page collapse together.
    # (TODO: stash the query away later)
    uri.fragment = nil
    uri.query = nil

    # Keep only links that stay on the starting host and are still unseen.
    next if uri.host != address.host || indexed_pages.include?(uri)

    indexed_pages.push(uri)
  end

  indexed_pages
rescue Mechanize::ResponseCodeError => e
  # Report the failed request on stderr, then hand back the list collected so
  # far instead of leaking the status code as the return value.
  warn "#{address}: HTTP #{e.response_code}"
  indexed_pages
end
|
|
|
|
# Crawl the site starting from its root page.
#
# check_for_links appends newly found links to the array it is given, so
# `addresses` is used as the single accumulator and the return value is not
# relied upon (it may not be a list when a request fails).
addresses = []
check_for_links(URI.parse('http://git.blizzard.systems'), agent, addresses)

# Snapshot of `addresses` taken at the start of the most recent pass; the
# crawl is finished once a pass starts with nothing new compared to it.
old_addresses = []
until (pending = addresses - old_addresses).empty?
  old_addresses = addresses.dup
  # Visit only the pages discovered since the previous pass. `pending` is a
  # separate array, so links appended to `addresses` during the pass do not
  # disturb this iteration and no page is fetched twice.
  pending.each { |address| check_for_links(address, agent, addresses) }
end

p addresses
puts '********************************************'
p old_addresses
|