To run this script:

- Add `crawl_broken_urls.rb` to your directory.
- At the same directory level, add a `broken_urls.yml` (the format is sketched below).
- Then run `ruby crawl_broken_urls.rb`.
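The script expects `broken_urls.yml` to be a flat YAML list of URL strings, since it iterates the loaded file directly with `broken_urls.each`. A minimal sketch (the URLs are placeholders, not from the original post):

```yaml
# broken_urls.yml — a plain YAML array of URLs to check.
# Placeholder entries for illustration only.
- http://example.com/old-page
- http://example.com/moved-elsewhere
- http://example.com/definitely-gone
```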
```ruby
require 'yaml'
require 'net/http'
require 'uri'
require 'timeout'

broken_urls = YAML.load_file('./broken_urls.yml')

# Fetch a URL, following redirects up to `limit` hops deep.
def fetch(uri_str, limit = 10)
  default_error = 'HTTPError'
  raise ArgumentError, 'HTTP redirect too deep' if limit == 0

  url = URI.parse(uri_str)
  req = Net::HTTP::Get.new(url.request_uri)

  response = Net::HTTP.start(url.host, url.port, use_ssl: url.scheme == 'https') do |http|
    begin
      # Give each request three seconds before giving up on it.
      Timeout.timeout(3) { http.request(req) }
    rescue Timeout::Error
      puts 'That took too long, moving on...'
      nil
    end
  end

  begin
    case response
    when Net::HTTPSuccess then response
    when Net::HTTPRedirection
      # Follow the redirect unless it points back at the same URL or is a 302;
      # each hop spends one unit of the redirect limit.
      if (uri_str != response['location']) && (response.code != '302')
        fetch(response['location'], limit - 1)
      else
        response
      end
    else
      default_error
    end
  rescue
    'TimeoutError'
  end
end

broken_urls.each do |link|
  puts "#{link}: #{fetch(link)}"
end
```
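To spot-check a single URL, you can call `fetch` directly, for example from `irb` after loading the script, or by appending a few lines to it. A minimal sketch; the URL is a placeholder:

```ruby
# fetch returns a Net::HTTPResponse on success (or for a redirect it declines
# to follow), and the string 'HTTPError' or 'TimeoutError' otherwise.
# 'http://example.com/old-page' is a placeholder, not from the original post.
result = fetch('http://example.com/old-page')

if result.is_a?(Net::HTTPResponse)
  puts "#{result.code} #{result.message}"
else
  puts result
end
```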