Last active
December 21, 2015 11:17
-
-
Save ronan-mch/879ddfb27bb58042cbbd to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'nokogiri' | |
require 'net/http' | |
require 'colorize' | |
require 'open-uri' | |
require 'zlib' | |
# Crawls a sitemap and reports every listed URL that does not answer a HEAD
# request with HTTP 200.
#
# Each <loc> entry whose text matches "sitemapN.xml" is treated as a nested
# sitemap and crawled in turn; every other entry is checked with a HEAD
# request. Failures are printed and appended to "sitemap.log" in the current
# working directory; that file is emptied whenever a checker is created.
class SitemapCheck
  # Sitemap protocol namespace, bound to the "sm" prefix used in the XPath.
  SITEMAP_NS = { 'sm' => 'http://www.sitemaps.org/schemas/sitemap/0.9' }.freeze
  # <loc> values matching this pattern are nested sitemaps, not pages.
  NESTED_SITEMAP = /sitemap\d+\.xml/
  # Locations starting with an HTTP(S) scheme are fetched over the network.
  REMOTE = %r{\Ahttps?://}i
  # The two bytes every gzip stream starts with.
  GZIP_MAGIC = "\x1f\x8b".b.freeze

  # @param basemap [String] path or http(s) URL of the top-level sitemap
  def initialize(basemap)
    @root_target = basemap
    @log = 'sitemap.log'
    File.write(@log, '') # every run starts with an empty log
    @sitemaps = []       # nested sitemaps still waiting to be crawled
    @seen = {}           # sitemap locations already queued (cycle guard)
  end

  # Checks the root sitemap, then every nested sitemap discovered on the way.
  def run
    @seen[@root_target] = true
    check_map(@root_target)
    # check_map keeps appending to @sitemaps, so drain it as a queue instead
    # of iterating over an array that is growing underneath the loop.
    check_map(@sitemaps.shift) until @sitemaps.empty?
  end

  # Parses one sitemap and dispatches each of its <loc> entries.
  #
  # @param map [String] path or http(s) URL of the sitemap
  def check_map(map)
    parse_map(map).xpath('//sm:loc', SITEMAP_NS).each do |loc|
      targ = loc.text.strip
      next if targ.empty?

      # Classify before normalising: a nested sitemap must keep its exact URL,
      # otherwise "sitemap1.xml" would be requested as "sitemap1.xml/".
      if targ =~ NESTED_SITEMAP
        enqueue(targ)
      else
        check_target(with_trailing_slash(targ))
      end
    end
  end

  # Sends a HEAD request to +targ+ and records anything other than a 200.
  # An unusable URL or a network failure is recorded as an "ERR" entry so a
  # single bad target does not abort the rest of the run.
  #
  # @param targ [String] absolute http(s) URL
  def check_target(targ)
    puts "parsing #{targ}".colorize(:yellow)
    url = URI.parse(targ)
    return report_failure('ERR', targ, 'not an http(s) URL') unless url.is_a?(URI::HTTP)

    http = Net::HTTP.new(url.host, url.port)
    http.use_ssl = url.scheme == 'https'
    # request_uri keeps the query string and is never empty, unlike path.
    res = http.request_head(url.request_uri)
    report_failure(res.code, targ) unless res.code == '200'
  rescue StandardError => e
    report_failure('ERR', targ, "#{e.class}: #{e.message}")
  end

  # Appends +message+ verbatim to the log file.
  #
  # @param message [String]
  def log(message)
    File.open(@log, 'a') { |f| f << message }
  end

  # Opens a sitemap for reading, decompressing it when it is gzip data.
  #
  # Remote locations are fetched with open-uri rather than by shelling out:
  # +loc+ may come straight from a downloaded sitemap and must never reach a
  # shell. Compression is detected from the content, not the file name.
  #
  # @param loc [String] local path or http(s) URL
  # @return [IO, StringIO, Tempfile, Zlib::GzipReader] readable stream that
  #   the caller is responsible for closing
  def get_file_pointer(loc)
    puts "opening #{loc}".colorize(:yellow)
    io = (loc =~ REMOTE) ? URI.parse(loc).open : File.open(loc, 'rb')
    gzip?(io) ? Zlib::GzipReader.new(io) : io
  end

  private

  # Parses the sitemap at +map+ strictly and closes the underlying stream.
  def parse_map(map)
    source = get_file_pointer(map)
    Nokogiri::XML(source) { |config| config.strict }
  ensure
    source.close if source
  end

  # Queues a nested sitemap unless it has been queued before.
  def enqueue(sitemap)
    return if @seen[sitemap]

    @seen[sitemap] = true
    @sitemaps << sitemap
  end

  # Page URLs are requested with a trailing slash, except those containing "gz".
  def with_trailing_slash(targ)
    return targ if targ.end_with?('/') || targ.include?('gz')

    "#{targ}/"
  end

  # Peeks at the first two bytes of +io+ and rewinds it again.
  def gzip?(io)
    magic = io.read(2).to_s.b
    io.rewind
    magic == GZIP_MAGIC
  end

  # Prints a failure and appends it to the log as "<status>: <url>".
  def report_failure(status, targ, detail = nil)
    suffix = detail ? " (#{detail})" : ''
    puts "Error #{status} parsing #{targ}#{suffix}".colorize(:red)
    log("#{status}: #{targ}#{suffix}\n")
  end
end
# Command-line entry point: the first argument is the path or http(s) URL of
# the top-level sitemap. Failures collected in sitemap.log are echoed at the end.
target = ARGV.shift
# Fail with a usage message instead of an error from deep inside the checker.
abort "usage: ruby #{File.basename($PROGRAM_NAME)} SITEMAP_PATH_OR_URL" if target.nil? || target.empty?

checker = SitemapCheck.new(target)
checker.run
puts "Check complete - reading error log:".colorize(:green)
# File.read closes the handle; File.open(...).read left it open.
puts File.read('sitemap.log')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment