Skip to content

Instantly share code, notes, and snippets.

@nicolrx
Created February 5, 2024 15:19
Show Gist options
  • Save nicolrx/df71e6bb199c5c3385395ac0ad12ed7c to your computer and use it in GitHub Desktop.
Save nicolrx/df71e6bb199c5c3385395ac0ad12ed7c to your computer and use it in GitHub Desktop.
Ruby on Rails (Sidekiq) worker that checks Google Search Console for pages that are not indexed, plus a second worker that submits each such page to Google for indexing.
# Sidekiq worker that downloads a gzip-compressed sitemap, asks the Search
# Console URL Inspection API about every URL listed in it, and enqueues
# GoogleIndexSinglePageWorker for each URL whose coverage state is listed in
# INDEXABLE_STATUSES or whose last crawl is older than MAX_CRAWL_AGE_DAYS.
class GoogleIndexWorker
  include Sidekiq::Worker
  sidekiq_options retry: 0
  require "google/apis/indexing_v3"
  require 'google/apis/webmasters_v3'
  include Rails.application.routes.url_helpers

  INSPECT_ENDPOINT = "https://searchconsole.googleapis.com/v1/urlInspection/index:inspect"

  SCOPES = [
    'https://www.googleapis.com/auth/webmasters',
    'https://www.googleapis.com/auth/webmasters.readonly'
  ].freeze

  # Coverage states for which an indexing request is submitted.
  INDEXABLE_STATUSES = [
    "Discovered - currently not indexed",
    "Crawled - currently not indexed",
    "URL is unknown to Google",
    "Forbidden",
    "Error"
  ].freeze

  # A page whose last crawl is older than this many days is re-submitted.
  MAX_CRAWL_AGE_DAYS = 45

  # Number of failed inspections after which a fresh run is scheduled.
  RESCHEDULE_AFTER_ERRORS = 50

  # Launch the worker with your sitemap URL.
  #
  # Any StandardError raised while inspecting a single URL is reported on
  # stdout and counted. When the count reaches RESCHEDULE_AFTER_ERRORS a new
  # run of this worker is scheduled 10 days later; the current run still
  # continues with the remaining URLs.
  #
  # @param sitemap_url [String] URL of a gzip-compressed XML sitemap
  def perform(sitemap_url)
    access_token = google_access_token
    error_api_count = 0

    sitemap_page_urls(sitemap_url).each do |url|
      begin
        check_indexed(url, access_token)
      rescue
        puts "Error Indexing API"
        error_api_count += 1
        # `==` (not `>=`) so the follow-up run is scheduled only once.
        if error_api_count == RESCHEDULE_AFTER_ERRORS
          GoogleIndexWorker.perform_in(10.days, sitemap_url)
        end
      end
    end
  end

  # Inspects one URL and enqueues GoogleIndexSinglePageWorker when its
  # coverage state is indexable or its last crawl is too old / missing.
  #
  # @param url [String] page URL to inspect
  # @param access_token [String] OAuth bearer token
  # @return [Boolean, nil] true when an indexing job was enqueued, false when
  #   none was needed, nil when the API returned an empty JSON document
  # @raise [RuntimeError] when the API answers with a non-2xx status
  # @raise [KeyError] when the response lacks inspectionResult/indexStatusResult
  # @raise [JSON::ParserError] when the response body is not valid JSON
  def check_indexed(url, access_token)
    uri = URI.parse(INSPECT_ENDPOINT)
    request = Net::HTTP::Post.new(uri)
    request.content_type = "application/json"
    request["Authorization"] = "Bearer #{access_token}"
    request["Accept"] = "application/json"
    request.body = JSON.dump({
      "inspectionUrl" => url,
      "siteUrl" => "YOUR SITE URL"
    })

    response = Net::HTTP.start(uri.hostname, uri.port, use_ssl: uri.scheme == "https") do |http|
      http.request(request)
    end

    # Fail loudly on error responses so the caller counts them as API errors
    # instead of relying on a NoMethodError from a missing key.
    unless response.is_a?(Net::HTTPSuccess)
      raise "URL Inspection API returned HTTP #{response.code} for #{url}"
    end

    parsed_json = JSON.parse(response.body)
    return unless parsed_json.present?

    index_status = parsed_json.fetch('inspectionResult').fetch('indexStatusResult')
    coverage_state = index_status['coverageState']

    if coverage_state == "Duplicate, Google chose different canonical than user"
      puts "Canonical: #{index_status['googleCanonical']}"
    end

    # The crawl time is only consulted when the coverage state alone does not
    # already call for indexing.
    needs_indexing = indexable_statuses.include?(coverage_state) ||
                     check_is_too_old(index_status['lastCrawlTime'])
    return false unless needs_indexing

    GoogleIndexSinglePageWorker.perform_async(url)
    true
  end

  # @return [Array<String>] coverage states that trigger an indexing request
  def indexable_statuses
    INDEXABLE_STATUSES
  end

  # Tells whether the given crawl timestamp is older than MAX_CRAWL_AGE_DAYS.
  # A missing or blank timestamp is treated as too old, because passing nil
  # to DateTime.parse would raise and the URL would never be submitted.
  #
  # @param last_checked_at [String, nil] timestamp string parseable by DateTime.parse
  # @return [Boolean]
  # @raise [ArgumentError] when the string is present but not a parseable date
  def check_is_too_old(last_checked_at)
    return true if last_checked_at.blank?

    (DateTime.now - DateTime.parse(last_checked_at)).to_i > MAX_CRAWL_AGE_DAYS
  end

  private

  # Builds service-account credentials from the JSON key file and exchanges
  # them for an access token. The block form of File.open closes the key file
  # once the credentials have been created.
  def google_access_token
    credentials = File.open('YOUR JSON KEY FROM GOOGLE CLOUD') do |key_io|
      Google::Auth::ServiceAccountCredentials.make_creds(
        json_key_io: key_io,
        scope: SCOPES
      )
    end
    credentials.fetch_access_token['access_token']
  end

  # Downloads the sitemap, gunzips it and returns the text of every
  # <url><loc> element.
  def sitemap_page_urls(sitemap_url)
    sitemap_xml = URI.open(sitemap_url) do |f|
      Zlib::GzipReader.new(f).read
    end
    Nokogiri::XML(sitemap_xml).css('url loc').map(&:text)
  end
end
------------------------------------
# Sidekiq worker that notifies the Google Indexing API that a single page has
# been updated (notification type "URL_UPDATED") and prints the outcome.
class GoogleIndexSinglePageWorker
  include Sidekiq::Worker
  sidekiq_options retry: 2
  require "google/apis/indexing_v3"

  SCOPES = ['https://www.googleapis.com/auth/indexing'].freeze
  PUBLISH_ENDPOINT = 'https://indexing.googleapis.com/v3/urlNotifications:publish'

  # Submits page_url to the Indexing API.
  #
  # A non-200 response is only reported on stdout; it does not raise, so the
  # configured Sidekiq retries apply to raised exceptions only.
  #
  # @param page_url [String] absolute URL of the page to submit
  def perform(page_url)
    access_token = google_access_token

    # The request must go to the publish endpoint; the OAuth scope URL
    # identifies a permission and is not an API endpoint.
    uri = URI.parse(PUBLISH_ENDPOINT)
    request = Net::HTTP::Post.new(uri)
    request.content_type = "application/json"
    request["Authorization"] = "Bearer #{access_token}"
    request["Accept"] = "application/json"
    request.body = JSON.dump({
      "url" => page_url,
      "type" => "URL_UPDATED"
    })

    response = Net::HTTP.start(uri.hostname, uri.port, use_ssl: uri.scheme == "https") do |http|
      http.request(request)
    end

    if response.code == "200"
      puts "✅ Page #{page_url} submitted to be indexed."
    else
      puts "❌ Error indexing page #{page_url}"
    end
  end

  private

  # Builds service-account credentials from the JSON key file and exchanges
  # them for an access token. The block form of File.open closes the key file
  # once the credentials have been created.
  def google_access_token
    credentials = File.open('YOUR JSON KEY FROM GOOGLE CLOUD') do |key_io|
      Google::Auth::ServiceAccountCredentials.make_creds(
        json_key_io: key_io,
        scope: SCOPES
      )
    end
    credentials.fetch_access_token['access_token']
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment