Created
February 5, 2024 15:19
-
-
Save nicolrx/df71e6bb199c5c3385395ac0ad12ed7c to your computer and use it in GitHub Desktop.
Ruby on Rails (Sidekiq) worker that checks Google Search Console for sitemap pages that are not indexed, plus a second worker that submits each such page to Google for indexing.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Sidekiq worker that walks every URL of a sitemap, asks the Search Console
# URL Inspection API for its index status and enqueues
# GoogleIndexSinglePageWorker for each page that is not indexed or whose last
# crawl is older than MAX_CRAWL_AGE_DAYS.
class GoogleIndexWorker
  include Sidekiq::Worker
  sidekiq_options retry: 0
  require "google/apis/indexing_v3"
  require 'google/apis/webmasters_v3'
  require "net/http"
  require "json"
  require "open-uri"
  require "zlib"
  require "date"
  include Rails.application.routes.url_helpers

  # OAuth scopes requested for the service account.
  SCOPES = [
    'https://www.googleapis.com/auth/webmasters',
    'https://www.googleapis.com/auth/webmasters.readonly'
  ].freeze

  # Path to the service-account JSON key (placeholder, replace with your own).
  SERVICE_ACCOUNT_KEY_PATH = 'YOUR JSON KEY FROM GOOGLE CLOUD'

  # Search Console property the inspected URLs belong to (placeholder).
  SITE_URL = "YOUR SITE URL"

  INSPECT_ENDPOINT = "https://searchconsole.googleapis.com/v1/urlInspection/index:inspect"

  # Coverage states for which a page is submitted for indexing.
  INDEXABLE_STATUSES = [
    "Discovered - currently not indexed",
    "Crawled - currently not indexed",
    "URL is unknown to Google",
    "Forbidden",
    "Error"
  ].freeze

  DUPLICATE_CANONICAL_STATE = "Duplicate, Google chose different canonical than user"

  # A crawl older than this many (whole) days triggers a new indexing request.
  MAX_CRAWL_AGE_DAYS = 45

  # Number of failed inspections after which the run is abandoned.
  MAX_API_ERRORS = 50

  # First two bytes of every gzip stream.
  GZIP_MAGIC_BYTES = [0x1f, 0x8b].freeze

  # Launch the worker with your sitemap URL.
  #
  # @param sitemap_url [String] URL of an XML sitemap (gzipped or plain)
  # @return [void]
  def perform(sitemap_url)
    access_token = fetch_access_token
    error_api_count = 0

    sitemap_page_urls(sitemap_url).each do |url|
      begin
        check_indexed(url, access_token)
      rescue StandardError
        puts "Error Indexing API"
        error_api_count += 1
        next if error_api_count < MAX_API_ERRORS

        # Too many failures (presumably quota exhaustion - TODO confirm):
        # schedule a fresh run and stop instead of continuing to call a
        # failing API for the rest of the sitemap.
        GoogleIndexWorker.perform_in(10.days, sitemap_url)
        return
      end
    end
  end

  # Inspects one URL and enqueues GoogleIndexSinglePageWorker when the page
  # is in one of the indexable states or its last crawl is too old.
  #
  # @param url [String] page URL to inspect
  # @param access_token [String] OAuth bearer token
  # @return [Boolean, nil] true when indexing was requested, false when not,
  #   nil when the API answered with an empty JSON document
  # @raise [RuntimeError] when the response carries no index status
  def check_indexed(url, access_token)
    uri = URI.parse(INSPECT_ENDPOINT)
    request = Net::HTTP::Post.new(uri)
    request.content_type = "application/json"
    request["Authorization"] = "Bearer #{access_token}"
    request["Accept"] = "application/json"
    request.body = JSON.dump({
      "inspectionUrl" => url,
      "siteUrl" => SITE_URL
    })

    response = Net::HTTP.start(uri.hostname, uri.port, use_ssl: uri.scheme == "https") do |http|
      http.request(request)
    end

    parsed_json = JSON.parse(response.body)
    return unless parsed_json.present?

    index_status = parsed_json.dig('inspectionResult', 'indexStatusResult')
    # An error payload has no inspection result; fail with a readable message
    # (the caller counts it) instead of a NoMethodError on nil.
    unless index_status
      raise "URL Inspection API returned no index status for #{url} (HTTP #{response.code})"
    end

    coverage_state = index_status['coverageState']
    if coverage_state == DUPLICATE_CANONICAL_STATE
      puts "Canonical: #{index_status['googleCanonical']}"
    end

    # Coverage state is checked first so a page without a crawl timestamp is
    # still submitted.
    needs_indexing = indexable_statuses.include?(coverage_state) ||
                     check_is_too_old(index_status['lastCrawlTime'])
    return false unless needs_indexing

    GoogleIndexSinglePageWorker.perform_async(url)
    true
  end

  # @return [Array<String>] coverage states that trigger an indexing request
  def indexable_statuses
    INDEXABLE_STATUSES
  end

  # Tells whether the last crawl happened more than MAX_CRAWL_AGE_DAYS whole
  # days ago. A missing timestamp is treated as too old.
  #
  # @param last_checked_at [String, nil] timestamp parseable by DateTime.parse
  # @return [Boolean]
  def check_is_too_old(last_checked_at)
    return true if last_checked_at.nil? || last_checked_at.to_s.strip.empty?

    (DateTime.now - DateTime.parse(last_checked_at)).to_i > MAX_CRAWL_AGE_DAYS
  end

  private

  # Builds service-account credentials and returns an access token.
  def fetch_access_token
    client = Google::Apis::WebmastersV3::WebmastersService.new
    # Block form closes the key file once the credentials have been built.
    client.authorization = File.open(SERVICE_ACCOUNT_KEY_PATH) do |key_io|
      Google::Auth::ServiceAccountCredentials.make_creds(json_key_io: key_io, scope: SCOPES)
    end
    client.authorization.fetch_access_token['access_token']
  end

  # Downloads the sitemap and returns the text of every <url><loc> entry.
  def sitemap_page_urls(sitemap_url)
    Nokogiri::XML(fetch_sitemap_xml(sitemap_url)).css('url loc').map(&:text)
  end

  # Downloads the sitemap body, gunzipping it only when it is gzip data.
  def fetch_sitemap_xml(sitemap_url)
    raw = URI.open(sitemap_url, &:read)
    return raw unless raw.byteslice(0, 2).bytes == GZIP_MAGIC_BYTES

    Zlib.gunzip(raw)
  end
end
------------------------------------ | |
# Sidekiq worker that notifies the Google Indexing API that a single page
# has been updated, so Google can (re)crawl it.
class GoogleIndexSinglePageWorker
  include Sidekiq::Worker
  sidekiq_options retry: 2
  require "google/apis/indexing_v3"
  require "net/http"
  require "json"

  # OAuth scope requested for the service account.
  SCOPES = ['https://www.googleapis.com/auth/indexing'].freeze

  # Indexing API endpoint that receives URL notifications.
  PUBLISH_ENDPOINT = 'https://indexing.googleapis.com/v3/urlNotifications:publish'

  # Path to the service-account JSON key (placeholder, replace with your own).
  SERVICE_ACCOUNT_KEY_PATH = 'YOUR JSON KEY FROM GOOGLE CLOUD'

  # Sends a URL_UPDATED notification for the page and prints the outcome.
  #
  # @param page_url [String] absolute URL of the page to submit
  # @return [void]
  def perform(page_url)
    # Create a client object
    client = Google::Apis::IndexingV3::IndexingService.new
    # Block form closes the key file once the credentials have been built.
    client.authorization = File.open(SERVICE_ACCOUNT_KEY_PATH) do |key_io|
      Google::Auth::ServiceAccountCredentials.make_creds(json_key_io: key_io, scope: SCOPES)
    end
    access_token = client.authorization.fetch_access_token['access_token']

    # POST to the publish endpoint; the OAuth scope URL only identifies the
    # permission and is not an API endpoint.
    uri = URI.parse(PUBLISH_ENDPOINT)
    request = Net::HTTP::Post.new(uri)
    request.content_type = "application/json"
    request["Authorization"] = "Bearer #{access_token}"
    request["Accept"] = "application/json"
    request.body = JSON.dump({
      "url" => page_url,
      "type" => "URL_UPDATED"
    })

    response = Net::HTTP.start(uri.hostname, uri.port, use_ssl: uri.scheme == "https") do |http|
      http.request(request)
    end

    if response.code == "200"
      puts "✅ Page #{page_url} submitted to be indexed."
    else
      puts "❌ Error indexing page #{page_url}"
    end
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment