-
-
Save inky/152816 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby
# quick Google PageRank lookup
# usage: ~/bin/pagerank jamiedubs.com
# => 6
#
# by Vsevolod S. Balashov <vsevolod @ balashovREMOVETHIS.name>
# hacks/binify by Jamie Dubs <http://jamiedubs.com>
# based on 3rd party code snippets (see comments)
# TODO: make into a gem... the existing googlePR one is broken!
# Standard-library dependencies: URI parsing plus HTTP fetching through open-uri.
require 'uri'
require 'open-uri'
# http://blog.outer-court.com/archive/2004_06_27_index.html#108834386239051706
module SEO
  # Looks up the Google PageRank of a URL through the toolbarqueries endpoint.
  #
  # The endpoint expects a "ch" checksum derived from the queried URL; #cn
  # computes it, #request_uri builds the full query and #page_rank fetches
  # and parses the answer.
  #
  #   SEO::GooglePR.new('jamiedubs.com').page_rank # => Integer or nil
  class GooglePR
    # Modulus that keeps every intermediate value in the unsigned 32-bit range.
    M = 0x100000000

    # Characters outside this set are percent-encoded in the query string.
    # Note that "_" is deliberately NOT in the safe set, so it becomes %5F.
    UNSAFE_QUERY_CHARS = /[^-.!~*'()a-zA-Z\d]/

    # The URL whose rank is looked up.
    attr_accessor :uri

    # @param uri [String] URL (or bare host name) to look up
    def initialize(uri)
      @uri = uri
    end

    # Calculates the checksum for the current URL.
    #
    # The first-pass hash of "info:<uri>" is rearranged, expanded into an
    # 80-byte buffer (20 little-endian words, each 9 less than the previous),
    # hashed again and prefixed with the digit 6.
    #
    # @return [Integer] value for the "ch" query parameter
    def cn
      ch = old_cn
      ch = ((ch / 7) << 2) | ((ch % 13) & 7)
      new_url = []
      20.times do
        new_url.concat(i2c(ch))
        ch -= 9
      end
      "6#{old_cn(new_url)}".to_i
    end

    # Builds the toolbar query URL for the current URL.
    #
    # @return [String] complete request URL including checksum and escaped query
    def request_uri
      # http://www.bigbold.com/snippets/posts/show/1260 + _ -> %5F
      'http://toolbarqueries.google.com/search?client=navclient-auto&hl=en' \
        "&ch=#{cn}&ie=UTF-8&oe=UTF-8&features=Rank&q=info:#{escape(@uri)}"
    end

    # Fetches the rank for +uri+ (defaults to the stored URL).
    #
    # The given URL replaces the stored one. On an HTTP error the error is
    # printed, the call pauses for two seconds and nil is returned.
    #
    # @param uri [String] URL to look up
    # @return [Integer, nil] the rank, or nil when the response holds no
    #   "Rank_1:<d>:<rank>" entry or the request failed with an HTTP error
    def page_rank(uri = @uri)
      @uri = uri
      # URI#open (from open-uri) works on every Ruby version, unlike Kernel#open
      # with a URL; #read works whether the body arrives as StringIO or Tempfile.
      body = URI.parse(request_uri).open { |response| response.read }
      rank = body[/Rank_1:\d:(\d+)/, 1]
      rank && rank.to_i
    rescue OpenURI::HTTPError => e
      puts "(!!) Error fetching URL: #{e} -- #{e.backtrace.join("\n")}"
      sleep 2
      nil
    end

    private

    # Percent-encodes every character matching UNSAFE_QUERY_CHARS, byte by byte
    # (stand-in for URI.escape, which no longer exists on current Rubies).
    def escape(text)
      text.gsub(UNSAFE_QUERY_CHARS) do |chunk|
        chunk.each_byte.map { |byte| format('%%%02X', byte) }.join
      end
    end

    # One mixing step: (a - b - c) xor d, all reduced modulo 2**32.
    def m1(a, b, c, d)
      (((a + (M - b) + (M - c)) % M) ^ (d % M)) % M
    end

    # Splits an integer into its four low-order bytes, least significant first.
    def i2c(i)
      [i & 0xff, i >> 8 & 0xff, i >> 16 & 0xff, i >> 24 & 0xff]
    end

    # Reads four byte values starting at index +k+ as a little-endian integer.
    # Positions past the end of +s+ count as zero (nil.to_i).
    #
    # @param s [Array<Integer>] byte values
    # @param k [Integer] start index
    def c2i(s, k = 0)
      ((s[k + 3].to_i * 0x100 + s[k + 2].to_i) * 0x100 + s[k + 1].to_i) * 0x100 + s[k].to_i
    end

    # Runs three rounds of m1 steps with varying shifts over the triple.
    #
    # @return [Array<Integer>] the mixed [a, b, c], each below 2**32
    def mix(a, b, c)
      a %= M
      b %= M
      c %= M
      a = m1(a, b, c, c >> 13); b = m1(b, c, a, a << 8);  c = m1(c, a, b, b >> 13)
      a = m1(a, b, c, c >> 12); b = m1(b, c, a, a << 16); c = m1(c, a, b, b >> 5)
      a = m1(a, b, c, c >> 3);  b = m1(b, c, a, a << 10); c = m1(c, a, b, b >> 15)
      [a, b, c]
    end

    # First-pass hash over +iurl+, consumed in 12-byte chunks.
    #
    # @param iurl [String, Array<Integer>] text or raw byte values to hash
    # @return [Integer] the third word of the final mix
    def old_cn(iurl = 'info:' + @uri)
      # Indexing a String yields one-character Strings on Ruby >= 1.9, whose
      # #to_i is 0 for non-digits; hash the numeric byte values instead.
      bytes = iurl.is_a?(String) ? iurl.bytes.to_a : iurl
      a = 0x9E3779B9
      b = 0x9E3779B9
      c = 0xE6359A60
      len = bytes.size
      k = 0
      while len >= k + 12
        a += c2i(bytes, k)
        b += c2i(bytes, k + 4)
        c += c2i(bytes, k + 8)
        a, b, c = mix(a, b, c)
        k += 12
      end
      # Tail: the remaining (< 12) bytes, with the total length folded into c.
      a += c2i(bytes, k)
      b += c2i(bytes, k + 4)
      c += (c2i(bytes, k + 8) << 8) + len
      mix(a, b, c).last
    end
  end
end
# run: print the rank of the URL given as the first command-line argument
if __FILE__ == $0
  url = ARGV.first
  # Without an argument the checksum code would fail on 'info:' + nil;
  # exit with a usage hint instead.
  abort "usage: #{File.basename($0)} <url>" if url.nil? || url.empty?
  puts SEO::GooglePR.new(url).page_rank
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment