Created
November 19, 2015 14:39
-
-
Save benilovj/75249065df6ea1736bd6 to your computer and use it in GitHub Desktop.
A Ruby script that takes a list of domains and tries to determine whether each site runs WordPress.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'open-uri'
require 'open_uri_redirections'
require 'timeout'
require 'active_support/core_ext/array'
require 'pp'
# Downloads the body found at +url+ as a best-effort probe.
#
# URI.open is used instead of the bare Kernel#open: from Ruby 3.0 onward
# Kernel#open no longer understands URLs. The block form makes sure the
# underlying IO is closed as soon as the body has been read.
#
# The +allow_redirections+ option is the one added by the
# open_uri_redirections gem that this file requires.
#
# @param url [String] URL to download
# @param timeout [Numeric] overall time budget in seconds
# @return [String, nil] the response body, or nil when the request fails
#   for any reason or does not finish within +timeout+ seconds
def fetch(url, timeout: 2)
  # puts "Fetching #{url}"
  Timeout.timeout(timeout) do
    URI.open(url, allow_redirections: :safe, &:read)
  end
rescue StandardError
  # Deliberately best-effort: HTTP errors, DNS failures, refused
  # connections and Timeout::Error (a StandardError) all mean "no answer".
  nil
end
# Guesses whether the site at +domain+ runs WordPress.
#
# Two probes are tried in order, using the file's fetch helper:
#   1. "<domain>/wp-admin" - any non-nil response counts as a match.
#   2. the domain itself   - a match when the body mentions "wp-content".
#
# NOTE(review): probe 1 accepts any body at all, so a site that answers
# every path successfully would be reported as WordPress.
#
# @param domain [String] base URL of the site, as accepted by fetch
# @return [Array] a triple +[domain, wordpress?, marker]+ where marker is
#   "wp-admin", "wp-content" or nil when neither probe matched
def determine(domain)
  # Drop a trailing slash so the probe URL never becomes ".../​/wp-admin".
  admin_url = "#{domain.chomp('/')}/wp-admin"
  return [domain, true, "wp-admin"] if fetch(admin_url)

  homepage = fetch(domain)
  return [domain, true, "wp-content"] if homepage&.include?("wp-content")

  [domain, false, nil]
end
# Runs determine over every domain using up to +thread_count+ threads.
#
# The domains are cut into at most +thread_count+ contiguous groups and
# each group is processed sequentially on its own thread. A "." is
# printed per domain as a progress indicator. Each thread hands its
# results back through Thread#value, so no array is shared between
# threads.
#
# @param domains [Array<String>] domains to check; nil entries are skipped
# @param thread_count [Integer] maximum number of worker threads
#   (values below 1 are treated as 1)
# @return [Array<Array>] one determine result per domain, sorted by domain
def fetch_in_parallel(domains, thread_count)
  pending = domains.compact
  return [] if pending.empty?

  workers = [thread_count, 1].max
  group_size = (pending.size / workers.to_f).ceil

  threads = pending.each_slice(group_size).map do |domain_group|
    Thread.new do
      domain_group.map do |domain|
        print "."
        determine(domain)
      end
    end
  end

  # Thread#value joins the thread and returns its block's result.
  threads.flat_map(&:value).sort_by(&:first)
end
# Script entry point: read whitespace-separated domains from
# sorted_unique_domains.csv, check each one, then print one
# comma-joined result line per domain.
thread_count = 20
domains = File.read('sorted_unique_domains.csv').split # .take(300)
results = fetch_in_parallel(domains, thread_count)
# End the line of progress dots before the results are listed.
puts ""
results.each { |result| puts result.join(",") }
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment