Created
December 23, 2016 11:26
-
-
Save jthatch/0e1613518c05136997064bed8d98a5a1 to your computer and use it in GitHub Desktop.
Scan known Google proxy IP subnets to determine whether the IPs in them are Google proxies. This was written to help detect traffic originating from Google's "data saver" feature in mobile Chrome.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env node
/*
 * dnsresolve.js
 * Resolves IP ranges to determine whether they are Google proxies.
 *
 * TIPS for increasing speed:
 * Run on a multi-core system
 * ulimit -n 40000
 *
 * Examples:
 * ./dnsresolve.js 62.249.*.*
 *
 * @author jamest
 * @date 20/12/2016
 */
'use strict';

const cluster = require('cluster');
const dns = require('dns');
const EventEmitter = require('events').EventEmitter;
const fs = require('fs');
const os = require('os');
const path = require('path');
const util = require('util');

const chalk = require('chalk');
/**
 * Reverse-DNS scanner that checks whether the IPs of one or more ranges
 * resolve to Google proxy hostnames.
 *
 * The range is taken from the first command-line argument (process.argv[2]);
 * when none is given, a built-in list of known Google proxy subnets is used.
 * Instances are EventEmitters: lookup() emits 'lookup' and 'skipped' events.
 * @constructor
 */
function Rdns() {
    // Each subnet is listed once; parseRange() de-duplicates anyway.
    this.range = process.argv[2] || [
        '64.233.172.*', // Provided by Akay
        '64.233.173.*',
        '64.9.249.*',
        '66.102.6.*',
        '66.102.7.*',
        '66.102.8.*',
        '66.102.9.*',
        '66.249.80.*',
        '66.249.82.*',
        '66.249.83.*',
        '66.249.84.*',
        '66.249.85.*',
        '66.249.88.*',
        '66.249.93.*',
    ];

    // Deliberately NOT global: a /g regex keeps `lastIndex` between exec()/test()
    // calls, which makes it miss matches when reused on a different hostname.
    this.re = /google\-proxy\-[0-9\-]+\.google\.com/i;

    // Hard limit, feel free to raise it, but more than 4 workers is overkill.
    // At least one worker is required even if the CPU list comes back empty.
    this.workers = Math.max(1, Math.min(os.cpus().length, 4));
    this.concurrentResolves = this.workers * 4;

    // internal variables
    this._ips = [];             // IPs still waiting to be dispatched
    this._proxies = [];         // format [[ip, address], .. n]
    this._workersFinished = 0;
    this._resolved = 0;
    this._skipped = 0;
    this._startTime = Date.now();

    EventEmitter.call(this);
}
/**
 * Entry point for both the master process and the forked workers.
 *
 * Master: forks the workers, listens for their 'lookup' / 'skipped' replies,
 * expands the configured range into individual IPs and primes the workers with
 * the first batch of lookups. Every reply triggers dispatchResolver() so the
 * replying worker is handed the next IP.
 *
 * Worker: performs the lookups requested by the master and relays the
 * resulting 'lookup' / 'skipped' events back to it.
 */
Rdns.prototype.main = function() {
    var _this = this;

    if (cluster.isMaster) {
        _this.log("Resolving from ", "c:green underline", this.range,
            " using ", "c:green underline", this.workers, " threads and ",
            "c:green underline", this.concurrentResolves, " concurrent resolves.");

        // spawn our workers immediately as this is non-blocking but takes a little while
        for (var i = 0; i < this.workers; i++) {
            cluster.fork();
        }

        // receive messages from our workers, specifically when a reverse lookup has finished
        Object.keys(cluster.workers).forEach(function(id){
            _this.log("c:bgBlue bold", "worker #" + id + ' is online');
            cluster.workers[id].on('message', function(msg) {
                if (!msg || !msg.cmd) {
                    return;
                }
                switch (msg.cmd) {
                    case 'lookup':
                        _this._resolved++;
                        _this._proxies.push([msg.data.ip, msg.data.address]);
                        _this.log("c:green", "Resolved ", "c:green bold", msg.data.ip,
                            "c:green", " to ", "c:green bold", msg.data.address,
                            "c:green", " in " + _this.runTime(msg.data.duration));
                        _this.dispatchResolver(id);
                        break;
                    case 'skipped':
                        _this._skipped++;
                        _this.log("c:red", "Skipped ", "c:red bold", msg.data.ip,
                            "c:red", " due to ", "c:red bold", msg.data.err
                        );
                        _this.dispatchResolver(id);
                        break;
                }
            });
        });

        // parseRange() returns an array, which is always truthy, so the length
        // has to be checked as well. Exit non-zero so callers can detect it.
        this._ips = this.parseRange(this.range);
        if (!this._ips || !this._ips.length) {
            this.log("Unable to parse range: " + this.range);
            process.exit(1);
        }
        _this.log("c:bgGreen bold", 'Found ' + _this._ips.length + ' ips');

        // Prime the workers round-robin with up to `concurrentResolves` lookups.
        // Worker ids are assumed to run from 1 to `workers`.
        var lastWorker = 1;
        var lookupsInProgress = 0;
        while ( ( lookupsInProgress < _this.concurrentResolves ) && _this._ips.length ) {
            var ip = _this._ips.shift();
            lastWorker = lastWorker > _this.workers ? 1 : lastWorker;
            _this.broadcastToWorkers(lastWorker++, 'lookup', ip);
            lookupsInProgress++;
        }
    }
    // worker
    else {
        // receive messages from master
        process.on('message', function(msg) {
            if (!msg || !msg.cmd) {
                return;
            }
            switch(msg.cmd) {
                case 'lookup':
                    _this.lookup(msg.data);
                    break;
                case 'shutdown':
                    process.disconnect();
                    break;
                default:
                    _this.log('Invalid msg: ' + msg.cmd + ': ' + JSON.stringify(msg.data));
                    break;
            }
        });

        // relay the outcome of every lookup back to the master
        this.on('lookup', function (result) {
            _this.broadcastToMaster('lookup', result);
        });
        this.on('skipped', function (result) {
            _this.broadcastToMaster('skipped', result);
        });
    }
};
/**
 * Expands one or more wildcard patterns into a sorted, de-duplicated list of
 * concrete addresses.
 *
 * NOTE: every `*` is expanded to 1-255 (0 is not generated), e.g.
 * parseRange('62.249.*.*') yields 255 * 255 = 65025 ips.
 * Entries without a `*` are passed through unchanged; no validation is done.
 *
 * @param {string|string[]} ipRange - a pattern such as '62.249.*.*', or an array of patterns
 * @returns {string[]} the expanded addresses in lexicographic order
 */
Rdns.prototype.parseRange = function(ipRange) {
    // Work stack of patterns that may still contain wildcards. Each pattern is
    // expanded on its own, so mixing literal IPs with multi-wildcard patterns
    // (or passing an empty list) can neither stop early nor loop forever.
    var pending = Array.isArray(ipRange) ? ipRange.slice() : [ipRange];
    var ips = [];

    while (pending.length) {
        var pattern = pending.pop();
        var offset = pattern.indexOf('*');
        if (offset === -1) {
            ips.push(pattern);
            continue;
        }
        var before = pattern.slice(0, offset);
        var after = pattern.slice(offset + 1);
        for (var octet = 1; octet < 256; octet++) {
            pending.push(before + octet + after);
        }
    }

    // sort and remove any dupes (duplicates are adjacent once sorted)
    return ips.sort().filter(function(item, pos, arr) {
        return pos === 0 || item !== arr[pos - 1];
    });
};
/**
 * Called by the master each time a worker reports the outcome of a lookup.
 * Hands the next queued IP to that worker, or, once the queue is empty and
 * every IP has been answered, prints the summary, tells the workers to shut
 * down and writes the results to disk (exactly once).
 *
 * Completion is detected by comparing the number of replies received
 * (`_resolved + _skipped`) with the total number of IPs in the range, so it
 * does not fire while lookups are still in flight.
 * NOTE(review): assumes a worker sends exactly one reply per IP - confirm.
 *
 * @param {string|number} id - id of the worker that just replied
 */
Rdns.prototype.dispatchResolver = function(id) {
    // If we still have ips available to resolve, send the next one to the worker id
    if (this._ips.length) {
        this.broadcastToWorkers(id, 'lookup', this._ips.shift());
        return;
    }

    // Guard against stray replies arriving after we have already wrapped up.
    if (this._finished) {
        return;
    }

    // The queue is consumed as it is dispatched, so the original size is
    // recomputed (once) from the configured range.
    if (typeof this._totalIps !== 'number') {
        this._totalIps = this.parseRange(this.range).length;
    }

    if (this._resolved + this._skipped < this._totalIps) {
        return; // some lookups are still in flight
    }

    this._finished = true;
    this.log();
    this.log("c:blue bold", "Resolved " + this._resolved + " ips in " + this.runTime());
    this.broadcastToWorkers(false, 'shutdown');
    this.saveProxies();
};
/**
 * Writes the collected proxies to a text file in the current directory, one
 * "ip<TAB>hostname" pair per line, and logs the file name.
 *
 * The file name is derived from the range: a single pattern such as
 * 66.249.*.* becomes 66.249.x.x.txt, while a list of ranges is saved as
 * google-proxies-<dateStamp>.txt.
 */
Rdns.prototype.saveProxies = function() {
    var target = (typeof this.range == 'object')
        ? 'google-proxies-' + this.dateStamp() + '.txt'
        : this.range.replace(/\*/g, 'x') + '.txt';

    // each entry is an [ip, address] pair
    var rows = this._proxies.map(function(entry) {
        return entry[0] + "\t" + entry[1] + "\n";
    });

    fs.writeFileSync(target, rows.join(''), "utf8");
    this.log("Saved to ", "c:green bold", target);
};
/**
 * Reverse-resolves an IP address and reports whether any of its hostnames
 * matches the Google proxy pattern (`this.re`).
 *
 * Emits exactly one event per call:
 *  - 'lookup'  {err: null, ip, address, duration} for the first matching hostname
 *  - 'skipped' {err, ip, duration} on a DNS error, no hostnames, or no match
 * `duration` carries the start timestamp (ms since epoch) of the lookup.
 *
 * @param {string} ip - the address to resolve
 */
Rdns.prototype.lookup = function(ip) {
    var _this = this;
    var startTime = new Date().getTime();

    function skip(reason) {
        _this.emit('skipped', {err: reason, ip: ip, duration: startTime});
    }

    try {
        dns.reverse(ip, function(err, domains) {
            if (err) {
                skip(err.code + ": " + err.message);
                return;
            }
            if (!domains) {
                skip('NO_DOMAINS');
                return;
            }

            // ips can have multiple rdns names; the first match is enough.
            // Emitting once per ip keeps the master's reply count and dispatching in step.
            var match = domains.find(function(domain) {
                // `this.re` may carry the `g` flag, which makes test()/exec()
                // resume from the previous match position; always start at 0.
                _this.re.lastIndex = 0;
                return _this.re.test(domain);
            });

            if (typeof match === 'undefined') {
                skip('NO_MATCHES');
                return;
            }
            _this.emit('lookup', {err: null, ip: ip, address: match, duration: startTime});
        });
    } catch (err) {
        // dns.reverse() can throw synchronously, e.g. for a malformed address
        skip(err.code + ": " + err.message);
    }
};
/**
 * Sends a {cmd, data} message to cluster workers.
 *
 * When `id` is truthy and names a known worker, only that worker receives the
 * message. In every other case (falsy id, or an id with no matching worker)
 * the message goes to all workers.
 *
 * @param {boolean|number|string} id - target worker id, or a falsy value for all workers
 * @param {string} cmd - command name
 * @param {*} data - payload sent along with the command
 * @return {number} how many messages were sent
 */
Rdns.prototype.broadcastToWorkers = function(id, cmd, data){
    // single, known recipient
    var target = id ? cluster.workers[id] : undefined;
    if (typeof target !== 'undefined') {
        target.send({ cmd: cmd, data: data });
        return 1;
    }

    // everyone
    var recipients = Object.keys(cluster.workers);
    for (var n = 0; n < recipients.length; n++) {
        cluster.workers[recipients[n]].send({cmd : cmd, data : data});
    }
    return recipients.length;
};
/**
 * Sends a {cmd, data} message from a worker to the master process.
 * @param {string} cmd - command name
 * @param {*} data - payload sent along with the command
 */
Rdns.prototype.broadcastToMaster = function(cmd, data) {
    var envelope = { cmd: cmd, data: data };
    process.send(envelope);
};
/**
 * Formats a date as DD-MM-YYYY, based on its ISO (UTC) representation.
 * @param {Date} [dateObj] - date to format; defaults to the current time
 * @returns {string}
 */
Rdns.prototype.dateStamp = function(dateObj) {
    var when = dateObj || new Date();
    var iso = when.toISOString();               // e.g. 2016-12-23T11:26:00.000Z
    var datePart = iso.split('T')[0];           // 2016-12-23
    var pieces = datePart.split('-');
    pieces.reverse();                           // [DD, MM, YYYY]
    return pieces.join('-');
};
/**
 * I like nice looking log output
 * Little log function to take advantage of ansi colours on the CL.
 * Takes as many arguments as you want, they'll be joined together to form the log string.
 * Objects and arrays are rendered as JSON.
 * If you want to style the NEXT argument, pass an argument starting with c:
 * followed by one or more space-separated chalk styles, e.g.
 * this.log('c:bgGreen bold', 'This is bold text with a green background');
 * Every line is prefixed with the elapsed run time, padded to a fixed width.
 */
Rdns.prototype.log = function() {
    var args = Array.prototype.slice.call(arguments);

    // objects/arrays as JSON, everything else via String()
    function stringify(value) {
        return typeof value == 'object' ? String(JSON.stringify(value)) : String(value);
    }

    var msg = '';
    for (var i = 0; i < args.length; i++) {
        var arg = stringify(args[i]);

        if (arg.substr(0, 2) != 'c:') {
            msg += arg;
            continue;
        }

        // a style marker with nothing after it has nothing to style
        if (i + 1 >= args.length) {
            break;
        }

        // consume the following argument as the styled text
        var text = stringify(args[++i]);
        var styles = arg.substr(2).split(' ').filter(Boolean);
        // walk chalk.a.b.c for any number of styles
        var painter = styles.reduce(function(styler, name) {
            return styler && styler[name];
        }, chalk);
        // unknown style names fall back to plain text instead of throwing
        msg += typeof painter == 'function' ? painter(text) : text;
    }

    var prefix = this.runTime() + chalk.grey('> ');
    // pad based on the visible width, i.e. without the ansi escape codes
    var noAnsi = prefix.replace(/[\u001b\u009b][[()#;?]*(?:[0-9]{1,4}(?:;[0-9]{0,4})*)?[0-9A-ORZcf-nqry=><]/g, '');
    var padding = Array(12).join(' ');
    var maxLength = 12;
    console.log(prefix + padding.substring(0, maxLength - noAnsi.length) + msg);
};
/**
 * Returns the time elapsed since `startTime` as a short human-readable
 * string, e.g. "0.05s", "1.50s", "2m 3.25s" or "1d 2h 3m 4s".
 * Seconds are shown with two decimals (hundredths) unless the duration spans
 * hours or days.
 *
 * @param {number} [startTime] - start timestamp in ms since epoch;
 *                               defaults to the time this instance was created
 * @returns {string}
 */
Rdns.prototype.runTime = function(startTime) {
    var since = typeof startTime !== 'undefined' ? startTime : this._startTime;
    var diff = Date.now() - since;
    if (!(diff > 0)) {
        diff = 0; // negative or non-numeric differences are reported as 0s
    }

    var ms = diff % 1e3;
    var totalSecs = Math.floor(diff / 1e3);
    var days = Math.floor(totalSecs / 86400);
    var hours = Math.floor((totalSecs % 86400) / 3600);
    var mins = Math.floor((totalSecs % 3600) / 60);
    var secs = totalSecs % 60;

    var showMs = true;
    var str = '';
    if (days > 0) {
        str += chalk.bold(days) + 'd ';
        showMs = false;
    }
    if (hours > 0) {
        str += chalk.bold(hours) + 'h ';
        showMs = false;
    }
    if (mins > 0) {
        str += chalk.bold(mins) + 'm ';
    }
    if (( secs > 0 && showMs ) || ( secs == 0 && ms > 0 )) {
        // zero-pad to three digits first, so 5ms reads .00 and 50ms reads .05
        var hundredths = ('00' + ms).slice(-3).substr(0, 2);
        str += chalk.bold(secs) + '.' + chalk.bold(hundredths) + 's';
    }
    else {
        str += chalk.bold(secs) + 's';
    }
    return str;
};
/**
 * Prints the command-line usage, including an example, via log().
 */
Rdns.prototype.usage = function() {
    // one entry per log() call; an empty entry prints a blank line
    var lines = [
        [],
        ['c:bold', 'Usage: ./dnsresolve.js [range]'],
        [],
        ["Range should be in the format: 62.249.*.*"],
        [],
        ["Examples:"],
        ["c:bold", "./dnsresolve.js 62.249.9.*"],
        []
    ];
    for (var n = 0; n < lines.length; n++) {
        this.log.apply(this, lines[n]);
    }
};
// Make Rdns an EventEmitter so lookup() can emit 'lookup' / 'skipped'.
// NOTE(review): this runs after the prototype methods above were assigned;
// it relies on util.inherits() re-parenting the existing prototype rather
// than replacing it - confirm for the Node versions you support.
util.inherits(Rdns, EventEmitter);

// If we are being run as a command line app (this also holds for the forked
// cluster workers, which re-run this script), execute our program.
// `require.main === module` also works when the script is started through a
// symlink, where process.argv[1] and __filename can differ.
if (require.main === module) {
    var rdns = new Rdns();
    rdns.main();
}
else {
    // loaded via require(): expose the constructor
    module.exports = Rdns;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment