Created
March 21, 2020 02:52
-
-
Save jpgninja/dc50731af7c4a85f17136b2ca957608d to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const axios = require('axios'); | |
const cheerio = require('cheerio'); | |
/**
 * Announces that the Google scrape is being stopped.
 *
 * The body only writes a banner to the console. None of the parameters are
 * read; they are kept so the existing call signature stays valid.
 *
 * @param host     Unused.
 * @param port     Unused.
 * @param options  Unused.
 * @param callback Unused (never invoked).
 */
const stop = function (host, port, options, callback) {
  console.log('\n\n-- Stopping scrape (Google) --\n');
};
/**
 * Mission accomplished: announces that the Google scrape has finished.
 *
 * Takes no arguments and returns nothing; it only writes a banner to the
 * console.
 */
const scrapeCompleteHandler = function () {
  console.log('\n\n-- Scrape complete (Google) --\n');
};
/**
 * Runs a Google search for `term`, optionally through an HTTP proxy.
 *
 * The exclusion " -site:www.tumblr.com" is appended to the term before it is
 * URL-encoded. The request asks for 100 results, with personalized search off
 * and results restricted to English.
 *
 * @param {string} term    Search term.
 * @param {string} [proxy] Proxy as "host:port". A missing or unparsable port
 *                         falls back to 80. An empty, omitted or non-string
 *                         value means the request is sent without a proxy.
 * @returns {Promise<object|false>} Resolves with the axios response, or with
 *                         `false` when the request failed (the failure is
 *                         logged, never rethrown).
 */
const search = (term, proxy = "") => {
  // Setup proxy.
  const [proxyHost, rawPort] = ("string" === typeof proxy ? proxy : "").split(":");
  const parsedPort = parseInt(rawPort, 10);
  const proxyPort = Number.isNaN(parsedPort) ? 80 : parsedPort;

  // Setup request.
  const requestConfig = {
    timeout: 5000,
    headers: {
      "User-Agent": "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_3; en-us) AppleWebKit/534.1+ (KHTML, like Gecko) Version/5.0 Safari/533.16",
    },
  };
  const hasProxy = "" !== proxyHost;
  if (hasProxy) {
    requestConfig.proxy = { host: proxyHost, port: proxyPort };
  }

  // Setup term (kept in a new binding so the caller's argument is not reassigned).
  const query = `${term} -site:www.tumblr.com`;

  // Setup search URL.
  const url = [
    `http://www.google.com/search?q=${encodeURIComponent(query)}&ie=UTF-8`, // Base URL.
    "&num=100", // Return 100 results.
    "&pws=0", // Personalized search on/off.
    "&lr=lang_en", // Interface language (for in case the proxy throws this off).
  ].join("");

  // Only mention the proxy when one is configured; reading it unconditionally
  // used to throw when no proxy host was given.
  if (hasProxy) {
    console.log("Googling '%s' using proxy (%s:%d)", query, proxyHost, proxyPort);
  } else {
    console.log("Googling '%s' without a proxy", query);
  }
  console.log("URL: %s", url);

  return axios.get(url, requestConfig).catch((err) => {
    const failure = err || {};

    console.log("ACK! Google error.");

    if (failure.response && "undefined" !== typeof failure.response.status) {
      // The server answered with a non-2xx status.
      console.log(failure.response.status, failure.response.statusText);
      console.log("Message: '%s'", failure.message);
      console.log(failure);
    } else if ("ECONNABORTED" === failure.code) {
      // Checked before the generic axios branch: timeouts are axios errors
      // too, so the generic branch would otherwise shadow this one.
      console.log("Message: '%s'", "Connection timed out.");
    } else if (failure.isAxiosError) {
      // No response at all (DNS failure, refused connection, ...).
      console.log("Message: '%s'", failure.message);
    } else {
      console.log("Well, this is embarrassing... I have no output.");
      console.log(err);
    }

    return false;
  });
};
/**
 * Scrapes Google SERP.
 *
 * Loads the response body with cheerio and collects one entry per `.rc .r`
 * element found under `div#search`. The result count and the collected
 * entries are logged to the console.
 *
 * @param {object|false} res Response whose `data` property holds the page
 *                           HTML. Any falsy value, or a value without `data`,
 *                           is treated as "no response".
 * @returns {Array<{title: string, url: string, description: string}>|undefined}
 *          The scraped entries, or `undefined` when there was no response.
 */
const scrape_results = (res) => {
  // Ensure we received a response. search() resolves with `false` on failure,
  // and a null/undefined value must not crash the property lookup either.
  if (!res || "undefined" === typeof res.data) {
    return;
  }

  const $ = cheerio.load(res.data);

  // Query once and reuse the selection for both the count and the iteration.
  const hits = $("div#search").find(".rc .r");
  const results = [];

  console.log("Found %d total results.", hits.length);

  // each(), not map(): we iterate for the side effect of filling `results`.
  hits.each((i, el) => {
    const $hit = $(el);

    results.push({
      title: $hit.find("h3").text(),
      url: $hit.find("a").attr("href"),
      description: $hit.find("div.s div span.st").text(),
    });
  });

  console.log("\n\n\n---");
  console.log("\n\n\nAll results\n\n\n");
  console.log(results);

  return results;
};
// Run: search for 'red lipstick' through the given proxy, then scrape the
// response. Any failure raised along the way is logged instead of rethrown.
async function run() {
  try {
    const response = await search('red lipstick', '196.54.30.47:80');
    scrape_results(response);
  } catch (err) {
    console.log('ERR! Fetch error.');
    console.log(err);
  }
}

run();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment