Skip to content

Instantly share code, notes, and snippets.

@kunalgrover05
Last active March 6, 2017 15:01
Show Gist options
  • Save kunalgrover05/75c31dc48fb44e63616409794b383b71 to your computer and use it in GitHub Desktop.
Save kunalgrover05/75c31dc48fb44e63616409794b383b71 to your computer and use it in GitHub Desktop.
Node.js Osmosis-based crawler script for Google search results. Read: https://crondev.wordpress.com/2017/03/05/web-crawling-with-nodejs-its-an-interesting-world/
// href of the "next page" link captured from the results page currently being
// crawled. Reset at the start of every open_page() call and left undefined
// when the page yields no such link.
var nextLink;

/**
 * Crawl one search results page and then continue with the following page.
 *
 * Logs the URL, loads it through `osmosis`, and for every data object emitted
 * by the chain logs the object and remembers its `nextLink` value. When the
 * chain reports it is done, the remembered link is resolved against
 * https://www.google.co.in/ and crawled in turn. The crawl stops once a page
 * produces no `nextLink`, instead of requesting ".../undefined" forever.
 *
 * NOTE(review): `osmosis` is not defined in this snippet; it must already be
 * in scope (e.g. `var osmosis = require('osmosis');`) - confirm.
 *
 * @param {string} url - Absolute URL of the results page to open.
 * @returns {undefined} Nothing; results are reported through console.log.
 */
function open_page(url) {
  console.log("Opening " + url);

  // Forget the previous page's link so a page without a "next" anchor cannot
  // send the crawler back to a page it has already visited.
  nextLink = undefined;

  osmosis.get(url)
    // Pagination anchor: its href is stored on the data object as `nextLink`.
    .find('#nav td:last a')
    .set({
      'nextLink': '@href'
    })
    // Fields collected for each element matching '.g'.
    .find('.g')
    .set({
      'title': '.r',
      'url': 'cite',
      'link': '.r @href',
      'text': '.st'
    })
    // Visit the linked page and record what its 'title' selector matches.
    .follow('.r @href')
    .set({
      'pageText': 'title'
    })
    .data(function (l) {
      // Only overwrite when a link is present, so one data object lacking the
      // field does not erase a link captured earlier on the same page.
      if (l['nextLink']) {
        nextLink = l['nextLink'];
      }
      console.log(l);
    })
    .error(console.log)
    .debug(console.log)
    .done(function () {
      if (!nextLink) {
        // No further results page: end the crawl rather than recursing.
        return;
      }
      // URL resolution handles both "/search?..." and "search?..." hrefs
      // without producing a double slash after the host.
      open_page(new URL(nextLink, 'https://www.google.co.in/').href);
    });
}
// Entry point: begin crawling at the first results page for "random search".
var startUrl = 'https://www.google.co.in/search?q=random+search';
open_page(startUrl);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment