Iqlaas · August 1, 2017 00:59
diff --git a/influencer.js b/influencer.js
 var webdriver = require('selenium-webdriver'),
    chrome = require('selenium-webdriver/chrome'),
    By = webdriver.By,
    until = webdriver.until,
    Key = webdriver.Key;
    // Chrome launch options shared by the driver built below: the infobar and
    // notification switches are passed as command-line arguments, and the
    // credentials service preference is switched off (presumably to keep the
    // "save password" prompt away - confirm against Chrome's preference docs).
    var o = new chrome.Options();
    // o.addArguments('start-fullscreen');
    o.addArguments('disable-infobars', 'disable-notifications');
    o.setUserPreferences({
      credentials_enable_service: false
    });
    var parseFullName = require('parse-full-name').parseFullName;
    var findEmails = require('find-emails-in-string');
    var json2csv = require('json2csv');
    var fs = require('fs');
    var okrabyte = require("okrabyte");

 // Single Chrome WebDriver session, configured with the options in `o`;
 // `find` and `goToUser` both drive the browser through it.
 var driver = new webdriver.Builder()
   .withCapabilities(webdriver.Capabilities.chrome())
   .setChromeOptions(o)
   .build();


 //Helper: wait (up to 5000 ms) for an element matching the CSS selector `el` and return it.
 //The element is taken straight from driver.wait() instead of from a second
 //findElement() call, so the lookup can never run before the wait has finished.
 //  el      - CSS selector string of the element to locate
 //  returns - whatever driver.wait() returns for the elementLocated condition;
 //            "Could not find <selector>" is the message used when the wait times out
 var find = function(el){
   return driver.wait(until.elementLocated(By.css(el)), 5000, "Could not find " + el);
 }

 //Read the contents of the directory with images.
 //Since converting images to text is error prone, I run this part of the script separately first so I can validate the output in the console.
 //I then literally copy-paste the result into an array :)  Ideally, I would change this into a function that returns an array ready to be used by the
 //rest of the script.

 //Read every file in imgs/, run it through okrabyte (OCR) and print the first
 //word of the recognised text with underscores and digits removed.
 //A directory read failure is still thrown; a failed decode of a single image
 //is reported and skipped so one bad image no longer kills the whole run.
 fs.readdir("imgs/", function (err, files) {
   if (err) {
     throw err;
   }
   //forEach gives every callback its own `file` (the old loop leaked a global `i`).
   files.forEach(function(file){
     //Using okrabyte package read each image file (using readFileSync) and capture text
     okrabyte.decodeBuffer(fs.readFileSync("imgs/" + file), function(error, data){
       //previously `error` was ignored and data.split() blew up on a failed decode
       if (error || typeof data !== "string") {
         console.error("Could not read text from imgs/" + file, error);
         return;
       }
       //split the returned string into words (separated by spaces), keep the first, and strip unwanted characters
       var splitWords = data.split(" ");
       var word = splitWords[0].replace(/_|[0-9]/g,"");
       console.log(word);
     });
   });
 });

 //array of influencers extracted from images.
 //goToUser() appends each entry to "http://instagram.com/", so entries are presumably
 //Instagram usernames. It is empty here; per the note above, the validated OCR output
 //is pasted in by hand before running.
 var influencers = [
 ]

 //Scrape every Instagram profile listed in `influencers` and write the rows to file.csv.
 //
 //Profiles are visited one at a time. Every step is chained on the promise returned by the
 //previous WebDriver command, so the ordering no longer depends on selenium's promise
 //manager or on driver.sleep() calls.
 //
 //Returns a promise that settles once file.csv has been written. If any step fails the
 //error is logged, process.exitCode is set to 1 and no CSV is written.
 function goToUser() {
   //csv fields to be created by json2csv
   var fields = ["first name", "last name", "followers", "email", "profile url"];

   //one row per influencer, in the same order as the `influencers` array
   var influencerCSV = [];

   //Visit a single profile and resolve with its CSV row.
   function scrapeProfile(username) {
     //a fresh row per profile, so nothing carries over from the previous one
     var row = {
       "first name": null,
       "last name": null,
       "followers": null,
       "email": null,
       "profile url": null
     };

     //using webdriver, go to the profile page for this list member
     return driver.get("http://instagram.com/" + username)
       .then(function(){
         //element that contains the followers
         return find("._9o0bc li:nth-child(2) ._bkw5z").getText();
       })
       .then(function(txt){
         row["followers"] = txt;
         //element that holds the user's full name
         return find("._79dar").getText();
       })
       .then(function(txt){
         //Using a package in order to deal with long names, middle initials, etc
         var name = parseFullName(txt);
         row["first name"] = name.first;
         row["last name"] = name.last;
         //element containing the instagram bio
         return find("._bugdy").getText();
       })
       .then(function(txt){
         //extract emails from the bio, if any
         var emails = findEmails(txt);
         row["email"] = emails[0] === undefined ? "No email listed" : emails[0];
         //get link to instagram user's profile url.
         return driver.getCurrentUrl();
       })
       .then(function(url){
         row["profile url"] = url;
         return row;
       });
   }

   //Chain the profiles so each one starts only after the previous one has finished.
   var scrapeAll = influencers.reduce(function(previous, username){
     return previous
       .then(function(){ return scrapeProfile(username); })
       .then(function(row){ influencerCSV.push(row); });
   }, Promise.resolve());

   return scrapeAll
     .then(function(){
       //Convert JSON array to CSV spreadsheet
       var csv = json2csv({ data: influencerCSV, fields: fields });
       //wrap the callback API so a write error rejects the chain instead of being thrown from a callback
       return new Promise(function(resolve, reject){
         fs.writeFile('file.csv', csv, function(err) {
           if (err) {
             reject(err);
             return;
           }
           console.log('file saved');
           resolve();
         });
       });
     })
     .catch(function(err){
       //the caller does not handle rejections, so report the failure here
       console.error('Could not build file.csv:', err);
       process.exitCode = 1;
     });
 }
 //Run the function that produces spreadsheet.
 //This executes as soon as the script is loaded: it walks the `influencers` array and writes file.csv.
 goToUser();
	var webdriver = require('selenium-webdriver'),
	chrome = require('selenium-webdriver/chrome'),
	By = webdriver.By,
	until = webdriver.until,
	Key = webdriver.Key;
	// Chrome launch options shared by the driver built below: the infobar and
	// notification switches are passed as command-line arguments, and the
	// credentials service preference is switched off (presumably to keep the
	// "save password" prompt away - confirm against Chrome's preference docs).
	var o = new chrome.Options();
	// o.addArguments('start-fullscreen');
	o.addArguments('disable-infobars', 'disable-notifications');
	o.setUserPreferences({
		credentials_enable_service: false
	});
	var parseFullName = require('parse-full-name').parseFullName;
	var findEmails = require('find-emails-in-string');
	var json2csv = require('json2csv');
	var fs = require('fs');
	var okrabyte = require("okrabyte");

	// Single Chrome WebDriver session, configured with the options in `o`;
	// `find` and `goToUser` both drive the browser through it.
	var driver = new webdriver.Builder()
		.withCapabilities(webdriver.Capabilities.chrome())
		.setChromeOptions(o)
		.build();


	//Helper: wait (up to 5000 ms) for an element matching the CSS selector `el` and return it.
	//The element is taken straight from driver.wait() instead of from a second
	//findElement() call, so the lookup can never run before the wait has finished.
	//  el      - CSS selector string of the element to locate
	//  returns - whatever driver.wait() returns for the elementLocated condition;
	//            "Could not find <selector>" is the message used when the wait times out
	var find = function(el){
		return driver.wait(until.elementLocated(By.css(el)), 5000, "Could not find " + el);
	}

	//Read the contents of the directory with images.
	//Since converting images to text is error prone, I run this part of the script separately first so I can validate the output in the console.
	//I then literally copy-paste the result into an array :) Ideally, I would change this into a function that returns an array ready to be used by the
	//rest of the script.

	//Read every file in imgs/, run it through okrabyte (OCR) and print the first
	//word of the recognised text with underscores and digits removed.
	//A directory read failure is still thrown; a failed decode of a single image
	//is reported and skipped so one bad image no longer kills the whole run.
	fs.readdir("imgs/", function (err, files) {
		if (err) {
			throw err;
		}
		//forEach gives every callback its own `file` (the old loop leaked a global `i`).
		files.forEach(function(file){
			//Using okrabyte package read each image file (using readFileSync) and capture text
			okrabyte.decodeBuffer(fs.readFileSync("imgs/" + file), function(error, data){
				//previously `error` was ignored and data.split() blew up on a failed decode
				if (error || typeof data !== "string") {
					console.error("Could not read text from imgs/" + file, error);
					return;
				}
				//split the returned string into words (separated by spaces) and keep the first
				var splitWords = data.split(" ");
				//strip underscores and digits; the pattern used to be /_\|[0-9]/g, which only
				//matched a literal "_|" followed by a digit and so removed nothing
				var word = splitWords[0].replace(/_|[0-9]/g,"");
				console.log(word);
			});
		});
	});

	//array of influencers extracted from images.
	//goToUser() appends each entry to "http://instagram.com/", so entries are presumably
	//Instagram usernames. It is empty here; per the note above, the validated OCR output
	//is pasted in by hand before running.
	var influencers = [
	]

	//Scrape every Instagram profile listed in `influencers` and write the rows to file.csv.
	//
	//Profiles are visited one at a time. Every step is chained on the promise returned by the
	//previous WebDriver command, so the ordering no longer depends on selenium's promise
	//manager or on driver.sleep() calls.
	//
	//Returns a promise that settles once file.csv has been written. If any step fails the
	//error is logged, process.exitCode is set to 1 and no CSV is written.
	function goToUser() {
		//csv fields to be created by json2csv
		var fields = ["first name", "last name", "followers", "email", "profile url"];

		//one row per influencer, in the same order as the `influencers` array
		var influencerCSV = [];

		//Visit a single profile and resolve with its CSV row.
		function scrapeProfile(username) {
			//a fresh row per profile, so nothing carries over from the previous one
			var row = {
				"first name": null,
				"last name": null,
				"followers": null,
				"email": null,
				"profile url": null
			};

			//using webdriver, go to the profile page for this list member
			return driver.get("http://instagram.com/" + username)
				.then(function(){
					//element that contains the followers
					return find("._9o0bc li:nth-child(2) ._bkw5z").getText();
				})
				.then(function(txt){
					row["followers"] = txt;
					//element that holds the user's full name
					return find("._79dar").getText();
				})
				.then(function(txt){
					//Using a package in order to deal with long names, middle initials, etc
					var name = parseFullName(txt);
					row["first name"] = name.first;
					row["last name"] = name.last;
					//element containing the instagram bio
					return find("._bugdy").getText();
				})
				.then(function(txt){
					//extract emails from the bio, if any
					var emails = findEmails(txt);
					row["email"] = emails[0] === undefined ? "No email listed" : emails[0];
					//get link to instagram user's profile url.
					return driver.getCurrentUrl();
				})
				.then(function(url){
					row["profile url"] = url;
					return row;
				});
		}

		//Chain the profiles so each one starts only after the previous one has finished.
		var scrapeAll = influencers.reduce(function(previous, username){
			return previous
				.then(function(){ return scrapeProfile(username); })
				.then(function(row){ influencerCSV.push(row); });
		}, Promise.resolve());

		return scrapeAll
			.then(function(){
				//Convert JSON array to CSV spreadsheet
				var csv = json2csv({ data: influencerCSV, fields: fields });
				//wrap the callback API so a write error rejects the chain instead of being thrown from a callback
				return new Promise(function(resolve, reject){
					fs.writeFile('file.csv', csv, function(err) {
						if (err) {
							reject(err);
							return;
						}
						console.log('file saved');
						resolve();
					});
				});
			})
			.catch(function(err){
				//the caller does not handle rejections, so report the failure here
				console.error('Could not build file.csv:', err);
				process.exitCode = 1;
			});
	}
	//Run the function that produces spreadsheet.
	//This executes as soon as the script is loaded: it walks the `influencers` array and writes file.csv.
	goToUser();