Last active
October 22, 2018 16:12
-
-
Save jwcounts/ebf46df324e3e4704ab60dc01b86eb32 to your computer and use it in GitHub Desktop.
Update to Twitter Analytics Scraper
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
// Twitter Analytics scraper: configuration and shared cURL handle.
// The variables defined here ($username, $password, $start, $end, $tw_cookie,
// $cookie and $ch) are read by the login and download steps further down.

// You can set a custom user agent if you like, this one is for Firefox 60
$user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:60.0) Gecko/20100101 Firefox/60.0';

// Username and password
$username = "";
$password = "";

// Start and end of the reporting window. mktime() returns whole seconds, so
// appending three zeroes turns each value into a millisecond timestamp
// (milliseconds, not microseconds).
$start = mktime( 0, 0, 0, 6, 19, 2018 ).'000';
$end = mktime( 0, 0, 0, 6, 24, 2018 ).'000';

// Pull in the cookie file to see if authentication tokens already exist.
// On a first run the file does not exist yet; treat that as "no cookies"
// instead of letting file_get_contents() emit a warning and return false.
$tw_cookie = "./cookie.txt";
$cookie = is_readable( $tw_cookie ) ? (string) file_get_contents( $tw_cookie ) : '';

// Set up our basic cURL options
$ch = curl_init();
if ( $ch === false ) {
	throw new RuntimeException( 'Unable to initialise a cURL handle.' );
}
curl_setopt_array( $ch, [
	CURLOPT_RETURNTRANSFER => true,
	// Keep certificate and host name verification ON: this handle is later used
	// to send the account password. If the local PHP build cannot locate a CA
	// bundle, point CURLOPT_CAINFO at one rather than disabling verification.
	CURLOPT_SSL_VERIFYPEER => true,
	CURLOPT_SSL_VERIFYHOST => 2,
	CURLOPT_USERAGENT      => $user_agent,
	// Read existing cookies from, and save new cookies to, the same file
	CURLOPT_COOKIEFILE     => $tw_cookie,
	CURLOPT_COOKIEJAR      => $tw_cookie,
	CURLOPT_REFERER        => "https://twitter.com/",
] );
// Check if the auth_token cookie exists. If not, log in and grab one.
// The (string) cast covers the case where the cookie file could not be read.
if ( !preg_match( '/auth_token\t([a-z0-9]+)/', (string) $cookie ) ) {
	// First call gets hidden form field authenticity_token and session cookie
	curl_setopt( $ch, CURLOPT_URL, "https://twitter.com/" );
	$html = curl_exec( $ch );
	if ( !is_string( $html ) ) {
		throw new RuntimeException( 'Could not load the Twitter login page: ' . curl_error( $ch ) );
	}

	// Parse authenticity_token out of the HTML response.
	// NOTE(review): this pattern matches the literal text `";:";` around the
	// token, which looks like an HTML-entity-decoded copy of a pattern that was
	// written against `&quot;` — confirm against the actual page markup.
	if ( !preg_match( '/formAuthenticityToken\"\;\:"\;([0-9a-zA-Z]+)\"\;/', $html, $match ) ) {
		// Stop here instead of posting a login with an empty token.
		throw new RuntimeException( 'formAuthenticityToken not found in the login page; login was not attempted.' );
	}
	$authenticity_token = $match[1];

	// Build the form body. http_build_query() URL-encodes every key and value,
	// so credentials containing characters such as "&", "+", "%" or "=" are
	// transmitted intact instead of corrupting the request.
	$sPost = http_build_query( [
		'session[username_or_email]' => $username,
		'session[password]'          => $password,
		'return_to_ssl'              => 'true',
		'scribe_log'                 => '',
		'redirect_after_login'       => '/',
		'authenticity_token'         => $authenticity_token,
	], '', '&' );

	// Second call is a POST and performs the login
	curl_setopt_array( $ch, [
		CURLOPT_URL            => "https://twitter.com/sessions",
		CURLOPT_POST           => true,
		CURLOPT_POSTFIELDS     => $sPost,
		CURLOPT_FOLLOWLOCATION => true,
		CURLOPT_RETURNTRANSFER => true,
		CURLOPT_HTTPHEADER     => [ "Content-type: application/x-www-form-urlencoded" ],
	] );
	if ( curl_exec( $ch ) === false ) {
		throw new RuntimeException( 'Twitter login request failed: ' . curl_error( $ch ) );
	}
}

// Pull in the cookie file again, and remove references to '#HttpOnly_'
// If you don't, subsequent runs will ignore the auth_token because it will be parsed as a comment
// NOTE(review): libcurl normally writes the cookie jar only when the handle is
// closed, so right after a first login this file may not exist yet. The
// readability check avoids a warning and avoids creating an empty cookie file.
if ( is_readable( $tw_cookie ) ) {
	$cookie = file_get_contents( $tw_cookie );
	if ( $cookie !== false ) {
		$cookie_strip = str_replace( '#HttpOnly_', '', $cookie );
		file_put_contents( $tw_cookie, $cookie_strip );
	}
}
// Ask Twitter Analytics to generate a CSV of the tweets from our time period,
// poll until the export is no longer "Pending", then download the bundle and
// save it as ./tweets.csv.
$sTarget = "https://analytics.twitter.com/user/{$username}/tweets/export.json?start_time={$start}&end_time={$end}&lang=en";
$sTargetBundle = "https://analytics.twitter.com/user/{$username}/tweets/bundle?start_time={$start}&end_time={$end}&lang=en";
curl_setopt_array( $ch, [
	CURLOPT_URL            => $sTarget,
	CURLOPT_POST           => true,
	CURLOPT_HTTPHEADER     => [ "Content-type: application/x-www-form-urlencoded" ],
	CURLOPT_POSTFIELDS     => '',
	CURLOPT_RETURNTRANSFER => true,
	CURLOPT_FOLLOWLOCATION => true,
	CURLOPT_HEADER         => false,
] );

// Since the CSV takes some time to generate, check the status a few times.
// The attempt counter is advanced on every pass so the loop always terminates,
// even if the export stays "Pending".
$max_attempts = 5;
$data_arr = [ "status" => "Pending" ];
for ( $attempts = 1; $attempts <= $max_attempts; $attempts++ ) {
	$data = curl_exec( $ch );
	// A failed transfer or a non-JSON body yields no status at all
	$data_arr = is_string( $data ) ? json_decode( $data, true ) : null;
	$status = ( is_array( $data_arr ) && isset( $data_arr['status'] ) ) ? $data_arr['status'] : null;
	if ( $status !== "Pending" ) {
		break;
	}
	// Only wait when another poll is going to follow
	if ( $attempts < $max_attempts ) {
		sleep( 2 );
	}
}

// Once it is completed, download the CSV file and save it
curl_setopt( $ch, CURLOPT_POST, false );
curl_setopt( $ch, CURLOPT_URL, $sTargetBundle );
$data = curl_exec( $ch );
$error = curl_error( $ch );
$destination = "./tweets.csv";
if ( $data === false ) {
	// Keep the previous file rather than truncating it on a failed transfer
	error_log( "Tweet CSV download failed: {$error}" );
} elseif ( file_put_contents( $destination, $data ) === false ) { // overwrites the previous file
	error_log( "Could not write {$destination}" );
}
// Download the graph data JSON (account_stats.json) for the same time window
// and save it as ./graphs.json.
$sTarget = "https://analytics.twitter.com/user/{$username}/tweets/account_stats.json?start_time={$start}&end_time={$end}";
curl_setopt_array( $ch, [
	CURLOPT_URL            => $sTarget,
	CURLOPT_FOLLOWLOCATION => true,
	CURLOPT_RETURNTRANSFER => true,
	CURLOPT_HEADER         => false,
] );

// Save the JSON file
$data = curl_exec( $ch );
$error = curl_error( $ch );
$destination = "./graphs.json";
if ( $data === false ) {
	// Keep the previous file rather than truncating it on a failed transfer
	error_log( "Graph data download failed: {$error}" );
} elseif ( file_put_contents( $destination, $data ) === false ) { // overwrites the previous file
	error_log( "Could not write {$destination}" );
}
// Data for SVG timeline: fetch timeline.json for the same time window, save it
// as ./timeline.json, then release the cURL handle.
$sTarget = "https://analytics.twitter.com/user/{$username}/tweets/timeline.json?start_time={$start}&max_id=0&end_time={$end}&page=0&filter=no_replies&metric=clicks&lang=en";
curl_setopt_array( $ch, [
	CURLOPT_URL            => $sTarget,
	CURLOPT_RETURNTRANSFER => true,
	// Redirects are not followed for this request, unlike the other downloads.
	// NOTE(review): unclear whether that difference is intentional — confirm.
	CURLOPT_FOLLOWLOCATION => false,
] );

// Save the timeline data
$data = curl_exec( $ch );
$error = curl_error( $ch );
$destination = "./timeline.json";
if ( $data === false ) {
	// Keep the previous file rather than truncating it on a failed transfer
	error_log( "Timeline data download failed: {$error}" );
} elseif ( file_put_contents( $destination, $data ) === false ) { // overwrites the previous file
	error_log( "Could not write {$destination}" );
}

// Closing the handle is also the point at which libcurl writes the cookie jar
curl_close( $ch );
?>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Sorry I missed your question for so long! If you're still interested, I initially ran into problems while testing, since I would tweak the script and test it several times over the course of an hour or so. I then tried to space out runs of the script every couple of hours, but that started backfiring too. Pretty soon, any run of the script, no matter how long I waited, would lock the account, even with the modifications I had made.