Skip to content

Instantly share code, notes, and snippets.

@sarojbelbase
Created October 28, 2020 05:44
Show Gist options
  • Save sarojbelbase/74db2176cfcd686bcacfb14386796359 to your computer and use it in GitHub Desktop.
Save sarojbelbase/74db2176cfcd686bcacfb14386796359 to your computer and use it in GitHub Desktop.
Get recent tweets from a desired account with images and saves them as csv or json
# Possible due to https://gist.github.com/yanofsky/5436496
import csv
import json
import re
import pytz
import tweepy
def those_tweets(screen_name=TWEETARATI):
# Twitter only allows access to a users most recent 3240 tweets with this method
# authorize twitter, initialize tweepy
auth = tweepy.OAuthHandler(CONSUMER_KEY,CONSUMER_SECRET)
auth.set_access_token(ACCESS_KEY,ACCESS_SECRET)
api = tweepy.API(auth)
# initialize a list to hold all the tweepy Tweets
alltweets = []
# make initial request for most recent tweets (200 is the maximum allowed count)
new_tweets = api.user_timeline(
screen_name=screen_name,
count=1,
include_rts=False,
exclude_replies=True,
tweet_mode='extended')
# save most recent tweets
alltweets.extend(new_tweets)
# save the id of the oldest tweet less one
oldest = alltweets[-1].id - 1
# keep grabbing tweets until there are no tweets left to grab
while len(new_tweets) > 0:
# all subsequent requests use the max_id param to prevent duplicates
new_tweets = api.user_timeline(
screen_name=screen_name,
count=200, max_id=oldest,
include_rts=False,
exclude_replies=True,
tweet_mode='extended')
# save most recent tweets
alltweets.extend(new_tweets)
# update the id of the oldest tweet less one
oldest = alltweets[-1].id - 1
print(f" Got {len(alltweets)} tweets so far...")
bleached_tweets = [] # making proper headers
for tweet in alltweets:
try:
# not all tweets will have media url, so rain-checking
tweet.entities['media'][0]['media_url']
except (NameError, KeyError):
# adding "None" with no image links
bleached_tweets.append(
{
"tweet_id": tweet.id_str,
"timestamp": make_it_utc(tweet.created_at),
"tweet": remove_url(tweet.full_text),
"image_url": "None"
}
)
else:
# adding image_url for image_links
bleached_tweets.append(
{
"tweet_id": tweet.id_str,
"timestamp": make_it_utc(tweet.created_at),
"tweet": remove_url(tweet.full_text),
"image_url": tweet.entities['media'][0]['media_url']
}
)
return bleached_tweets
def remove_url(text):
modified = re.sub(r"http\S+", "", text)
return modified.encode("utf-8").decode("utf-8")
def make_it_utc(datetime_obj):
return datetime_obj.replace(tzinfo=pytz.UTC)
# If you want to store it as a csv
# def write_to_csv():
# with open(f'{TWEETARATI}.csv', 'w') as f:
# writer = csv.writer(f)
# writer.writerow(["id", "timestamp", "tweet", "image_url"])
# writer.writerows(those_tweets())
# If you want to store it as a json
# def write_to_json():
# with open(f"{TWEETARATI}.json", "w", encoding='utf8') as file:
# the_tweets = json.dumps(get_all_tweets(
# TWEETARATI), indent=4, sort_keys=True, default=str, ensure_ascii=False)
# file.write(the_tweets)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment