Skip to content

Instantly share code, notes, and snippets.

@brosner
Created October 24, 2011 20:06

Revisions

  1. brosner created this gist Oct 24, 2011.
    99 changes: 99 additions & 0 deletions gistfile1.txt
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,99 @@

    # original: https://github.com/simonw/mytweets
    # modified by Brian Rosner
    # This script does not fetch all tweets. It is limited by what Twitter's API
    # returns. Somewhere around 3200 tweets.
    #
    # Fill in your Twitter app's API keys in the twitter.Api(...) call
    # inside fetch_and_save_new_tweets() below.

    import json
    import httplib
    import time
    import traceback

    import twitter


    FILE = "my_tweets.json"


    def load_all():
        """Load previously saved tweets from FILE.

        Returns:
            The list of tweet dicts stored on disk, or an empty list if
            the file does not exist yet (first run).

        Note: a corrupt JSON file is deliberately NOT swallowed here.
        Returning [] on a parse error would let
        fetch_and_save_new_tweets() overwrite the damaged-but-recoverable
        archive with only the newest ~3200 tweets, so the ValueError is
        allowed to propagate.
        """
        try:
            # "with" guarantees the descriptor is closed; the original
            # bare open() leaked it until garbage collection.
            with open(FILE) as fp:
                return json.load(fp)
        except IOError:
            # File not created yet -- nothing saved so far.
            return []


    def fetch_and_save_new_tweets():
    tweets = load_all()
    old_tweet_ids = set(t["id"] for t in tweets)
    if tweets:
    since_id = max(t["id"] for t in tweets)
    else:
    since_id = None
    api = twitter.Api(
    consumer_key="xxx",
    consumer_secret="xxx",
    access_token_key="xxx",
    access_token_secret="xxx"
    )
    new_tweets = fetch_all(api, since_id)
    num_new_saved = 0
    for tweet in new_tweets:
    if tweet["id"] not in old_tweet_ids:
    tweets.append(tweet)
    num_new_saved += 1
    tweets.sort(key=lambda t: t["id"], reverse=True)
    # Delete the "user" key
    for t in tweets:
    if "user" in t:
    del t["user"]
    # Save back to disk
    json.dump(tweets, open(FILE, "w"), indent=2)
    print "Saved %s new tweets" % num_new_saved


    def fetch_all(api, since_id=None):
    all_tweets, all_tweets_len = [], 0
    seen_ids = set()
    page = 0
    attempts = 0

    kwargs = {"count": 200}
    if since_id is not None:
    kwargs["since_id"] = since_id

    try:
    while True:
    kwargs["page"] = page
    try:
    tweets = api.GetUserTimeline(**kwargs)
    except twitter.TwitterError, e:
    if "Capacity" in e.args[0]:
    attempts += 1
    continue
    except httplib.BadStatusLine:
    attempts += 1
    continue
    else:
    attempts = 0
    page += 1
    if not tweets:
    break
    for tweet in tweets:
    if tweet.id not in seen_ids:
    seen_ids.add(tweet.id)
    all_tweets.append(tweet.AsDict())
    print "Fetched another %s" % (len(all_tweets) - all_tweets_len)
    all_tweets_len = len(all_tweets)
    time.sleep(5)
    except:
    traceback.print_exc()
    print "Saving tweets to disk anyways"

    all_tweets.sort(key=lambda t: t["id"], reverse=True)
    return all_tweets


    # Script entry point: refresh the local tweet archive in FILE.
    if __name__ == "__main__":
        fetch_and_save_new_tweets()