Skip to content

Instantly share code, notes, and snippets.

@bostwick
Created February 11, 2012 04:00

Revisions

  1. bostwick revised this gist Feb 11, 2012. 1 changed file with 6 additions and 0 deletions.
    6 changes: 6 additions & 0 deletions FacebookWordCloud.py
    Original file line number Diff line number Diff line change
    @@ -118,6 +118,12 @@ def word_html(word, size):
    # Collect the text of every top-level post that carries a 'message' field.
    messages = [post['message'] for page in pages
                for post in page['data']
                if 'message' in post]
    # Also collect the text of every comment on those posts.
    # BUG FIX: the original condition was `if 'if message' in comment` — a
    # typo'd key that can never appear in a comment dict, so every comment
    # was silently dropped.  The intended key is 'message'.
    comments = [comment['message'] for page in pages
                for post in page['data']
                if 'comments' in post and 'data' in post['comments']
                for comment in post['comments']['data']
                if 'message' in comment]
    messages.extend(comments)

    # Tokenize all collected text and tally word frequencies.
    tokens = [token for msg in messages for token in word_tokenize(msg)]
    counts = word_count(tokens)
  2. bostwick revised this gist Feb 11, 2012. 1 changed file with 11 additions and 6 deletions.
    17 changes: 11 additions & 6 deletions FacebookWordCloud.py
    Original file line number Diff line number Diff line change
    @@ -20,7 +20,7 @@

    def get_json(url):
    """ Return the parsed JSON from a url. """
    # print "GET %s" % url
    print "GET %s" % url
    f = urllib2.urlopen(url)
    return json.loads(f.read())

    @@ -32,18 +32,23 @@ def fb_feed_url(access_token):
    return "https://graph.facebook.com/me/home?access_token=%s" % access_token


    def fb_news_feed(access_token, pages):
    def fb_news_feed(access_token):
    """ Returns a set number of pages from a facebook news feed as a
    list of parsed json.
    """
    feed_url = fb_feed_url(access_token)
    page_json = []

    for x in range(0, pages):
    for x in range(0, 100):
    json = get_json(feed_url)
    page_json.append(json)
    feed_url = json['paging']['next']


    # Break if there's not enough data to continue
    if 'paging' in json and 'next' in json['paging']:
    feed_url = json['paging']['next']
    else:
    break

    return page_json


    @@ -109,7 +114,7 @@ def word_html(word, size):
    if __name__ == "__main__":
    access_token = sys.argv[1]

    pages = fb_news_feed(access_token, 6)
    pages = fb_news_feed(access_token)
    messages = [post['message'] for page in pages
    for post in page['data']
    if 'message' in post]
  3. bostwick revised this gist Feb 11, 2012. 1 changed file with 0 additions and 5 deletions.
    5 changes: 0 additions & 5 deletions get_json
    Original file line number Diff line number Diff line change
    @@ -1,5 +0,0 @@
    def get_json(url):
    """ Return the parsed JSON from a url. """
    # print "GET %s" % url
    f = urllib2.urlopen(url)
    return json.loads(f.read())
  4. bostwick revised this gist Feb 11, 2012. 2 changed files with 125 additions and 20 deletions.
    125 changes: 125 additions & 0 deletions FacebookWordCloud.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,125 @@
    #!/usr/bin/env python

    import sys
    import urllib2
    import json
    from collections import defaultdict

    from nltk import word_tokenize

    IGNORED_WORDS = ["!", ".", ",", "(", ")", "'s", ":", "?", "...", "$",
    "<", ">", "''", "``", "-", "c", "'", "--", "&",
    "and", "the", "or", "not", "i", "you", "to", "this",
    "of", "in", "for", "a", "an", "and", "your", "with",
    "me", "my", "be", "these", "that", "do", "at", "no", "so",
    "1", "2", "3", "4", "5", "6", "7", "8", "9", "0",
    "http", "@", "is", "am", "are", "it", "if", "n't", "'em",
    "from", "one", "on", "up", "like", "we", "their", "they",
    "'ll", "'d", "'m", "//www.youtube.com/watch", "by", "have",
    "just", "will", "as"]

    def get_json(url):
    """ Return the parsed JSON from a url. """
    # print "GET %s" % url
    f = urllib2.urlopen(url)
    return json.loads(f.read())


    def fb_feed_url(access_token):
    """ The graph api url for your facebook feed. access_token should be
    a valid facebook access token as a string.
    """
    return "https://graph.facebook.com/me/home?access_token=%s" % access_token


    def fb_news_feed(access_token, pages):
    """ Returns a set number of pages from a facebook news feed as a
    list of parsed json.
    """
    feed_url = fb_feed_url(access_token)
    page_json = []

    for x in range(0, pages):
    json = get_json(feed_url)
    page_json.append(json)
    feed_url = json['paging']['next']

    return page_json


    def word_count(words):
    """ Count the occurrences of each word. Words should be a list of strings
    that you might get from str.split() or nltk.word_tokenize().
    """
    seen = defaultdict(lambda: 0)

    for w in words:
    if w.lower() not in IGNORED_WORDS:
    seen[w] += 1

    return seen


    def word_cloud_sizes(counts):
    max_count = max(counts.values())
    min_size, max_size = (1, 5)
    word_sizes = defaultdict(lambda : 1)

    for word in counts:
    if counts[word] > 1:
    size = int(float(counts[word]) * (max_size - 1) / max_count) + 1
    word_sizes[word] = size

    return word_sizes

    def word_cloud_header():
    return """
    <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
    <html>
    <head>
    <title>My Facebook Word Cloud</title>
    <style type=\"text/css\">
    .size-1 { font-size: 10pt; }
    .size-2 { font-size: 16pt; }
    .size-3 { font-size: 28pt; }
    .size-4 { font-size: 36pt; font-weight: bold; }
    .size-5 { font-size: 48pt; font-weight: bold; }
    .word-cloud { margin: 0 auto; width: 600px; padding-top: 10px; }
    </style>
    </head>
    <body>
    <div class="word-cloud">
    """

    def word_cloud_footer():
    return """
    </div>
    </body>
    </html>
    """

    def word_cloud_html(sizes):
    def word_html(word, size):
    return "<span class=\"size-%s\">%s</span>" % (size, word)

    return ' '.join([word_html(w, s) for w, s in sizes.items()])


    if __name__ == "__main__":
    access_token = sys.argv[1]

    pages = fb_news_feed(access_token, 6)
    messages = [post['message'] for page in pages
    for post in page['data']
    if 'message' in post]

    tokens = [token for msg in messages for token in word_tokenize(msg)]
    counts = word_count(tokens)

    sizes = word_cloud_sizes(counts)

    print ''.join([
    word_cloud_header(),
    word_cloud_html(sizes),
    word_cloud_footer()])
    20 changes: 0 additions & 20 deletions fb_news_feed
    Original file line number Diff line number Diff line change
    @@ -1,20 +0,0 @@
    def fb_feed_url(access_token):
    """ The graph api url for your facebook feed. access_token should be
    a valid facebook access token as a string.
    """
    return "https://graph.facebook.com/me/home?access_token=%s" % access_token


    def fb_news_feed(access_token, pages):
    """ Returns a set number of pages from a facebook news feed as a
    list of parsed json.
    """
    feed_url = fb_feed_url(access_token)
    page_json = []

    for x in range(0, pages):
    json = get_json(feed_url)
    page_json.append(json)
    feed_url = json['paging']['next']

    return page_json
  5. bostwick renamed this gist Feb 11, 2012. 1 changed file with 0 additions and 0 deletions.
    File renamed without changes.
  6. bostwick revised this gist Feb 11, 2012. 1 changed file with 20 additions and 0 deletions.
    20 changes: 20 additions & 0 deletions gistfile1.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,20 @@
    def fb_feed_url(access_token):
    """ The graph api url for your facebook feed. access_token should be
    a valid facebook access token as a string.
    """
    return "https://graph.facebook.com/me/home?access_token=%s" % access_token


    def fb_news_feed(access_token, pages):
    """ Returns a set number of pages from a facebook news feed as a
    list of parsed json.
    """
    feed_url = fb_feed_url(access_token)
    page_json = []

    for x in range(0, pages):
    json = get_json(feed_url)
    page_json.append(json)
    feed_url = json['paging']['next']

    return page_json
  7. bostwick created this gist Feb 11, 2012.
    5 changes: 5 additions & 0 deletions get_json
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,5 @@
    def get_json(url):
    """ Return the parsed JSON from a url. """
    # print "GET %s" % url
    f = urllib2.urlopen(url)
    return json.loads(f.read())