Created
February 11, 2012 04:00
Revisions
-
bostwick revised this gist
Feb 11, 2012 . 1 changed file with 6 additions and 0 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -118,6 +118,12 @@ def word_html(word, size): messages = [post['message'] for page in pages for post in page['data'] if 'message' in post] comments = [comment['message'] for page in pages for post in page['data'] if 'comments' in post and 'data' in post['comments'] for comment in post['comments']['data'] if 'if message' in comment] messages.extend(comments) tokens = [token for msg in messages for token in word_tokenize(msg)] counts = word_count(tokens) -
bostwick revised this gist
Feb 11, 2012 . 1 changed file with 11 additions and 6 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -20,7 +20,7 @@ def get_json(url): """ Return the parsed JSON from a url. """ print "GET %s" % url f = urllib2.urlopen(url) return json.loads(f.read()) @@ -32,18 +32,23 @@ def fb_feed_url(access_token): return "https://graph.facebook.com/me/home?access_token=%s" % access_token def fb_news_feed(access_token): """ Returns a set number of pages from a facebook news feed as a list of parsed json. """ feed_url = fb_feed_url(access_token) page_json = [] for x in range(0, 100): json = get_json(feed_url) page_json.append(json) # Break if there's not enough data to continue if 'paging' in json and 'next' in json['paging']: feed_url = json['paging']['next'] else: break return page_json @@ -109,7 +114,7 @@ def word_html(word, size): if __name__ == "__main__": access_token = sys.argv[1] pages = fb_news_feed(access_token) messages = [post['message'] for page in pages for post in page['data'] if 'message' in post] -
bostwick revised this gist
Feb 11, 2012 . 1 changed file with 0 additions and 5 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,5 +0,0 @@ -
bostwick revised this gist
Feb 11, 2012 . 2 changed files with 125 additions and 20 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,125 @@ #!/usr/bin/env python import sys import urllib2 import json from collections import defaultdict from nltk import word_tokenize IGNORED_WORDS = ["!", ".", ",", "(", ")", "'s", ":", "?", "...", "$", "<", ">", "''", "``", "-", "c", "'", "--", "&", "and", "the", "or", "not", "i", "you", "to", "this", "of", "in", "for", "a", "an", "and", "your", "with", "me", "my", "be", "these", "that", "do", "at", "no", "so", "1", "2", "3", "4", "5", "6", "7", "8", "9", "0", "http", "@", "is", "am", "are", "it", "if", "n't", "'em", "from", "one", "on", "up", "like", "we", "their", "they", "'ll", "'d", "'m", "//www.youtube.com/watch", "by", "have", "just", "will", "as"] def get_json(url): """ Return the parsed JSON from a url. """ # print "GET %s" % url f = urllib2.urlopen(url) return json.loads(f.read()) def fb_feed_url(access_token): """ The graph api url for your facebook feed. access_token should be a valid facebook access token as a string. """ return "https://graph.facebook.com/me/home?access_token=%s" % access_token def fb_news_feed(access_token, pages): """ Returns a set number of pages from a facebook news feed as a list of parsed json. """ feed_url = fb_feed_url(access_token) page_json = [] for x in range(0, pages): json = get_json(feed_url) page_json.append(json) feed_url = json['paging']['next'] return page_json def word_count(words): """ Count the occurrences of each word. Words should be a list of strings that you might get from str.split() or nltk.word_tokenize(). """ seen = defaultdict(lambda: 0) for w in words: if w.lower() not in IGNORED_WORDS: seen[w] += 1 return seen def word_cloud_sizes(counts): max_count = max(counts.values()) min_size, max_size = (1, 5) word_sizes = defaultdict(lambda : 1) for word in counts: if counts[word] > 1: size = int(float(counts[word]) * (max_size - 1) / max_count) + 1 word_sizes[word] = size return word_sizes def word_cloud_header(): return """ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> <html> <head> <title>My Facebook Word Cloud</title> <style type=\"text/css\"> .size-1 { font-size: 10pt; } .size-2 { font-size: 16pt; } .size-3 { font-size: 28pt; } .size-4 { font-size: 36pt; font-weight: bold; } .size-5 { font-size: 48pt; font-weight: bold; } .word-cloud { margin: 0 auto; width: 600px; padding-top: 10px; } </style> </head> <body> <div class="word-cloud"> """ def word_cloud_footer(): return """ </div> </body> </html> """ def word_cloud_html(sizes): def word_html(word, size): return "<span class=\"size-%s\">%s</span>" % (size, word) return ' '.join([word_html(w, s) for w, s in sizes.items()]) if __name__ == "__main__": access_token = sys.argv[1] pages = fb_news_feed(access_token, 6) messages = [post['message'] for page in pages for post in page['data'] if 'message' in post] tokens = [token for msg in messages for token in word_tokenize(msg)] counts = word_count(tokens) sizes = word_cloud_sizes(counts) print ''.join([ word_cloud_header(), word_cloud_html(sizes), word_cloud_footer()]) This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,20 +0,0 @@ -
bostwick renamed this gist
Feb 11, 2012 . 1 changed file with 0 additions and 0 deletions.There are no files selected for viewing
File renamed without changes. -
bostwick revised this gist
Feb 11, 2012 . 1 changed file with 20 additions and 0 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,20 @@ def fb_feed_url(access_token): """ The graph api url for your facebook feed. access_token should be a valid facebook access token as a string. """ return "https://graph.facebook.com/me/home?access_token=%s" % access_token def fb_news_feed(access_token, pages): """ Returns a set number of pages from a facebook news feed as a list of parsed json. """ feed_url = fb_feed_url(access_token) page_json = [] for x in range(0, pages): json = get_json(feed_url) page_json.append(json) feed_url = json['paging']['next'] return page_json -
bostwick created this gist
Feb 11, 2012 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,5 @@ def get_json(url): """ Return the parsed JSON from a url. """ # print "GET %s" % url f = urllib2.urlopen(url) return json.loads(f.read())