Last active
November 8, 2017 18:36
-
-
Save enkeboll/c9949e215e29e4b2d30e to your computer and use it in GitHub Desktop.
COMS 6998 Social Networks Facebook Post Scorer
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
__author__ = 'andyenkeboll'

import re
from collections import Counter

# on mac, `sudo pip install facebook-sdk`
# https://pypi.python.org/pypi/facebook-sdk
import facebook
# `sudo pip install mechanize`
import mechanize
import requests
# Get a token from https://developers.facebook.com/tools/explorer/
# The exact set of permissions the script needs is not recorded; when the
# token lacks one, running the script reports which permission is missing.
# The original author enabled all of them, including the extended ones.
access_token = 'TOKEN'  # placeholder -- paste a real token before running

graph = facebook.GraphAPI(access_token)
profile = graph.get_object('me')
# Starting pages of the feed and the inbox; the loops further down follow
# each response's paging['next'] link to fetch the rest.
feed = graph.get_connections('me', 'feed')
inbox = graph.get_connections('me', 'inbox')

# Per-person interaction tallies, keyed by the person's display name.
authors = Counter()        # feed posts written by the person (+1 each)
comment_auths = Counter()  # comments left on feed posts (+0.5 each)
like_auths = Counter()     # likes left on feed posts (+0.5 each)
inbox_authors = Counter()  # inbox messages sent by the person (+1 each)

# Display name -> Facebook user id for everyone who appears in any tally.
all_contacts = {}
def _credit(tally, person, weight):
    """Add *weight* to *person*'s score in *tally* and remember their id.

    *person* is a dict carrying 'name' and 'id' keys.  Entries without a
    name are ignored so that no score is filed under ``None`` and no
    ``None`` id ends up in ``all_contacts``.
    """
    name = person.get('name')
    if name is None:
        return
    tally[name] += weight
    all_contacts[name] = person.get('id')


# Walk the feed page by page, crediting post authors (1 point per post) and
# the people who liked or commented on each post (0.5 points each).
# NOTE(review): the source lost its indentation; `count` is assumed to
# advance once per post examined -- confirm against the original gist.
count = 0
while count <= 50:
    try:
        for post in feed['data']:
            count += 1
            # Posts whose status_type is 'added_photo' are not scored.
            if post.get('status_type') == 'added_photo':
                continue
            _credit(authors, post.get('from', {}), 1)
            # Like entries carry name/id at the top level; comments nest
            # them under 'from'.
            for like in post.get('likes', {}).get('data') or []:
                _credit(like_auths, like, .5)
            for comment in post.get('comments', {}).get('data') or []:
                _credit(comment_auths, comment.get('from', {}), .5)
        feed = requests.get(feed['paging']['next']).json()
    except KeyError:
        # A response without 'data' or without a paging 'next' link ends
        # the walk.
        print("KeyError")
        break
# Walk the inbox page by page, counting messages received from each person.
# Messages written by the account owner are ignored.  The owner's name is
# taken from the profile fetched earlier; 'YOURNAME' stays as the fallback
# placeholder in case the profile carries no name.
# NOTE(review): the source lost its indentation; the counter bump and the
# contact bookkeeping are assumed to sit inside the "not from me" branch.
own_name = profile.get('name', 'YOURNAME')
count = 0
while count <= 50:
    try:
        for message in inbox['data']:
            for comment in message.get('comments', {}).get('data', []):
                sender = comment.get('from', {})
                sender_name = sender.get('name')
                # Skip our own messages and entries with no sender name.
                if sender_name is None or sender_name == own_name:
                    continue
                inbox_authors[sender_name] += 1
                count += 1
                all_contacts[sender_name] = sender.get('id')
        inbox = requests.get(inbox['paging']['next']).json()
    except KeyError:
        # A response without 'data' or without a paging 'next' link ends
        # the walk.
        break
# Log in to the regular Facebook website with a scripted browser session.
# The profile pages opened further down are fetched through this session.
browser = mechanize.Browser()
cookies = mechanize.CookieJar()
browser.set_cookiejar(cookies)
browser.set_handle_robots(False)
browser.set_handle_equiv(True)
browser.set_handle_gzip(True)
browser.set_handle_redirect(True)
browser.set_handle_referer(True)
# Present a desktop browser User-agent instead of mechanize's default.
browser.addheaders = [(
    'User-agent',
    'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1',
)]

base_url = 'https://www.facebook.com'
browser.open(base_url)
# The first form on the landing page is taken to be the login form.
browser.select_form(nr=0)
# Be sure to replace these placeholders.
# NOTE(review): credentials are hard-coded in the source file; do not commit
# real values -- consider reading them from the environment instead.
browser.form['email'] = 'EMAIL'
browser.form['pass'] = 'PASSWORD'
response = browser.submit()
# For every contact, open their profile page through the logged-in browser,
# scrape the mutual-friend count and print one CSV row (no header line):
#   user id, mutual friends, posts, comment score, like score, inbox messages
# The rows can be piped straight into a file.
user_url = 'https://www.facebook.com/profile.php?id={0}'

# This is brittle; an HTML parser would be preferable, but the element ids
# differ depending on whether you are friends with the user, so a plain
# regex search for "(<n> Mutual)" is used instead.
mutual_pattern = re.compile(r'\(([0-9]+?) Mutual\)')

for name, user_id in all_contacts.items():
    if user_id is None:
        # No id means there is no profile URL to open for this contact.
        continue
    user_page = browser.open(user_url.format(user_id))
    user_content = user_page.read()
    m = mutual_pattern.search(user_content)
    # Pages that do not show a mutual-friend count are reported as "0".
    mutual_friends = m.group(1) if m else "0"
    scores = [
        str(tally.get(name, "0"))
        for tally in (authors, comment_auths, like_auths, inbox_authors)
    ]
    print(",".join([str(user_id), str(mutual_friends)] + scores))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment