Created February 20, 2012 11:11
-
-
Save marians/1868815 to your computer and use it in GitHub Desktop.
Collects tweets that mention given keywords and stores them in a MySQL table.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Storage for collected tweets: one row per tweet, keyed by the tweet ID.
-- The collector script fills every column with a single
-- INSERT LOW_PRIORITY IGNORE, so a tweet whose `id` already exists is
-- skipped instead of raising a duplicate-key error.
--
-- NOTE(review): MySQL's `utf8` charset stores at most 3 bytes per character,
-- so characters outside the Basic Multilingual Plane (e.g. most emoji) cannot
-- be stored as-is. Consider `utf8mb4` if the server and the client connection
-- both support it -- confirm before changing.
CREATE TABLE `tweets` (
  `id` varchar(24) NOT NULL DEFAULT '',             -- tweet ID (primary key)
  `created_at` datetime NOT NULL,                   -- tweet creation time, 'YYYY-MM-DD HH:MM:SS'
  `user_id` bigint(20) unsigned NOT NULL,           -- author's numeric user ID
  `user_name` varchar(128) NOT NULL DEFAULT '',     -- author's screen name
  `user_followers` int(11) unsigned NOT NULL,       -- author's follower count
  `user_friends` int(10) unsigned DEFAULT NULL,     -- author's friends count
  `user_listed` int(10) unsigned DEFAULT NULL,      -- author's listed count
  `user_statuses` int(10) unsigned DEFAULT NULL,    -- author's statuses count
  `user_location` varchar(100) DEFAULT NULL,        -- author's profile location; NULL when empty
  `user_utc_offset` int(11) DEFAULT NULL,           -- author's UTC offset as reported by Twitter
  `is_retweet` tinyint(3) unsigned NOT NULL,        -- 1 if the tweet is a retweet, else 0
  `is_reply` tinyint(3) unsigned NOT NULL,          -- 1 if the tweet is a reply, else 0
  `text` varchar(200) NOT NULL DEFAULT '',          -- tweet text
  PRIMARY KEY (`id`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8;
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import tweetstream | |
import MySQLdb | |
import sys | |
import datetime | |
# --- Configuration -------------------------------------------------------
# Edit these values before running the script.

# Keywords to track; passed as ``track=`` to tweetstream.FilterStream.
words = ['keyword1', 'keyword2']

# Twitter account credentials handed to tweetstream.FilterStream.
# NOTE: placeholders only -- never commit real credentials to source control.
TWITTER_USER = "YourUserName"
TWITTER_PASS = "YourPassword"

# MySQL connection settings used by MySQLdb.connect().
DB_HOST = 'localhost'
DB_USER = 'root'
DB_PASS = ''
DB_NAME = 'tweets'
def twitter_to_iso_time(dt):
    """Convert a Twitter ``created_at`` string to ``YYYY-MM-DD HH:MM:SS``.

    Twitter timestamps look like ``"Mon Feb 20 11:11:00 +0000 2012"``:
    weekday, month, day, time, UTC offset, year.  The string is split on
    whitespace rather than sliced at fixed character positions, so a
    non-padded day or extra spacing cannot silently shift the fields.

    The UTC offset is applied, so the returned value is always expressed in
    UTC.  For the usual ``+0000`` offset the result equals the wall-clock
    time in the input.

    Args:
        dt: Timestamp string in the six-field layout described above.

    Returns:
        The timestamp formatted as ``'%Y-%m-%d %H:%M:%S'``.

    Raises:
        ValueError: If the string does not have six fields, the offset is
            not of the form ``+HHMM``/``-HHMM``, or the date/time fields
            cannot be parsed.
    """
    fields = dt.split()
    if len(fields) != 6:
        raise ValueError("unexpected Twitter timestamp: %r" % (dt,))
    _weekday, month, day, clock, offset, year = fields

    if len(offset) != 5 or offset[0] not in '+-' or not offset[1:].isdigit():
        raise ValueError("unexpected UTC offset in timestamp: %r" % (dt,))

    moment = datetime.datetime.strptime(
        "%s %s %s %s" % (month, day, clock, year), '%b %d %H:%M:%S %Y')

    # Normalise to UTC: a "+HHMM" offset means local time is ahead of UTC,
    # so the offset is subtracted; "-HHMM" is added.
    shift = datetime.timedelta(hours=int(offset[1:3]), minutes=int(offset[3:5]))
    if offset[0] == '+':
        moment -= shift
    else:
        moment += shift

    return moment.strftime('%Y-%m-%d %H:%M:%S')
def save_tweet(tweet):
    """Insert one streamed tweet into the ``tweets`` table.

    Executes a single parameterised ``INSERT LOW_PRIORITY IGNORE`` through
    the module-level ``cursor`` (created in the ``__main__`` section), so a
    tweet whose ID is already stored is skipped rather than raising.

    Args:
        tweet: Decoded tweet payload (a dict).  Must contain ``id``,
            ``created_at``, ``text`` and a ``user`` dict with ``id``,
            ``screen_name`` and ``followers_count``; the remaining user
            fields are optional and stored as NULL when absent.

    Raises:
        KeyError: If one of the required fields is missing.
        ValueError: If ``created_at`` cannot be parsed.
    """
    user = tweet['user']

    # A retweet is identified by the presence of an embedded
    # ``retweeted_status`` object.  The ``retweeted`` flag only says whether
    # the *authenticating* user retweeted this status, so it is not used.
    is_retweet = 1 if tweet.get('retweeted_status') else 0
    is_reply = 1 if tweet.get('in_reply_to_user_id_str') is not None else 0

    # Store an empty or missing profile location as NULL.
    location = user.get('location')
    location = location.encode('utf-8') if location else None

    sql = """
        INSERT LOW_PRIORITY IGNORE INTO tweets
        (id, created_at, user_id, user_name, user_followers,
        user_friends, user_listed, user_statuses, user_location,
        user_utc_offset, is_retweet, is_reply, text) VALUES (
        %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
        """
    params = [
        tweet['id'],
        twitter_to_iso_time(tweet['created_at']),
        user['id'],
        user['screen_name'].encode('utf-8'),
        user['followers_count'],
        # The columns below allow NULL, so a missing field is tolerated.
        user.get('friends_count'),
        user.get('listed_count'),
        user.get('statuses_count'),
        location,
        user.get('utc_offset'),
        is_retweet,
        is_reply,
        tweet['text'].encode('utf-8'),
    ]
    cursor.execute(sql, params)
# Script entry point: connect to MySQL, then follow the Twitter filter stream
# and store every matching tweet until the stream disconnects.
if __name__ == '__main__':
    # Open the database connection first; without it there is nowhere to
    # store tweets, so any failure here is fatal.
    try:
        conn = MySQLdb.connect(host=DB_HOST, user=DB_USER, passwd=DB_PASS, db=DB_NAME)
        # ``cursor`` is a module-level name on purpose: save_tweet() uses it.
        cursor = conn.cursor(MySQLdb.cursors.DictCursor)
        cursor.execute('SET CHARACTER SET utf8')
    except MySQLdb.Error as e:
        print("Error %d: %s" % (e.args[0], e.args[1]))
        sys.exit(1)

    try:
        with tweetstream.FilterStream(TWITTER_USER, TWITTER_PASS, track=words) as stream:
            for tweet in stream:
                # The stream may deliver messages that are not tweets
                # (presumably delete/limit notices -- confirm against the
                # streaming API docs); those carry no user/text, so skip them.
                if 'user' not in tweet or 'text' not in tweet:
                    continue
                print("From: %s (%d)\n%s" % (
                    tweet["user"]["screen_name"], stream.count, tweet['text']))
                print("")
                save_tweet(tweet)
    except tweetstream.ConnectionError as e:
        print("Disconnected from twitter. Reason: %s" % e.reason)
    finally:
        # Release database resources however the streaming loop ended.
        cursor.close()
        conn.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment