Created
September 5, 2012 16:02
-
-
Save michellesun/3639040 to your computer and use it in GitHub Desktop.
Hello Challenge Part 2
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import bsddb3 as bsddb | |
import re | |
import collections | |
# Part 1: get the top ten devices.
def findtopten(db):
    """Return the ten most common device names with their user counts.

    Every value in ``db`` is read as a comma-separated record whose second
    field is the device name (for example ``"ios,iphone"``).

    Args:
        db: Mapping-like object that provides ``keys()`` and item lookup.

    Returns:
        A list of at most ten ``(device_name, count)`` tuples ordered by
        descending count. Ties are broken alphabetically by device name so
        the result is deterministic.

    Raises:
        IndexError: If a record has no second comma-separated field.
    """
    counts = collections.Counter(
        db[key].split(",")[1] for key in db.keys()
    )
    # Rank by how often a device occurs (highest first) instead of by its
    # name, then keep only the ten best entries.
    ranked = sorted(counts.items(), key=lambda item: (-item[1], item[0]))
    return ranked[:10]
# Part 2: report the percentage of matches that are
# cross-platform (Android to iOS or vice versa).
def crossplatform(db, filename, total=1091608):
    """Return the percentage of matches made between different platforms.

    Each non-blank line of ``filename`` is split on ``" MATCH "``; the part
    after the separator is parsed as a JSON object with the keys
    ``userid1`` and ``userid2``. Both ids are looked up in ``db`` and the
    first character of each record is compared (enough to tell a record
    such as ``"ios,iphone"`` from one starting with ``"android"``).

    Args:
        db: Mapping from user id to a record string.
        filename: Path of the match log to read.
        total: Denominator of the percentage. The default is the line count
            of the original ``matches.txt`` (obtained with
            ``wc -l matches.txt``); pass the real count for any other file.

    Returns:
        The share of cross-platform matches as a float percentage of
        ``total``.

    Raises:
        IndexError: If a non-blank line does not contain ``" MATCH "``.
        KeyError: If the JSON lacks a user id key or ``db`` lacks the id.
    """
    cross_count = 0
    # The context manager closes the log even if a line fails to parse.
    with open(filename) as matches:
        for line in matches:
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            match = json.loads(line.split(" MATCH ")[1])
            platform1 = db[match["userid1"]][0]
            platform2 = db[match["userid2"]][0]
            # A match is cross-platform when the two platforms DIFFER.
            if platform1 != platform2:
                cross_count += 1
    return cross_count / float(total) * 100
# Part 3: find the top 3 busiest hours for bumping (matched bumps only).
def getbumplist(filename):
    """Collect the hour token of every match line in ``filename``.

    Each non-blank line is split on ``" MATCH "``; the text before the
    separator is broken into ``\\w+`` tokens and the fourth token is taken
    as the hour (e.g. ``"16"`` from ``"2012-09-05 16:02:11"`` -- timestamp
    layout presumed, confirm against the real log).

    Args:
        filename: Path of the match log to read.

    Returns:
        A list with one hour string per non-blank line, in file order.

    Raises:
        IndexError: If the text before ``" MATCH "`` has fewer than four
            word tokens.
    """
    hourlist = []
    # Open the file once and let the context manager close it.
    with open(filename) as matches:
        for line in matches:
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            date = line.split(" MATCH ")[0]
            # Only keep the hour, e.g. '00'.
            hourlist.append(re.findall(r'\w+', date)[3])
    return hourlist
def topthreehours(hourlist):
    """Return the three most frequent entries of ``hourlist``.

    Args:
        hourlist: Iterable of hashable hour values.

    Returns:
        A list of up to three ``(hour, count)`` tuples as produced by
        ``collections.Counter.most_common(3)``, most frequent first.
    """
    tally = collections.Counter()
    tally.update(hourlist)
    return tally.most_common(3)
def main():
    """Print the three reports computed from ``users.db`` and ``matches.txt``.

    Opens the B-tree database ``users.db``, then prints (1) the device
    names returned by ``findtopten``, (2) the percentage returned by
    ``crossplatform`` and (3) the hours returned by ``topthreehours``.
    The database handle is closed even if one of the reports fails.

    Every ``print`` is written with a single parenthesised argument so the
    statement produces the same text under Python 2's print statement and
    Python 3's print function.
    """
    db = bsddb.btopen("users.db")
    # Example contents:
    # { "1293483219": "ios,iphone", "12094389": "android,htcxyz"}
    try:
        # Part 1: prints the top ten devices among our users.
        topten = findtopten(db)
        print("Top ten devices for bump users are")
        # Emit the names on one line that ends with a newline, so the next
        # report does not run onto the same output line.
        print(" ".join(pair[0] for pair in topten))

        # Part 2: percentage of matches that are cross-platform.
        filename = 'matches.txt'
        print("Percentage of crossplatform matches is %s"
              % crossplatform(db, filename))

        # Part 3: top 3 busiest hours for bumping.
        hourlist = getbumplist(filename)
        topthreehrs = topthreehours(hourlist)
        print("Top 3 busiest hours for users to successfully bump is: ")
        print(" ".join(item[0] for item in topthreehrs))
    finally:
        db.close()
# Run the reports only when this file is executed as a script.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment