Created
July 18, 2012 03:11
-
-
Save kedarbellare/3133918 to your computer and use it in GitHub Desktop.
Finding the max and average centrality in a graph
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# | |
# Write centrality_max to return the maximum distance | |
# from a node to all the other nodes it can reach | |
# | |
from collections import deque | |
def centrality_max(G, v): | |
open_list = deque([v]) | |
distance = {v: 0} | |
while open_list: | |
head = open_list.popleft() | |
for nbr in G[head]: | |
if nbr not in distance: | |
open_list.append(nbr) | |
distance[nbr] = distance[head] + 1 | |
return distance[head] | |
#def centrality_max(G, v): | |
# # your code here | |
# open_list = [v] | |
# distance = {v: 0} | |
# best_distance = 0 | |
# head, length = 0, 1 | |
# while head < length: | |
# n = open_list[head] | |
# head += 1 | |
# for nbr in G[n]: | |
# if nbr not in distance: | |
# open_list.append(nbr) | |
# length += 1 | |
# best_distance = distance[n] + 1 | |
# distance[nbr] = best_distance | |
# return best_distance | |
def centrality_max2(G, v): | |
toCrawl = [] | |
nextDepth = [v] | |
marked = {v: True} | |
depth = -1 | |
while nextDepth: | |
toCrawl = [] + nextDepth | |
nextDepth = [] | |
depth += 1 | |
while toCrawl: | |
nodeID = toCrawl.pop() | |
for e in G[nodeID]: | |
if e not in marked: | |
nextDepth.append(e) | |
marked[e] = True | |
return depth | |
def centrality_max_my(G, v): | |
distance = {v:0} | |
open_list = deque([v]) | |
while open_list: | |
item = open_list.popleft() | |
for neighbor in G[item]: | |
if neighbor not in distance: | |
distance[neighbor] = distance[item] + 1 | |
open_list.append(neighbor) | |
return max(distance.values()) | |
def centrality_max_mine(G, v): | |
toCrawl = [] | |
nextDepth = [v] | |
marked = {} #keep track of marked nodes so we don't go over something twice | |
depth = -1 | |
while nextDepth: | |
toCrawl, nextDepth = nextDepth,toCrawl | |
gotMarked = False | |
while toCrawl: | |
nodeID = toCrawl.pop() | |
if nodeID not in marked: | |
gotMarked = True | |
marked[nodeID] = True #just assigning it something | |
for e in G[nodeID]: | |
if e not in marked: | |
nextDepth.append(e) | |
if gotMarked: | |
depth += 1 | |
return depth | |
def centrality_max5(G, v): | |
frontier, distance_map = deque([v]), {v: 0} | |
while frontier: | |
current_node = frontier.popleft() | |
for successor in G[current_node]: | |
if successor not in distance_map: | |
distance_map[successor] = distance_map[current_node] + 1 | |
frontier.append(successor) | |
return distance_map[current_node] | |
def average_centrality(G, v): | |
frontier, distance = deque([v]), {v: 0} | |
while frontier: | |
head = frontier.popleft() | |
for nbr in G[head]: | |
if nbr not in distance: | |
distance[nbr] = distance[head] + 1 | |
frontier.append(nbr) | |
return (1.0*sum(distance.values()))/len(distance) | |
################# | |
# Testing code | |
# | |
def make_link(G, node1, node2): | |
if node1 not in G: | |
G[node1] = {} | |
(G[node1])[node2] = 1 | |
if node2 not in G: | |
G[node2] = {} | |
(G[node2])[node1] = 1 | |
return G | |
def test(): | |
chain = ((1,2), (2,3), (3,4), (4,5), (5,6)) | |
G = {} | |
for n1, n2 in chain: | |
make_link(G, n1, n2) | |
assert centrality_max(G, 1) == 5 | |
assert centrality_max(G, 3) == 3 | |
tree = ((1, 2), (1, 3), | |
(2, 4), (2, 5), | |
(3, 6), (3, 7), | |
(4, 8), (4, 9), | |
(6, 10), (6, 11)) | |
G = {} | |
for n1, n2 in tree: | |
make_link(G, n1, n2) | |
assert centrality_max(G, 1) == 3 | |
assert centrality_max(G, 11) == 6 | |
G = {} | |
tree = ((1, 2), (1, 3), (2, 3)) | |
for n1, n2 in tree: | |
make_link(G, n1, n2) | |
assert centrality_max(G, 1) == 1 | |
G = {} | |
euler = [(0, 1), (1, 2), (2, 0), (2, 3), (3, 4), (4, 2), (4, 5), (5, 6), (6, 4), (6, 7), (7, 8), (8, 6), (8, 9), (9, 10), (10, 8), (10, 11), (11, 12), (12, 10), (12, 13), (13, 14), (14, 15), (15, 16), (16, 14), (16, 17), (17, 18), (18, 16), (18, 19), (19, 20), (20, 18), (20, 21), (21, 22), (22, 20), (22, 23), (23, 24), (24, 22), (24, 25), (25, 26), (26, 24), (26, 27), (27, 28), (28, 26), (28, 29), (29, 30), (30, 28), (30, 31), (31, 32), (32, 30), (32, 33), (33, 34), (34, 32), (34, 35), (35, 36), (36, 34), (36, 37), (37, 38), (38, 36), (38, 39), (39, 40), (40, 38), (40, 41), (41, 42), (42, 40), (42, 43), (43, 44), (44, 42), (44, 45), (45, 46), (46, 44), (46, 47), (47, 48), (48, 46), (48, 49), (49, 50), (50, 48), (50, 51), (51, 52), (52, 50), (52, 53), (53, 54), (54, 52), (54, 55), (55, 56), (56, 54), (56, 57), (57, 58), (58, 56), (58, 59), (59, 60), (60, 58), (60, 61), (61, 62), (62, 60), (62, 63), (63, 64), (64, 62), (64, 65), (65, 66), (66, 64), (66, 67), (67, 68), (68, 66), (68, 69), (69, 70), (70, 68), (70, 71), (71, 72), (72, 70), (72, 73), (73, 74), (74, 72), (74, 75), (75, 76), (76, 74), (76, 77), (77, 78), (78, 76), (78, 79), (79, 80), (80, 78), (80, 81), (81, 82), (82, 80), (82, 83), (83, 84), (84, 82), (84, 85), (85, 86), (86, 84), (86, 87), (87, 88), (88, 86), (88, 89), (89, 90), (90, 88), (90, 91), (91, 92), (92, 90), (92, 93), (93, 94), (94, 92), (94, 95), (95, 96), (96, 94), (96, 97), (97, 98), (98, 96), (98, 99), (99, 100), (100, 98)] | |
for n1, n2 in euler: | |
make_link(G, n1, n2) | |
assert centrality_max(G, 0) == 51 | |
return 'all tests pass' | |
import csv, time | |
def test_enron(): | |
# helper function for getting time in seconds | |
def now(): return time.time()*1.0 | |
G = {} | |
file = csv.reader(open('email-Enron.txt', 'rb'), delimiter='\t') | |
for row in file: | |
make_link(G, row[0], row[1]) | |
n = 15 | |
a = now() | |
max([centrality_max5(G, x) for x in G.keys()[:n]]) | |
a -= now() | |
b = now() | |
max([centrality_max2(G, x) for x in G.keys()[:n]]) | |
b -= now() | |
c = now() | |
max([centrality_max_mine(G, x) for x in G.keys()[:n]]) | |
c -= now() | |
d = now() | |
max([centrality_max_my(G, x) for x in G.keys()[:n]]) | |
d -= now() | |
e = now() | |
max([centrality_max(G, x) for x in G.keys()[:n]]) | |
e -= now() | |
print "%.3f,%.3f,%.3f,%.3f,%.3f" % (-a, -b, -c, -d, -e) | |
#print test() | |
#for i in xrange(100): | |
# test_enron() | |
def parent(i): return (i-1)/2 | |
def left_child(i): return 2*i+1 | |
def right_child(i): return 2*i+2 | |
def is_leaf(L,i): | |
return (left_child(i) >= len(L)) and (right_child(i) >= len(L)) | |
def one_child(L,i): | |
return (left_child(i) < len(L)) and (right_child(i) >= len(L)) | |
def up_heapify(L, i): | |
if i == 0: return | |
pi = parent(i) | |
if L[pi] < L[i]: | |
L[i], L[pi] = L[pi], L[i] | |
up_heapify(L, pi) | |
def down_heapify(L, i): | |
if is_leaf(L, i): | |
return | |
elif one_child(L, i): | |
# check heap property | |
li = left_child(i) | |
if L[i] < L[li]: | |
L[li], L[i] = L[i], L[li] | |
else: | |
li, ri = left_child(i), right_child(i) | |
swapi = li if L[li] > L[ri] else ri | |
if L[swapi] > L[i]: | |
L[swapi], L[i] = L[i], L[swapi] | |
down_heapify(L, swapi) | |
def insert_heap(L, v, Kmax): | |
if len(L) >= Kmax and v >= L[0]: return | |
if len(L) < Kmax: | |
L.append(v) | |
up_heapify(L, len(L)-1) | |
else: | |
L[0] = v | |
down_heapify(L, 0) | |
def extract_max(L): | |
L[0], L[len(L)-1] = L[len(L)-1], L[0] | |
current = L.pop() | |
down_heapify(L, 0) | |
return current | |
def imdb_central(fname, Kmax=20): | |
G = {} | |
actors = {} | |
movies = {} | |
indices = {} | |
file = csv.reader(open(fname, 'rb'), delimiter='\t') | |
index = 0 | |
for row in file: | |
actor = row[0] | |
movie = '%s (%s)' % (row[1], row[2]) | |
if actor not in actors: | |
index += 1 | |
indices[index] = actor | |
actors[actor] = index | |
if movie not in movies: | |
index += 1 | |
indices[index] = movie | |
movies[movie] = index | |
make_link(G, actors[actor], movies[movie]) | |
heap = [] | |
done = 0 | |
for actor in actors: | |
centrality = average_centrality(G, actors[actor]) | |
current = (centrality, actor) | |
insert_heap(heap, current, Kmax) | |
done += 1 | |
if done % 200 == 0: | |
print 'finished %d / %d' % (done, len(actors)) | |
print 'final', min(heap) | |
while heap: | |
rank = len(heap) | |
current = extract_max(heap) | |
print 'rank %d: %s centrality=%.3f' % (rank, current[1], current[0]) | |
imdb_central('imdb-4.tsv') | |
#print test() | |
#for i in xrange(100): | |
# test_enron() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment