Created
December 22, 2019 20:20
-
-
Save pganti/744ef65ca1127d35d6fb4059b6edd294 to your computer and use it in GitHub Desktop.
crunchbase to network
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Crunchbase to NetworkX network builder
#
# Builds a network from the Crunchbase database and outputs it in graphml format.
#
# Required modules:
#   simplejson (http://undefined.org/python/#simplejson)
#   networkx (http://networkx.lanl.gov/)
import contextlib
import itertools
import urllib2

import networkx as nx
import simplejson as json
def getCBinfo(namespace, permalink):
    """Fetch one entity from the Crunchbase v1 API and return it parsed.

    Args:
        namespace: Entity namespace placed in the URL path (the main script
            passes 'company').
        permalink: Entity permalink placed in the URL path.

    Returns:
        The value produced by ``json.loads`` for the response body.

    Raises:
        urllib2.URLError: Propagated from ``urllib2.urlopen`` when the
            request fails.
        ValueError: If the response body is not valid JSON.
    """
    api_url = "http://api.crunchbase.com/v/1/%s/%s.js" % (namespace, permalink)
    # urllib2 responses are not context managers on Python 2; closing()
    # guarantees the connection is released even if read() or parsing fails.
    with contextlib.closing(urllib2.urlopen(api_url)) as response:
        return json.loads(response.read())
def add_clique(G, investors):
    """Add a group of co-investors to ``G`` as a weighted clique.

    Every investor becomes a node carrying an ``inv_type`` attribute, and
    every pair of investors is joined by an edge. A new edge starts with
    ``weight=1``; an edge that already exists has its weight incremented.

    A group with fewer than two investors leaves the graph untouched, so a
    lone investor is not added as a node.

    Args:
        G: Graph to update in place (the main script passes an ``nx.Graph``).
        investors: Sequence of ``(node_id, investor_type)`` pairs.

    Returns:
        The same graph object that was passed in.
    """
    if len(investors) > 1:
        # Nodes are keyed by id alone; the investor type is a node attribute.
        for inv, typ in investors:
            G.add_node(inv, inv_type=typ)
        # has_edge() and G[u][v] exist on both NetworkX 1.x and 2.x+, unlike
        # the G.edge mapping, which was removed in NetworkX 2.0.
        # combinations() yields each unordered pair exactly once, in the same
        # order as the nested index loops it replaces.
        for (u, _), (v, _) in itertools.combinations(investors, 2):
            if G.has_edge(u, v):
                G[u][v]['weight'] += 1
            else:
                G.add_edge(u, v, weight=1)
    return G
# Main.


def _collect_investors(co_info):
    """Return the set of (permalink, investor_type) pairs in a company record.

    Walks every investment of every funding round in ``co_info`` and records
    each populated 'financial_org', 'person' or 'company' entry.
    """
    investors = set()
    # `.get(...) or []` treats a missing, null or empty field as "nothing to
    # iterate" instead of raising KeyError/TypeError.
    for iround in co_info.get('funding_rounds') or []:
        for investment in iround.get('investments') or []:
            for i_type in ('financial_org', 'person', 'company'):
                investor = investment.get(i_type)
                if investor:
                    investors.add((investor['permalink'], i_type))
    return investors


# Get the list of companies Crunchbase has data on
with contextlib.closing(
        urllib2.urlopen("http://api.crunchbase.com/v/1/companies.js")) as _listing:
    company_names = json.loads(_listing.read())

# initialize Graph
G = nx.Graph()

# Iterate through companies, getting CB data on each
for company in company_names:
    try:
        co_info = getCBinfo('company', company['permalink'])
    except Exception:
        # Best effort: skip any company whose record cannot be fetched or
        # parsed. Catching Exception (not a bare except) still lets
        # KeyboardInterrupt and SystemExit stop the run.
        continue

    # Add this company's investors and the edges between them to the graph
    G = add_clique(G, list(_collect_investors(co_info)))

# Write the network to a graphml file /projects/cb_graph.graphml
# NetworkX supports many other formats as well, check the docs.
nx.write_graphml(G, "/projects/cb_graph.graphml")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment