Created
May 13, 2012 22:57
-
-
Save pridkett/2690667 to your computer and use it in GitHub Desktop.
Export the network of the FOLLOWER relationship in gitminer
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * This script will create a graphml file with the follower relation
 * for every user in a gitminer database.
 *
 * @author Patrick Wagstrom <[email protected]>
 */
/**
 * Export the FOLLOWER relationships among a set of users as a GraphML file.
 *
 * Every user vertex is first copied into a fresh TinkerGraph. Then each
 * outgoing FOLLOWER edge whose target was also copied is added to that
 * graph; edges pointing at a vertex outside the set are skipped.
 *
 * The biography property is blanked on the copies because it takes up a ton
 * of space and we're not that concerned about it for our analysis.
 *
 * @param users: an iterable collection of user vertices. It is traversed
 *        twice, so it must be re-iterable (e.g. a Set or List). It should
 *        contain every user of interest, because edges to anyone outside
 *        the collection are dropped.
 * @param outfile: the file to save the graph to. When null or empty, the
 *        previously hard-coded name "follower.graphml" is used.
 */
def exportFollowerSubgraph(users, outfile) {
    // Declared with def so the working graph stays local to this call
    // instead of being written into the script binding.
    def to = new TinkerGraph()

    System.out.println("processing users")
    for (vertex in users) {
        System.out.println("user: " + vertex["login"])
        def toVertex = to.addVertex(vertex.getId())
        ElementHelper.copyProperties(vertex, toVertex)
        toVertex.setProperty('biography', null)
    }

    System.out.println("processing edges")
    for (vertex in users) {
        System.out.println("edges for user: " + vertex["login"])
        // The copy of this user; it was added in the first pass.
        def source = to.v(vertex.getId())
        for (edge in vertex.outE("FOLLOWER")) {
            if (edge == null) continue
            def followed = edge.getInVertex()
            System.out.println("edge: " + vertex["login"] + " => " + followed["login"])
            // A null lookup means the followed user was not part of `users`.
            def target = to.v(followed.getId())
            if (target == null) {
                System.out.println("target vertex is out of set, skipping")
            } else {
                def toEdge = to.addEdge(edge.getId(), source, target, edge.getLabel())
                ElementHelper.copyProperties(edge, toEdge)
            }
        }
    }

    System.out.println("Writing graphml file")
    GraphMLWriter writer = new GraphMLWriter()
    // Honor the requested output path (it used to be ignored) and let
    // withStream close the file even if the writer throws.
    new FileOutputStream(outfile ?: "follower.graphml").withStream { stream ->
        writer.outputGraph(to, stream)
    }
}
/**
 * Collect the users tied to a repository through several relationships:
 * incoming REPO_WATCHED, outgoing REPO_COLLABORATOR, outgoing
 * REPO_CONTRIBUTOR and incoming REPO_OWNER.
 *
 * A future post will contain more information about how to mine these
 * relationships.
 *
 * @param repo: the vertex of the repository to start at
 * @return the combined set of all vertices found
 */
def getAllUsersForRepo(repo) {
    // These three names are assigned without def, so they end up in the
    // script binding.
    watchers = repo.in("REPO_WATCHED").toSet()
    collaborators = repo.out("REPO_COLLABORATOR").toSet()

    // Contributors also include whoever is linked in through REPO_OWNER.
    contributors = repo.out("REPO_CONTRIBUTOR").toSet()
    contributors = contributors + repo.in("REPO_OWNER").dedup().toSet()

    def watchersAndCollaborators = watchers + collaborators
    return watchersAndCollaborators + contributors
}
// Look up the repository vertex for tinkerpop/gremlin: query the 'repo-idx'
// index on key 'reponame' and take the first result. `g` is not defined in
// this script -- presumably the graph handle provided by the Gremlin
// environment (TODO confirm). next() is called unguarded, so a missing
// index entry will presumably raise rather than yield null.
gremlin = g.idx('repo-idx').get('reponame','tinkerpop/gremlin').next()
// println "Getting all users..."
// get every user related to gremlin
users = getAllUsersForRepo(gremlin)
// Export the follower subgraph of those users, passing "followers.graphml"
// as the requested output file.
exportFollowerSubgraph(users, "followers.graphml")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment