Created
June 2, 2022 16:24
-
-
Save semenoffalex/c6e1bf5687e28ca55af9ebbdbd04ec38 to your computer and use it in GitHub Desktop.
Ben Lind's script for backbone projection applied to Marvel Universe Social Network
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# The following script was written by Benjamin Lind (@benjaminlind) | |
# It implements methods, described in the paper: | |
# Neal, Zachary. "The backbone of bipartite projections: Inferring relationships from co-authorship, co-sponsorship, co-attendance and other co-behaviors." Social Networks 39 (2014): 84-97. | |
# https://www.sciencedirect.com/science/article/abs/pii/S0378873314000343 | |
# | |
# The Marvel Universe Social Network | |
# Dataset created by Cesc Rosselló, Ricardo Alberich, and Joe Miro | |
# The network data is bipartite. There are characters and the books they appear within. | |
# Information on the data: http://bioinfo.uib.es/~joemiro/marvel.html | |
# Primary source: http://www.chronologyproject.com/ | |
setwd("C:/Users/Alexander/Dropbox/Events/TMCA") | |
library(igraph); library(Rcpp); library(compiler) | |
marvnet <- read.graph("http://bioinfo.uib.es/~joemiro/marvel/porgat.txt", format = "pajek") | |
save(marvnet, file="marvnet.Rdata") | |
typeof(marvnet) | |
# Count the number of characters and books | |
n.books <- sum(get.vertex.attribute(marvnet, "type")) | |
n.chars <- sum(get.vertex.attribute(marvnet, "type")==FALSE) | |
# How many books are each character in? | |
V(marvnet)$n.books <- rep(NA, vcount(marvnet)) | |
V(marvnet)$n.books[which(V(marvnet)$type==FALSE)] <- degree(marvnet, which(V(marvnet)$type==FALSE)) | |
# Create a one-mode projection for relationships between characters | |
cppFunction('IntegerMatrix all_pairs_Cpp(IntegerVector y){ | |
int ylen = y.size(); | |
int outrows = ylen * (ylen-1) / 2; | |
IntegerMatrix out(outrows, 2); | |
int outrowcounter = 0; | |
for(int i = 0; i < (ylen - 1); ++i){ | |
for(int j = (i+1); j < ylen; ++j){ | |
out(outrowcounter, 0) = y[i]; | |
out(outrowcounter, 1) = y[j]; | |
outrowcounter++; | |
} | |
} | |
return out; | |
}') | |
all_pairs <- function(x){ | |
a <- 1:length(x) | |
outmat <- all_pairs_Cpp(a) | |
outmat <- data.frame(outmat) | |
outmat[,1] <- x[outmat[,1]] | |
outmat[,2] <- x[outmat[,2]] | |
return(outmat) | |
} | |
all_pairs <- cmpfun(all_pairs) | |
charel <- all_pairs(V(marvnet)$id[which(V(marvnet)$type==FALSE)]) | |
charel <- data.frame(From = charel[, 1], To = charel[, 2], stringsAsFactors=FALSE) | |
charel <- charel[-which(charel$From == charel$To),] | |
rm(all_pairs, all_pairs_Cpp) | |
# How many interactions would we expect by chance? | |
cppFunction('NumericVector expected_interaction(IntegerVector x, IntegerVector y, int totalinteraction){ | |
int n = x.size(); | |
NumericVector out(n); | |
for(int i = 0; i < n; ++i){ | |
out[i] = x[i] * y[i] / totalinteraction; | |
} | |
return out; | |
}') | |
charel[,"Prob"] <- expected_interaction(V(marvnet)$n.books[match(charel$From, V(marvnet)$id)], V(marvnet)$n.books[match(charel$To, V(marvnet)$id)], n.books) | |
rm(expected_interaction) | |
# How many interactions do we observe? | |
marvneigh <- neighborhood(marvnet, order = 1, nodes = which(V(marvnet)$type==FALSE)) | |
marvneigh <- lapply(marvneigh, function(x) return(as.integer(x[-1]))) | |
names(marvneigh) <- V(marvnet)$id[which(V(marvnet)$type==FALSE)] | |
marvneighsize <- sapply(marvneigh, length) | |
charel[,"FromInt"] <- match(charel$From, names(marvneigh)) | |
charel[,"ToInt"] <- match(charel$To, names(marvneigh)) | |
cppFunction('IntegerVector n_common_elements(IntegerVector ind1, IntegerVector ind2, List masterlist){ | |
int n = ind1.size(); | |
IntegerVector out(n); | |
for(int i = 0; i < n; ++i){ | |
int aind = ind1[i]-1; | |
int bind = ind2[i]-1; | |
IntegerVector a = masterlist[aind]; | |
IntegerVector b = masterlist[bind]; | |
int countcommon = 0; | |
for(int j = 0; j < a.size(); ++j){ | |
if(is_true(any(b == a[j]))){ | |
countcommon++; | |
} | |
} | |
out[i] = countcommon; | |
} | |
return out; | |
}') | |
# This line might take a couple minutes. | |
charel[,"Obs"] <- n_common_elements(charel$FromInt, charel$ToInt, marvneigh) | |
rm(marvneigh, marvneighsize, n_common_elements) # Delete unnecessary objects | |
charel <- charel[, -c(4, 5)] # Drop unnecessary columns | |
cppFunction('NumericVector z_score_fun(IntegerVector obs, NumericVector expected){ | |
int n = obs.size(); | |
NumericVector out(n); | |
double avgexp = mean(expected); | |
double sdobs; | |
NumericVector obsminusexpsq(n); | |
for(int i = 0; i < n; ++i){ | |
obsminusexpsq[i] = pow(obs[i] - avgexp, 2); | |
} | |
sdobs = sqrt(sum(obsminusexpsq) / n); | |
for(int j = 0; j < n; ++j){ | |
out[j] = (obs[j] - expected[j]) / sdobs; | |
} | |
return out; | |
}') | |
charel[, "ObsZ"] <- z_score_fun(charel$Obs, charel$Prob) | |
rm(z_score_fun) | |
cppFunction('NumericVector log_transform(NumericVector x){ | |
int n = x.size(); | |
NumericVector out(n); | |
for(int i = 0; i < n; ++i){ | |
if(x[i] == 0){ | |
out[i] = 0; | |
} | |
if(x[i] > 0){ | |
out[i] = log(x[i]); | |
} | |
if(x[i] < 0){ | |
out[i] = (-1) * log(abs(x[i])); | |
} | |
} | |
return out; | |
}') | |
alpha = .00001; criticalvalue <- qnorm(alpha, lower.tail = FALSE) | |
charel$ObsZlog <- log_transform(charel$ObsZ) | |
charel[, "Keep"] <- (abs(charel$ObsZ) >= criticalvalue) & (abs(charel$ObsZlog) >= criticalvalue) | |
rm(alpha, criticalvalue, log_transform) | |
charelbb <- charel[which(charel$Keep == TRUE), ] | |
charelbb <- charelbb[, c("From", "To", "ObsZlog")] | |
charelbb <- graph.data.frame(charelbb, directed = FALSE) | |
charelbb <- simplify(charelbb) | |
cleanupnames <- function(y){ | |
y <- strsplit(y, "/", fixed = TRUE)[[1]][1] | |
y <- strsplit(y, " [", fixed = TRUE)[[1]][1] | |
y <- gsub(".", "", y, fixed = TRUE) | |
ylen <- nchar(y) | |
lastchar <- substr(y, ylen, ylen) | |
y <- ifelse(lastchar == " ", substr(y, 1, ylen - 1), y) | |
return(y) | |
} | |
cleanupnames <- cmpfun(cleanupnames) | |
V(charelbb)$name <- sapply(1:vcount(charelbb), function(i) return(cleanupnames(V(charelbb)$name[i]))) | |
rm(cleanupnames) | |
lo.charelbb <- layout.fruchterman.reingold(charelbb, params = list(niter = 5000, area = vcount(charelbb)^3)) | |
plot(charelbb, vertex.size = 2 + 3 * betweenness(charelbb)/900, vertex.label.cex = .5, layout = lo.charelbb, vertex.label.family = "sans", vertex.color = "#F297A0", vertex.frame.color = "#6F95A2", vertex.label.color = "#000000", edge.color = "#B6BFBE") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment