Skip to content

Instantly share code, notes, and snippets.

@gilesc
Created April 22, 2021 18:25
Show Gist options
  • Save gilesc/7d114ce38e6e795370c783012e8d4911 to your computer and use it in GitHub Desktop.
Save gilesc/7d114ce38e6e795370c783012e8d4911 to your computer and use it in GitHub Desktop.
from dataclasses import dataclass
import pandas as pd
import numpy as np
import scipy.stats
@dataclass
class BinaryAnnotation:
columns: np.array
index: np.array
data: np.array # 2D array of packed bits
def query(self, query: pd.Series):
bg = np.packbits(np.isin(self.columns, query.index))
q = np.packbits(np.isin(self.columns, query.index[query != 0]))
A = np.bitwise_and(self.data, bg)
def fn(x):
return np.unpackbits(x.reshape(self.data.shape), axis=1).sum(axis=1)
c = np.vstack([
fn(A & q),
fn(A & ~q),
fn(~A & q),
fn(bg & ~self.data & ~q)
]).astype(np.int64)
OR = c[0,:] * c[3,:] / (c[1,:] * c[2,:])
n1 = c[0,:] + c[1,:]
n2 = c[2,:] + c[3,:]
n = c[0,:] + c[2,:]
# less
#p = scipy.stats.hypergeom.cdf(c[0,:], n1 + n2, n1, n)
# greater
p = scipy.stats.hypergeom.cdf(c[1,:], n1 + n2, n1, c[1,:] + c[3,:])
o = pd.DataFrame(c.T, index=self.index, columns=["AS","A","S","O"])
o["OR"] = OR
o["p"] = p
return o.sort_values("p")
def from_mapping(M):
columns = np.array(list(sorted(M["ElementID"].unique())))
data = {
k:np.packbits(np.isin(columns, v)) for k,v in
M.groupby("TermID")["ElementID"].unique().items()
}
index = np.array(list(sorted(data.keys())))
return BinaryAnnotation(
index=index,
columns=columns,
data=np.vstack([data[k] for k in index])
)
def fn():
import wrenlab.enrichment
GO = wrenlab.enrichment.annotation("GO")
return BinaryAnnotation.from_mapping(GO.mapping)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment