Created
August 18, 2025 23:30
-
-
Save drbh/d0c78d42adcc34562b242e34b76f80b5 to your computer and use it in GitHub Desktop.
Explores the difference in fdr by adding a pairwise fdr column to pdex
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
uv run pdex-pairwise-fdr.py | |
Installed 44 packages in 198ms | |
Selected target: LAD1 | |
Selected target: TWF2 | |
INFO:pdex._single_cell:vectorized processing: 3 targets, 18080 genes | |
INFO:pdex._single_cell:Processing 2 targets | |
Processing targets: 100%|█████████████████████████████████████████████████████████████████| 2/2 [00:05<00:00, 2.82s/it] | |
target reference feature target_mean ... statistic pairwise_fdr fdr diff | |
22040 TWF2 non-targeting MASP1 0.135913 ... 18364126.0 0.044305 0.062304 -0.017999 | |
34612 TWF2 non-targeting PTPRT 0.887897 ... 17946027.5 0.041230 0.057592 -0.016361 | |
35110 TWF2 non-targeting ADORA2A 0.371032 ... 18062904.5 0.031475 0.043659 -0.012184 | |
34255 TWF2 non-targeting ZIM3 16.389881 ... 20643953.0 0.031475 0.043659 -0.012184 | |
32342 TWF2 non-targeting LRRC37A2 0.000992 ... 19259288.0 0.010005 0.014916 -0.004911 | |
... ... ... ... ... ... ... ... ... ... | |
1106 LAD1 non-targeting LCE1C 0.001470 ... 26015583.0 0.000492 0.000358 0.000134 | |
12597 LAD1 non-targeting SLC51B 0.003674 ... 26059912.5 0.013928 0.008622 0.005306 | |
2137 LAD1 non-targeting TSPYL6 0.002204 ... 26030586.5 0.018776 0.011949 0.006828 | |
2940 LAD1 non-targeting PRSS56 0.002204 ... 26029907.5 0.047369 0.026593 0.020776 | |
1541 LAD1 non-targeting TNNI1 0.022043 ... 25390407.0 0.047369 0.025954 0.021415 | |
[61 rows x 12 columns] | |
Target: LAD1, DE genes: 16 | |
Target: TWF2, DE genes: 45 | |
============================== | |
target reference feature target_mean ... statistic pairwise_fdr fdr diff | |
35110 TWF2 non-targeting ADORA2A 0.371032 ... 18062904.5 0.031475 0.043659 -0.012184 | |
34255 TWF2 non-targeting ZIM3 16.389881 ... 20643953.0 0.031475 0.043659 -0.012184 | |
32342 TWF2 non-targeting LRRC37A2 0.000992 ... 19259288.0 0.010005 0.014916 -0.004911 | |
19717 TWF2 non-targeting HSD11B1 0.000992 ... 19259288.0 0.010005 0.014916 -0.004911 | |
29758 TWF2 non-targeting RNF113B 0.000992 ... 19259288.0 0.010005 0.014916 -0.004911 | |
... ... ... ... ... ... ... ... ... ... | |
2580 LAD1 non-targeting SLC38A11 0.001470 ... 26014222.0 0.056277 0.035642 0.020635 | |
15292 LAD1 non-targeting DAND5 0.010287 ... 26154785.0 0.056277 0.035626 0.020652 | |
3445 LAD1 non-targeting C3orf49 0.013960 ... 26172724.0 0.056277 0.035626 0.020652 | |
2940 LAD1 non-targeting PRSS56 0.002204 ... 26029907.5 0.047369 0.026593 0.020776 | |
1541 LAD1 non-targeting TNNI1 0.022043 ... 25390407.0 0.047369 0.025954 0.021415 | |
[62 rows x 12 columns] | |
Target: LAD1, DE genes: 19 | |
Target: TWF2, DE genes: 43 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# /// script | |
# requires-python = ">=3.10" | |
# dependencies = [ | |
# "anndata", | |
# "pdex", | |
# ] | |
# | |
# [tool.uv.sources] | |
# pdex = { git = "https://github.com/drbh/pdex.git", branch = "pairwise_fdr" } | |
# /// | |
import os | |
import anndata as ad | |
from pdex._single_cell import ( | |
parallel_differential_expression_vec_wrapper as parallel_differential_expression, | |
) | |
adata = ad.read_h5ad("../bspc/convert/vcc_data/adata_Training.h5ad") | |
os.makedirs("de_results", exist_ok=True) | |
ctrl_mask = adata.obs["target_gene"] == "non-targeting" | |
control_cells = adata[ctrl_mask] | |
targets = ["LAD1", "TWF2"] | |
cells = [control_cells] | |
for target in targets: | |
print(f"Selected target: {target}") | |
target_mask = adata.obs["target_gene"] == target | |
target_cells = adata[target_mask] | |
cells.append(target_cells) | |
de_adata = ad.concat(cells) | |
results = parallel_differential_expression( | |
de_adata, | |
reference="non-targeting", | |
groupby_key="target_gene", | |
) | |
results["diff"] = results["pairwise_fdr"] - results["fdr"] | |
results = results.sort_values("diff") | |
filtered = results[results["pairwise_fdr"] < 0.05] | |
print(filtered) | |
# print the number of each target | |
for target in targets: | |
count = filtered[filtered["target"] == target].shape[0] | |
print(f"Target: {target}, DE genes: {count}") | |
print("=" * 30) | |
filtered2 = results[results["fdr"] < 0.05] | |
print(filtered2) | |
for target in targets: | |
count = filtered2[filtered2["target"] == target].shape[0] | |
print(f"Target: {target}, DE genes: {count}") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment