Created
August 20, 2025 20:12
-
-
Save drbh/673ad8beb19cb14979a139b209cd74c2 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Load the per-feature results. The rest of the script reads the columns
# 'target', 'diff', 'fdr' and 'pairwise_fdr' from this frame.
df = pd.read_csv("de_results/filtered_results.csv")

# Summarise every target in one pass instead of re-filtering the whole frame
# once per target.
#   * sort=False keeps targets in order of first appearance, so the stable
#     sort below breaks ties between equal means the same way every run.
#   * 'size' counts all rows of the group (including rows whose 'diff' is
#     NaN), while 'mean' skips NaN values.
#   * groupby drops rows whose 'target' is missing, so no empty NaN
#     pseudo-target ends up in the summary.
target_stats = df.groupby('target', sort=False)['diff'].agg(['mean', 'size'])

# Unique (non-missing) targets and how many of them there are.
targets = target_stats.index.to_numpy()
n_targets = len(targets)

# One (target, mean diff, row count) tuple per target.
target_diffs = [
    (target, mean_diff, int(n_rows))
    for target, mean_diff, n_rows in target_stats.itertuples(name=None)
]

# Sort ascending by mean difference: the most negative targets come first,
# the most positive ones last.
target_diffs.sort(key=lambda x: x[1])
# Draw one small panel per target on a fixed-width grid, ordered like
# target_diffs (ascending mean difference).
cols = 15
# Keep at least one row so plt.subplots() still receives a valid grid when
# there are no targets at all.
rows = max(1, int(np.ceil(n_targets / cols)))
fig, axes = plt.subplots(rows, cols, figsize=(60, rows * 4), squeeze=False)
fig.suptitle('FDR vs Pairwise FDR Distributions by Target', fontsize=48)

# Flatten the 2-D axes array so panels can be addressed by running index.
axes_flat = axes.flatten()

# Hide the unused panels at the end of the last row.
for unused_ax in axes_flat[n_targets:]:
    unused_ax.set_visible(False)

# Plot each target in its own panel.
for ax, (target, mean_diff, n_features) in zip(axes_flat, target_diffs):
    # Matplotlib uses a Series' index as the x coordinate when only y is
    # given. Without resetting the index, the points would be placed at their
    # original row labels and sorting by 'fdr' would have no visible effect.
    target_data = (
        df[df['target'] == target]
        .sort_values('fdr')
        .reset_index(drop=True)
    )

    # Blue dots: FDR (ascending); red dots: pairwise FDR of the same rows.
    ax.plot(target_data['fdr'], 'b.', markersize=4, alpha=0.7)
    ax.plot(target_data['pairwise_fdr'], 'r.', markersize=4, alpha=0.7)

    # Dashed reference line at the 0.05 threshold.
    ax.axhline(y=0.05, color='gray', linestyle='--', linewidth=2, alpha=0.5)

    ax.set_title(f'{target}\n(n={n_features})', fontsize=24, pad=6)

    # Cap the y range at 0.2 and leave 10% headroom above the largest value.
    # Skip the explicit limit when it would be NaN (no usable values) or not
    # positive: set_ylim() rejects NaN and a (0, 0) range is singular, so the
    # panel falls back to autoscaling in those cases.
    y_upper = min(target_data[['fdr', 'pairwise_fdr']].max().max() * 1.1, 0.2)
    if np.isfinite(y_upper) and y_upper > 0:
        ax.set_ylim(0, y_upper)

    ax.tick_params(labelsize=18)
    # x is only the rank within the target, so tick labels carry no meaning.
    ax.set_xticks([])

    # Tint the panel by the sign of the mean difference: light red above
    # +0.01, light blue below -0.01, default background in between.
    if mean_diff > 0.01:
        ax.set_facecolor('#ffeeee')
    elif mean_diff < -0.01:
        ax.set_facecolor('#eeeeff')

# One shared legend for the whole figure, built from proxy artists.
legend_elements = [
    plt.Line2D([0], [0], color='b', marker='.', markersize=12, linestyle='', label='FDR'),
    plt.Line2D([0], [0], color='r', marker='.', markersize=12, linestyle='', label='Pairwise FDR'),
    plt.Line2D([0], [0], color='gray', linestyle='--', label='0.05 threshold', linewidth=2),
]
fig.legend(handles=legend_elements, loc='upper right', bbox_to_anchor=(0.98, 0.98), fontsize=24)

plt.tight_layout()
plt.savefig('de_results/all_distributions.png', dpi=300, bbox_inches='tight')
print("Saved all distributions to: de_results/all_distributions.png")
# Summary bar chart of the 20 targets with the largest mean difference.
fig2, ax = plt.subplots(figsize=(16, 10))

# target_diffs is sorted ascending by mean difference, so its last 20 entries
# are the largest ones (fewer if there are fewer than 20 targets).
top_targets = target_diffs[-20:]
targets_sorted = [t[0] for t in top_targets]
mean_diffs = [t[1] for t in top_targets]
n_features = [t[2] for t in top_targets]

# Map mean differences from [-0.02, 0.02] onto the RdBu_r colormap's [0, 1]
# input range; values outside that interval are passed through unchanged and
# left to the colormap to handle.
bar_colors = plt.cm.RdBu_r((np.array(mean_diffs) + 0.02) / 0.04)
bars = ax.bar(range(len(targets_sorted)), mean_diffs, color=bar_colors)

# Annotate every bar with its row count. The label sits just beyond the end
# of the bar: above it for positive bars, below it for negative ones, so it
# is never drawn on top of the bar itself.
for bar, n in zip(bars, n_features):
    height = bar.get_height()
    if height >= 0:
        label_y, v_align = height + 0.0005, 'bottom'
    else:
        label_y, v_align = height - 0.0005, 'top'
    ax.text(bar.get_x() + bar.get_width() / 2, label_y,
            f'n={n}', ha='center', va=v_align, fontsize=8)

ax.set_xlabel('Target')
ax.set_ylabel('Mean Difference (FDR - Pairwise FDR)')
ax.set_title('Top 20 Targets by Mean FDR Difference')
ax.set_xticks(range(len(targets_sorted)))
ax.set_xticklabels(targets_sorted, rotation=45, ha='right')
# Zero baseline plus a light horizontal grid for reading bar heights.
ax.axhline(y=0, color='black', linestyle='-', linewidth=0.5)
ax.grid(True, alpha=0.3, axis='y')

plt.tight_layout()
plt.savefig('de_results/top_differences.png', dpi=300, bbox_inches='tight')
print("Saved summary to: de_results/top_differences.png")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment