Skip to content

Instantly share code, notes, and snippets.

@drbh
Created August 20, 2025 20:12
Show Gist options
  • Save drbh/673ad8beb19cb14979a139b209cd74c2 to your computer and use it in GitHub Desktop.
Save drbh/673ad8beb19cb14979a139b209cd74c2 to your computer and use it in GitHub Desktop.
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Load the filtered results table. This stage reads the 'target' and 'diff'
# columns from it.
df = pd.read_csv("de_results/filtered_results.csv")

# Distinct target labels in order of first appearance. Rows with a missing
# target are excluded: an equality test can never select them, so they would
# only yield an entry with zero rows and a NaN mean.
targets = df['target'].dropna().unique()
n_targets = len(targets)

# Mean of 'diff' and row count per target, computed in one grouped pass
# rather than re-filtering the whole frame once per target. sort=False keeps
# first-appearance order, so targets with equal means keep that relative
# order through the stable sort below. 'size' counts every row of the group,
# including rows whose 'diff' is NaN.
target_stats = df.groupby('target', sort=False)['diff'].agg(['mean', 'size'])

# Ascending by mean difference. A target whose mean is NaN (no usable 'diff'
# values) is placed first, so the ordering stays deterministic and such
# entries cannot occupy the largest-difference end of the list.
target_stats = target_stats.sort_values('mean', kind='stable', na_position='first')

# List of (target, mean_diff, n_rows) tuples, smallest mean difference first.
target_diffs = list(target_stats.itertuples(index=True, name=None))
# Grid of small panels, one per target, in the order given by target_diffs.
cols = 15
# Keep at least one row so an empty target list still yields a valid figure.
rows = max(1, int(np.ceil(n_targets / cols)))
fig, axes = plt.subplots(rows, cols, figsize=(60, rows * 4), squeeze=False)
fig.suptitle('FDR vs Pairwise FDR Distributions by Target', fontsize=48)

# Flatten the 2-D axes array so panels can be filled in reading order.
axes_flat = axes.flatten()

# Hide the unused trailing cells of the grid.
for spare_ax in axes_flat[n_targets:]:
    spare_ax.set_visible(False)

# Split the frame once so each panel is a dictionary lookup instead of a
# full-frame boolean mask. A target with no matching rows gets an empty frame.
rows_by_target = {key: group for key, group in df.groupby('target', sort=False)}
no_rows = df.iloc[0:0]

for ax, (target, mean_diff, n_features) in zip(axes_flat, target_diffs):
    panel = rows_by_target.get(target, no_rows).sort_values('fdr')

    # Plot against explicit 0..n-1 rank positions. Passing a pandas Series on
    # its own makes matplotlib use the Series index as x, which would place
    # the points at their original row numbers and undo the sort by 'fdr'.
    rank = np.arange(len(panel))
    ax.plot(rank, panel['fdr'].to_numpy(), 'b.', markersize=4, alpha=0.7)
    ax.plot(rank, panel['pairwise_fdr'].to_numpy(), 'r.', markersize=4, alpha=0.7)

    # Reference line at the 0.05 threshold.
    ax.axhline(y=0.05, color='gray', linestyle='--', linewidth=2, alpha=0.5)

    ax.set_title(f'{target}\n(n={n_features})', fontsize=24, pad=6)

    # Upper limit is 10% above the largest plotted value, capped at 0.2
    # (points above the cap fall outside the panel). When there is no finite
    # positive value to scale from, fall back to the cap: NaN is rejected by
    # set_ylim and a zero peak would give a degenerate 0..0 range.
    peak = panel[['fdr', 'pairwise_fdr']].max().max()
    if np.isfinite(peak) and peak > 0:
        ax.set_ylim(0, min(peak * 1.1, 0.2))
    else:
        ax.set_ylim(0, 0.2)

    ax.tick_params(labelsize=18)
    # x is only a rank position, so its tick labels carry no information.
    ax.set_xticks([])

    # Tint the panel by the sign of the mean difference; values within
    # +/-0.01 (and NaN) keep the default background.
    if mean_diff > 0.01:
        ax.set_facecolor('#ffeeee')
    elif mean_diff < -0.01:
        ax.set_facecolor('#eeeeff')

# One shared legend for the whole figure, built from proxy artists.
legend_elements = [
    plt.Line2D([0], [0], color='b', marker='.', markersize=12, linestyle='', label='FDR'),
    plt.Line2D([0], [0], color='r', marker='.', markersize=12, linestyle='', label='Pairwise FDR'),
    plt.Line2D([0], [0], color='gray', linestyle='--', label='0.05 threshold', linewidth=2),
]
fig.legend(handles=legend_elements, loc='upper right', bbox_to_anchor=(0.98, 0.98), fontsize=24)
fig.tight_layout()

# Agg-based backends refuse to render images with 2**16 or more pixels on a
# side. The figure height grows with the number of rows, so lower the dpi
# from 300 only when needed to stay under that limit (one inch of slack
# covers the padding added by bbox_inches='tight').
max_side_px = 2 ** 16 - 1
longest_side_in = max(fig.get_size_inches())
grid_dpi = min(300, int(max_side_px // (longest_side_in + 1)))

grid_path = 'de_results/all_distributions.png'
fig.savefig(grid_path, dpi=grid_dpi, bbox_inches='tight')
print(f"Saved all distributions to: {grid_path}")

# Release the large canvas now that it is on disk.
plt.close(fig)
# Summary bar chart of the targets with the largest mean differences.
fig2, ax = plt.subplots(figsize=(16, 10))

# target_diffs is sorted ascending by mean difference, so its tail holds the
# largest values. Fewer than 20 entries simply yields a shorter chart.
top = target_diffs[-20:]
targets_sorted = [name for name, _, _ in top]
mean_diffs = [mean for _, mean, _ in top]
n_features = [count for _, _, count in top]

# Map mean differences from [-0.02, 0.02] onto the [0, 1] colormap range;
# values outside that interval saturate at the ends of RdBu_r.
bar_colors = plt.cm.RdBu_r((np.array(mean_diffs) + 0.02) / 0.04)
positions = range(len(targets_sorted))
bars = ax.bar(positions, mean_diffs, color=bar_colors)

# Annotate each bar with its row count just beyond the bar's free end:
# above the top of a positive bar, below the bottom of a negative one, so
# the label never sits on top of the bar itself.
label_gap = 0.0005
for bar, value, count in zip(bars, mean_diffs, n_features):
    points_down = value < 0
    ax.text(
        bar.get_x() + bar.get_width() / 2,
        value - label_gap if points_down else value + label_gap,
        f'n={count}',
        ha='center',
        va='top' if points_down else 'bottom',
        fontsize=8,
    )

ax.set_xlabel('Target')
ax.set_ylabel('Mean Difference (FDR - Pairwise FDR)')
# Report the number of bars actually drawn (20 whenever enough targets exist).
ax.set_title(f'Top {len(top)} Targets by Mean FDR Difference')
ax.set_xticks(positions)
ax.set_xticklabels(targets_sorted, rotation=45, ha='right')
# Zero baseline and light horizontal grid for reading bar heights.
ax.axhline(y=0, color='black', linestyle='-', linewidth=0.5)
ax.grid(True, alpha=0.3, axis='y')
fig2.tight_layout()

summary_path = 'de_results/top_differences.png'
fig2.savefig(summary_path, dpi=300, bbox_inches='tight')
print(f"Saved summary to: {summary_path}")

# Free the figure once it has been written.
plt.close(fig2)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment