drbh · August 20, 2025 20:12
diff --git a/explore-fdrs.py b/explore-fdrs.py
 import pandas as pd
 import matplotlib.pyplot as plt
 import numpy as np

 # Load the filtered results table; later sections read its 'target', 'diff',
 # 'fdr' and 'pairwise_fdr' columns.
 df = pd.read_csv("de_results/filtered_results.csv")

 # Distinct target labels and how many of them there are
 targets = df['target'].unique()
 n_targets = len(targets)


 def _target_summary(label):
    """Return (target, mean of its 'diff' values, number of its rows)."""
    rows_for_target = df[df['target'] == label]
    return label, rows_for_target['diff'].mean(), len(rows_for_target)


 # One summary tuple per target, ordered by ascending mean difference
 # (presumably 'diff' is fdr - pairwise_fdr, per the summary plot's axis label).
 target_diffs = sorted(map(_target_summary, targets), key=lambda summary: summary[1])

 # Draw one small panel per target comparing the 'fdr' and 'pairwise_fdr'
 # columns. Panels follow the order of `target_diffs` (ascending mean 'diff').
 cols = 15
 rows = int(np.ceil(n_targets / cols))
 fig, axes = plt.subplots(rows, cols, figsize=(60, rows * 4), squeeze=False)
 fig.suptitle('FDR vs Pairwise FDR Distributions by Target', fontsize=48)

 # Flatten the 2-D axes grid so panels can be addressed with a single index
 axes_flat = axes.flatten()

 # The grid is rectangular, so hide the trailing panels that have no target
 for unused_ax in axes_flat[n_targets:]:
    unused_ax.set_visible(False)

 # Plot each target
 for idx, (target, mean_diff, n_features) in enumerate(target_diffs):
    ax = axes_flat[idx]
    target_data = df[df['target'] == target].sort_values('fdr')

    # Plot against explicit rank positions. Passing a pandas Series on its own
    # makes matplotlib take x from the Series index, which would put every
    # point back at its original row label and silently undo the sort above.
    positions = np.arange(len(target_data))
    ax.plot(positions, target_data['fdr'].to_numpy(), 'b.', markersize=4, alpha=0.7)
    ax.plot(positions, target_data['pairwise_fdr'].to_numpy(), 'r.', markersize=4, alpha=0.7)

    # Add 0.05 threshold
    ax.axhline(y=0.05, color='gray', linestyle='--', linewidth=2, alpha=0.5)

    # Format subplot: 10% headroom above the largest value, capped at 0.2.
    # Fall back to the cap when the maximum is NaN or not positive, because
    # set_ylim cannot take a NaN limit and (0, 0) is a degenerate range.
    y_top = target_data[['fdr', 'pairwise_fdr']].max().max() * 1.1
    if not (np.isfinite(y_top) and y_top > 0):
        y_top = 0.2
    ax.set_title(f'{target}\n(n={n_features})', fontsize=24, pad=6)
    ax.set_ylim(0, min(y_top, 0.2))
    ax.tick_params(labelsize=18)
    ax.set_xticks([])

    # Tint the background by the sign of the mean difference; panels within
    # +/-0.01 keep the default background.
    if mean_diff > 0.01:
        ax.set_facecolor('#ffeeee')
    elif mean_diff < -0.01:
        ax.set_facecolor('#eeeeff')

 # Figure-level legend built from proxy artists (one per plotted element)
 legend_elements = [
    plt.Line2D([0], [0], color='b', marker='.', markersize=12, linestyle='', label='FDR'),
    plt.Line2D([0], [0], color='r', marker='.', markersize=12, linestyle='', label='Pairwise FDR'),
    plt.Line2D([0], [0], color='gray', linestyle='--', label='0.05 threshold', linewidth=2),
 ]
 fig.legend(handles=legend_elements, loc='upper right', bbox_to_anchor=(0.98, 0.98), fontsize=24)

 # Save the grid and report where it went
 distributions_path = 'de_results/all_distributions.png'
 plt.tight_layout()
 plt.savefig(distributions_path, dpi=300, bbox_inches='tight')
 print(f"Saved all distributions to: {distributions_path}")

 # Summary bar chart of the targets with the largest mean 'diff'
 fig2, ax = plt.subplots(figsize=(16, 10))

 # target_diffs is sorted ascending by mean difference, so its last 20 entries
 # are the largest ones (fewer if there are fewer than 20 targets).
 top_targets = target_diffs[-20:]
 targets_sorted = [entry[0] for entry in top_targets]
 mean_diffs = [entry[1] for entry in top_targets]
 n_features = [entry[2] for entry in top_targets]

 # Shift and scale so mean differences of -0.02 and +0.02 map to 0 and 1,
 # the input range of the colormap.
 bar_colors = plt.cm.RdBu_r((np.array(mean_diffs) + 0.02) / 0.04)
 bars = ax.bar(range(len(targets_sorted)), mean_diffs, color=bar_colors)

 # Annotate each bar with its row count just beyond the bar's free end:
 # above the tip for non-negative bars, below it for negative ones, so the
 # label never overlaps the bar itself.
 label_gap = 0.0005
 for bar, n in zip(bars, n_features):
    height = bar.get_height()
    if height < 0:
        label_y, valign = height - label_gap, 'top'
    else:
        label_y, valign = height + label_gap, 'bottom'
    ax.text(bar.get_x() + bar.get_width() / 2, label_y,
            f'n={n}', ha='center', va=valign, fontsize=8)

 ax.set_xlabel('Target')
 ax.set_ylabel('Mean Difference (FDR - Pairwise FDR)')
 ax.set_title('Top 20 Targets by Mean FDR Difference')
 ax.set_xticks(range(len(targets_sorted)))
 ax.set_xticklabels(targets_sorted, rotation=45, ha='right')
 # Zero baseline plus light horizontal grid lines for reading bar heights
 ax.axhline(y=0, color='black', linestyle='-', linewidth=0.5)
 ax.grid(True, alpha=0.3, axis='y')

 # Save the summary and report where it went
 summary_path = 'de_results/top_differences.png'
 plt.tight_layout()
 plt.savefig(summary_path, dpi=300, bbox_inches='tight')
 print(f"Saved summary to: {summary_path}")
	import pandas as pd
	import matplotlib.pyplot as plt
	import numpy as np

	# Load the filtered results table; later sections read its 'target', 'diff',
	# 'fdr' and 'pairwise_fdr' columns.
	df = pd.read_csv("de_results/filtered_results.csv")

	# Distinct target labels and how many of them there are
	targets = df['target'].unique()
	n_targets = len(targets)

	# Collect (target, mean of 'diff', row count) for every target.
	# The loop body is nested under the `for` again; it had been flattened.
	target_diffs = []
	for target in targets:
	    target_data = df[df['target'] == target]
	    mean_diff = target_data['diff'].mean()
	    target_diffs.append((target, mean_diff, len(target_data)))

	# Order the summaries by ascending mean difference
	target_diffs.sort(key=lambda x: x[1])

	# Draw one small panel per target comparing the 'fdr' and 'pairwise_fdr'
	# columns. Panels follow the order of `target_diffs` (ascending mean 'diff').
	cols = 15
	rows = int(np.ceil(n_targets / cols))
	fig, axes = plt.subplots(rows, cols, figsize=(60, rows * 4), squeeze=False)
	fig.suptitle('FDR vs Pairwise FDR Distributions by Target', fontsize=48)

	# Flatten the 2-D axes grid so panels can be addressed with a single index
	axes_flat = axes.flatten()

	# The grid is rectangular, so hide the trailing panels that have no target
	for unused_ax in axes_flat[n_targets:]:
	    unused_ax.set_visible(False)

	# Plot each target (loop and branch bodies are nested again; they had been
	# flattened to the same level as their headers)
	for idx, (target, mean_diff, n_features) in enumerate(target_diffs):
	    ax = axes_flat[idx]
	    target_data = df[df['target'] == target].sort_values('fdr')

	    # Plot against explicit rank positions. Passing a pandas Series on its
	    # own makes matplotlib take x from the Series index, which would put
	    # every point back at its original row label and undo the sort above.
	    positions = np.arange(len(target_data))
	    ax.plot(positions, target_data['fdr'].to_numpy(), 'b.', markersize=4, alpha=0.7)
	    ax.plot(positions, target_data['pairwise_fdr'].to_numpy(), 'r.', markersize=4, alpha=0.7)

	    # Add 0.05 threshold
	    ax.axhline(y=0.05, color='gray', linestyle='--', linewidth=2, alpha=0.5)

	    # Format subplot: 10% headroom above the largest value, capped at 0.2.
	    # Fall back to the cap when the maximum is NaN or not positive, because
	    # set_ylim cannot take a NaN limit and (0, 0) is a degenerate range.
	    y_top = target_data[['fdr', 'pairwise_fdr']].max().max() * 1.1
	    if not (np.isfinite(y_top) and y_top > 0):
	        y_top = 0.2
	    ax.set_title(f'{target}\n(n={n_features})', fontsize=24, pad=6)
	    ax.set_ylim(0, min(y_top, 0.2))
	    ax.tick_params(labelsize=18)
	    ax.set_xticks([])

	    # Tint the background by the sign of the mean difference; panels within
	    # +/-0.01 keep the default background.
	    if mean_diff > 0.01:
	        ax.set_facecolor('#ffeeee')
	    elif mean_diff < -0.01:
	        ax.set_facecolor('#eeeeff')

	# Figure-level legend built from proxy artists (one per plotted element)
	legend_elements = [
	    plt.Line2D([0], [0], color='b', marker='.', markersize=12, linestyle='', label='FDR'),
	    plt.Line2D([0], [0], color='r', marker='.', markersize=12, linestyle='', label='Pairwise FDR'),
	    plt.Line2D([0], [0], color='gray', linestyle='--', label='0.05 threshold', linewidth=2),
	]
	fig.legend(handles=legend_elements, loc='upper right', bbox_to_anchor=(0.98, 0.98), fontsize=24)

	# Save the grid and report where it went
	distributions_path = 'de_results/all_distributions.png'
	plt.tight_layout()
	plt.savefig(distributions_path, dpi=300, bbox_inches='tight')
	print(f"Saved all distributions to: {distributions_path}")

	# Summary bar chart of the targets with the largest mean 'diff'
	fig2, ax = plt.subplots(figsize=(16, 10))

	# target_diffs is sorted ascending by mean difference, so its last 20 entries
	# are the largest ones (fewer if there are fewer than 20 targets).
	top_targets = target_diffs[-20:]
	targets_sorted = [entry[0] for entry in top_targets]
	mean_diffs = [entry[1] for entry in top_targets]
	n_features = [entry[2] for entry in top_targets]

	# Shift and scale so mean differences of -0.02 and +0.02 map to 0 and 1,
	# the input range of the colormap.
	bar_colors = plt.cm.RdBu_r((np.array(mean_diffs) + 0.02) / 0.04)
	bars = ax.bar(range(len(targets_sorted)), mean_diffs, color=bar_colors)

	# Annotate each bar with its row count just beyond the bar's free end:
	# above the tip for non-negative bars, below it for negative ones, so the
	# label never overlaps the bar itself. (The loop body is nested again; it
	# had been flattened to the level of the `for` header.)
	label_gap = 0.0005
	for bar, n in zip(bars, n_features):
	    height = bar.get_height()
	    if height < 0:
	        label_y, valign = height - label_gap, 'top'
	    else:
	        label_y, valign = height + label_gap, 'bottom'
	    ax.text(bar.get_x() + bar.get_width() / 2, label_y,
	            f'n={n}', ha='center', va=valign, fontsize=8)

	ax.set_xlabel('Target')
	ax.set_ylabel('Mean Difference (FDR - Pairwise FDR)')
	ax.set_title('Top 20 Targets by Mean FDR Difference')
	ax.set_xticks(range(len(targets_sorted)))
	ax.set_xticklabels(targets_sorted, rotation=45, ha='right')
	# Zero baseline plus light horizontal grid lines for reading bar heights
	ax.axhline(y=0, color='black', linestyle='-', linewidth=0.5)
	ax.grid(True, alpha=0.3, axis='y')

	# Save the summary and report where it went
	summary_path = 'de_results/top_differences.png'
	plt.tight_layout()
	plt.savefig(summary_path, dpi=300, bbox_inches='tight')
	print(f"Saved summary to: {summary_path}")