@betatim
Last active December 10, 2025 07:38
Comparing scikit-learn's random forest with lightgbm's implementation.
"""
Benchmark: scikit-learn RandomForest vs LightGBM RandomForest
Compares performance across:
- Number of samples (1K, 10K, 100K, 500K)
- Number of features (10, 50, 200)
- Feature types (numerical, categorical, mixed)
- Number of classes (2, 5, 10)
Includes cases optimized for LightGBM's strengths:
- Native categorical handling (no one-hot encoding needed)
- Large datasets with high-cardinality categorical features
Evaluation metric: Log Loss (cross-entropy) on held-out test data
- Lower is better
- Measures both prediction quality and probability calibration
"""
import time
import warnings
from dataclasses import dataclass
from typing import Literal
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import log_loss
from sklearn.model_selection import train_test_split
try:
    import lightgbm as lgb
except ImportError:
    raise ImportError("Please install lightgbm: pip install lightgbm")
# Suppress warnings for cleaner output
warnings.filterwarnings("ignore")
N_JOBS = -1 # Use all cores
N_ESTIMATORS = 100
RANDOM_STATE = 42

@dataclass
class BenchmarkConfig:
    """Configuration for a single benchmark run."""
    name: str
    n_samples: int
    n_features: int
    n_classes: int
    feature_type: Literal["numerical", "categorical", "mixed"]
    n_categorical_features: int = 0
    categorical_cardinality: int = 10
    description: str = ""
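# Illustrative (hypothetical) example of a config object:
#   BenchmarkConfig(name="demo", n_samples=1_000, n_features=10, n_classes=2,
#                   feature_type="numerical")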

def generate_data(config: BenchmarkConfig):
    """Generate synthetic data based on configuration with train/test split."""
    rng = np.random.default_rng(RANDOM_STATE)
    n_numerical = config.n_features - config.n_categorical_features
    n_categorical = config.n_categorical_features
    # Generate numerical features
    if n_numerical > 0:
        X_num = rng.standard_normal((config.n_samples, n_numerical))
    else:
        X_num = np.empty((config.n_samples, 0))
    # Generate categorical features (as integers)
    if n_categorical > 0:
        X_cat = rng.integers(
            0, config.categorical_cardinality, size=(config.n_samples, n_categorical)
        )
    else:
        X_cat = np.empty((config.n_samples, 0), dtype=np.int64)
    # Combine features (categorical columns come after the numerical ones)
    X = np.hstack([X_num, X_cat])
    # Generate target based on feature values (to create some signal)
    # Shuffle column order so all features have equal chance of contributing
    n_signal_features = min(5, config.n_features)
    shuffled_col_indices = rng.permutation(config.n_features)
    signal_col_indices = shuffled_col_indices[:n_signal_features]
    signal = X[:, signal_col_indices].sum(axis=1)
    # qcut bins the signal into n_classes equal-frequency buckets, so classes are balanced
    y = pd.qcut(signal, q=config.n_classes, labels=False).astype(np.int32)
    # Track which columns are categorical
    categorical_indices = list(range(n_numerical, config.n_features))
    # Train/test split (80/20)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=RANDOM_STATE, stratify=y
    )
    return X_train, X_test, y_train, y_test, categorical_indices
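# For example, the "Small Numerical" config below (1,000 samples, 10 numerical
# features) yields an 80/20 split: X_train of shape (800, 10) and X_test of
# shape (200, 10).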

def benchmark_sklearn(X_train, X_test, y_train, y_test, categorical_indices, config: BenchmarkConfig):
    """Benchmark scikit-learn RandomForestClassifier."""
    # sklearn needs categorical features encoded properly.
    # For fair comparison, we use the data as-is (already ordinal encoded),
    # so sklearn splits on the category codes as if they were ordered numbers.
    clf = RandomForestClassifier(
        n_estimators=N_ESTIMATORS,
        n_jobs=N_JOBS,
        random_state=RANDOM_STATE,
        max_features="sqrt",
    )
    start = time.perf_counter()
    clf.fit(X_train, y_train)
    fit_time = time.perf_counter() - start
    start = time.perf_counter()
    y_proba = clf.predict_proba(X_test)
    predict_time = time.perf_counter() - start
    # Compute log loss on test set
    score = log_loss(y_test, y_proba)
    return fit_time, predict_time, score

def benchmark_lightgbm(X_train, X_test, y_train, y_test, categorical_indices, config: BenchmarkConfig):
    """Benchmark LightGBM in RandomForest mode."""
    # LightGBM RF parameters
    # (rf mode requires bagging, i.e. bagging_freq > 0 and bagging_fraction < 1)
    params = {
        "boosting_type": "rf",
        "objective": "multiclass" if config.n_classes > 2 else "binary",
        "num_class": config.n_classes if config.n_classes > 2 else 1,
        "n_estimators": N_ESTIMATORS,
        "bagging_freq": 1,
        "bagging_fraction": 0.8,
        # Roughly matches sklearn's max_features="sqrt"
        "feature_fraction": np.sqrt(config.n_features) / config.n_features,
        "n_jobs": N_JOBS,
        "random_state": RANDOM_STATE,
        "verbose": -1,
    }
    # Create DataFrame with proper categorical dtypes for LightGBM
    if categorical_indices:
        df_train = pd.DataFrame(X_train)
        df_test = pd.DataFrame(X_test)
        for idx in categorical_indices:
            df_train[idx] = df_train[idx].astype("category")
            df_test[idx] = df_test[idx].astype("category")
        X_lgb_train = df_train
        X_lgb_test = df_test
    else:
        X_lgb_train = X_train
        X_lgb_test = X_test
    clf = lgb.LGBMClassifier(**params)
    start = time.perf_counter()
    clf.fit(X_lgb_train, y_train, categorical_feature=categorical_indices if categorical_indices else "auto")
    fit_time = time.perf_counter() - start
    start = time.perf_counter()
    y_proba = clf.predict_proba(X_lgb_test)
    predict_time = time.perf_counter() - start
    # Compute log loss on test set
    score = log_loss(y_test, y_proba)
    return fit_time, predict_time, score

def run_benchmark(config: BenchmarkConfig):
    """Run a single benchmark configuration."""
    print(f"\n{'=' * 70}")
    print(f"Benchmark: {config.name}")
    print(f" {config.description}")
    print(f" Samples: {config.n_samples:,} | Features: {config.n_features} "
          f"| Classes: {config.n_classes} | Type: {config.feature_type}")
    if config.n_categorical_features > 0:
        print(f" Categorical: {config.n_categorical_features} features, "
              f"cardinality={config.categorical_cardinality}")
    print("-" * 70)
    # Generate data with train/test split
    print("Generating data...", end=" ", flush=True)
    X_train, X_test, y_train, y_test, categorical_indices = generate_data(config)
    print(f"Done. Train: {X_train.shape}, Test: {X_test.shape}")
    # Benchmark sklearn
    print("Running scikit-learn RandomForest...", end=" ", flush=True)
    sklearn_fit, sklearn_pred, sklearn_score = benchmark_sklearn(
        X_train, X_test, y_train, y_test, categorical_indices, config
    )
    print("Done.")
    # Benchmark LightGBM
    print("Running LightGBM RandomForest...", end=" ", flush=True)
    lgb_fit, lgb_pred, lgb_score = benchmark_lightgbm(
        X_train, X_test, y_train, y_test, categorical_indices, config
    )
    print("Done.")
    # Results
    print("\nResults:")
    print(f" {'Method':<20} {'Fit (s)':<10} {'Predict (s)':<12} {'Log Loss':<12}")
    print(f" {'-' * 54}")
    print(f" {'scikit-learn':<20} {sklearn_fit:<10.3f} {sklearn_pred:<12.3f} {sklearn_score:<12.4f}")
    print(f" {'LightGBM':<20} {lgb_fit:<10.3f} {lgb_pred:<12.3f} {lgb_score:<12.4f}")
    print(f" {'-' * 54}")
    # speedup > 1 means LightGBM was faster, < 1 means scikit-learn was faster
    speedup_fit = sklearn_fit / lgb_fit if lgb_fit > 0 else float("inf")
    speedup_pred = sklearn_pred / lgb_pred if lgb_pred > 0 else float("inf")
    winner_fit = "LightGBM" if speedup_fit > 1 else "sklearn"
    winner_pred = "LightGBM" if speedup_pred > 1 else "sklearn"
    # Lower log loss is better
    winner_score = "sklearn" if sklearn_score < lgb_score else "LightGBM"
    score_diff = abs(sklearn_score - lgb_score)
    print(f" Fit speedup: {speedup_fit:.2f}x ({winner_fit} faster)")
    print(f" Predict speedup: {speedup_pred:.2f}x ({winner_pred} faster)")
    print(f" Log Loss: {winner_score} better by {score_diff:.4f}")
    return {
        "name": config.name,
        "n_samples": config.n_samples,
        "n_features": config.n_features,
        "n_classes": config.n_classes,
        "feature_type": config.feature_type,
        "sklearn_fit": sklearn_fit,
        "sklearn_pred": sklearn_pred,
        "sklearn_score": sklearn_score,
        "lgb_fit": lgb_fit,
        "lgb_pred": lgb_pred,
        "lgb_score": lgb_score,
        "speedup_fit": speedup_fit,
        "speedup_pred": speedup_pred,
    }

def main():
    print("=" * 70)
    print("RandomForest Benchmark: scikit-learn vs LightGBM")
    print(f"Configuration: {N_ESTIMATORS} trees, using all available cores")
    print("=" * 70)
    # Define benchmark configurations
    configs = [
        # Small baseline - should run in a few seconds
        BenchmarkConfig(
            name="Small Numerical",
            n_samples=1_000,
            n_features=10,
            n_classes=2,
            feature_type="numerical",
            description="Quick baseline test with small numerical data",
        ),
        # Medium numerical - tests scaling
        BenchmarkConfig(
            name="Medium Numerical",
            n_samples=10_000,
            n_features=50,
            n_classes=2,
            feature_type="numerical",
            description="Medium-sized numerical dataset",
        ),
        # Large numerical - more substantial test
        BenchmarkConfig(
            name="Large Numerical",
            n_samples=100_000,
            n_features=50,
            n_classes=2,
            feature_type="numerical",
            description="Large numerical dataset",
        ),
        # Multi-class classification
        BenchmarkConfig(
            name="Multi-class (5 classes)",
            n_samples=50_000,
            n_features=30,
            n_classes=5,
            feature_type="numerical",
            description="Multi-class classification problem",
        ),
        # Multi-class with more classes
        BenchmarkConfig(
            name="Multi-class (10 classes)",
            n_samples=50_000,
            n_features=30,
            n_classes=10,
            feature_type="numerical",
            description="Multi-class classification with 10 classes",
        ),
        # Mixed features - moderate categorical cardinality
        BenchmarkConfig(
            name="Mixed Features",
            n_samples=50_000,
            n_features=40,
            n_classes=2,
            feature_type="mixed",
            n_categorical_features=20,
            categorical_cardinality=10,
            description="Mixed numerical and categorical features",
        ),
        # High-cardinality categorical (LightGBM strength)
        BenchmarkConfig(
            name="High-Cardinality Categorical",
            n_samples=100_000,
            n_features=30,
            n_classes=2,
            feature_type="mixed",
            n_categorical_features=15,
            categorical_cardinality=100,
            description="High-cardinality categorical features (LightGBM advantage)",
        ),
        # Pure categorical (LightGBM strength)
        BenchmarkConfig(
            name="Pure Categorical",
            n_samples=100_000,
            n_features=50,
            n_classes=2,
            feature_type="categorical",
            n_categorical_features=50,
            categorical_cardinality=50,
            description="All categorical features (LightGBM native support)",
        ),
        # LightGBM optimal case: Large dataset, many high-cardinality categoricals
        BenchmarkConfig(
            name="LightGBM Optimal",
            n_samples=500_000,
            n_features=100,
            n_classes=3,
            feature_type="mixed",
            n_categorical_features=60,
            categorical_cardinality=200,
            description="Large dataset with many high-cardinality categoricals (LightGBM's sweet spot)",
        ),
        # High-dimensional numerical
        BenchmarkConfig(
            name="High-Dimensional Numerical",
            n_samples=50_000,
            n_features=200,
            n_classes=2,
            feature_type="numerical",
            description="High-dimensional numerical data",
        ),
        # Very large samples
        BenchmarkConfig(
            name="Very Large Dataset",
            n_samples=500_000,
            n_features=50,
            n_classes=2,
            feature_type="numerical",
            description="Very large sample size test",
        ),
    ]
    # Run all benchmarks
    results = []
    for config in configs:
        result = run_benchmark(config)
        results.append(result)
    # Summary table - Timing
    print("\n" + "=" * 80)
    print("SUMMARY - TIMING")
    print("=" * 80)
    print(f"\n{'Benchmark':<30} {'sklearn Fit':<12} {'LightGBM Fit':<12} {'Speedup':<10}")
    print("-" * 70)
    for r in results:
        speedup_str = f"{r['speedup_fit']:.2f}x"
        winner = "✓ LGB" if r['speedup_fit'] > 1 else "✓ SKL"
        print(f"{r['name']:<30} {r['sklearn_fit']:<12.3f} {r['lgb_fit']:<12.3f} {speedup_str:<8} {winner}")
    print("\n" + "-" * 70)
    avg_speedup = np.mean([r['speedup_fit'] for r in results])
    print(f"Average fit speedup: {avg_speedup:.2f}x")
    # Find best cases for each (timing)
    best_for_lgb = max(results, key=lambda r: r['speedup_fit'])
    best_for_sklearn = min(results, key=lambda r: r['speedup_fit'])
    print(f"\nBest case for LightGBM: {best_for_lgb['name']} ({best_for_lgb['speedup_fit']:.2f}x faster)")
    print(f"Best case for sklearn: {best_for_sklearn['name']} ({1/best_for_sklearn['speedup_fit']:.2f}x faster)")
    # Summary table - Predictive Performance (Log Loss)
    print("\n" + "=" * 80)
    print("SUMMARY - PREDICTIVE PERFORMANCE (Log Loss, lower is better)")
    print("=" * 80)
    print(f"\n{'Benchmark':<30} {'sklearn':<12} {'LightGBM':<12} {'Diff':<10} {'Winner':<8}")
    print("-" * 80)
    sklearn_wins = 0
    lgb_wins = 0
    for r in results:
        diff = r['sklearn_score'] - r['lgb_score']
        if r['sklearn_score'] < r['lgb_score']:
            winner = "sklearn"
            sklearn_wins += 1
        elif r['lgb_score'] < r['sklearn_score']:
            winner = "LightGBM"
            lgb_wins += 1
        else:
            winner = "tie"
        print(f"{r['name']:<30} {r['sklearn_score']:<12.4f} {r['lgb_score']:<12.4f} {diff:+.4f} {winner}")
    print("\n" + "-" * 80)
    avg_sklearn_score = np.mean([r['sklearn_score'] for r in results])
    avg_lgb_score = np.mean([r['lgb_score'] for r in results])
    print(f"Average Log Loss - sklearn: {avg_sklearn_score:.4f}, LightGBM: {avg_lgb_score:.4f}")
    print(f"Wins - sklearn: {sklearn_wins}, LightGBM: {lgb_wins}")


if __name__ == "__main__":
    main()
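
Output of one run (timings depend on the machine):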
======================================================================
RandomForest Benchmark: scikit-learn vs LightGBM
Configuration: 100 trees, using all available cores
======================================================================
======================================================================
Benchmark: Small Numerical
Quick baseline test with small numerical data
Samples: 1,000 | Features: 10 | Classes: 2 | Type: numerical
----------------------------------------------------------------------
Generating data... Done. Train: (800, 10), Test: (200, 10)
Running scikit-learn RandomForest... Done.
Running LightGBM RandomForest... Done.
Results:
Method Fit (s) Predict (s) Log Loss
------------------------------------------------------
scikit-learn 0.144 0.012 0.2992
LightGBM 0.348 0.001 0.5132
------------------------------------------------------
Fit speedup: 0.41x (sklearn faster)
Predict speedup: 10.73x (LightGBM faster)
Log Loss: sklearn better by 0.2139
======================================================================
Benchmark: Medium Numerical
Medium-sized numerical dataset
Samples: 10,000 | Features: 50 | Classes: 2 | Type: numerical
----------------------------------------------------------------------
Generating data... Done. Train: (8000, 50), Test: (2000, 50)
Running scikit-learn RandomForest... Done.
Running LightGBM RandomForest... Done.
Results:
Method Fit (s) Predict (s) Log Loss
------------------------------------------------------
scikit-learn 0.465 0.014 0.3598
LightGBM 0.573 0.003 0.6178
------------------------------------------------------
Fit speedup: 0.81x (sklearn faster)
Predict speedup: 4.20x (LightGBM faster)
Log Loss: sklearn better by 0.2580
======================================================================
Benchmark: Large Numerical
Large numerical dataset
Samples: 100,000 | Features: 50 | Classes: 2 | Type: numerical
----------------------------------------------------------------------
Generating data... Done. Train: (80000, 50), Test: (20000, 50)
Running scikit-learn RandomForest... Done.
Running LightGBM RandomForest... Done.
Results:
Method Fit (s) Predict (s) Log Loss
------------------------------------------------------
scikit-learn 6.286 0.041 0.2667
LightGBM 0.815 0.019 0.6066
------------------------------------------------------
Fit speedup: 7.71x (LightGBM faster)
Predict speedup: 2.17x (LightGBM faster)
Log Loss: sklearn better by 0.3400
======================================================================
Benchmark: Multi-class (5 classes)
Multi-class classification problem
Samples: 50,000 | Features: 30 | Classes: 5 | Type: numerical
----------------------------------------------------------------------
Generating data... Done. Train: (40000, 30), Test: (10000, 30)
Running scikit-learn RandomForest... Done.
Running LightGBM RandomForest... Done.
Results:
Method Fit (s) Predict (s) Log Loss
------------------------------------------------------
scikit-learn 2.321 0.028 0.8599
LightGBM 3.022 0.042 1.4571
------------------------------------------------------
Fit speedup: 0.77x (sklearn faster)
Predict speedup: 0.66x (sklearn faster)
Log Loss: sklearn better by 0.5972
======================================================================
Benchmark: Multi-class (10 classes)
Multi-class classification with 10 classes
Samples: 50,000 | Features: 30 | Classes: 10 | Type: numerical
----------------------------------------------------------------------
Generating data... Done. Train: (40000, 30), Test: (10000, 30)
Running scikit-learn RandomForest... Done.
Running LightGBM RandomForest... Done.
Results:
Method Fit (s) Predict (s) Log Loss
------------------------------------------------------
scikit-learn 2.437 0.048 1.4693
LightGBM 5.832 0.079 2.1385
------------------------------------------------------
Fit speedup: 0.42x (sklearn faster)
Predict speedup: 0.60x (sklearn faster)
Log Loss: sklearn better by 0.6692
======================================================================
Benchmark: Mixed Features
Mixed numerical and categorical features
Samples: 50,000 | Features: 40 | Classes: 2 | Type: mixed
Categorical: 20 features, cardinality=10
----------------------------------------------------------------------
Generating data... Done. Train: (40000, 40), Test: (10000, 40)
Running scikit-learn RandomForest... Done.
Running LightGBM RandomForest... Done.
Results:
Method Fit (s) Predict (s) Log Loss
------------------------------------------------------
scikit-learn 1.388 0.027 0.2516
LightGBM 0.680 0.015 0.5941
------------------------------------------------------
Fit speedup: 2.04x (LightGBM faster)
Predict speedup: 1.76x (LightGBM faster)
Log Loss: sklearn better by 0.3425
======================================================================
Benchmark: High-Cardinality Categorical
High-cardinality categorical features (LightGBM advantage)
Samples: 100,000 | Features: 30 | Classes: 2 | Type: mixed
Categorical: 15 features, cardinality=100
----------------------------------------------------------------------
Generating data... Done. Train: (80000, 30), Test: (20000, 30)
Running scikit-learn RandomForest... Done.
Running LightGBM RandomForest... Done.
Results:
Method Fit (s) Predict (s) Log Loss
------------------------------------------------------
scikit-learn 3.098 0.041 0.1882
LightGBM 0.843 0.027 0.6055
------------------------------------------------------
Fit speedup: 3.67x (LightGBM faster)
Predict speedup: 1.50x (LightGBM faster)
Log Loss: sklearn better by 0.4174
======================================================================
Benchmark: Pure Categorical
All categorical features (LightGBM native support)
Samples: 100,000 | Features: 50 | Classes: 2 | Type: categorical
Categorical: 50 features, cardinality=50
----------------------------------------------------------------------
Generating data... Done. Train: (80000, 50), Test: (20000, 50)
Running scikit-learn RandomForest... Done.
Running LightGBM RandomForest... Done.
Results:
Method Fit (s) Predict (s) Log Loss
------------------------------------------------------
scikit-learn 2.499 0.042 0.2766
LightGBM 0.897 0.027 0.6145
------------------------------------------------------
Fit speedup: 2.79x (LightGBM faster)
Predict speedup: 1.60x (LightGBM faster)
Log Loss: sklearn better by 0.3379
======================================================================
Benchmark: LightGBM Optimal
Large dataset with many high-cardinality categoricals (LightGBM's sweet spot)
Samples: 500,000 | Features: 100 | Classes: 3 | Type: mixed
Categorical: 60 features, cardinality=200
----------------------------------------------------------------------
Generating data... Done. Train: (400000, 100), Test: (100000, 100)
Running scikit-learn RandomForest... Done.
Running LightGBM RandomForest... Done.
Results:
Method Fit (s) Predict (s) Log Loss
------------------------------------------------------
scikit-learn 56.499 0.384 0.3760
LightGBM 4.810 0.257 1.0001
------------------------------------------------------
Fit speedup: 11.75x (LightGBM faster)
Predict speedup: 1.50x (LightGBM faster)
Log Loss: sklearn better by 0.6242
======================================================================
Benchmark: High-Dimensional Numerical
High-dimensional numerical data
Samples: 50,000 | Features: 200 | Classes: 2 | Type: numerical
----------------------------------------------------------------------
Generating data... Done. Train: (40000, 200), Test: (10000, 200)
Running scikit-learn RandomForest... Done.
Running LightGBM RandomForest... Done.
Results:
Method Fit (s) Predict (s) Log Loss
------------------------------------------------------
scikit-learn 6.897 0.028 0.4134
LightGBM 0.895 0.017 0.6599
------------------------------------------------------
Fit speedup: 7.70x (LightGBM faster)
Predict speedup: 1.60x (LightGBM faster)
Log Loss: sklearn better by 0.2465
======================================================================
Benchmark: Very Large Dataset
Very large sample size test
Samples: 500,000 | Features: 50 | Classes: 2 | Type: numerical
----------------------------------------------------------------------
Generating data... Done. Train: (400000, 50), Test: (100000, 50)
Running scikit-learn RandomForest... Done.
Running LightGBM RandomForest... Done.
Results:
Method Fit (s) Predict (s) Log Loss
------------------------------------------------------
scikit-learn 46.691 0.313 0.2193
LightGBM 1.726 0.078 0.6171
------------------------------------------------------
Fit speedup: 27.05x (LightGBM faster)
Predict speedup: 4.02x (LightGBM faster)
Log Loss: sklearn better by 0.3978
================================================================================
SUMMARY - TIMING
================================================================================
Benchmark                      sklearn Fit  LightGBM Fit Speedup
----------------------------------------------------------------------
Small Numerical                0.144        0.348        0.41x    ✓ SKL
Medium Numerical               0.465        0.573        0.81x    ✓ SKL
Large Numerical                6.286        0.815        7.71x    ✓ LGB
Multi-class (5 classes)        2.321        3.022        0.77x    ✓ SKL
Multi-class (10 classes)       2.437        5.832        0.42x    ✓ SKL
Mixed Features                 1.388        0.680        2.04x    ✓ LGB
High-Cardinality Categorical   3.098        0.843        3.67x    ✓ LGB
Pure Categorical               2.499        0.897        2.79x    ✓ LGB
LightGBM Optimal               56.499       4.810        11.75x   ✓ LGB
High-Dimensional Numerical     6.897        0.895        7.70x    ✓ LGB
Very Large Dataset             46.691       1.726        27.05x   ✓ LGB
----------------------------------------------------------------------
Average fit speedup: 5.92x
Best case for LightGBM: Very Large Dataset (27.05x faster)
Best case for sklearn: Small Numerical (2.42x faster)
================================================================================
SUMMARY - PREDICTIVE PERFORMANCE (Log Loss, lower is better)
================================================================================
Benchmark                      sklearn      LightGBM     Diff       Winner
--------------------------------------------------------------------------------
Small Numerical                0.2992       0.5132       -0.2139 sklearn
Medium Numerical               0.3598       0.6178       -0.2580 sklearn
Large Numerical                0.2667       0.6066       -0.3400 sklearn
Multi-class (5 classes)        0.8599       1.4571       -0.5972 sklearn
Multi-class (10 classes)       1.4693       2.1385       -0.6692 sklearn
Mixed Features                 0.2516       0.5941       -0.3425 sklearn
High-Cardinality Categorical   0.1882       0.6055       -0.4174 sklearn
Pure Categorical               0.2766       0.6145       -0.3379 sklearn
LightGBM Optimal               0.3760       1.0001       -0.6242 sklearn
High-Dimensional Numerical     0.4134       0.6599       -0.2465 sklearn
Very Large Dataset             0.2193       0.6171       -0.3978 sklearn
--------------------------------------------------------------------------------
Average Log Loss - sklearn: 0.4527, LightGBM: 0.8568
Wins - sklearn: 11, LightGBM: 0