patcon · November 23, 2025 02:59
diff --git a/parameters.yml b/parameters.yml
 # --- Per-run input ---
 # Interpolated from runtime_params; the trailing `null` is presumably the
 # fallback when no URL is supplied at run time (confirm against the loader).
 polis_url: '${runtime_params:polis_url, null}'

 # --- Global settings ---
 # Picked up by the reducer definitions through the params interpolation.
 n_components: 2
 min_votes_threshold: 7

 # --- Plot options ---
 flip_x: false
 flip_y: false
 # Toggles PNG image output in addition to the preview's JSON plot data;
 # off by default because rendering the images takes significant time.
 generate_plot_images: false

 # Template expansion: when a definition's name contains a placeholder
 # (e.g., "foo_{some_param}_bar"), the parameter named in the placeholder is
 # expanded into one definition per listed value
 # (e.g., `some_param: [1, 2]` yields "foo_1_bar" and "foo_2_bar").

 definitions:
  preprocessors:
    # Values are quoted on purpose: a bare 025 is read as octal (21) by
    # YAML 1.1 parsers and as 25 by YAML 1.2 parsers, so neither would yield
    # the zero-padded names ("025pct", ...) that pipeline_variants refers to.
    # NOTE(review): the values now arrive as strings; confirm the consumer
    # converts them to numbers wherever it needs the percentage itself.
    - name: "{percent_vote_cutoff}pct"
      percent_vote_cutoff: ["025", "050", "075", "100"]

  imputers:
    # SimpleImputer variants, one per strategy.
    - name: mean
      estimator: SimpleImputer
      params:
        strategy: mean

    # Constant strategy with a fill_value of 0.
    - name: zero
      estimator: SimpleImputer
      params:
        strategy: constant
        fill_value: 0

    - name: median
      estimator: SimpleImputer
      params:
        strategy: median

    - name: mode
      estimator: SimpleImputer
      params:
        strategy: most_frequent

    # KNNImputer variants. The name is a template, so each n_neighbors value
    # becomes its own definition; the "d" suffix marks distance weighting.
    - name: "knn{n_neighbors}d"
      estimator: KNNImputer
      params:
        n_neighbors:
          - 5
          - 10
        weights: distance

    # Same neighbour counts with uniform weighting ("u" suffix).
    - name: "knn{n_neighbors}u"
      estimator: KNNImputer
      params:
        n_neighbors:
          - 5
          - 10
        weights: uniform

    # No params are configured for this entry.
    - name: noop
      estimator: NoOpTransformer

  # Every reducer takes its output dimensionality from the top-level
  # n_components setting and its seed from the global random_state.
  reducers:
    # PCA with a SparsityAwareScaler attached; the scaler's X_sparse entry
    # references "input:masked_vote_matrix".
    - name: pca
      estimator: PCA
      params:
        n_components: ${params:n_components}
        random_state: ${globals:random_state}
      scaler:
        estimator: SparsityAwareScaler
        X_sparse: "input:masked_vote_matrix"

    # Same PCA settings without the scaler block.
    - name: pca_no_scale
      estimator: PCA
      params:
        n_components: ${params:n_components}
        random_state: ${globals:random_state}

    - name: pacmap
      estimator: PaCMAP
      params:
        n_components: ${params:n_components}
        n_neighbors: null
        random_state: ${globals:random_state}

    - name: pacmap_masked
      estimator: PaCMAPWithMaskedDistance
      params:
        n_components: ${params:n_components}
        n_neighbors: null
        random_state: ${globals:random_state}

    # The estimator arguments of localmap and umap sit under `params:` like
    # every other definition in this file; they were previously placed
    # directly on the definition, next to `name` and `estimator`.
    - name: localmap
      estimator: LocalMAP
      params:
        n_components: ${params:n_components}
        n_neighbors: null
        random_state: ${globals:random_state}

    - name: umap
      estimator: UMAP
      params:
        n_components: ${params:n_components}
        n_neighbors: 15
        random_state: ${globals:random_state}

  clusterers:
    # NOTE(review): k_bounds is presumably the [lower, upper] range of
    # cluster counts to try; confirm against the estimator.
    - name: bestkmeans
      estimator: BestKMeans
      params:
        k_bounds:
          - 2
          - 5
        random_state: ${globals:random_state}

    - name: besthdbscan
      estimator: BestHDBSCANFlat
      params:
        # Workaround kept for reference: enabling the line below avoids an
        # occasional error in HDBSCAN_flat.
        # cluster_selection_method: leaf
        k_bounds:
          - 2
          - 10
        random_state: ${globals:random_state}

    # Plain HDBSCAN, once per cluster selection method.
    - name: hdbscan_eom
      estimator: HDBSCAN
      params:
        cluster_selection_method: eom

    - name: hdbscan_leaf
      estimator: HDBSCAN
      params:
        cluster_selection_method: leaf

 # Each variant selects entries from `definitions` by name.
 # NOTE(review): main_matrix uses plural keys (imputers/reducers/clusterers)
 # while the other variants use singular ones, and timescale passes a list
 # under the singular `preprocessor`; confirm the loader accepts both forms.
 pipeline_variants:
  main_matrix:
    active: true
    name: "{imputer}_{reducer}_{clusterer}"
    preprocessor: "100pct"
    imputers:
      - mean
      - zero
      - median
      - knn5d
      - knn10d
      - knn5u
      - knn10u
    reducers:
      - pca
      - pacmap
      - localmap
    clusterers:
      - bestkmeans
      - besthdbscan
      - hdbscan_eom
      - hdbscan_leaf

  # One fixed imputer/reducer/clusterer combination across three
  # preprocessor cutoffs.
  timescale:
    active: true
    name: "knn5d_pacmap_bestkmeans_{preprocessor}"
    preprocessor:
      - "025pct"
      - "050pct"
      - "075pct"
    imputer: knn5d
    reducer: pacmap
    clusterer: bestkmeans

  zero_pacmap_masked_bestkmeans:
    active: true
    name: zero_pacmap_masked_bestkmeans
    preprocessor: "100pct"
    # The custom PaCMAP estimator imputes internally, hence the no-op imputer.
    imputer: noop
    reducer: pacmap_masked
    clusterer: bestkmeans

  mean_pca_bestkmeans_not_sparsity_aware:
    active: true
    preprocessor: "100pct"
    name: mean_pca_bestkmeans_not_sparsity_aware
    imputer: mean
    reducer: pca_no_scale
    clusterer: bestkmeans
 # NOTE(review): this section repeats the settings that appear earlier in the
 # file; confirm which copy is authoritative. It was tab-indented with every
 # key flattened to one level, which YAML does not allow, so the nesting has
 # been restored with space indentation.

 # Set per-run
 polis_url: "${runtime_params:polis_url, null}"

 # Overall
 n_components: 2
 min_votes_threshold: 7

 # Plots
 flip_x: false
 flip_y: false
 # Control whether to generate PNG images, beyond preview's JSON plot data
 # (false by default to save significant time)
 generate_plot_images: false

 # If the name of a definition looks like a template (e.g., "foo_{some_param}_bar"),
 # then the parameter named in the template is expanded into one definition per value.
 # (e.g., `some_param: [1, 2]` will expand to two definitions "foo_1_bar" and "foo_2_bar")

 definitions:
  preprocessors:
    # Quoted on purpose: a bare 025 is octal (21) under YAML 1.1 and 25 under
    # YAML 1.2, so neither yields the zero-padded "025pct"-style names used by
    # pipeline_variants below.
    # NOTE(review): values are now strings; confirm the consumer converts them.
    - name: "{percent_vote_cutoff}pct"
      percent_vote_cutoff: ["025", "050", "075", "100"]

  imputers:
    - name: mean
      estimator: SimpleImputer
      params:
        strategy: mean

    - name: zero
      estimator: SimpleImputer
      params:
        strategy: constant
        fill_value: 0

    - name: median
      estimator: SimpleImputer
      params:
        strategy: median

    - name: mode
      estimator: SimpleImputer
      params:
        strategy: most_frequent

    - name: "knn{n_neighbors}d"
      estimator: KNNImputer
      params:
        n_neighbors: [5, 10]
        weights: distance

    - name: "knn{n_neighbors}u"
      estimator: KNNImputer
      params:
        n_neighbors: [5, 10]
        weights: uniform

    - name: noop
      estimator: NoOpTransformer

  reducers:
    - name: pca
      estimator: PCA
      params:
        n_components: ${params:n_components}
        random_state: ${globals:random_state}
      scaler:
        estimator: SparsityAwareScaler
        X_sparse: "input:masked_vote_matrix"

    - name: pca_no_scale
      estimator: PCA
      params:
        n_components: ${params:n_components}
        random_state: ${globals:random_state}

    - name: pacmap
      estimator: PaCMAP
      params:
        n_components: ${params:n_components}
        n_neighbors: null
        random_state: ${globals:random_state}

    - name: pacmap_masked
      estimator: PaCMAPWithMaskedDistance
      params:
        n_components: ${params:n_components}
        n_neighbors: null
        random_state: ${globals:random_state}

    # localmap and umap keep their estimator arguments under `params:` like
    # the other reducer definitions.
    - name: localmap
      estimator: LocalMAP
      params:
        n_components: ${params:n_components}
        n_neighbors: null
        random_state: ${globals:random_state}

    - name: umap
      estimator: UMAP
      params:
        n_components: ${params:n_components}
        n_neighbors: 15
        random_state: ${globals:random_state}

  clusterers:
    - name: bestkmeans
      estimator: BestKMeans
      params:
        k_bounds: [2, 5]
        random_state: ${globals:random_state}

    - name: besthdbscan
      estimator: BestHDBSCANFlat
      params:
        # Uncommenting gets around occasional error in HDBSCAN_flat
        # cluster_selection_method: leaf
        k_bounds: [2, 10]
        random_state: ${globals:random_state}

    - name: hdbscan_eom
      estimator: HDBSCAN
      params:
        cluster_selection_method: eom

    - name: hdbscan_leaf
      estimator: HDBSCAN
      params:
        cluster_selection_method: leaf

 pipeline_variants:
  main_matrix:
    active: true
    name: "{imputer}_{reducer}_{clusterer}"
    preprocessor: "100pct"
    imputers: [mean, zero, median, knn5d, knn10d, knn5u, knn10u]
    reducers: [pca, pacmap, localmap]
    clusterers: [bestkmeans, besthdbscan, hdbscan_eom, hdbscan_leaf]

  timescale:
    active: true
    name: "knn5d_pacmap_bestkmeans_{preprocessor}"
    preprocessor: ["025pct", "050pct", "075pct"]
    imputer: knn5d
    reducer: pacmap
    clusterer: bestkmeans

  zero_pacmap_masked_bestkmeans:
    active: true
    name: zero_pacmap_masked_bestkmeans
    preprocessor: "100pct"
    # imputation happens within the custom PaCMAP estimator
    imputer: noop
    reducer: pacmap_masked
    clusterer: bestkmeans

  mean_pca_bestkmeans_not_sparsity_aware:
    active: true
    preprocessor: "100pct"
    name: mean_pca_bestkmeans_not_sparsity_aware
    imputer: mean
    reducer: pca_no_scale
    clusterer: bestkmeans
No results found