Last active
November 23, 2025 02:59
-
-
Save patcon/7d8e0c153699d46073ec105a954f7e51 to your computer and use it in GitHub Desktop.
My dream kedro config for generating pipelines
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Set per-run | |
| polis_url: "${runtime_params:polis_url, null}" | |
| # Overall | |
| n_components: 2 | |
| min_votes_threshold: 7 | |
| # Plots | |
| flip_x: false | |
| flip_y: false | |
| # Control whether to generate PNG images, beyond preview's JSON plot data | |
| # (false by default to save significant time) | |
| generate_plot_images: false | |
| # If a definition's name is a template (e.g., "foo_{some_param}_bar"), the parameter named | |
| # in the braces is expanded into one definition per value in its list. | |
| # (e.g., `some_param: [1, 2]` expands to two definitions, "foo_1_bar" and "foo_2_bar") | |
| definitions: | |
| preprocessors: | |
| # Cutoffs are quoted so they stay zero-padded strings. Unquoted, YAML 1.1 loaders read | |
| # 025/050/075 as octal integers (21/40/61), and YAML 1.2 loaders drop the leading zero, | |
| # so the expanded names would not match "025pct"/"050pct"/"075pct" used by pipeline_variants. | |
| - name: "{percent_vote_cutoff}pct" | |
| percent_vote_cutoff: ["025", "050", "075", "100"] | |
| imputers: | |
| - name: mean | |
| estimator: SimpleImputer | |
| params: | |
| strategy: mean | |
| - name: zero | |
| estimator: SimpleImputer | |
| params: | |
| strategy: constant | |
| fill_value: 0 | |
| - name: median | |
| estimator: SimpleImputer | |
| params: | |
| strategy: median | |
| - name: mode | |
| estimator: SimpleImputer | |
| params: | |
| strategy: most_frequent | |
| - name: "knn{n_neighbors}d" | |
| estimator: KNNImputer | |
| params: | |
| n_neighbors: [5, 10] | |
| weights: distance | |
| - name: "knn{n_neighbors}u" | |
| estimator: KNNImputer | |
| params: | |
| n_neighbors: [5, 10] | |
| weights: uniform | |
| - name: noop | |
| estimator: NoOpTransformer | |
| reducers: | |
| - name: pca | |
| estimator: PCA | |
| params: | |
| n_components: ${params:n_components} | |
| random_state: ${globals:random_state} | |
| scaler: | |
| estimator: SparsityAwareScaler | |
| X_sparse: "input:masked_vote_matrix" | |
| - name: pca_no_scale | |
| estimator: PCA | |
| params: | |
| n_components: ${params:n_components} | |
| random_state: ${globals:random_state} | |
| - name: pacmap | |
| estimator: PaCMAP | |
| params: | |
| n_components: ${params:n_components} | |
| n_neighbors: null | |
| random_state: ${globals:random_state} | |
| - name: pacmap_masked | |
| estimator: PaCMAPWithMaskedDistance | |
| params: | |
| n_components: ${params:n_components} | |
| n_neighbors: null | |
| random_state: ${globals:random_state} | |
| - name: localmap | |
| estimator: LocalMAP | |
| # Hyperparameters are nested under `params:` like the other reducer definitions. | |
| params: | |
| n_components: ${params:n_components} | |
| n_neighbors: null | |
| random_state: ${globals:random_state} | |
| - name: umap | |
| estimator: UMAP | |
| # Hyperparameters are nested under `params:` like the other reducer definitions. | |
| params: | |
| n_components: ${params:n_components} | |
| n_neighbors: 15 | |
| random_state: ${globals:random_state} | |
| clusterers: | |
| - name: bestkmeans | |
| estimator: BestKMeans | |
| params: | |
| k_bounds: [2, 5] | |
| random_state: ${globals:random_state} | |
| - name: besthdbscan | |
| estimator: BestHDBSCANFlat | |
| params: | |
| # Uncommenting gets around occasional error in HDBSCAN_flat | |
| # cluster_selection_method: leaf | |
| k_bounds: [2, 10] | |
| random_state: ${globals:random_state} | |
| - name: hdbscan_eom | |
| estimator: HDBSCAN | |
| params: | |
| cluster_selection_method: eom | |
| - name: hdbscan_leaf | |
| estimator: HDBSCAN | |
| params: | |
| cluster_selection_method: leaf | |
| pipeline_variants: | |
| main_matrix: | |
| # NOTE(review): the name template uses singular {imputer}/{reducer}/{clusterer}, but the | |
| # keys below are plural (imputers/reducers/clusterers), unlike the other variants, which use | |
| # singular keys (e.g. timescale's `preprocessor` list) — confirm which spelling is intended. | |
| active: true | |
| name: "{imputer}_{reducer}_{clusterer}" | |
| preprocessor: 100pct | |
| imputers: [mean, zero, median, knn5d, knn10d, knn5u, knn10u] | |
| reducers: [pca, pacmap, localmap] | |
| clusterers: [bestkmeans, besthdbscan, hdbscan_eom, hdbscan_leaf] | |
| timescale: | |
| active: true | |
| name: "knn5d_pacmap_bestkmeans_{preprocessor}" | |
| preprocessor: [025pct, 050pct, 075pct] | |
| imputer: knn5d | |
| reducer: pacmap | |
| clusterer: bestkmeans | |
| zero_pacmap_masked_bestkmeans: | |
| active: true | |
| name: zero_pacmap_masked_bestkmeans | |
| preprocessor: 100pct | |
| # imputation happens within the custom PaCMAP estimator | |
| imputer: noop | |
| reducer: pacmap_masked | |
| clusterer: bestkmeans | |
| mean_pca_bestkmeans_not_sparsity_aware: | |
| active: true | |
| preprocessor: 100pct | |
| name: mean_pca_bestkmeans_not_sparsity_aware | |
| imputer: mean | |
| reducer: pca_no_scale | |
| clusterer: bestkmeans |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment