Skip to content

Instantly share code, notes, and snippets.

@benmoss
Last active June 26, 2025 15:22
Show Gist options
  • Save benmoss/7386f91a117198791a1d9ef081b9e50e to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
"""
Script to minimize explicit package requirements in Pixi environments.
This script analyzes each environment defined in pixi.toml, identifies packages
that are both explicitly required and required as dependencies of other packages,
and produces a minimal list of explicitly required packages.
Generated by Copilot, feel free to modify/burn this code with fire.
"""
import re
import subprocess
import sys
import tomllib
from pathlib import Path
from typing import Dict, Iterable, List, Set, Tuple
def load_pixi_toml(file_path) -> dict:
"""Load and parse a pixi.toml file."""
with open(file_path, "rb") as f:
return tomllib.load(f)
def get_environments(toml_data: dict) -> Dict[str, List[str]]:
    """Extract environment names and their feature lists from parsed pixi.toml data.

    An environment entry may be a bare list of features, a table with a
    "features" key, or anything else (treated as having no features).
    """

    def _features_of(spec) -> List[str]:
        if isinstance(spec, list):
            return spec
        if isinstance(spec, dict) and "features" in spec:
            return spec["features"]
        return []

    return {
        env_name: _features_of(spec)
        for env_name, spec in toml_data.get("environments", {}).items()
    }
def get_explicit_packages(environment: str) -> Set[str]:
    """Return the names of all explicitly required packages for *environment*.

    Runs ``pixi list --explicit`` for the linux-64 platform and parses the
    tabular output, collecting the first column of each package row.
    Returns an empty set if the pixi command fails.
    """
    cmd = [
        "pixi",
        "list",
        "--environment",
        environment,
        "--explicit",
        "--platform",
        "linux-64",
    ]
    print(f" Running command: {' '.join(cmd)}")
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        print(f" Error getting explicit packages for environment '{environment}': {e}")
        print(f" Error output: {e.stderr}")
        return set()

    packages: Set[str] = set()
    in_table = False
    for raw_line in result.stdout.splitlines():
        row = raw_line.strip()
        if not row:
            continue
        # The package table begins after a header row naming its columns.
        if "Package" in row and "Version" in row and "Build" in row:
            in_table = True
            continue
        # Inside the table, skip separator rows and take the first column.
        if in_table and not row.startswith(("═", "─")):
            columns = row.split(maxsplit=1)
            if columns and columns[0] != "Environment:":
                packages.add(columns[0])
    return packages
def get_package_dependents(environment: str, package: str) -> Set[str]:
    """Return the set of packages that depend on *package* in *environment*.

    Runs ``pixi tree --invert`` for the linux-64 platform and parses the
    inverted dependency tree, e.g.::

        package-name version
        ├── dependent-package1 version
        │   └── sub-dependent-package version
        └── dependent-package2 version

    Returns an empty set if the pixi command fails.
    """
    cmd = [
        "pixi",
        "tree",
        "--environment",
        environment,
        "--invert",
        f"^{package}$",
        "--platform",
        "linux-64",
    ]
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        print(f" Error getting dependents for '{package}' in '{environment}': {e}")
        print(f" Error output: {e.stderr}")
        return set()

    dependents: Set[str] = set()
    seen_root = False
    for raw_line in result.stdout.splitlines():
        stripped = raw_line.strip()
        # Skip the "Environment:" banner and blank lines entirely.
        if not stripped or stripped.startswith("Environment:"):
            continue
        if not seen_root:
            # The root line of the tree begins with the target package name.
            if stripped.startswith(package + " "):
                seen_root = True
            continue
        # Lines like "├── example_package 0.101.0" name dependents.
        tree_entry = re.search(r"[├└]── ([a-zA-Z0-9_.-]+)", raw_line)
        if tree_entry:
            dependents.add(tree_entry.group(1))
        elif not raw_line.startswith(" ") and raw_line.strip():
            # A non-indented line after the tree means we've left it.
            break

    # Debug output for the first few packages
    if package.startswith("a") and dependents:
        print(f" Found dependents for {package}: {', '.join(sorted(dependents))}")
    return dependents
def get_feature_packages(toml_data: dict) -> Dict[str, Set[str]]:
    """Map each feature name in pixi.toml to the set of package names it declares.

    Returns a dictionary mapping feature names to sets of package names.
    """
    feature_packages: Dict[str, Set[str]] = {}
    for feature_name, feature_data in toml_data.get("feature", {}).items():
        dependencies = feature_data.get("dependencies", ())
        # Entries may carry version constraints like "package >=1.0";
        # keep only the leading package name.
        feature_packages[feature_name] = {
            entry.split()[0].strip() for entry in dependencies
        }
    return feature_packages
def get_package_to_feature_mapping(
    feature_packages: Dict[str, Set[str]],
) -> Dict[str, Set[str]]:
    """Invert a feature->packages mapping into a package->features mapping.

    Returns a dictionary mapping package names to the sets of feature
    names that include them.
    """
    package_to_features: Dict[str, Set[str]] = {}
    for feature, packages in feature_packages.items():
        for package in packages:
            package_to_features.setdefault(package, set()).add(feature)
    return package_to_features
def analyze_shared_packages(
    all_environments: Dict[str, List[str]],
    package_to_features: Dict[str, Set[str]],
    results: Dict[str, Dict[str, Iterable]],
) -> Dict[str, Set[Tuple[str, str, str]]]:
    """Find packages redundant in one environment but explicitly needed in others.

    Returns a dictionary mapping environment names to sets of tuples
    ``(package_name, shared_feature, dependent_environment)``.  Only
    environments with at least one redundant package get an entry.
    """
    shared: Dict[str, Set[Tuple[str, str, str]]] = {}
    for env_name, result in results.items():
        redundant = result.get("redundant", set())
        if not redundant:
            continue
        hits: Set[Tuple[str, str, str]] = set()
        for package in redundant:
            # Features that define this package; skip if none do.
            for feature in package_to_features.get(package, set()):
                for other_env, other_features in all_environments.items():
                    # Only compare against other, already-analyzed environments.
                    if other_env == env_name or other_env not in results:
                        continue
                    if feature not in other_features:
                        continue
                    other_result = results[other_env]
                    # "Needed" elsewhere means: explicitly listed there
                    # and not itself redundant there.
                    if package in other_result.get(
                        "original", set()
                    ) and package not in other_result.get("redundant", set()):
                        hits.add((package, feature, other_env))
        shared[env_name] = hits
    return shared
def minimize_environment(environment: str) -> Dict[str, object]:
    """Find minimal set of explicit packages for an environment.

    Queries pixi for the environment's explicitly required packages, then
    checks, for each one, whether another explicit package already depends
    on it (making it redundant).  Self-dependencies and circular dependency
    pairs are detected so at least one member of each cycle stays explicit.

    Returns a dict with keys:
        "original":      all explicitly required packages found
        "minimal":       proposed minimal explicit set
        "redundant":     packages safe to drop from explicit requirements
        "dependents":    package -> set of packages depending on it
        "circular_deps": sorted (pkg1, pkg2) pairs that depend on each other
        "kept_circular": one package kept from each circular pair
        "self_deps":     packages that report depending on themselves
    (the last three keys are absent when no explicit packages are found).
    """
    print(f"\nAnalyzing environment: {environment}")
    explicit_packages = get_explicit_packages(environment)
    if not explicit_packages:
        print(f" No packages found for environment '{environment}'.")
        # Early return with a minimal result shape so callers can still
        # index "original"/"minimal"/"redundant"/"dependents" safely.
        return {
            "original": set(),
            "minimal": set(),
            "redundant": set(),
            "dependents": {},
        }
    print(f" Found {len(explicit_packages)} explicitly required packages")
    # Track packages we can safely remove (those that are dependencies of others)
    redundant_packages = set()
    dependent_mapping = {}  # Maps packages to their dependents
    # Show a progress indicator
    total = len(explicit_packages)
    for i, package in enumerate(sorted(explicit_packages)):
        progress = (i + 1) / total
        bar_length = 30
        filled_length = int(bar_length * progress)
        bar = "█" * filled_length + "░" * (bar_length - filled_length)
        # Print progress bar (carriage return keeps it on one line;
        # package name padded so shorter names overwrite longer ones)
        sys.stdout.write(f"\r Progress: [{bar}] {i+1}/{total} - {package:<30}")
        sys.stdout.flush()
        # One `pixi tree --invert` call per package — this is the slow part.
        dependents = get_package_dependents(environment, package)
        dependent_mapping[package] = dependents
        # If this package is a dependency of another explicit package,
        # we can potentially remove it from explicit requirements
        if dependents & explicit_packages:
            # Add to redundant packages, but will handle circular dependencies later
            redundant_packages.add(package)
    sys.stdout.write("\n")  # Move to the next line after progress bar
    # Handle circular dependencies
    # Build a graph of dependencies between redundant packages
    circular_deps = set()
    self_deps = set()
    for pkg1 in redundant_packages:
        # Check for self-dependency (package depends on itself)
        if pkg1 in dependent_mapping[pkg1]:
            self_deps.add(pkg1)
            print(f" Found self-dependency in package {pkg1}")
        # Check for circular dependencies between different packages
        for pkg2 in redundant_packages:
            if (
                pkg1 != pkg2
                and pkg2 in dependent_mapping[pkg1]
                and pkg1 in dependent_mapping[pkg2]
            ):
                # Found circular dependency between pkg1 and pkg2
                circular_deps.add(
                    (min(pkg1, pkg2), max(pkg1, pkg2))
                )  # Store in sorted order to avoid duplicates
    # For each circular dependency, keep one package
    kept_circular_packages = set()
    for pkg1, pkg2 in circular_deps:
        print(f" Found circular dependency between {pkg1} and {pkg2}")
        # Choose the first package to keep (alphabetically first)
        # You could implement different selection criteria here if desired
        kept_circular_packages.add(pkg1)
        print(f" Keeping {pkg1} as an explicit dependency")
    # The minimal set is all explicit packages minus redundant ones,
    # but keeping one from each circular pair and all self-dependent packages
    minimal_explicit = (
        (explicit_packages - redundant_packages) | kept_circular_packages | self_deps
    )
    true_redundant = redundant_packages - kept_circular_packages - self_deps
    return {
        "original": explicit_packages,
        "minimal": minimal_explicit,
        "redundant": true_redundant,
        "dependents": dependent_mapping,
        "circular_deps": circular_deps,
        "kept_circular": kept_circular_packages,
        "self_deps": self_deps,
    }
def main():
    """Entry point: parse arguments, analyze pixi environments, print reports.

    Fixes over the original:
    - the pixi availability check was performed twice; now done once,
    - feature/package mappings and the shared-package analysis were
      recomputed a second time at the end of main; now computed once
      and reused by both reports,
    - the circular-dependency report printed the two package names with
      no separator between them.
    """
    import argparse
    import json

    parser = argparse.ArgumentParser(
        description="Analyze Pixi environments to find minimal explicit package sets."
    )
    parser.add_argument(
        "-e",
        "--environment",
        action="append",
        help="Specify an environment to analyze (can be used multiple times, default is all)",
    )
    parser.add_argument(
        "-p",
        "--manifest-path",
        help="Path to pixi.toml file (if not provided, will be auto-detected using pixi info)",
    )
    args = parser.parse_args()

    # Check if pixi is installed and available (once is enough).
    try:
        subprocess.run(["pixi", "--version"], capture_output=True, check=True)
    except (subprocess.CalledProcessError, FileNotFoundError):
        print("Error: 'pixi' command not found or not working correctly.")
        print("Please make sure pixi is installed and configured correctly.")
        return

    # Get pixi.toml path from command line argument or `pixi info --json`.
    pixi_toml_path = args.manifest_path
    if not pixi_toml_path:
        try:
            pixi_info_result = subprocess.run(
                ["pixi", "info", "--json"],
                capture_output=True,
                text=True,
                check=True,
            )
            pixi_info = json.loads(pixi_info_result.stdout)
            pixi_toml_path = pixi_info.get("project_info", {}).get("manifest_path")
            if not pixi_toml_path:
                print("Error: Could not determine pixi.toml path from pixi info.")
                print("Please specify the path using the --manifest-path argument.")
                return
        except (subprocess.CalledProcessError, json.JSONDecodeError) as e:
            print(f"Error: Could not determine pixi.toml path: {e}")
            print("Please specify the path using the --manifest-path argument.")
            return

    print(f"Analyzing {pixi_toml_path}...")
    toml_data = load_pixi_toml(pixi_toml_path)
    all_environments = get_environments(toml_data)
    feature_packages = get_feature_packages(toml_data)
    package_to_features = get_package_to_feature_mapping(feature_packages)

    # Determine which environments to analyze.
    if args.environment:
        # Check that every requested environment exists before doing any work.
        missing_envs = [env for env in args.environment if env not in all_environments]
        if missing_envs:
            print(
                f"Error: Environment(s) not found in pixi.toml: {', '.join(missing_envs)}"
            )
            print(
                f"Available environments: {', '.join(sorted(all_environments.keys()))}"
            )
            return
        environments_to_analyze = args.environment
    else:
        # No -e flags: list and analyze every environment.
        print(f"Found {len(all_environments)} environments in pixi.toml:")
        for env_name in sorted(all_environments.keys()):
            features = all_environments[env_name]
            print(
                f" - {env_name} (features: {', '.join(features) if features else 'none'})"
            )
        environments_to_analyze = list(all_environments.keys())

    # Analyze each environment (this is the expensive step).
    results = {}
    for env_name in environments_to_analyze:
        results[env_name] = minimize_environment(env_name)

    # Analyze shared packages across environments — computed once, used by
    # both the summary and the shared-packages reports below.
    shared_package_analysis = analyze_shared_packages(
        all_environments, package_to_features, results
    )

    # Print summary report.
    print("\n\n=========== SUMMARY REPORT ===========")
    for env_name, result in results.items():
        print(f"\nEnvironment: {env_name}")
        print(f" Original explicit packages: {len(result['original'])}")
        print(f" Minimal explicit packages: {len(result['minimal'])}")
        print(
            f" Packages that could potentially be removed: {len(result['redundant'])}"
        )
        # Show circular dependencies if any were found.
        if result.get("circular_deps"):
            print("\n Circular dependencies detected:")
            for pkg1, pkg2 in sorted(result["circular_deps"]):
                # Separator added: the original printed the names fused together.
                print(f" - {pkg1} <-> {pkg2} (keeping {pkg1} as explicit)")
        # Show self-dependencies if any were found.
        if result.get("self_deps"):
            print("\n Self-dependencies detected (must be kept as explicit):")
            for pkg in sorted(result["self_deps"]):
                print(f" - {pkg} (depends on itself)")
        # Show shared packages if any were found.
        if shared_package_analysis.get(env_name):
            print(
                "\n Shared packages (redundant here but needed in other environments):"
            )
            for pkg, feature, other_env in sorted(shared_package_analysis[env_name]):
                print(f" - {pkg} (via feature '{feature}', needed in '{other_env}')")
        if result["redundant"]:
            print("\n Redundant packages that could be removed:")
            for pkg in sorted(result["redundant"]):
                explicit_dependents = result["dependents"][pkg] & result["original"]
                print(
                    f" - {pkg} (required by: {', '.join(sorted(explicit_dependents))})"
                )

    # Print shared packages report (reuses the analysis computed above).
    if shared_package_analysis:
        print("\n\n=========== SHARED PACKAGES REPORT ===========")
        for env_name, packages in shared_package_analysis.items():
            print(f"\nEnvironment: {env_name}")
            for package, feature, other_env in sorted(packages):
                print(f" - {package} (feature: {feature}, needed in: {other_env})")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment