Last active
August 15, 2025 14:35
-
-
Save arastu/ba6de9357b74ab2badfcc37447af1d4f to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# mac-data-ml-setup.sh: An enhanced, interactive script to set up a modern
# Data & ML development environment on macOS from scratch.
#
# Usage: bash mac-data-ml-setup.sh   (interactive; answers are read from stdin)
#
# This script improves upon the original by adding:
# - Stricter error checking (set -euo pipefail).
# - Pre-flight checks for essential commands (git, curl).
# - More robust shell detection and configuration.
# - User choice for installing different sets of Python packages.
# - Better error handling and clearer, more colorful output.
# - A final check to verify the installation.

# Strict mode:
#   -e           exit as soon as an unhandled command fails
#   -u           treat expansion of an unset variable as an error
#   -o pipefail  a pipeline fails if any of its stages fails
set -euo pipefail
# --- Configuration & Colors ---
# Prefer tput so the sequences match the current terminal's capabilities.
# If tput is unavailable or reports no colour support, fall back to plain ANSI
# sequences. The fallback uses $'...' quoting so each variable holds the real
# escape bytes (like the tput branch does) and therefore works with echo,
# echo -e and printf alike, instead of only when passed through `echo -e`.
if tput setaf 1 >/dev/null 2>&1; then
  C_RESET=$(tput sgr0)
  C_RED=$(tput setaf 1)
  C_GREEN=$(tput setaf 2)
  C_YELLOW=$(tput setaf 3)
  C_BLUE=$(tput setaf 4)
  C_MAGENTA=$(tput setaf 5)
  C_BOLD=$(tput bold)
else
  C_RESET=$'\033[0m'
  C_RED=$'\033[0;31m'
  C_GREEN=$'\033[0;32m'
  C_YELLOW=$'\033[0;33m'
  C_BLUE=$'\033[0;34m'
  C_MAGENTA=$'\033[0;35m'
  C_BOLD=$'\033[1m'
fi
# --- Helper Functions ---

# Prints a formatted section header: a blank line, the title, and a rule.
# Globals:   C_BOLD, C_MAGENTA, C_RESET (read; treated as empty if unset)
# Arguments: $1 - header text (printed verbatim; backslashes are not expanded)
# Outputs:   three lines on stdout
function print_header() {
  local title="${1-}"
  # %b expands escapes in the colour codes only; %s keeps the title literal.
  printf '\n%b%bπ %s %b\n' "${C_BOLD-}" "${C_MAGENTA-}" "$title" "${C_RESET-}"
  printf '%b%b=====================================================%b\n' \
    "${C_BOLD-}" "${C_MAGENTA-}" "${C_RESET-}"
}
# Prints a success message in green.
# Globals:   C_GREEN, C_RESET (read; treated as empty if unset)
# Arguments: $1 - message (printed verbatim; backslashes are not expanded)
# Outputs:   one line on stdout
function print_success() {
  # %b is used for the colour codes only so the message itself stays literal.
  printf '%bβ %s%b\n' "${C_GREEN-}" "${1-}" "${C_RESET-}"
}
# Prints an error message in red on stderr and terminates the script.
# Globals:   C_RED, C_RESET (read; treated as empty if unset)
# Arguments: $1 - message (printed verbatim; backslashes are not expanded)
# Outputs:   one line on stderr, nothing on stdout
# Returns:   never returns; exits the shell with status 1
function print_error() {
  printf '%bβ ERROR: %s%b\n' "${C_RED-}" "${1-}" "${C_RESET-}" >&2
  exit 1
}
# Prints an informational message in blue.
# Globals:   C_BLUE, C_RESET (read; treated as empty if unset)
# Arguments: $1 - message (printed verbatim; backslashes are not expanded)
# Outputs:   one line on stdout
function print_info() {
  printf '%b %s%b\n' "${C_BLUE-}" "${1-}" "${C_RESET-}"
}
# Prints a warning message in yellow.
# Globals:   C_YELLOW, C_RESET (read; treated as empty if unset)
# Arguments: $1 - message (printed verbatim; backslashes are not expanded)
# Outputs:   one line on stdout
function print_warning() {
  printf '%bβ οΈ %s%b\n' "${C_YELLOW-}" "${1-}" "${C_RESET-}"
}
# Checks whether a command can be resolved by the shell.
# Arguments: $1 - command name
# Outputs:   nothing (both stdout and stderr of the lookup are discarded)
# Returns:   0 if the command is found, non-zero otherwise (including when
#            no name or an empty name is given)
function command_exists() {
  command -v "${1-}" >/dev/null 2>&1
}
# --- Main Logic ---

# Asks a single-keystroke yes/no question.
# Globals:   C_YELLOW, C_RESET (read)
# Arguments: $1 - prompt text (printed without a trailing newline)
# Returns:   0 if the user pressed y or Y; 1 for anything else, including
#            end-of-input on stdin
function _confirm() {
  local answer=""
  printf '%b%s%b' "${C_YELLOW}" "$1" "${C_RESET}"
  # read fails at end-of-input; treat that as "no" instead of letting
  # `set -e` abort the script without any message.
  read -n 1 -r answer || true
  echo
  [[ "$answer" =~ ^[Yy]$ ]]
}

# Prints the path of the Homebrew executable if one can be located.
# Checks the Apple Silicon prefix, then the Intel prefix, then PATH.
# Returns:   0 and the path on stdout when found, non-zero otherwise
function _find_brew() {
  local candidate
  for candidate in /opt/homebrew/bin/brew /usr/local/bin/brew; do
    if [[ -x "$candidate" ]]; then
      printf '%s\n' "$candidate"
      return 0
    fi
  done
  command -v brew
}

# Makes sure the command provided by a Homebrew formula is available,
# installing the formula when it is missing.
# Arguments: $1 - formula name (also the command name)
function _ensure_brew_tool() {
  local tool="$1"
  if command_exists "$tool"; then
    print_success "$tool is already installed and available."
    return 0
  fi
  print_info "$tool not found. Installing via Homebrew..."
  # Ask brew about this exact formula rather than piping the full list into
  # `grep -q`: under `pipefail` grep's early exit can kill brew with SIGPIPE
  # and make the pipeline look like a failure, and a "^name" pattern also
  # matches other formulae that merely start with the same letters.
  if brew list --formula "$tool" >/dev/null 2>&1; then
    print_info "$tool installed via Homebrew but not in PATH. Configuring..."
    eval "$(brew shellenv)"
  else
    print_info "Installing $tool... (this may take a moment)"
    brew install "$tool"
    print_success "$tool installed successfully!"
  fi
}

# Interactive entry point: installs Homebrew, direnv and uv, creates a
# workspace directory under $HOME, initialises a uv project with a virtual
# environment and a direnv .envrc, and installs the selected package groups.
# Globals:   C_* colour variables, HOME, SHELL (read)
# Arguments: none
# Outputs:   progress messages on stdout, errors on stderr
# Returns:   exits 0 if the user declines at the first prompt; exits 1 (via
#            print_error) or with the failing command's status on errors
function main() {
  # --- Step 1: Welcome and System Pre-flight Checks ---
  # Clearing the screen is purely cosmetic; do not abort if the terminal
  # cannot do it.
  clear || true
  print_header "π macOS Data & ML Environment Setup"
  echo -e "${C_BOLD}${C_BLUE}Welcome to the ultimate Python development environment setup!${C_RESET}"
  echo
  print_info "π― What this script will do for you:"
  echo -e " ${C_GREEN}β Install modern development tools (Homebrew, uv, direnv)${C_RESET}"
  echo -e " ${C_GREEN}β Create a professional workspace structure${C_RESET}"
  echo -e " ${C_GREEN}β Set up a Python project with automatic environment management${C_RESET}"
  echo -e " ${C_GREEN}β Install data science, ML, and optional web development packages${C_RESET}"
  echo -e " ${C_GREEN}β Configure everything to work seamlessly together${C_RESET}"
  echo
  print_info "β±οΈ Estimated time: 5-10 minutes"
  print_info "π Internet connection required for downloads"
  echo
  if ! _confirm "Ready to transform your Mac into a data science powerhouse? [y/N]: "; then
    print_info "Setup cancelled. Run this script again when you're ready!"
    exit 0
  fi
  echo
  print_success "Great! Let's get started! π"

  # Ensure we are on macOS
  if [[ "$(uname)" != "Darwin" ]]; then
    print_error "This script is designed for macOS only."
  fi

  # Check for essential dependencies
  local cmd
  for cmd in git curl; do
    if ! command_exists "$cmd"; then
      print_error "'$cmd' is not installed. Please install the Xcode Command Line Tools with 'xcode-select --install' and re-run."
    fi
  done
  print_success "System pre-flight checks passed."

  # --- Step 2: Install and Configure Homebrew ---
  print_header "Checking for Homebrew"
  local brew_path=""
  brew_path="$(_find_brew)" || brew_path=""
  if [[ -z "$brew_path" ]]; then
    print_info "Homebrew not found. Installing now..."
    # Download first so a network failure is reported instead of silently
    # running an empty script.
    local installer
    installer="$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)" \
      || print_error "Failed to download the Homebrew installer. Check your internet connection and re-run."
    /bin/bash -c "$installer"
    brew_path="$(_find_brew)" || print_error "Could not find Homebrew executable."
  else
    print_success "Homebrew is already installed."
  fi

  # Set up for the current script session
  eval "$("$brew_path" shellenv)"

  # Permanently add to the user's shell profile for future sessions
  local shell_config_file=""
  case "${SHELL-}" in
    */zsh) shell_config_file="$HOME/.zshrc" ;;
    */bash) shell_config_file="$HOME/.bash_profile" ;;
    *)
      print_warning "Could not detect a standard shell. Please add Homebrew to your PATH manually."
      ;;
  esac
  if [[ -n "$shell_config_file" ]]; then
    # The line written to the profile; \$ keeps the command substitution
    # literal so it is evaluated by the user's shell at login.
    local shellenv_line="eval \"\$($brew_path shellenv)\""
    print_info "Adding Homebrew to your shell profile ($shell_config_file)..."
    touch "$shell_config_file" # Ensure the file exists
    if ! grep -qF -- "$shellenv_line" "$shell_config_file"; then
      printf '\n# Set up Homebrew environment\n%s\n' "$shellenv_line" >> "$shell_config_file"
      print_success "Homebrew shell environment configured permanently."
    else
      print_success "Homebrew shell environment already configured."
    fi
  fi
  if ! command_exists brew; then
    print_error "Homebrew is not available in the current shell. Please restart your terminal or source your profile."
  fi
  print_info "Updating Homebrew..."
  brew update
  print_success "Homebrew updated."

  # --- Step 3: Install Core Tools with Brew ---
  print_header "Installing Core Development Tools"
  print_info "Checking and installing essential tools for modern Python development:"
  echo -e " ${C_BLUE}π direnv - Automatic environment loading${C_RESET}"
  echo -e " ${C_BLUE}β‘ uv - Ultra-fast Python package manager${C_RESET}"
  echo
  _ensure_brew_tool direnv
  _ensure_brew_tool uv

  # Final verification
  if ! command_exists direnv || ! command_exists uv; then
    print_error "Some tools are still not available. Please restart your terminal and try again."
  fi
  echo
  print_success "All essential tools are ready!"

  # --- Step 4: Configure direnv ---
  print_header "Configuring direnv for your shell"
  if [[ -n "$shell_config_file" ]]; then
    local hook_cmd=""
    # Single quotes are intentional: the substitution must be written to the
    # profile unexpanded.
    case "${SHELL-}" in
      */zsh) hook_cmd='eval "$(direnv hook zsh)"' ;;
      */bash) hook_cmd='eval "$(direnv hook bash)"' ;;
    esac
    if ! grep -q 'direnv hook' "$shell_config_file"; then
      print_info "Adding direnv hook to $shell_config_file..."
      printf '\n# direnv hook\n%s\n' "$hook_cmd" >> "$shell_config_file"
      print_success "direnv hook added."
    else
      print_success "direnv hook is already configured."
    fi
  else
    print_warning "Cannot configure direnv automatically. Please add the appropriate hook to your shell configuration file."
  fi

  # --- Step 5: Create Workspace Directory ---
  print_header "Setting up your Development Workspace"
  print_info "A workspace is your main development folder where you'll organize all your projects."
  print_info "This will be created in your home directory and will contain:"
  echo -e " ${C_BLUE}π Your Python projects and notebooks${C_RESET}"
  echo -e " ${C_BLUE}π A shared Python environment with all your tools${C_RESET}"
  echo -e " ${C_BLUE}π Data science and ML experiments${C_RESET}"
  echo -e " ${C_BLUE}βοΈ Configuration files (pyproject.toml, .envrc)${C_RESET}"
  echo
  print_info "Popular workspace names: 'Workspace', 'Projects', 'Development', 'DataScience', 'Code'"
  echo
  local workspace_name=""
  while true; do
    echo -e -n "${C_YELLOW}π‘ What would you like to name your workspace folder? ${C_RESET}"
    if ! read -r workspace_name; then
      echo
      print_error "No workspace name was provided (end of input)."
    fi
    if [[ -z "$workspace_name" ]]; then
      echo -e "${C_RED}β Folder name cannot be empty. Please try again.${C_RESET}"
    elif [[ "$workspace_name" =~ [[:space:]] ]]; then
      echo -e "${C_RED}β Folder name should not contain spaces. Try 'DataScience' instead of 'Data Science'.${C_RESET}"
    elif [[ "$workspace_name" == */* || "$workspace_name" == "." || "$workspace_name" == ".." ]]; then
      # The workspace is always created directly inside $HOME.
      echo -e "${C_RED}β Folder name must be a plain name without '/'. Please try again.${C_RESET}"
    elif [[ -e "$HOME/$workspace_name" && ! -d "$HOME/$workspace_name" ]]; then
      printf "%bβ A file named '%s' already exists. Please choose a different name.%b\n" \
        "${C_RED}" "$workspace_name" "${C_RESET}"
    else
      printf '%bβ Great choice! Creating workspace: %b%s%b\n' \
        "${C_GREEN}" "${C_BOLD}" "$workspace_name" "${C_RESET}"
      break
    fi
  done
  local workspace_path="$HOME/$workspace_name"
  mkdir -p "$workspace_path"
  cd "$workspace_path" || print_error "Could not navigate to $workspace_path"
  print_success "Workspace created at: $workspace_path"

  # --- Step 6: Initialize Python Project and direnv config ---
  # Both steps are skipped when their result already exists so the script can
  # be re-run against an existing workspace.
  if [[ -f pyproject.toml ]]; then
    print_info "Existing pyproject.toml found. Skipping project initialization."
  else
    print_info "Initializing Python project with uv..."
    uv init --no-readme
    print_success "Python project initialized with pyproject.toml."
  fi
  if [[ -d .venv ]]; then
    print_info "Existing virtual environment found. Reusing it."
  else
    print_info "Creating virtual environment..."
    uv venv
    print_success "Virtual environment created."
  fi
  print_info "Configuring direnv to auto-load this environment..."
  printf '%s\n' '# Activate uv environment' 'source .venv/bin/activate' > .envrc
  direnv allow .
  print_success "'.envrc' created and allowed. Environment will now auto-load."

  # --- Step 7: Install Python Packages ---
  print_header "Installing Python Packages"
  print_info "We'll install packages in groups based on your needs. All packages are added to pyproject.toml for easy management."
  echo
  local core_packages=(
    "jupyterlab" "notebook" "pandas" "polars" "numpy" "scikit-learn"
    "matplotlib" "seaborn" "plotly" "ruff" "ipykernel"
  )
  local web_packages=(
    "fastapi[all]" "uvicorn" "pydantic" "sqlalchemy" "psycopg2-binary"
  )
  local data_eng_packages=(
    "duckdb" "dbt-duckdb" "great-expectations"
  )
  print_info "π CORE DATA SCIENCE PACKAGES (Essential - Installing automatically):"
  print_info " β’ JupyterLab & Notebook - Interactive development environment"
  print_info " β’ Pandas & Polars - Data manipulation and analysis"
  print_info " β’ NumPy - Numerical computing library"
  print_info " β’ Scikit-learn - Machine learning library"
  print_info " β’ Matplotlib, Seaborn, Plotly - Data visualization"
  print_info " β’ Ruff - Fast Python linter and formatter"
  print_info " β’ IPykernel - Jupyter kernel for Python"
  echo
  print_info "Installing core data science packages..."
  uv add "${core_packages[@]}"
  print_success "Core packages installed."
  echo
  print_info "π WEB DEVELOPMENT PACKAGES:"
  print_info " β’ FastAPI - Modern, fast web framework for building APIs"
  print_info " β’ Uvicorn - Lightning-fast ASGI server"
  print_info " β’ Pydantic - Data validation using Python type annotations"
  print_info " β’ SQLAlchemy - SQL toolkit and Object-Relational Mapping"
  print_info " β’ psycopg2-binary - PostgreSQL adapter for Python"
  if _confirm "π‘ Do you want to install web development packages? [y/N]: "; then
    print_info "Installing web development packages..."
    uv add "${web_packages[@]}"
    print_success "Web packages installed."
  else
    print_info "Skipping web development packages. You can install them later with: uv add fastapi uvicorn"
  fi
  echo
  print_info "π§ DATA ENGINEERING PACKAGES:"
  print_info " β’ DuckDB - In-process SQL OLAP database management system"
  print_info " β’ dbt-duckdb - Data build tool with DuckDB adapter"
  print_info " β’ Great Expectations - Data validation and documentation"
  if _confirm "π‘ Do you want to install data engineering packages? [y/N]: "; then
    print_info "Installing data engineering packages..."
    uv add "${data_eng_packages[@]}"
    print_success "Data engineering packages installed."
  else
    print_info "Skipping data engineering packages. You can install them later with: uv add duckdb dbt-duckdb"
  fi

  # --- Final Instructions ---
  # Lines that embed user-supplied paths use printf with %s so backslashes in
  # those values are printed literally.
  print_header "π Setup Complete! π"
  echo -e "${C_BOLD}${C_GREEN}Congratulations! Your modern Data & ML environment is ready to use!${C_RESET}"
  echo -e "\n${C_YELLOW}π --- WHAT WAS CREATED ---${C_RESET}"
  printf ' %bπ Workspace:%b %s\n' "${C_BOLD}" "${C_RESET}" "$workspace_path"
  echo -e " ${C_BOLD}π Python Project:${C_RESET} Complete with pyproject.toml and .venv"
  echo -e " ${C_BOLD}βοΈ Auto-activation:${C_RESET} Environment loads automatically via direnv"
  echo -e " ${C_BOLD}π¦ Packages:${C_RESET} All dependencies tracked in pyproject.toml"
  echo -e "\n${C_YELLOW}π --- GETTING STARTED ---${C_RESET}"
  echo -e "${C_BOLD}1. Restart your terminal or reload your shell:${C_RESET}"
  if [[ -n "$shell_config_file" ]]; then
    printf ' %bsource %s%b\n' "${C_GREEN}" "$shell_config_file" "${C_RESET}"
  else
    echo -e " ${C_GREEN}# Simply restart your terminal app${C_RESET}"
  fi
  echo
  echo -e "${C_BOLD}2. Navigate to your workspace:${C_RESET}"
  printf ' %bcd "%s"%b\n' "${C_GREEN}" "$workspace_path" "${C_RESET}"
  echo -e " ${C_BLUE}π‘ Your Python environment will activate automatically!${C_RESET}"
  echo
  echo -e "${C_BOLD}3. Verify everything works:${C_RESET}"
  echo -e " ${C_GREEN}python --version${C_RESET} ${C_BLUE}# Check Python version${C_RESET}"
  echo -e " ${C_GREEN}which python${C_RESET} ${C_BLUE}# Should point to .venv/bin/python${C_RESET}"
  echo -e " ${C_GREEN}uv pip list${C_RESET} ${C_BLUE}# See installed packages${C_RESET}"
  echo
  echo -e "${C_BOLD}4. Start coding:${C_RESET}"
  echo -e " ${C_GREEN}jupyter lab${C_RESET} ${C_BLUE}# Launch Jupyter Lab${C_RESET}"
  echo -e " ${C_GREEN}uv add requests${C_RESET} ${C_BLUE}# Add new packages anytime${C_RESET}"
  echo -e " ${C_GREEN}uv run python script.py${C_RESET} ${C_BLUE}# Run Python scripts${C_RESET}"
  echo -e "\n${C_YELLOW}π‘ --- PRO TIPS ---${C_RESET}"
  printf ' β’ Your environment auto-activates when you cd into %s\n' "$workspace_name"
  echo -e " β’ Use '${C_GREEN}uv add package-name${C_RESET}' to install new packages"
  echo -e " β’ All dependencies are saved in pyproject.toml for easy sharing"
  echo -e " β’ Create new projects with '${C_GREEN}uv init${C_RESET}' in subdirectories"
  echo -e "\n${C_BOLD}${C_MAGENTA}π― Ready to build amazing things with Python! π${C_RESET}"
  echo -e "\n${C_BLUE}Happy coding!${C_RESET}\n"
}
# Run the main function, forwarding any command-line arguments unchanged.
main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment