Skip to content

Instantly share code, notes, and snippets.

@arastu
Last active August 15, 2025 14:35
Show Gist options
  • Save arastu/ba6de9357b74ab2badfcc37447af1d4f to your computer and use it in GitHub Desktop.
Save arastu/ba6de9357b74ab2badfcc37447af1d4f to your computer and use it in GitHub Desktop.
#!/bin/bash
#
# mac-data-ml-setup.sh: An enhanced, interactive script to set up a modern
# Data & ML development environment on macOS from scratch.
#
# This script improves upon the original by adding:
# - Stricter error checking (set -euo pipefail).
# - Pre-flight checks for essential commands (git, curl).
# - More robust shell detection and configuration.
# - User choice for installing different sets of Python packages.
# - Better error handling and clearer, more colorful output.
# - A final check to verify the installation.
# Strict mode: abort on any failing command (-e), on use of an unset
# variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail
# --- Configuration & Colors ---
# Start from literal ANSI escape strings (they are expanded when printed with
# backslash-escape interpretation), then replace them with the terminal's own
# sequences when tput reports that colors are supported.
C_RESET='\033[0m'
C_RED='\033[0;31m'
C_GREEN='\033[0;32m'
C_YELLOW='\033[0;33m'
C_BLUE='\033[0;34m'
C_MAGENTA='\033[0;35m'
C_BOLD='\033[1m'
if tput setaf 1 >&/dev/null; then
  C_RESET=$(tput sgr0)
  C_RED=$(tput setaf 1)
  C_GREEN=$(tput setaf 2)
  C_YELLOW=$(tput setaf 3)
  C_BLUE=$(tput setaf 4)
  C_MAGENTA=$(tput setaf 5)
  C_BOLD=$(tput bold)
fi
# --- Helper Functions ---
# Prints a section header: a blank line, the title in bold magenta prefixed
# with a rocket emoji, then a rule of '=' characters.
# Arguments: $1 - header title
# Globals:   C_BOLD, C_MAGENTA, C_RESET (read)
# Note: the emoji literal was previously stored as mis-decoded bytes and
# printed as garbage; it is now a proper UTF-8 rocket.
function print_header() {
  echo -e "\n${C_BOLD}${C_MAGENTA}🚀 $1 ${C_RESET}"
  echo -e "${C_BOLD}${C_MAGENTA}=====================================================${C_RESET}"
}
# Prints a success message in green, prefixed with a check-mark emoji.
# Arguments: $1 - message text
# Globals:   C_GREEN, C_RESET (read)
# Note: the emoji literal was previously stored as mis-decoded bytes and
# printed as garbage; it is now a proper UTF-8 check mark.
function print_success() {
  echo -e "${C_GREEN}✅ $1${C_RESET}"
}
# Reports a fatal error in red on stderr, then terminates the script with
# exit status 1.
# Arguments: $1 - error description
# Globals:   C_RED, C_RESET (read)
function print_error() {
  local message="$1"
  printf '%b\n' "${C_RED}❌ ERROR: ${message}${C_RESET}" 1>&2
  exit 1
}
# Writes an informational line in blue to stdout. Backslash escapes in the
# message are interpreted.
# Arguments: $1 - message text
# Globals:   C_BLUE, C_RESET (read)
function print_info() {
  local message="$1"
  printf '%b\n' "${C_BLUE} ${message}${C_RESET}"
}
# Writes a warning line in yellow to stdout, prefixed with a warning sign.
# Backslash escapes in the message are interpreted.
# Arguments: $1 - message text
# Globals:   C_YELLOW, C_RESET (read)
function print_warning() {
  local message="$1"
  printf '%b\n' "${C_YELLOW}⚠️ ${message}${C_RESET}"
}
# Tests whether a name resolves to something runnable in this shell.
# Arguments: $1 - command name to look up
# Returns:   the exit status of `command -v` (0 when found); prints nothing.
function command_exists() {
  local candidate="$1"
  command -v "$candidate" >/dev/null 2>&1
}
# --- Main Logic ---
# Asks a yes/no question and succeeds only when the reply is a single y or Y.
# Arguments: $1 - prompt text (printed in yellow, no trailing newline)
# A failed read (for example EOF on stdin) is treated as "no".
function _confirm() {
  local reply=""
  echo -e -n "${C_YELLOW}$1 ${C_RESET}"
  read -n 1 -r reply || true
  echo
  [[ "$reply" =~ ^[Yy]$ ]]
}

# Makes sure a Homebrew formula's command is usable in this session.
# Arguments: $1 - formula name, which is also the command name (e.g. uv)
function _ensure_brew_formula() {
  local tool="$1"
  if command_exists "$tool"; then
    print_success "$tool is already installed and available."
    return 0
  fi
  print_info "$tool not found. Installing via Homebrew..."
  # Query the exact formula instead of grepping the full list: a prefix match
  # such as "^uv" also matches "uvicorn", and `grep -q` closing the pipe early
  # can make the pipeline fail under `pipefail`.
  if brew list --formula "$tool" &>/dev/null; then
    print_info "$tool installed via Homebrew but not in PATH. Configuring..."
    eval "$(brew shellenv)"
  else
    print_info "Installing $tool... (this may take a moment)"
    brew install "$tool"
    print_success "$tool installed successfully!"
  fi
}

# Interactive entry point: walks the user through installing Homebrew, direnv
# and uv, wiring them into the shell profile, creating a workspace directory
# under $HOME, initialising a uv project with a direnv-activated virtual
# environment, and installing the chosen Python package groups.
# Globals:   C_* colour variables, SHELL, HOME (read)
# Exits:     0 if the user declines at the first prompt; 1 (via print_error)
#            on any unrecoverable condition; otherwise falls through.
function main() {
  # --- Step 1: Welcome and System Pre-flight Checks ---
  # Clearing the screen is cosmetic; do not let it abort the setup.
  clear || true
  print_header "🚀 macOS Data & ML Environment Setup"
  echo -e "${C_BOLD}${C_BLUE}Welcome to the ultimate Python development environment setup!${C_RESET}"
  echo
  print_info "🎯 What this script will do for you:"
  echo -e " ${C_GREEN}✅ Install modern development tools (Homebrew, uv, direnv)${C_RESET}"
  echo -e " ${C_GREEN}✅ Create a professional workspace structure${C_RESET}"
  echo -e " ${C_GREEN}✅ Set up a Python project with automatic environment management${C_RESET}"
  echo -e " ${C_GREEN}✅ Install data science, ML, and optional web development packages${C_RESET}"
  echo -e " ${C_GREEN}✅ Configure everything to work seamlessly together${C_RESET}"
  echo
  print_info "⏱️ Estimated time: 5-10 minutes"
  print_info "🌐 Internet connection required for downloads"
  echo
  if ! _confirm "Ready to transform your Mac into a data science powerhouse? [y/N]:"; then
    print_info "Setup cancelled. Run this script again when you're ready!"
    exit 0
  fi
  echo
  print_success "Great! Let's get started! 🎉"
  # Ensure we are on macOS
  if [[ "$(uname)" != "Darwin" ]]; then
    print_error "This script is designed for macOS only."
  fi
  # Check for essential dependencies
  local cmd
  for cmd in git curl; do
    if ! command_exists "$cmd"; then
      print_error "'$cmd' is not installed. Please install the Xcode Command Line Tools with 'xcode-select --install' and re-run."
    fi
  done
  print_success "System pre-flight checks passed."

  # --- Step 2: Install and Configure Homebrew ---
  print_header "Checking for Homebrew"
  if ! command_exists brew; then
    print_info "Homebrew not found. Installing now..."
    # Fetch first so that a failed download is reported instead of silently
    # running an empty script.
    local installer
    installer=$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh) \
      || print_error "Could not download the Homebrew installer."
    /bin/bash -c "$installer"
  else
    print_success "Homebrew is already installed."
  fi
  # Configure Homebrew shell environment
  local brew_path
  if [[ -x "/opt/homebrew/bin/brew" ]]; then # Apple Silicon
    brew_path="/opt/homebrew/bin/brew"
  elif [[ -x "/usr/local/bin/brew" ]]; then # Intel Macs
    brew_path="/usr/local/bin/brew"
  elif command_exists brew; then # Non-default prefix already on PATH
    brew_path=$(command -v brew)
  else
    print_error "Could not find Homebrew executable."
  fi
  # Set up for the current script session
  eval "$("$brew_path" shellenv)"
  # Permanently add to the user's shell profile for future sessions
  local shell_config_file=""
  # SHELL may be unset in minimal environments; avoid tripping `set -u`.
  local login_shell="${SHELL:-}"
  case "$login_shell" in
    */zsh) shell_config_file="$HOME/.zshrc" ;;
    */bash) shell_config_file="$HOME/.bash_profile" ;;
    *)
      print_warning "Could not detect a standard shell. Please add Homebrew to your PATH manually."
      ;;
  esac
  if [[ -n "$shell_config_file" ]]; then
    print_info "Adding Homebrew to your shell profile ($shell_config_file)..."
    touch "$shell_config_file" # Ensure the file exists
    local shellenv_line="eval \"\$($brew_path shellenv)\""
    # Fixed-string match: the line contains characters that are regex syntax.
    if ! grep -qF -- "$shellenv_line" "$shell_config_file"; then
      printf '\n# Set up Homebrew environment\n%s\n' "$shellenv_line" >> "$shell_config_file"
      print_success "Homebrew shell environment configured permanently."
    else
      print_success "Homebrew shell environment already configured."
    fi
  fi
  if ! command_exists brew; then
    print_error "Homebrew is not available in the current shell. Please restart your terminal or source your profile."
  fi
  print_info "Updating Homebrew..."
  brew update
  print_success "Homebrew updated."

  # --- Step 3: Install Core Tools with Brew ---
  print_header "Installing Core Development Tools"
  print_info "Checking and installing essential tools for modern Python development:"
  echo -e " ${C_BLUE}📁 direnv - Automatic environment loading${C_RESET}"
  echo -e " ${C_BLUE}⚡ uv - Ultra-fast Python package manager${C_RESET}"
  echo
  _ensure_brew_formula direnv
  _ensure_brew_formula uv
  # Final verification
  if ! command_exists direnv || ! command_exists uv; then
    print_error "Some tools are still not available. Please restart your terminal and try again."
  fi
  echo
  print_success "All essential tools are ready!"

  # --- Step 4: Configure direnv ---
  print_header "Configuring direnv for your shell"
  if [[ -n "$shell_config_file" ]]; then
    # shell_config_file is only set for zsh/bash, so one arm always matches.
    local hook_cmd=""
    case "$login_shell" in
      */zsh) hook_cmd='eval "$(direnv hook zsh)"' ;;
      */bash) hook_cmd='eval "$(direnv hook bash)"' ;;
    esac
    if ! grep -q 'direnv hook' "$shell_config_file"; then
      print_info "Adding direnv hook to $shell_config_file..."
      printf '\n# direnv hook\n%s\n' "$hook_cmd" >> "$shell_config_file"
      print_success "direnv hook added."
    else
      print_success "direnv hook is already configured."
    fi
  else
    print_warning "Cannot configure direnv automatically. Please add the appropriate hook to your shell configuration file."
  fi

  # --- Step 5: Create Workspace Directory ---
  print_header "Setting up your Development Workspace"
  print_info "A workspace is your main development folder where you'll organize all your projects."
  print_info "This will be created in your home directory and will contain:"
  echo -e " ${C_BLUE}📁 Your Python projects and notebooks${C_RESET}"
  echo -e " ${C_BLUE}🐍 A shared Python environment with all your tools${C_RESET}"
  echo -e " ${C_BLUE}📊 Data science and ML experiments${C_RESET}"
  echo -e " ${C_BLUE}⚙️ Configuration files (pyproject.toml, .envrc)${C_RESET}"
  echo
  print_info "Popular workspace names: 'Workspace', 'Projects', 'Development', 'DataScience', 'Code'"
  echo
  local workspace_name
  while true; do
    echo -e -n "${C_YELLOW}💡 What would you like to name your workspace folder? ${C_RESET}"
    # -r keeps backslashes literal; bail out on EOF instead of looping forever.
    read -r workspace_name || print_error "No workspace name was provided (end of input)."
    if [[ -z "$workspace_name" ]]; then
      echo -e "${C_RED}❌ Folder name cannot be empty. Please try again.${C_RESET}"
    elif [[ "$workspace_name" =~ [[:space:]] ]]; then
      echo -e "${C_RED}❌ Folder name should not contain spaces. Try 'DataScience' instead of 'Data Science'.${C_RESET}"
    elif [[ -e "$HOME/$workspace_name" && ! -d "$HOME/$workspace_name" ]]; then
      echo -e "${C_RED}❌ A file named '$workspace_name' already exists. Please choose a different name.${C_RESET}"
    else
      echo -e "${C_GREEN}✅ Great choice! Creating workspace: ${C_BOLD}$workspace_name${C_RESET}"
      break
    fi
  done
  local workspace_path="$HOME/$workspace_name"
  mkdir -p "$workspace_path"
  cd "$workspace_path" || print_error "Could not navigate to $workspace_path"
  print_success "Workspace created at: $workspace_path"

  # --- Step 6: Initialize Python Project and direnv config ---
  # An existing directory is accepted above, so this step must tolerate a
  # re-run: `uv init` refuses to run when pyproject.toml already exists.
  if [[ -f pyproject.toml ]]; then
    print_info "Existing pyproject.toml found. Skipping project initialization."
  else
    print_info "Initializing Python project with uv..."
    uv init --no-readme
    print_success "Python project initialized with pyproject.toml."
  fi
  if [[ -d .venv ]]; then
    print_info "Existing virtual environment found. Reusing it."
  else
    print_info "Creating virtual environment..."
    uv venv
    print_success "Virtual environment created."
  fi
  print_info "Configuring direnv to auto-load this environment..."
  printf '%s\n' '# Activate uv environment' 'source .venv/bin/activate' > .envrc
  direnv allow .
  print_success "'.envrc' created and allowed. Environment will now auto-load."

  # --- Step 7: Install Python Packages ---
  print_header "Installing Python Packages"
  print_info "We'll install packages in groups based on your needs. All packages are added to pyproject.toml for easy management."
  echo
  local core_packages=(
    "jupyterlab" "notebook" "pandas" "polars" "numpy" "scikit-learn"
    "matplotlib" "seaborn" "plotly" "ruff" "ipykernel"
  )
  local web_packages=(
    "fastapi[all]" "uvicorn" "pydantic" "sqlalchemy" "psycopg2-binary"
  )
  local data_eng_packages=(
    "duckdb" "dbt-duckdb" "great-expectations"
  )
  print_info "📊 CORE DATA SCIENCE PACKAGES (Essential - Installing automatically):"
  print_info " • JupyterLab & Notebook - Interactive development environment"
  print_info " • Pandas & Polars - Data manipulation and analysis"
  print_info " • NumPy - Numerical computing library"
  print_info " • Scikit-learn - Machine learning library"
  print_info " • Matplotlib, Seaborn, Plotly - Data visualization"
  print_info " • Ruff - Fast Python linter and formatter"
  print_info " • IPykernel - Jupyter kernel for Python"
  echo
  print_info "Installing core data science packages..."
  uv add "${core_packages[@]}"
  print_success "Core packages installed."
  echo
  print_info "🌐 WEB DEVELOPMENT PACKAGES:"
  print_info " • FastAPI - Modern, fast web framework for building APIs"
  print_info " • Uvicorn - Lightning-fast ASGI server"
  print_info " • Pydantic - Data validation using Python type annotations"
  print_info " • SQLAlchemy - SQL toolkit and Object-Relational Mapping"
  print_info " • psycopg2-binary - PostgreSQL adapter for Python"
  if _confirm "💡 Do you want to install web development packages? [y/N]:"; then
    print_info "Installing web development packages..."
    uv add "${web_packages[@]}"
    print_success "Web packages installed."
  else
    print_info "Skipping web development packages. You can install them later with: uv add fastapi uvicorn"
  fi
  echo
  print_info "🔧 DATA ENGINEERING PACKAGES:"
  print_info " • DuckDB - In-process SQL OLAP database management system"
  print_info " • dbt-duckdb - Data build tool with DuckDB adapter"
  print_info " • Great Expectations - Data validation and documentation"
  if _confirm "💡 Do you want to install data engineering packages? [y/N]:"; then
    print_info "Installing data engineering packages..."
    uv add "${data_eng_packages[@]}"
    print_success "Data engineering packages installed."
  else
    print_info "Skipping data engineering packages. You can install them later with: uv add duckdb dbt-duckdb"
  fi

  # --- Final Instructions ---
  print_header "🎉 Setup Complete! 🎉"
  echo -e "${C_BOLD}${C_GREEN}Congratulations! Your modern Data & ML environment is ready to use!${C_RESET}"
  echo -e "\n${C_YELLOW}📋 --- WHAT WAS CREATED ---${C_RESET}"
  echo -e " ${C_BOLD}📁 Workspace:${C_RESET} $workspace_path"
  echo -e " ${C_BOLD}🐍 Python Project:${C_RESET} Complete with pyproject.toml and .venv"
  echo -e " ${C_BOLD}⚙️ Auto-activation:${C_RESET} Environment loads automatically via direnv"
  echo -e " ${C_BOLD}📦 Packages:${C_RESET} All dependencies tracked in pyproject.toml"
  echo -e "\n${C_YELLOW}🚀 --- GETTING STARTED ---${C_RESET}"
  echo -e "${C_BOLD}1. Restart your terminal or reload your shell:${C_RESET}"
  if [[ -n "$shell_config_file" ]]; then
    echo -e " ${C_GREEN}source $shell_config_file${C_RESET}"
  else
    echo -e " ${C_GREEN}# Simply restart your terminal app${C_RESET}"
  fi
  echo
  echo -e "${C_BOLD}2. Navigate to your workspace:${C_RESET}"
  echo -e " ${C_GREEN}cd \"$workspace_path\"${C_RESET}"
  echo -e " ${C_BLUE}💡 Your Python environment will activate automatically!${C_RESET}"
  echo
  echo -e "${C_BOLD}3. Verify everything works:${C_RESET}"
  echo -e " ${C_GREEN}python --version${C_RESET} ${C_BLUE}# Check Python version${C_RESET}"
  echo -e " ${C_GREEN}which python${C_RESET} ${C_BLUE}# Should point to .venv/bin/python${C_RESET}"
  echo -e " ${C_GREEN}uv pip list${C_RESET} ${C_BLUE}# See installed packages${C_RESET}"
  echo
  echo -e "${C_BOLD}4. Start coding:${C_RESET}"
  echo -e " ${C_GREEN}jupyter lab${C_RESET} ${C_BLUE}# Launch Jupyter Lab${C_RESET}"
  echo -e " ${C_GREEN}uv add requests${C_RESET} ${C_BLUE}# Add new packages anytime${C_RESET}"
  echo -e " ${C_GREEN}uv run python script.py${C_RESET} ${C_BLUE}# Run Python scripts${C_RESET}"
  echo -e "\n${C_YELLOW}💡 --- PRO TIPS ---${C_RESET}"
  echo -e " • Your environment auto-activates when you cd into $workspace_name"
  echo -e " • Use '${C_GREEN}uv add package-name${C_RESET}' to install new packages"
  echo -e " • All dependencies are saved in pyproject.toml for easy sharing"
  echo -e " • Create new projects with '${C_GREEN}uv init${C_RESET}' in subdirectories"
  echo -e "\n${C_BOLD}${C_MAGENTA}🎯 Ready to build amazing things with Python! 🚀${C_RESET}"
  echo -e "\n${C_BLUE}Happy coding!${C_RESET}\n"
}
# Run the main function, forwarding any command-line arguments to it.
main "$@"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment