#!/bin/bash
# run_mpi.sh - Generic MPI launcher with sensible OpenMP/MPI defaults
#
# This wrapper provides a consistent performance configuration for hybrid
# MPI + OpenMP applications:
#   * One MPI rank per socket by default
#   * Threads pinned to sockets; OpenMP restricted to physical cores
#   * Automatic CPU topology detection from /proc/cpuinfo
#   * Canonical OpenMP and MPI environment settings
#
# Usage: ./run_mpi.sh <mpi-processes> <executable> [args...]
#
# Examples:
#   ./run_mpi.sh 2 ./my_app -i input.dat
#   ./run_mpi.sh 4 python my_script.py --verbose
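#
# Illustration (hypothetical 2-socket machine with 16 physical cores per
# socket): "./run_mpi.sh 2 ./my_app" launches 2 ranks, one per socket, each
# running OMP_NUM_THREADS=16 OpenMP threads pinned to that socket's physical
# cores.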

# Fail fast on errors, unset variables, and pipeline failures
set -euo pipefail

# Detect CPU topology by parsing /proc/cpuinfo
detect_cpu_topology() {
    local physical_ids total_cores unique_cores

    # Count distinct physical packages (sockets); awk exits 0 even with no
    # matches, so this stays safe under pipefail
    physical_ids=$(awk '/^physical id/ {print $NF}' /proc/cpuinfo | sort -u | wc -l)

    # Count logical processors
    total_cores=$(grep -c "^processor" /proc/cpuinfo)

    # Extract unique (socket, core) pairs to count physical cores
    unique_cores=$(awk '
        /^physical id/ { phys_id = $NF }
        /^core id/     { core_id = $NF; print phys_id ":" core_id }
    ' /proc/cpuinfo | sort -u | wc -l)

    SOCKETS=$physical_ids
    PHYSICAL_CORES=$unique_cores
    TOTAL_CORES=$total_cores

    # Fallback for VMs/containers whose /proc/cpuinfo omits topology fields
    [ "$SOCKETS" -ge 1 ] || SOCKETS=1
    [ "$PHYSICAL_CORES" -ge 1 ] || PHYSICAL_CORES=$TOTAL_CORES

    CORES_PER_SOCKET=$((PHYSICAL_CORES / SOCKETS))
    THREADS_PER_CORE=$((TOTAL_CORES / PHYSICAL_CORES))

    # Hyperthreading/SMT detection
    if [ "$THREADS_PER_CORE" -gt 1 ]; then
        HYPERTHREADING_DETECTED="Yes"
    else
        HYPERTHREADING_DETECTED="No"
    fi

    # Set the OpenMP thread count: physical cores per socket, matching the
    # one-rank-per-socket mapping used below
    OMP_THREADS=$CORES_PER_SOCKET
}
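
# Illustration (hypothetical machine): with 2 sockets, 2 cores/socket, and
# hyperthreading, /proc/cpuinfo lists 8 logical processors and the awk
# extraction above yields the unique pairs "0:0 0:1 1:0 1:1", so
# PHYSICAL_CORES=4, CORES_PER_SOCKET=2, and THREADS_PER_CORE=2.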

# Check arguments
if [ $# -lt 2 ]; then
    echo "Usage: $0 <mpi-processes> <executable> [args...]" >&2
    echo "" >&2
    echo "Examples:" >&2
    echo "  $0 2 ./my_app -i input.dat" >&2
    echo "  $0 4 python my_script.py --verbose" >&2
    exit 1
fi

MPI_PROCS=$1
shift
EXECUTABLE=$1
shift

# Validate MPI process count
if ! [[ $MPI_PROCS =~ ^[0-9]+$ ]] || [ "$MPI_PROCS" -lt 1 ]; then
    echo "Error: Invalid MPI process count '$MPI_PROCS'" >&2
    exit 1
fi

# Validate that the executable exists as a file or is on PATH
if [ ! -f "$EXECUTABLE" ] && ! command -v "$EXECUTABLE" &> /dev/null; then
    echo "Error: Executable '$EXECUTABLE' not found" >&2
    exit 1
fi

# Detect system topology
detect_cpu_topology
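
# Optional cross-check (a sketch; assumes lscpu is installed, which is not
# guaranteed on minimal systems): warn if the socket count derived from
# /proc/cpuinfo disagrees with lscpu's view, e.g. inside some VMs.
if command -v lscpu &> /dev/null; then
    LSCPU_SOCKETS=$(lscpu | awk '/^Socket\(s\):/ {print $2}')
    if [ -n "$LSCPU_SOCKETS" ] && [ "$LSCPU_SOCKETS" != "$SOCKETS" ]; then
        echo "Warning: lscpu reports ${LSCPU_SOCKETS} socket(s); using ${SOCKETS} from /proc/cpuinfo" >&2
    fi
fi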

# Canonical OpenMP settings (using the detected core count)
export OMP_NUM_THREADS=$OMP_THREADS   # Physical cores per socket (auto-detected)
export OMP_PLACES=sockets             # Place threads on sockets
export OMP_PROC_BIND=close            # Bind threads close to each other
export OMP_NESTED=false               # Disable nested parallelism (deprecated in OpenMP 5.0; kept for older runtimes)
export OMP_DYNAMIC=false              # Disable dynamic thread-count adjustment

# Additional OpenMP optimizations (KMP_* is honored only by the Intel OpenMP runtime)
export KMP_AFFINITY=granularity=fine,compact,1,0
export KMP_BLOCKTIME=0                # Idle threads sleep immediately instead of spin-waiting
export MKL_NUM_THREADS=$OMP_THREADS   # If using MKL, match the OpenMP thread count
export MKL_DYNAMIC=false

# OpenBLAS thread configuration - always match OMP_NUM_THREADS
export OPENBLAS_NUM_THREADS=$OMP_NUM_THREADS
export GOTO_NUM_THREADS=$OMP_NUM_THREADS
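
# Optional debugging aid: OMP_DISPLAY_ENV is standard OpenMP (4.0+), though its
# output format varies by runtime. Uncomment to make the OpenMP runtime print
# its effective settings at startup.
# export OMP_DISPLAY_ENV=true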

# Canonical MPI optimizations (OMPI_MCA_* variables are Open MPI-specific;
# other MPI implementations ignore them)
export OMPI_MCA_mpi_leave_pinned=1                    # Keep registered memory pinned
export OMPI_MCA_btl_vader_single_copy_mechanism=none  # Avoid cross-NUMA copies
export OMPI_MCA_btl_openib_allow_ib=1                 # Enable InfiniBand if available
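
# To confirm the local Open MPI build recognizes these MCA parameters,
# ompi_info can list them (a suggestion; component names and flags vary across
# versions, e.g. the "vader" BTL became "sm" in Open MPI 5.x):
#   ompi_info --param btl vader --level 9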

# Additional system information; "|| true" keeps a missing lscpu or field from
# tripping "set -e"
NUMA_NODES=$(lscpu 2>/dev/null | awk '/^NUMA node\(s\):/ {print $3}' || true)

echo "=== MPI Application Configuration ==="
echo "System: ${SOCKETS} sockets, ${CORES_PER_SOCKET} cores/socket, ${NUMA_NODES:-unknown} NUMA nodes"
echo "Topology: ${PHYSICAL_CORES} physical cores, ${TOTAL_CORES} logical cores"
echo "Hyperthreading: ${HYPERTHREADING_DETECTED} (${THREADS_PER_CORE} threads/core)"
echo "OpenMP: ${OMP_THREADS} threads/socket, ${OMP_PLACES} placement, ${OMP_PROC_BIND} binding"
echo "OpenBLAS: ${OPENBLAS_NUM_THREADS} threads (matching OMP_NUM_THREADS)"
echo "MPI: ${MPI_PROCS} processes"
echo ""

# Run the application with the chosen MPI/OpenMP settings; --report-bindings
# prints each rank's actual CPU binding at startup
echo "=== Starting MPI Application ==="
exec mpirun -np "${MPI_PROCS}" \
    --bind-to socket \
    --map-by socket \
    --mca mpi_leave_pinned 1 \
    --mca btl_vader_single_copy_mechanism none \
    --report-bindings \
    "$EXECUTABLE" "$@"