#!/bin/bash
# run_mpi.sh - Generic MPI launcher with optimal OpenMP/MPI settings
#
# This wrapper provides consistent performance configuration for MPI applications:
# * One MPI rank per socket by default
# * Threads pinned to sockets; OpenMP restricted to physical cores
# * Automatic CPU topology detection
# * Optimal OpenMP and MPI environment settings
#
# Usage: ./run_mpi.sh <mpi-processes> <executable> [args...]
#
# Examples:
# ./run_mpi.sh 2 ./my_app -i input.dat
# ./run_mpi.sh 4 python my_script.py --verbose
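#
# Illustrative output on a hypothetical dual-socket box (16 cores/socket,
# hyperthreading on); the numbers follow from the echo statements near the
# end of this script:
#
#   === MPI Application Configuration ===
#   System: 2 sockets, 16 cores/socket, 2 NUMA nodes
#   Topology: 32 physical cores, 64 logical cores
#   Hyperthreading: Yes (2 threads/core)
#   OpenMP: 16 threads/socket, sockets placement, close binding
#   OpenBLAS: 16 threads (matching OMP_NUM_THREADS)
#   MPI: 2 processes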
set -euo pipefail
# Detect CPU topology by parsing /proc/cpuinfo
detect_cpu_topology() {
    # Count distinct physical packages (sockets) and logical processors
    local physical_ids=$(grep "^physical id" /proc/cpuinfo | awk '{print $NF}' | sort -u | wc -l)
    local total_cores=$(grep -c "^processor" /proc/cpuinfo)

    # Count unique (socket, core) pairs to get the number of physical cores
    local unique_cores=$(awk '
        /^physical id/ { phys_id = $NF }
        /^core id/     { core_id = $NF; print phys_id ":" core_id }
    ' /proc/cpuinfo | sort -u | wc -l)

    # Some kernels/architectures (e.g. many ARM systems) omit "physical id"
    # and "core id"; fall back to one socket with no SMT rather than divide by zero
    if [ "$physical_ids" -eq 0 ]; then physical_ids=1; fi
    if [ "$unique_cores" -eq 0 ]; then unique_cores=$total_cores; fi

    SOCKETS=$physical_ids
    PHYSICAL_CORES=$unique_cores
    TOTAL_CORES=$total_cores
    CORES_PER_SOCKET=$((PHYSICAL_CORES / SOCKETS))
    THREADS_PER_CORE=$((TOTAL_CORES / PHYSICAL_CORES))

    # Hyperthreading/SMT detection
    if [ "$THREADS_PER_CORE" -gt 1 ]; then
        HYPERTHREADING_DETECTED="Yes"
    else
        HYPERTHREADING_DETECTED="No"
    fi

    # Optimal OpenMP thread count: one thread per physical core on a socket
    OMP_THREADS=$CORES_PER_SOCKET
}
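
# Worked example (hypothetical 2-socket machine, 16 cores/socket, SMT on):
#   "physical id" takes two distinct values       -> SOCKETS=2
#   32 unique "physical id:core id" pairs         -> PHYSICAL_CORES=32
#   64 "processor" entries                        -> TOTAL_CORES=64
#   CORES_PER_SOCKET = 32 / 2  = 16
#   THREADS_PER_CORE = 64 / 32 = 2   -> hyperthreading: Yes
#   OMP_THREADS      = 16 (one OpenMP thread per physical core on a socket)
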
# Check arguments
if [ $# -lt 2 ]; then
    echo "Usage: $0 <mpi-processes> <executable> [args...]" >&2
    echo "" >&2
    echo "Examples:" >&2
    echo "  $0 2 ./my_app -i input.dat" >&2
    echo "  $0 4 python my_script.py --verbose" >&2
    exit 1
fi

MPI_PROCS=$1
shift
EXECUTABLE=$1
shift

# Validate MPI process count
if ! [[ $MPI_PROCS =~ ^[0-9]+$ ]] || [ "$MPI_PROCS" -lt 1 ]; then
    echo "Error: Invalid MPI process count '$MPI_PROCS'" >&2
    exit 1
fi

# Validate that the executable exists (as a file or on PATH)
if [ ! -f "$EXECUTABLE" ] && ! command -v "$EXECUTABLE" &> /dev/null; then
    echo "Error: Executable '$EXECUTABLE' not found" >&2
    exit 1
fi
# Detect system topology
detect_cpu_topology
# Canonical OpenMP settings (using the detected core count)
export OMP_NUM_THREADS=$OMP_THREADS   # Physical cores per socket (auto-detected)
export OMP_PLACES=sockets             # Place threads on sockets
export OMP_PROC_BIND=close            # Bind threads close to each other
export OMP_NESTED=false               # Disable nested parallelism
export OMP_DYNAMIC=false              # Disable dynamic thread adjustment

# Additional OpenMP optimizations for Intel runtimes
export KMP_AFFINITY=granularity=fine,compact,1,0  # Pin each thread to one HW thread, filling cores before HT siblings
export KMP_BLOCKTIME=0                # Threads sleep immediately after parallel regions (plays well with MPI)
export MKL_NUM_THREADS=$OMP_THREADS   # If using MKL, match OMP_NUM_THREADS
export MKL_DYNAMIC=false

# OpenBLAS thread configuration - always match OMP_NUM_THREADS
export OPENBLAS_NUM_THREADS=$OMP_NUM_THREADS
export GOTO_NUM_THREADS=$OMP_NUM_THREADS
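
# Why matching matters (illustrative arithmetic): with OMP_NUM_THREADS=16 and
# an unpinned BLAS that also spawned 16 threads per call, a single rank could
# run up to 16 * 16 = 256 threads on a 16-core socket. Matching the BLAS
# thread count to OMP_NUM_THREADS keeps each rank at 16 threads.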
# Canonical Open MPI optimizations
export OMPI_MCA_mpi_leave_pinned=1                    # Keep registered memory pinned between transfers
export OMPI_MCA_btl_vader_single_copy_mechanism=none  # Disable kernel-assisted single-copy (needed in containers where CMA is blocked)
export OMPI_MCA_btl_openib_allow_ib=1                 # Enable InfiniBand if available
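
# To confirm Open MPI sees these MCA settings (assuming ompi_info is on PATH;
# parameter names differ between Open MPI releases):
#   ompi_info --param btl vader --level 9 | grep single_copy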
# Additional system information
NUMA_NODES=$(lscpu | grep 'NUMA node(s):' | awk '{print $3}')
echo "=== MPI Application Configuration ==="
echo "System: ${SOCKETS} sockets, ${CORES_PER_SOCKET} cores/socket, ${NUMA_NODES} NUMA nodes"
echo "Topology: ${PHYSICAL_CORES} physical cores, ${TOTAL_CORES} logical cores"
echo "Hyperthreading: ${HYPERTHREADING_DETECTED} (${THREADS_PER_CORE} threads/core)"
echo "OpenMP: ${OMP_THREADS} threads/socket, ${OMP_PLACES} placement, ${OMP_PROC_BIND} binding"
echo "OpenBLAS: ${OPENBLAS_NUM_THREADS} threads (matching OMP_NUM_THREADS)"
echo "MPI: ${MPI_PROCS} processes"
echo ""
# Run application with optimal MPI/OpenMP settings
echo "=== Starting MPI Application ==="
exec mpirun -np "${MPI_PROCS}" \
    --bind-to socket \
    --map-by socket \
    --mca mpi_leave_pinned 1 \
    --mca btl_vader_single_copy_mechanism none \
    --report-bindings \
    "$EXECUTABLE" "$@"