Skip to content

Instantly share code, notes, and snippets.

@afiqiqmal
Created June 21, 2025 13:58
Show Gist options
  • Save afiqiqmal/274940cee7f37517ab217fc023514752 to your computer and use it in GitHub Desktop.
Save afiqiqmal/274940cee7f37517ab217fc023514752 to your computer and use it in GitHub Desktop.
Monitor Supervisor Zombie Process
#!/bin/bash
#
# Watchdog for supervisord, intended to be run periodically (e.g. from cron).
#
# Checks, in order:
#   1. a process named "$SUPERVISORD_SERVICE" exists;
#   2. no such process is in the zombie (Z) state;
#   3. `supervisorctl version` answers within $TIMEOUT seconds;
#   4. `supervisorctl status` reports at least one RUNNING program.
# If 1-3 fail, the systemd unit is restarted; if 4 fails, all managed
# programs are restarted through supervisorctl.
#
# Exit status: 0 when everything is healthy, 1 when any check failed
# (whether or not the remediation succeeded).
# Every outcome is appended to $LOG_FILE.

# Variables
readonly LOG_DIR="/var/log/supervisord_monitor"
readonly LOG_FILE="$LOG_DIR/monitor.log"
readonly SUPERVISORD_SERVICE="supervisord"
readonly TIMEOUT_CMD="/usr/bin/timeout"
readonly SUPERVISORCTL_CMD="/usr/local/bin/supervisorctl"
readonly TIMEOUT=5 # seconds

#######################################
# Append a message to the log, stamped with the time of the call so that
# lines written after a slow restart carry an accurate timestamp.
# Globals:   LOG_FILE (read)
# Arguments: message text
#######################################
log() {
  printf '%s - %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*" >> "$LOG_FILE"
}

#######################################
# Log the failure reason, restart the systemd unit, record whether the
# restart command succeeded, and terminate the script with status 1.
# Globals:   SUPERVISORD_SERVICE (read)
# Arguments: $1 - human-readable reason for the restart
#######################################
restart_service_and_exit() {
  local reason=$1
  local rc

  log "ERROR: ${reason}, attempting restart..."
  systemctl restart "$SUPERVISORD_SERVICE"
  rc=$?
  if (( rc == 0 )); then
    log "supervisord restarted."
  else
    log "ERROR: systemctl restart ${SUPERVISORD_SERVICE} failed (exit ${rc})."
  fi
  exit 1
}

#######################################
# Run a supervisorctl subcommand bounded by $TIMEOUT seconds.
# Globals:   TIMEOUT_CMD, TIMEOUT, SUPERVISORCTL_CMD (read)
# Arguments: supervisorctl arguments
# Outputs:   whatever supervisorctl prints
# Returns:   exit status of timeout/supervisorctl
#######################################
supervisorctl_bounded() {
  "$TIMEOUT_CMD" "$TIMEOUT" "$SUPERVISORCTL_CMD" "$@"
}

main() {
  local status_output
  local rc

  # Ensure log directory exists. Keep monitoring even if it cannot be
  # created; only the logging is degraded in that case.
  if ! mkdir -p "$LOG_DIR"; then
    printf 'WARNING: cannot create log directory %s\n' "$LOG_DIR" >&2
  fi

  # Check supervisord process
  if ! pgrep -x "$SUPERVISORD_SERVICE" > /dev/null; then
    restart_service_and_exit "supervisord process not found"
  fi

  # Check for a zombie before probing responsiveness, so that the more
  # specific diagnosis is logged instead of a generic "unresponsive".
  if ps -o stat= -C "$SUPERVISORD_SERVICE" | grep -q 'Z'; then
    restart_service_and_exit "supervisord is a zombie"
  fi

  # Check supervisord responsiveness
  if ! supervisorctl_bounded version > /dev/null 2>&1; then
    restart_service_and_exit "supervisord unresponsive (no version response)"
  fi

  # Check that at least one managed program is RUNNING. The output is
  # captured first so grep -q cannot close the pipe on supervisorctl early.
  # The exit status of `status` is deliberately ignored; only the text is
  # inspected.
  status_output=$(supervisorctl_bounded status)
  if ! grep -q RUNNING <<< "$status_output"; then
    log "ERROR: supervisor config process not running under supervisord."
    "$SUPERVISORCTL_CMD" restart all
    rc=$?
    if (( rc == 0 )); then
      log "supervisorctl process restarted."
    else
      log "ERROR: supervisorctl restart all failed (exit ${rc})."
    fi
    exit 1
  fi

  log "OK: supervisord is running and responsive."
  exit 0
}

main "$@"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment