-
-
Save invalidusrname/3285705 to your computer and use it in GitHub Desktop.
Resque: automatically kill stuck workers and retry failed jobs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Kill Resque processes that have been running for longer than TIMEOUT_PERIOD
# seconds, touch a flag file for every kill, and e-mail a notice about it.
#
# A process is considered a candidate when its command line (as printed by
# `ps -eo pid,command`) contains "resque", "Processing" and "since".
#
# Environment (both optional; defaults match the original hard-coded values):
#   TIMEOUT_PERIOD  maximum allowed process age in seconds (default: 600)
#   EMAIL_ADDRESS   whitespace-separated recipient(s) passed to mail(1)
#
# Side effects:
#   /tmp/retry-failed-resque-jobs is removed at start and re-created whenever
#   at least one process is killed. NOTE(review): presumably a separate job
#   watches this flag to retry failed jobs -- that consumer is not part of
#   this script; confirm against the deployment.
#
# Updated to make use of timeout and email variables
# Also see: http://vitobotta.com/resque-automatically-kill-stuck-workers-retry-failed-jobs/

# pipefail is deliberately NOT enabled: grep exits 1 when nothing matches,
# which is the normal "no stuck workers" outcome of the pipeline below.
set -eu

readonly TIMEOUT_PERIOD="${TIMEOUT_PERIOD:-600}"
# NOTE(review): this default looks like a redacted/obfuscated placeholder, not
# a deliverable address -- set EMAIL_ADDRESS in the environment or replace it.
readonly EMAIL_ADDRESS="${EMAIL_ADDRESS:-[email protected]}"
readonly RETRY_FLAG_FILE=/tmp/retry-failed-resque-jobs

#######################################
# Extract the process start time from one /proc/<pid>/stat line.
# Arguments: $1 - full contents of a /proc/<pid>/stat file
# Outputs:   field 22 (start time, in clock ticks since boot) on stdout
# Returns:   0 on success, 1 if the line cannot be parsed
#######################################
stat_start_ticks() {
  local stat_line=$1
  # Field 2 is "(comm)" and may itself contain spaces and parentheses, so
  # naive whitespace splitting can pick the wrong column. Everything after
  # the LAST ") " is plain space-separated fields, starting at field 3.
  local rest=${stat_line##*') '}
  local -a fields
  read -r -a fields <<<"$rest"
  # rest[0] is field 3, hence field 22 lives at index 19.
  (( ${#fields[@]} >= 20 )) || return 1
  [[ ${fields[19]} =~ ^[0-9]+$ ]] || return 1
  printf '%s\n' "${fields[19]}"
}

#######################################
# Compute how long a process has been running.
# Arguments: $1 - numeric PID
# Outputs:   age in whole seconds on stdout
# Returns:   0 on success, 1 if the process vanished or /proc is unreadable
#######################################
process_age_seconds() {
  local pid=$1
  local stat_line start_ticks uptime ticks_per_sec

  # The process may exit at any moment; treat a missing stat file as "gone".
  stat_line=$(cat -- "/proc/${pid}/stat" 2>/dev/null) || return 1
  start_ticks=$(stat_start_ticks "$stat_line") || return 1
  read -r uptime _ </proc/uptime || return 1

  # Ask the system for the tick rate instead of assuming 100; fall back to
  # the previously hard-coded value if getconf is unavailable.
  ticks_per_sec=$(getconf CLK_TCK 2>/dev/null) || ticks_per_sec=100
  [[ $ticks_per_sec =~ ^[1-9][0-9]*$ ]] || ticks_per_sec=100

  # Integer seconds since boot minus integer seconds-at-start.
  printf '%s\n' "$(( ${uptime%%.*} - start_ticks / ticks_per_sec ))"
}

#######################################
# E-mail a notice about a killed process.
# Globals:   EMAIL_ADDRESS, TIMEOUT_PERIOD (read)
# Arguments: $1 - PID that was killed, $2 - queue name
# Returns:   exit status of mail(1); 0 if there are no recipients
#######################################
notify_killed() {
  local pid=$1 queue=$2
  local -a recipients
  # Split on whitespace (as the unquoted original did) but without globbing.
  read -r -a recipients <<<"$EMAIL_ADDRESS"
  (( ${#recipients[@]} > 0 )) || return 0

  mail -s "Killed stuck Resque job on $(hostname) PID ${pid}" "${recipients[@]}" <<EOF

The forked child with pid #${pid} (queue: ${queue}) was found stuck for longer than ${TIMEOUT_PERIOD} seconds.
It has now been killed and job(s) flagged as failed as a result have been re-enqueued.
You may still want to check the Resque UI and the status of the workers for problems.

EOF
}

#######################################
# Scan the process table and kill every matching process older than
# TIMEOUT_PERIOD seconds.
# Globals:   TIMEOUT_PERIOD, RETRY_FLAG_FILE (read)
#######################################
main() {
  if ! [[ $TIMEOUT_PERIOD =~ ^[0-9]+$ ]]; then
    printf 'TIMEOUT_PERIOD must be a non-negative integer, got: %s\n' \
      "$TIMEOUT_PERIOD" >&2
    exit 1
  fi

  # Start every run with a clean flag.
  if [[ -f "$RETRY_FLAG_FILE" ]]; then
    rm -- "$RETRY_FLAG_FILE"
  fi

  local pid command age queue
  # '[r]esque' keeps grep from matching its own command line; it is quoted
  # so the shell cannot glob-expand it against files in the current directory.
  ps -eo pid,command \
    | grep '[r]esque' \
    | grep 'Processing' \
    | grep 'since' \
    | while read -r pid command; do
        [[ $pid =~ ^[0-9]+$ ]] || continue

        # Not named SECONDS: that is a Bash special variable that keeps
        # counting after assignment.
        age=$(process_age_seconds "$pid") || continue
        (( age > TIMEOUT_PERIOD )) || continue

        # SIGKILL is kept from the original script. If the process is
        # already gone there is nothing to report, and one failed kill must
        # not stop the remaining candidates from being handled.
        kill -9 "$pid" 2>/dev/null || continue
        touch -- "$RETRY_FLAG_FILE"

        # Third space-separated word of the command line is reported as the
        # queue name.
        queue=$(cut -d ' ' -f 3 <<<"$command")

        # Notification is best-effort: a broken mailer must not abort the loop.
        notify_killed "$pid" "$queue" \
          || printf 'warning: could not send notification for PID %s\n' "$pid" >&2
      done
}

main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment