Last active
March 20, 2025 20:56
-
-
Save sfirke/533383f0656bba3c56d19dba2a35b946 to your computer and use it in GitHub Desktop.
Bash script to monitor health of Airflow scheduler container deployed with docker compose, restarting it if necessary
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Health monitor for the Airflow scheduler container deployed with docker compose.
#
# If the scheduler container does not report "healthy", the compose stack in
# ~/airflow is taken down and brought back up, the script waits five minutes,
# re-checks the health status and emails the outcome.
#
# Requires the sendemail package being installed on the host machine.
#
# NOTE: `set -e` is deliberately not used. A failing `docker inspect` must fall
# through to the "not healthy" branch instead of aborting the script, and a
# failed restart must still produce the notification email.

CONTAINER="airflow-airflow-scheduler-1"

# Print the health status docker reports for the scheduler container.
# The caller treats anything other than exactly "healthy" (including whatever
# is captured when the inspect itself fails) as unhealthy.
scheduler_health() {
  docker inspect -f '{{.State.Health.Status}}' "$CONTAINER"
}

# Get container health status
healthy=$(scheduler_health)

# Check if healthy
if [[ "$healthy" != "healthy" ]]; then
  # Attempt to restart Airflow - the scheduler will stop if the Azure Postgres
  # DB becomes unavailable due to maintenance.
  # Only run compose after a successful cd, so that `down` can never act on a
  # compose project in whatever directory the script happened to start in.
  if cd ~/airflow; then
    docker compose -f docker-compose.yaml --env-file .env down
    docker compose -f docker-compose.yaml --env-file .env up -d
    # Give the restarted containers time to report a health status.
    sleep 300
  else
    # No restart was attempted; still fall through so a notification is sent.
    printf 'error: cannot cd to ~/airflow; skipping docker compose restart\n' >&2
  fi

  # Check if healthy
  healthy=$(scheduler_health)
  if [[ "$healthy" != "healthy" ]]; then
    # Send email notification
    SUBJ="Airflow scheduler container is unhealthy"
    MESSAGE="Please investigate as Airflow jobs may not be running. Sam's health monitor bash script attempted a docker compose down & up and the container remains unhealthy."
  else
    SUBJ="Airflow scheduler container went unhealthy and was restarted successfully"
    MESSAGE="This happens when the Azure Postgres DB becomes unavailable, either due to maintenance (15 minutes no more often than monthly) or some other issue like max connections. Airflow jobs should be running again, but confirm in the web UI and consider checking relevant logs in the scheduler container and the Azure Portal for the Flexible PostgreSQL server resource."
  fi

  SERVER="my.smtp.server"
  FROM="[email protected]"
  TO="[email protected]"
  CHARSET="utf-8"

  # Every expansion is quoted: the subject, message and addresses contain
  # spaces and must each reach sendemail as a single argument.
  sendemail -f "$FROM" -t "$TO" -u "$SUBJ" -s "$SERVER" -m "$MESSAGE" -v -o "message-charset=$CHARSET"

  # NOTE(review): presumably this six-hour pause is meant to space out repeated
  # restart attempts and emails - confirm against how the script is scheduled.
  sleep 21600
fi
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
#!/bin/bash
#
# Health monitor for the Airflow scheduler container deployed with docker compose.
#
# If the scheduler container does not report "healthy", the compose stack in
# ~/airflow is taken down and brought back up, the script waits five minutes,
# re-checks the health status and emails the outcome.
#
# Requires the sendemail package being installed on the host machine.
#
# NOTE: `set -e` is deliberately not used. A failing `docker inspect` must fall
# through to the "not healthy" branch instead of aborting the script, and a
# failed restart must still produce the notification email.

CONTAINER="airflow-airflow-scheduler-1"

# Print the health status docker reports for the scheduler container.
# The caller treats anything other than exactly "healthy" (including whatever
# is captured when the inspect itself fails) as unhealthy.
scheduler_health() {
  docker inspect -f '{{.State.Health.Status}}' "$CONTAINER"
}

# Get container health status
healthy=$(scheduler_health)

# Check if healthy
if [[ "$healthy" != "healthy" ]]; then
  # Attempt to restart Airflow - the scheduler will stop if the Azure Postgres
  # DB becomes unavailable due to maintenance.
  # Only run compose after a successful cd, so that `down` can never act on a
  # compose project in whatever directory the script happened to start in.
  if cd ~/airflow; then
    docker compose -f docker-compose.yaml --env-file .env down
    docker compose -f docker-compose.yaml --env-file .env up -d
    # Give the restarted containers time to report a health status.
    sleep 300
  else
    # No restart was attempted; still fall through so a notification is sent.
    printf 'error: cannot cd to ~/airflow; skipping docker compose restart\n' >&2
  fi

  # Check if healthy
  healthy=$(scheduler_health)
  if [[ "$healthy" != "healthy" ]]; then
    # Send email notification
    SUBJ="Airflow scheduler container is unhealthy"
    MESSAGE="Please investigate as Airflow jobs may not be running. Sam's health monitor bash script attempted a docker compose down & up and the container remains unhealthy."
  else
    SUBJ="Airflow scheduler container went unhealthy and was restarted successfully"
    MESSAGE="This happens when the Azure Postgres DB becomes unavailable, either due to maintenance (15 minutes no more often than monthly) or some other issue like max connections. Airflow jobs should be running again, but confirm in the web UI and consider checking relevant logs in the scheduler container and the Azure Portal for the Flexible PostgreSQL server resource."
  fi

  SERVER="my.smtp.server"
  FROM="[email protected]"
  TO="[email protected]"
  CHARSET="utf-8"

  # Every expansion is quoted: the subject, message and addresses contain
  # spaces and must each reach sendemail as a single argument.
  sendemail -f "$FROM" -t "$TO" -u "$SUBJ" -s "$SERVER" -m "$MESSAGE" -v -o "message-charset=$CHARSET"

  # NOTE(review): presumably this six-hour pause is meant to space out repeated
  # restart attempts and emails - confirm against how the script is scheduled.
  sleep 21600
fi