-
-
Save sfirke/533383f0656bba3c56d19dba2a35b946 to your computer and use it in GitHub Desktop.
#!/bin/bash
# Health monitor for the Airflow scheduler container.
#
# Reads the Docker health status of the scheduler container. If it is anything
# other than "healthy", restarts the Airflow stack with docker compose, waits,
# re-checks the status, and emails the outcome.
#
# Requires the sendemail package being installed on the host machine

CONTAINER="airflow-airflow-scheduler-1"

# Get container health status.
# If docker inspect fails, the captured status will not be "healthy", so the
# container is treated as unhealthy.
healthy=$(docker inspect -f '{{.State.Health.Status}}' "$CONTAINER")

# Check if healthy
if [[ "$healthy" != "healthy" ]]; then
  # attempt to restart Airflow - the scheduler will stop if the Azure Postgres DB becomes unavailable due to maintenance
  # Only run compose when the cd succeeded, so "down" can never act on a
  # compose file that happens to live in some other working directory.
  if cd ~/airflow; then
    docker compose -f docker-compose.yaml --env-file .env down
    docker compose -f docker-compose.yaml --env-file .env up -d
    # Wait 5 minutes before re-checking the health status.
    sleep 300
  else
    printf 'cannot cd to ~/airflow; skipping docker compose restart\n' >&2
  fi

  # Check if healthy
  healthy=$(docker inspect -f '{{.State.Health.Status}}' "$CONTAINER")
  if [[ "$healthy" != "healthy" ]]; then
    # Send email notification
    SUBJ="Airflow scheduler container is unhealthy"
    MESSAGE="Please investigate as Airflow jobs may not be running. Sam's health monitor bash script attempted a docker compose down & up and the container remains unhealthy."
  else
    SUBJ="Airflow scheduler container went unhealthy and was restarted successfully"
    MESSAGE="This happens when the Azure Postgres DB becomes unavailable, either due to maintenance (15 minutes no more often than monthly) or some other issue like max connections. Airflow jobs should be running again, but confirm in the web UI and consider checking relevant logs in the scheduler container and the Azure Portal for the Flexible PostgreSQL server resource."
  fi

  # NOTE(review): the server and addresses below look like placeholders or
  # redacted values - set real ones before deploying.
  SERVER="my.smtp.server"
  FROM="[email protected]"
  TO="[email protected]"
  CHARSET="utf-8"

  # Quote every expansion so each value reaches sendemail as exactly one
  # argument (no word-splitting or glob expansion of the subject/message).
  sendemail -f "$FROM" -t "$TO" -u "$SUBJ" -s "$SERVER" -m "$MESSAGE" -v -o message-charset="$CHARSET"

  # Pause 6 hours before exiting.
  # NOTE(review): presumably throttles repeat notifications - confirm against
  # how this script is scheduled.
  sleep 21600
fi
#!/bin/bash # Requires the sendemail package being installed on the host machine # Get container health status healthy=$(docker inspect -f '{{.State.Health.Status}}' airflow-airflow-scheduler-1) # Check if healthy if [[
#!/bin/bash
# Health monitor for the Airflow scheduler container.
#
# Reads the Docker health status of the scheduler container. If it is anything
# other than "healthy", restarts the Airflow stack with docker compose, waits,
# re-checks the status, and emails the outcome.
#
# Requires the sendemail package being installed on the host machine

CONTAINER="airflow-airflow-scheduler-1"

# Get container health status.
# If docker inspect fails, the captured status will not be "healthy", so the
# container is treated as unhealthy.
healthy=$(docker inspect -f '{{.State.Health.Status}}' "$CONTAINER")

# Check if healthy
if [[ "$healthy" != "healthy" ]]; then
  # attempt to restart Airflow - the scheduler will stop if the Azure Postgres DB becomes unavailable due to maintenance
  # Only run compose when the cd succeeded, so "down" can never act on a
  # compose file that happens to live in some other working directory.
  if cd ~/airflow; then
    docker compose -f docker-compose.yaml --env-file .env down
    docker compose -f docker-compose.yaml --env-file .env up -d
    # Wait 5 minutes before re-checking the health status.
    sleep 300
  else
    printf 'cannot cd to ~/airflow; skipping docker compose restart\n' >&2
  fi

  # Check if healthy
  healthy=$(docker inspect -f '{{.State.Health.Status}}' "$CONTAINER")
  if [[ "$healthy" != "healthy" ]]; then
    # Send email notification
    SUBJ="Airflow scheduler container is unhealthy"
    MESSAGE="Please investigate as Airflow jobs may not be running. Sam's health monitor bash script attempted a docker compose down & up and the container remains unhealthy."
  else
    SUBJ="Airflow scheduler container went unhealthy and was restarted successfully"
    MESSAGE="This happens when the Azure Postgres DB becomes unavailable, either due to maintenance (15 minutes no more often than monthly) or some other issue like max connections. Airflow jobs should be running again, but confirm in the web UI and consider checking relevant logs in the scheduler container and the Azure Portal for the Flexible PostgreSQL server resource."
  fi

  # NOTE(review): the server and addresses below look like placeholders or
  # redacted values - set real ones before deploying.
  SERVER="my.smtp.server"
  FROM="[email protected]"
  TO="[email protected]"
  CHARSET="utf-8"

  # Quote every expansion so each value reaches sendemail as exactly one
  # argument (no word-splitting or glob expansion of the subject/message).
  sendemail -f "$FROM" -t "$TO" -u "$SUBJ" -s "$SERVER" -m "$MESSAGE" -v -o message-charset="$CHARSET"

  # Pause 6 hours before exiting.
  # NOTE(review): presumably throttles repeat notifications - confirm against
  # how this script is scheduled.
  sleep 21600
fi