@ealebed
Last active August 26, 2021 11:13
Apache Airflow with custom healthcheck
#!/bin/bash
curl -s http://127.0.0.1:8080/health | jq -e '.scheduler.status == "healthy"'
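The script above turns Airflow's own health report into an exit code the kubelet can act on: jq's -e flag exits 0 only when the expression is true, and non-zero on false, null, or empty input (e.g. when the webserver is down). For orientation, the /health endpoint of an Airflow 2.x webserver returns a small JSON document along these lines; the heartbeat value below is illustrative, not taken from the gist:

# Illustrative payload; piping it through the same jq filter shows the probe logic.
echo '{"metadatabase": {"status": "healthy"}, "scheduler": {"status": "healthy", "latest_scheduler_heartbeat": "2021-08-26T11:13:00+00:00"}}' \
  | jq -e '.scheduler.status == "healthy"'
echo "jq exit code: $?"   # 0 = healthy, probe passes; 1 or 4 = unhealthy or no data, probe fails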
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: airflow-role-admin
rules:
- apiGroups:
  - ""
  resources:
  - pods
  - pods/log
  verbs:
  - get
  - watch
  - list
  - create
  - delete
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: airflow-role-binding
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: airflow-role-admin
subjects:
- kind: ServiceAccount
  name: default
---
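The Role and RoleBinding above give the pod's default ServiceAccount create/get/list/watch/delete rights on pods and pod logs, presumably for DAGs that launch pods through the cncf.kubernetes provider baked into the image. A quick way to confirm the binding took effect is kubectl's built-in access check; the namespace here is an assumption, adjust if you deploy elsewhere:

# Replace "default" (both the namespace and the ServiceAccount namespace) as needed.
kubectl auth can-i create pods --as=system:serviceaccount:default:default -n default
kubectl auth can-i get pods/log --as=system:serviceaccount:default:default -n default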
apiVersion: v1
data:
  airflow.cfg: |
    [core]
    dags_folder = /usr/local/airflow/dags
    executor = LocalExecutor
    hide_sensitive_variable_fields = True
    # sql_alchemy_conn = AIRFLOW__CORE__SQL_ALCHEMY_CONN from manifest
    sql_alchemy_conn =
    load_examples = False
    load_default_connections = False
    # fernet_key = AIRFLOW__CORE__FERNET_KEY
    fernet_key =
    [logging]
    base_log_folder = /usr/local/airflow/logs
    logging_level = INFO
    fab_logging_level = ERROR
    worker_logging_level = WARN
    log_format = %%(asctime)s [%%(filename)s:%%(lineno)d] %%(levelname)s - %%(message)s
    simple_log_format = %%(asctime)s %%(levelname)s - %%(message)s
    [cli]
    api_client = airflow.api.client.local_client
    endpoint_url = http://my-airflow.example.cool
    [api]
    auth_backend = airflow.api.auth.backend.basic_auth
    [webserver]
    base_url = http://my-airflow.example.cool
    web_server_host = 0.0.0.0
    web_server_port = 8080
    authenticate = True
    auth_backend = airflow.contrib.auth.backends.password_auth
    rbac = True
    expose_config = True
    [scheduler]
    min_file_process_interval = 5
    scheduler_health_check_threshold = 300
    [code_editor]
    git_default_args = -c color.ui=true
    git_init_repo = False
kind: ConfigMap
metadata:
  name: airflow-configmap
---
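This ConfigMap is mounted over the image's airflow.cfg in the Deployment below, while the SQLAlchemy URL and Fernet key arrive as AIRFLOW__CORE__* environment variables, which Airflow gives precedence over the file (hence the deliberately empty sql_alchemy_conn and fernet_key entries). To see which value actually wins, the stock Airflow CLI can print the effective configuration from inside a running pod; the deploy/airflow and container targets below assume the Deployment defined further down:

# "airflow config get-value SECTION OPTION" prints the effective setting,
# after environment-variable overrides are applied.
kubectl exec deploy/airflow -c airflow -- airflow config get-value core executor
kubectl exec deploy/airflow -c airflow -- airflow config get-value core dags_folder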
apiVersion: v1
kind: Service
metadata:
  name: airflow
spec:
  clusterIP: None
  ports:
  - name: http
    port: 8080
  selector:
    app: airflow
---
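clusterIP: None makes this a headless Service, so it only gives the single webserver pod a stable in-cluster DNS name rather than a load-balanced virtual IP; no Ingress is included in this gist, so for ad-hoc access from a workstation a port-forward is the simplest route:

kubectl port-forward deploy/airflow 8080:8080
# ...then, in a second terminal:
curl -s http://127.0.0.1:8080/health | jq .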
apiVersion: apps/v1
kind: Deployment
metadata:
  name: airflow
spec:
  replicas: 1
  revisionHistoryLimit: 1
  selector:
    matchLabels:
      app: airflow
  strategy:
    type: Recreate
  template:
    metadata:
      labels:
        app: airflow
    spec:
      containers:
      - env:
        - name: GIT_SYNC_REPO
          value: https://my:[email protected]/ealebed/airflow/
        - name: GIT_SYNC_BRANCH
          value: master
        - name: GIT_SYNC_ROOT
          value: /git
        - name: GIT_SYNC_DEST
          value: repo
        image: k8s.gcr.io/git-sync:v3.3.4
        name: git-sync
        securityContext:
          runAsUser: 65533
        volumeMounts:
        - mountPath: /git
          name: airflow-dags
      - env:
        - name: AIRFLOW__CORE__SQL_ALCHEMY_CONN
          value: postgresql+psycopg2://airflow:airflow@postgreshost:5432/airflow
        - name: AIRFLOW_CONN_POSTGRES_DEFAULT
          value: postgres://airflow:airflow@postgreshost:5432/airflow
        - name: AIRFLOW_SLACK_WEBHOOK_URL
          value: T02H6C..........q3QPW0m
        - name: AIRFLOW_ADMIN_USER
          value: airflow
        - name: AIRFLOW_ADMIN_PASSWORD
          value: airflow
        - name: AIRFLOW__CORE__FERNET_KEY
          value: tsJjtESQbN_24ADldX2HISyIVwfj7pW1nEfYDkcPYMY=
        - name: AIRFLOW__CORE__EXECUTOR
          value: LocalExecutor
        image: index.docker.io/ealebed/airflow:2.1.2
        livenessProbe:
          exec:
            command:
            - bash
            - -c
            - /airflow-healthcheck.sh
          failureThreshold: 5
          initialDelaySeconds: 30
          periodSeconds: 15
          timeoutSeconds: 5
        name: airflow
        ports:
        - containerPort: 8080
          name: http
        readinessProbe:
          httpGet:
            path: /health
            port: 8080
          initialDelaySeconds: 60
          periodSeconds: 5
          timeoutSeconds: 5
        resources:
          limits:
            cpu: "2"
            memory: 6Gi
          requests:
            cpu: "2"
            memory: 6Gi
        volumeMounts:
        - mountPath: /usr/local/airflow/airflow.cfg
          name: airflow-configmap
          subPath: airflow.cfg
        - mountPath: /usr/local/airflow/dags
          name: airflow-dags
      securityContext:
        fsGroup: 1000
      volumes:
      - emptyDir: {}
        name: airflow-dags
      - configMap:
          name: airflow-configmap
        name: airflow-configmap
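To roll out the whole stack and watch the probes do their job (the manifest file name below is an assumption; use whatever you saved the documents above as):

kubectl apply -f airflow.yaml
kubectl rollout status deploy/airflow
kubectl describe pod -l app=airflow   # liveness/readiness results and restarts show up under Events

The image referenced by the Deployment, index.docker.io/ealebed/airflow:2.1.2, is built from the Dockerfile below.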
FROM python:3.9-slim-buster
LABEL maintainer="Yevhen Lebid <[email protected]>"
# Never prompts the user for choices on installation/configuration of packages
ENV DEBIAN_FRONTEND=noninteractive \
    TERM=linux
# Airflow
ARG AIRFLOW_VERSION=2.1.2
ARG AIRFLOW_USER_HOME=/usr/local/airflow
ENV AIRFLOW_HOME=${AIRFLOW_USER_HOME}
RUN set -ex \
    && buildDeps=" \
        libsasl2-dev \
        libssl-dev \
        libffi-dev \
        libpq-dev \
        build-essential \
    " \
    && pipDeps=" \
        pytz \
        pyOpenSSL \
        ndg-httpsclient \
        pyasn1 \
        psycopg2-binary \
        SQLAlchemy \
    " \
    && apt-get update -yqq \
    && apt-get upgrade -yqq \
    && apt-get install -yqq --no-install-recommends git curl jq $buildDeps \
    && useradd -ms /bin/bash -d ${AIRFLOW_USER_HOME} -u 65533 airflow \
    && pip install -U pip setuptools wheel \
    && pip install $pipDeps \
    && pip install apache-airflow[async,http,postgres,cncf.kubernetes,password,slack]==${AIRFLOW_VERSION} airflow-code-editor \
    && apt-get purge --auto-remove -yqq $buildDeps \
    && apt-get autoremove -yqq --purge \
    && apt-get clean \
    && rm -rf \
        /var/lib/apt/lists/* \
        /tmp/* \
        /var/tmp/* \
        /usr/share/man \
        /usr/share/doc \
        /usr/share/doc-base
COPY entrypoint.sh /entrypoint.sh
COPY airflow-healthcheck.sh /airflow-healthcheck.sh
COPY airflow.cfg ${AIRFLOW_USER_HOME}/airflow.cfg
RUN chown -R airflow:airflow ${AIRFLOW_USER_HOME}
EXPOSE 8080
USER airflow
WORKDIR ${AIRFLOW_USER_HOME}
ENTRYPOINT ["/entrypoint.sh"]
CMD ["webserver"]
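Building and publishing the image is the usual docker build/push (substitute your own registry if you cannot push to ealebed/); the version branch of the entrypoint gives a cheap smoke test that needs no database:

docker build -t ealebed/airflow:2.1.2 .
docker push ealebed/airflow:2.1.2
docker run --rm ealebed/airflow:2.1.2 version   # runs "airflow version" via the entrypoint below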
#!/usr/bin/env bash
declare -a DEFAULT_CONNS=(
  "slack"
  "postgres_default"
)
case "$1" in
  webserver)
    airflow db init
    airflow users create \
      --role 'Admin' \
      --username "${AIRFLOW_ADMIN_USER}" \
      --password "${AIRFLOW_ADMIN_PASSWORD}" \
      --firstname 'Air' \
      --lastname 'Flow' \
      --email '[email protected]'
    for CONN in "${DEFAULT_CONNS[@]}"
    do
      airflow connections delete "${CONN}"
    done
    airflow connections add 'postgres_default' \
      --conn-uri "${AIRFLOW_CONN_POSTGRES_DEFAULT}"
    airflow connections add 'slack' \
      --conn-type 'http' \
      --conn-host 'https://hooks.slack.com/services' \
      --conn-password "${AIRFLOW_SLACK_WEBHOOK_URL}"
    # LocalExecutor in a single pod: run the scheduler in the background and
    # let the webserver take over as the container's main process.
    exec airflow scheduler &
    exec airflow webserver
    ;;
  scheduler)
    # To give the webserver time to run "airflow db init".
    sleep 10
    exec airflow "$@"
    ;;
  version)
    exec airflow "$@"
    ;;
  *)
    # The command is something like bash, not an airflow subcommand. Just run it in the right environment.
    exec "$@"
    ;;
esac
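The webserver branch expects the admin credentials, connection URIs, and Slack webhook token in the environment; inside the cluster the Deployment supplies them, but the container can also be exercised locally against any reachable Postgres. A sketch, reusing the placeholder values from the manifest (replace them with real ones):

docker run --rm -p 8080:8080 \
  -e AIRFLOW__CORE__SQL_ALCHEMY_CONN='postgresql+psycopg2://airflow:airflow@postgreshost:5432/airflow' \
  -e AIRFLOW_CONN_POSTGRES_DEFAULT='postgres://airflow:airflow@postgreshost:5432/airflow' \
  -e AIRFLOW_ADMIN_USER=airflow \
  -e AIRFLOW_ADMIN_PASSWORD=airflow \
  -e AIRFLOW_SLACK_WEBHOOK_URL='T.../B.../...' \
  -e AIRFLOW__CORE__FERNET_KEY='tsJjtESQbN_24ADldX2HISyIVwfj7pW1nEfYDkcPYMY=' \
  ealebed/airflow:2.1.2 webserver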