feat(airflow,jupyterhub): share data
@@ -47,6 +47,16 @@ postgresql:
data:
  metadataSecretName: airflow-metadata-connection

# DAG persistence configuration
dags:
  persistence:
    enabled: {{ .Env.AIRFLOW_DAGS_PERSISTENCE_ENABLED | default "true" }}
    {{- if eq (.Env.AIRFLOW_DAGS_STORAGE_TYPE | default "default") "nfs" }}
    existingClaim: airflow-dags-nfs-pvc
    {{- else }}
    existingClaim: airflow-dags-pvc
    {{- end }}

ingress:
  apiServer:
    enabled: true
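A quick post-install sanity check is to read back which claim the rendered chart actually wires into the scheduler; the release name "airflow", the "jupyter" namespace, and the chart's "dags" volume name below are assumptions, not guaranteed by this diff.

# Sketch: confirm which existingClaim the deployed chart uses for DAGs
# (adjust release/namespace/volume name to your setup)
kubectl -n jupyter get deploy airflow-scheduler \
  -o jsonpath='{.spec.template.spec.volumes[?(@.name=="dags")].persistentVolumeClaim.claimName}'
# With AIRFLOW_DAGS_STORAGE_TYPE unset (default), this should print: airflow-dags-pvc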
@@ -58,3 +68,12 @@ ingress:
      - name: {{ .Env.AIRFLOW_HOST }}
        tls:
          enabled: true

# Security contexts for shared file system access
securityContexts:
  pod:
    runAsUser: 1000
    runAsGroup: 0
    fsGroup: 100
  container:
    allowPrivilegeEscalation: false
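To see the effect of these security contexts at runtime, the identity inside a pod can be inspected; the deployment and container names below assume the chart's usual naming and may differ in your release.

# Sketch: inspect the runtime identity produced by the pod security context
kubectl -n jupyter exec deploy/airflow-scheduler -c scheduler -- id
# Expect uid=1000 and gid=0, with group 100 added via fsGroup for the shared volume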
airflow/dags-pvc.yaml (new file)
@@ -0,0 +1,11 @@
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: airflow-dags-pvc
spec:
  accessModes:
    - ReadWriteMany # Multiple pods can read/write
  storageClassName: longhorn # Explicitly use Longhorn which supports RWX
  resources:
    requests:
      storage: 10Gi
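A minimal check that the new claim binds with the expected access mode (namespace assumed to be jupyter):

kubectl -n jupyter get pvc airflow-dags-pvc
# Expect STATUS=Bound and ACCESS MODES=RWX once the volume is provisioned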
@@ -1,9 +1,14 @@
set fallback := true

export AIRFLOW_NAMESPACE := env("AIRFLOW_NAMESPACE", "airflow")
export AIRFLOW_NAMESPACE := env("AIRFLOW_NAMESPACE", "jupyter")
export AIRFLOW_CHART_VERSION := env("AIRFLOW_CHART_VERSION", "1.18.0")
export EXTERNAL_SECRETS_NAMESPACE := env("EXTERNAL_SECRETS_NAMESPACE", "external-secrets")
export KEYCLOAK_REALM := env("KEYCLOAK_REALM", "buunstack")
export AIRFLOW_DAGS_PERSISTENCE_ENABLED := env("AIRFLOW_DAGS_PERSISTENCE_ENABLED", "")
export AIRFLOW_DAGS_STORAGE_TYPE := env("AIRFLOW_DAGS_STORAGE_TYPE", "")
export AIRFLOW_NFS_IP := env("AIRFLOW_NFS_IP", "")
export AIRFLOW_NFS_PATH := env("AIRFLOW_NFS_PATH", "")
export AIRFLOW_DAGS_STORAGE_SIZE := env("AIRFLOW_DAGS_STORAGE_SIZE", "10Gi")

[private]
default:
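These variables can be pre-set to skip the corresponding gum prompts; a sketch of a non-interactive run (values are illustrative, and any other unset variables such as AIRFLOW_HOST are still prompted for):

# Pre-answer the DAG-persistence prompt and install in one go
AIRFLOW_DAGS_PERSISTENCE_ENABLED=true AIRFLOW_DAGS_STORAGE_TYPE=default just install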
@@ -18,7 +23,7 @@ add-helm-repo:
remove-helm-repo:
    helm repo remove apache-airflow

# Create Airflow namespace
# Create namespace (shared with JupyterHub when using jupyter namespace)
create-namespace:
    @kubectl get namespace ${AIRFLOW_NAMESPACE} &>/dev/null || \
        kubectl create namespace ${AIRFLOW_NAMESPACE}
@@ -247,6 +252,17 @@ install:
                --placeholder="e.g., airflow.example.com"
            )
        done
    if [ -z "${AIRFLOW_DAGS_PERSISTENCE_ENABLED}" ]; then
        if gum confirm "Enable DAG persistence with PVC?"; then
            AIRFLOW_DAGS_PERSISTENCE_ENABLED="true"
        else
            AIRFLOW_DAGS_PERSISTENCE_ENABLED="false"
        fi
    fi
    # Force default storage type (NFS disabled due to permission issues)
    if [ "${AIRFLOW_DAGS_PERSISTENCE_ENABLED}" = "true" ]; then
        AIRFLOW_DAGS_STORAGE_TYPE="default"
    fi
    echo "Installing Airflow..."
    just create-namespace
    just setup-database
@@ -254,7 +270,10 @@ install:
    just create-keycloak-roles
    just add-helm-repo

    # Create API server config ConfigMap
    if [ "${AIRFLOW_DAGS_PERSISTENCE_ENABLED}" = "true" ]; then
        just setup-dags-storage "default"
    fi

    KEYCLOAK_HOST=${KEYCLOAK_HOST} KEYCLOAK_REALM=${KEYCLOAK_REALM} \
        gomplate -f webserver_config.py.gomplate -o webserver_config.py
    kubectl delete configmap airflow-api-server-config -n ${AIRFLOW_NAMESPACE} --ignore-not-found
@@ -268,6 +287,14 @@ install:
        -f airflow-values.yaml
    echo "Airflow installation completed"
    echo "Access Airflow at: https://${AIRFLOW_HOST}"
    if [ "${AIRFLOW_NAMESPACE}" = "jupyter" ] && [ "${AIRFLOW_DAGS_PERSISTENCE_ENABLED}" = "true" ]; then
        echo ""
        echo "📝 JupyterHub Integration Notes:"
        echo " • If JupyterHub is already installed with DAG mounting enabled:"
        echo "   Restart user pods to access DAGs: kubectl delete pods -n jupyter -l app.kubernetes.io/component=singleuser-server"
        echo " • If JupyterHub will be installed later:"
        echo "   Enable 'Airflow DAG storage mounting' during JupyterHub installation"
    fi

# Uninstall Airflow
uninstall delete-db='true':
@@ -394,6 +421,35 @@ delete-api-user username='':
        echo "Deletion cancelled"
    fi

# Setup DAG storage (PVC)
setup-dags-storage storage-type='':
    #!/bin/bash
    set -euo pipefail
    echo "Setting up DAG storage (default)..."
    echo "Creating PersistentVolumeClaim with default StorageClass..."
    kubectl apply -n ${AIRFLOW_NAMESPACE} -f dags-pvc.yaml
    echo "✅ Default storage configured"
    echo " PVC: airflow-dags-pvc"
    echo " Uses cluster default StorageClass (k3s local-path, etc.)"
    echo ""
    echo "DAG storage is ready for use"
    echo "Mount path in pods: /opt/airflow/dags"
    echo ""
    if [ "${AIRFLOW_NAMESPACE}" = "jupyter" ]; then
        echo "📝 JupyterHub Integration:"
        echo " Since Airflow is in the 'jupyter' namespace, JupyterHub can mount this PVC"
        echo " Enable 'Airflow DAG storage mounting' when installing JupyterHub"
        echo " DAGs will be available at: /opt/airflow-dags in notebooks"
    fi
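Once this recipe has run and JupyterHub mounts the same claim, sharing a DAG is just a file copy; the DAG filename below is hypothetical.

# From a JupyterHub terminal (shared PVC mounted at /opt/airflow-dags):
cp my_dag.py /opt/airflow-dags/
# Airflow sees the same file under /opt/airflow/dags on its side of the PVC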
# Delete DAG storage
delete-dags-storage:
    #!/bin/bash
    set -euo pipefail
    echo "Deleting DAG storage resources..."
    kubectl delete pvc airflow-dags-pvc -n ${AIRFLOW_NAMESPACE} --ignore-not-found
    echo "✅ DAG storage deleted"

# Clean up database and secrets
cleanup:
    #!/bin/bash
@@ -142,6 +142,7 @@ RUN mamba install --yes \

RUN pip install \
    agno \
    apache-airflow-client \
    fastembed \
    feature-engine \
    jupyter-ai \
@@ -142,6 +142,7 @@ RUN mamba install --yes \

RUN pip install \
    agno \
    apache-airflow-client \
    fastembed \
    feature-engine \
    jupyter-ai \
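A rough way to confirm the client library made it into a rebuilt kernel image; the registry and tag come from the justfile defaults and may differ locally.

# Import check for apache-airflow-client inside the notebook image
docker run --rm localhost:30500/buun-stack-notebook:python-3.12-36 \
  python -c "import airflow_client.client as c; print(c.__name__)"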
jupyterhub/jupyter-nfs-storage-class.yaml (new file)
@@ -0,0 +1,7 @@
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: jupyter-nfs-static
provisioner: kubernetes.io/no-provisioner
volumeBindingMode: WaitForFirstConsumer
reclaimPolicy: Retain
@@ -18,11 +18,13 @@ hub:
        #!/bin/bash
        pip install --no-cache-dir hvac==2.3.0
        exec jupyterhub --config /usr/local/etc/jupyterhub/jupyterhub_config.py --upgrade-db
    {{- if .Env.USER_POLICY_HCL }}
    user_policy.hcl:
      mountPath: /srv/jupyterhub/user_policy.hcl
      mode: 0644
      stringData: |
{{ .Env.USER_POLICY_HCL | strings.Indent 8 }}
    {{- end }}
    pre_spawn_hook.py:
      mountPath: /srv/jupyterhub/pre_spawn_hook.py
      mode: 0644
@@ -152,6 +154,34 @@ singleuser:
    NOTEBOOK_VAULT_TOKEN_TTL: "{{ .Env.NOTEBOOK_VAULT_TOKEN_TTL }}"
    NOTEBOOK_VAULT_TOKEN_MAX_TTL: "{{ .Env.NOTEBOOK_VAULT_TOKEN_MAX_TTL }}"

  storage:
    {{ if env.Getenv "PVC_NAME" -}}
    type: static
    static:
      pvcName: {{ .Env.PVC_NAME }}
    {{ else -}}
    type: dynamic
    dynamic:
      {{ if env.Getenv "JUPYTERHUB_STORAGE_CLASS" -}}
      storageClass: {{ .Env.JUPYTERHUB_STORAGE_CLASS }}
      {{ end -}}
      storageAccessModes:
        - ReadWriteOnce
    {{ end -}}
    capacity: 10Gi
    {{- if eq .Env.JUPYTERHUB_AIRFLOW_DAGS_PERSISTENCE_ENABLED "true" }}
    # Mount Airflow DAGs when both are in the same namespace (jupyter)
    extraVolumes:
      - name: airflow-dags
        persistentVolumeClaim:
          claimName: airflow-dags-pvc
          optional: true # Don't fail if PVC doesn't exist yet
    extraVolumeMounts:
      - name: airflow-dags
        mountPath: /opt/airflow-dags
        readOnly: false
    {{- end }}

  networkPolicy:
    egress:
      - to:
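With JUPYTERHUB_AIRFLOW_DAGS_PERSISTENCE_ENABLED=true, the shared volume should be visible from a running single-user server; the pod name below is illustrative and assumes KubeSpawner's default jupyter-<username> naming.

# List shared DAGs from a user's notebook pod
kubectl -n jupyter exec jupyter-alice -- ls -la /opt/airflow-dags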
@@ -8,7 +8,8 @@ export JUPYTERHUB_OIDC_CLIENT_SESSION_MAX := env("JUPYTERHUB_OIDC_CLIENT_SESSION
export JUPYTERHUB_NFS_PV_ENABLED := env("JUPYTERHUB_NFS_PV_ENABLED", "")
export JUPYTERHUB_STORAGE_CLASS := env("JUPYTERHUB_STORAGE_CLASS", "")
export JUPYTERHUB_VAULT_INTEGRATION_ENABLED := env("JUPYTERHUB_VAULT_INTEGRATION_ENABLED", "")
export JUPYTER_PYTHON_KERNEL_TAG := env("JUPYTER_PYTHON_KERNEL_TAG", "python-3.12-34")
export JUPYTERHUB_AIRFLOW_DAGS_PERSISTENCE_ENABLED := env("JUPYTERHUB_AIRFLOW_DAGS_PERSISTENCE_ENABLED", "")
export JUPYTER_PYTHON_KERNEL_TAG := env("JUPYTER_PYTHON_KERNEL_TAG", "python-3.12-36")
export KERNEL_IMAGE_BUUN_STACK_REPOSITORY := env("KERNEL_IMAGE_BUUN_STACK_REPOSITORY", "buun-stack-notebook")
export KERNEL_IMAGE_BUUN_STACK_CUDA_REPOSITORY := env("KERNEL_IMAGE_BUUN_STACK_CUDA_REPOSITORY", "buun-stack-cuda-notebook")
export JUPYTER_PROFILE_MINIMAL_ENABLED := env("JUPYTER_PROFILE_MINIMAL_ENABLED", "false")
@@ -28,6 +29,7 @@ export JUPYTER_BUUNSTACK_LOG_LEVEL := env("JUPYTER_BUUNSTACK_LOG_LEVEL", "warnin
export IMAGE_REGISTRY := env("IMAGE_REGISTRY", "localhost:30500")
export SPARK_DOWNLOAD_URL := env("SPARK_DOWNLOAD_URL", "https://dlcdn.apache.org/spark/")
export SPARK_VERSION := env("SPARK_VERSION", "4.0.1")
export AIRFLOW_DAGS_STORAGE_SIZE := env("AIRFLOW_DAGS_STORAGE_SIZE", "10Gi")
export LONGHORN_NAMESPACE := env("LONGHORN_NAMESPACE", "longhorn")
export KEYCLOAK_REALM := env("KEYCLOAK_REALM", "buunstack")
export VAULT_HOST := env("VAULT_HOST", "")
@@ -114,12 +116,33 @@ install root_token='':
            )
        done
        PVC_NAME=jupyter-nfs-pvc
        # Create StorageClass for NFS static provisioning
        if ! kubectl get storageclass jupyter-nfs-static &>/dev/null; then
            kubectl apply -f jupyter-nfs-storage-class.yaml
        fi
        if ! kubectl get pv jupyter-nfs-pv &>/dev/null; then
            gomplate -f nfs-pv.gomplate.yaml | kubectl apply -f -
        fi
        kubectl apply -n ${JUPYTERHUB_NAMESPACE} -f nfs-pvc.yaml
    fi

    # Setup Airflow DAG storage sharing (same namespace)
    if [ -z "${JUPYTERHUB_AIRFLOW_DAGS_PERSISTENCE_ENABLED}" ]; then
        if gum confirm "Enable Airflow DAG storage mounting (requires Airflow in same namespace)?"; then
            JUPYTERHUB_AIRFLOW_DAGS_PERSISTENCE_ENABLED="true"
        else
            JUPYTERHUB_AIRFLOW_DAGS_PERSISTENCE_ENABLED="false"
        fi
    fi
    if [ "${JUPYTERHUB_AIRFLOW_DAGS_PERSISTENCE_ENABLED}" = "true" ]; then
        echo "✅ Airflow DAG mounting enabled"
        echo " Note: Airflow must be installed in the same namespace (jupyter)"
        echo " PVC: airflow-dags-pvc will be mounted at /opt/airflow-dags"
        echo ""
        echo " ⚠️ If you install Airflow AFTER JupyterHub, restart user pods to mount DAGs:"
        echo " kubectl delete pods -n jupyter -l app.kubernetes.io/component=singleuser-server"
    fi

    # Setup Vault Agent for automatic token management
    if [ -z "${JUPYTERHUB_VAULT_INTEGRATION_ENABLED}" ]; then
        if gum confirm "Are you going to enable Vault integration?"; then
@@ -174,7 +197,7 @@ uninstall:
        kubectl patch pv jupyter-nfs-pv -p '{"spec":{"claimRef":null}}'
    fi

# Delete JupyterHub PV
# Delete JupyterHub PV and StorageClass
delete-pv:
    #!/bin/bash
    set -euo pipefail
@@ -182,6 +205,7 @@ delete-pv:
        kubectl patch pv jupyter-nfs-pv -p '{"spec":{"claimRef":null}}'
        kubectl delete pv jupyter-nfs-pv
    fi
    kubectl delete storageclass jupyter-nfs-static --ignore-not-found

# Build Jupyter notebook kernel images
build-kernel-images:
@@ -2,13 +2,16 @@ apiVersion: v1
kind: PersistentVolume
metadata:
  name: jupyter-nfs-pv
  labels:
    type: jupyter-nfs
    app: jupyterhub
spec:
  capacity:
    storage: 10Gi
  accessModes:
    - ReadWriteOnce
  persistentVolumeReclaimPolicy: Retain
  storageClassName: longhorn
  storageClassName: jupyter-nfs-static
  volumeMode: Filesystem
  nfs:
    server: {{ .Env.JUPYTER_NFS_IP }}
@@ -8,4 +8,9 @@ spec:
  resources:
    requests:
      storage: 10Gi
  storageClassName: jupyter-nfs-static
  selector:
    matchLabels:
      type: jupyter-nfs
      app: jupyterhub
  volumeName: jupyter-nfs-pv
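For the static NFS path, binding can be verified once a user pod consumes the claim, since WaitForFirstConsumer defers binding until first use; the resource names assume the defaults used in this diff.

# PV and PVC should both report Bound after the first single-user pod starts
kubectl get pv jupyter-nfs-pv
kubectl -n jupyter get pvc jupyter-nfs-pvc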