# Page-viewer header (not part of the justfile): 363 lines, 16 KiB, labelled "Makefile".
# Recipes that are not found in this justfile are looked up in parent justfiles.
set fallback := true

# Every variable below is exported to recipe processes. Each one takes its
# value from the environment variable of the same name and falls back to the
# default given as the second argument of env().

# --- JupyterHub release and OIDC client -------------------------------------
export JUPYTERHUB_NAMESPACE := env("JUPYTERHUB_NAMESPACE", "datastack")
export JUPYTERHUB_CHART_VERSION := env("JUPYTERHUB_CHART_VERSION", "4.2.0")
export JUPYTERHUB_OIDC_CLIENT_ID := env("JUPYTERHUB_OIDC_CLIENT_ID", "jupyterhub")
export JUPYTERHUB_OIDC_CLIENT_SESSION_IDLE := env("JUPYTERHUB_OIDC_CLIENT_SESSION_IDLE", "86400")
export JUPYTERHUB_OIDC_CLIENT_SESSION_MAX := env("JUPYTERHUB_OIDC_CLIENT_SESSION_MAX", "604800")

# --- Optional features ------------------------------------------------------
# An empty *_ENABLED value makes the `install` recipe ask interactively.
export JUPYTERHUB_NFS_PV_ENABLED := env("JUPYTERHUB_NFS_PV_ENABLED", "")
export JUPYTERHUB_STORAGE_CLASS := env("JUPYTERHUB_STORAGE_CLASS", "")
export JUPYTERHUB_VAULT_INTEGRATION_ENABLED := env("JUPYTERHUB_VAULT_INTEGRATION_ENABLED", "")
export JUPYTERHUB_AIRFLOW_DAGS_PERSISTENCE_ENABLED := env("JUPYTERHUB_AIRFLOW_DAGS_PERSISTENCE_ENABLED", "")

# --- Kernel images ----------------------------------------------------------
# The tag and repositories are combined with IMAGE_REGISTRY by the
# build-kernel-images / push-kernel-images recipes.
export JUPYTER_PYTHON_KERNEL_TAG := env("JUPYTER_PYTHON_KERNEL_TAG", "python-3.12-41")
export KERNEL_IMAGE_BUUN_STACK_REPOSITORY := env("KERNEL_IMAGE_BUUN_STACK_REPOSITORY", "buun-stack-notebook")
export KERNEL_IMAGE_BUUN_STACK_CUDA_REPOSITORY := env("KERNEL_IMAGE_BUUN_STACK_CUDA_REPOSITORY", "buun-stack-cuda-notebook")

# --- Notebook profile switches ----------------------------------------------
# JUPYTER_PROFILE_BUUN_STACK_CUDA_ENABLED also gates the CUDA image build/push.
# NOTE(review): the other switches are not read by any recipe in this file;
# presumably the gomplate templates consume them - confirm.
export JUPYTER_PROFILE_MINIMAL_ENABLED := env("JUPYTER_PROFILE_MINIMAL_ENABLED", "false")
export JUPYTER_PROFILE_BASE_ENABLED := env("JUPYTER_PROFILE_BASE_ENABLED", "false")
export JUPYTER_PROFILE_DATASCIENCE_ENABLED := env("JUPYTER_PROFILE_DATASCIENCE_ENABLED", "true")
export JUPYTER_PROFILE_PYSPARK_ENABLED := env("JUPYTER_PROFILE_PYSPARK_ENABLED", "false")
export JUPYTER_PROFILE_PYTORCH_ENABLED := env("JUPYTER_PROFILE_PYTORCH_ENABLED", "false")
export JUPYTER_PROFILE_TENSORFLOW_ENABLED := env("JUPYTER_PROFILE_TENSORFLOW_ENABLED", "false")
export JUPYTER_PROFILE_BUUN_STACK_ENABLED := env("JUPYTER_PROFILE_BUUN_STACK_ENABLED", "false")
export JUPYTER_PROFILE_BUUN_STACK_CUDA_ENABLED := env("JUPYTER_PROFILE_BUUN_STACK_CUDA_ENABLED", "false")

# --- Vault token lifetimes, culling and logging -----------------------------
export JUPYTERHUB_VAULT_TOKEN_TTL := env("JUPYTERHUB_VAULT_TOKEN_TTL", "24h")
export NOTEBOOK_VAULT_TOKEN_TTL := env("NOTEBOOK_VAULT_TOKEN_TTL", "24h")
export NOTEBOOK_VAULT_TOKEN_MAX_TTL := env("NOTEBOOK_VAULT_TOKEN_MAX_TTL", "168h")
export JUPYTERHUB_CULL_MAX_AGE := env("JUPYTERHUB_CULL_MAX_AGE", "604800")
export VAULT_AGENT_LOG_LEVEL := env("VAULT_AGENT_LOG_LEVEL", "info")
export JUPYTER_BUUNSTACK_LOG_LEVEL := env("JUPYTER_BUUNSTACK_LOG_LEVEL", "warning")

# --- Image build inputs -----------------------------------------------------
export IMAGE_REGISTRY := env("IMAGE_REGISTRY", "localhost:30500")
export SPARK_DOWNLOAD_URL := env("SPARK_DOWNLOAD_URL", "https://dlcdn.apache.org/spark/")
export SPARK_VERSION := env("SPARK_VERSION", "4.0.1")
export PIP_REPOSITORY_URL := env("PIP_REPOSITORY_URL", "https://pypi.org/simple/")

# --- Neighbouring services --------------------------------------------------
export AIRFLOW_DAGS_STORAGE_SIZE := env("AIRFLOW_DAGS_STORAGE_SIZE", "10Gi")
export LONGHORN_NAMESPACE := env("LONGHORN_NAMESPACE", "longhorn")
export KEYCLOAK_REALM := env("KEYCLOAK_REALM", "buunstack")
export VAULT_HOST := env("VAULT_HOST", "")
# Derived from VAULT_HOST; evaluates to just "https://" when VAULT_HOST is empty.
export VAULT_ADDR := "https://" + VAULT_HOST

# Hidden default recipe: print the recipe list in source order, including
# recipes of submodules.
[private]
default:
    @just --list --list-submodules --unsorted

# Registers the upstream JupyterHub chart repository under the alias
# `jupyterhub`, then refreshes the index of that repository only, so the
# step does not re-download every other repository configured on the machine.
# Add Helm repository
add-helm-repo:
    helm repo add jupyterhub https://jupyterhub.github.io/helm-chart
    helm repo update jupyterhub

# Drops the `jupyterhub` alias from the local Helm repository list
# (the counterpart of add-helm-repo).
# Remove Helm repository
remove-helm-repo:
    helm repo remove jupyterhub

# Creates the namespace named by JUPYTERHUB_NAMESPACE unless it already exists.
# This is a linewise recipe, so it runs under just's default `sh`. The
# redirection is therefore written in POSIX form: bash-only `&>` is parsed by
# shells such as dash as "run in background" plus an empty redirect, which
# makes the `||` branch unreachable and silently skips the creation.
# Create JupyterHub namespace
create-namespace:
    kubectl get namespace "${JUPYTERHUB_NAMESPACE}" >/dev/null 2>&1 || \
        kubectl create namespace "${JUPYTERHUB_NAMESPACE}"

# Removes the namespace named by JUPYTERHUB_NAMESPACE; a missing namespace is
# not treated as an error.
# Delete JupyterHub namespace
delete-namespace:
    kubectl delete --ignore-not-found namespace ${JUPYTERHUB_NAMESPACE}

# Interactive end-to-end installation. In order: resolve the hostname, make
# sure a crypt key exists, create the namespace and the Keycloak client,
# optionally wire up NFS storage, Airflow DAG sharing and Vault, render the
# gomplate templates and finally install/upgrade the Helm release.
#
# Parameter:
#   root_token  Vault root token; only needed when Vault integration is
#               enabled, and prompted for when left empty.
#
# The feature switches and settings changed below were inherited from the
# environment (they are exported by this justfile), so bash keeps them
# exported and the new values reach gomplate/helm child processes.
# Install JupyterHub
install root_token='':
    #!/bin/bash
    set -euo pipefail

    # Ask for the public hostname until one is provided.
    export JUPYTERHUB_HOST=${JUPYTERHUB_HOST:-}
    while [ -z "${JUPYTERHUB_HOST}" ]; do
        JUPYTERHUB_HOST=$(
            gum input --prompt="JupyterHub host (FQDN): " --width=100 \
                --placeholder="e.g., jupyter.example.com"
        )
    done

    # Generate JUPYTERHUB_CRYPT_KEY if not exists
    if [ -z "${JUPYTERHUB_CRYPT_KEY:-}" ]; then
        echo "Generating JUPYTERHUB_CRYPT_KEY..."
        # Assign first, export afterwards: a combined export-with-command-substitution
        # would hide a failing generator from `set -e` and persist an empty key.
        JUPYTERHUB_CRYPT_KEY=$(just utils::random-password)
        export JUPYTERHUB_CRYPT_KEY
        echo "JUPYTERHUB_CRYPT_KEY=${JUPYTERHUB_CRYPT_KEY}" >> ../../.env.local
        echo "✓ JUPYTERHUB_CRYPT_KEY generated and saved to .env.local"
    fi

    just create-namespace
    # Currently disabled:
    # just k8s::copy-regcred ${JUPYTERHUB_NAMESPACE}
    just keycloak::create-client ${KEYCLOAK_REALM} ${JUPYTERHUB_OIDC_CLIENT_ID} \
        "https://${JUPYTERHUB_HOST}/hub/oauth_callback" \
        "" "${JUPYTERHUB_OIDC_CLIENT_SESSION_IDLE}" "${JUPYTERHUB_OIDC_CLIENT_SESSION_MAX}"
    just add-helm-repo

    # Values read by the gomplate templates rendered further down.
    export JUPYTERHUB_OIDC_CLIENT_ID KEYCLOAK_REALM JUPYTER_PYTHON_KERNEL_TAG
    export JUPYTER_FSGID=${JUPYTER_FSGID:-100}
    export PVC_NAME=""

    # --- Optional: NFS-backed persistent volume -------------------------------
    if [ -z "${JUPYTERHUB_NFS_PV_ENABLED}" ]; then
        if gum confirm "Are you going to use NFS PV?"; then
            JUPYTERHUB_NFS_PV_ENABLED=true
        else
            JUPYTERHUB_NFS_PV_ENABLED=false
        fi
    fi
    if [ "${JUPYTERHUB_NFS_PV_ENABLED}" = "true" ]; then
        # The NFS setup refuses to continue without a Longhorn Helm release.
        if ! helm status longhorn -n ${LONGHORN_NAMESPACE} &>/dev/null; then
            echo "Longhorn is not installed. Please install Longhorn first." >&2
            exit 1
        fi
        JUPYTERHUB_STORAGE_CLASS=${JUPYTERHUB_STORAGE_CLASS:-longhorn}
        export JUPYTER_NFS_IP=${JUPYTER_NFS_IP:-}
        while [ -z "${JUPYTER_NFS_IP}" ]; do
            JUPYTER_NFS_IP=$(
                gum input --prompt="NFS server IP address: " --width=100 \
                    --placeholder="e.g., 192.168.10.1"
            )
        done
        export JUPYTER_NFS_PATH=${JUPYTER_NFS_PATH:-}
        while [ -z "${JUPYTER_NFS_PATH}" ]; do
            JUPYTER_NFS_PATH=$(
                gum input --prompt="NFS server export path: " --width=100 \
                    --placeholder="e.g., /volume1/drive1/jupyter"
            )
        done
        PVC_NAME=jupyter-nfs-pvc
        # Create StorageClass for NFS static provisioning
        if ! kubectl get storageclass jupyter-nfs-static &>/dev/null; then
            kubectl apply -f jupyter-nfs-storage-class.yaml
        fi
        if ! kubectl get pv jupyter-nfs-pv &>/dev/null; then
            gomplate -f nfs-pv.gomplate.yaml | kubectl apply -f -
        fi
        kubectl apply -n ${JUPYTERHUB_NAMESPACE} -f nfs-pvc.yaml
    fi

    # Setup Airflow DAG storage sharing (same namespace)
    if [ -z "${JUPYTERHUB_AIRFLOW_DAGS_PERSISTENCE_ENABLED}" ]; then
        if gum confirm "Enable Airflow DAG storage mounting (requires Airflow in same namespace)?"; then
            JUPYTERHUB_AIRFLOW_DAGS_PERSISTENCE_ENABLED="true"
        else
            JUPYTERHUB_AIRFLOW_DAGS_PERSISTENCE_ENABLED="false"
        fi
    fi
    if [ "${JUPYTERHUB_AIRFLOW_DAGS_PERSISTENCE_ENABLED}" = "true" ]; then
        # The hints print the namespace actually used by this deployment, so the
        # suggested kubectl command can be copied as-is.
        echo "✅ Airflow DAG mounting enabled"
        echo " Note: Airflow must be installed in the same namespace (${JUPYTERHUB_NAMESPACE})"
        echo " PVC: airflow-dags-pvc will be mounted at /opt/airflow-dags"
        echo ""
        echo " ⚠️ If you install Airflow AFTER JupyterHub, restart user pods to mount DAGs:"
        echo " kubectl delete pods -n ${JUPYTERHUB_NAMESPACE} -l app.kubernetes.io/component=singleuser-server"
    fi

    # Setup Vault Agent for automatic token management
    if [ -z "${JUPYTERHUB_VAULT_INTEGRATION_ENABLED}" ]; then
        if gum confirm "Are you going to enable Vault integration?"; then
            JUPYTERHUB_VAULT_INTEGRATION_ENABLED=true
        else
            JUPYTERHUB_VAULT_INTEGRATION_ENABLED=false
        fi
    fi
    if [ "${JUPYTERHUB_VAULT_INTEGRATION_ENABLED}" = "true" ]; then
        echo "Setting up Vault Agent for automatic token management..."
        echo " Token TTL: ${JUPYTERHUB_VAULT_TOKEN_TTL}"
        export VAULT_TOKEN="{{ root_token }}"
        while [ -z "${VAULT_TOKEN}" ]; do
            VAULT_TOKEN=$(gum input --prompt="Vault root token: " --password --width=100)
        done
        # Quoted so the token always arrives as exactly one argument.
        just setup-vault-integration "${VAULT_TOKEN}"
        just create-jupyterhub-vault-token "${VAULT_TOKEN}"

        # Create ExternalSecret for admin vault token
        echo "Creating ExternalSecret for admin vault token..."
        gomplate -f jupyterhub-vault-token-external-secret.gomplate.yaml | kubectl apply -f -

        # Read user policy template for Vault (split from the export so that an
        # unreadable file aborts the recipe under `set -e`).
        USER_POLICY_HCL=$(cat user_policy.hcl)
        export USER_POLICY_HCL
    else
        echo "Vault integration disabled - deploying without Vault support"
        export USER_POLICY_HCL=""
    fi

    # Generate pre_spawn_hook.py
    echo "Generating pre_spawn_hook.py..."
    gomplate -f pre_spawn_hook.gomplate.py -o pre_spawn_hook.py

    # https://z2jh.jupyter.org/en/stable/
    gomplate -f jupyterhub-values.gomplate.yaml -o jupyterhub-values.yaml

    helm upgrade --cleanup-on-fail --install jupyterhub jupyterhub/jupyterhub \
        --version ${JUPYTERHUB_CHART_VERSION} -n ${JUPYTERHUB_NAMESPACE} \
        --timeout=20m -f jupyterhub-values.yaml
    # wait deployments manually because `helm upgrade --wait` does not work for JupyterHub
    just k8s::wait-deployments-ready ${JUPYTERHUB_NAMESPACE} hub proxy

# Removes the Helm release, then the leftovers it does not own: single-user
# server pods, the NFS PVC and the Vault-token ExternalSecret. The NFS
# PersistentVolume itself is kept; only its claim reference is cleared.
# Uninstall JupyterHub
uninstall:
    #!/bin/bash
    set -euo pipefail

    ns=${JUPYTERHUB_NAMESPACE}
    nfs_pv=jupyter-nfs-pv

    helm uninstall jupyterhub -n ${ns} --wait --ignore-not-found

    # Objects that may still exist after the release is gone.
    kubectl delete pods -n ${ns} -l app.kubernetes.io/component=singleuser-server
    kubectl delete -n ${ns} pvc jupyter-nfs-pvc --ignore-not-found
    kubectl delete -n ${ns} externalsecret jupyterhub-vault-token --ignore-not-found

    # Clear the claim reference on the PV (if the PV exists) now that its PVC is deleted.
    if kubectl get pv "${nfs_pv}" >/dev/null 2>&1; then
        kubectl patch pv "${nfs_pv}" -p '{"spec":{"claimRef":null}}'
    fi

# Removes the statically provisioned NFS PersistentVolume (when present) and
# the `jupyter-nfs-static` StorageClass.
# Delete JupyterHub PV and StorageClass
delete-pv:
    #!/bin/bash
    set -euo pipefail

    nfs_pv=jupyter-nfs-pv
    nfs_storage_class=jupyter-nfs-static

    if kubectl get pv "${nfs_pv}" >/dev/null 2>&1; then
        # Clear the claim reference first, then delete the volume object.
        kubectl patch pv "${nfs_pv}" -p '{"spec":{"claimRef":null}}'
        kubectl delete pv "${nfs_pv}"
    fi

    kubectl delete storageclass "${nfs_storage_class}" --ignore-not-found

# Builds the buunstack wheel from ../python-package, then builds the notebook
# kernel image, plus the CUDA variant when
# JUPYTER_PROFILE_BUUN_STACK_CUDA_ENABLED is "true". Images are tagged
# IMAGE_REGISTRY/<repository>:JUPYTER_PYTHON_KERNEL_TAG.
# Build Jupyter notebook kernel images
build-kernel-images:
    #!/bin/bash
    set -euxo pipefail

    # Build one kernel image.
    #   $1  build-context directory, relative to the recipe's working directory
    #   $2  image repository name (placed under IMAGE_REGISTRY)
    # The freshly built wheel is copied into the context for the build and is
    # removed again afterwards, also when `docker build` fails, so a failed
    # build does not leave stray wheels behind in the context directory.
    build_image() {
        local context_dir="$1" repository="$2" status=0
        cp ../python-package/dist/*.whl "${context_dir}/"
        (
            cd "${context_dir}" &&
                DOCKER_BUILDKIT=1 docker build -t \
                    "${IMAGE_REGISTRY}/${repository}:${JUPYTER_PYTHON_KERNEL_TAG}" \
                    --build-arg spark_version="${SPARK_VERSION}" \
                    --build-arg spark_download_url="${SPARK_DOWNLOAD_URL}" \
                    --build-arg pip_repository_url="${PIP_REPOSITORY_URL}" \
                    .
        ) || status=$?
        rm -f "${context_dir}"/*.whl
        return "${status}"
    }

    # Rebuild the wheel from a clean tree with a pinned version number.
    (
        cd ../python-package
        rm -rf dist/ build/ *.egg-info/
        SETUPTOOLS_SCM_PRETEND_VERSION_FOR_BUUNSTACK=0.1.0 python -m build --wheel
    )

    build_image ./images/datastack-notebook "${KERNEL_IMAGE_BUUN_STACK_REPOSITORY}"

    if [ "${JUPYTER_PROFILE_BUUN_STACK_CUDA_ENABLED}" = "true" ]; then
        build_image ./images/datastack-cuda-notebook "${KERNEL_IMAGE_BUUN_STACK_CUDA_REPOSITORY}"
    fi

# Pushes the notebook kernel image, followed by the CUDA variant when
# JUPYTER_PROFILE_BUUN_STACK_CUDA_ENABLED is "true".
# Push Jupyter notebook kernel images
push-kernel-images:
    #!/bin/bash
    set -euo pipefail

    # Collect the image references first, then push them in order.
    images=("${IMAGE_REGISTRY}/${KERNEL_IMAGE_BUUN_STACK_REPOSITORY}:${JUPYTER_PYTHON_KERNEL_TAG}")
    if [ "${JUPYTER_PROFILE_BUUN_STACK_CUDA_ENABLED}" = "true" ]; then
        images+=("${IMAGE_REGISTRY}/${KERNEL_IMAGE_BUUN_STACK_CUDA_REPOSITORY}:${JUPYTER_PYTHON_KERNEL_TAG}")
    fi

    for image in "${images[@]}"; do
        docker push "${image}"
    done

# Writes the `jupyterhub-admin` Vault policy, creates the matching Kubernetes
# auth role for the `hub` service account and publishes the token-renewal
# script as a ConfigMap in the JupyterHub namespace.
#
# Parameter:
#   root_token  Vault root token; prompted for when left empty.
#
# The role is bound to JUPYTERHUB_NAMESPACE, the namespace everything else in
# this file deploys into, rather than to a fixed namespace name.
# Setup Vault integration for JupyterHub (user-specific tokens + auto-renewal)
setup-vault-integration root_token='':
    #!/bin/bash
    set -euo pipefail

    # The vault CLI reads VAULT_TOKEN (and the exported VAULT_ADDR) from the environment.
    export VAULT_TOKEN="{{ root_token }}"
    while [ -z "${VAULT_TOKEN}" ]; do
        VAULT_TOKEN=$(gum input --prompt="Vault root token: " --password --width=100)
    done

    echo "Setting up Vault integration for JupyterHub..."

    # Create JupyterHub-specific policy and Kubernetes role in Vault
    echo "Creating JupyterHub-specific Vault policy and Kubernetes role..."
    echo " Service Account: hub"
    echo " Namespace: ${JUPYTERHUB_NAMESPACE}"
    echo " Policy: jupyterhub-admin (custom policy with extended max TTL)"
    echo " TTL: ${JUPYTERHUB_VAULT_TOKEN_TTL}"

    # Create JupyterHub-specific policy
    echo "Creating jupyterhub-admin policy..."
    vault policy write jupyterhub-admin jupyterhub-admin-policy.hcl

    # Create Kubernetes role (use system-safe max_ttl to avoid warnings)
    echo "Creating Kubernetes role..."
    vault write auth/kubernetes/role/jupyterhub-admin \
        bound_service_account_names=hub \
        bound_service_account_namespaces="${JUPYTERHUB_NAMESPACE}" \
        policies=jupyterhub-admin \
        ttl="${JUPYTERHUB_VAULT_TOKEN_TTL}" \
        max_ttl=720h

    # Create ConfigMap with token renewal script
    # (client-side dry run piped into `kubectl apply`, so re-running updates it in place)
    echo "Creating ConfigMap with token renewal script..."
    kubectl create configmap vault-token-renewer-config -n "${JUPYTERHUB_NAMESPACE}" \
        --from-file=vault-token-renewer.sh=vault-token-renewer.sh \
        --dry-run=client -o yaml | kubectl apply -f -

    echo "✓ Vault integration configured (user-specific tokens + auto-renewal)"
    echo ""
    echo "Configuration Summary:"
    echo " JupyterHub Token TTL: ${JUPYTERHUB_VAULT_TOKEN_TTL}"
    echo " User Token TTL: ${NOTEBOOK_VAULT_TOKEN_TTL}"
    echo " User Token Max TTL: ${NOTEBOOK_VAULT_TOKEN_MAX_TTL}"
    echo " Vault Agent Log Level: ${VAULT_AGENT_LOG_LEVEL}"
    echo " Auto-renewal: Every TTL/2 (minimum 30s) based on actual token TTL"
    echo ""
    echo "Users can now access Vault from notebooks using:"
    echo " from buunstack import SecretStore"
    echo " secrets = SecretStore()"
    echo " # Each user gets their own isolated Vault token and policy"
    echo " # Admin token is automatically renewed by Vault Agent"

# Creates a renewable `jupyterhub-admin` token with no explicit maximum TTL and
# stores it at secret/jupyterhub/vault-token. When a token is already stored
# there, the user is asked whether to replace it; declining ends the recipe
# successfully without touching the stored token.
#
# Parameter:
#   root_token  Vault root token; prompted for when left empty.
# Create JupyterHub Vault token (renewable with unlimited Max TTL)
create-jupyterhub-vault-token root_token='':
    #!/bin/bash
    set -euo pipefail

    # The vault CLI reads VAULT_TOKEN (and the exported VAULT_ADDR) from the environment.
    export VAULT_TOKEN="{{ root_token }}"
    while [ -z "${VAULT_TOKEN}" ]; do
        VAULT_TOKEN=$(gum input --prompt="Vault root token: " --password --width=100)
    done

    echo "Creating JupyterHub admin Vault token"

    # jupyterhub-admin policy should exist (created by setup-vault-integration)

    # Check if token already exists
    if vault kv get secret/jupyterhub/vault-token >/dev/null 2>&1; then
        echo "Existing admin token found at secret/jupyterhub/vault-token"
        if gum confirm "Replace existing token with new one?"; then
            echo "Creating new admin token..."
        else
            echo "Using existing token"
            # `exit`, not `return`: this is an executed script, not a function,
            # and `return` here is an error that would fail the recipe.
            exit 0
        fi
    fi

    # Create admin vault token with unlimited max TTL
    echo ""
    echo "Creating admin token (TTL: ${JUPYTERHUB_VAULT_TOKEN_TTL}, Max TTL: unlimited)..."
    TOKEN_RESPONSE=$(vault token create \
        -policy=jupyterhub-admin \
        -ttl="${JUPYTERHUB_VAULT_TOKEN_TTL}" \
        -explicit-max-ttl=0 \
        -display-name="jupyterhub-admin" \
        -renewable=true \
        -format=json)

    # Extract token
    ADMIN_TOKEN=$(echo "$TOKEN_RESPONSE" | jq -r .auth.client_token)

    # jq prints the literal string "null" when the field is missing.
    if [ -z "$ADMIN_TOKEN" ] || [ "$ADMIN_TOKEN" = "null" ]; then
        echo "❌ Failed to create admin token" >&2
        exit 1
    fi

    # Store token in Vault for JupyterHub to retrieve
    echo "Storing admin token in Vault..."
    vault kv put secret/jupyterhub/vault-token token="$ADMIN_TOKEN"

    echo ""
    echo "✅ Admin token created and stored successfully!"
    echo ""
    echo "Token behavior:"
    echo " - TTL: ${JUPYTERHUB_VAULT_TOKEN_TTL} (will expire without renewal)"
    echo " - Max TTL: Unlimited (can be renewed forever)"
    echo " - Vault Agent will renew at TTL/2 intervals (minimum 30s)"
    echo " - No more 30-day limitation!"
    echo ""
    echo "Token stored at: secret/jupyterhub/vault-token"