Files
buun-stack/jupyterhub/justfile
2025-09-08 14:06:35 +09:00

276 lines
12 KiB
Makefile

# Module settings and configuration.
# Every `export` below is placed in the environment of each recipe; the value
# comes from the environment variable of the same name when it is set,
# otherwise from the default given as the second argument of env().
# fallback: when a recipe is not found in this justfile, just continues the
# search in justfiles of parent directories.
set fallback := true
# --- JupyterHub deployment ---
export JUPYTERHUB_NAMESPACE := env("JUPYTERHUB_NAMESPACE", "jupyter")
export JUPYTERHUB_CHART_VERSION := env("JUPYTERHUB_CHART_VERSION", "4.2.0")
# OIDC client registered through keycloak::create-client by the install recipe.
# NOTE(review): the two session limits are presumably seconds (1 day / 7 days) - confirm.
export JUPYTERHUB_OIDC_CLIENT_ID := env("JUPYTERHUB_OIDC_CLIENT_ID", "jupyterhub")
export JUPYTERHUB_OIDC_CLIENT_SESSION_IDLE := env("JUPYTERHUB_OIDC_CLIENT_SESSION_IDLE", "86400")
export JUPYTERHUB_OIDC_CLIENT_SESSION_MAX := env("JUPYTERHUB_OIDC_CLIENT_SESSION_MAX", "604800")
# Empty means "not decided": the install recipe then asks interactively.
export JUPYTERHUB_NFS_PV_ENABLED := env("JUPYTERHUB_NFS_PV_ENABLED", "")
# The install recipe falls back to "longhorn" when the NFS PV is enabled and this is empty.
export JUPYTERHUB_STORAGE_CLASS := env("JUPYTERHUB_STORAGE_CLASS", "")
# Empty means "not decided": the install recipe then asks interactively.
export JUPYTERHUB_VAULT_INTEGRATION_ENABLED := env("JUPYTERHUB_VAULT_INTEGRATION_ENABLED", "")
# --- Kernel images (tag and repositories used by build-kernel-images / push-kernel-images) ---
export JUPYTER_PYTHON_KERNEL_TAG := env("JUPYTER_PYTHON_KERNEL_TAG", "python-3.12-28")
export KERNEL_IMAGE_BUUN_STACK_REPOSITORY := env("KERNEL_IMAGE_BUUN_STACK_REPOSITORY", "buun-stack-notebook")
export KERNEL_IMAGE_BUUN_STACK_CUDA_REPOSITORY := env("KERNEL_IMAGE_BUUN_STACK_CUDA_REPOSITORY", "buun-stack-cuda-notebook")
# --- Notebook profile toggles ("true" / "false") ---
# Only JUPYTER_PROFILE_BUUN_STACK_CUDA_ENABLED is read by recipes in this file
# (it gates the CUDA image build and push).
# NOTE(review): the other toggles are presumably consumed by
# jupyterhub-values.gomplate.yaml - confirm against the template.
export JUPYTER_PROFILE_MINIMAL_ENABLED := env("JUPYTER_PROFILE_MINIMAL_ENABLED", "false")
export JUPYTER_PROFILE_BASE_ENABLED := env("JUPYTER_PROFILE_BASE_ENABLED", "false")
export JUPYTER_PROFILE_DATASCIENCE_ENABLED := env("JUPYTER_PROFILE_DATASCIENCE_ENABLED", "true")
export JUPYTER_PROFILE_PYSPARK_ENABLED := env("JUPYTER_PROFILE_PYSPARK_ENABLED", "false")
export JUPYTER_PROFILE_PYTORCH_ENABLED := env("JUPYTER_PROFILE_PYTORCH_ENABLED", "false")
export JUPYTER_PROFILE_TENSORFLOW_ENABLED := env("JUPYTER_PROFILE_TENSORFLOW_ENABLED", "false")
export JUPYTER_PROFILE_BUUN_STACK_ENABLED := env("JUPYTER_PROFILE_BUUN_STACK_ENABLED", "false")
export JUPYTER_PROFILE_BUUN_STACK_CUDA_ENABLED := env("JUPYTER_PROFILE_BUUN_STACK_CUDA_ENABLED", "false")
# --- Vault token lifetimes ---
# JUPYTERHUB_* values become ttl / max_ttl of the "jupyterhub" Vault role;
# NOTEBOOK_* values are only printed by recipes in this file.
export JUPYTERHUB_VAULT_TOKEN_TTL := env("JUPYTERHUB_VAULT_TOKEN_TTL", "24h")
export JUPYTERHUB_VAULT_TOKEN_MAX_TTL := env("JUPYTERHUB_VAULT_TOKEN_MAX_TTL", "720h")
export NOTEBOOK_VAULT_TOKEN_TTL := env("NOTEBOOK_VAULT_TOKEN_TTL", "24h")
export NOTEBOOK_VAULT_TOKEN_MAX_TTL := env("NOTEBOOK_VAULT_TOKEN_MAX_TTL", "168h")
# --- Log levels ---
export VAULT_AGENT_LOG_LEVEL := env("VAULT_AGENT_LOG_LEVEL", "info")
export JUPYTER_BUUNSTACK_LOG_LEVEL := env("JUPYTER_BUUNSTACK_LOG_LEVEL", "warning")
# --- Shared infrastructure ---
# Registry prefix used when tagging and pushing kernel images.
export IMAGE_REGISTRY := env("IMAGE_REGISTRY", "localhost:30500")
# Namespace checked for a "longhorn" Helm release before the NFS PV is set up.
export LONGHORN_NAMESPACE := env("LONGHORN_NAMESPACE", "longhorn")
# Realm passed to keycloak::create-client by the install recipe.
export KEYCLOAK_REALM := env("KEYCLOAK_REALM", "buunstack")
export VAULT_HOST := env("VAULT_HOST", "")
# NOTE(review): when VAULT_HOST is empty this evaluates to the bare string
# "https://", and any VAULT_ADDR already present in the caller's environment is
# replaced by it - confirm that VAULT_HOST is always set before Vault recipes run.
export VAULT_ADDR := "https://" + VAULT_HOST
# Fallback recipe for a bare `just` invocation: print the recipe list in
# declaration order, including recipes of submodules.
[private]
default:
    @just --list --list-submodules --unsorted
# Registers the JupyterHub chart repository under the name "jupyterhub" and
# refreshes only that repository's index, so unrelated Helm repositories
# configured on the machine are not re-downloaded.
# Add Helm repository
add-helm-repo:
    helm repo add jupyterhub https://jupyterhub.github.io/helm-chart
    helm repo update jupyterhub
# Counterpart of add-helm-repo: drops the local Helm repository entry named
# "jupyterhub". Installed releases are not touched by this recipe.
# Remove Helm repository
remove-helm-repo:
    helm repo remove jupyterhub
# Creates the namespace named by JUPYTERHUB_NAMESPACE only when it does not
# exist yet, so the recipe can be run repeatedly.
# The probe's output is discarded with the POSIX form `>/dev/null 2>&1`:
# linewise recipes run under `sh` by default, where the bash-only `&>` would
# background the probe and skip the create step.
# Create JupyterHub namespace
create-namespace:
    kubectl get namespace "${JUPYTERHUB_NAMESPACE}" >/dev/null 2>&1 || \
        kubectl create namespace "${JUPYTERHUB_NAMESPACE}"
# Removes the namespace named by JUPYTERHUB_NAMESPACE; a namespace that is
# already gone is not treated as an error.
# Delete JupyterHub namespace
delete-namespace:
    kubectl delete namespace --ignore-not-found ${JUPYTERHUB_NAMESPACE}
# Interactive end-to-end installation:
#   1. resolve the public host name and the cookie/auth-state crypt key,
#   2. create the namespace and the Keycloak OIDC client,
#   3. optionally provision the NFS-backed PV/PVC,
#   4. optionally configure the Vault integration,
#   5. render jupyterhub-values.yaml with gomplate and run `helm upgrade --install`.
# Values that are missing from the environment are asked for with `gum`.
# Install JupyterHub
install:
    #!/bin/bash
    set -euo pipefail
    # Ask for the public host name until a non-empty value is available.
    export JUPYTERHUB_HOST=${JUPYTERHUB_HOST:-}
    while [ -z "${JUPYTERHUB_HOST}" ]; do
        JUPYTERHUB_HOST=$(
            gum input --prompt="JupyterHub host (FQDN): " --width=100 \
                --placeholder="e.g., jupyter.example.com"
        )
    done
    # Generate JUPYTERHUB_CRYPT_KEY if not exists
    if [ -z "${JUPYTERHUB_CRYPT_KEY:-}" ]; then
        echo "Generating JUPYTERHUB_CRYPT_KEY..."
        # Assign first and export afterwards: `export VAR=$(cmd)` reports the
        # status of `export`, so a failing generator would go unnoticed and an
        # empty key would be persisted.
        JUPYTERHUB_CRYPT_KEY=$(just utils::random-password)
        if [ -z "${JUPYTERHUB_CRYPT_KEY}" ]; then
            echo "Failed to generate JUPYTERHUB_CRYPT_KEY" >&2
            exit 1
        fi
        export JUPYTERHUB_CRYPT_KEY
        # NOTE(review): the path is relative to the working directory of this
        # recipe - confirm that ../../.env.local is the intended env file.
        echo "JUPYTERHUB_CRYPT_KEY=${JUPYTERHUB_CRYPT_KEY}" >> ../../.env.local
        echo "✓ JUPYTERHUB_CRYPT_KEY generated and saved to .env.local"
    fi
    just create-namespace
    # just k8s::copy-regcred ${JUPYTERHUB_NAMESPACE}
    # Register the OIDC client; the redirect URI is the hub's OAuth callback.
    just keycloak::create-client "${KEYCLOAK_REALM}" "${JUPYTERHUB_OIDC_CLIENT_ID}" \
        "https://${JUPYTERHUB_HOST}/hub/oauth_callback" \
        "" "${JUPYTERHUB_OIDC_CLIENT_SESSION_IDLE}" "${JUPYTERHUB_OIDC_CLIENT_SESSION_MAX}"
    just add-helm-repo
    # Variables made available to the gomplate templates rendered below.
    export JUPYTERHUB_OIDC_CLIENT_ID=${JUPYTERHUB_OIDC_CLIENT_ID}
    export KEYCLOAK_REALM=${KEYCLOAK_REALM}
    export JUPYTER_PYTHON_KERNEL_TAG=${JUPYTER_PYTHON_KERNEL_TAG}
    export JUPYTER_FSGID=${JUPYTER_FSGID:-100}
    export PVC_NAME=""
    # An empty JUPYTERHUB_NFS_PV_ENABLED means the user has not decided yet.
    if [ -z "${JUPYTERHUB_NFS_PV_ENABLED}" ]; then
        if gum confirm "Are you going to use NFS PV?"; then
            JUPYTERHUB_NFS_PV_ENABLED=true
        else
            JUPYTERHUB_NFS_PV_ENABLED=false
        fi
    fi
    if [ "${JUPYTERHUB_NFS_PV_ENABLED}" = "true" ]; then
        # The NFS PV setup requires an installed Longhorn release.
        if ! helm status longhorn -n "${LONGHORN_NAMESPACE}" &>/dev/null; then
            echo "Longhorn is not installed. Please install Longhorn first." >&2
            exit 1
        fi
        JUPYTERHUB_STORAGE_CLASS=${JUPYTERHUB_STORAGE_CLASS:-longhorn}
        export JUPYTER_NFS_IP=${JUPYTER_NFS_IP:-}
        while [ -z "${JUPYTER_NFS_IP}" ]; do
            JUPYTER_NFS_IP=$(
                gum input --prompt="NFS server IP address: " --width=100 \
                    --placeholder="e.g., 192.168.10.1"
            )
        done
        export JUPYTER_NFS_PATH=${JUPYTER_NFS_PATH:-}
        while [ -z "${JUPYTER_NFS_PATH}" ]; do
            JUPYTER_NFS_PATH=$(
                gum input --prompt="NFS server export path: " --width=100 \
                    --placeholder="e.g., /volume1/drive1/jupyter"
            )
        done
        PVC_NAME=jupyter-nfs-pvc
        # The PV is cluster-scoped and is only created when it is missing;
        # the PVC is (re-)applied in the JupyterHub namespace every time.
        if ! kubectl get pv jupyter-nfs-pv &>/dev/null; then
            gomplate -f nfs-pv.gomplate.yaml | kubectl apply -f -
        fi
        kubectl apply -n "${JUPYTERHUB_NAMESPACE}" -f nfs-pvc.yaml
    fi
    # Setup Vault Agent for automatic token management
    if [ -z "${JUPYTERHUB_VAULT_INTEGRATION_ENABLED}" ]; then
        if gum confirm "Are you going to enable Vault integration?"; then
            JUPYTERHUB_VAULT_INTEGRATION_ENABLED=true
        else
            JUPYTERHUB_VAULT_INTEGRATION_ENABLED=false
        fi
    fi
    if [ "${JUPYTERHUB_VAULT_INTEGRATION_ENABLED}" = "true" ]; then
        echo "Setting up Vault Agent for automatic token management..."
        echo " Token TTL: ${JUPYTERHUB_VAULT_TOKEN_TTL}"
        echo " Token Max TTL: ${JUPYTERHUB_VAULT_TOKEN_MAX_TTL}"
        just setup-vault-integration
        # Read user policy template for Vault.
        # Assigned before exporting so that a missing or unreadable
        # user_policy.hcl aborts the installation instead of yielding an
        # empty policy.
        USER_POLICY_HCL=$(cat user_policy.hcl)
        export USER_POLICY_HCL
    else
        echo "Vault integration disabled - deploying without Vault support"
        export USER_POLICY_HCL=""
    fi
    # https://z2jh.jupyter.org/en/stable/
    gomplate -f jupyterhub-values.gomplate.yaml -o jupyterhub-values.yaml
    helm upgrade --cleanup-on-fail --install jupyterhub jupyterhub/jupyterhub \
        --version "${JUPYTERHUB_CHART_VERSION}" -n "${JUPYTERHUB_NAMESPACE}" \
        --timeout=20m -f jupyterhub-values.yaml
    # wait deployments manually because `helm upgrade --wait` does not work for JupyterHub
    just k8s::wait-deployments-ready "${JUPYTERHUB_NAMESPACE}" hub proxy
# Removes the Helm release, the remaining single-user server pods and the NFS
# PVC from the JupyterHub namespace. The PV itself is kept; only its claimRef
# is cleared (see delete-pv for removing the PV).
# Uninstall JupyterHub
uninstall:
    #!/bin/bash
    set -euo pipefail
    helm uninstall jupyterhub --namespace ${JUPYTERHUB_NAMESPACE} --wait --ignore-not-found
    # Pods selected by the single-user server component label.
    kubectl delete pods --namespace ${JUPYTERHUB_NAMESPACE} \
        --selector app.kubernetes.io/component=singleuser-server
    kubectl delete pvc jupyter-nfs-pvc --namespace ${JUPYTERHUB_NAMESPACE} --ignore-not-found
    # Nothing left to do when the NFS PV was never created.
    if ! kubectl get pv jupyter-nfs-pv >/dev/null 2>&1; then
        exit 0
    fi
    # Clear the reference to the deleted claim on the PV.
    kubectl patch pv jupyter-nfs-pv -p '{"spec":{"claimRef":null}}'
# Deletes the cluster-scoped PV "jupyter-nfs-pv" after clearing its claimRef.
# Succeeds without doing anything when the PV does not exist.
# Delete JupyterHub PV
delete-pv:
    #!/bin/bash
    set -euo pipefail
    # Guard: no PV, nothing to delete.
    if ! kubectl get pv jupyter-nfs-pv >/dev/null 2>&1; then
        exit 0
    fi
    kubectl patch pv jupyter-nfs-pv -p '{"spec":{"claimRef":null}}'
    kubectl delete pv jupyter-nfs-pv
# Builds the wheel in ../python-package, then builds the notebook image from
# ./images/datastack-notebook and, when JUPYTER_PROFILE_BUUN_STACK_CUDA_ENABLED
# is "true", the CUDA image from ./images/datastack-cuda-notebook. Images are
# tagged IMAGE_REGISTRY/<repository>:JUPYTER_PYTHON_KERNEL_TAG.
# Build Jupyter notebook kernel images
build-kernel-images:
    #!/bin/bash
    set -euxo pipefail
    # build_kernel_image <context-dir> <repository>
    # Copies the built wheel(s) into the build context, runs `docker build`
    # with that directory as context, and always removes the copied wheel(s)
    # again - also when the build fails - before returning the build's status.
    build_kernel_image() {
        local context_dir="$1" repository="$2"
        local status=0
        cp ../python-package/dist/*.whl "${context_dir}/"
        docker build -t \
            "${IMAGE_REGISTRY}/${repository}:${JUPYTER_PYTHON_KERNEL_TAG}" \
            --build-arg spark_version="3.5.4" \
            --build-arg spark_download_url="https://archive.apache.org/dist/spark/" \
            "${context_dir}" || status=$?
        rm -f "${context_dir}"/*.whl
        return "${status}"
    }
    # Build the wheel from a clean tree; the subshell keeps the working
    # directory of the rest of the script unchanged.
    (
        cd ../python-package
        rm -rf dist/ build/ *.egg-info/
        SETUPTOOLS_SCM_PRETEND_VERSION_FOR_BUUNSTACK=0.1.0 python -m build --wheel
    )
    build_kernel_image ./images/datastack-notebook "${KERNEL_IMAGE_BUUN_STACK_REPOSITORY}"
    if [ "${JUPYTER_PROFILE_BUUN_STACK_CUDA_ENABLED}" = "true" ]; then
        build_kernel_image ./images/datastack-cuda-notebook "${KERNEL_IMAGE_BUUN_STACK_CUDA_REPOSITORY}"
    fi
# Pushes the notebook image and, when JUPYTER_PROFILE_BUUN_STACK_CUDA_ENABLED
# is "true", the CUDA notebook image to IMAGE_REGISTRY. Both use the tag
# JUPYTER_PYTHON_KERNEL_TAG.
# Push Jupyter notebook kernel images
push-kernel-images:
    #!/bin/bash
    set -euo pipefail
    registry=${IMAGE_REGISTRY}
    tag=${JUPYTER_PYTHON_KERNEL_TAG}
    docker push ${registry}/${KERNEL_IMAGE_BUUN_STACK_REPOSITORY}:${tag}
    # The CUDA image is optional; stop here unless it is enabled.
    [ "${JUPYTER_PROFILE_BUUN_STACK_CUDA_ENABLED}" = "true" ] || exit 0
    docker push ${registry}/${KERNEL_IMAGE_BUUN_STACK_CUDA_REPOSITORY}:${tag}
# Creates the Vault Kubernetes-auth role "jupyterhub" (service account "hub" in
# JUPYTERHUB_NAMESPACE, policy "admin"), renders the Vault Agent configuration
# with gomplate and publishes it as the "vault-agent-config" ConfigMap.
# Parameter root_token: Vault token used for the role creation; when empty the
# token is asked for interactively.
# Setup Vault integration for JupyterHub (user-specific tokens + auto-renewal)
setup-vault-integration root_token='':
    #!/bin/bash
    set -euo pipefail
    # ttl_to_seconds <duration>
    # Prints the number of seconds for a plain integer or a duration built from
    # h/m/s components (e.g. 24h, 90m, 1h30m). Returns 1 for anything else.
    ttl_to_seconds() {
        local remaining="$1" seconds=0
        [ -n "${remaining}" ] || return 1
        if [[ "${remaining}" =~ ^[0-9]+$ ]]; then
            echo "$(( 10#${remaining} ))"
            return 0
        fi
        while [[ "${remaining}" =~ ^([0-9]+)([hms])(.*)$ ]]; do
            # 10# forces base 10 so that values such as 08 are not read as octal.
            case "${BASH_REMATCH[2]}" in
                h) seconds=$(( seconds + 10#${BASH_REMATCH[1]} * 3600 )) ;;
                m) seconds=$(( seconds + 10#${BASH_REMATCH[1]} * 60 )) ;;
                s) seconds=$(( seconds + 10#${BASH_REMATCH[1]} )) ;;
            esac
            remaining="${BASH_REMATCH[3]}"
        done
        [ -z "${remaining}" ] || return 1
        echo "${seconds}"
    }
    echo "Setting up Vault integration for JupyterHub..."
    # Create Kubernetes role for JupyterHub in Vault
    echo "Creating Kubernetes authentication role for JupyterHub..."
    echo " Service Account: hub"
    echo " Namespace: ${JUPYTERHUB_NAMESPACE}"
    echo " Policies: admin"
    echo " TTL: ${JUPYTERHUB_VAULT_TOKEN_TTL}"
    echo " Max TTL: ${JUPYTERHUB_VAULT_TOKEN_MAX_TTL}"
    export VAULT_TOKEN="{{ root_token }}"
    while [ -z "${VAULT_TOKEN}" ]; do
        VAULT_TOKEN=$(gum input --prompt="Vault root token: " --password --width=100)
    done
    # The role is bound to the namespace the hub is actually deployed into,
    # which is configurable through JUPYTERHUB_NAMESPACE (default "jupyter").
    vault write auth/kubernetes/role/jupyterhub \
        bound_service_account_names=hub \
        bound_service_account_namespaces="${JUPYTERHUB_NAMESPACE}" \
        policies=admin \
        ttl="${JUPYTERHUB_VAULT_TOKEN_TTL}" \
        max_ttl="${JUPYTERHUB_VAULT_TOKEN_MAX_TTL}"
    # Create Vault Agent configuration with gomplate
    echo "Creating Vault Agent configuration..."
    gomplate -f vault-agent-config.gomplate.hcl -o vault-agent-config.hcl
    # `create --dry-run=client -o yaml | apply` makes the ConfigMap update
    # work both for the first installation and for later re-runs.
    kubectl create configmap vault-agent-config -n "${JUPYTERHUB_NAMESPACE}" \
        --from-file=agent.hcl=vault-agent-config.hcl \
        --from-file=token-monitor.tpl=token-monitor.tpl \
        --dry-run=client -o yaml | kubectl apply -f -
    echo "✓ Vault integration configured (user-specific tokens + auto-renewal)"
    echo ""
    echo "Configuration Summary:"
    echo " JupyterHub Token TTL: ${JUPYTERHUB_VAULT_TOKEN_TTL}"
    echo " JupyterHub Token Max TTL: ${JUPYTERHUB_VAULT_TOKEN_MAX_TTL}"
    echo " User Token TTL: ${NOTEBOOK_VAULT_TOKEN_TTL}"
    echo " User Token Max TTL: ${NOTEBOOK_VAULT_TOKEN_MAX_TTL}"
    echo " Vault Agent Log Level: ${VAULT_AGENT_LOG_LEVEL}"
    # Purely informational: an unparsable TTL must not fail the setup that has
    # already been applied above.
    if ttl_seconds=$(ttl_to_seconds "${JUPYTERHUB_VAULT_TOKEN_TTL}"); then
        echo " Auto-renewal: Every $(( ttl_seconds / 2 ))s (TTL/2)"
    else
        echo " Auto-renewal: Every TTL/2 (TTL: ${JUPYTERHUB_VAULT_TOKEN_TTL})"
    fi
    echo ""
    echo "Users can now access Vault from notebooks using:"
    echo " from buunstack import SecretStore"
    echo " secrets = SecretStore()"
    echo " # Each user gets their own isolated Vault token and policy"
    echo " # Admin token is automatically renewed by Vault Agent"
# Delegates to vault::create-token-and-store to create a token with the "admin"
# policy and store it under jupyterhub/vault-token, using the configured TTL
# and maximum TTL, then prints how to read the token back.
# Create JupyterHub Vault token (uses admin policy for JWT operations)
create-jupyterhub-vault-token:
    #!/bin/bash
    set -euo pipefail
    printf '%s\n' \
        "Creating JupyterHub Vault token with admin policy..." \
        " TTL: ${JUPYTERHUB_VAULT_TOKEN_TTL}" \
        " Max TTL: ${JUPYTERHUB_VAULT_TOKEN_MAX_TTL}"
    # JupyterHub needs admin privileges to read Keycloak credentials from Vault,
    # hence the "admin" policy. The token is created and stored in one step.
    just vault::create-token-and-store admin jupyterhub/vault-token \
        ${JUPYTERHUB_VAULT_TOKEN_TTL} ${JUPYTERHUB_VAULT_TOKEN_MAX_TTL}
    # The last line is single-quoted so that the command substitution is
    # printed literally as a usage hint instead of being executed.
    printf '%s\n' \
        "✓ JupyterHub Vault token created and stored" \
        "" \
        "To use in JupyterHub deployment:" \
        ' JUPYTERHUB_VAULT_TOKEN=$(just vault::get jupyterhub/vault-token token)'