feat(jupyterhub): vault token w/o keycloak auth

This commit is contained in:
Masaki Yatsu
2025-09-03 10:11:06 +09:00
parent 02ec5eb1e2
commit d233373219
15 changed files with 583 additions and 612 deletions

View File

@@ -146,12 +146,6 @@ RUN pip install \
tavily-python \
tweet-preprocessor
# Install buunstack package
COPY *.whl /opt/
RUN pip install /opt/*.whl && \
fix-permissions "${CONDA_DIR}" && \
fix-permissions "/home/${NB_USER}"
# Install PyTorch with pip (https://pytorch.org/get-started/locally/)
# langchain-openai must be updated to avoid pydantic v2 error
# https://github.com/run-llama/llama_index/issues/16540
@@ -164,6 +158,11 @@ RUN pip install --no-cache-dir --extra-index-url=https://pypi.nvidia.com --index
fix-permissions "${CONDA_DIR}" && \
fix-permissions "/home/${NB_USER}"
# Install buunstack package
COPY *.whl /opt/
RUN pip install /opt/*.whl && \
fix-permissions "${CONDA_DIR}" && \
fix-permissions "/home/${NB_USER}"
WORKDIR "${HOME}"
EXPOSE 4040

View File

@@ -146,12 +146,6 @@ RUN pip install \
tavily-python \
tweet-preprocessor
# Install buunstack package
COPY *.whl /opt/
RUN pip install /opt/*.whl && \
fix-permissions "${CONDA_DIR}" && \
fix-permissions "/home/${NB_USER}"
# Install PyTorch with pip (https://pytorch.org/get-started/locally/)
# langchain-openai must be updated to avoid pydantic v2 error
# https://github.com/run-llama/llama_index/issues/16540
@@ -164,5 +158,11 @@ RUN pip install --no-cache-dir --index-url 'https://download.pytorch.org/whl/cpu
fix-permissions "${CONDA_DIR}" && \
fix-permissions "/home/${NB_USER}"
# Install buunstack package
COPY *.whl /opt/
RUN pip install /opt/*.whl && \
fix-permissions "${CONDA_DIR}" && \
fix-permissions "/home/${NB_USER}"
WORKDIR "${HOME}"
EXPOSE 4040

View File

@@ -1,4 +1,21 @@
hub:
extraEnv:
JUPYTERHUB_CRYPT_KEY: {{ .Env.JUPYTERHUB_CRYPT_KEY | quote }}
# Install packages at container startup
extraFiles:
startup.sh:
mountPath: /usr/local/bin/startup.sh
mode: 0755
stringData: |
#!/bin/bash
pip install --no-cache-dir hvac==2.3.0
exec jupyterhub --config /usr/local/etc/jupyterhub/jupyterhub_config.py --upgrade-db
# Override the default command to run our startup script first
command:
- /usr/local/bin/startup.sh
config:
JupyterHub:
authenticator_class: generic-oauth
@@ -24,48 +41,97 @@ hub:
- profile
- email
{{- if eq .Env.JUPYTERHUB_VAULT_INTEGRATION_ENABLED "true" }}
extraConfig:
01-vault-integration: |
import os
pre-spawn-hook: |
# Set environment variables for spawned containers
import hvac
async def pre_spawn_hook(spawner):
"""Pass OIDC tokens and Vault config to notebook environment"""
auth_state = await spawner.user.get_auth_state()
if auth_state:
if 'access_token' in auth_state:
spawner.environment['JUPYTERHUB_OIDC_ACCESS_TOKEN'] = auth_state['access_token']
if 'refresh_token' in auth_state:
spawner.environment['JUPYTERHUB_OIDC_REFRESH_TOKEN'] = auth_state['refresh_token']
if 'id_token' in auth_state:
spawner.environment['JUPYTERHUB_OIDC_ID_TOKEN'] = auth_state['id_token']
if 'expires_at' in auth_state:
spawner.environment['JUPYTERHUB_OIDC_TOKEN_EXPIRES_AT'] = str(auth_state['expires_at'])
"""Set essential environment variables for spawned containers"""
# PostgreSQL configuration
spawner.environment["POSTGRES_HOST"] = "postgres-cluster-rw.postgres"
spawner.environment["POSTGRES_PORT"] = "5432"
# Add Keycloak configuration for token refresh
spawner.environment['KEYCLOAK_HOST'] = '{{ .Env.KEYCLOAK_HOST }}'
spawner.environment['KEYCLOAK_REALM'] = '{{ .Env.KEYCLOAK_REALM }}'
spawner.environment['KEYCLOAK_CLIENT_ID'] = 'jupyterhub'
# JupyterHub API configuration
spawner.environment["JUPYTERHUB_API_URL"] = "http://hub:8081/hub/api"
# Logging configuration
spawner.environment["BUUNSTACK_LOG_LEVEL"] = "{{ .Env.JUPYTER_BUUNSTACK_LOG_LEVEL }}"
# Create user-specific Vault token directly
try:
username = spawner.user.name
# Step 1: Initialize admin Vault client
vault_client = hvac.Client(url="{{ .Env.VAULT_ADDR }}", verify=False)
vault_client.token = "{{ .Env.JUPYTERHUB_VAULT_TOKEN }}"
if not vault_client.is_authenticated():
raise Exception("Admin token is not authenticated")
# Step 2: Create user-specific policy
user_policy_name = "jupyter-user-{}".format(username)
user_path = "secret/data/jupyter/users/{}/*".format(username)
user_metadata_path = "secret/metadata/jupyter/users/{}/*".format(username)
user_base_path = "secret/metadata/jupyter/users/{}".format(username)
user_policy = (
"# User-specific policy for {}\n".format(username) +
"path \"{}\" ".format(user_path) + "{\n" +
" capabilities = [\"create\", \"update\", \"read\", \"delete\", \"list\"]\n" +
"}\n\n" +
"path \"{}\" ".format(user_metadata_path) + "{\n" +
" capabilities = [\"list\", \"read\", \"delete\", \"update\"]\n" +
"}\n\n" +
"path \"{}\" ".format(user_base_path) + "{\n" +
" capabilities = [\"list\"]\n" +
"}\n\n" +
"# Read access to shared resources\n" +
"path \"secret/data/jupyter/shared/*\" {\n" +
" capabilities = [\"read\", \"list\"]\n" +
"}\n\n" +
"path \"secret/metadata/jupyter/shared\" {\n" +
" capabilities = [\"list\"]\n" +
"}\n\n" +
"# Token management capabilities\n" +
"path \"auth/token/lookup-self\" {\n" +
" capabilities = [\"read\"]\n" +
"}\n\n" +
"path \"auth/token/renew-self\" {\n" +
" capabilities = [\"update\"]\n" +
"}"
)
# Write user-specific policy
try:
vault_client.sys.create_or_update_policy(user_policy_name, user_policy)
spawner.log.info("✅ Created policy: {}".format(user_policy_name))
except Exception as policy_e:
spawner.log.warning("Policy creation failed (may already exist): {}".format(policy_e))
# Step 3: Create user-specific token
token_response = vault_client.auth.token.create(
policies=[user_policy_name],
ttl="1h",
renewable=True,
display_name="notebook-{}".format(username)
)
user_vault_token = token_response["auth"]["client_token"]
lease_duration = token_response["auth"].get("lease_duration", 3600)
# Set user-specific Vault token as environment variable
spawner.environment["NOTEBOOK_VAULT_TOKEN"] = user_vault_token
spawner.log.info("✅ User-specific Vault token created for {} (expires in {}s, renewable)".format(username, lease_duration))
except Exception as e:
spawner.log.error("Failed to create user-specific Vault token for {}: {}".format(spawner.user.name, e))
import traceback
spawner.log.error("Full traceback: {}".format(traceback.format_exc()))
c.Spawner.pre_spawn_hook = pre_spawn_hook
{{- end }}
02-postgres-integration: |
from functools import wraps
# Store the original pre_spawn_hook if it exists
original_hook = c.Spawner.pre_spawn_hook if hasattr(c.Spawner, 'pre_spawn_hook') else None
async def postgres_pre_spawn_hook(spawner):
"""Add PostgreSQL connection information to notebook environment"""
# Call the original hook first if it exists
if original_hook:
await original_hook(spawner)
# Add PostgreSQL configuration
spawner.environment['POSTGRES_HOST'] = 'postgres-cluster-rw.postgres'
spawner.environment['POSTGRES_PORT'] = '5432'
c.Spawner.pre_spawn_hook = postgres_pre_spawn_hook
podSecurityContext:
fsGroup: {{ .Env.JUPYTER_FSGID }}
@@ -85,23 +151,8 @@ singleuser:
{{ end -}}
capacity: 10Gi
{{- if eq .Env.JUPYTERHUB_VAULT_INTEGRATION_ENABLED "true" }}
extraEnv:
VAULT_ADDR: "{{ .Env.VAULT_ADDR }}"
KEYCLOAK_HOST: "{{ .Env.KEYCLOAK_HOST }}"
KEYCLOAK_REALM: "{{ .Env.KEYCLOAK_REALM }}"
# lifecycleHooks:
# postStart:
# exec:
# command:
# - /bin/bash
# - -c
# - |
# # Install hvac for Vault integration
# mamba install hvac requests
# echo "Vault integration ready"
{{- end }}
networkPolicy:
egress:
- to:
@@ -129,7 +180,6 @@ singleuser:
ports:
- port: 4000
protocol: TCP
{{- if eq .Env.JUPYTERHUB_VAULT_INTEGRATION_ENABLED "true" }}
- to:
- namespaceSelector:
matchLabels:
@@ -137,9 +187,6 @@ singleuser:
ports:
- port: 8200
protocol: TCP
- port: 8201
protocol: TCP
{{- end }}
- to:
- ipBlock:
cidr: 0.0.0.0/0

View File

@@ -5,7 +5,7 @@ export JUPYTERHUB_CHART_VERSION := env("JUPYTERHUB_CHART_VERSION", "4.2.0")
export JUPYTERHUB_OIDC_CLIENT_ID := env("JUPYTERHUB_OIDC_CLIENT_ID", "jupyterhub")
export JUPYTERHUB_NFS_PV_ENABLED := env("JUPYTERHUB_NFS_PV_ENABLED", "")
export JUPYTERHUB_VAULT_INTEGRATION_ENABLED := env("JUPYTERHUB_VAULT_INTEGRATION_ENABLED", "")
export JUPYTER_PYTHON_KERNEL_TAG := env("JUPYTER_PYTHON_KERNEL_TAG", "python-3.12-8")
export JUPYTER_PYTHON_KERNEL_TAG := env("JUPYTER_PYTHON_KERNEL_TAG", "python-3.12-24")
export KERNEL_IMAGE_BUUN_STACK_REPOSITORY := env("KERNEL_IMAGE_BUUN_STACK_REPOSITORY", "buun-stack-notebook")
export KERNEL_IMAGE_BUUN_STACK_CUDA_REPOSITORY := env("KERNEL_IMAGE_BUUN_STACK_CUDA_REPOSITORY", "buun-stack-cuda-notebook")
export JUPYTER_PROFILE_MINIMAL_ENABLED := env("JUPYTER_PROFILE_MINIMAL_ENABLED", "false")
@@ -20,6 +20,7 @@ export IMAGE_REGISTRY := env("IMAGE_REGISTRY", "localhost:30500")
export KEYCLOAK_REALM := env("KEYCLOAK_REALM", "buunstack")
export LONGHORN_NAMESPACE := env("LONGHORN_NAMESPACE", "longhorn")
export VAULT_ADDR := env("VAULT_ADDR", "http://vault.vault.svc:8200")
export JUPYTER_BUUNSTACK_LOG_LEVEL := env("JUPYTER_BUUNSTACK_LOG_LEVEL", "info")
[private]
default:
@@ -54,6 +55,15 @@ install:
--placeholder="e.g., jupyter.example.com"
)
done
# Generate JUPYTERHUB_CRYPT_KEY if it is not already set
if [ -z "${JUPYTERHUB_CRYPT_KEY:-}" ]; then
echo "Generating JUPYTERHUB_CRYPT_KEY..."
export JUPYTERHUB_CRYPT_KEY=$(just utils::random-password)
echo "JUPYTERHUB_CRYPT_KEY=${JUPYTERHUB_CRYPT_KEY}" >> ../../.env.local
echo "✓ JUPYTERHUB_CRYPT_KEY generated and saved to .env.local"
fi
just create-namespace
# just k8s::copy-regcred ${JUPYTERHUB_NAMESPACE}
just keycloak::create-client ${KEYCLOAK_REALM} ${JUPYTERHUB_OIDC_CLIENT_ID} \
@@ -96,8 +106,17 @@ install:
fi
kubectl apply -n ${JUPYTERHUB_NAMESPACE} -f nfs-pvc.yaml
fi
# Create or get JupyterHub Vault token before gomplate
if ! just vault::exist jupyterhub/vault-token &>/dev/null; then
echo "Creating JupyterHub Vault token..."
just create-jupyterhub-vault-token
fi
export JUPYTERHUB_VAULT_TOKEN=$(just vault::get jupyterhub/vault-token token)
# https://z2jh.jupyter.org/en/stable/
gomplate -f jupyterhub-values.gomplate.yaml -o jupyterhub-values.yaml
helm upgrade --cleanup-on-fail --install jupyterhub jupyterhub/jupyterhub \
--version ${JUPYTERHUB_CHART_VERSION} -n ${JUPYTERHUB_NAMESPACE} \
--timeout=20m -f jupyterhub-values.yaml
@@ -138,62 +157,68 @@ delete-pv:
# Build Jupyter notebook kernel images
build-kernel-images:
#!/bin/bash
set -euo pipefail
# Build python package wheel
cd ../python-package
rm -rf dist/ build/ *.egg-info/
SETUPTOOLS_SCM_PRETEND_VERSION_FOR_BUUNSTACK=0.1.0 python -m build --wheel
cd ../jupyterhub
# Copy built wheel to image directories
cp ../python-package/dist/*.whl ./images/datastack-notebook/
cp ../python-package/dist/*.whl ./images/datastack-cuda-notebook/
set -euxo pipefail
(
cd ../python-package
rm -rf dist/ build/ *.egg-info/
SETUPTOOLS_SCM_PRETEND_VERSION_FOR_BUUNSTACK=0.1.0 python -m build --wheel
)
(
cd ./images/datastack-notebook
cp ../../../python-package/dist/*.whl ./
docker build -t \
${IMAGE_REGISTRY}/${KERNEL_IMAGE_BUUN_STACK_REPOSITORY}:${JUPYTER_PYTHON_KERNEL_TAG} \
--build-arg spark_version="3.5.4" \
--build-arg spark_download_url="https://archive.apache.org/dist/spark/" \
.
)
(
cd ./images/datastack-cuda-notebook
docker build -t \
${IMAGE_REGISTRY}/${KERNEL_IMAGE_BUUN_STACK_CUDA_REPOSITORY}:${JUPYTER_PYTHON_KERNEL_TAG} \
--build-arg spark_version="3.5.4" \
--build-arg spark_download_url="https://archive.apache.org/dist/spark/" \
.
)
# Clean up copied wheel files
rm -f ./images/datastack-notebook/*.whl
rm -f ./images/datastack-cuda-notebook/*.whl
if [ "${JUPYTER_PROFILE_BUUN_STACK_CUDA_ENABLED}" = "true" ]; then
(
cd ./images/datastack-cuda-notebook
cp ../../../python-package/dist/*.whl ./
docker build -t \
${IMAGE_REGISTRY}/${KERNEL_IMAGE_BUUN_STACK_CUDA_REPOSITORY}:${JUPYTER_PYTHON_KERNEL_TAG} \
--build-arg spark_version="3.5.4" \
--build-arg spark_download_url="https://archive.apache.org/dist/spark/" \
.
)
rm -f ./images/datastack-cuda-notebook/*.whl
fi
# Push Jupyter notebook kernel images
push-kernel-images: build-kernel-images
docker push ${IMAGE_REGISTRY}/${KERNEL_IMAGE_BUUN_STACK_REPOSITORY}:${JUPYTER_PYTHON_KERNEL_TAG}
docker push ${IMAGE_REGISTRY}/${KERNEL_IMAGE_BUUN_STACK_CUDA_REPOSITORY}:${JUPYTER_PYTHON_KERNEL_TAG}
# Configure Vault for JupyterHub integration
setup-vault-integration:
#!/bin/bash
set -euo pipefail
echo "Creating JupyterHub Vault policy..."
just vault::write-policy jupyter-user $(pwd)/vault-policy.hcl
echo "✓ JupyterHub policy created"
docker push ${IMAGE_REGISTRY}/${KERNEL_IMAGE_BUUN_STACK_REPOSITORY}:${JUPYTER_PYTHON_KERNEL_TAG}
if [ "${JUPYTER_PROFILE_BUUN_STACK_CUDA_ENABLED}" = "true" ]; then
docker push ${IMAGE_REGISTRY}/${KERNEL_IMAGE_BUUN_STACK_CUDA_REPOSITORY}:${JUPYTER_PYTHON_KERNEL_TAG}
fi
# Setup JWT auth for JupyterHub tokens (no re-authentication needed)
# Setup Vault integration for JupyterHub (user-specific tokens)
setup-vault-jwt-auth:
#!/bin/bash
set -euo pipefail
echo "Setting up Vault integration for JupyterHub..."
just setup-vault-integration
just vault::setup-jwt-auth "jupyterhub" "jupyter-token" "jupyter-user"
echo "✓ Vault integration configured"
echo "✓ Vault integration configured (user-specific tokens)"
echo ""
echo "Users can now access Vault from notebooks using:"
echo " import os, hvac"
echo " client = hvac.Client(url=os.getenv('VAULT_ADDR'), verify=False)"
echo " client.auth.jwt.jwt_login("
echo " role='jupyter-token',"
echo " jwt=os.getenv('JUPYTERHUB_OIDC_ACCESS_TOKEN'),"
echo " path='jwt'"
echo " )"
echo " from buunstack import SecretStore"
echo " secrets = SecretStore()"
echo " # Each user gets their own isolated Vault token and policy"
# Create the hub's admin-policy Vault token (ttl defaults to 720h) and store it under jupyterhub/vault-token
create-jupyterhub-vault-token ttl="720h":
#!/bin/bash
set -euo pipefail
echo "Creating JupyterHub Vault token with admin policy..."
# The install recipe reads this token back with `just vault::get` and exports it
# as JUPYTERHUB_VAULT_TOKEN; the hub's pre-spawn hook then authenticates with it
# to write per-user policies and create per-user Vault tokens.
# NOTE(review): presumably the helper's arguments are <policy> <storage path> <ttl>;
# confirm against the vault module's create-token-and-store recipe.
just vault::create-token-and-store admin jupyterhub/vault-token {{ ttl }}
echo "✓ JupyterHub Vault token created and stored"
echo ""
echo "To use in JupyterHub deployment:"
echo " JUPYTERHUB_VAULT_TOKEN=\$(just vault::get jupyterhub/vault-token token)"

View File

@@ -1,26 +0,0 @@
# JupyterHub user policy for Vault access
# Read access to shared jupyter resources
path "secret/data/jupyter/shared/*" {
capabilities = ["read", "list"]
}
# Allow users to list shared directory
path "secret/metadata/jupyter/shared" {
capabilities = ["list"]
}
# Full access to user-specific paths
path "secret/data/jupyter/users/{{identity.entity.aliases.auth_jwt_*.metadata.username}}/*" {
capabilities = ["create", "update", "read", "delete", "list"]
}
# Allow users to list, read, and delete metadata for entries under their own directory
path "secret/metadata/jupyter/users/{{identity.entity.aliases.auth_jwt_*.metadata.username}}/*" {
capabilities = ["list", "read", "delete"]
}
# Allow users to list only their own user directory for navigation
path "secret/metadata/jupyter/users/{{identity.entity.aliases.auth_jwt_*.metadata.username}}" {
capabilities = ["list"]
}