feat(jupyterhub): vault token w/o keycloak auth
This commit is contained in:
@@ -146,12 +146,6 @@ RUN pip install \
|
||||
tavily-python \
|
||||
tweet-preprocessor
|
||||
|
||||
# Install buunstack package
|
||||
COPY *.whl /opt/
|
||||
RUN pip install /opt/*.whl && \
|
||||
fix-permissions "${CONDA_DIR}" && \
|
||||
fix-permissions "/home/${NB_USER}"
|
||||
|
||||
# Install PyTorch with pip (https://pytorch.org/get-started/locally/)
|
||||
# langchain-openai must be updated to avoid pydantic v2 error
|
||||
# https://github.com/run-llama/llama_index/issues/16540
|
||||
@@ -164,6 +158,11 @@ RUN pip install --no-cache-dir --extra-index-url=https://pypi.nvidia.com --index
|
||||
fix-permissions "${CONDA_DIR}" && \
|
||||
fix-permissions "/home/${NB_USER}"
|
||||
|
||||
# Install buunstack package
|
||||
COPY *.whl /opt/
|
||||
RUN pip install /opt/*.whl && \
|
||||
fix-permissions "${CONDA_DIR}" && \
|
||||
fix-permissions "/home/${NB_USER}"
|
||||
|
||||
WORKDIR "${HOME}"
|
||||
EXPOSE 4040
|
||||
|
||||
@@ -146,12 +146,6 @@ RUN pip install \
|
||||
tavily-python \
|
||||
tweet-preprocessor
|
||||
|
||||
# Install buunstack package
|
||||
COPY *.whl /opt/
|
||||
RUN pip install /opt/*.whl && \
|
||||
fix-permissions "${CONDA_DIR}" && \
|
||||
fix-permissions "/home/${NB_USER}"
|
||||
|
||||
# Install PyTorch with pip (https://pytorch.org/get-started/locally/)
|
||||
# langchain-openai must be updated to avoid pydantic v2 error
|
||||
# https://github.com/run-llama/llama_index/issues/16540
|
||||
@@ -164,5 +158,11 @@ RUN pip install --no-cache-dir --index-url 'https://download.pytorch.org/whl/cpu
|
||||
fix-permissions "${CONDA_DIR}" && \
|
||||
fix-permissions "/home/${NB_USER}"
|
||||
|
||||
# Install buunstack package
|
||||
COPY *.whl /opt/
|
||||
RUN pip install /opt/*.whl && \
|
||||
fix-permissions "${CONDA_DIR}" && \
|
||||
fix-permissions "/home/${NB_USER}"
|
||||
|
||||
WORKDIR "${HOME}"
|
||||
EXPOSE 4040
|
||||
|
||||
@@ -1,4 +1,21 @@
|
||||
hub:
|
||||
extraEnv:
|
||||
JUPYTERHUB_CRYPT_KEY: {{ .Env.JUPYTERHUB_CRYPT_KEY | quote }}
|
||||
|
||||
# Install packages at container startup
|
||||
extraFiles:
|
||||
startup.sh:
|
||||
mountPath: /usr/local/bin/startup.sh
|
||||
mode: 0755
|
||||
stringData: |
|
||||
#!/bin/bash
|
||||
pip install --no-cache-dir hvac==2.3.0
|
||||
exec jupyterhub --config /usr/local/etc/jupyterhub/jupyterhub_config.py --upgrade-db
|
||||
|
||||
# Override the default command to run our startup script first
|
||||
command:
|
||||
- /usr/local/bin/startup.sh
|
||||
|
||||
config:
|
||||
JupyterHub:
|
||||
authenticator_class: generic-oauth
|
||||
@@ -24,48 +41,97 @@ hub:
|
||||
- profile
|
||||
- email
|
||||
|
||||
{{- if eq .Env.JUPYTERHUB_VAULT_INTEGRATION_ENABLED "true" }}
|
||||
extraConfig:
|
||||
01-vault-integration: |
|
||||
import os
|
||||
pre-spawn-hook: |
|
||||
# Set environment variables for spawned containers
|
||||
import hvac
|
||||
|
||||
async def pre_spawn_hook(spawner):
|
||||
"""Pass OIDC tokens and Vault config to notebook environment"""
|
||||
auth_state = await spawner.user.get_auth_state()
|
||||
if auth_state:
|
||||
if 'access_token' in auth_state:
|
||||
spawner.environment['JUPYTERHUB_OIDC_ACCESS_TOKEN'] = auth_state['access_token']
|
||||
if 'refresh_token' in auth_state:
|
||||
spawner.environment['JUPYTERHUB_OIDC_REFRESH_TOKEN'] = auth_state['refresh_token']
|
||||
if 'id_token' in auth_state:
|
||||
spawner.environment['JUPYTERHUB_OIDC_ID_TOKEN'] = auth_state['id_token']
|
||||
if 'expires_at' in auth_state:
|
||||
spawner.environment['JUPYTERHUB_OIDC_TOKEN_EXPIRES_AT'] = str(auth_state['expires_at'])
|
||||
"""Set essential environment variables for spawned containers"""
|
||||
# PostgreSQL configuration
|
||||
spawner.environment["POSTGRES_HOST"] = "postgres-cluster-rw.postgres"
|
||||
spawner.environment["POSTGRES_PORT"] = "5432"
|
||||
|
||||
# Add Keycloak configuration for token refresh
|
||||
spawner.environment['KEYCLOAK_HOST'] = '{{ .Env.KEYCLOAK_HOST }}'
|
||||
spawner.environment['KEYCLOAK_REALM'] = '{{ .Env.KEYCLOAK_REALM }}'
|
||||
spawner.environment['KEYCLOAK_CLIENT_ID'] = 'jupyterhub'
|
||||
# JupyterHub API configuration
|
||||
spawner.environment["JUPYTERHUB_API_URL"] = "http://hub:8081/hub/api"
|
||||
|
||||
# Logging configuration
|
||||
spawner.environment["BUUNSTACK_LOG_LEVEL"] = "{{ .Env.JUPYTER_BUUNSTACK_LOG_LEVEL }}"
|
||||
|
||||
# Create user-specific Vault token directly
|
||||
try:
|
||||
username = spawner.user.name
|
||||
|
||||
# Step 1: Initialize admin Vault client
|
||||
vault_client = hvac.Client(url="{{ .Env.VAULT_ADDR }}", verify=False)
|
||||
vault_client.token = "{{ .Env.JUPYTERHUB_VAULT_TOKEN }}"
|
||||
|
||||
if not vault_client.is_authenticated():
|
||||
raise Exception("Admin token is not authenticated")
|
||||
|
||||
# Step 2: Create user-specific policy
|
||||
user_policy_name = "jupyter-user-{}".format(username)
|
||||
user_path = "secret/data/jupyter/users/{}/*".format(username)
|
||||
user_metadata_path = "secret/metadata/jupyter/users/{}/*".format(username)
|
||||
user_base_path = "secret/metadata/jupyter/users/{}".format(username)
|
||||
|
||||
user_policy = (
|
||||
"# User-specific policy for {}\n".format(username) +
|
||||
"path \"{}\" ".format(user_path) + "{\n" +
|
||||
" capabilities = [\"create\", \"update\", \"read\", \"delete\", \"list\"]\n" +
|
||||
"}\n\n" +
|
||||
"path \"{}\" ".format(user_metadata_path) + "{\n" +
|
||||
" capabilities = [\"list\", \"read\", \"delete\", \"update\"]\n" +
|
||||
"}\n\n" +
|
||||
"path \"{}\" ".format(user_base_path) + "{\n" +
|
||||
" capabilities = [\"list\"]\n" +
|
||||
"}\n\n" +
|
||||
"# Read access to shared resources\n" +
|
||||
"path \"secret/data/jupyter/shared/*\" {\n" +
|
||||
" capabilities = [\"read\", \"list\"]\n" +
|
||||
"}\n\n" +
|
||||
"path \"secret/metadata/jupyter/shared\" {\n" +
|
||||
" capabilities = [\"list\"]\n" +
|
||||
"}\n\n" +
|
||||
"# Token management capabilities\n" +
|
||||
"path \"auth/token/lookup-self\" {\n" +
|
||||
" capabilities = [\"read\"]\n" +
|
||||
"}\n\n" +
|
||||
"path \"auth/token/renew-self\" {\n" +
|
||||
" capabilities = [\"update\"]\n" +
|
||||
"}"
|
||||
)
|
||||
|
||||
# Write user-specific policy
|
||||
try:
|
||||
vault_client.sys.create_or_update_policy(user_policy_name, user_policy)
|
||||
spawner.log.info("✅ Created policy: {}".format(user_policy_name))
|
||||
except Exception as policy_e:
|
||||
spawner.log.warning("Policy creation failed (may already exist): {}".format(policy_e))
|
||||
|
||||
# Step 3: Create user-specific token
|
||||
token_response = vault_client.auth.token.create(
|
||||
policies=[user_policy_name],
|
||||
ttl="1h",
|
||||
renewable=True,
|
||||
display_name="notebook-{}".format(username)
|
||||
)
|
||||
|
||||
user_vault_token = token_response["auth"]["client_token"]
|
||||
lease_duration = token_response["auth"].get("lease_duration", 3600)
|
||||
|
||||
# Set user-specific Vault token as environment variable
|
||||
spawner.environment["NOTEBOOK_VAULT_TOKEN"] = user_vault_token
|
||||
|
||||
spawner.log.info("✅ User-specific Vault token created for {} (expires in {}s, renewable)".format(username, lease_duration))
|
||||
|
||||
except Exception as e:
|
||||
spawner.log.error("Failed to create user-specific Vault token for {}: {}".format(spawner.user.name, e))
|
||||
import traceback
|
||||
spawner.log.error("Full traceback: {}".format(traceback.format_exc()))
|
||||
|
||||
c.Spawner.pre_spawn_hook = pre_spawn_hook
|
||||
{{- end }}
|
||||
02-postgres-integration: |
|
||||
from functools import wraps
|
||||
|
||||
# Store the original pre_spawn_hook if it exists
|
||||
original_hook = c.Spawner.pre_spawn_hook if hasattr(c.Spawner, 'pre_spawn_hook') else None
|
||||
|
||||
async def postgres_pre_spawn_hook(spawner):
|
||||
"""Add PostgreSQL connection information to notebook environment"""
|
||||
# Call the original hook first if it exists
|
||||
if original_hook:
|
||||
await original_hook(spawner)
|
||||
|
||||
# Add PostgreSQL configuration
|
||||
spawner.environment['POSTGRES_HOST'] = 'postgres-cluster-rw.postgres'
|
||||
spawner.environment['POSTGRES_PORT'] = '5432'
|
||||
|
||||
c.Spawner.pre_spawn_hook = postgres_pre_spawn_hook
|
||||
|
||||
podSecurityContext:
|
||||
fsGroup: {{ .Env.JUPYTER_FSGID }}
|
||||
@@ -85,23 +151,8 @@ singleuser:
|
||||
{{ end -}}
|
||||
capacity: 10Gi
|
||||
|
||||
{{- if eq .Env.JUPYTERHUB_VAULT_INTEGRATION_ENABLED "true" }}
|
||||
extraEnv:
|
||||
VAULT_ADDR: "{{ .Env.VAULT_ADDR }}"
|
||||
KEYCLOAK_HOST: "{{ .Env.KEYCLOAK_HOST }}"
|
||||
KEYCLOAK_REALM: "{{ .Env.KEYCLOAK_REALM }}"
|
||||
|
||||
# lifecycleHooks:
|
||||
# postStart:
|
||||
# exec:
|
||||
# command:
|
||||
# - /bin/bash
|
||||
# - -c
|
||||
# - |
|
||||
# # Install hvac for Vault integration
|
||||
# mamba install hvac requests
|
||||
# echo "Vault integration ready"
|
||||
{{- end }}
|
||||
networkPolicy:
|
||||
egress:
|
||||
- to:
|
||||
@@ -129,7 +180,6 @@ singleuser:
|
||||
ports:
|
||||
- port: 4000
|
||||
protocol: TCP
|
||||
{{- if eq .Env.JUPYTERHUB_VAULT_INTEGRATION_ENABLED "true" }}
|
||||
- to:
|
||||
- namespaceSelector:
|
||||
matchLabels:
|
||||
@@ -137,9 +187,6 @@ singleuser:
|
||||
ports:
|
||||
- port: 8200
|
||||
protocol: TCP
|
||||
- port: 8201
|
||||
protocol: TCP
|
||||
{{- end }}
|
||||
- to:
|
||||
- ipBlock:
|
||||
cidr: 0.0.0.0/0
|
||||
|
||||
@@ -5,7 +5,7 @@ export JUPYTERHUB_CHART_VERSION := env("JUPYTERHUB_CHART_VERSION", "4.2.0")
|
||||
export JUPYTERHUB_OIDC_CLIENT_ID := env("JUPYTERHUB_OIDC_CLIENT_ID", "jupyterhub")
|
||||
export JUPYTERHUB_NFS_PV_ENABLED := env("JUPYTERHUB_NFS_PV_ENABLED", "")
|
||||
export JUPYTERHUB_VAULT_INTEGRATION_ENABLED := env("JUPYTERHUB_VAULT_INTEGRATION_ENABLED", "")
|
||||
export JUPYTER_PYTHON_KERNEL_TAG := env("JUPYTER_PYTHON_KERNEL_TAG", "python-3.12-8")
|
||||
export JUPYTER_PYTHON_KERNEL_TAG := env("JUPYTER_PYTHON_KERNEL_TAG", "python-3.12-24")
|
||||
export KERNEL_IMAGE_BUUN_STACK_REPOSITORY := env("KERNEL_IMAGE_BUUN_STACK_REPOSITORY", "buun-stack-notebook")
|
||||
export KERNEL_IMAGE_BUUN_STACK_CUDA_REPOSITORY := env("KERNEL_IMAGE_BUUN_STACK_CUDA_REPOSITORY", "buun-stack-cuda-notebook")
|
||||
export JUPYTER_PROFILE_MINIMAL_ENABLED := env("JUPYTER_PROFILE_MINIMAL_ENABLED", "false")
|
||||
@@ -20,6 +20,7 @@ export IMAGE_REGISTRY := env("IMAGE_REGISTRY", "localhost:30500")
|
||||
export KEYCLOAK_REALM := env("KEYCLOAK_REALM", "buunstack")
|
||||
export LONGHORN_NAMESPACE := env("LONGHORN_NAMESPACE", "longhorn")
|
||||
export VAULT_ADDR := env("VAULT_ADDR", "http://vault.vault.svc:8200")
|
||||
export JUPYTER_BUUNSTACK_LOG_LEVEL := env("JUPYTER_BUUNSTACK_LOG_LEVEL", "info")
|
||||
|
||||
[private]
|
||||
default:
|
||||
@@ -54,6 +55,15 @@ install:
|
||||
--placeholder="e.g., jupyter.example.com"
|
||||
)
|
||||
done
|
||||
|
||||
# Generate JUPYTERHUB_CRYPT_KEY if not exists
|
||||
if [ -z "${JUPYTERHUB_CRYPT_KEY:-}" ]; then
|
||||
echo "Generating JUPYTERHUB_CRYPT_KEY..."
|
||||
export JUPYTERHUB_CRYPT_KEY=$(just utils::random-password)
|
||||
echo "JUPYTERHUB_CRYPT_KEY=${JUPYTERHUB_CRYPT_KEY}" >> ../../.env.local
|
||||
echo "✓ JUPYTERHUB_CRYPT_KEY generated and saved to .env.local"
|
||||
fi
|
||||
|
||||
just create-namespace
|
||||
# just k8s::copy-regcred ${JUPYTERHUB_NAMESPACE}
|
||||
just keycloak::create-client ${KEYCLOAK_REALM} ${JUPYTERHUB_OIDC_CLIENT_ID} \
|
||||
@@ -96,8 +106,17 @@ install:
|
||||
fi
|
||||
kubectl apply -n ${JUPYTERHUB_NAMESPACE} -f nfs-pvc.yaml
|
||||
fi
|
||||
|
||||
# Create or get JupyterHub Vault token before gomplate
|
||||
if ! just vault::exist jupyterhub/vault-token &>/dev/null; then
|
||||
echo "Creating JupyterHub Vault token..."
|
||||
just create-jupyterhub-vault-token
|
||||
fi
|
||||
export JUPYTERHUB_VAULT_TOKEN=$(just vault::get jupyterhub/vault-token token)
|
||||
|
||||
# https://z2jh.jupyter.org/en/stable/
|
||||
gomplate -f jupyterhub-values.gomplate.yaml -o jupyterhub-values.yaml
|
||||
|
||||
helm upgrade --cleanup-on-fail --install jupyterhub jupyterhub/jupyterhub \
|
||||
--version ${JUPYTERHUB_CHART_VERSION} -n ${JUPYTERHUB_NAMESPACE} \
|
||||
--timeout=20m -f jupyterhub-values.yaml
|
||||
@@ -138,62 +157,68 @@ delete-pv:
|
||||
# Build Jupyter notebook kernel images
|
||||
build-kernel-images:
|
||||
#!/bin/bash
|
||||
set -euo pipefail
|
||||
# Build python package wheel
|
||||
cd ../python-package
|
||||
rm -rf dist/ build/ *.egg-info/
|
||||
SETUPTOOLS_SCM_PRETEND_VERSION_FOR_BUUNSTACK=0.1.0 python -m build --wheel
|
||||
cd ../jupyterhub
|
||||
# Copy built wheel to image directories
|
||||
cp ../python-package/dist/*.whl ./images/datastack-notebook/
|
||||
cp ../python-package/dist/*.whl ./images/datastack-cuda-notebook/
|
||||
set -euxo pipefail
|
||||
(
|
||||
cd ../python-package
|
||||
rm -rf dist/ build/ *.egg-info/
|
||||
SETUPTOOLS_SCM_PRETEND_VERSION_FOR_BUUNSTACK=0.1.0 python -m build --wheel
|
||||
)
|
||||
(
|
||||
cd ./images/datastack-notebook
|
||||
cp ../../../python-package/dist/*.whl ./
|
||||
docker build -t \
|
||||
${IMAGE_REGISTRY}/${KERNEL_IMAGE_BUUN_STACK_REPOSITORY}:${JUPYTER_PYTHON_KERNEL_TAG} \
|
||||
--build-arg spark_version="3.5.4" \
|
||||
--build-arg spark_download_url="https://archive.apache.org/dist/spark/" \
|
||||
.
|
||||
)
|
||||
(
|
||||
cd ./images/datastack-cuda-notebook
|
||||
docker build -t \
|
||||
${IMAGE_REGISTRY}/${KERNEL_IMAGE_BUUN_STACK_CUDA_REPOSITORY}:${JUPYTER_PYTHON_KERNEL_TAG} \
|
||||
--build-arg spark_version="3.5.4" \
|
||||
--build-arg spark_download_url="https://archive.apache.org/dist/spark/" \
|
||||
.
|
||||
)
|
||||
# Clean up copied wheel files
|
||||
rm -f ./images/datastack-notebook/*.whl
|
||||
rm -f ./images/datastack-cuda-notebook/*.whl
|
||||
if [ "${JUPYTER_PROFILE_BUUN_STACK_CUDA_ENABLED}" = "true" ]; then
|
||||
(
|
||||
cd ./images/datastack-cuda-notebook
|
||||
cp ../../../python-package/dist/*.whl ./
|
||||
docker build -t \
|
||||
${IMAGE_REGISTRY}/${KERNEL_IMAGE_BUUN_STACK_CUDA_REPOSITORY}:${JUPYTER_PYTHON_KERNEL_TAG} \
|
||||
--build-arg spark_version="3.5.4" \
|
||||
--build-arg spark_download_url="https://archive.apache.org/dist/spark/" \
|
||||
.
|
||||
)
|
||||
rm -f ./images/datastack-cuda-notebook/*.whl
|
||||
fi
|
||||
|
||||
# Push Jupyter notebook kernel images
|
||||
push-kernel-images: build-kernel-images
|
||||
docker push ${IMAGE_REGISTRY}/${KERNEL_IMAGE_BUUN_STACK_REPOSITORY}:${JUPYTER_PYTHON_KERNEL_TAG}
|
||||
docker push ${IMAGE_REGISTRY}/${KERNEL_IMAGE_BUUN_STACK_CUDA_REPOSITORY}:${JUPYTER_PYTHON_KERNEL_TAG}
|
||||
|
||||
# Configure Vault for JupyterHub integration
|
||||
setup-vault-integration:
|
||||
#!/bin/bash
|
||||
set -euo pipefail
|
||||
echo "Creating JupyterHub Vault policy..."
|
||||
just vault::write-policy jupyter-user $(pwd)/vault-policy.hcl
|
||||
echo "✓ JupyterHub policy created"
|
||||
docker push ${IMAGE_REGISTRY}/${KERNEL_IMAGE_BUUN_STACK_REPOSITORY}:${JUPYTER_PYTHON_KERNEL_TAG}
|
||||
if [ "${JUPYTER_PROFILE_BUUN_STACK_CUDA_ENABLED}" = "true" ]; then
|
||||
docker push ${IMAGE_REGISTRY}/${KERNEL_IMAGE_BUUN_STACK_CUDA_REPOSITORY}:${JUPYTER_PYTHON_KERNEL_TAG}
|
||||
fi
|
||||
|
||||
# Setup JWT auth for JupyterHub tokens (no re-authentication needed)
|
||||
# Setup Vault integration for JupyterHub (user-specific tokens)
|
||||
setup-vault-jwt-auth:
|
||||
#!/bin/bash
|
||||
set -euo pipefail
|
||||
echo "Setting up Vault integration for JupyterHub..."
|
||||
just setup-vault-integration
|
||||
just vault::setup-jwt-auth "jupyterhub" "jupyter-token" "jupyter-user"
|
||||
echo "✓ Vault integration configured"
|
||||
|
||||
echo "✓ Vault integration configured (user-specific tokens)"
|
||||
echo ""
|
||||
echo "Users can now access Vault from notebooks using:"
|
||||
echo " import os, hvac"
|
||||
echo " client = hvac.Client(url=os.getenv('VAULT_ADDR'), verify=False)"
|
||||
echo " client.auth.jwt.jwt_login("
|
||||
echo " role='jupyter-token',"
|
||||
echo " jwt=os.getenv('JUPYTERHUB_OIDC_ACCESS_TOKEN'),"
|
||||
echo " path='jwt'"
|
||||
echo " )"
|
||||
echo " from buunstack import SecretStore"
|
||||
echo " secrets = SecretStore()"
|
||||
echo " # Each user gets their own isolated Vault token and policy"
|
||||
|
||||
# Create JupyterHub Vault token (uses admin policy for JWT operations)
|
||||
create-jupyterhub-vault-token ttl="720h":
|
||||
#!/bin/bash
|
||||
set -euo pipefail
|
||||
echo "Creating JupyterHub Vault token with admin policy..."
|
||||
|
||||
# JupyterHub needs admin privileges to read Keycloak credentials from Vault
|
||||
# Create token and store in Vault
|
||||
just vault::create-token-and-store admin jupyterhub/vault-token {{ ttl }}
|
||||
|
||||
echo "✓ JupyterHub Vault token created and stored"
|
||||
echo ""
|
||||
echo "To use in JupyterHub deployment:"
|
||||
echo " JUPYTERHUB_VAULT_TOKEN=\$(just vault::get jupyterhub/vault-token token)"
|
||||
|
||||
@@ -1,26 +0,0 @@
|
||||
# JupyterHub user policy for Vault access
|
||||
|
||||
# Read access to shared jupyter resources
|
||||
path "secret/data/jupyter/shared/*" {
|
||||
capabilities = ["read", "list"]
|
||||
}
|
||||
|
||||
# Allow users to list shared directory
|
||||
path "secret/metadata/jupyter/shared" {
|
||||
capabilities = ["list"]
|
||||
}
|
||||
|
||||
# Full access to user-specific paths
|
||||
path "secret/data/jupyter/users/{{identity.entity.aliases.auth_jwt_*.metadata.username}}/*" {
|
||||
capabilities = ["create", "update", "read", "delete", "list"]
|
||||
}
|
||||
|
||||
# Allow users to list their own directory
|
||||
path "secret/metadata/jupyter/users/{{identity.entity.aliases.auth_jwt_*.metadata.username}}/*" {
|
||||
capabilities = ["list", "read", "delete"]
|
||||
}
|
||||
|
||||
# Allow users to list only their own user directory for navigation
|
||||
path "secret/metadata/jupyter/users/{{identity.entity.aliases.auth_jwt_*.metadata.username}}" {
|
||||
capabilities = ["list"]
|
||||
}
|
||||
Reference in New Issue
Block a user