feat(jupyterhub): Vault integration

This commit is contained in:
Masaki Yatsu
2025-08-31 14:47:29 +09:00
parent 9a1d4fd16f
commit f529223c56
6 changed files with 376 additions and 5 deletions

View File

@@ -130,6 +130,7 @@ RUN pip install \
agno \ agno \
fastembed \ fastembed \
feature-engine \ feature-engine \
hvac \
jupyter-ai \ jupyter-ai \
jupyter-ai-magics[all] \ jupyter-ai-magics[all] \
kreuzberg \ kreuzberg \

View File

@@ -130,6 +130,7 @@ RUN pip install \
agno \ agno \
fastembed \ fastembed \
feature-engine \ feature-engine \
hvac \
jupyter-ai \ jupyter-ai \
jupyter-ai-magics[all] \ jupyter-ai-magics[all] \
kreuzberg \ kreuzberg \

View File

@@ -24,9 +24,26 @@ hub:
- profile - profile
- email - email
# db: {{- if eq .Env.JUPYTERHUB_VAULT_INTEGRATION_ENABLED "true" }}
# pvc: extraConfig:
# storageClassName: longhorn 01-vault-integration: |
import os
async def pre_spawn_hook(spawner):
"""Pass OIDC tokens and Vault config to notebook environment"""
auth_state = await spawner.user.get_auth_state()
if auth_state:
if 'access_token' in auth_state:
spawner.environment['JUPYTERHUB_OIDC_ACCESS_TOKEN'] = auth_state['access_token']
if 'refresh_token' in auth_state:
spawner.environment['JUPYTERHUB_OIDC_REFRESH_TOKEN'] = auth_state['refresh_token']
if 'id_token' in auth_state:
spawner.environment['JUPYTERHUB_OIDC_ID_TOKEN'] = auth_state['id_token']
if 'expires_at' in auth_state:
spawner.environment['JUPYTERHUB_OIDC_TOKEN_EXPIRES_AT'] = str(auth_state['expires_at'])
c.Spawner.pre_spawn_hook = pre_spawn_hook
{{- end }}
podSecurityContext: podSecurityContext:
fsGroup: {{ .Env.JUPYTER_FSGID }} fsGroup: {{ .Env.JUPYTER_FSGID }}
@@ -45,6 +62,24 @@ singleuser:
- ReadWriteOnce - ReadWriteOnce
{{ end -}} {{ end -}}
capacity: 10Gi capacity: 10Gi
{{- if eq .Env.JUPYTERHUB_VAULT_INTEGRATION_ENABLED "true" }}
extraEnv:
VAULT_ADDR: "{{ .Env.VAULT_ADDR }}"
KEYCLOAK_HOST: "{{ .Env.KEYCLOAK_HOST }}"
KEYCLOAK_REALM: "{{ .Env.KEYCLOAK_REALM }}"
lifecycleHooks:
postStart:
exec:
command:
- /bin/bash
- -c
- |
# Install hvac for Vault integration
pip install --quiet hvac requests
echo "Vault integration ready"
{{- end }}
networkPolicy: networkPolicy:
egress: egress:
- to: - to:
@@ -72,6 +107,17 @@ singleuser:
ports: ports:
- port: 4000 - port: 4000
protocol: TCP protocol: TCP
{{- if eq .Env.JUPYTERHUB_VAULT_INTEGRATION_ENABLED "true" }}
- to:
- namespaceSelector:
matchLabels:
kubernetes.io/metadata.name: vault
ports:
- port: 8200
protocol: TCP
- port: 8201
protocol: TCP
{{- end }}
- to: - to:
- ipBlock: - ipBlock:
cidr: 0.0.0.0/0 cidr: 0.0.0.0/0

View File

@@ -1,10 +1,12 @@
set fallback := true set fallback := true
tempdir := `mktemp -d`
export JUPYTERHUB_NAMESPACE := env("JUPYTERHUB_NAMESPACE", "jupyter") export JUPYTERHUB_NAMESPACE := env("JUPYTERHUB_NAMESPACE", "jupyter")
export JUPYTERHUB_CHART_VERSION := env("JUPYTERHUB_CHART_VERSION", "4.2.0") export JUPYTERHUB_CHART_VERSION := env("JUPYTERHUB_CHART_VERSION", "4.2.0")
export JUPYTERHUB_OIDC_CLIENT_ID := env("JUPYTERHUB_OIDC_CLIENT_ID", "jupyterhub") export JUPYTERHUB_OIDC_CLIENT_ID := env("JUPYTERHUB_OIDC_CLIENT_ID", "jupyterhub")
export JUPYTERHUB_ENABLE_NFS_PV := env("JUPYTERHUB_ENABLE_NFS_PV", "") export JUPYTERHUB_ENABLE_NFS_PV := env("JUPYTERHUB_ENABLE_NFS_PV", "")
export JUPYTER_PYTHON_KERNEL_TAG := env("JUPYTER_PYTHON_KERNEL_TAG", "python-3.12-1") export JUPYTERHUB_VAULT_INTEGRATION_ENABLED := env("JUPYTERHUB_VAULT_INTEGRATION_ENABLED", "false")
export JUPYTER_PYTHON_KERNEL_TAG := env("JUPYTER_PYTHON_KERNEL_TAG", "python-3.12-2")
export KERNEL_IMAGE_BUUN_STACK_REPOSITORY := env("KERNEL_IMAGE_BUUN_STACK_REPOSITORY", "buun-stack-notebook") export KERNEL_IMAGE_BUUN_STACK_REPOSITORY := env("KERNEL_IMAGE_BUUN_STACK_REPOSITORY", "buun-stack-notebook")
export KERNEL_IMAGE_BUUN_STACK_CUDA_REPOSITORY := env("KERNEL_IMAGE_BUUN_STACK_CUDA_REPOSITORY", "buun-stack-cuda-notebook") export KERNEL_IMAGE_BUUN_STACK_CUDA_REPOSITORY := env("KERNEL_IMAGE_BUUN_STACK_CUDA_REPOSITORY", "buun-stack-cuda-notebook")
export JUPYTER_PROFILE_MINIMAL_ENABLED := env("JUPYTER_PROFILE_MINIMAL_ENABLED", "false") export JUPYTER_PROFILE_MINIMAL_ENABLED := env("JUPYTER_PROFILE_MINIMAL_ENABLED", "false")
@@ -18,6 +20,7 @@ export JUPYTER_PROFILE_BUUN_STACK_CUDA_ENABLED := env("JUPYTER_PROFILE_BUUN_STAC
export IMAGE_REGISTRY := env("IMAGE_REGISTRY", "localhost:30500") export IMAGE_REGISTRY := env("IMAGE_REGISTRY", "localhost:30500")
export KEYCLOAK_REALM := env("KEYCLOAK_REALM", "buunstack") export KEYCLOAK_REALM := env("KEYCLOAK_REALM", "buunstack")
export LONGHORN_NAMESPACE := env("LONGHORN_NAMESPACE", "longhorn") export LONGHORN_NAMESPACE := env("LONGHORN_NAMESPACE", "longhorn")
export VAULT_ADDR := env("VAULT_ADDR", "http://vault.vault.svc:8200")
[private] [private]
default: default:
@@ -56,7 +59,6 @@ install:
# just k8s::copy-regcred ${JUPYTERHUB_NAMESPACE} # just k8s::copy-regcred ${JUPYTERHUB_NAMESPACE}
just keycloak::create-client ${KEYCLOAK_REALM} ${JUPYTERHUB_OIDC_CLIENT_ID} \ just keycloak::create-client ${KEYCLOAK_REALM} ${JUPYTERHUB_OIDC_CLIENT_ID} \
"https://${JUPYTERHUB_HOST}/hub/oauth_callback" "https://${JUPYTERHUB_HOST}/hub/oauth_callback"
# just vault::create-jupyter-role
just add-helm-repo just add-helm-repo
export JUPYTERHUB_OIDC_CLIENT_ID=${JUPYTERHUB_OIDC_CLIENT_ID} export JUPYTERHUB_OIDC_CLIENT_ID=${JUPYTERHUB_OIDC_CLIENT_ID}
export KEYCLOAK_REALM=${KEYCLOAK_REALM} export KEYCLOAK_REALM=${KEYCLOAK_REALM}
@@ -103,6 +105,11 @@ install:
# wait deployments manually because `helm upgrade --wait` does not work for JupyterHub # wait deployments manually because `helm upgrade --wait` does not work for JupyterHub
just k8s::wait-deployments-ready ${JUPYTERHUB_NAMESPACE} hub proxy just k8s::wait-deployments-ready ${JUPYTERHUB_NAMESPACE} hub proxy
# Setup Vault integration if enabled
if [ "${JUPYTERHUB_VAULT_INTEGRATION_ENABLED}" = "true" ]; then
just setup-vault-jwt-auth
fi
# Uninstall JupyterHub # Uninstall JupyterHub
uninstall: uninstall:
#!/bin/bash #!/bin/bash
@@ -148,3 +155,29 @@ build-kernel-images:
push-kernel-images: build-kernel-images push-kernel-images: build-kernel-images
docker push ${IMAGE_REGISTRY}/${KERNEL_IMAGE_BUUN_STACK_REPOSITORY}:${JUPYTER_PYTHON_KERNEL_TAG} docker push ${IMAGE_REGISTRY}/${KERNEL_IMAGE_BUUN_STACK_REPOSITORY}:${JUPYTER_PYTHON_KERNEL_TAG}
docker push ${IMAGE_REGISTRY}/${KERNEL_IMAGE_BUUN_STACK_CUDA_REPOSITORY}:${JUPYTER_PYTHON_KERNEL_TAG} docker push ${IMAGE_REGISTRY}/${KERNEL_IMAGE_BUUN_STACK_CUDA_REPOSITORY}:${JUPYTER_PYTHON_KERNEL_TAG}
# Configure Vault for JupyterHub integration
setup-vault-integration:
    #!/bin/bash
    set -euo pipefail
    echo "Creating JupyterHub Vault policy..."
    # Quote the command substitution so a working directory containing spaces
    # or glob characters is still passed as a single policy-file argument.
    just vault::write-policy jupyter-user "$(pwd)/vault-policy.hcl"
    echo "✓ JupyterHub policy created"
# Setup JWT auth for JupyterHub tokens (no re-authentication needed)
setup-vault-jwt-auth:
    #!/bin/bash
    set -euo pipefail
    printf '%s\n' "Setting up Vault integration for JupyterHub..."
    # Write the policy first; "jupyter-user" below is the same policy name that
    # setup-vault-integration passes to vault::write-policy.
    just setup-vault-integration
    just vault::setup-jwt-auth "jupyterhub" "jupyter-token" "jupyter-user"
    # Confirmation followed by a copy-paste login snippet; each argument is
    # printed on its own line.
    printf '%s\n' \
        "✓ Vault integration configured" \
        "" \
        "Users can now access Vault from notebooks using:" \
        " import os, hvac" \
        " client = hvac.Client(url=os.getenv('VAULT_ADDR'), verify=False)" \
        " client.auth.jwt.jwt_login(" \
        " role='jupyter-token'," \
        " jwt=os.getenv('JUPYTERHUB_OIDC_ACCESS_TOKEN')," \
        " path='jwt'" \
        " )"

View File

@@ -0,0 +1,21 @@
# JupyterHub user policy for Vault access
#
# Grants read-only access to a shared subtree and read/write access to a
# per-user subtree under jupyter/users/<username>/.
# NOTE(review): the data/ and metadata/ path prefixes assume `secret/` is a
# KV version 2 mount — confirm against the Vault setup.
#
# Read access to shared jupyter resources
# NOTE(review): on KV v2, listing is presumably served from the metadata/ path,
# so "list" on this data/ path may have no effect — confirm.
path "secret/data/jupyter/shared/*" {
capabilities = ["read", "list"]
}
# Full access to user-specific paths
# NOTE(review): Vault's templated-policy syntax documents the segment after
# `identity.entity.aliases.` as a concrete auth mount accessor. Confirm that
# `auth_jwt_*` is replaced with the real accessor before this policy is
# written; otherwise the template may not resolve to the user's name.
path "secret/data/jupyter/users/{{identity.entity.aliases.auth_jwt_*.metadata.username}}/*" {
capabilities = ["create", "update", "read", "delete", "list"]
}
# Allow users to list, read and delete metadata entries under their own directory
# NOTE(review): same mount-accessor caveat as the data/ rule for this user path.
path "secret/metadata/jupyter/users/{{identity.entity.aliases.auth_jwt_*.metadata.username}}/*" {
capabilities = ["list", "read", "delete"]
}
# Allow users to list jupyter root to navigate
# (list only: no read or write capability is granted at this level)
path "secret/metadata/jupyter/*" {
capabilities = ["list"]
}

View File

@@ -0,0 +1,269 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Vault User Storage Example\n",
"\n",
"Each JupyterHub user has their own private storage space in Vault at `/secret/jupyter/users/<username>/`"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Setup and Authentication"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import hvac\n",
"import json\n",
"from datetime import datetime\n",
"\n",
"# Get environment variables\n",
"username = os.getenv('JUPYTERHUB_USER', 'testuser')\n",
"vault_addr = os.getenv('VAULT_ADDR', 'https://vault.example.com')\n",
"oidc_token = os.getenv('JUPYTERHUB_OIDC_ACCESS_TOKEN')\n",
"\n",
"if not oidc_token:\n",
" raise ValueError(\"OIDC token not found. Make sure auth_state is enabled.\")\n",
"\n",
"# Initialize Vault client\n",
"client = hvac.Client(url=vault_addr, verify=False)\n",
"\n",
"# Authenticate with JupyterHub token\n",
"client.auth.jwt.jwt_login(\n",
" role='jupyter-token',\n",
" jwt=oidc_token,\n",
" path='jwt'\n",
")\n",
"\n",
"print(f\"Authenticated as: {username}\")\n",
"print(f\"User storage path: secret/jupyter/users/{username}/\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Store User Preferences"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Store user preferences\n",
"preferences = {\n",
" 'theme': 'dark',\n",
" 'language': 'en',\n",
" 'font_size': 14,\n",
" 'auto_save': True,\n",
" 'last_updated': datetime.now().isoformat()\n",
"}\n",
"\n",
"client.secrets.kv.v2.create_or_update_secret(\n",
" path=f'jupyter/users/{username}/preferences',\n",
" secret=preferences,\n",
" mount_point='secret'\n",
")\n",
"\n",
"print(f\"Saved preferences for {username}\")\n",
"print(json.dumps(preferences, indent=2))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Store API Keys"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Store API keys securely\n",
"api_keys = {\n",
" 'openai_key': 'sk-your-api-key-here',\n",
" 'github_token': 'ghp_your-token-here',\n",
" 'aws_access_key': 'AKIA-example',\n",
" 'aws_secret_key': 'your-secret-here'\n",
"}\n",
"\n",
"client.secrets.kv.v2.create_or_update_secret(\n",
" path=f'jupyter/users/{username}/api-keys',\n",
" secret=api_keys,\n",
" mount_point='secret'\n",
")\n",
"\n",
"print(f\"Stored {len(api_keys)} API keys for {username}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Store Database Connections"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Store database connection info\n",
"db_connections = {\n",
" 'postgres_prod': {\n",
" 'host': 'db.example.com',\n",
" 'port': 5432,\n",
" 'database': 'production',\n",
" 'username': 'app_user',\n",
" 'password': 'secure-password-123'\n",
" },\n",
" 'mongodb_analytics': {\n",
" 'connection_string': 'mongodb://user:pass@mongo.example.com:27017/analytics'\n",
" }\n",
"}\n",
"\n",
"client.secrets.kv.v2.create_or_update_secret(\n",
" path=f'jupyter/users/{username}/databases',\n",
" secret=db_connections,\n",
" mount_point='secret'\n",
")\n",
"\n",
"print(f\"Stored {len(db_connections)} database connections\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Read Stored Secrets"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": "# Read back preferences\nresponse = client.secrets.kv.v2.read_secret_version(\n path=f'jupyter/users/{username}/preferences',\n mount_point='secret',\n raise_on_deleted_version=False\n)\n\nstored_prefs = response['data']['data']\nprint(\"Retrieved preferences:\")\nprint(json.dumps(stored_prefs, indent=2))"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## List Stored Secrets"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# List all secrets in user's directory\n",
"try:\n",
" secrets_list = client.secrets.kv.v2.list_secrets(\n",
" path=f'jupyter/users/{username}',\n",
" mount_point='secret'\n",
" )\n",
" \n",
" print(f\"Secrets stored for {username}:\")\n",
" for secret in secrets_list['data']['keys']:\n",
" print(f\" - {secret}\")\n",
"except Exception as e:\n",
" print(f\"No secrets found or error: {e}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Helper Class for User Storage"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": "class UserVaultStorage:\n \"\"\"Helper class for managing user's Vault storage\"\"\"\n \n def __init__(self):\n self.username = os.getenv('JUPYTERHUB_USER')\n self.client = hvac.Client(\n url=os.getenv('VAULT_ADDR'),\n verify=False\n )\n self._authenticate()\n self.base_path = f'jupyter/users/{self.username}'\n \n def _authenticate(self):\n token = os.getenv('JUPYTERHUB_OIDC_ACCESS_TOKEN')\n self.client.auth.jwt.jwt_login(\n role='jupyter-token',\n jwt=token,\n path='jwt'\n )\n \n def save(self, key, data):\n \"\"\"Save data to user's Vault storage\"\"\"\n path = f'{self.base_path}/{key}'\n self.client.secrets.kv.v2.create_or_update_secret(\n path=path,\n secret=data,\n mount_point='secret'\n )\n return path\n \n def load(self, key):\n \"\"\"Load data from user's Vault storage\"\"\"\n path = f'{self.base_path}/{key}'\n response = self.client.secrets.kv.v2.read_secret_version(\n path=path,\n mount_point='secret',\n raise_on_deleted_version=False\n )\n return response['data']['data'] if response else None\n \n def delete(self, key):\n \"\"\"Delete data from user's Vault storage\"\"\"\n path = f'{self.base_path}/{key}'\n self.client.secrets.kv.v2.delete_metadata_and_all_versions(\n path=path,\n mount_point='secret'\n )\n \n def list(self):\n \"\"\"List all keys in user's storage\"\"\"\n try:\n response = self.client.secrets.kv.v2.list_secrets(\n path=self.base_path,\n mount_point='secret'\n )\n return response['data']['keys']\n except:\n return []\n\n# Usage example\nstorage = UserVaultStorage()\n\n# Save model parameters\nstorage.save('ml-model-config', {\n 'model_type': 'random_forest',\n 'n_estimators': 100,\n 'max_depth': 10,\n 'training_date': datetime.now().isoformat()\n})\n\n# Load them back\nconfig = storage.load('ml-model-config')\nprint(\"Loaded config:\", config)\n\n# List all stored items\nprint(f\"\\nAll stored items: {storage.list()}\")"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Environment Variables Helper"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def load_env_from_vault(key='environment'):\n",
" \"\"\"Load environment variables from Vault\"\"\"\n",
" storage = UserVaultStorage()\n",
" \n",
" try:\n",
" env_vars = storage.load(key)\n",
" for name, value in env_vars.items():\n",
" os.environ[name] = str(value)\n",
" print(f\"Loaded: {name}\")\n",
" return list(env_vars.keys())\n",
" except Exception as e:\n",
" print(f\"No environment variables found: {e}\")\n",
" return []\n",
"\n",
"def save_env_to_vault(env_dict, key='environment'):\n",
" \"\"\"Save environment variables to Vault\"\"\"\n",
" storage = UserVaultStorage()\n",
" path = storage.save(key, env_dict)\n",
" print(f\"Saved {len(env_dict)} environment variables to {path}\")\n",
"\n",
"# Example: Save current project environment\n",
"project_env = {\n",
" 'PROJECT_NAME': 'data-analysis',\n",
" 'DATA_PATH': '/data/project',\n",
" 'MODEL_VERSION': 'v2.1',\n",
" 'DEBUG': 'false'\n",
"}\n",
"\n",
"save_env_to_vault(project_env)\n",
"loaded = load_env_from_vault()\n",
"print(f\"\\nEnvironment ready with {len(loaded)} variables\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.0"
}
},
"nbformat": 4,
"nbformat_minor": 4
}