From 57d187fa4ab531b3fda720862ff1460b9aa824de Mon Sep 17 00:00:00 2001 From: Masaki Yatsu Date: Mon, 8 Sep 2025 18:35:28 +0900 Subject: [PATCH] chore(jupyterhub): Improve SecretStore error messages --- docs/jupyterhub.md | 75 ++++++++++--------- jupyterhub/jupyterhub-values.gomplate.yaml | 8 +- jupyterhub/justfile | 6 +- python-package/buunstack/secrets.py | 87 +++++++++++++++++++++- 4 files changed, 132 insertions(+), 44 deletions(-) diff --git a/docs/jupyterhub.md b/docs/jupyterhub.md index 39b344b..ab25f5c 100644 --- a/docs/jupyterhub.md +++ b/docs/jupyterhub.md @@ -132,28 +132,28 @@ Vault integration enables secure secrets management directly from Jupyter notebo ### Architecture ```plain -┌──────────────────────────────────────────────────────────────────┐ -│ JupyterHub Hub Pod │ -│ │ +┌────────────────────────────────────────────────────────────────┐ +│ JupyterHub Hub Pod │ +│ │ │ ┌──────────────┐ ┌────────────────┐ ┌────────────────────┐ │ -│ │ Hub │ │ Token Renewer │ │ ExternalSecret │ │ -│ │ Container │◄─┤ Sidecar │◄─┤ (mounted as │ │ -│ │ │ │ │ │ Secret) │ │ +│ │ Hub │ │ Token Renewer │ │ ExternalSecret │ │ +│ │ Container │◄─┤ Sidecar │◄─┤ (mounted as │ │ +│ │ │ │ │ │ Secret) │ │ │ └──────────────┘ └────────────────┘ └────────────────────┘ │ -│ │ │ ▲ │ -│ │ │ │ │ -│ ▼ ▼ │ │ -│ ┌──────────────────────────────────┐ │ │ -│ │ /vault/secrets/vault-token │ │ │ -│ │ (Admin token for user creation) │ │ │ -│ └──────────────────────────────────┘ │ │ -└────────────────────────────────────────────────────┼────────────┘ - │ - ┌───────────▼──────────┐ - │ Vault │ - │ secret/jupyterhub/ │ - │ vault-token │ - └──────────────────────┘ +│ │ │ ▲ │ +│ │ │ │ │ +│ ▼ ▼ │ │ +│ ┌──────────────────────────────────┐ │ │ +│ │ /vault/secrets/vault-token │ │ │ +│ │ (Admin token for user creation) │ │ │ +│ └──────────────────────────────────┘ │ │ +└────────────────────────────────────────────────────┼───────────┘ + │ + ┌───────────▼──────────┐ + │ Vault │ + │ secret/jupyterhub/ │ + │ vault-token │ + └──────────────────────┘ ``` ### Prerequisites @@ -163,7 +163,7 @@ Vault integration requires: - Vault server installed and configured - External Secrets Operator installed - ClusterSecretStore configured for Vault -- **Buun-stack kernel images** (standard images don't include Vault integration) +- Buun-stack kernel images (standard images don't include Vault integration) ### Setup @@ -243,15 +243,16 @@ User tokens are created dynamically: ### Admin Token Renewal -The admin token renewal is handled by a sidecar container (`vault-agent`) running alongside the JupyterHub hub: +The admin token renewal is handled by a sidecar container (`vault-token-renewer`) running alongside the JupyterHub hub: **Implementation Details:** 1. **Renewal Script**: `/vault/config/vault-token-renewer.sh` - - Runs in the `vault-agent` sidecar container + - Runs in the `vault-token-renewer` sidecar container - Uses Vault 1.17.5 image with HashiCorp Vault CLI 2. **Environment-Based TTL Configuration**: + ```bash # Reads TTL from environment variable (set in .env.local) TTL_RAW="${JUPYTERHUB_VAULT_TOKEN_TTL}" # e.g., "5m", "24h" @@ -261,12 +262,14 @@ The admin token renewal is handled by a sidecar container (`vault-agent`) runnin ``` 3. **Token Source**: ExternalSecret → Kubernetes Secret → mounted file + ```bash # Token retrieved from ExternalSecret-managed mount ADMIN_TOKEN=$(cat /vault/admin-token/token) ``` 4. **Renewal Loop**: + ```bash while true; do vault token renew >/dev/null 2>&1 @@ -277,9 +280,10 @@ The admin token renewal is handled by a sidecar container (`vault-agent`) runnin 5. **Error Handling**: If renewal fails, re-retrieves token from ExternalSecret mount **Key Files:** + - `vault-token-renewer.sh`: Main renewal script - `jupyterhub-vault-token-external-secret.gomplate.yaml`: ExternalSecret configuration -- `vault-agent-config` ConfigMap: Contains the renewal script +- `vault-token-renewer-config` ConfigMap: Contains the renewal script ### User Token Renewal @@ -288,12 +292,14 @@ User token renewal is handled within the notebook environment by the `buunstack` **Implementation Details:** 1. **Token Source**: Environment variable set by pre-spawn hook + ```python # In pre_spawn_hook.gomplate.py spawner.environment["NOTEBOOK_VAULT_TOKEN"] = user_vault_token ``` 2. **Automatic Renewal**: Built into `SecretStore` class operations + ```python # In buunstack/secrets.py def _ensure_authenticated(self): @@ -312,7 +318,7 @@ User token renewal is handled within the notebook environment by the `buunstack` - Transparent to user code 4. **Token Configuration** (set during creation): - - **TTL**: `NOTEBOOK_VAULT_TOKEN_TTL` (default: 24h) + - **TTL**: `NOTEBOOK_VAULT_TOKEN_TTL` (default: 24h = 1 day) - **Max TTL**: `NOTEBOOK_VAULT_TOKEN_MAX_TTL` (default: 168h = 7 days) - **Policy**: User-specific `jupyter-user-{username}` - **Type**: Orphan token (independent of parent token lifecycle) @@ -323,6 +329,7 @@ User token renewal is handled within the notebook environment by the `buunstack` - Prevented by `JUPYTERHUB_CULL_MAX_AGE` setting (6 days < 7 day Max TTL) **Key Files:** + - `pre_spawn_hook.gomplate.py`: User token creation logic - `buunstack/secrets.py`: Token renewal implementation - `user_policy.hcl`: User token permissions template @@ -333,15 +340,15 @@ User token renewal is handled within the notebook environment by the `buunstack` ┌─────────────────┐ ┌──────────────────┐ ┌─────────────────┐ │ Admin Token │ │ User Token │ │ Pod Lifecycle │ │ │ │ │ │ │ -│ Created: Manual │ │ Created: Spawn │ │ Max Age: 6 days │ -│ TTL: 5m-24h │ │ TTL: 24h │ │ Auto-restart │ -│ Max TTL: ∞ │ │ Max TTL: 7 days │ │ before expiry │ +│ Created: Manual │ │ Created: Spawn │ │ Max Age: 7 days │ +│ TTL: 5m-24h │ │ TTL: 1 day │ │ Auto-restart │ +│ Max TTL: ∞ │ │ Max TTL: 7 days │ │ at Max TTL │ │ Renewal: Auto │ │ Renewal: Auto │ │ │ │ Interval: TTL/2 │ │ Trigger: Usage │ │ │ └─────────────────┘ └──────────────────┘ └─────────────────┘ │ │ │ ▼ ▼ ▼ - vault-agent buunstack.py cull.maxAge + vault-token-renewer buunstack.py cull.maxAge sidecar SecretStore pod restart ``` @@ -395,12 +402,12 @@ IMAGE_REGISTRY=localhost:30500 # Vault token TTL settings JUPYTERHUB_VAULT_TOKEN_TTL=24h # Admin token: renewed at TTL/2 intervals -NOTEBOOK_VAULT_TOKEN_TTL=24h # User token: 1 day +NOTEBOOK_VAULT_TOKEN_TTL=24h # User token: 1 day (renewed on usage) NOTEBOOK_VAULT_TOKEN_MAX_TTL=168h # User token: 7 days max # Server pod lifecycle settings -JUPYTERHUB_CULL_MAX_AGE=518400 # Max pod age in seconds (6 days = 518400s) - # MUST be < NOTEBOOK_VAULT_TOKEN_MAX_TTL to prevent token expiry +JUPYTERHUB_CULL_MAX_AGE=604800 # Max pod age in seconds (7 days = 604800s) + # Should be <= NOTEBOOK_VAULT_TOKEN_MAX_TTL # Logging JUPYTER_BUUNSTACK_LOG_LEVEL=warning # Options: debug, info, warning, error @@ -482,7 +489,7 @@ kubectl get externalsecret -n jupyter jupyterhub-vault-token kubectl get secret -n jupyter jupyterhub-vault-token # Check token renewal logs -kubectl logs -n jupyter -l app.kubernetes.io/component=hub -c vault-agent +kubectl logs -n jupyter -l app.kubernetes.io/component=hub -c vault-token-renewer # In a notebook, verify environment %env NOTEBOOK_VAULT_TOKEN @@ -561,7 +568,7 @@ For production deployments, consider: 1. **Annual Token Recreation**: While tokens have unlimited Max TTL, best practice suggests recreating them annually -2. **Token Expiry and Pod Lifecycle**: User tokens have a maximum TTL of 7 days (`NOTEBOOK_VAULT_TOKEN_MAX_TTL=168h`). To prevent token expiry in long-running server pods, `JUPYTERHUB_CULL_MAX_AGE` is set to 6 days (518400s) by default. This ensures pods are restarted with fresh tokens before expiry. +2. **Token Expiry and Pod Lifecycle**: User tokens have a TTL of 1 day (`NOTEBOOK_VAULT_TOKEN_TTL=24h`) and maximum TTL of 7 days (`NOTEBOOK_VAULT_TOKEN_MAX_TTL=168h`). Daily usage extends the token for another day, allowing up to 7 days of continuous use. Server pods are automatically restarted after 7 days (`JUPYTERHUB_CULL_MAX_AGE=604800s`) to refresh tokens. 3. **Cull Settings**: Server idle timeout is set to 2 hours by default. Adjust `cull.timeout` and `cull.every` in the Helm values for different requirements diff --git a/jupyterhub/jupyterhub-values.gomplate.yaml b/jupyterhub/jupyterhub-values.gomplate.yaml index fd6b645..87f9ae3 100644 --- a/jupyterhub/jupyterhub-values.gomplate.yaml +++ b/jupyterhub/jupyterhub-values.gomplate.yaml @@ -67,13 +67,13 @@ hub: exec(f.read()) {{- if eq .Env.JUPYTERHUB_VAULT_INTEGRATION_ENABLED "true" }} - # Vault Agent sidecar configuration + # Vault token renewal sidecar configuration extraVolumes: - name: vault-secrets emptyDir: {} - name: vault-config configMap: - name: vault-agent-config + name: vault-token-renewer-config - name: vault-admin-token secret: secretName: jupyterhub-vault-token @@ -88,7 +88,7 @@ hub: readOnly: true extraContainers: - - name: vault-agent + - name: vault-token-renewer image: hashicorp/vault:1.17.5 securityContext: runAsUser: 100 @@ -149,6 +149,8 @@ singleuser: extraEnv: VAULT_ADDR: "{{ .Env.VAULT_ADDR }}" + NOTEBOOK_VAULT_TOKEN_TTL: "{{ .Env.NOTEBOOK_VAULT_TOKEN_TTL }}" + NOTEBOOK_VAULT_TOKEN_MAX_TTL: "{{ .Env.NOTEBOOK_VAULT_TOKEN_MAX_TTL }}" networkPolicy: egress: diff --git a/jupyterhub/justfile b/jupyterhub/justfile index 305be99..b8b064a 100644 --- a/jupyterhub/justfile +++ b/jupyterhub/justfile @@ -8,7 +8,7 @@ export JUPYTERHUB_OIDC_CLIENT_SESSION_MAX := env("JUPYTERHUB_OIDC_CLIENT_SESSION export JUPYTERHUB_NFS_PV_ENABLED := env("JUPYTERHUB_NFS_PV_ENABLED", "") export JUPYTERHUB_STORAGE_CLASS := env("JUPYTERHUB_STORAGE_CLASS", "") export JUPYTERHUB_VAULT_INTEGRATION_ENABLED := env("JUPYTERHUB_VAULT_INTEGRATION_ENABLED", "") -export JUPYTER_PYTHON_KERNEL_TAG := env("JUPYTER_PYTHON_KERNEL_TAG", "python-3.12-28") +export JUPYTER_PYTHON_KERNEL_TAG := env("JUPYTER_PYTHON_KERNEL_TAG", "python-3.12-30") export KERNEL_IMAGE_BUUN_STACK_REPOSITORY := env("KERNEL_IMAGE_BUUN_STACK_REPOSITORY", "buun-stack-notebook") export KERNEL_IMAGE_BUUN_STACK_CUDA_REPOSITORY := env("KERNEL_IMAGE_BUUN_STACK_CUDA_REPOSITORY", "buun-stack-cuda-notebook") export JUPYTER_PROFILE_MINIMAL_ENABLED := env("JUPYTER_PROFILE_MINIMAL_ENABLED", "false") @@ -22,7 +22,7 @@ export JUPYTER_PROFILE_BUUN_STACK_CUDA_ENABLED := env("JUPYTER_PROFILE_BUUN_STAC export JUPYTERHUB_VAULT_TOKEN_TTL := env("JUPYTERHUB_VAULT_TOKEN_TTL", "24h") export NOTEBOOK_VAULT_TOKEN_TTL := env("NOTEBOOK_VAULT_TOKEN_TTL", "24h") export NOTEBOOK_VAULT_TOKEN_MAX_TTL := env("NOTEBOOK_VAULT_TOKEN_MAX_TTL", "168h") -export JUPYTERHUB_CULL_MAX_AGE := env("JUPYTERHUB_CULL_MAX_AGE", "518400") +export JUPYTERHUB_CULL_MAX_AGE := env("JUPYTERHUB_CULL_MAX_AGE", "604800") export VAULT_AGENT_LOG_LEVEL := env("VAULT_AGENT_LOG_LEVEL", "info") export JUPYTER_BUUNSTACK_LOG_LEVEL := env("JUPYTER_BUUNSTACK_LOG_LEVEL", "warning") export IMAGE_REGISTRY := env("IMAGE_REGISTRY", "localhost:30500") @@ -255,7 +255,7 @@ setup-vault-integration root_token='': # Create ConfigMap with token renewal script echo "Creating ConfigMap with token renewal script..." - kubectl create configmap vault-agent-config -n ${JUPYTERHUB_NAMESPACE} \ + kubectl create configmap vault-token-renewer-config -n ${JUPYTERHUB_NAMESPACE} \ --from-file=vault-token-renewer.sh=vault-token-renewer.sh \ --dry-run=client -o yaml | kubectl apply -f - diff --git a/python-package/buunstack/secrets.py b/python-package/buunstack/secrets.py index 4cc7cb9..ec627b9 100644 --- a/python-package/buunstack/secrets.py +++ b/python-package/buunstack/secrets.py @@ -141,10 +141,89 @@ class SecretStore: except Exception: pass - # Token expired or invalid - no fallback available with user-specific tokens - raise Exception( - "User-specific Vault token expired and cannot be refreshed. Please restart your notebook server." - ) + # Token expired or invalid - provide detailed expiry information + token_ttl = os.getenv("NOTEBOOK_VAULT_TOKEN_TTL", "24h") + token_max_ttl = os.getenv("NOTEBOOK_VAULT_TOKEN_MAX_TTL", "168h") + + # Try to get actual token information for better error message + token_info = None + try: + token_info = self.client.auth.token.lookup_self() + except Exception: + pass + + if token_info: + data = token_info.get("data", {}) + ttl = data.get("ttl", 0) + creation_time = data.get("creation_time", 0) + expire_time = data.get("expire_time", "unknown") + renewable = data.get("renewable", False) + + if ttl <= 0 and renewable: + # Token expired but was renewable - likely hit Max TTL + import datetime + + try: + current_time = datetime.datetime.now() + if creation_time: + created_at = datetime.datetime.fromtimestamp(creation_time) + age_hours = (current_time - created_at).total_seconds() / 3600 + error_msg = ( + f"Vault Token Expired\n\n" + f"Your notebook's Vault token has reached its maximum lifetime and cannot be renewed.\n\n" + f"Token Details:\n" + f"• Created: {created_at.strftime('%Y-%m-%d %H:%M:%S')} ({age_hours:.1f}h ago)\n" + f"• TTL (renewal period): {token_ttl}\n" + f"• Max TTL (maximum lifetime): {token_max_ttl}\n" + f"• Expired at: {expire_time}\n\n" + f"How Token Renewal Works:\n" + f"• Your token is automatically renewed every time you use SecretStore\n" + f"• Each renewal extends the token for another {token_ttl}\n" + f"• However, tokens cannot be renewed beyond {token_max_ttl} from creation\n" + f"• Regular usage (within {token_ttl} intervals) keeps your token alive for up to {token_max_ttl}\n\n" + f"Solution:\n" + f"Please restart your notebook server to get a fresh token with a new {token_max_ttl} lifetime." + ) + else: + error_msg = ( + f"Vault Token Expired\n\n" + f"Your notebook's Vault token has expired and cannot be renewed.\n\n" + f"Token Settings:\n" + f"• TTL (renewal period): {token_ttl}\n" + f"• Max TTL (maximum lifetime): {token_max_ttl}\n\n" + f"Tip: Regular usage (within {token_ttl} intervals) keeps your token alive for up to {token_max_ttl}.\n\n" + f"Solution: Please restart your notebook server to get a fresh token." + ) + except Exception: + error_msg = ( + f"Vault Token Expired\n\n" + f"Your notebook's Vault token has expired and cannot be renewed.\n\n" + f"Token Settings:\n" + f"• TTL (renewal period): {token_ttl}\n" + f"• Max TTL (maximum lifetime): {token_max_ttl}\n\n" + f"Tip: Regular usage (within {token_ttl} intervals) keeps your token alive for up to {token_max_ttl}.\n\n" + f"Solution: Please restart your notebook server to get a fresh token." + ) + else: + # Token invalid for other reasons + error_msg = ( + "Vault Authentication Failed\n\n" + "Your notebook's Vault token is invalid or corrupted.\n\n" + "Solution: Please restart your notebook server to get a fresh token." + ) + else: + # Cannot retrieve token info - generic message + error_msg = ( + f"Vault Authentication Failed\n\n" + f"Your notebook's Vault token is invalid or has expired.\n\n" + f"Token Settings:\n" + f"• TTL (renewal period): {token_ttl}\n" + f"• Max TTL (maximum lifetime): {token_max_ttl}\n\n" + f"Tip: Regular usage (within {token_ttl} intervals) keeps your token alive for up to {token_max_ttl}.\n\n" + f"Solution: Please restart your notebook server to get a fresh token." + ) + + raise Exception(error_msg) def put(self, key: str, **kwargs: Any) -> None: """