From f529223c563dab47b3dc3ed654f90a2084f95359 Mon Sep 17 00:00:00 2001 From: Masaki Yatsu Date: Sun, 31 Aug 2025 14:47:29 +0900 Subject: [PATCH] feat(jupyterhub): Vault integration --- .../images/datastack-cuda-notebook/Dockerfile | 1 + .../images/datastack-notebook/Dockerfile | 1 + jupyterhub/jupyterhub-values.gomplate.yaml | 52 +++- jupyterhub/justfile | 37 ++- jupyterhub/vault-policy.hcl | 21 ++ jupyterhub/vault-user-storage-example.ipynb | 269 ++++++++++++++++++ 6 files changed, 376 insertions(+), 5 deletions(-) create mode 100644 jupyterhub/vault-policy.hcl create mode 100644 jupyterhub/vault-user-storage-example.ipynb diff --git a/jupyterhub/images/datastack-cuda-notebook/Dockerfile b/jupyterhub/images/datastack-cuda-notebook/Dockerfile index dd949f8..afdfb7c 100644 --- a/jupyterhub/images/datastack-cuda-notebook/Dockerfile +++ b/jupyterhub/images/datastack-cuda-notebook/Dockerfile @@ -130,6 +130,7 @@ RUN pip install \ agno \ fastembed \ feature-engine \ + hvac \ jupyter-ai \ jupyter-ai-magics[all] \ kreuzberg \ diff --git a/jupyterhub/images/datastack-notebook/Dockerfile b/jupyterhub/images/datastack-notebook/Dockerfile index 27ee415..667f55c 100644 --- a/jupyterhub/images/datastack-notebook/Dockerfile +++ b/jupyterhub/images/datastack-notebook/Dockerfile @@ -130,6 +130,7 @@ RUN pip install \ agno \ fastembed \ feature-engine \ + hvac \ jupyter-ai \ jupyter-ai-magics[all] \ kreuzberg \ diff --git a/jupyterhub/jupyterhub-values.gomplate.yaml b/jupyterhub/jupyterhub-values.gomplate.yaml index b312ee7..2a24be6 100644 --- a/jupyterhub/jupyterhub-values.gomplate.yaml +++ b/jupyterhub/jupyterhub-values.gomplate.yaml @@ -24,9 +24,26 @@ hub: - profile - email - # db: - # pvc: - # storageClassName: longhorn +{{- if eq .Env.JUPYTERHUB_VAULT_INTEGRATION_ENABLED "true" }} + extraConfig: + 01-vault-integration: | + import os + + async def pre_spawn_hook(spawner): + """Pass OIDC tokens and Vault config to notebook environment""" + auth_state = await 
spawner.user.get_auth_state() + if auth_state: + if 'access_token' in auth_state: + spawner.environment['JUPYTERHUB_OIDC_ACCESS_TOKEN'] = auth_state['access_token'] + if 'refresh_token' in auth_state: + spawner.environment['JUPYTERHUB_OIDC_REFRESH_TOKEN'] = auth_state['refresh_token'] + if 'id_token' in auth_state: + spawner.environment['JUPYTERHUB_OIDC_ID_TOKEN'] = auth_state['id_token'] + if 'expires_at' in auth_state: + spawner.environment['JUPYTERHUB_OIDC_TOKEN_EXPIRES_AT'] = str(auth_state['expires_at']) + + c.Spawner.pre_spawn_hook = pre_spawn_hook +{{- end }} podSecurityContext: fsGroup: {{ .Env.JUPYTER_FSGID }} @@ -45,6 +62,24 @@ singleuser: - ReadWriteOnce {{ end -}} capacity: 10Gi + +{{- if eq .Env.JUPYTERHUB_VAULT_INTEGRATION_ENABLED "true" }} + extraEnv: + VAULT_ADDR: "{{ .Env.VAULT_ADDR }}" + KEYCLOAK_HOST: "{{ .Env.KEYCLOAK_HOST }}" + KEYCLOAK_REALM: "{{ .Env.KEYCLOAK_REALM }}" + + lifecycleHooks: + postStart: + exec: + command: + - /bin/bash + - -c + - | + # Install hvac for Vault integration + pip install --quiet hvac requests + echo "Vault integration ready" +{{- end }} networkPolicy: egress: - to: @@ -72,6 +107,17 @@ singleuser: ports: - port: 4000 protocol: TCP +{{- if eq .Env.JUPYTERHUB_VAULT_INTEGRATION_ENABLED "true" }} + - to: + - namespaceSelector: + matchLabels: + kubernetes.io/metadata.name: vault + ports: + - port: 8200 + protocol: TCP + - port: 8201 + protocol: TCP +{{- end }} - to: - ipBlock: cidr: 0.0.0.0/0 diff --git a/jupyterhub/justfile b/jupyterhub/justfile index 723b812..c56bdcc 100644 --- a/jupyterhub/justfile +++ b/jupyterhub/justfile @@ -1,10 +1,12 @@ set fallback := true +tempdir := `mktemp -d` export JUPYTERHUB_NAMESPACE := env("JUPYTERHUB_NAMESPACE", "jupyter") export JUPYTERHUB_CHART_VERSION := env("JUPYTERHUB_CHART_VERSION", "4.2.0") export JUPYTERHUB_OIDC_CLIENT_ID := env("JUPYTERHUB_OIDC_CLIENT_ID", "jupyterhub") export JUPYTERHUB_ENABLE_NFS_PV := env("JUPYTERHUB_ENABLE_NFS_PV", "") -export JUPYTER_PYTHON_KERNEL_TAG := 
env("JUPYTER_PYTHON_KERNEL_TAG", "python-3.12-1") +export JUPYTERHUB_VAULT_INTEGRATION_ENABLED := env("JUPYTERHUB_VAULT_INTEGRATION_ENABLED", "false") +export JUPYTER_PYTHON_KERNEL_TAG := env("JUPYTER_PYTHON_KERNEL_TAG", "python-3.12-2") export KERNEL_IMAGE_BUUN_STACK_REPOSITORY := env("KERNEL_IMAGE_BUUN_STACK_REPOSITORY", "buun-stack-notebook") export KERNEL_IMAGE_BUUN_STACK_CUDA_REPOSITORY := env("KERNEL_IMAGE_BUUN_STACK_CUDA_REPOSITORY", "buun-stack-cuda-notebook") export JUPYTER_PROFILE_MINIMAL_ENABLED := env("JUPYTER_PROFILE_MINIMAL_ENABLED", "false") @@ -18,6 +20,7 @@ export JUPYTER_PROFILE_BUUN_STACK_CUDA_ENABLED := env("JUPYTER_PROFILE_BUUN_STAC export IMAGE_REGISTRY := env("IMAGE_REGISTRY", "localhost:30500") export KEYCLOAK_REALM := env("KEYCLOAK_REALM", "buunstack") export LONGHORN_NAMESPACE := env("LONGHORN_NAMESPACE", "longhorn") +export VAULT_ADDR := env("VAULT_ADDR", "http://vault.vault.svc:8200") [private] default: @@ -56,7 +59,6 @@ install: # just k8s::copy-regcred ${JUPYTERHUB_NAMESPACE} just keycloak::create-client ${KEYCLOAK_REALM} ${JUPYTERHUB_OIDC_CLIENT_ID} \ "https://${JUPYTERHUB_HOST}/hub/oauth_callback" - # just vault::create-jupyter-role just add-helm-repo export JUPYTERHUB_OIDC_CLIENT_ID=${JUPYTERHUB_OIDC_CLIENT_ID} export KEYCLOAK_REALM=${KEYCLOAK_REALM} @@ -103,6 +105,11 @@ install: # wait deployments manually because `helm upgrade --wait` does not work for JupyterHub just k8s::wait-deployments-ready ${JUPYTERHUB_NAMESPACE} hub proxy + # Setup Vault integration if enabled + if [ "${JUPYTERHUB_VAULT_INTEGRATION_ENABLED}" = "true" ]; then + just setup-vault-jwt-auth + fi + # Uninstall JupyterHub uninstall: #!/bin/bash @@ -148,3 +155,29 @@ build-kernel-images: push-kernel-images: build-kernel-images docker push ${IMAGE_REGISTRY}/${KERNEL_IMAGE_BUUN_STACK_REPOSITORY}:${JUPYTER_PYTHON_KERNEL_TAG} docker push ${IMAGE_REGISTRY}/${KERNEL_IMAGE_BUUN_STACK_CUDA_REPOSITORY}:${JUPYTER_PYTHON_KERNEL_TAG} + +# Configure Vault for JupyterHub 
integration +setup-vault-integration: + #!/bin/bash + set -euo pipefail + echo "Creating JupyterHub Vault policy..." + just vault::write-policy jupyter-user $(pwd)/vault-policy.hcl + echo "✓ JupyterHub policy created" + +# Setup JWT auth for JupyterHub tokens (no re-authentication needed) +setup-vault-jwt-auth: + #!/bin/bash + set -euo pipefail + echo "Setting up Vault integration for JupyterHub..." + just setup-vault-integration + just vault::setup-jwt-auth "jupyterhub" "jupyter-token" "jupyter-user" + echo "✓ Vault integration configured" + echo "" + echo "Users can now access Vault from notebooks using:" + echo " import os, hvac" + echo " client = hvac.Client(url=os.getenv('VAULT_ADDR'), verify=False)" + echo " client.auth.jwt.jwt_login(" + echo " role='jupyter-token'," + echo " jwt=os.getenv('JUPYTERHUB_OIDC_ACCESS_TOKEN')," + echo " path='jwt'" + echo " )" diff --git a/jupyterhub/vault-policy.hcl b/jupyterhub/vault-policy.hcl new file mode 100644 index 0000000..dc62ea0 --- /dev/null +++ b/jupyterhub/vault-policy.hcl @@ -0,0 +1,21 @@ +# JupyterHub user policy for Vault access + +# Read access to shared jupyter resources +path "secret/data/jupyter/shared/*" { + capabilities = ["read", "list"] +} + +# Full access to user-specific paths +path "secret/data/jupyter/users/{{identity.entity.aliases.auth_jwt_*.metadata.username}}/*" { + capabilities = ["create", "update", "read", "delete", "list"] +} + +# Allow users to list their own directory +path "secret/metadata/jupyter/users/{{identity.entity.aliases.auth_jwt_*.metadata.username}}/*" { + capabilities = ["list", "read", "delete"] +} + +# Allow users to list jupyter root to navigate +path "secret/metadata/jupyter/*" { + capabilities = ["list"] +} diff --git a/jupyterhub/vault-user-storage-example.ipynb b/jupyterhub/vault-user-storage-example.ipynb new file mode 100644 index 0000000..7db724a --- /dev/null +++ b/jupyterhub/vault-user-storage-example.ipynb @@ -0,0 +1,269 @@ +{ + "cells": [ + { + "cell_type": 
"markdown", + "metadata": {}, + "source": [ + "# Vault User Storage Example\n", + "\n", + "Each JupyterHub user has their own private storage space in Vault at `/secret/jupyter/users/<username>/`" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup and Authentication" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import hvac\n", + "import json\n", + "from datetime import datetime\n", + "\n", + "# Get environment variables\n", + "username = os.getenv('JUPYTERHUB_USER', 'testuser')\n", + "vault_addr = os.getenv('VAULT_ADDR', 'https://vault.example.com')\n", + "oidc_token = os.getenv('JUPYTERHUB_OIDC_ACCESS_TOKEN')\n", + "\n", + "if not oidc_token:\n", + " raise ValueError(\"OIDC token not found. Make sure auth_state is enabled.\")\n", + "\n", + "# Initialize Vault client\n", + "client = hvac.Client(url=vault_addr, verify=False)\n", + "\n", + "# Authenticate with JupyterHub token\n", + "client.auth.jwt.jwt_login(\n", + " role='jupyter-token',\n", + " jwt=oidc_token,\n", + " path='jwt'\n", + ")\n", + "\n", + "print(f\"Authenticated as: {username}\")\n", + "print(f\"User storage path: secret/jupyter/users/{username}/\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Store User Preferences" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Store user preferences\n", + "preferences = {\n", + " 'theme': 'dark',\n", + " 'language': 'en',\n", + " 'font_size': 14,\n", + " 'auto_save': True,\n", + " 'last_updated': datetime.now().isoformat()\n", + "}\n", + "\n", + "client.secrets.kv.v2.create_or_update_secret(\n", + " path=f'jupyter/users/{username}/preferences',\n", + " secret=preferences,\n", + " mount_point='secret'\n", + ")\n", + "\n", + "print(f\"Saved preferences for {username}\")\n", + "print(json.dumps(preferences, indent=2))" + ] + }, + { + "cell_type": "markdown", 
+ "metadata": {}, + "source": "## Store API Keys" + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Store API keys securely\n", + "api_keys = {\n", + " 'openai_key': 'sk-your-api-key-here',\n", + " 'github_token': 'ghp_your-token-here',\n", + " 'aws_access_key': 'AKIA-example',\n", + " 'aws_secret_key': 'your-secret-here'\n", + "}\n", + "\n", + "client.secrets.kv.v2.create_or_update_secret(\n", + " path=f'jupyter/users/{username}/api-keys',\n", + " secret=api_keys,\n", + " mount_point='secret'\n", + ")\n", + "\n", + "print(f\"Stored {len(api_keys)} API keys for {username}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Store Database Connections" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Store database connection info\n", + "db_connections = {\n", + " 'postgres_prod': {\n", + " 'host': 'db.example.com',\n", + " 'port': 5432,\n", + " 'database': 'production',\n", + " 'username': 'app_user',\n", + " 'password': 'secure-password-123'\n", + " },\n", + " 'mongodb_analytics': {\n", + " 'connection_string': 'mongodb://user:pass@mongo.example.com:27017/analytics'\n", + " }\n", + "}\n", + "\n", + "client.secrets.kv.v2.create_or_update_secret(\n", + " path=f'jupyter/users/{username}/databases',\n", + " secret=db_connections,\n", + " mount_point='secret'\n", + ")\n", + "\n", + "print(f\"Stored {len(db_connections)} database connections\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Read Stored Secrets" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + 
"outputs": [], + "source": "# Read back preferences\nresponse = client.secrets.kv.v2.read_secret_version(\n path=f'jupyter/users/{username}/preferences',\n mount_point='secret',\n raise_on_deleted_version=False\n)\n\nstored_prefs = response['data']['data']\nprint(\"Retrieved preferences:\")\nprint(json.dumps(stored_prefs, indent=2))" + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": "## List Stored Secrets" 
+ }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# List all secrets in user's directory\n", + "try:\n", + " secrets_list = client.secrets.kv.v2.list_secrets(\n", + " path=f'jupyter/users/{username}',\n", + " mount_point='secret'\n", + " )\n", + " \n", + " print(f\"Secrets stored for {username}:\")\n", + " for secret in secrets_list['data']['keys']:\n", + " print(f\" - {secret}\")\n", + "except Exception as e:\n", + " print(f\"No secrets found or error: {e}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Helper Class for User Storage" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": "class UserVaultStorage:\n \"\"\"Helper class for managing user's Vault storage\"\"\"\n \n def __init__(self):\n self.username = os.getenv('JUPYTERHUB_USER')\n self.client = hvac.Client(\n url=os.getenv('VAULT_ADDR'),\n verify=False\n )\n self._authenticate()\n self.base_path = f'jupyter/users/{self.username}'\n \n def _authenticate(self):\n token = os.getenv('JUPYTERHUB_OIDC_ACCESS_TOKEN')\n self.client.auth.jwt.jwt_login(\n role='jupyter-token',\n jwt=token,\n path='jwt'\n )\n \n def save(self, key, data):\n \"\"\"Save data to user's Vault storage\"\"\"\n path = f'{self.base_path}/{key}'\n self.client.secrets.kv.v2.create_or_update_secret(\n path=path,\n secret=data,\n mount_point='secret'\n )\n return path\n \n def load(self, key):\n \"\"\"Load data from user's Vault storage\"\"\"\n path = f'{self.base_path}/{key}'\n response = self.client.secrets.kv.v2.read_secret_version(\n path=path,\n mount_point='secret',\n raise_on_deleted_version=False\n )\n 
return response['data']['data'] if response else None\n \n def delete(self, key):\n \"\"\"Delete data from user's Vault storage\"\"\"\n path = f'{self.base_path}/{key}'\n self.client.secrets.kv.v2.delete_metadata_and_all_versions(\n path=path,\n mount_point='secret'\n )\n \n def list(self):\n \"\"\"List all keys in user's storage\"\"\"\n try:\n response = self.client.secrets.kv.v2.list_secrets(\n path=self.base_path,\n mount_point='secret'\n )\n return response['data']['keys']\n except:\n return []\n\n# Usage example\nstorage = UserVaultStorage()\n\n# Save model parameters\nstorage.save('ml-model-config', {\n 'model_type': 'random_forest',\n 'n_estimators': 100,\n 'max_depth': 10,\n 'training_date': datetime.now().isoformat()\n})\n\n# Load them back\nconfig = storage.load('ml-model-config')\nprint(\"Loaded config:\", config)\n\n# List all stored items\nprint(f\"\\nAll stored items: {storage.list()}\")" + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Environment Variables Helper" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def load_env_from_vault(key='environment'):\n", + " \"\"\"Load environment variables from Vault\"\"\"\n", + " storage = UserVaultStorage()\n", + " \n", + " try:\n", + " env_vars = storage.load(key)\n", + " for name, value in env_vars.items():\n", + " os.environ[name] = str(value)\n", + " print(f\"Loaded: {name}\")\n", + " return list(env_vars.keys())\n", + " except Exception as e:\n", + " print(f\"No environment variables found: {e}\")\n", + " return []\n", + "\n", + "def save_env_to_vault(env_dict, key='environment'):\n", + " \"\"\"Save environment variables to Vault\"\"\"\n", + " storage = UserVaultStorage()\n", + " path = storage.save(key, env_dict)\n", + " print(f\"Saved {len(env_dict)} environment variables to {path}\")\n", + "\n", + "# Example: Save current project environment\n", + "project_env = {\n", + " 'PROJECT_NAME': 'data-analysis',\n", + 
" 'DATA_PATH': '/data/project',\n", + " 'MODEL_VERSION': 'v2.1',\n", + " 'DEBUG': 'false'\n", + "}\n", + "\n", + "save_env_to_vault(project_env)\n", + "loaded = load_env_from_vault()\n", + "print(f\"\\nEnvironment ready with {len(loaded)} variables\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.0" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file