feat(jupyterhub): install buunstack package to kernel image

This commit is contained in:
Masaki Yatsu
2025-08-31 20:43:27 +09:00
parent ddf867d1f1
commit 2480ebae82
15 changed files with 1192 additions and 270 deletions

View File

@@ -1 +1,2 @@
jupyterhub-values.yaml
/notebooks/

View File

@@ -0,0 +1,2 @@
# Temporary copy of wheel files for Docker build
*.whl

View File

@@ -143,6 +143,12 @@ RUN pip install \
tavily-python \
tweet-preprocessor
# Install buunstack package
# The wheel(s) must already be present in the build context when the build
# starts; they are copied into /opt/ and installed from there.
COPY *.whl /opt/
# --no-cache-dir keeps pip's download/build cache out of the image layer.
# fix-permissions is re-run on the conda dir and the user's home after the
# install, since the install may have written files under those trees.
RUN pip install --no-cache-dir /opt/*.whl && \
    fix-permissions "${CONDA_DIR}" && \
    fix-permissions "/home/${NB_USER}"
# Install PyTorch with pip (https://pytorch.org/get-started/locally/)
# langchain-openai must be updated to avoid pydantic v2 error
# https://github.com/run-llama/llama_index/issues/16540

View File

@@ -0,0 +1,2 @@
# Temporary copy of wheel files for Docker build
*.whl

View File

@@ -143,6 +143,12 @@ RUN pip install \
tavily-python \
tweet-preprocessor
# Install buunstack package
# The wheel(s) must already be present in the build context when the build
# starts; they are copied into /opt/ and installed from there.
COPY *.whl /opt/
# --no-cache-dir keeps pip's download/build cache out of the image layer.
# fix-permissions is re-run on the conda dir and the user's home after the
# install, since the install may have written files under those trees.
RUN pip install --no-cache-dir /opt/*.whl && \
    fix-permissions "${CONDA_DIR}" && \
    fix-permissions "/home/${NB_USER}"
# Install PyTorch with pip (https://pytorch.org/get-started/locally/)
# langchain-openai must be updated to avoid pydantic v2 error
# https://github.com/run-llama/llama_index/issues/16540

View File

@@ -42,6 +42,11 @@ hub:
if 'expires_at' in auth_state:
spawner.environment['JUPYTERHUB_OIDC_TOKEN_EXPIRES_AT'] = str(auth_state['expires_at'])
# Add Keycloak configuration for token refresh
spawner.environment['KEYCLOAK_HOST'] = '{{ .Env.KEYCLOAK_HOST }}'
spawner.environment['KEYCLOAK_REALM'] = '{{ .Env.KEYCLOAK_REALM }}'
spawner.environment['KEYCLOAK_CLIENT_ID'] = 'jupyterhub'
c.Spawner.pre_spawn_hook = pre_spawn_hook
{{- end }}

View File

@@ -6,7 +6,7 @@ export JUPYTERHUB_CHART_VERSION := env("JUPYTERHUB_CHART_VERSION", "4.2.0")
export JUPYTERHUB_OIDC_CLIENT_ID := env("JUPYTERHUB_OIDC_CLIENT_ID", "jupyterhub")
export JUPYTERHUB_ENABLE_NFS_PV := env("JUPYTERHUB_ENABLE_NFS_PV", "")
export JUPYTERHUB_VAULT_INTEGRATION_ENABLED := env("JUPYTERHUB_VAULT_INTEGRATION_ENABLED", "false")
export JUPYTER_PYTHON_KERNEL_TAG := env("JUPYTER_PYTHON_KERNEL_TAG", "python-3.12-2")
export JUPYTER_PYTHON_KERNEL_TAG := env("JUPYTER_PYTHON_KERNEL_TAG", "python-3.12-3")
export KERNEL_IMAGE_BUUN_STACK_REPOSITORY := env("KERNEL_IMAGE_BUUN_STACK_REPOSITORY", "buun-stack-notebook")
export KERNEL_IMAGE_BUUN_STACK_CUDA_REPOSITORY := env("KERNEL_IMAGE_BUUN_STACK_CUDA_REPOSITORY", "buun-stack-cuda-notebook")
export JUPYTER_PROFILE_MINIMAL_ENABLED := env("JUPYTER_PROFILE_MINIMAL_ENABLED", "false")
@@ -134,6 +134,14 @@ delete-pv:
build-kernel-images:
#!/bin/bash
set -euo pipefail
# Build python package wheel
cd ../python-package
rm -rf dist/ build/ *.egg-info/
SETUPTOOLS_SCM_PRETEND_VERSION_FOR_BUUNSTACK=0.1.0 python -m build --wheel
cd ../jupyterhub
# Copy built wheel to image directories
cp ../python-package/dist/*.whl ./images/datastack-notebook/
cp ../python-package/dist/*.whl ./images/datastack-cuda-notebook/
(
cd ./images/datastack-notebook
docker build -t \
@@ -150,6 +158,9 @@ build-kernel-images:
--build-arg spark_download_url="https://archive.apache.org/dist/spark/" \
.
)
# Clean up copied wheel files
rm -f ./images/datastack-notebook/*.whl
rm -f ./images/datastack-cuda-notebook/*.whl
# Push Jupyter notebook kernel images
push-kernel-images: build-kernel-images

View File

@@ -1,269 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Vault User Storage Example\n",
"\n",
"Each JupyterHub user has their own private storage space in Vault at `/secret/jupyter/users/<username>/`"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Setup and Authentication"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import hvac\n",
"import json\n",
"from datetime import datetime\n",
"\n",
"# Get environment variables\n",
"username = os.getenv('JUPYTERHUB_USER', 'testuser')\n",
"vault_addr = os.getenv('VAULT_ADDR', 'https://vault.example.com')\n",
"oidc_token = os.getenv('JUPYTERHUB_OIDC_ACCESS_TOKEN')\n",
"\n",
"if not oidc_token:\n",
" raise ValueError(\"OIDC token not found. Make sure auth_state is enabled.\")\n",
"\n",
"# Initialize Vault client\n",
"client = hvac.Client(url=vault_addr, verify=False)\n",
"\n",
"# Authenticate with JupyterHub token\n",
"client.auth.jwt.jwt_login(\n",
" role='jupyter-token',\n",
" jwt=oidc_token,\n",
" path='jwt'\n",
")\n",
"\n",
"print(f\"Authenticated as: {username}\")\n",
"print(f\"User storage path: secret/jupyter/users/{username}/\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Store User Preferences"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Store user preferences\n",
"preferences = {\n",
" 'theme': 'dark',\n",
" 'language': 'en',\n",
" 'font_size': 14,\n",
" 'auto_save': True,\n",
" 'last_updated': datetime.now().isoformat()\n",
"}\n",
"\n",
"client.secrets.kv.v2.create_or_update_secret(\n",
" path=f'jupyter/users/{username}/preferences',\n",
" secret=preferences,\n",
" mount_point='secret'\n",
")\n",
"\n",
"print(f\"Saved preferences for {username}\")\n",
"print(json.dumps(preferences, indent=2))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": "# Read back preferences\nresponse = client.secrets.kv.v2.read_secret_version(\n path=f'jupyter/users/{username}/preferences',\n mount_point='secret',\n raise_on_deleted_version=False\n)\n\nstored_prefs = response['data']['data']\nprint(\"Retrieved preferences:\")\nprint(json.dumps(stored_prefs, indent=2))"
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Store API keys securely\n",
"api_keys = {\n",
" 'openai_key': 'sk-your-api-key-here',\n",
" 'github_token': 'ghp_your-token-here',\n",
" 'aws_access_key': 'AKIA-example',\n",
" 'aws_secret_key': 'your-secret-here'\n",
"}\n",
"\n",
"client.secrets.kv.v2.create_or_update_secret(\n",
" path=f'jupyter/users/{username}/api-keys',\n",
" secret=api_keys,\n",
" mount_point='secret'\n",
")\n",
"\n",
"print(f\"Stored {len(api_keys)} API keys for {username}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Store Database Connections"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Store database connection info\n",
"db_connections = {\n",
" 'postgres_prod': {\n",
" 'host': 'db.example.com',\n",
" 'port': 5432,\n",
" 'database': 'production',\n",
" 'username': 'app_user',\n",
" 'password': 'secure-password-123'\n",
" },\n",
" 'mongodb_analytics': {\n",
" 'connection_string': 'mongodb://user:pass@mongo.example.com:27017/analytics'\n",
" }\n",
"}\n",
"\n",
"client.secrets.kv.v2.create_or_update_secret(\n",
" path=f'jupyter/users/{username}/databases',\n",
" secret=db_connections,\n",
" mount_point='secret'\n",
")\n",
"\n",
"print(f\"Stored {len(db_connections)} database connections\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Read Stored Secrets"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": "# Read back preferences\nresponse = client.secrets.kv.v2.read_secret_version(\n path=f'jupyter/users/{username}/preferences',\n mount_point='secret',\n raise_on_deleted_version=False\n)\n\nstored_prefs = response['data']['data']\nprint(\"Retrieved preferences:\")\nprint(json.dumps(stored_prefs, indent=2))"
},
{
"cell_type": "markdown",
"metadata": {},
"source": "class UserVaultStorage:\n \"\"\"Helper class for managing user's Vault storage\"\"\"\n \n def __init__(self):\n self.username = os.getenv('JUPYTERHUB_USER')\n self.client = hvac.Client(\n url=os.getenv('VAULT_ADDR'),\n verify=False\n )\n self._authenticate()\n self.base_path = f'jupyter/users/{self.username}'\n \n def _authenticate(self):\n token = os.getenv('JUPYTERHUB_OIDC_ACCESS_TOKEN')\n self.client.auth.jwt.jwt_login(\n role='jupyter-token',\n jwt=token,\n path='jwt'\n )\n \n def save(self, key, data):\n \"\"\"Save data to user's Vault storage\"\"\"\n path = f'{self.base_path}/{key}'\n self.client.secrets.kv.v2.create_or_update_secret(\n path=path,\n secret=data,\n mount_point='secret'\n )\n return path\n \n def load(self, key):\n \"\"\"Load data from user's Vault storage\"\"\"\n path = f'{self.base_path}/{key}'\n response = self.client.secrets.kv.v2.read_secret_version(\n path=path,\n mount_point='secret',\n raise_on_deleted_version=False\n )\n return response['data']['data'] if response else None\n \n def delete(self, key):\n \"\"\"Delete data from user's Vault storage\"\"\"\n path = f'{self.base_path}/{key}'\n self.client.secrets.kv.v2.delete_metadata_and_all_versions(\n path=path,\n mount_point='secret'\n )\n \n def list(self):\n \"\"\"List all keys in user's storage\"\"\"\n try:\n response = self.client.secrets.kv.v2.list_secrets(\n path=self.base_path,\n mount_point='secret'\n )\n return response['data']['keys']\n except:\n return []\n\n# Usage example\nstorage = UserVaultStorage()\n\n# Save model parameters\nstorage.save('ml-model-config', {\n 'model_type': 'random_forest',\n 'n_estimators': 100,\n 'max_depth': 10,\n 'training_date': datetime.now().isoformat()\n})\n\n# Load them back\nconfig = storage.load('ml-model-config')\nprint(\"Loaded config:\", config)\n\n# List all stored items\nprint(f\"\\nAll stored items: {storage.list()}\")"
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# List all secrets in user's directory\n",
"try:\n",
" secrets_list = client.secrets.kv.v2.list_secrets(\n",
" path=f'jupyter/users/{username}',\n",
" mount_point='secret'\n",
" )\n",
" \n",
" print(f\"Secrets stored for {username}:\")\n",
" for secret in secrets_list['data']['keys']:\n",
" print(f\" - {secret}\")\n",
"except Exception as e:\n",
" print(f\"No secrets found or error: {e}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Helper Class for User Storage"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": "class UserVaultStorage:\n \"\"\"Helper class for managing user's Vault storage\"\"\"\n \n def __init__(self):\n self.username = os.getenv('JUPYTERHUB_USER')\n self.client = hvac.Client(\n url=os.getenv('VAULT_ADDR'),\n verify=False\n )\n self._authenticate()\n self.base_path = f'jupyter/users/{self.username}'\n \n def _authenticate(self):\n token = os.getenv('JUPYTERHUB_OIDC_ACCESS_TOKEN')\n self.client.auth.jwt.jwt_login(\n role='jupyter-token',\n jwt=token,\n path='jwt'\n )\n \n def save(self, key, data):\n \"\"\"Save data to user's Vault storage\"\"\"\n path = f'{self.base_path}/{key}'\n self.client.secrets.kv.v2.create_or_update_secret(\n path=path,\n secret=data,\n mount_point='secret'\n )\n return path\n \n def load(self, key):\n \"\"\"Load data from user's Vault storage\"\"\"\n path = f'{self.base_path}/{key}'\n response = self.client.secrets.kv.v2.read_secret_version(\n path=path,\n mount_point='secret',\n raise_on_deleted_version=False\n )\n return response['data']['data'] if response else None\n \n def delete(self, key):\n \"\"\"Delete data from user's Vault storage\"\"\"\n path = f'{self.base_path}/{key}'\n self.client.secrets.kv.v2.delete_metadata_and_all_versions(\n path=path,\n mount_point='secret'\n )\n \n def list(self):\n \"\"\"List all keys in user's storage\"\"\"\n try:\n response = self.client.secrets.kv.v2.list_secrets(\n path=self.base_path,\n mount_point='secret'\n )\n return response['data']['keys']\n except:\n return []\n\n# Usage example\nstorage = UserVaultStorage()\n\n# Save model parameters\nstorage.save('ml-model-config', {\n 'model_type': 'random_forest',\n 'n_estimators': 100,\n 'max_depth': 10,\n 'training_date': datetime.now().isoformat()\n})\n\n# Load them back\nconfig = storage.load('ml-model-config')\nprint(\"Loaded config:\", config)\n\n# List all stored items\nprint(f\"\\nAll stored items: {storage.list()}\")"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Environment Variables Helper"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def load_env_from_vault(key='environment'):\n",
" \"\"\"Load environment variables from Vault\"\"\"\n",
" storage = UserVaultStorage()\n",
" \n",
" try:\n",
" env_vars = storage.load(key)\n",
" for name, value in env_vars.items():\n",
" os.environ[name] = str(value)\n",
" print(f\"Loaded: {name}\")\n",
" return list(env_vars.keys())\n",
" except Exception as e:\n",
" print(f\"No environment variables found: {e}\")\n",
" return []\n",
"\n",
"def save_env_to_vault(env_dict, key='environment'):\n",
" \"\"\"Save environment variables to Vault\"\"\"\n",
" storage = UserVaultStorage()\n",
" path = storage.save(key, env_dict)\n",
" print(f\"Saved {len(env_dict)} environment variables to {path}\")\n",
"\n",
"# Example: Save current project environment\n",
"project_env = {\n",
" 'PROJECT_NAME': 'data-analysis',\n",
" 'DATA_PATH': '/data/project',\n",
" 'MODEL_VERSION': 'v2.1',\n",
" 'DEBUG': 'false'\n",
"}\n",
"\n",
"save_env_to_vault(project_env)\n",
"loaded = load_env_from_vault()\n",
"print(f\"\\nEnvironment ready with {len(loaded)} variables\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.0"
}
},
"nbformat": 4,
"nbformat_minor": 4
}