feat(jupyterhub): install buunstack package to kernel image

This commit is contained in:
Masaki Yatsu
2025-08-31 20:43:27 +09:00
parent ddf867d1f1
commit 2480ebae82
15 changed files with 1192 additions and 270 deletions

View File

@@ -1 +1,2 @@
jupyterhub-values.yaml
/notebooks/

View File

@@ -0,0 +1,2 @@
# Temporary copy of wheel files for Docker build
*.whl

View File

@@ -143,6 +143,12 @@ RUN pip install \
tavily-python \
tweet-preprocessor
# Install buunstack package
COPY *.whl /opt/
RUN pip install /opt/*.whl && \
fix-permissions "${CONDA_DIR}" && \
fix-permissions "/home/${NB_USER}"
# Install PyTorch with pip (https://pytorch.org/get-started/locally/)
# langchain-openai must be updated to avoid pydantic v2 error
# https://github.com/run-llama/llama_index/issues/16540

View File

@@ -0,0 +1,2 @@
# Temporary copy of wheel files for Docker build
*.whl

View File

@@ -143,6 +143,12 @@ RUN pip install \
tavily-python \
tweet-preprocessor
# Install buunstack package
COPY *.whl /opt/
RUN pip install /opt/*.whl && \
fix-permissions "${CONDA_DIR}" && \
fix-permissions "/home/${NB_USER}"
# Install PyTorch with pip (https://pytorch.org/get-started/locally/)
# langchain-openai must be updated to avoid pydantic v2 error
# https://github.com/run-llama/llama_index/issues/16540

View File

@@ -42,6 +42,11 @@ hub:
if 'expires_at' in auth_state:
spawner.environment['JUPYTERHUB_OIDC_TOKEN_EXPIRES_AT'] = str(auth_state['expires_at'])
# Add Keycloak configuration for token refresh
spawner.environment['KEYCLOAK_HOST'] = '{{ .Env.KEYCLOAK_HOST }}'
spawner.environment['KEYCLOAK_REALM'] = '{{ .Env.KEYCLOAK_REALM }}'
spawner.environment['KEYCLOAK_CLIENT_ID'] = 'jupyterhub'
c.Spawner.pre_spawn_hook = pre_spawn_hook
{{- end }}

View File

@@ -6,7 +6,7 @@ export JUPYTERHUB_CHART_VERSION := env("JUPYTERHUB_CHART_VERSION", "4.2.0")
export JUPYTERHUB_OIDC_CLIENT_ID := env("JUPYTERHUB_OIDC_CLIENT_ID", "jupyterhub")
export JUPYTERHUB_ENABLE_NFS_PV := env("JUPYTERHUB_ENABLE_NFS_PV", "")
export JUPYTERHUB_VAULT_INTEGRATION_ENABLED := env("JUPYTERHUB_VAULT_INTEGRATION_ENABLED", "false")
export JUPYTER_PYTHON_KERNEL_TAG := env("JUPYTER_PYTHON_KERNEL_TAG", "python-3.12-2")
export JUPYTER_PYTHON_KERNEL_TAG := env("JUPYTER_PYTHON_KERNEL_TAG", "python-3.12-3")
export KERNEL_IMAGE_BUUN_STACK_REPOSITORY := env("KERNEL_IMAGE_BUUN_STACK_REPOSITORY", "buun-stack-notebook")
export KERNEL_IMAGE_BUUN_STACK_CUDA_REPOSITORY := env("KERNEL_IMAGE_BUUN_STACK_CUDA_REPOSITORY", "buun-stack-cuda-notebook")
export JUPYTER_PROFILE_MINIMAL_ENABLED := env("JUPYTER_PROFILE_MINIMAL_ENABLED", "false")
@@ -134,6 +134,14 @@ delete-pv:
build-kernel-images:
#!/bin/bash
set -euo pipefail
# Build python package wheel
cd ../python-package
rm -rf dist/ build/ *.egg-info/
SETUPTOOLS_SCM_PRETEND_VERSION_FOR_BUUNSTACK=0.1.0 python -m build --wheel
cd ../jupyterhub
# Copy built wheel to image directories
cp ../python-package/dist/*.whl ./images/datastack-notebook/
cp ../python-package/dist/*.whl ./images/datastack-cuda-notebook/
(
cd ./images/datastack-notebook
docker build -t \
@@ -150,6 +158,9 @@ build-kernel-images:
--build-arg spark_download_url="https://archive.apache.org/dist/spark/" \
.
)
# Clean up copied wheel files
rm -f ./images/datastack-notebook/*.whl
rm -f ./images/datastack-cuda-notebook/*.whl
# Push Jupyter notebook kernel images
push-kernel-images: build-kernel-images

View File

@@ -1,269 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Vault User Storage Example\n",
"\n",
"Each JupyterHub user has their own private storage space in Vault at `/secret/jupyter/users/<username>/`"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Setup and Authentication"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import hvac\n",
"import json\n",
"from datetime import datetime\n",
"\n",
"# Get environment variables\n",
"username = os.getenv('JUPYTERHUB_USER', 'testuser')\n",
"vault_addr = os.getenv('VAULT_ADDR', 'https://vault.example.com')\n",
"oidc_token = os.getenv('JUPYTERHUB_OIDC_ACCESS_TOKEN')\n",
"\n",
"if not oidc_token:\n",
" raise ValueError(\"OIDC token not found. Make sure auth_state is enabled.\")\n",
"\n",
"# Initialize Vault client\n",
"client = hvac.Client(url=vault_addr, verify=False)\n",
"\n",
"# Authenticate with JupyterHub token\n",
"client.auth.jwt.jwt_login(\n",
" role='jupyter-token',\n",
" jwt=oidc_token,\n",
" path='jwt'\n",
")\n",
"\n",
"print(f\"Authenticated as: {username}\")\n",
"print(f\"User storage path: secret/jupyter/users/{username}/\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Store User Preferences"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Store user preferences\n",
"preferences = {\n",
" 'theme': 'dark',\n",
" 'language': 'en',\n",
" 'font_size': 14,\n",
" 'auto_save': True,\n",
" 'last_updated': datetime.now().isoformat()\n",
"}\n",
"\n",
"client.secrets.kv.v2.create_or_update_secret(\n",
" path=f'jupyter/users/{username}/preferences',\n",
" secret=preferences,\n",
" mount_point='secret'\n",
")\n",
"\n",
"print(f\"Saved preferences for {username}\")\n",
"print(json.dumps(preferences, indent=2))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": "# Read back preferences\nresponse = client.secrets.kv.v2.read_secret_version(\n path=f'jupyter/users/{username}/preferences',\n mount_point='secret',\n raise_on_deleted_version=False\n)\n\nstored_prefs = response['data']['data']\nprint(\"Retrieved preferences:\")\nprint(json.dumps(stored_prefs, indent=2))"
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Store API keys securely\n",
"api_keys = {\n",
" 'openai_key': 'sk-your-api-key-here',\n",
" 'github_token': 'ghp_your-token-here',\n",
" 'aws_access_key': 'AKIA-example',\n",
" 'aws_secret_key': 'your-secret-here'\n",
"}\n",
"\n",
"client.secrets.kv.v2.create_or_update_secret(\n",
" path=f'jupyter/users/{username}/api-keys',\n",
" secret=api_keys,\n",
" mount_point='secret'\n",
")\n",
"\n",
"print(f\"Stored {len(api_keys)} API keys for {username}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Store Database Connections"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Store database connection info\n",
"db_connections = {\n",
" 'postgres_prod': {\n",
" 'host': 'db.example.com',\n",
" 'port': 5432,\n",
" 'database': 'production',\n",
" 'username': 'app_user',\n",
" 'password': 'secure-password-123'\n",
" },\n",
" 'mongodb_analytics': {\n",
" 'connection_string': 'mongodb://user:pass@mongo.example.com:27017/analytics'\n",
" }\n",
"}\n",
"\n",
"client.secrets.kv.v2.create_or_update_secret(\n",
" path=f'jupyter/users/{username}/databases',\n",
" secret=db_connections,\n",
" mount_point='secret'\n",
")\n",
"\n",
"print(f\"Stored {len(db_connections)} database connections\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Read Stored Secrets"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": "# Read back preferences\nresponse = client.secrets.kv.v2.read_secret_version(\n path=f'jupyter/users/{username}/preferences',\n mount_point='secret',\n raise_on_deleted_version=False\n)\n\nstored_prefs = response['data']['data']\nprint(\"Retrieved preferences:\")\nprint(json.dumps(stored_prefs, indent=2))"
},
{
"cell_type": "markdown",
"metadata": {},
"source": "class UserVaultStorage:\n \"\"\"Helper class for managing user's Vault storage\"\"\"\n \n def __init__(self):\n self.username = os.getenv('JUPYTERHUB_USER')\n self.client = hvac.Client(\n url=os.getenv('VAULT_ADDR'),\n verify=False\n )\n self._authenticate()\n self.base_path = f'jupyter/users/{self.username}'\n \n def _authenticate(self):\n token = os.getenv('JUPYTERHUB_OIDC_ACCESS_TOKEN')\n self.client.auth.jwt.jwt_login(\n role='jupyter-token',\n jwt=token,\n path='jwt'\n )\n \n def save(self, key, data):\n \"\"\"Save data to user's Vault storage\"\"\"\n path = f'{self.base_path}/{key}'\n self.client.secrets.kv.v2.create_or_update_secret(\n path=path,\n secret=data,\n mount_point='secret'\n )\n return path\n \n def load(self, key):\n \"\"\"Load data from user's Vault storage\"\"\"\n path = f'{self.base_path}/{key}'\n response = self.client.secrets.kv.v2.read_secret_version(\n path=path,\n mount_point='secret',\n raise_on_deleted_version=False\n )\n return response['data']['data'] if response else None\n \n def delete(self, key):\n \"\"\"Delete data from user's Vault storage\"\"\"\n path = f'{self.base_path}/{key}'\n self.client.secrets.kv.v2.delete_metadata_and_all_versions(\n path=path,\n mount_point='secret'\n )\n \n def list(self):\n \"\"\"List all keys in user's storage\"\"\"\n try:\n response = self.client.secrets.kv.v2.list_secrets(\n path=self.base_path,\n mount_point='secret'\n )\n return response['data']['keys']\n except:\n return []\n\n# Usage example\nstorage = UserVaultStorage()\n\n# Save model parameters\nstorage.save('ml-model-config', {\n 'model_type': 'random_forest',\n 'n_estimators': 100,\n 'max_depth': 10,\n 'training_date': datetime.now().isoformat()\n})\n\n# Load them back\nconfig = storage.load('ml-model-config')\nprint(\"Loaded config:\", config)\n\n# List all stored items\nprint(f\"\\nAll stored items: {storage.list()}\")"
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# List all secrets in user's directory\n",
"try:\n",
" secrets_list = client.secrets.kv.v2.list_secrets(\n",
" path=f'jupyter/users/{username}',\n",
" mount_point='secret'\n",
" )\n",
" \n",
" print(f\"Secrets stored for {username}:\")\n",
" for secret in secrets_list['data']['keys']:\n",
" print(f\" - {secret}\")\n",
"except Exception as e:\n",
" print(f\"No secrets found or error: {e}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Helper Class for User Storage"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": "class UserVaultStorage:\n \"\"\"Helper class for managing user's Vault storage\"\"\"\n \n def __init__(self):\n self.username = os.getenv('JUPYTERHUB_USER')\n self.client = hvac.Client(\n url=os.getenv('VAULT_ADDR'),\n verify=False\n )\n self._authenticate()\n self.base_path = f'jupyter/users/{self.username}'\n \n def _authenticate(self):\n token = os.getenv('JUPYTERHUB_OIDC_ACCESS_TOKEN')\n self.client.auth.jwt.jwt_login(\n role='jupyter-token',\n jwt=token,\n path='jwt'\n )\n \n def save(self, key, data):\n \"\"\"Save data to user's Vault storage\"\"\"\n path = f'{self.base_path}/{key}'\n self.client.secrets.kv.v2.create_or_update_secret(\n path=path,\n secret=data,\n mount_point='secret'\n )\n return path\n \n def load(self, key):\n \"\"\"Load data from user's Vault storage\"\"\"\n path = f'{self.base_path}/{key}'\n response = self.client.secrets.kv.v2.read_secret_version(\n path=path,\n mount_point='secret',\n raise_on_deleted_version=False\n )\n return response['data']['data'] if response else None\n \n def delete(self, key):\n \"\"\"Delete data from user's Vault storage\"\"\"\n path = f'{self.base_path}/{key}'\n self.client.secrets.kv.v2.delete_metadata_and_all_versions(\n path=path,\n mount_point='secret'\n )\n \n def list(self):\n \"\"\"List all keys in user's storage\"\"\"\n try:\n response = self.client.secrets.kv.v2.list_secrets(\n path=self.base_path,\n mount_point='secret'\n )\n return response['data']['keys']\n except:\n return []\n\n# Usage example\nstorage = UserVaultStorage()\n\n# Save model parameters\nstorage.save('ml-model-config', {\n 'model_type': 'random_forest',\n 'n_estimators': 100,\n 'max_depth': 10,\n 'training_date': datetime.now().isoformat()\n})\n\n# Load them back\nconfig = storage.load('ml-model-config')\nprint(\"Loaded config:\", config)\n\n# List all stored items\nprint(f\"\\nAll stored items: {storage.list()}\")"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Environment Variables Helper"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def load_env_from_vault(key='environment'):\n",
" \"\"\"Load environment variables from Vault\"\"\"\n",
" storage = UserVaultStorage()\n",
" \n",
" try:\n",
" env_vars = storage.load(key)\n",
" for name, value in env_vars.items():\n",
" os.environ[name] = str(value)\n",
" print(f\"Loaded: {name}\")\n",
" return list(env_vars.keys())\n",
" except Exception as e:\n",
" print(f\"No environment variables found: {e}\")\n",
" return []\n",
"\n",
"def save_env_to_vault(env_dict, key='environment'):\n",
" \"\"\"Save environment variables to Vault\"\"\"\n",
" storage = UserVaultStorage()\n",
" path = storage.save(key, env_dict)\n",
" print(f\"Saved {len(env_dict)} environment variables to {path}\")\n",
"\n",
"# Example: Save current project environment\n",
"project_env = {\n",
" 'PROJECT_NAME': 'data-analysis',\n",
" 'DATA_PATH': '/data/project',\n",
" 'MODEL_VERSION': 'v2.1',\n",
" 'DEBUG': 'false'\n",
"}\n",
"\n",
"save_env_to_vault(project_env)\n",
"loaded = load_env_from_vault()\n",
"print(f\"\\nEnvironment ready with {len(loaded)} variables\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.0"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

12
python-package/.gitignore vendored Normal file
View File

@@ -0,0 +1,12 @@
# Build artifacts
build/
dist/
*.egg-info/
# setuptools-scm generated version file
buunstack/_version.py
# Python cache
__pycache__/
*.pyc
*.pyo

118
python-package/README.md Normal file
View File

@@ -0,0 +1,118 @@
# buunstack
A Python package for buun-stack that provides secure secrets management with HashiCorp Vault and automatic Keycloak OIDC token refresh for JupyterHub users.
## Features
- 🔒 **Secure Secrets Management**: Integration with HashiCorp Vault
- 🔄 **Automatic Token Refresh**: Seamless Keycloak OIDC token management
- 📱 **Simple API**: Easy-to-use interface for secrets storage and retrieval
- 🏢 **Enterprise Ready**: Built for production environments
- 🚀 **JupyterHub Integration**: Native support for JupyterHub workflows
## Quick Start
### Installation
```bash
pip install buunstack
```
### Basic Usage
```python
from buunstack import SecretStore
# Initialize with automatic token refresh (default)
secrets = SecretStore()
# Put API keys and configuration
secrets.put(
    'api-keys',
    openai_key='sk-your-key-here',
    github_token='ghp_your-token',
    database_url='postgresql://user:pass@host:5432/db'
)
# Get secrets
api_keys = secrets.get('api-keys')
openai_key = api_keys['openai_key']
# List all your secrets
all_secrets = secrets.list()
```
### Configuration Options
```python
# Manual token management
secrets = SecretStore(auto_token_refresh=False)
# Custom refresh timing
secrets = SecretStore(
auto_token_refresh=True,
refresh_buffer_seconds=600, # Refresh 10 minutes before expiry
background_refresh_interval=3600 # Background refresh every hour
)
# Start background auto-refresh
refresher = secrets.start_background_refresh()
```
### Environment Variables Helper
```python
from buunstack import SecretStore, get_env_from_secrets, put_env_to_secrets
secrets = SecretStore()
# Put environment variables
project_env = {
'PROJECT_NAME': 'ml-research',
'MODEL_VERSION': 'v2.1',
'DEBUG': 'false'
}
put_env_to_secrets(secrets, project_env)
# Get environment variables
loaded_vars = get_env_from_secrets(secrets)
# Now available as os.environ['PROJECT_NAME'], etc.
```
## Comparison with Other Platforms
| Platform | API | Features |
|----------|-----|----------|
| Google Colab | `userdata.get('KEY')` | Simple, strings only |
| Databricks | `dbutils.secrets.get(scope, key)` | Scoped management |
| AWS SageMaker | `boto3.client().get_secret_value()` | JSON support, IAM control |
| Azure ML | `SecretClient().get_secret()` | RBAC, HSM support |
| **buunstack** | `secrets.get('key')` | **JSON support, unlimited sessions, auto-refresh** |
## Requirements
- Python 3.10+
- JupyterHub environment with Keycloak OIDC authentication
- HashiCorp Vault backend
- Required environment variables:
- `JUPYTERHUB_USER`
- `VAULT_ADDR`
- `JUPYTERHUB_OIDC_ACCESS_TOKEN`
- `JUPYTERHUB_OIDC_REFRESH_TOKEN` (for auto-refresh)
- `KEYCLOAK_HOST`, `KEYCLOAK_REALM` (for auto-refresh)
## Architecture
buunstack integrates with:
- **JupyterHub**: For user authentication and session management
- **Keycloak**: For OIDC token management and refresh
- **HashiCorp Vault**: For secure secrets storage
- **Kubernetes**: For container orchestration and networking
## License
This project is licensed under the MIT License - see the [LICENSE](../LICENSE) file for details.
## Security
For security issues, please email security@buunstack.dev instead of using the issue tracker.

View File

@@ -0,0 +1,13 @@
"""
buunstack - Python package for buun-stack Jupyter environment
"""
from .secrets import SecretStore, get_env_from_secrets, put_env_to_secrets
try:
from ._version import __version__
except ImportError:
__version__ = "unknown"
__author__ = "Buun Stack Team"
__all__ = ["SecretStore", "get_env_from_secrets", "put_env_to_secrets"]

View File

@@ -0,0 +1 @@
# Examples module

View File

@@ -0,0 +1,126 @@
"""
Quickstart example for buunstack SecretStore
"""
import json
from buunstack import SecretStore, get_env_from_secrets, put_env_to_secrets
def quickstart_example():
    """Walk through the core SecretStore operations, printing each step.

    The demo writes two secrets ("api-keys" and "ml-config") plus a set of
    environment variables, reads them back, lists everything stored for the
    current user and finally prints the store's status dictionary.
    """
    print("🚀 buunstack QuickStart Example")
    print("=" * 40)

    # Token auto-refresh is on unless it is explicitly switched off.
    store = SecretStore()
    print(f"✅ SecretStore initialized for user: {store.username}")

    # Every secret field has to be a plain string.
    print("\n📝 Saving API keys...")
    api_key_fields = {
        "openai_key": "sk-example-key-here",
        "github_token": "ghp_example-token",
        "database_url": "postgresql://user:pass@localhost:5432/mydb",
    }
    store.put("api-keys", **api_key_fields)
    print(" Put 3 API keys")

    # Read the whole secret back as a dictionary.
    print("\n📖 Getting API keys...")
    fetched = store.get("api-keys")
    if fetched and isinstance(fetched, dict):
        print(f" Got {len(fetched)} keys:")
        for field_name in fetched:
            print(f" - {field_name}")

    # Read a single field without fetching the dictionary first.
    print("\n🔑 Getting specific field...")
    single_value = store.get("api-keys", field="openai_key")
    if single_value and isinstance(single_value, str):
        print(f" OpenAI key: {single_value[:10]}...")

    # Persist a set of environment variables.
    print("\n🌍 Putting environment variables...")
    put_env_to_secrets(
        store,
        {
            "PROJECT_NAME": "my-ml-project",
            "MODEL_VERSION": "v1.0.0",
            "DEBUG": "false",
        },
    )

    # Non-string structures are serialised to JSON before being stored.
    print("\n📦 Storing complex data as JSON...")
    hyperparameters_json = json.dumps({"batch_size": 32, "learning_rate": 0.001})
    architecture_json = json.dumps(["conv1", "pool1", "conv2", "pool2", "fc"])
    store.put(
        "ml-config",
        hyperparameters=hyperparameters_json,
        architecture=architecture_json,
        version="1.0.0",
    )
    print(" Stored ML configuration")

    # Pull the environment variables back out of the store.
    print("\n🔄 Getting environment variables...")
    restored = get_env_from_secrets(store)
    print(f" Got {len(restored)} environment variables")

    # Enumerate everything stored for this user.
    print("\n📋 Listing all secrets...")
    secret_names = store.list()
    print(f" You have {len(secret_names)} secrets:")
    for secret_name in secret_names:
        print(f" - {secret_name}")

    # Dump the status dictionary one entry per line.
    print("\n📊 SecretStore status:")
    for label, detail in store.get_status().items():
        print(f" {label}: {detail}")

    print("\n🎉 Quickstart completed!")
def advanced_example():
"""Demonstrate non-default SecretStore configurations.

Creates one store with automatic token refresh disabled and one with
custom refresh timing, printing the resulting settings. When the second
store has auto refresh enabled and holds a refresh token, a background
refresher is started and its reported status is printed.
"""
print("\n🔧 Advanced Configuration Example")
print("=" * 40)
# Manual token management
print("\n1⃣ Manual token management:")
manual_secrets = SecretStore(auto_token_refresh=False)
print(f" Auto-refresh: {manual_secrets.auto_token_refresh}")
# Custom timing
print("\n2⃣ Custom refresh timing:")
custom_secrets = SecretStore(
auto_token_refresh=True,
refresh_buffer_seconds=600, # Refresh 10 minutes before expiry
background_refresh_interval=3600, # Background refresh every hour
)
print(f" Refresh buffer: {custom_secrets.refresh_buffer_seconds}s")
print(f" Background interval: {custom_secrets.background_refresh_interval}s")
# Background refresh (if auto_token_refresh is enabled)
# The refresh_token check is evaluated only when auto refresh is on,
# because `and` short-circuits.
if custom_secrets.auto_token_refresh and custom_secrets.refresh_token:
print("\n3⃣ Starting background refresher:")
refresher = custom_secrets.start_background_refresh()
# get_status() is indexed with the 'running' and 'interval_seconds' keys below.
refresher_status = refresher.get_status()
print(f" Running: {refresher_status['running']}")
print(f" Interval: {refresher_status['interval_seconds']}s")
# Stop the refresher
# NOTE(review): presumably still inside the `if` above, so the refresher is
# only stopped when it was started - indentation is lost in this view; confirm.
custom_secrets.stop_background_refresh()
print(" Stopped background refresher")
if __name__ == "__main__":
    # Run both demos in order; any failure is reported with a short hint
    # about the expected environment instead of a traceback.
    try:
        for demo in (quickstart_example, advanced_example):
            demo()
    except Exception as err:
        print(
            f"❌ Error: {err}",
            "Make sure you're running this in a JupyterHub environment with Vault integration enabled.",
            sep="\n",
        )

View File

@@ -0,0 +1,810 @@
"""
Secrets management for JupyterHub with Vault backend
"""
import logging
import os
import threading
import warnings
from datetime import datetime, timedelta
from typing import Any, overload
import hvac
import jwt
import requests
# Suppress SSL warnings for self-signed certificates
warnings.filterwarnings("ignore", message="Unverified HTTPS request")
# Set up logging (disabled by default)
logger = logging.getLogger("buunstack")
logger.addHandler(logging.NullHandler()) # Default to no output
class SecretStore:
"""Simple and powerful secrets management for JupyterHub with Vault backend.
SecretStore provides a secure interface for managing secrets in JupyterHub
environments using HashiCorp Vault as the backend storage. It supports
automatic OIDC token refresh via Keycloak integration and provides both
manual and background token management options.
Attributes
----------
auto_token_refresh : bool
Whether automatic token refresh is enabled.
refresh_buffer_seconds : int
Seconds before token expiry to trigger refresh.
background_refresh_interval : int
Seconds between background refresh checks.
username : str or None
JupyterHub username from environment.
vault_addr : str or None
Vault server address from environment.
base_path : str
Base path for user's secrets in Vault.
Examples
--------
>>> secrets = SecretStore()
>>> secrets.put('api-keys', openai='sk-123', github='ghp-456')
'jupyter/users/username/api-keys'
>>> data = secrets.get('api-keys')
>>> print(data['openai'])
'sk-123'
>>> # Or get specific field directly
>>> openai_key = secrets.get('api-keys', field='openai')
>>> print(openai_key)
'sk-123'
"""
def __init__(
self,
auto_token_refresh: bool = True,
refresh_buffer_seconds: int = 300,
background_refresh_interval: int = 1800,
):
"""
Initialize SecretStore with authentication and configuration.
Parameters
----------
auto_token_refresh : bool, optional
Enable automatic token refresh using Keycloak OIDC, by default True.
Requires KEYCLOAK_HOST, KEYCLOAK_REALM, and JUPYTERHUB_OIDC_REFRESH_TOKEN
environment variables.
refresh_buffer_seconds : int, optional
Seconds before token expiry to trigger refresh, by default 300.
Only used when auto_token_refresh is True.
background_refresh_interval : int, optional
Seconds between background refresh checks, by default 1800.
Only used when background refresh is started.
Raises
------
ValueError
If required environment variables are missing:
- JUPYTERHUB_USER: JupyterHub username
- VAULT_ADDR: Vault server address
- JUPYTERHUB_OIDC_ACCESS_TOKEN: Initial access token
- KEYCLOAK_HOST, KEYCLOAK_REALM: Required for auto_token_refresh
ConnectionError
If unable to connect to Vault server or authenticate.
Examples
--------
>>> # Basic usage with auto-refresh
>>> secrets = SecretStore()
>>> # Manual token management
>>> secrets = SecretStore(auto_token_refresh=False)
>>> # Custom timing
>>> secrets = SecretStore(
... refresh_buffer_seconds=600,
... background_refresh_interval=3600
... )
"""
self.auto_token_refresh = auto_token_refresh
self.refresh_buffer_seconds = refresh_buffer_seconds
self.background_refresh_interval = background_refresh_interval
# User and environment info
self.username = os.getenv("JUPYTERHUB_USER")
self.vault_addr = os.getenv("VAULT_ADDR")
# Keycloak configuration (only needed if auto_token_refresh is enabled)
if self.auto_token_refresh:
self.keycloak_host = os.getenv("KEYCLOAK_HOST")
self.keycloak_realm = os.getenv("KEYCLOAK_REALM")
self.keycloak_client_id = os.getenv("KEYCLOAK_CLIENT_ID", "jupyterhub")
self.refresh_token = os.getenv("JUPYTERHUB_OIDC_REFRESH_TOKEN")
# Token management
self.access_token = os.getenv("JUPYTERHUB_OIDC_ACCESS_TOKEN")
self.token_expiry = (
self._get_token_expiry(self.access_token) if self.access_token else None
)
# Initialize Vault client
self.client = hvac.Client(url=self.vault_addr, verify=False)
# Background refresher
self._background_refresher = None
# Authenticate initially
self._authenticate_vault()
# Set base path for user storage
self.base_path = f"jupyter/users/{self.username}"
logger.info(f"SecretStore initialized for user: {self.username}")
logger.info(
f"Auto token refresh: {'enabled' if self.auto_token_refresh else 'disabled'}"
)
if self.auto_token_refresh and self.token_expiry:
logger.info(f"Token expires at: {self.token_expiry}")
def _get_token_expiry(self, token: str) -> datetime | None:
"""Derive an expiry timestamp from a JWT access token.

The token is decoded without signature verification, so the result is
only a hint for scheduling refreshes, not proof that the token is genuine.

Parameters
----------
token : str
    Encoded JWT. A falsy value yields None.

Returns
-------
datetime or None
    The "exp" claim as a naive local datetime when present; otherwise the
    "iat" claim plus 3600 seconds. See the review note on the final line
    for the remaining fallback.
"""
if not token:
return None
try:
# Only the claims are needed here, so signature verification is switched off.
payload = jwt.decode(token, options={"verify_signature": False})
exp = payload.get("exp")
if exp:
# fromtimestamp() without a tz argument yields a naive local datetime,
# the same kind that _is_token_valid compares against datetime.now().
return datetime.fromtimestamp(exp)
# Fallback to iat + 1 hour
iat = payload.get("iat")
if iat:
return datetime.fromtimestamp(iat + 3600)
except Exception as e:
logger.warning(f"Could not decode token expiry: {e}")
# NOTE(review): presumably this runs after the try/except as a shared fallback
# (decode failure, or neither "exp" nor "iat" present) - indentation is lost in
# this view; confirm it is not scoped to the except branch only.
return datetime.now() + timedelta(hours=1)
def _is_token_valid(self) -> bool:
"""Check if current token is still valid"""
if not self.auto_token_refresh or not self.token_expiry:
return True # Assume valid if refresh is disabled
time_until_expiry = (self.token_expiry - datetime.now()).total_seconds()
return time_until_expiry > self.refresh_buffer_seconds
def _refresh_keycloak_tokens(self) -> bool:
"""Refresh tokens using Keycloak refresh token"""
if not self.auto_token_refresh:
return False
if not self.refresh_token or not self.keycloak_host or not self.keycloak_realm:
logger.error("Missing refresh token or Keycloak configuration")
return False
token_url = f"https://{self.keycloak_host}/realms/{self.keycloak_realm}/protocol/openid-connect/token"
try:
logger.info("Refreshing tokens from Keycloak...")
response = requests.post(
token_url,
data={
"grant_type": "refresh_token",
"refresh_token": self.refresh_token,
"client_id": self.keycloak_client_id,
},
verify=False,
)
if response.status_code == 200:
tokens = response.json()
# Update tokens
self.access_token = tokens["access_token"]
if "refresh_token" in tokens:
self.refresh_token = tokens["refresh_token"]
# Update environment variables
os.environ["JUPYTERHUB_OIDC_ACCESS_TOKEN"] = self.access_token
if "refresh_token" in tokens:
os.environ["JUPYTERHUB_OIDC_REFRESH_TOKEN"] = self.refresh_token
# Update token expiry
self.token_expiry = self._get_token_expiry(self.access_token)
logger.info("✅ Tokens refreshed successfully")
return True
else:
logger.error(
f"Token refresh failed: {response.status_code} - {response.text}"
)
return False
except Exception as e:
logger.error(f"Exception during token refresh: {e}")
return False
def _authenticate_vault(self):
"""Authenticate with Vault using current access token"""
if not self.access_token:
raise ValueError("No access token available")
try:
self.client.auth.jwt.jwt_login(
role="jupyter-token", jwt=self.access_token, path="jwt"
)
logger.info("✅ Authenticated with Vault successfully")
except Exception as e:
logger.error(f"Vault authentication failed: {e}")
raise
def _ensure_authenticated(self):
"""Ensure we have valid tokens and Vault authentication"""
if self.auto_token_refresh and not self._is_token_valid():
logger.info("Token invalid or expiring soon")
if self._refresh_keycloak_tokens():
self._authenticate_vault()
else:
raise Exception(
"Failed to refresh tokens. Manual re-authentication required."
)
def put(self, key: str, **kwargs: Any) -> str:
"""
Store data in your personal secret storage.
Saves the provided key-value pairs to Vault under the specified key.
Values must be strings. For complex data types, encode them as JSON strings.
Parameters
----------
key : Any
The key/name for the secret. Must be a valid Vault path component.
**kwargs : str
Key-value pairs to store as the secret data. All values must be strings.
For complex types, encode them as JSON strings first.
Returns
-------
str
Full Vault path where the secret was stored.
Raises
------
ValueError
If key is empty or contains invalid characters, if no kwargs provided,
or if any value is not a string.
ConnectionError
If unable to connect to Vault server.
hvac.exceptions.Forbidden
If authentication fails or insufficient permissions.
hvac.exceptions.InvalidRequest
If the data format is invalid.
Examples
--------
>>> import json
>>> secrets = SecretStore()
>>> path = secrets.put('api-keys', openai='sk-123', github='ghp-456')
>>> print(path)
'jupyter/users/username/api-keys'
>>> # Store complex data as JSON strings
>>> config_data = {'debug': True, 'max_workers': 4}
>>> secrets.put('config',
... settings=json.dumps(config_data),
... endpoints=json.dumps(['api.example.com']))
"""
if not kwargs:
raise ValueError("At least one key-value pair must be provided")
# Validate all values are strings
for field_name, value in kwargs.items():
if not isinstance(value, str):
raise ValueError(
f"Value for '{field_name}' must be a string. "
f"Got {type(value).__name__}. "
"For complex types, encode as JSON string first."
)
self._ensure_authenticated()
path = f"{self.base_path}/{key}"
try:
self.client.secrets.kv.v2.create_or_update_secret(
path=path, secret=kwargs, mount_point="secret"
)
logger.info(f"Put secret: {key}")
return path
except Exception as e:
logger.error(f"Failed to put secret: {e}")
# Retry once with re-authentication
self._ensure_authenticated()
self.client.secrets.kv.v2.create_or_update_secret(
path=path, secret=kwargs, mount_point="secret"
)
return path
@overload
def get(self, key: str, field: None = None) -> dict[str, Any] | None: ...
@overload
def get(self, key: str, field: str) -> str | None: ...
def get(self, key: str, field: str | None = None) -> dict[str, Any] | str | None:
"""
Retrieve data from your personal secret storage.
Loads the data dictionary stored under the specified key from Vault.
If field is specified, returns only that field's value. Returns None
if the key doesn't exist or if there's an access error.
Parameters
----------
key : str
The key/name of the secret to retrieve.
field : str, optional
Specific field to retrieve from the secret. If provided, returns
only the value of this field instead of the entire secret dict.
Returns
-------
dict[str, Any] or str or None
- If field is None: The complete stored data dictionary if found, None otherwise.
- If field is specified: The value of the specified field, or None if
field doesn't exist or secret is not found.
Raises
------
ConnectionError
If unable to connect to Vault server.
hvac.exceptions.InvalidRequest
If the key format is invalid.
Examples
--------
>>> secrets = SecretStore()
>>> # Get entire secret
>>> api_keys = secrets.get('api-keys')
>>> if api_keys:
... openai_key = api_keys['openai']
... print(f'OpenAI key: {openai_key}')
>>> # Get specific field (like vault kv get -field=...)
>>> openai_key = secrets.get('api-keys', field='openai')
>>> print(f'OpenAI key: {openai_key}')
>>> # Handle missing keys or fields
>>> config = secrets.get('nonexistent-key')
>>> if config is None:
... print('Key not found')
>>> missing_field = secrets.get('api-keys', field='nonexistent')
>>> if missing_field is None:
... print('Field not found')
"""
self._ensure_authenticated()
path = f"{self.base_path}/{key}"
try:
response = self.client.secrets.kv.v2.read_secret_version(
path=path, mount_point="secret", raise_on_deleted_version=False
)
if response and "data" in response and "data" in response["data"]:
data = response["data"]["data"]
logger.info(f"Got secret: {key}")
# Return specific field if requested
if field is not None:
return data.get(field)
return data
return None
except Exception as e:
if "permission denied" in str(e).lower():
logger.info("Permission denied, re-authenticating...")
self._ensure_authenticated()
response = self.client.secrets.kv.v2.read_secret_version(
path=path, mount_point="secret", raise_on_deleted_version=False
)
if response and "data" in response and "data" in response["data"]:
data = response["data"]["data"]
if field is not None:
return data.get(field)
return data
logger.warning(f'Could not get secret "{key}": {e}')
return None
def delete(self, key: str) -> None:
    """
    Remove a secret and every stored version of it from Vault.

    Makes sure the client is authenticated, then asks the KV v2 engine on
    the ``secret`` mount to delete the metadata and all versions stored at
    ``<base_path>/<key>``. The removal cannot be reverted.

    Parameters
    ----------
    key : str
        Name of the secret to remove, relative to this store's base path.

    Raises
    ------
    Exception
        Any error raised by the Vault client is logged and then re-raised
        unchanged.

    Examples
    --------
    >>> secrets = SecretStore()
    >>> secrets.delete('old-api-key')
    """
    self._ensure_authenticated()
    secret_path = f"{self.base_path}/{key}"
    try:
        self.client.secrets.kv.v2.delete_metadata_and_all_versions(
            path=secret_path,
            mount_point="secret",
        )
    except Exception as exc:
        # Surface the failure in the log, but let the caller decide what to do.
        logger.error(f'Failed to delete secret "{key}": {exc}')
        raise
    else:
        logger.info(f"Deleted secret: {key}")
def list(self) -> list[str]:
    """
    Return the names of the secrets stored under this store's base path.

    Only key names are returned; secret values are never read here.

    Returns
    -------
    list[str]
        The ``data.keys`` entry of the Vault list response. An empty list
        is returned when the response is empty or when any error occurs
        (the error is logged as a warning, not raised).

    Examples
    --------
    >>> secrets = SecretStore()
    >>> keys = secrets.list()
    >>> print(f'You have {len(keys)} secrets: {keys}')
    """
    self._ensure_authenticated()
    try:
        listing = self.client.secrets.kv.v2.list_secrets(
            path=self.base_path, mount_point="secret"
        )
        if listing:
            names = listing["data"]["keys"]
        else:
            names = []
        logger.info(f"Listed {len(names)} secrets")
        return names
    except Exception as err:
        # Best-effort: listing failures degrade to "no secrets".
        logger.warning(f"Could not list secrets: {err}")
        return []
def get_status(self) -> dict[str, Any]:
    """
    Get status information about the SecretStore instance.

    Reports configuration, token availability, token expiry and whether
    the background refresher is running. No network request is made.

    Returns
    -------
    dict[str, Any]
        Always present:

        - username: value of ``self.username``
        - auto_token_refresh: whether auto-refresh is enabled
        - has_access_token: whether an access token is available
        - vault_addr: Vault server address
        - background_refresher_running: whether a background refresher
          exists and its thread is alive

        Only when ``auto_token_refresh`` is enabled:

        - has_refresh_token: whether a refresh token is available
        - keycloak_configured: whether both Keycloak host and realm are set

        Only when a token expiry is known:

        - token_valid: result of the internal token validity check
        - token_expiry: expiry time as an ISO 8601 string
        - seconds_remaining: seconds until expiry (never negative)
        - minutes_remaining: minutes until expiry (never negative)

    Examples
    --------
    >>> secrets = SecretStore()
    >>> status = secrets.get_status()
    >>> print(f"User: {status['username']}")
    >>> print(f"Token expires in: {status.get('seconds_remaining', 'N/A')} seconds")
    """
    # getattr keeps this method usable even if the refresher slot was never set.
    refresher = getattr(self, "_background_refresher", None)
    status: dict[str, Any] = {
        "username": self.username,
        "auto_token_refresh": self.auto_token_refresh,
        "has_access_token": bool(self.access_token),
        "vault_addr": self.vault_addr,
        "background_refresher_running": bool(
            refresher is not None and refresher.get_status()["running"]
        ),
    }
    if self.auto_token_refresh:
        status.update(
            {
                "has_refresh_token": bool(self.refresh_token),
                "keycloak_configured": bool(
                    self.keycloak_host and self.keycloak_realm
                ),
            }
        )
    if self.token_expiry:
        time_remaining = (self.token_expiry - datetime.now()).total_seconds()
        status.update(
            {
                "token_valid": self._is_token_valid(),
                "token_expiry": self.token_expiry.isoformat(),
                # Clamp so an already-expired token reports 0, not a negative.
                "seconds_remaining": max(0, time_remaining),
                "minutes_remaining": max(0, time_remaining / 60),
            }
        )
    return status
def start_background_refresh(self) -> "BackgroundRefresher":
    """
    Start (or restart) periodic token refreshing in the background.

    A single BackgroundRefresher is created on first use, configured with
    ``self.background_refresh_interval``, and reused on later calls. Its
    ``start()`` method is invoked on every call.

    Returns
    -------
    BackgroundRefresher
        The refresher attached to this store.

    Raises
    ------
    ValueError
        If ``auto_token_refresh`` is disabled on this store.

    Examples
    --------
    >>> secrets = SecretStore(auto_token_refresh=True)
    >>> refresher = secrets.start_background_refresh()
    >>> print(f"Background refresh running: {refresher.get_status()['running']}")
    """
    if not self.auto_token_refresh:
        raise ValueError("Background refresh requires auto_token_refresh=True")

    refresher = self._background_refresher
    if refresher is None:
        # Lazily create the refresher the first time it is requested.
        refresher = BackgroundRefresher(
            self, interval_seconds=self.background_refresh_interval
        )
        self._background_refresher = refresher

    refresher.start()
    return refresher
def stop_background_refresh(self) -> None:
    """
    Stop background token refreshing, if it was ever started.

    Delegates to the attached refresher's ``stop()``. Calling this when no
    refresher has been created is a no-op.

    Examples
    --------
    >>> secrets = SecretStore(auto_token_refresh=True)
    >>> secrets.start_background_refresh()
    >>> # ... do some work ...
    >>> secrets.stop_background_refresh()
    """
    refresher = self._background_refresher
    if not refresher:
        return
    refresher.stop()
class BackgroundRefresher:
    """
    Periodically refresh a SecretStore's tokens from a daemon thread.

    After every ``interval_seconds`` the worker asks the store to refresh
    its Keycloak tokens and, on success, to re-authenticate against Vault.

    Attributes
    ----------
    secret_store : SecretStore
        The store whose tokens are refreshed.
    interval_seconds : int
        Seconds to wait between refresh attempts.
    refresh_count : int
        Number of successful refreshes so far.
    last_refresh : datetime or None
        Time of the last successful refresh, None if there was none yet.

    Examples
    --------
    >>> secrets = SecretStore(auto_token_refresh=True)
    >>> refresher = secrets.start_background_refresh()
    >>> status = refresher.get_status()
    >>> print(f"Refreshes performed: {status['refresh_count']}")
    """

    def __init__(self, secret_store: "SecretStore", interval_seconds: int = 1800) -> None:
        """
        Initialize the background refresher without starting it.

        Parameters
        ----------
        secret_store : SecretStore
            The store to manage tokens for.
        interval_seconds : int, optional
            Seconds between refresh attempts, by default 1800 (30 minutes).
        """
        self.secret_store = secret_store
        self.interval_seconds = interval_seconds
        self._stop_event = threading.Event()
        self._thread: threading.Thread | None = None
        self.refresh_count = 0
        self.last_refresh: datetime | None = None

    def start(self) -> None:
        """
        Start the refresh thread unless one is already running.

        Safe to call repeatedly: a new daemon thread is only created when
        there is no live thread.
        """
        if self._thread is None or not self._thread.is_alive():
            # Clear a stop request left over from a previous stop().
            self._stop_event.clear()
            self._thread = threading.Thread(target=self._refresh_loop, daemon=True)
            self._thread.start()
            logger.info(
                f"Started background refresher (interval: {self.interval_seconds}s)"
            )

    def stop(self) -> None:
        """
        Ask the refresh thread to stop and wait up to 5 seconds for it.

        Does nothing when no thread is running. If the thread is still
        alive after the timeout (for example while blocked in a refresh
        request), a warning is logged instead of claiming it stopped.
        """
        if self._thread and self._thread.is_alive():
            self._stop_event.set()
            self._thread.join(timeout=5)
            if self._thread.is_alive():
                logger.warning(
                    "Background refresher did not stop within 5 seconds"
                )
            else:
                logger.info("Stopped background refresher")

    def _refresh_loop(self) -> None:
        """Thread body: wait one interval, refresh, repeat until stopped."""
        # Event.wait() returns True as soon as stop is requested, which ends
        # the loop; it returns False on timeout, which triggers a refresh.
        while not self._stop_event.wait(self.interval_seconds):
            try:
                if self.secret_store._refresh_keycloak_tokens():
                    self.secret_store._authenticate_vault()
                    self.refresh_count += 1
                    self.last_refresh = datetime.now()
                    logger.info(
                        f"✅ Background refresh #{self.refresh_count} successful"
                    )
                else:
                    logger.error("❌ Background refresh failed")
            except Exception as e:
                # Keep the thread alive; the next interval will retry.
                logger.error(f"Exception in background refresh: {e}")

    def get_status(self) -> dict[str, Any]:
        """
        Get the current status of the background refresher.

        Returns
        -------
        dict[str, Any]
            Status dictionary containing:

            - running: True if the refresh thread is alive, otherwise False
            - refresh_count: number of successful refreshes performed
            - last_refresh: ISO timestamp of last successful refresh (or None)
            - interval_seconds: configured refresh interval

        Examples
        --------
        >>> refresher = secrets.start_background_refresh()
        >>> status = refresher.get_status()
        >>> print(f"Running: {status['running']}, Count: {status['refresh_count']}")
        """
        return {
            # Explicit None check so a never-started refresher reports False
            # rather than leaking None through the `and` expression.
            "running": self._thread is not None and self._thread.is_alive(),
            "refresh_count": self.refresh_count,
            "last_refresh": self.last_refresh.isoformat()
            if self.last_refresh
            else None,
            "interval_seconds": self.interval_seconds,
        }
# Utility functions
def get_env_from_secrets(secrets: SecretStore, key: str = "environment") -> list[str]:
    """
    Copy the fields of a stored secret into ``os.environ``.

    Reads the secret stored under ``key`` and exports each of its fields
    as an environment variable of the current process. Values are
    converted with ``str()``. Only variable names are logged, never values.

    Parameters
    ----------
    secrets : SecretStore
        The store to read from.
    key : str, optional
        Secret that holds the variables, by default "environment".

    Returns
    -------
    list[str]
        Names of the variables that were set. Empty list when the secret
        is missing or empty.

    Examples
    --------
    >>> secrets = SecretStore()
    >>> loaded = get_env_from_secrets(secrets)
    >>> print(f'Loaded {len(loaded)} variables: {loaded}')
    """
    stored = secrets.get(key)
    if not stored:
        return []

    loaded_names = []
    for var_name in stored:
        os.environ[var_name] = str(stored[var_name])
        logger.info(f"Set environment variable: {var_name}")
        loaded_names.append(var_name)
    return loaded_names
def put_env_to_secrets(
    secrets: SecretStore, env_dict: dict, key: str = "environment"
) -> str:
    """
    Save a mapping of environment variables as one secret.

    Every value is converted with ``str()`` and the resulting pairs are
    passed to ``secrets.put`` as keyword arguments, so each variable
    becomes one field of the secret stored under ``key``.

    Parameters
    ----------
    secrets : SecretStore
        The store to write to.
    env_dict : dict[str, Any]
        Variable names mapped to their values.
    key : str, optional
        Secret to store the variables under, by default "environment".

    Returns
    -------
    str
        Whatever ``secrets.put`` returns (presumably the Vault path that
        was written; confirm against ``SecretStore.put``).

    Examples
    --------
    >>> secrets = SecretStore()
    >>> path = put_env_to_secrets(secrets, {'DEBUG': 'false', 'MAX_WORKERS': 4})
    >>> put_env_to_secrets(secrets, {'API_KEY': 'secret'}, 'production-config')
    """
    stringified = {}
    for name, value in env_dict.items():
        stringified[name] = str(value)

    stored_path = secrets.put(key, **stringified)
    logger.info(f"Put {len(env_dict)} environment variables")
    return stored_path

View File

@@ -0,0 +1,78 @@
[build-system]
# setuptools>=77 is the first release that accepts the SPDX string form
# `license = "MIT"` used in [project] (PEP 639); older releases reject it,
# and releases before 61 do not read the [project] table at all.
requires = ["setuptools>=77", "wheel", "setuptools_scm[toml]>=6.2"]
build-backend = "setuptools.build_meta"
[project]
name = "buunstack"
# The version is not written here; it is supplied at build time
# (see [tool.setuptools_scm]).
dynamic = ["version"]
description = "Python package for buun-stack Jupyter environment"
authors = [{ name = "Buun ch.", email = "buun@buun.channel" }]
readme = "README.md"
license = "MIT"
requires-python = ">=3.12"
classifiers = [
    "Development Status :: 4 - Beta",
    "Intended Audience :: Developers",
    "Intended Audience :: Science/Research",
    "Operating System :: OS Independent",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.12",
    "Topic :: Scientific/Engineering",
    "Topic :: Security :: Cryptography",
    "Topic :: Software Development :: Libraries :: Python Modules",
]
keywords = ["jupyter", "vault", "secrets", "keycloak", "oidc", "kubernetes"]
dependencies = ["hvac>=1.0.0", "requests>=2.25.0", "PyJWT>=2.0.0"]
[project.optional-dependencies]
# Local development tooling: tests, formatting, linting, type checking.
dev = [
    "pytest>=7.0.0",
    "black>=22.0.0",
    "flake8>=4.0.0",
    "mypy>=0.950",
]
# Documentation build.
docs = [
    "sphinx>=4.0.0",
    "sphinx-rtd-theme>=1.0.0",
]
[project.urls]
# Labels shown next to each link in the package metadata.
Homepage = "https://github.com/buun-ch/buun-stack"
Repository = "https://github.com/buun-ch/buun-stack"
# Quoted because the label contains a space, which a bare key cannot.
"Bug Reports" = "https://github.com/buun-ch/buun-stack/issues"
[project.scripts]
# Installs a `buunstack-secrets` console command that calls `main` in
# the `buunstack.cli` module.
buunstack-secrets = "buunstack.cli:main"
[tool.setuptools_scm]
# setuptools_scm writes the version it resolves to this file at build
# time; the file is generated and should not be edited by hand.
write_to = "buunstack/_version.py"
[tool.black]
line-length = 100
# Must match `requires-python = ">=3.12"` in [project]; targeting py38
# made Black format for syntax the package no longer needs to support.
target-version = ['py312']
include = '\.pyi?$'
extend-exclude = '''
/(
# directories
\.eggs
| \.git
| \.hg
| \.mypy_cache
| \.tox
| \.venv
| build
| dist
)/
'''
[tool.mypy]
# Type-check against the minimum interpreter declared in `requires-python`.
python_version = "3.12"
warn_return_any = true
warn_unused_configs = true
# Every function must be fully annotated, including decorated ones.
disallow_untyped_defs = true
disallow_incomplete_defs = true
check_untyped_defs = true
disallow_untyped_decorators = true
# A `None` default does not implicitly make the parameter Optional.
no_implicit_optional = true
warn_redundant_casts = true
warn_unused_ignores = true
warn_no_return = true
warn_unreachable = true
strict_equality = true
[tool.pytest.ini_options]
minversion = "6.0"
# -ra: print a short summary for every test that did not pass; -q: quieter output.
addopts = "-ra -q"
testpaths = ["tests"]