feat(jupyterhub): install buunstack package to kernel image
This commit is contained in:
1
jupyterhub/.gitignore
vendored
1
jupyterhub/.gitignore
vendored
@@ -1 +1,2 @@
|
||||
jupyterhub-values.yaml
|
||||
/notebooks/
|
||||
|
||||
2
jupyterhub/images/datastack-cuda-notebook/.gitignore
vendored
Normal file
2
jupyterhub/images/datastack-cuda-notebook/.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
# Temporary copy of wheel files for Docker build
|
||||
*.whl
|
||||
@@ -143,6 +143,12 @@ RUN pip install \
|
||||
tavily-python \
|
||||
tweet-preprocessor
|
||||
|
||||
# Install buunstack package
|
||||
COPY *.whl /opt/
|
||||
RUN pip install /opt/*.whl && \
|
||||
fix-permissions "${CONDA_DIR}" && \
|
||||
fix-permissions "/home/${NB_USER}"
|
||||
|
||||
# Install PyTorch with pip (https://pytorch.org/get-started/locally/)
|
||||
# langchain-openai must be updated to avoid pydantic v2 error
|
||||
# https://github.com/run-llama/llama_index/issues/16540
|
||||
|
||||
2
jupyterhub/images/datastack-notebook/.gitignore
vendored
Normal file
2
jupyterhub/images/datastack-notebook/.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
# Temporary copy of wheel files for Docker build
|
||||
*.whl
|
||||
@@ -143,6 +143,12 @@ RUN pip install \
|
||||
tavily-python \
|
||||
tweet-preprocessor
|
||||
|
||||
# Install buunstack package
|
||||
COPY *.whl /opt/
|
||||
RUN pip install /opt/*.whl && \
|
||||
fix-permissions "${CONDA_DIR}" && \
|
||||
fix-permissions "/home/${NB_USER}"
|
||||
|
||||
# Install PyTorch with pip (https://pytorch.org/get-started/locally/)
|
||||
# langchain-openai must be updated to avoid pydantic v2 error
|
||||
# https://github.com/run-llama/llama_index/issues/16540
|
||||
|
||||
@@ -42,6 +42,11 @@ hub:
|
||||
if 'expires_at' in auth_state:
|
||||
spawner.environment['JUPYTERHUB_OIDC_TOKEN_EXPIRES_AT'] = str(auth_state['expires_at'])
|
||||
|
||||
# Add Keycloak configuration for token refresh
|
||||
spawner.environment['KEYCLOAK_HOST'] = '{{ .Env.KEYCLOAK_HOST }}'
|
||||
spawner.environment['KEYCLOAK_REALM'] = '{{ .Env.KEYCLOAK_REALM }}'
|
||||
spawner.environment['KEYCLOAK_CLIENT_ID'] = 'jupyterhub'
|
||||
|
||||
c.Spawner.pre_spawn_hook = pre_spawn_hook
|
||||
{{- end }}
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@ export JUPYTERHUB_CHART_VERSION := env("JUPYTERHUB_CHART_VERSION", "4.2.0")
|
||||
export JUPYTERHUB_OIDC_CLIENT_ID := env("JUPYTERHUB_OIDC_CLIENT_ID", "jupyterhub")
|
||||
export JUPYTERHUB_ENABLE_NFS_PV := env("JUPYTERHUB_ENABLE_NFS_PV", "")
|
||||
export JUPYTERHUB_VAULT_INTEGRATION_ENABLED := env("JUPYTERHUB_VAULT_INTEGRATION_ENABLED", "false")
|
||||
export JUPYTER_PYTHON_KERNEL_TAG := env("JUPYTER_PYTHON_KERNEL_TAG", "python-3.12-2")
|
||||
export JUPYTER_PYTHON_KERNEL_TAG := env("JUPYTER_PYTHON_KERNEL_TAG", "python-3.12-3")
|
||||
export KERNEL_IMAGE_BUUN_STACK_REPOSITORY := env("KERNEL_IMAGE_BUUN_STACK_REPOSITORY", "buun-stack-notebook")
|
||||
export KERNEL_IMAGE_BUUN_STACK_CUDA_REPOSITORY := env("KERNEL_IMAGE_BUUN_STACK_CUDA_REPOSITORY", "buun-stack-cuda-notebook")
|
||||
export JUPYTER_PROFILE_MINIMAL_ENABLED := env("JUPYTER_PROFILE_MINIMAL_ENABLED", "false")
|
||||
@@ -134,6 +134,14 @@ delete-pv:
|
||||
build-kernel-images:
|
||||
#!/bin/bash
|
||||
set -euo pipefail
|
||||
# Build python package wheel
|
||||
cd ../python-package
|
||||
rm -rf dist/ build/ *.egg-info/
|
||||
SETUPTOOLS_SCM_PRETEND_VERSION_FOR_BUUNSTACK=0.1.0 python -m build --wheel
|
||||
cd ../jupyterhub
|
||||
# Copy built wheel to image directories
|
||||
cp ../python-package/dist/*.whl ./images/datastack-notebook/
|
||||
cp ../python-package/dist/*.whl ./images/datastack-cuda-notebook/
|
||||
(
|
||||
cd ./images/datastack-notebook
|
||||
docker build -t \
|
||||
@@ -150,6 +158,9 @@ build-kernel-images:
|
||||
--build-arg spark_download_url="https://archive.apache.org/dist/spark/" \
|
||||
.
|
||||
)
|
||||
# Clean up copied wheel files
|
||||
rm -f ./images/datastack-notebook/*.whl
|
||||
rm -f ./images/datastack-cuda-notebook/*.whl
|
||||
|
||||
# Push Jupyter notebook kernel images
|
||||
push-kernel-images: build-kernel-images
|
||||
|
||||
@@ -1,269 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Vault User Storage Example\n",
|
||||
"\n",
|
||||
"Each JupyterHub user has their own private storage space in Vault at `/secret/jupyter/users/<username>/`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Setup and Authentication"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import hvac\n",
|
||||
"import json\n",
|
||||
"from datetime import datetime\n",
|
||||
"\n",
|
||||
"# Get environment variables\n",
|
||||
"username = os.getenv('JUPYTERHUB_USER', 'testuser')\n",
|
||||
"vault_addr = os.getenv('VAULT_ADDR', 'https://vault.example.com')\n",
|
||||
"oidc_token = os.getenv('JUPYTERHUB_OIDC_ACCESS_TOKEN')\n",
|
||||
"\n",
|
||||
"if not oidc_token:\n",
|
||||
" raise ValueError(\"OIDC token not found. Make sure auth_state is enabled.\")\n",
|
||||
"\n",
|
||||
"# Initialize Vault client\n",
|
||||
"client = hvac.Client(url=vault_addr, verify=False)\n",
|
||||
"\n",
|
||||
"# Authenticate with JupyterHub token\n",
|
||||
"client.auth.jwt.jwt_login(\n",
|
||||
" role='jupyter-token',\n",
|
||||
" jwt=oidc_token,\n",
|
||||
" path='jwt'\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print(f\"Authenticated as: {username}\")\n",
|
||||
"print(f\"User storage path: secret/jupyter/users/{username}/\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Store User Preferences"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Store user preferences\n",
|
||||
"preferences = {\n",
|
||||
" 'theme': 'dark',\n",
|
||||
" 'language': 'en',\n",
|
||||
" 'font_size': 14,\n",
|
||||
" 'auto_save': True,\n",
|
||||
" 'last_updated': datetime.now().isoformat()\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"client.secrets.kv.v2.create_or_update_secret(\n",
|
||||
" path=f'jupyter/users/{username}/preferences',\n",
|
||||
" secret=preferences,\n",
|
||||
" mount_point='secret'\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print(f\"Saved preferences for {username}\")\n",
|
||||
"print(json.dumps(preferences, indent=2))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": "## Store API Keys"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Store API keys securely\n",
|
||||
"api_keys = {\n",
|
||||
" 'openai_key': 'sk-your-api-key-here',\n",
|
||||
" 'github_token': 'ghp_your-token-here',\n",
|
||||
" 'aws_access_key': 'AKIA-example',\n",
|
||||
" 'aws_secret_key': 'your-secret-here'\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"client.secrets.kv.v2.create_or_update_secret(\n",
|
||||
" path=f'jupyter/users/{username}/api-keys',\n",
|
||||
" secret=api_keys,\n",
|
||||
" mount_point='secret'\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print(f\"Stored {len(api_keys)} API keys for {username}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Store Database Connections"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Store database connection info\n",
|
||||
"db_connections = {\n",
|
||||
" 'postgres_prod': {\n",
|
||||
" 'host': 'db.example.com',\n",
|
||||
" 'port': 5432,\n",
|
||||
" 'database': 'production',\n",
|
||||
" 'username': 'app_user',\n",
|
||||
" 'password': 'secure-password-123'\n",
|
||||
" },\n",
|
||||
" 'mongodb_analytics': {\n",
|
||||
" 'connection_string': 'mongodb://user:pass@mongo.example.com:27017/analytics'\n",
|
||||
" }\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"client.secrets.kv.v2.create_or_update_secret(\n",
|
||||
" path=f'jupyter/users/{username}/databases',\n",
|
||||
" secret=db_connections,\n",
|
||||
" mount_point='secret'\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print(f\"Stored {len(db_connections)} database connections\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Read Stored Secrets"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": "# Read back preferences\nresponse = client.secrets.kv.v2.read_secret_version(\n path=f'jupyter/users/{username}/preferences',\n mount_point='secret',\n raise_on_deleted_version=False\n)\n\nstored_prefs = response['data']['data']\nprint(\"Retrieved preferences:\")\nprint(json.dumps(stored_prefs, indent=2))"
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": "## List All Stored Secrets"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# List all secrets in user's directory\n",
|
||||
"try:\n",
|
||||
" secrets_list = client.secrets.kv.v2.list_secrets(\n",
|
||||
" path=f'jupyter/users/{username}',\n",
|
||||
" mount_point='secret'\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
" print(f\"Secrets stored for {username}:\")\n",
|
||||
" for secret in secrets_list['data']['keys']:\n",
|
||||
" print(f\" - {secret}\")\n",
|
||||
"except Exception as e:\n",
|
||||
" print(f\"No secrets found or error: {e}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Helper Class for User Storage"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": "class UserVaultStorage:\n \"\"\"Helper class for managing user's Vault storage\"\"\"\n \n def __init__(self):\n self.username = os.getenv('JUPYTERHUB_USER')\n self.client = hvac.Client(\n url=os.getenv('VAULT_ADDR'),\n verify=False\n )\n self._authenticate()\n self.base_path = f'jupyter/users/{self.username}'\n \n def _authenticate(self):\n token = os.getenv('JUPYTERHUB_OIDC_ACCESS_TOKEN')\n self.client.auth.jwt.jwt_login(\n role='jupyter-token',\n jwt=token,\n path='jwt'\n )\n \n def save(self, key, data):\n \"\"\"Save data to user's Vault storage\"\"\"\n path = f'{self.base_path}/{key}'\n self.client.secrets.kv.v2.create_or_update_secret(\n path=path,\n secret=data,\n mount_point='secret'\n )\n return path\n \n def load(self, key):\n \"\"\"Load data from user's Vault storage\"\"\"\n path = f'{self.base_path}/{key}'\n response = self.client.secrets.kv.v2.read_secret_version(\n path=path,\n mount_point='secret',\n raise_on_deleted_version=False\n )\n return response['data']['data'] if response else None\n \n def delete(self, key):\n \"\"\"Delete data from user's Vault storage\"\"\"\n path = f'{self.base_path}/{key}'\n self.client.secrets.kv.v2.delete_metadata_and_all_versions(\n path=path,\n mount_point='secret'\n )\n \n def list(self):\n \"\"\"List all keys in user's storage\"\"\"\n try:\n response = self.client.secrets.kv.v2.list_secrets(\n path=self.base_path,\n mount_point='secret'\n )\n return response['data']['keys']\n except:\n return []\n\n# Usage example\nstorage = UserVaultStorage()\n\n# Save model parameters\nstorage.save('ml-model-config', {\n 'model_type': 'random_forest',\n 'n_estimators': 100,\n 'max_depth': 10,\n 'training_date': datetime.now().isoformat()\n})\n\n# Load them back\nconfig = storage.load('ml-model-config')\nprint(\"Loaded config:\", config)\n\n# List all stored items\nprint(f\"\\nAll stored items: {storage.list()}\")"
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Environment Variables Helper"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def load_env_from_vault(key='environment'):\n",
|
||||
" \"\"\"Load environment variables from Vault\"\"\"\n",
|
||||
" storage = UserVaultStorage()\n",
|
||||
" \n",
|
||||
" try:\n",
|
||||
" env_vars = storage.load(key)\n",
|
||||
" for name, value in env_vars.items():\n",
|
||||
" os.environ[name] = str(value)\n",
|
||||
" print(f\"Loaded: {name}\")\n",
|
||||
" return list(env_vars.keys())\n",
|
||||
" except Exception as e:\n",
|
||||
" print(f\"No environment variables found: {e}\")\n",
|
||||
" return []\n",
|
||||
"\n",
|
||||
"def save_env_to_vault(env_dict, key='environment'):\n",
|
||||
" \"\"\"Save environment variables to Vault\"\"\"\n",
|
||||
" storage = UserVaultStorage()\n",
|
||||
" path = storage.save(key, env_dict)\n",
|
||||
" print(f\"Saved {len(env_dict)} environment variables to {path}\")\n",
|
||||
"\n",
|
||||
"# Example: Save current project environment\n",
|
||||
"project_env = {\n",
|
||||
" 'PROJECT_NAME': 'data-analysis',\n",
|
||||
" 'DATA_PATH': '/data/project',\n",
|
||||
" 'MODEL_VERSION': 'v2.1',\n",
|
||||
" 'DEBUG': 'false'\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"save_env_to_vault(project_env)\n",
|
||||
"loaded = load_env_from_vault()\n",
|
||||
"print(f\"\\nEnvironment ready with {len(loaded)} variables\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
Reference in New Issue
Block a user