Files
buun-stack/jupyterhub/jupyterhub-values.gomplate.yaml
2025-09-08 14:06:35 +09:00

393 lines
14 KiB
YAML

hub:
  extraEnv:
    JUPYTERHUB_CRYPT_KEY: {{ .Env.JUPYTERHUB_CRYPT_KEY | quote }}
    VAULT_ADDR: {{ .Env.VAULT_ADDR | quote }}
    NOTEBOOK_VAULT_TOKEN_TTL: {{ .Env.NOTEBOOK_VAULT_TOKEN_TTL | quote }}
    NOTEBOOK_VAULT_TOKEN_MAX_TTL: {{ .Env.NOTEBOOK_VAULT_TOKEN_MAX_TTL | quote }}
    {{- if eq .Env.JUPYTERHUB_VAULT_INTEGRATION_ENABLED "true" }}
    # Vault Agent will provide token via file
    VAULT_TOKEN_FILE: "/vault/secrets/vault-token"
    {{- else }}
    # Traditional token via environment variable
    JUPYTERHUB_VAULT_TOKEN: {{ .Env.JUPYTERHUB_VAULT_TOKEN | quote }}
    {{- end }}
  # Install packages at container startup
  extraFiles:
    startup.sh:
      mountPath: /usr/local/bin/startup.sh
      mode: 0755
      stringData: |
        #!/bin/bash
        pip install --no-cache-dir hvac==2.3.0
        exec jupyterhub --config /usr/local/etc/jupyterhub/jupyterhub_config.py --upgrade-db
    user_policy.hcl:
      mountPath: /srv/jupyterhub/user_policy.hcl
      mode: 0644
      stringData: |
{{ .Env.USER_POLICY_HCL | strings.Indent 8 }}
  # Override the default command to run our startup script first
  command:
    - /usr/local/bin/startup.sh
  config:
    JupyterHub:
      authenticator_class: generic-oauth
      admin_access: false
    Authenticator:
      enable_auth_state: true
      allow_all: true # allow all Keycloak users
    GenericOAuthenticator:
      # Quoted so an empty or special-character expansion stays valid YAML
      client_id: {{ .Env.JUPYTERHUB_OIDC_CLIENT_ID | quote }}
      oauth_callback_url: "https://{{ .Env.JUPYTERHUB_HOST }}/hub/oauth_callback"
      authorize_url: "https://{{ .Env.KEYCLOAK_HOST }}/realms/{{ .Env.KEYCLOAK_REALM }}/protocol/openid-connect/auth"
      token_url: "https://{{ .Env.KEYCLOAK_HOST }}/realms/{{ .Env.KEYCLOAK_REALM }}/protocol/openid-connect/token"
      userdata_url: "https://{{ .Env.KEYCLOAK_HOST }}/realms/{{ .Env.KEYCLOAK_REALM }}/protocol/openid-connect/userinfo"
      login_service: keycloak
      # username_claim: email
      username_claim: preferred_username
      auth_refresh_age: 300 # Refresh auth token every 5 minutes
      refresh_pre_spawn: true # Refresh token before spawning server
    OAuthenticator:
      scope:
        - openid
        - profile
        - email
  extraConfig:
    pre-spawn-hook: |
      # Set environment variables for spawned containers
      import hvac

      def get_vault_token():
          """Read Vault token from file written by Vault Agent"""
          import os
          token_file = os.environ.get('VAULT_TOKEN_FILE', '/vault/secrets/vault-token')
          try:
              with open(token_file, 'r') as f:
                  token = f.read().strip()
                  if token:
                      return token
                  else:
                      raise Exception(f"Empty token file: {token_file}")
          except FileNotFoundError:
              # Fallback to environment variable for backward compatibility
              return os.environ.get("JUPYTERHUB_VAULT_TOKEN")
          except Exception as e:
              # Log error but attempt fallback
              print(f"Error reading token file {token_file}: {e}")
              return os.environ.get("JUPYTERHUB_VAULT_TOKEN")

      async def pre_spawn_hook(spawner):
          """Set essential environment variables for spawned containers"""
          # PostgreSQL configuration
          spawner.environment["POSTGRES_HOST"] = "postgres-cluster-rw.postgres"
          spawner.environment["POSTGRES_PORT"] = "5432"
          # JupyterHub API configuration
          spawner.environment["JUPYTERHUB_API_URL"] = "http://hub:8081/hub/api"
          # Logging configuration
          spawner.environment["BUUNSTACK_LOG_LEVEL"] = "{{ .Env.JUPYTER_BUUNSTACK_LOG_LEVEL }}"
          # Create user-specific Vault token directly
          try:
              username = spawner.user.name
              # Step 1: Initialize admin Vault client with file-based token
              import os
              vault_addr = os.environ.get("VAULT_ADDR", "{{ .Env.VAULT_ADDR }}")
              vault_token = get_vault_token()
              spawner.log.info(f"pre_spawn_hook starting for {username}")
              spawner.log.info(f"Vault address: {vault_addr}")
              spawner.log.info(f"Vault token source: {'file' if os.path.exists(os.environ.get('VAULT_TOKEN_FILE', '/vault/secrets/vault-token')) else 'env'}")
              spawner.log.info(f"Vault token present: {bool(vault_token)}, length: {len(vault_token) if vault_token else 0}")
              if not vault_token:
                  raise Exception("No Vault token available from file or environment")
              # NOTE(review): TLS verification is disabled for the Vault client;
              # confirm this is acceptable for in-cluster traffic, or mount the
              # cluster CA and pass verify=<ca-path> instead.
              vault_client = hvac.Client(url=vault_addr, verify=False)
              vault_client.token = vault_token
              if not vault_client.is_authenticated():
                  raise Exception("Admin token is not authenticated")
              # Step 2: Create user-specific policy
              user_policy_name = "jupyter-user-{}".format(username)
              # Read policy template from file (os already imported above)
              policy_template_path = "/srv/jupyterhub/user_policy.hcl"
              with open(policy_template_path, 'r') as f:
                  policy_template = f.read()
              # Replace {username} placeholder with actual username
              user_policy = policy_template.replace("{username}", username)
              # Write user-specific policy
              try:
                  vault_client.sys.create_or_update_policy(user_policy_name, user_policy)
                  spawner.log.info("✅ Created policy: {}".format(user_policy_name))
              except Exception as policy_e:
                  spawner.log.warning("Policy creation failed (may already exist): {}".format(policy_e))
              # Step 3: Create user-specific token
              # Get TTL settings from environment variables
              user_token_ttl = os.environ.get("NOTEBOOK_VAULT_TOKEN_TTL", "24h")
              user_token_max_ttl = os.environ.get("NOTEBOOK_VAULT_TOKEN_MAX_TTL", "168h")
              token_response = vault_client.auth.token.create(
                  policies=[user_policy_name],
                  ttl=user_token_ttl,
                  renewable=True,
                  display_name="notebook-{}".format(username),
                  explicit_max_ttl=user_token_max_ttl
              )
              user_vault_token = token_response["auth"]["client_token"]
              lease_duration = token_response["auth"].get("lease_duration", 3600)
              # Set user-specific Vault token as environment variable
              spawner.environment["NOTEBOOK_VAULT_TOKEN"] = user_vault_token
              spawner.log.info("✅ User-specific Vault token created for {} (TTL: {}s, renewable, max TTL: {})".format(username, lease_duration, user_token_max_ttl))
          except Exception as e:
              spawner.log.error("Failed to create user-specific Vault token for {}: {}".format(spawner.user.name, e))
              import traceback
              spawner.log.error("Full traceback: {}".format(traceback.format_exc()))

      c.KubeSpawner.pre_spawn_hook = pre_spawn_hook
  {{- if eq .Env.JUPYTERHUB_VAULT_INTEGRATION_ENABLED "true" }}
  # Vault Agent sidecar configuration
  extraVolumes:
    - name: vault-secrets
      emptyDir: {}
    - name: vault-config
      configMap:
        name: vault-agent-config
  extraVolumeMounts:
    - name: vault-secrets
      mountPath: /vault/secrets
    - name: vault-config
      mountPath: /vault/config
  extraContainers:
    - name: vault-agent
      image: hashicorp/vault:1.15.2
      securityContext:
        runAsUser: 100
        runAsGroup: 101
        runAsNonRoot: true
        allowPrivilegeEscalation: false
        readOnlyRootFilesystem: false
        capabilities:
          drop:
            - ALL
      command:
        - /bin/sh
        - -c
        - |
          # Start Vault Agent
          vault agent -config=/vault/config/agent.hcl
      env:
        - name: VAULT_ADDR
          value: {{ .Env.VAULT_ADDR | quote }}
      volumeMounts:
        - name: vault-secrets
          mountPath: /vault/secrets
        - name: vault-config
          mountPath: /vault/config
      resources:
        requests:
          cpu: 50m
          memory: 64Mi
        limits:
          cpu: 100m
          memory: 128Mi
  {{- end }}
  podSecurityContext:
    fsGroup: {{ .Env.JUPYTER_FSGID }}
singleuser:
  storage:
    {{ if env.Getenv "PVC_NAME" -}}
    type: static
    static:
      # Quoted so an empty expansion stays a string, not YAML null
      pvcName: {{ .Env.PVC_NAME | quote }}
    {{ else -}}
    type: dynamic
    dynamic:
      {{ if env.Getenv "JUPYTERHUB_STORAGE_CLASS" -}}
      storageClass: {{ .Env.JUPYTERHUB_STORAGE_CLASS | quote }}
      {{ end -}}
      storageAccessModes:
        - ReadWriteOnce
    {{ end -}}
    capacity: 10Gi
  extraEnv:
    VAULT_ADDR: "{{ .Env.VAULT_ADDR }}"
  networkPolicy:
    egress:
      # PostgreSQL
      - to:
          - namespaceSelector:
              matchLabels:
                kubernetes.io/metadata.name: postgres
        ports:
          - port: 5432
            protocol: TCP
      # Chroma vector store
      - to:
          - namespaceSelector:
              matchLabels:
                kubernetes.io/metadata.name: chroma
        ports:
          - port: 8000
            protocol: TCP
      # Qdrant vector store (HTTP, gRPC, cluster)
      - to:
          - namespaceSelector:
              matchLabels:
                kubernetes.io/metadata.name: qdrant
        ports:
          - port: 6333
            protocol: TCP
          - port: 6334
            protocol: TCP
          - port: 6335
            protocol: TCP
      # LiteLLM proxy
      - to:
          - namespaceSelector:
              matchLabels:
                kubernetes.io/metadata.name: litellm
        ports:
          - port: 4000
            protocol: TCP
      # Vault API
      - to:
          - namespaceSelector:
              matchLabels:
                kubernetes.io/metadata.name: vault
        ports:
          - port: 8200
            protocol: TCP
      # Allow DNS resolution
      - to:
          - ipBlock:
              cidr: 0.0.0.0/0
        ports:
          - port: 53
            protocol: UDP
          - port: 53
            protocol: TCP
      # Allow HTTP traffic
      - to:
          - ipBlock:
              cidr: 0.0.0.0/0
        ports:
          - port: 80
            protocol: TCP
      # Allow HTTPS traffic
      - to:
          - ipBlock:
              cidr: 0.0.0.0/0
        ports:
          - port: 443
            protocol: TCP
  image:
    pullPolicy: IfNotPresent
  profileList:
    # https://quay.io/repository/jupyter/pyspark-notebook
    {{- if eq .Env.JUPYTER_PROFILE_MINIMAL_ENABLED "true" }}
    - display_name: "Minimal Jupyter Notebook Stack"
      description: "Minimal Jupyter Notebook Stack"
      kubespawner_override:
        image: quay.io/jupyter/minimal-notebook
    {{- end }}
    {{- if eq .Env.JUPYTER_PROFILE_BASE_ENABLED "true" }}
    - display_name: "Base Jupyter Notebook Stack"
      description: "Base Jupyter Notebook Stack"
      kubespawner_override:
        image: quay.io/jupyter/base-notebook
    {{- end }}
    {{- if eq .Env.JUPYTER_PROFILE_DATASCIENCE_ENABLED "true" }}
    - display_name: "Jupyter Notebook Data Science Stack"
      description: "Jupyter Notebook Data Science Stack"
      kubespawner_override:
        image: quay.io/jupyter/datascience-notebook
    {{- end }}
    {{- if eq .Env.JUPYTER_PROFILE_PYSPARK_ENABLED "true" }}
    - display_name: "Jupyter Notebook Python, Spark Stack"
      description: "Jupyter Notebook Python, Spark Stack"
      kubespawner_override:
        image: quay.io/jupyter/pyspark-notebook
    {{- end }}
    {{- if eq .Env.JUPYTER_PROFILE_PYTORCH_ENABLED "true" }}
    - display_name: "Jupyter Notebook PyTorch Deep Learning Stack"
      description: "Jupyter Notebook PyTorch Deep Learning Stack"
      kubespawner_override:
        image: quay.io/jupyter/pytorch-notebook
    {{- end }}
    {{- if eq .Env.JUPYTER_PROFILE_TENSORFLOW_ENABLED "true" }}
    - display_name: "Jupyter Notebook TensorFlow Deep Learning Stack"
      description: "Jupyter Notebook TensorFlow Deep Learning Stack"
      kubespawner_override:
        image: quay.io/jupyter/tensorflow-notebook
    {{- end }}
    {{- if eq .Env.JUPYTER_PROFILE_BUUN_STACK_ENABLED "true" }}
    - display_name: "Buun-stack"
      description: "Jupyter Notebook with buun-stack"
      kubespawner_override:
        image: "{{ .Env.IMAGE_REGISTRY }}/{{ .Env.KERNEL_IMAGE_BUUN_STACK_REPOSITORY }}:{{ .Env.JUPYTER_PYTHON_KERNEL_TAG }}"
    {{- end }}
    {{- if eq .Env.JUPYTER_PROFILE_BUUN_STACK_CUDA_ENABLED "true" }}
    - display_name: "Buun-stack with CUDA"
      description: "Jupyter Notebook with buun-stack and CUDA support"
      kubespawner_override:
        image: "{{ .Env.IMAGE_REGISTRY }}/{{ .Env.KERNEL_IMAGE_BUUN_STACK_CUDA_REPOSITORY }}:{{ .Env.JUPYTER_PYTHON_KERNEL_TAG }}"
        # resources:
        #   requests:
        #     nvidia.com/gpu: "1"
    {{- end }}
cull:
  enabled: true
  # for production
  timeout: 7200 # 2 hours idle timeout
  every: 600 # Check every 10 minutes
  # for testing
  # timeout: 300 # 5 minutes idle timeout (for testing)
  # every: 60 # Check every 1 minute (for testing)
  # maxAge: 86400 # Maximum age of a server pod (1 day)
  adminUsers: true # Also cull admin users' server pods
  users: false # Don't delete user accounts, only stop server pods
# Secret used by all pulls of private kernel images (see profileList entries
# that reference IMAGE_REGISTRY)
imagePullSecrets:
  - name: regcred
ingress:
  enabled: true
  annotations:
    # NOTE(review): kubernetes.io/ingress.class is deprecated in favor of
    # ingressClassName (set below); both are kept here — confirm the extra
    # annotation is still needed for your Traefik version.
    kubernetes.io/ingress.class: traefik
    traefik.ingress.kubernetes.io/router.entrypoints: websecure
  ingressClassName: traefik
  hosts:
    # Quoted so the templated hostname stays a plain string
    - {{ .Env.JUPYTERHUB_HOST | quote }}
  pathType: Prefix
  tls:
    - hosts:
        - {{ .Env.JUPYTERHUB_HOST | quote }}