Files
buun-stack/mlflow/values.gomplate.yaml
2025-11-09 21:31:33 +09:00

160 lines
4.8 KiB
YAML

---
# Number of replicas to deploy; a single instance is requested.
# NOTE(review): with OIDC enabled, sessions are kept on the pod-local
# filesystem (see SESSION_CACHE_DIR further down), so raising this value
# presumably needs a shared session backend - confirm before scaling.
replicaCount: 1
# Image configuration.
# Exactly one of the two variants below is rendered, selected by the
# MLFLOW_OIDC_ENABLED environment variable. getenv with a fallback is used
# so that an unset or empty variable is treated as "false" instead of
# aborting the render on a missing key.
{{- if eq (getenv "MLFLOW_OIDC_ENABLED" "false") "true" }}
# OIDC enabled: image named "mlflow" from the registry given by
# IMAGE_REGISTRY, at the tag given by MLFLOW_IMAGE_TAG.
image:
  repository: "{{ .Env.IMAGE_REGISTRY }}/mlflow"
  pullPolicy: "{{ .Env.MLFLOW_IMAGE_PULL_POLICY }}"
  tag: "{{ .Env.MLFLOW_IMAGE_TAG }}"  # Custom MLflow with OIDC
{{- else }}
# OIDC disabled: the burakince/mlflow image pinned to a fixed tag.
image:
  repository: burakince/mlflow
  pullPolicy: "{{ .Env.MLFLOW_IMAGE_PULL_POLICY }}"
  tag: "3.6.0"  # MLflow 3.6.0
{{- end }}
# Backend store: PostgreSQL, addressed by an in-cluster service DNS name.
backendStore:
  # Database migration is switched on.
  databaseMigration: true
  # Database connection check is switched on.
  databaseConnectionCheck: true
  postgres:
    enabled: true
    # The namespace part of the host comes from POSTGRES_NAMESPACE at
    # render time.
    host: "postgres-cluster-rw.{{ .Env.POSTGRES_NAMESPACE }}.svc.cluster.local"
    port: 5432
    database: "mlflow"
    # Left empty; presumably the consumer falls back to its default
    # driver - confirm.
    driver: ""
  # Database credentials are taken from an existing Kubernetes secret
  # (keys "username" and "password") instead of being written here.
  existingDatabaseSecret:
    name: "mlflow-db-secret"
    usernameKey: "username"
    passwordKey: "password"
# Artifact root: S3-style bucket storage (MinIO/S3).
artifactRoot:
  # Proxied artifact storage is switched on.
  proxiedArtifactStorage: true
  s3:
    enabled: true
    bucket: "mlflow"
    # Empty path: no sub-path inside the bucket is configured here.
    path: ""
    # The S3 access key pair is taken from an existing Kubernetes secret;
    # the two entries below name the keys inside that secret.
    existingSecret:
      name: "mlflow-s3-secret"
      keyOfAccessKeyId: "AWS_ACCESS_KEY_ID"
      keyOfSecretAccessKey: "AWS_SECRET_ACCESS_KEY"
# Server arguments and environment variables.
# Exactly one of the two variants below is rendered, selected by the
# MLFLOW_OIDC_ENABLED environment variable. getenv with a fallback is used
# so that an unset or empty variable is treated as "false" instead of
# aborting the render on a missing key.
{{- if eq (getenv "MLFLOW_OIDC_ENABLED" "false") "true" }}
# Disable MLflow logging to prevent gunicornOpts auto-injection
log:
  enabled: false
# A map of arguments to pass to the `mlflow server` command (OIDC enabled)
# Use oidc-auth-fastapi for FastAPI/ASGI compatibility with Uvicorn
extraArgs:
  appName: "oidc-auth-fastapi"
  # Allow connections from external hostname and Kubernetes internal access
  # NOTE(review): the trailing "*" entry presumably matches every Host
  # header, which would make the explicit entries before it redundant -
  # confirm whether it is still required (e.g. for in-cluster probes).
  allowedHosts: "{{ .Env.MLFLOW_HOST }},{{ .Env.MLFLOW_HOST }}:443,mlflow.{{ .Env.MLFLOW_NAMESPACE }}.svc.cluster.local,mlflow.{{ .Env.MLFLOW_NAMESPACE }}.svc.cluster.local:5000,*"
# Extra secrets for OIDC configuration
extraSecretNamesForEnvFrom:
  - mlflow-oidc-config
# Extra environment variables for OIDC and S3/MinIO configuration
extraEnvVars:
  # MinIO is reached over plain HTTP inside the cluster.
  MLFLOW_S3_ENDPOINT_URL: "http://minio.{{ .Env.MINIO_NAMESPACE }}.svc.cluster.local:9000"
  MLFLOW_S3_IGNORE_TLS: "true"
  # OIDC Configuration - mlflow-oidc-auth uses OIDC Discovery
  OIDC_DISCOVERY_URL: "https://{{ .Env.KEYCLOAK_HOST }}/realms/{{ .Env.KEYCLOAK_REALM }}/.well-known/openid-configuration"
  OIDC_REDIRECT_URI: "https://{{ .Env.MLFLOW_HOST }}/callback"
  OIDC_SCOPE: "openid profile email groups"
  OIDC_PROVIDER_DISPLAY_NAME: "Keycloak"
  # OIDC attribute mapping
  OIDC_GROUPS_ATTRIBUTE: "groups"
  # Group configuration - required for access control
  OIDC_ADMIN_GROUP_NAME: "mlflow-admins"
  OIDC_GROUP_NAME: "mlflow-admins,mlflow-users"
  # Default permission for new resources
  DEFAULT_MLFLOW_PERMISSION: "MANAGE"
  # Session configuration - use cachelib with filesystem backend
  SESSION_TYPE: "cachelib"
  SESSION_CACHE_DIR: "/tmp/session"
  # Security configuration - allow same-origin CORS and configured host
  # NOTE(review): this host list is narrower than extraArgs.allowedHosts
  # above; verify which of the two settings takes effect.
  MLFLOW_SERVER_CORS_ALLOWED_ORIGINS: "https://{{ .Env.MLFLOW_HOST }}"
  MLFLOW_SERVER_ALLOWED_HOSTS: "{{ .Env.MLFLOW_HOST }},{{ .Env.MLFLOW_HOST }}:443"
  MLFLOW_SERVER_X_FRAME_OPTIONS: "SAMEORIGIN"
{{- else }}
# Extra environment variables for S3/MinIO configuration (OIDC disabled)
extraEnvVars:
  # MinIO is reached over plain HTTP inside the cluster.
  MLFLOW_S3_ENDPOINT_URL: "http://minio.{{ .Env.MINIO_NAMESPACE }}.svc.cluster.local:9000"
  MLFLOW_S3_IGNORE_TLS: "true"
  # Security configuration - allow same-origin CORS and configured host
  MLFLOW_SERVER_CORS_ALLOWED_ORIGINS: "https://{{ .Env.MLFLOW_HOST }}"
  MLFLOW_SERVER_ALLOWED_HOSTS: "{{ .Env.MLFLOW_HOST }},{{ .Env.MLFLOW_HOST }}:443"
  MLFLOW_SERVER_X_FRAME_OPTIONS: "SAMEORIGIN"
{{- end }}
# Service: ClusterIP type, exposed on port 5000.
service:
  type: ClusterIP
  port: 5000
# Ingress: Traefik ingress class, routing the whole MLFLOW_HOST hostname
# (path prefix "/") to this release.
ingress:
  enabled: true
  className: "traefik"
  annotations:
    # Route through the "websecure" entrypoint.
    traefik.ingress.kubernetes.io/router.entrypoints: websecure
    # Attach the "mlflow-headers" middleware, prefixed with MLFLOW_NAMESPACE;
    # the middleware itself is not defined in this file.
    traefik.ingress.kubernetes.io/router.middlewares: {{ .Env.MLFLOW_NAMESPACE }}-mlflow-headers@kubernetescrd
  hosts:
    - host: {{ .Env.MLFLOW_HOST }}
      paths:
        - path: /
          pathType: Prefix
  # TLS for the same hostname. No secretName is given here - presumably a
  # default certificate is served; confirm.
  tls:
    - hosts:
        - {{ .Env.MLFLOW_HOST }}
# ServiceMonitor for Prometheus scraping.
serviceMonitor:
  # Unquoted, so a rendered true/false is parsed as a YAML boolean rather
  # than a string. Assumes MONITORING_ENABLED holds true or false - confirm.
  enabled: {{ .Env.MONITORING_ENABLED }}
  # The ServiceMonitor object is placed in the namespace given by
  # PROMETHEUS_NAMESPACE.
  namespace: "{{ .Env.PROMETHEUS_NAMESPACE }}"
  labels:
    release: kube-prometheus-stack
  useServicePort: false
  telemetryPath: /metrics
  # Scrape every 30s, with a 10s timeout per scrape.
  interval: 30s
  timeout: 10s
# Container resources: requests of 0.1 CPU / 512Mi, capped at 1 CPU / 2Gi.
resources:
  requests:
    cpu: 100m
    memory: 512Mi
  limits:
    cpu: 1000m
    memory: 2Gi
# Pod-level security context: volumes are group-owned by GID 1001, and
# ownership is only changed when the volume root does not already match.
podSecurityContext:
  fsGroup: 1001
  fsGroupChangePolicy: "OnRootMismatch"
# Container-level security context: run as non-root UID/GID 1001 with no
# privilege escalation and every Linux capability dropped.
securityContext:
  runAsNonRoot: true
  runAsUser: 1001
  runAsGroup: 1001
  privileged: false
  allowPrivilegeEscalation: false
  capabilities:
    drop:
      - ALL
  # The root filesystem stays writable.
  readOnlyRootFilesystem: false