diff --git a/mlflow/.gitignore b/mlflow/.gitignore index ddf1c94..c9d1057 100644 --- a/mlflow/.gitignore +++ b/mlflow/.gitignore @@ -2,4 +2,5 @@ values.yaml mlflow-db-external-secret.yaml mlflow-s3-external-secret.yaml mlflow-oidc-config.yaml +mlflow-middleware.yaml image/.buildx-cache diff --git a/mlflow/README.md b/mlflow/README.md index a2405b1..1522a71 100644 --- a/mlflow/README.md +++ b/mlflow/README.md @@ -156,17 +156,115 @@ with mlflow.start_run(): #### Authentication for API Access -For programmatic access, create an access token: +For programmatic access (Python scripts, notebooks, CI/CD), you need to create an access key. -1. Log in to MLflow UI -2. Navigate to Permissions UI → Create access token -3. Use token in your code: +**Step 1: Create Access Key via Web UI** + +1. Navigate to `https://your-mlflow-host/` and log in via Keycloak +2. You will be redirected to the MLflow Permission Manager UI +3. Click the **"Create access key"** button at the top of the page +4. In the dialog that appears: + - Select an expiration date (maximum 1 year from today) + - Click **"Request Token"** +5. Copy the generated access key from the "Access Key" field +6. 
Store it securely (you won't be able to retrieve it again) + +**Step 2: Use Access Key in Python** + +Provide the access key via the `MLFLOW_TRACKING_TOKEN` environment variable (set in your shell or from Python): ```python import os -os.environ["MLFLOW_TRACKING_TOKEN"] = "your-token" +import mlflow + +# Option 1: Configure the token and tracking URI via environment variables (recommended) +os.environ["MLFLOW_TRACKING_TOKEN"] = "your-access-key-here" +os.environ["MLFLOW_TRACKING_URI"] = "https://mlflow.example.com" + +# Option 2: Set the tracking URI in code instead (the token is still read from MLFLOW_TRACKING_TOKEN) +mlflow.set_tracking_uri("https://mlflow.example.com") + +# Now you can use the MLflow client +mlflow.set_experiment("my-experiment") + +with mlflow.start_run(): + mlflow.log_param("alpha", 0.5) + mlflow.log_metric("rmse", 0.786) ``` +**Complete Example** + +```python +import os +import mlflow +import mlflow.sklearn +from sklearn.ensemble import RandomForestClassifier +from sklearn.datasets import load_iris +from sklearn.model_selection import train_test_split +from sklearn.metrics import accuracy_score + +# Configure MLflow +os.environ["MLFLOW_TRACKING_TOKEN"] = "your-access-key-here" +mlflow.set_tracking_uri("https://mlflow.example.com") +mlflow.set_experiment("iris-classification") + +# Load data +X, y = load_iris(return_X_y=True) +X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) + +# Train and log model +with mlflow.start_run(): + # Log parameters + n_estimators = 100 + max_depth = 5 + mlflow.log_param("n_estimators", n_estimators) + mlflow.log_param("max_depth", max_depth) + + # Train model + clf = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth) + clf.fit(X_train, y_train) + + # Log metrics + y_pred = clf.predict(X_test) + accuracy = accuracy_score(y_test, y_pred) + mlflow.log_metric("accuracy", accuracy) + + # Log model + mlflow.sklearn.log_model(clf, "model") + + print(f"Model logged with accuracy: {accuracy}") +``` + +**Using .env File (Recommended)** + +Create a `.env` file in your project: + +```bash 
+MLFLOW_TRACKING_URI=https://mlflow.example.com +MLFLOW_TRACKING_TOKEN=your-access-key-here +``` + +Load it in your Python code: + +```python +from dotenv import load_dotenv +import mlflow + +load_dotenv() # Loads MLFLOW_TRACKING_URI and MLFLOW_TRACKING_TOKEN + +mlflow.set_experiment("my-experiment") +with mlflow.start_run(): + mlflow.log_param("param1", 5) +``` + +**Important Notes** + +- Access keys have an expiration date (max 1 year) +- Store access keys securely (use environment variables or secret management) +- Never commit access keys to version control +- Each user should create their own access key +- Expired keys need to be regenerated via the Web UI + ### Model Registry Register and manage models: diff --git a/mlflow/justfile b/mlflow/justfile index 87b6227..5441cb3 100644 --- a/mlflow/justfile +++ b/mlflow/justfile @@ -334,10 +334,20 @@ install: echo "Generating Helm values with OIDC enabled..." gomplate -f values.gomplate.yaml -o values.yaml + echo "Creating Traefik Middleware..." + gomplate -f mlflow-middleware.gomplate.yaml -o mlflow-middleware.yaml + kubectl apply -f mlflow-middleware.yaml + echo "Installing MLflow Helm chart from Community Charts with OIDC..." helm upgrade --cleanup-on-fail --install mlflow community-charts/mlflow \ --version ${MLFLOW_CHART_VERSION} -n ${MLFLOW_NAMESPACE} --wait --timeout=10m -f values.yaml + if [ "${MONITORING_ENABLED}" = "true" ]; then + echo "Enabling Prometheus monitoring for namespace ${MLFLOW_NAMESPACE}..." + kubectl label namespace ${MLFLOW_NAMESPACE} buun.channel/enable-monitoring=true --overwrite + echo "✓ Monitoring enabled" + fi + echo "" echo "=== MLflow installed with OIDC authentication ===" echo "MLflow URL: https://${MLFLOW_HOST}" @@ -372,6 +382,10 @@ upgrade: echo "Generating Helm values..." gomplate -f values.gomplate.yaml -o values.yaml + echo "Creating Traefik Middleware..." 
+ gomplate -f mlflow-middleware.gomplate.yaml -o mlflow-middleware.yaml + kubectl apply -f mlflow-middleware.yaml + echo "Upgrading MLflow Helm chart from Community Charts..." helm upgrade mlflow community-charts/mlflow \ --version ${MLFLOW_CHART_VERSION} -n ${MLFLOW_NAMESPACE} --wait --timeout=10m -f values.yaml @@ -390,6 +404,7 @@ uninstall delete-db='true': kubectl delete secret mlflow-oidc-config -n ${MLFLOW_NAMESPACE} --ignore-not-found kubectl delete externalsecret mlflow-oidc-external-secret -n ${MLFLOW_NAMESPACE} \ --ignore-not-found + kubectl delete middleware mlflow-headers -n ${MLFLOW_NAMESPACE} --ignore-not-found just delete-namespace if [ "{{ delete-db }}" = "true" ]; then just postgres::delete-db mlflow || true diff --git a/mlflow/mlflow-middleware.gomplate.yaml b/mlflow/mlflow-middleware.gomplate.yaml new file mode 100644 index 0000000..2afd764 --- /dev/null +++ b/mlflow/mlflow-middleware.gomplate.yaml @@ -0,0 +1,11 @@ +apiVersion: traefik.io/v1alpha1 +kind: Middleware +metadata: + name: mlflow-headers + namespace: {{ .Env.MLFLOW_NAMESPACE }} +spec: + headers: + customRequestHeaders: + X-Forwarded-Proto: "https" + X-Forwarded-Host: "{{ .Env.MLFLOW_HOST }}" + X-Forwarded-Port: "443" diff --git a/mlflow/values.gomplate.yaml b/mlflow/values.gomplate.yaml index 686a786..ab3dccc 100644 --- a/mlflow/values.gomplate.yaml +++ b/mlflow/values.gomplate.yaml @@ -60,8 +60,8 @@ log: # Use oidc-auth-fastapi for FastAPI/ASGI compatibility with Uvicorn extraArgs: appName: "oidc-auth-fastapi" - # Allow connections from external hostname (with and without port) - allowedHosts: "{{ .Env.MLFLOW_HOST }},{{ .Env.MLFLOW_HOST }}:443" + # Allow the external hostname and the in-cluster service name; NOTE(review): the trailing "*" presumably matches every Host header, which would make the explicit entries redundant - confirm this is intended + allowedHosts: "{{ .Env.MLFLOW_HOST }},{{ .Env.MLFLOW_HOST }}:443,mlflow.{{ .Env.MLFLOW_NAMESPACE }}.svc.cluster.local,mlflow.{{ .Env.MLFLOW_NAMESPACE }}.svc.cluster.local:5000,*" # Extra secrets for OIDC configuration extraSecretNamesForEnvFrom: @@ -86,13 +86,19 @@ 
extraEnvVars: # Session configuration - use cachelib with filesystem backend SESSION_TYPE: "cachelib" SESSION_CACHE_DIR: "/tmp/session" + # Security configuration - allow same-origin CORS and configured host + MLFLOW_SERVER_CORS_ALLOWED_ORIGINS: "https://{{ .Env.MLFLOW_HOST }}" + MLFLOW_SERVER_ALLOWED_HOSTS: "{{ .Env.MLFLOW_HOST }},{{ .Env.MLFLOW_HOST }}:443" + MLFLOW_SERVER_X_FRAME_OPTIONS: "SAMEORIGIN" {{- else }} -# Extra environment variables for S3/MinIO configuration +# Extra environment variables for S3/MinIO configuration (OIDC disabled) extraEnvVars: MLFLOW_S3_ENDPOINT_URL: "http://minio.{{ .Env.MINIO_NAMESPACE }}.svc.cluster.local:9000" MLFLOW_S3_IGNORE_TLS: "true" - # Disable security middleware when using Gunicorn (env var approach) - MLFLOW_SERVER_DISABLE_SECURITY_MIDDLEWARE: "true" + # Security configuration - allow same-origin CORS and configured host + MLFLOW_SERVER_CORS_ALLOWED_ORIGINS: "https://{{ .Env.MLFLOW_HOST }}" + MLFLOW_SERVER_ALLOWED_HOSTS: "{{ .Env.MLFLOW_HOST }},{{ .Env.MLFLOW_HOST }}:443" + MLFLOW_SERVER_X_FRAME_OPTIONS: "SAMEORIGIN" {{- end }} # Service configuration @@ -106,6 +112,7 @@ ingress: className: "traefik" annotations: traefik.ingress.kubernetes.io/router.entrypoints: websecure + traefik.ingress.kubernetes.io/router.middlewares: {{ .Env.MLFLOW_NAMESPACE }}-mlflow-headers@kubernetescrd hosts: - host: {{ .Env.MLFLOW_HOST }} paths: @@ -123,7 +130,7 @@ serviceMonitor: interval: 30s telemetryPath: /metrics labels: - prometheus: kube-prometheus + release: kube-prometheus-stack timeout: 10s # Resource limits