feat(kserve): install KServe

Masaki Yatsu
2025-11-10 21:31:35 +09:00
parent 27de65dd37
commit 2b0687330c
14 changed files with 1974 additions and 0 deletions

cert-manager/README.md

@@ -0,0 +1,166 @@
# cert-manager Module
cert-manager is a Kubernetes add-on that automates the management and issuance of TLS certificates from various sources. It provides a common API for certificate issuers and ensures certificates are valid and up to date.
## Features
- **Automatic Certificate Renewal**: Automatically renews certificates before they expire
- **Multiple Issuers**: Supports Let's Encrypt, HashiCorp Vault, Venafi, self-signed, and more
- **Kubernetes Native**: Uses Custom Resource Definitions (CRDs) for certificate management
- **Webhook Integration**: Provides admission webhooks for validating and mutating certificate resources
## Prerequisites
- Kubernetes cluster (installed via `just k8s::install`)
- kubectl configured with cluster admin permissions
## Installation
### Basic Installation
```bash
# Install cert-manager with default settings
just cert-manager::install
```
### Environment Variables
Key environment variables (set via `.env.local` or environment):
```bash
CERT_MANAGER_NAMESPACE=cert-manager # Namespace for cert-manager
CERT_MANAGER_CHART_VERSION=v1.19.1 # cert-manager Helm chart version
```
## Usage
### Check Status
```bash
# View status of cert-manager components
just cert-manager::status
```
### Create a Self-Signed Issuer
```yaml
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
  name: selfsigned-issuer
spec:
  selfSigned: {}
```
Apply the resource:
```bash
kubectl apply -f issuer.yaml
```
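
For publicly trusted certificates, an ACME issuer such as Let's Encrypt can be used instead. A minimal sketch follows; the contact email is a placeholder, and the HTTP-01 solver assumes Traefik is the ingress class:

```yaml
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
  name: letsencrypt-prod
spec:
  acme:
    # Let's Encrypt production directory
    server: https://acme-v02.api.letsencrypt.org/directory
    # Placeholder contact address for expiry notices
    email: admin@example.com
    privateKeySecretRef:
      name: letsencrypt-prod-account-key
    solvers:
      - http01:
          ingress:
            class: traefik
```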
### Create a Certificate
```yaml
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
  name: example-cert
  namespace: default
spec:
  secretName: example-cert-tls
  issuerRef:
    name: selfsigned-issuer
    kind: ClusterIssuer
  dnsNames:
    - example.com
    - www.example.com
```
Apply the resource:
```bash
kubectl apply -f certificate.yaml
```
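
To confirm issuance completed, you can wait for the Certificate's `Ready` condition (names match the example above):

```bash
kubectl wait --for=condition=Ready certificate/example-cert -n default --timeout=120s
```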
### View Certificates
```bash
# List all certificates
kubectl get certificates -A
# Describe a specific certificate
kubectl describe certificate example-cert -n default
```
## Components
cert-manager installs three main components:
1. **cert-manager**: Main controller managing Certificate resources
2. **cert-manager-webhook**: Admission webhook for validating and mutating cert-manager resources
3. **cert-manager-cainjector**: Injects CA bundles into webhooks and API services
## Used By
cert-manager is required by:
- **KServe**: For webhook TLS certificates
## Upgrade
```bash
# Upgrade cert-manager to a new version
just cert-manager::upgrade
```
## Uninstall
```bash
# Remove cert-manager
just cert-manager::uninstall
```
This will:
- Uninstall cert-manager Helm release
- Delete cert-manager CRDs
- Delete namespace
**Warning**: Uninstalling will remove all Certificate, Issuer, and ClusterIssuer resources.
## Troubleshooting
### Check Controller Logs
```bash
kubectl logs -n cert-manager -l app=cert-manager
```
### Check Webhook Logs
```bash
kubectl logs -n cert-manager -l app=webhook
```
### Verify CRDs
```bash
kubectl get crd | grep cert-manager.io
```
### Check Certificate Status
```bash
kubectl get certificate -A
kubectl describe certificate <name> -n <namespace>
```
Common issues:
- **Certificate not ready**: Check issuer configuration and logs
- **Webhook errors**: Ensure cert-manager webhook is running and healthy
- **DNS validation failures**: For ACME issuers, ensure DNS records are correct
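
When a certificate stays not ready, inspecting the chain of intermediate resources usually reveals the failing step. A quick sketch (the ACME resources only exist for ACME issuers):

```bash
# Certificate -> CertificateRequest -> (ACME) Order -> Challenge
kubectl get certificaterequests -A
kubectl get orders.acme.cert-manager.io,challenges.acme.cert-manager.io -A
kubectl describe certificaterequest <name> -n <namespace>
```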
## References
- [cert-manager Documentation](https://cert-manager.io/docs/)
- [cert-manager GitHub](https://github.com/cert-manager/cert-manager)
- [Helm Chart Configuration](https://artifacthub.io/packages/helm/cert-manager/cert-manager)
- [Supported Issuers](https://cert-manager.io/docs/configuration/)

cert-manager/justfile

@@ -0,0 +1,91 @@
set fallback := true

export CERT_MANAGER_NAMESPACE := env("CERT_MANAGER_NAMESPACE", "cert-manager")
export CERT_MANAGER_CHART_VERSION := env("CERT_MANAGER_CHART_VERSION", "v1.19.1")

[private]
default:
    @just --list --unsorted --list-submodules

# Create namespace
create-namespace:
    @kubectl get namespace ${CERT_MANAGER_NAMESPACE} &>/dev/null || \
        kubectl create namespace ${CERT_MANAGER_NAMESPACE}

# Delete namespace
delete-namespace:
    @kubectl delete namespace ${CERT_MANAGER_NAMESPACE} --ignore-not-found

# Install cert-manager
install:
    #!/bin/bash
    set -euo pipefail
    echo "Installing cert-manager..."
    just create-namespace
    echo "Installing cert-manager from OCI registry..."
    helm upgrade --cleanup-on-fail --install cert-manager \
        oci://quay.io/jetstack/charts/cert-manager --version ${CERT_MANAGER_CHART_VERSION} \
        -n ${CERT_MANAGER_NAMESPACE} --set crds.enabled=true --wait --timeout=5m
    echo "Waiting for cert-manager webhook to be ready..."
    kubectl wait --for=condition=ready pod -l app.kubernetes.io/name=webhook \
        -n ${CERT_MANAGER_NAMESPACE} --timeout=300s
    echo "Verifying cert-manager webhook is functional..."
    sleep 10
    echo ""
    echo "=== cert-manager installed ==="
    echo "Namespace: ${CERT_MANAGER_NAMESPACE}"
    echo "Version: ${CERT_MANAGER_CHART_VERSION}"
    echo ""
    echo "cert-manager provides TLS certificate management for Kubernetes"

# Upgrade cert-manager
upgrade:
    #!/bin/bash
    set -euo pipefail
    echo "Upgrading cert-manager..."
    echo "Upgrading cert-manager from OCI registry..."
    helm upgrade cert-manager oci://quay.io/jetstack/charts/cert-manager \
        --version ${CERT_MANAGER_CHART_VERSION} -n ${CERT_MANAGER_NAMESPACE} \
        --set crds.enabled=true --wait --timeout=5m
    echo "cert-manager upgraded successfully"

# Uninstall cert-manager
uninstall:
    #!/bin/bash
    set -euo pipefail
    echo "Uninstalling cert-manager..."
    helm uninstall cert-manager -n ${CERT_MANAGER_NAMESPACE} --ignore-not-found --wait
    echo "Deleting cert-manager CRDs..."
    kubectl delete crd \
        certificates.cert-manager.io \
        certificaterequests.cert-manager.io \
        challenges.acme.cert-manager.io \
        clusterissuers.cert-manager.io \
        issuers.cert-manager.io \
        orders.acme.cert-manager.io \
        --ignore-not-found
    just delete-namespace
    echo "cert-manager uninstalled"

# Get status of cert-manager components
status:
    #!/bin/bash
    set -euo pipefail
    echo "=== cert-manager Components Status ==="
    echo ""
    echo "Namespace: ${CERT_MANAGER_NAMESPACE}"
    echo ""
    echo "Pods:"
    kubectl get pods -n ${CERT_MANAGER_NAMESPACE}
    echo ""
    echo "Services:"
    kubectl get services -n ${CERT_MANAGER_NAMESPACE}
    echo ""
    echo "CRDs:"
    kubectl get crd | grep cert-manager.io

examples/kserve-mlflow-iris/01-train-and-register.ipynb

@@ -0,0 +1,202 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Iris Classification with MLflow and KServe\n",
"\n",
"This notebook demonstrates:\n",
"1. Training a simple scikit-learn model on the Iris dataset\n",
"2. Logging the model to MLflow\n",
"3. Preparing the model for deployment with KServe"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 1. Setup and Install Dependencies"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Install required packages\n",
"!pip install mlflow scikit-learn boto3 -q"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import mlflow\n",
"import mlflow.sklearn\n",
"from sklearn.datasets import load_iris\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.metrics import accuracy_score, classification_report"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": "## 2. Configure MLflow\n\nSet MLflow tracking URI and authentication credentials."
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": "# MLflow configuration\nMLFLOW_TRACKING_URI = os.getenv('MLFLOW_TRACKING_URI', 'http://mlflow.mlflow.svc.cluster.local')\nmlflow.set_tracking_uri(MLFLOW_TRACKING_URI)\n\n# MLflow authentication\nos.environ['MLFLOW_TRACKING_USERNAME'] = os.getenv('MLFLOW_TRACKING_USERNAME', '')\nos.environ['MLFLOW_TRACKING_PASSWORD'] = os.getenv('MLFLOW_TRACKING_PASSWORD', '')\n\nprint(f\"MLflow Tracking URI: {MLFLOW_TRACKING_URI}\")\nprint(f\"MLflow Username: {os.environ['MLFLOW_TRACKING_USERNAME']}\")"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 3. Load and Prepare Data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Load Iris dataset\n",
"iris = load_iris()\n",
"X = iris.data\n",
"y = iris.target\n",
"\n",
"# Split data\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
"\n",
"print(f\"Training samples: {len(X_train)}\")\n",
"print(f\"Test samples: {len(X_test)}\")\n",
"print(f\"Classes: {iris.target_names}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 4. Train Model with MLflow Tracking"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Set experiment\n",
"experiment_name = \"iris-classification\"\n",
"mlflow.set_experiment(experiment_name)\n",
"\n",
"# Start MLflow run\n",
"with mlflow.start_run(run_name=\"logistic-regression\") as run:\n",
" # Train model\n",
" model = LogisticRegression(max_iter=200, random_state=42)\n",
" model.fit(X_train, y_train)\n",
" \n",
" # Predictions\n",
" y_pred = model.predict(X_test)\n",
" accuracy = accuracy_score(y_test, y_pred)\n",
" \n",
" # Log parameters\n",
" mlflow.log_param(\"model_type\", \"LogisticRegression\")\n",
" mlflow.log_param(\"max_iter\", 200)\n",
" \n",
" # Log metrics\n",
" mlflow.log_metric(\"accuracy\", accuracy)\n",
" \n",
" # Log model\n",
" mlflow.sklearn.log_model(\n",
" model, \n",
" \"model\",\n",
" registered_model_name=\"iris-classifier\"\n",
" )\n",
" \n",
" print(f\"\\nRun ID: {run.info.run_id}\")\n",
" print(f\"Accuracy: {accuracy:.4f}\")\n",
" print(f\"\\nClassification Report:\")\n",
" print(classification_report(y_test, y_pred, target_names=iris.target_names))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 5. Get Model Information for KServe Deployment"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": "# Get the latest version of the registered model\nclient = mlflow.tracking.MlflowClient()\nmodel_name = \"iris-classifier\"\n\n# Use search_model_versions instead of deprecated get_latest_versions\nmodel_versions = client.search_model_versions(f\"name='{model_name}'\")\nlatest_version = max(model_versions, key=lambda x: int(x.version))\n\nprint(f\"\\n=== Model Information for KServe ===\")\nprint(f\"Model Name: {model_name}\")\nprint(f\"Version: {latest_version.version}\")\nprint(f\"Run ID: {latest_version.run_id}\")\nprint(f\"\\nArtifact URI: {latest_version.source}\")\nprint(f\"\\nUse this information to configure the KServe InferenceService.\")"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 6. Test Local Prediction"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Test with a sample\n",
"sample_input = [[5.1, 3.5, 1.4, 0.2]] # Should predict 'setosa'\n",
"prediction = model.predict(sample_input)\n",
"predicted_class = iris.target_names[prediction[0]]\n",
"\n",
"print(f\"\\nTest Input: {sample_input[0]}\")\n",
"print(f\"Predicted Class: {predicted_class}\")\n",
"print(f\"\\nThis sample will be used to test the KServe deployment.\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Next Steps\n",
"\n",
"1. Note the Model Name and Version from above\n",
"2. Deploy the model using KServe with the InferenceService YAML\n",
"3. Test the deployed model endpoint"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.0"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

examples/kserve-mlflow-iris/02-deploy-model.yaml

@@ -0,0 +1,44 @@
apiVersion: serving.kserve.io/v1beta1
kind: InferenceService
metadata:
  name: iris-classifier
  namespace: kserve
  annotations:
    serving.kserve.io/secretName: kserve-s3-credentials
spec:
  predictor:
    model:
      modelFormat:
        name: mlflow
        version: "2"
      storageUri: s3://mlflow/EXPERIMENT_ID/models/MODEL_ID/artifacts
      resources:
        requests:
          cpu: "100m"
          memory: "512Mi"
        limits:
          cpu: "1000m"
          memory: "1Gi"
---
# Alternative: Using SKLearn Server (does not install requirements.txt)
# apiVersion: serving.kserve.io/v1beta1
# kind: InferenceService
# metadata:
#   name: iris-classifier
#   namespace: kserve
#   annotations:
#     serving.kserve.io/secretName: kserve-s3-credentials
# spec:
#   predictor:
#     model:
#       modelFormat:
#         name: sklearn
#         version: "1"
#       storageUri: s3://mlflow/EXPERIMENT_ID/models/MODEL_ID/artifacts
#       resources:
#         requests:
#           cpu: "100m"
#           memory: "256Mi"
#         limits:
#           cpu: "500m"
#           memory: "512Mi"

examples/kserve-mlflow-iris/03-test-inference.ipynb

@@ -0,0 +1,313 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Test Iris Classifier InferenceService\n",
"\n",
"This notebook demonstrates how to call the deployed KServe InferenceService from JupyterHub."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 1. Setup"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Install requests if not already installed\n",
"!pip install requests -q"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"import json\n",
"import os"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 2. Configure Endpoint\n",
"\n",
"The InferenceService is accessible via the cluster-internal service URL."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": "# KServe InferenceService endpoint\n# Format: http://<service-name>-predictor.<namespace>.svc.cluster.local/v2/models/<model-name>/infer\nINFERENCE_SERVICE_NAME = \"iris-classifier\"\nNAMESPACE = \"kserve\"\nENDPOINT = f\"http://{INFERENCE_SERVICE_NAME}-predictor.{NAMESPACE}.svc.cluster.local/v2/models/{INFERENCE_SERVICE_NAME}/infer\"\n\nprint(f\"Inference Endpoint: {ENDPOINT}\")"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 3. Define Test Samples\n",
"\n",
"Iris dataset has 4 features:\n",
"1. Sepal length (cm)\n",
"2. Sepal width (cm)\n",
"3. Petal length (cm)\n",
"4. Petal width (cm)\n",
"\n",
"Classes:\n",
"- 0: Iris Setosa\n",
"- 1: Iris Versicolor\n",
"- 2: Iris Virginica"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Define class names\n",
"CLASS_NAMES = [\"Iris Setosa\", \"Iris Versicolor\", \"Iris Virginica\"]\n",
"\n",
"# Test samples with expected predictions\n",
"test_cases = [\n",
" {\n",
" \"name\": \"Typical Setosa\",\n",
" \"features\": [5.1, 3.5, 1.4, 0.2],\n",
" \"expected_class\": 0,\n",
" \"description\": \"Short petals, typical of Setosa\"\n",
" },\n",
" {\n",
" \"name\": \"Typical Virginica\",\n",
" \"features\": [6.7, 3.0, 5.2, 2.3],\n",
" \"expected_class\": 2,\n",
" \"description\": \"Long petals and sepals, typical of Virginica\"\n",
" },\n",
" {\n",
" \"name\": \"Typical Versicolor\",\n",
" \"features\": [5.9, 3.0, 4.2, 1.5],\n",
" \"expected_class\": 1,\n",
" \"description\": \"Medium-sized features, typical of Versicolor\"\n",
" },\n",
"]\n",
"\n",
"print(f\"Prepared {len(test_cases)} test cases\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 4. Test Single Prediction\n",
"\n",
"Send a single prediction request to the InferenceService."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": "def predict(features):\n \"\"\"\n Send prediction request to KServe InferenceService using v2 protocol.\n \n Args:\n features: List of feature values [sepal_length, sepal_width, petal_length, petal_width]\n \n Returns:\n Predicted class (0, 1, or 2)\n \"\"\"\n payload = {\n \"inputs\": [\n {\n \"name\": \"input-0\",\n \"shape\": [1, 4],\n \"datatype\": \"FP64\",\n \"data\": [features]\n }\n ]\n }\n \n try:\n response = requests.post(ENDPOINT, json=payload, timeout=10)\n response.raise_for_status()\n result = response.json()\n return result['outputs'][0]['data'][0]\n except requests.exceptions.RequestException as e:\n print(f\"Error: {e}\")\n if hasattr(e, 'response') and hasattr(e.response, 'text'):\n print(f\"Response: {e.response.text}\")\n return None\n\n# Test with first sample\nsample = test_cases[0]\nprint(f\"Testing: {sample['name']}\")\nprint(f\"Features: {sample['features']}\")\nprint(f\"Description: {sample['description']}\")\nprint()\n\nprediction = predict(sample['features'])\nif prediction is not None:\n print(f\"Predicted Class: {prediction} ({CLASS_NAMES[prediction]})\")\n print(f\"Expected Class: {sample['expected_class']} ({CLASS_NAMES[sample['expected_class']]})\")\n print(f\"Result: {'✓ PASS' if prediction == sample['expected_class'] else '✗ FAIL'}\")"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 5. Test All Cases\n",
"\n",
"Run predictions for all test cases and display results."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(\"=\" * 80)\n",
"print(\"Testing Iris Classifier InferenceService\")\n",
"print(\"=\" * 80)\n",
"print()\n",
"\n",
"results = []\n",
"\n",
"for i, test_case in enumerate(test_cases, 1):\n",
" print(f\"Test Case {i}: {test_case['name']}\")\n",
" print(f\" Features: {test_case['features']}\")\n",
" print(f\" Description: {test_case['description']}\")\n",
" print(f\" Expected: {CLASS_NAMES[test_case['expected_class']]}\")\n",
" \n",
" prediction = predict(test_case['features'])\n",
" \n",
" if prediction is not None:\n",
" predicted_class_name = CLASS_NAMES[prediction]\n",
" is_correct = prediction == test_case['expected_class']\n",
" status = \"✓ PASS\" if is_correct else \"✗ FAIL\"\n",
" \n",
" print(f\" Predicted: {predicted_class_name}\")\n",
" print(f\" Status: {status}\")\n",
" \n",
" results.append({\n",
" 'name': test_case['name'],\n",
" 'expected': test_case['expected_class'],\n",
" 'predicted': prediction,\n",
" 'correct': is_correct\n",
" })\n",
" else:\n",
" print(f\" Status: ✗ ERROR\")\n",
" results.append({\n",
" 'name': test_case['name'],\n",
" 'expected': test_case['expected_class'],\n",
" 'predicted': None,\n",
" 'correct': False\n",
" })\n",
" \n",
" print()\n",
"\n",
"# Summary\n",
"print(\"=\" * 80)\n",
"passed = sum(1 for r in results if r['correct'])\n",
"total = len(results)\n",
"print(f\"Test Summary: {passed}/{total} passed\")\n",
"print(\"=\" * 80)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 6. Batch Prediction\n",
"\n",
"Send multiple samples in a single request for batch prediction."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": "def predict_batch(features_list):\n \"\"\"\n Send batch prediction request to KServe InferenceService using v2 protocol.\n \n Args:\n features_list: List of feature arrays\n \n Returns:\n List of predicted classes\n \"\"\"\n payload = {\n \"inputs\": [\n {\n \"name\": \"input-0\",\n \"shape\": [len(features_list), 4],\n \"datatype\": \"FP64\",\n \"data\": features_list\n }\n ]\n }\n \n try:\n response = requests.post(ENDPOINT, json=payload, timeout=10)\n response.raise_for_status()\n result = response.json()\n return result['outputs'][0]['data']\n except requests.exceptions.RequestException as e:\n print(f\"Error: {e}\")\n if hasattr(e, 'response') and hasattr(e.response, 'text'):\n print(f\"Response: {e.response.text}\")\n return None\n\n# Prepare batch request\nbatch_features = [tc['features'] for tc in test_cases]\nprint(f\"Sending batch request with {len(batch_features)} samples...\")\nprint()\n\n# Send batch request\npredictions = predict_batch(batch_features)\n\nif predictions:\n print(\"Batch Prediction Results:\")\n print(\"-\" * 60)\n for i, (test_case, prediction) in enumerate(zip(test_cases, predictions), 1):\n print(f\"{i}. {test_case['name']}\")\n print(f\" Predicted: {CLASS_NAMES[prediction]}\")\n print(f\" Expected: {CLASS_NAMES[test_case['expected_class']]}\")\n status = \"✓\" if prediction == test_case['expected_class'] else \"✗\"\n print(f\" {status}\")\n print()"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 7. Custom Prediction\n",
"\n",
"Try your own input values!"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Enter your own values here\n",
"# Format: [sepal_length, sepal_width, petal_length, petal_width]\n",
"custom_features = [6.0, 3.0, 4.0, 1.5]\n",
"\n",
"print(f\"Custom Input: {custom_features}\")\n",
"print(f\" Sepal Length: {custom_features[0]} cm\")\n",
"print(f\" Sepal Width: {custom_features[1]} cm\")\n",
"print(f\" Petal Length: {custom_features[2]} cm\")\n",
"print(f\" Petal Width: {custom_features[3]} cm\")\n",
"print()\n",
"\n",
"prediction = predict(custom_features)\n",
"if prediction is not None:\n",
" print(f\"Prediction: {CLASS_NAMES[prediction]} (class {prediction})\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 8. Check InferenceService Status\n",
"\n",
"Verify the InferenceService is running properly."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Check if we can reach the endpoint\n",
"import subprocess\n",
"\n",
"print(\"Checking InferenceService status...\")\n",
"print()\n",
"\n",
"# Using kubectl from the notebook\n",
"try:\n",
" result = subprocess.run(\n",
" [\"kubectl\", \"get\", \"inferenceservice\", INFERENCE_SERVICE_NAME, \"-n\", NAMESPACE],\n",
" capture_output=True,\n",
" text=True,\n",
" timeout=10\n",
" )\n",
" print(result.stdout)\n",
" if result.returncode != 0:\n",
" print(result.stderr)\n",
"except Exception as e:\n",
" print(f\"Could not check status: {e}\")\n",
" print(\"This is normal if kubectl is not available in the notebook environment.\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Summary\n",
"\n",
"You have successfully:\n",
"1. ✅ Connected to the KServe InferenceService\n",
"2. ✅ Sent single prediction requests\n",
"3. ✅ Sent batch prediction requests\n",
"4. ✅ Verified predictions against expected results\n",
"\n",
"## Next Steps\n",
"\n",
"- Try running the Kubernetes Job-based tests (see `04-test-inference-job.yaml`)\n",
"- Deploy a new version of the model and compare predictions\n",
"- Implement A/B testing with multiple model versions\n",
"- Add monitoring and logging"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.0"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

examples/kserve-mlflow-iris/04-test-inference-job.yaml

@@ -0,0 +1,146 @@
apiVersion: batch/v1
kind: Job
metadata:
  name: test-iris-inference
  namespace: kserve
spec:
  template:
    spec:
      containers:
        - name: test
          image: python:3.9-slim
          command:
            - /bin/sh
            - -c
            - |
              cat << 'EOF' | python
              import urllib.request
              import json
              # KServe endpoint (v2 protocol)
              url = "http://iris-classifier-predictor.kserve.svc.cluster.local/v2/models/iris-classifier/infer"
              # Iris class names
              class_names = ["setosa", "versicolor", "virginica"]
              # Test samples with expected results
              test_cases = [
                  {"data": [5.1, 3.5, 1.4, 0.2], "expected": "setosa"},
                  {"data": [6.7, 3.0, 5.2, 2.3], "expected": "virginica"},
                  {"data": [5.9, 3.0, 4.2, 1.5], "expected": "versicolor"},
              ]
              print("=" * 60)
              print("Testing Iris Classifier InferenceService")
              print("=" * 60)
              print(f"Endpoint: {url}")
              print()
              for i, test_case in enumerate(test_cases, 1):
                  print(f"Test Case {i}:")
                  print(f" Input: {test_case['data']}")
                  print(f" Expected: {test_case['expected']}")
                  # v2 protocol payload
                  payload = {
                      "inputs": [
                          {
                              "name": "input-0",
                              "shape": [1, 4],
                              "datatype": "FP64",
                              "data": [test_case['data']]
                          }
                      ]
                  }
                  try:
                      req = urllib.request.Request(
                          url,
                          data=json.dumps(payload).encode('utf-8'),
                          headers={'Content-Type': 'application/json'}
                      )
                      with urllib.request.urlopen(req) as response:
                          result = json.loads(response.read().decode('utf-8'))
                      prediction = result['outputs'][0]['data'][0]
                      predicted_class = class_names[prediction]
                      status = "✓ PASS" if predicted_class == test_case['expected'] else "✗ FAIL"
                      print(f" Predicted: {predicted_class} (class {prediction})")
                      print(f" Status: {status}")
                  except Exception as e:
                      print(f" Error: {e}")
                  print()
              print("=" * 60)
              print("Test completed")
              print("=" * 60)
              EOF
      restartPolicy: Never
  backoffLimit: 1
---
# Alternative: curl-based quick test
# apiVersion: batch/v1
# kind: Job
# metadata:
#   name: test-iris-inference-curl
#   namespace: kserve
# spec:
#   template:
#     spec:
#       containers:
#         - name: test
#           image: curlimages/curl:latest
#           command:
#             - /bin/sh
#             - -c
#             - |
#               echo "Testing Iris Classifier Inference Service..."
#               echo ""
#               echo "Endpoint: http://iris-classifier-predictor.kserve.svc.cluster.local/v2/models/iris-classifier/infer"
#               echo ""
#               echo "Sending test request with sample data: [5.1, 3.5, 1.4, 0.2]"
#               echo "Expected prediction: class 0 (setosa)"
#               echo ""
#
#               curl -v -X POST \
#                 http://iris-classifier-predictor.kserve.svc.cluster.local/v2/models/iris-classifier/infer \
#                 -H "Content-Type: application/json" \
#                 -d '{
#                   "inputs": [
#                     {
#                       "name": "input-0",
#                       "shape": [1, 4],
#                       "datatype": "FP64",
#                       "data": [[5.1, 3.5, 1.4, 0.2]]
#                     }
#                   ]
#                 }'
#
#               echo ""
#               echo ""
#               echo "Testing with multiple samples..."
#               echo ""
#
#               curl -X POST \
#                 http://iris-classifier-predictor.kserve.svc.cluster.local/v2/models/iris-classifier/infer \
#                 -H "Content-Type: application/json" \
#                 -d '{
#                   "inputs": [
#                     {
#                       "name": "input-0",
#                       "shape": [3, 4],
#                       "datatype": "FP64",
#                       "data": [
#                         [5.1, 3.5, 1.4, 0.2],
#                         [6.7, 3.0, 5.2, 2.3],
#                         [5.9, 3.0, 4.2, 1.5]
#                       ]
#                     }
#                   ]
#                 }'
#
#               echo ""
#       restartPolicy: Never
#   backoffLimit: 1

examples/kserve-mlflow-iris/README.md

@@ -0,0 +1,315 @@
# KServe + MLflow + JupyterHub: Iris Classification Example
This example demonstrates an end-to-end machine learning workflow using:
- **JupyterHub**: Interactive development, model training, and testing
- **MLflow**: Model tracking and registry
- **MinIO**: Artifact storage (S3-compatible)
- **KServe**: Model serving and inference
## Workflow Overview
1. **📓 Train & Register** (`01-train-and-register.ipynb`) - Train model in JupyterHub, register to MLflow
2. **🚀 Deploy** (`02-deploy-model.yaml`) - Deploy model with KServe InferenceService
3. **🧪 Test from Notebook** (`03-test-inference.ipynb`) - Test inference from JupyterHub (Recommended)
4. **🔧 Test from Pod** (`04-test-inference-job.yaml`) - Automated testing from Kubernetes Job
## Architecture
```plain
┌─────────────┐     ┌─────────┐     ┌────────┐     ┌─────────────────┐
│ JupyterHub  │────>│ MLflow  │────>│ MinIO  │<────│ KServe          │
│             │     │         │     │ (S3)   │     │ InferenceService│
│ 1. Train    │     │ Register│     │ Store  │     │ 2. Deploy       │
│    Model    │     │         │     │ Model  │     │    & Serve      │
└──────┬──────┘     └─────────┘     └────────┘     └──────────┬──────┘
       │                                                      │
       │  3. Test from Notebook (Recommended)                 │
       └──────────────────────────────────────────────────────┘
                                        4. Test from Pod     │
                                           (Alternative)     │
                                                              v
                                                      ┌──────────────┐
                                                      │  Kubernetes  │
                                                      │  Test Job    │
                                                      └──────────────┘
```
## Prerequisites
Ensure the following components are installed:
```bash
# Check installations
kubectl get pods -n jupyterhub
kubectl get pods -n mlflow
kubectl get pods -n minio
kubectl get pods -n kserve
```
## Step 1: Train and Register Model in JupyterHub
1. **Access JupyterHub**:
Access JupyterHub at the configured JUPYTERHUB_HOST
2. **Upload the Notebook**:
- Upload `01-train-and-register.ipynb` to your JupyterHub workspace
3. **Set Environment Variables** (in the notebook or terminal):
```bash
# MLflow authentication (required if MLflow has authentication enabled)
export MLFLOW_TRACKING_USERNAME=your-username
export MLFLOW_TRACKING_PASSWORD=your-password
```
Note: MLFLOW_TRACKING_URI uses the default cluster-internal URL and does not need to be set.
4. **Run the Notebook**:
- Execute all cells in `01-train-and-register.ipynb`
- The model will be automatically registered to MLflow Model Registry
5. **Verify in MLflow UI**:
- Access MLflow UI at the configured MLFLOW_HOST
- Navigate to "Models" → "iris-classifier"
- Click on the model version (e.g., "Version 1")
- Note the **artifact_path** displayed (e.g., `mlflow-artifacts:/2/models/m-28620b840353444385fa8e62335decf5/artifacts`)
## Step 2: Deploy Model with KServe
1. **Get the Model Registry Path**:
In MLflow UI, navigate to:
- **Models** → **iris-classifier** → **Version 1**
- Copy the **artifact_path** from the model details
- Example: `mlflow-artifacts:/2/models/m-28620b840353444385fa8e62335decf5/artifacts`
**Important**: Use the artifact_path from the **Model Registry** (contains `/models/`), NOT the run-based path from the experiment runs.
2. **Update the InferenceService YAML**:
Use the helper command to convert the MLflow artifact path to KServe storageUri:
```bash
just kserve::storage-uri "mlflow-artifacts:/2/models/m-28620b840353444385fa8e62335decf5/artifacts"
# Output: s3://mlflow/2/models/m-28620b840353444385fa8e62335decf5/artifacts
```
Edit `02-deploy-model.yaml` and replace the `storageUri` with the output:
```yaml
storageUri: s3://mlflow/2/models/m-28620b840353444385fa8e62335decf5/artifacts
```
**Note**: The default configuration uses `mlflow` format, which automatically installs dependencies from `requirements.txt`. This ensures compatibility but may take longer to start (initial container startup installs packages).
3. **Deploy the InferenceService**:
```bash
kubectl apply -f 02-deploy-model.yaml
```
4. **Verify Deployment**:
```bash
# Check InferenceService status
kubectl get inferenceservice iris-classifier -n kserve
# Wait for it to be ready (STATUS should show "Ready")
# Note: First deployment may take 5-10 minutes due to dependency installation
kubectl wait --for=condition=Ready inferenceservice/iris-classifier -n kserve --timeout=600s
# Check the pods
kubectl get pods -l serving.kserve.io/inferenceservice=iris-classifier -n kserve
# Check logs if needed
kubectl logs -l serving.kserve.io/inferenceservice=iris-classifier -n kserve -c kserve-container
```
## Step 3: Test from JupyterHub (Recommended)
1. **Upload the Test Notebook**:
- Upload `03-test-inference.ipynb` to your JupyterHub workspace
2. **Run the Notebook**:
- Execute all cells in `03-test-inference.ipynb`
- The notebook will:
- Send prediction requests to the KServe endpoint
- Test single and batch predictions
- Display results with expected vs actual comparisons
- Allow you to try custom inputs
3. **Expected Results**:
```plain
Test Case 1: Typical Setosa
Features: [5.1, 3.5, 1.4, 0.2]
Expected: Iris Setosa
Predicted: Iris Setosa
Status: ✓ PASS
```
## Step 4: Test from Kubernetes Pod (Alternative)
After testing in JupyterHub, you can also test from Kubernetes Pods for automated testing or CI/CD integration.
### Option 1: Automated Test with Python (Recommended)
```bash
# Run the test job
kubectl apply -f 04-test-inference-job.yaml
# Check logs
kubectl logs job/test-iris-inference -n kserve
# Expected output:
# Test Case 1:
# Input: [5.1, 3.5, 1.4, 0.2]
# Expected: setosa
# Predicted: setosa (class 0)
# Status: ✓ PASS
```
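
For CI/CD pipelines, the test job can be awaited before reading its logs; a minimal sketch:

```bash
# Block until the job finishes (or times out), then inspect the result
kubectl wait --for=condition=complete job/test-iris-inference -n kserve --timeout=180s
kubectl logs job/test-iris-inference -n kserve
```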
### Option 2: Manual Test from a Pod
```bash
# Start a test pod
kubectl run test-pod --image=curlimages/curl --rm -it --restart=Never -- sh
# Inside the pod, run:
curl -X POST \
http://iris-classifier-predictor.kserve.svc.cluster.local/v2/models/iris-classifier/infer \
-H "Content-Type: application/json" \
-d '{"inputs": [{"name": "input-0", "shape": [1, 4], "datatype": "FP64", "data": [[5.1, 3.5, 1.4, 0.2]]}]}'
```
## Model Prediction Examples
### Single Prediction (v2 Protocol)
```json
// Request
{
"inputs": [
{
"name": "input-0",
"shape": [1, 4],
"datatype": "FP64",
"data": [[5.1, 3.5, 1.4, 0.2]] // Sepal length, Sepal width, Petal length, Petal width
}
]
}
// Response
{
"outputs": [
{
"name": "output-0",
"shape": [1],
"datatype": "INT64",
"data": [0] // 0=setosa, 1=versicolor, 2=virginica
}
]
}
```
### Batch Prediction (v2 Protocol)
```json
// Request
{
"inputs": [
{
"name": "input-0",
"shape": [3, 4],
"datatype": "FP64",
"data": [
[5.1, 3.5, 1.4, 0.2], // Setosa
[6.7, 3.0, 5.2, 2.3], // Virginica
[5.9, 3.0, 4.2, 1.5] // Versicolor
]
}
]
}
// Response
{
"outputs": [
{
"name": "output-0",
"shape": [3],
"datatype": "INT64",
"data": [0, 2, 1]
}
]
}
```
## Troubleshooting
### InferenceService Not Ready
```bash
# Check events
kubectl describe inferenceservice iris-classifier -n kserve
# Check pod logs
kubectl logs -l serving.kserve.io/inferenceservice=iris-classifier -n kserve -c kserve-container
```
### S3/MinIO Connection Issues
```bash
# Verify S3 credentials secret
kubectl get secret kserve-s3-credentials -n kserve -o yaml
# Test MinIO access from a pod
kubectl run minio-test --image=amazon/aws-cli --rm -it --restart=Never -- \
sh -c "AWS_ACCESS_KEY_ID=minioadmin AWS_SECRET_ACCESS_KEY=minioadmin aws --endpoint-url=http://minio.minio.svc.cluster.local:9000 s3 ls s3://mlflow/"
```
### Model Not Found
```bash
# Verify the model exists in MinIO Console
# Access MinIO Console at the configured MINIO_HOST
# Navigate to mlflow bucket and verify the model path
# The path should be: EXPERIMENT_ID/models/MODEL_ID/artifacts/
# Example: 2/models/m-28620b840353444385fa8e62335decf5/artifacts/
```
### Prediction Errors
```bash
# Check model format and KServe runtime compatibility
kubectl logs -l serving.kserve.io/inferenceservice=iris-classifier -n kserve
```
## Cleanup
```bash
# Delete InferenceService
kubectl delete inferenceservice iris-classifier -n kserve
# Delete test job
kubectl delete job test-iris-inference -n kserve
```
## Next Steps
- Try different models (XGBoost, TensorFlow, PyTorch)
- Add model versioning and A/B testing
- Implement canary deployments
- Add monitoring and observability
- Scale the InferenceService based on load
## References
- [KServe Documentation](https://kserve.github.io/website/)
- [MLflow Documentation](https://mlflow.org/docs/latest/index.html)
- [KServe Model Serving](https://kserve.github.io/website/latest/modelserving/v1beta1/sklearn/v2/)

justfile

@@ -7,6 +7,7 @@ default:
    @just --list --unsorted --list-submodules
mod airflow
mod cert-manager
mod ch-ui
mod clickhouse
mod dagster
@@ -17,6 +18,7 @@ mod goldilocks
mod keycloak
mod jupyterhub
mod k8s
mod kserve
mod lakekeeper
mod longhorn
mod metabase

kserve/.gitignore

@@ -0,0 +1 @@
values.yaml

kserve/README.md

@@ -0,0 +1,300 @@
# KServe
KServe is a standard Model Inference Platform on Kubernetes for Machine Learning and Generative AI. It provides a standardized way to deploy, serve, and manage ML models across different frameworks.
## Features
- **Multi-Framework Support**: TensorFlow, PyTorch, scikit-learn, XGBoost, Hugging Face, Triton, and more
- **Deployment Modes**:
- **RawDeployment (Standard)**: Uses native Kubernetes Deployments without Knative
- **Serverless (Knative)**: Auto-scaling with scale-to-zero capability
- **Model Storage**: Support for S3, GCS, Azure Blob, PVC, and more
- **Inference Protocols**: REST and gRPC
- **Advanced Features**: Canary deployments, traffic splitting, explainability, outlier detection
## Prerequisites
- Kubernetes cluster (installed via `just k8s::install`)
- Longhorn storage (installed via `just longhorn::install`)
- **cert-manager** (required, installed via `just cert-manager::install`)
- MinIO (optional, for S3-compatible model storage via `just minio::install`)
- Prometheus (optional, for monitoring via `just prometheus::install`)
## Installation
### Basic Installation
```bash
# Install cert-manager (required)
just cert-manager::install
# Install KServe with default settings (RawDeployment mode)
just kserve::install
```
During installation, you will be prompted for:
- **Prometheus Monitoring**: Whether to enable ServiceMonitor (if Prometheus is installed)
The domain for inference endpoints is configured via the `KSERVE_DOMAIN` environment variable (default: `cluster.local`).
### Environment Variables
Key environment variables (set via `.env.local` or environment):
```bash
KSERVE_NAMESPACE=kserve # Namespace for KServe
KSERVE_CHART_VERSION=v0.16.0 # KServe Helm chart version
KSERVE_DEPLOYMENT_MODE=RawDeployment # Deployment mode (RawDeployment or Knative)
KSERVE_DOMAIN=cluster.local # Base domain for inference endpoints
MONITORING_ENABLED=true # Enable Prometheus monitoring
MINIO_NAMESPACE=minio # MinIO namespace (if using MinIO)
```
### Domain Configuration
KServe uses the `KSERVE_DOMAIN` to construct URLs for inference endpoints.
**Internal Access Only (Default):**
```bash
KSERVE_DOMAIN=cluster.local
```
- InferenceServices are accessible only within the cluster
- URLs: `http://<service-name>.<namespace>.svc.cluster.local`
- No external Ingress configuration needed
- Recommended for development and testing
**External Access:**
```bash
KSERVE_DOMAIN=example.com
```
- InferenceServices are accessible from outside the cluster
- URLs: `https://<service-name>.<namespace>.example.com`
- Requires Traefik Ingress configuration
- DNS records must point to your cluster
- Recommended for production deployments
## Usage
### Check Status
```bash
# View status of KServe components
just kserve::status
# View controller logs
just kserve::logs
```
### Deploy a Model
Create an `InferenceService` resource:
```yaml
apiVersion: serving.kserve.io/v1beta1
kind: InferenceService
metadata:
  name: sklearn-iris
  namespace: default
spec:
  predictor:
    sklearn:
      storageUri: s3://models/sklearn/iris
```
Apply the resource:
```bash
kubectl apply -f inferenceservice.yaml
```
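Deployment can be awaited before sending requests, using the same `Ready` condition pattern as the example in this repository:

```bash
kubectl wait --for=condition=Ready inferenceservice/sklearn-iris -n default --timeout=300s
```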
### Access Inference Endpoint
```bash
# Get inference service URL
kubectl get inferenceservice sklearn-iris
```
**For cluster.local (internal access):**
```bash
# From within the cluster
curl -X POST http://sklearn-iris.default.svc.cluster.local/v1/models/sklearn-iris:predict \
-H "Content-Type: application/json" \
-d '{"instances": [[6.8, 2.8, 4.8, 1.4]]}'
```
**For external domain:**
```bash
# From anywhere (requires DNS and Ingress configuration)
curl -X POST https://sklearn-iris.default.example.com/v1/models/sklearn-iris:predict \
-H "Content-Type: application/json" \
-d '{"instances": [[6.8, 2.8, 4.8, 1.4]]}'
```
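A successful v1 request returns a plain predictions array; for the sample above the response body looks roughly like this (illustrative values):

```json
{"predictions": [1]}
```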
## Storage Configuration
### Using MinIO (S3-compatible)
If MinIO is installed, KServe will automatically configure S3 credentials:
```bash
# Storage secret is created automatically during installation
kubectl get secret kserve-s3-credentials -n kserve
```
**External Secrets Integration:**
- When External Secrets Operator is available:
- Credentials are retrieved directly from Vault at `minio/admin`
- ExternalSecret resource syncs credentials to Kubernetes Secret
- Secret includes KServe-specific annotations for S3 endpoint configuration
- No duplicate storage needed - references existing MinIO credentials
- When External Secrets Operator is not available:
- Credentials are retrieved from MinIO Secret
- Kubernetes Secret is created directly with annotations
- Credentials are also backed up to Vault at `kserve/storage` if available
Models can be stored in MinIO buckets:
```bash
# Create a bucket for models
just minio::create-bucket models
# Upload model files to MinIO
# Then reference in InferenceService: s3://models/path/to/model
```
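The MinIO endpoint is S3-compatible, so standard S3 tooling can upload model files; a sketch using the AWS CLI, with placeholder credentials and paths:

```bash
# Placeholders: use the MinIO credentials stored in the kserve-s3-credentials secret
AWS_ACCESS_KEY_ID=<access-key> AWS_SECRET_ACCESS_KEY=<secret-key> \
  aws --endpoint-url=http://minio.minio.svc.cluster.local:9000 \
  s3 cp ./model/ s3://models/sklearn/iris/ --recursive
```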
### Using Other Storage
KServe supports various storage backends:
- **S3**: AWS S3 or compatible services
- **GCS**: Google Cloud Storage
- **Azure**: Azure Blob Storage
- **PVC**: Kubernetes Persistent Volume Claims
- **HTTP/HTTPS**: Direct URLs
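
For example, a model stored on a PersistentVolumeClaim can be referenced with the `pvc://` scheme; a minimal sketch in which the claim name and path are placeholders:

```yaml
spec:
  predictor:
    model:
      modelFormat:
        name: sklearn
      storageUri: pvc://model-store-pvc/sklearn/iris
```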
## Supported Frameworks
The following serving runtimes are enabled by default:
- **scikit-learn**: sklearn models
- **XGBoost**: XGBoost models
- **MLServer**: Multi-framework server (sklearn, XGBoost, etc.)
- **Triton**: NVIDIA Triton Inference Server
- **TensorFlow**: TensorFlow models
- **PyTorch**: PyTorch models via TorchServe
- **Hugging Face**: Transformer models
## Advanced Configuration
### Custom Serving Runtimes
You can create custom `ClusterServingRuntime` or `ServingRuntime` resources for specialized model servers.
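A minimal sketch of a `ClusterServingRuntime` for a custom server image (the image, model format, and arguments are placeholders, not part of this module):

```yaml
apiVersion: serving.kserve.io/v1alpha1
kind: ClusterServingRuntime
metadata:
  name: custom-sklearn-runtime
spec:
  supportedModelFormats:
    - name: sklearn
      version: "1"
      autoSelect: true
  containers:
    - name: kserve-container
      # Placeholder image that implements the runtime's prediction API
      image: registry.example.com/custom-sklearn-server:latest
      args:
        - --model_dir=/mnt/models
```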
### Prometheus Monitoring
When monitoring is enabled, KServe controller metrics are exposed and scraped by Prometheus:
```bash
# View metrics in Grafana
# Metrics include: inference request rates, latencies, error rates
```
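To confirm that scraping is wired up after an install with monitoring enabled, a quick check (the resource is created by the values template in this module):

```bash
# ServiceMonitor exists only when MONITORING_ENABLED=true
kubectl get servicemonitor -n kserve
```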
## Deployment Modes
### RawDeployment (Standard)
- Uses standard Kubernetes Deployments, Services, and Ingress
- No Knative dependency
- Simpler setup, more control over resources
- Manual scaling configuration required
### Serverless (Knative)
- Requires Knative Serving installation
- Auto-scaling with scale-to-zero
- Advanced traffic management
- Better resource utilization for sporadic workloads
## Examples
### Iris Classification with MLflow
A complete end-to-end example demonstrating model serving with KServe:
- Train an Iris classification model in JupyterHub
- Register the model to MLflow Model Registry
- Deploy the registered model with KServe InferenceService
- Test inference using v2 protocol from JupyterHub notebooks and Kubernetes Jobs
This example demonstrates:
- Converting MLflow artifact paths to KServe storageUri
- Using MLflow format runtime (with automatic dependency installation)
- Testing with both single and batch predictions
- Using v2 Open Inference Protocol
See: [`examples/kserve-mlflow-iris`](../examples/kserve-mlflow-iris/README.md)
## Uninstallation
```bash
# Remove KServe
just kserve::uninstall
```
This will:
- Uninstall the KServe Helm release
- Uninstall KServe CRDs
- Delete storage secrets
- Delete namespace
**Warning**: Uninstalling will remove all InferenceService resources.
## Troubleshooting
### Check Controller Logs
```bash
just kserve::logs
```
### View InferenceService Status
```bash
kubectl get inferenceservice -A
kubectl describe inferenceservice <name> -n <namespace>
```
### Check Predictor Pods
```bash
kubectl get pods -l serving.kserve.io/inferenceservice=<name>
kubectl logs <pod-name>
```
### Storage Issues
If models fail to download:
```bash
# Check storage initializer logs
kubectl logs <pod-name> -c storage-initializer
# Verify S3 credentials
kubectl get secret kserve-s3-credentials -n kserve -o yaml
```
## References
- [KServe Documentation](https://kserve.github.io/website/)
- [KServe GitHub](https://github.com/kserve/kserve)
- [KServe Examples](https://github.com/kserve/kserve/tree/master/docs/samples)
- [Supported ML Frameworks](https://kserve.github.io/website/latest/modelserving/v1beta1/serving_runtime/)

kserve/justfile

@@ -0,0 +1,264 @@
set fallback := true

export KSERVE_NAMESPACE := env("KSERVE_NAMESPACE", "kserve")
export KSERVE_CHART_VERSION := env("KSERVE_CHART_VERSION", "v0.16.0")
export KSERVE_DEPLOYMENT_MODE := env("KSERVE_DEPLOYMENT_MODE", "RawDeployment")
export KSERVE_DOMAIN := env("KSERVE_DOMAIN", "cluster.local")
export MONITORING_ENABLED := env("MONITORING_ENABLED", "")
export PROMETHEUS_NAMESPACE := env("PROMETHEUS_NAMESPACE", "monitoring")
export MINIO_NAMESPACE := env("MINIO_NAMESPACE", "minio")
export EXTERNAL_SECRETS_NAMESPACE := env("EXTERNAL_SECRETS_NAMESPACE", "external-secrets")
export K8S_VAULT_NAMESPACE := env("K8S_VAULT_NAMESPACE", "vault")

[private]
default:
    @just --list --unsorted --list-submodules

# Create namespace
create-namespace:
    @kubectl get namespace ${KSERVE_NAMESPACE} &>/dev/null || \
        kubectl create namespace ${KSERVE_NAMESPACE}

# Delete namespace
delete-namespace:
    @kubectl delete namespace ${KSERVE_NAMESPACE} --ignore-not-found

# Install KServe CRDs
install-crds:
    #!/bin/bash
    set -euo pipefail
    echo "Installing KServe CRDs..."
    helm upgrade --cleanup-on-fail --install kserve-crd oci://ghcr.io/kserve/charts/kserve-crd \
        --version ${KSERVE_CHART_VERSION} -n ${KSERVE_NAMESPACE} --create-namespace --wait
    echo "KServe CRDs installed successfully"

# Uninstall KServe CRDs
uninstall-crds:
    #!/bin/bash
    set -euo pipefail
    echo "Uninstalling KServe CRDs..."
    helm uninstall kserve-crd -n ${KSERVE_NAMESPACE} --ignore-not-found
    echo "KServe CRDs uninstalled"

# Setup S3 storage secret for model storage
setup-storage:
    #!/bin/bash
    set -euo pipefail
    echo "Setting up S3 storage secret for KServe..."
    just create-namespace
    if helm status external-secrets -n ${EXTERNAL_SECRETS_NAMESPACE} &>/dev/null; then
        echo "External Secrets Operator detected. Creating ExternalSecret..."
        echo "Using MinIO credentials from Vault (minio/admin)..."
        kubectl delete secret kserve-s3-credentials -n ${KSERVE_NAMESPACE} --ignore-not-found
        kubectl delete externalsecret kserve-s3-external-secret -n ${KSERVE_NAMESPACE} --ignore-not-found
        gomplate -f storage-external-secret.gomplate.yaml | kubectl apply -f -
        echo "Waiting for ExternalSecret to sync..."
        kubectl wait --for=condition=Ready externalsecret/kserve-s3-external-secret \
            -n ${KSERVE_NAMESPACE} --timeout=60s
        echo "ExternalSecret synced successfully"
    else
        echo "External Secrets not available. Creating Kubernetes Secret directly..."
        if ! kubectl get secret minio -n ${MINIO_NAMESPACE} &>/dev/null; then
            echo "Error: MinIO root credentials not found"
            echo "Please install MinIO first with 'just minio::install'"
            exit 1
        fi
        accesskey=$(kubectl get secret minio -n ${MINIO_NAMESPACE} \
            -o jsonpath='{.data.rootUser}' | base64 --decode)
        secretkey=$(kubectl get secret minio -n ${MINIO_NAMESPACE} \
            -o jsonpath='{.data.rootPassword}' | base64 --decode)
        kubectl delete secret kserve-s3-credentials -n ${KSERVE_NAMESPACE} --ignore-not-found
        kubectl create secret generic kserve-s3-credentials -n ${KSERVE_NAMESPACE} \
            --from-literal=AWS_ACCESS_KEY_ID="${accesskey}" \
            --from-literal=AWS_SECRET_ACCESS_KEY="${secretkey}"
        kubectl annotate secret kserve-s3-credentials -n ${KSERVE_NAMESPACE} \
            serving.kserve.io/s3-endpoint="minio.${MINIO_NAMESPACE}.svc.cluster.local:9000" \
            serving.kserve.io/s3-usehttps="0" \
            serving.kserve.io/s3-region="us-east-1" \
            serving.kserve.io/s3-useanoncredential="false" \
            --overwrite
        echo "Kubernetes Secret created"
        if helm status vault -n ${K8S_VAULT_NAMESPACE} &>/dev/null; then
            just vault::put kserve/storage accesskey="${accesskey}" secretkey="${secretkey}"
            echo "Storage credentials also stored in Vault for backup"
        fi
    fi
    echo "S3 storage secret created successfully"

# Delete storage secret
delete-storage:
    @kubectl delete secret kserve-s3-credentials -n ${KSERVE_NAMESPACE} --ignore-not-found
    @kubectl delete externalsecret kserve-s3-external-secret -n ${KSERVE_NAMESPACE} --ignore-not-found

# Install KServe
install:
    #!/bin/bash
    set -euo pipefail
    echo "Installing KServe..."
    just create-namespace
    # Check cert-manager prerequisite
    if ! kubectl get namespace cert-manager &>/dev/null; then
        echo "Error: cert-manager is not installed"
        echo "Please install cert-manager first with 'just cert-manager::install'"
        exit 1
    fi
    echo "Waiting for cert-manager webhook to be ready..."
    kubectl wait --for=condition=ready pod -l app.kubernetes.io/name=webhook \
        -n cert-manager --timeout=300s
    echo "cert-manager webhook is ready"
    if helm status kube-prometheus-stack -n ${PROMETHEUS_NAMESPACE} &>/dev/null; then
        if [ -z "${MONITORING_ENABLED}" ]; then
            if gum confirm "Enable Prometheus monitoring (ServiceMonitor)?"; then
                MONITORING_ENABLED="true"
            else
                MONITORING_ENABLED="false"
            fi
        fi
    else
        MONITORING_ENABLED="false"
    fi
    just install-crds
    if kubectl get service minio -n ${MINIO_NAMESPACE} &>/dev/null; then
        echo "MinIO detected. Setting up S3 storage..."
        just setup-storage
    else
        echo "MinIO not found. Skipping S3 storage setup."
        echo "Models will need to use other storage options."
    fi
    echo "Generating Helm values..."
    gomplate -f values.gomplate.yaml -o values.yaml
    echo "Installing KServe controller..."
    helm upgrade --cleanup-on-fail --install kserve \
        oci://ghcr.io/kserve/charts/kserve --version ${KSERVE_CHART_VERSION} \
        -n ${KSERVE_NAMESPACE} --wait --timeout=10m -f values.yaml
    if [ "${MONITORING_ENABLED}" = "true" ]; then
        echo "Enabling Prometheus monitoring for namespace ${KSERVE_NAMESPACE}..."
        kubectl label namespace ${KSERVE_NAMESPACE} buun.channel/enable-monitoring=true --overwrite
        echo "✓ Monitoring enabled"
    fi
    echo ""
    echo "=== KServe installed ==="
    echo "Namespace: ${KSERVE_NAMESPACE}"
    echo "Deployment mode: ${KSERVE_DEPLOYMENT_MODE}"
    echo "Domain: ${KSERVE_DOMAIN}"
    echo ""
    echo "To deploy an inference service, create an InferenceService resource"
    echo "See: https://kserve.github.io/website/latest/get_started/first_isvc/"

# Upgrade KServe
upgrade:
    #!/bin/bash
    set -euo pipefail
    echo "Upgrading KServe..."
    if helm status kube-prometheus-stack -n ${PROMETHEUS_NAMESPACE} &>/dev/null; then
        if [ -z "${MONITORING_ENABLED}" ]; then
            if gum confirm "Enable Prometheus monitoring (ServiceMonitor)?"; then
                MONITORING_ENABLED="true"
            else
                MONITORING_ENABLED="false"
            fi
        fi
    else
        MONITORING_ENABLED="false"
    fi
    echo "Upgrading KServe CRDs..."
    just install-crds
    echo "Generating Helm values..."
    gomplate -f values.gomplate.yaml -o values.yaml
    echo "Upgrading KServe controller..."
    helm upgrade kserve oci://ghcr.io/kserve/charts/kserve \
        --version ${KSERVE_CHART_VERSION} -n ${KSERVE_NAMESPACE} --wait --timeout=10m \
        -f values.yaml
    echo "KServe upgraded successfully"

# Uninstall KServe
uninstall:
    #!/bin/bash
    set -euo pipefail
    echo "Uninstalling KServe..."
    helm uninstall kserve -n ${KSERVE_NAMESPACE} --ignore-not-found
    just uninstall-crds
    just delete-storage
    just delete-namespace
    echo "KServe uninstalled"

# Get KServe controller logs
logs:
    @kubectl logs -n ${KSERVE_NAMESPACE} -l control-plane=kserve-controller-manager --tail=100 -f

# Get status of KServe components
status:
    #!/bin/bash
    set -euo pipefail
    echo "=== KServe Components Status ==="
    echo ""
    echo "Namespace: ${KSERVE_NAMESPACE}"
    echo ""
    echo "Pods:"
    kubectl get pods -n ${KSERVE_NAMESPACE}
    echo ""
    echo "Services:"
    kubectl get services -n ${KSERVE_NAMESPACE}
    echo ""
    echo "InferenceServices:"
    kubectl get inferenceservices -A

# Convert MLflow artifact path to KServe storageUri
storage-uri artifact_path='':
    #!/bin/bash
    set -euo pipefail
    if [ -z "{{ artifact_path }}" ]; then
        read -p "Enter MLflow artifact path from Model Registry (e.g., mlflow-artifacts:/2/models/MODEL_ID/artifacts): " artifact_path
    else
        artifact_path="{{ artifact_path }}"
    fi
    # Convert mlflow-artifacts:/ to s3://mlflow/
    storage_uri="${artifact_path/mlflow-artifacts:/s3://mlflow}"
    # Remove trailing filename if present (e.g., MLmodel, model.pkl)
    if [[ "$storage_uri" == */artifacts/* ]] && [[ "$storage_uri" != */artifacts ]]; then
        # Remove filename after /artifacts/
        storage_uri=$(echo "$storage_uri" | sed 's|/artifacts/.*|/artifacts|')
    fi
    # Check if this is a run-based path (not model registry path)
    if [[ "$storage_uri" =~ s3://mlflow/[0-9]+/[a-f0-9]{32}/artifacts ]]; then
        echo "Warning: This appears to be a run-based path, not a model registry path."
        echo "KServe requires the model registry path which can be found in:"
        echo " MLflow UI → Models → [Model Name] → [Version] → artifact_path"
        echo ""
        echo "Expected format: mlflow-artifacts:/EXPERIMENT_ID/models/MODEL_ID/artifacts"
        echo "Your input: $artifact_path"
        echo ""
        echo "Output (may not work): $storage_uri"
        exit 1
    fi
    echo "$storage_uri"

kserve/storage-external-secret.gomplate.yaml

@@ -0,0 +1,33 @@
apiVersion: external-secrets.io/v1
kind: ExternalSecret
metadata:
  name: kserve-s3-external-secret
  namespace: {{ .Env.KSERVE_NAMESPACE }}
spec:
  refreshInterval: 1h
  secretStoreRef:
    name: vault-secret-store
    kind: ClusterSecretStore
  target:
    name: kserve-s3-credentials
    creationPolicy: Owner
    template:
      type: Opaque
      metadata:
        annotations:
          serving.kserve.io/s3-endpoint: "minio.{{ .Env.MINIO_NAMESPACE }}.svc.cluster.local:9000"
          serving.kserve.io/s3-usehttps: "0"
          serving.kserve.io/s3-region: "us-east-1"
          serving.kserve.io/s3-useanoncredential: "false"
      data:
        AWS_ACCESS_KEY_ID: "{{ `{{ .accesskey }}` }}"
        AWS_SECRET_ACCESS_KEY: "{{ `{{ .secretkey }}` }}"
  data:
    - secretKey: accesskey
      remoteRef:
        key: minio/admin
        property: username
    - secretKey: secretkey
      remoteRef:
        key: minio/admin
        property: password

kserve/values.gomplate.yaml

@@ -0,0 +1,84 @@
# KServe Helm Chart Values
# Generated using gomplate
kserve:
  version: v0.16.0
  controller:
    # Deployment mode: "Standard" for RawDeployment (no Knative), "Knative" for Serverless
    deploymentMode: {{ .Env.KSERVE_DEPLOYMENT_MODE }}
    gateway:
      domain: {{ .Env.KSERVE_DOMAIN }}
{{- if eq .Env.KSERVE_DEPLOYMENT_MODE "Standard" }}
      ingressGateway:
        className: traefik
{{- end }}
    # Enable Prometheus metrics
{{- if eq .Env.MONITORING_ENABLED "true" }}
    metrics:
      port: 8080
    podAnnotations:
      prometheus.io/scrape: "true"
      prometheus.io/port: "8080"
      prometheus.io/path: "/metrics"
{{- end }}
  # Storage initializer configuration
  storage:
    s3:
      enabled: true
{{- if ne .Env.MINIO_NAMESPACE "" }}
      endpoint: "minio.{{ .Env.MINIO_NAMESPACE }}.svc.cluster.local:9000"
      useHttps: false
      region: "us-east-1"
      verifySSL: false
      useVirtualBucket: false
      useAnonymousCredential: false
{{- end }}
  storageInitializer:
    resources:
      requests:
        memory: "100Mi"
        cpu: "100m"
      limits:
        memory: "1Gi"
        cpu: "1"
  # Model agent configuration
  agent:
    image: kserve/agent
    tag: v0.16.0
  # Router configuration
  router:
    image: kserve/router
    tag: v0.16.0
  # Serving runtimes - enable commonly used ones
  servingRuntimes:
    sklearn:
      enabled: true
    xgboost:
      enabled: true
    mlserver:
      enabled: true
    triton:
      enabled: true
    tensorflow:
      enabled: true
    pytorch:
      enabled: true
    huggingfaceserver:
      enabled: true
{{- if eq .Env.MONITORING_ENABLED "true" }}
# ServiceMonitor for Prometheus metrics collection
serviceMonitor:
  enabled: true
  namespace: {{ .Env.KSERVE_NAMESPACE }}
  labels:
    release: kube-prometheus-stack
  interval: 30s
{{- end }}


@@ -586,6 +586,19 @@ docker run --rm localhost:30500/mlflow:3.6.0-oidc mlflow --version
**Note**: All Docker commands run on the remote host specified by `DOCKER_HOST`.
## Examples
### Iris Classification with KServe
A complete end-to-end example demonstrating the integration of JupyterHub, MLflow, and KServe:
- Train an Iris classification model in JupyterHub
- Register the model to MLflow Model Registry
- Deploy the model with KServe InferenceService
- Test inference from JupyterHub notebooks and Kubernetes Jobs
See: [`examples/kserve-mlflow-iris`](../examples/kserve-mlflow-iris/README.md)
## Custom Image
### Dockerfile