From 2b0687330c8428d77107f608d3d642c025ab6666 Mon Sep 17 00:00:00 2001 From: Masaki Yatsu Date: Mon, 10 Nov 2025 21:31:35 +0900 Subject: [PATCH] feat(kserve): install KServe --- cert-manager/README.md | 166 +++++++++ cert-manager/justfile | 91 +++++ .../01-train-and-register.ipynb | 202 +++++++++++ .../kserve-mlflow-iris/02-deploy-model.yaml | 44 +++ .../03-test-inference.ipynb | 313 +++++++++++++++++ .../04-test-inference-job.yaml | 146 ++++++++ examples/kserve-mlflow-iris/README.md | 315 ++++++++++++++++++ justfile | 2 + kserve/.gitignore | 1 + kserve/README.md | 300 +++++++++++++++++ kserve/justfile | 264 +++++++++++++++ kserve/storage-external-secret.gomplate.yaml | 33 ++ kserve/values.gomplate.yaml | 84 +++++ mlflow/README.md | 13 + 14 files changed, 1974 insertions(+) create mode 100644 cert-manager/README.md create mode 100644 cert-manager/justfile create mode 100644 examples/kserve-mlflow-iris/01-train-and-register.ipynb create mode 100644 examples/kserve-mlflow-iris/02-deploy-model.yaml create mode 100644 examples/kserve-mlflow-iris/03-test-inference.ipynb create mode 100644 examples/kserve-mlflow-iris/04-test-inference-job.yaml create mode 100644 examples/kserve-mlflow-iris/README.md create mode 100644 kserve/.gitignore create mode 100644 kserve/README.md create mode 100644 kserve/justfile create mode 100644 kserve/storage-external-secret.gomplate.yaml create mode 100644 kserve/values.gomplate.yaml diff --git a/cert-manager/README.md b/cert-manager/README.md new file mode 100644 index 0000000..f55e58b --- /dev/null +++ b/cert-manager/README.md @@ -0,0 +1,166 @@ +# cert-manager Module + +cert-manager is a Kubernetes add-on that automates the management and issuance of TLS certificates from various sources. It provides a common API for certificate issuers and ensures certificates are valid and up to date. 
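+
+For example, once an issuer exists (see Usage below), cert-manager's ingress-shim can obtain and renew a certificate for an Ingress from a single annotation. A minimal sketch, assuming the `selfsigned-issuer` defined later in this README (the Ingress and Service names are illustrative):
+
+```yaml
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+  name: example-ingress
+  annotations:
+    # cert-manager watches this annotation and creates a Certificate automatically
+    cert-manager.io/cluster-issuer: selfsigned-issuer
+spec:
+  tls:
+    - hosts:
+        - example.com
+      # cert-manager stores the issued certificate in this Secret
+      secretName: example-ingress-tls
+  rules:
+    - host: example.com
+      http:
+        paths:
+          - path: /
+            pathType: Prefix
+            backend:
+              service:
+                name: example-service
+                port:
+                  number: 80
+```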
+ +## Features + +- **Automatic Certificate Renewal**: Automatically renews certificates before they expire +- **Multiple Issuers**: Supports Let's Encrypt, HashiCorp Vault, Venafi, self-signed, and more +- **Kubernetes Native**: Uses Custom Resource Definitions (CRDs) for certificate management +- **Webhook Integration**: Provides admission webhooks for validating and mutating certificate resources + +## Prerequisites + +- Kubernetes cluster (installed via `just k8s::install`) +- kubectl configured with cluster admin permissions + +## Installation + +### Basic Installation + +```bash +# Install cert-manager with default settings +just cert-manager::install +``` + +### Environment Variables + +Key environment variables (set via `.env.local` or environment): + +```bash +CERT_MANAGER_NAMESPACE=cert-manager # Namespace for cert-manager +CERT_MANAGER_CHART_VERSION=v1.19.1 # cert-manager Helm chart version +``` + +## Usage + +### Check Status + +```bash +# View status of cert-manager components +just cert-manager::status +``` + +### Create a Self-Signed Issuer + +```yaml +apiVersion: cert-manager.io/v1 +kind: ClusterIssuer +metadata: + name: selfsigned-issuer +spec: + selfSigned: {} +``` + +Apply the resource: + +```bash +kubectl apply -f issuer.yaml +``` + +### Create a Certificate + +```yaml +apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + name: example-cert + namespace: default +spec: + secretName: example-cert-tls + issuerRef: + name: selfsigned-issuer + kind: ClusterIssuer + dnsNames: + - example.com + - www.example.com +``` + +Apply the resource: + +```bash +kubectl apply -f certificate.yaml +``` + +### View Certificates + +```bash +# List all certificates +kubectl get certificates -A + +# Describe a specific certificate +kubectl describe certificate example-cert -n default +``` + +## Components + +cert-manager installs three main components: + +1. **cert-manager**: Main controller managing Certificate resources +2. **cert-manager-webhook**: Admission webhook for validating and mutating cert-manager resources +3. **cert-manager-cainjector**: Injects CA bundles into webhooks and API services + +## Used By + +cert-manager is required by: +- **KServe**: For webhook TLS certificates + +## Upgrade + +```bash +# Upgrade cert-manager to a new version +just cert-manager::upgrade +``` + +## Uninstall + +```bash +# Remove cert-manager +just cert-manager::uninstall +``` + +This will: +- Uninstall cert-manager Helm release +- Delete cert-manager CRDs +- Delete namespace + +**Warning**: Uninstalling will remove all Certificate, Issuer, and ClusterIssuer resources. 
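+
+If any of those resources might still be needed later, snapshot them before uninstalling (the file name is arbitrary):
+
+```bash
+# Back up all cert-manager resources across namespaces before removal
+kubectl get certificates,certificaterequests,issuers,clusterissuers -A -o yaml > cert-manager-backup.yaml
+```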
+
+## Troubleshooting
+
+### Check Controller Logs
+
+```bash
+kubectl logs -n cert-manager -l app=cert-manager
+```
+
+### Check Webhook Logs
+
+```bash
+kubectl logs -n cert-manager -l app=webhook
+```
+
+### Verify CRDs
+
+```bash
+kubectl get crd | grep cert-manager.io
+```
+
+### Check Certificate Status
+
+```bash
+kubectl get certificate -A
+kubectl describe certificate <name> -n <namespace>
+```
+
+Common issues:
+- **Certificate not ready**: Check issuer configuration and logs
+- **Webhook errors**: Ensure cert-manager webhook is running and healthy
+- **DNS validation failures**: For ACME issuers, ensure DNS records are correct
+
+## References
+
+- [cert-manager Documentation](https://cert-manager.io/docs/)
+- [cert-manager GitHub](https://github.com/cert-manager/cert-manager)
+- [Helm Chart Configuration](https://artifacthub.io/packages/helm/cert-manager/cert-manager)
+- [Supported Issuers](https://cert-manager.io/docs/configuration/)
diff --git a/cert-manager/justfile b/cert-manager/justfile
new file mode 100644
index 0000000..7e7d042
--- /dev/null
+++ b/cert-manager/justfile
@@ -0,0 +1,91 @@
+set fallback := true
+
+export CERT_MANAGER_NAMESPACE := env("CERT_MANAGER_NAMESPACE", "cert-manager")
+export CERT_MANAGER_CHART_VERSION := env("CERT_MANAGER_CHART_VERSION", "v1.19.1")
+
+[private]
+default:
+    @just --list --unsorted --list-submodules
+
+# Create namespace
+create-namespace:
+    @kubectl get namespace ${CERT_MANAGER_NAMESPACE} &>/dev/null || \
+        kubectl create namespace ${CERT_MANAGER_NAMESPACE}
+
+# Delete namespace
+delete-namespace:
+    @kubectl delete namespace ${CERT_MANAGER_NAMESPACE} --ignore-not-found
+
+# Install cert-manager
+install:
+    #!/bin/bash
+    set -euo pipefail
+    echo "Installing cert-manager..."
+    just create-namespace
+
+    echo "Installing cert-manager from OCI registry..."
+    helm upgrade --cleanup-on-fail --install cert-manager \
+        oci://quay.io/jetstack/charts/cert-manager --version ${CERT_MANAGER_CHART_VERSION} \
+        -n ${CERT_MANAGER_NAMESPACE} --set crds.enabled=true --wait --timeout=5m
+
+    echo "Waiting for cert-manager webhook to be ready..."
+    kubectl wait --for=condition=ready pod -l app.kubernetes.io/name=webhook \
+        -n ${CERT_MANAGER_NAMESPACE} --timeout=300s
+
+    echo "Verifying cert-manager webhook is functional..."
+    sleep 10
+
+    echo ""
+    echo "=== cert-manager installed ==="
+    echo "Namespace: ${CERT_MANAGER_NAMESPACE}"
+    echo "Version: ${CERT_MANAGER_CHART_VERSION}"
+    echo ""
+    echo "cert-manager provides TLS certificate management for Kubernetes"
+
+# Upgrade cert-manager
+upgrade:
+    #!/bin/bash
+    set -euo pipefail
+    echo "Upgrading cert-manager..."
+
+    echo "Upgrading cert-manager from OCI registry..."
+    helm upgrade cert-manager oci://quay.io/jetstack/charts/cert-manager \
+        --version ${CERT_MANAGER_CHART_VERSION} -n ${CERT_MANAGER_NAMESPACE} \
+        --set crds.enabled=true --wait --timeout=5m
+
+    echo "cert-manager upgraded successfully"
+
+# Uninstall cert-manager
+uninstall:
+    #!/bin/bash
+    set -euo pipefail
+    echo "Uninstalling cert-manager..."
+    helm uninstall cert-manager -n ${CERT_MANAGER_NAMESPACE} --ignore-not-found --wait
+    echo "Deleting cert-manager CRDs..."
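+    # helm uninstall leaves the cert-manager CRDs behind, so delete them explicitly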
+ kubectl delete crd \ + certificates.cert-manager.io \ + certificaterequests.cert-manager.io \ + challenges.acme.cert-manager.io \ + clusterissuers.cert-manager.io \ + issuers.cert-manager.io \ + orders.acme.cert-manager.io \ + --ignore-not-found + just delete-namespace + echo "cert-manager uninstalled" + +# Get status of cert-manager components +status: + #!/bin/bash + set -euo pipefail + echo "=== cert-manager Components Status ===" + echo "" + echo "Namespace: ${CERT_MANAGER_NAMESPACE}" + echo "" + echo "Pods:" + kubectl get pods -n ${CERT_MANAGER_NAMESPACE} + echo "" + echo "Services:" + kubectl get services -n ${CERT_MANAGER_NAMESPACE} + echo "" + echo "CRDs:" + kubectl get crd | grep cert-manager.io diff --git a/examples/kserve-mlflow-iris/01-train-and-register.ipynb b/examples/kserve-mlflow-iris/01-train-and-register.ipynb new file mode 100644 index 0000000..a150112 --- /dev/null +++ b/examples/kserve-mlflow-iris/01-train-and-register.ipynb @@ -0,0 +1,202 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Iris Classification with MLflow and KServe\n", + "\n", + "This notebook demonstrates:\n", + "1. Training a simple scikit-learn model on the Iris dataset\n", + "2. Logging the model to MLflow\n", + "3. Preparing the model for deployment with KServe" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Setup and Install Dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Install required packages\n", + "!pip install mlflow scikit-learn boto3 -q" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import mlflow\n", + "import mlflow.sklearn\n", + "from sklearn.datasets import load_iris\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.metrics import accuracy_score, classification_report" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": "## 2. Configure MLflow\n\nSet MLflow tracking URI and authentication credentials." + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": "# MLflow configuration\nMLFLOW_TRACKING_URI = os.getenv('MLFLOW_TRACKING_URI', 'http://mlflow.mlflow.svc.cluster.local')\nmlflow.set_tracking_uri(MLFLOW_TRACKING_URI)\n\n# MLflow authentication\nos.environ['MLFLOW_TRACKING_USERNAME'] = os.getenv('MLFLOW_TRACKING_USERNAME', '')\nos.environ['MLFLOW_TRACKING_PASSWORD'] = os.getenv('MLFLOW_TRACKING_PASSWORD', '')\n\nprint(f\"MLflow Tracking URI: {MLFLOW_TRACKING_URI}\")\nprint(f\"MLflow Username: {os.environ['MLFLOW_TRACKING_USERNAME']}\")" + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Load and Prepare Data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Load Iris dataset\n", + "iris = load_iris()\n", + "X = iris.data\n", + "y = iris.target\n", + "\n", + "# Split data\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n", + "\n", + "print(f\"Training samples: {len(X_train)}\")\n", + "print(f\"Test samples: {len(X_test)}\")\n", + "print(f\"Classes: {iris.target_names}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. 
Train Model with MLflow Tracking" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Set experiment\n", + "experiment_name = \"iris-classification\"\n", + "mlflow.set_experiment(experiment_name)\n", + "\n", + "# Start MLflow run\n", + "with mlflow.start_run(run_name=\"logistic-regression\") as run:\n", + " # Train model\n", + " model = LogisticRegression(max_iter=200, random_state=42)\n", + " model.fit(X_train, y_train)\n", + " \n", + " # Predictions\n", + " y_pred = model.predict(X_test)\n", + " accuracy = accuracy_score(y_test, y_pred)\n", + " \n", + " # Log parameters\n", + " mlflow.log_param(\"model_type\", \"LogisticRegression\")\n", + " mlflow.log_param(\"max_iter\", 200)\n", + " \n", + " # Log metrics\n", + " mlflow.log_metric(\"accuracy\", accuracy)\n", + " \n", + " # Log model\n", + " mlflow.sklearn.log_model(\n", + " model, \n", + " \"model\",\n", + " registered_model_name=\"iris-classifier\"\n", + " )\n", + " \n", + " print(f\"\\nRun ID: {run.info.run_id}\")\n", + " print(f\"Accuracy: {accuracy:.4f}\")\n", + " print(f\"\\nClassification Report:\")\n", + " print(classification_report(y_test, y_pred, target_names=iris.target_names))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5. Get Model Information for KServe Deployment" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": "# Get the latest version of the registered model\nclient = mlflow.tracking.MlflowClient()\nmodel_name = \"iris-classifier\"\n\n# Use search_model_versions instead of deprecated get_latest_versions\nmodel_versions = client.search_model_versions(f\"name='{model_name}'\")\nlatest_version = max(model_versions, key=lambda x: int(x.version))\n\nprint(f\"\\n=== Model Information for KServe ===\")\nprint(f\"Model Name: {model_name}\")\nprint(f\"Version: {latest_version.version}\")\nprint(f\"Run ID: {latest_version.run_id}\")\nprint(f\"\\nArtifact URI: {latest_version.source}\")\nprint(f\"\\nUse this information to configure the KServe InferenceService.\")" + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 6. Test Local Prediction" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Test with a sample\n", + "sample_input = [[5.1, 3.5, 1.4, 0.2]] # Should predict 'setosa'\n", + "prediction = model.predict(sample_input)\n", + "predicted_class = iris.target_names[prediction[0]]\n", + "\n", + "print(f\"\\nTest Input: {sample_input[0]}\")\n", + "print(f\"Predicted Class: {predicted_class}\")\n", + "print(f\"\\nThis sample will be used to test the KServe deployment.\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Next Steps\n", + "\n", + "1. Note the Model Name and Version from above\n", + "2. Deploy the model using KServe with the InferenceService YAML\n", + "3. 
Test the deployed model endpoint"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
\ No newline at end of file
diff --git a/examples/kserve-mlflow-iris/02-deploy-model.yaml b/examples/kserve-mlflow-iris/02-deploy-model.yaml
new file mode 100644
index 0000000..b69ed25
--- /dev/null
+++ b/examples/kserve-mlflow-iris/02-deploy-model.yaml
@@ -0,0 +1,44 @@
+apiVersion: serving.kserve.io/v1beta1
+kind: InferenceService
+metadata:
+  name: iris-classifier
+  namespace: kserve
+  annotations:
+    serving.kserve.io/secretName: kserve-s3-credentials
+spec:
+  predictor:
+    model:
+      modelFormat:
+        name: mlflow
+        version: "2"
+      storageUri: s3://mlflow/EXPERIMENT_ID/models/MODEL_ID/artifacts
+      resources:
+        requests:
+          cpu: "100m"
+          memory: "512Mi"
+        limits:
+          cpu: "1000m"
+          memory: "1Gi"
+---
+# Alternative: Using SKLearn Server (does not install requirements.txt)
+# apiVersion: serving.kserve.io/v1beta1
+# kind: InferenceService
+# metadata:
+#   name: iris-classifier
+#   namespace: kserve
+#   annotations:
+#     serving.kserve.io/secretName: kserve-s3-credentials
+# spec:
+#   predictor:
+#     model:
+#       modelFormat:
+#         name: sklearn
+#         version: "1"
+#       storageUri: s3://mlflow/EXPERIMENT_ID/models/MODEL_ID/artifacts
+#       resources:
+#         requests:
+#           cpu: "100m"
+#           memory: "256Mi"
+#         limits:
+#           cpu: "500m"
+#           memory: "512Mi"
diff --git a/examples/kserve-mlflow-iris/03-test-inference.ipynb b/examples/kserve-mlflow-iris/03-test-inference.ipynb
new file mode 100644
index 0000000..0183367
--- /dev/null
+++ b/examples/kserve-mlflow-iris/03-test-inference.ipynb
@@ -0,0 +1,313 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Test Iris Classifier InferenceService\n",
+    "\n",
+    "This notebook demonstrates how to call the deployed KServe InferenceService from JupyterHub."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 1. Setup"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Install requests if not already installed\n",
+    "!pip install requests -q"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import requests\n",
+    "import json\n",
+    "import os"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 2. Configure Endpoint\n",
+    "\n",
+    "The InferenceService is accessible via the cluster-internal service URL."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": "# KServe InferenceService endpoint\n# Format: http://<name>-predictor.<namespace>.svc.cluster.local/v2/models/<name>/infer\nINFERENCE_SERVICE_NAME = \"iris-classifier\"\nNAMESPACE = \"kserve\"\nENDPOINT = f\"http://{INFERENCE_SERVICE_NAME}-predictor.{NAMESPACE}.svc.cluster.local/v2/models/{INFERENCE_SERVICE_NAME}/infer\"\n\nprint(f\"Inference Endpoint: {ENDPOINT}\")"
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 3. Define Test Samples\n",
+    "\n",
+    "The Iris dataset has 4 features:\n",
+    "1. Sepal length (cm)\n",
+    "2. Sepal width (cm)\n",
+    "3. Petal length (cm)\n",
+    "4. 
Petal width (cm)\n", + "\n", + "Classes:\n", + "- 0: Iris Setosa\n", + "- 1: Iris Versicolor\n", + "- 2: Iris Virginica" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Define class names\n", + "CLASS_NAMES = [\"Iris Setosa\", \"Iris Versicolor\", \"Iris Virginica\"]\n", + "\n", + "# Test samples with expected predictions\n", + "test_cases = [\n", + " {\n", + " \"name\": \"Typical Setosa\",\n", + " \"features\": [5.1, 3.5, 1.4, 0.2],\n", + " \"expected_class\": 0,\n", + " \"description\": \"Short petals, typical of Setosa\"\n", + " },\n", + " {\n", + " \"name\": \"Typical Virginica\",\n", + " \"features\": [6.7, 3.0, 5.2, 2.3],\n", + " \"expected_class\": 2,\n", + " \"description\": \"Long petals and sepals, typical of Virginica\"\n", + " },\n", + " {\n", + " \"name\": \"Typical Versicolor\",\n", + " \"features\": [5.9, 3.0, 4.2, 1.5],\n", + " \"expected_class\": 1,\n", + " \"description\": \"Medium-sized features, typical of Versicolor\"\n", + " },\n", + "]\n", + "\n", + "print(f\"Prepared {len(test_cases)} test cases\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. Test Single Prediction\n", + "\n", + "Send a single prediction request to the InferenceService." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": "def predict(features):\n \"\"\"\n Send prediction request to KServe InferenceService using v2 protocol.\n \n Args:\n features: List of feature values [sepal_length, sepal_width, petal_length, petal_width]\n \n Returns:\n Predicted class (0, 1, or 2)\n \"\"\"\n payload = {\n \"inputs\": [\n {\n \"name\": \"input-0\",\n \"shape\": [1, 4],\n \"datatype\": \"FP64\",\n \"data\": [features]\n }\n ]\n }\n \n try:\n response = requests.post(ENDPOINT, json=payload, timeout=10)\n response.raise_for_status()\n result = response.json()\n return result['outputs'][0]['data'][0]\n except requests.exceptions.RequestException as e:\n print(f\"Error: {e}\")\n if hasattr(e, 'response') and hasattr(e.response, 'text'):\n print(f\"Response: {e.response.text}\")\n return None\n\n# Test with first sample\nsample = test_cases[0]\nprint(f\"Testing: {sample['name']}\")\nprint(f\"Features: {sample['features']}\")\nprint(f\"Description: {sample['description']}\")\nprint()\n\nprediction = predict(sample['features'])\nif prediction is not None:\n print(f\"Predicted Class: {prediction} ({CLASS_NAMES[prediction]})\")\n print(f\"Expected Class: {sample['expected_class']} ({CLASS_NAMES[sample['expected_class']]})\")\n print(f\"Result: {'✓ PASS' if prediction == sample['expected_class'] else '✗ FAIL'}\")" + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5. Test All Cases\n", + "\n", + "Run predictions for all test cases and display results." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"=\" * 80)\n", + "print(\"Testing Iris Classifier InferenceService\")\n", + "print(\"=\" * 80)\n", + "print()\n", + "\n", + "results = []\n", + "\n", + "for i, test_case in enumerate(test_cases, 1):\n", + " print(f\"Test Case {i}: {test_case['name']}\")\n", + " print(f\" Features: {test_case['features']}\")\n", + " print(f\" Description: {test_case['description']}\")\n", + " print(f\" Expected: {CLASS_NAMES[test_case['expected_class']]}\")\n", + " \n", + " prediction = predict(test_case['features'])\n", + " \n", + " if prediction is not None:\n", + " predicted_class_name = CLASS_NAMES[prediction]\n", + " is_correct = prediction == test_case['expected_class']\n", + " status = \"✓ PASS\" if is_correct else \"✗ FAIL\"\n", + " \n", + " print(f\" Predicted: {predicted_class_name}\")\n", + " print(f\" Status: {status}\")\n", + " \n", + " results.append({\n", + " 'name': test_case['name'],\n", + " 'expected': test_case['expected_class'],\n", + " 'predicted': prediction,\n", + " 'correct': is_correct\n", + " })\n", + " else:\n", + " print(f\" Status: ✗ ERROR\")\n", + " results.append({\n", + " 'name': test_case['name'],\n", + " 'expected': test_case['expected_class'],\n", + " 'predicted': None,\n", + " 'correct': False\n", + " })\n", + " \n", + " print()\n", + "\n", + "# Summary\n", + "print(\"=\" * 80)\n", + "passed = sum(1 for r in results if r['correct'])\n", + "total = len(results)\n", + "print(f\"Test Summary: {passed}/{total} passed\")\n", + "print(\"=\" * 80)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 6. Batch Prediction\n", + "\n", + "Send multiple samples in a single request for batch prediction." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": "def predict_batch(features_list):\n \"\"\"\n Send batch prediction request to KServe InferenceService using v2 protocol.\n \n Args:\n features_list: List of feature arrays\n \n Returns:\n List of predicted classes\n \"\"\"\n payload = {\n \"inputs\": [\n {\n \"name\": \"input-0\",\n \"shape\": [len(features_list), 4],\n \"datatype\": \"FP64\",\n \"data\": features_list\n }\n ]\n }\n \n try:\n response = requests.post(ENDPOINT, json=payload, timeout=10)\n response.raise_for_status()\n result = response.json()\n return result['outputs'][0]['data']\n except requests.exceptions.RequestException as e:\n print(f\"Error: {e}\")\n if hasattr(e, 'response') and hasattr(e.response, 'text'):\n print(f\"Response: {e.response.text}\")\n return None\n\n# Prepare batch request\nbatch_features = [tc['features'] for tc in test_cases]\nprint(f\"Sending batch request with {len(batch_features)} samples...\")\nprint()\n\n# Send batch request\npredictions = predict_batch(batch_features)\n\nif predictions:\n print(\"Batch Prediction Results:\")\n print(\"-\" * 60)\n for i, (test_case, prediction) in enumerate(zip(test_cases, predictions), 1):\n print(f\"{i}. {test_case['name']}\")\n print(f\" Predicted: {CLASS_NAMES[prediction]}\")\n print(f\" Expected: {CLASS_NAMES[test_case['expected_class']]}\")\n status = \"✓\" if prediction == test_case['expected_class'] else \"✗\"\n print(f\" {status}\")\n print()" + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 7. Custom Prediction\n", + "\n", + "Try your own input values!" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Enter your own values here\n", + "# Format: [sepal_length, sepal_width, petal_length, petal_width]\n", + "custom_features = [6.0, 3.0, 4.0, 1.5]\n", + "\n", + "print(f\"Custom Input: {custom_features}\")\n", + "print(f\" Sepal Length: {custom_features[0]} cm\")\n", + "print(f\" Sepal Width: {custom_features[1]} cm\")\n", + "print(f\" Petal Length: {custom_features[2]} cm\")\n", + "print(f\" Petal Width: {custom_features[3]} cm\")\n", + "print()\n", + "\n", + "prediction = predict(custom_features)\n", + "if prediction is not None:\n", + " print(f\"Prediction: {CLASS_NAMES[prediction]} (class {prediction})\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 8. Check InferenceService Status\n", + "\n", + "Verify the InferenceService is running properly." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Check if we can reach the endpoint\n", + "import subprocess\n", + "\n", + "print(\"Checking InferenceService status...\")\n", + "print()\n", + "\n", + "# Using kubectl from the notebook\n", + "try:\n", + " result = subprocess.run(\n", + " [\"kubectl\", \"get\", \"inferenceservice\", INFERENCE_SERVICE_NAME, \"-n\", NAMESPACE],\n", + " capture_output=True,\n", + " text=True,\n", + " timeout=10\n", + " )\n", + " print(result.stdout)\n", + " if result.returncode != 0:\n", + " print(result.stderr)\n", + "except Exception as e:\n", + " print(f\"Could not check status: {e}\")\n", + " print(\"This is normal if kubectl is not available in the notebook environment.\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Summary\n", + "\n", + "You have successfully:\n", + "1. ✅ Connected to the KServe InferenceService\n", + "2. ✅ Sent single prediction requests\n", + "3. ✅ Sent batch prediction requests\n", + "4. 
✅ Verified predictions against expected results\n", + "\n", + "## Next Steps\n", + "\n", + "- Try running the Kubernetes Job-based tests (see `04-test-inference-job.yaml`)\n", + "- Deploy a new version of the model and compare predictions\n", + "- Implement A/B testing with multiple model versions\n", + "- Add monitoring and logging" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.0" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/examples/kserve-mlflow-iris/04-test-inference-job.yaml b/examples/kserve-mlflow-iris/04-test-inference-job.yaml new file mode 100644 index 0000000..a23ac5d --- /dev/null +++ b/examples/kserve-mlflow-iris/04-test-inference-job.yaml @@ -0,0 +1,146 @@ +apiVersion: batch/v1 +kind: Job +metadata: + name: test-iris-inference + namespace: kserve +spec: + template: + spec: + containers: + - name: test + image: python:3.9-slim + command: + - /bin/sh + - -c + - | + cat << 'EOF' | python + import urllib.request + import json + + # KServe endpoint (v2 protocol) + url = "http://iris-classifier-predictor.kserve.svc.cluster.local/v2/models/iris-classifier/infer" + + # Iris class names + class_names = ["setosa", "versicolor", "virginica"] + + # Test samples with expected results + test_cases = [ + {"data": [5.1, 3.5, 1.4, 0.2], "expected": "setosa"}, + {"data": [6.7, 3.0, 5.2, 2.3], "expected": "virginica"}, + {"data": [5.9, 3.0, 4.2, 1.5], "expected": "versicolor"}, + ] + + print("=" * 60) + print("Testing Iris Classifier InferenceService") + print("=" * 60) + print(f"Endpoint: {url}") + print() + + for i, test_case in enumerate(test_cases, 1): + print(f"Test Case {i}:") + print(f" Input: {test_case['data']}") + print(f" Expected: {test_case['expected']}") + + # v2 protocol payload + payload = { + "inputs": [ + { + "name": "input-0", + "shape": [1, 4], + "datatype": "FP64", + "data": [test_case['data']] + } + ] + } + + try: + req = urllib.request.Request( + url, + data=json.dumps(payload).encode('utf-8'), + headers={'Content-Type': 'application/json'} + ) + with urllib.request.urlopen(req) as response: + result = json.loads(response.read().decode('utf-8')) + prediction = result['outputs'][0]['data'][0] + predicted_class = class_names[prediction] + + status = "✓ PASS" if predicted_class == test_case['expected'] else "✗ FAIL" + print(f" Predicted: {predicted_class} (class {prediction})") + print(f" Status: {status}") + + except Exception as e: + print(f" Error: {e}") + + print() + + print("=" * 60) + print("Test completed") + print("=" * 60) + EOF + restartPolicy: Never + backoffLimit: 1 +--- +# Alternative: curl-based quick test +# apiVersion: batch/v1 +# kind: Job +# metadata: +# name: test-iris-inference-curl +# namespace: kserve +# spec: +# template: +# spec: +# containers: +# - name: test +# image: curlimages/curl:latest +# command: +# - /bin/sh +# - -c +# - | +# echo "Testing Iris Classifier Inference Service..." 
+# echo "" +# echo "Endpoint: http://iris-classifier-predictor.kserve.svc.cluster.local/v2/models/iris-classifier/infer" +# echo "" +# echo "Sending test request with sample data: [5.1, 3.5, 1.4, 0.2]" +# echo "Expected prediction: class 0 (setosa)" +# echo "" +# +# curl -v -X POST \ +# http://iris-classifier-predictor.kserve.svc.cluster.local/v2/models/iris-classifier/infer \ +# -H "Content-Type: application/json" \ +# -d '{ +# "inputs": [ +# { +# "name": "input-0", +# "shape": [1, 4], +# "datatype": "FP64", +# "data": [[5.1, 3.5, 1.4, 0.2]] +# } +# ] +# }' +# +# echo "" +# echo "" +# echo "Testing with multiple samples..." +# echo "" +# +# curl -X POST \ +# http://iris-classifier-predictor.kserve.svc.cluster.local/v2/models/iris-classifier/infer \ +# -H "Content-Type: application/json" \ +# -d '{ +# "inputs": [ +# { +# "name": "input-0", +# "shape": [3, 4], +# "datatype": "FP64", +# "data": [ +# [5.1, 3.5, 1.4, 0.2], +# [6.7, 3.0, 5.2, 2.3], +# [5.9, 3.0, 4.2, 1.5] +# ] +# } +# ] +# }' +# +# echo "" +# restartPolicy: Never +# backoffLimit: 1 diff --git a/examples/kserve-mlflow-iris/README.md b/examples/kserve-mlflow-iris/README.md new file mode 100644 index 0000000..0dbc021 --- /dev/null +++ b/examples/kserve-mlflow-iris/README.md @@ -0,0 +1,315 @@ +# KServe + MLflow + JupyterHub: Iris Classification Example + +This example demonstrates an end-to-end machine learning workflow using: + +- **JupyterHub**: Interactive development, model training, and testing +- **MLflow**: Model tracking and registry +- **MinIO**: Artifact storage (S3-compatible) +- **KServe**: Model serving and inference + +## Workflow Overview + +1. **📓 Train & Register** (`01-train-and-register.ipynb`) - Train model in JupyterHub, register to MLflow +2. **🚀 Deploy** (`02-deploy-model.yaml`) - Deploy model with KServe InferenceService +3. **🧪 Test from Notebook** (`03-test-inference.ipynb`) - Test inference from JupyterHub (Recommended) +4. **🔧 Test from Pod** (`04-test-inference-job.yaml`) - Automated testing from Kubernetes Job + +## Architecture + +```plain +┌─────────────┐ ┌─────────┐ ┌────────┐ ┌─────────────────┐ +│ JupyterHub │────>│ MLflow │────>│ MinIO │<────│ KServe │ +│ │ │ │ │ (S3) │ │ InferenceService│ +│ 1. Train │ │ Register│ │ Store │ │ 2. Deploy │ +│ Model │ │ │ │ Model │ │ & Serve │ +└──────┬──────┘ └─────────┘ └────────┘ └──────────┬──────┘ + │ │ + │ 3. Test from Notebook (Recommended) │ + └──────────────────────────────────────────────────────┘ + │ + │ + 4. Test from Pod │ + (Alternative) │ + v + ┌──────────────┐ + │ Kubernetes │ + │ Test Job │ + └──────────────┘ +``` + +## Prerequisites + +Ensure the following components are installed: + +```bash +# Check installations +kubectl get pods -n jupyterhub +kubectl get pods -n mlflow +kubectl get pods -n minio +kubectl get pods -n kserve +``` + +## Step 1: Train and Register Model in JupyterHub + +1. **Access JupyterHub**: + + Access JupyterHub at the configured JUPYTERHUB_HOST + +2. **Upload the Notebook**: + - Upload `01-train-and-register.ipynb` to your JupyterHub workspace + +3. **Set Environment Variables** (in the notebook or terminal): + + ```bash + # MLflow authentication (required if MLflow has authentication enabled) + export MLFLOW_TRACKING_USERNAME=your-username + export MLFLOW_TRACKING_PASSWORD=your-password + ``` + + Note: MLFLOW_TRACKING_URI uses the default cluster-internal URL and does not need to be set. + +4. 
**Run the Notebook**: + - Execute all cells in `01-train-and-register.ipynb` + - The model will be automatically registered to MLflow Model Registry + +5. **Verify in MLflow UI**: + + - Access MLflow UI at the configured MLFLOW_HOST + - Navigate to "Models" → "iris-classifier" + - Click on the model version (e.g., "Version 1") + - Note the **artifact_path** displayed (e.g., `mlflow-artifacts:/2/models/m-28620b840353444385fa8e62335decf5/artifacts`) + +## Step 2: Deploy Model with KServe + +1. **Get the Model Registry Path**: + + In MLflow UI, navigate to: + - **Models** → **iris-classifier** → **Version 1** + - Copy the **artifact_path** from the model details + - Example: `mlflow-artifacts:/2/models/m-28620b840353444385fa8e62335decf5/artifacts` + + **Important**: Use the artifact_path from the **Model Registry** (contains `/models/`), NOT the run-based path from the experiment runs. + +2. **Update the InferenceService YAML**: + + Use the helper command to convert the MLflow artifact path to KServe storageUri: + + ```bash + just kserve::storage-uri "mlflow-artifacts:/2/models/m-28620b840353444385fa8e62335decf5/artifacts" + # Output: s3://mlflow/2/models/m-28620b840353444385fa8e62335decf5/artifacts + ``` + + Edit `02-deploy-model.yaml` and replace the `storageUri` with the output: + + ```yaml + storageUri: s3://mlflow/2/models/m-28620b840353444385fa8e62335decf5/artifacts + ``` + + **Note**: The default configuration uses `mlflow` format, which automatically installs dependencies from `requirements.txt`. This ensures compatibility but may take longer to start (initial container startup installs packages). + +3. **Deploy the InferenceService**: + + ```bash + kubectl apply -f 02-deploy-model.yaml + ``` + +4. **Verify Deployment**: + + ```bash + # Check InferenceService status + kubectl get inferenceservice iris-classifier -n kserve + + # Wait for it to be ready (STATUS should show "Ready") + # Note: First deployment may take 5-10 minutes due to dependency installation + kubectl wait --for=condition=Ready inferenceservice/iris-classifier -n kserve --timeout=600s + + # Check the pods + kubectl get pods -l serving.kserve.io/inferenceservice=iris-classifier -n kserve + + # Check logs if needed + kubectl logs -l serving.kserve.io/inferenceservice=iris-classifier -n kserve -c kserve-container + ``` + +## Step 3: Test from JupyterHub (Recommended) + +1. **Upload the Test Notebook**: + - Upload `03-test-inference.ipynb` to your JupyterHub workspace + +2. **Run the Notebook**: + - Execute all cells in `03-test-inference.ipynb` + - The notebook will: + - Send prediction requests to the KServe endpoint + - Test single and batch predictions + - Display results with expected vs actual comparisons + - Allow you to try custom inputs + +3. **Expected Results**: + + ```plain + Test Case 1: Typical Setosa + Features: [5.1, 3.5, 1.4, 0.2] + Expected: Iris Setosa + Predicted: Iris Setosa + Status: ✓ PASS + ``` + +## Step 4: Test from Kubernetes Pod (Alternative) + +After testing in JupyterHub, you can also test from Kubernetes Pods for automated testing or CI/CD integration. 
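+
+Before running either option below, it can help to confirm that the predictor is serving at all; the v2 protocol exposes a per-model readiness endpoint. A quick sketch, assuming the `iris-classifier` service deployed in Step 2:
+
+```bash
+# One-off pod that probes the v2 readiness endpoint of the deployed model
+kubectl run isvc-ready-check --image=curlimages/curl --rm -it --restart=Never -- \
+  curl -s http://iris-classifier-predictor.kserve.svc.cluster.local/v2/models/iris-classifier/ready
+```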
+
+### Option 1: Automated Test with Python (Recommended)
+
+```bash
+# Run the test job
+kubectl apply -f 04-test-inference-job.yaml
+
+# Check logs
+kubectl logs job/test-iris-inference -n kserve
+
+# Expected output:
+# Test Case 1:
+#   Input: [5.1, 3.5, 1.4, 0.2]
+#   Expected: setosa
+#   Predicted: setosa (class 0)
+#   Status: ✓ PASS
+```
+
+### Option 2: Manual Test from a Pod
+
+```bash
+# Start a test pod
+kubectl run test-pod --image=curlimages/curl --rm -it --restart=Never -- sh
+
+# Inside the pod, run:
+curl -X POST \
+  http://iris-classifier-predictor.kserve.svc.cluster.local/v2/models/iris-classifier/infer \
+  -H "Content-Type: application/json" \
+  -d '{"inputs": [{"name": "input-0", "shape": [1, 4], "datatype": "FP64", "data": [[5.1, 3.5, 1.4, 0.2]]}]}'
+```
+
+## Model Prediction Examples
+
+### Single Prediction (v2 Protocol)
+
+```json
+// Request
+{
+  "inputs": [
+    {
+      "name": "input-0",
+      "shape": [1, 4],
+      "datatype": "FP64",
+      "data": [[5.1, 3.5, 1.4, 0.2]]  // Sepal length, Sepal width, Petal length, Petal width
+    }
+  ]
+}
+
+// Response
+{
+  "outputs": [
+    {
+      "name": "output-0",
+      "shape": [1],
+      "datatype": "INT64",
+      "data": [0]  // 0=setosa, 1=versicolor, 2=virginica
+    }
+  ]
+}
+```
+
+### Batch Prediction (v2 Protocol)
+
+```json
+// Request
+{
+  "inputs": [
+    {
+      "name": "input-0",
+      "shape": [3, 4],
+      "datatype": "FP64",
+      "data": [
+        [5.1, 3.5, 1.4, 0.2],  // Setosa
+        [6.7, 3.0, 5.2, 2.3],  // Virginica
+        [5.9, 3.0, 4.2, 1.5]   // Versicolor
+      ]
+    }
+  ]
+}
+
+// Response
+{
+  "outputs": [
+    {
+      "name": "output-0",
+      "shape": [3],
+      "datatype": "INT64",
+      "data": [0, 2, 1]
+    }
+  ]
+}
+```
+
+## Troubleshooting
+
+### InferenceService Not Ready
+
+```bash
+# Check events
+kubectl describe inferenceservice iris-classifier -n kserve
+
+# Check pod logs
+kubectl logs -l serving.kserve.io/inferenceservice=iris-classifier -n kserve -c kserve-container
+```
+
+### S3/MinIO Connection Issues
+
+```bash
+# Verify S3 credentials secret
+kubectl get secret kserve-s3-credentials -n kserve -o yaml
+
+# Test MinIO access from a pod (--command makes sh the container command,
+# since the amazon/aws-cli image's entrypoint is the aws binary)
+kubectl run minio-test --image=amazon/aws-cli --rm -it --restart=Never --command -- \
+  sh -c "AWS_ACCESS_KEY_ID=minioadmin AWS_SECRET_ACCESS_KEY=minioadmin aws --endpoint-url=http://minio.minio.svc.cluster.local:9000 s3 ls s3://mlflow/"
+```
+
+### Model Not Found
+
+```bash
+# Verify the model exists in MinIO Console
+# Access MinIO Console at the configured MINIO_HOST
+# Navigate to mlflow bucket and verify the model path
+# The path should be: EXPERIMENT_ID/models/MODEL_ID/artifacts/
+
+# Example: 2/models/m-28620b840353444385fa8e62335decf5/artifacts/
+```
+
+### Prediction Errors
+
+```bash
+# Check model format and KServe runtime compatibility
+kubectl logs -l serving.kserve.io/inferenceservice=iris-classifier -n kserve
+```
+
+## Cleanup
+
+```bash
+# Delete InferenceService
+kubectl delete inferenceservice iris-classifier -n kserve
+
+# Delete test job
+kubectl delete job test-iris-inference -n kserve
+```
+
+## Next Steps
+
+- Try different models (XGBoost, TensorFlow, PyTorch)
+- Add model versioning and A/B testing
+- Implement canary deployments
+- Add monitoring and observability
+- Scale the InferenceService based on load
+
+## References
+
+- [KServe Documentation](https://kserve.github.io/website/)
+- [MLflow Documentation](https://mlflow.org/docs/latest/index.html)
+- [KServe Model Serving](https://kserve.github.io/website/latest/modelserving/v1beta1/sklearn/v2/)
diff --git a/justfile b/justfile
index 8e09ad2..4862610 100644
--- a/justfile
+++ 
b/justfile
@@ -7,6 +7,7 @@ default:
     @just --list --unsorted --list-submodules
 
 mod airflow
+mod cert-manager
 mod ch-ui
 mod clickhouse
 mod dagster
@@ -17,6 +18,7 @@ mod goldilocks
 mod keycloak
 mod jupyterhub
 mod k8s
+mod kserve
 mod lakekeeper
 mod longhorn
 mod metabase
diff --git a/kserve/.gitignore b/kserve/.gitignore
new file mode 100644
index 0000000..7f47975
--- /dev/null
+++ b/kserve/.gitignore
@@ -0,0 +1 @@
+values.yaml
diff --git a/kserve/README.md b/kserve/README.md
new file mode 100644
index 0000000..2e68c95
--- /dev/null
+++ b/kserve/README.md
@@ -0,0 +1,300 @@
+# KServe
+
+KServe is a standard Model Inference Platform on Kubernetes for Machine Learning and Generative AI. It provides a standardized way to deploy, serve, and manage ML models across different frameworks.
+
+## Features
+
+- **Multi-Framework Support**: TensorFlow, PyTorch, scikit-learn, XGBoost, Hugging Face, Triton, and more
+- **Deployment Modes**:
+  - **RawDeployment (Standard)**: Uses native Kubernetes Deployments without Knative
+  - **Serverless (Knative)**: Auto-scaling with scale-to-zero capability
+- **Model Storage**: Support for S3, GCS, Azure Blob, PVC, and more
+- **Inference Protocols**: REST and gRPC
+- **Advanced Features**: Canary deployments, traffic splitting, explainability, outlier detection
+
+## Prerequisites
+
+- Kubernetes cluster (installed via `just k8s::install`)
+- Longhorn storage (installed via `just longhorn::install`)
+- **cert-manager** (required, installed via `just cert-manager::install`)
+- MinIO (optional, for S3-compatible model storage via `just minio::install`)
+- Prometheus (optional, for monitoring via `just prometheus::install`)
+
+## Installation
+
+### Basic Installation
+
+```bash
+# Install cert-manager (required)
+just cert-manager::install
+
+# Install KServe with default settings (RawDeployment mode)
+just kserve::install
+```
+
+During installation, you will be prompted for:
+
+- **Prometheus Monitoring**: Whether to enable ServiceMonitor (if Prometheus is installed)
+
+The domain for inference endpoints is configured via the `KSERVE_DOMAIN` environment variable (default: `cluster.local`).
+
+### Environment Variables
+
+Key environment variables (set via `.env.local` or environment):
+
+```bash
+KSERVE_NAMESPACE=kserve              # Namespace for KServe
+KSERVE_CHART_VERSION=v0.16.0         # KServe Helm chart version
+KSERVE_DEPLOYMENT_MODE=RawDeployment # Deployment mode (RawDeployment or Serverless)
+KSERVE_DOMAIN=cluster.local          # Base domain for inference endpoints
+MONITORING_ENABLED=true              # Enable Prometheus monitoring
+MINIO_NAMESPACE=minio                # MinIO namespace (if using MinIO)
+```
+
+### Domain Configuration
+
+KServe uses the `KSERVE_DOMAIN` to construct URLs for inference endpoints.
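+
+To check which URL the controller actually constructed for a deployed service, read it from the InferenceService status. A quick sketch, assuming the `sklearn-iris` service used in Usage below:
+
+```bash
+# The controller publishes the resolved endpoint in .status.url
+kubectl get inferenceservice sklearn-iris -o jsonpath='{.status.url}'
+```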
+
+**Internal Access Only (Default):**
+
+```bash
+KSERVE_DOMAIN=cluster.local
+```
+
+- InferenceServices are accessible only within the cluster
+- URLs: `http://<name>.<namespace>.svc.cluster.local`
+- No external Ingress configuration needed
+- Recommended for development and testing
+
+**External Access:**
+
+```bash
+KSERVE_DOMAIN=example.com
+```
+
+- InferenceServices are accessible from outside the cluster
+- URLs: `https://<name>.<namespace>.example.com`
+- Requires Traefik Ingress configuration
+- DNS records must point to your cluster
+- Recommended for production deployments
+
+## Usage
+
+### Check Status
+
+```bash
+# View status of KServe components
+just kserve::status
+
+# View controller logs
+just kserve::logs
+```
+
+### Deploy a Model
+
+Create an `InferenceService` resource:
+
+```yaml
+apiVersion: serving.kserve.io/v1beta1
+kind: InferenceService
+metadata:
+  name: sklearn-iris
+  namespace: default
+spec:
+  predictor:
+    sklearn:
+      storageUri: s3://models/sklearn/iris
+```
+
+Apply the resource:
+
+```bash
+kubectl apply -f inferenceservice.yaml
+```
+
+### Access Inference Endpoint
+
+```bash
+# Get inference service URL
+kubectl get inferenceservice sklearn-iris
+```
+
+**For cluster.local (internal access):**
+
+```bash
+# From within the cluster
+curl -X POST http://sklearn-iris.default.svc.cluster.local/v1/models/sklearn-iris:predict \
+  -H "Content-Type: application/json" \
+  -d '{"instances": [[6.8, 2.8, 4.8, 1.4]]}'
+```
+
+**For external domain:**
+
+```bash
+# From anywhere (requires DNS and Ingress configuration)
+curl -X POST https://sklearn-iris.default.example.com/v1/models/sklearn-iris:predict \
+  -H "Content-Type: application/json" \
+  -d '{"instances": [[6.8, 2.8, 4.8, 1.4]]}'
+```
+
+## Storage Configuration
+
+### Using MinIO (S3-compatible)
+
+If MinIO is installed, KServe will automatically configure S3 credentials:
+
+```bash
+# Storage secret is created automatically during installation
+kubectl get secret kserve-s3-credentials -n kserve
+```
+
+**External Secrets Integration:**
+
+- When External Secrets Operator is available:
+  - Credentials are retrieved directly from Vault at `minio/admin`
+  - ExternalSecret resource syncs credentials to Kubernetes Secret
+  - Secret includes KServe-specific annotations for S3 endpoint configuration
+  - No duplicate storage needed - references existing MinIO credentials
+- When External Secrets Operator is not available:
+  - Credentials are retrieved from MinIO Secret
+  - Kubernetes Secret is created directly with annotations
+  - Credentials are also backed up to Vault at `kserve/storage` if available
+
+Models can be stored in MinIO buckets:
+
+```bash
+# Create a bucket for models
+just minio::create-bucket models
+
+# Upload model files to MinIO
+# Then reference in InferenceService: s3://models/path/to/model
+```
+
+### Using Other Storage
+
+KServe supports various storage backends:
+
+- **S3**: AWS S3 or compatible services
+- **GCS**: Google Cloud Storage
+- **Azure**: Azure Blob Storage
+- **PVC**: Kubernetes Persistent Volume Claims
+- **HTTP/HTTPS**: Direct URLs
+
+## Supported Frameworks
+
+The following serving runtimes are enabled by default:
+
+- **scikit-learn**: sklearn models
+- **XGBoost**: XGBoost models
+- **MLServer**: Multi-framework server (sklearn, XGBoost, etc.)
+- **Triton**: NVIDIA Triton Inference Server
+- **TensorFlow**: TensorFlow models
+- **PyTorch**: PyTorch models via TorchServe
+- **Hugging Face**: Transformer models
+
+## Advanced Configuration
+
+### Custom Serving Runtimes
+
+You can create custom `ClusterServingRuntime` or `ServingRuntime` resources for specialized model servers.
+
+### Prometheus Monitoring
+
+When monitoring is enabled, KServe controller metrics are exposed and scraped by Prometheus:
+
+```bash
+# View metrics in Grafana
+# Metrics include: inference request rates, latencies, error rates
+```
+
+## Deployment Modes
+
+### RawDeployment (Standard)
+
+- Uses standard Kubernetes Deployments, Services, and Ingress
+- No Knative dependency
+- Simpler setup, more control over resources
+- Manual scaling configuration required
+
+### Serverless (Knative)
+
+- Requires Knative Serving installation
+- Auto-scaling with scale-to-zero
+- Advanced traffic management
+- Better resource utilization for sporadic workloads
+
+## Examples
+
+### Iris Classification with MLflow
+
+A complete end-to-end example demonstrating model serving with KServe:
+
+- Train an Iris classification model in JupyterHub
+- Register the model to MLflow Model Registry
+- Deploy the registered model with KServe InferenceService
+- Test inference using v2 protocol from JupyterHub notebooks and Kubernetes Jobs
+
+This example demonstrates:
+- Converting MLflow artifact paths to KServe storageUri
+- Using MLflow format runtime (with automatic dependency installation)
+- Testing with both single and batch predictions
+- Using v2 Open Inference Protocol
+
+See: [`examples/kserve-mlflow-iris`](../examples/kserve-mlflow-iris/README.md)
+
+## Uninstallation
+
+```bash
+# Remove KServe (also deletes its CRDs)
+just kserve::uninstall
+```
+
+This will:
+
+- Uninstall the KServe controller Helm release
+- Uninstall KServe CRDs
+- Delete storage secrets
+- Delete namespace
+
+**Warning**: Uninstalling will remove all InferenceService resources.
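+
+If the InferenceService specs should survive the uninstall, snapshot them first (the file name is arbitrary):
+
+```bash
+# Save all InferenceService manifests so they can be re-applied after a reinstall
+kubectl get inferenceservices -A -o yaml > inferenceservices-backup.yaml
+```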
+
+## Troubleshooting
+
+### Check Controller Logs
+
+```bash
+just kserve::logs
+```
+
+### View InferenceService Status
+
+```bash
+kubectl get inferenceservice -A
+kubectl describe inferenceservice <name> -n <namespace>
+```
+
+### Check Predictor Pods
+
+```bash
+kubectl get pods -l serving.kserve.io/inferenceservice=<name>
+kubectl logs <pod-name>
+```
+
+### Storage Issues
+
+If models fail to download:
+
+```bash
+# Check storage initializer logs
+kubectl logs <pod-name> -c storage-initializer
+
+# Verify S3 credentials
+kubectl get secret kserve-s3-credentials -n kserve -o yaml
+```
+
+## References
+
+- [KServe Documentation](https://kserve.github.io/website/)
+- [KServe GitHub](https://github.com/kserve/kserve)
+- [KServe Examples](https://github.com/kserve/kserve/tree/master/docs/samples)
+- [Supported ML Frameworks](https://kserve.github.io/website/latest/modelserving/v1beta1/serving_runtime/)
diff --git a/kserve/justfile b/kserve/justfile
new file mode 100644
index 0000000..f78e10a
--- /dev/null
+++ b/kserve/justfile
@@ -0,0 +1,264 @@
+set fallback := true
+
+export KSERVE_NAMESPACE := env("KSERVE_NAMESPACE", "kserve")
+export KSERVE_CHART_VERSION := env("KSERVE_CHART_VERSION", "v0.16.0")
+export KSERVE_DEPLOYMENT_MODE := env("KSERVE_DEPLOYMENT_MODE", "RawDeployment")
+export KSERVE_DOMAIN := env("KSERVE_DOMAIN", "cluster.local")
+export MONITORING_ENABLED := env("MONITORING_ENABLED", "")
+export PROMETHEUS_NAMESPACE := env("PROMETHEUS_NAMESPACE", "monitoring")
+export MINIO_NAMESPACE := env("MINIO_NAMESPACE", "minio")
+export EXTERNAL_SECRETS_NAMESPACE := env("EXTERNAL_SECRETS_NAMESPACE", "external-secrets")
+export K8S_VAULT_NAMESPACE := env("K8S_VAULT_NAMESPACE", "vault")
+
+[private]
+default:
+    @just --list --unsorted --list-submodules
+
+# Create namespace
+create-namespace:
+    @kubectl get namespace ${KSERVE_NAMESPACE} &>/dev/null || \
+        kubectl create namespace ${KSERVE_NAMESPACE}
+
+# Delete namespace
+delete-namespace:
+    @kubectl delete namespace ${KSERVE_NAMESPACE} --ignore-not-found
+
+# Install KServe CRDs
+install-crds:
+    #!/bin/bash
+    set -euo pipefail
+    echo "Installing KServe CRDs..."
+    helm upgrade --cleanup-on-fail --install kserve-crd oci://ghcr.io/kserve/charts/kserve-crd \
+        --version ${KSERVE_CHART_VERSION} -n ${KSERVE_NAMESPACE} --create-namespace --wait
+    echo "KServe CRDs installed successfully"
+
+# Uninstall KServe CRDs
+uninstall-crds:
+    #!/bin/bash
+    set -euo pipefail
+    echo "Uninstalling KServe CRDs..."
+    helm uninstall kserve-crd -n ${KSERVE_NAMESPACE} --ignore-not-found
+    echo "KServe CRDs uninstalled"
+
+# Setup S3 storage secret for model storage
+setup-storage:
+    #!/bin/bash
+    set -euo pipefail
+    echo "Setting up S3 storage secret for KServe..."
+    just create-namespace
+
+    if helm status external-secrets -n ${EXTERNAL_SECRETS_NAMESPACE} &>/dev/null; then
+        echo "External Secrets Operator detected. Creating ExternalSecret..."
+        echo "Using MinIO credentials from Vault (minio/admin)..."
+
+        kubectl delete secret kserve-s3-credentials -n ${KSERVE_NAMESPACE} --ignore-not-found
+        kubectl delete externalsecret kserve-s3-external-secret -n ${KSERVE_NAMESPACE} --ignore-not-found
+
+        gomplate -f storage-external-secret.gomplate.yaml | kubectl apply -f -
+
+        echo "Waiting for ExternalSecret to sync..."
+        kubectl wait --for=condition=Ready externalsecret/kserve-s3-external-secret \
+            -n ${KSERVE_NAMESPACE} --timeout=60s
+        echo "ExternalSecret synced successfully"
+    else
+        echo "External Secrets not available. Creating Kubernetes Secret directly..."
+
+        if ! 
kubectl get secret minio -n ${MINIO_NAMESPACE} &>/dev/null; then + echo "Error: MinIO root credentials not found" + echo "Please install MinIO first with 'just minio::install'" + exit 1 + fi + + accesskey=$(kubectl get secret minio -n ${MINIO_NAMESPACE} \ + -o jsonpath='{.data.rootUser}' | base64 --decode) + secretkey=$(kubectl get secret minio -n ${MINIO_NAMESPACE} \ + -o jsonpath='{.data.rootPassword}' | base64 --decode) + + kubectl delete secret kserve-s3-credentials -n ${KSERVE_NAMESPACE} --ignore-not-found + + kubectl create secret generic kserve-s3-credentials -n ${KSERVE_NAMESPACE} \ + --from-literal=AWS_ACCESS_KEY_ID="${accesskey}" \ + --from-literal=AWS_SECRET_ACCESS_KEY="${secretkey}" + + kubectl annotate secret kserve-s3-credentials -n ${KSERVE_NAMESPACE} \ + serving.kserve.io/s3-endpoint="minio.${MINIO_NAMESPACE}.svc.cluster.local:9000" \ + serving.kserve.io/s3-usehttps="0" \ + serving.kserve.io/s3-region="us-east-1" \ + serving.kserve.io/s3-useanoncredential="false" \ + --overwrite + echo "Kubernetes Secret created" + + if helm status vault -n ${K8S_VAULT_NAMESPACE} &>/dev/null; then + just vault::put kserve/storage accesskey="${accesskey}" secretkey="${secretkey}" + echo "Storage credentials also stored in Vault for backup" + fi + fi + + echo "S3 storage secret created successfully" + +# Delete storage secret +delete-storage: + @kubectl delete secret kserve-s3-credentials -n ${KSERVE_NAMESPACE} --ignore-not-found + @kubectl delete externalsecret kserve-s3-external-secret -n ${KSERVE_NAMESPACE} --ignore-not-found + +# Install KServe +install: + #!/bin/bash + set -euo pipefail + echo "Installing KServe..." + just create-namespace + + # Check cert-manager prerequisite + if ! kubectl get namespace cert-manager &>/dev/null; then + echo "Error: cert-manager is not installed" + echo "Please install cert-manager first with 'just cert-manager::install'" + exit 1 + fi + + echo "Waiting for cert-manager webhook to be ready..." + kubectl wait --for=condition=ready pod -l app.kubernetes.io/name=webhook \ + -n cert-manager --timeout=300s + echo "cert-manager webhook is ready" + + if helm status kube-prometheus-stack -n ${PROMETHEUS_NAMESPACE} &>/dev/null; then + if [ -z "${MONITORING_ENABLED}" ]; then + if gum confirm "Enable Prometheus monitoring (ServiceMonitor)?"; then + MONITORING_ENABLED="true" + else + MONITORING_ENABLED="false" + fi + fi + else + MONITORING_ENABLED="false" + fi + + just install-crds + + if kubectl get service minio -n ${MINIO_NAMESPACE} &>/dev/null; then + echo "MinIO detected. Setting up S3 storage..." + just setup-storage + else + echo "MinIO not found. Skipping S3 storage setup." + echo "Models will need to use other storage options." + fi + + echo "Generating Helm values..." + gomplate -f values.gomplate.yaml -o values.yaml + + echo "Installing KServe controller..." + helm upgrade --cleanup-on-fail --install kserve \ + oci://ghcr.io/kserve/charts/kserve --version ${KSERVE_CHART_VERSION} \ + -n ${KSERVE_NAMESPACE} --wait --timeout=10m -f values.yaml + + if [ "${MONITORING_ENABLED}" = "true" ]; then + echo "Enabling Prometheus monitoring for namespace ${KSERVE_NAMESPACE}..." 
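+        # This label is a project convention, assumed to be what the Prometheus stack's monitors select on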
+ kubectl label namespace ${KSERVE_NAMESPACE} buun.channel/enable-monitoring=true --overwrite + echo "✓ Monitoring enabled" + fi + + echo "" + echo "=== KServe installed ===" + echo "Namespace: ${KSERVE_NAMESPACE}" + echo "Deployment mode: ${KSERVE_DEPLOYMENT_MODE}" + echo "Domain: ${KSERVE_DOMAIN}" + echo "" + echo "To deploy an inference service, create an InferenceService resource" + echo "See: https://kserve.github.io/website/latest/get_started/first_isvc/" + +# Upgrade KServe +upgrade: + #!/bin/bash + set -euo pipefail + echo "Upgrading KServe..." + + if helm status kube-prometheus-stack -n ${PROMETHEUS_NAMESPACE} &>/dev/null; then + if [ -z "${MONITORING_ENABLED}" ]; then + if gum confirm "Enable Prometheus monitoring (ServiceMonitor)?"; then + MONITORING_ENABLED="true" + else + MONITORING_ENABLED="false" + fi + fi + else + MONITORING_ENABLED="false" + fi + + echo "Upgrading KServe CRDs..." + just install-crds + + echo "Generating Helm values..." + gomplate -f values.gomplate.yaml -o values.yaml + + echo "Upgrading KServe controller..." + helm upgrade kserve oci://ghcr.io/kserve/charts/kserve \ + --version ${KSERVE_CHART_VERSION} -n ${KSERVE_NAMESPACE} --wait --timeout=10m \ + -f values.yaml + + echo "KServe upgraded successfully" + +# Uninstall KServe +uninstall: + #!/bin/bash + set -euo pipefail + echo "Uninstalling KServe..." + helm uninstall kserve -n ${KSERVE_NAMESPACE} --ignore-not-found + just uninstall-crds + just delete-storage + just delete-namespace + echo "KServe uninstalled" + +# Get KServe controller logs +logs: + @kubectl logs -n ${KSERVE_NAMESPACE} -l control-plane=kserve-controller-manager --tail=100 -f + +# Get status of KServe components +status: + #!/bin/bash + set -euo pipefail + echo "=== KServe Components Status ===" + echo "" + echo "Namespace: ${KSERVE_NAMESPACE}" + echo "" + echo "Pods:" + kubectl get pods -n ${KSERVE_NAMESPACE} + echo "" + echo "Services:" + kubectl get services -n ${KSERVE_NAMESPACE} + echo "" + echo "InferenceServices:" + kubectl get inferenceservices -A + +# Convert MLflow artifact path to KServe storageUri +storage-uri artifact_path='': + #!/bin/bash + set -euo pipefail + + if [ -z "{{ artifact_path }}" ]; then + read -p "Enter MLflow artifact path from Model Registry (e.g., mlflow-artifacts:/2/models/MODEL_ID/artifacts): " artifact_path + else + artifact_path="{{ artifact_path }}" + fi + + # Convert mlflow-artifacts:/ to s3://mlflow/ + storage_uri="${artifact_path/mlflow-artifacts:/s3://mlflow}" + + # Remove trailing filename if present (e.g., MLmodel, model.pkl) + if [[ "$storage_uri" == */artifacts/* ]] && [[ "$storage_uri" != */artifacts ]]; then + # Remove filename after /artifacts/ + storage_uri=$(echo "$storage_uri" | sed 's|/artifacts/.*|/artifacts|') + fi + + # Check if this is a run-based path (not model registry path) + if [[ "$storage_uri" =~ s3://mlflow/[0-9]+/[a-f0-9]{32}/artifacts ]]; then + echo "Warning: This appears to be a run-based path, not a model registry path." 
+        echo "KServe requires the model registry path which can be found in:"
+        echo "  MLflow UI → Models → [Model Name] → [Version] → artifact_path"
+        echo ""
+        echo "Expected format: mlflow-artifacts:/EXPERIMENT_ID/models/MODEL_ID/artifacts"
+        echo "Your input: $artifact_path"
+        echo ""
+        echo "Output (may not work): $storage_uri"
+        exit 1
+    fi
+
+    echo "$storage_uri"
diff --git a/kserve/storage-external-secret.gomplate.yaml b/kserve/storage-external-secret.gomplate.yaml
new file mode 100644
index 0000000..d7fde4f
--- /dev/null
+++ b/kserve/storage-external-secret.gomplate.yaml
@@ -0,0 +1,33 @@
+apiVersion: external-secrets.io/v1
+kind: ExternalSecret
+metadata:
+  name: kserve-s3-external-secret
+  namespace: {{ .Env.KSERVE_NAMESPACE }}
+spec:
+  refreshInterval: 1h
+  secretStoreRef:
+    name: vault-secret-store
+    kind: ClusterSecretStore
+  target:
+    name: kserve-s3-credentials
+    creationPolicy: Owner
+    template:
+      type: Opaque
+      metadata:
+        annotations:
+          serving.kserve.io/s3-endpoint: "minio.{{ .Env.MINIO_NAMESPACE }}.svc.cluster.local:9000"
+          serving.kserve.io/s3-usehttps: "0"
+          serving.kserve.io/s3-region: "us-east-1"
+          serving.kserve.io/s3-useanoncredential: "false"
+      data:
+        AWS_ACCESS_KEY_ID: "{{ `{{ .accesskey }}` }}"
+        AWS_SECRET_ACCESS_KEY: "{{ `{{ .secretkey }}` }}"
+  data:
+    - secretKey: accesskey
+      remoteRef:
+        key: minio/admin
+        property: username
+    - secretKey: secretkey
+      remoteRef:
+        key: minio/admin
+        property: password
diff --git a/kserve/values.gomplate.yaml b/kserve/values.gomplate.yaml
new file mode 100644
index 0000000..f8a416e
--- /dev/null
+++ b/kserve/values.gomplate.yaml
@@ -0,0 +1,84 @@
+# KServe Helm Chart Values
+# Generated using gomplate
+
+kserve:
+  version: v0.16.0
+
+  controller:
+    # Deployment mode: "RawDeployment" uses plain Kubernetes Deployments (no Knative), "Serverless" requires Knative
+    deploymentMode: {{ .Env.KSERVE_DEPLOYMENT_MODE }}
+
+    gateway:
+      domain: {{ .Env.KSERVE_DOMAIN }}
+      {{- if eq .Env.KSERVE_DEPLOYMENT_MODE "RawDeployment" }}
+      ingressGateway:
+        className: traefik
+      {{- end }}
+
+    # Enable Prometheus metrics
+    {{- if eq .Env.MONITORING_ENABLED "true" }}
+    metrics:
+      port: 8080
+    podAnnotations:
+      prometheus.io/scrape: "true"
+      prometheus.io/port: "8080"
+      prometheus.io/path: "/metrics"
+    {{- end }}
+
+  # Storage initializer configuration
+  storage:
+    s3:
+      enabled: true
+      {{- if ne .Env.MINIO_NAMESPACE "" }}
+      endpoint: "minio.{{ .Env.MINIO_NAMESPACE }}.svc.cluster.local:9000"
+      useHttps: false
+      region: "us-east-1"
+      verifySSL: false
+      useVirtualBucket: false
+      useAnonymousCredential: false
+      {{- end }}
+    storageInitializer:
+      resources:
+        requests:
+          memory: "100Mi"
+          cpu: "100m"
+        limits:
+          memory: "1Gi"
+          cpu: "1"
+
+  # Model agent configuration
+  agent:
+    image: kserve/agent
+    tag: v0.16.0
+
+  # Router configuration
+  router:
+    image: kserve/router
+    tag: v0.16.0
+
+  # Serving runtimes - enable commonly used ones
+  servingRuntimes:
+    sklearn:
+      enabled: true
+    xgboost:
+      enabled: true
+    mlserver:
+      enabled: true
+    triton:
+      enabled: true
+    tensorflow:
+      enabled: true
+    pytorch:
+      enabled: true
+    huggingfaceserver:
+      enabled: true
+
+{{- if eq .Env.MONITORING_ENABLED "true" }}
+# ServiceMonitor for Prometheus metrics collection
+serviceMonitor:
+  enabled: true
+  namespace: {{ .Env.KSERVE_NAMESPACE }}
+  labels:
+    release: kube-prometheus-stack
+  interval: 30s
+{{- end }}
diff --git a/mlflow/README.md b/mlflow/README.md
index 9587c9f..8941162 100644
--- a/mlflow/README.md
+++ b/mlflow/README.md
@@ -586,6 +586,19 @@ docker run --rm localhost:30500/mlflow:3.6.0-oidc 
mlflow --version **Note**: All Docker commands run on the remote host specified by `DOCKER_HOST`. +## Examples + +### Iris Classification with KServe + +A complete end-to-end example demonstrating the integration of JupyterHub, MLflow, and KServe: + +- Train an Iris classification model in JupyterHub +- Register the model to MLflow Model Registry +- Deploy the model with KServe InferenceService +- Test inference from JupyterHub notebooks and Kubernetes Jobs + +See: [`examples/kserve-mlflow-iris`](../examples/kserve-mlflow-iris/README.md) + ## Custom Image ### Dockerfile