feat(kserve): install KServe

examples/kserve-mlflow-iris/01-train-and-register.ipynb (new file, 202 lines)
@@ -0,0 +1,202 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Iris Classification with MLflow and KServe\n",
    "\n",
    "This notebook demonstrates:\n",
    "1. Training a simple scikit-learn model on the Iris dataset\n",
    "2. Logging the model to MLflow\n",
    "3. Preparing the model for deployment with KServe"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. Setup and Install Dependencies"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Install required packages\n",
    "!pip install mlflow scikit-learn boto3 -q"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import mlflow\n",
    "import mlflow.sklearn\n",
    "from sklearn.datasets import load_iris\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.metrics import accuracy_score, classification_report"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": "## 2. Configure MLflow\n\nSet the MLflow tracking URI and authentication credentials."
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": "# MLflow configuration\nMLFLOW_TRACKING_URI = os.getenv('MLFLOW_TRACKING_URI', 'http://mlflow.mlflow.svc.cluster.local')\nmlflow.set_tracking_uri(MLFLOW_TRACKING_URI)\n\n# MLflow authentication\nos.environ['MLFLOW_TRACKING_USERNAME'] = os.getenv('MLFLOW_TRACKING_USERNAME', '')\nos.environ['MLFLOW_TRACKING_PASSWORD'] = os.getenv('MLFLOW_TRACKING_PASSWORD', '')\n\nprint(f\"MLflow Tracking URI: {MLFLOW_TRACKING_URI}\")\nprint(f\"MLflow Username: {os.environ['MLFLOW_TRACKING_USERNAME']}\")"
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. Load and Prepare Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load Iris dataset\n",
    "iris = load_iris()\n",
    "X = iris.data\n",
    "y = iris.target\n",
    "\n",
    "# Split data\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
    "\n",
    "print(f\"Training samples: {len(X_train)}\")\n",
    "print(f\"Test samples: {len(X_test)}\")\n",
    "print(f\"Classes: {iris.target_names}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4. Train Model with MLflow Tracking"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Set experiment\n",
    "experiment_name = \"iris-classification\"\n",
    "mlflow.set_experiment(experiment_name)\n",
    "\n",
    "# Start MLflow run\n",
    "with mlflow.start_run(run_name=\"logistic-regression\") as run:\n",
    "    # Train model\n",
    "    model = LogisticRegression(max_iter=200, random_state=42)\n",
    "    model.fit(X_train, y_train)\n",
    "\n",
    "    # Predictions\n",
    "    y_pred = model.predict(X_test)\n",
    "    accuracy = accuracy_score(y_test, y_pred)\n",
    "\n",
    "    # Log parameters\n",
    "    mlflow.log_param(\"model_type\", \"LogisticRegression\")\n",
    "    mlflow.log_param(\"max_iter\", 200)\n",
    "\n",
    "    # Log metrics\n",
    "    mlflow.log_metric(\"accuracy\", accuracy)\n",
    "\n",
    "    # Log model\n",
    "    mlflow.sklearn.log_model(\n",
    "        model,\n",
    "        \"model\",\n",
    "        registered_model_name=\"iris-classifier\"\n",
    "    )\n",
    "\n",
    "    print(f\"\\nRun ID: {run.info.run_id}\")\n",
    "    print(f\"Accuracy: {accuracy:.4f}\")\n",
    "    print(\"\\nClassification Report:\")\n",
    "    print(classification_report(y_test, y_pred, target_names=iris.target_names))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 5. Get Model Information for KServe Deployment"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": "# Get the latest version of the registered model\nclient = mlflow.tracking.MlflowClient()\nmodel_name = \"iris-classifier\"\n\n# Use search_model_versions instead of deprecated get_latest_versions\nmodel_versions = client.search_model_versions(f\"name='{model_name}'\")\nlatest_version = max(model_versions, key=lambda x: int(x.version))\n\nprint(\"\\n=== Model Information for KServe ===\")\nprint(f\"Model Name: {model_name}\")\nprint(f\"Version: {latest_version.version}\")\nprint(f\"Run ID: {latest_version.run_id}\")\nprint(f\"\\nArtifact URI: {latest_version.source}\")\nprint(\"\\nUse this information to configure the KServe InferenceService.\")"
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 6. Test Local Prediction"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Test with a sample\n",
    "sample_input = [[5.1, 3.5, 1.4, 0.2]]  # Should predict 'setosa'\n",
    "prediction = model.predict(sample_input)\n",
    "predicted_class = iris.target_names[prediction[0]]\n",
    "\n",
    "print(f\"\\nTest Input: {sample_input[0]}\")\n",
    "print(f\"Predicted Class: {predicted_class}\")\n",
    "print(\"\\nThis sample will be used to test the KServe deployment.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Next Steps\n",
    "\n",
    "1. Note the Model Name and Version from above\n",
    "2. Deploy the model using KServe with the InferenceService YAML\n",
    "3. Test the deployed model endpoint"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}

examples/kserve-mlflow-iris/02-deploy-model.yaml (new file, 44 lines)
@@ -0,0 +1,44 @@
apiVersion: serving.kserve.io/v1beta1
kind: InferenceService
metadata:
  name: iris-classifier
  namespace: kserve
  annotations:
    serving.kserve.io/secretName: kserve-s3-credentials
spec:
  predictor:
    model:
      modelFormat:
        name: mlflow
        version: "2"
      storageUri: s3://mlflow/EXPERIMENT_ID/models/MODEL_ID/artifacts
      resources:
        requests:
          cpu: "100m"
          memory: "512Mi"
        limits:
          cpu: "1000m"
          memory: "1Gi"
---
# Alternative: Using SKLearn Server (does not install requirements.txt)
# apiVersion: serving.kserve.io/v1beta1
# kind: InferenceService
# metadata:
#   name: iris-classifier
#   namespace: kserve
#   annotations:
#     serving.kserve.io/secretName: kserve-s3-credentials
# spec:
#   predictor:
#     model:
#       modelFormat:
#         name: sklearn
#         version: "1"
#       storageUri: s3://mlflow/EXPERIMENT_ID/models/MODEL_ID/artifacts
#       resources:
#         requests:
#           cpu: "100m"
#           memory: "256Mi"
#         limits:
#           cpu: "500m"
#           memory: "512Mi"

examples/kserve-mlflow-iris/03-test-inference.ipynb (new file, 313 lines)
@@ -0,0 +1,313 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Test Iris Classifier InferenceService\n",
    "\n",
    "This notebook demonstrates how to call the deployed KServe InferenceService from JupyterHub."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. Setup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Install requests if not already installed\n",
    "!pip install requests -q"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import requests\n",
    "import json\n",
    "import os"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Configure Endpoint\n",
    "\n",
    "The InferenceService is accessible via the cluster-internal service URL."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": "# KServe InferenceService endpoint\n# Format: http://<service-name>-predictor.<namespace>.svc.cluster.local/v2/models/<model-name>/infer\nINFERENCE_SERVICE_NAME = \"iris-classifier\"\nNAMESPACE = \"kserve\"\nENDPOINT = f\"http://{INFERENCE_SERVICE_NAME}-predictor.{NAMESPACE}.svc.cluster.local/v2/models/{INFERENCE_SERVICE_NAME}/infer\"\n\nprint(f\"Inference Endpoint: {ENDPOINT}\")"
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. Define Test Samples\n",
    "\n",
    "The Iris dataset has 4 features:\n",
    "1. Sepal length (cm)\n",
    "2. Sepal width (cm)\n",
    "3. Petal length (cm)\n",
    "4. Petal width (cm)\n",
    "\n",
    "Classes:\n",
    "- 0: Iris Setosa\n",
    "- 1: Iris Versicolor\n",
    "- 2: Iris Virginica"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Define class names\n",
    "CLASS_NAMES = [\"Iris Setosa\", \"Iris Versicolor\", \"Iris Virginica\"]\n",
    "\n",
    "# Test samples with expected predictions\n",
    "test_cases = [\n",
    "    {\n",
    "        \"name\": \"Typical Setosa\",\n",
    "        \"features\": [5.1, 3.5, 1.4, 0.2],\n",
    "        \"expected_class\": 0,\n",
    "        \"description\": \"Short petals, typical of Setosa\"\n",
    "    },\n",
    "    {\n",
    "        \"name\": \"Typical Virginica\",\n",
    "        \"features\": [6.7, 3.0, 5.2, 2.3],\n",
    "        \"expected_class\": 2,\n",
    "        \"description\": \"Long petals and sepals, typical of Virginica\"\n",
    "    },\n",
    "    {\n",
    "        \"name\": \"Typical Versicolor\",\n",
    "        \"features\": [5.9, 3.0, 4.2, 1.5],\n",
    "        \"expected_class\": 1,\n",
    "        \"description\": \"Medium-sized features, typical of Versicolor\"\n",
    "    },\n",
    "]\n",
    "\n",
    "print(f\"Prepared {len(test_cases)} test cases\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4. Test Single Prediction\n",
    "\n",
    "Send a single prediction request to the InferenceService."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": "def predict(features):\n    \"\"\"\n    Send a prediction request to the KServe InferenceService using the v2 protocol.\n\n    Args:\n        features: List of feature values [sepal_length, sepal_width, petal_length, petal_width]\n\n    Returns:\n        Predicted class (0, 1, or 2)\n    \"\"\"\n    payload = {\n        \"inputs\": [\n            {\n                \"name\": \"input-0\",\n                \"shape\": [1, 4],\n                \"datatype\": \"FP64\",\n                \"data\": [features]\n            }\n        ]\n    }\n\n    try:\n        response = requests.post(ENDPOINT, json=payload, timeout=10)\n        response.raise_for_status()\n        result = response.json()\n        return result['outputs'][0]['data'][0]\n    except requests.exceptions.RequestException as e:\n        print(f\"Error: {e}\")\n        if e.response is not None:\n            print(f\"Response: {e.response.text}\")\n        return None\n\n# Test with first sample\nsample = test_cases[0]\nprint(f\"Testing: {sample['name']}\")\nprint(f\"Features: {sample['features']}\")\nprint(f\"Description: {sample['description']}\")\nprint()\n\nprediction = predict(sample['features'])\nif prediction is not None:\n    print(f\"Predicted Class: {prediction} ({CLASS_NAMES[prediction]})\")\n    print(f\"Expected Class: {sample['expected_class']} ({CLASS_NAMES[sample['expected_class']]})\")\n    print(f\"Result: {'✓ PASS' if prediction == sample['expected_class'] else '✗ FAIL'}\")"
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 5. Test All Cases\n",
    "\n",
    "Run predictions for all test cases and display results."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(\"=\" * 80)\n",
    "print(\"Testing Iris Classifier InferenceService\")\n",
    "print(\"=\" * 80)\n",
    "print()\n",
    "\n",
    "results = []\n",
    "\n",
    "for i, test_case in enumerate(test_cases, 1):\n",
    "    print(f\"Test Case {i}: {test_case['name']}\")\n",
    "    print(f\"  Features: {test_case['features']}\")\n",
    "    print(f\"  Description: {test_case['description']}\")\n",
    "    print(f\"  Expected: {CLASS_NAMES[test_case['expected_class']]}\")\n",
    "\n",
    "    prediction = predict(test_case['features'])\n",
    "\n",
    "    if prediction is not None:\n",
    "        predicted_class_name = CLASS_NAMES[prediction]\n",
    "        is_correct = prediction == test_case['expected_class']\n",
    "        status = \"✓ PASS\" if is_correct else \"✗ FAIL\"\n",
    "\n",
    "        print(f\"  Predicted: {predicted_class_name}\")\n",
    "        print(f\"  Status: {status}\")\n",
    "\n",
    "        results.append({\n",
    "            'name': test_case['name'],\n",
    "            'expected': test_case['expected_class'],\n",
    "            'predicted': prediction,\n",
    "            'correct': is_correct\n",
    "        })\n",
    "    else:\n",
    "        print(\"  Status: ✗ ERROR\")\n",
    "        results.append({\n",
    "            'name': test_case['name'],\n",
    "            'expected': test_case['expected_class'],\n",
    "            'predicted': None,\n",
    "            'correct': False\n",
    "        })\n",
    "\n",
    "    print()\n",
    "\n",
    "# Summary\n",
    "print(\"=\" * 80)\n",
    "passed = sum(1 for r in results if r['correct'])\n",
    "total = len(results)\n",
    "print(f\"Test Summary: {passed}/{total} passed\")\n",
    "print(\"=\" * 80)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 6. Batch Prediction\n",
    "\n",
    "Send multiple samples in a single request for batch prediction."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": "def predict_batch(features_list):\n    \"\"\"\n    Send a batch prediction request to the KServe InferenceService using the v2 protocol.\n\n    Args:\n        features_list: List of feature arrays\n\n    Returns:\n        List of predicted classes\n    \"\"\"\n    payload = {\n        \"inputs\": [\n            {\n                \"name\": \"input-0\",\n                \"shape\": [len(features_list), 4],\n                \"datatype\": \"FP64\",\n                \"data\": features_list\n            }\n        ]\n    }\n\n    try:\n        response = requests.post(ENDPOINT, json=payload, timeout=10)\n        response.raise_for_status()\n        result = response.json()\n        return result['outputs'][0]['data']\n    except requests.exceptions.RequestException as e:\n        print(f\"Error: {e}\")\n        if e.response is not None:\n            print(f\"Response: {e.response.text}\")\n        return None\n\n# Prepare batch request\nbatch_features = [tc['features'] for tc in test_cases]\nprint(f\"Sending batch request with {len(batch_features)} samples...\")\nprint()\n\n# Send batch request\npredictions = predict_batch(batch_features)\n\nif predictions:\n    print(\"Batch Prediction Results:\")\n    print(\"-\" * 60)\n    for i, (test_case, prediction) in enumerate(zip(test_cases, predictions), 1):\n        print(f\"{i}. {test_case['name']}\")\n        print(f\"   Predicted: {CLASS_NAMES[prediction]}\")\n        print(f\"   Expected: {CLASS_NAMES[test_case['expected_class']]}\")\n        status = \"✓\" if prediction == test_case['expected_class'] else \"✗\"\n        print(f\"   {status}\")\n        print()"
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 7. Custom Prediction\n",
    "\n",
    "Try your own input values!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Enter your own values here\n",
    "# Format: [sepal_length, sepal_width, petal_length, petal_width]\n",
    "custom_features = [6.0, 3.0, 4.0, 1.5]\n",
    "\n",
    "print(f\"Custom Input: {custom_features}\")\n",
    "print(f\"  Sepal Length: {custom_features[0]} cm\")\n",
    "print(f\"  Sepal Width: {custom_features[1]} cm\")\n",
    "print(f\"  Petal Length: {custom_features[2]} cm\")\n",
    "print(f\"  Petal Width: {custom_features[3]} cm\")\n",
    "print()\n",
    "\n",
    "prediction = predict(custom_features)\n",
    "if prediction is not None:\n",
    "    print(f\"Prediction: {CLASS_NAMES[prediction]} (class {prediction})\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 8. Check InferenceService Status\n",
    "\n",
    "Verify the InferenceService is running properly."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Check if we can reach the endpoint\n",
    "import subprocess\n",
    "\n",
    "print(\"Checking InferenceService status...\")\n",
    "print()\n",
    "\n",
    "# Using kubectl from the notebook\n",
    "try:\n",
    "    result = subprocess.run(\n",
    "        [\"kubectl\", \"get\", \"inferenceservice\", INFERENCE_SERVICE_NAME, \"-n\", NAMESPACE],\n",
    "        capture_output=True,\n",
    "        text=True,\n",
    "        timeout=10\n",
    "    )\n",
    "    print(result.stdout)\n",
    "    if result.returncode != 0:\n",
    "        print(result.stderr)\n",
    "except Exception as e:\n",
    "    print(f\"Could not check status: {e}\")\n",
    "    print(\"This is normal if kubectl is not available in the notebook environment.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Summary\n",
    "\n",
    "You have successfully:\n",
    "1. ✅ Connected to the KServe InferenceService\n",
    "2. ✅ Sent single prediction requests\n",
    "3. ✅ Sent batch prediction requests\n",
    "4. ✅ Verified predictions against expected results\n",
    "\n",
    "## Next Steps\n",
    "\n",
    "- Try running the Kubernetes Job-based tests (see `04-test-inference-job.yaml`)\n",
    "- Deploy a new version of the model and compare predictions\n",
    "- Implement A/B testing with multiple model versions\n",
    "- Add monitoring and logging"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}

examples/kserve-mlflow-iris/04-test-inference-job.yaml (new file, 146 lines)
@@ -0,0 +1,146 @@
apiVersion: batch/v1
kind: Job
metadata:
  name: test-iris-inference
  namespace: kserve
spec:
  template:
    spec:
      containers:
        - name: test
          image: python:3.9-slim
          command:
            - /bin/sh
            - -c
            - |
              cat << 'EOF' | python
              import urllib.request
              import json

              # KServe endpoint (v2 protocol)
              url = "http://iris-classifier-predictor.kserve.svc.cluster.local/v2/models/iris-classifier/infer"

              # Iris class names
              class_names = ["setosa", "versicolor", "virginica"]

              # Test samples with expected results
              test_cases = [
                  {"data": [5.1, 3.5, 1.4, 0.2], "expected": "setosa"},
                  {"data": [6.7, 3.0, 5.2, 2.3], "expected": "virginica"},
                  {"data": [5.9, 3.0, 4.2, 1.5], "expected": "versicolor"},
              ]

              print("=" * 60)
              print("Testing Iris Classifier InferenceService")
              print("=" * 60)
              print(f"Endpoint: {url}")
              print()

              for i, test_case in enumerate(test_cases, 1):
                  print(f"Test Case {i}:")
                  print(f"  Input: {test_case['data']}")
                  print(f"  Expected: {test_case['expected']}")

                  # v2 protocol payload
                  payload = {
                      "inputs": [
                          {
                              "name": "input-0",
                              "shape": [1, 4],
                              "datatype": "FP64",
                              "data": [test_case['data']]
                          }
                      ]
                  }

                  try:
                      req = urllib.request.Request(
                          url,
                          data=json.dumps(payload).encode('utf-8'),
                          headers={'Content-Type': 'application/json'}
                      )
                      with urllib.request.urlopen(req) as response:
                          result = json.loads(response.read().decode('utf-8'))
                          prediction = result['outputs'][0]['data'][0]
                          predicted_class = class_names[prediction]

                          status = "✓ PASS" if predicted_class == test_case['expected'] else "✗ FAIL"
                          print(f"  Predicted: {predicted_class} (class {prediction})")
                          print(f"  Status: {status}")

                  except Exception as e:
                      print(f"  Error: {e}")

                  print()

              print("=" * 60)
              print("Test completed")
              print("=" * 60)
              EOF
      restartPolicy: Never
  backoffLimit: 1
---
# Alternative: curl-based quick test
# apiVersion: batch/v1
# kind: Job
# metadata:
#   name: test-iris-inference-curl
#   namespace: kserve
# spec:
#   template:
#     spec:
#       containers:
#         - name: test
#           image: curlimages/curl:latest
#           command:
#             - /bin/sh
#             - -c
#             - |
#               echo "Testing Iris Classifier Inference Service..."
#               echo ""
#               echo "Endpoint: http://iris-classifier-predictor.kserve.svc.cluster.local/v2/models/iris-classifier/infer"
#               echo ""
#               echo "Sending test request with sample data: [5.1, 3.5, 1.4, 0.2]"
#               echo "Expected prediction: class 0 (setosa)"
#               echo ""
#
#               curl -v -X POST \
#                 http://iris-classifier-predictor.kserve.svc.cluster.local/v2/models/iris-classifier/infer \
#                 -H "Content-Type: application/json" \
#                 -d '{
#                   "inputs": [
#                     {
#                       "name": "input-0",
#                       "shape": [1, 4],
#                       "datatype": "FP64",
#                       "data": [[5.1, 3.5, 1.4, 0.2]]
#                     }
#                   ]
#                 }'
#
#               echo ""
#               echo ""
#               echo "Testing with multiple samples..."
#               echo ""
#
#               curl -X POST \
#                 http://iris-classifier-predictor.kserve.svc.cluster.local/v2/models/iris-classifier/infer \
#                 -H "Content-Type: application/json" \
#                 -d '{
#                   "inputs": [
#                     {
#                       "name": "input-0",
#                       "shape": [3, 4],
#                       "datatype": "FP64",
#                       "data": [
#                         [5.1, 3.5, 1.4, 0.2],
#                         [6.7, 3.0, 5.2, 2.3],
#                         [5.9, 3.0, 4.2, 1.5]
#                       ]
#                     }
#                   ]
#                 }'
#
#               echo ""
#       restartPolicy: Never
#   backoffLimit: 1

examples/kserve-mlflow-iris/README.md (new file, 315 lines)
@@ -0,0 +1,315 @@
# KServe + MLflow + JupyterHub: Iris Classification Example

This example demonstrates an end-to-end machine learning workflow using:

- **JupyterHub**: Interactive development, model training, and testing
- **MLflow**: Model tracking and registry
- **MinIO**: Artifact storage (S3-compatible)
- **KServe**: Model serving and inference

## Workflow Overview

1. **📓 Train & Register** (`01-train-and-register.ipynb`) - Train the model in JupyterHub and register it to MLflow
2. **🚀 Deploy** (`02-deploy-model.yaml`) - Deploy the model with a KServe InferenceService
3. **🧪 Test from Notebook** (`03-test-inference.ipynb`) - Test inference from JupyterHub (Recommended)
4. **🔧 Test from Pod** (`04-test-inference-job.yaml`) - Automated testing from a Kubernetes Job

## Architecture

```plain
┌─────────────┐     ┌─────────┐     ┌────────┐     ┌─────────────────┐
│ JupyterHub  │────>│ MLflow  │────>│ MinIO  │<────│ KServe          │
│             │     │         │     │ (S3)   │     │ InferenceService│
│ 1. Train    │     │ Register│     │ Store  │     │ 2. Deploy       │
│    Model    │     │         │     │ Model  │     │    & Serve      │
└──────┬──────┘     └─────────┘     └────────┘     └──────────┬──────┘
       │                                                      │
       │ 3. Test from Notebook (Recommended)                  │
       └──────────────────────────────────────────────────────┘
                                                              │
                                                              │
                                        4. Test from Pod      │
                                        (Alternative)         │
                                                              v
                                                      ┌──────────────┐
                                                      │ Kubernetes   │
                                                      │ Test Job     │
                                                      └──────────────┘
```

## Prerequisites

Ensure the following components are installed:

```bash
# Check installations
kubectl get pods -n jupyterhub
kubectl get pods -n mlflow
kubectl get pods -n minio
kubectl get pods -n kserve
```

## Step 1: Train and Register Model in JupyterHub

1. **Access JupyterHub**:

   Access JupyterHub at the configured JUPYTERHUB_HOST.

2. **Upload the Notebook**:
   - Upload `01-train-and-register.ipynb` to your JupyterHub workspace

3. **Set Environment Variables** (in the notebook or terminal):

   ```bash
   # MLflow authentication (required if MLflow has authentication enabled)
   export MLFLOW_TRACKING_USERNAME=your-username
   export MLFLOW_TRACKING_PASSWORD=your-password
   ```

   Note: MLFLOW_TRACKING_URI uses the default cluster-internal URL and does not need to be set.

4. **Run the Notebook**:
   - Execute all cells in `01-train-and-register.ipynb`
   - The model will be automatically registered to the MLflow Model Registry

5. **Verify in MLflow UI**:
   - Access the MLflow UI at the configured MLFLOW_HOST
   - Navigate to "Models" → "iris-classifier"
   - Click on the model version (e.g., "Version 1")
   - Note the **artifact_path** displayed (e.g., `mlflow-artifacts:/2/models/m-28620b840353444385fa8e62335decf5/artifacts`)

## Step 2: Deploy Model with KServe

1. **Get the Model Registry Path**:

   In the MLflow UI, navigate to:
   - **Models** → **iris-classifier** → **Version 1**
   - Copy the **artifact_path** from the model details
   - Example: `mlflow-artifacts:/2/models/m-28620b840353444385fa8e62335decf5/artifacts`

   **Important**: Use the artifact_path from the **Model Registry** (it contains `/models/`), NOT the run-based path from the experiment runs.

2. **Update the InferenceService YAML**:

   Use the helper command to convert the MLflow artifact path to a KServe storageUri:

   ```bash
   just kserve::storage-uri "mlflow-artifacts:/2/models/m-28620b840353444385fa8e62335decf5/artifacts"
   # Output: s3://mlflow/2/models/m-28620b840353444385fa8e62335decf5/artifacts
   ```
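
   If the `just` helper is unavailable, the conversion is a straightforward scheme swap; the sketch below shows the idea in Python (the `to_storage_uri` function is illustrative, not part of this repo, and it assumes the MLflow artifact root is the `mlflow` bucket, as configured in this example):

   ```python
   # Illustrative equivalent of `just kserve::storage-uri`: replace the
   # `mlflow-artifacts:` scheme with the `s3://mlflow` bucket prefix.
   def to_storage_uri(artifact_path: str) -> str:
       prefix = "mlflow-artifacts:/"
       if not artifact_path.startswith(prefix):
           raise ValueError(f"unexpected artifact path: {artifact_path}")
       return "s3://mlflow/" + artifact_path[len(prefix):]

   print(to_storage_uri(
       "mlflow-artifacts:/2/models/m-28620b840353444385fa8e62335decf5/artifacts"
   ))
   # -> s3://mlflow/2/models/m-28620b840353444385fa8e62335decf5/artifacts
   ```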

   Edit `02-deploy-model.yaml` and replace the `storageUri` with the output:

   ```yaml
   storageUri: s3://mlflow/2/models/m-28620b840353444385fa8e62335decf5/artifacts
   ```

   **Note**: The default configuration uses the `mlflow` format, which automatically installs dependencies from `requirements.txt`. This ensures compatibility but may take longer to start (the initial container startup installs packages).

3. **Deploy the InferenceService**:

   ```bash
   kubectl apply -f 02-deploy-model.yaml
   ```

4. **Verify Deployment**:

   ```bash
   # Check InferenceService status
   kubectl get inferenceservice iris-classifier -n kserve

   # Wait for it to be ready (STATUS should show "Ready")
   # Note: First deployment may take 5-10 minutes due to dependency installation
   kubectl wait --for=condition=Ready inferenceservice/iris-classifier -n kserve --timeout=600s

   # Check the pods
   kubectl get pods -l serving.kserve.io/inferenceservice=iris-classifier -n kserve

   # Check logs if needed
   kubectl logs -l serving.kserve.io/inferenceservice=iris-classifier -n kserve -c kserve-container
   ```

## Step 3: Test from JupyterHub (Recommended)

1. **Upload the Test Notebook**:
   - Upload `03-test-inference.ipynb` to your JupyterHub workspace

2. **Run the Notebook**:
   - Execute all cells in `03-test-inference.ipynb`
   - The notebook will:
     - Send prediction requests to the KServe endpoint
     - Test single and batch predictions
     - Display results with expected vs. actual comparisons
     - Allow you to try custom inputs

3. **Expected Results**:

   ```plain
   Test Case 1: Typical Setosa
     Features: [5.1, 3.5, 1.4, 0.2]
     Expected: Iris Setosa
     Predicted: Iris Setosa
     Status: ✓ PASS
   ```

## Step 4: Test from Kubernetes Pod (Alternative)

After testing in JupyterHub, you can also test from Kubernetes Pods for automated testing or CI/CD integration.

### Option 1: Automated Test with Python (Recommended)

```bash
# Run the test job
kubectl apply -f 04-test-inference-job.yaml

# Check logs
kubectl logs job/test-iris-inference -n kserve

# Expected output:
# Test Case 1:
#   Input: [5.1, 3.5, 1.4, 0.2]
#   Expected: setosa
#   Predicted: setosa (class 0)
#   Status: ✓ PASS
```

### Option 2: Manual Test from a Pod

```bash
# Start a test pod
kubectl run test-pod --image=curlimages/curl --rm -it --restart=Never -- sh

# Inside the pod, run:
curl -X POST \
  http://iris-classifier-predictor.kserve.svc.cluster.local/v2/models/iris-classifier/infer \
  -H "Content-Type: application/json" \
  -d '{"inputs": [{"name": "input-0", "shape": [1, 4], "datatype": "FP64", "data": [[5.1, 3.5, 1.4, 0.2]]}]}'
```

## Model Prediction Examples

### Single Prediction (v2 Protocol)

```json
// Request
{
  "inputs": [
    {
      "name": "input-0",
      "shape": [1, 4],
      "datatype": "FP64",
      "data": [[5.1, 3.5, 1.4, 0.2]]  // Sepal length, Sepal width, Petal length, Petal width
    }
  ]
}

// Response
{
  "outputs": [
    {
      "name": "output-0",
      "shape": [1],
      "datatype": "INT64",
      "data": [0]  // 0=setosa, 1=versicolor, 2=virginica
    }
  ]
}
```

### Batch Prediction (v2 Protocol)

```json
// Request
{
  "inputs": [
    {
      "name": "input-0",
      "shape": [3, 4],
      "datatype": "FP64",
      "data": [
        [5.1, 3.5, 1.4, 0.2],  // Setosa
        [6.7, 3.0, 5.2, 2.3],  // Virginica
        [5.9, 3.0, 4.2, 1.5]   // Versicolor
      ]
    }
  ]
}

// Response
{
  "outputs": [
    {
      "name": "output-0",
      "shape": [3],
      "datatype": "INT64",
      "data": [0, 2, 1]
    }
  ]
}
```
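
For reference, here is a minimal Python client for the single-prediction example above, mirroring the `predict` helper in `03-test-inference.ipynb` (run it in-cluster, e.g. from JupyterHub; requires `requests`):

```python
import requests

ENDPOINT = (
    "http://iris-classifier-predictor.kserve.svc.cluster.local"
    "/v2/models/iris-classifier/infer"
)
CLASS_NAMES = ["setosa", "versicolor", "virginica"]

# v2 protocol payload, identical to the "Single Prediction" request above
payload = {
    "inputs": [
        {"name": "input-0", "shape": [1, 4], "datatype": "FP64",
         "data": [[5.1, 3.5, 1.4, 0.2]]}
    ]
}

response = requests.post(ENDPOINT, json=payload, timeout=10)
response.raise_for_status()

# The response carries class indices; map them back to names
for class_id in response.json()["outputs"][0]["data"]:
    print(CLASS_NAMES[class_id])  # expected: setosa
```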

## Troubleshooting

### InferenceService Not Ready

```bash
# Check events
kubectl describe inferenceservice iris-classifier -n kserve

# Check pod logs
kubectl logs -l serving.kserve.io/inferenceservice=iris-classifier -n kserve -c kserve-container
```

### S3/MinIO Connection Issues

```bash
# Verify S3 credentials secret
kubectl get secret kserve-s3-credentials -n kserve -o yaml

# Test MinIO access from a pod
kubectl run minio-test --image=amazon/aws-cli --rm -it --restart=Never -- \
  sh -c "AWS_ACCESS_KEY_ID=minioadmin AWS_SECRET_ACCESS_KEY=minioadmin aws --endpoint-url=http://minio.minio.svc.cluster.local:9000 s3 ls s3://mlflow/"
```

### Model Not Found

```bash
# Verify the model exists in the MinIO Console
# Access the MinIO Console at the configured MINIO_HOST
# Navigate to the mlflow bucket and verify the model path
# The path should be: EXPERIMENT_ID/models/MODEL_ID/artifacts/

# Example: 2/models/m-28620b840353444385fa8e62335decf5/artifacts/
```
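
The same check can be scripted with `boto3` (installed in Step 1); a sketch assuming the default `minioadmin` credentials and the cluster-internal MinIO endpoint used in the test above:

```python
import boto3

# Assumed defaults, matching the MinIO access test above
s3 = boto3.client(
    "s3",
    endpoint_url="http://minio.minio.svc.cluster.local:9000",
    aws_access_key_id="minioadmin",
    aws_secret_access_key="minioadmin",
)

# List the registered model's artifacts (example path from this guide)
resp = s3.list_objects_v2(
    Bucket="mlflow",
    Prefix="2/models/m-28620b840353444385fa8e62335decf5/artifacts/",
)
for obj in resp.get("Contents", []):
    print(obj["Key"])  # e.g. .../MLmodel, .../model.pkl, .../requirements.txt
```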

### Prediction Errors

```bash
# Check model format and KServe runtime compatibility
kubectl logs -l serving.kserve.io/inferenceservice=iris-classifier -n kserve
```

## Cleanup

```bash
# Delete InferenceService
kubectl delete inferenceservice iris-classifier -n kserve

# Delete test job
kubectl delete job test-iris-inference -n kserve
```

## Next Steps

- Try different models (XGBoost, TensorFlow, PyTorch)
- Add model versioning and A/B testing
- Implement canary deployments
- Add monitoring and observability
- Scale the InferenceService based on load

## References

- [KServe Documentation](https://kserve.github.io/website/)
- [MLflow Documentation](https://mlflow.org/docs/latest/index.html)
- [KServe Model Serving](https://kserve.github.io/website/latest/modelserving/v1beta1/sklearn/v2/)