feat(kserve): install KServe
This commit is contained in:
313
examples/kserve-mlflow-iris/03-test-inference.ipynb
Normal file
313
examples/kserve-mlflow-iris/03-test-inference.ipynb
Normal file
@@ -0,0 +1,313 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Test Iris Classifier InferenceService\n",
|
||||
"\n",
|
||||
"This notebook demonstrates how to call the deployed KServe InferenceService from JupyterHub."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 1. Setup"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Install requests if not already installed\n",
|
||||
"%pip install requests -q"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import requests\n",
|
||||
"import json\n",
|
||||
"import os"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 2. Configure Endpoint\n",
|
||||
"\n",
|
||||
"The InferenceService is accessible via the cluster-internal service URL."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": "# KServe InferenceService endpoint\n# Format: http://<service-name>-predictor.<namespace>.svc.cluster.local/v2/models/<model-name>/infer\nINFERENCE_SERVICE_NAME = \"iris-classifier\"\nNAMESPACE = \"kserve\"\nENDPOINT = f\"http://{INFERENCE_SERVICE_NAME}-predictor.{NAMESPACE}.svc.cluster.local/v2/models/{INFERENCE_SERVICE_NAME}/infer\"\n\nprint(f\"Inference Endpoint: {ENDPOINT}\")"
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 3. Define Test Samples\n",
|
||||
"\n",
|
||||
"Iris dataset has 4 features:\n",
|
||||
"1. Sepal length (cm)\n",
|
||||
"2. Sepal width (cm)\n",
|
||||
"3. Petal length (cm)\n",
|
||||
"4. Petal width (cm)\n",
|
||||
"\n",
|
||||
"Classes:\n",
|
||||
"- 0: Iris Setosa\n",
|
||||
"- 1: Iris Versicolor\n",
|
||||
"- 2: Iris Virginica"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Define class names\n",
|
||||
"CLASS_NAMES = [\"Iris Setosa\", \"Iris Versicolor\", \"Iris Virginica\"]\n",
|
||||
"\n",
|
||||
"# Test samples with expected predictions\n",
|
||||
"test_cases = [\n",
|
||||
" {\n",
|
||||
" \"name\": \"Typical Setosa\",\n",
|
||||
" \"features\": [5.1, 3.5, 1.4, 0.2],\n",
|
||||
" \"expected_class\": 0,\n",
|
||||
" \"description\": \"Short petals, typical of Setosa\"\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"name\": \"Typical Virginica\",\n",
|
||||
" \"features\": [6.7, 3.0, 5.2, 2.3],\n",
|
||||
" \"expected_class\": 2,\n",
|
||||
" \"description\": \"Long petals and sepals, typical of Virginica\"\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"name\": \"Typical Versicolor\",\n",
|
||||
" \"features\": [5.9, 3.0, 4.2, 1.5],\n",
|
||||
" \"expected_class\": 1,\n",
|
||||
" \"description\": \"Medium-sized features, typical of Versicolor\"\n",
|
||||
" },\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"print(f\"Prepared {len(test_cases)} test cases\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 4. Test Single Prediction\n",
|
||||
"\n",
|
||||
"Send a single prediction request to the InferenceService."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": "def predict(features):\n \"\"\"\n Send prediction request to KServe InferenceService using v2 protocol.\n \n Args:\n features: List of feature values [sepal_length, sepal_width, petal_length, petal_width]\n \n Returns:\n Predicted class (0, 1, or 2)\n \"\"\"\n payload = {\n \"inputs\": [\n {\n \"name\": \"input-0\",\n \"shape\": [1, 4],\n \"datatype\": \"FP64\",\n \"data\": [features]\n }\n ]\n }\n \n try:\n response = requests.post(ENDPOINT, json=payload, timeout=10)\n response.raise_for_status()\n result = response.json()\n return result['outputs'][0]['data'][0]\n except requests.exceptions.RequestException as e:\n print(f\"Error: {e}\")\n if hasattr(e, 'response') and hasattr(e.response, 'text'):\n print(f\"Response: {e.response.text}\")\n return None\n\n# Test with first sample\nsample = test_cases[0]\nprint(f\"Testing: {sample['name']}\")\nprint(f\"Features: {sample['features']}\")\nprint(f\"Description: {sample['description']}\")\nprint()\n\nprediction = predict(sample['features'])\nif prediction is not None:\n print(f\"Predicted Class: {prediction} ({CLASS_NAMES[prediction]})\")\n print(f\"Expected Class: {sample['expected_class']} ({CLASS_NAMES[sample['expected_class']]})\")\n print(f\"Result: {'✓ PASS' if prediction == sample['expected_class'] else '✗ FAIL'}\")"
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 5. Test All Cases\n",
|
||||
"\n",
|
||||
"Run predictions for all test cases and display results."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(\"=\" * 80)\n",
|
||||
"print(\"Testing Iris Classifier InferenceService\")\n",
|
||||
"print(\"=\" * 80)\n",
|
||||
"print()\n",
|
||||
"\n",
|
||||
"results = []\n",
|
||||
"\n",
|
||||
"for i, test_case in enumerate(test_cases, 1):\n",
|
||||
" print(f\"Test Case {i}: {test_case['name']}\")\n",
|
||||
" print(f\" Features: {test_case['features']}\")\n",
|
||||
" print(f\" Description: {test_case['description']}\")\n",
|
||||
" print(f\" Expected: {CLASS_NAMES[test_case['expected_class']]}\")\n",
|
||||
" \n",
|
||||
" prediction = predict(test_case['features'])\n",
|
||||
" \n",
|
||||
" if prediction is not None:\n",
|
||||
" predicted_class_name = CLASS_NAMES[prediction]\n",
|
||||
" is_correct = prediction == test_case['expected_class']\n",
|
||||
" status = \"✓ PASS\" if is_correct else \"✗ FAIL\"\n",
|
||||
" \n",
|
||||
" print(f\" Predicted: {predicted_class_name}\")\n",
|
||||
" print(f\" Status: {status}\")\n",
|
||||
" \n",
|
||||
" results.append({\n",
|
||||
" 'name': test_case['name'],\n",
|
||||
" 'expected': test_case['expected_class'],\n",
|
||||
" 'predicted': prediction,\n",
|
||||
" 'correct': is_correct\n",
|
||||
" })\n",
|
||||
" else:\n",
|
||||
" print(\" Status: ✗ ERROR\")\n",
|
||||
" results.append({\n",
|
||||
" 'name': test_case['name'],\n",
|
||||
" 'expected': test_case['expected_class'],\n",
|
||||
" 'predicted': None,\n",
|
||||
" 'correct': False\n",
|
||||
" })\n",
|
||||
" \n",
|
||||
" print()\n",
|
||||
"\n",
|
||||
"# Summary\n",
|
||||
"print(\"=\" * 80)\n",
|
||||
"passed = sum(1 for r in results if r['correct'])\n",
|
||||
"total = len(results)\n",
|
||||
"print(f\"Test Summary: {passed}/{total} passed\")\n",
|
||||
"print(\"=\" * 80)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 6. Batch Prediction\n",
|
||||
"\n",
|
||||
"Send multiple samples in a single request for batch prediction."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": "def predict_batch(features_list):\n \"\"\"\n Send batch prediction request to KServe InferenceService using v2 protocol.\n \n Args:\n features_list: List of feature arrays\n \n Returns:\n List of predicted classes\n \"\"\"\n payload = {\n \"inputs\": [\n {\n \"name\": \"input-0\",\n \"shape\": [len(features_list), 4],\n \"datatype\": \"FP64\",\n \"data\": features_list\n }\n ]\n }\n \n try:\n response = requests.post(ENDPOINT, json=payload, timeout=10)\n response.raise_for_status()\n result = response.json()\n return result['outputs'][0]['data']\n except requests.exceptions.RequestException as e:\n print(f\"Error: {e}\")\n if hasattr(e, 'response') and hasattr(e.response, 'text'):\n print(f\"Response: {e.response.text}\")\n return None\n\n# Prepare batch request\nbatch_features = [tc['features'] for tc in test_cases]\nprint(f\"Sending batch request with {len(batch_features)} samples...\")\nprint()\n\n# Send batch request\npredictions = predict_batch(batch_features)\n\nif predictions:\n print(\"Batch Prediction Results:\")\n print(\"-\" * 60)\n for i, (test_case, prediction) in enumerate(zip(test_cases, predictions), 1):\n print(f\"{i}. {test_case['name']}\")\n print(f\" Predicted: {CLASS_NAMES[prediction]}\")\n print(f\" Expected: {CLASS_NAMES[test_case['expected_class']]}\")\n status = \"✓\" if prediction == test_case['expected_class'] else \"✗\"\n print(f\" {status}\")\n print()"
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 7. Custom Prediction\n",
|
||||
"\n",
|
||||
"Try your own input values!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Enter your own values here\n",
|
||||
"# Format: [sepal_length, sepal_width, petal_length, petal_width]\n",
|
||||
"custom_features = [6.0, 3.0, 4.0, 1.5]\n",
|
||||
"\n",
|
||||
"print(f\"Custom Input: {custom_features}\")\n",
|
||||
"print(f\" Sepal Length: {custom_features[0]} cm\")\n",
|
||||
"print(f\" Sepal Width: {custom_features[1]} cm\")\n",
|
||||
"print(f\" Petal Length: {custom_features[2]} cm\")\n",
|
||||
"print(f\" Petal Width: {custom_features[3]} cm\")\n",
|
||||
"print()\n",
|
||||
"\n",
|
||||
"prediction = predict(custom_features)\n",
|
||||
"if prediction is not None:\n",
|
||||
" print(f\"Prediction: {CLASS_NAMES[prediction]} (class {prediction})\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 8. Check InferenceService Status\n",
|
||||
"\n",
|
||||
"Verify the InferenceService is running properly."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Check if we can reach the endpoint\n",
|
||||
"import subprocess\n",
|
||||
"\n",
|
||||
"print(\"Checking InferenceService status...\")\n",
|
||||
"print()\n",
|
||||
"\n",
|
||||
"# Using kubectl from the notebook\n",
|
||||
"try:\n",
|
||||
" result = subprocess.run(\n",
|
||||
" [\"kubectl\", \"get\", \"inferenceservice\", INFERENCE_SERVICE_NAME, \"-n\", NAMESPACE],\n",
|
||||
" capture_output=True,\n",
|
||||
" text=True,\n",
|
||||
" timeout=10\n",
|
||||
" )\n",
|
||||
" print(result.stdout)\n",
|
||||
" if result.returncode != 0:\n",
|
||||
" print(result.stderr)\n",
|
||||
"except Exception as e:\n",
|
||||
" print(f\"Could not check status: {e}\")\n",
|
||||
" print(\"This is normal if kubectl is not available in the notebook environment.\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Summary\n",
|
||||
"\n",
|
||||
"You have successfully:\n",
|
||||
"1. ✅ Connected to the KServe InferenceService\n",
|
||||
"2. ✅ Sent single prediction requests\n",
|
||||
"3. ✅ Sent batch prediction requests\n",
|
||||
"4. ✅ Verified predictions against expected results\n",
|
||||
"\n",
|
||||
"## Next Steps\n",
|
||||
"\n",
|
||||
"- Try running the Kubernetes Job-based tests (see `04-test-inference-job.yaml`)\n",
|
||||
"- Deploy a new version of the model and compare predictions\n",
|
||||
"- Implement A/B testing with multiple model versions\n",
|
||||
"- Add monitoring and logging"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
Reference in New Issue
Block a user