feat(kserve): install KServe

Masaki Yatsu
2025-11-10 21:31:35 +09:00
parent 27de65dd37
commit 2b0687330c
14 changed files with 1974 additions and 0 deletions


@@ -0,0 +1,313 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Test Iris Classifier InferenceService\n",
"\n",
"This notebook demonstrates how to call the deployed KServe InferenceService from JupyterHub."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 1. Setup"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Install requests if not already installed\n",
"!pip install requests -q"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"import json\n",
"import os"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 2. Configure Endpoint\n",
"\n",
"The InferenceService is accessible via the cluster-internal service URL."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": "# KServe InferenceService endpoint\n# Format: http://<service-name>-predictor.<namespace>.svc.cluster.local/v2/models/<model-name>/infer\nINFERENCE_SERVICE_NAME = \"iris-classifier\"\nNAMESPACE = \"kserve\"\nENDPOINT = f\"http://{INFERENCE_SERVICE_NAME}-predictor.{NAMESPACE}.svc.cluster.local/v2/models/{INFERENCE_SERVICE_NAME}/infer\"\n\nprint(f\"Inference Endpoint: {ENDPOINT}\")"
},
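{
"cell_type": "markdown",
"metadata": {},
"source": [
"Before sending inference requests, it can help to confirm that the server and model are ready. The cell below is a minimal sketch that assumes the serving runtime exposes the standard v2 health routes (`/v2/health/ready` and `/v2/models/<model-name>/ready`); if your runtime does not implement them, skip this check."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Optional readiness probe via the v2 protocol health endpoints.\n",
"# Assumption: the runtime implements the standard v2 REST routes.\n",
"BASE_URL = f\"http://{INFERENCE_SERVICE_NAME}-predictor.{NAMESPACE}.svc.cluster.local\"\n",
"\n",
"for path in [\"/v2/health/ready\", f\"/v2/models/{INFERENCE_SERVICE_NAME}/ready\"]:\n",
"    try:\n",
"        r = requests.get(BASE_URL + path, timeout=5)\n",
"        print(f\"GET {path} -> HTTP {r.status_code}\")\n",
"    except requests.exceptions.RequestException as e:\n",
"        print(f\"GET {path} failed: {e}\")"
]
},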
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 3. Define Test Samples\n",
"\n",
"Iris dataset has 4 features:\n",
"1. Sepal length (cm)\n",
"2. Sepal width (cm)\n",
"3. Petal length (cm)\n",
"4. Petal width (cm)\n",
"\n",
"Classes:\n",
"- 0: Iris Setosa\n",
"- 1: Iris Versicolor\n",
"- 2: Iris Virginica"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Define class names\n",
"CLASS_NAMES = [\"Iris Setosa\", \"Iris Versicolor\", \"Iris Virginica\"]\n",
"\n",
"# Test samples with expected predictions\n",
"test_cases = [\n",
" {\n",
" \"name\": \"Typical Setosa\",\n",
" \"features\": [5.1, 3.5, 1.4, 0.2],\n",
" \"expected_class\": 0,\n",
" \"description\": \"Short petals, typical of Setosa\"\n",
" },\n",
" {\n",
" \"name\": \"Typical Virginica\",\n",
" \"features\": [6.7, 3.0, 5.2, 2.3],\n",
" \"expected_class\": 2,\n",
" \"description\": \"Long petals and sepals, typical of Virginica\"\n",
" },\n",
" {\n",
" \"name\": \"Typical Versicolor\",\n",
" \"features\": [5.9, 3.0, 4.2, 1.5],\n",
" \"expected_class\": 1,\n",
" \"description\": \"Medium-sized features, typical of Versicolor\"\n",
" },\n",
"]\n",
"\n",
"print(f\"Prepared {len(test_cases)} test cases\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 4. Test Single Prediction\n",
"\n",
"Send a single prediction request to the InferenceService."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": "def predict(features):\n \"\"\"\n Send prediction request to KServe InferenceService using v2 protocol.\n \n Args:\n features: List of feature values [sepal_length, sepal_width, petal_length, petal_width]\n \n Returns:\n Predicted class (0, 1, or 2)\n \"\"\"\n payload = {\n \"inputs\": [\n {\n \"name\": \"input-0\",\n \"shape\": [1, 4],\n \"datatype\": \"FP64\",\n \"data\": [features]\n }\n ]\n }\n \n try:\n response = requests.post(ENDPOINT, json=payload, timeout=10)\n response.raise_for_status()\n result = response.json()\n return result['outputs'][0]['data'][0]\n except requests.exceptions.RequestException as e:\n print(f\"Error: {e}\")\n if hasattr(e, 'response') and hasattr(e.response, 'text'):\n print(f\"Response: {e.response.text}\")\n return None\n\n# Test with first sample\nsample = test_cases[0]\nprint(f\"Testing: {sample['name']}\")\nprint(f\"Features: {sample['features']}\")\nprint(f\"Description: {sample['description']}\")\nprint()\n\nprediction = predict(sample['features'])\nif prediction is not None:\n print(f\"Predicted Class: {prediction} ({CLASS_NAMES[prediction]})\")\n print(f\"Expected Class: {sample['expected_class']} ({CLASS_NAMES[sample['expected_class']]})\")\n print(f\"Result: {'✓ PASS' if prediction == sample['expected_class'] else '✗ FAIL'}\")"
},
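{
"cell_type": "markdown",
"metadata": {},
"source": [
"The payload shape above has to match what the model server expects. If you are unsure, the v2 protocol's model metadata endpoint (`GET /v2/models/<model-name>`) typically reports the expected tensor names, datatypes, and shapes. The sketch below assumes the runtime implements that route."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Query the v2 model metadata endpoint to inspect expected tensors.\n",
"# Assumption: the runtime implements GET /v2/models/<model-name>.\n",
"metadata_url = f\"http://{INFERENCE_SERVICE_NAME}-predictor.{NAMESPACE}.svc.cluster.local/v2/models/{INFERENCE_SERVICE_NAME}\"\n",
"\n",
"try:\n",
"    r = requests.get(metadata_url, timeout=5)\n",
"    r.raise_for_status()\n",
"    print(json.dumps(r.json(), indent=2))\n",
"except requests.exceptions.RequestException as e:\n",
"    print(f\"Metadata request failed: {e}\")"
]
},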
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 5. Test All Cases\n",
"\n",
"Run predictions for all test cases and display results."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(\"=\" * 80)\n",
"print(\"Testing Iris Classifier InferenceService\")\n",
"print(\"=\" * 80)\n",
"print()\n",
"\n",
"results = []\n",
"\n",
"for i, test_case in enumerate(test_cases, 1):\n",
" print(f\"Test Case {i}: {test_case['name']}\")\n",
" print(f\" Features: {test_case['features']}\")\n",
" print(f\" Description: {test_case['description']}\")\n",
" print(f\" Expected: {CLASS_NAMES[test_case['expected_class']]}\")\n",
" \n",
" prediction = predict(test_case['features'])\n",
" \n",
" if prediction is not None:\n",
" predicted_class_name = CLASS_NAMES[prediction]\n",
" is_correct = prediction == test_case['expected_class']\n",
" status = \"✓ PASS\" if is_correct else \"✗ FAIL\"\n",
" \n",
" print(f\" Predicted: {predicted_class_name}\")\n",
" print(f\" Status: {status}\")\n",
" \n",
" results.append({\n",
" 'name': test_case['name'],\n",
" 'expected': test_case['expected_class'],\n",
" 'predicted': prediction,\n",
" 'correct': is_correct\n",
" })\n",
" else:\n",
" print(f\" Status: ✗ ERROR\")\n",
" results.append({\n",
" 'name': test_case['name'],\n",
" 'expected': test_case['expected_class'],\n",
" 'predicted': None,\n",
" 'correct': False\n",
" })\n",
" \n",
" print()\n",
"\n",
"# Summary\n",
"print(\"=\" * 80)\n",
"passed = sum(1 for r in results if r['correct'])\n",
"total = len(results)\n",
"print(f\"Test Summary: {passed}/{total} passed\")\n",
"print(\"=\" * 80)"
]
},
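{
"cell_type": "markdown",
"metadata": {},
"source": [
"Since `results` is a list of flat dicts, it can also be viewed as a table. This assumes pandas is installed, which is typical for JupyterHub data-science images but not guaranteed."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Optional: render the results list as a table.\n",
"# Assumption: pandas is available in the notebook image.\n",
"import pandas as pd\n",
"\n",
"pd.DataFrame(results)"
]
},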
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 6. Batch Prediction\n",
"\n",
"Send multiple samples in a single request for batch prediction."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": "def predict_batch(features_list):\n \"\"\"\n Send batch prediction request to KServe InferenceService using v2 protocol.\n \n Args:\n features_list: List of feature arrays\n \n Returns:\n List of predicted classes\n \"\"\"\n payload = {\n \"inputs\": [\n {\n \"name\": \"input-0\",\n \"shape\": [len(features_list), 4],\n \"datatype\": \"FP64\",\n \"data\": features_list\n }\n ]\n }\n \n try:\n response = requests.post(ENDPOINT, json=payload, timeout=10)\n response.raise_for_status()\n result = response.json()\n return result['outputs'][0]['data']\n except requests.exceptions.RequestException as e:\n print(f\"Error: {e}\")\n if hasattr(e, 'response') and hasattr(e.response, 'text'):\n print(f\"Response: {e.response.text}\")\n return None\n\n# Prepare batch request\nbatch_features = [tc['features'] for tc in test_cases]\nprint(f\"Sending batch request with {len(batch_features)} samples...\")\nprint()\n\n# Send batch request\npredictions = predict_batch(batch_features)\n\nif predictions:\n print(\"Batch Prediction Results:\")\n print(\"-\" * 60)\n for i, (test_case, prediction) in enumerate(zip(test_cases, predictions), 1):\n print(f\"{i}. {test_case['name']}\")\n print(f\" Predicted: {CLASS_NAMES[prediction]}\")\n print(f\" Expected: {CLASS_NAMES[test_case['expected_class']]}\")\n status = \"✓\" if prediction == test_case['expected_class'] else \"✗\"\n print(f\" {status}\")\n print()"
},
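{
"cell_type": "markdown",
"metadata": {},
"source": [
"Batching amortizes per-request overhead such as HTTP round-trips and serialization. The rough comparison below illustrates the difference; absolute numbers depend on cluster networking, autoscaling, and cold starts, so treat them as indicative only."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Rough latency comparison: N single requests vs. one batched request.\n",
"# Indicative only; networking and cold starts dominate the numbers.\n",
"import time\n",
"\n",
"start = time.perf_counter()\n",
"for tc in test_cases:\n",
"    predict(tc['features'])\n",
"single_total = time.perf_counter() - start\n",
"\n",
"start = time.perf_counter()\n",
"predict_batch(batch_features)\n",
"batch_total = time.perf_counter() - start\n",
"\n",
"print(f\"{len(test_cases)} single requests: {single_total:.3f}s\")\n",
"print(f\"1 batch request: {batch_total:.3f}s\")"
]
},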
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 7. Custom Prediction\n",
"\n",
"Try your own input values!"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Enter your own values here\n",
"# Format: [sepal_length, sepal_width, petal_length, petal_width]\n",
"custom_features = [6.0, 3.0, 4.0, 1.5]\n",
"\n",
"print(f\"Custom Input: {custom_features}\")\n",
"print(f\" Sepal Length: {custom_features[0]} cm\")\n",
"print(f\" Sepal Width: {custom_features[1]} cm\")\n",
"print(f\" Petal Length: {custom_features[2]} cm\")\n",
"print(f\" Petal Width: {custom_features[3]} cm\")\n",
"print()\n",
"\n",
"prediction = predict(custom_features)\n",
"if prediction is not None:\n",
" print(f\"Prediction: {CLASS_NAMES[prediction]} (class {prediction})\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 8. Check InferenceService Status\n",
"\n",
"Verify the InferenceService is running properly."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Check if we can reach the endpoint\n",
"import subprocess\n",
"\n",
"print(\"Checking InferenceService status...\")\n",
"print()\n",
"\n",
"# Using kubectl from the notebook\n",
"try:\n",
" result = subprocess.run(\n",
" [\"kubectl\", \"get\", \"inferenceservice\", INFERENCE_SERVICE_NAME, \"-n\", NAMESPACE],\n",
" capture_output=True,\n",
" text=True,\n",
" timeout=10\n",
" )\n",
" print(result.stdout)\n",
" if result.returncode != 0:\n",
" print(result.stderr)\n",
"except Exception as e:\n",
" print(f\"Could not check status: {e}\")\n",
" print(\"This is normal if kubectl is not available in the notebook environment.\")"
]
},
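{
"cell_type": "markdown",
"metadata": {},
"source": [
"If a request fails from Python, reproducing it with `curl` can help isolate whether the problem is in the client or the service. The sketch below writes the payload to a temporary file and assumes `curl` is available in the notebook image."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Reproduce the inference call with curl for debugging.\n",
"# Assumption: curl is installed in the notebook image.\n",
"payload = {\n",
"    \"inputs\": [\n",
"        {\"name\": \"input-0\", \"shape\": [1, 4], \"datatype\": \"FP64\",\n",
"         \"data\": [[5.1, 3.5, 1.4, 0.2]]}\n",
"    ]\n",
"}\n",
"with open(\"/tmp/iris-payload.json\", \"w\") as f:\n",
"    json.dump(payload, f)\n",
"\n",
"!curl -s -X POST {ENDPOINT} -H \"Content-Type: application/json\" -d @/tmp/iris-payload.json"
]
},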
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Summary\n",
"\n",
"You have successfully:\n",
"1. ✅ Connected to the KServe InferenceService\n",
"2. ✅ Sent single prediction requests\n",
"3. ✅ Sent batch prediction requests\n",
"4. ✅ Verified predictions against expected results\n",
"\n",
"## Next Steps\n",
"\n",
"- Try running the Kubernetes Job-based tests (see `04-test-inference-job.yaml`)\n",
"- Deploy a new version of the model and compare predictions\n",
"- Implement A/B testing with multiple model versions\n",
"- Add monitoring and logging"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.0"
}
},
"nbformat": 4,
"nbformat_minor": 4
}