313 lines
12 KiB
Plaintext
313 lines
12 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Test Iris Classifier InferenceService\n",
|
|
"\n",
|
|
"This notebook demonstrates how to call the deployed KServe InferenceService from JupyterHub."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## 1. Setup"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Install requests if not already installed.\n",
"# Use %pip (not !pip) so the install targets the active kernel's environment.\n",
"%pip install requests -q"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import requests\n",
|
|
"import json\n",
|
|
"import os"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## 2. Configure Endpoint\n",
|
|
"\n",
|
|
"The InferenceService is accessible via the cluster-internal service URL."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
    "# KServe InferenceService endpoint\n",
    "# Format: http://<service-name>-predictor.<namespace>.svc.cluster.local/v2/models/<model-name>/infer\n",
    "INFERENCE_SERVICE_NAME = \"iris-classifier\"\n",
    "NAMESPACE = \"kserve\"\n",
    "ENDPOINT = f\"http://{INFERENCE_SERVICE_NAME}-predictor.{NAMESPACE}.svc.cluster.local/v2/models/{INFERENCE_SERVICE_NAME}/infer\"\n",
    "\n",
    "print(f\"Inference Endpoint: {ENDPOINT}\")"
   ]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## 3. Define Test Samples\n",
|
|
"\n",
|
|
"Iris dataset has 4 features:\n",
|
|
"1. Sepal length (cm)\n",
|
|
"2. Sepal width (cm)\n",
|
|
"3. Petal length (cm)\n",
|
|
"4. Petal width (cm)\n",
|
|
"\n",
|
|
"Classes:\n",
|
|
"- 0: Iris Setosa\n",
|
|
"- 1: Iris Versicolor\n",
|
|
"- 2: Iris Virginica"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Define class names\n",
|
|
"CLASS_NAMES = [\"Iris Setosa\", \"Iris Versicolor\", \"Iris Virginica\"]\n",
|
|
"\n",
|
|
"# Test samples with expected predictions\n",
|
|
"test_cases = [\n",
|
|
" {\n",
|
|
" \"name\": \"Typical Setosa\",\n",
|
|
" \"features\": [5.1, 3.5, 1.4, 0.2],\n",
|
|
" \"expected_class\": 0,\n",
|
|
" \"description\": \"Short petals, typical of Setosa\"\n",
|
|
" },\n",
|
|
" {\n",
|
|
" \"name\": \"Typical Virginica\",\n",
|
|
" \"features\": [6.7, 3.0, 5.2, 2.3],\n",
|
|
" \"expected_class\": 2,\n",
|
|
" \"description\": \"Long petals and sepals, typical of Virginica\"\n",
|
|
" },\n",
|
|
" {\n",
|
|
" \"name\": \"Typical Versicolor\",\n",
|
|
" \"features\": [5.9, 3.0, 4.2, 1.5],\n",
|
|
" \"expected_class\": 1,\n",
|
|
" \"description\": \"Medium-sized features, typical of Versicolor\"\n",
|
|
" },\n",
|
|
"]\n",
|
|
"\n",
|
|
"print(f\"Prepared {len(test_cases)} test cases\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## 4. Test Single Prediction\n",
|
|
"\n",
|
|
"Send a single prediction request to the InferenceService."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
    "def predict(features):\n",
    "    \"\"\"\n",
    "    Send a prediction request to the KServe InferenceService using the v2 protocol.\n",
    "\n",
    "    Args:\n",
    "        features: List of feature values [sepal_length, sepal_width, petal_length, petal_width]\n",
    "\n",
    "    Returns:\n",
    "        Predicted class as an int (0, 1, or 2), or None if the request failed.\n",
    "    \"\"\"\n",
    "    payload = {\n",
    "        \"inputs\": [\n",
    "            {\n",
    "                \"name\": \"input-0\",\n",
    "                \"shape\": [1, 4],\n",
    "                \"datatype\": \"FP64\",\n",
    "                \"data\": [features]\n",
    "            }\n",
    "        ]\n",
    "    }\n",
    "\n",
    "    try:\n",
    "        response = requests.post(ENDPOINT, json=payload, timeout=10)\n",
    "        response.raise_for_status()\n",
    "        result = response.json()\n",
    "        # Cast to int: some serving runtimes serialize the class label as a float,\n",
    "        # and the value is used below as an index into CLASS_NAMES.\n",
    "        return int(result['outputs'][0]['data'][0])\n",
    "    except requests.exceptions.RequestException as e:\n",
    "        print(f\"Error: {e}\")\n",
    "        # RequestException always carries a .response attribute (possibly None).\n",
    "        if e.response is not None:\n",
    "            print(f\"Response: {e.response.text}\")\n",
    "        return None\n",
    "\n",
    "# Test with first sample\n",
    "sample = test_cases[0]\n",
    "print(f\"Testing: {sample['name']}\")\n",
    "print(f\"Features: {sample['features']}\")\n",
    "print(f\"Description: {sample['description']}\")\n",
    "print()\n",
    "\n",
    "prediction = predict(sample['features'])\n",
    "if prediction is not None:\n",
    "    print(f\"Predicted Class: {prediction} ({CLASS_NAMES[prediction]})\")\n",
    "    print(f\"Expected Class: {sample['expected_class']} ({CLASS_NAMES[sample['expected_class']]})\")\n",
    "    print(f\"Result: {'✓ PASS' if prediction == sample['expected_class'] else '✗ FAIL'}\")"
   ]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## 5. Test All Cases\n",
|
|
"\n",
|
|
"Run predictions for all test cases and display results."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"print(\"=\" * 80)\n",
|
|
"print(\"Testing Iris Classifier InferenceService\")\n",
|
|
"print(\"=\" * 80)\n",
|
|
"print()\n",
|
|
"\n",
|
|
"results = []\n",
|
|
"\n",
|
|
"for i, test_case in enumerate(test_cases, 1):\n",
|
|
" print(f\"Test Case {i}: {test_case['name']}\")\n",
|
|
" print(f\" Features: {test_case['features']}\")\n",
|
|
" print(f\" Description: {test_case['description']}\")\n",
|
|
" print(f\" Expected: {CLASS_NAMES[test_case['expected_class']]}\")\n",
|
|
" \n",
|
|
" prediction = predict(test_case['features'])\n",
|
|
" \n",
|
|
" if prediction is not None:\n",
|
|
" predicted_class_name = CLASS_NAMES[prediction]\n",
|
|
" is_correct = prediction == test_case['expected_class']\n",
|
|
" status = \"✓ PASS\" if is_correct else \"✗ FAIL\"\n",
|
|
" \n",
|
|
" print(f\" Predicted: {predicted_class_name}\")\n",
|
|
" print(f\" Status: {status}\")\n",
|
|
" \n",
|
|
" results.append({\n",
|
|
" 'name': test_case['name'],\n",
|
|
" 'expected': test_case['expected_class'],\n",
|
|
" 'predicted': prediction,\n",
|
|
" 'correct': is_correct\n",
|
|
" })\n",
|
|
" else:\n",
|
|
" print(f\" Status: ✗ ERROR\")\n",
|
|
" results.append({\n",
|
|
" 'name': test_case['name'],\n",
|
|
" 'expected': test_case['expected_class'],\n",
|
|
" 'predicted': None,\n",
|
|
" 'correct': False\n",
|
|
" })\n",
|
|
" \n",
|
|
" print()\n",
|
|
"\n",
|
|
"# Summary\n",
|
|
"print(\"=\" * 80)\n",
|
|
"passed = sum(1 for r in results if r['correct'])\n",
|
|
"total = len(results)\n",
|
|
"print(f\"Test Summary: {passed}/{total} passed\")\n",
|
|
"print(\"=\" * 80)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## 6. Batch Prediction\n",
|
|
"\n",
|
|
"Send multiple samples in a single request for batch prediction."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
    "def predict_batch(features_list):\n",
    "    \"\"\"\n",
    "    Send a batch prediction request to the KServe InferenceService using the v2 protocol.\n",
    "\n",
    "    Args:\n",
    "        features_list: List of feature arrays\n",
    "\n",
    "    Returns:\n",
    "        List of predicted classes as ints, or None if the request failed.\n",
    "    \"\"\"\n",
    "    payload = {\n",
    "        \"inputs\": [\n",
    "            {\n",
    "                \"name\": \"input-0\",\n",
    "                \"shape\": [len(features_list), 4],\n",
    "                \"datatype\": \"FP64\",\n",
    "                \"data\": features_list\n",
    "            }\n",
    "        ]\n",
    "    }\n",
    "\n",
    "    try:\n",
    "        response = requests.post(ENDPOINT, json=payload, timeout=10)\n",
    "        response.raise_for_status()\n",
    "        result = response.json()\n",
    "        # Cast each label to int: some serving runtimes serialize labels as floats,\n",
    "        # and the values are used below as indexes into CLASS_NAMES.\n",
    "        return [int(p) for p in result['outputs'][0]['data']]\n",
    "    except requests.exceptions.RequestException as e:\n",
    "        print(f\"Error: {e}\")\n",
    "        # RequestException always carries a .response attribute (possibly None).\n",
    "        if e.response is not None:\n",
    "            print(f\"Response: {e.response.text}\")\n",
    "        return None\n",
    "\n",
    "# Prepare batch request\n",
    "batch_features = [tc['features'] for tc in test_cases]\n",
    "print(f\"Sending batch request with {len(batch_features)} samples...\")\n",
    "print()\n",
    "\n",
    "# Send batch request\n",
    "predictions = predict_batch(batch_features)\n",
    "\n",
    "if predictions:\n",
    "    print(\"Batch Prediction Results:\")\n",
    "    print(\"-\" * 60)\n",
    "    for i, (test_case, prediction) in enumerate(zip(test_cases, predictions), 1):\n",
    "        print(f\"{i}. {test_case['name']}\")\n",
    "        print(f\"   Predicted: {CLASS_NAMES[prediction]}\")\n",
    "        print(f\"   Expected:  {CLASS_NAMES[test_case['expected_class']]}\")\n",
    "        status = \"✓\" if prediction == test_case['expected_class'] else \"✗\"\n",
    "        print(f\"   {status}\")\n",
    "        print()"
   ]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## 7. Custom Prediction\n",
|
|
"\n",
|
|
"Try your own input values!"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Enter your own values here\n",
|
|
"# Format: [sepal_length, sepal_width, petal_length, petal_width]\n",
|
|
"custom_features = [6.0, 3.0, 4.0, 1.5]\n",
|
|
"\n",
|
|
"print(f\"Custom Input: {custom_features}\")\n",
|
|
"print(f\" Sepal Length: {custom_features[0]} cm\")\n",
|
|
"print(f\" Sepal Width: {custom_features[1]} cm\")\n",
|
|
"print(f\" Petal Length: {custom_features[2]} cm\")\n",
|
|
"print(f\" Petal Width: {custom_features[3]} cm\")\n",
|
|
"print()\n",
|
|
"\n",
|
|
"prediction = predict(custom_features)\n",
|
|
"if prediction is not None:\n",
|
|
" print(f\"Prediction: {CLASS_NAMES[prediction]} (class {prediction})\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## 8. Check InferenceService Status\n",
|
|
"\n",
|
|
"Verify the InferenceService is running properly."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Check if we can reach the endpoint\n",
|
|
"import subprocess\n",
|
|
"\n",
|
|
"print(\"Checking InferenceService status...\")\n",
|
|
"print()\n",
|
|
"\n",
|
|
"# Using kubectl from the notebook\n",
|
|
"try:\n",
|
|
" result = subprocess.run(\n",
|
|
" [\"kubectl\", \"get\", \"inferenceservice\", INFERENCE_SERVICE_NAME, \"-n\", NAMESPACE],\n",
|
|
" capture_output=True,\n",
|
|
" text=True,\n",
|
|
" timeout=10\n",
|
|
" )\n",
|
|
" print(result.stdout)\n",
|
|
" if result.returncode != 0:\n",
|
|
" print(result.stderr)\n",
|
|
"except Exception as e:\n",
|
|
" print(f\"Could not check status: {e}\")\n",
|
|
" print(\"This is normal if kubectl is not available in the notebook environment.\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Summary\n",
|
|
"\n",
|
|
"You have successfully:\n",
|
|
"1. ✅ Connected to the KServe InferenceService\n",
|
|
"2. ✅ Sent single prediction requests\n",
|
|
"3. ✅ Sent batch prediction requests\n",
|
|
"4. ✅ Verified predictions against expected results\n",
|
|
"\n",
|
|
"## Next Steps\n",
|
|
"\n",
|
|
"- Try running the Kubernetes Job-based tests (see `04-test-inference-job.yaml`)\n",
|
|
"- Deploy a new version of the model and compare predictions\n",
|
|
"- Implement A/B testing with multiple model versions\n",
|
|
"- Add monitoring and logging"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.9.0"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 4
|
|
} |