Files
buun-stack/examples/kserve-mlflow-iris/01-train-and-register.ipynb
2025-11-10 21:31:35 +09:00

202 lines
6.1 KiB
Plaintext

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Iris Classification with MLflow and KServe\n",
"\n",
"This notebook demonstrates:\n",
"1. Training a simple scikit-learn model on the Iris dataset\n",
"2. Logging the model to MLflow and registering it in the MLflow Model Registry\n",
"3. Preparing the model for deployment with KServe"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 1. Setup and Install Dependencies"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Install required packages into the environment of the running kernel.\n",
"# %pip (unlike !pip) always targets the interpreter this notebook is executing in,\n",
"# so the packages are importable by the cells below.\n",
"%pip install mlflow scikit-learn boto3 -q"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import mlflow\n",
"import mlflow.sklearn\n",
"from sklearn.datasets import load_iris\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.metrics import accuracy_score, classification_report"
]
},
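{
"cell_type": "markdown",
"metadata": {},
"source": "## 2. Configure MLflow\n\nSet the MLflow tracking URI and authentication credentials. Each value is read from an environment variable (`MLFLOW_TRACKING_URI`, `MLFLOW_TRACKING_USERNAME`, `MLFLOW_TRACKING_PASSWORD`); when `MLFLOW_TRACKING_URI` is not set, the default URL in the cell below is used."
},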
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Tracking server location: read from the environment, falling back to the default URL\n",
"MLFLOW_TRACKING_URI = os.environ.get('MLFLOW_TRACKING_URI', 'http://mlflow.mlflow.svc.cluster.local')\n",
"mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)\n",
"\n",
"# MLflow authentication: keep credentials that are already exported,\n",
"# otherwise define the variables as empty strings\n",
"os.environ.setdefault('MLFLOW_TRACKING_USERNAME', '')\n",
"os.environ.setdefault('MLFLOW_TRACKING_PASSWORD', '')\n",
"\n",
"print(f\"MLflow Tracking URI: {MLFLOW_TRACKING_URI}\")\n",
"print(f\"MLflow Username: {os.environ['MLFLOW_TRACKING_USERNAME']}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 3. Load and Prepare Data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Fetch the Iris dataset and pull out its data and target arrays\n",
"iris = load_iris()\n",
"X, y = iris.data, iris.target\n",
"\n",
"# Hold out 20% of the samples for evaluation; the fixed seed keeps the split repeatable\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
"    X,\n",
"    y,\n",
"    test_size=0.2,\n",
"    random_state=42,\n",
")\n",
"\n",
"print(f\"Training samples: {len(X_train)}\")\n",
"print(f\"Test samples: {len(X_test)}\")\n",
"print(f\"Classes: {iris.target_names}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 4. Train Model with MLflow Tracking"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Select the experiment that the run below is recorded under\n",
"experiment_name = \"iris-classification\"\n",
"mlflow.set_experiment(experiment_name)\n",
"\n",
"# Everything inside the context manager is recorded against one MLflow run\n",
"with mlflow.start_run(run_name=\"logistic-regression\") as run:\n",
"    # Fit the classifier on the training split\n",
"    model = LogisticRegression(max_iter=200, random_state=42)\n",
"    model.fit(X_train, y_train)\n",
"\n",
"    # Score the held-out split\n",
"    y_pred = model.predict(X_test)\n",
"    accuracy = accuracy_score(y_test, y_pred)\n",
"\n",
"    # Record the hyperparameters (read back from the estimator) and the metric\n",
"    mlflow.log_param(\"model_type\", type(model).__name__)\n",
"    mlflow.log_param(\"max_iter\", model.max_iter)\n",
"    mlflow.log_metric(\"accuracy\", accuracy)\n",
"\n",
"    # Log the fitted model under the run and register it as \"iris-classifier\"\n",
"    mlflow.sklearn.log_model(\n",
"        model,\n",
"        \"model\",\n",
"        registered_model_name=\"iris-classifier\",\n",
"    )\n",
"\n",
"    print(f\"\\nRun ID: {run.info.run_id}\")\n",
"    print(f\"Accuracy: {accuracy:.4f}\")\n",
"    print(f\"\\nClassification Report:\")\n",
"    print(classification_report(y_test, y_pred, target_names=iris.target_names))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 5. Get Model Information for KServe Deployment"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Look up every registered version of the model and keep the newest one\n",
"model_name = \"iris-classifier\"\n",
"client = mlflow.tracking.MlflowClient()\n",
"\n",
"# search_model_versions is used because get_latest_versions is deprecated\n",
"model_versions = client.search_model_versions(f\"name='{model_name}'\")\n",
"# Version numbers are compared via int(), so the ordering is numeric\n",
"latest_version = max(model_versions, key=lambda mv: int(mv.version))\n",
"\n",
"print(f\"\\n=== Model Information for KServe ===\")\n",
"print(f\"Model Name: {model_name}\")\n",
"print(f\"Version: {latest_version.version}\")\n",
"print(f\"Run ID: {latest_version.run_id}\")\n",
"print(f\"\\nArtifact URI: {latest_version.source}\")\n",
"print(f\"\\nUse this information to configure the KServe InferenceService.\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 6. Test Local Prediction"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Classify one sample with the in-memory model; it should come out as 'setosa'\n",
"sample_input = [\n",
"    [5.1, 3.5, 1.4, 0.2],\n",
"]\n",
"prediction = model.predict(sample_input)\n",
"\n",
"# Translate the predicted class index into its species name\n",
"predicted_class = iris.target_names[prediction[0]]\n",
"\n",
"print(f\"\\nTest Input: {sample_input[0]}\")\n",
"print(f\"Predicted Class: {predicted_class}\")\n",
"print(f\"\\nThis sample will be used to test the KServe deployment.\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Next Steps\n",
"\n",
"1. Note the Model Name, Version, and Artifact URI printed in section 5\n",
"2. Deploy the model with KServe using the InferenceService YAML\n",
"3. Test the deployed model endpoint, for example with the sample input from section 6"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.0"
}
},
"nbformat": 4,
"nbformat_minor": 4
}