202 lines
6.1 KiB
Plaintext
202 lines
6.1 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Iris Classification with MLflow and KServe\n",
|
|
"\n",
|
|
"This notebook demonstrates:\n",
|
|
"1. Training a simple scikit-learn model on the Iris dataset\n",
|
|
"2. Logging the model to MLflow\n",
|
|
"3. Preparing the model for deployment with KServe"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## 1. Setup and Install Dependencies"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Install required packages\n",
|
|
"!pip install mlflow scikit-learn boto3 -q"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import os\n",
|
|
"import mlflow\n",
|
|
"import mlflow.sklearn\n",
|
|
"from sklearn.datasets import load_iris\n",
|
|
"from sklearn.model_selection import train_test_split\n",
|
|
"from sklearn.linear_model import LogisticRegression\n",
|
|
"from sklearn.metrics import accuracy_score, classification_report"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": "## 2. Configure MLflow\n\nSet MLflow tracking URI and authentication credentials."
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": "# MLflow configuration\nMLFLOW_TRACKING_URI = os.getenv('MLFLOW_TRACKING_URI', 'http://mlflow.mlflow.svc.cluster.local')\nmlflow.set_tracking_uri(MLFLOW_TRACKING_URI)\n\n# MLflow authentication\nos.environ['MLFLOW_TRACKING_USERNAME'] = os.getenv('MLFLOW_TRACKING_USERNAME', '')\nos.environ['MLFLOW_TRACKING_PASSWORD'] = os.getenv('MLFLOW_TRACKING_PASSWORD', '')\n\nprint(f\"MLflow Tracking URI: {MLFLOW_TRACKING_URI}\")\nprint(f\"MLflow Username: {os.environ['MLFLOW_TRACKING_USERNAME']}\")"
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## 3. Load and Prepare Data"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Load Iris dataset\n",
|
|
"iris = load_iris()\n",
|
|
"X = iris.data\n",
|
|
"y = iris.target\n",
|
|
"\n",
|
|
"# Split data\n",
|
|
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
|
|
"\n",
|
|
"print(f\"Training samples: {len(X_train)}\")\n",
|
|
"print(f\"Test samples: {len(X_test)}\")\n",
|
|
"print(f\"Classes: {iris.target_names}\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## 4. Train Model with MLflow Tracking"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Set experiment\n",
|
|
"experiment_name = \"iris-classification\"\n",
|
|
"mlflow.set_experiment(experiment_name)\n",
|
|
"\n",
|
|
"# Start MLflow run\n",
|
|
"with mlflow.start_run(run_name=\"logistic-regression\") as run:\n",
|
|
" # Train model\n",
|
|
" model = LogisticRegression(max_iter=200, random_state=42)\n",
|
|
" model.fit(X_train, y_train)\n",
|
|
" \n",
|
|
" # Predictions\n",
|
|
" y_pred = model.predict(X_test)\n",
|
|
" accuracy = accuracy_score(y_test, y_pred)\n",
|
|
" \n",
|
|
" # Log parameters\n",
|
|
" mlflow.log_param(\"model_type\", \"LogisticRegression\")\n",
|
|
" mlflow.log_param(\"max_iter\", 200)\n",
|
|
" \n",
|
|
" # Log metrics\n",
|
|
" mlflow.log_metric(\"accuracy\", accuracy)\n",
|
|
" \n",
|
|
" # Log model\n",
|
|
" mlflow.sklearn.log_model(\n",
|
|
" model, \n",
|
|
" \"model\",\n",
|
|
" registered_model_name=\"iris-classifier\"\n",
|
|
" )\n",
|
|
" \n",
|
|
" print(f\"\\nRun ID: {run.info.run_id}\")\n",
|
|
" print(f\"Accuracy: {accuracy:.4f}\")\n",
|
|
" print(f\"\\nClassification Report:\")\n",
|
|
" print(classification_report(y_test, y_pred, target_names=iris.target_names))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## 5. Get Model Information for KServe Deployment"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": "# Get the latest version of the registered model\nclient = mlflow.tracking.MlflowClient()\nmodel_name = \"iris-classifier\"\n\n# Use search_model_versions instead of deprecated get_latest_versions\nmodel_versions = client.search_model_versions(f\"name='{model_name}'\")\nlatest_version = max(model_versions, key=lambda x: int(x.version))\n\nprint(f\"\\n=== Model Information for KServe ===\")\nprint(f\"Model Name: {model_name}\")\nprint(f\"Version: {latest_version.version}\")\nprint(f\"Run ID: {latest_version.run_id}\")\nprint(f\"\\nArtifact URI: {latest_version.source}\")\nprint(f\"\\nUse this information to configure the KServe InferenceService.\")"
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## 6. Test Local Prediction"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Test with a sample\n",
|
|
"sample_input = [[5.1, 3.5, 1.4, 0.2]] # Should predict 'setosa'\n",
|
|
"prediction = model.predict(sample_input)\n",
|
|
"predicted_class = iris.target_names[prediction[0]]\n",
|
|
"\n",
|
|
"print(f\"\\nTest Input: {sample_input[0]}\")\n",
|
|
"print(f\"Predicted Class: {predicted_class}\")\n",
|
|
"print(f\"\\nThis sample will be used to test the KServe deployment.\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Next Steps\n",
|
|
"\n",
|
|
"1. Note the Model Name and Version from above\n",
|
|
"2. Deploy the model using KServe with the InferenceService YAML\n",
|
|
"3. Test the deployed model endpoint"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.9.0"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 4
|
|
} |