{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Iris Classification with MLflow and KServe\n",
    "\n",
    "This notebook demonstrates:\n",
    "1. Training a simple scikit-learn model on the Iris dataset\n",
    "2. Logging the model to MLflow\n",
    "3. Preparing the model for deployment with KServe"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. Setup and Install Dependencies"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Install required packages.\n",
    "# %pip (rather than !pip) installs into the environment of the running kernel.\n",
    "%pip install mlflow scikit-learn boto3 -q"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import mlflow\n",
    "import mlflow.sklearn\n",
    "from sklearn.datasets import load_iris\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.metrics import accuracy_score, classification_report"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Configure MLflow\n",
    "\n",
    "Set the MLflow tracking URI, check the authentication credentials, and define the settings shared by the cells below."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# MLflow configuration\n",
    "MLFLOW_TRACKING_URI = os.getenv('MLFLOW_TRACKING_URI', 'http://mlflow.mlflow.svc.cluster.local')\n",
    "mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)\n",
    "\n",
    "# MLflow authentication: the credentials are only read here. Writing the values\n",
    "# back into os.environ would create empty variables whenever they are unset.\n",
    "mlflow_username = os.getenv('MLFLOW_TRACKING_USERNAME', '')\n",
    "mlflow_password_is_set = bool(os.getenv('MLFLOW_TRACKING_PASSWORD'))\n",
    "\n",
    "# Shared settings, defined once so the values used for training and the values\n",
    "# logged to MLflow cannot drift apart.\n",
    "EXPERIMENT_NAME = \"iris-classification\"\n",
    "MODEL_NAME = \"iris-classifier\"\n",
    "MAX_ITER = 200\n",
    "TEST_SIZE = 0.2\n",
    "SEED = 42\n",
    "\n",
    "print(f\"MLflow Tracking URI: {MLFLOW_TRACKING_URI}\")\n",
    "print(f\"MLflow Username: {mlflow_username or '<not set>'}\")\n",
    "print(f\"MLflow Password set: {mlflow_password_is_set}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. Load and Prepare Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load Iris dataset\n",
    "iris = load_iris()\n",
    "X = iris.data\n",
    "y = iris.target\n",
    "\n",
    "# Split data (fixed seed so the split is the same on every run)\n",
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    X, y, test_size=TEST_SIZE, random_state=SEED\n",
    ")\n",
    "\n",
    "print(f\"Training samples: {len(X_train)}\")\n",
    "print(f\"Test samples: {len(X_test)}\")\n",
    "print(f\"Classes: {iris.target_names}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4. Train Model with MLflow Tracking"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Set experiment\n",
    "experiment_name = EXPERIMENT_NAME\n",
    "mlflow.set_experiment(experiment_name)\n",
    "\n",
    "# Start MLflow run\n",
    "with mlflow.start_run(run_name=\"logistic-regression\") as run:\n",
    "    # Train model\n",
    "    model = LogisticRegression(max_iter=MAX_ITER, random_state=SEED)\n",
    "    model.fit(X_train, y_train)\n",
    "\n",
    "    # Predictions\n",
    "    y_pred = model.predict(X_test)\n",
    "    accuracy = accuracy_score(y_test, y_pred)\n",
    "\n",
    "    # Log parameters from the same constants that configured the split and the\n",
    "    # model, so the recorded values always match what was actually used.\n",
    "    mlflow.log_param(\"model_type\", \"LogisticRegression\")\n",
    "    mlflow.log_param(\"max_iter\", MAX_ITER)\n",
    "    mlflow.log_param(\"random_state\", SEED)\n",
    "    mlflow.log_param(\"test_size\", TEST_SIZE)\n",
    "\n",
    "    # Log metrics\n",
    "    mlflow.log_metric(\"accuracy\", accuracy)\n",
    "\n",
    "    # Log model and register it under MODEL_NAME\n",
    "    mlflow.sklearn.log_model(\n",
    "        model,\n",
    "        \"model\",\n",
    "        registered_model_name=MODEL_NAME\n",
    "    )\n",
    "\n",
    "    print(f\"\\nRun ID: {run.info.run_id}\")\n",
    "    print(f\"Accuracy: {accuracy:.4f}\")\n",
    "    print(\"\\nClassification Report:\")\n",
    "    print(classification_report(y_test, y_pred, target_names=iris.target_names))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 5. Get Model Information for KServe Deployment"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Get the latest version of the registered model\n",
    "client = mlflow.tracking.MlflowClient()\n",
    "model_name = MODEL_NAME\n",
    "\n",
    "# Use search_model_versions instead of deprecated get_latest_versions\n",
    "model_versions = client.search_model_versions(f\"name='{model_name}'\")\n",
    "\n",
    "# max() on an empty result raises an unhelpful ValueError, so fail with a\n",
    "# message that names the model instead.\n",
    "if not model_versions:\n",
    "    raise RuntimeError(\n",
    "        f\"No versions found for registered model '{model_name}'. \"\n",
    "        \"Run the training cell and check that the model was registered.\"\n",
    "    )\n",
    "\n",
    "# Versions are compared as integers so that, e.g., version 10 ranks above version 9.\n",
    "latest_version = max(model_versions, key=lambda x: int(x.version))\n",
    "\n",
    "print(\"\\n=== Model Information for KServe ===\")\n",
    "print(f\"Model Name: {model_name}\")\n",
    "print(f\"Version: {latest_version.version}\")\n",
    "print(f\"Run ID: {latest_version.run_id}\")\n",
    "print(f\"\\nArtifact URI: {latest_version.source}\")\n",
    "print(\"\\nUse this information to configure the KServe InferenceService.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 6. Test Local Prediction"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Test with a sample\n",
    "sample_input = [[5.1, 3.5, 1.4, 0.2]]  # Should predict 'setosa'\n",
    "prediction = model.predict(sample_input)\n",
    "predicted_class = iris.target_names[prediction[0]]\n",
    "\n",
    "print(f\"\\nTest Input: {sample_input[0]}\")\n",
    "print(f\"Predicted Class: {predicted_class}\")\n",
    "print(\"\\nThis sample will be used to test the KServe deployment.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Next Steps\n",
    "\n",
    "1. Note the Model Name and Version from above\n",
    "2. Deploy the model using KServe with the InferenceService YAML\n",
    "3. Test the deployed model endpoint"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}