buun-stack/examples/kserve-mlflow-iris/01-train-and-register.ipynb

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Iris Classification with MLflow and KServe\n",
    "\n",
    "This notebook demonstrates:\n",
    "1. Training a simple scikit-learn model on the Iris dataset\n",
    "2. Logging the model to MLflow\n",
    "3. Preparing the model for deployment with KServe"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. Setup and Install Dependencies"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Install required packages\n",
    "!pip install mlflow scikit-learn boto3 -q"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import mlflow\n",
    "import mlflow.sklearn\n",
    "from sklearn.datasets import load_iris\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.metrics import accuracy_score, classification_report"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": "## 2. Configure MLflow\n\nSet MLflow tracking URI and authentication credentials."
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": "# MLflow configuration\nMLFLOW_TRACKING_URI = os.getenv('MLFLOW_TRACKING_URI', 'http://mlflow.mlflow.svc.cluster.local')\nmlflow.set_tracking_uri(MLFLOW_TRACKING_URI)\n\n# MLflow authentication\nos.environ['MLFLOW_TRACKING_USERNAME'] = os.getenv('MLFLOW_TRACKING_USERNAME', '')\nos.environ['MLFLOW_TRACKING_PASSWORD'] = os.getenv('MLFLOW_TRACKING_PASSWORD', '')\n\nprint(f\"MLflow Tracking URI: {MLFLOW_TRACKING_URI}\")\nprint(f\"MLflow Username: {os.environ['MLFLOW_TRACKING_USERNAME']}\")"
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. Load and Prepare Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load Iris dataset\n",
    "iris = load_iris()\n",
    "X = iris.data\n",
    "y = iris.target\n",
    "\n",
    "# Split data\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
    "\n",
    "print(f\"Training samples: {len(X_train)}\")\n",
    "print(f\"Test samples: {len(X_test)}\")\n",
    "print(f\"Classes: {iris.target_names}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4. Train Model with MLflow Tracking"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Set experiment\n",
    "experiment_name = \"iris-classification\"\n",
    "mlflow.set_experiment(experiment_name)\n",
    "\n",
    "# Start MLflow run\n",
    "with mlflow.start_run(run_name=\"logistic-regression\") as run:\n",
    "    # Train model\n",
    "    model = LogisticRegression(max_iter=200, random_state=42)\n",
    "    model.fit(X_train, y_train)\n",
    "    \n",
    "    # Predictions\n",
    "    y_pred = model.predict(X_test)\n",
    "    accuracy = accuracy_score(y_test, y_pred)\n",
    "    \n",
    "    # Log parameters\n",
    "    mlflow.log_param(\"model_type\", \"LogisticRegression\")\n",
    "    mlflow.log_param(\"max_iter\", 200)\n",
    "    \n",
    "    # Log metrics\n",
    "    mlflow.log_metric(\"accuracy\", accuracy)\n",
    "    \n",
    "    # Log model\n",
    "    mlflow.sklearn.log_model(\n",
    "        model, \n",
    "        \"model\",\n",
    "        registered_model_name=\"iris-classifier\"\n",
    "    )\n",
    "    \n",
    "    print(f\"\\nRun ID: {run.info.run_id}\")\n",
    "    print(f\"Accuracy: {accuracy:.4f}\")\n",
    "    print(f\"\\nClassification Report:\")\n",
    "    print(classification_report(y_test, y_pred, target_names=iris.target_names))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 5. Get Model Information for KServe Deployment"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": "# Get the latest version of the registered model\nclient = mlflow.tracking.MlflowClient()\nmodel_name = \"iris-classifier\"\n\n# Use search_model_versions instead of deprecated get_latest_versions\nmodel_versions = client.search_model_versions(f\"name='{model_name}'\")\nlatest_version = max(model_versions, key=lambda x: int(x.version))\n\nprint(f\"\\n=== Model Information for KServe ===\")\nprint(f\"Model Name: {model_name}\")\nprint(f\"Version: {latest_version.version}\")\nprint(f\"Run ID: {latest_version.run_id}\")\nprint(f\"\\nArtifact URI: {latest_version.source}\")\nprint(f\"\\nUse this information to configure the KServe InferenceService.\")"
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 6. Test Local Prediction"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Test with a sample\n",
    "sample_input = [[5.1, 3.5, 1.4, 0.2]]  # Should predict 'setosa'\n",
    "prediction = model.predict(sample_input)\n",
    "predicted_class = iris.target_names[prediction[0]]\n",
    "\n",
    "print(f\"\\nTest Input: {sample_input[0]}\")\n",
    "print(f\"Predicted Class: {predicted_class}\")\n",
    "print(f\"\\nThis sample will be used to test the KServe deployment.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Next Steps\n",
    "\n",
    "1. Note the Model Name and Version from above\n",
    "2. Deploy the model using KServe with the InferenceService YAML\n",
    "3. Test the deployed model endpoint"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}