
Added MPM end to end demo #191

Merged
merged 1 commit on Aug 19, 2024
2 changes: 2 additions & 0 deletions guides/MPM/end_to_end_example/.gitignore
@@ -0,0 +1,2 @@
training/preprocessed_data.csv
__pycache__
1,201 changes: 1,201 additions & 0 deletions guides/MPM/end_to_end_example/data_processing/credit_scoring_dataset.csv

Large diffs are not rendered by default.

68 changes: 68 additions & 0 deletions guides/MPM/end_to_end_example/data_processing/data_processing.py
@@ -0,0 +1,68 @@
import comet_ml
import pandas as pd
from io import StringIO
import os

def get_raw_data(workspace_name: str, artifact_name: str):
"""
In this function, we will check if the raw data exists in Comet Artifacts. If it does, we will download it from there,
if not we will upload it from the local directory.

Once the file is available locally, we will load it into a pandas dataframe and return it.
"""
exp = comet_ml.get_running_experiment()

try:
artifact = exp.get_artifact(artifact_name=f"{artifact_name}_raw")

# Download the artifact
artifact.download(path="./")
    except Exception as e:
        # Expected on the first run, before the raw-data artifact has been logged
        print(f"Artifact not found ({e}), uploading the local copy instead")
artifact = comet_ml.Artifact(name=f"{artifact_name}_raw", artifact_type="dataset")
artifact.add("./credit_scoring_dataset.csv")
exp.log_artifact(artifact)

df = pd.read_csv("./credit_scoring_dataset.csv")
return df

def preprocess_data(df: pd.DataFrame, artifact_name: str):
    """
    Preprocess the data to make it ready for the model, and store the preprocessed
    data in a new Comet Artifact.
    """
# Select the relevant columns
df = df.loc[:, ['CustAge', 'CustIncome', 'EmpStatus', 'UtilRate', 'OtherCC', 'ResStatus', 'TmAtAddress', 'TmWBank',
'probdefault']]

# Rename the target column
    df.rename(columns={'probdefault': 'probability_default'}, inplace=True)

# Convert the categorical columns to category type
for c in ['EmpStatus', 'OtherCC', 'ResStatus']:
df[c] = df[c].astype('category')

# Save the preprocessed data to a new Comet Artifact
csv_buffer = StringIO()
df.to_csv(csv_buffer, index=False)
csv_buffer.seek(0)

artifact = comet_ml.Artifact(name=f"{artifact_name}_preprocessed", artifact_type="dataset")
artifact.add(local_path_or_data=csv_buffer, logical_path="preprocessed_data.csv")

exp = comet_ml.get_running_experiment()
exp.log_artifact(artifact)

return df

if __name__ == "__main__":
    workspace_name = os.environ["COMET_WORKSPACE"]
    project_name = os.environ["COMET_PROJECT_NAME"]
    # Name the dataset artifacts after the project
    artifact_name = os.environ["COMET_PROJECT_NAME"]

exp = comet_ml.Experiment(workspace=workspace_name, project_name=project_name)
df = get_raw_data(workspace_name, artifact_name)

    processed_df = preprocess_data(df, artifact_name)

print("Data preprocessing complete.")
62 changes: 62 additions & 0 deletions guides/MPM/end_to_end_example/readme.md
@@ -0,0 +1,62 @@
# MPM example scripts

The MPM examples are all based on the same credit scoring example; the goal of the model is to identify users who are likely to default on their loan.

This folder contains three different sets of scripts that showcase MPM:
* `data_processing`: script that processes the raw data and creates a new CSV file with the model's features
* `training`: script that trains a machine learning model and uploads it to Comet's Model Registry
* `serving`: FastAPI inference server that downloads a model from Comet's Model Registry and logs its predictions to MPM

## Setup
In order to run these demo scripts, you will need to set the following environment variables:
```bash
export COMET_API_KEY="<Comet API Key>"
export COMET_WORKSPACE="<Comet workspace to log data to>"
export COMET_PROJECT_NAME="<Comet project name>"
export COMET_MODEL_REGISTRY_NAME="<Comet model registry name>"

export COMET_URL_OVERRIDE="<Experiment Management (EM) endpoint, similar format to https://www.comet.com/clientlib/>"
export COMET_URL="<MPM ingestion endpoint, similar format to https://www.comet.com/>"
```

You will also need to install the Python libraries listed in `requirements.txt` (`pip install -r requirements.txt`).

## Data processing

For this demo, we will be using a simple credit scoring dataset available in the `data_processing` folder.

The preprocessing step is quite simple in this demo, but it showcases how you can use Comet Artifacts to track all of your data processing steps.

The code can be run using:
```bash
cd data_processing
python data_processing.py
```
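
Once the script has run, downstream steps (such as training) can fetch the preprocessed dataset by name. A minimal sketch, assuming the same `COMET_*` environment variables are set and the artifact naming used by the script above:
```python
import os
import comet_ml

# Experiment picks up COMET_API_KEY, COMET_WORKSPACE and
# COMET_PROJECT_NAME from the environment
exp = comet_ml.Experiment()

# The data_processing script logs the artifact as "<project name>_preprocessed"
artifact = exp.get_artifact(f"{os.environ['COMET_PROJECT_NAME']}_preprocessed")
artifact.download(path="./")

exp.end()
```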

## Training
For this demo, we train a LightGBM model that we then upload to the Model Registry, as sketched below.
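
A minimal sketch of what the training script does, assuming the preprocessed CSV from the previous step is available locally; the LightGBM parameters and file names here are illustrative rather than the script's actual values, and `Experiment.register_model` assumes a recent `comet_ml` version:
```python
import os
import comet_ml
import lightgbm as lgb
import pandas as pd

exp = comet_ml.Experiment()  # picks up the COMET_* environment variables

df = pd.read_csv("preprocessed_data.csv")
for c in ["EmpStatus", "OtherCC", "ResStatus"]:
    df[c] = df[c].astype("category")

X = df.drop(columns=["probability_default"])
y = df["probability_default"]

# Regression on the probability of default
model = lgb.train({"objective": "regression"}, lgb.Dataset(X, label=y))
model.save_model("model.txt")

# Log the trained model and register it in the Model Registry
model_name = os.environ["COMET_MODEL_REGISTRY_NAME"]
exp.log_model(model_name, "model.txt")
exp.register_model(model_name)
exp.end()
```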

The code can be run using:
```bash
cd training
python model_training.py
```

## Serving
**Dependency**: In order to use this inference server, you will first need to train a model and upload it to the Model Registry using the training script.

The inference server is built using FastAPI and demonstrates how to use both the Model Registry to store models and MPM to log predictions.
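
The core prediction-logging pattern looks roughly like the sketch below. It assumes the model file has already been downloaded from the Model Registry, and the endpoint and field handling are illustrative; `CometMPM` and `log_event` come from the `comet_mpm` package:
```python
import os
import uuid

import lightgbm as lgb
import pandas as pd
from comet_mpm import CometMPM
from fastapi import FastAPI

app = FastAPI()

# Assumes the model was already downloaded from the Model Registry
model = lgb.Booster(model_file="model.txt")

MPM = CometMPM(
    workspace_name=os.environ["COMET_WORKSPACE"],
    model_name=os.environ["COMET_MODEL_REGISTRY_NAME"],
    model_version="1.0.0",
)

@app.post("/prediction")
async def prediction(features: dict):
    # Restore the categorical dtypes the model was trained with
    df = pd.DataFrame([features])
    for c in ["EmpStatus", "OtherCC", "ResStatus"]:
        df[c] = df[c].astype("category")
    score = float(model.predict(df)[0])

    # Log the prediction to MPM so it shows up in the monitoring dashboards
    MPM.log_event(
        prediction_id=str(uuid.uuid4()),
        input_features=features,
        output_value=score >= 0.5,
        output_probability=score,
    )
    return {"probability_default": score}
```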

The code can be run using:
```bash
cd serving
uvicorn main:app --reload
```

Once the code has been run, an inference server will be available at `http://localhost:8000` with the following endpoints:
* `http://localhost:8000/`: returns the string `FastAPI inference service`, indicating that the inference server is running
* `http://localhost:8000/health_check`: simple health check to make sure the server is running and accepting requests
* `http://localhost:8000/prediction`: makes a prediction and logs it to MPM (see the example request below)
* `http://localhost:8000/create_demo_data`: creates 10,000 predictions over a one-week period to populate MPM dashboards
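
As an example, the prediction endpoint can be called with a payload like the one below; the field names are assumed to mirror the dataset's feature columns, and the values are illustrative:
```python
import requests

payload = {
    "CustAge": 45,
    "CustIncome": 35000,
    "EmpStatus": "Employed",
    "UtilRate": 0.2,
    "OtherCC": "Yes",
    "ResStatus": "Home Owner",
    "TmAtAddress": 22,
    "TmWBank": 12,
}

response = requests.post("http://localhost:8000/prediction", json=payload)
print(response.json())
```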

**Note:** It can take a few minutes for the data to appear in the debugger tab in the MPM UI.
10 changes: 10 additions & 0 deletions guides/MPM/end_to_end_example/requirements.txt
@@ -0,0 +1,10 @@
comet_ml
pandas
numpy
lightgbm
fastapi
requests
asyncio
tqdm
comet_mpm
uvicorn