From cc2cbab9e8c9b2e8e5b894ac272fb1936af6becb Mon Sep 17 00:00:00 2001
From: Alexej Penner
Date: Thu, 21 Dec 2023 03:12:29 +0100
Subject: [PATCH] Further fixes

---
 customer-satisfaction/README.md               | 45 +++++--------
 customer-satisfaction/config.yaml             | 13 +++---
 customer-satisfaction/model/data_cleaning.py  |  3 ++
 .../pipelines/deployment_pipeline.py          | 21 ++++++---
 .../pipelines/training_pipeline.py            |  2 +-
 customer-satisfaction/run_deployment.py       | 44 ++++++------
 customer-satisfaction/steps/model_loader.py   |  1 -
 customer-satisfaction/steps/model_promoter.py | 17 ++++---
 8 files changed, 62 insertions(+), 84 deletions(-)

diff --git a/customer-satisfaction/README.md b/customer-satisfaction/README.md
index e1cc423b..5e06a596 100644
--- a/customer-satisfaction/README.md
+++ b/customer-satisfaction/README.md
@@ -117,27 +117,25 @@ Our standard training pipeline consists of several steps:
   using [MLflow autologging](https://www.mlflow.org/docs/latest/tracking.html).
 - `evaluation`: This step will evaluate the model and save the metrics -- using
   MLflow autologging -- into the artifact store.
+- `model_promoter`: This step compares the newly trained model against the
+  previous production model and promotes the new model if it performs better.
 
 ### Deployment Pipeline
 
 We have another pipeline, the `deployment_pipeline.py`, that extends the
 training pipeline, and implements a continuous deployment workflow. It ingests
 and processes input data, trains a model and then (re)deploys the prediction
-server that serves the model if it meets our evaluation criteria. The criteria
-that we have chosen is a configurable threshold on
-the [MSE](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.mean_squared_error.html)
-of the training. The first four steps of the pipeline are the same as above, but
+server that serves the model if it meets the promotion criteria.
+The first five steps of the pipeline are the same as above, but
 we have added the following additional ones:
 
-- `deployment_trigger`: The step checks whether the newly trained model meets
-  the criteria set for deployment.
+- `model_loader`: This step loads the current `production` model from the ZenML Model Registry.
 - `model_deployer`: This step deploys the model as a service using MLflow (if
   deployment criteria is met).
 
-In the deployment pipeline, ZenML's MLflow tracking integration is used for
-logging the hyperparameter values and the trained model itself and the model
-evaluation metrics -- as MLflow experiment tracking artifacts -- into the local
-MLflow backend. This pipeline also launches a local MLflow deployment server to
+In the deployment pipeline, ZenML's Model Control Plane is used to attach
+the evaluation metrics as metadata to the trained model.
+This pipeline also launches a local MLflow deployment server to
 serve the latest MLflow model if its accuracy is above a configured threshold.
 
 The MLflow deployment server runs locally as a daemon process that will continue
@@ -151,9 +149,9 @@ model service asynchronously from the pipeline logic. This can be done easily
 with ZenML within the Streamlit code:
 
 ```python
-service = load_last_service_from_step(
+service = model_deployer.find_model_server(
     pipeline_name="continuous_deployment_pipeline",
-    step_name="model_deployer",
+    pipeline_step_name="mlflow_model_deployer_step",
     running=True,
 )
 ...
@@ -200,28 +198,7 @@ streamlit run streamlit_app.py
 
 ## :question: FAQ
 
-1. When running the continuous deployment pipeline, I get an error
-   stating: `No Step found for the name mlflow_deployer`.
-
-   Solution: It happens because your artifact store is overridden after running
-   the continuous deployment pipeline. So, you need to delete the artifact store
-   and rerun the pipeline. You can get the location of the artifact store by
-   running the following command:
-
-   ```bash
-   zenml artifact-store describe
-   ```
-
-   and then you can delete the artifact store with the following command:
-
-   **Note**: This is a dangerous / destructive command! Please enter your path
-   carefully, otherwise it may delete other folders from your computer.
-
-   ```bash
-   rm -rf PATH
-   ```
-
-2. When running the continuous deployment pipeline, I get the following
+1. When running the continuous deployment pipeline, I get the following
    error: `No Environment component with name mlflow is currently registered.`
 
    Solution: You forgot to install the MLflow integration in your ZenML
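The README's Streamlit snippet above is abbreviated (the `...` stands for application code). For readers trying out the updated API, here is a minimal, self-contained sketch of locating the deployed server and sending it a request; the request shape (one row of 12 features) is a placeholder assumption, not the project's real schema:

```python
import numpy as np

from zenml.integrations.mlflow.model_deployers.mlflow_model_deployer import (
    MLFlowModelDeployer,
)

# Locate the prediction server started by the continuous deployment pipeline
model_deployer = MLFlowModelDeployer.get_active_model_deployer()
services = model_deployer.find_model_server(
    pipeline_name="continuous_deployment_pipeline",
    pipeline_step_name="mlflow_model_deployer_step",
    running=True,
)

if services:
    # Stand-in request: replace with one row of real, preprocessed features
    # (the column count of 12 is a placeholder, not the actual schema)
    request = np.random.rand(1, 12)
    prediction = services[0].predict(request)
    print(prediction)
```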
diff --git a/customer-satisfaction/config.yaml b/customer-satisfaction/config.yaml
index aee04294..62ffe213 100644
--- a/customer-satisfaction/config.yaml
+++ b/customer-satisfaction/config.yaml
@@ -1,12 +1,15 @@
 enable_cache: False
+
 extra:
   tags: zenml-projects
+
 settings:
   docker:
     required_integrations:
       - mlflow
-steps:
-  model_train:
-    experiment_tracker: mlflow_tracker
-  evaluation:
-    experiment_tracker: mlflow_tracker
+#
+#steps:
+#  train_model:
+#    experiment_tracker: mlflow_tracker
+#  evaluation:
+#    experiment_tracker: mlflow_tracker
\ No newline at end of file
diff --git a/customer-satisfaction/model/data_cleaning.py b/customer-satisfaction/model/data_cleaning.py
index 0b510270..714ddf97 100644
--- a/customer-satisfaction/model/data_cleaning.py
+++ b/customer-satisfaction/model/data_cleaning.py
@@ -52,6 +52,9 @@ def preprocess_data(self) -> pd.DataFrame:
             ]
             self.df = self.df.drop(cols_to_drop, axis=1)
 
+            # Catch-all fillna in case any missing values were missed above
+            self.df.fillna(self.df.mean(), inplace=True)
+
             return self.df
         except Exception as e:
             logging.error(e)
diff --git a/customer-satisfaction/pipelines/deployment_pipeline.py b/customer-satisfaction/pipelines/deployment_pipeline.py
index 04b94c73..1f0a4c24 100644
--- a/customer-satisfaction/pipelines/deployment_pipeline.py
+++ b/customer-satisfaction/pipelines/deployment_pipeline.py
@@ -18,14 +18,21 @@
     enable_cache=False,
     model_version=model_version
 )
-def continuous_deployment_pipeline():
+def continuous_deployment_pipeline(
+    model_type: str = "lightgbm"
+):
     """Run a training job and deploy an mlflow model deployment."""
-    model, is_promoted = customer_satisfaction_training_pipeline()
+    # Run a training pipeline
+    model, is_promoted = customer_satisfaction_training_pipeline(model_type=model_type)
+
     # Fetch the production model from the Model Registry
-    production_model = model_loader(model_version.name)
+    production_model = model_loader(model_name=model_version.name, after="model_promoter")
+
+    # Only actually redeploy the model if the most recent training
+    # led to a model promotion
     mlflow_model_deployer_step(
         workers=3,
-        deploy_decision=True,
+        deploy_decision=is_promoted,
         model=production_model
     )
@@ -34,5 +41,9 @@
 def inference_pipeline():
     """Run a batch inference job with data loaded from an API."""
     batch_data = dynamic_importer()
-    model_deployment_service = prediction_service_loader()
+    model_deployment_service = prediction_service_loader(
+        pipeline_name="continuous_deployment_pipeline",
+        step_name="mlflow_model_deployer_step",
+        running=True
+    )
     predictor(model_deployment_service, batch_data)
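The `model_loader` step invoked above resolves the model through the Model Control Plane by pinning the version reference to the `production` stage. A hedged sketch of that lookup outside a pipeline (the model name is an assumption borrowed from `run_deployment.py` and may differ from the project's actual configuration):

```python
from zenml import ModelVersion

# Pin a model version reference to whatever version currently holds
# the "production" stage; the name below is an assumption taken from
# run_deployment.py, not necessarily the configured model name
model_version = ModelVersion(
    name="Customer_Satisfaction_Predictor",
    version="production",
)

# Load the trained model artifact linked to that version
production_model = model_version.get_artifact("model").load()
```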
diff --git a/customer-satisfaction/pipelines/training_pipeline.py b/customer-satisfaction/pipelines/training_pipeline.py
index c71f1444..aa7023ed 100644
--- a/customer-satisfaction/pipelines/training_pipeline.py
+++ b/customer-satisfaction/pipelines/training_pipeline.py
@@ -25,5 +25,5 @@ def customer_satisfaction_training_pipeline(
     x_train, x_test, y_train, y_test = clean_data(df)
     model = train_model(x_train=x_train, x_test=x_test, y_train=y_train, y_test=y_test, model_type=model_type)
     mse, rmse = evaluation(model, x_test, y_test)
-    is_promoted = model_promoter(accuracy=mse)
+    is_promoted = model_promoter(mse=mse)
     return model, is_promoted
\ No newline at end of file
diff --git a/customer-satisfaction/run_deployment.py b/customer-satisfaction/run_deployment.py
index 0b020c13..433c885a 100644
--- a/customer-satisfaction/run_deployment.py
+++ b/customer-satisfaction/run_deployment.py
@@ -1,5 +1,4 @@
 import click
-from zenml.client import Client
 
 from pipelines.deployment_pipeline import (
     continuous_deployment_pipeline,
@@ -7,35 +6,29 @@
     predictor,
 )
 from steps.dynamic_importer import dynamic_importer
-from steps.deployment_trigger import deployment_trigger
 from steps.prediction_service_loader import prediction_service_loader
 from rich import print
-from steps.clean_data import clean_data
-from steps.evaluation import evaluation
-from steps.ingest_data import ingest_data
-from steps.train_model import train_model
 from zenml.integrations.mlflow.mlflow_utils import get_tracking_uri
 from zenml.integrations.mlflow.model_deployers.mlflow_model_deployer import (
     MLFlowModelDeployer,
 )
-from zenml.integrations.mlflow.steps import (
-    mlflow_model_deployer_step,
-)
 
 
 @click.command()
-@click.option(
-    "--min-accuracy",
-    default=1.8,
-    help="Minimum mse required to deploy the model",
-)
 @click.option(
     "--stop-service",
     is_flag=True,
     default=False,
     help="Stop the prediction service when done",
 )
-def run_main(min_accuracy: float, stop_service: bool, model_name="Customer_Satisfaction_Predictor"):
+@click.option(
+    "--model_type",
+    "-m",
+    type=click.Choice(["lightgbm", "randomforest", "xgboost"]),
+    default="xgboost",
+    help="The type of model to train."
+)
+def run_main(stop_service: bool, model_type: str, model_name="Customer_Satisfaction_Predictor"):
     """Run the mlflow example pipeline"""
     if stop_service:
         # get the MLflow model deployer stack component
@@ -53,18 +46,11 @@ def run_main(min_accuracy: float, stop_service: bool, model_name="Customer_Satis
         existing_services[0].stop(timeout=10)
         return
 
-    deployment = continuous_deployment_pipeline(min_accuracy=min_accuracy)
-    deployment.run(config_path="config.yaml")
+    continuous_deployment_pipeline.with_options(config_path="config.yaml")(model_type=model_type)
 
-    inference = inference_pipeline(
-        dynamic_importer=dynamic_importer(),
-        prediction_service_loader=prediction_service_loader(
-            pipeline_name="continuous_deployment_pipeline",
-            step_name="model_deployer"
-        ),
-        predictor=predictor(),
-    )
-    inference.run()
+    model_deployer = MLFlowModelDeployer.get_active_model_deployer()
+
+    inference_pipeline()
 
     print(
         "Now run \n "
         f"    mlflow ui --backend-store-uri {get_tracking_uri()}\n"
         "To inspect your experiment runs within the mlflow UI.\n"
         "You can find your runs tracked within the `mlflow_example_pipeline`"
         "experiment. Here you'll also be able to compare the two runs.)"
     )
-
-    model_deployer = MLFlowModelDeployer.get_active_model_deployer()
-
-    # fetch existing services with same pipeline name, step name and model name
     service = model_deployer.find_model_server(
         pipeline_name="continuous_deployment_pipeline",
-        pipeline_step_name="model_deployer",
+        pipeline_step_name="mlflow_model_deployer_step",
        running=True,
     )
 
@@ -92,6 +75,5 @@
             f"`--stop-service` argument."
         )
 
-
 if __name__ == "__main__":
     run_main()
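A note on the new entry point above: `with_options(config_path=...)` first binds `config.yaml` (cache settings, Docker integrations) and returns a configured copy of the pipeline, which is then called with runtime parameters to trigger the run. The same two-step pattern in isolation, assuming the repository's `config.yaml` is present:

```python
from pipelines.deployment_pipeline import continuous_deployment_pipeline

# Step 1: bind config.yaml to the pipeline; `with_options` returns a
# configured copy without running anything yet
configured = continuous_deployment_pipeline.with_options(
    config_path="config.yaml"
)

# Step 2: call the configured copy with runtime parameters to start a run
configured(model_type="xgboost")
```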
diff --git a/customer-satisfaction/steps/model_loader.py b/customer-satisfaction/steps/model_loader.py
index a0934d73..63e19787 100644
--- a/customer-satisfaction/steps/model_loader.py
+++ b/customer-satisfaction/steps/model_loader.py
@@ -12,7 +12,6 @@ def model_loader(
     Args:
         model_name: Name of the Model to load
     """
-
     model_version = ModelVersion(
         name=model_name,
         version="production"
diff --git a/customer-satisfaction/steps/model_promoter.py b/customer-satisfaction/steps/model_promoter.py
index 838addc3..59949615 100644
--- a/customer-satisfaction/steps/model_promoter.py
+++ b/customer-satisfaction/steps/model_promoter.py
@@ -6,7 +6,7 @@
 
 @step
 def model_promoter(
-    accuracy: float,
+    mse: float,
     stage: str = "production"
 ) -> bool:
     """Model promotion step
@@ -15,7 +15,7 @@ def model_promoter(
     the previous production model
 
     Args:
-        accuracy: Accuracy of the model.
+        mse: Mean-squared error of the model.
         stage: Which stage to promote the model to.
 
     Returns:
@@ -31,17 +31,20 @@
     )
 
     try:
+        # In case there already is a model version at the correct stage
        previous_production_model_version_mse = float(
             previous_production_model.get_artifact("model").run_metadata["mse"].value
         )
     except RuntimeError:
-        previous_production_model_version_mse = 0.0
+        # In case no model version has been promoted before,
+        # default to a threshold value well above the new mse
+        previous_production_model_version_mse = mse + 1000
 
-    if accuracy < previous_production_model_version_mse:
+    if mse > previous_production_model_version_mse:
         logger.info(
-            f"Model accuracy {accuracy*100:.2f}% is below the accuracy of "
-            f"the previous production model "
-            f"{previous_production_model_version_mse*100:.2f}% ! "
+            f"Model mean-squared error {mse:.2f} is higher than "
+            f"the MSE of the previous production model "
+            f"{previous_production_model_version_mse:.2f}. "
             f"Not promoting model."
         )
         is_promoted = False
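The hunk above ends before the promotion branch of `model_promoter`. As a hedged sketch only (the step name, `get_step_context().model_version`, and the `set_stage` call are assumptions based on the Model Control Plane of this ZenML era, not the project's verbatim code), promotion typically sets the stage on the model version attached to the current run:

```python
from zenml import get_step_context, step
from zenml.logger import get_logger

logger = get_logger(__name__)


@step
def promote_model_sketch(mse: float, stage: str = "production") -> bool:
    """Hedged sketch of the promotion branch, not the project's verbatim code."""
    # The model version attached to the current pipeline run
    # (assumption: the pipeline was configured with a model_version)
    model_version = get_step_context().model_version

    # (Comparison against the previous production MSE elided; see the
    # model_promoter.py hunk above for how that value is fetched.)
    logger.info(f"Promoting current model version to stage '{stage}'.")

    # `force=True` replaces whichever version currently holds the stage
    model_version.set_stage(stage, force=True)
    return True
```

Passing `force=True` demotes the version currently holding the `production` stage, which matches the compare-then-replace flow this step implements.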