Skip to content

Commit

Permalink
Adding PyMC experimental
Browse files Browse the repository at this point in the history
  • Loading branch information
James Bristow committed Mar 10, 2024
1 parent 775de09 commit f557854
Show file tree
Hide file tree
Showing 11 changed files with 219 additions and 8 deletions.
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
# pymc-mlflow
Testing model deployment of PyMC models using MLFlow
# PyMC MLflow

Testing deployment of PyMC models using MLflow.
8 changes: 4 additions & 4 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@ services:
- 3001:3000
volumes:
- mlflow-prometheus-data:/prometheus
- ./pymc_mlflow:/workspace/mlflow_bentoml:rwx
- ./conf:/workspace/conf:rwx
- ./models:/workspace/models:rwx
- ./bentoml_configuration.yaml:${BENTOML_CONFIG}:rwx
- ./pymc_mlflow:/workspace/pymc_mlflow:rwx
- ./conf:/workspace/conf:rw
- ./models:/workspace/models:rw
- ./bentoml_configuration.yaml:${BENTOML_CONFIG}:rw
- ./bento:${BENTOML_HOME}
command: >
mlflow server --serve-artifacts --host 0.0.0.0 --port 5000
Expand Down
2 changes: 1 addition & 1 deletion models/pymc.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
service: "pymc_mlflow.pymc_mlflow_service:svc"
service: "pymc_mlflow.2_deploy_model:svc"
labels:
owner: bentoml-team
stage: dev
Expand Down
27 changes: 26 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

161 changes: 161 additions & 0 deletions pymc_mlflow/1_train_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
import bentoml
import hydra
from omegaconf import DictConfig
from hydra.utils import instantiate
from sklearn import svm
from sklearn import datasets
import mlflow
from mlflow.models import infer_signature
from typing import Dict, List, Optional, Tuple, Union

import arviz as az
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pymc as pm
import xarray as xr
from pymc_experimental.model_builder import ModelBuilder

from numpy.random import RandomState

class LinearModel(ModelBuilder):
    """Bayesian simple linear regression: ``y ~ Normal(a + b * x, eps)``.

    Subclass of pymc-experimental's ``ModelBuilder``. The single predictor is
    read from the ``"input"`` column when ``X`` is a DataFrame, or from the
    first column when ``X`` is a 2-D array.
    """

    # NOTE(review): presumably consumed by ModelBuilder when saving/loading a
    # fitted model -- confirm against the pymc-experimental version in use.
    _model_type = "LinearModel"
    version = "0.1"

    def build_model(self, X: pd.DataFrame, y: pd.Series, **kwargs):
        """Create the PyMC model and store it on ``self.model``.

        Args:
            X: Predictors; must contain an ``"input"`` column.
            y: Observed targets, as a Series or array-like.
            **kwargs: Accepted for interface compatibility; unused here.
        """
        X_values = X["input"].values
        y_values = y.values if isinstance(y, pd.Series) else y
        self._generate_and_preprocess_model_data(X_values, y_values)

        # Resolve priors from one source of defaults so the fallbacks cannot
        # drift away from get_default_model_config().
        priors = {**self.get_default_model_config(), **self.model_config}

        with pm.Model(coords=self.model_coords) as self.model:
            # Mutable containers so _data_setter can swap the data in later.
            x_data = pm.MutableData("x_data", X_values)
            y_data = pm.MutableData("y_data", y_values)

            a = pm.Normal("a", mu=priors["a_mu_prior"], sigma=priors["a_sigma_prior"])
            b = pm.Normal("b", mu=priors["b_mu_prior"], sigma=priors["b_sigma_prior"])
            eps = pm.HalfNormal("eps", priors["eps_prior"])

            # Likelihood; creating it inside the context registers it on the
            # model, so no local binding is needed.
            pm.Normal(
                "y",
                mu=a + b * x_data,
                sigma=eps,
                shape=x_data.shape,
                observed=y_data,
            )

    def _data_setter(
        self,
        X: Union[pd.DataFrame, np.ndarray],
        y: Optional[Union[pd.Series, np.ndarray]] = None,
    ):
        """Replace the data held by the already-built model.

        Args:
            X: New predictors: a DataFrame with an ``"input"`` column, a 2-D
                array whose first column is the predictor, or a 1-D array of
                predictor values.
            y: Optional new targets; ``y_data`` is left untouched when None.
        """
        if isinstance(X, pd.DataFrame):
            x_values = X["input"].values
        else:
            x_values = np.asarray(X)
            # A 1-D array is already the predictor vector; only slice 2-D+.
            if x_values.ndim > 1:
                x_values = x_values[:, 0]

        new_data = {"x_data": x_values}
        if y is not None:
            new_data["y_data"] = y.values if isinstance(y, pd.Series) else y

        with self.model:
            pm.set_data(new_data)

    @staticmethod
    def get_default_model_config() -> Dict:
        """Return the default prior hyper-parameters for ``a``, ``b`` and ``eps``."""
        model_config: Dict = {
            "a_mu_prior": 0.0,
            "a_sigma_prior": 1.0,
            "b_mu_prior": 0.0,
            "b_sigma_prior": 1.0,
            "eps_prior": 1.0,
        }
        return model_config

    @staticmethod
    def get_default_sampler_config() -> Dict:
        """Return the default sampler settings (draws, tune, chains, target_accept)."""
        sampler_config: Dict = {
            "draws": 100,
            "tune": 100,
            "chains": 4,
            "target_accept": 0.95,
        }
        return sampler_config

    @property
    def output_var(self):
        """Name of the observed variable; matches the likelihood named ``"y"``."""
        return "y"

    @property
    def _serializable_model_config(self) -> Dict[str, Union[int, float, Dict]]:
        """Return the model config as-is; it holds only plain floats."""
        return self.model_config

    def _save_input_params(self, idata) -> None:
        """Intentionally a no-op: nothing extra is attached to ``idata``."""
        pass

    def _generate_and_preprocess_model_data(
        self,
        X: Union[pd.DataFrame, pd.Series, np.ndarray],
        y: Union[pd.Series, np.ndarray],
    ) -> None:
        """Store the data on the instance; no preprocessing or coords are used."""
        self.model_coords = None
        self.X = X
        self.y = y

@hydra.main(version_base=None, config_path="../conf", config_name="config")
def main(config: DictConfig):
    """Fit ``LinearModel`` on synthetic data inside an MLflow run.

    Args:
        config: Hydra configuration. ``config["experiment"]`` is instantiated
            and must expose ``tracking_uri`` and ``name`` attributes.
    """
    RANDOM_SEED = 8927

    rng = np.random.default_rng(RANDOM_SEED)
    az.style.use("arviz-darkgrid")

    experiment_config = instantiate(config["experiment"])

    mlflow.set_tracking_uri(experiment_config.tracking_uri)
    # set_experiment creates the experiment when it does not exist yet, so no
    # separate get/create step is required.
    mlflow.set_experiment(experiment_config.name)

    # Explicit run context: the run is closed even when fitting raises,
    # instead of relying on a trailing mlflow.end_run() that would be skipped.
    with mlflow.start_run():
        mlflow.set_tag("task", "pymc_mlflow_model")

        # Synthetic data: y = 0.3 * x + 0.5 + unit-variance Gaussian noise.
        x = np.linspace(start=0, stop=1, num=100)
        X = pd.DataFrame(data=x, columns=["input"])
        y = 0.3 * x + 0.5 + rng.normal(0, 1, len(x))

        model = LinearModel()
        model.fit(X, y)

        # TODO: log and register the fitted model with MLflow and import it
        # into BentoML. The commented-out scikit-learn iris example that used
        # to live here sketched that flow (infer_signature -> log_model ->
        # register_model -> bentoml.mlflow.import_model) -- presumably the
        # template for this step; confirm with the author.


if __name__ == "__main__":
    main()
Empty file added pymc_mlflow/2_deploy_model.py
Empty file.
12 changes: 12 additions & 0 deletions pymc_mlflow/DataModels.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from pydantic import BaseModel

class ExperimentModel(BaseModel):
    """
    The experiment data model.

    Attributes:
        name (str):
            The experiment name.
            NOTE(review): presumably the MLflow experiment name read by
            1_train_model.py -- confirm against conf/config.
        tracking_uri (str):
            The tracking URI.
            NOTE(review): presumably the value handed to
            mlflow.set_tracking_uri in 1_train_model.py -- confirm.
    """
    name: str
    tracking_uri: str
Empty file added pymc_mlflow/__init__.py
Empty file.
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ plotly = "^5.19.0"
prometheus-flask-exporter = "^0.23.0"
bentoml = "^1.2.6"
hydra-core = "^1.3.2"
pydantic = "^2.6.3"
pymc-experimental = {git = "https://github.com/pymc-devs/pymc-experimental.git"}


[build-system]
Expand Down
3 changes: 3 additions & 0 deletions scripts/pull.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/usr/bin/env bash

# Pull the image for the `mlflow` service via Docker Compose.
docker compose pull mlflow
7 changes: 7 additions & 0 deletions scripts/pymc_mlflow.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/usr/bin/env bash
#
# Run the training module inside the `mlflow` Docker Compose service.

# Abort on errors, unset variables and failures inside pipelines.
set -euo pipefail

# Keep the command in an array so every argument stays a single word;
# expanding a command stored in a plain string relies on word splitting.
cmd=(docker compose exec mlflow python -m)

"${cmd[@]}" pymc_mlflow.1_train_model

# Serving step, currently disabled:
# docker compose exec mlflow bentoml serve --host 0.0.0.0 -p 3000 pymc_mlflow.2_deploy_model:svc

0 comments on commit f557854

Please sign in to comment.