feat: metrics added
vijayvammi committed Apr 6, 2024
1 parent 861a717 commit 1a3441b
Showing 18 changed files with 374 additions and 89 deletions.
19 changes: 17 additions & 2 deletions examples/tutorials/mnist/baseline_comparison.py
@@ -1,8 +1,6 @@
from typing import List

import keras
import numpy as np
from keras import layers
from pydantic import BaseModel


@@ -40,6 +38,8 @@ class BaseLineParams(BaseModel):


def load_data():
    import keras

    (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

    return x_train, y_train, x_test, y_test
@@ -56,13 +56,18 @@ def scale_data(x_train: np.ndarray, x_test: np.ndarray):


def convert_to_categorically(y_train: np.ndarray, y_test: np.ndarray, num_classes: int):
    import keras

    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)

    return y_train, y_test


def build_model(train_params: TrainParams, num_classes: int):
    import keras
    from keras import layers

    model = keras.Sequential(
        [
            keras.Input(shape=train_params.input_shape),
@@ -82,6 +87,8 @@ def build_model(train_params: TrainParams, num_classes: int):


def build_baseline_model(baseline_params: BaseLineParams, num_classes: int):
    import keras

    model = keras.Sequential()
    model.add(
        keras.layers.Dense(
@@ -106,6 +113,8 @@ def build_baseline_model(baseline_params: BaseLineParams, num_classes: int):


def train_model(x_train: np.ndarray, y_train: np.ndarray, train_params: TrainParams):
    import keras

    model = keras.models.load_model("model.keras")
    model.compile(loss=train_params.loss, optimizer=train_params.optimizer, metrics=train_params.metrics)

@@ -121,6 +130,8 @@ def train_model(x_train: np.ndarray, y_train: np.ndarray, train_params: TrainPar


def train_baseline_model(x_train: np.ndarray, y_train: np.ndarray, train_params: BaseLineParams):
    import keras

    model = keras.models.load_model("baseline_model.keras")
    model.compile(loss=train_params.loss, optimizer=train_params.optimizer, metrics=train_params.metrics)

@@ -139,6 +150,8 @@ def train_baseline_model(x_train: np.ndarray, y_train: np.ndarray, train_params:


def evaluate_model(x_test: np.ndarray, y_test: np.ndarray):
    import keras

    trained_model = keras.models.load_model("trained_model.keras")

    score = trained_model.evaluate(x_test, y_test, verbose=0)
@@ -149,6 +162,8 @@ def evaluate_model(x_test: np.ndarray, y_test: np.ndarray):


def evaluate_baseline_model(x_test: np.ndarray, y_test: np.ndarray, train_params: BaseLineParams):
    import keras

    trained_model = keras.models.load_model("trained_baseline_model.keras")

    _x_test = x_test.reshape(x_test.shape[0], train_params.num_pixels).astype("float32")
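The change in this file is a deferred-import pattern: keras is imported inside each task function rather than at module level, which keeps the module importable without loading keras up front (useful when only the pipeline definition is needed). A minimal sketch of the same idea, using a standard-library module as a stand-in for the heavy dependency:

    def scale(values):
        # Deferred import: paid for only when the task runs, not when this
        # module is imported to build the pipeline definition.
        import statistics  # stand-in for a heavy dependency such as keras

        mean = statistics.mean(values)
        return [v - mean for v in values]


    print(scale([1.0, 2.0, 3.0]))  # [-1.0, 0.0, 1.0]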
17 changes: 13 additions & 4 deletions examples/tutorials/mnist/modular_source.py
@@ -1,9 +1,6 @@
from typing import List

import keras
import numpy as np
from keras import layers
from pydantic import BaseModel

num_classes: int = 10
input_shape: tuple = (28, 28, 1)
@@ -24,6 +21,8 @@


def load_data():
    import keras

    (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

    return x_train, y_train, x_test, y_test
@@ -40,13 +39,18 @@ def scale_data(x_train: np.ndarray, x_test: np.ndarray):


def convert_to_categorically(y_train: np.ndarray, y_test: np.ndarray):
    import keras

    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)

    return y_train, y_test


def build_model():
    import keras
    from keras import layers

    model = keras.Sequential(
        [
            keras.Input(shape=input_shape),
@@ -66,6 +70,8 @@ def build_model():


def train_model(x_train: np.ndarray, y_train: np.ndarray):
    import keras

    model = keras.models.load_model("model.keras")
    model.compile(loss=loss, optimizer=optimizer, metrics=metrics)

@@ -81,6 +87,8 @@ def train_model(x_train: np.ndarray, y_train: np.ndarray):


def evaluate_model(x_test: np.ndarray, y_test: np.ndarray):
    import keras

    trained_model = keras.models.load_model("trained_model.keras")

    score = trained_model.evaluate(x_test, y_test, verbose=0)
@@ -91,7 +99,7 @@ def evaluate_model(x_test: np.ndarray, y_test: np.ndarray):


def main():
    from runnable import Catalog, Pipeline, PythonTask, pickled
    from runnable import Catalog, Pipeline, PythonTask, metric, pickled

    # x_train, y_train, x_test, y_test
    load_data_task = PythonTask(
@@ -134,6 +142,7 @@ def main():
        catalog=Catalog(
            get=["trained_model.keras"],
        ),
        returns=[metric("score")],
        terminate_with_success=True,
    )

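The returns=[metric("score")] entry added above is the user-facing piece of this commit: alongside pickled(...) for object returns, a task can now flag a returned value as a metric so it is recorded in the run log. A minimal sketch of that usage, assuming runnable is installed; the task and function names are illustrative and mirror the diff rather than the full API:

    from runnable import PythonTask, metric


    def evaluate_model():
        score = 0.98  # stand-in for trained_model.evaluate(...)
        return score


    evaluate_task = PythonTask(
        name="evaluate",
        function=evaluate_model,
        returns=[metric("score")],  # "score" is captured as a user-defined metric
        terminate_with_success=True,
    )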
12 changes: 6 additions & 6 deletions examples/tutorials/reddit_text_classification/pipeline.py
@@ -5,7 +5,7 @@
    tfidf,
    tokenize,
)
from runnable import Pipeline, PythonTask, pickled
from runnable import Pipeline, PythonTask, Stub, pickled


def driver():
@@ -25,12 +25,12 @@ def driver():


def runnable_pipeline():
    extract_task = PythonTask(name="extract", function=extract_text, returns=[pickled("x"), pickled("labels")])
    clean_task = PythonTask(name="clean", function=clean, returns=[pickled("cleaned_x")])
    tokenize_task = PythonTask(name="tokenize", function=tokenize, returns=[pickled("tokenised_x")])
    vectorise_task = PythonTask(name="tfidf", function=tfidf, returns=[pickled("vectorised_x")])
    extract_task = Stub(name="extract", function=extract_text, returns=[pickled("x"), pickled("labels")])
    clean_task = Stub(name="clean", function=clean, returns=[pickled("cleaned_x")])
    tokenize_task = Stub(name="tokenize", function=tokenize, returns=[pickled("tokenised_x")])
    vectorise_task = Stub(name="tfidf", function=tfidf, returns=[pickled("vectorised_x")])

    model_fit_task = PythonTask(
    model_fit_task = Stub(
        name="model_fit",
        function=model_fit,
        returns=[pickled("y_probabilities"), pickled("lr_model")],
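Swapping PythonTask for Stub here keeps the pipeline wiring intact while mocking the steps out; a Stub appears to accept the same arguments as the task it replaces, which makes it easy to toggle individual steps off while debugging. A small sketch under that assumption, with an illustrative step function:

    from runnable import Stub, pickled


    def extract_text():
        return ["some text"], ["label"]


    # Same call shape as the PythonTask it replaces, but the step is mocked out.
    extract_task = Stub(name="extract", function=extract_text, returns=[pickled("x"), pickled("labels")])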
11 changes: 7 additions & 4 deletions examples/tutorials/reddit_text_classification/steps.py
@@ -10,16 +10,19 @@

nlp = spacy.load("en_core_web_sm")

logger = logging.getLogger(__name__)
logger.setLevel(logging.ERROR)


class CleanTransformer:
    __uplus_pattern = re.compile("\<[uU]\+(?P<digit>[a-zA-Z0-9]+)\>")
    __markup_link_pattern = re.compile("\[(.*)\]\((.*)\)")

    def predict(self, X, feature_names=[]):
        logging.warning(X)
        logger.warning(X)
        f = np.vectorize(CleanTransformer.transform_clean_text)
        X_clean = f(X)
        logging.warning(X_clean)
        logger.warning(X_clean)
        return X_clean

    def fit(self, X, y=None, **fit_params):
@@ -48,10 +51,10 @@ class TokenizeTransformer:
    __symbols = set("!$%^&*()_+|~-=`{}[]:\";'<>?,./-")

    def predict(self, X, feature_names=[]):
        logging.warning(X)
        logger.warning(X)
        f = np.vectorize(TokenizeTransformer.transform_to_token, otypes=[object])
        X_tokenized = f(X)
        logging.warning(X_tokenized)
        logger.warning(X_tokenized)
        return X_tokenized

    def fit(self, X, y=None, **fit_params):
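The steps.py change swaps calls on the root logging module for a module-level logger whose level is set to ERROR, so the per-batch warning output from the transformers is silenced without reconfiguring logging globally. A minimal sketch of the same standard-library pattern:

    import logging

    # Module-level logger; records below ERROR are dropped at this logger
    # before they ever reach any handler.
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.ERROR)

    logger.warning("suppressed: below the ERROR threshold")
    logger.error("still emitted")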
6 changes: 6 additions & 0 deletions runnable/__init__.py
@@ -4,11 +4,16 @@
import logging
from logging.config import dictConfig

from rich.console import Console

from runnable import defaults

dictConfig(defaults.LOGGING_CONFIG)
logger = logging.getLogger(defaults.LOGGER_NAME)

console = Console()
console.print(":runner: Lets go!!")

from runnable.sdk import ( # noqa
    Catalog,
    Fail,
@@ -20,6 +25,7 @@
    ShellTask,
    Stub,
    Success,
    metric,
    pickled,
)

1 change: 1 addition & 0 deletions runnable/cli.py
@@ -60,6 +60,7 @@ def execute(file, config_file, parameters_file, log_level, tag, run_id): # prag
    provided
    """
    logger.setLevel(log_level)

    entrypoints.execute(
        configuration_file=config_file,
        pipeline_file=file,
6 changes: 5 additions & 1 deletion runnable/context.py
@@ -1,6 +1,7 @@
from typing import Dict, Optional

from pydantic import BaseModel, SerializeAsAny
from pydantic import BaseModel, ConfigDict, Field, SerializeAsAny
from rich.progress import Progress

from runnable.catalog import BaseCatalog
from runnable.datastore import BaseRunLogStore
@@ -16,6 +17,9 @@ class Context(BaseModel):
    secrets_handler: SerializeAsAny[BaseSecrets]
    catalog_handler: SerializeAsAny[BaseCatalog]
    pickler: SerializeAsAny[BasePickler]
    progress: SerializeAsAny[Optional[Progress]] = Field(default=None, exclude=True)

    model_config = ConfigDict(arbitrary_types_allowed=True)

    pipeline_file: Optional[str] = ""
    parameters_file: Optional[str] = ""
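The ConfigDict(arbitrary_types_allowed=True) line is what allows the new progress field: rich.progress.Progress is not a type pydantic can build a schema for, so without the flag the class definition itself would fail. A small self-contained sketch of the same pydantic v2 pattern, with an arbitrary class standing in for Progress:

    from typing import Optional

    from pydantic import BaseModel, ConfigDict, Field


    class Tracker:
        # Arbitrary, non-pydantic type standing in for rich.progress.Progress.
        pass


    class Context(BaseModel):
        model_config = ConfigDict(arbitrary_types_allowed=True)

        # Defaults to None, validated only by isinstance(), excluded from dumps.
        progress: Optional[Tracker] = Field(default=None, exclude=True)


    print(Context().model_dump())  # {} -- progress is excluded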
59 changes: 56 additions & 3 deletions runnable/datastore.py
@@ -4,7 +4,17 @@
import os
from abc import ABC, abstractmethod
from datetime import datetime
from typing import Annotated, Any, Dict, List, Literal, Optional, OrderedDict, Tuple, Union
from typing import (
    Annotated,
    Any,
    Dict,
    List,
    Literal,
    Optional,
    OrderedDict,
    Tuple,
    Union,
)

from pydantic import BaseModel, Field, computed_field
from typing_extensions import TypeAliasType
@@ -65,10 +75,19 @@ class JsonParameter(BaseModel):
    value: JSONType  # type: ignore
    reduced: bool = True

    @computed_field  # type: ignore
    @property
    def description(self) -> JSONType:
        return self.value

    def get_value(self) -> JSONType:  # type: ignore
        return self.value


class MetricParameter(JsonParameter):
    kind: Literal["metric"]


class ObjectParameter(BaseModel):
    kind: Literal["object"]
    value: str  # The name of the pickled object
@@ -100,7 +119,7 @@ def put_object(self, data: Any) -> None:
        os.remove(self.file_name)  # Remove after loading


Parameter = Annotated[Union[JsonParameter, ObjectParameter], Field(discriminator="kind")]
Parameter = Annotated[Union[JsonParameter, ObjectParameter, MetricParameter], Field(discriminator="kind")]


class StepAttempt(BaseModel):
@@ -115,6 +134,7 @@ class StepAttempt(BaseModel):
    message: str = ""
    input_parameters: Dict[str, Parameter] = Field(default_factory=dict)
    output_parameters: Dict[str, Parameter] = Field(default_factory=dict)
    user_defined_metrics: Dict[str, Any] = Field(default_factory=dict)

    @property
    def duration(self):
@@ -149,10 +169,43 @@ class StepLog(BaseModel):
    mock: bool = False
    code_identities: List[CodeIdentity] = Field(default_factory=list)
    attempts: List[StepAttempt] = Field(default_factory=list)
    user_defined_metrics: Dict[str, Any] = Field(default_factory=dict)
    branches: Dict[str, BranchLog] = Field(default_factory=dict)
    data_catalog: List[DataCatalog] = Field(default_factory=list)

    def get_summary(self) -> Dict[str, Any]:
        """
        Summarize the step log to log
        """
        summary = {}

        summary["Name"] = self.internal_name
        summary["Input catalog content"] = [dc.name for dc in self.data_catalog if dc.stage == "get"]
        summary["Available parameters"] = [
            (p, v.description) for attempt in self.attempts for p, v in attempt.input_parameters.items()
        ]

        summary["Output catalog content"] = [dc.name for dc in self.data_catalog if dc.stage == "put"]
        summary["Output parameters"] = [
            (p, v.description) for attempt in self.attempts for p, v in attempt.output_parameters.items()
        ]

        summary["Metrics"] = [
            (p, v.description) for attempt in self.attempts for p, v in attempt.user_defined_metrics.items()
        ]

        cis = []
        for ci in self.code_identities:
            message = f"{ci.code_identifier_type}:{ci.code_identifier}"
            if not ci.code_identifier_dependable:
                message += " but is not dependable"
            cis.append(message)

        summary["Code identities"] = cis

        summary["status"] = self.status

        return summary

    def get_data_catalogs_by_stage(self, stage="put") -> List[DataCatalog]:
        """
        Given a stage, return the data catalogs according to the stage
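MetricParameter is a thin subclass of JsonParameter whose kind discriminator is "metric", and adding it to the Parameter union lets metrics round-trip through the run log like any other parameter. A simplified, self-contained sketch of how the Field(discriminator="kind") union resolves the new kind, assuming JsonParameter declares kind: Literal["json"] above the shown hunk and narrowing value to float for brevity:

    from typing import Annotated, Literal, Union

    from pydantic import BaseModel, Field, TypeAdapter


    class JsonParameter(BaseModel):
        kind: Literal["json"]
        value: float
        reduced: bool = True


    class MetricParameter(JsonParameter):
        kind: Literal["metric"]


    Parameter = Annotated[Union[JsonParameter, MetricParameter], Field(discriminator="kind")]

    # The "kind" field selects the concrete model when a step log is deserialized.
    param = TypeAdapter(Parameter).validate_python({"kind": "metric", "value": 0.98})
    print(type(param).__name__, param.value)  # MetricParameter 0.98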
