Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat: accept cron schedule for analyzers #56

Merged
merged 4 commits into from
Apr 2, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 58 additions & 1 deletion tests/monitor/manager/test_monitor_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,4 +273,61 @@ def test_set_non_iso_data_readiness_raises(monitor_setup) -> None:

with pytest.raises(ValueError):
monitor_setup.data_readiness_duration = "Some non-conformant string"



def test_cron_schedule_for_analyzer(monitor_setup) -> None:
monitor_setup.config = FixedThresholdsConfig(
metric=DatasetMetric.classification_accuracy,
upper=0.75
)
monitor_setup.schedule = CronSchedule(cron="0 0 * * *")
monitor_setup.apply()

assert monitor_setup.analyzer.schedule == CronSchedule(
cron="0 0 * * *"
)

monitor_setup.schedule = CronSchedule(cron="0 0 * * 1-5")
monitor_setup.apply()

assert monitor_setup.analyzer.schedule == CronSchedule(
cron="0 0 * * 1-5"
)

monitor_setup.schedule = CronSchedule(cron="0 0 * * 6,0")
monitor_setup.apply()

assert monitor_setup.analyzer.schedule == CronSchedule(
cron="0 0 * * 6,0"
)

monitor_setup.schedule = CronSchedule(cron="0 9-17 * * *")
monitor_setup.apply()

assert monitor_setup.analyzer.schedule == CronSchedule(
cron="0 9-17 * * *"
)

monitor_setup.schedule = CronSchedule(cron="0 9,10,17 * * *")
monitor_setup.apply()

assert monitor_setup.analyzer.schedule == CronSchedule(
cron="0 9,10,17 * * *"
)

monitor_setup.schedule = CronSchedule(cron="0,1 9,10,17 1,2,3 2,4,5 2,4")
monitor_setup.apply()

assert monitor_setup.analyzer.schedule == CronSchedule(
cron="0,1 9,10,17 1,2,3 2,4,5 2,4"
)

# All below Must fail

monitor_setup.schedule = CronSchedule(cron="* * * * *") # Every minute
with pytest.raises(ValueError):
monitor_setup.apply()

monitor_setup.schedule = CronSchedule(cron="0 0 * * * *") # Too many fields
with pytest.raises(ValueError):
monitor_setup.apply()
53 changes: 53 additions & 0 deletions whylabs_toolkit/helpers/cron_validators.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
from dataclasses import dataclass


@dataclass
class SplitCron:
day_of_week: str
month: str
day_of_month: str
hour: str
minute: str


def split_cron_expression(cron: str) -> SplitCron:
"""Split the cron expression into its components."""
cron_slots = cron.split(" ")
if len(cron_slots) != 5:
raise ValueError("CronSchedule must have 5 fields.")
return SplitCron(
day_of_week=cron_slots[0],
month=cron_slots[1],
day_of_month=cron_slots[2],
hour=cron_slots[3],
minute=cron_slots[4],
)


def _is_not_less_granular_than_1_hour(split_cron: SplitCron) -> bool:
"""Check if the cron expression is less granular than 1 hour."""
# Specific days checks
if split_cron.minute != "*" and split_cron.minute != "0":
Copy link
Collaborator

@christinedraper christinedraper Apr 1, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I dont get why it needs to check that its not 0 (and ditto for the other fields). Seems like this check should fail if the minute is '*' or anything with a - or , in it. Anything else would be ok.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

true. it really doesn't make sense to check for the "top of the {time}"

return True
if split_cron.hour != "*" and split_cron.hour != "0":
return True
if split_cron.day_of_month != "*" and split_cron.day_of_month != "1":
return True
if split_cron.month != "*" and split_cron.month != "1":
return True
if split_cron.day_of_week != "*" and split_cron.day_of_week != "1":
return True

# Check range
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

all that happens from here on is that it iterates and then returns False... logic seems wrong. Also seems disconnected from "is granularity less than hour", seems like it is meant to be checking any range fields

for field in (split_cron.day_of_week, split_cron.month, split_cron.day_of_month, split_cron.hour):
for item in field.split(","):
if "-" in item:
start, end = map(int, item.split("-"))
if end - start > 0:
return False
return False


def validate_cron_expression(cron: str) -> bool:
split_cron = split_cron_expression(cron)
return _is_not_less_granular_than_1_hour(split_cron=split_cron)
2 changes: 1 addition & 1 deletion whylabs_toolkit/monitor/manager/monitor_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def __init__(self, monitor_id: str, dataset_id: Optional[str] = None, config: Co

self._monitor_mode: Optional[Union[EveryAnomalyMode, DigestMode]] = None
self._monitor_actions: Optional[List[Union[GlobalAction, EmailRecipient, SlackWebhook, PagerDuty]]] = None
self._analyzer_schedule: Optional[FixedCadenceSchedule] = None
self._analyzer_schedule: Optional[Union[FixedCadenceSchedule, CronSchedule]] = None
self._target_matrix: Optional[Union[ColumnMatrix, DatasetMatrix]] = None
self._analyzer_config: Optional[
Union[
Expand Down
16 changes: 13 additions & 3 deletions whylabs_toolkit/monitor/models/analyzer/analyzer.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Schema for analyses."""
from typing import Any, Dict, List, Optional, Union

from pydantic import BaseModel, Field, constr
from pydantic import BaseModel, Field, constr, validator

from whylabs_toolkit.monitor.models.commons import NoExtrasBaseModel

Expand All @@ -22,6 +22,7 @@
DisjunctionConfig,
)
from .targets import ColumnMatrix, DatasetMatrix
from whylabs_toolkit.helpers.cron_validators import validate_cron_expression

murilommen marked this conversation as resolved.
Show resolved Hide resolved

class Analyzer(NoExtrasBaseModel):
Expand Down Expand Up @@ -57,8 +58,8 @@ class Analyzer(NoExtrasBaseModel):
] = Field( # noqa F722
None, description="A list of tags that are associated with the analyzer."
)
# disabling CronSchedule as it can be tricky on the BE
schedule: Optional[FixedCadenceSchedule] = Field( # Optional[Union[CronSchedule, FixedCadenceSchedule]] = Field(

schedule: Optional[Union[FixedCadenceSchedule, CronSchedule]] = Field(
christinedraper marked this conversation as resolved.
Show resolved Hide resolved
None,
description="A schedule for running the analyzer. If not set, the analyzer's considered disabled",
)
Expand Down Expand Up @@ -100,6 +101,15 @@ class Analyzer(NoExtrasBaseModel):
"monthly data.",
)

@validator("schedule", pre=True, always=True)
def validate_schedule(
cls, v: Optional[Union[FixedCadenceSchedule, CronSchedule]]
) -> Optional[Union[FixedCadenceSchedule, CronSchedule]]:
"""Validate the schedule."""
if isinstance(v, CronSchedule) and not validate_cron_expression(v.cron):
raise ValueError("CronSchedule must be no less granular than 1 hour and must have 5 fields.")
return v

# NOT YET IMPLEMENTED:
# ExperimentalConfig,
# ColumnListChangeConfig,
Expand Down
1 change: 0 additions & 1 deletion whylabs_toolkit/monitor/models/commons.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@ class CronSchedule(NoExtrasBaseModel):
exclusionRanges: Optional[List[TimeRange]] = Field(
title="ExclusionRanges", description="The ranges of dates during which this Analyzer is NOT run."
)
# TODO: support other mode of configuring scheduling


class Cadence(str, Enum):
Expand Down
Loading