Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat: accept cron schedule for analyzers #56

Merged
merged 4 commits into from
Apr 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 73 additions & 1 deletion tests/monitor/manager/test_monitor_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,4 +273,76 @@ def test_set_non_iso_data_readiness_raises(monitor_setup) -> None:

with pytest.raises(ValueError):
monitor_setup.data_readiness_duration = "Some non-conformant string"



def test_cron_schedule_for_analyzer(monitor_setup) -> None:
    """Check which cron schedules apply() stores on the analyzer and which it rejects."""
    monitor_setup.config = FixedThresholdsConfig(metric=DatasetMetric.classification_accuracy, upper=0.75)

    # Expressions apply() is expected to accept; each must end up unchanged on the analyzer.
    accepted_expressions = (
        "0 0 * * *",
        "0 0 * * 1-5",
        "0 0 * * 6,0",
        "0 9-17 * * *",
        "0 9,10,17 * * *",
        "*/90 9,10,17 * * *",
        "0 9,10,17 1,2,3 2,4,5 2,4",
    )
    for expression in accepted_expressions:
        monitor_setup.schedule = CronSchedule(cron=expression)
        monitor_setup.apply()

        assert monitor_setup.analyzer.schedule == CronSchedule(cron=expression)

    # Expressions for which apply() is expected to raise ValueError.
    rejected_expressions = (
        "* * * * *",  # wildcard in the minute field: runs every minute
        "0 0 * * * *",  # six fields instead of five
        "1,2 0 * * *",  # list in the minute field: more than one run per hour
        "*/15 0 * * *",  # step of 15 in the minute field
    )
    for expression in rejected_expressions:
        monitor_setup.schedule = CronSchedule(cron=expression)
        with pytest.raises(ValueError):
            monitor_setup.apply()
49 changes: 49 additions & 0 deletions whylabs_toolkit/helpers/cron_validators.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
from dataclasses import dataclass


@dataclass
class SplitCron:
    """The five fields of a cron expression, each kept as its raw string.

    Attributes are declared in the reverse of their position inside the
    expression; split_cron_expression fills them by keyword.
    """

    day_of_week: str  # fifth (last) field of the expression
    month: str  # fourth field
    day_of_month: str  # third field
    hour: str  # second field
    minute: str  # first field


def split_cron_expression(cron: str) -> SplitCron:
    """Split a cron expression into its five named fields.

    Fields are separated on runs of whitespace, so leading, trailing or
    repeated blanks neither add nor hide a field.

    Args:
        cron: Expression in ``minute hour day-of-month month day-of-week`` order.

    Returns:
        A ``SplitCron`` holding each field as its raw string.

    Raises:
        ValueError: If the expression does not contain exactly five fields.
    """
    # split() without a separator never yields empty strings. split(" ") did,
    # so "0 0 * * " (four fields plus a trailing blank) was counted as five
    # fields while "0 0 * * * " (five fields plus a trailing blank) was rejected.
    fields = cron.split()
    if len(fields) != 5:
        raise ValueError("CronSchedule must have 5 fields.")

    minute, hour, day_of_month, month, day_of_week = fields
    return SplitCron(
        minute=minute,
        hour=hour,
        day_of_month=day_of_month,
        month=month,
        day_of_week=day_of_week,
    )


def _is_not_less_granular_than_1_hour(split_cron: SplitCron) -> bool:
    """Return True when the minute field does not schedule sub-hourly runs.

    Only ``split_cron.minute`` is inspected. The field is rejected (False) when it is:

    * ``*`` (every minute),
    * a range or a list (contains ``-`` or ``,``),
    * a step expression (``*/step`` or ``N/step``) whose step is below 60
      or is not an integer.

    Any other minute field is accepted (True).

    Args:
        split_cron: The already-split cron expression.

    Returns:
        True if the minute field is acceptable, False otherwise.
    """
    minute = split_cron.minute

    if minute == "*":
        return False

    if "-" in minute or "," in minute:
        return False

    if "/" in minute:
        # Covers both "*/step" and "N/step". The latter was previously not
        # examined at all and therefore always accepted.
        # NOTE(review): assumes the scheduler reads "N/step" as a step
        # expression, as several cron dialects do - confirm against the backend.
        step_text = minute.split("/")[1]
        try:
            step = int(step_text)
        except ValueError:
            # A step that is not an integer is malformed; reject it instead of
            # silently falling through to "valid".
            return False
        return step >= 60

    return True


def validate_cron_expression(cron: str) -> bool:
    """Split ``cron`` into fields and return the result of the minute-field granularity check.

    The ValueError raised by ``split_cron_expression`` for a wrong field count
    is not caught here and propagates to the caller.
    """
    return _is_not_less_granular_than_1_hour(split_cron=split_cron_expression(cron))
2 changes: 1 addition & 1 deletion whylabs_toolkit/monitor/manager/monitor_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def __init__(self, monitor_id: str, dataset_id: Optional[str] = None, config: Co

self._monitor_mode: Optional[Union[EveryAnomalyMode, DigestMode]] = None
self._monitor_actions: Optional[List[Union[GlobalAction, EmailRecipient, SlackWebhook, PagerDuty]]] = None
self._analyzer_schedule: Optional[FixedCadenceSchedule] = None
self._analyzer_schedule: Optional[Union[FixedCadenceSchedule, CronSchedule]] = None
self._target_matrix: Optional[Union[ColumnMatrix, DatasetMatrix]] = None
self._analyzer_config: Optional[
Union[
Expand Down
16 changes: 13 additions & 3 deletions whylabs_toolkit/monitor/models/analyzer/analyzer.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Schema for analyses."""
from typing import Any, Dict, List, Optional, Union

from pydantic import BaseModel, Field, constr
from pydantic import BaseModel, Field, constr, validator

from whylabs_toolkit.monitor.models.commons import NoExtrasBaseModel

Expand All @@ -22,6 +22,7 @@
DisjunctionConfig,
)
from .targets import ColumnMatrix, DatasetMatrix
from whylabs_toolkit.helpers.cron_validators import validate_cron_expression

murilommen marked this conversation as resolved.
Show resolved Hide resolved

class Analyzer(NoExtrasBaseModel):
Expand Down Expand Up @@ -57,8 +58,8 @@ class Analyzer(NoExtrasBaseModel):
] = Field( # noqa F722
None, description="A list of tags that are associated with the analyzer."
)
# disabling CronSchedule as it can be tricky on the BE
schedule: Optional[FixedCadenceSchedule] = Field( # Optional[Union[CronSchedule, FixedCadenceSchedule]] = Field(

schedule: Optional[Union[FixedCadenceSchedule, CronSchedule]] = Field(
christinedraper marked this conversation as resolved.
Show resolved Hide resolved
None,
description="A schedule for running the analyzer. If not set, the analyzer's considered disabled",
)
Expand Down Expand Up @@ -100,6 +101,15 @@ class Analyzer(NoExtrasBaseModel):
"monthly data.",
)

@validator("schedule", pre=True, always=True)
def validate_schedule(
    cls, v: Optional[Union[FixedCadenceSchedule, CronSchedule]]
) -> Optional[Union[FixedCadenceSchedule, CronSchedule]]:
    """Reject cron schedules that fail ``validate_cron_expression``.

    Because this is a ``pre`` validator it receives the value as supplied by
    the caller, before the field itself is parsed. The cron string is
    therefore looked up both on ``CronSchedule`` instances and on plain
    mappings, so a schedule passed as a dict is checked as well.

    Args:
        v: The raw ``schedule`` value (model instance, mapping, or None).

    Returns:
        ``v`` unchanged.

    Raises:
        ValueError: If a cron expression is present and is rejected by
            ``validate_cron_expression`` (which also raises ValueError itself
            when the expression does not have five fields).
    """
    if isinstance(v, CronSchedule):
        cron = v.cron
    elif isinstance(v, dict):
        # Not yet parsed into a model; a fixed-cadence mapping has no "cron" key.
        cron = v.get("cron")
    else:
        cron = None

    # Non-string values are left for the field's own validation to report.
    if isinstance(cron, str) and not validate_cron_expression(cron):
        raise ValueError("CronSchedule must be no less granular than 1 hour and must have 5 fields.")
    return v

# NOT YET IMPLEMENTED:
# ExperimentalConfig,
# ColumnListChangeConfig,
Expand Down
5 changes: 3 additions & 2 deletions whylabs_toolkit/monitor/models/commons.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@
from pydantic import BaseModel, Extra
from pydantic.fields import Field

CRON_REGEX = "(@(annually|yearly|monthly|weekly|daily|hourly))|" "((((\\d+,)+\\d+|(\\d+(\\/|-)\\d+)|\\d+|\\*) ?){5,7})"
CRON_REGEX = (
"(@(annually|yearly|monthly|weekly|daily|hourly))|" "((((\\d+,)+\\d+|(\\d+(\\/|-)\\d+)|\\d+|\\*|\\*/\\d+) ?){5,7})"
)
DATASET_ID_REGEX = "[a-zA-Z0-9\\-_\\.]+"

DATASET_ID_DEF = Field(
Expand Down Expand Up @@ -50,7 +52,6 @@ class CronSchedule(NoExtrasBaseModel):
exclusionRanges: Optional[List[TimeRange]] = Field(
title="ExclusionRanges", description="The ranges of dates during which this Analyzer is NOT run."
)
# TODO: support other mode of configuring scheduling


class Cadence(str, Enum):
Expand Down
Loading