Skip to content

Commit

Permalink
Feat: accept cron schedule for analyzers (#56)
Browse files Browse the repository at this point in the history
details:
- include every N {field} on base regex on CronSchedule
- fix order of SplitCron implementation
- checks for "," and "-" cases only on the minute field
- added tests
  • Loading branch information
murilommen authored Apr 2, 2024
1 parent 8013b50 commit 16a79c8
Show file tree
Hide file tree
Showing 5 changed files with 139 additions and 7 deletions.
74 changes: 73 additions & 1 deletion tests/monitor/manager/test_monitor_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,4 +273,76 @@ def test_set_non_iso_data_readiness_raises(monitor_setup) -> None:

with pytest.raises(ValueError):
monitor_setup.data_readiness_duration = "Some non-conformant string"



def test_cron_schedule_for_analyzer(monitor_setup) -> None:
monitor_setup.config = FixedThresholdsConfig(
metric=DatasetMetric.classification_accuracy,
upper=0.75
)
monitor_setup.schedule = CronSchedule(cron="0 0 * * *")
monitor_setup.apply()

assert monitor_setup.analyzer.schedule == CronSchedule(
cron="0 0 * * *"
)

monitor_setup.schedule = CronSchedule(cron="0 0 * * 1-5")
monitor_setup.apply()

assert monitor_setup.analyzer.schedule == CronSchedule(
cron="0 0 * * 1-5"
)

monitor_setup.schedule = CronSchedule(cron="0 0 * * 6,0")
monitor_setup.apply()

assert monitor_setup.analyzer.schedule == CronSchedule(
cron="0 0 * * 6,0"
)

monitor_setup.schedule = CronSchedule(cron="0 9-17 * * *")
monitor_setup.apply()

assert monitor_setup.analyzer.schedule == CronSchedule(
cron="0 9-17 * * *"
)

monitor_setup.schedule = CronSchedule(cron="0 9,10,17 * * *")
monitor_setup.apply()

assert monitor_setup.analyzer.schedule == CronSchedule(
cron="0 9,10,17 * * *"
)

monitor_setup.schedule = CronSchedule(cron="*/90 9,10,17 * * *")
monitor_setup.apply()

assert monitor_setup.analyzer.schedule == CronSchedule(
cron="*/90 9,10,17 * * *"
)

monitor_setup.schedule = CronSchedule(cron="0 9,10,17 1,2,3 2,4,5 2,4")
monitor_setup.apply()

assert monitor_setup.analyzer.schedule == CronSchedule(
cron="0 9,10,17 1,2,3 2,4,5 2,4"
)

# All below Must fail

monitor_setup.schedule = CronSchedule(cron="* * * * *") # Every minute
with pytest.raises(ValueError):
monitor_setup.apply()

monitor_setup.schedule = CronSchedule(cron="0 0 * * * *") # Too many fields
with pytest.raises(ValueError):
monitor_setup.apply()

monitor_setup.schedule = CronSchedule(cron="1,2 0 * * *") # Less granular than 1h
with pytest.raises(ValueError):
monitor_setup.apply()

monitor_setup.schedule = CronSchedule(cron="*/15 0 * * *") # every 15min
with pytest.raises(ValueError):
monitor_setup.apply()
49 changes: 49 additions & 0 deletions whylabs_toolkit/helpers/cron_validators.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
from dataclasses import dataclass


@dataclass
class SplitCron:
day_of_week: str
month: str
day_of_month: str
hour: str
minute: str


def split_cron_expression(cron: str) -> SplitCron:
"""Split the cron expression into its components."""
cron_slots = cron.split(" ")
if len(cron_slots) != 5:
raise ValueError("CronSchedule must have 5 fields.")
return SplitCron(
minute=cron_slots[0],
hour=cron_slots[1],
day_of_month=cron_slots[2],
month=cron_slots[3],
day_of_week=cron_slots[4],
)


def _is_not_less_granular_than_1_hour(split_cron: SplitCron) -> bool:
"""Check if the cron expression is less granular than 1 hour."""
if split_cron.minute == "*":
return False

for item in ["-", ","]:
if item in split_cron.minute:
return False

if split_cron.minute.startswith("*/"):
try:
divisor = int(split_cron.minute.split("/")[1])
if divisor < 60:
return False
except ValueError:
pass

return True


def validate_cron_expression(cron: str) -> bool:
split_cron = split_cron_expression(cron)
return _is_not_less_granular_than_1_hour(split_cron=split_cron)
2 changes: 1 addition & 1 deletion whylabs_toolkit/monitor/manager/monitor_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def __init__(self, monitor_id: str, dataset_id: Optional[str] = None, config: Co

self._monitor_mode: Optional[Union[EveryAnomalyMode, DigestMode]] = None
self._monitor_actions: Optional[List[Union[GlobalAction, EmailRecipient, SlackWebhook, PagerDuty]]] = None
self._analyzer_schedule: Optional[FixedCadenceSchedule] = None
self._analyzer_schedule: Optional[Union[FixedCadenceSchedule, CronSchedule]] = None
self._target_matrix: Optional[Union[ColumnMatrix, DatasetMatrix]] = None
self._analyzer_config: Optional[
Union[
Expand Down
16 changes: 13 additions & 3 deletions whylabs_toolkit/monitor/models/analyzer/analyzer.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Schema for analyses."""
from typing import Any, Dict, List, Optional, Union

from pydantic import BaseModel, Field, constr
from pydantic import BaseModel, Field, constr, validator

from whylabs_toolkit.monitor.models.commons import NoExtrasBaseModel

Expand All @@ -22,6 +22,7 @@
DisjunctionConfig,
)
from .targets import ColumnMatrix, DatasetMatrix
from whylabs_toolkit.helpers.cron_validators import validate_cron_expression


class Analyzer(NoExtrasBaseModel):
Expand Down Expand Up @@ -57,8 +58,8 @@ class Analyzer(NoExtrasBaseModel):
] = Field( # noqa F722
None, description="A list of tags that are associated with the analyzer."
)
# disabling CronSchedule as it can be tricky on the BE
schedule: Optional[FixedCadenceSchedule] = Field( # Optional[Union[CronSchedule, FixedCadenceSchedule]] = Field(

schedule: Optional[Union[FixedCadenceSchedule, CronSchedule]] = Field(
None,
description="A schedule for running the analyzer. If not set, the analyzer's considered disabled",
)
Expand Down Expand Up @@ -100,6 +101,15 @@ class Analyzer(NoExtrasBaseModel):
"monthly data.",
)

@validator("schedule", pre=True, always=True)
def validate_schedule(
cls, v: Optional[Union[FixedCadenceSchedule, CronSchedule]]
) -> Optional[Union[FixedCadenceSchedule, CronSchedule]]:
"""Validate the schedule."""
if isinstance(v, CronSchedule) and not validate_cron_expression(v.cron):
raise ValueError("CronSchedule must be no less granular than 1 hour and must have 5 fields.")
return v

# NOT YET IMPLEMENTED:
# ExperimentalConfig,
# ColumnListChangeConfig,
Expand Down
5 changes: 3 additions & 2 deletions whylabs_toolkit/monitor/models/commons.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@
from pydantic import BaseModel, Extra
from pydantic.fields import Field

CRON_REGEX = "(@(annually|yearly|monthly|weekly|daily|hourly))|" "((((\\d+,)+\\d+|(\\d+(\\/|-)\\d+)|\\d+|\\*) ?){5,7})"
CRON_REGEX = (
"(@(annually|yearly|monthly|weekly|daily|hourly))|" "((((\\d+,)+\\d+|(\\d+(\\/|-)\\d+)|\\d+|\\*|\\*/\\d+) ?){5,7})"
)
DATASET_ID_REGEX = "[a-zA-Z0-9\\-_\\.]+"

DATASET_ID_DEF = Field(
Expand Down Expand Up @@ -50,7 +52,6 @@ class CronSchedule(NoExtrasBaseModel):
exclusionRanges: Optional[List[TimeRange]] = Field(
title="ExclusionRanges", description="The ranges of dates during which this Analyzer is NOT run."
)
# TODO: support other mode of configuring scheduling


class Cadence(str, Enum):
Expand Down

0 comments on commit 16a79c8

Please sign in to comment.