From 5caf4b0253b34c382fb3a227bb97601e2b1eba30 Mon Sep 17 00:00:00 2001 From: Owen Kephart Date: Fri, 8 Dec 2023 13:30:10 -0800 Subject: [PATCH 01/56] Update context objects --- .../asset_automation_condition_context.py | 319 +++++++++++++ .../definitions/asset_automation_evaluator.py | 322 ++++++++----- .../_core/definitions/asset_daemon_context.py | 153 +++---- .../_core/definitions/asset_daemon_cursor.py | 49 +- .../_core/definitions/asset_graph_subset.py | 20 +- .../definitions/auto_materialize_policy.py | 34 +- .../definitions/auto_materialize_rule.py | 425 ++++-------------- .../auto_materialize_rule_evaluation.py | 30 +- .../freshness_based_auto_materialize.py | 69 ++- .../scenarios/basic_scenarios.py | 2 +- .../scenarios/freshness_policy_scenarios.py | 25 +- .../updated_scenarios/basic_scenarios.py | 3 +- .../updated_scenarios/partition_scenarios.py | 5 +- 13 files changed, 800 insertions(+), 656 deletions(-) create mode 100644 python_modules/dagster/dagster/_core/definitions/asset_automation_condition_context.py diff --git a/python_modules/dagster/dagster/_core/definitions/asset_automation_condition_context.py b/python_modules/dagster/dagster/_core/definitions/asset_automation_condition_context.py new file mode 100644 index 0000000000000..60a3385f9c4bb --- /dev/null +++ b/python_modules/dagster/dagster/_core/definitions/asset_automation_condition_context.py @@ -0,0 +1,319 @@ +import datetime +import functools +from dataclasses import dataclass +from typing import TYPE_CHECKING, AbstractSet, Mapping, Optional + +from dagster._core.definitions.auto_materialize_rule_evaluation import RuleEvaluationResults +from dagster._core.definitions.data_time import CachingDataTimeResolver +from dagster._core.definitions.events import AssetKey, AssetKeyPartitionKey +from dagster._core.definitions.partition import PartitionsDefinition +from dagster._core.definitions.partition_mapping import IdentityPartitionMapping +from dagster._core.definitions.time_window_partition_mapping import TimeWindowPartitionMapping +from dagster._utils.caching_instance_queryer import CachingInstanceQueryer + +from .asset_daemon_cursor import AssetDaemonAssetCursor +from .asset_graph import AssetGraph +from .asset_subset import AssetSubset + +if TYPE_CHECKING: + from .asset_automation_evaluator import AutomationCondition, ConditionEvaluation + from .asset_daemon_context import AssetDaemonContext + + +@dataclass(frozen=True) +class AssetAutomationEvaluationContext: + """Context object containing methods and properties used for evaluating the entire state of an + asset's automation rules. + """ + + asset_key: AssetKey + asset_cursor: Optional[AssetDaemonAssetCursor] + root_condition: "AutomationCondition" + + instance_queryer: CachingInstanceQueryer + data_time_resolver: CachingDataTimeResolver + daemon_context: "AssetDaemonContext" + + evaluation_results_by_key: Mapping[AssetKey, "ConditionEvaluation"] + expected_data_time_mapping: Mapping[AssetKey, Optional[datetime.datetime]] + + @property + def asset_graph(self) -> AssetGraph: + return self.instance_queryer.asset_graph + + @property + def partitions_def(self) -> Optional[PartitionsDefinition]: + return self.asset_graph.get_partitions_def(self.asset_key) + + @property + def evaluation_time(self) -> datetime.datetime: + """Returns the time at which this rule is being evaluated.""" + return self.instance_queryer.evaluation_time + + @functools.cached_property + def latest_evaluation(self) -> Optional["ConditionEvaluation"]: + if not self.asset_cursor: + return None + return self.asset_cursor.latest_evaluation + + @functools.cached_property + def parent_will_update_subset(self) -> AssetSubset: + """Returns the set of asset partitions whose parents will be updated on this tick, and which + can be materialized in the same run as this asset. + """ + subset = self.empty_subset() + for parent_key in self.asset_graph.get_parents(self.asset_key): + if not self.materializable_in_same_run(self.asset_key, parent_key): + continue + parent_result = self.evaluation_results_by_key.get(parent_key) + if not parent_result: + continue + parent_subset = parent_result.true_subset + subset |= parent_subset._replace(asset_key=self.asset_key) + return subset + + @property + def previous_tick_requested_subset(self) -> AssetSubset: + """Returns the set of asset partitions that were requested on the previous tick.""" + if not self.latest_evaluation: + return self.empty_subset() + return self.latest_evaluation.true_subset + + @functools.cached_property + def materialized_since_previous_tick_subset(self) -> AssetSubset: + """Returns the set of asset partitions that were materialized since the previous tick.""" + return AssetSubset.from_asset_partitions_set( + self.asset_key, + self.partitions_def, + self.instance_queryer.get_asset_partitions_updated_after_cursor( + self.asset_key, + asset_partitions=None, + after_cursor=self.asset_cursor.latest_storage_id if self.asset_cursor else None, + respect_materialization_data_versions=False, + ), + ) + + @functools.cached_property + def materialized_requested_or_discarded_since_previous_tick_subset(self) -> AssetSubset: + """Returns the set of asset partitions that were materialized since the previous tick.""" + if not self.latest_evaluation: + return self.materialized_since_previous_tick_subset + return ( + self.materialized_since_previous_tick_subset + | self.latest_evaluation.true_subset + | (self.latest_evaluation.discard_subset or self.empty_subset()) + ) + + @functools.cached_property + def never_materialized_requested_or_discarded_root_subset(self) -> AssetSubset: + if self.asset_key not in self.asset_graph.root_materializable_or_observable_asset_keys: + return self.empty_subset() + + handled_subset = ( + self.asset_cursor.materialized_requested_or_discarded_subset + if self.asset_cursor + else self.empty_subset() + ) + unhandled_subset = handled_subset.inverse( + self.partitions_def, + dynamic_partitions_store=self.instance_queryer, + current_time=self.evaluation_time, + ) + return unhandled_subset - self.materialized_since_previous_tick_subset + + def materializable_in_same_run(self, child_key: AssetKey, parent_key: AssetKey) -> bool: + """Returns whether a child asset can be materialized in the same run as a parent asset.""" + from dagster._core.definitions.external_asset_graph import ExternalAssetGraph + + return ( + # both assets must be materializable + child_key in self.asset_graph.materializable_asset_keys + and parent_key in self.asset_graph.materializable_asset_keys + # the parent must have the same partitioning + and self.asset_graph.have_same_partitioning(child_key, parent_key) + # the parent must have a simple partition mapping to the child + and ( + not self.asset_graph.is_partitioned(parent_key) + or isinstance( + self.asset_graph.get_partition_mapping(child_key, parent_key), + (TimeWindowPartitionMapping, IdentityPartitionMapping), + ) + ) + # the parent must be in the same repository to be materialized alongside the candidate + and ( + not isinstance(self.asset_graph, ExternalAssetGraph) + or self.asset_graph.get_repository_handle(child_key) + == self.asset_graph.get_repository_handle(parent_key) + ) + ) + + def get_parents_that_will_not_be_materialized_on_current_tick( + self, *, asset_partition: AssetKeyPartitionKey + ) -> AbstractSet[AssetKeyPartitionKey]: + """Returns the set of parent asset partitions that will not be updated in the same run of + this asset partition if a run is launched for this asset partition on this tick. + """ + return { + parent + for parent in self.asset_graph.get_parents_partitions( + dynamic_partitions_store=self.instance_queryer, + current_time=self.instance_queryer.evaluation_time, + asset_key=asset_partition.asset_key, + partition_key=asset_partition.partition_key, + ).parent_partitions + if not self.will_update_asset_partition(parent) + or not self.materializable_in_same_run(asset_partition.asset_key, parent.asset_key) + } + + def will_update_asset_partition(self, asset_partition: AssetKeyPartitionKey) -> bool: + parent_evaluation = self.evaluation_results_by_key.get(asset_partition.asset_key) + if not parent_evaluation: + return False + return asset_partition in parent_evaluation.true_subset + + def empty_subset(self) -> AssetSubset: + return AssetSubset.empty(self.asset_key, self.partitions_def) + + def get_root_condition_context(self) -> "AssetAutomationConditionEvaluationContext": + return AssetAutomationConditionEvaluationContext( + asset_context=self, + condition=self.root_condition, + candidate_subset=AssetSubset.all( + asset_key=self.asset_key, + partitions_def=self.partitions_def, + dynamic_partitions_store=self.instance_queryer, + current_time=self.instance_queryer.evaluation_time, + ), + latest_evaluation=self.latest_evaluation, + ) + + def get_new_asset_cursor(self, evaluation: "ConditionEvaluation") -> AssetDaemonAssetCursor: + """Returns a new AssetDaemonAssetCursor based on the current cursor and the results of + this tick's evaluation. + """ + previous_handled_subset = ( + self.asset_cursor.materialized_requested_or_discarded_subset + if self.asset_cursor + else self.empty_subset() + ) + new_handled_subset = ( + previous_handled_subset + | self.materialized_requested_or_discarded_since_previous_tick_subset + | evaluation.true_subset + | (evaluation.discard_subset or self.empty_subset()) + ) + return AssetDaemonAssetCursor( + asset_key=self.asset_key, + latest_storage_id=self.daemon_context.get_new_latest_storage_id(), + latest_evaluation=evaluation, + latest_evaluation_timestamp=self.evaluation_time.timestamp(), + materialized_requested_or_discarded_subset=new_handled_subset, + ) + + +@dataclass(frozen=True) +class AssetAutomationConditionEvaluationContext: + """Context object containing methods and properties used for evaluating a particular AutomationCondition.""" + + asset_context: AssetAutomationEvaluationContext + condition: "AutomationCondition" + candidate_subset: AssetSubset + latest_evaluation: Optional["ConditionEvaluation"] + + @property + def asset_key(self) -> AssetKey: + return self.asset_context.asset_key + + @property + def partitions_def(self) -> Optional[PartitionsDefinition]: + return self.asset_context.partitions_def + + @property + def asset_cursor(self) -> Optional[AssetDaemonAssetCursor]: + return self.asset_context.asset_cursor + + @property + def asset_graph(self) -> AssetGraph: + return self.asset_context.asset_graph + + @property + def instance_queryer(self) -> CachingInstanceQueryer: + return self.asset_context.instance_queryer + + @property + def max_storage_id(self) -> Optional[int]: + return self.asset_cursor.latest_storage_id if self.asset_cursor else None + + @property + def latest_evaluation_timestamp(self) -> Optional[float]: + return self.asset_cursor.latest_evaluation_timestamp if self.asset_cursor else None + + @property + def previous_tick_true_subset(self) -> AssetSubset: + """Returns the set of asset partitions that were true on the previous tick.""" + if not self.latest_evaluation: + return self.empty_subset() + return self.latest_evaluation.true_subset + + @property + def parent_has_updated_subset(self) -> AssetSubset: + """Returns the set of asset partitions whose parents have updated since the last time this + condition was evaluated. + """ + return AssetSubset.from_asset_partitions_set( + self.asset_key, + self.partitions_def, + self.asset_context.instance_queryer.asset_partitions_with_newly_updated_parents( + latest_storage_id=self.max_storage_id, + child_asset_key=self.asset_context.asset_key, + map_old_time_partitions=False, + ), + ) + + @property + def candidate_parent_has_or_will_update_subset(self) -> AssetSubset: + """Returns the set of candidates for this tick which have parents that have updated since + the previous tick, or will update on this tick. + """ + return self.candidate_subset & ( + self.parent_has_updated_subset | self.asset_context.parent_will_update_subset + ) + + @property + def candidates_not_evaluated_on_previous_tick_subset(self) -> AssetSubset: + """Returns the set of candidates for this tick which were not candidates on the previous + tick. + """ + if not self.latest_evaluation: + return self.candidate_subset + return self.candidate_subset - self.latest_evaluation.candidate_subset + + @property + def materialized_since_previous_tick_subset(self) -> AssetSubset: + """Returns the set of asset partitions that were materialized since the previous tick.""" + return self.asset_context.materialized_since_previous_tick_subset + + @property + def materialized_requested_or_discarded_since_previous_tick_subset(self) -> AssetSubset: + """Returns the set of asset partitions that were materialized since the previous tick.""" + return self.asset_context.materialized_requested_or_discarded_since_previous_tick_subset + + @property + def previous_tick_results(self) -> RuleEvaluationResults: + """Returns the RuleEvaluationResults calculated on the previous tick for this condition.""" + return self.latest_evaluation.results if self.latest_evaluation else [] + + def empty_subset(self) -> AssetSubset: + return self.asset_context.empty_subset() + + def for_child( + self, condition: "AutomationCondition", candidate_subset: AssetSubset + ) -> "AssetAutomationConditionEvaluationContext": + return AssetAutomationConditionEvaluationContext( + asset_context=self.asset_context, + condition=condition, + candidate_subset=candidate_subset, + latest_evaluation=self.latest_evaluation.for_child(condition) + if self.latest_evaluation + else None, + ) diff --git a/python_modules/dagster/dagster/_core/definitions/asset_automation_evaluator.py b/python_modules/dagster/dagster/_core/definitions/asset_automation_evaluator.py index 51a6f50e4174f..b5ef67da6d141 100644 --- a/python_modules/dagster/dagster/_core/definitions/asset_automation_evaluator.py +++ b/python_modules/dagster/dagster/_core/definitions/asset_automation_evaluator.py @@ -1,33 +1,44 @@ +import dataclasses from abc import ABC, abstractmethod from typing import TYPE_CHECKING, AbstractSet, List, NamedTuple, Optional, Sequence, Tuple +import dagster._check as check from dagster._core.definitions.asset_daemon_cursor import AssetDaemonAssetCursor from dagster._core.definitions.asset_graph import AssetGraph -from dagster._core.definitions.auto_materialize_rule import AutoMaterializeRule from dagster._core.definitions.events import AssetKey, AssetKeyPartitionKey -from .asset_subset import AssetSubset -from .auto_materialize_rule import ( - DiscardOnMaxMaterializationsExceededRule, - RuleEvaluationContext, - RuleEvaluationResults, +from .asset_automation_condition_context import ( + AssetAutomationConditionEvaluationContext, + AssetAutomationEvaluationContext, ) +from .asset_subset import AssetSubset from .auto_materialize_rule_evaluation import ( AutoMaterializeAssetEvaluation, + AutoMaterializeDecisionType, AutoMaterializeRuleEvaluation, ) if TYPE_CHECKING: from dagster._utils.caching_instance_queryer import CachingInstanceQueryer + from .auto_materialize_rule import AutoMaterializeRule, RuleEvaluationResults + class ConditionEvaluation(NamedTuple): """Internal representation of the results of evaluating a node in the evaluation tree.""" condition: "AutomationCondition" true_subset: AssetSubset - results: RuleEvaluationResults = [] - children: Sequence["ConditionEvaluation"] = [] + candidate_subset: AssetSubset + + results: "RuleEvaluationResults" = [] + child_evaluations: Sequence["ConditionEvaluation"] = [] + + # backcompat until we remove the discard concept + discard_subset: Optional[AssetSubset] = None + discard_results: Sequence[ + Tuple[AutoMaterializeRuleEvaluation, AbstractSet[AssetKeyPartitionKey]] + ] = [] @property def all_results( @@ -49,34 +60,126 @@ def all_results( ] else: results = [] - for child in self.children: + for child in self.child_evaluations: results = [*results, *child.all_results] return results + def for_child(self, child_condition: "AutomationCondition") -> Optional["ConditionEvaluation"]: + """Returns the evaluation of a given child condition.""" + for child_evaluation in self.child_evaluations: + if child_evaluation.condition == child_condition: + return child_evaluation + return None + def to_evaluation( self, asset_key: AssetKey, asset_graph: AssetGraph, instance_queryer: "CachingInstanceQueryer", - to_discard: AssetSubset, - discard_results: Sequence[ - Tuple[AutoMaterializeRuleEvaluation, AbstractSet[AssetKeyPartitionKey]] - ], - skipped_subset_size: int, ) -> AutoMaterializeAssetEvaluation: """This method is a placeholder to allow us to convert this into a shape that other parts of the system understand. """ + # backcompat way to calculate the set of skipped partitions for legacy policies + if self.condition.is_legacy and len(self.child_evaluations) == 2: + # the first child is the materialize condition, the second child is the negation of + # the skip condition + _, nor_skip_evaluation = self.child_evaluations + skip_evaluation = nor_skip_evaluation.child_evaluations[0] + skipped_subset_size = skip_evaluation.true_subset.size + else: + skipped_subset_size = 0 + + discard_subset = self.discard_subset or AssetSubset.empty( + asset_key, asset_graph.get_partitions_def(asset_key) + ) + return AutoMaterializeAssetEvaluation.from_rule_evaluation_results( asset_key=asset_key, asset_graph=asset_graph, - asset_partitions_by_rule_evaluation=[*self.all_results, *discard_results], - num_requested=(self.true_subset - to_discard).size, + asset_partitions_by_rule_evaluation=[*self.all_results, *self.discard_results], + num_requested=(self.true_subset - discard_subset).size, num_skipped=skipped_subset_size, - num_discarded=to_discard.size, + num_discarded=discard_subset.size, dynamic_partitions_store=instance_queryer, ) + @staticmethod + def from_evaluation_and_rule( + evaluation: AutoMaterializeAssetEvaluation, + asset_graph: AssetGraph, + rule: "AutoMaterializeRule", + ) -> "ConditionEvaluation": + asset_key = evaluation.asset_key + partitions_def = asset_graph.get_partitions_def(asset_key) + empty_subset = AssetSubset.empty(asset_key, partitions_def) + return ConditionEvaluation( + condition=RuleCondition(rule=rule), + true_subset=empty_subset, + candidate_subset=empty_subset + if rule.decision_type == AutoMaterializeDecisionType.MATERIALIZE + else evaluation.get_evaluated_subset(asset_graph), + discard_subset=empty_subset, + results=evaluation.get_rule_evaluation_results(rule.to_snapshot(), asset_graph), + ) + + @staticmethod + def from_evaluation( + condition: "AutomationCondition", + evaluation: Optional[AutoMaterializeAssetEvaluation], + asset_graph: AssetGraph, + ) -> Optional["ConditionEvaluation"]: + """This method is a placeholder to allow us to convert the serialized objects the system + uses into a more-convenient internal representation. + """ + if not condition.is_legacy or not evaluation: + return None + + asset_key = evaluation.asset_key + partitions_def = asset_graph.get_partitions_def(asset_key) + empty_subset = AssetSubset.empty(asset_key, partitions_def) + + materialize_condition, skip_condition = condition.children + materialize_rules = [ + materialize_condition.rule + for materialize_condition in materialize_condition.children + if isinstance(materialize_condition, RuleCondition) + and materialize_condition.rule.to_snapshot() in (evaluation.rule_snapshots or set()) + ] + skip_rules = [ + skip_condition.rule + for skip_condition in skip_condition.children + if isinstance(skip_condition, RuleCondition) + and skip_condition.rule.to_snapshot() in (evaluation.rule_snapshots or set()) + ] + children = [ + ConditionEvaluation( + condition=materialize_condition, + true_subset=empty_subset, + candidate_subset=empty_subset, + child_evaluations=[ + ConditionEvaluation.from_evaluation_and_rule(evaluation, asset_graph, rule) + for rule in materialize_rules + ], + ), + ConditionEvaluation( + condition=skip_condition, + true_subset=empty_subset, + candidate_subset=empty_subset, + child_evaluations=[ + ConditionEvaluation.from_evaluation_and_rule(evaluation, asset_graph, rule) + for rule in skip_rules + ], + ), + ] + return ConditionEvaluation( + condition=condition, + true_subset=evaluation.get_requested_subset(asset_graph), + discard_subset=evaluation.get_discarded_subset(asset_graph), + candidate_subset=empty_subset, + child_evaluations=children, + ) + class AutomationCondition(ABC): """An AutomationCondition represents some state of the world that can influence if an asset @@ -84,8 +187,12 @@ class AutomationCondition(ABC): new conditions using the `&` (and), `|` (or), and `~` (not) operators. """ + @property + def children(self) -> Sequence["AutomationCondition"]: + return [] + @abstractmethod - def evaluate(self, context: RuleEvaluationContext) -> ConditionEvaluation: + def evaluate(self, context: AssetAutomationConditionEvaluationContext) -> ConditionEvaluation: raise NotImplementedError() def __and__(self, other: "AutomationCondition") -> "AutomationCondition": @@ -101,85 +208,120 @@ def __or__(self, other: "AutomationCondition") -> "AutomationCondition": return OrAutomationCondition(children=[self, other]) def __invert__(self) -> "AutomationCondition": - # convert a negated OrAutomationCondition into a NorAutomationCondition - if isinstance(self, OrAutomationCondition): - return NorAutomationCondition(children=self.children) - # convert a negated NorAutomationCondition into an OrAutomationCondition - elif isinstance(self, NorAutomationCondition): - return OrAutomationCondition(children=self.children) - return NorAutomationCondition(children=[self]) + return NotAutomationCondition(children=[self]) + + @property + def is_legacy(self) -> bool: + """Returns if this condition is in the legacy format. This is used to determine if we can + do certain types of backwards-compatible operations on it. + """ + return ( + isinstance(self, AndAutomationCondition) + and len(self.children) == 2 + and isinstance(self.children[0], OrAutomationCondition) + and isinstance(self.children[1], NotAutomationCondition) + ) class RuleCondition( - AutomationCondition, NamedTuple("_RuleCondition", [("rule", AutoMaterializeRule)]) + NamedTuple("_RuleCondition", [("rule", "AutoMaterializeRule")]), + AutomationCondition, ): """This class represents the condition that a particular AutoMaterializeRule is satisfied.""" - def evaluate(self, context: RuleEvaluationContext) -> ConditionEvaluation: - context.daemon_context._verbose_log_fn(f"Evaluating rule: {self.rule.to_snapshot()}") # noqa + def evaluate(self, context: AssetAutomationConditionEvaluationContext) -> ConditionEvaluation: + context.asset_context.daemon_context._verbose_log_fn( # noqa + f"Evaluating rule: {self.rule.to_snapshot()}" + ) results = self.rule.evaluate_for_asset(context) true_subset = context.empty_subset() for _, asset_partitions in results: true_subset |= AssetSubset.from_asset_partitions_set( context.asset_key, context.partitions_def, asset_partitions ) - context.daemon_context._verbose_log_fn(f"Rule returned {true_subset.size} partitions") # noqa - return ConditionEvaluation(condition=self, true_subset=true_subset, results=results) + context.asset_context.daemon_context._verbose_log_fn( # noqa + f"Rule returned {true_subset.size} partitions" + ) + return ConditionEvaluation( + condition=self, + true_subset=true_subset, + candidate_subset=context.candidate_subset, + results=results, + ) class AndAutomationCondition( - AutomationCondition, NamedTuple("_AndAutomationCondition", [("children", Sequence[AutomationCondition])]), + AutomationCondition, ): """This class represents the condition that all of its children evaluate to true.""" - def evaluate(self, context: RuleEvaluationContext) -> ConditionEvaluation: + def evaluate(self, context: AssetAutomationConditionEvaluationContext) -> ConditionEvaluation: child_evaluations: List[ConditionEvaluation] = [] true_subset = context.candidate_subset for child in self.children: - context = context.with_candidate_subset(true_subset) - result = child.evaluate(context) + child_context = context.for_child(condition=child, candidate_subset=true_subset) + result = child.evaluate(child_context) child_evaluations.append(result) true_subset &= result.true_subset return ConditionEvaluation( - condition=self, true_subset=true_subset, children=child_evaluations + condition=self, + true_subset=true_subset, + candidate_subset=context.candidate_subset, + child_evaluations=child_evaluations, ) class OrAutomationCondition( - AutomationCondition, NamedTuple("_OrAutomationCondition", [("children", Sequence[AutomationCondition])]), + AutomationCondition, ): """This class represents the condition that any of its children evaluate to true.""" - def evaluate(self, context: RuleEvaluationContext) -> ConditionEvaluation: + def evaluate(self, context: AssetAutomationConditionEvaluationContext) -> ConditionEvaluation: child_evaluations: List[ConditionEvaluation] = [] true_subset = context.empty_subset() for child in self.children: - result = child.evaluate(context) + child_context = context.for_child( + condition=child, candidate_subset=context.candidate_subset + ) + result = child.evaluate(child_context) child_evaluations.append(result) true_subset |= result.true_subset return ConditionEvaluation( - condition=self, true_subset=true_subset, children=child_evaluations + condition=self, + true_subset=true_subset, + candidate_subset=context.candidate_subset, + child_evaluations=child_evaluations, ) -class NorAutomationCondition( +class NotAutomationCondition( + NamedTuple("_NotAutomationCondition", [("children", Sequence[AutomationCondition])]), AutomationCondition, - NamedTuple("_NorAutomationCondition", [("children", Sequence[AutomationCondition])]), ): """This class represents the condition that none of its children evaluate to true.""" - def evaluate(self, context: RuleEvaluationContext) -> ConditionEvaluation: - child_evaluations: List[ConditionEvaluation] = [] - true_subset = context.candidate_subset - for child in self.children: - context = context.with_candidate_subset(true_subset) - result = child.evaluate(context) - child_evaluations.append(result) - true_subset -= result.true_subset + def __new__(cls, children: Sequence[AutomationCondition]): + check.invariant(len(children) == 1) + return super().__new__(cls, children) + + @property + def child(self) -> AutomationCondition: + return self.children[0] + + def evaluate(self, context: AssetAutomationConditionEvaluationContext) -> ConditionEvaluation: + child_context = context.for_child( + condition=self.child, candidate_subset=context.candidate_subset + ) + result = self.child.evaluate(child_context) + true_subset = context.candidate_subset - result.true_subset + return ConditionEvaluation( - condition=self, true_subset=true_subset, children=child_evaluations + condition=self, + true_subset=true_subset, + candidate_subset=context.candidate_subset, + child_evaluations=[result], ) @@ -192,67 +334,45 @@ class AssetAutomationEvaluator(NamedTuple): max_materializations_per_minute: Optional[int] = 1 def evaluate( - self, context: RuleEvaluationContext, report_num_skipped: bool - ) -> Tuple[ - AutoMaterializeAssetEvaluation, - AssetDaemonAssetCursor, - AbstractSet[AssetKeyPartitionKey], - ]: + self, context: AssetAutomationEvaluationContext + ) -> Tuple[ConditionEvaluation, AssetDaemonAssetCursor]: """Evaluates the auto materialize policy of a given asset. Returns: - - An AutoMaterializeAssetEvaluation object representing serializable information about - this evaluation. If `report_num_skipped` is set to `True`, then this will attempt to - calculate the number of skipped partitions in a backwards-compatible way. This can only be - done for policies that are in the format `(a | b | ...) & ~(c | d | ...). - - The set of AssetKeyPartitionKeys that should be materialized. - - The set of AssetKeyPartitionKeys that should be discarded. + - A ConditionEvaluation object representing information about this evaluation. If + `report_num_skipped` is set to `True`, then this will attempt to calculate the number of + skipped partitions in a backwards-compatible way. This can only be done for policies that + are in the format `(a | b | ...) & ~(c | d | ...). + - A new AssetDaemonAssetCursor that represents the state of the world after this evaluation. """ - condition_evaluation = self.condition.evaluate(context) + from .auto_materialize_rule import DiscardOnMaxMaterializationsExceededRule + + condition_context = context.get_root_condition_context() + condition_evaluation = self.condition.evaluate(condition_context) # this is treated separately from other rules, for now - to_discard, discard_results = context.empty_subset(), [] + discard_subset = context.empty_subset() + discard_results = [] if self.max_materializations_per_minute is not None: - discard_context = context.with_candidate_subset(condition_evaluation.true_subset) - condition = RuleCondition( - DiscardOnMaxMaterializationsExceededRule(limit=self.max_materializations_per_minute) + discard_context = dataclasses.replace( + condition_context, candidate_subset=condition_evaluation.true_subset ) - discard_condition_evaluation = condition.evaluate(discard_context) - to_discard = discard_condition_evaluation.true_subset - discard_results = discard_condition_evaluation.all_results - - to_materialize = condition_evaluation.true_subset - to_discard - - skipped_subset_size = 0 - if ( - report_num_skipped - # check shape of top-level condition - and isinstance(self.condition, AndAutomationCondition) - and len(self.condition.children) == 2 - and isinstance(self.condition.children[1], NorAutomationCondition) - # confirm shape of evaluation - and len(condition_evaluation.children) == 2 - ): - # the first child is the materialize condition, the second child is the skip_condition - materialize_condition, skip_evaluation = condition_evaluation.children - skipped_subset_size = ( - materialize_condition.true_subset.size - skip_evaluation.true_subset.size + discard_rule = DiscardOnMaxMaterializationsExceededRule( + limit=self.max_materializations_per_minute ) + condition = RuleCondition(discard_rule) + discard_condition_evaluation = condition.evaluate(discard_context) + discard_subset = discard_condition_evaluation.true_subset + discard_results = [ + (AutoMaterializeRuleEvaluation(discard_rule.to_snapshot(), evaluation_data), aps) + for evaluation_data, aps in discard_condition_evaluation.results + ] return ( - condition_evaluation.to_evaluation( - context.asset_key, - context.asset_graph, - context.instance_queryer, - to_discard, - discard_results, - skipped_subset_size=skipped_subset_size, - ), - context.cursor.with_updates( - asset_graph=context.asset_graph, - newly_materialized_subset=context.newly_materialized_root_subset, - requested_asset_partitions=to_materialize.asset_partitions, - discarded_asset_partitions=to_discard.asset_partitions, + condition_evaluation._replace( + true_subset=condition_evaluation.true_subset - discard_subset, + discard_subset=discard_subset, + discard_results=discard_results, ), - to_materialize.asset_partitions, + context.get_new_asset_cursor(evaluation=condition_evaluation), ) diff --git a/python_modules/dagster/dagster/_core/definitions/asset_daemon_context.py b/python_modules/dagster/dagster/_core/definitions/asset_daemon_context.py index b53de0ba10526..323e4bd6657b3 100644 --- a/python_modules/dagster/dagster/_core/definitions/asset_daemon_context.py +++ b/python_modules/dagster/dagster/_core/definitions/asset_daemon_context.py @@ -23,7 +23,6 @@ import pendulum import dagster._check as check -from dagster._core.definitions.asset_subset import AssetSubset from dagster._core.definitions.auto_materialize_policy import AutoMaterializePolicy from dagster._core.definitions.data_time import CachingDataTimeResolver from dagster._core.definitions.events import AssetKey, AssetKeyPartitionKey @@ -36,15 +35,12 @@ from ... import PartitionKeyRange from ..storage.tags import ASSET_PARTITION_RANGE_END_TAG, ASSET_PARTITION_RANGE_START_TAG +from .asset_automation_condition_context import AssetAutomationEvaluationContext +from .asset_automation_evaluator import ConditionEvaluation from .asset_daemon_cursor import AssetDaemonAssetCursor, AssetDaemonCursor from .asset_graph import AssetGraph -from .auto_materialize_rule import ( - AutoMaterializeRule, - RuleEvaluationContext, -) -from .auto_materialize_rule_evaluation import ( - AutoMaterializeAssetEvaluation, -) +from .auto_materialize_rule import AutoMaterializeRule +from .auto_materialize_rule_evaluation import AutoMaterializeAssetEvaluation from .backfill_policy import BackfillPolicy, BackfillPolicyType from .freshness_based_auto_materialize import get_expected_data_time_for_asset_key from .partition import PartitionsDefinition, ScheduleType @@ -224,13 +220,9 @@ def get_new_latest_storage_id(self) -> Optional[int]: def evaluate_asset( self, asset_key: AssetKey, - will_materialize_mapping: Mapping[AssetKey, AbstractSet[AssetKeyPartitionKey]], + evaluation_results_by_key: Mapping[AssetKey, ConditionEvaluation], expected_data_time_mapping: Mapping[AssetKey, Optional[datetime.datetime]], - ) -> Tuple[ - AutoMaterializeAssetEvaluation, - AssetDaemonAssetCursor, - AbstractSet[AssetKeyPartitionKey], - ]: + ) -> Tuple[ConditionEvaluation, AssetDaemonAssetCursor, Optional[datetime.datetime]]: """Evaluates the auto materialize policy of a given asset key. Params: @@ -242,40 +234,32 @@ def evaluate_asset( asset after this tick. As this function is called in topological order, this mapping will contain the expected data times of all upstream assets. - Returns: - - An AutoMaterializeAssetEvaluation object representing serializable information about - this evaluation. - - The set of AssetKeyPartitionKeys that should be materialized. - - The set of AssetKeyPartitionKeys that should be discarded. """ # convert the legacy AutoMaterializePolicy to an Evaluator auto_materialize_policy_evaluator = check.not_none( self.asset_graph.auto_materialize_policies_by_key.get(asset_key) ).to_auto_materialize_policy_evaluator() - partitions_def = self.asset_graph.get_partitions_def(asset_key) - context = RuleEvaluationContext( + context = AssetAutomationEvaluationContext( asset_key=asset_key, - cursor=self.cursor.asset_cursor_for_key(asset_key, partitions_def), + asset_cursor=self.cursor.asset_cursor_for_key(asset_key, self.asset_graph), + root_condition=auto_materialize_policy_evaluator.condition, instance_queryer=self.instance_queryer, data_time_resolver=self.data_time_resolver, - will_materialize_mapping=will_materialize_mapping, - expected_data_time_mapping=expected_data_time_mapping, - candidate_subset=AssetSubset.all( - asset_key=asset_key, - partitions_def=partitions_def, - dynamic_partitions_store=self.instance_queryer, - current_time=self.instance_queryer.evaluation_time, - ), daemon_context=self, + evaluation_results_by_key=evaluation_results_by_key, + expected_data_time_mapping=expected_data_time_mapping, ) - - return auto_materialize_policy_evaluator.evaluate(context, report_num_skipped=True) + evaluation, asset_cursor = auto_materialize_policy_evaluator.evaluate(context) + expected_data_time = get_expected_data_time_for_asset_key( + context, will_materialize=evaluation.true_subset.size > 0 + ) + return evaluation, asset_cursor, expected_data_time def get_auto_materialize_asset_evaluations( self, ) -> Tuple[ - Mapping[AssetKey, AutoMaterializeAssetEvaluation], + Sequence[AutoMaterializeAssetEvaluation], Sequence[AssetDaemonAssetCursor], AbstractSet[AssetKeyPartitionKey], ]: @@ -283,17 +267,17 @@ def get_auto_materialize_asset_evaluations( sequence of new per-asset cursors, and the set of all asset partitions that should be materialized or discarded this tick. """ - evaluations_by_key: Dict[AssetKey, AutoMaterializeAssetEvaluation] = {} asset_cursors: List[AssetDaemonAssetCursor] = [] - will_materialize_mapping: Dict[AssetKey, AbstractSet[AssetKeyPartitionKey]] = defaultdict( - set - ) + + evaluation_results_by_key: Dict[AssetKey, ConditionEvaluation] = {} + legacy_evaluation_results_by_key: Dict[AssetKey, AutoMaterializeAssetEvaluation] = {} expected_data_time_mapping: Dict[AssetKey, Optional[datetime.datetime]] = defaultdict() - visited_multi_asset_keys = set() + to_request: Set[AssetKeyPartitionKey] = set() num_checked_assets = 0 num_auto_materialize_asset_keys = len(self.auto_materialize_asset_keys) + visited_multi_asset_keys = set() for asset_key in itertools.chain(*self.asset_graph.toposort_asset_keys()): # an asset may have already been visited if it was part of a non-subsettable multi-asset if asset_key not in self.auto_materialize_asset_keys: @@ -310,71 +294,54 @@ def get_auto_materialize_asset_evaluations( self._verbose_log_fn(f"Asset {asset_key.to_user_string()} already visited") continue - ( - evaluation, - asset_cursor_for_asset, - to_materialize_for_asset, - ) = self.evaluate_asset(asset_key, will_materialize_mapping, expected_data_time_mapping) + (evaluation, asset_cursor_for_asset, expected_data_time) = self.evaluate_asset( + asset_key, evaluation_results_by_key, expected_data_time_mapping + ) + + # convert the new-format evaluation to the legacy format + legacy_evaluation = evaluation.to_evaluation( + asset_key, self.asset_graph, self.instance_queryer + ) log_fn = ( self._logger.info - if (evaluation.num_requested or evaluation.num_skipped or evaluation.num_discarded) + if ( + legacy_evaluation.num_requested + or legacy_evaluation.num_skipped + or legacy_evaluation.num_discarded + ) else self._logger.debug ) - to_materialize_str = ",".join( - [ - (to_materialize.partition_key or "No partition") - for to_materialize in to_materialize_for_asset - ] + to_request_asset_partitions = evaluation.true_subset.asset_partitions + to_request_str = ",".join( + [(ap.partition_key or "No partition") for ap in to_request_asset_partitions] ) + to_request |= to_request_asset_partitions log_fn( - f"Asset {asset_key.to_user_string()} evaluation result: {evaluation.num_requested}" - f" requested ({to_materialize_str}), {evaluation.num_skipped}" - f" skipped, {evaluation.num_discarded} discarded ({format(time.time()-start_time, '.3f')} seconds)" + f"Asset {asset_key.to_user_string()} evaluation result: {legacy_evaluation.num_requested}" + f" requested ({to_request_str}), {legacy_evaluation.num_skipped}" + f" skipped, {legacy_evaluation.num_discarded} discarded ({format(time.time()-start_time, '.3f')} seconds)" ) - evaluations_by_key[asset_key] = evaluation - asset_cursors.append(asset_cursor_for_asset) - will_materialize_mapping[asset_key] = to_materialize_for_asset - - expected_data_time = get_expected_data_time_for_asset_key( - self.asset_graph, - asset_key, - will_materialize_mapping=will_materialize_mapping, - expected_data_time_mapping=expected_data_time_mapping, - data_time_resolver=self.data_time_resolver, - current_time=self.instance_queryer.evaluation_time, - will_materialize=bool(to_materialize_for_asset), - ) + evaluation_results_by_key[asset_key] = evaluation + legacy_evaluation_results_by_key[asset_key] = legacy_evaluation expected_data_time_mapping[asset_key] = expected_data_time - # if we need to materialize any partitions of a non-subsettable multi-asset, just copy - # over evaluation to any required neighbor key - if to_materialize_for_asset: - for neighbor_key in self.asset_graph.get_required_multi_asset_keys(asset_key): - auto_materialize_policy = self.asset_graph.auto_materialize_policies_by_key.get( - neighbor_key - ) - - if auto_materialize_policy is None: - check.failed(f"Expected auto materialize policy on asset {asset_key}") - - to_materialize_for_neighbor = { - ap._replace(asset_key=neighbor_key) for ap in to_materialize_for_asset - } - - evaluations_by_key[neighbor_key] = evaluation._replace( - asset_key=neighbor_key, - rule_snapshots=auto_materialize_policy.rule_snapshots, # Neighbors can have different rule snapshots - ) - will_materialize_mapping[neighbor_key] = to_materialize_for_neighbor + asset_cursors.append(asset_cursor_for_asset) + # if we need to materialize any partitions of a non-subsettable multi-asset, we need to + # materialize all of them + if legacy_evaluation.num_requested > 0: + for neighbor_key in self.asset_graph.get_required_multi_asset_keys(asset_key): expected_data_time_mapping[neighbor_key] = expected_data_time visited_multi_asset_keys.add(neighbor_key) + to_request |= { + ap._replace(asset_key=neighbor_key) + for ap in evaluation.true_subset.asset_partitions + } - to_materialize = set().union(*will_materialize_mapping.values()) - return (evaluations_by_key, asset_cursors, to_materialize) + return (list(legacy_evaluation_results_by_key.values()), asset_cursors, to_request) def evaluate( self, @@ -392,15 +359,11 @@ def evaluate( else [] ) - ( - evaluations_by_asset_key, - asset_cursors, - to_materialize, - ) = self.get_auto_materialize_asset_evaluations() + evaluations, asset_cursors, to_request = self.get_auto_materialize_asset_evaluations() run_requests = [ *build_run_requests( - asset_partitions=to_materialize, + asset_partitions=to_request, asset_graph=self.asset_graph, run_tags=self.auto_materialize_run_tags, ), @@ -418,14 +381,14 @@ def evaluate( for asset_key in cast(Sequence[AssetKey], run_request.asset_selection) ], observe_request_timestamp=observe_request_timestamp, - evaluations=list(evaluations_by_asset_key.values()), + evaluations=evaluations, evaluation_time=self.instance_queryer.evaluation_time, asset_cursors=asset_cursors, ), # only record evaluations where something changed [ evaluation - for evaluation in evaluations_by_asset_key.values() + for evaluation in evaluations if not evaluation.equivalent_to_stored_evaluation( self.cursor.latest_evaluation_by_asset_key.get(evaluation.asset_key), self.asset_graph, diff --git a/python_modules/dagster/dagster/_core/definitions/asset_daemon_cursor.py b/python_modules/dagster/dagster/_core/definitions/asset_daemon_cursor.py index 63fd948d5d253..3b24f361e2c94 100644 --- a/python_modules/dagster/dagster/_core/definitions/asset_daemon_cursor.py +++ b/python_modules/dagster/dagster/_core/definitions/asset_daemon_cursor.py @@ -3,6 +3,7 @@ import json import zlib from typing import ( + TYPE_CHECKING, AbstractSet, Mapping, NamedTuple, @@ -14,16 +15,18 @@ from dagster._core.definitions.auto_materialize_rule_evaluation import ( AutoMaterializeAssetEvaluation, ) -from dagster._core.definitions.events import AssetKey, AssetKeyPartitionKey +from dagster._core.definitions.events import AssetKey from dagster._core.definitions.time_window_partitions import ( TimeWindowPartitionsDefinition, TimeWindowPartitionsSubset, ) from dagster._serdes.serdes import deserialize_value, serialize_value, whitelist_for_serdes +if TYPE_CHECKING: + from .asset_automation_evaluator import ConditionEvaluation from .asset_graph import AssetGraph from .asset_subset import AssetSubset -from .partition import PartitionsDefinition, PartitionsSubset +from .partition import PartitionsSubset class AssetDaemonAssetCursor(NamedTuple): @@ -34,33 +37,9 @@ class AssetDaemonAssetCursor(NamedTuple): asset_key: AssetKey latest_storage_id: Optional[int] latest_evaluation_timestamp: Optional[float] - latest_evaluation: Optional[AutoMaterializeAssetEvaluation] + latest_evaluation: Optional["ConditionEvaluation"] materialized_requested_or_discarded_subset: AssetSubset - def with_updates( - self, - asset_graph: AssetGraph, - newly_materialized_subset: AssetSubset, - requested_asset_partitions: AbstractSet[AssetKeyPartitionKey], - discarded_asset_partitions: AbstractSet[AssetKeyPartitionKey], - ) -> "AssetDaemonAssetCursor": - if self.asset_key not in asset_graph.root_asset_keys: - return self - newly_materialized_requested_or_discarded_asset_partitions = ( - newly_materialized_subset.asset_partitions - | requested_asset_partitions - | discarded_asset_partitions - ) - newly_materialized_requested_or_discarded_subset = AssetSubset.from_asset_partitions_set( - self.asset_key, - asset_graph.get_partitions_def(self.asset_key), - newly_materialized_requested_or_discarded_asset_partitions, - ) - return self._replace( - materialized_requested_or_discarded_subset=self.materialized_requested_or_discarded_subset - | newly_materialized_requested_or_discarded_subset - ) - class AssetDaemonCursor(NamedTuple): """State that's saved between reconciliation evaluations. @@ -93,8 +72,11 @@ def was_previously_handled(self, asset_key: AssetKey) -> bool: return asset_key in self.handled_root_asset_keys def asset_cursor_for_key( - self, asset_key: AssetKey, partitions_def: Optional[PartitionsDefinition] + self, asset_key: AssetKey, asset_graph: AssetGraph ) -> AssetDaemonAssetCursor: + from .asset_automation_evaluator import ConditionEvaluation + + partitions_def = asset_graph.get_partitions_def(asset_key) handled_partitions_subset = self.handled_root_partitions_by_asset_key.get(asset_key) if handled_partitions_subset is not None: handled_subset = AssetSubset(asset_key=asset_key, value=handled_partitions_subset) @@ -102,11 +84,20 @@ def asset_cursor_for_key( handled_subset = AssetSubset(asset_key=asset_key, value=True) else: handled_subset = AssetSubset.empty(asset_key, partitions_def) + condition = ( + check.not_none(asset_graph.get_auto_materialize_policy(asset_key)) + .to_auto_materialize_policy_evaluator() + .condition + ) return AssetDaemonAssetCursor( asset_key=asset_key, latest_storage_id=self.latest_storage_id, latest_evaluation_timestamp=self.latest_evaluation_timestamp, - latest_evaluation=self.latest_evaluation_by_asset_key.get(asset_key), + latest_evaluation=ConditionEvaluation.from_evaluation( + condition=condition, + evaluation=self.latest_evaluation_by_asset_key.get(asset_key), + asset_graph=asset_graph, + ), materialized_requested_or_discarded_subset=handled_subset, ) diff --git a/python_modules/dagster/dagster/_core/definitions/asset_graph_subset.py b/python_modules/dagster/dagster/_core/definitions/asset_graph_subset.py index 3e01c34b72210..91fbadaabb2c6 100644 --- a/python_modules/dagster/dagster/_core/definitions/asset_graph_subset.py +++ b/python_modules/dagster/dagster/_core/definitions/asset_graph_subset.py @@ -31,6 +31,7 @@ ) from .asset_graph import AssetGraph +from .asset_subset import AssetSubset from .events import AssetKey, AssetKeyPartitionKey @@ -73,11 +74,28 @@ def asset_keys(self) -> AbstractSet[AssetKey]: } | self.non_partitioned_asset_keys @property - def num_partitions_and_non_partitioned_assets(self): + def num_partitions_and_non_partitioned_assets(self) -> int: return len(self.non_partitioned_asset_keys) + sum( len(subset) for subset in self.partitions_subsets_by_asset_key.values() ) + def get_asset_subset(self, asset_key: AssetKey, asset_graph: AssetGraph) -> AssetSubset: + """Returns an AssetSubset representing the subset of a specific asset that this + AssetGraphSubset contains. + """ + partitions_def = asset_graph.get_partitions_def(asset_key) + if partitions_def is None: + return AssetSubset( + asset_key=asset_key, value=asset_key in self.non_partitioned_asset_keys + ) + else: + return AssetSubset( + asset_key=asset_key, + value=self.partitions_subsets_by_asset_key.get( + asset_key, partitions_def.empty_subset() + ), + ) + def get_partitions_subset( self, asset_key: AssetKey, asset_graph: Optional[AssetGraph] = None ) -> PartitionsSubset: diff --git a/python_modules/dagster/dagster/_core/definitions/auto_materialize_policy.py b/python_modules/dagster/dagster/_core/definitions/auto_materialize_policy.py index 3f05e2afadb52..80a9581d234d0 100644 --- a/python_modules/dagster/dagster/_core/definitions/auto_materialize_policy.py +++ b/python_modules/dagster/dagster/_core/definitions/auto_materialize_policy.py @@ -1,6 +1,4 @@ -import operator from enum import Enum -from functools import reduce from typing import TYPE_CHECKING, AbstractSet, Dict, FrozenSet, NamedTuple, Optional, Sequence import dagster._check as check @@ -257,27 +255,25 @@ def rule_snapshots(self) -> Sequence["AutoMaterializeRuleSnapshot"]: def to_auto_materialize_policy_evaluator(self) -> "AssetAutomationEvaluator": """Converts a set of materialize / skip rules into a single binary expression.""" - from .asset_automation_evaluator import AssetAutomationEvaluator, RuleCondition + from .asset_automation_evaluator import ( + AndAutomationCondition, + AssetAutomationEvaluator, + NotAutomationCondition, + OrAutomationCondition, + RuleCondition, + ) - materialize_condition = ( - reduce( - operator.or_, - [RuleCondition(rule) for rule in self.materialize_rules], - ) - if self.materialize_rules - else None + materialize_condition = OrAutomationCondition( + children=[RuleCondition(rule) for rule in self.materialize_rules] ) - skip_condition = ( - ~reduce( - operator.or_, - [RuleCondition(rule) for rule in self.skip_rules], - ) - if self.skip_rules - else None + skip_condition = OrAutomationCondition( + children=[RuleCondition(rule) for rule in self.skip_rules] ) + # results in an expression of the form (m1 | m2 | ... | mn) & ~(s1 | s2 | ... | sn) - condition = reduce(operator.and_, filter(None, [materialize_condition, skip_condition])) - check.invariant(condition is not None, "must have at least one rule") + condition = AndAutomationCondition( + children=[materialize_condition, NotAutomationCondition([skip_condition])] + ) return AssetAutomationEvaluator( condition=condition, max_materializations_per_minute=self.max_materializations_per_minute, diff --git a/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule.py b/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule.py index 3a17f019ade6f..594fffa4bd631 100644 --- a/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule.py +++ b/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule.py @@ -1,11 +1,7 @@ -import dataclasses import datetime -import functools from abc import ABC, abstractmethod, abstractproperty from collections import defaultdict -from dataclasses import dataclass from typing import ( - TYPE_CHECKING, AbstractSet, Callable, Dict, @@ -29,262 +25,25 @@ RuleEvaluationResults, WaitingOnAssetsRuleEvaluationData, ) -from dagster._core.definitions.data_time import CachingDataTimeResolver from dagster._core.definitions.events import AssetKey, AssetKeyPartitionKey from dagster._core.definitions.freshness_based_auto_materialize import ( freshness_evaluation_results_for_asset_key, ) from dagster._core.definitions.multi_dimensional_partitions import MultiPartitionsDefinition -from dagster._core.definitions.partition import PartitionsDefinition -from dagster._core.definitions.partition_mapping import IdentityPartitionMapping -from dagster._core.definitions.time_window_partition_mapping import TimeWindowPartitionMapping from dagster._core.definitions.time_window_partitions import get_time_partitions_def from dagster._core.storage.dagster_run import RunsFilter from dagster._core.storage.tags import AUTO_MATERIALIZE_TAG from dagster._serdes.serdes import ( whitelist_for_serdes, ) -from dagster._utils.caching_instance_queryer import CachingInstanceQueryer from dagster._utils.schedules import ( cron_string_iterator, is_valid_cron_string, reverse_cron_string_iterator, ) -from .asset_graph import AssetGraph, sort_key_for_asset_partition -from .asset_subset import AssetSubset - -if TYPE_CHECKING: - from dagster._core.definitions.asset_daemon_context import AssetDaemonContext - from dagster._core.definitions.asset_daemon_cursor import AssetDaemonAssetCursor - - -@dataclass(frozen=True) -class RuleEvaluationContext: - asset_key: AssetKey - cursor: "AssetDaemonAssetCursor" - instance_queryer: CachingInstanceQueryer - data_time_resolver: CachingDataTimeResolver - # Tracks which asset partitions are already slated for materialization in this tick. The asset - # keys in the values match the asset key in the corresponding key. - will_materialize_mapping: Mapping[AssetKey, AbstractSet[AssetKeyPartitionKey]] - expected_data_time_mapping: Mapping[AssetKey, Optional[datetime.datetime]] - candidate_subset: AssetSubset - daemon_context: "AssetDaemonContext" - - def with_candidate_subset(self, candidate_subset: AssetSubset) -> "RuleEvaluationContext": - return dataclasses.replace(self, candidate_subset=candidate_subset) - - @property - def asset_graph(self) -> AssetGraph: - return self.instance_queryer.asset_graph - - @property - def partitions_def(self) -> Optional[PartitionsDefinition]: - return self.asset_graph.get_partitions_def(self.asset_key) - - @property - def evaluation_time(self) -> datetime.datetime: - """Returns the time at which this rule is being evaluated.""" - return self.instance_queryer.evaluation_time - - @property - def auto_materialize_run_tags(self) -> Mapping[str, str]: - return self.daemon_context.auto_materialize_run_tags - - @functools.cached_property - def previous_tick_requested_or_discarded_subset(self) -> AssetSubset: - """Returns the set of asset partitions that were requested or discarded on the previous tick.""" - if not self.cursor.latest_evaluation: - return self.empty_subset() - return self.cursor.latest_evaluation.get_requested_or_discarded_subset( - asset_graph=self.asset_graph - ) - - @functools.cached_property - def previous_tick_evaluated_subset(self) -> AssetSubset: - """Returns the set of asset partitions that were evaluated on the previous tick.""" - if not self.cursor.latest_evaluation: - return self.empty_subset() - return self.cursor.latest_evaluation.get_evaluated_subset(asset_graph=self.asset_graph) - - @functools.cached_property - def candidate_has_parents_that_have_or_will_update_subset(self) -> AssetSubset: - """Returns the set of candidate asset partitions whose parents have been updated since the - last tick or will be requested on this tick. - - Many rules depend on the state of the asset's parents, so this function is useful for - finding asset partitions that should be re-evaluated. - """ - subset_with_parents_which_will_update = AssetSubset.from_asset_partitions_set( - self.asset_key, self.partitions_def, set(self.get_will_update_parent_mapping().keys()) - ) - return self.candidate_subset & ( - self.subset_with_updated_parents_since_previous_tick - | subset_with_parents_which_will_update - ) - - @functools.cached_property - def candidate_not_evaluated_on_previous_tick_subset(self) -> AssetSubset: - """Returns the set of candidates that were not evaluated by the rule that is currently being - evaluated on the previous tick. - - Any asset partition that was evaluated by any rule on the previous tick must have been - evaluated by *all* skip rules. - """ - return self.candidate_subset - self.previous_tick_evaluated_subset - - @functools.cached_property - def newly_materialized_root_subset(self) -> AssetSubset: - if self.asset_key not in self.asset_graph.root_materializable_or_observable_asset_keys: - return self.empty_subset() - newly_materialized = set() - for asset_partition in self.cursor.materialized_requested_or_discarded_subset.inverse( - self.partitions_def, - dynamic_partitions_store=self.instance_queryer, - current_time=self.instance_queryer.evaluation_time, - ).asset_partitions: - if self.instance_queryer.asset_partition_has_materialization_or_observation( - asset_partition - ): - newly_materialized.add(asset_partition) - - return AssetSubset.from_asset_partitions_set( - self.asset_key, self.partitions_def, newly_materialized - ) - - @functools.cached_property - def never_materialized_requested_or_discarded_root_subset(self) -> AssetSubset: - if self.asset_key not in self.asset_graph.root_materializable_or_observable_asset_keys: - return self.empty_subset() - - never_materialized = self.cursor.materialized_requested_or_discarded_subset.inverse( - self.partitions_def, - dynamic_partitions_store=self.instance_queryer, - current_time=self.instance_queryer.evaluation_time, - ).asset_partitions - return ( - AssetSubset.from_asset_partitions_set( - self.asset_key, self.partitions_def, never_materialized - ) - - self.newly_materialized_root_subset - ) - - def empty_subset(self) -> AssetSubset: - return AssetSubset.empty(self.asset_key, self.partitions_def) - - def get_previous_tick_results(self, rule: "AutoMaterializeRule") -> "RuleEvaluationResults": - """Returns the results that were calculated for a given rule on the previous tick.""" - if not self.cursor.latest_evaluation: - return [] - return self.cursor.latest_evaluation.get_rule_evaluation_results( - rule_snapshot=rule.to_snapshot(), asset_graph=self.asset_graph - ) - - def materialized_requested_or_discarded_since_previous_tick( - self, asset_partition: AssetKeyPartitionKey - ) -> bool: - """Returns whether an asset partition has been materialized, requested, or discarded since - the last tick. - """ - if asset_partition in self.previous_tick_requested_or_discarded_subset: - return True - return self.instance_queryer.asset_partition_has_materialization_or_observation( - asset_partition, after_cursor=self.cursor.latest_storage_id - ) - - def materializable_in_same_run(self, child_key: AssetKey, parent_key: AssetKey) -> bool: - """Returns whether a child asset can be materialized in the same run as a parent asset.""" - from dagster._core.definitions.external_asset_graph import ExternalAssetGraph - - return ( - # both assets must be materializable - child_key in self.asset_graph.materializable_asset_keys - and parent_key in self.asset_graph.materializable_asset_keys - # the parent must have the same partitioning - and self.asset_graph.have_same_partitioning(child_key, parent_key) - # the parent must have a simple partition mapping to the child - and ( - not self.asset_graph.is_partitioned(parent_key) - or isinstance( - self.asset_graph.get_partition_mapping(child_key, parent_key), - (TimeWindowPartitionMapping, IdentityPartitionMapping), - ) - ) - # the parent must be in the same repository to be materialized alongside the candidate - and ( - not isinstance(self.asset_graph, ExternalAssetGraph) - or self.asset_graph.get_repository_handle(child_key) - == self.asset_graph.get_repository_handle(parent_key) - ) - ) - - def get_parents_that_will_not_be_materialized_on_current_tick( - self, *, asset_partition: AssetKeyPartitionKey - ) -> AbstractSet[AssetKeyPartitionKey]: - """Returns the set of parent asset partitions that will not be updated in the same run of - this asset partition if a run is launched for this asset partition on this tick. - """ - return { - parent - for parent in self.asset_graph.get_parents_partitions( - dynamic_partitions_store=self.instance_queryer, - current_time=self.instance_queryer.evaluation_time, - asset_key=asset_partition.asset_key, - partition_key=asset_partition.partition_key, - ).parent_partitions - if parent not in self.will_materialize_mapping.get(parent.asset_key, set()) - or not self.materializable_in_same_run(asset_partition.asset_key, parent.asset_key) - } - - @functools.cached_property - def subset_with_updated_parents_since_previous_tick(self) -> AssetSubset: - """Returns the set of asset partitions for the current key which have parents that updated - since the last tick. - """ - return AssetSubset.from_asset_partitions_set( - self.asset_key, - self.partitions_def, - self.instance_queryer.asset_partitions_with_newly_updated_parents( - latest_storage_id=self.cursor.latest_storage_id, - child_asset_key=self.asset_key, - map_old_time_partitions=False, - ), - ) - - def get_will_update_parent_mapping( - self, - ) -> Mapping[AssetKeyPartitionKey, AbstractSet[AssetKey]]: - """Returns a mapping from asset partitions of the current asset to the set of parent keys - which will be requested this tick and can execute in the same run as the current asset. - """ - will_update_parents_by_asset_partition = defaultdict(set) - # these are the set of parents that will be requested this tick and can be materialized in - # the same run as this asset - for parent_key in self.asset_graph.get_parents(self.asset_key): - if not self.materializable_in_same_run(self.asset_key, parent_key): - continue - for parent_partition in self.will_materialize_mapping.get(parent_key, set()): - asset_partition = AssetKeyPartitionKey( - self.asset_key, parent_partition.partition_key - ) - will_update_parents_by_asset_partition[asset_partition].add(parent_key) - - return will_update_parents_by_asset_partition - - def will_update_asset_partition(self, asset_partition: AssetKeyPartitionKey) -> bool: - return asset_partition in self.will_materialize_mapping.get( - asset_partition.asset_key, set() - ) - - def get_asset_partitions_by_asset_key( - self, asset_partitions: AbstractSet[AssetKeyPartitionKey] - ) -> Mapping[AssetKey, Set[AssetKeyPartitionKey]]: - asset_partitions_by_asset_key: Dict[AssetKey, Set[AssetKeyPartitionKey]] = defaultdict(set) - for parent in asset_partitions: - asset_partitions_by_asset_key[parent.asset_key].add(parent) - - return asset_partitions_by_asset_key +from .asset_automation_condition_context import AssetAutomationConditionEvaluationContext +from .asset_graph import sort_key_for_asset_partition class AutoMaterializeRule(ABC): @@ -313,7 +72,7 @@ def description(self) -> str: def add_evaluation_data_from_previous_tick( self, - context: RuleEvaluationContext, + context: AssetAutomationConditionEvaluationContext, asset_partitions_by_evaluation_data: Mapping[ Optional[AutoMaterializeRuleEvaluationData], Set[AssetKeyPartitionKey] ], @@ -333,18 +92,19 @@ def add_evaluation_data_from_previous_tick( """ asset_partitions_by_evaluation_data = defaultdict(set, asset_partitions_by_evaluation_data) evaluated_asset_partitions = set().union(*asset_partitions_by_evaluation_data.values()) - for evaluation_data, asset_partitions in context.get_previous_tick_results(self): + for evaluation_data, asset_partitions in context.previous_tick_results: for ap in asset_partitions: # evaluated data from this tick takes precedence over data from the previous tick if ap in evaluated_asset_partitions: continue elif should_use_past_data_fn(ap): asset_partitions_by_evaluation_data[evaluation_data].add(ap) - return list(asset_partitions_by_evaluation_data.items()) @abstractmethod - def evaluate_for_asset(self, context: RuleEvaluationContext) -> RuleEvaluationResults: + def evaluate_for_asset( + self, context: AssetAutomationConditionEvaluationContext + ) -> RuleEvaluationResults: """The core evaluation function for the rule. This function takes in a context object and returns a mapping from evaluated rules to the set of asset partitions that the rule applies to. @@ -503,16 +263,10 @@ def decision_type(self) -> AutoMaterializeDecisionType: def description(self) -> str: return "required to meet this or downstream asset's freshness policy" - def evaluate_for_asset(self, context: RuleEvaluationContext) -> RuleEvaluationResults: - freshness_conditions = freshness_evaluation_results_for_asset_key( - asset_key=context.asset_key, - data_time_resolver=context.data_time_resolver, - asset_graph=context.asset_graph, - current_time=context.instance_queryer.evaluation_time, - will_materialize_mapping=context.will_materialize_mapping, - expected_data_time_mapping=context.expected_data_time_mapping, - ) - return freshness_conditions + def evaluate_for_asset( + self, context: AssetAutomationConditionEvaluationContext + ) -> RuleEvaluationResults: + return freshness_evaluation_results_for_asset_key(context.asset_context) @whitelist_for_serdes @@ -531,12 +285,14 @@ def decision_type(self) -> AutoMaterializeDecisionType: def description(self) -> str: return f"not materialized since last cron schedule tick of '{self.cron_schedule}' (timezone: {self.timezone})" - def missed_cron_ticks(self, context: RuleEvaluationContext) -> Sequence[datetime.datetime]: + def missed_cron_ticks( + self, context: AssetAutomationConditionEvaluationContext + ) -> Sequence[datetime.datetime]: """Returns the cron ticks which have been missed since the previous cursor was generated.""" - if not context.cursor.latest_evaluation_timestamp: + if not context.latest_evaluation_timestamp: previous_dt = next( reverse_cron_string_iterator( - end_timestamp=context.evaluation_time.timestamp(), + end_timestamp=context.asset_context.evaluation_time.timestamp(), cron_string=self.cron_schedule, execution_timezone=self.timezone, ) @@ -544,24 +300,24 @@ def missed_cron_ticks(self, context: RuleEvaluationContext) -> Sequence[datetime return [previous_dt] missed_ticks = [] for dt in cron_string_iterator( - start_timestamp=context.cursor.latest_evaluation_timestamp, + start_timestamp=context.latest_evaluation_timestamp, cron_string=self.cron_schedule, execution_timezone=self.timezone, ): - if dt > context.evaluation_time: + if dt > context.asset_context.evaluation_time: break missed_ticks.append(dt) return missed_ticks def get_asset_partitions_to_request( - self, context: RuleEvaluationContext + self, context: AssetAutomationConditionEvaluationContext ) -> AbstractSet[AssetKeyPartitionKey]: missed_ticks = self.missed_cron_ticks(context) if not missed_ticks: return set() - partitions_def = context.asset_graph.get_partitions_def(context.asset_key) + partitions_def = context.partitions_def if partitions_def is None: return {AssetKeyPartitionKey(context.asset_key)} @@ -570,7 +326,7 @@ def get_asset_partitions_to_request( return { AssetKeyPartitionKey(context.asset_key, partition_key) for partition_key in partitions_def.get_partition_keys( - current_time=context.evaluation_time, + current_time=context.asset_context.evaluation_time, dynamic_partitions_store=context.instance_queryer, ) } @@ -592,7 +348,8 @@ def get_asset_partitions_to_request( None, [ time_partitions_def.get_last_partition_key( - current_time=missed_tick, dynamic_partitions_store=context.instance_queryer + current_time=missed_tick, + dynamic_partitions_store=context.instance_queryer, ) for missed_tick in missed_ticks ], @@ -615,17 +372,19 @@ def get_asset_partitions_to_request( for time_partition_key in missed_time_partition_keys } - def evaluate_for_asset(self, context: RuleEvaluationContext) -> RuleEvaluationResults: + def evaluate_for_asset( + self, context: AssetAutomationConditionEvaluationContext + ) -> RuleEvaluationResults: asset_partitions_to_request = self.get_asset_partitions_to_request(context) asset_partitions_by_evaluation_data = defaultdict(set) if asset_partitions_to_request: asset_partitions_by_evaluation_data[None].update(asset_partitions_to_request) + return self.add_evaluation_data_from_previous_tick( context, asset_partitions_by_evaluation_data, - should_use_past_data_fn=lambda ap: not context.materialized_requested_or_discarded_since_previous_tick( - ap - ), + should_use_past_data_fn=lambda ap: ap + not in context.materialized_requested_or_discarded_since_previous_tick_subset, ) @@ -651,7 +410,9 @@ def description(self) -> str: return f"latest run includes required tags: {self.latest_run_required_tags}" def passes( - self, context: RuleEvaluationContext, asset_partitions: Iterable[AssetKeyPartitionKey] + self, + context: AssetAutomationConditionEvaluationContext, + asset_partitions: Iterable[AssetKeyPartitionKey], ) -> Iterable[AssetKeyPartitionKey]: if self.latest_run_required_tags is None: return asset_partitions @@ -660,7 +421,7 @@ def passes( asset_partitions_by_latest_run_id: Dict[str, Set[AssetKeyPartitionKey]] = defaultdict(set) for asset_partition in asset_partitions: - if context.will_update_asset_partition(asset_partition): + if context.asset_context.will_update_asset_partition(asset_partition): will_update_asset_partitions.add(asset_partition) else: record = context.instance_queryer.get_latest_materialization_or_observation_record( @@ -693,7 +454,10 @@ def passes( if ( self.latest_run_required_tags.items() - <= {AUTO_MATERIALIZE_TAG: "true", **context.auto_materialize_run_tags}.items() + <= { + AUTO_MATERIALIZE_TAG: "true", + **context.asset_context.daemon_context.auto_materialize_run_tags, + }.items() ): return will_update_asset_partitions | updated_partitions_with_required_tags else: @@ -726,7 +490,9 @@ def description(self) -> str: else: return base - def evaluate_for_asset(self, context: RuleEvaluationContext) -> RuleEvaluationResults: + def evaluate_for_asset( + self, context: AssetAutomationConditionEvaluationContext + ) -> RuleEvaluationResults: """Evaluates the set of asset partitions of this asset whose parents have been updated, or will update on this tick. """ @@ -737,7 +503,7 @@ def evaluate_for_asset(self, context: RuleEvaluationContext) -> RuleEvaluationRe AssetKeyPartitionKey, Set[AssetKeyPartitionKey] ] = defaultdict(set) - subset_to_evaluate = context.candidate_has_parents_that_have_or_will_update_subset + subset_to_evaluate = context.candidate_parent_has_or_will_update_subset for asset_partition in subset_to_evaluate.asset_partitions: parent_asset_partitions = context.asset_graph.get_parents_partitions( dynamic_partitions_store=context.instance_queryer, @@ -751,7 +517,7 @@ def evaluate_for_asset(self, context: RuleEvaluationContext) -> RuleEvaluationRe parent_asset_partitions, # do a precise check for updated parents, factoring in data versions, as long as # we're within reasonable limits on the number of partitions to check - respect_materialization_data_versions=context.daemon_context.respect_materialization_data_versions + respect_materialization_data_versions=context.asset_context.daemon_context.respect_materialization_data_versions and len(parent_asset_partitions) + subset_to_evaluate.size < 100, # ignore self-dependencies when checking for updated parents, to avoid historical # rematerializations from causing a chain of materializations to be kicked off @@ -761,7 +527,7 @@ def evaluate_for_asset(self, context: RuleEvaluationContext) -> RuleEvaluationRe asset_partitions_by_updated_parents[parent].add(asset_partition) for parent in parent_asset_partitions: - if context.will_update_asset_partition(parent): + if context.asset_context.will_update_asset_partition(parent): asset_partitions_by_will_update_parents[parent].add(asset_partition) updated_and_will_update_parents = ( @@ -813,9 +579,8 @@ def evaluate_for_asset(self, context: RuleEvaluationContext) -> RuleEvaluationRe return self.add_evaluation_data_from_previous_tick( context, asset_partitions_by_evaluation_data, - should_use_past_data_fn=lambda ap: not context.materialized_requested_or_discarded_since_previous_tick( - ap - ), + should_use_past_data_fn=lambda ap: ap + not in context.materialized_requested_or_discarded_since_previous_tick_subset, ) @@ -829,7 +594,9 @@ def decision_type(self) -> AutoMaterializeDecisionType: def description(self) -> str: return "materialization is missing" - def evaluate_for_asset(self, context: RuleEvaluationContext) -> RuleEvaluationResults: + def evaluate_for_asset( + self, context: AssetAutomationConditionEvaluationContext + ) -> RuleEvaluationResults: """Evaluates the set of asset partitions for this asset which are missing and were not previously discarded. Currently only applies to root asset partitions and asset partitions with updated parents. @@ -837,11 +604,11 @@ def evaluate_for_asset(self, context: RuleEvaluationContext) -> RuleEvaluationRe asset_partitions_by_evaluation_data = defaultdict(set) missing_asset_partitions = set( - context.never_materialized_requested_or_discarded_root_subset.asset_partitions + context.asset_context.never_materialized_requested_or_discarded_root_subset.asset_partitions ) # in addition to missing root asset partitions, check any asset partitions with updated # parents to see if they're missing - for candidate in context.subset_with_updated_parents_since_previous_tick.asset_partitions: + for candidate in context.candidate_parent_has_or_will_update_subset.asset_partitions: if not context.instance_queryer.asset_partition_has_materialization_or_observation( candidate ): @@ -854,7 +621,7 @@ def evaluate_for_asset(self, context: RuleEvaluationContext) -> RuleEvaluationRe context, asset_partitions_by_evaluation_data, should_use_past_data_fn=lambda ap: ap not in missing_asset_partitions - and not context.materialized_requested_or_discarded_since_previous_tick(ap), + and ap not in context.materialized_requested_or_discarded_since_previous_tick_subset, ) @@ -868,18 +635,22 @@ def decision_type(self) -> AutoMaterializeDecisionType: def description(self) -> str: return "waiting on upstream data to be up to date" - def evaluate_for_asset(self, context: RuleEvaluationContext) -> RuleEvaluationResults: + def evaluate_for_asset( + self, context: AssetAutomationConditionEvaluationContext + ) -> RuleEvaluationResults: asset_partitions_by_evaluation_data = defaultdict(set) # only need to evaluate net-new candidates and candidates whose parents have changed subset_to_evaluate = ( - context.candidate_not_evaluated_on_previous_tick_subset - | context.candidate_has_parents_that_have_or_will_update_subset + context.candidates_not_evaluated_on_previous_tick_subset + | context.candidate_parent_has_or_will_update_subset ) for candidate in subset_to_evaluate.asset_partitions: outdated_ancestors = set() # find the root cause of why this asset partition's parents are outdated (if any) - for parent in context.get_parents_that_will_not_be_materialized_on_current_tick( + for ( + parent + ) in context.asset_context.get_parents_that_will_not_be_materialized_on_current_tick( asset_partition=candidate ): if context.instance_queryer.have_ignorable_partition_mapping_for_outdated( @@ -913,24 +684,26 @@ def description(self) -> str: def evaluate_for_asset( self, - context: RuleEvaluationContext, + context: AssetAutomationConditionEvaluationContext, ) -> RuleEvaluationResults: asset_partitions_by_evaluation_data = defaultdict(set) # only need to evaluate net-new candidates and candidates whose parents have changed subset_to_evaluate = ( - context.candidate_not_evaluated_on_previous_tick_subset - | context.candidate_has_parents_that_have_or_will_update_subset + context.candidates_not_evaluated_on_previous_tick_subset + | context.candidate_parent_has_or_will_update_subset ) for candidate in subset_to_evaluate.asset_partitions: missing_parent_asset_keys = set() - for parent in context.get_parents_that_will_not_be_materialized_on_current_tick( + for ( + parent + ) in context.asset_context.get_parents_that_will_not_be_materialized_on_current_tick( asset_partition=candidate ): # ignore non-observable sources, which will never have a materialization or observation - if context.asset_graph.is_source( + if context.asset_context.asset_graph.is_source( parent.asset_key - ) and not context.asset_graph.is_observable(parent.asset_key): + ) and not context.asset_context.asset_graph.is_observable(parent.asset_key): continue if not context.instance_queryer.asset_partition_has_materialization_or_observation( parent @@ -980,14 +753,14 @@ def description(self) -> str: def evaluate_for_asset( self, - context: RuleEvaluationContext, + context: AssetAutomationConditionEvaluationContext, ) -> RuleEvaluationResults: asset_partitions_by_evaluation_data = defaultdict(set) # only need to evaluate net-new candidates and candidates whose parents have changed subset_to_evaluate = ( - context.candidate_not_evaluated_on_previous_tick_subset - | context.candidate_has_parents_that_have_or_will_update_subset + context.candidates_not_evaluated_on_previous_tick_subset + | context.candidate_parent_has_or_will_update_subset ) for candidate in subset_to_evaluate.asset_partitions: parent_partitions = context.asset_graph.get_parents_partitions( @@ -1001,15 +774,10 @@ def evaluate_for_asset( context.instance_queryer.get_parent_asset_partitions_updated_after_child( candidate, parent_partitions, - context.daemon_context.respect_materialization_data_versions, + context.asset_context.daemon_context.respect_materialization_data_versions, ignored_parent_keys=set(), ) - | set().union( - *[ - context.will_materialize_mapping.get(parent, set()) - for parent in context.asset_graph.get_parents(context.asset_key) - ] - ) + | context.asset_context.parent_will_update_subset.asset_partitions ) if self.require_update_for_all_parent_partitions: @@ -1021,14 +789,8 @@ def evaluate_for_asset( # At least one upstream partition in each upstream asset must be updated in order # for the candidate to be updated parent_asset_keys = context.asset_graph.get_parents(context.asset_key) - updated_parent_partitions_by_asset_key = context.get_asset_partitions_by_asset_key( - updated_parent_partitions - ) - non_updated_parent_keys = { - parent - for parent in parent_asset_keys - if not updated_parent_partitions_by_asset_key.get(parent) - } + updated_parent_keys = {ap.asset_key for ap in updated_parent_partitions} + non_updated_parent_keys = parent_asset_keys - updated_parent_keys # do not require past partitions of this asset to be updated non_updated_parent_keys -= {context.asset_key} @@ -1057,12 +819,14 @@ def decision_type(self) -> AutoMaterializeDecisionType: def description(self) -> str: return "required parent partitions do not exist" - def evaluate_for_asset(self, context: RuleEvaluationContext) -> RuleEvaluationResults: + def evaluate_for_asset( + self, context: AssetAutomationConditionEvaluationContext + ) -> RuleEvaluationResults: asset_partitions_by_evaluation_data = defaultdict(set) subset_to_evaluate = ( - context.candidate_not_evaluated_on_previous_tick_subset - | context.candidate_has_parents_that_have_or_will_update_subset + context.candidates_not_evaluated_on_previous_tick_subset + | context.candidate_parent_has_or_will_update_subset ) for candidate in subset_to_evaluate.asset_partitions: nonexistent_parent_partitions = context.asset_graph.get_parents_partitions( @@ -1101,29 +865,22 @@ def description(self) -> str: else: return "targeted by an in-progress backfill" - def evaluate_for_asset(self, context: RuleEvaluationContext) -> RuleEvaluationResults: - backfill_in_progress_candidates: AbstractSet[AssetKeyPartitionKey] = set() + def evaluate_for_asset( + self, context: AssetAutomationConditionEvaluationContext + ) -> RuleEvaluationResults: backfilling_subset = ( context.instance_queryer.get_active_backfill_target_asset_graph_subset() - ) + ).get_asset_subset(context.asset_key, context.asset_context.asset_graph) + + if backfilling_subset.size == 0: + return [] if self.all_partitions: - backfill_in_progress_candidates = { - candidate - for candidate in context.candidate_subset.asset_partitions - if candidate.asset_key in backfilling_subset.asset_keys - } + true_subset = context.candidate_subset else: - backfill_in_progress_candidates = { - candidate - for candidate in context.candidate_subset.asset_partitions - if candidate in backfilling_subset - } - - if backfill_in_progress_candidates: - return [(None, backfill_in_progress_candidates)] + true_subset = context.candidate_subset & backfilling_subset - return [] + return [(None, true_subset.asset_partitions)] @whitelist_for_serdes @@ -1138,7 +895,9 @@ def decision_type(self) -> AutoMaterializeDecisionType: def description(self) -> str: return f"exceeds {self.limit} materialization(s) per minute" - def evaluate_for_asset(self, context: RuleEvaluationContext) -> RuleEvaluationResults: + def evaluate_for_asset( + self, context: AssetAutomationConditionEvaluationContext + ) -> RuleEvaluationResults: # the set of asset partitions which exceed the limit rate_limited_asset_partitions = set( sorted( diff --git a/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule_evaluation.py b/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule_evaluation.py index 95e5c9c902f68..aceb712324c6d 100644 --- a/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule_evaluation.py +++ b/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule_evaluation.py @@ -227,7 +227,7 @@ def get_rule_evaluation_results( return results def _get_subset_with_decision_type( - self, decision_type: AutoMaterializeDecisionType, asset_graph: AssetGraph + self, *, decision_type: AutoMaterializeDecisionType, asset_graph: AssetGraph ) -> AssetSubset: """Returns the set of asset partitions with a given decision type applied to them.""" subset = AssetSubset.empty(self.asset_key, asset_graph.get_partitions_def(self.asset_key)) @@ -242,26 +242,34 @@ def _get_subset_with_decision_type( subset |= deserialized_result[1] return subset - def get_requested_or_discarded_subset(self, asset_graph: AssetGraph) -> AssetSubset: + def get_discarded_subset(self, asset_graph: AssetGraph) -> AssetSubset: """Returns the set of asset partitions which were either requested or discarded on this evaluation. """ - to_materialize = self._get_subset_with_decision_type( - AutoMaterializeDecisionType.MATERIALIZE, - asset_graph, - ) - to_skip = self._get_subset_with_decision_type( - AutoMaterializeDecisionType.SKIP, - asset_graph, + return self._get_subset_with_decision_type( + decision_type=AutoMaterializeDecisionType.DISCARD, asset_graph=asset_graph ) - return to_materialize - to_skip def get_evaluated_subset(self, asset_graph: AssetGraph) -> AssetSubset: """Returns the set of asset partitions which were evaluated by any rule on this evaluation.""" # no asset partition can be evaluated by SKIP or DISCARD rules without having at least one # materialize rule evaluation return self._get_subset_with_decision_type( - AutoMaterializeDecisionType.MATERIALIZE, asset_graph + decision_type=AutoMaterializeDecisionType.MATERIALIZE, asset_graph=asset_graph + ) + + def get_requested_subset(self, asset_graph: AssetGraph) -> AssetSubset: + """Returns the set of asset partitions which were requested on this evaluation.""" + return ( + self._get_subset_with_decision_type( + decision_type=AutoMaterializeDecisionType.MATERIALIZE, asset_graph=asset_graph + ) + - self._get_subset_with_decision_type( + decision_type=AutoMaterializeDecisionType.SKIP, asset_graph=asset_graph + ) + - self._get_subset_with_decision_type( + decision_type=AutoMaterializeDecisionType.DISCARD, asset_graph=asset_graph + ) ) def equivalent_to_stored_evaluation( diff --git a/python_modules/dagster/dagster/_core/definitions/freshness_based_auto_materialize.py b/python_modules/dagster/dagster/_core/definitions/freshness_based_auto_materialize.py index 3948ee070a3e2..2238cf36c9fc2 100644 --- a/python_modules/dagster/dagster/_core/definitions/freshness_based_auto_materialize.py +++ b/python_modules/dagster/dagster/_core/definitions/freshness_based_auto_materialize.py @@ -8,19 +8,16 @@ maximum lag minutes. """ import datetime -from typing import TYPE_CHECKING, AbstractSet, Mapping, Optional, Tuple +from typing import TYPE_CHECKING, AbstractSet, Optional, Tuple import pendulum -from dagster._core.definitions.events import AssetKey, AssetKeyPartitionKey +from dagster._core.definitions.events import AssetKeyPartitionKey from dagster._core.definitions.freshness_policy import FreshnessPolicy from dagster._utils.schedules import cron_string_iterator -from .asset_graph import AssetGraph - if TYPE_CHECKING: - from dagster._core.definitions.data_time import CachingDataTimeResolver - + from .asset_automation_condition_context import AssetAutomationEvaluationContext from .auto_materialize_rule_evaluation import RuleEvaluationResults, TextRuleEvaluationData @@ -112,41 +109,38 @@ def get_execution_period_and_evaluation_data_for_policies( def get_expected_data_time_for_asset_key( - asset_graph: AssetGraph, - asset_key: AssetKey, - will_materialize_mapping: Mapping[AssetKey, AbstractSet[AssetKeyPartitionKey]], - expected_data_time_mapping: Mapping[AssetKey, Optional[datetime.datetime]], - data_time_resolver: "CachingDataTimeResolver", - current_time: datetime.datetime, - will_materialize: bool, + context: "AssetAutomationEvaluationContext", will_materialize: bool ) -> Optional[datetime.datetime]: """Returns the data time that you would expect this asset to have if you were to execute it on this tick. """ from dagster._core.definitions.external_asset_graph import ExternalAssetGraph + asset_key = context.asset_key + asset_graph = context.asset_graph + current_time = context.evaluation_time + # don't bother calculating if no downstream assets have freshness policies if not asset_graph.get_downstream_freshness_policies(asset_key=asset_key): return None # if asset will not be materialized, just return the current time elif not will_materialize: - return data_time_resolver.get_current_data_time(asset_key, current_time) + return context.data_time_resolver.get_current_data_time(asset_key, current_time) elif asset_graph.has_non_source_parents(asset_key): expected_data_time = None for parent_key in asset_graph.get_parents(asset_key): # if the parent will be materialized on this tick, and it's not in the same repo, then # we must wait for this asset to be materialized - if ( - isinstance(asset_graph, ExternalAssetGraph) - and AssetKeyPartitionKey(parent_key) in will_materialize_mapping[parent_key] + if isinstance(asset_graph, ExternalAssetGraph) and context.will_update_asset_partition( + AssetKeyPartitionKey(parent_key) ): parent_repo = asset_graph.get_repository_handle(parent_key) if parent_repo != asset_graph.get_repository_handle(asset_key): - return data_time_resolver.get_current_data_time(asset_key, current_time) + return context.data_time_resolver.get_current_data_time(asset_key, current_time) # find the minimum non-None data time of your parents - parent_expected_data_time = expected_data_time_mapping.get( + parent_expected_data_time = context.expected_data_time_mapping.get( parent_key - ) or data_time_resolver.get_current_data_time(parent_key, current_time) + ) or context.data_time_resolver.get_current_data_time(parent_key, current_time) expected_data_time = min( filter(None, [expected_data_time, parent_expected_data_time]), default=None, @@ -158,34 +152,27 @@ def get_expected_data_time_for_asset_key( def freshness_evaluation_results_for_asset_key( - asset_key: AssetKey, - data_time_resolver: "CachingDataTimeResolver", - asset_graph: AssetGraph, - current_time: datetime.datetime, - will_materialize_mapping: Mapping[AssetKey, AbstractSet[AssetKeyPartitionKey]], - expected_data_time_mapping: Mapping[AssetKey, Optional[datetime.datetime]], + context: "AssetAutomationEvaluationContext", ) -> "RuleEvaluationResults": """Returns a set of AssetKeyPartitionKeys to materialize in order to abide by the given FreshnessPolicies. Attempts to minimize the total number of asset executions. """ - if not asset_graph.get_downstream_freshness_policies( + asset_key = context.asset_key + current_time = context.evaluation_time + + if not context.asset_graph.get_downstream_freshness_policies( asset_key=asset_key - ) or asset_graph.is_partitioned(asset_key): + ) or context.asset_graph.is_partitioned(asset_key): return [] # figure out the current contents of this asset - current_data_time = data_time_resolver.get_current_data_time(asset_key, current_time) + current_data_time = context.data_time_resolver.get_current_data_time(asset_key, current_time) # figure out the data time you would expect if you were to execute this asset on this tick expected_data_time = get_expected_data_time_for_asset_key( - asset_graph=asset_graph, - asset_key=asset_key, - will_materialize_mapping=will_materialize_mapping, - expected_data_time_mapping=expected_data_time_mapping, - data_time_resolver=data_time_resolver, - current_time=current_time, + context=context, will_materialize=True, ) @@ -195,10 +182,14 @@ def freshness_evaluation_results_for_asset_key( # calculate the data times you would expect after all currently-executing runs # were to successfully complete - in_progress_data_time = data_time_resolver.get_in_progress_data_time(asset_key, current_time) + in_progress_data_time = context.data_time_resolver.get_in_progress_data_time( + asset_key, current_time + ) # calculate the data times you would have expected if the most recent run succeeded - failed_data_time = data_time_resolver.get_ignored_failure_data_time(asset_key, current_time) + failed_data_time = context.data_time_resolver.get_ignored_failure_data_time( + asset_key, current_time + ) effective_data_time = max( filter(None, (current_data_time, in_progress_data_time, failed_data_time)), @@ -211,8 +202,8 @@ def freshness_evaluation_results_for_asset_key( execution_period, evaluation_data, ) = get_execution_period_and_evaluation_data_for_policies( - local_policy=asset_graph.freshness_policies_by_key.get(asset_key), - policies=asset_graph.get_downstream_freshness_policies(asset_key=asset_key), + local_policy=context.asset_graph.freshness_policies_by_key.get(asset_key), + policies=context.asset_graph.get_downstream_freshness_policies(asset_key=asset_key), effective_data_time=effective_data_time, current_time=current_time, ) diff --git a/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/scenarios/basic_scenarios.py b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/scenarios/basic_scenarios.py index 35149ccd3ac74..d8994e6dadf38 100644 --- a/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/scenarios/basic_scenarios.py +++ b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/scenarios/basic_scenarios.py @@ -74,7 +74,7 @@ unevaluated_runs=[run(["asset1", "asset2", "asset3", "asset4", "asset5", "asset6"])], ), # don't need to run asset4 for reconciliation but asset4 must run when asset3 does - expected_run_requests=[run_request(asset_keys=["asset3", "asset4", "asset5", "asset6"])], + expected_run_requests=[run_request(asset_keys=["asset3", "asset4", "asset5"])], ), "multi_asset_in_middle_single_parent_rematerialized_subsettable": AssetReconciliationScenario( assets=multi_asset_in_middle_subsettable, diff --git a/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/scenarios/freshness_policy_scenarios.py b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/scenarios/freshness_policy_scenarios.py index fb1ff36bf6adb..f392ec1f3ff74 100644 --- a/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/scenarios/freshness_policy_scenarios.py +++ b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/scenarios/freshness_policy_scenarios.py @@ -6,10 +6,8 @@ ) from dagster._core.definitions.auto_materialize_rule import AutoMaterializeRule from dagster._core.definitions.auto_materialize_rule_evaluation import ( - ParentUpdatedRuleEvaluationData, TextRuleEvaluationData, ) -from dagster._core.definitions.events import AssetKey from dagster._core.definitions.freshness_policy import FreshnessPolicy from ..base_scenario import ( @@ -72,34 +70,13 @@ unevaluated_runs=[run([f"asset{i}" for i in range(1, 6)])], evaluation_delta=datetime.timedelta(minutes=35), # need to run assets 1, 2 and 3 as they're all part of the same non-subsettable multi asset - # need to run asset 4 as it eagerly updates after asset 1 - expected_run_requests=[ - run_request(asset_keys=["asset1", "asset2", "asset3", "asset4", "asset5"]) - ], + expected_run_requests=[run_request(asset_keys=["asset1", "asset2", "asset3", "asset5"])], expected_evaluations=[ - AssetEvaluationSpec.from_single_rule( - "asset1", - AutoMaterializeRule.materialize_on_required_for_freshness(), - TextRuleEvaluationData("Required by downstream asset's policy"), - ), AssetEvaluationSpec.from_single_rule( "asset2", AutoMaterializeRule.materialize_on_required_for_freshness(), TextRuleEvaluationData("Required by downstream asset's policy"), ), - AssetEvaluationSpec.from_single_rule( - "asset3", - AutoMaterializeRule.materialize_on_required_for_freshness(), - TextRuleEvaluationData("Required by downstream asset's policy"), - ), - AssetEvaluationSpec.from_single_rule( - "asset4", - AutoMaterializeRule.materialize_on_parent_updated(), - ParentUpdatedRuleEvaluationData( - updated_asset_keys=frozenset(), - will_update_asset_keys=frozenset([AssetKey("asset1")]), - ), - ), AssetEvaluationSpec.from_single_rule( "asset5", AutoMaterializeRule.materialize_on_required_for_freshness(), diff --git a/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/updated_scenarios/basic_scenarios.py b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/updated_scenarios/basic_scenarios.py index eb9e9bc0197f5..293412db388ab 100644 --- a/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/updated_scenarios/basic_scenarios.py +++ b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/updated_scenarios/basic_scenarios.py @@ -53,7 +53,8 @@ ParentUpdatedRuleEvaluationData, updated_asset_keys=set(), will_update_asset_keys={"A"}, - ) + ), + AssetRuleEvaluationSpec(rule=AutoMaterializeRule.materialize_on_missing()), ], ), ), diff --git a/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/updated_scenarios/partition_scenarios.py b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/updated_scenarios/partition_scenarios.py index 5cc53f14fde92..a6ff9139f070f 100644 --- a/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/updated_scenarios/partition_scenarios.py +++ b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/updated_scenarios/partition_scenarios.py @@ -660,7 +660,8 @@ .evaluate_tick() .assert_requested_runs( run_request( - ["C"], partition_key=day_partition_key(time_partitions_start_datetime, delta=1) + ["C"], + partition_key=day_partition_key(time_partitions_start_datetime, delta=1), ) ) # new day's partition is filled in, should still be able to materialize the new partition @@ -669,7 +670,7 @@ .with_runs( run_request( ["A"], partition_key=day_partition_key(time_partitions_start_datetime, delta=3) - ) + ), ) .evaluate_tick() .assert_requested_runs( From 4059038129e946785c642219e76891c0c8760b2e Mon Sep 17 00:00:00 2001 From: Owen Kephart Date: Fri, 8 Dec 2023 13:32:27 -0800 Subject: [PATCH 02/56] Add snapshot class --- .../definitions/asset_automation_evaluator.py | 83 ++++++++++++++----- 1 file changed, 60 insertions(+), 23 deletions(-) diff --git a/python_modules/dagster/dagster/_core/definitions/asset_automation_evaluator.py b/python_modules/dagster/dagster/_core/definitions/asset_automation_evaluator.py index b5ef67da6d141..6fad476eea6b8 100644 --- a/python_modules/dagster/dagster/_core/definitions/asset_automation_evaluator.py +++ b/python_modules/dagster/dagster/_core/definitions/asset_automation_evaluator.py @@ -1,4 +1,6 @@ import dataclasses +import functools +import hashlib from abc import ABC, abstractmethod from typing import TYPE_CHECKING, AbstractSet, List, NamedTuple, Optional, Sequence, Tuple @@ -24,10 +26,25 @@ from .auto_materialize_rule import AutoMaterializeRule, RuleEvaluationResults +class AutomationConditionNodeSnapshot(NamedTuple): + """A serializable snapshot of a node in the AutomationCondition tree.""" + + class_name: str + description: str + child_hashes: Sequence[str] + + @property + def hash(self) -> str: + """Returns a unique hash for this node in the tree.""" + return hashlib.md5( + "".join([self.class_name, self.description, *self.child_hashes]).encode("utf-8") + ).hexdigest() + + class ConditionEvaluation(NamedTuple): """Internal representation of the results of evaluating a node in the evaluation tree.""" - condition: "AutomationCondition" + condition_snapshot: AutomationConditionNodeSnapshot true_subset: AssetSubset candidate_subset: AssetSubset @@ -40,18 +57,17 @@ class ConditionEvaluation(NamedTuple): Tuple[AutoMaterializeRuleEvaluation, AbstractSet[AssetKeyPartitionKey]] ] = [] - @property def all_results( - self, + self, condition: "AutomationCondition" ) -> Sequence[Tuple[AutoMaterializeRuleEvaluation, AbstractSet[AssetKeyPartitionKey]]]: """This method is a placeholder to allow us to convert this into a shape that other parts of the system understand. """ - if isinstance(self.condition, RuleCondition): + if isinstance(condition, RuleCondition): results = [ ( AutoMaterializeRuleEvaluation( - rule_snapshot=self.condition.rule.to_snapshot(), + rule_snapshot=condition.rule.to_snapshot(), evaluation_data=evaluation_data, ), subset, @@ -60,15 +76,19 @@ def all_results( ] else: results = [] - for child in self.child_evaluations: - results = [*results, *child.all_results] + for i, child in enumerate(self.child_evaluations): + results = [*results, *child.all_results(condition.children[i])] return results def for_child(self, child_condition: "AutomationCondition") -> Optional["ConditionEvaluation"]: - """Returns the evaluation of a given child condition.""" + """Returns the evaluation of a given child condition by finding the child evaluation that + has an identical hash to the given condition. + """ + child_hash = child_condition.snapshot.hash for child_evaluation in self.child_evaluations: - if child_evaluation.condition == child_condition: + if child_evaluation.condition_snapshot.hash == child_hash: return child_evaluation + return None def to_evaluation( @@ -80,8 +100,13 @@ def to_evaluation( """This method is a placeholder to allow us to convert this into a shape that other parts of the system understand. """ + condition = ( + check.not_none(asset_graph.get_auto_materialize_policy(asset_key)) + .to_auto_materialize_policy_evaluator() + .condition + ) # backcompat way to calculate the set of skipped partitions for legacy policies - if self.condition.is_legacy and len(self.child_evaluations) == 2: + if condition.is_legacy and len(self.child_evaluations) == 2: # the first child is the materialize condition, the second child is the negation of # the skip condition _, nor_skip_evaluation = self.child_evaluations @@ -97,7 +122,10 @@ def to_evaluation( return AutoMaterializeAssetEvaluation.from_rule_evaluation_results( asset_key=asset_key, asset_graph=asset_graph, - asset_partitions_by_rule_evaluation=[*self.all_results, *self.discard_results], + asset_partitions_by_rule_evaluation=[ + *self.all_results(condition), + *self.discard_results, + ], num_requested=(self.true_subset - discard_subset).size, num_skipped=skipped_subset_size, num_discarded=discard_subset.size, @@ -114,7 +142,7 @@ def from_evaluation_and_rule( partitions_def = asset_graph.get_partitions_def(asset_key) empty_subset = AssetSubset.empty(asset_key, partitions_def) return ConditionEvaluation( - condition=RuleCondition(rule=rule), + condition_snapshot=RuleCondition(rule=rule).snapshot, true_subset=empty_subset, candidate_subset=empty_subset if rule.decision_type == AutoMaterializeDecisionType.MATERIALIZE @@ -154,7 +182,7 @@ def from_evaluation( ] children = [ ConditionEvaluation( - condition=materialize_condition, + condition_snapshot=materialize_condition.snapshot, true_subset=empty_subset, candidate_subset=empty_subset, child_evaluations=[ @@ -163,7 +191,7 @@ def from_evaluation( ], ), ConditionEvaluation( - condition=skip_condition, + condition_snapshot=skip_condition.snapshot, true_subset=empty_subset, candidate_subset=empty_subset, child_evaluations=[ @@ -173,7 +201,7 @@ def from_evaluation( ), ] return ConditionEvaluation( - condition=condition, + condition_snapshot=condition.snapshot, true_subset=evaluation.get_requested_subset(asset_graph), discard_subset=evaluation.get_discarded_subset(asset_graph), candidate_subset=empty_subset, @@ -187,10 +215,6 @@ class AutomationCondition(ABC): new conditions using the `&` (and), `|` (or), and `~` (not) operators. """ - @property - def children(self) -> Sequence["AutomationCondition"]: - return [] - @abstractmethod def evaluate(self, context: AssetAutomationConditionEvaluationContext) -> ConditionEvaluation: raise NotImplementedError() @@ -222,6 +246,19 @@ def is_legacy(self) -> bool: and isinstance(self.children[1], NotAutomationCondition) ) + @property + def children(self) -> Sequence["AutomationCondition"]: + return [] + + @functools.cached_property + def snapshot(self) -> AutomationConditionNodeSnapshot: + """Returns a snapshot of this condition that can be used for serialization.""" + return AutomationConditionNodeSnapshot( + class_name=self.__class__.__name__, + description=str(self), + child_hashes=[child.snapshot.hash for child in self.children], + ) + class RuleCondition( NamedTuple("_RuleCondition", [("rule", "AutoMaterializeRule")]), @@ -243,7 +280,7 @@ def evaluate(self, context: AssetAutomationConditionEvaluationContext) -> Condit f"Rule returned {true_subset.size} partitions" ) return ConditionEvaluation( - condition=self, + condition_snapshot=self.snapshot, true_subset=true_subset, candidate_subset=context.candidate_subset, results=results, @@ -265,7 +302,7 @@ def evaluate(self, context: AssetAutomationConditionEvaluationContext) -> Condit child_evaluations.append(result) true_subset &= result.true_subset return ConditionEvaluation( - condition=self, + condition_snapshot=self.snapshot, true_subset=true_subset, candidate_subset=context.candidate_subset, child_evaluations=child_evaluations, @@ -289,7 +326,7 @@ def evaluate(self, context: AssetAutomationConditionEvaluationContext) -> Condit child_evaluations.append(result) true_subset |= result.true_subset return ConditionEvaluation( - condition=self, + condition_snapshot=self.snapshot, true_subset=true_subset, candidate_subset=context.candidate_subset, child_evaluations=child_evaluations, @@ -318,7 +355,7 @@ def evaluate(self, context: AssetAutomationConditionEvaluationContext) -> Condit true_subset = context.candidate_subset - result.true_subset return ConditionEvaluation( - condition=self, + condition_snapshot=self.snapshot, true_subset=true_subset, candidate_subset=context.candidate_subset, child_evaluations=[result], From 45a3173942a0584ad9053c9e38d21125b491072f Mon Sep 17 00:00:00 2001 From: Owen Kephart Date: Fri, 8 Dec 2023 13:35:14 -0800 Subject: [PATCH 03/56] RuleEvaluationData -> SubsetWithMetadata --- .../asset_automation_condition_context.py | 13 +- .../definitions/asset_automation_evaluator.py | 228 +++++++++++------- .../_core/definitions/asset_daemon_cursor.py | 5 +- .../definitions/auto_materialize_policy.py | 30 ++- .../definitions/auto_materialize_rule.py | 122 ++++++---- .../auto_materialize_rule_evaluation.py | 92 ++++++- .../freshness_based_auto_materialize.py | 14 +- 7 files changed, 326 insertions(+), 178 deletions(-) diff --git a/python_modules/dagster/dagster/_core/definitions/asset_automation_condition_context.py b/python_modules/dagster/dagster/_core/definitions/asset_automation_condition_context.py index 60a3385f9c4bb..67d83eaac8582 100644 --- a/python_modules/dagster/dagster/_core/definitions/asset_automation_condition_context.py +++ b/python_modules/dagster/dagster/_core/definitions/asset_automation_condition_context.py @@ -1,9 +1,8 @@ import datetime import functools from dataclasses import dataclass -from typing import TYPE_CHECKING, AbstractSet, Mapping, Optional +from typing import TYPE_CHECKING, AbstractSet, Mapping, Optional, Sequence -from dagster._core.definitions.auto_materialize_rule_evaluation import RuleEvaluationResults from dagster._core.definitions.data_time import CachingDataTimeResolver from dagster._core.definitions.events import AssetKey, AssetKeyPartitionKey from dagster._core.definitions.partition import PartitionsDefinition @@ -16,6 +15,8 @@ from .asset_subset import AssetSubset if TYPE_CHECKING: + from dagster._core.definitions.asset_automation_evaluator import AssetSubsetWithMetdata + from .asset_automation_evaluator import AutomationCondition, ConditionEvaluation from .asset_daemon_context import AssetDaemonContext @@ -101,7 +102,7 @@ def materialized_requested_or_discarded_since_previous_tick_subset(self) -> Asse return ( self.materialized_since_previous_tick_subset | self.latest_evaluation.true_subset - | (self.latest_evaluation.discard_subset or self.empty_subset()) + | (self.latest_evaluation.discard_subset(self.root_condition) or self.empty_subset()) ) @functools.cached_property @@ -200,7 +201,7 @@ def get_new_asset_cursor(self, evaluation: "ConditionEvaluation") -> AssetDaemon previous_handled_subset | self.materialized_requested_or_discarded_since_previous_tick_subset | evaluation.true_subset - | (evaluation.discard_subset or self.empty_subset()) + | (evaluation.discard_subset(self.root_condition) or self.empty_subset()) ) return AssetDaemonAssetCursor( asset_key=self.asset_key, @@ -299,9 +300,9 @@ def materialized_requested_or_discarded_since_previous_tick_subset(self) -> Asse return self.asset_context.materialized_requested_or_discarded_since_previous_tick_subset @property - def previous_tick_results(self) -> RuleEvaluationResults: + def previous_tick_subsets_with_metadata(self) -> Sequence["AssetSubsetWithMetdata"]: """Returns the RuleEvaluationResults calculated on the previous tick for this condition.""" - return self.latest_evaluation.results if self.latest_evaluation else [] + return self.latest_evaluation.subsets_with_metadata if self.latest_evaluation else [] def empty_subset(self) -> AssetSubset: return self.asset_context.empty_subset() diff --git a/python_modules/dagster/dagster/_core/definitions/asset_automation_evaluator.py b/python_modules/dagster/dagster/_core/definitions/asset_automation_evaluator.py index 6fad476eea6b8..393ff582408be 100644 --- a/python_modules/dagster/dagster/_core/definitions/asset_automation_evaluator.py +++ b/python_modules/dagster/dagster/_core/definitions/asset_automation_evaluator.py @@ -1,29 +1,39 @@ -import dataclasses import functools import hashlib from abc import ABC, abstractmethod -from typing import TYPE_CHECKING, AbstractSet, List, NamedTuple, Optional, Sequence, Tuple +from typing import ( + TYPE_CHECKING, + AbstractSet, + FrozenSet, + List, + NamedTuple, + Optional, + Sequence, + Tuple, +) import dagster._check as check from dagster._core.definitions.asset_daemon_cursor import AssetDaemonAssetCursor from dagster._core.definitions.asset_graph import AssetGraph +from dagster._core.definitions.auto_materialize_rule_evaluation import ( + AutoMaterializeAssetEvaluation, + AutoMaterializeDecisionType, + AutoMaterializeRuleEvaluation, + AutoMaterializeRuleEvaluationData, +) from dagster._core.definitions.events import AssetKey, AssetKeyPartitionKey +from dagster._core.definitions.metadata import MetadataMapping, MetadataValue from .asset_automation_condition_context import ( AssetAutomationConditionEvaluationContext, AssetAutomationEvaluationContext, ) from .asset_subset import AssetSubset -from .auto_materialize_rule_evaluation import ( - AutoMaterializeAssetEvaluation, - AutoMaterializeDecisionType, - AutoMaterializeRuleEvaluation, -) if TYPE_CHECKING: from dagster._utils.caching_instance_queryer import CachingInstanceQueryer - from .auto_materialize_rule import AutoMaterializeRule, RuleEvaluationResults + from .auto_materialize_rule import AutoMaterializeRule class AutomationConditionNodeSnapshot(NamedTuple): @@ -41,22 +51,26 @@ def hash(self) -> str: ).hexdigest() +class AssetSubsetWithMetdata(NamedTuple): + """An asset subset with metadata that corresponds to it.""" + + subset: AssetSubset + metadata: MetadataMapping + + @property + def frozen_metadata(self) -> FrozenSet[Tuple[str, MetadataValue]]: + return frozenset(self.metadata.items()) + + class ConditionEvaluation(NamedTuple): """Internal representation of the results of evaluating a node in the evaluation tree.""" condition_snapshot: AutomationConditionNodeSnapshot true_subset: AssetSubset candidate_subset: AssetSubset - - results: "RuleEvaluationResults" = [] + subsets_with_metadata: Sequence[AssetSubsetWithMetdata] = [] child_evaluations: Sequence["ConditionEvaluation"] = [] - # backcompat until we remove the discard concept - discard_subset: Optional[AssetSubset] = None - discard_results: Sequence[ - Tuple[AutoMaterializeRuleEvaluation, AbstractSet[AssetKeyPartitionKey]] - ] = [] - def all_results( self, condition: "AutomationCondition" ) -> Sequence[Tuple[AutoMaterializeRuleEvaluation, AbstractSet[AssetKeyPartitionKey]]]: @@ -64,22 +78,62 @@ def all_results( of the system understand. """ if isinstance(condition, RuleCondition): - results = [ - ( - AutoMaterializeRuleEvaluation( - rule_snapshot=condition.rule.to_snapshot(), - evaluation_data=evaluation_data, - ), - subset, + if self.subsets_with_metadata: + results = [ + ( + AutoMaterializeRuleEvaluation( + rule_snapshot=condition.rule.to_snapshot(), + evaluation_data=AutoMaterializeRuleEvaluationData.from_metadata( + elt.metadata + ), + ), + elt.subset.asset_partitions, + ) + for elt in self.subsets_with_metadata + ] + else: + # if not provided specific metadata, just use the true subset + asset_partitions = self.true_subset.asset_partitions + results = ( + [ + ( + AutoMaterializeRuleEvaluation( + rule_snapshot=condition.rule.to_snapshot(), evaluation_data=None + ), + asset_partitions, + ) + ] + if asset_partitions + else [] ) - for evaluation_data, subset in self.results - ] else: results = [] for i, child in enumerate(self.child_evaluations): results = [*results, *child.all_results(condition.children[i])] return results + def skip_subset_size(self, condition: "AutomationCondition") -> int: + # backcompat way to calculate the set of skipped partitions for legacy policies + if not condition.is_legacy: + return 0 + + not_skip_evaluation = self.child_evaluations[1] + skip_evaluation = not_skip_evaluation.child_evaluations[0] + return skip_evaluation.true_subset.size + + def discard_subset(self, condition: "AutomationCondition") -> Optional[AssetSubset]: + not_discard_condition = condition.not_discard_condition + if not not_discard_condition or len(self.child_evaluations) != 3: + return None + + not_discard_evaluation = self.child_evaluations[2] + discard_evaluation = not_discard_evaluation.child_evaluations[0] + return discard_evaluation.true_subset + + def discard_subset_size(self, condition: "AutomationCondition") -> int: + discard_subset = self.discard_subset(condition) + return discard_subset.size if discard_subset else 0 + def for_child(self, child_condition: "AutomationCondition") -> Optional["ConditionEvaluation"]: """Returns the evaluation of a given child condition by finding the child evaluation that has an identical hash to the given condition. @@ -105,30 +159,14 @@ def to_evaluation( .to_auto_materialize_policy_evaluator() .condition ) - # backcompat way to calculate the set of skipped partitions for legacy policies - if condition.is_legacy and len(self.child_evaluations) == 2: - # the first child is the materialize condition, the second child is the negation of - # the skip condition - _, nor_skip_evaluation = self.child_evaluations - skip_evaluation = nor_skip_evaluation.child_evaluations[0] - skipped_subset_size = skip_evaluation.true_subset.size - else: - skipped_subset_size = 0 - - discard_subset = self.discard_subset or AssetSubset.empty( - asset_key, asset_graph.get_partitions_def(asset_key) - ) return AutoMaterializeAssetEvaluation.from_rule_evaluation_results( asset_key=asset_key, asset_graph=asset_graph, - asset_partitions_by_rule_evaluation=[ - *self.all_results(condition), - *self.discard_results, - ], - num_requested=(self.true_subset - discard_subset).size, - num_skipped=skipped_subset_size, - num_discarded=discard_subset.size, + asset_partitions_by_rule_evaluation=self.all_results(condition), + num_requested=self.true_subset.size, + num_skipped=self.skip_subset_size(condition), + num_discarded=self.discard_subset_size(condition), dynamic_partitions_store=instance_queryer, ) @@ -140,15 +178,17 @@ def from_evaluation_and_rule( ) -> "ConditionEvaluation": asset_key = evaluation.asset_key partitions_def = asset_graph.get_partitions_def(asset_key) - empty_subset = AssetSubset.empty(asset_key, partitions_def) + + true_subset, subsets_with_metadata = evaluation.get_rule_evaluation_results( + rule.to_snapshot(), asset_graph + ) return ConditionEvaluation( condition_snapshot=RuleCondition(rule=rule).snapshot, - true_subset=empty_subset, - candidate_subset=empty_subset + true_subset=true_subset, + candidate_subset=AssetSubset.empty(asset_key, partitions_def) if rule.decision_type == AutoMaterializeDecisionType.MATERIALIZE else evaluation.get_evaluated_subset(asset_graph), - discard_subset=empty_subset, - results=evaluation.get_rule_evaluation_results(rule.to_snapshot(), asset_graph), + subsets_with_metadata=subsets_with_metadata, ) @staticmethod @@ -167,7 +207,8 @@ def from_evaluation( partitions_def = asset_graph.get_partitions_def(asset_key) empty_subset = AssetSubset.empty(asset_key, partitions_def) - materialize_condition, skip_condition = condition.children + materialize_condition, not_skip_condition = condition.children[:2] + skip_condition = not_skip_condition.children[0] materialize_rules = [ materialize_condition.rule for materialize_condition in materialize_condition.children @@ -191,19 +232,43 @@ def from_evaluation( ], ), ConditionEvaluation( - condition_snapshot=skip_condition.snapshot, + condition_snapshot=not_skip_condition.snapshot, true_subset=empty_subset, candidate_subset=empty_subset, child_evaluations=[ - ConditionEvaluation.from_evaluation_and_rule(evaluation, asset_graph, rule) - for rule in skip_rules + ConditionEvaluation( + condition_snapshot=skip_condition.snapshot, + true_subset=empty_subset, + candidate_subset=empty_subset, + child_evaluations=[ + ConditionEvaluation.from_evaluation_and_rule( + evaluation, asset_graph, rule + ) + for rule in skip_rules + ], + ) ], ), ] + if condition.not_discard_condition: + discard_condition = condition.not_discard_condition.children[0] + if isinstance(discard_condition, RuleCondition): + children.append( + ConditionEvaluation( + condition_snapshot=condition.not_discard_condition.snapshot, + true_subset=empty_subset, + candidate_subset=empty_subset, + child_evaluations=[ + ConditionEvaluation.from_evaluation_and_rule( + evaluation, asset_graph, discard_condition.rule + ) + ], + ) + ) + return ConditionEvaluation( condition_snapshot=condition.snapshot, true_subset=evaluation.get_requested_subset(asset_graph), - discard_subset=evaluation.get_discarded_subset(asset_graph), candidate_subset=empty_subset, child_evaluations=children, ) @@ -241,15 +306,27 @@ def is_legacy(self) -> bool: """ return ( isinstance(self, AndAutomationCondition) - and len(self.children) == 2 + and len(self.children) in {2, 3} and isinstance(self.children[0], OrAutomationCondition) and isinstance(self.children[1], NotAutomationCondition) + # the third child is the discard condition, which is optional + and (len(self.children) == 2 or isinstance(self.children[2], NotAutomationCondition)) ) @property def children(self) -> Sequence["AutomationCondition"]: return [] + @property + def indexed_children(self) -> Sequence[Tuple[int, "AutomationCondition"]]: + return list(enumerate(self.children)) + + @property + def not_discard_condition(self) -> Optional["AutomationCondition"]: + if not self.is_legacy or not len(self.children) == 3: + return None + return self.children[-1] + @functools.cached_property def snapshot(self) -> AutomationConditionNodeSnapshot: """Returns a snapshot of this condition that can be used for serialization.""" @@ -270,12 +347,7 @@ def evaluate(self, context: AssetAutomationConditionEvaluationContext) -> Condit context.asset_context.daemon_context._verbose_log_fn( # noqa f"Evaluating rule: {self.rule.to_snapshot()}" ) - results = self.rule.evaluate_for_asset(context) - true_subset = context.empty_subset() - for _, asset_partitions in results: - true_subset |= AssetSubset.from_asset_partitions_set( - context.asset_key, context.partitions_def, asset_partitions - ) + true_subset, subsets_with_metadata = self.rule.evaluate_for_asset(context) context.asset_context.daemon_context._verbose_log_fn( # noqa f"Rule returned {true_subset.size} partitions" ) @@ -283,7 +355,7 @@ def evaluate(self, context: AssetAutomationConditionEvaluationContext) -> Condit condition_snapshot=self.snapshot, true_subset=true_subset, candidate_subset=context.candidate_subset, - results=results, + subsets_with_metadata=subsets_with_metadata, ) @@ -368,7 +440,6 @@ class AssetAutomationEvaluator(NamedTuple): """ condition: AutomationCondition - max_materializations_per_minute: Optional[int] = 1 def evaluate( self, context: AssetAutomationEvaluationContext @@ -382,34 +453,7 @@ def evaluate( are in the format `(a | b | ...) & ~(c | d | ...). - A new AssetDaemonAssetCursor that represents the state of the world after this evaluation. """ - from .auto_materialize_rule import DiscardOnMaxMaterializationsExceededRule - condition_context = context.get_root_condition_context() condition_evaluation = self.condition.evaluate(condition_context) - # this is treated separately from other rules, for now - discard_subset = context.empty_subset() - discard_results = [] - if self.max_materializations_per_minute is not None: - discard_context = dataclasses.replace( - condition_context, candidate_subset=condition_evaluation.true_subset - ) - discard_rule = DiscardOnMaxMaterializationsExceededRule( - limit=self.max_materializations_per_minute - ) - condition = RuleCondition(discard_rule) - discard_condition_evaluation = condition.evaluate(discard_context) - discard_subset = discard_condition_evaluation.true_subset - discard_results = [ - (AutoMaterializeRuleEvaluation(discard_rule.to_snapshot(), evaluation_data), aps) - for evaluation_data, aps in discard_condition_evaluation.results - ] - - return ( - condition_evaluation._replace( - true_subset=condition_evaluation.true_subset - discard_subset, - discard_subset=discard_subset, - discard_results=discard_results, - ), - context.get_new_asset_cursor(evaluation=condition_evaluation), - ) + return condition_evaluation, context.get_new_asset_cursor(evaluation=condition_evaluation) diff --git a/python_modules/dagster/dagster/_core/definitions/asset_daemon_cursor.py b/python_modules/dagster/dagster/_core/definitions/asset_daemon_cursor.py index 3b24f361e2c94..8cadb8d4afe43 100644 --- a/python_modules/dagster/dagster/_core/definitions/asset_daemon_cursor.py +++ b/python_modules/dagster/dagster/_core/definitions/asset_daemon_cursor.py @@ -22,12 +22,13 @@ ) from dagster._serdes.serdes import deserialize_value, serialize_value, whitelist_for_serdes -if TYPE_CHECKING: - from .asset_automation_evaluator import ConditionEvaluation from .asset_graph import AssetGraph from .asset_subset import AssetSubset from .partition import PartitionsSubset +if TYPE_CHECKING: + from .asset_automation_evaluator import ConditionEvaluation + class AssetDaemonAssetCursor(NamedTuple): """Convenience class to represent the state of an individual asset being handled by the daemon. diff --git a/python_modules/dagster/dagster/_core/definitions/auto_materialize_policy.py b/python_modules/dagster/dagster/_core/definitions/auto_materialize_policy.py index 80a9581d234d0..b5aac93e1cc22 100644 --- a/python_modules/dagster/dagster/_core/definitions/auto_materialize_policy.py +++ b/python_modules/dagster/dagster/_core/definitions/auto_materialize_policy.py @@ -262,19 +262,29 @@ def to_auto_materialize_policy_evaluator(self) -> "AssetAutomationEvaluator": OrAutomationCondition, RuleCondition, ) + from .auto_materialize_rule import DiscardOnMaxMaterializationsExceededRule materialize_condition = OrAutomationCondition( - children=[RuleCondition(rule) for rule in self.materialize_rules] + children=[ + RuleCondition(rule) + for rule in sorted(self.materialize_rules, key=lambda rule: rule.description) + ] ) skip_condition = OrAutomationCondition( - children=[RuleCondition(rule) for rule in self.skip_rules] + children=[ + RuleCondition(rule) + for rule in sorted(self.skip_rules, key=lambda rule: rule.description) + ] ) + children = [ + materialize_condition, + NotAutomationCondition([skip_condition]), + ] + if self.max_materializations_per_minute: + discard_condition = RuleCondition( + DiscardOnMaxMaterializationsExceededRule(self.max_materializations_per_minute) + ) + children.append(NotAutomationCondition([discard_condition])) - # results in an expression of the form (m1 | m2 | ... | mn) & ~(s1 | s2 | ... | sn) - condition = AndAutomationCondition( - children=[materialize_condition, NotAutomationCondition([skip_condition])] - ) - return AssetAutomationEvaluator( - condition=condition, - max_materializations_per_minute=self.max_materializations_per_minute, - ) + # results in an expression of the form (m1 | m2 | ... | mn) & ~(s1 | s2 | ... | sn) & ~d + return AssetAutomationEvaluator(condition=AndAutomationCondition(children)) diff --git a/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule.py b/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule.py index 594fffa4bd631..6a2debaab8239 100644 --- a/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule.py +++ b/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule.py @@ -1,9 +1,10 @@ import datetime +import operator from abc import ABC, abstractmethod, abstractproperty from collections import defaultdict +from functools import reduce from typing import ( AbstractSet, - Callable, Dict, Iterable, Mapping, @@ -17,6 +18,7 @@ import dagster._check as check from dagster._annotations import experimental, public +from dagster._core.definitions.asset_subset import AssetSubset from dagster._core.definitions.auto_materialize_rule_evaluation import ( AutoMaterializeDecisionType, AutoMaterializeRuleEvaluationData, @@ -74,32 +76,53 @@ def add_evaluation_data_from_previous_tick( self, context: AssetAutomationConditionEvaluationContext, asset_partitions_by_evaluation_data: Mapping[ - Optional[AutoMaterializeRuleEvaluationData], Set[AssetKeyPartitionKey] + AutoMaterializeRuleEvaluationData, Set[AssetKeyPartitionKey] ], - should_use_past_data_fn: Callable[[AssetKeyPartitionKey], bool], - ) -> "RuleEvaluationResults": - """Combines a given set of evaluation data with evaluation data from the previous tick. The - returned value will include the union of the evaluation data contained within - `asset_partitions_by_evaluation_data` and the evaluation data calculated for asset - partitions on the previous tick for which `should_use_past_data_fn` evaluates to `True`. + ignore_subset: AssetSubset, + ) -> RuleEvaluationResults: + """Combines evaluation data calculated on this tick with evaluation data calculated on the + previous tick. Args: context: The current RuleEvaluationContext. asset_partitions_by_evaluation_data: A mapping from evaluation data to the set of asset partitions that the rule applies to. - should_use_past_data_fn: A function that returns whether a given asset partition from the - previous tick should be included in the results of this tick. + ignore_subset: An AssetSubset which represents information that we should *not* carry + forward from the previous tick. """ - asset_partitions_by_evaluation_data = defaultdict(set, asset_partitions_by_evaluation_data) - evaluated_asset_partitions = set().union(*asset_partitions_by_evaluation_data.values()) - for evaluation_data, asset_partitions in context.previous_tick_results: - for ap in asset_partitions: - # evaluated data from this tick takes precedence over data from the previous tick - if ap in evaluated_asset_partitions: - continue - elif should_use_past_data_fn(ap): - asset_partitions_by_evaluation_data[evaluation_data].add(ap) - return list(asset_partitions_by_evaluation_data.items()) + from .asset_automation_evaluator import AssetSubsetWithMetdata + + mapping = defaultdict(lambda: context.empty_subset()) + for evaluation_data, asset_partitions in asset_partitions_by_evaluation_data.items(): + mapping[ + frozenset(evaluation_data.metadata.items()) + ] = AssetSubset.from_asset_partitions_set( + context.asset_key, context.partitions_def, asset_partitions + ) + + # get the set of all things we have metadata for + has_metadata_subset = context.empty_subset() + for evaluation_data, subset in mapping.items(): + has_metadata_subset |= subset + + # don't use information from the previous tick if we have explicit metadata for it or + # we've explicitly said to ignore it + ignore_subset = has_metadata_subset | ignore_subset + + for elt in context.previous_tick_subsets_with_metadata: + carry_forward_subset = elt.subset - ignore_subset + if carry_forward_subset.size > 0: + mapping[elt.frozen_metadata] |= carry_forward_subset + + # for now, an asset is in the "true" subset if and only if we have some metadata for it + true_subset = reduce(operator.or_, mapping.values(), context.empty_subset()) + return ( + true_subset, + [ + AssetSubsetWithMetdata(subset, dict(metadata)) + for metadata, subset in mapping.items() + ], + ) @abstractmethod def evaluate_for_asset( @@ -309,7 +332,7 @@ def missed_cron_ticks( missed_ticks.append(dt) return missed_ticks - def get_asset_partitions_to_request( + def get_new_asset_partitions_to_request( self, context: AssetAutomationConditionEvaluationContext ) -> AbstractSet[AssetKeyPartitionKey]: missed_ticks = self.missed_cron_ticks(context) @@ -375,18 +398,16 @@ def get_asset_partitions_to_request( def evaluate_for_asset( self, context: AssetAutomationConditionEvaluationContext ) -> RuleEvaluationResults: - asset_partitions_to_request = self.get_asset_partitions_to_request(context) - asset_partitions_by_evaluation_data = defaultdict(set) - if asset_partitions_to_request: - asset_partitions_by_evaluation_data[None].update(asset_partitions_to_request) - - return self.add_evaluation_data_from_previous_tick( - context, - asset_partitions_by_evaluation_data, - should_use_past_data_fn=lambda ap: ap - not in context.materialized_requested_or_discarded_since_previous_tick_subset, + new_asset_partitions_to_request = self.get_new_asset_partitions_to_request(context) + asset_subset_to_request = AssetSubset.from_asset_partitions_set( + context.asset_key, context.partitions_def, new_asset_partitions_to_request + ) | ( + context.previous_tick_true_subset + - context.materialized_requested_or_discarded_since_previous_tick_subset ) + return asset_subset_to_request, [] + @whitelist_for_serdes @experimental @@ -579,8 +600,7 @@ def evaluate_for_asset( return self.add_evaluation_data_from_previous_tick( context, asset_partitions_by_evaluation_data, - should_use_past_data_fn=lambda ap: ap - not in context.materialized_requested_or_discarded_since_previous_tick_subset, + ignore_subset=context.materialized_requested_or_discarded_since_previous_tick_subset, ) @@ -601,8 +621,6 @@ def evaluate_for_asset( previously discarded. Currently only applies to root asset partitions and asset partitions with updated parents. """ - asset_partitions_by_evaluation_data = defaultdict(set) - missing_asset_partitions = set( context.asset_context.never_materialized_requested_or_discarded_root_subset.asset_partitions ) @@ -614,15 +632,14 @@ def evaluate_for_asset( ): missing_asset_partitions |= {candidate} - if missing_asset_partitions: - asset_partitions_by_evaluation_data[None] = missing_asset_partitions - - return self.add_evaluation_data_from_previous_tick( - context, - asset_partitions_by_evaluation_data, - should_use_past_data_fn=lambda ap: ap not in missing_asset_partitions - and ap not in context.materialized_requested_or_discarded_since_previous_tick_subset, + newly_missing_subset = AssetSubset.from_asset_partitions_set( + context.asset_key, context.partitions_def, missing_asset_partitions + ) + missing_subset = newly_missing_subset | ( + context.previous_tick_true_subset + - context.materialized_requested_or_discarded_since_previous_tick_subset ) + return missing_subset, [] @whitelist_for_serdes @@ -668,7 +685,7 @@ def evaluate_for_asset( return self.add_evaluation_data_from_previous_tick( context, asset_partitions_by_evaluation_data, - should_use_past_data_fn=lambda ap: ap not in subset_to_evaluate, + ignore_subset=subset_to_evaluate, ) @@ -717,7 +734,7 @@ def evaluate_for_asset( return self.add_evaluation_data_from_previous_tick( context, asset_partitions_by_evaluation_data, - should_use_past_data_fn=lambda ap: ap not in subset_to_evaluate, + ignore_subset=subset_to_evaluate, ) @@ -803,7 +820,7 @@ def evaluate_for_asset( return self.add_evaluation_data_from_previous_tick( context, asset_partitions_by_evaluation_data, - should_use_past_data_fn=lambda ap: ap not in subset_to_evaluate, + ignore_subset=subset_to_evaluate, ) @@ -845,7 +862,7 @@ def evaluate_for_asset( return self.add_evaluation_data_from_previous_tick( context, asset_partitions_by_evaluation_data, - should_use_past_data_fn=lambda ap: ap not in subset_to_evaluate, + ignore_subset=subset_to_evaluate, ) @@ -873,14 +890,14 @@ def evaluate_for_asset( ).get_asset_subset(context.asset_key, context.asset_context.asset_graph) if backfilling_subset.size == 0: - return [] + return context.empty_subset(), [] if self.all_partitions: true_subset = context.candidate_subset else: true_subset = context.candidate_subset & backfilling_subset - return [(None, true_subset.asset_partitions)] + return true_subset, [] @whitelist_for_serdes @@ -905,6 +922,7 @@ def evaluate_for_asset( key=lambda x: sort_key_for_asset_partition(context.asset_graph, x), )[self.limit :] ) - if rate_limited_asset_partitions: - return [(None, rate_limited_asset_partitions)] - return [] + + return AssetSubset.from_asset_partitions_set( + context.asset_key, context.partitions_def, rate_limited_asset_partitions + ), [] diff --git a/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule_evaluation.py b/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule_evaluation.py index aceb712324c6d..701d6b988c2fd 100644 --- a/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule_evaluation.py +++ b/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule_evaluation.py @@ -1,4 +1,4 @@ -from abc import ABC +from abc import ABC, abstractproperty from enum import Enum from typing import ( TYPE_CHECKING, @@ -16,6 +16,7 @@ import dagster._check as check from dagster._core.definitions.asset_subset import AssetSubset from dagster._core.definitions.events import AssetKey, AssetKeyPartitionKey +from dagster._core.definitions.metadata import MetadataMapping, MetadataValue from dagster._serdes.serdes import ( NamedTupleSerializer, UnpackContext, @@ -28,6 +29,7 @@ from .partition import SerializedPartitionsSubset if TYPE_CHECKING: + from dagster._core.definitions.asset_automation_evaluator import AssetSubsetWithMetdata from dagster._core.instance import DynamicPartitionsStore @@ -57,7 +59,45 @@ class AutoMaterializeRuleSnapshot(NamedTuple): class AutoMaterializeRuleEvaluationData(ABC): - pass + @abstractproperty + def metadata(self) -> MetadataMapping: + raise NotImplementedError() + + @staticmethod + def from_metadata(metadata: MetadataMapping) -> Optional["AutoMaterializeRuleEvaluationData"]: + """Temporary workaround to convert the generic metadata mapping into the old format.""" + if not metadata: + return None + elif "text" in metadata: + text_value = cast(str, metadata["text"].value) + return TextRuleEvaluationData(text_value) + + waiting_on_ancestors = frozenset( + { + cast(AssetKey, v.value) + for k, v in metadata.items() + if k.startswith("waiting_on_ancestor") + } + ) + if waiting_on_ancestors: + return WaitingOnAssetsRuleEvaluationData(waiting_on_asset_keys=waiting_on_ancestors) + + updated_parents = frozenset( + {cast(AssetKey, v.value) for k, v in metadata.items() if k.startswith("updated_parent")} + ) + will_update_parents = frozenset( + { + cast(AssetKey, v.value) + for k, v in metadata.items() + if k.startswith("will_update_parent") + } + ) + if updated_parents or will_update_parents: + return ParentUpdatedRuleEvaluationData( + updated_asset_keys=updated_parents, will_update_asset_keys=will_update_parents + ) + + return None @whitelist_for_serdes @@ -65,7 +105,9 @@ class TextRuleEvaluationData( AutoMaterializeRuleEvaluationData, NamedTuple("_TextRuleEvaluationData", [("text", str)]), ): - pass + @property + def metadata(self) -> MetadataMapping: + return {"text": MetadataValue.text(self.text)} @whitelist_for_serdes @@ -79,7 +121,18 @@ class ParentUpdatedRuleEvaluationData( ], ), ): - pass + @property + def metadata(self) -> MetadataMapping: + return { + **{ + f"updated_parent_{i+1}": MetadataValue.asset(k) + for i, k in enumerate(self.updated_asset_keys) + }, + **{ + f"will_update_parent_{i+1}": MetadataValue.asset(k) + for i, k in enumerate(self.will_update_asset_keys) + }, + } @whitelist_for_serdes @@ -90,12 +143,17 @@ class WaitingOnAssetsRuleEvaluationData( [("waiting_on_asset_keys", FrozenSet[AssetKey])], ), ): - pass + @property + def metadata(self) -> MetadataMapping: + return { + **{ + f"waiting_on_ancestor_{i+1}": MetadataValue.asset(k) + for i, k in enumerate(self.waiting_on_asset_keys) + }, + } -RuleEvaluationResults = Sequence[ - Tuple[Optional[AutoMaterializeRuleEvaluationData], AbstractSet[AssetKeyPartitionKey]] -] +RuleEvaluationResults = Tuple[AssetSubset, Sequence["AssetSubsetWithMetdata"]] @whitelist_for_serdes @@ -214,7 +272,12 @@ def get_rule_evaluation_results( self, rule_snapshot: AutoMaterializeRuleSnapshot, asset_graph: AssetGraph ) -> RuleEvaluationResults: """For a given rule snapshot, returns the calculated evaluations for that rule.""" - results = [] + from dagster._core.definitions.asset_automation_evaluator import AssetSubsetWithMetdata + + true_subset = AssetSubset.empty( + self.asset_key, asset_graph.get_partitions_def(self.asset_key) + ) + subsets_with_metadata = [] for rule_evaluation, serialized_subset in self.partition_subsets_by_condition: # filter for the same rule if rule_evaluation.rule_snapshot != rule_snapshot: @@ -223,8 +286,15 @@ def get_rule_evaluation_results( rule_evaluation, serialized_subset, asset_graph ) if deserialized_result: - results.append((deserialized_result[0], deserialized_result[1].asset_partitions)) - return results + evaluation_data, subset = deserialized_result + metadata = evaluation_data.metadata if evaluation_data else {} + + true_subset |= subset + subsets_with_metadata.append( + AssetSubsetWithMetdata(subset=subset, metadata=metadata) + ) + + return true_subset, subsets_with_metadata def _get_subset_with_decision_type( self, *, decision_type: AutoMaterializeDecisionType, asset_graph: AssetGraph diff --git a/python_modules/dagster/dagster/_core/definitions/freshness_based_auto_materialize.py b/python_modules/dagster/dagster/_core/definitions/freshness_based_auto_materialize.py index 2238cf36c9fc2..fb632b631b9d3 100644 --- a/python_modules/dagster/dagster/_core/definitions/freshness_based_auto_materialize.py +++ b/python_modules/dagster/dagster/_core/definitions/freshness_based_auto_materialize.py @@ -12,6 +12,8 @@ import pendulum +from dagster._core.definitions.asset_automation_evaluator import AssetSubsetWithMetdata +from dagster._core.definitions.asset_subset import AssetSubset from dagster._core.definitions.events import AssetKeyPartitionKey from dagster._core.definitions.freshness_policy import FreshnessPolicy from dagster._utils.schedules import cron_string_iterator @@ -165,7 +167,7 @@ def freshness_evaluation_results_for_asset_key( if not context.asset_graph.get_downstream_freshness_policies( asset_key=asset_key ) or context.asset_graph.is_partitioned(asset_key): - return [] + return context.empty_subset(), [] # figure out the current contents of this asset current_data_time = context.data_time_resolver.get_current_data_time(asset_key, current_time) @@ -178,7 +180,7 @@ def freshness_evaluation_results_for_asset_key( # if executing the asset on this tick would not change its data time, then return if current_data_time == expected_data_time: - return [] + return context.empty_subset(), [] # calculate the data times you would expect after all currently-executing runs # were to successfully complete @@ -208,7 +210,6 @@ def freshness_evaluation_results_for_asset_key( current_time=current_time, ) - asset_partition = AssetKeyPartitionKey(asset_key, None) if ( execution_period is not None and execution_period.start <= current_time @@ -217,6 +218,9 @@ def freshness_evaluation_results_for_asset_key( and expected_data_time >= execution_period.start and evaluation_data is not None ): - return [(evaluation_data, {asset_partition})] + all_subset = AssetSubset.all(asset_key, None) + return AssetSubset.all(asset_key, None), [ + AssetSubsetWithMetdata(all_subset, evaluation_data.metadata) + ] else: - return [] + return context.empty_subset(), [] From 75cf2964894fad5cc92893ab19531d524b5e1819 Mon Sep 17 00:00:00 2001 From: Owen Kephart Date: Fri, 8 Dec 2023 14:06:51 -0800 Subject: [PATCH 04/56] AssetAutomationCondition -> AssetCondition --- ...mation_evaluator.py => asset_condition.py} | 183 ++++++++---------- ... => asset_condition_evaluation_context.py} | 62 +++--- .../_core/definitions/asset_daemon_context.py | 24 ++- .../_core/definitions/asset_daemon_cursor.py | 16 +- .../definitions/auto_materialize_policy.py | 23 ++- .../definitions/auto_materialize_rule.py | 84 ++++---- .../auto_materialize_rule_evaluation.py | 4 +- .../freshness_based_auto_materialize.py | 8 +- 8 files changed, 181 insertions(+), 223 deletions(-) rename python_modules/dagster/dagster/_core/definitions/{asset_automation_evaluator.py => asset_condition.py} (69%) rename python_modules/dagster/dagster/_core/definitions/{asset_automation_condition_context.py => asset_condition_evaluation_context.py} (86%) diff --git a/python_modules/dagster/dagster/_core/definitions/asset_automation_evaluator.py b/python_modules/dagster/dagster/_core/definitions/asset_condition.py similarity index 69% rename from python_modules/dagster/dagster/_core/definitions/asset_automation_evaluator.py rename to python_modules/dagster/dagster/_core/definitions/asset_condition.py index 393ff582408be..d40ffa4709328 100644 --- a/python_modules/dagster/dagster/_core/definitions/asset_automation_evaluator.py +++ b/python_modules/dagster/dagster/_core/definitions/asset_condition.py @@ -13,7 +13,6 @@ ) import dagster._check as check -from dagster._core.definitions.asset_daemon_cursor import AssetDaemonAssetCursor from dagster._core.definitions.asset_graph import AssetGraph from dagster._core.definitions.auto_materialize_rule_evaluation import ( AutoMaterializeAssetEvaluation, @@ -24,9 +23,8 @@ from dagster._core.definitions.events import AssetKey, AssetKeyPartitionKey from dagster._core.definitions.metadata import MetadataMapping, MetadataValue -from .asset_automation_condition_context import ( - AssetAutomationConditionEvaluationContext, - AssetAutomationEvaluationContext, +from .asset_condition_evaluation_context import ( + AssetConditionEvaluationContext, ) from .asset_subset import AssetSubset @@ -36,7 +34,7 @@ from .auto_materialize_rule import AutoMaterializeRule -class AutomationConditionNodeSnapshot(NamedTuple): +class AssetConditionSnapshot(NamedTuple): """A serializable snapshot of a node in the AutomationCondition tree.""" class_name: str @@ -62,17 +60,17 @@ def frozen_metadata(self) -> FrozenSet[Tuple[str, MetadataValue]]: return frozenset(self.metadata.items()) -class ConditionEvaluation(NamedTuple): +class AssetConditionEvaluation(NamedTuple): """Internal representation of the results of evaluating a node in the evaluation tree.""" - condition_snapshot: AutomationConditionNodeSnapshot + condition_snapshot: AssetConditionSnapshot true_subset: AssetSubset candidate_subset: AssetSubset subsets_with_metadata: Sequence[AssetSubsetWithMetdata] = [] - child_evaluations: Sequence["ConditionEvaluation"] = [] + child_evaluations: Sequence["AssetConditionEvaluation"] = [] def all_results( - self, condition: "AutomationCondition" + self, condition: "AssetCondition" ) -> Sequence[Tuple[AutoMaterializeRuleEvaluation, AbstractSet[AssetKeyPartitionKey]]]: """This method is a placeholder to allow us to convert this into a shape that other parts of the system understand. @@ -112,7 +110,7 @@ def all_results( results = [*results, *child.all_results(condition.children[i])] return results - def skip_subset_size(self, condition: "AutomationCondition") -> int: + def skip_subset_size(self, condition: "AssetCondition") -> int: # backcompat way to calculate the set of skipped partitions for legacy policies if not condition.is_legacy: return 0 @@ -121,7 +119,7 @@ def skip_subset_size(self, condition: "AutomationCondition") -> int: skip_evaluation = not_skip_evaluation.child_evaluations[0] return skip_evaluation.true_subset.size - def discard_subset(self, condition: "AutomationCondition") -> Optional[AssetSubset]: + def discard_subset(self, condition: "AssetCondition") -> Optional[AssetSubset]: not_discard_condition = condition.not_discard_condition if not not_discard_condition or len(self.child_evaluations) != 3: return None @@ -130,11 +128,11 @@ def discard_subset(self, condition: "AutomationCondition") -> Optional[AssetSubs discard_evaluation = not_discard_evaluation.child_evaluations[0] return discard_evaluation.true_subset - def discard_subset_size(self, condition: "AutomationCondition") -> int: + def discard_subset_size(self, condition: "AssetCondition") -> int: discard_subset = self.discard_subset(condition) return discard_subset.size if discard_subset else 0 - def for_child(self, child_condition: "AutomationCondition") -> Optional["ConditionEvaluation"]: + def for_child(self, child_condition: "AssetCondition") -> Optional["AssetConditionEvaluation"]: """Returns the evaluation of a given child condition by finding the child evaluation that has an identical hash to the given condition. """ @@ -154,11 +152,9 @@ def to_evaluation( """This method is a placeholder to allow us to convert this into a shape that other parts of the system understand. """ - condition = ( - check.not_none(asset_graph.get_auto_materialize_policy(asset_key)) - .to_auto_materialize_policy_evaluator() - .condition - ) + condition = check.not_none( + asset_graph.get_auto_materialize_policy(asset_key) + ).to_asset_condition() return AutoMaterializeAssetEvaluation.from_rule_evaluation_results( asset_key=asset_key, @@ -175,14 +171,14 @@ def from_evaluation_and_rule( evaluation: AutoMaterializeAssetEvaluation, asset_graph: AssetGraph, rule: "AutoMaterializeRule", - ) -> "ConditionEvaluation": + ) -> "AssetConditionEvaluation": asset_key = evaluation.asset_key partitions_def = asset_graph.get_partitions_def(asset_key) true_subset, subsets_with_metadata = evaluation.get_rule_evaluation_results( rule.to_snapshot(), asset_graph ) - return ConditionEvaluation( + return AssetConditionEvaluation( condition_snapshot=RuleCondition(rule=rule).snapshot, true_subset=true_subset, candidate_subset=AssetSubset.empty(asset_key, partitions_def) @@ -193,10 +189,10 @@ def from_evaluation_and_rule( @staticmethod def from_evaluation( - condition: "AutomationCondition", + condition: "AssetCondition", evaluation: Optional[AutoMaterializeAssetEvaluation], asset_graph: AssetGraph, - ) -> Optional["ConditionEvaluation"]: + ) -> Optional["AssetConditionEvaluation"]: """This method is a placeholder to allow us to convert the serialized objects the system uses into a more-convenient internal representation. """ @@ -222,26 +218,26 @@ def from_evaluation( and skip_condition.rule.to_snapshot() in (evaluation.rule_snapshots or set()) ] children = [ - ConditionEvaluation( + AssetConditionEvaluation( condition_snapshot=materialize_condition.snapshot, true_subset=empty_subset, candidate_subset=empty_subset, child_evaluations=[ - ConditionEvaluation.from_evaluation_and_rule(evaluation, asset_graph, rule) + AssetConditionEvaluation.from_evaluation_and_rule(evaluation, asset_graph, rule) for rule in materialize_rules ], ), - ConditionEvaluation( + AssetConditionEvaluation( condition_snapshot=not_skip_condition.snapshot, true_subset=empty_subset, candidate_subset=empty_subset, child_evaluations=[ - ConditionEvaluation( + AssetConditionEvaluation( condition_snapshot=skip_condition.snapshot, true_subset=empty_subset, candidate_subset=empty_subset, child_evaluations=[ - ConditionEvaluation.from_evaluation_and_rule( + AssetConditionEvaluation.from_evaluation_and_rule( evaluation, asset_graph, rule ) for rule in skip_rules @@ -254,19 +250,19 @@ def from_evaluation( discard_condition = condition.not_discard_condition.children[0] if isinstance(discard_condition, RuleCondition): children.append( - ConditionEvaluation( + AssetConditionEvaluation( condition_snapshot=condition.not_discard_condition.snapshot, true_subset=empty_subset, candidate_subset=empty_subset, child_evaluations=[ - ConditionEvaluation.from_evaluation_and_rule( + AssetConditionEvaluation.from_evaluation_and_rule( evaluation, asset_graph, discard_condition.rule ) ], ) ) - return ConditionEvaluation( + return AssetConditionEvaluation( condition_snapshot=condition.snapshot, true_subset=evaluation.get_requested_subset(asset_graph), candidate_subset=empty_subset, @@ -274,30 +270,30 @@ def from_evaluation( ) -class AutomationCondition(ABC): +class AssetCondition(ABC): """An AutomationCondition represents some state of the world that can influence if an asset partition should be materialized or not. AutomationConditions can be combined together to create new conditions using the `&` (and), `|` (or), and `~` (not) operators. """ @abstractmethod - def evaluate(self, context: AssetAutomationConditionEvaluationContext) -> ConditionEvaluation: + def evaluate(self, context: AssetConditionEvaluationContext) -> AssetConditionEvaluation: raise NotImplementedError() - def __and__(self, other: "AutomationCondition") -> "AutomationCondition": + def __and__(self, other: "AssetCondition") -> "AssetCondition": # group AndAutomationConditions together - if isinstance(self, AndAutomationCondition): - return AndAutomationCondition(children=[*self.children, other]) - return AndAutomationCondition(children=[self, other]) + if isinstance(self, AndAssetCondition): + return AndAssetCondition(children=[*self.children, other]) + return AndAssetCondition(children=[self, other]) - def __or__(self, other: "AutomationCondition") -> "AutomationCondition": + def __or__(self, other: "AssetCondition") -> "AssetCondition": # group OrAutomationConditions together - if isinstance(self, OrAutomationCondition): - return OrAutomationCondition(children=[*self.children, other]) - return OrAutomationCondition(children=[self, other]) + if isinstance(self, OrAssetCondition): + return OrAssetCondition(children=[*self.children, other]) + return OrAssetCondition(children=[self, other]) - def __invert__(self) -> "AutomationCondition": - return NotAutomationCondition(children=[self]) + def __invert__(self) -> "AssetCondition": + return NotAssetCondition(children=[self]) @property def is_legacy(self) -> bool: @@ -305,32 +301,32 @@ def is_legacy(self) -> bool: do certain types of backwards-compatible operations on it. """ return ( - isinstance(self, AndAutomationCondition) + isinstance(self, AndAssetCondition) and len(self.children) in {2, 3} - and isinstance(self.children[0], OrAutomationCondition) - and isinstance(self.children[1], NotAutomationCondition) + and isinstance(self.children[0], OrAssetCondition) + and isinstance(self.children[1], NotAssetCondition) # the third child is the discard condition, which is optional - and (len(self.children) == 2 or isinstance(self.children[2], NotAutomationCondition)) + and (len(self.children) == 2 or isinstance(self.children[2], NotAssetCondition)) ) @property - def children(self) -> Sequence["AutomationCondition"]: + def children(self) -> Sequence["AssetCondition"]: return [] @property - def indexed_children(self) -> Sequence[Tuple[int, "AutomationCondition"]]: + def indexed_children(self) -> Sequence[Tuple[int, "AssetCondition"]]: return list(enumerate(self.children)) @property - def not_discard_condition(self) -> Optional["AutomationCondition"]: + def not_discard_condition(self) -> Optional["AssetCondition"]: if not self.is_legacy or not len(self.children) == 3: return None return self.children[-1] @functools.cached_property - def snapshot(self) -> AutomationConditionNodeSnapshot: + def snapshot(self) -> AssetConditionSnapshot: """Returns a snapshot of this condition that can be used for serialization.""" - return AutomationConditionNodeSnapshot( + return AssetConditionSnapshot( class_name=self.__class__.__name__, description=str(self), child_hashes=[child.snapshot.hash for child in self.children], @@ -339,19 +335,19 @@ def snapshot(self) -> AutomationConditionNodeSnapshot: class RuleCondition( NamedTuple("_RuleCondition", [("rule", "AutoMaterializeRule")]), - AutomationCondition, + AssetCondition, ): """This class represents the condition that a particular AutoMaterializeRule is satisfied.""" - def evaluate(self, context: AssetAutomationConditionEvaluationContext) -> ConditionEvaluation: - context.asset_context.daemon_context._verbose_log_fn( # noqa + def evaluate(self, context: AssetConditionEvaluationContext) -> AssetConditionEvaluation: + context.root_context.daemon_context._verbose_log_fn( # noqa f"Evaluating rule: {self.rule.to_snapshot()}" ) true_subset, subsets_with_metadata = self.rule.evaluate_for_asset(context) - context.asset_context.daemon_context._verbose_log_fn( # noqa + context.root_context.daemon_context._verbose_log_fn( # noqa f"Rule returned {true_subset.size} partitions" ) - return ConditionEvaluation( + return AssetConditionEvaluation( condition_snapshot=self.snapshot, true_subset=true_subset, candidate_subset=context.candidate_subset, @@ -359,21 +355,23 @@ def evaluate(self, context: AssetAutomationConditionEvaluationContext) -> Condit ) -class AndAutomationCondition( - NamedTuple("_AndAutomationCondition", [("children", Sequence[AutomationCondition])]), - AutomationCondition, +class AndAssetCondition( + NamedTuple("_AndAssetCondition", [("children", Sequence[AssetCondition])]), + AssetCondition, ): """This class represents the condition that all of its children evaluate to true.""" - def evaluate(self, context: AssetAutomationConditionEvaluationContext) -> ConditionEvaluation: - child_evaluations: List[ConditionEvaluation] = [] + def evaluate(self, context: AssetConditionEvaluationContext) -> AssetConditionEvaluation: + child_evaluations: List[AssetConditionEvaluation] = [] true_subset = context.candidate_subset - for child in self.children: - child_context = context.for_child(condition=child, candidate_subset=true_subset) + for index, child in self.indexed_children: + child_context = context.for_child( + condition=child, candidate_subset=true_subset, child_index=index + ) result = child.evaluate(child_context) child_evaluations.append(result) true_subset &= result.true_subset - return ConditionEvaluation( + return AssetConditionEvaluation( condition_snapshot=self.snapshot, true_subset=true_subset, candidate_subset=context.candidate_subset, @@ -381,23 +379,23 @@ def evaluate(self, context: AssetAutomationConditionEvaluationContext) -> Condit ) -class OrAutomationCondition( - NamedTuple("_OrAutomationCondition", [("children", Sequence[AutomationCondition])]), - AutomationCondition, +class OrAssetCondition( + NamedTuple("_OrAssetCondition", [("children", Sequence[AssetCondition])]), + AssetCondition, ): """This class represents the condition that any of its children evaluate to true.""" - def evaluate(self, context: AssetAutomationConditionEvaluationContext) -> ConditionEvaluation: - child_evaluations: List[ConditionEvaluation] = [] + def evaluate(self, context: AssetConditionEvaluationContext) -> AssetConditionEvaluation: + child_evaluations: List[AssetConditionEvaluation] = [] true_subset = context.empty_subset() - for child in self.children: + for index, child in self.indexed_children: child_context = context.for_child( - condition=child, candidate_subset=context.candidate_subset + condition=child, candidate_subset=context.candidate_subset, child_index=index ) result = child.evaluate(child_context) child_evaluations.append(result) true_subset |= result.true_subset - return ConditionEvaluation( + return AssetConditionEvaluation( condition_snapshot=self.snapshot, true_subset=true_subset, candidate_subset=context.candidate_subset, @@ -405,55 +403,30 @@ def evaluate(self, context: AssetAutomationConditionEvaluationContext) -> Condit ) -class NotAutomationCondition( - NamedTuple("_NotAutomationCondition", [("children", Sequence[AutomationCondition])]), - AutomationCondition, +class NotAssetCondition( + NamedTuple("_NotAssetCondition", [("children", Sequence[AssetCondition])]), + AssetCondition, ): """This class represents the condition that none of its children evaluate to true.""" - def __new__(cls, children: Sequence[AutomationCondition]): + def __new__(cls, children: Sequence[AssetCondition]): check.invariant(len(children) == 1) return super().__new__(cls, children) @property - def child(self) -> AutomationCondition: + def child(self) -> AssetCondition: return self.children[0] - def evaluate(self, context: AssetAutomationConditionEvaluationContext) -> ConditionEvaluation: + def evaluate(self, context: AssetConditionEvaluationContext) -> AssetConditionEvaluation: child_context = context.for_child( - condition=self.child, candidate_subset=context.candidate_subset + condition=self.child, candidate_subset=context.candidate_subset, child_index=0 ) result = self.child.evaluate(child_context) true_subset = context.candidate_subset - result.true_subset - return ConditionEvaluation( + return AssetConditionEvaluation( condition_snapshot=self.snapshot, true_subset=true_subset, candidate_subset=context.candidate_subset, child_evaluations=[result], ) - - -class AssetAutomationEvaluator(NamedTuple): - """For now, this is an internal class that is used to help transition from the old format to the - new. Upstack, the original AutoMaterializePolicy class will be replaced with this. - """ - - condition: AutomationCondition - - def evaluate( - self, context: AssetAutomationEvaluationContext - ) -> Tuple[ConditionEvaluation, AssetDaemonAssetCursor]: - """Evaluates the auto materialize policy of a given asset. - - Returns: - - A ConditionEvaluation object representing information about this evaluation. If - `report_num_skipped` is set to `True`, then this will attempt to calculate the number of - skipped partitions in a backwards-compatible way. This can only be done for policies that - are in the format `(a | b | ...) & ~(c | d | ...). - - A new AssetDaemonAssetCursor that represents the state of the world after this evaluation. - """ - condition_context = context.get_root_condition_context() - condition_evaluation = self.condition.evaluate(condition_context) - - return condition_evaluation, context.get_new_asset_cursor(evaluation=condition_evaluation) diff --git a/python_modules/dagster/dagster/_core/definitions/asset_automation_condition_context.py b/python_modules/dagster/dagster/_core/definitions/asset_condition_evaluation_context.py similarity index 86% rename from python_modules/dagster/dagster/_core/definitions/asset_automation_condition_context.py rename to python_modules/dagster/dagster/_core/definitions/asset_condition_evaluation_context.py index 67d83eaac8582..c8a77a7e01d53 100644 --- a/python_modules/dagster/dagster/_core/definitions/asset_automation_condition_context.py +++ b/python_modules/dagster/dagster/_core/definitions/asset_condition_evaluation_context.py @@ -15,27 +15,27 @@ from .asset_subset import AssetSubset if TYPE_CHECKING: - from dagster._core.definitions.asset_automation_evaluator import AssetSubsetWithMetdata + from dagster._core.definitions.asset_condition import AssetSubsetWithMetdata - from .asset_automation_evaluator import AutomationCondition, ConditionEvaluation + from .asset_condition import AssetCondition, AssetConditionEvaluation from .asset_daemon_context import AssetDaemonContext @dataclass(frozen=True) -class AssetAutomationEvaluationContext: +class RootAssetConditionEvaluationContext: """Context object containing methods and properties used for evaluating the entire state of an asset's automation rules. """ asset_key: AssetKey asset_cursor: Optional[AssetDaemonAssetCursor] - root_condition: "AutomationCondition" + root_condition: "AssetCondition" instance_queryer: CachingInstanceQueryer data_time_resolver: CachingDataTimeResolver daemon_context: "AssetDaemonContext" - evaluation_results_by_key: Mapping[AssetKey, "ConditionEvaluation"] + evaluation_results_by_key: Mapping[AssetKey, "AssetConditionEvaluation"] expected_data_time_mapping: Mapping[AssetKey, Optional[datetime.datetime]] @property @@ -52,7 +52,7 @@ def evaluation_time(self) -> datetime.datetime: return self.instance_queryer.evaluation_time @functools.cached_property - def latest_evaluation(self) -> Optional["ConditionEvaluation"]: + def latest_evaluation(self) -> Optional["AssetConditionEvaluation"]: if not self.asset_cursor: return None return self.asset_cursor.latest_evaluation @@ -175,9 +175,9 @@ def will_update_asset_partition(self, asset_partition: AssetKeyPartitionKey) -> def empty_subset(self) -> AssetSubset: return AssetSubset.empty(self.asset_key, self.partitions_def) - def get_root_condition_context(self) -> "AssetAutomationConditionEvaluationContext": - return AssetAutomationConditionEvaluationContext( - asset_context=self, + def get_root_condition_context(self) -> "AssetConditionEvaluationContext": + return AssetConditionEvaluationContext( + root_context=self, condition=self.root_condition, candidate_subset=AssetSubset.all( asset_key=self.asset_key, @@ -188,7 +188,9 @@ def get_root_condition_context(self) -> "AssetAutomationConditionEvaluationConte latest_evaluation=self.latest_evaluation, ) - def get_new_asset_cursor(self, evaluation: "ConditionEvaluation") -> AssetDaemonAssetCursor: + def get_new_asset_cursor( + self, evaluation: "AssetConditionEvaluation" + ) -> AssetDaemonAssetCursor: """Returns a new AssetDaemonAssetCursor based on the current cursor and the results of this tick's evaluation. """ @@ -213,33 +215,33 @@ def get_new_asset_cursor(self, evaluation: "ConditionEvaluation") -> AssetDaemon @dataclass(frozen=True) -class AssetAutomationConditionEvaluationContext: - """Context object containing methods and properties used for evaluating a particular AutomationCondition.""" +class AssetConditionEvaluationContext: + """Context object containing methods and properties used for evaluating a particular AssetCondition.""" - asset_context: AssetAutomationEvaluationContext - condition: "AutomationCondition" + root_context: RootAssetConditionEvaluationContext + condition: "AssetCondition" candidate_subset: AssetSubset - latest_evaluation: Optional["ConditionEvaluation"] + latest_evaluation: Optional["AssetConditionEvaluation"] @property def asset_key(self) -> AssetKey: - return self.asset_context.asset_key + return self.root_context.asset_key @property def partitions_def(self) -> Optional[PartitionsDefinition]: - return self.asset_context.partitions_def + return self.root_context.partitions_def @property def asset_cursor(self) -> Optional[AssetDaemonAssetCursor]: - return self.asset_context.asset_cursor + return self.root_context.asset_cursor @property def asset_graph(self) -> AssetGraph: - return self.asset_context.asset_graph + return self.root_context.asset_graph @property def instance_queryer(self) -> CachingInstanceQueryer: - return self.asset_context.instance_queryer + return self.root_context.instance_queryer @property def max_storage_id(self) -> Optional[int]: @@ -264,9 +266,9 @@ def parent_has_updated_subset(self) -> AssetSubset: return AssetSubset.from_asset_partitions_set( self.asset_key, self.partitions_def, - self.asset_context.instance_queryer.asset_partitions_with_newly_updated_parents( + self.root_context.instance_queryer.asset_partitions_with_newly_updated_parents( latest_storage_id=self.max_storage_id, - child_asset_key=self.asset_context.asset_key, + child_asset_key=self.root_context.asset_key, map_old_time_partitions=False, ), ) @@ -277,7 +279,7 @@ def candidate_parent_has_or_will_update_subset(self) -> AssetSubset: the previous tick, or will update on this tick. """ return self.candidate_subset & ( - self.parent_has_updated_subset | self.asset_context.parent_will_update_subset + self.parent_has_updated_subset | self.root_context.parent_will_update_subset ) @property @@ -292,12 +294,12 @@ def candidates_not_evaluated_on_previous_tick_subset(self) -> AssetSubset: @property def materialized_since_previous_tick_subset(self) -> AssetSubset: """Returns the set of asset partitions that were materialized since the previous tick.""" - return self.asset_context.materialized_since_previous_tick_subset + return self.root_context.materialized_since_previous_tick_subset @property def materialized_requested_or_discarded_since_previous_tick_subset(self) -> AssetSubset: """Returns the set of asset partitions that were materialized since the previous tick.""" - return self.asset_context.materialized_requested_or_discarded_since_previous_tick_subset + return self.root_context.materialized_requested_or_discarded_since_previous_tick_subset @property def previous_tick_subsets_with_metadata(self) -> Sequence["AssetSubsetWithMetdata"]: @@ -305,13 +307,13 @@ def previous_tick_subsets_with_metadata(self) -> Sequence["AssetSubsetWithMetdat return self.latest_evaluation.subsets_with_metadata if self.latest_evaluation else [] def empty_subset(self) -> AssetSubset: - return self.asset_context.empty_subset() + return self.root_context.empty_subset() def for_child( - self, condition: "AutomationCondition", candidate_subset: AssetSubset - ) -> "AssetAutomationConditionEvaluationContext": - return AssetAutomationConditionEvaluationContext( - asset_context=self.asset_context, + self, condition: "AssetCondition", candidate_subset: AssetSubset, child_index: int + ) -> "AssetConditionEvaluationContext": + return AssetConditionEvaluationContext( + root_context=self.root_context, condition=condition, candidate_subset=candidate_subset, latest_evaluation=self.latest_evaluation.for_child(condition) diff --git a/python_modules/dagster/dagster/_core/definitions/asset_daemon_context.py b/python_modules/dagster/dagster/_core/definitions/asset_daemon_context.py index 323e4bd6657b3..a152fbcf92e7f 100644 --- a/python_modules/dagster/dagster/_core/definitions/asset_daemon_context.py +++ b/python_modules/dagster/dagster/_core/definitions/asset_daemon_context.py @@ -35,8 +35,8 @@ from ... import PartitionKeyRange from ..storage.tags import ASSET_PARTITION_RANGE_END_TAG, ASSET_PARTITION_RANGE_START_TAG -from .asset_automation_condition_context import AssetAutomationEvaluationContext -from .asset_automation_evaluator import ConditionEvaluation +from .asset_condition import AssetConditionEvaluation +from .asset_condition_evaluation_context import RootAssetConditionEvaluationContext from .asset_daemon_cursor import AssetDaemonAssetCursor, AssetDaemonCursor from .asset_graph import AssetGraph from .auto_materialize_rule import AutoMaterializeRule @@ -220,9 +220,9 @@ def get_new_latest_storage_id(self) -> Optional[int]: def evaluate_asset( self, asset_key: AssetKey, - evaluation_results_by_key: Mapping[AssetKey, ConditionEvaluation], + evaluation_results_by_key: Mapping[AssetKey, AssetConditionEvaluation], expected_data_time_mapping: Mapping[AssetKey, Optional[datetime.datetime]], - ) -> Tuple[ConditionEvaluation, AssetDaemonAssetCursor, Optional[datetime.datetime]]: + ) -> Tuple[AssetConditionEvaluation, AssetDaemonAssetCursor, Optional[datetime.datetime]]: """Evaluates the auto materialize policy of a given asset key. Params: @@ -236,21 +236,25 @@ def evaluate_asset( """ # convert the legacy AutoMaterializePolicy to an Evaluator - auto_materialize_policy_evaluator = check.not_none( + asset_condition = check.not_none( self.asset_graph.auto_materialize_policies_by_key.get(asset_key) - ).to_auto_materialize_policy_evaluator() + ).to_asset_condition() - context = AssetAutomationEvaluationContext( + context = RootAssetConditionEvaluationContext( asset_key=asset_key, asset_cursor=self.cursor.asset_cursor_for_key(asset_key, self.asset_graph), - root_condition=auto_materialize_policy_evaluator.condition, + root_condition=asset_condition, instance_queryer=self.instance_queryer, data_time_resolver=self.data_time_resolver, daemon_context=self, evaluation_results_by_key=evaluation_results_by_key, expected_data_time_mapping=expected_data_time_mapping, ) - evaluation, asset_cursor = auto_materialize_policy_evaluator.evaluate(context) + condition_context = context.get_root_condition_context() + + evaluation = asset_condition.evaluate(condition_context) + asset_cursor = context.get_new_asset_cursor(evaluation=evaluation) + expected_data_time = get_expected_data_time_for_asset_key( context, will_materialize=evaluation.true_subset.size > 0 ) @@ -269,7 +273,7 @@ def get_auto_materialize_asset_evaluations( """ asset_cursors: List[AssetDaemonAssetCursor] = [] - evaluation_results_by_key: Dict[AssetKey, ConditionEvaluation] = {} + evaluation_results_by_key: Dict[AssetKey, AssetConditionEvaluation] = {} legacy_evaluation_results_by_key: Dict[AssetKey, AutoMaterializeAssetEvaluation] = {} expected_data_time_mapping: Dict[AssetKey, Optional[datetime.datetime]] = defaultdict() to_request: Set[AssetKeyPartitionKey] = set() diff --git a/python_modules/dagster/dagster/_core/definitions/asset_daemon_cursor.py b/python_modules/dagster/dagster/_core/definitions/asset_daemon_cursor.py index 8cadb8d4afe43..e145d2016cb75 100644 --- a/python_modules/dagster/dagster/_core/definitions/asset_daemon_cursor.py +++ b/python_modules/dagster/dagster/_core/definitions/asset_daemon_cursor.py @@ -27,7 +27,7 @@ from .partition import PartitionsSubset if TYPE_CHECKING: - from .asset_automation_evaluator import ConditionEvaluation + from .asset_condition import AssetConditionEvaluation class AssetDaemonAssetCursor(NamedTuple): @@ -38,7 +38,7 @@ class AssetDaemonAssetCursor(NamedTuple): asset_key: AssetKey latest_storage_id: Optional[int] latest_evaluation_timestamp: Optional[float] - latest_evaluation: Optional["ConditionEvaluation"] + latest_evaluation: Optional["AssetConditionEvaluation"] materialized_requested_or_discarded_subset: AssetSubset @@ -75,7 +75,7 @@ def was_previously_handled(self, asset_key: AssetKey) -> bool: def asset_cursor_for_key( self, asset_key: AssetKey, asset_graph: AssetGraph ) -> AssetDaemonAssetCursor: - from .asset_automation_evaluator import ConditionEvaluation + from .asset_condition import AssetConditionEvaluation partitions_def = asset_graph.get_partitions_def(asset_key) handled_partitions_subset = self.handled_root_partitions_by_asset_key.get(asset_key) @@ -85,16 +85,14 @@ def asset_cursor_for_key( handled_subset = AssetSubset(asset_key=asset_key, value=True) else: handled_subset = AssetSubset.empty(asset_key, partitions_def) - condition = ( - check.not_none(asset_graph.get_auto_materialize_policy(asset_key)) - .to_auto_materialize_policy_evaluator() - .condition - ) + condition = check.not_none( + asset_graph.get_auto_materialize_policy(asset_key) + ).to_asset_condition() return AssetDaemonAssetCursor( asset_key=asset_key, latest_storage_id=self.latest_storage_id, latest_evaluation_timestamp=self.latest_evaluation_timestamp, - latest_evaluation=ConditionEvaluation.from_evaluation( + latest_evaluation=AssetConditionEvaluation.from_evaluation( condition=condition, evaluation=self.latest_evaluation_by_asset_key.get(asset_key), asset_graph=asset_graph, diff --git a/python_modules/dagster/dagster/_core/definitions/auto_materialize_policy.py b/python_modules/dagster/dagster/_core/definitions/auto_materialize_policy.py index b5aac93e1cc22..1e2375258baf0 100644 --- a/python_modules/dagster/dagster/_core/definitions/auto_materialize_policy.py +++ b/python_modules/dagster/dagster/_core/definitions/auto_materialize_policy.py @@ -11,7 +11,7 @@ ) if TYPE_CHECKING: - from dagster._core.definitions.asset_automation_evaluator import AssetAutomationEvaluator + from dagster._core.definitions.asset_condition import AssetCondition from dagster._core.definitions.auto_materialize_rule import ( AutoMaterializeRule, AutoMaterializeRuleSnapshot, @@ -253,24 +253,23 @@ def policy_type(self) -> AutoMaterializePolicyType: def rule_snapshots(self) -> Sequence["AutoMaterializeRuleSnapshot"]: return [rule.to_snapshot() for rule in self.rules] - def to_auto_materialize_policy_evaluator(self) -> "AssetAutomationEvaluator": + def to_asset_condition(self) -> "AssetCondition": """Converts a set of materialize / skip rules into a single binary expression.""" - from .asset_automation_evaluator import ( - AndAutomationCondition, - AssetAutomationEvaluator, - NotAutomationCondition, - OrAutomationCondition, + from .asset_condition import ( + AndAssetCondition, + NotAssetCondition, + OrAssetCondition, RuleCondition, ) from .auto_materialize_rule import DiscardOnMaxMaterializationsExceededRule - materialize_condition = OrAutomationCondition( + materialize_condition = OrAssetCondition( children=[ RuleCondition(rule) for rule in sorted(self.materialize_rules, key=lambda rule: rule.description) ] ) - skip_condition = OrAutomationCondition( + skip_condition = OrAssetCondition( children=[ RuleCondition(rule) for rule in sorted(self.skip_rules, key=lambda rule: rule.description) @@ -278,13 +277,13 @@ def to_auto_materialize_policy_evaluator(self) -> "AssetAutomationEvaluator": ) children = [ materialize_condition, - NotAutomationCondition([skip_condition]), + NotAssetCondition([skip_condition]), ] if self.max_materializations_per_minute: discard_condition = RuleCondition( DiscardOnMaxMaterializationsExceededRule(self.max_materializations_per_minute) ) - children.append(NotAutomationCondition([discard_condition])) + children.append(NotAssetCondition([discard_condition])) # results in an expression of the form (m1 | m2 | ... | mn) & ~(s1 | s2 | ... | sn) & ~d - return AssetAutomationEvaluator(condition=AndAutomationCondition(children)) + return AndAssetCondition(children) diff --git a/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule.py b/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule.py index 6a2debaab8239..dfd0d16ce75ea 100644 --- a/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule.py +++ b/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule.py @@ -44,7 +44,7 @@ reverse_cron_string_iterator, ) -from .asset_automation_condition_context import AssetAutomationConditionEvaluationContext +from .asset_condition_evaluation_context import AssetConditionEvaluationContext from .asset_graph import sort_key_for_asset_partition @@ -74,7 +74,7 @@ def description(self) -> str: def add_evaluation_data_from_previous_tick( self, - context: AssetAutomationConditionEvaluationContext, + context: AssetConditionEvaluationContext, asset_partitions_by_evaluation_data: Mapping[ AutoMaterializeRuleEvaluationData, Set[AssetKeyPartitionKey] ], @@ -90,7 +90,7 @@ def add_evaluation_data_from_previous_tick( ignore_subset: An AssetSubset which represents information that we should *not* carry forward from the previous tick. """ - from .asset_automation_evaluator import AssetSubsetWithMetdata + from .asset_condition import AssetSubsetWithMetdata mapping = defaultdict(lambda: context.empty_subset()) for evaluation_data, asset_partitions in asset_partitions_by_evaluation_data.items(): @@ -125,9 +125,7 @@ def add_evaluation_data_from_previous_tick( ) @abstractmethod - def evaluate_for_asset( - self, context: AssetAutomationConditionEvaluationContext - ) -> RuleEvaluationResults: + def evaluate_for_asset(self, context: AssetConditionEvaluationContext) -> RuleEvaluationResults: """The core evaluation function for the rule. This function takes in a context object and returns a mapping from evaluated rules to the set of asset partitions that the rule applies to. @@ -286,10 +284,8 @@ def decision_type(self) -> AutoMaterializeDecisionType: def description(self) -> str: return "required to meet this or downstream asset's freshness policy" - def evaluate_for_asset( - self, context: AssetAutomationConditionEvaluationContext - ) -> RuleEvaluationResults: - return freshness_evaluation_results_for_asset_key(context.asset_context) + def evaluate_for_asset(self, context: AssetConditionEvaluationContext) -> RuleEvaluationResults: + return freshness_evaluation_results_for_asset_key(context.root_context) @whitelist_for_serdes @@ -309,13 +305,13 @@ def description(self) -> str: return f"not materialized since last cron schedule tick of '{self.cron_schedule}' (timezone: {self.timezone})" def missed_cron_ticks( - self, context: AssetAutomationConditionEvaluationContext + self, context: AssetConditionEvaluationContext ) -> Sequence[datetime.datetime]: """Returns the cron ticks which have been missed since the previous cursor was generated.""" if not context.latest_evaluation_timestamp: previous_dt = next( reverse_cron_string_iterator( - end_timestamp=context.asset_context.evaluation_time.timestamp(), + end_timestamp=context.root_context.evaluation_time.timestamp(), cron_string=self.cron_schedule, execution_timezone=self.timezone, ) @@ -327,13 +323,13 @@ def missed_cron_ticks( cron_string=self.cron_schedule, execution_timezone=self.timezone, ): - if dt > context.asset_context.evaluation_time: + if dt > context.root_context.evaluation_time: break missed_ticks.append(dt) return missed_ticks def get_new_asset_partitions_to_request( - self, context: AssetAutomationConditionEvaluationContext + self, context: AssetConditionEvaluationContext ) -> AbstractSet[AssetKeyPartitionKey]: missed_ticks = self.missed_cron_ticks(context) @@ -349,7 +345,7 @@ def get_new_asset_partitions_to_request( return { AssetKeyPartitionKey(context.asset_key, partition_key) for partition_key in partitions_def.get_partition_keys( - current_time=context.asset_context.evaluation_time, + current_time=context.root_context.evaluation_time, dynamic_partitions_store=context.instance_queryer, ) } @@ -395,9 +391,7 @@ def get_new_asset_partitions_to_request( for time_partition_key in missed_time_partition_keys } - def evaluate_for_asset( - self, context: AssetAutomationConditionEvaluationContext - ) -> RuleEvaluationResults: + def evaluate_for_asset(self, context: AssetConditionEvaluationContext) -> RuleEvaluationResults: new_asset_partitions_to_request = self.get_new_asset_partitions_to_request(context) asset_subset_to_request = AssetSubset.from_asset_partitions_set( context.asset_key, context.partitions_def, new_asset_partitions_to_request @@ -432,7 +426,7 @@ def description(self) -> str: def passes( self, - context: AssetAutomationConditionEvaluationContext, + context: AssetConditionEvaluationContext, asset_partitions: Iterable[AssetKeyPartitionKey], ) -> Iterable[AssetKeyPartitionKey]: if self.latest_run_required_tags is None: @@ -442,7 +436,7 @@ def passes( asset_partitions_by_latest_run_id: Dict[str, Set[AssetKeyPartitionKey]] = defaultdict(set) for asset_partition in asset_partitions: - if context.asset_context.will_update_asset_partition(asset_partition): + if context.root_context.will_update_asset_partition(asset_partition): will_update_asset_partitions.add(asset_partition) else: record = context.instance_queryer.get_latest_materialization_or_observation_record( @@ -477,7 +471,7 @@ def passes( self.latest_run_required_tags.items() <= { AUTO_MATERIALIZE_TAG: "true", - **context.asset_context.daemon_context.auto_materialize_run_tags, + **context.root_context.daemon_context.auto_materialize_run_tags, }.items() ): return will_update_asset_partitions | updated_partitions_with_required_tags @@ -511,9 +505,7 @@ def description(self) -> str: else: return base - def evaluate_for_asset( - self, context: AssetAutomationConditionEvaluationContext - ) -> RuleEvaluationResults: + def evaluate_for_asset(self, context: AssetConditionEvaluationContext) -> RuleEvaluationResults: """Evaluates the set of asset partitions of this asset whose parents have been updated, or will update on this tick. """ @@ -538,7 +530,7 @@ def evaluate_for_asset( parent_asset_partitions, # do a precise check for updated parents, factoring in data versions, as long as # we're within reasonable limits on the number of partitions to check - respect_materialization_data_versions=context.asset_context.daemon_context.respect_materialization_data_versions + respect_materialization_data_versions=context.root_context.daemon_context.respect_materialization_data_versions and len(parent_asset_partitions) + subset_to_evaluate.size < 100, # ignore self-dependencies when checking for updated parents, to avoid historical # rematerializations from causing a chain of materializations to be kicked off @@ -548,7 +540,7 @@ def evaluate_for_asset( asset_partitions_by_updated_parents[parent].add(asset_partition) for parent in parent_asset_partitions: - if context.asset_context.will_update_asset_partition(parent): + if context.root_context.will_update_asset_partition(parent): asset_partitions_by_will_update_parents[parent].add(asset_partition) updated_and_will_update_parents = ( @@ -614,15 +606,13 @@ def decision_type(self) -> AutoMaterializeDecisionType: def description(self) -> str: return "materialization is missing" - def evaluate_for_asset( - self, context: AssetAutomationConditionEvaluationContext - ) -> RuleEvaluationResults: + def evaluate_for_asset(self, context: AssetConditionEvaluationContext) -> RuleEvaluationResults: """Evaluates the set of asset partitions for this asset which are missing and were not previously discarded. Currently only applies to root asset partitions and asset partitions with updated parents. """ missing_asset_partitions = set( - context.asset_context.never_materialized_requested_or_discarded_root_subset.asset_partitions + context.root_context.never_materialized_requested_or_discarded_root_subset.asset_partitions ) # in addition to missing root asset partitions, check any asset partitions with updated # parents to see if they're missing @@ -652,9 +642,7 @@ def decision_type(self) -> AutoMaterializeDecisionType: def description(self) -> str: return "waiting on upstream data to be up to date" - def evaluate_for_asset( - self, context: AssetAutomationConditionEvaluationContext - ) -> RuleEvaluationResults: + def evaluate_for_asset(self, context: AssetConditionEvaluationContext) -> RuleEvaluationResults: asset_partitions_by_evaluation_data = defaultdict(set) # only need to evaluate net-new candidates and candidates whose parents have changed @@ -667,7 +655,7 @@ def evaluate_for_asset( # find the root cause of why this asset partition's parents are outdated (if any) for ( parent - ) in context.asset_context.get_parents_that_will_not_be_materialized_on_current_tick( + ) in context.root_context.get_parents_that_will_not_be_materialized_on_current_tick( asset_partition=candidate ): if context.instance_queryer.have_ignorable_partition_mapping_for_outdated( @@ -701,7 +689,7 @@ def description(self) -> str: def evaluate_for_asset( self, - context: AssetAutomationConditionEvaluationContext, + context: AssetConditionEvaluationContext, ) -> RuleEvaluationResults: asset_partitions_by_evaluation_data = defaultdict(set) @@ -714,13 +702,13 @@ def evaluate_for_asset( missing_parent_asset_keys = set() for ( parent - ) in context.asset_context.get_parents_that_will_not_be_materialized_on_current_tick( + ) in context.root_context.get_parents_that_will_not_be_materialized_on_current_tick( asset_partition=candidate ): # ignore non-observable sources, which will never have a materialization or observation - if context.asset_context.asset_graph.is_source( + if context.root_context.asset_graph.is_source( parent.asset_key - ) and not context.asset_context.asset_graph.is_observable(parent.asset_key): + ) and not context.root_context.asset_graph.is_observable(parent.asset_key): continue if not context.instance_queryer.asset_partition_has_materialization_or_observation( parent @@ -770,7 +758,7 @@ def description(self) -> str: def evaluate_for_asset( self, - context: AssetAutomationConditionEvaluationContext, + context: AssetConditionEvaluationContext, ) -> RuleEvaluationResults: asset_partitions_by_evaluation_data = defaultdict(set) @@ -791,10 +779,10 @@ def evaluate_for_asset( context.instance_queryer.get_parent_asset_partitions_updated_after_child( candidate, parent_partitions, - context.asset_context.daemon_context.respect_materialization_data_versions, + context.root_context.daemon_context.respect_materialization_data_versions, ignored_parent_keys=set(), ) - | context.asset_context.parent_will_update_subset.asset_partitions + | context.root_context.parent_will_update_subset.asset_partitions ) if self.require_update_for_all_parent_partitions: @@ -836,9 +824,7 @@ def decision_type(self) -> AutoMaterializeDecisionType: def description(self) -> str: return "required parent partitions do not exist" - def evaluate_for_asset( - self, context: AssetAutomationConditionEvaluationContext - ) -> RuleEvaluationResults: + def evaluate_for_asset(self, context: AssetConditionEvaluationContext) -> RuleEvaluationResults: asset_partitions_by_evaluation_data = defaultdict(set) subset_to_evaluate = ( @@ -882,12 +868,10 @@ def description(self) -> str: else: return "targeted by an in-progress backfill" - def evaluate_for_asset( - self, context: AssetAutomationConditionEvaluationContext - ) -> RuleEvaluationResults: + def evaluate_for_asset(self, context: AssetConditionEvaluationContext) -> RuleEvaluationResults: backfilling_subset = ( context.instance_queryer.get_active_backfill_target_asset_graph_subset() - ).get_asset_subset(context.asset_key, context.asset_context.asset_graph) + ).get_asset_subset(context.asset_key, context.root_context.asset_graph) if backfilling_subset.size == 0: return context.empty_subset(), [] @@ -912,9 +896,7 @@ def decision_type(self) -> AutoMaterializeDecisionType: def description(self) -> str: return f"exceeds {self.limit} materialization(s) per minute" - def evaluate_for_asset( - self, context: AssetAutomationConditionEvaluationContext - ) -> RuleEvaluationResults: + def evaluate_for_asset(self, context: AssetConditionEvaluationContext) -> RuleEvaluationResults: # the set of asset partitions which exceed the limit rate_limited_asset_partitions = set( sorted( diff --git a/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule_evaluation.py b/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule_evaluation.py index 701d6b988c2fd..726d35dd25d8e 100644 --- a/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule_evaluation.py +++ b/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule_evaluation.py @@ -29,7 +29,7 @@ from .partition import SerializedPartitionsSubset if TYPE_CHECKING: - from dagster._core.definitions.asset_automation_evaluator import AssetSubsetWithMetdata + from dagster._core.definitions.asset_condition import AssetSubsetWithMetdata from dagster._core.instance import DynamicPartitionsStore @@ -272,7 +272,7 @@ def get_rule_evaluation_results( self, rule_snapshot: AutoMaterializeRuleSnapshot, asset_graph: AssetGraph ) -> RuleEvaluationResults: """For a given rule snapshot, returns the calculated evaluations for that rule.""" - from dagster._core.definitions.asset_automation_evaluator import AssetSubsetWithMetdata + from dagster._core.definitions.asset_condition import AssetSubsetWithMetdata true_subset = AssetSubset.empty( self.asset_key, asset_graph.get_partitions_def(self.asset_key) diff --git a/python_modules/dagster/dagster/_core/definitions/freshness_based_auto_materialize.py b/python_modules/dagster/dagster/_core/definitions/freshness_based_auto_materialize.py index fb632b631b9d3..e3468e1c4e4a2 100644 --- a/python_modules/dagster/dagster/_core/definitions/freshness_based_auto_materialize.py +++ b/python_modules/dagster/dagster/_core/definitions/freshness_based_auto_materialize.py @@ -12,14 +12,14 @@ import pendulum -from dagster._core.definitions.asset_automation_evaluator import AssetSubsetWithMetdata +from dagster._core.definitions.asset_condition import AssetSubsetWithMetdata from dagster._core.definitions.asset_subset import AssetSubset from dagster._core.definitions.events import AssetKeyPartitionKey from dagster._core.definitions.freshness_policy import FreshnessPolicy from dagster._utils.schedules import cron_string_iterator if TYPE_CHECKING: - from .asset_automation_condition_context import AssetAutomationEvaluationContext + from .asset_condition_evaluation_context import RootAssetConditionEvaluationContext from .auto_materialize_rule_evaluation import RuleEvaluationResults, TextRuleEvaluationData @@ -111,7 +111,7 @@ def get_execution_period_and_evaluation_data_for_policies( def get_expected_data_time_for_asset_key( - context: "AssetAutomationEvaluationContext", will_materialize: bool + context: "RootAssetConditionEvaluationContext", will_materialize: bool ) -> Optional[datetime.datetime]: """Returns the data time that you would expect this asset to have if you were to execute it on this tick. @@ -154,7 +154,7 @@ def get_expected_data_time_for_asset_key( def freshness_evaluation_results_for_asset_key( - context: "AssetAutomationEvaluationContext", + context: "RootAssetConditionEvaluationContext", ) -> "RuleEvaluationResults": """Returns a set of AssetKeyPartitionKeys to materialize in order to abide by the given FreshnessPolicies. From 92fede397b37ed345dd90a8077eaf93dace34609 Mon Sep 17 00:00:00 2001 From: Owen Kephart Date: Mon, 11 Dec 2023 13:41:31 -0800 Subject: [PATCH 05/56] Serialize AssetConditionEvaluation --- .../auto_materialize_asset_evaluations.py | 21 +- ...test_auto_materialize_asset_evaluations.py | 277 +---------- .../_core/definitions/asset_condition.py | 225 ++------- .../asset_condition_evaluation_context.py | 6 +- .../_core/definitions/asset_daemon_context.py | 39 +- .../_core/definitions/asset_daemon_cursor.py | 33 +- .../definitions/auto_materialize_rule.py | 4 +- .../auto_materialize_rule_evaluation.py | 435 +++++++++--------- .../freshness_based_auto_materialize.py | 5 +- .../dagster/_core/scheduler/instigation.py | 23 +- .../dagster/_core/storage/legacy_storage.py | 6 +- .../dagster/_core/storage/schedules/base.py | 8 +- .../storage/schedules/sql_schedule_storage.py | 10 +- .../dagster/dagster/_daemon/asset_daemon.py | 8 +- .../dagster/_utils/test/schedule_storage.py | 158 +++---- .../asset_daemon_scenario.py | 109 +++-- .../auto_materialize_tests/base_scenario.py | 30 +- .../test_asset_daemon_failure_recovery.py | 4 +- .../test_asset_daemon_fast.py | 37 -- .../test_auto_materialize_asset_evaluation.py | 264 ++++++----- .../updated_scenarios/cron_scenarios.py | 4 - .../updated_scenarios/partition_scenarios.py | 1 - .../schedule_storage/schedule_storage.py | 10 +- .../schedule_storage/schedule_storage.py | 10 +- 24 files changed, 625 insertions(+), 1102 deletions(-) diff --git a/python_modules/dagster-graphql/dagster_graphql/schema/auto_materialize_asset_evaluations.py b/python_modules/dagster-graphql/dagster_graphql/schema/auto_materialize_asset_evaluations.py index e82804373632e..2e65001502af9 100644 --- a/python_modules/dagster-graphql/dagster_graphql/schema/auto_materialize_asset_evaluations.py +++ b/python_modules/dagster-graphql/dagster_graphql/schema/auto_materialize_asset_evaluations.py @@ -195,22 +195,13 @@ def __init__( super().__init__( id=record.id, evaluationId=record.evaluation_id, - numRequested=record.evaluation.num_requested, - numSkipped=record.evaluation.num_skipped, - numDiscarded=record.evaluation.num_discarded, - rulesWithRuleEvaluations=create_graphene_auto_materialize_rules_with_rule_evaluations( - record.evaluation.partition_subsets_by_condition, partitions_def - ), + numRequested=record.evaluation.true_subset.size, + numSkipped=0, + numDiscarded=0, + rulesWithRuleEvaluations=[], timestamp=record.timestamp, - runIds=record.evaluation.run_ids, - rules=( - [ - GrapheneAutoMaterializeRule(snapshot) - for snapshot in record.evaluation.rule_snapshots - ] - if record.evaluation.rule_snapshots is not None - else None # Return None if no rules serialized in evaluation - ), + runIds=record.run_ids, + rules=[], assetKey=GrapheneAssetKey(path=record.asset_key.path), ) diff --git a/python_modules/dagster-graphql/dagster_graphql_tests/graphql/test_auto_materialize_asset_evaluations.py b/python_modules/dagster-graphql/dagster_graphql_tests/graphql/test_auto_materialize_asset_evaluations.py index ddaec319541bf..2dc470151648b 100644 --- a/python_modules/dagster-graphql/dagster_graphql_tests/graphql/test_auto_materialize_asset_evaluations.py +++ b/python_modules/dagster-graphql/dagster_graphql_tests/graphql/test_auto_materialize_asset_evaluations.py @@ -1,4 +1,4 @@ -from datetime import datetime +from typing import Sequence from unittest.mock import PropertyMock, patch import dagster._check as check @@ -8,24 +8,10 @@ AssetDaemonCursor, LegacyAssetDaemonCursorWrapper, ) -from dagster._core.definitions.auto_materialize_rule import AutoMaterializeRule -from dagster._core.definitions.auto_materialize_rule_evaluation import ( - AutoMaterializeAssetEvaluation, - AutoMaterializeRuleEvaluation, - ParentUpdatedRuleEvaluationData, - WaitingOnAssetsRuleEvaluationData, -) -from dagster._core.definitions.partition import ( - SerializedPartitionsSubset, - StaticPartitionsDefinition, -) from dagster._core.definitions.run_request import ( InstigatorType, ) -from dagster._core.definitions.sensor_definition import ( - SensorType, -) -from dagster._core.definitions.time_window_partitions import TimeWindowPartitionsDefinition +from dagster._core.definitions.sensor_definition import SensorType from dagster._core.host_representation.origin import ( ExternalInstigatorOrigin, ) @@ -43,12 +29,12 @@ _PRE_SENSOR_AUTO_MATERIALIZE_ORIGIN_ID, _PRE_SENSOR_AUTO_MATERIALIZE_SELECTOR_ID, ) +from dagster._serdes import deserialize_value from dagster_graphql.test.utils import execute_dagster_graphql, infer_repository from dagster_graphql_tests.graphql.graphql_context_test_suite import ( ExecutingGraphQLContextTestMatrix, ) -from dagster_graphql_tests.graphql.repo import static_partitions_def TICKS_QUERY = """ query AssetDameonTicksQuery($dayRange: Int, $dayOffset: Int, $statuses: [InstigationTickStatus!], $limit: Int, $cursor: String, $beforeTimestamp: Float, $afterTimestamp: Float) { @@ -407,24 +393,10 @@ def test_get_historic_rules(self, graphql_context: WorkspaceRequestContext): graphql_context.instance.schedule_storage ).add_auto_materialize_asset_evaluations( evaluation_id=10, - asset_evaluations=[ - AutoMaterializeAssetEvaluation( - asset_key=AssetKey("asset_one"), - partition_subsets_by_condition=[], - num_requested=0, - num_skipped=0, - num_discarded=0, - rule_snapshots=None, - ), - AutoMaterializeAssetEvaluation( - asset_key=AssetKey("asset_two"), - partition_subsets_by_condition=[], - num_requested=1, - num_skipped=0, - num_discarded=0, - rule_snapshots=[AutoMaterializeRule.materialize_on_missing().to_snapshot()], - ), - ], + asset_evaluations=deserialize_value( + '[{"__class__": "AutoMaterializeAssetEvaluation", "asset_key": {"__class__": "AssetKey", "path": ["asset_one"]}, "num_discarded": 0, "num_requested": 0, "num_skipped": 0, "partition_subsets_by_condition": [], "rule_snapshots": null, "run_ids": {"__set__": []}}, {"__class__": "AutoMaterializeAssetEvaluation", "asset_key": {"__class__": "AssetKey", "path": ["asset_two"]}, "num_discarded": 0, "num_requested": 1, "num_skipped": 0, "partition_subsets_by_condition": [], "rule_snapshots": [{"__class__": "AutoMaterializeRuleSnapshot", "class_name": "MaterializeOnMissingRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.MATERIALIZE"}, "description": "materialization is missing"}], "run_ids": {"__set__": []}}]', + Sequence, + ), ) results = execute_dagster_graphql( @@ -433,7 +405,7 @@ def test_get_historic_rules(self, graphql_context: WorkspaceRequestContext): variables={"assetKey": {"path": ["asset_one"]}, "limit": 10, "cursor": None}, ) assert len(results.data["autoMaterializeAssetEvaluationsOrError"]["records"]) == 1 - assert results.data["autoMaterializeAssetEvaluationsOrError"]["records"][0]["rules"] is None + assert results.data["autoMaterializeAssetEvaluationsOrError"]["records"][0]["rules"] == [] assert results.data["autoMaterializeAssetEvaluationsOrError"]["records"][0]["assetKey"] == { "path": ["asset_one"] } @@ -450,15 +422,8 @@ def test_get_historic_rules(self, graphql_context: WorkspaceRequestContext): "rules" ] ) - == 1 + == 0 ) - rule = results_asset_two.data["autoMaterializeAssetEvaluationsOrError"]["records"][0][ - "rules" - ][0] - - assert rule["decisionType"] == "MATERIALIZE" - assert rule["description"] == "materialization is missing" - assert rule["className"] == "MaterializeOnMissingRule" results_by_evaluation_id = execute_dagster_graphql( graphql_context, @@ -500,35 +465,14 @@ def test_get_historic_rules(self, graphql_context: WorkspaceRequestContext): def test_get_required_but_nonexistent_parent_evaluation( self, graphql_context: WorkspaceRequestContext ): - partitions_def = StaticPartitionsDefinition(["a", "b", "c", "d", "e", "f"]) - check.not_none( graphql_context.instance.schedule_storage ).add_auto_materialize_asset_evaluations( evaluation_id=10, - asset_evaluations=[ - AutoMaterializeAssetEvaluation( - asset_key=AssetKey("upstream_static_partitioned_asset"), - partition_subsets_by_condition=[ - ( - AutoMaterializeRuleEvaluation( - rule_snapshot=AutoMaterializeRule.skip_on_required_but_nonexistent_parents().to_snapshot(), - evaluation_data=WaitingOnAssetsRuleEvaluationData( - waiting_on_asset_keys=frozenset({AssetKey("blah")}) - ), - ), - SerializedPartitionsSubset.from_subset( - partitions_def.empty_subset().with_partition_keys(["a"]), - partitions_def, - None, # type: ignore - ), - ) - ], - num_requested=0, - num_skipped=1, - num_discarded=0, - ), - ], + asset_evaluations=deserialize_value( + '[{"__class__": "AutoMaterializeAssetEvaluation", "asset_key": {"__class__": "AssetKey", "path": ["upstream_static_partitioned_asset"]}, "num_discarded": 0, "num_requested": 0, "num_skipped": 1, "partition_subsets_by_condition": [[{"__class__": "AutoMaterializeRuleEvaluation", "evaluation_data": {"__class__": "WaitingOnAssetsRuleEvaluationData", "waiting_on_asset_keys": {"__frozenset__": [{"__class__": "AssetKey", "path": ["blah"]}]}}, "rule_snapshot": {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "SkipOnRequiredButNonexistentParentsRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.SKIP"}, "description": "required parent partitions do not exist"}}, {"__class__": "SerializedPartitionsSubset", "serialized_partitions_def_class_name": "StaticPartitionsDefinition", "serialized_partitions_def_unique_id": "7c2047f8b02e90a69136c1a657bd99ad80b433a2", "serialized_subset": "{\\"version\\": 1, \\"subset\\": [\\"a\\"]}"}]], "rule_snapshots": null, "run_ids": {"__set__": []}}]', + Sequence, + ), ) results = execute_dagster_graphql( @@ -549,22 +493,10 @@ def test_get_required_but_nonexistent_parent_evaluation( "records": [ { "numRequested": 0, - "numSkipped": 1, + "numSkipped": 0, "numDiscarded": 0, - "rulesWithRuleEvaluations": [ - { - "rule": {"decisionType": "SKIP"}, - "ruleEvaluations": [ - { - "partitionKeysOrError": {"partitionKeys": ["a"]}, - "evaluationData": { - "waitingOnAssetKeys": [{"path": ["blah"]}] - }, - } - ], - } - ], - "rules": None, + "rulesWithRuleEvaluations": [], + "rules": [], "assetKey": {"path": ["upstream_static_partitioned_asset"]}, } ], @@ -585,65 +517,10 @@ def _test_get_evaluations(self, graphql_context: WorkspaceRequestContext): graphql_context.instance.schedule_storage ).add_auto_materialize_asset_evaluations( evaluation_id=10, - asset_evaluations=[ - AutoMaterializeAssetEvaluation( - asset_key=AssetKey("asset_one"), - partition_subsets_by_condition=[], - num_requested=0, - num_skipped=0, - num_discarded=0, - ), - AutoMaterializeAssetEvaluation( - asset_key=AssetKey("asset_two"), - partition_subsets_by_condition=[ - ( - AutoMaterializeRuleEvaluation( - rule_snapshot=AutoMaterializeRule.materialize_on_missing().to_snapshot(), - evaluation_data=None, - ), - None, - ) - ], - num_requested=1, - num_skipped=0, - num_discarded=0, - ), - AutoMaterializeAssetEvaluation( - asset_key=AssetKey("asset_three"), - partition_subsets_by_condition=[ - ( - AutoMaterializeRuleEvaluation( - rule_snapshot=AutoMaterializeRule.skip_on_parent_outdated().to_snapshot(), - evaluation_data=WaitingOnAssetsRuleEvaluationData( - waiting_on_asset_keys=frozenset([AssetKey("asset_two")]) - ), - ), - None, - ) - ], - num_requested=0, - num_skipped=1, - num_discarded=0, - ), - AutoMaterializeAssetEvaluation( - asset_key=AssetKey("asset_four"), - partition_subsets_by_condition=[ - ( - AutoMaterializeRuleEvaluation( - rule_snapshot=AutoMaterializeRule.materialize_on_parent_updated().to_snapshot(), - evaluation_data=ParentUpdatedRuleEvaluationData( - updated_asset_keys=frozenset([AssetKey("asset_two")]), - will_update_asset_keys=frozenset([AssetKey("asset_three")]), - ), - ), - None, - ) - ], - num_requested=1, - num_skipped=0, - num_discarded=0, - ), - ], + asset_evaluations=deserialize_value( + '[{"__class__": "AutoMaterializeAssetEvaluation", "asset_key": {"__class__": "AssetKey", "path": ["asset_one"]}, "num_discarded": 0, "num_requested": 0, "num_skipped": 0, "partition_subsets_by_condition": [], "rule_snapshots": null, "run_ids": {"__set__": []}}, {"__class__": "AutoMaterializeAssetEvaluation", "asset_key": {"__class__": "AssetKey", "path": ["asset_two"]}, "num_discarded": 0, "num_requested": 1, "num_skipped": 0, "partition_subsets_by_condition": [[{"__class__": "AutoMaterializeRuleEvaluation", "evaluation_data": null, "rule_snapshot": {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "MaterializeOnMissingRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.MATERIALIZE"}, "description": "materialization is missing"}}, null]], "rule_snapshots": null, "run_ids": {"__set__": []}}, {"__class__": "AutoMaterializeAssetEvaluation", "asset_key": {"__class__": "AssetKey", "path": ["asset_three"]}, "num_discarded": 0, "num_requested": 0, "num_skipped": 1, "partition_subsets_by_condition": [[{"__class__": "AutoMaterializeRuleEvaluation", "evaluation_data": {"__class__": "WaitingOnAssetsRuleEvaluationData", "waiting_on_asset_keys": {"__frozenset__": [{"__class__": "AssetKey", "path": ["asset_two"]}]}}, "rule_snapshot": {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "SkipOnParentOutdatedRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.SKIP"}, "description": "waiting on upstream data to be up to date"}}, null]], "rule_snapshots": null, "run_ids": {"__set__": []}}, {"__class__": "AutoMaterializeAssetEvaluation", "asset_key": {"__class__": "AssetKey", "path": ["asset_four"]}, "num_discarded": 0, "num_requested": 1, "num_skipped": 0, "partition_subsets_by_condition": [[{"__class__": "AutoMaterializeRuleEvaluation", "evaluation_data": {"__class__": "ParentUpdatedRuleEvaluationData", "updated_asset_keys": {"__frozenset__": [{"__class__": "AssetKey", "path": ["asset_two"]}]}, "will_update_asset_keys": {"__frozenset__": [{"__class__": "AssetKey", "path": ["asset_three"]}]}}, "rule_snapshot": {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "MaterializeOnParentUpdatedRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.MATERIALIZE"}, "description": "upstream data has changed since latest materialization"}}, null]], "rule_snapshots": null, "run_ids": {"__set__": []}}]', + Sequence, + ), ) results = execute_dagster_graphql( @@ -786,29 +663,10 @@ def _test_get_evaluations_with_partitions(self, graphql_context: WorkspaceReques graphql_context.instance.schedule_storage ).add_auto_materialize_asset_evaluations( evaluation_id=10, - asset_evaluations=[ - AutoMaterializeAssetEvaluation( - asset_key=AssetKey("upstream_static_partitioned_asset"), - partition_subsets_by_condition=[ - ( - AutoMaterializeRuleEvaluation( - rule_snapshot=AutoMaterializeRule.materialize_on_missing().to_snapshot(), - evaluation_data=None, - ), - SerializedPartitionsSubset.from_subset( - static_partitions_def.empty_subset().with_partition_keys( - ["a", "b"] - ), - static_partitions_def, - None, # type: ignore - ), - ) - ], - num_requested=2, - num_skipped=0, - num_discarded=0, - ), - ], + asset_evaluations=deserialize_value( + '[{"__class__": "AutoMaterializeAssetEvaluation", "asset_key": {"__class__": "AssetKey", "path": ["upstream_static_partitioned_asset"]}, "num_discarded": 0, "num_requested": 2, "num_skipped": 0, "partition_subsets_by_condition": [[{"__class__": "AutoMaterializeRuleEvaluation", "evaluation_data": null, "rule_snapshot": {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "MaterializeOnMissingRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.MATERIALIZE"}, "description": "materialization is missing"}}, {"__class__": "SerializedPartitionsSubset", "serialized_partitions_def_class_name": "StaticPartitionsDefinition", "serialized_partitions_def_unique_id": "7c2047f8b02e90a69136c1a657bd99ad80b433a2", "serialized_subset": "{\\"version\\": 1, \\"subset\\": [\\"a\\", \\"b\\"]}"}]], "rule_snapshots": null, "run_ids": {"__set__": []}}]', + Sequence, + ), ) results = execute_dagster_graphql( @@ -848,93 +706,6 @@ def _test_get_evaluations_with_partitions(self, graphql_context: WorkspaceReques ][0]["ruleEvaluations"][0]["partitionKeysOrError"]["partitionKeys"] ) == {"a", "b"} - def _test_get_evaluations_invalid_partitions(self, graphql_context: WorkspaceRequestContext): - wrong_partitions_def = TimeWindowPartitionsDefinition( - cron_schedule="0 0 * * *", start=datetime(year=2020, month=1, day=5), fmt="%Y-%m-%d" - ) - - check.not_none( - graphql_context.instance.schedule_storage - ).add_auto_materialize_asset_evaluations( - evaluation_id=10, - asset_evaluations=[ - AutoMaterializeAssetEvaluation( - asset_key=AssetKey("upstream_static_partitioned_asset"), - partition_subsets_by_condition=[ - ( - AutoMaterializeRuleEvaluation( - rule_snapshot=AutoMaterializeRule.materialize_on_missing().to_snapshot(), - evaluation_data=None, - ), - SerializedPartitionsSubset.from_subset( - wrong_partitions_def.empty_subset().with_partition_keys( - ["2023-07-07"] - ), - wrong_partitions_def, - None, # type: ignore - ), - ) - ], - num_requested=2, - num_skipped=0, - num_discarded=0, - ), - ], - ) - - results = execute_dagster_graphql( - graphql_context, - QUERY, - variables={ - "assetKey": {"path": ["upstream_static_partitioned_asset"]}, - "limit": 10, - "cursor": None, - }, - ) - assert results.data == { - "assetNodeOrError": { - "currentAutoMaterializeEvaluationId": None, - }, - "autoMaterializeAssetEvaluationsOrError": { - "records": [ - { - "numRequested": 2, - "numSkipped": 0, - "numDiscarded": 0, - "rulesWithRuleEvaluations": [ - { - "rule": {"decisionType": "MATERIALIZE"}, - "ruleEvaluations": [ - { - "evaluationData": None, - "partitionKeysOrError": { - "message": ( - "Partition subset cannot be deserialized. The" - " PartitionsDefinition may have changed." - ) - }, - } - ], - }, - ], - } - ], - }, - } - - results_by_evaluation_id = execute_dagster_graphql( - graphql_context, - QUERY_FOR_EVALUATION_ID, - variables={"evaluationId": 10}, - ) - - records = results_by_evaluation_id.data["autoMaterializeEvaluationsForEvaluationId"][ - "records" - ] - - assert len(records) == 1 - assert records[0] == results.data["autoMaterializeAssetEvaluationsOrError"]["records"][0] - def _test_current_evaluation_id(self, graphql_context: WorkspaceRequestContext): graphql_context.instance.daemon_cursor_storage.set_cursor_values( {_PRE_SENSOR_AUTO_MATERIALIZE_CURSOR_KEY: AssetDaemonCursor.empty().serialize()} diff --git a/python_modules/dagster/dagster/_core/definitions/asset_condition.py b/python_modules/dagster/dagster/_core/definitions/asset_condition.py index d40ffa4709328..c23a368685b9d 100644 --- a/python_modules/dagster/dagster/_core/definitions/asset_condition.py +++ b/python_modules/dagster/dagster/_core/definitions/asset_condition.py @@ -13,15 +13,9 @@ ) import dagster._check as check -from dagster._core.definitions.asset_graph import AssetGraph -from dagster._core.definitions.auto_materialize_rule_evaluation import ( - AutoMaterializeAssetEvaluation, - AutoMaterializeDecisionType, - AutoMaterializeRuleEvaluation, - AutoMaterializeRuleEvaluationData, -) -from dagster._core.definitions.events import AssetKey, AssetKeyPartitionKey +from dagster._core.definitions.events import AssetKey from dagster._core.definitions.metadata import MetadataMapping, MetadataValue +from dagster._serdes.serdes import whitelist_for_serdes from .asset_condition_evaluation_context import ( AssetConditionEvaluationContext, @@ -29,11 +23,10 @@ from .asset_subset import AssetSubset if TYPE_CHECKING: - from dagster._utils.caching_instance_queryer import CachingInstanceQueryer - from .auto_materialize_rule import AutoMaterializeRule +@whitelist_for_serdes class AssetConditionSnapshot(NamedTuple): """A serializable snapshot of a node in the AutomationCondition tree.""" @@ -49,7 +42,8 @@ def hash(self) -> str: ).hexdigest() -class AssetSubsetWithMetdata(NamedTuple): +@whitelist_for_serdes +class AssetSubsetWithMetadata(NamedTuple): """An asset subset with metadata that corresponds to it.""" subset: AssetSubset @@ -60,64 +54,30 @@ def frozen_metadata(self) -> FrozenSet[Tuple[str, MetadataValue]]: return frozenset(self.metadata.items()) +@whitelist_for_serdes class AssetConditionEvaluation(NamedTuple): """Internal representation of the results of evaluating a node in the evaluation tree.""" condition_snapshot: AssetConditionSnapshot true_subset: AssetSubset - candidate_subset: AssetSubset - subsets_with_metadata: Sequence[AssetSubsetWithMetdata] = [] + candidate_subset: Optional[AssetSubset] + subsets_with_metadata: Sequence[AssetSubsetWithMetadata] = [] child_evaluations: Sequence["AssetConditionEvaluation"] = [] - def all_results( - self, condition: "AssetCondition" - ) -> Sequence[Tuple[AutoMaterializeRuleEvaluation, AbstractSet[AssetKeyPartitionKey]]]: - """This method is a placeholder to allow us to convert this into a shape that other parts - of the system understand. - """ - if isinstance(condition, RuleCondition): - if self.subsets_with_metadata: - results = [ - ( - AutoMaterializeRuleEvaluation( - rule_snapshot=condition.rule.to_snapshot(), - evaluation_data=AutoMaterializeRuleEvaluationData.from_metadata( - elt.metadata - ), - ), - elt.subset.asset_partitions, - ) - for elt in self.subsets_with_metadata - ] - else: - # if not provided specific metadata, just use the true subset - asset_partitions = self.true_subset.asset_partitions - results = ( - [ - ( - AutoMaterializeRuleEvaluation( - rule_snapshot=condition.rule.to_snapshot(), evaluation_data=None - ), - asset_partitions, - ) - ] - if asset_partitions - else [] - ) - else: - results = [] - for i, child in enumerate(self.child_evaluations): - results = [*results, *child.all_results(condition.children[i])] - return results - - def skip_subset_size(self, condition: "AssetCondition") -> int: - # backcompat way to calculate the set of skipped partitions for legacy policies - if not condition.is_legacy: - return 0 - - not_skip_evaluation = self.child_evaluations[1] - skip_evaluation = not_skip_evaluation.child_evaluations[0] - return skip_evaluation.true_subset.size + @property + def asset_key(self) -> AssetKey: + return self.true_subset.asset_key + + def equivalent_to_stored_evaluation(self, other: Optional["AssetConditionEvaluation"]) -> bool: + """Returns if all fields other than `run_ids` are equal.""" + return ( + other is not None + and self.condition_snapshot == other.condition_snapshot + and self.true_subset == other.true_subset + and self.candidate_subset == other.candidate_subset + and self.subsets_with_metadata == other.subsets_with_metadata + and self.child_evaluations == other.child_evaluations + ) def discard_subset(self, condition: "AssetCondition") -> Optional[AssetSubset]: not_discard_condition = condition.not_discard_condition @@ -128,10 +88,6 @@ def discard_subset(self, condition: "AssetCondition") -> Optional[AssetSubset]: discard_evaluation = not_discard_evaluation.child_evaluations[0] return discard_evaluation.true_subset - def discard_subset_size(self, condition: "AssetCondition") -> int: - discard_subset = self.discard_subset(condition) - return discard_subset.size if discard_subset else 0 - def for_child(self, child_condition: "AssetCondition") -> Optional["AssetConditionEvaluation"]: """Returns the evaluation of a given child condition by finding the child evaluation that has an identical hash to the given condition. @@ -143,131 +99,26 @@ def for_child(self, child_condition: "AssetCondition") -> Optional["AssetConditi return None - def to_evaluation( - self, - asset_key: AssetKey, - asset_graph: AssetGraph, - instance_queryer: "CachingInstanceQueryer", - ) -> AutoMaterializeAssetEvaluation: - """This method is a placeholder to allow us to convert this into a shape that other parts - of the system understand. - """ - condition = check.not_none( - asset_graph.get_auto_materialize_policy(asset_key) - ).to_asset_condition() - - return AutoMaterializeAssetEvaluation.from_rule_evaluation_results( - asset_key=asset_key, - asset_graph=asset_graph, - asset_partitions_by_rule_evaluation=self.all_results(condition), - num_requested=self.true_subset.size, - num_skipped=self.skip_subset_size(condition), - num_discarded=self.discard_subset_size(condition), - dynamic_partitions_store=instance_queryer, - ) + def with_run_ids(self, run_ids: AbstractSet[str]) -> "AssetConditionEvaluationWithRunIds": + return AssetConditionEvaluationWithRunIds(evaluation=self, run_ids=frozenset(run_ids)) - @staticmethod - def from_evaluation_and_rule( - evaluation: AutoMaterializeAssetEvaluation, - asset_graph: AssetGraph, - rule: "AutoMaterializeRule", - ) -> "AssetConditionEvaluation": - asset_key = evaluation.asset_key - partitions_def = asset_graph.get_partitions_def(asset_key) - - true_subset, subsets_with_metadata = evaluation.get_rule_evaluation_results( - rule.to_snapshot(), asset_graph - ) - return AssetConditionEvaluation( - condition_snapshot=RuleCondition(rule=rule).snapshot, - true_subset=true_subset, - candidate_subset=AssetSubset.empty(asset_key, partitions_def) - if rule.decision_type == AutoMaterializeDecisionType.MATERIALIZE - else evaluation.get_evaluated_subset(asset_graph), - subsets_with_metadata=subsets_with_metadata, - ) - @staticmethod - def from_evaluation( - condition: "AssetCondition", - evaluation: Optional[AutoMaterializeAssetEvaluation], - asset_graph: AssetGraph, - ) -> Optional["AssetConditionEvaluation"]: - """This method is a placeholder to allow us to convert the serialized objects the system - uses into a more-convenient internal representation. - """ - if not condition.is_legacy or not evaluation: - return None +@whitelist_for_serdes +class AssetConditionEvaluationWithRunIds(NamedTuple): + """A union of an AssetConditionEvaluation and the set of run IDs that have been launched in + response to it. + """ - asset_key = evaluation.asset_key - partitions_def = asset_graph.get_partitions_def(asset_key) - empty_subset = AssetSubset.empty(asset_key, partitions_def) - - materialize_condition, not_skip_condition = condition.children[:2] - skip_condition = not_skip_condition.children[0] - materialize_rules = [ - materialize_condition.rule - for materialize_condition in materialize_condition.children - if isinstance(materialize_condition, RuleCondition) - and materialize_condition.rule.to_snapshot() in (evaluation.rule_snapshots or set()) - ] - skip_rules = [ - skip_condition.rule - for skip_condition in skip_condition.children - if isinstance(skip_condition, RuleCondition) - and skip_condition.rule.to_snapshot() in (evaluation.rule_snapshots or set()) - ] - children = [ - AssetConditionEvaluation( - condition_snapshot=materialize_condition.snapshot, - true_subset=empty_subset, - candidate_subset=empty_subset, - child_evaluations=[ - AssetConditionEvaluation.from_evaluation_and_rule(evaluation, asset_graph, rule) - for rule in materialize_rules - ], - ), - AssetConditionEvaluation( - condition_snapshot=not_skip_condition.snapshot, - true_subset=empty_subset, - candidate_subset=empty_subset, - child_evaluations=[ - AssetConditionEvaluation( - condition_snapshot=skip_condition.snapshot, - true_subset=empty_subset, - candidate_subset=empty_subset, - child_evaluations=[ - AssetConditionEvaluation.from_evaluation_and_rule( - evaluation, asset_graph, rule - ) - for rule in skip_rules - ], - ) - ], - ), - ] - if condition.not_discard_condition: - discard_condition = condition.not_discard_condition.children[0] - if isinstance(discard_condition, RuleCondition): - children.append( - AssetConditionEvaluation( - condition_snapshot=condition.not_discard_condition.snapshot, - true_subset=empty_subset, - candidate_subset=empty_subset, - child_evaluations=[ - AssetConditionEvaluation.from_evaluation_and_rule( - evaluation, asset_graph, discard_condition.rule - ) - ], - ) - ) + evaluation: AssetConditionEvaluation + run_ids: FrozenSet[str] - return AssetConditionEvaluation( - condition_snapshot=condition.snapshot, - true_subset=evaluation.get_requested_subset(asset_graph), - candidate_subset=empty_subset, - child_evaluations=children, - ) + @property + def asset_key(self) -> AssetKey: + return self.evaluation.asset_key + + @property + def num_requested(self) -> int: + return self.evaluation.true_subset.size class AssetCondition(ABC): diff --git a/python_modules/dagster/dagster/_core/definitions/asset_condition_evaluation_context.py b/python_modules/dagster/dagster/_core/definitions/asset_condition_evaluation_context.py index c8a77a7e01d53..565fdbd579665 100644 --- a/python_modules/dagster/dagster/_core/definitions/asset_condition_evaluation_context.py +++ b/python_modules/dagster/dagster/_core/definitions/asset_condition_evaluation_context.py @@ -15,7 +15,7 @@ from .asset_subset import AssetSubset if TYPE_CHECKING: - from dagster._core.definitions.asset_condition import AssetSubsetWithMetdata + from dagster._core.definitions.asset_condition import AssetSubsetWithMetadata from .asset_condition import AssetCondition, AssetConditionEvaluation from .asset_daemon_context import AssetDaemonContext @@ -287,7 +287,7 @@ def candidates_not_evaluated_on_previous_tick_subset(self) -> AssetSubset: """Returns the set of candidates for this tick which were not candidates on the previous tick. """ - if not self.latest_evaluation: + if not self.latest_evaluation or not self.latest_evaluation.candidate_subset: return self.candidate_subset return self.candidate_subset - self.latest_evaluation.candidate_subset @@ -302,7 +302,7 @@ def materialized_requested_or_discarded_since_previous_tick_subset(self) -> Asse return self.root_context.materialized_requested_or_discarded_since_previous_tick_subset @property - def previous_tick_subsets_with_metadata(self) -> Sequence["AssetSubsetWithMetdata"]: + def previous_tick_subsets_with_metadata(self) -> Sequence["AssetSubsetWithMetadata"]: """Returns the RuleEvaluationResults calculated on the previous tick for this condition.""" return self.latest_evaluation.subsets_with_metadata if self.latest_evaluation else [] diff --git a/python_modules/dagster/dagster/_core/definitions/asset_daemon_context.py b/python_modules/dagster/dagster/_core/definitions/asset_daemon_context.py index a152fbcf92e7f..651fd99ad5d7b 100644 --- a/python_modules/dagster/dagster/_core/definitions/asset_daemon_context.py +++ b/python_modules/dagster/dagster/_core/definitions/asset_daemon_context.py @@ -40,7 +40,6 @@ from .asset_daemon_cursor import AssetDaemonAssetCursor, AssetDaemonCursor from .asset_graph import AssetGraph from .auto_materialize_rule import AutoMaterializeRule -from .auto_materialize_rule_evaluation import AutoMaterializeAssetEvaluation from .backfill_policy import BackfillPolicy, BackfillPolicyType from .freshness_based_auto_materialize import get_expected_data_time_for_asset_key from .partition import PartitionsDefinition, ScheduleType @@ -260,10 +259,10 @@ def evaluate_asset( ) return evaluation, asset_cursor, expected_data_time - def get_auto_materialize_asset_evaluations( + def get_asset_condition_evaluations( self, ) -> Tuple[ - Sequence[AutoMaterializeAssetEvaluation], + Sequence[AssetConditionEvaluation], Sequence[AssetDaemonAssetCursor], AbstractSet[AssetKeyPartitionKey], ]: @@ -274,7 +273,6 @@ def get_auto_materialize_asset_evaluations( asset_cursors: List[AssetDaemonAssetCursor] = [] evaluation_results_by_key: Dict[AssetKey, AssetConditionEvaluation] = {} - legacy_evaluation_results_by_key: Dict[AssetKey, AutoMaterializeAssetEvaluation] = {} expected_data_time_mapping: Dict[AssetKey, Optional[datetime.datetime]] = defaultdict() to_request: Set[AssetKeyPartitionKey] = set() @@ -302,20 +300,8 @@ def get_auto_materialize_asset_evaluations( asset_key, evaluation_results_by_key, expected_data_time_mapping ) - # convert the new-format evaluation to the legacy format - legacy_evaluation = evaluation.to_evaluation( - asset_key, self.asset_graph, self.instance_queryer - ) - - log_fn = ( - self._logger.info - if ( - legacy_evaluation.num_requested - or legacy_evaluation.num_skipped - or legacy_evaluation.num_discarded - ) - else self._logger.debug - ) + num_requested = evaluation.true_subset.size + log_fn = self._logger.info if num_requested > 0 else self._logger.debug to_request_asset_partitions = evaluation.true_subset.asset_partitions to_request_str = ",".join( @@ -324,19 +310,17 @@ def get_auto_materialize_asset_evaluations( to_request |= to_request_asset_partitions log_fn( - f"Asset {asset_key.to_user_string()} evaluation result: {legacy_evaluation.num_requested}" - f" requested ({to_request_str}), {legacy_evaluation.num_skipped}" - f" skipped, {legacy_evaluation.num_discarded} discarded ({format(time.time()-start_time, '.3f')} seconds)" + f"Asset {asset_key.to_user_string()} evaluation result: {num_requested}" + f" requested ({to_request_str}) ({format(time.time()-start_time, '.3f')} seconds)" ) evaluation_results_by_key[asset_key] = evaluation - legacy_evaluation_results_by_key[asset_key] = legacy_evaluation expected_data_time_mapping[asset_key] = expected_data_time asset_cursors.append(asset_cursor_for_asset) # if we need to materialize any partitions of a non-subsettable multi-asset, we need to # materialize all of them - if legacy_evaluation.num_requested > 0: + if num_requested > 0: for neighbor_key in self.asset_graph.get_required_multi_asset_keys(asset_key): expected_data_time_mapping[neighbor_key] = expected_data_time visited_multi_asset_keys.add(neighbor_key) @@ -345,11 +329,11 @@ def get_auto_materialize_asset_evaluations( for ap in evaluation.true_subset.asset_partitions } - return (list(legacy_evaluation_results_by_key.values()), asset_cursors, to_request) + return (list(evaluation_results_by_key.values()), asset_cursors, to_request) def evaluate( self, - ) -> Tuple[Sequence[RunRequest], AssetDaemonCursor, Sequence[AutoMaterializeAssetEvaluation]]: + ) -> Tuple[Sequence[RunRequest], AssetDaemonCursor, Sequence[AssetConditionEvaluation]]: observe_request_timestamp = pendulum.now().timestamp() auto_observe_run_requests = ( get_auto_observe_run_requests( @@ -363,7 +347,7 @@ def evaluate( else [] ) - evaluations, asset_cursors, to_request = self.get_auto_materialize_asset_evaluations() + evaluations, asset_cursors, to_request = self.get_asset_condition_evaluations() run_requests = [ *build_run_requests( @@ -394,8 +378,7 @@ def evaluate( evaluation for evaluation in evaluations if not evaluation.equivalent_to_stored_evaluation( - self.cursor.latest_evaluation_by_asset_key.get(evaluation.asset_key), - self.asset_graph, + self.cursor.latest_evaluation_by_asset_key.get(evaluation.asset_key) ) ], ) diff --git a/python_modules/dagster/dagster/_core/definitions/asset_daemon_cursor.py b/python_modules/dagster/dagster/_core/definitions/asset_daemon_cursor.py index e145d2016cb75..75a2ed1d73ea0 100644 --- a/python_modules/dagster/dagster/_core/definitions/asset_daemon_cursor.py +++ b/python_modules/dagster/dagster/_core/definitions/asset_daemon_cursor.py @@ -12,9 +12,6 @@ ) import dagster._check as check -from dagster._core.definitions.auto_materialize_rule_evaluation import ( - AutoMaterializeAssetEvaluation, -) from dagster._core.definitions.events import AssetKey from dagster._core.definitions.time_window_partitions import ( TimeWindowPartitionsDefinition, @@ -66,7 +63,7 @@ class AssetDaemonCursor(NamedTuple): handled_root_partitions_by_asset_key: Mapping[AssetKey, PartitionsSubset] evaluation_id: int last_observe_request_timestamp_by_asset_key: Mapping[AssetKey, float] - latest_evaluation_by_asset_key: Mapping[AssetKey, AutoMaterializeAssetEvaluation] + latest_evaluation_by_asset_key: Mapping[AssetKey, "AssetConditionEvaluation"] latest_evaluation_timestamp: Optional[float] def was_previously_handled(self, asset_key: AssetKey) -> bool: @@ -75,8 +72,6 @@ def was_previously_handled(self, asset_key: AssetKey) -> bool: def asset_cursor_for_key( self, asset_key: AssetKey, asset_graph: AssetGraph ) -> AssetDaemonAssetCursor: - from .asset_condition import AssetConditionEvaluation - partitions_def = asset_graph.get_partitions_def(asset_key) handled_partitions_subset = self.handled_root_partitions_by_asset_key.get(asset_key) if handled_partitions_subset is not None: @@ -85,18 +80,11 @@ def asset_cursor_for_key( handled_subset = AssetSubset(asset_key=asset_key, value=True) else: handled_subset = AssetSubset.empty(asset_key, partitions_def) - condition = check.not_none( - asset_graph.get_auto_materialize_policy(asset_key) - ).to_asset_condition() return AssetDaemonAssetCursor( asset_key=asset_key, latest_storage_id=self.latest_storage_id, latest_evaluation_timestamp=self.latest_evaluation_timestamp, - latest_evaluation=AssetConditionEvaluation.from_evaluation( - condition=condition, - evaluation=self.latest_evaluation_by_asset_key.get(asset_key), - asset_graph=asset_graph, - ), + latest_evaluation=self.latest_evaluation_by_asset_key.get(asset_key), materialized_requested_or_discarded_subset=handled_subset, ) @@ -106,7 +94,7 @@ def with_updates( evaluation_id: int, newly_observe_requested_asset_keys: Sequence[AssetKey], observe_request_timestamp: float, - evaluations: Sequence[AutoMaterializeAssetEvaluation], + evaluations: Sequence["AssetConditionEvaluation"], evaluation_time: datetime.datetime, asset_cursors: Sequence[AssetDaemonAssetCursor], ) -> "AssetDaemonCursor": @@ -128,10 +116,7 @@ def with_updates( ) latest_evaluation_by_asset_key = { - evaluation.asset_key: evaluation - for evaluation in evaluations - # don't bother storing empty evaluations on the cursor - if not evaluation.is_empty + evaluation.asset_key: evaluation for evaluation in evaluations } return AssetDaemonCursor( @@ -167,6 +152,8 @@ def empty(cls) -> "AssetDaemonCursor": @classmethod def from_serialized(cls, cursor: str, asset_graph: AssetGraph) -> "AssetDaemonCursor": + from .asset_condition import AssetConditionEvaluationWithRunIds + data = json.loads(cursor) if isinstance(data, list): # backcompat @@ -232,9 +219,11 @@ def from_serialized(cls, cursor: str, asset_graph: AssetGraph) -> "AssetDaemonCu latest_evaluation_by_asset_key = {} for key_str, serialized_evaluation in serialized_latest_evaluation_by_asset_key.items(): key = AssetKey.from_user_string(key_str) - evaluation = check.inst( - deserialize_value(serialized_evaluation), AutoMaterializeAssetEvaluation - ) + deserialized_evaluation = deserialize_value(serialized_evaluation) + if isinstance(deserialized_evaluation, AssetConditionEvaluationWithRunIds): + evaluation = deserialized_evaluation.evaluation + else: + evaluation = deserialized_evaluation latest_evaluation_by_asset_key[key] = evaluation return cls( diff --git a/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule.py b/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule.py index dfd0d16ce75ea..65134b80c3fe3 100644 --- a/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule.py +++ b/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule.py @@ -90,7 +90,7 @@ def add_evaluation_data_from_previous_tick( ignore_subset: An AssetSubset which represents information that we should *not* carry forward from the previous tick. """ - from .asset_condition import AssetSubsetWithMetdata + from .asset_condition import AssetSubsetWithMetadata mapping = defaultdict(lambda: context.empty_subset()) for evaluation_data, asset_partitions in asset_partitions_by_evaluation_data.items(): @@ -119,7 +119,7 @@ def add_evaluation_data_from_previous_tick( return ( true_subset, [ - AssetSubsetWithMetdata(subset, dict(metadata)) + AssetSubsetWithMetadata(subset, dict(metadata)) for metadata, subset in mapping.items() ], ) diff --git a/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule_evaluation.py b/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule_evaluation.py index 726d35dd25d8e..0bf9e7ae5231c 100644 --- a/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule_evaluation.py +++ b/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule_evaluation.py @@ -1,5 +1,8 @@ +import operator from abc import ABC, abstractproperty +from collections import defaultdict from enum import Enum +from functools import reduce from typing import ( TYPE_CHECKING, AbstractSet, @@ -8,14 +11,13 @@ NamedTuple, Optional, Sequence, - Set, Tuple, cast, ) import dagster._check as check from dagster._core.definitions.asset_subset import AssetSubset -from dagster._core.definitions.events import AssetKey, AssetKeyPartitionKey +from dagster._core.definitions.events import AssetKey from dagster._core.definitions.metadata import MetadataMapping, MetadataValue from dagster._serdes.serdes import ( NamedTupleSerializer, @@ -25,12 +27,16 @@ whitelist_for_serdes, ) -from .asset_graph import AssetGraph -from .partition import SerializedPartitionsSubset +from .partition import DefaultPartitionsSubset, SerializedPartitionsSubset if TYPE_CHECKING: - from dagster._core.definitions.asset_condition import AssetSubsetWithMetdata - from dagster._core.instance import DynamicPartitionsStore + from dagster._core.definitions.asset_condition import AssetSubsetWithMetadata + + from .asset_condition import ( + AssetConditionEvaluation, + AssetConditionEvaluationWithRunIds, + AssetConditionSnapshot, + ) @whitelist_for_serdes @@ -63,42 +69,6 @@ class AutoMaterializeRuleEvaluationData(ABC): def metadata(self) -> MetadataMapping: raise NotImplementedError() - @staticmethod - def from_metadata(metadata: MetadataMapping) -> Optional["AutoMaterializeRuleEvaluationData"]: - """Temporary workaround to convert the generic metadata mapping into the old format.""" - if not metadata: - return None - elif "text" in metadata: - text_value = cast(str, metadata["text"].value) - return TextRuleEvaluationData(text_value) - - waiting_on_ancestors = frozenset( - { - cast(AssetKey, v.value) - for k, v in metadata.items() - if k.startswith("waiting_on_ancestor") - } - ) - if waiting_on_ancestors: - return WaitingOnAssetsRuleEvaluationData(waiting_on_asset_keys=waiting_on_ancestors) - - updated_parents = frozenset( - {cast(AssetKey, v.value) for k, v in metadata.items() if k.startswith("updated_parent")} - ) - will_update_parents = frozenset( - { - cast(AssetKey, v.value) - for k, v in metadata.items() - if k.startswith("will_update_parent") - } - ) - if updated_parents or will_update_parents: - return ParentUpdatedRuleEvaluationData( - updated_asset_keys=updated_parents, will_update_asset_keys=will_update_parents - ) - - return None - @whitelist_for_serdes class TextRuleEvaluationData( @@ -126,11 +96,11 @@ def metadata(self) -> MetadataMapping: return { **{ f"updated_parent_{i+1}": MetadataValue.asset(k) - for i, k in enumerate(self.updated_asset_keys) + for i, k in enumerate(sorted(self.updated_asset_keys)) }, **{ f"will_update_parent_{i+1}": MetadataValue.asset(k) - for i, k in enumerate(self.will_update_asset_keys) + for i, k in enumerate(sorted(self.will_update_asset_keys)) }, } @@ -148,12 +118,12 @@ def metadata(self) -> MetadataMapping: return { **{ f"waiting_on_ancestor_{i+1}": MetadataValue.asset(k) - for i, k in enumerate(self.waiting_on_asset_keys) + for i, k in enumerate(sorted(self.waiting_on_asset_keys)) }, } -RuleEvaluationResults = Tuple[AssetSubset, Sequence["AssetSubsetWithMetdata"]] +RuleEvaluationResults = Tuple[AssetSubset, Sequence["AssetSubsetWithMetadata"]] @whitelist_for_serdes @@ -162,212 +132,221 @@ class AutoMaterializeRuleEvaluation(NamedTuple): evaluation_data: Optional[AutoMaterializeRuleEvaluationData] -@whitelist_for_serdes -class AutoMaterializeAssetEvaluation(NamedTuple): - """Represents the results of the auto-materialize logic for a single asset. - - Properties: - asset_key (AssetKey): The asset key that was evaluated. - partition_subsets_by_condition: The rule evaluations that impact if the asset should be - materialized, skipped, or discarded. If the asset is partitioned, this will be a list of - tuples, where the first element is the condition and the second element is the - serialized subset of partitions that the condition applies to. If it's not partitioned, - the second element will be None. - num_requested (int): The number of asset partitions that were requested to be materialized - num_skipped (int): The number of asset partitions that were skipped - num_discarded (int): The number of asset partitions that were discarded - run_ids (Set[str]): The set of run IDs created for this evaluation - rule_snapshots (Optional[Sequence[AutoMaterializeRuleSnapshot]]): The snapshots of the - rules on the policy at the time it was evaluated. +# BACKCOMPAT GRAVEYARD + + +class BackcompatAutoMaterializeAssetEvaluationSerializer(NamedTupleSerializer): + """This handles backcompat for the old AutoMaterializeAssetEvaluation objects, turning them into + AssetConditionEvaluationWithRunIds objects. """ - asset_key: AssetKey - partition_subsets_by_condition: Sequence[ - Tuple["AutoMaterializeRuleEvaluation", Optional[SerializedPartitionsSubset]] - ] - num_requested: int - num_skipped: int - num_discarded: int - run_ids: Set[str] = set() - rule_snapshots: Optional[Sequence[AutoMaterializeRuleSnapshot]] = None + def _asset_condition_snapshot_from_rule_snapshot( + self, rule_snapshot: AutoMaterializeRuleSnapshot + ) -> "AssetConditionSnapshot": + from .asset_condition import AssetConditionSnapshot, RuleCondition - @property - def is_empty(self) -> bool: - return ( - sum([self.num_requested, self.num_skipped, self.num_discarded]) == 0 - and len(self.partition_subsets_by_condition) == 0 + return AssetConditionSnapshot( + class_name=RuleCondition.__name__, + description=rule_snapshot.description, + child_hashes=[], ) - @staticmethod - def from_rule_evaluation_results( - asset_graph: AssetGraph, + def _get_child_rule_evaluation( + self, asset_key: AssetKey, - asset_partitions_by_rule_evaluation: Sequence[ - Tuple[AutoMaterializeRuleEvaluation, AbstractSet[AssetKeyPartitionKey]] + partition_subsets_by_condition: Sequence[ + Tuple["AutoMaterializeRuleEvaluation", Optional[SerializedPartitionsSubset]] ], - num_requested: int, - num_skipped: int, - num_discarded: int, - dynamic_partitions_store: "DynamicPartitionsStore", - ) -> "AutoMaterializeAssetEvaluation": - auto_materialize_policy = asset_graph.auto_materialize_policies_by_key.get(asset_key) - - if not auto_materialize_policy: - check.failed(f"Expected auto materialize policy on asset {asset_key}") - - partitions_def = asset_graph.get_partitions_def(asset_key) - if partitions_def is None: - return AutoMaterializeAssetEvaluation( - asset_key=asset_key, - partition_subsets_by_condition=[ - (rule_evaluation, None) - for rule_evaluation, _ in asset_partitions_by_rule_evaluation - ], - num_requested=num_requested, - num_skipped=num_skipped, - num_discarded=num_discarded, - rule_snapshots=auto_materialize_policy.rule_snapshots, - ) + is_partitioned: bool, + rule_snapshot: AutoMaterializeRuleSnapshot, + ) -> "AssetConditionEvaluation": + from .asset_condition import ( + AssetConditionEvaluation, + AssetConditionSnapshot, + AssetSubsetWithMetadata, + RuleCondition, + ) + + condition_snapshot = AssetConditionSnapshot( + class_name=RuleCondition.__name__, + description=rule_snapshot.description, + child_hashes=[], + ) + + if is_partitioned: + # for partitioned assets, we can't deserialize SerializedPartitionsSubset into an + # AssetSubset, so we just return a dummy empty default partition subset + value = DefaultPartitionsSubset(set()) else: - return AutoMaterializeAssetEvaluation( - asset_key=asset_key, - partition_subsets_by_condition=[ - ( - rule_evaluation, - SerializedPartitionsSubset.from_subset( - subset=partitions_def.empty_subset().with_partition_keys( - check.not_none(ap.partition_key) for ap in asset_partitions - ), - partitions_def=partitions_def, - dynamic_partitions_store=dynamic_partitions_store, - ), - ) - for rule_evaluation, asset_partitions in asset_partitions_by_rule_evaluation - ], - num_requested=num_requested, - num_skipped=num_skipped, - num_discarded=num_discarded, - rule_snapshots=auto_materialize_policy.rule_snapshots, - ) + value = len(partition_subsets_by_condition) > 0 + + true_subset = AssetSubset(asset_key, value) + + return AssetConditionEvaluation( + condition_snapshot=condition_snapshot, + true_subset=true_subset, + candidate_subset=None, + subsets_with_metadata=[] + if is_partitioned + else [ + AssetSubsetWithMetadata( + subset=true_subset, metadata=rule_evaluation.evaluation_data.metadata + ) + for rule_evaluation, _ in partition_subsets_by_condition + if rule_evaluation.evaluation_data + ], + ) - def _deserialize_rule_evaluation_result( + def _get_child_decision_type_evaluation( self, - rule_evaluation: AutoMaterializeRuleEvaluation, - serialized_subset: Optional[SerializedPartitionsSubset], - asset_graph: AssetGraph, - ) -> Optional[Tuple[Optional[AutoMaterializeRuleEvaluationData], AssetSubset]]: - partitions_def = asset_graph.get_partitions_def(self.asset_key) - if serialized_subset is None: - if partitions_def is None: - return (rule_evaluation.evaluation_data, AssetSubset(self.asset_key, True)) - elif serialized_subset.can_deserialize(partitions_def) and partitions_def is not None: - return ( - rule_evaluation.evaluation_data, - AssetSubset(self.asset_key, serialized_subset.deserialize(partitions_def)), - ) - # old serialized result is no longer valid - return None + asset_key: AssetKey, + partition_subsets_by_condition: Sequence[ + Tuple["AutoMaterializeRuleEvaluation", Optional[SerializedPartitionsSubset]] + ], + rule_snapshots: Sequence[AutoMaterializeRuleSnapshot], + is_partitioned: bool, + decision_type: AutoMaterializeDecisionType, + ) -> Optional["AssetConditionEvaluation"]: + from .asset_condition import ( + AssetConditionEvaluation, + AssetConditionSnapshot, + NotAssetCondition, + OrAssetCondition, + ) - def get_rule_evaluation_results( - self, rule_snapshot: AutoMaterializeRuleSnapshot, asset_graph: AssetGraph - ) -> RuleEvaluationResults: - """For a given rule snapshot, returns the calculated evaluations for that rule.""" - from dagster._core.definitions.asset_condition import AssetSubsetWithMetdata + partition_subsets_by_condition_by_rule_snapshot = defaultdict(list) + for elt in partition_subsets_by_condition: + partition_subsets_by_condition_by_rule_snapshot[elt[0].rule_snapshot].append(elt) - true_subset = AssetSubset.empty( - self.asset_key, asset_graph.get_partitions_def(self.asset_key) - ) - subsets_with_metadata = [] - for rule_evaluation, serialized_subset in self.partition_subsets_by_condition: - # filter for the same rule - if rule_evaluation.rule_snapshot != rule_snapshot: - continue - deserialized_result = self._deserialize_rule_evaluation_result( - rule_evaluation, serialized_subset, asset_graph + child_evaluations = [ + self._get_child_rule_evaluation( + asset_key, + partition_subsets_by_condition_by_rule_snapshot[rule_snapshot], + is_partitioned, + rule_snapshot, + ) + for rule_snapshot in rule_snapshots + if rule_snapshot.decision_type == decision_type + ] + + if decision_type == AutoMaterializeDecisionType.DISCARD: + # for the discard type, we don't have an OrAssetCondition + if len(child_evaluations) != 1: + return None + evaluation = child_evaluations[0] + else: + decision_type_snapshot = AssetConditionSnapshot( + class_name=OrAssetCondition.__name__, + description="", + child_hashes=[ + child_eval.condition_snapshot.hash for child_eval in child_evaluations + ], + ) + initial = ( + AssetSubset(asset_key, DefaultPartitionsSubset(set())) + if is_partitioned + else AssetSubset.empty(asset_key, None) + ) + evaluation = AssetConditionEvaluation( + condition_snapshot=decision_type_snapshot, + true_subset=reduce( + operator.or_, (e.true_subset for e in child_evaluations), initial + ), + candidate_subset=None, + subsets_with_metadata=[], + child_evaluations=child_evaluations, ) - if deserialized_result: - evaluation_data, subset = deserialized_result - metadata = evaluation_data.metadata if evaluation_data else {} - true_subset |= subset - subsets_with_metadata.append( - AssetSubsetWithMetdata(subset=subset, metadata=metadata) - ) + if decision_type == AutoMaterializeDecisionType.MATERIALIZE: + return evaluation + + # non-materialize conditions are inverted + return AssetConditionEvaluation( + condition_snapshot=AssetConditionSnapshot( + class_name=NotAssetCondition.__name__, + description="", + child_hashes=[evaluation.condition_snapshot.hash], + ), + # for partitioned assets, we don't bother calculating the true subset, as we can't + # properly deserialize the inner results + true_subset=evaluation.true_subset + if evaluation.true_subset.is_partitioned + else evaluation.true_subset._replace(value=not evaluation.true_subset.bool_value), + candidate_subset=None, + subsets_with_metadata=[], + child_evaluations=[evaluation], + ) - return true_subset, subsets_with_metadata - - def _get_subset_with_decision_type( - self, *, decision_type: AutoMaterializeDecisionType, asset_graph: AssetGraph - ) -> AssetSubset: - """Returns the set of asset partitions with a given decision type applied to them.""" - subset = AssetSubset.empty(self.asset_key, asset_graph.get_partitions_def(self.asset_key)) - for rule_evaluation, serialized_subset in self.partition_subsets_by_condition: - if rule_evaluation.rule_snapshot.decision_type != decision_type: - continue - deserialized_result = self._deserialize_rule_evaluation_result( - rule_evaluation, serialized_subset, asset_graph - ) - if deserialized_result is None: - continue - subset |= deserialized_result[1] - return subset - - def get_discarded_subset(self, asset_graph: AssetGraph) -> AssetSubset: - """Returns the set of asset partitions which were either requested or discarded on this - evaluation. - """ - return self._get_subset_with_decision_type( - decision_type=AutoMaterializeDecisionType.DISCARD, asset_graph=asset_graph + def unpack( + self, + unpacked_dict: Dict[str, UnpackedValue], + whitelist_map: WhitelistMap, + context: UnpackContext, + ) -> "AssetConditionEvaluationWithRunIds": + from .asset_condition import ( + AndAssetCondition, + AssetConditionEvaluation, + AssetConditionSnapshot, ) - def get_evaluated_subset(self, asset_graph: AssetGraph) -> AssetSubset: - """Returns the set of asset partitions which were evaluated by any rule on this evaluation.""" - # no asset partition can be evaluated by SKIP or DISCARD rules without having at least one - # materialize rule evaluation - return self._get_subset_with_decision_type( - decision_type=AutoMaterializeDecisionType.MATERIALIZE, asset_graph=asset_graph + asset_key = cast(AssetKey, unpacked_dict.get("asset_key")) + partition_subsets_by_condition = cast( + Sequence[Tuple[AutoMaterializeRuleEvaluation, Optional[SerializedPartitionsSubset]]], + unpacked_dict.get("partition_subsets_by_condition"), + ) + rule_snapshots = ( + cast(Sequence[AutoMaterializeRuleSnapshot], unpacked_dict.get("rule_snapshots", [])) + or [] + ) + is_partitioned = any(tup[1] is not None for tup in partition_subsets_by_condition) + + # get the sub-evaluations for each decision type + materialize_evaluation = self._get_child_decision_type_evaluation( + asset_key, + partition_subsets_by_condition, + rule_snapshots, + is_partitioned, + AutoMaterializeDecisionType.MATERIALIZE, + ) + not_skip_evaluation = self._get_child_decision_type_evaluation( + asset_key, + partition_subsets_by_condition, + rule_snapshots, + is_partitioned, + AutoMaterializeDecisionType.SKIP, + ) + not_discard_evaluation = self._get_child_decision_type_evaluation( + asset_key, + partition_subsets_by_condition, + rule_snapshots, + is_partitioned, + AutoMaterializeDecisionType.DISCARD, ) - def get_requested_subset(self, asset_graph: AssetGraph) -> AssetSubset: - """Returns the set of asset partitions which were requested on this evaluation.""" - return ( - self._get_subset_with_decision_type( - decision_type=AutoMaterializeDecisionType.MATERIALIZE, asset_graph=asset_graph - ) - - self._get_subset_with_decision_type( - decision_type=AutoMaterializeDecisionType.SKIP, asset_graph=asset_graph - ) - - self._get_subset_with_decision_type( - decision_type=AutoMaterializeDecisionType.DISCARD, asset_graph=asset_graph - ) + # filter out any None evaluations (should realistically only happen for discard) + child_evaluations = list( + filter(None, [materialize_evaluation, not_skip_evaluation, not_discard_evaluation]) ) - def equivalent_to_stored_evaluation( - self, stored_evaluation: Optional["AutoMaterializeAssetEvaluation"], asset_graph: AssetGraph - ) -> bool: - """This function returns if a stored record is equivalent to this one. To do so, we can't - just use regular namedtuple equality, as the serialized partition subsets will be - potentially have different string values. - """ - if stored_evaluation is None: - # empty evaluations are not stored on the cursor - return self.is_empty - return ( - self.asset_key == stored_evaluation.asset_key - and set(self.rule_snapshots or []) == set(stored_evaluation.rule_snapshots or []) - # if num_requested / num_discarded > 0 on the stored evaluation, then something changed - # in the global state on the previous tick - and stored_evaluation.num_requested == 0 - and stored_evaluation.num_discarded == 0 - and stored_evaluation.num_skipped == self.num_skipped - # when rule evaluation results are deserialized from json, they are lists instead of - # tuples, so we must convert them before comparing - and sorted(self.partition_subsets_by_condition) - == sorted([tuple(x) for x in stored_evaluation.partition_subsets_by_condition]) + # the top level condition is the AND of all the sub-conditions + condition_snapshot = AssetConditionSnapshot( + class_name=AndAssetCondition.__name__, + description="", + child_hashes=[evaluation.condition_snapshot.hash for evaluation in child_evaluations], ) + return AssetConditionEvaluation( + condition_snapshot=condition_snapshot, + true_subset=reduce(operator.and_, (e.true_subset for e in child_evaluations)), + candidate_subset=None, + subsets_with_metadata=[], + child_evaluations=child_evaluations, + ).with_run_ids(cast(AbstractSet[str], unpacked_dict.get("run_ids", set()))) -# BACKCOMPAT GRAVEYARD + +@whitelist_for_serdes(serializer=BackcompatAutoMaterializeAssetEvaluationSerializer) +class AutoMaterializeAssetEvaluation(NamedTuple): + ... class BackcompatAutoMaterializeConditionSerializer(NamedTupleSerializer): diff --git a/python_modules/dagster/dagster/_core/definitions/freshness_based_auto_materialize.py b/python_modules/dagster/dagster/_core/definitions/freshness_based_auto_materialize.py index e3468e1c4e4a2..b394dd208feab 100644 --- a/python_modules/dagster/dagster/_core/definitions/freshness_based_auto_materialize.py +++ b/python_modules/dagster/dagster/_core/definitions/freshness_based_auto_materialize.py @@ -12,7 +12,6 @@ import pendulum -from dagster._core.definitions.asset_condition import AssetSubsetWithMetdata from dagster._core.definitions.asset_subset import AssetSubset from dagster._core.definitions.events import AssetKeyPartitionKey from dagster._core.definitions.freshness_policy import FreshnessPolicy @@ -161,6 +160,8 @@ def freshness_evaluation_results_for_asset_key( Attempts to minimize the total number of asset executions. """ + from .asset_condition import AssetSubsetWithMetadata + asset_key = context.asset_key current_time = context.evaluation_time @@ -220,7 +221,7 @@ def freshness_evaluation_results_for_asset_key( ): all_subset = AssetSubset.all(asset_key, None) return AssetSubset.all(asset_key, None), [ - AssetSubsetWithMetdata(all_subset, evaluation_data.metadata) + AssetSubsetWithMetadata(all_subset, evaluation_data.metadata) ] else: return context.empty_subset(), [] diff --git a/python_modules/dagster/dagster/_core/scheduler/instigation.py b/python_modules/dagster/dagster/_core/scheduler/instigation.py index c881b2b64816a..9bc0bf8b90e2d 100644 --- a/python_modules/dagster/dagster/_core/scheduler/instigation.py +++ b/python_modules/dagster/dagster/_core/scheduler/instigation.py @@ -6,8 +6,9 @@ import dagster._check as check from dagster._core.definitions import RunRequest -from dagster._core.definitions.auto_materialize_rule_evaluation import ( - AutoMaterializeAssetEvaluation, +from dagster._core.definitions.asset_condition import ( + AssetConditionEvaluation, + AssetConditionEvaluationWithRunIds, ) from dagster._core.definitions.events import AssetKey, AssetKeyPartitionKey @@ -725,19 +726,27 @@ def _validate_tick_args( class AutoMaterializeAssetEvaluationRecord(NamedTuple): id: int - evaluation: AutoMaterializeAssetEvaluation + evaluation_with_run_ids: AssetConditionEvaluationWithRunIds evaluation_id: int timestamp: float asset_key: AssetKey @classmethod - def from_db_row(cls, row): + def from_db_row(cls, row) -> "AutoMaterializeAssetEvaluationRecord": return cls( id=row["id"], - evaluation=deserialize_value( - row["asset_evaluation_body"], AutoMaterializeAssetEvaluation + evaluation_with_run_ids=deserialize_value( + row["asset_evaluation_body"], AssetConditionEvaluationWithRunIds ), evaluation_id=row["evaluation_id"], timestamp=datetime_as_float(row["create_timestamp"]), - asset_key=AssetKey.from_db_string(row["asset_key"]), + asset_key=check.not_none(AssetKey.from_db_string(row["asset_key"])), ) + + @property + def run_ids(self) -> AbstractSet[str]: + return self.evaluation_with_run_ids.run_ids + + @property + def evaluation(self) -> AssetConditionEvaluation: + return self.evaluation_with_run_ids.evaluation diff --git a/python_modules/dagster/dagster/_core/storage/legacy_storage.py b/python_modules/dagster/dagster/_core/storage/legacy_storage.py index 15cde482a4073..77baac9ae8f50 100644 --- a/python_modules/dagster/dagster/_core/storage/legacy_storage.py +++ b/python_modules/dagster/dagster/_core/storage/legacy_storage.py @@ -13,9 +13,7 @@ _check as check, ) from dagster._config.config_schema import UserConfigSchema -from dagster._core.definitions.auto_materialize_rule_evaluation import ( - AutoMaterializeAssetEvaluation, -) +from dagster._core.definitions.asset_condition import AssetConditionEvaluationWithRunIds from dagster._core.definitions.events import AssetKey from dagster._core.event_api import EventHandlerFn from dagster._core.storage.asset_check_execution_record import ( @@ -788,7 +786,7 @@ def purge_ticks( def add_auto_materialize_asset_evaluations( self, evaluation_id: int, - asset_evaluations: Sequence[AutoMaterializeAssetEvaluation], + asset_evaluations: Sequence[AssetConditionEvaluationWithRunIds], ) -> None: return self._storage.schedule_storage.add_auto_materialize_asset_evaluations( evaluation_id, asset_evaluations diff --git a/python_modules/dagster/dagster/_core/storage/schedules/base.py b/python_modules/dagster/dagster/_core/storage/schedules/base.py index 34df57a6614bb..8ce1d41601b82 100644 --- a/python_modules/dagster/dagster/_core/storage/schedules/base.py +++ b/python_modules/dagster/dagster/_core/storage/schedules/base.py @@ -2,8 +2,8 @@ from typing import Mapping, Optional, Sequence, Set from dagster import AssetKey -from dagster._core.definitions.auto_materialize_rule_evaluation import ( - AutoMaterializeAssetEvaluation, +from dagster._core.definitions.asset_condition import ( + AssetConditionEvaluationWithRunIds, ) from dagster._core.definitions.run_request import InstigatorType from dagster._core.instance import MayHaveInstanceWeakref, T_DagsterInstance @@ -156,9 +156,7 @@ def supports_auto_materialize_asset_evaluations(self) -> bool: @abc.abstractmethod def add_auto_materialize_asset_evaluations( - self, - evaluation_id: int, - asset_evaluations: Sequence[AutoMaterializeAssetEvaluation], + self, evaluation_id: int, asset_evaluations: Sequence[AssetConditionEvaluationWithRunIds] ) -> None: """Add asset policy evaluations to storage.""" diff --git a/python_modules/dagster/dagster/_core/storage/schedules/sql_schedule_storage.py b/python_modules/dagster/dagster/_core/storage/schedules/sql_schedule_storage.py index 5e237c53bfc7f..a28f34f058976 100644 --- a/python_modules/dagster/dagster/_core/storage/schedules/sql_schedule_storage.py +++ b/python_modules/dagster/dagster/_core/storage/schedules/sql_schedule_storage.py @@ -20,9 +20,7 @@ from sqlalchemy.engine import Connection import dagster._check as check -from dagster._core.definitions.auto_materialize_rule_evaluation import ( - AutoMaterializeAssetEvaluation, -) +from dagster._core.definitions.asset_condition import AssetConditionEvaluationWithRunIds from dagster._core.definitions.events import AssetKey from dagster._core.definitions.run_request import InstigatorType from dagster._core.errors import DagsterInvariantViolationError @@ -485,7 +483,7 @@ def supports_auto_materialize_asset_evaluations(self) -> bool: def add_auto_materialize_asset_evaluations( self, evaluation_id: int, - asset_evaluations: Sequence[AutoMaterializeAssetEvaluation], + asset_evaluations: Sequence[AssetConditionEvaluationWithRunIds], ): if not asset_evaluations: return @@ -499,8 +497,6 @@ def add_auto_materialize_asset_evaluations( "asset_key": evaluation.asset_key.to_string(), "asset_evaluation_body": serialize_value(evaluation), "num_requested": evaluation.num_requested, - "num_skipped": evaluation.num_skipped, - "num_discarded": evaluation.num_discarded, } ] ) @@ -519,8 +515,6 @@ def add_auto_materialize_asset_evaluations( .values( asset_evaluation_body=serialize_value(evaluation), num_requested=evaluation.num_requested, - num_skipped=evaluation.num_skipped, - num_discarded=evaluation.num_discarded, ) ) diff --git a/python_modules/dagster/dagster/_daemon/asset_daemon.py b/python_modules/dagster/dagster/_daemon/asset_daemon.py index 47d4e106a8b10..96ce5b0dad3cd 100644 --- a/python_modules/dagster/dagster/_daemon/asset_daemon.py +++ b/python_modules/dagster/dagster/_daemon/asset_daemon.py @@ -655,7 +655,7 @@ def _process_auto_materialize_tick_generator( ) ) evaluations_by_asset_key = { - evaluation_record.asset_key: evaluation_record.evaluation + evaluation_record.asset_key: evaluation_record.evaluation_with_run_ids for evaluation_record in evaluation_records } else: @@ -684,13 +684,15 @@ def _process_auto_materialize_tick_generator( check_for_debug_crash(debug_crash_flags, "EVALUATIONS_FINISHED") evaluations_by_asset_key = { - evaluation.asset_key: evaluation for evaluation in evaluations + evaluation.asset_key: evaluation.with_run_ids(set()) + for evaluation in evaluations } # Write the asset evaluations without run IDs first if schedule_storage.supports_auto_materialize_asset_evaluations: schedule_storage.add_auto_materialize_asset_evaluations( - evaluation_id, list(evaluations_by_asset_key.values()) + evaluation_id, + list(evaluations_by_asset_key.values()), ) check_for_debug_crash(debug_crash_flags, "ASSET_EVALUATIONS_ADDED") diff --git a/python_modules/dagster/dagster/_utils/test/schedule_storage.py b/python_modules/dagster/dagster/_utils/test/schedule_storage.py index 22d0dd9466cbf..31d1318990498 100644 --- a/python_modules/dagster/dagster/_utils/test/schedule_storage.py +++ b/python_modules/dagster/dagster/_utils/test/schedule_storage.py @@ -5,13 +5,14 @@ import pytest from dagster import StaticPartitionsDefinition -from dagster._core.definitions.auto_materialize_rule import AutoMaterializeRule -from dagster._core.definitions.auto_materialize_rule_evaluation import ( - AutoMaterializeAssetEvaluation, - AutoMaterializeRuleEvaluation, +from dagster._core.definitions.asset_condition import ( + AssetConditionEvaluation, + AssetConditionSnapshot, + AssetSubsetWithMetadata, ) +from dagster._core.definitions.asset_subset import AssetSubset from dagster._core.definitions.events import AssetKey -from dagster._core.definitions.partition import SerializedPartitionsSubset +from dagster._core.definitions.metadata import MetadataValue from dagster._core.host_representation import ( ExternalRepositoryOrigin, ManagedGrpcPythonEnvCodeLocationOrigin, @@ -726,36 +727,32 @@ def test_ticks_batched(self, storage): assert ticks_by_origin["sensor_one"][0].tick_id == b.tick_id assert ticks_by_origin["sensor_two"][0].tick_id == d.tick_id - def test_auto_materialize_asset_evaluations(self, storage): + def test_auto_materialize_asset_evaluations(self, storage) -> None: if not self.can_store_auto_materialize_asset_evaluations(): pytest.skip("Storage cannot store auto materialize asset evaluations") + condition_snapshot = AssetConditionSnapshot("foo", "bar", []) + for _ in range(2): # test idempotency storage.add_auto_materialize_asset_evaluations( evaluation_id=10, asset_evaluations=[ - AutoMaterializeAssetEvaluation( - asset_key=AssetKey("asset_one"), - partition_subsets_by_condition=[], - num_requested=0, - num_skipped=0, - num_discarded=0, - ), - AutoMaterializeAssetEvaluation( - asset_key=AssetKey("asset_two"), - partition_subsets_by_condition=[ - ( - AutoMaterializeRuleEvaluation( - rule_snapshot=AutoMaterializeRule.materialize_on_missing().to_snapshot(), - evaluation_data=None, - ), - None, + AssetConditionEvaluation( + condition_snapshot=condition_snapshot, + true_subset=AssetSubset(asset_key=AssetKey("asset_one"), value=False), + candidate_subset=None, + ).with_run_ids(set()), + AssetConditionEvaluation( + condition_snapshot=condition_snapshot, + true_subset=AssetSubset(asset_key=AssetKey("asset_two"), value=True), + candidate_subset=None, + subsets_with_metadata=[ + AssetSubsetWithMetadata( + AssetSubset(asset_key=AssetKey("asset_two"), value=True), + {"foo": MetadataValue.text("bar")}, ) ], - num_requested=1, - num_skipped=0, - num_discarded=0, - ), + ).with_run_ids(set()), ], ) @@ -765,7 +762,7 @@ def test_auto_materialize_asset_evaluations(self, storage): assert len(res) == 1 assert res[0].evaluation.asset_key == AssetKey("asset_one") assert res[0].evaluation_id == 10 - assert res[0].evaluation.num_requested == 0 + assert res[0].evaluation.true_subset.size == 0 res = storage.get_auto_materialize_asset_evaluations( asset_key=AssetKey("asset_two"), limit=100 @@ -773,29 +770,27 @@ def test_auto_materialize_asset_evaluations(self, storage): assert len(res) == 1 assert res[0].evaluation.asset_key == AssetKey("asset_two") assert res[0].evaluation_id == 10 - assert res[0].evaluation.num_requested == 1 + assert res[0].evaluation.true_subset.size == 1 res = storage.get_auto_materialize_evaluations_for_evaluation_id(evaluation_id=10) assert len(res) == 2 assert res[0].evaluation.asset_key == AssetKey("asset_one") assert res[0].evaluation_id == 10 - assert res[0].evaluation.num_requested == 0 + assert res[0].evaluation.true_subset.size == 0 assert res[1].evaluation.asset_key == AssetKey("asset_two") assert res[1].evaluation_id == 10 - assert res[1].evaluation.num_requested == 1 + assert res[1].evaluation.true_subset.size == 1 storage.add_auto_materialize_asset_evaluations( evaluation_id=11, asset_evaluations=[ - AutoMaterializeAssetEvaluation( - asset_key=AssetKey("asset_one"), - partition_subsets_by_condition=[], - num_requested=0, - num_skipped=0, - num_discarded=0, - ), + AssetConditionEvaluation( + condition_snapshot=condition_snapshot, + true_subset=AssetSubset(asset_key=AssetKey("asset_one"), value=True), + candidate_subset=None, + ).with_run_ids(set()), ], ) @@ -820,21 +815,17 @@ def test_auto_materialize_asset_evaluations(self, storage): # add a mix of keys - one that already is using the unique index and one that is not - eval_one = AutoMaterializeAssetEvaluation( - asset_key=AssetKey("asset_one"), - partition_subsets_by_condition=[], - num_requested=1, - num_skipped=2, - num_discarded=3, - ) + eval_one = AssetConditionEvaluation( + condition_snapshot=AssetConditionSnapshot("foo", "bar", []), + true_subset=AssetSubset(asset_key=AssetKey("asset_one"), value=True), + candidate_subset=None, + ).with_run_ids(set()) - eval_asset_three = AutoMaterializeAssetEvaluation( - asset_key=AssetKey("asset_three"), - partition_subsets_by_condition=[], - num_requested=1, - num_skipped=2, - num_discarded=3, - ) + eval_asset_three = AssetConditionEvaluation( + condition_snapshot=AssetConditionSnapshot("foo", "bar", []), + true_subset=AssetSubset(asset_key=AssetKey("asset_three"), value=True), + candidate_subset=None, + ).with_run_ids(set()) storage.add_auto_materialize_asset_evaluations( evaluation_id=11, @@ -849,7 +840,7 @@ def test_auto_materialize_asset_evaluations(self, storage): ) assert len(res) == 2 assert res[0].evaluation_id == 11 - assert res[0].evaluation == eval_one + assert res[0].evaluation == eval_one.evaluation res = storage.get_auto_materialize_asset_evaluations( asset_key=AssetKey("asset_three"), limit=100 @@ -857,33 +848,29 @@ def test_auto_materialize_asset_evaluations(self, storage): assert len(res) == 1 assert res[0].evaluation_id == 11 - assert res[0].evaluation == eval_asset_three + assert res[0].evaluation == eval_asset_three.evaluation - def test_auto_materialize_asset_evaluations_with_partitions(self, storage): + def test_auto_materialize_asset_evaluations_with_partitions(self, storage) -> None: if not self.can_store_auto_materialize_asset_evaluations(): pytest.skip("Storage cannot store auto materialize asset evaluations") partitions_def = StaticPartitionsDefinition(["a", "b"]) subset = partitions_def.empty_subset().with_partition_keys(["a"]) + asset_subset = AssetSubset(asset_key=AssetKey("asset_two"), value=subset) + asset_subset_with_metadata = AssetSubsetWithMetadata( + asset_subset, + {"foo": MetadataValue.text("bar"), "baz": MetadataValue.asset(AssetKey("asset_one"))}, + ) storage.add_auto_materialize_asset_evaluations( evaluation_id=10, asset_evaluations=[ - AutoMaterializeAssetEvaluation( - asset_key=AssetKey("asset_two"), - partition_subsets_by_condition=[ - ( - AutoMaterializeRuleEvaluation( - rule_snapshot=AutoMaterializeRule.materialize_on_missing().to_snapshot(), - evaluation_data=None, - ), - SerializedPartitionsSubset.from_subset(subset, partitions_def, None), - ) - ], - num_requested=1, - num_skipped=0, - num_discarded=0, - ), + AssetConditionEvaluation( + condition_snapshot=AssetConditionSnapshot("foo", "bar", []), + true_subset=asset_subset, + candidate_subset=None, + subsets_with_metadata=[asset_subset_with_metadata], + ).with_run_ids(set()), ], ) @@ -893,38 +880,23 @@ def test_auto_materialize_asset_evaluations_with_partitions(self, storage): assert len(res) == 1 assert res[0].evaluation.asset_key == AssetKey("asset_two") assert res[0].evaluation_id == 10 - assert res[0].evaluation.num_requested == 1 + assert res[0].evaluation.true_subset.size == 1 - assert res[0].evaluation.partition_subsets_by_condition[0][ - 0 - ] == AutoMaterializeRuleEvaluation( - rule_snapshot=AutoMaterializeRule.materialize_on_missing().to_snapshot(), - evaluation_data=None, - ) - assert ( - res[0].evaluation.partition_subsets_by_condition[0][1].can_deserialize(partitions_def) - ) - assert ( - partitions_def.deserialize_subset( - res[0].evaluation.partition_subsets_by_condition[0][1].serialized_subset - ) - == subset - ) + assert res[0].evaluation.subsets_with_metadata[0] == asset_subset_with_metadata - def test_purge_asset_evaluations(self, storage): + def test_purge_asset_evaluations(self, storage) -> None: if not self.can_purge(): pytest.skip("Storage cannot purge") storage.add_auto_materialize_asset_evaluations( evaluation_id=11, asset_evaluations=[ - AutoMaterializeAssetEvaluation( - asset_key=AssetKey("asset_one"), - partition_subsets_by_condition=[], - num_requested=0, - num_skipped=0, - num_discarded=0, - ), + AssetConditionEvaluation( + condition_snapshot=AssetConditionSnapshot("foo", "bar", []), + true_subset=AssetSubset(asset_key=AssetKey("asset_one"), value=True), + candidate_subset=None, + subsets_with_metadata=[], + ).with_run_ids(set()), ], ) diff --git a/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/asset_daemon_scenario.py b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/asset_daemon_scenario.py index 706b0f64e273c..d8de5923609d1 100644 --- a/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/asset_daemon_scenario.py +++ b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/asset_daemon_scenario.py @@ -1,5 +1,6 @@ import datetime import hashlib +import itertools import json import logging import os @@ -38,6 +39,10 @@ asset, materialize, ) +from dagster._core.definitions.asset_condition import ( + AssetConditionEvaluation, + AssetSubsetWithMetadata, +) from dagster._core.definitions.asset_daemon_context import ( AssetDaemonContext, ) @@ -46,10 +51,9 @@ LegacyAssetDaemonCursorWrapper, ) from dagster._core.definitions.asset_graph import AssetGraph +from dagster._core.definitions.asset_subset import AssetSubset from dagster._core.definitions.auto_materialize_rule import AutoMaterializeRule from dagster._core.definitions.auto_materialize_rule_evaluation import ( - AutoMaterializeAssetEvaluation, - AutoMaterializeRuleEvaluation, AutoMaterializeRuleEvaluationData, ) from dagster._core.definitions.automation_policy_sensor_definition import ( @@ -163,17 +167,20 @@ def with_rule_evaluation_data( rule_evaluation_data=data_type(**transformed_kwargs), ) - def resolve(self) -> Tuple[AutoMaterializeRuleEvaluation, Optional[Sequence[str]]]: + def resolve(self, asset_key: AssetKey, asset_graph: AssetGraph) -> AssetSubsetWithMetadata: """Returns a tuple of the resolved AutoMaterializeRuleEvaluation for this spec and the partitions that it applies to. """ - return ( - AutoMaterializeRuleEvaluation( - rule_snapshot=self.rule.to_snapshot(), - evaluation_data=self.rule_evaluation_data, - ), - sorted(self.partitions) if self.partitions else None, + subset = AssetSubset.from_asset_partitions_set( + asset_key, + asset_graph.get_partitions_def(asset_key), + { + AssetKeyPartitionKey(asset_key, partition_key) + for partition_key in self.partitions or [None] + }, ) + metadata = self.rule_evaluation_data.metadata if self.rule_evaluation_data else {} + return AssetSubsetWithMetadata(subset=subset, metadata=metadata) class AssetSpecWithPartitionsDef( @@ -204,7 +211,7 @@ class AssetDaemonScenarioState(NamedTuple): current_time: datetime.datetime = pendulum.now("UTC") run_requests: Sequence[RunRequest] = [] serialized_cursor: str = AssetDaemonCursor.empty().serialize() - evaluations: Sequence[AutoMaterializeAssetEvaluation] = [] + evaluations: Sequence[AssetConditionEvaluation] = [] logger: logging.Logger = logging.getLogger("dagster.amp") # this is set by the scenario runner scenario_instance: Optional[DagsterInstance] = None @@ -346,7 +353,7 @@ def with_dynamic_partitions( def _evaluate_tick_fast( self, - ) -> Tuple[Sequence[RunRequest], AssetDaemonCursor, Sequence[AutoMaterializeAssetEvaluation]]: + ) -> Tuple[Sequence[RunRequest], AssetDaemonCursor, Sequence[AssetConditionEvaluation]]: cursor = AssetDaemonCursor.from_serialized(self.serialized_cursor, self.asset_graph) new_run_requests, new_cursor, new_evaluations = AssetDaemonContext( @@ -414,7 +421,7 @@ def _evaluate_tick_daemon( ) -> Tuple[ Sequence[RunRequest], AssetDaemonCursor, - Sequence[AutoMaterializeAssetEvaluation], + Sequence[AssetConditionEvaluation], ]: with self._create_workspace_context() as workspace_context: workspace = workspace_context.create_request_context() @@ -594,7 +601,7 @@ def sort_run_request_key_fn(run_request) -> Tuple[AssetKey, Optional[str]]: return self def _assert_evaluation_daemon( - self, key: AssetKey, actual_evaluation: AutoMaterializeAssetEvaluation + self, key: AssetKey, actual_evaluation: AssetConditionEvaluation ) -> None: """Additional assertions for daemon mode. Checks that the evaluation for the given asset contains the expected run ids. @@ -610,15 +617,24 @@ def _assert_evaluation_daemon( ) if key in (run.asset_selection or set()) } - assert new_run_ids_for_asset == actual_evaluation.run_ids + evaluation_with_run_ids = next( + iter( + [ + e + for e in check.not_none( + self.instance.schedule_storage + ).get_auto_materialize_evaluations_for_evaluation_id(current_evaluation_id) + if e.asset_key == key + ] + ) + ) + assert new_run_ids_for_asset == evaluation_with_run_ids.run_ids def assert_evaluation( self, key: CoercibleToAssetKey, expected_evaluation_specs: Sequence[AssetRuleEvaluationSpec], num_requested: Optional[int] = None, - num_skipped: Optional[int] = None, - num_discarded: Optional[int] = None, ) -> "AssetDaemonScenarioState": """Asserts that AutoMaterializeRuleEvaluations on the AutoMaterializeAssetEvaluation for the given asset key match the given expected_evaluation_specs. @@ -632,7 +648,7 @@ def assert_evaluation( if actual_evaluation is None: try: assert len(expected_evaluation_specs) == 0 - assert all(n is None for n in (num_requested, num_skipped, num_discarded)) + assert num_requested is None except: self.logger.error( "\nAll Evaluations: \n\n" + "\n\n".join("\t" + str(e) for e in self.evaluations) @@ -640,41 +656,48 @@ def assert_evaluation( raise return self if num_requested is not None: - assert actual_evaluation.num_requested == num_requested - if num_skipped is not None: - assert actual_evaluation.num_skipped == num_skipped - if num_discarded is not None: - assert actual_evaluation.num_discarded == num_discarded - - # unpack the serialized partition subsets into an easier format - actual_rule_evaluations = [ - ( - rule_evaluation, - sorted( - serialized_subset.deserialize( - check.not_none(self.asset_graph.get_partitions_def(asset_key)) - ).get_partition_keys() - ) - if serialized_subset is not None - else None, + assert actual_evaluation.true_subset.size == num_requested + + def get_leaf_evaluations(e: AssetConditionEvaluation) -> Sequence[AssetConditionEvaluation]: + if len(e.child_evaluations) == 0: + return [e] + leaf_evals = [] + for child_eval in e.child_evaluations: + leaf_evals.extend(get_leaf_evaluations(child_eval)) + return leaf_evals + + actual_subsets_with_metadata = list( + itertools.chain( + *[ + leaf_eval.subsets_with_metadata + # backcompat as previously we stored None metadata for any true evaluation + or ( + [AssetSubsetWithMetadata(leaf_eval.true_subset, {})] + if leaf_eval.true_subset.size + else [] + ) + for leaf_eval in get_leaf_evaluations(actual_evaluation) + ] ) - for rule_evaluation, serialized_subset in actual_evaluation.partition_subsets_by_condition + ) + expected_subsets_with_metadata = [ + ees.resolve(asset_key, self.asset_graph) for ees in expected_evaluation_specs ] - expected_rule_evaluations = [ees.resolve() for ees in expected_evaluation_specs] try: - for (actual_data, actual_partitions), (expected_data, expected_partitions) in zip( - sorted(actual_rule_evaluations), sorted(expected_rule_evaluations) + for actual_sm, expected_sm in zip( + sorted(actual_subsets_with_metadata, key=lambda x: str(x)), + sorted(expected_subsets_with_metadata, key=lambda x: str(x)), ): - assert actual_data.rule_snapshot == expected_data.rule_snapshot - assert actual_partitions == expected_partitions + assert actual_sm.subset == expected_sm.subset # only check evaluation data if it was set on the expected evaluation spec - if expected_data.evaluation_data is not None: - assert actual_data.evaluation_data == expected_data.evaluation_data + if expected_sm.metadata: + assert actual_sm.metadata == expected_sm.metadata except: self._log_assertion_error( - sorted(expected_rule_evaluations), sorted(actual_rule_evaluations) + sorted(expected_subsets_with_metadata, key=lambda x: str(x)), + sorted(actual_subsets_with_metadata, key=lambda x: str(x)), ) raise diff --git a/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/base_scenario.py b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/base_scenario.py index a000ddc1fff77..5d6eafd0901a6 100644 --- a/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/base_scenario.py +++ b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/base_scenario.py @@ -56,13 +56,12 @@ from dagster._core.definitions.auto_materialize_policy import AutoMaterializePolicy from dagster._core.definitions.auto_materialize_rule import AutoMaterializeRule from dagster._core.definitions.auto_materialize_rule_evaluation import ( - AutoMaterializeAssetEvaluation, AutoMaterializeDecisionType, AutoMaterializeRuleEvaluation, AutoMaterializeRuleEvaluationData, ) from dagster._core.definitions.data_version import DataVersionsByPartition -from dagster._core.definitions.events import AssetKeyPartitionKey, CoercibleToAssetKey +from dagster._core.definitions.events import CoercibleToAssetKey from dagster._core.definitions.external_asset_graph import ExternalAssetGraph from dagster._core.definitions.freshness_policy import FreshnessPolicy from dagster._core.definitions.observe import observe @@ -129,33 +128,6 @@ def from_single_rule( num_discarded=1 if rule.decision_type == AutoMaterializeDecisionType.DISCARD else 0, ) - def to_evaluation( - self, asset_graph: AssetGraph, instance: DagsterInstance - ) -> AutoMaterializeAssetEvaluation: - asset_key = AssetKey.from_coercible(self.asset_key) - return AutoMaterializeAssetEvaluation.from_rule_evaluation_results( - asset_graph=asset_graph, - asset_key=asset_key, - asset_partitions_by_rule_evaluation=[ - ( - rule_evaluation, - ( - { - AssetKeyPartitionKey(asset_key, partition_key) - for partition_key in partition_keys - } - if partition_keys - else set() - ), - ) - for rule_evaluation, partition_keys in self.rule_evaluations - ], - num_requested=self.num_requested, - num_skipped=self.num_skipped, - num_discarded=self.num_discarded, - dynamic_partitions_store=instance, - ) - class AssetReconciliationScenario( NamedTuple( diff --git a/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/test_asset_daemon_failure_recovery.py b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/test_asset_daemon_failure_recovery.py index 63bd3210671fa..72543077a66df 100644 --- a/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/test_asset_daemon_failure_recovery.py +++ b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/test_asset_daemon_failure_recovery.py @@ -505,7 +505,7 @@ def sort_run_key_fn(run): ) assert len(evaluations) == 1 assert evaluations[0].evaluation.asset_key == AssetKey("hourly") - assert evaluations[0].evaluation.run_ids == {run.run_id for run in sorted_runs} + assert evaluations[0].run_ids == {run.run_id for run in sorted_runs} @pytest.mark.parametrize( @@ -612,7 +612,7 @@ def sort_run_key_fn(run): ) assert len(evaluations) == 1 assert evaluations[0].evaluation.asset_key == AssetKey("hourly") - assert evaluations[0].evaluation.run_ids == {run.run_id for run in sorted_runs} + assert evaluations[0].run_ids == {run.run_id for run in sorted_runs} cursor = _get_pre_sensor_auto_materialize_serialized_cursor(instance) assert cursor diff --git a/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/test_asset_daemon_fast.py b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/test_asset_daemon_fast.py index 9ac728f76f558..32b95a7fdb28a 100644 --- a/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/test_asset_daemon_fast.py +++ b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/test_asset_daemon_fast.py @@ -1,5 +1,3 @@ -from typing import Sequence - import pytest from dagster import ( AssetMaterialization, @@ -8,10 +6,6 @@ job, op, ) -from dagster._core.definitions.asset_graph import AssetGraph -from dagster._core.definitions.auto_materialize_rule_evaluation import ( - AutoMaterializeAssetEvaluation, -) from dagster._core.definitions.time_window_partitions import ( HourlyPartitionsDefinition, ) @@ -44,37 +38,6 @@ def test_reconciliation(scenario, respect_materialization_data_versions): instance, respect_materialization_data_versions=respect_materialization_data_versions ) - def _sorted_evaluations( - evaluations: Sequence[AutoMaterializeAssetEvaluation], - ) -> Sequence[AutoMaterializeAssetEvaluation]: - """Allows a stable ordering for comparison.""" - return sorted( - [ - evaluation._replace( - partition_subsets_by_condition=sorted( - evaluation.partition_subsets_by_condition, key=repr - ) - )._replace( - rule_snapshots=( - sorted(evaluation.rule_snapshots, key=repr) - if evaluation.rule_snapshots - else None - ) - ) - for evaluation in evaluations - ], - key=repr, - ) - - if scenario.expected_evaluations is not None: - asset_graph = AssetGraph.from_assets(scenario.assets) - assert _sorted_evaluations( - [ - evaluation_spec.to_evaluation(asset_graph, instance) - for evaluation_spec in scenario.expected_evaluations - ] - ) == _sorted_evaluations(evaluations) - assert len(run_requests) == len(scenario.expected_run_requests), evaluations def sort_run_request_key_fn(run_request): diff --git a/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/test_auto_materialize_asset_evaluation.py b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/test_auto_materialize_asset_evaluation.py index e5e1a0628efb5..ecc9f5b31285d 100644 --- a/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/test_auto_materialize_asset_evaluation.py +++ b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/test_auto_materialize_asset_evaluation.py @@ -1,124 +1,166 @@ -from dagster import AssetKey, AutoMaterializePolicy, StaticPartitionsDefinition, asset -from dagster._core.definitions.asset_graph import AssetGraph -from dagster._core.definitions.auto_materialize_rule import AutoMaterializeRule -from dagster._core.definitions.auto_materialize_rule_evaluation import ( - AutoMaterializeAssetEvaluation, - AutoMaterializeRuleEvaluation, - ParentUpdatedRuleEvaluationData, - WaitingOnAssetsRuleEvaluationData, +from dagster import MetadataValue +from dagster._core.definitions.asset_condition import ( + AssetConditionEvaluationWithRunIds, + AssetSubsetWithMetadata, ) -from dagster._core.definitions.events import AssetKeyPartitionKey -from dagster._serdes.serdes import deserialize_value, serialize_value +from dagster._core.definitions.asset_subset import AssetSubset +from dagster._core.definitions.events import AssetKey +from dagster._serdes.serdes import deserialize_value -partitions = StaticPartitionsDefinition(partition_keys=["a", "b", "c"]) +def test_backcompat_unpartitioned_skipped() -> None: + serialized_asset_evaluation = ( + '{"__class__": "AutoMaterializeAssetEvaluation", "asset_key": {"__class__": "AssetKey", ' + '"path": ["C"]}, "num_discarded": 0, "num_requested": 0, "num_skipped": 1, ' + '"partition_subsets_by_condition": [[{"__class__": "AutoMaterializeRuleEvaluation", ' + '"evaluation_data": null, "rule_snapshot": {"__class__": "AutoMaterializeRuleSnapshot", ' + '"class_name": "MaterializeOnCronRule", "decision_type": {"__enum__": ' + '"AutoMaterializeDecisionType.MATERIALIZE"}, "description": "not materialized since last ' + 'cron schedule tick of \'0 * * * *\' (timezone: UTC)"}}, null], [{"__class__": ' + '"AutoMaterializeRuleEvaluation", "evaluation_data": {"__class__": ' + '"WaitingOnAssetsRuleEvaluationData", "waiting_on_asset_keys": {"__frozenset__": ' + '[{"__class__": "AssetKey", "path": ["A"]}, {"__class__": "AssetKey", "path": ["B"]}]}}, ' + '"rule_snapshot": {"__class__": "AutoMaterializeRuleSnapshot", "class_name": ' + '"SkipOnNotAllParentsUpdatedRule", "decision_type": {"__enum__": ' + '"AutoMaterializeDecisionType.SKIP"}, "description": "waiting on upstream data to be ' + 'updated"}}, null]], "rule_snapshots": [{"__class__": "AutoMaterializeRuleSnapshot", ' + '"class_name": "SkipOnNotAllParentsUpdatedRule", "decision_type": {"__enum__": ' + '"AutoMaterializeDecisionType.SKIP"}, "description": "waiting on upstream data to be ' + 'updated"}, {"__class__": "AutoMaterializeRuleSnapshot", "class_name": ' + '"MaterializeOnCronRule", "decision_type": {"__enum__": ' + '"AutoMaterializeDecisionType.MATERIALIZE"}, "description": "not materialized since last ' + 'cron schedule tick of \'0 * * * *\' (timezone: UTC)"}], "run_ids": {"__set__": []}}' + ) + deserialized_with_run_ids = deserialize_value( + serialized_asset_evaluation, AssetConditionEvaluationWithRunIds + ) + deserialized = deserialized_with_run_ids.evaluation -@asset(partitions_def=partitions, auto_materialize_policy=AutoMaterializePolicy.eager()) -def my_asset(_): - pass + assert deserialized.true_subset.size == 0 + assert len(deserialized.child_evaluations) == 2 + materialize_evaluation, not_skip_evaluation = deserialized.child_evaluations + assert materialize_evaluation.true_subset.size == 1 + assert not_skip_evaluation.true_subset.size == 0 + skip_evaluation = not_skip_evaluation.child_evaluations[0] + assert skip_evaluation.true_subset.size == 1 + assert len(skip_evaluation.child_evaluations) == 1 + assert skip_evaluation.child_evaluations[0].true_subset.size == 1 + assert len(skip_evaluation.child_evaluations[0].subsets_with_metadata) == 1 + skip_metadata = skip_evaluation.child_evaluations[0].subsets_with_metadata[0] + assert skip_metadata == AssetSubsetWithMetadata( + subset=AssetSubset(asset_key=AssetKey(["C"]), value=True), + metadata={ + "waiting_on_ancestor_1": MetadataValue.asset(asset_key=AssetKey(["A"])), + "waiting_on_ancestor_2": MetadataValue.asset(asset_key=AssetKey(["B"])), + }, + ) -def test_backcompat(): +def test_backcompat_unpartitioned_requested() -> None: serialized_asset_evaluation = ( - '{"__class__": "AutoMaterializeAssetEvaluation", "asset_key": {"__class__": "AssetKey",' - ' "path": ["my_asset"]}, "num_discarded": 0, "num_requested": 0, "num_skipped": 2,' - ' "partition_subsets_by_condition": [[{"__class__": "MissingAutoMaterializeCondition",' - ' "decision_type": {"__enum__": "AutoMaterializeDecisionType.MATERIALIZE"}}, {"__class__":' - ' "SerializedPartitionsSubset", "serialized_partitions_def_class_name":' - ' "StaticPartitionsDefinition", "serialized_partitions_def_unique_id":' - ' "411905f695e47a51ceafc178e6cd4eb3680f4453", "serialized_subset": "{\\"version\\": 1,' - ' \\"subset\\": [\\"a\\", \\"b\\"]}"}], [{"__class__":' - ' "ParentOutdatedAutoMaterializeCondition", "decision_type": {"__enum__":' - ' "AutoMaterializeDecisionType.SKIP"}, "waiting_on_asset_keys": {"__frozenset__":' - ' [{"__class__": "AssetKey", "path": ["parent1"]}, {"__class__": "AssetKey", "path":' - ' ["parent2"]}]}}, {"__class__": "SerializedPartitionsSubset",' - ' "serialized_partitions_def_class_name": "StaticPartitionsDefinition",' - ' "serialized_partitions_def_unique_id": "411905f695e47a51ceafc178e6cd4eb3680f4453",' - ' "serialized_subset": "{\\"version\\": 1, \\"subset\\": [\\"a\\"]}"}], [{"__class__":' - ' "ParentMaterializedAutoMaterializeCondition", "decision_type": {"__enum__":' - ' "AutoMaterializeDecisionType.MATERIALIZE"}, "updated_asset_keys": {"__frozenset__":' - ' [{"__class__": "AssetKey", "path": ["parent1"]}, {"__class__": "AssetKey", "path":' - ' ["parent2"]}]}, "will_update_asset_keys": {"__frozenset__": [{"__class__": "AssetKey",' - ' "path": ["parent3"]}]}}, {"__class__": "SerializedPartitionsSubset",' - ' "serialized_partitions_def_class_name": "StaticPartitionsDefinition",' - ' "serialized_partitions_def_unique_id": "411905f695e47a51ceafc178e6cd4eb3680f4453",' - ' "serialized_subset": "{\\"version\\": 1, \\"subset\\": [\\"b\\"]}"}], [{"__class__":' - ' "ParentOutdatedAutoMaterializeCondition", "decision_type": {"__enum__":' - ' "AutoMaterializeDecisionType.SKIP"}, "waiting_on_asset_keys": {"__frozenset__":' - ' [{"__class__": "AssetKey", "path": ["parent1"]}]}}, {"__class__":' - ' "SerializedPartitionsSubset", "serialized_partitions_def_class_name":' - ' "StaticPartitionsDefinition", "serialized_partitions_def_unique_id":' - ' "411905f695e47a51ceafc178e6cd4eb3680f4453", "serialized_subset": "{\\"version\\": 1,' - ' \\"subset\\": [\\"b\\"]}"}]], "run_ids": {"__set__": []}}' + '{"__class__": "AutoMaterializeAssetEvaluation", "asset_key": {"__class__": "AssetKey", ' + '"path": ["C"]}, "num_discarded": 0, "num_requested": 1, "num_skipped": 0, ' + '"partition_subsets_by_condition": [[{"__class__": "AutoMaterializeRuleEvaluation", ' + '"evaluation_data": null, "rule_snapshot": {"__class__": "AutoMaterializeRuleSnapshot", ' + '"class_name": "MaterializeOnCronRule", "decision_type": {"__enum__": ' + '"AutoMaterializeDecisionType.MATERIALIZE"}, "description": "not materialized since last ' + 'cron schedule tick of \'0 * * * *\' (timezone: UTC)"}}, null]], "rule_snapshots": ' + '[{"__class__": "AutoMaterializeRuleSnapshot", "class_name": ' + '"SkipOnNotAllParentsUpdatedRule", "decision_type": {"__enum__": ' + '"AutoMaterializeDecisionType.SKIP"}, "description": "waiting on upstream data to be ' + 'updated"}, {"__class__": "AutoMaterializeRuleSnapshot", "class_name": ' + '"MaterializeOnCronRule", "decision_type": {"__enum__": ' + '"AutoMaterializeDecisionType.MATERIALIZE"}, "description": "not materialized since last ' + 'cron schedule tick of \'0 * * * *\' (timezone: UTC)"}], "run_ids": {"__set__": []}}' ) - expected_asset_evaluation = AutoMaterializeAssetEvaluation.from_rule_evaluation_results( - asset_key=AssetKey(["my_asset"]), - asset_graph=AssetGraph.from_assets([my_asset]), - asset_partitions_by_rule_evaluation=[ - ( - AutoMaterializeRuleEvaluation( - rule_snapshot=AutoMaterializeRule.materialize_on_missing().to_snapshot(), - evaluation_data=None, - ), - {AssetKeyPartitionKey(AssetKey(["my_asset"]), p) for p in ("a", "b")}, - ), - ( - AutoMaterializeRuleEvaluation( - rule_snapshot=AutoMaterializeRule.materialize_on_parent_updated().to_snapshot(), - evaluation_data=ParentUpdatedRuleEvaluationData( - updated_asset_keys=frozenset( - {AssetKey(["parent1"]), AssetKey(["parent2"])} - ), - will_update_asset_keys=frozenset({AssetKey(["parent3"])}), - ), - ), - {AssetKeyPartitionKey(AssetKey(["my_asset"]), "b")}, - ), - ( - AutoMaterializeRuleEvaluation( - rule_snapshot=AutoMaterializeRule.skip_on_parent_outdated().to_snapshot(), - evaluation_data=WaitingOnAssetsRuleEvaluationData( - waiting_on_asset_keys=frozenset( - {AssetKey(["parent1"]), AssetKey(["parent2"])} - ), - ), - ), - {AssetKeyPartitionKey(AssetKey(["my_asset"]), "a")}, - ), - ( - AutoMaterializeRuleEvaluation( - rule_snapshot=AutoMaterializeRule.skip_on_parent_outdated().to_snapshot(), - evaluation_data=WaitingOnAssetsRuleEvaluationData( - waiting_on_asset_keys=frozenset({AssetKey(["parent1"])}), - ), - ), - {AssetKeyPartitionKey(AssetKey(["my_asset"]), "a")}, - ), - ], - num_requested=0, - num_skipped=2, - num_discarded=0, - dynamic_partitions_store=None, + deserialized_with_run_ids = deserialize_value( + serialized_asset_evaluation, AssetConditionEvaluationWithRunIds ) + deserialized = deserialized_with_run_ids.evaluation + assert len(deserialized.true_subset.asset_partitions) == 1 + assert len(deserialized.child_evaluations) == 2 + materialize_evaluation, not_skip_evaluation = deserialized.child_evaluations + assert len(materialize_evaluation.child_evaluations) == 1 + cron_rule_evaluation = materialize_evaluation.child_evaluations[0] + assert len(cron_rule_evaluation.child_evaluations) == 0 + assert cron_rule_evaluation.subsets_with_metadata == [] + assert len(not_skip_evaluation.child_evaluations) == 1 - # Previously serialized asset evaluations do not contain rule snapshots, so - # we override to be None - expected_asset_evaluation = expected_asset_evaluation._replace(rule_snapshots=None) - # json doesn't handle tuples, so they get turned into lists - assert ( - deserialize_value(serialized_asset_evaluation)._replace( - partition_subsets_by_condition=[ - tuple(t) for t in expected_asset_evaluation.partition_subsets_by_condition - ] - ) - == expected_asset_evaluation +def test_backcompat_partitioned_asset() -> None: + serialized_asset_evaluation = ( + '{"__class__": "AutoMaterializeAssetEvaluation", "asset_key": {"__class__": "AssetKey",' + ' "path": ["B"]}, "num_discarded": 1, "num_requested": 1, "num_skipped": 1, ' + '"partition_subsets_by_condition": [[{"__class__": "AutoMaterializeRuleEvaluation", ' + '"evaluation_data": {"__class__": "ParentUpdatedRuleEvaluationData", "updated_asset_keys": ' + '{"__frozenset__": [{"__class__": "AssetKey", "path": ["A"]}]}, "will_update_asset_keys": ' + '{"__frozenset__": []}}, "rule_snapshot": {"__class__": "AutoMaterializeRuleSnapshot", ' + '"class_name": "MaterializeOnParentUpdatedRule", "decision_type": {"__enum__": ' + '"AutoMaterializeDecisionType.MATERIALIZE"}, "description": "upstream data has changed ' + 'since latest materialization"}}, {"__class__": "SerializedPartitionsSubset", ' + '"serialized_partitions_def_class_name": "DailyPartitionsDefinition", ' + '"serialized_partitions_def_unique_id": "809725ad60ffac0302d5c81f6e45865e21ec0b85", ' + '"serialized_subset": "{\\"version\\": 1, \\"time_windows\\": [[1357344000.0, 1357603200.0]], ' + '\\"num_partitions\\": 3}"}], [{"__class__": "AutoMaterializeRuleEvaluation", ' + '"evaluation_data": null, "rule_snapshot": {"__class__": "AutoMaterializeRuleSnapshot", ' + '"class_name": "MaterializeOnMissingRule", "decision_type": {"__enum__": ' + '"AutoMaterializeDecisionType.MATERIALIZE"}, "description": "materialization is missing"}}, ' + '{"__class__": "SerializedPartitionsSubset", "serialized_partitions_def_class_name": ' + '"DailyPartitionsDefinition", "serialized_partitions_def_unique_id": ' + '"809725ad60ffac0302d5c81f6e45865e21ec0b85", "serialized_subset": ' + '"{\\"version\\": 1, \\"time_windows\\": [[1357344000.0, 1357603200.0]], \\"num_partitions\\": 3}"}], ' + '[{"__class__": "AutoMaterializeRuleEvaluation", "evaluation_data": {"__class__": ' + '"WaitingOnAssetsRuleEvaluationData", "waiting_on_asset_keys": {"__frozenset__": ' + '[{"__class__": "AssetKey", "path": ["A"]}]}}, "rule_snapshot": {"__class__": ' + '"AutoMaterializeRuleSnapshot", "class_name": "SkipOnParentMissingRule", "decision_type": ' + '{"__enum__": "AutoMaterializeDecisionType.SKIP"}, "description": "waiting on upstream data ' + 'to be present"}}, {"__class__": "SerializedPartitionsSubset", ' + '"serialized_partitions_def_class_name": "DailyPartitionsDefinition", ' + '"serialized_partitions_def_unique_id": "809725ad60ffac0302d5c81f6e45865e21ec0b85", ' + '"serialized_subset": "{\\"version\\": 1, \\"time_windows\\": [[1357516800.0, 1357603200.0]], ' + '\\"num_partitions\\": 1}"}], [{"__class__": "AutoMaterializeRuleEvaluation", ' + '"evaluation_data": null, "rule_snapshot": {"__class__": "AutoMaterializeRuleSnapshot", ' + '"class_name": "DiscardOnMaxMaterializationsExceededRule", "decision_type": {"__enum__": ' + '"AutoMaterializeDecisionType.DISCARD"}, "description": "exceeds 1 materialization(s) per ' + 'minute"}}, {"__class__": "SerializedPartitionsSubset", ' + '"serialized_partitions_def_class_name": "DailyPartitionsDefinition", ' + '"serialized_partitions_def_unique_id": "809725ad60ffac0302d5c81f6e45865e21ec0b85", ' + '"serialized_subset": "{\\"version\\": 1, \\"time_windows\\": [[1357344000.0, 1357430400.0]], ' + '\\"num_partitions\\": 1}"}]], "rule_snapshots": [{"__class__": "AutoMaterializeRuleSnapshot", ' + '"class_name": "SkipOnBackfillInProgressRule", "decision_type": {"__enum__": ' + '"AutoMaterializeDecisionType.SKIP"}, "description": "targeted by an in-progress backfill"' + '}, {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "MaterializeOnMissingRule", ' + '"decision_type": {"__enum__": "AutoMaterializeDecisionType.MATERIALIZE"}, "description": ' + '"materialization is missing"}, {"__class__": "AutoMaterializeRuleSnapshot", ' + '"class_name": "MaterializeOnRequiredForFreshnessRule", "decision_type": {"__enum__": ' + '"AutoMaterializeDecisionType.MATERIALIZE"}, "description": "required to meet this or ' + 'downstream asset\'s freshness policy"}, {"__class__": "AutoMaterializeRuleSnapshot", ' + '"class_name": "SkipOnParentOutdatedRule", "decision_type": {"__enum__": ' + '"AutoMaterializeDecisionType.SKIP"}, "description": "waiting on upstream data to be up to ' + 'date"}, {"__class__": "AutoMaterializeRuleSnapshot", "class_name": ' + '"SkipOnParentMissingRule", "decision_type": {"__enum__": ' + '"AutoMaterializeDecisionType.SKIP"}, "description": "waiting on upstream data to be ' + 'present"}, {"__class__": "AutoMaterializeRuleSnapshot", "class_name": ' + '"SkipOnRequiredButNonexistentParentsRule", "decision_type": {"__enum__": ' + '"AutoMaterializeDecisionType.SKIP"}, "description": "required parent partitions do not ' + 'exist"}, {"__class__": "AutoMaterializeRuleSnapshot", "class_name": ' + '"MaterializeOnParentUpdatedRule", "decision_type": {"__enum__": ' + '"AutoMaterializeDecisionType.MATERIALIZE"}, "description": "upstream data has changed ' + 'since latest materialization"}], "run_ids": {"__set__": []}}' ) - assert ( - deserialize_value(serialize_value(expected_asset_evaluation))._replace( - partition_subsets_by_condition=[ - tuple(t) for t in expected_asset_evaluation.partition_subsets_by_condition - ] - ) - == expected_asset_evaluation + deserialized_with_run_ids = deserialize_value( + serialized_asset_evaluation, AssetConditionEvaluationWithRunIds ) + deserialized = deserialized_with_run_ids.evaluation + + # all subsets should have zero size + assert deserialized.true_subset.size == 0 + assert len(deserialized.child_evaluations) == 2 + (materialize_evaluation, not_skip_evaluation) = deserialized.child_evaluations + assert materialize_evaluation.true_subset.size == 0 + assert not_skip_evaluation.true_subset.size == 0 + + skip_evaluation = not_skip_evaluation.child_evaluations[0] + assert skip_evaluation.true_subset.size == 0 + assert len(skip_evaluation.child_evaluations) == 4 + assert skip_evaluation.child_evaluations[0].true_subset.size == 0 diff --git a/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/updated_scenarios/cron_scenarios.py b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/updated_scenarios/cron_scenarios.py index 28cb890436696..1fdc6ba54caad 100644 --- a/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/updated_scenarios/cron_scenarios.py +++ b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/updated_scenarios/cron_scenarios.py @@ -131,7 +131,6 @@ def get_cron_policy( ), ], num_requested=0, - num_skipped=1, ) .with_runs(run_request("A")) .with_current_time_advanced(seconds=30) @@ -149,7 +148,6 @@ def get_cron_policy( ), ], num_requested=0, - num_skipped=1, ) .with_runs(run_request("B")) .with_current_time_advanced(seconds=30) @@ -211,7 +209,6 @@ def get_cron_policy( ), ], num_requested=0, - num_skipped=1, ) .with_runs(run_request("A", hour_partition_key(state.current_time, delta=1))) .with_current_time_advanced(seconds=30) @@ -232,7 +229,6 @@ def get_cron_policy( ), ], num_requested=0, - num_skipped=1, ) .with_runs(run_request("B", hour_partition_key(state.current_time, delta=1))) .with_current_time_advanced(seconds=30) diff --git a/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/updated_scenarios/partition_scenarios.py b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/updated_scenarios/partition_scenarios.py index a6ff9139f070f..7a6b50b3043e8 100644 --- a/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/updated_scenarios/partition_scenarios.py +++ b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/updated_scenarios/partition_scenarios.py @@ -192,7 +192,6 @@ ), ], num_requested=1, - num_discarded=29, ), ), AssetDaemonScenario( diff --git a/python_modules/libraries/dagster-mysql/dagster_mysql/schedule_storage/schedule_storage.py b/python_modules/libraries/dagster-mysql/dagster_mysql/schedule_storage/schedule_storage.py index b618ec6173da8..edd7599f06080 100644 --- a/python_modules/libraries/dagster-mysql/dagster_mysql/schedule_storage/schedule_storage.py +++ b/python_modules/libraries/dagster-mysql/dagster_mysql/schedule_storage/schedule_storage.py @@ -6,8 +6,8 @@ import sqlalchemy.dialects as db_dialects import sqlalchemy.pool as db_pool from dagster._config.config_schema import UserConfigSchema -from dagster._core.definitions.auto_materialize_rule_evaluation import ( - AutoMaterializeAssetEvaluation, +from dagster._core.definitions.asset_condition import ( + AssetConditionEvaluationWithRunIds, ) from dagster._core.storage.config import MySqlStorageConfig, mysql_config from dagster._core.storage.schedules import ScheduleStorageSqlMetadata, SqlScheduleStorage @@ -174,7 +174,7 @@ def _add_or_update_instigators_table(self, conn: Connection, state) -> None: def add_auto_materialize_asset_evaluations( self, evaluation_id: int, - asset_evaluations: Sequence[AutoMaterializeAssetEvaluation], + asset_evaluations: Sequence[AssetConditionEvaluationWithRunIds], ): if not asset_evaluations: return @@ -187,8 +187,6 @@ def add_auto_materialize_asset_evaluations( "asset_key": evaluation.asset_key.to_string(), "asset_evaluation_body": serialize_value(evaluation), "num_requested": evaluation.num_requested, - "num_skipped": evaluation.num_skipped, - "num_discarded": evaluation.num_discarded, } for evaluation in asset_evaluations ] @@ -198,8 +196,6 @@ def add_auto_materialize_asset_evaluations( upsert_stmt = insert_stmt.on_duplicate_key_update( asset_evaluation_body=insert_stmt.inserted.asset_evaluation_body, num_requested=insert_stmt.inserted.num_requested, - num_skipped=insert_stmt.inserted.num_skipped, - num_discarded=insert_stmt.inserted.num_discarded, ) with self.connect() as conn: diff --git a/python_modules/libraries/dagster-postgres/dagster_postgres/schedule_storage/schedule_storage.py b/python_modules/libraries/dagster-postgres/dagster_postgres/schedule_storage/schedule_storage.py index 9a21ea65c0056..42c296b8570af 100644 --- a/python_modules/libraries/dagster-postgres/dagster_postgres/schedule_storage/schedule_storage.py +++ b/python_modules/libraries/dagster-postgres/dagster_postgres/schedule_storage/schedule_storage.py @@ -6,9 +6,7 @@ import sqlalchemy.dialects as db_dialects import sqlalchemy.pool as db_pool from dagster._config.config_schema import UserConfigSchema -from dagster._core.definitions.auto_materialize_rule_evaluation import ( - AutoMaterializeAssetEvaluation, -) +from dagster._core.definitions.asset_condition import AssetConditionEvaluationWithRunIds from dagster._core.scheduler.instigation import InstigatorState from dagster._core.storage.config import PostgresStorageConfig, pg_config from dagster._core.storage.schedules import ScheduleStorageSqlMetadata, SqlScheduleStorage @@ -181,7 +179,7 @@ def _add_or_update_instigators_table(self, conn: Connection, state: InstigatorSt def add_auto_materialize_asset_evaluations( self, evaluation_id: int, - asset_evaluations: Sequence[AutoMaterializeAssetEvaluation], + asset_evaluations: Sequence[AssetConditionEvaluationWithRunIds], ): if not asset_evaluations: return @@ -193,8 +191,6 @@ def add_auto_materialize_asset_evaluations( "asset_key": evaluation.asset_key.to_string(), "asset_evaluation_body": serialize_value(evaluation), "num_requested": evaluation.num_requested, - "num_skipped": evaluation.num_skipped, - "num_discarded": evaluation.num_discarded, } for evaluation in asset_evaluations ] @@ -207,8 +203,6 @@ def add_auto_materialize_asset_evaluations( set_={ "asset_evaluation_body": insert_stmt.excluded.asset_evaluation_body, "num_requested": insert_stmt.excluded.num_requested, - "num_skipped": insert_stmt.excluded.num_skipped, - "num_discarded": insert_stmt.excluded.num_discarded, }, ) From 7008767cf287e4639d2987b3d32418aace75e28e Mon Sep 17 00:00:00 2001 From: Owen Kephart Date: Tue, 12 Dec 2023 15:00:05 -0800 Subject: [PATCH 06/56] Simplify Context --- .../_core/definitions/asset_condition.py | 44 +-- .../asset_condition_evaluation_context.py | 277 +++++++++--------- .../_core/definitions/asset_daemon_context.py | 11 +- .../_core/definitions/asset_daemon_cursor.py | 12 +- .../definitions/auto_materialize_rule.py | 38 ++- .../auto_materialize_rule_evaluation.py | 43 +-- .../freshness_based_auto_materialize.py | 6 +- .../dagster/_utils/test/schedule_storage.py | 10 +- scripts/run-pyright.py | 4 +- 9 files changed, 225 insertions(+), 220 deletions(-) diff --git a/python_modules/dagster/dagster/_core/definitions/asset_condition.py b/python_modules/dagster/dagster/_core/definitions/asset_condition.py index c23a368685b9d..95951555bfbc3 100644 --- a/python_modules/dagster/dagster/_core/definitions/asset_condition.py +++ b/python_modules/dagster/dagster/_core/definitions/asset_condition.py @@ -32,14 +32,7 @@ class AssetConditionSnapshot(NamedTuple): class_name: str description: str - child_hashes: Sequence[str] - - @property - def hash(self) -> str: - """Returns a unique hash for this node in the tree.""" - return hashlib.md5( - "".join([self.class_name, self.description, *self.child_hashes]).encode("utf-8") - ).hexdigest() + unique_id: str @whitelist_for_serdes @@ -92,9 +85,9 @@ def for_child(self, child_condition: "AssetCondition") -> Optional["AssetConditi """Returns the evaluation of a given child condition by finding the child evaluation that has an identical hash to the given condition. """ - child_hash = child_condition.snapshot.hash + child_unique_id = child_condition.snapshot.unique_id for child_evaluation in self.child_evaluations: - if child_evaluation.condition_snapshot.hash == child_hash: + if child_evaluation.condition_snapshot.unique_id == child_unique_id: return child_evaluation return None @@ -127,6 +120,14 @@ class AssetCondition(ABC): new conditions using the `&` (and), `|` (or), and `~` (not) operators. """ + @property + def unique_id(self) -> str: + parts = [ + self.__class__.__name__, + *[child.unique_id for child in self.children], + ] + return hashlib.md5("".join(parts).encode()).hexdigest() + @abstractmethod def evaluate(self, context: AssetConditionEvaluationContext) -> AssetConditionEvaluation: raise NotImplementedError() @@ -164,10 +165,6 @@ def is_legacy(self) -> bool: def children(self) -> Sequence["AssetCondition"]: return [] - @property - def indexed_children(self) -> Sequence[Tuple[int, "AssetCondition"]]: - return list(enumerate(self.children)) - @property def not_discard_condition(self) -> Optional["AssetCondition"]: if not self.is_legacy or not len(self.children) == 3: @@ -180,7 +177,7 @@ def snapshot(self) -> AssetConditionSnapshot: return AssetConditionSnapshot( class_name=self.__class__.__name__, description=str(self), - child_hashes=[child.snapshot.hash for child in self.children], + unique_id=self.unique_id, ) @@ -190,6 +187,11 @@ class RuleCondition( ): """This class represents the condition that a particular AutoMaterializeRule is satisfied.""" + @property + def unique_id(self) -> str: + parts = [self.rule.__class__.__name__, self.rule.description] + return hashlib.md5("".join(parts).encode()).hexdigest() + def evaluate(self, context: AssetConditionEvaluationContext) -> AssetConditionEvaluation: context.root_context.daemon_context._verbose_log_fn( # noqa f"Evaluating rule: {self.rule.to_snapshot()}" @@ -215,10 +217,8 @@ class AndAssetCondition( def evaluate(self, context: AssetConditionEvaluationContext) -> AssetConditionEvaluation: child_evaluations: List[AssetConditionEvaluation] = [] true_subset = context.candidate_subset - for index, child in self.indexed_children: - child_context = context.for_child( - condition=child, candidate_subset=true_subset, child_index=index - ) + for child in self.children: + child_context = context.for_child(condition=child, candidate_subset=true_subset) result = child.evaluate(child_context) child_evaluations.append(result) true_subset &= result.true_subset @@ -239,9 +239,9 @@ class OrAssetCondition( def evaluate(self, context: AssetConditionEvaluationContext) -> AssetConditionEvaluation: child_evaluations: List[AssetConditionEvaluation] = [] true_subset = context.empty_subset() - for index, child in self.indexed_children: + for child in self.children: child_context = context.for_child( - condition=child, candidate_subset=context.candidate_subset, child_index=index + condition=child, candidate_subset=context.candidate_subset ) result = child.evaluate(child_context) child_evaluations.append(result) @@ -270,7 +270,7 @@ def child(self) -> AssetCondition: def evaluate(self, context: AssetConditionEvaluationContext) -> AssetConditionEvaluation: child_context = context.for_child( - condition=self.child, candidate_subset=context.candidate_subset, child_index=0 + condition=self.child, candidate_subset=context.candidate_subset ) result = self.child.evaluate(child_context) true_subset = context.candidate_subset - result.true_subset diff --git a/python_modules/dagster/dagster/_core/definitions/asset_condition_evaluation_context.py b/python_modules/dagster/dagster/_core/definitions/asset_condition_evaluation_context.py index 565fdbd579665..e8593ab139ec3 100644 --- a/python_modules/dagster/dagster/_core/definitions/asset_condition_evaluation_context.py +++ b/python_modules/dagster/dagster/_core/definitions/asset_condition_evaluation_context.py @@ -1,7 +1,8 @@ +import dataclasses import datetime import functools from dataclasses import dataclass -from typing import TYPE_CHECKING, AbstractSet, Mapping, Optional, Sequence +from typing import TYPE_CHECKING, AbstractSet, Any, Callable, Mapping, Optional, Sequence from dagster._core.definitions.data_time import CachingDataTimeResolver from dagster._core.definitions.events import AssetKey, AssetKeyPartitionKey @@ -21,15 +22,28 @@ from .asset_daemon_context import AssetDaemonContext +def root_property(fn: Callable[[Any], Any]) -> Callable[[Any], Any]: + """Ensures that a given property is always accessed via the root context, ensuring that any + cached properties are accessed correctly. + """ + + def wrapped(self: Any) -> Any: + return fn(self.root_context) + + return wrapped + + @dataclass(frozen=True) -class RootAssetConditionEvaluationContext: +class AssetConditionEvaluationContext: """Context object containing methods and properties used for evaluating the entire state of an asset's automation rules. """ asset_key: AssetKey + condition: "AssetCondition" asset_cursor: Optional[AssetDaemonAssetCursor] - root_condition: "AssetCondition" + previous_evaluation: Optional["AssetConditionEvaluation"] + candidate_subset: AssetSubset instance_queryer: CachingInstanceQueryer data_time_resolver: CachingDataTimeResolver @@ -38,6 +52,55 @@ class RootAssetConditionEvaluationContext: evaluation_results_by_key: Mapping[AssetKey, "AssetConditionEvaluation"] expected_data_time_mapping: Mapping[AssetKey, Optional[datetime.datetime]] + root_ref: Optional["AssetConditionEvaluationContext"] = None + + @staticmethod + def create( + asset_key: AssetKey, + condition: "AssetCondition", + asset_cursor: Optional[AssetDaemonAssetCursor], + instance_queryer: CachingInstanceQueryer, + data_time_resolver: CachingDataTimeResolver, + daemon_context: "AssetDaemonContext", + evaluation_results_by_key: Mapping[AssetKey, "AssetConditionEvaluation"], + expected_data_time_mapping: Mapping[AssetKey, Optional[datetime.datetime]], + ) -> "AssetConditionEvaluationContext": + return AssetConditionEvaluationContext( + asset_key=asset_key, + condition=condition, + asset_cursor=asset_cursor, + previous_evaluation=asset_cursor.previous_evaluation if asset_cursor else None, + candidate_subset=AssetSubset.all( + asset_key, + instance_queryer.asset_graph.get_partitions_def(asset_key), + instance_queryer, + instance_queryer.evaluation_time, + ), + data_time_resolver=data_time_resolver, + instance_queryer=instance_queryer, + daemon_context=daemon_context, + evaluation_results_by_key=evaluation_results_by_key, + expected_data_time_mapping=expected_data_time_mapping, + ) + + def for_child( + self, condition: "AssetCondition", candidate_subset: AssetSubset + ) -> "AssetConditionEvaluationContext": + return dataclasses.replace( + self, + condition=condition, + candidate_subset=candidate_subset, + previous_evaluation=self.previous_evaluation.for_child(condition) + if self.previous_evaluation + else None, + root_ref=self.root_context, + ) + + @property + def root_context(self) -> "AssetConditionEvaluationContext": + """A reference to the context of the root condition for this evaluation.""" + return self.root_ref or self + @property def asset_graph(self) -> AssetGraph: return self.instance_queryer.asset_graph @@ -51,13 +114,20 @@ def evaluation_time(self) -> datetime.datetime: """Returns the time at which this rule is being evaluated.""" return self.instance_queryer.evaluation_time - @functools.cached_property - def latest_evaluation(self) -> Optional["AssetConditionEvaluation"]: + @property + def previous_max_storage_id(self) -> Optional[int]: + if not self.asset_cursor: + return None + return self.asset_cursor.previous_max_storage_id + + @property + def previous_evaluation_timestamp(self) -> Optional[float]: if not self.asset_cursor: return None - return self.asset_cursor.latest_evaluation + return self.asset_cursor.previous_evaluation_timestamp @functools.cached_property + @root_property def parent_will_update_subset(self) -> AssetSubset: """Returns the set of asset partitions whose parents will be updated on this tick, and which can be materialized in the same run as this asset. @@ -73,14 +143,16 @@ def parent_will_update_subset(self) -> AssetSubset: subset |= parent_subset._replace(asset_key=self.asset_key) return subset - @property + @functools.cached_property + @root_property def previous_tick_requested_subset(self) -> AssetSubset: """Returns the set of asset partitions that were requested on the previous tick.""" - if not self.latest_evaluation: + if not self.previous_evaluation: return self.empty_subset() - return self.latest_evaluation.true_subset + return self.previous_evaluation.true_subset @functools.cached_property + @root_property def materialized_since_previous_tick_subset(self) -> AssetSubset: """Returns the set of asset partitions that were materialized since the previous tick.""" return AssetSubset.from_asset_partitions_set( @@ -89,23 +161,27 @@ def materialized_since_previous_tick_subset(self) -> AssetSubset: self.instance_queryer.get_asset_partitions_updated_after_cursor( self.asset_key, asset_partitions=None, - after_cursor=self.asset_cursor.latest_storage_id if self.asset_cursor else None, + after_cursor=self.asset_cursor.previous_max_storage_id + if self.asset_cursor + else None, respect_materialization_data_versions=False, ), ) @functools.cached_property + @root_property def materialized_requested_or_discarded_since_previous_tick_subset(self) -> AssetSubset: """Returns the set of asset partitions that were materialized since the previous tick.""" - if not self.latest_evaluation: + if not self.previous_evaluation: return self.materialized_since_previous_tick_subset return ( self.materialized_since_previous_tick_subset - | self.latest_evaluation.true_subset - | (self.latest_evaluation.discard_subset(self.root_condition) or self.empty_subset()) + | self.previous_evaluation.true_subset + | (self.previous_evaluation.discard_subset(self.condition) or self.empty_subset()) ) @functools.cached_property + @root_property def never_materialized_requested_or_discarded_root_subset(self) -> AssetSubset: if self.asset_key not in self.asset_graph.root_materializable_or_observable_asset_keys: return self.empty_subset() @@ -122,6 +198,52 @@ def never_materialized_requested_or_discarded_root_subset(self) -> AssetSubset: ) return unhandled_subset - self.materialized_since_previous_tick_subset + @property + @root_property + def parent_has_updated_subset(self) -> AssetSubset: + """Returns the set of asset partitions whose parents have updated since the last time this + condition was evaluated. + """ + return AssetSubset.from_asset_partitions_set( + self.asset_key, + self.partitions_def, + self.root_context.instance_queryer.asset_partitions_with_newly_updated_parents( + latest_storage_id=self.previous_max_storage_id, + child_asset_key=self.root_context.asset_key, + map_old_time_partitions=False, + ), + ) + + @property + def candidate_parent_has_or_will_update_subset(self) -> AssetSubset: + """Returns the set of candidates for this tick which have parents that have updated since + the previous tick, or will update on this tick. + """ + return self.candidate_subset & ( + self.parent_has_updated_subset | self.root_context.parent_will_update_subset + ) + + @property + def candidates_not_evaluated_on_previous_tick_subset(self) -> AssetSubset: + """Returns the set of candidates for this tick which were not candidates on the previous + tick. + """ + if not self.previous_evaluation or not self.previous_evaluation.candidate_subset: + return self.candidate_subset + return self.candidate_subset - self.previous_evaluation.candidate_subset + + @property + def previous_tick_subsets_with_metadata(self) -> Sequence["AssetSubsetWithMetadata"]: + """Returns the RuleEvaluationResults calculated on the previous tick for this condition.""" + return self.previous_evaluation.subsets_with_metadata if self.previous_evaluation else [] + + @property + def previous_tick_true_subset(self) -> AssetSubset: + """Returns the set of asset partitions that were true for this condition on the previous tick.""" + if not self.previous_evaluation: + return self.empty_subset() + return self.previous_evaluation.true_subset + def materializable_in_same_run(self, child_key: AssetKey, parent_key: AssetKey) -> bool: """Returns whether a child asset can be materialized in the same run as a parent asset.""" from dagster._core.definitions.external_asset_graph import ExternalAssetGraph @@ -175,19 +297,6 @@ def will_update_asset_partition(self, asset_partition: AssetKeyPartitionKey) -> def empty_subset(self) -> AssetSubset: return AssetSubset.empty(self.asset_key, self.partitions_def) - def get_root_condition_context(self) -> "AssetConditionEvaluationContext": - return AssetConditionEvaluationContext( - root_context=self, - condition=self.root_condition, - candidate_subset=AssetSubset.all( - asset_key=self.asset_key, - partitions_def=self.partitions_def, - dynamic_partitions_store=self.instance_queryer, - current_time=self.instance_queryer.evaluation_time, - ), - latest_evaluation=self.latest_evaluation, - ) - def get_new_asset_cursor( self, evaluation: "AssetConditionEvaluation" ) -> AssetDaemonAssetCursor: @@ -203,120 +312,12 @@ def get_new_asset_cursor( previous_handled_subset | self.materialized_requested_or_discarded_since_previous_tick_subset | evaluation.true_subset - | (evaluation.discard_subset(self.root_condition) or self.empty_subset()) + | (evaluation.discard_subset(self.condition) or self.empty_subset()) ) return AssetDaemonAssetCursor( asset_key=self.asset_key, - latest_storage_id=self.daemon_context.get_new_latest_storage_id(), - latest_evaluation=evaluation, - latest_evaluation_timestamp=self.evaluation_time.timestamp(), + previous_max_storage_id=self.daemon_context.get_new_latest_storage_id(), + previous_evaluation=evaluation, + previous_evaluation_timestamp=self.evaluation_time.timestamp(), materialized_requested_or_discarded_subset=new_handled_subset, ) - - -@dataclass(frozen=True) -class AssetConditionEvaluationContext: - """Context object containing methods and properties used for evaluating a particular AssetCondition.""" - - root_context: RootAssetConditionEvaluationContext - condition: "AssetCondition" - candidate_subset: AssetSubset - latest_evaluation: Optional["AssetConditionEvaluation"] - - @property - def asset_key(self) -> AssetKey: - return self.root_context.asset_key - - @property - def partitions_def(self) -> Optional[PartitionsDefinition]: - return self.root_context.partitions_def - - @property - def asset_cursor(self) -> Optional[AssetDaemonAssetCursor]: - return self.root_context.asset_cursor - - @property - def asset_graph(self) -> AssetGraph: - return self.root_context.asset_graph - - @property - def instance_queryer(self) -> CachingInstanceQueryer: - return self.root_context.instance_queryer - - @property - def max_storage_id(self) -> Optional[int]: - return self.asset_cursor.latest_storage_id if self.asset_cursor else None - - @property - def latest_evaluation_timestamp(self) -> Optional[float]: - return self.asset_cursor.latest_evaluation_timestamp if self.asset_cursor else None - - @property - def previous_tick_true_subset(self) -> AssetSubset: - """Returns the set of asset partitions that were true on the previous tick.""" - if not self.latest_evaluation: - return self.empty_subset() - return self.latest_evaluation.true_subset - - @property - def parent_has_updated_subset(self) -> AssetSubset: - """Returns the set of asset partitions whose parents have updated since the last time this - condition was evaluated. - """ - return AssetSubset.from_asset_partitions_set( - self.asset_key, - self.partitions_def, - self.root_context.instance_queryer.asset_partitions_with_newly_updated_parents( - latest_storage_id=self.max_storage_id, - child_asset_key=self.root_context.asset_key, - map_old_time_partitions=False, - ), - ) - - @property - def candidate_parent_has_or_will_update_subset(self) -> AssetSubset: - """Returns the set of candidates for this tick which have parents that have updated since - the previous tick, or will update on this tick. - """ - return self.candidate_subset & ( - self.parent_has_updated_subset | self.root_context.parent_will_update_subset - ) - - @property - def candidates_not_evaluated_on_previous_tick_subset(self) -> AssetSubset: - """Returns the set of candidates for this tick which were not candidates on the previous - tick. - """ - if not self.latest_evaluation or not self.latest_evaluation.candidate_subset: - return self.candidate_subset - return self.candidate_subset - self.latest_evaluation.candidate_subset - - @property - def materialized_since_previous_tick_subset(self) -> AssetSubset: - """Returns the set of asset partitions that were materialized since the previous tick.""" - return self.root_context.materialized_since_previous_tick_subset - - @property - def materialized_requested_or_discarded_since_previous_tick_subset(self) -> AssetSubset: - """Returns the set of asset partitions that were materialized since the previous tick.""" - return self.root_context.materialized_requested_or_discarded_since_previous_tick_subset - - @property - def previous_tick_subsets_with_metadata(self) -> Sequence["AssetSubsetWithMetadata"]: - """Returns the RuleEvaluationResults calculated on the previous tick for this condition.""" - return self.latest_evaluation.subsets_with_metadata if self.latest_evaluation else [] - - def empty_subset(self) -> AssetSubset: - return self.root_context.empty_subset() - - def for_child( - self, condition: "AssetCondition", candidate_subset: AssetSubset, child_index: int - ) -> "AssetConditionEvaluationContext": - return AssetConditionEvaluationContext( - root_context=self.root_context, - condition=condition, - candidate_subset=candidate_subset, - latest_evaluation=self.latest_evaluation.for_child(condition) - if self.latest_evaluation - else None, - ) diff --git a/python_modules/dagster/dagster/_core/definitions/asset_daemon_context.py b/python_modules/dagster/dagster/_core/definitions/asset_daemon_context.py index 651fd99ad5d7b..36feb348b25ee 100644 --- a/python_modules/dagster/dagster/_core/definitions/asset_daemon_context.py +++ b/python_modules/dagster/dagster/_core/definitions/asset_daemon_context.py @@ -36,7 +36,9 @@ from ... import PartitionKeyRange from ..storage.tags import ASSET_PARTITION_RANGE_END_TAG, ASSET_PARTITION_RANGE_START_TAG from .asset_condition import AssetConditionEvaluation -from .asset_condition_evaluation_context import RootAssetConditionEvaluationContext +from .asset_condition_evaluation_context import ( + AssetConditionEvaluationContext, +) from .asset_daemon_cursor import AssetDaemonAssetCursor, AssetDaemonCursor from .asset_graph import AssetGraph from .auto_materialize_rule import AutoMaterializeRule @@ -239,19 +241,18 @@ def evaluate_asset( self.asset_graph.auto_materialize_policies_by_key.get(asset_key) ).to_asset_condition() - context = RootAssetConditionEvaluationContext( + context = AssetConditionEvaluationContext.create( asset_key=asset_key, asset_cursor=self.cursor.asset_cursor_for_key(asset_key, self.asset_graph), - root_condition=asset_condition, + condition=asset_condition, instance_queryer=self.instance_queryer, data_time_resolver=self.data_time_resolver, daemon_context=self, evaluation_results_by_key=evaluation_results_by_key, expected_data_time_mapping=expected_data_time_mapping, ) - condition_context = context.get_root_condition_context() - evaluation = asset_condition.evaluate(condition_context) + evaluation = asset_condition.evaluate(context) asset_cursor = context.get_new_asset_cursor(evaluation=evaluation) expected_data_time = get_expected_data_time_for_asset_key( diff --git a/python_modules/dagster/dagster/_core/definitions/asset_daemon_cursor.py b/python_modules/dagster/dagster/_core/definitions/asset_daemon_cursor.py index 75a2ed1d73ea0..eb0ac555ad596 100644 --- a/python_modules/dagster/dagster/_core/definitions/asset_daemon_cursor.py +++ b/python_modules/dagster/dagster/_core/definitions/asset_daemon_cursor.py @@ -33,9 +33,9 @@ class AssetDaemonAssetCursor(NamedTuple): """ asset_key: AssetKey - latest_storage_id: Optional[int] - latest_evaluation_timestamp: Optional[float] - latest_evaluation: Optional["AssetConditionEvaluation"] + previous_max_storage_id: Optional[int] + previous_evaluation_timestamp: Optional[float] + previous_evaluation: Optional["AssetConditionEvaluation"] materialized_requested_or_discarded_subset: AssetSubset @@ -82,9 +82,9 @@ def asset_cursor_for_key( handled_subset = AssetSubset.empty(asset_key, partitions_def) return AssetDaemonAssetCursor( asset_key=asset_key, - latest_storage_id=self.latest_storage_id, - latest_evaluation_timestamp=self.latest_evaluation_timestamp, - latest_evaluation=self.latest_evaluation_by_asset_key.get(asset_key), + previous_max_storage_id=self.latest_storage_id, + previous_evaluation_timestamp=self.latest_evaluation_timestamp, + previous_evaluation=self.latest_evaluation_by_asset_key.get(asset_key), materialized_requested_or_discarded_subset=handled_subset, ) diff --git a/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule.py b/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule.py index 65134b80c3fe3..7fa3da5bb6b06 100644 --- a/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule.py +++ b/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule.py @@ -308,10 +308,10 @@ def missed_cron_ticks( self, context: AssetConditionEvaluationContext ) -> Sequence[datetime.datetime]: """Returns the cron ticks which have been missed since the previous cursor was generated.""" - if not context.latest_evaluation_timestamp: + if not context.previous_evaluation_timestamp: previous_dt = next( reverse_cron_string_iterator( - end_timestamp=context.root_context.evaluation_time.timestamp(), + end_timestamp=context.evaluation_time.timestamp(), cron_string=self.cron_schedule, execution_timezone=self.timezone, ) @@ -319,11 +319,11 @@ def missed_cron_ticks( return [previous_dt] missed_ticks = [] for dt in cron_string_iterator( - start_timestamp=context.latest_evaluation_timestamp, + start_timestamp=context.previous_evaluation_timestamp, cron_string=self.cron_schedule, execution_timezone=self.timezone, ): - if dt > context.root_context.evaluation_time: + if dt > context.evaluation_time: break missed_ticks.append(dt) return missed_ticks @@ -345,7 +345,7 @@ def get_new_asset_partitions_to_request( return { AssetKeyPartitionKey(context.asset_key, partition_key) for partition_key in partitions_def.get_partition_keys( - current_time=context.root_context.evaluation_time, + current_time=context.evaluation_time, dynamic_partitions_store=context.instance_queryer, ) } @@ -436,7 +436,7 @@ def passes( asset_partitions_by_latest_run_id: Dict[str, Set[AssetKeyPartitionKey]] = defaultdict(set) for asset_partition in asset_partitions: - if context.root_context.will_update_asset_partition(asset_partition): + if context.will_update_asset_partition(asset_partition): will_update_asset_partitions.add(asset_partition) else: record = context.instance_queryer.get_latest_materialization_or_observation_record( @@ -471,7 +471,7 @@ def passes( self.latest_run_required_tags.items() <= { AUTO_MATERIALIZE_TAG: "true", - **context.root_context.daemon_context.auto_materialize_run_tags, + **context.daemon_context.auto_materialize_run_tags, }.items() ): return will_update_asset_partitions | updated_partitions_with_required_tags @@ -530,7 +530,7 @@ def evaluate_for_asset(self, context: AssetConditionEvaluationContext) -> RuleEv parent_asset_partitions, # do a precise check for updated parents, factoring in data versions, as long as # we're within reasonable limits on the number of partitions to check - respect_materialization_data_versions=context.root_context.daemon_context.respect_materialization_data_versions + respect_materialization_data_versions=context.daemon_context.respect_materialization_data_versions and len(parent_asset_partitions) + subset_to_evaluate.size < 100, # ignore self-dependencies when checking for updated parents, to avoid historical # rematerializations from causing a chain of materializations to be kicked off @@ -540,7 +540,7 @@ def evaluate_for_asset(self, context: AssetConditionEvaluationContext) -> RuleEv asset_partitions_by_updated_parents[parent].add(asset_partition) for parent in parent_asset_partitions: - if context.root_context.will_update_asset_partition(parent): + if context.will_update_asset_partition(parent): asset_partitions_by_will_update_parents[parent].add(asset_partition) updated_and_will_update_parents = ( @@ -612,7 +612,7 @@ def evaluate_for_asset(self, context: AssetConditionEvaluationContext) -> RuleEv with updated parents. """ missing_asset_partitions = set( - context.root_context.never_materialized_requested_or_discarded_root_subset.asset_partitions + context.never_materialized_requested_or_discarded_root_subset.asset_partitions ) # in addition to missing root asset partitions, check any asset partitions with updated # parents to see if they're missing @@ -653,9 +653,7 @@ def evaluate_for_asset(self, context: AssetConditionEvaluationContext) -> RuleEv for candidate in subset_to_evaluate.asset_partitions: outdated_ancestors = set() # find the root cause of why this asset partition's parents are outdated (if any) - for ( - parent - ) in context.root_context.get_parents_that_will_not_be_materialized_on_current_tick( + for parent in context.get_parents_that_will_not_be_materialized_on_current_tick( asset_partition=candidate ): if context.instance_queryer.have_ignorable_partition_mapping_for_outdated( @@ -700,15 +698,13 @@ def evaluate_for_asset( ) for candidate in subset_to_evaluate.asset_partitions: missing_parent_asset_keys = set() - for ( - parent - ) in context.root_context.get_parents_that_will_not_be_materialized_on_current_tick( + for parent in context.get_parents_that_will_not_be_materialized_on_current_tick( asset_partition=candidate ): # ignore non-observable sources, which will never have a materialization or observation - if context.root_context.asset_graph.is_source( + if context.asset_graph.is_source( parent.asset_key - ) and not context.root_context.asset_graph.is_observable(parent.asset_key): + ) and not context.asset_graph.is_observable(parent.asset_key): continue if not context.instance_queryer.asset_partition_has_materialization_or_observation( parent @@ -779,10 +775,10 @@ def evaluate_for_asset( context.instance_queryer.get_parent_asset_partitions_updated_after_child( candidate, parent_partitions, - context.root_context.daemon_context.respect_materialization_data_versions, + context.daemon_context.respect_materialization_data_versions, ignored_parent_keys=set(), ) - | context.root_context.parent_will_update_subset.asset_partitions + | context.parent_will_update_subset.asset_partitions ) if self.require_update_for_all_parent_partitions: @@ -871,7 +867,7 @@ def description(self) -> str: def evaluate_for_asset(self, context: AssetConditionEvaluationContext) -> RuleEvaluationResults: backfilling_subset = ( context.instance_queryer.get_active_backfill_target_asset_graph_subset() - ).get_asset_subset(context.asset_key, context.root_context.asset_graph) + ).get_asset_subset(context.asset_key, context.asset_graph) if backfilling_subset.size == 0: return context.empty_subset(), [] diff --git a/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule_evaluation.py b/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule_evaluation.py index 0bf9e7ae5231c..8d79a567fe617 100644 --- a/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule_evaluation.py +++ b/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule_evaluation.py @@ -1,3 +1,4 @@ +import hashlib import operator from abc import ABC, abstractproperty from collections import defaultdict @@ -145,10 +146,13 @@ def _asset_condition_snapshot_from_rule_snapshot( ) -> "AssetConditionSnapshot": from .asset_condition import AssetConditionSnapshot, RuleCondition + unique_id_parts = [rule_snapshot.class_name, rule_snapshot.description] + unique_id = hashlib.md5("".join(unique_id_parts).encode()).hexdigest() + return AssetConditionSnapshot( class_name=RuleCondition.__name__, description=rule_snapshot.description, - child_hashes=[], + unique_id=unique_id, ) def _get_child_rule_evaluation( @@ -162,16 +166,10 @@ def _get_child_rule_evaluation( ) -> "AssetConditionEvaluation": from .asset_condition import ( AssetConditionEvaluation, - AssetConditionSnapshot, AssetSubsetWithMetadata, - RuleCondition, ) - condition_snapshot = AssetConditionSnapshot( - class_name=RuleCondition.__name__, - description=rule_snapshot.description, - child_hashes=[], - ) + condition_snapshot = self._asset_condition_snapshot_from_rule_snapshot(rule_snapshot) if is_partitioned: # for partitioned assets, we can't deserialize SerializedPartitionsSubset into an @@ -235,12 +233,13 @@ def _get_child_decision_type_evaluation( return None evaluation = child_evaluations[0] else: + unique_id_parts = [ + OrAssetCondition.__name__, + *[e.condition_snapshot.unique_id for e in child_evaluations], + ] + unique_id = hashlib.md5("".join(unique_id_parts).encode()).hexdigest() decision_type_snapshot = AssetConditionSnapshot( - class_name=OrAssetCondition.__name__, - description="", - child_hashes=[ - child_eval.condition_snapshot.hash for child_eval in child_evaluations - ], + class_name=OrAssetCondition.__name__, description="", unique_id=unique_id ) initial = ( AssetSubset(asset_key, DefaultPartitionsSubset(set())) @@ -261,11 +260,14 @@ def _get_child_decision_type_evaluation( return evaluation # non-materialize conditions are inverted + unique_id_parts = [ + NotAssetCondition.__name__, + evaluation.condition_snapshot.unique_id, + ] + unique_id = hashlib.md5("".join(unique_id_parts).encode()).hexdigest() return AssetConditionEvaluation( condition_snapshot=AssetConditionSnapshot( - class_name=NotAssetCondition.__name__, - description="", - child_hashes=[evaluation.condition_snapshot.hash], + class_name=NotAssetCondition.__name__, description="", unique_id=unique_id ), # for partitioned assets, we don't bother calculating the true subset, as we can't # properly deserialize the inner results @@ -329,10 +331,13 @@ def unpack( ) # the top level condition is the AND of all the sub-conditions + unique_id_parts = [ + AndAssetCondition.__name__, + *[e.condition_snapshot.unique_id for e in child_evaluations], + ] + unique_id = hashlib.md5("".join(unique_id_parts).encode()).hexdigest() condition_snapshot = AssetConditionSnapshot( - class_name=AndAssetCondition.__name__, - description="", - child_hashes=[evaluation.condition_snapshot.hash for evaluation in child_evaluations], + class_name=AndAssetCondition.__name__, description="", unique_id=unique_id ) return AssetConditionEvaluation( diff --git a/python_modules/dagster/dagster/_core/definitions/freshness_based_auto_materialize.py b/python_modules/dagster/dagster/_core/definitions/freshness_based_auto_materialize.py index b394dd208feab..97c9a6fa86808 100644 --- a/python_modules/dagster/dagster/_core/definitions/freshness_based_auto_materialize.py +++ b/python_modules/dagster/dagster/_core/definitions/freshness_based_auto_materialize.py @@ -18,7 +18,7 @@ from dagster._utils.schedules import cron_string_iterator if TYPE_CHECKING: - from .asset_condition_evaluation_context import RootAssetConditionEvaluationContext + from .asset_condition_evaluation_context import AssetConditionEvaluationContext from .auto_materialize_rule_evaluation import RuleEvaluationResults, TextRuleEvaluationData @@ -110,7 +110,7 @@ def get_execution_period_and_evaluation_data_for_policies( def get_expected_data_time_for_asset_key( - context: "RootAssetConditionEvaluationContext", will_materialize: bool + context: "AssetConditionEvaluationContext", will_materialize: bool ) -> Optional[datetime.datetime]: """Returns the data time that you would expect this asset to have if you were to execute it on this tick. @@ -153,7 +153,7 @@ def get_expected_data_time_for_asset_key( def freshness_evaluation_results_for_asset_key( - context: "RootAssetConditionEvaluationContext", + context: "AssetConditionEvaluationContext", ) -> "RuleEvaluationResults": """Returns a set of AssetKeyPartitionKeys to materialize in order to abide by the given FreshnessPolicies. diff --git a/python_modules/dagster/dagster/_utils/test/schedule_storage.py b/python_modules/dagster/dagster/_utils/test/schedule_storage.py index 31d1318990498..f6ba22529b112 100644 --- a/python_modules/dagster/dagster/_utils/test/schedule_storage.py +++ b/python_modules/dagster/dagster/_utils/test/schedule_storage.py @@ -731,7 +731,7 @@ def test_auto_materialize_asset_evaluations(self, storage) -> None: if not self.can_store_auto_materialize_asset_evaluations(): pytest.skip("Storage cannot store auto materialize asset evaluations") - condition_snapshot = AssetConditionSnapshot("foo", "bar", []) + condition_snapshot = AssetConditionSnapshot("foo", "bar", "") for _ in range(2): # test idempotency storage.add_auto_materialize_asset_evaluations( @@ -816,13 +816,13 @@ def test_auto_materialize_asset_evaluations(self, storage) -> None: # add a mix of keys - one that already is using the unique index and one that is not eval_one = AssetConditionEvaluation( - condition_snapshot=AssetConditionSnapshot("foo", "bar", []), + condition_snapshot=AssetConditionSnapshot("foo", "bar", ""), true_subset=AssetSubset(asset_key=AssetKey("asset_one"), value=True), candidate_subset=None, ).with_run_ids(set()) eval_asset_three = AssetConditionEvaluation( - condition_snapshot=AssetConditionSnapshot("foo", "bar", []), + condition_snapshot=AssetConditionSnapshot("foo", "bar", ""), true_subset=AssetSubset(asset_key=AssetKey("asset_three"), value=True), candidate_subset=None, ).with_run_ids(set()) @@ -866,7 +866,7 @@ def test_auto_materialize_asset_evaluations_with_partitions(self, storage) -> No evaluation_id=10, asset_evaluations=[ AssetConditionEvaluation( - condition_snapshot=AssetConditionSnapshot("foo", "bar", []), + condition_snapshot=AssetConditionSnapshot("foo", "bar", ""), true_subset=asset_subset, candidate_subset=None, subsets_with_metadata=[asset_subset_with_metadata], @@ -892,7 +892,7 @@ def test_purge_asset_evaluations(self, storage) -> None: evaluation_id=11, asset_evaluations=[ AssetConditionEvaluation( - condition_snapshot=AssetConditionSnapshot("foo", "bar", []), + condition_snapshot=AssetConditionSnapshot("foo", "bar", ""), true_subset=AssetSubset(asset_key=AssetKey("asset_one"), value=True), candidate_subset=None, subsets_with_metadata=[], diff --git a/scripts/run-pyright.py b/scripts/run-pyright.py index 2cbd6bb7c9bf3..4d705fe2ed503 100755 --- a/scripts/run-pyright.py +++ b/scripts/run-pyright.py @@ -203,7 +203,9 @@ def get_params(args: argparse.Namespace) -> Params: elif args.diff: mode = "path" targets = ( - subprocess.check_output(["git", "diff", "--name-only", "origin/master"]) + subprocess.check_output( + ["git", "diff", "--name-only", "origin/master", "--diff-filter=d"] + ) .decode("utf-8") .splitlines() ) From 80cbb07081be736f20ae0d0387035bf758488056 Mon Sep 17 00:00:00 2001 From: Owen Kephart Date: Tue, 12 Dec 2023 17:19:40 -0800 Subject: [PATCH 07/56] AssetConditionCursor --- .../_core/definitions/asset_condition.py | 61 +++++--- .../asset_condition_evaluation_context.py | 129 ++++++----------- .../_core/definitions/asset_daemon_context.py | 19 +-- .../_core/definitions/asset_daemon_cursor.py | 135 ++++++++++++++++-- .../definitions/auto_materialize_rule.py | 6 +- .../asset_daemon_scenario.py | 5 + 6 files changed, 228 insertions(+), 127 deletions(-) diff --git a/python_modules/dagster/dagster/_core/definitions/asset_condition.py b/python_modules/dagster/dagster/_core/definitions/asset_condition.py index 95951555bfbc3..592e541770548 100644 --- a/python_modules/dagster/dagster/_core/definitions/asset_condition.py +++ b/python_modules/dagster/dagster/_core/definitions/asset_condition.py @@ -13,6 +13,9 @@ ) import dagster._check as check +from dagster._core.definitions.asset_daemon_cursor import ( + AssetConditionCursorExtras, +) from dagster._core.definitions.events import AssetKey from dagster._core.definitions.metadata import MetadataMapping, MetadataValue from dagster._serdes.serdes import whitelist_for_serdes @@ -72,7 +75,7 @@ def equivalent_to_stored_evaluation(self, other: Optional["AssetConditionEvaluat and self.child_evaluations == other.child_evaluations ) - def discard_subset(self, condition: "AssetCondition") -> Optional[AssetSubset]: + def discarded_subset(self, condition: "AssetCondition") -> Optional[AssetSubset]: not_discard_condition = condition.not_discard_condition if not not_discard_condition or len(self.child_evaluations) != 3: return None @@ -81,6 +84,12 @@ def discard_subset(self, condition: "AssetCondition") -> Optional[AssetSubset]: discard_evaluation = not_discard_evaluation.child_evaluations[0] return discard_evaluation.true_subset + def get_requested_or_discarded_subset(self, condition: "AssetCondition") -> AssetSubset: + discarded_subset = self.discarded_subset(condition) + if discarded_subset is None: + return self.true_subset + return self.true_subset | discarded_subset + def for_child(self, child_condition: "AssetCondition") -> Optional["AssetConditionEvaluation"]: """Returns the evaluation of a given child condition by finding the child evaluation that has an identical hash to the given condition. @@ -129,7 +138,9 @@ def unique_id(self) -> str: return hashlib.md5("".join(parts).encode()).hexdigest() @abstractmethod - def evaluate(self, context: AssetConditionEvaluationContext) -> AssetConditionEvaluation: + def evaluate( + self, context: AssetConditionEvaluationContext + ) -> Tuple[AssetConditionEvaluation, Sequence[AssetConditionCursorExtras]]: raise NotImplementedError() def __and__(self, other: "AssetCondition") -> "AssetCondition": @@ -192,7 +203,9 @@ def unique_id(self) -> str: parts = [self.rule.__class__.__name__, self.rule.description] return hashlib.md5("".join(parts).encode()).hexdigest() - def evaluate(self, context: AssetConditionEvaluationContext) -> AssetConditionEvaluation: + def evaluate( + self, context: AssetConditionEvaluationContext + ) -> Tuple[AssetConditionEvaluation, Sequence[AssetConditionCursorExtras]]: context.root_context.daemon_context._verbose_log_fn( # noqa f"Evaluating rule: {self.rule.to_snapshot()}" ) @@ -205,7 +218,7 @@ def evaluate(self, context: AssetConditionEvaluationContext) -> AssetConditionEv true_subset=true_subset, candidate_subset=context.candidate_subset, subsets_with_metadata=subsets_with_metadata, - ) + ), [AssetConditionCursorExtras(condition_snapshot=self.snapshot, extras={})] class AndAssetCondition( @@ -214,20 +227,24 @@ class AndAssetCondition( ): """This class represents the condition that all of its children evaluate to true.""" - def evaluate(self, context: AssetConditionEvaluationContext) -> AssetConditionEvaluation: + def evaluate( + self, context: AssetConditionEvaluationContext + ) -> Tuple[AssetConditionEvaluation, Sequence[AssetConditionCursorExtras]]: child_evaluations: List[AssetConditionEvaluation] = [] + child_extras: List[AssetConditionCursorExtras] = [] true_subset = context.candidate_subset for child in self.children: child_context = context.for_child(condition=child, candidate_subset=true_subset) - result = child.evaluate(child_context) - child_evaluations.append(result) - true_subset &= result.true_subset + child_evaluation, child_extra = child.evaluate(child_context) + child_evaluations.append(child_evaluation) + child_extras.extend(child_extra) + true_subset &= child_evaluation.true_subset return AssetConditionEvaluation( condition_snapshot=self.snapshot, true_subset=true_subset, candidate_subset=context.candidate_subset, child_evaluations=child_evaluations, - ) + ), child_extras class OrAssetCondition( @@ -236,22 +253,26 @@ class OrAssetCondition( ): """This class represents the condition that any of its children evaluate to true.""" - def evaluate(self, context: AssetConditionEvaluationContext) -> AssetConditionEvaluation: + def evaluate( + self, context: AssetConditionEvaluationContext + ) -> Tuple[AssetConditionEvaluation, Sequence[AssetConditionCursorExtras]]: child_evaluations: List[AssetConditionEvaluation] = [] + child_extras: List[AssetConditionCursorExtras] = [] true_subset = context.empty_subset() for child in self.children: child_context = context.for_child( condition=child, candidate_subset=context.candidate_subset ) - result = child.evaluate(child_context) - child_evaluations.append(result) - true_subset |= result.true_subset + child_evaluation, child_extra = child.evaluate(child_context) + child_evaluations.append(child_evaluation) + child_extras.extend(child_extra) + true_subset |= child_evaluation.true_subset return AssetConditionEvaluation( condition_snapshot=self.snapshot, true_subset=true_subset, candidate_subset=context.candidate_subset, child_evaluations=child_evaluations, - ) + ), child_extras class NotAssetCondition( @@ -268,16 +289,18 @@ def __new__(cls, children: Sequence[AssetCondition]): def child(self) -> AssetCondition: return self.children[0] - def evaluate(self, context: AssetConditionEvaluationContext) -> AssetConditionEvaluation: + def evaluate( + self, context: AssetConditionEvaluationContext + ) -> Tuple[AssetConditionEvaluation, Sequence[AssetConditionCursorExtras]]: child_context = context.for_child( condition=self.child, candidate_subset=context.candidate_subset ) - result = self.child.evaluate(child_context) - true_subset = context.candidate_subset - result.true_subset + child_evaluation, child_extras = self.child.evaluate(child_context) + true_subset = context.candidate_subset - child_evaluation.true_subset return AssetConditionEvaluation( condition_snapshot=self.snapshot, true_subset=true_subset, candidate_subset=context.candidate_subset, - child_evaluations=[result], - ) + child_evaluations=[child_evaluation], + ), child_extras diff --git a/python_modules/dagster/dagster/_core/definitions/asset_condition_evaluation_context.py b/python_modules/dagster/dagster/_core/definitions/asset_condition_evaluation_context.py index e8593ab139ec3..b5852fb37f245 100644 --- a/python_modules/dagster/dagster/_core/definitions/asset_condition_evaluation_context.py +++ b/python_modules/dagster/dagster/_core/definitions/asset_condition_evaluation_context.py @@ -11,14 +11,12 @@ from dagster._core.definitions.time_window_partition_mapping import TimeWindowPartitionMapping from dagster._utils.caching_instance_queryer import CachingInstanceQueryer -from .asset_daemon_cursor import AssetDaemonAssetCursor +from .asset_daemon_cursor import AssetConditionCursor from .asset_graph import AssetGraph from .asset_subset import AssetSubset if TYPE_CHECKING: - from dagster._core.definitions.asset_condition import AssetSubsetWithMetadata - - from .asset_condition import AssetCondition, AssetConditionEvaluation + from .asset_condition import AssetCondition, AssetConditionEvaluation, AssetSubsetWithMetadata from .asset_daemon_context import AssetDaemonContext @@ -41,8 +39,8 @@ class AssetConditionEvaluationContext: asset_key: AssetKey condition: "AssetCondition" - asset_cursor: Optional[AssetDaemonAssetCursor] - previous_evaluation: Optional["AssetConditionEvaluation"] + cursor: AssetConditionCursor + previous_condition_evaluation: Optional["AssetConditionEvaluation"] candidate_subset: AssetSubset instance_queryer: CachingInstanceQueryer @@ -58,21 +56,23 @@ class AssetConditionEvaluationContext: def create( asset_key: AssetKey, condition: "AssetCondition", - asset_cursor: Optional[AssetDaemonAssetCursor], + cursor: AssetConditionCursor, instance_queryer: CachingInstanceQueryer, data_time_resolver: CachingDataTimeResolver, daemon_context: "AssetDaemonContext", evaluation_results_by_key: Mapping[AssetKey, "AssetConditionEvaluation"], expected_data_time_mapping: Mapping[AssetKey, Optional[datetime.datetime]], ) -> "AssetConditionEvaluationContext": + partitions_def = instance_queryer.asset_graph.get_partitions_def(asset_key) + return AssetConditionEvaluationContext( asset_key=asset_key, condition=condition, - asset_cursor=asset_cursor, - previous_evaluation=asset_cursor.previous_evaluation if asset_cursor else None, + cursor=cursor, + previous_condition_evaluation=cursor.previous_evaluation, candidate_subset=AssetSubset.all( asset_key, - instance_queryer.asset_graph.get_partitions_def(asset_key), + partitions_def, instance_queryer, instance_queryer.evaluation_time, ), @@ -89,10 +89,10 @@ def for_child( return dataclasses.replace( self, condition=condition, - candidate_subset=candidate_subset, - previous_evaluation=self.previous_evaluation.for_child(condition) - if self.previous_evaluation + previous_condition_evaluation=self.previous_condition_evaluation.for_child(condition) + if self.previous_condition_evaluation else None, + candidate_subset=candidate_subset, root_ref=self.root_context, ) @@ -116,15 +116,23 @@ def evaluation_time(self) -> datetime.datetime: @property def previous_max_storage_id(self) -> Optional[int]: - if not self.asset_cursor: - return None - return self.asset_cursor.previous_max_storage_id + return self.cursor.previous_max_storage_id @property def previous_evaluation_timestamp(self) -> Optional[float]: - if not self.asset_cursor: - return None - return self.asset_cursor.previous_evaluation_timestamp + return self.cursor.previous_evaluation_timestamp + + @property + def previous_true_subset(self) -> AssetSubset: + if self.previous_condition_evaluation is None: + return self.empty_subset() + return self.previous_condition_evaluation.true_subset + + @property + def previous_subsets_with_metadata(self) -> Sequence["AssetSubsetWithMetadata"]: + if self.previous_condition_evaluation is None: + return [] + return self.previous_condition_evaluation.subsets_with_metadata @functools.cached_property @root_property @@ -143,16 +151,7 @@ def parent_will_update_subset(self) -> AssetSubset: subset |= parent_subset._replace(asset_key=self.asset_key) return subset - @functools.cached_property - @root_property - def previous_tick_requested_subset(self) -> AssetSubset: - """Returns the set of asset partitions that were requested on the previous tick.""" - if not self.previous_evaluation: - return self.empty_subset() - return self.previous_evaluation.true_subset - - @functools.cached_property - @root_property + @property def materialized_since_previous_tick_subset(self) -> AssetSubset: """Returns the set of asset partitions that were materialized since the previous tick.""" return AssetSubset.from_asset_partitions_set( @@ -161,35 +160,35 @@ def materialized_since_previous_tick_subset(self) -> AssetSubset: self.instance_queryer.get_asset_partitions_updated_after_cursor( self.asset_key, asset_partitions=None, - after_cursor=self.asset_cursor.previous_max_storage_id - if self.asset_cursor - else None, + after_cursor=self.cursor.previous_max_storage_id if self.cursor else None, respect_materialization_data_versions=False, ), ) - @functools.cached_property - @root_property + @property + def previous_tick_requested_or_discarded_subset(self) -> AssetSubset: + if not self.cursor.previous_evaluation: + return self.empty_subset() + return self.cursor.previous_evaluation.get_requested_or_discarded_subset( + self.root_context.condition + ) + + @property def materialized_requested_or_discarded_since_previous_tick_subset(self) -> AssetSubset: """Returns the set of asset partitions that were materialized since the previous tick.""" - if not self.previous_evaluation: - return self.materialized_since_previous_tick_subset return ( self.materialized_since_previous_tick_subset - | self.previous_evaluation.true_subset - | (self.previous_evaluation.discard_subset(self.condition) or self.empty_subset()) + | self.previous_tick_requested_or_discarded_subset ) - @functools.cached_property - @root_property + @property def never_materialized_requested_or_discarded_root_subset(self) -> AssetSubset: if self.asset_key not in self.asset_graph.root_materializable_or_observable_asset_keys: return self.empty_subset() handled_subset = ( - self.asset_cursor.materialized_requested_or_discarded_subset - if self.asset_cursor - else self.empty_subset() + self.cursor.get_extras_value(self.condition, "handled_subset", AssetSubset) + or self.empty_subset() ) unhandled_subset = handled_subset.inverse( self.partitions_def, @@ -199,7 +198,6 @@ def never_materialized_requested_or_discarded_root_subset(self) -> AssetSubset: return unhandled_subset - self.materialized_since_previous_tick_subset @property - @root_property def parent_has_updated_subset(self) -> AssetSubset: """Returns the set of asset partitions whose parents have updated since the last time this condition was evaluated. @@ -228,21 +226,13 @@ def candidates_not_evaluated_on_previous_tick_subset(self) -> AssetSubset: """Returns the set of candidates for this tick which were not candidates on the previous tick. """ - if not self.previous_evaluation or not self.previous_evaluation.candidate_subset: + if not self.previous_condition_evaluation: return self.candidate_subset - return self.candidate_subset - self.previous_evaluation.candidate_subset - - @property - def previous_tick_subsets_with_metadata(self) -> Sequence["AssetSubsetWithMetadata"]: - """Returns the RuleEvaluationResults calculated on the previous tick for this condition.""" - return self.previous_evaluation.subsets_with_metadata if self.previous_evaluation else [] - - @property - def previous_tick_true_subset(self) -> AssetSubset: - """Returns the set of asset partitions that were true for this condition on the previous tick.""" - if not self.previous_evaluation: + # when the candidate_subset is None, this indicates that the entire asset was evaluated + # for this condition on the previous tick + elif self.previous_condition_evaluation.candidate_subset is None: return self.empty_subset() - return self.previous_evaluation.true_subset + return self.candidate_subset - self.previous_condition_evaluation.candidate_subset def materializable_in_same_run(self, child_key: AssetKey, parent_key: AssetKey) -> bool: """Returns whether a child asset can be materialized in the same run as a parent asset.""" @@ -296,28 +286,3 @@ def will_update_asset_partition(self, asset_partition: AssetKeyPartitionKey) -> def empty_subset(self) -> AssetSubset: return AssetSubset.empty(self.asset_key, self.partitions_def) - - def get_new_asset_cursor( - self, evaluation: "AssetConditionEvaluation" - ) -> AssetDaemonAssetCursor: - """Returns a new AssetDaemonAssetCursor based on the current cursor and the results of - this tick's evaluation. - """ - previous_handled_subset = ( - self.asset_cursor.materialized_requested_or_discarded_subset - if self.asset_cursor - else self.empty_subset() - ) - new_handled_subset = ( - previous_handled_subset - | self.materialized_requested_or_discarded_since_previous_tick_subset - | evaluation.true_subset - | (evaluation.discard_subset(self.condition) or self.empty_subset()) - ) - return AssetDaemonAssetCursor( - asset_key=self.asset_key, - previous_max_storage_id=self.daemon_context.get_new_latest_storage_id(), - previous_evaluation=evaluation, - previous_evaluation_timestamp=self.evaluation_time.timestamp(), - materialized_requested_or_discarded_subset=new_handled_subset, - ) diff --git a/python_modules/dagster/dagster/_core/definitions/asset_daemon_context.py b/python_modules/dagster/dagster/_core/definitions/asset_daemon_context.py index 36feb348b25ee..c6ebc6c22addd 100644 --- a/python_modules/dagster/dagster/_core/definitions/asset_daemon_context.py +++ b/python_modules/dagster/dagster/_core/definitions/asset_daemon_context.py @@ -39,7 +39,7 @@ from .asset_condition_evaluation_context import ( AssetConditionEvaluationContext, ) -from .asset_daemon_cursor import AssetDaemonAssetCursor, AssetDaemonCursor +from .asset_daemon_cursor import AssetConditionCursor, AssetDaemonCursor from .asset_graph import AssetGraph from .auto_materialize_rule import AutoMaterializeRule from .backfill_policy import BackfillPolicy, BackfillPolicyType @@ -223,7 +223,7 @@ def evaluate_asset( asset_key: AssetKey, evaluation_results_by_key: Mapping[AssetKey, AssetConditionEvaluation], expected_data_time_mapping: Mapping[AssetKey, Optional[datetime.datetime]], - ) -> Tuple[AssetConditionEvaluation, AssetDaemonAssetCursor, Optional[datetime.datetime]]: + ) -> Tuple[AssetConditionEvaluation, AssetConditionCursor, Optional[datetime.datetime]]: """Evaluates the auto materialize policy of a given asset key. Params: @@ -241,9 +241,11 @@ def evaluate_asset( self.asset_graph.auto_materialize_policies_by_key.get(asset_key) ).to_asset_condition() + asset_cursor = self.cursor.asset_cursor_for_key(asset_key, self.asset_graph) + context = AssetConditionEvaluationContext.create( asset_key=asset_key, - asset_cursor=self.cursor.asset_cursor_for_key(asset_key, self.asset_graph), + cursor=self.cursor.asset_cursor_for_key(asset_key, self.asset_graph), condition=asset_condition, instance_queryer=self.instance_queryer, data_time_resolver=self.data_time_resolver, @@ -252,26 +254,27 @@ def evaluate_asset( expected_data_time_mapping=expected_data_time_mapping, ) - evaluation = asset_condition.evaluate(context) - asset_cursor = context.get_new_asset_cursor(evaluation=evaluation) + evaluation, condition_cursor = asset_condition.evaluate(context) + + new_asset_cursor = asset_cursor.with_updates(context, evaluation) expected_data_time = get_expected_data_time_for_asset_key( context, will_materialize=evaluation.true_subset.size > 0 ) - return evaluation, asset_cursor, expected_data_time + return evaluation, new_asset_cursor, expected_data_time def get_asset_condition_evaluations( self, ) -> Tuple[ Sequence[AssetConditionEvaluation], - Sequence[AssetDaemonAssetCursor], + Sequence[AssetConditionCursor], AbstractSet[AssetKeyPartitionKey], ]: """Returns a mapping from asset key to the AutoMaterializeAssetEvaluation for that key, a sequence of new per-asset cursors, and the set of all asset partitions that should be materialized or discarded this tick. """ - asset_cursors: List[AssetDaemonAssetCursor] = [] + asset_cursors: List[AssetConditionCursor] = [] evaluation_results_by_key: Dict[AssetKey, AssetConditionEvaluation] = {} expected_data_time_mapping: Dict[AssetKey, Optional[datetime.datetime]] = defaultdict() diff --git a/python_modules/dagster/dagster/_core/definitions/asset_daemon_cursor.py b/python_modules/dagster/dagster/_core/definitions/asset_daemon_cursor.py index eb0ac555ad596..679d28ac82b6b 100644 --- a/python_modules/dagster/dagster/_core/definitions/asset_daemon_cursor.py +++ b/python_modules/dagster/dagster/_core/definitions/asset_daemon_cursor.py @@ -9,6 +9,8 @@ NamedTuple, Optional, Sequence, + Type, + TypeVar, ) import dagster._check as check @@ -17,26 +19,121 @@ TimeWindowPartitionsDefinition, TimeWindowPartitionsSubset, ) -from dagster._serdes.serdes import deserialize_value, serialize_value, whitelist_for_serdes +from dagster._serdes.serdes import ( + PackableValue, + deserialize_value, + serialize_value, + whitelist_for_serdes, +) from .asset_graph import AssetGraph from .asset_subset import AssetSubset -from .partition import PartitionsSubset +from .partition import PartitionsDefinition, PartitionsSubset if TYPE_CHECKING: - from .asset_condition import AssetConditionEvaluation + from .asset_condition import AssetCondition, AssetConditionEvaluation, AssetConditionSnapshot + from .asset_condition_evaluation_context import AssetConditionEvaluationContext + +ExtrasDict = Mapping[str, PackableValue] + +T = TypeVar("T") + + +def _get_placeholder_missing_condition() -> "AssetCondition": + """Temporary hard-coding of the hash of the "materialize on missing" condition. This will + no longer be necessary once we start serializing the AssetDaemonCursor. + """ + from .asset_condition import RuleCondition + from .auto_materialize_rule import MaterializeOnMissingRule + + return RuleCondition(MaterializeOnMissingRule()) + +_PLACEHOLDER_HANDLED_SUBSET_KEY = "handled_subset" -class AssetDaemonAssetCursor(NamedTuple): + +class AssetConditionCursorExtras(NamedTuple): + """Class to represent additional unstructured information that may be tracked by a particular + asset condition. + """ + + condition_snapshot: "AssetConditionSnapshot" + extras: ExtrasDict + + +class AssetConditionCursor(NamedTuple): """Convenience class to represent the state of an individual asset being handled by the daemon. In the future, this will be serialized as part of the cursor. """ asset_key: AssetKey + previous_evaluation: Optional["AssetConditionEvaluation"] previous_max_storage_id: Optional[int] previous_evaluation_timestamp: Optional[float] - previous_evaluation: Optional["AssetConditionEvaluation"] - materialized_requested_or_discarded_subset: AssetSubset + + extras: Sequence[AssetConditionCursorExtras] + + @staticmethod + def empty(asset_key: AssetKey) -> "AssetConditionCursor": + return AssetConditionCursor( + asset_key=asset_key, + previous_evaluation=None, + previous_max_storage_id=None, + previous_evaluation_timestamp=None, + extras=[], + ) + + def get_extras_value( + self, condition: "AssetCondition", key: str, as_type: Type[T] + ) -> Optional[T]: + """Returns a value from the extras dict for the given condition, if it exists and is of the + expected type. Otherwise, returns None. + """ + for condition_extras in self.extras: + if condition_extras.condition_snapshot == condition.snapshot: + extras_value = condition_extras.extras.get(key) + if isinstance(extras_value, as_type): + return extras_value + return None + return None + + def get_previous_requested_or_discarded_subset( + self, condition: "AssetCondition", partitions_def: Optional[PartitionsDefinition] + ) -> AssetSubset: + if not self.previous_evaluation: + return AssetSubset.empty(self.asset_key, partitions_def) + return self.previous_evaluation.get_requested_or_discarded_subset(condition) + + @property + def handled_subset(self) -> Optional[AssetSubset]: + return self.get_extras_value( + condition=_get_placeholder_missing_condition(), + key=_PLACEHOLDER_HANDLED_SUBSET_KEY, + as_type=AssetSubset, + ) + + def with_updates( + self, context: "AssetConditionEvaluationContext", evaluation: "AssetConditionEvaluation" + ) -> "AssetConditionCursor": + newly_materialized_requested_or_discarded_subset = ( + context.materialized_since_previous_tick_subset + | evaluation.get_requested_or_discarded_subset(context.condition) + ) + + handled_subset = ( + self.handled_subset or context.empty_subset() + ) | newly_materialized_requested_or_discarded_subset + + # for now, hard-code the materialized_requested_or_discarded_subset location + return self._replace( + previous_evaluation=evaluation, + extras=[ + AssetConditionCursorExtras( + condition_snapshot=_get_placeholder_missing_condition().snapshot, + extras={_PLACEHOLDER_HANDLED_SUBSET_KEY: handled_subset}, + ) + ], + ) class AssetDaemonCursor(NamedTuple): @@ -71,7 +168,7 @@ def was_previously_handled(self, asset_key: AssetKey) -> bool: def asset_cursor_for_key( self, asset_key: AssetKey, asset_graph: AssetGraph - ) -> AssetDaemonAssetCursor: + ) -> AssetConditionCursor: partitions_def = asset_graph.get_partitions_def(asset_key) handled_partitions_subset = self.handled_root_partitions_by_asset_key.get(asset_key) if handled_partitions_subset is not None: @@ -80,12 +177,19 @@ def asset_cursor_for_key( handled_subset = AssetSubset(asset_key=asset_key, value=True) else: handled_subset = AssetSubset.empty(asset_key, partitions_def) - return AssetDaemonAssetCursor( + + previous_evaluation = self.latest_evaluation_by_asset_key.get(asset_key) + return AssetConditionCursor( asset_key=asset_key, + previous_evaluation=previous_evaluation, previous_max_storage_id=self.latest_storage_id, previous_evaluation_timestamp=self.latest_evaluation_timestamp, - previous_evaluation=self.latest_evaluation_by_asset_key.get(asset_key), - materialized_requested_or_discarded_subset=handled_subset, + extras=[ + AssetConditionCursorExtras( + condition_snapshot=_get_placeholder_missing_condition().snapshot, + extras={"handled_subset": handled_subset}, + ) + ], ) def with_updates( @@ -96,7 +200,7 @@ def with_updates( observe_request_timestamp: float, evaluations: Sequence["AssetConditionEvaluation"], evaluation_time: datetime.datetime, - asset_cursors: Sequence[AssetDaemonAssetCursor], + asset_cursors: Sequence[AssetConditionCursor], ) -> "AssetDaemonCursor": """Returns a cursor that represents this cursor plus the updates that have happened within the tick. @@ -124,13 +228,14 @@ def with_updates( handled_root_asset_keys={ cursor.asset_key for cursor in asset_cursors - if not cursor.materialized_requested_or_discarded_subset.is_partitioned - and cursor.materialized_requested_or_discarded_subset.bool_value + if cursor.handled_subset is not None + and not cursor.handled_subset.is_partitioned + and cursor.handled_subset.bool_value }, handled_root_partitions_by_asset_key={ - cursor.asset_key: cursor.materialized_requested_or_discarded_subset.subset_value + cursor.asset_key: cursor.handled_subset.subset_value for cursor in asset_cursors - if cursor.materialized_requested_or_discarded_subset.is_partitioned + if cursor.handled_subset is not None and cursor.handled_subset.is_partitioned }, evaluation_id=evaluation_id, last_observe_request_timestamp_by_asset_key=result_last_observe_request_timestamp_by_asset_key, diff --git a/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule.py b/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule.py index 7fa3da5bb6b06..012dc7727ec4e 100644 --- a/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule.py +++ b/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule.py @@ -109,7 +109,7 @@ def add_evaluation_data_from_previous_tick( # we've explicitly said to ignore it ignore_subset = has_metadata_subset | ignore_subset - for elt in context.previous_tick_subsets_with_metadata: + for elt in context.previous_subsets_with_metadata or []: carry_forward_subset = elt.subset - ignore_subset if carry_forward_subset.size > 0: mapping[elt.frozen_metadata] |= carry_forward_subset @@ -396,7 +396,7 @@ def evaluate_for_asset(self, context: AssetConditionEvaluationContext) -> RuleEv asset_subset_to_request = AssetSubset.from_asset_partitions_set( context.asset_key, context.partitions_def, new_asset_partitions_to_request ) | ( - context.previous_tick_true_subset + context.previous_true_subset - context.materialized_requested_or_discarded_since_previous_tick_subset ) @@ -626,7 +626,7 @@ def evaluate_for_asset(self, context: AssetConditionEvaluationContext) -> RuleEv context.asset_key, context.partitions_def, missing_asset_partitions ) missing_subset = newly_missing_subset | ( - context.previous_tick_true_subset + context.previous_true_subset - context.materialized_requested_or_discarded_since_previous_tick_subset ) return missing_subset, [] diff --git a/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/asset_daemon_scenario.py b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/asset_daemon_scenario.py index d8de5923609d1..3371b38db08c0 100644 --- a/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/asset_daemon_scenario.py +++ b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/asset_daemon_scenario.py @@ -213,6 +213,7 @@ class AssetDaemonScenarioState(NamedTuple): serialized_cursor: str = AssetDaemonCursor.empty().serialize() evaluations: Sequence[AssetConditionEvaluation] = [] logger: logging.Logger = logging.getLogger("dagster.amp") + tick_index: int = 1 # this is set by the scenario runner scenario_instance: Optional[DagsterInstance] = None is_daemon: bool = False @@ -501,6 +502,9 @@ def _evaluate_tick_daemon( return new_run_requests, new_cursor, new_evaluations def evaluate_tick(self) -> "AssetDaemonScenarioState": + self.logger.critical("********************************") + self.logger.critical(f"EVALUATING TICK {self.tick_index}") + self.logger.critical("********************************") with pendulum.test(self.current_time): if self.is_daemon: ( @@ -515,6 +519,7 @@ def evaluate_tick(self) -> "AssetDaemonScenarioState": run_requests=new_run_requests, serialized_cursor=new_cursor.serialize(), evaluations=new_evaluations, + tick_index=self.tick_index + 1, ) def _log_assertion_error(self, expected: Sequence[Any], actual: Sequence[Any]) -> None: From e4595f03f89b904864c7ebd47762aaaee77db5cb Mon Sep 17 00:00:00 2001 From: Owen Kephart Date: Thu, 11 Jan 2024 14:21:13 -0800 Subject: [PATCH 08/56] move some changes from 8/n --- .../dagster/dagster/_core/definitions/asset_condition.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/python_modules/dagster/dagster/_core/definitions/asset_condition.py b/python_modules/dagster/dagster/_core/definitions/asset_condition.py index 592e541770548..145b0698c1c8e 100644 --- a/python_modules/dagster/dagster/_core/definitions/asset_condition.py +++ b/python_modules/dagster/dagster/_core/definitions/asset_condition.py @@ -76,6 +76,10 @@ def equivalent_to_stored_evaluation(self, other: Optional["AssetConditionEvaluat ) def discarded_subset(self, condition: "AssetCondition") -> Optional[AssetSubset]: + """Returns the AssetSubset representing asset partitions that were discarded during this + evaluation. Note that 'discarding' is a deprecated concept that is only used for backwards + compatibility. + """ not_discard_condition = condition.not_discard_condition if not not_discard_condition or len(self.child_evaluations) != 3: return None @@ -88,7 +92,8 @@ def get_requested_or_discarded_subset(self, condition: "AssetCondition") -> Asse discarded_subset = self.discarded_subset(condition) if discarded_subset is None: return self.true_subset - return self.true_subset | discarded_subset + else: + return self.true_subset | discarded_subset def for_child(self, child_condition: "AssetCondition") -> Optional["AssetConditionEvaluation"]: """Returns the evaluation of a given child condition by finding the child evaluation that From 9f7fe6e8fa1e30786049eb2ce727be07b918eff8 Mon Sep 17 00:00:00 2001 From: Owen Kephart Date: Fri, 15 Dec 2023 10:12:13 -0800 Subject: [PATCH 09/56] Make AssetDaemonCursor whitelisted for serdes --- ...test_auto_materialize_asset_evaluations.py | 15 +- .../_core/definitions/asset_condition.py | 32 +- .../asset_condition_evaluation_context.py | 53 +- .../_core/definitions/asset_daemon_context.py | 68 +-- .../_core/definitions/asset_daemon_cursor.py | 490 +++++++----------- .../dagster/_core/definitions/asset_subset.py | 1 + .../definitions/auto_materialize_rule.py | 68 +-- .../auto_materialize_rule_evaluation.py | 82 ++- .../freshness_based_auto_materialize.py | 14 +- .../dagster/dagster/_daemon/asset_daemon.py | 120 +++-- .../asset_daemon_scenario.py | 53 +- .../auto_materialize_tests/base_scenario.py | 14 +- .../scenarios/active_run_scenarios.py | 4 +- .../scenarios/basic_scenarios.py | 7 +- .../scenarios/blocking_check_scenarios.py | 2 +- .../scenarios/partition_scenarios.py | 5 +- .../test_asset_daemon.py | 7 +- .../test_asset_daemon_cursor.py | 60 +-- .../auto_materialize_tests/test_scenarios.py | 2 + .../updated_scenarios/basic_scenarios.py | 2 +- .../cursor_migration_scenarios.py | 54 ++ .../updated_scenarios/partition_scenarios.py | 38 +- 22 files changed, 608 insertions(+), 583 deletions(-) create mode 100644 python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/updated_scenarios/cursor_migration_scenarios.py diff --git a/python_modules/dagster-graphql/dagster_graphql_tests/graphql/test_auto_materialize_asset_evaluations.py b/python_modules/dagster-graphql/dagster_graphql_tests/graphql/test_auto_materialize_asset_evaluations.py index 2dc470151648b..0ffcd2a846c04 100644 --- a/python_modules/dagster-graphql/dagster_graphql_tests/graphql/test_auto_materialize_asset_evaluations.py +++ b/python_modules/dagster-graphql/dagster_graphql_tests/graphql/test_auto_materialize_asset_evaluations.py @@ -6,7 +6,6 @@ from dagster import AssetKey, RunRequest from dagster._core.definitions.asset_daemon_cursor import ( AssetDaemonCursor, - LegacyAssetDaemonCursorWrapper, ) from dagster._core.definitions.run_request import ( InstigatorType, @@ -28,8 +27,10 @@ _PRE_SENSOR_AUTO_MATERIALIZE_INSTIGATOR_NAME, _PRE_SENSOR_AUTO_MATERIALIZE_ORIGIN_ID, _PRE_SENSOR_AUTO_MATERIALIZE_SELECTOR_ID, + asset_daemon_cursor_to_instigator_serialized_cursor, ) from dagster._serdes import deserialize_value +from dagster._serdes.serdes import serialize_value from dagster_graphql.test.utils import execute_dagster_graphql, infer_repository from dagster_graphql_tests.graphql.graphql_context_test_suite import ( @@ -352,9 +353,9 @@ def test_automation_policy_sensor(self, graphql_context: WorkspaceRequestContext status=InstigatorStatus.RUNNING, instigator_data=SensorInstigatorData( sensor_type=SensorType.AUTOMATION_POLICY, - cursor=LegacyAssetDaemonCursorWrapper( - AssetDaemonCursor.empty()._replace(evaluation_id=12345).serialize() - ).to_compressed(), + cursor=asset_daemon_cursor_to_instigator_serialized_cursor( + AssetDaemonCursor.empty(12345) + ), ), ) ) @@ -708,7 +709,7 @@ def _test_get_evaluations_with_partitions(self, graphql_context: WorkspaceReques def _test_current_evaluation_id(self, graphql_context: WorkspaceRequestContext): graphql_context.instance.daemon_cursor_storage.set_cursor_values( - {_PRE_SENSOR_AUTO_MATERIALIZE_CURSOR_KEY: AssetDaemonCursor.empty().serialize()} + {_PRE_SENSOR_AUTO_MATERIALIZE_CURSOR_KEY: serialize_value(AssetDaemonCursor.empty(0))} ) results = execute_dagster_graphql( @@ -728,9 +729,7 @@ def _test_current_evaluation_id(self, graphql_context: WorkspaceRequestContext): graphql_context.instance.daemon_cursor_storage.set_cursor_values( { _PRE_SENSOR_AUTO_MATERIALIZE_CURSOR_KEY: ( - AssetDaemonCursor.empty() - .with_updates(0, set(), set(), set(), {}, 42, None, [], 0) # type: ignore - .serialize() + serialize_value(AssetDaemonCursor.empty(0).with_updates(0, 1.0, [], [])) ) } ) diff --git a/python_modules/dagster/dagster/_core/definitions/asset_condition.py b/python_modules/dagster/dagster/_core/definitions/asset_condition.py index 145b0698c1c8e..e411eec9f6851 100644 --- a/python_modules/dagster/dagster/_core/definitions/asset_condition.py +++ b/python_modules/dagster/dagster/_core/definitions/asset_condition.py @@ -1,6 +1,6 @@ import functools import hashlib -from abc import ABC, abstractmethod +from abc import ABC, abstractmethod, abstractproperty from typing import ( TYPE_CHECKING, AbstractSet, @@ -148,6 +148,10 @@ def evaluate( ) -> Tuple[AssetConditionEvaluation, Sequence[AssetConditionCursorExtras]]: raise NotImplementedError() + @abstractproperty + def description(self) -> str: + raise NotImplementedError() + def __and__(self, other: "AssetCondition") -> "AssetCondition": # group AndAutomationConditions together if isinstance(self, AndAssetCondition): @@ -192,7 +196,7 @@ def snapshot(self) -> AssetConditionSnapshot: """Returns a snapshot of this condition that can be used for serialization.""" return AssetConditionSnapshot( class_name=self.__class__.__name__, - description=str(self), + description=self.description, unique_id=self.unique_id, ) @@ -205,25 +209,29 @@ class RuleCondition( @property def unique_id(self) -> str: - parts = [self.rule.__class__.__name__, self.rule.description] + parts = [self.rule.__class__.__name__, self.description] return hashlib.md5("".join(parts).encode()).hexdigest() + @property + def description(self) -> str: + return self.rule.description + def evaluate( self, context: AssetConditionEvaluationContext ) -> Tuple[AssetConditionEvaluation, Sequence[AssetConditionCursorExtras]]: context.root_context.daemon_context._verbose_log_fn( # noqa f"Evaluating rule: {self.rule.to_snapshot()}" ) - true_subset, subsets_with_metadata = self.rule.evaluate_for_asset(context) + true_subset, subsets_with_metadata, extras = self.rule.evaluate_for_asset(context) context.root_context.daemon_context._verbose_log_fn( # noqa - f"Rule returned {true_subset.size} partitions" + f"Rule returned {true_subset.size} partitions" f"{true_subset}" ) return AssetConditionEvaluation( condition_snapshot=self.snapshot, true_subset=true_subset, candidate_subset=context.candidate_subset, subsets_with_metadata=subsets_with_metadata, - ), [AssetConditionCursorExtras(condition_snapshot=self.snapshot, extras={})] + ), [AssetConditionCursorExtras(condition_snapshot=self.snapshot, extras=extras)] class AndAssetCondition( @@ -232,6 +240,10 @@ class AndAssetCondition( ): """This class represents the condition that all of its children evaluate to true.""" + @property + def description(self) -> str: + return "All of" + def evaluate( self, context: AssetConditionEvaluationContext ) -> Tuple[AssetConditionEvaluation, Sequence[AssetConditionCursorExtras]]: @@ -258,6 +270,10 @@ class OrAssetCondition( ): """This class represents the condition that any of its children evaluate to true.""" + @property + def description(self) -> str: + return "Any of" + def evaluate( self, context: AssetConditionEvaluationContext ) -> Tuple[AssetConditionEvaluation, Sequence[AssetConditionCursorExtras]]: @@ -290,6 +306,10 @@ def __new__(cls, children: Sequence[AssetCondition]): check.invariant(len(children) == 1) return super().__new__(cls, children) + @property + def description(self) -> str: + return "Not" + @property def child(self) -> AssetCondition: return self.children[0] diff --git a/python_modules/dagster/dagster/_core/definitions/asset_condition_evaluation_context.py b/python_modules/dagster/dagster/_core/definitions/asset_condition_evaluation_context.py index b5852fb37f245..3db7b7190c533 100644 --- a/python_modules/dagster/dagster/_core/definitions/asset_condition_evaluation_context.py +++ b/python_modules/dagster/dagster/_core/definitions/asset_condition_evaluation_context.py @@ -151,7 +151,18 @@ def parent_will_update_subset(self) -> AssetSubset: subset |= parent_subset._replace(asset_key=self.asset_key) return subset - @property + @functools.cached_property + @root_property + def new_max_storage_id(self) -> Optional[int]: + """Returns the new max storage ID for this asset, if any.""" + # TODO: This is not a good way of doing this, as it opens us up to potential race conditions, + # but in the interest of keeping this PR simple, I'm leaving this logic as is. In the next + # PR, I'll update the code to return a "maximum observed record id" from inside the + # `get_asset_partitions_updated_after_cursor` call. + return self.instance_queryer.instance.event_log_storage.get_maximum_record_id() + + @functools.cached_property + @root_property def materialized_since_previous_tick_subset(self) -> AssetSubset: """Returns the set of asset partitions that were materialized since the previous tick.""" return AssetSubset.from_asset_partitions_set( @@ -160,44 +171,28 @@ def materialized_since_previous_tick_subset(self) -> AssetSubset: self.instance_queryer.get_asset_partitions_updated_after_cursor( self.asset_key, asset_partitions=None, - after_cursor=self.cursor.previous_max_storage_id if self.cursor else None, + after_cursor=self.cursor.previous_max_storage_id, respect_materialization_data_versions=False, ), ) @property - def previous_tick_requested_or_discarded_subset(self) -> AssetSubset: - if not self.cursor.previous_evaluation: + @root_property + def previous_tick_requested_subset(self) -> AssetSubset: + """The set of asset partitions that were requested (or discarded) on the previous tick.""" + previous_evaluation = self.cursor.previous_evaluation + if previous_evaluation is None: return self.empty_subset() - return self.cursor.previous_evaluation.get_requested_or_discarded_subset( - self.root_context.condition - ) + + return previous_evaluation.get_requested_or_discarded_subset(self.condition) @property def materialized_requested_or_discarded_since_previous_tick_subset(self) -> AssetSubset: """Returns the set of asset partitions that were materialized since the previous tick.""" - return ( - self.materialized_since_previous_tick_subset - | self.previous_tick_requested_or_discarded_subset - ) + return self.materialized_since_previous_tick_subset | self.previous_tick_requested_subset - @property - def never_materialized_requested_or_discarded_root_subset(self) -> AssetSubset: - if self.asset_key not in self.asset_graph.root_materializable_or_observable_asset_keys: - return self.empty_subset() - - handled_subset = ( - self.cursor.get_extras_value(self.condition, "handled_subset", AssetSubset) - or self.empty_subset() - ) - unhandled_subset = handled_subset.inverse( - self.partitions_def, - dynamic_partitions_store=self.instance_queryer, - current_time=self.evaluation_time, - ) - return unhandled_subset - self.materialized_since_previous_tick_subset - - @property + @functools.cached_property + @root_property def parent_has_updated_subset(self) -> AssetSubset: """Returns the set of asset partitions whose parents have updated since the last time this condition was evaluated. @@ -206,7 +201,7 @@ def parent_has_updated_subset(self) -> AssetSubset: self.asset_key, self.partitions_def, self.root_context.instance_queryer.asset_partitions_with_newly_updated_parents( - latest_storage_id=self.previous_max_storage_id, + latest_storage_id=self.cursor.previous_max_storage_id, child_asset_key=self.root_context.asset_key, map_old_time_partitions=False, ), diff --git a/python_modules/dagster/dagster/_core/definitions/asset_daemon_context.py b/python_modules/dagster/dagster/_core/definitions/asset_daemon_context.py index c6ebc6c22addd..41e7a08dc3ee5 100644 --- a/python_modules/dagster/dagster/_core/definitions/asset_daemon_context.py +++ b/python_modules/dagster/dagster/_core/definitions/asset_daemon_context.py @@ -31,7 +31,6 @@ get_time_partitions_def, ) from dagster._core.instance import DynamicPartitionsStore -from dagster._utils.cached_method import cached_method from ... import PartitionKeyRange from ..storage.tags import ASSET_PARTITION_RANGE_END_TAG, ASSET_PARTITION_RANGE_START_TAG @@ -134,10 +133,6 @@ def cursor(self) -> AssetDaemonCursor: def asset_graph(self) -> AssetGraph: return self.instance_queryer.asset_graph - @property - def latest_storage_id(self) -> Optional[int]: - return self.cursor.latest_storage_id - @property def auto_materialize_asset_keys(self) -> AbstractSet[AssetKey]: return self._auto_materialize_asset_keys @@ -177,46 +172,6 @@ def prefetch(self) -> None: ) self.instance_queryer.prefetch_asset_records(self.asset_records_to_prefetch) self._verbose_log_fn("Done prefetching asset records.") - self._verbose_log_fn( - f"Calculated a new latest_storage_id value of {self.get_new_latest_storage_id()}.\n" - f"Precalculating updated parents for {len(self.auto_materialize_asset_keys)} assets using previous " - f"latest_storage_id of {self.latest_storage_id}." - ) - for asset_key in self.auto_materialize_asset_keys: - self.instance_queryer.asset_partitions_with_newly_updated_parents( - latest_storage_id=self.latest_storage_id, child_asset_key=asset_key - ) - self._verbose_log_fn("Done precalculating updated parents.") - - @cached_method - def get_new_latest_storage_id(self) -> Optional[int]: - """Get the latest storage id across all cached asset records. We use this method as it uses - identical data to what is used to calculate assets with updated parents, and therefore - avoids certain classes of race conditions. - """ - new_latest_storage_id = self.latest_storage_id - for asset_key in self.auto_materialize_asset_keys_and_parents: - # ignore non-observable sources - if self.asset_graph.is_source(asset_key) and not self.asset_graph.is_observable( - asset_key - ): - continue - # ignore cases where we know for sure there's no new event - if not self.instance_queryer.asset_partition_has_materialization_or_observation( - AssetKeyPartitionKey(asset_key), after_cursor=self.latest_storage_id - ): - continue - # get the latest overall storage id for this asset key - asset_latest_storage_id = ( - self.instance_queryer.get_latest_materialization_or_observation_storage_id( - AssetKeyPartitionKey(asset_key) - ) - ) - new_latest_storage_id = max( - filter(None, [new_latest_storage_id, asset_latest_storage_id]), default=None - ) - - return new_latest_storage_id def evaluate_asset( self, @@ -241,11 +196,11 @@ def evaluate_asset( self.asset_graph.auto_materialize_policies_by_key.get(asset_key) ).to_asset_condition() - asset_cursor = self.cursor.asset_cursor_for_key(asset_key, self.asset_graph) + asset_cursor = self.cursor.get_asset_cursor(asset_key) context = AssetConditionEvaluationContext.create( asset_key=asset_key, - cursor=self.cursor.asset_cursor_for_key(asset_key, self.asset_graph), + cursor=asset_cursor, condition=asset_condition, instance_queryer=self.instance_queryer, data_time_resolver=self.data_time_resolver, @@ -254,9 +209,15 @@ def evaluate_asset( expected_data_time_mapping=expected_data_time_mapping, ) - evaluation, condition_cursor = asset_condition.evaluate(context) + evaluation, extras = asset_condition.evaluate(context) - new_asset_cursor = asset_cursor.with_updates(context, evaluation) + new_asset_cursor = AssetConditionCursor( + asset_key=asset_key, + previous_max_storage_id=context.new_max_storage_id, + previous_evaluation_timestamp=context.evaluation_time.timestamp(), + previous_evaluation=evaluation, + extras=extras, + ) expected_data_time = get_expected_data_time_for_asset_key( context, will_materialize=evaluation.true_subset.size > 0 @@ -365,24 +326,21 @@ def evaluate( return ( run_requests, self.cursor.with_updates( - latest_storage_id=self.get_new_latest_storage_id(), evaluation_id=self._evaluation_id, + asset_cursors=asset_cursors, newly_observe_requested_asset_keys=[ asset_key for run_request in auto_observe_run_requests for asset_key in cast(Sequence[AssetKey], run_request.asset_selection) ], - observe_request_timestamp=observe_request_timestamp, - evaluations=evaluations, - evaluation_time=self.instance_queryer.evaluation_time, - asset_cursors=asset_cursors, + evaluation_timestamp=self.instance_queryer.evaluation_time.timestamp(), ), # only record evaluations where something changed [ evaluation for evaluation in evaluations if not evaluation.equivalent_to_stored_evaluation( - self.cursor.latest_evaluation_by_asset_key.get(evaluation.asset_key) + self.cursor.get_previous_evaluation(evaluation.asset_key) ) ], ) diff --git a/python_modules/dagster/dagster/_core/definitions/asset_daemon_cursor.py b/python_modules/dagster/dagster/_core/definitions/asset_daemon_cursor.py index 679d28ac82b6b..d1635599f54d1 100644 --- a/python_modules/dagster/dagster/_core/definitions/asset_daemon_cursor.py +++ b/python_modules/dagster/dagster/_core/definitions/asset_daemon_cursor.py @@ -1,10 +1,7 @@ -import base64 -import datetime +import functools import json -import zlib from typing import ( TYPE_CHECKING, - AbstractSet, Mapping, NamedTuple, Optional, @@ -13,57 +10,43 @@ TypeVar, ) -import dagster._check as check -from dagster._core.definitions.events import AssetKey -from dagster._core.definitions.time_window_partitions import ( - TimeWindowPartitionsDefinition, - TimeWindowPartitionsSubset, +from dagster._core.definitions.asset_graph_subset import AssetGraphSubset +from dagster._core.definitions.asset_subset import AssetSubset +from dagster._core.definitions.auto_materialize_rule_evaluation import ( + BackcompatAutoMaterializeAssetEvaluationSerializer, ) +from dagster._core.definitions.events import AssetKey +from dagster._core.definitions.partition import PartitionsDefinition from dagster._serdes.serdes import ( + _WHITELIST_MAP, PackableValue, + WhitelistMap, deserialize_value, - serialize_value, whitelist_for_serdes, ) from .asset_graph import AssetGraph -from .asset_subset import AssetSubset -from .partition import PartitionsDefinition, PartitionsSubset if TYPE_CHECKING: from .asset_condition import AssetCondition, AssetConditionEvaluation, AssetConditionSnapshot - from .asset_condition_evaluation_context import AssetConditionEvaluationContext - -ExtrasDict = Mapping[str, PackableValue] T = TypeVar("T") -def _get_placeholder_missing_condition() -> "AssetCondition": - """Temporary hard-coding of the hash of the "materialize on missing" condition. This will - no longer be necessary once we start serializing the AssetDaemonCursor. - """ - from .asset_condition import RuleCondition - from .auto_materialize_rule import MaterializeOnMissingRule - - return RuleCondition(MaterializeOnMissingRule()) - - -_PLACEHOLDER_HANDLED_SUBSET_KEY = "handled_subset" - - +@whitelist_for_serdes class AssetConditionCursorExtras(NamedTuple): - """Class to represent additional unstructured information that may be tracked by a particular - asset condition. + """Represents additional state that may be optionally saved by an AssetCondition between + evaluations. """ condition_snapshot: "AssetConditionSnapshot" - extras: ExtrasDict + extras: Mapping[str, PackableValue] +@whitelist_for_serdes class AssetConditionCursor(NamedTuple): - """Convenience class to represent the state of an individual asset being handled by the daemon. - In the future, this will be serialized as part of the cursor. + """Represents the evaluated state of an AssetConditionCursor at a certain point in time. This + information can be used to make future evaluations more efficient. """ asset_key: AssetKey @@ -104,284 +87,194 @@ def get_previous_requested_or_discarded_subset( return AssetSubset.empty(self.asset_key, partitions_def) return self.previous_evaluation.get_requested_or_discarded_subset(condition) - @property - def handled_subset(self) -> Optional[AssetSubset]: - return self.get_extras_value( - condition=_get_placeholder_missing_condition(), - key=_PLACEHOLDER_HANDLED_SUBSET_KEY, - as_type=AssetSubset, - ) - - def with_updates( - self, context: "AssetConditionEvaluationContext", evaluation: "AssetConditionEvaluation" - ) -> "AssetConditionCursor": - newly_materialized_requested_or_discarded_subset = ( - context.materialized_since_previous_tick_subset - | evaluation.get_requested_or_discarded_subset(context.condition) - ) - - handled_subset = ( - self.handled_subset or context.empty_subset() - ) | newly_materialized_requested_or_discarded_subset - - # for now, hard-code the materialized_requested_or_discarded_subset location - return self._replace( - previous_evaluation=evaluation, - extras=[ - AssetConditionCursorExtras( - condition_snapshot=_get_placeholder_missing_condition().snapshot, - extras={_PLACEHOLDER_HANDLED_SUBSET_KEY: handled_subset}, - ) - ], - ) - +@whitelist_for_serdes class AssetDaemonCursor(NamedTuple): - """State that's saved between reconciliation evaluations. + """State that's stored between daemon evaluations. Attributes: - latest_storage_id: - The latest observed storage ID across all assets. Useful for finding out what has - happened since the last tick. - handled_root_asset_keys: - Every entry is a non-partitioned asset with no parents that has been requested by this - sensor, discarded by this sensor, or has been materialized (even if not by this sensor). - handled_root_partitions_by_asset_key: - Every key is a partitioned root asset. Every value is the set of that asset's partitions - that have been requested by this sensor, discarded by this sensor, - or have been materialized (even if not by this sensor). - last_observe_request_timestamp_by_asset_key: - Every key is an observable source asset that has been auto-observed. The value is the - timestamp of the tick that requested the observation. + evaluation_id (int): The ID of the evaluation that produced this cursor. + asset_cursors (Sequence[AssetConditionCursor]): The state of each asset that the daemon + is responsible for handling. """ - latest_storage_id: Optional[int] - handled_root_asset_keys: AbstractSet[AssetKey] - handled_root_partitions_by_asset_key: Mapping[AssetKey, PartitionsSubset] evaluation_id: int + asset_cursors: Sequence[AssetConditionCursor] + last_observe_request_timestamp_by_asset_key: Mapping[AssetKey, float] - latest_evaluation_by_asset_key: Mapping[AssetKey, "AssetConditionEvaluation"] - latest_evaluation_timestamp: Optional[float] - - def was_previously_handled(self, asset_key: AssetKey) -> bool: - return asset_key in self.handled_root_asset_keys - - def asset_cursor_for_key( - self, asset_key: AssetKey, asset_graph: AssetGraph - ) -> AssetConditionCursor: - partitions_def = asset_graph.get_partitions_def(asset_key) - handled_partitions_subset = self.handled_root_partitions_by_asset_key.get(asset_key) - if handled_partitions_subset is not None: - handled_subset = AssetSubset(asset_key=asset_key, value=handled_partitions_subset) - elif asset_key in self.handled_root_asset_keys: - handled_subset = AssetSubset(asset_key=asset_key, value=True) - else: - handled_subset = AssetSubset.empty(asset_key, partitions_def) - - previous_evaluation = self.latest_evaluation_by_asset_key.get(asset_key) - return AssetConditionCursor( - asset_key=asset_key, - previous_evaluation=previous_evaluation, - previous_max_storage_id=self.latest_storage_id, - previous_evaluation_timestamp=self.latest_evaluation_timestamp, - extras=[ - AssetConditionCursorExtras( - condition_snapshot=_get_placeholder_missing_condition().snapshot, - extras={"handled_subset": handled_subset}, - ) - ], + + @staticmethod + def empty(evaluation_id: int = 0) -> "AssetDaemonCursor": + return AssetDaemonCursor( + evaluation_id=evaluation_id, + asset_cursors=[], + last_observe_request_timestamp_by_asset_key={}, ) + @property + @functools.lru_cache(maxsize=1) + def asset_cursors_by_key(self) -> Mapping[AssetKey, AssetConditionCursor]: + """Efficient lookup of asset cursors by asset key.""" + return {cursor.asset_key: cursor for cursor in self.asset_cursors} + + def get_asset_cursor(self, asset_key: AssetKey) -> AssetConditionCursor: + """Returns the AssetConditionCursor associated with the given asset key. If no stored + cursor exists, returns an empty cursor. + """ + return self.asset_cursors_by_key.get(asset_key) or AssetConditionCursor.empty(asset_key) + + def get_previous_evaluation(self, asset_key: AssetKey) -> Optional["AssetConditionEvaluation"]: + """Returns the previous AssetConditionEvaluation for a given asset key, if it exists.""" + cursor = self.get_asset_cursor(asset_key) + return cursor.previous_evaluation if cursor else None + def with_updates( self, - latest_storage_id: Optional[int], evaluation_id: int, + evaluation_timestamp: float, newly_observe_requested_asset_keys: Sequence[AssetKey], - observe_request_timestamp: float, - evaluations: Sequence["AssetConditionEvaluation"], - evaluation_time: datetime.datetime, asset_cursors: Sequence[AssetConditionCursor], ) -> "AssetDaemonCursor": - """Returns a cursor that represents this cursor plus the updates that have happened within the - tick. - """ - result_last_observe_request_timestamp_by_asset_key = { - **self.last_observe_request_timestamp_by_asset_key - } - for asset_key in newly_observe_requested_asset_keys: - result_last_observe_request_timestamp_by_asset_key[ - asset_key - ] = observe_request_timestamp - - if latest_storage_id and self.latest_storage_id: - check.invariant( - latest_storage_id >= self.latest_storage_id, - "Latest storage ID should be >= previous latest storage ID", - ) - - latest_evaluation_by_asset_key = { - evaluation.asset_key: evaluation for evaluation in evaluations - } - - return AssetDaemonCursor( - latest_storage_id=latest_storage_id or self.latest_storage_id, - handled_root_asset_keys={ - cursor.asset_key - for cursor in asset_cursors - if cursor.handled_subset is not None - and not cursor.handled_subset.is_partitioned - and cursor.handled_subset.bool_value - }, - handled_root_partitions_by_asset_key={ - cursor.asset_key: cursor.handled_subset.subset_value - for cursor in asset_cursors - if cursor.handled_subset is not None and cursor.handled_subset.is_partitioned - }, + return self._replace( evaluation_id=evaluation_id, - last_observe_request_timestamp_by_asset_key=result_last_observe_request_timestamp_by_asset_key, - latest_evaluation_by_asset_key=latest_evaluation_by_asset_key, - latest_evaluation_timestamp=evaluation_time.timestamp(), - ) - - @classmethod - def empty(cls) -> "AssetDaemonCursor": - return AssetDaemonCursor( - latest_storage_id=None, - handled_root_partitions_by_asset_key={}, - handled_root_asset_keys=set(), - evaluation_id=0, - last_observe_request_timestamp_by_asset_key={}, - latest_evaluation_by_asset_key={}, - latest_evaluation_timestamp=None, + asset_cursors=asset_cursors, + last_observe_request_timestamp_by_asset_key={ + **self.last_observe_request_timestamp_by_asset_key, + **{ + asset_key: evaluation_timestamp + for asset_key in newly_observe_requested_asset_keys + }, + }, ) - @classmethod - def from_serialized(cls, cursor: str, asset_graph: AssetGraph) -> "AssetDaemonCursor": - from .asset_condition import AssetConditionEvaluationWithRunIds - - data = json.loads(cursor) - - if isinstance(data, list): # backcompat - check.invariant(len(data) in [3, 4], "Invalid serialized cursor") - ( - latest_storage_id, - serialized_handled_root_asset_keys, - serialized_handled_root_partitions_by_asset_key, - ) = data[:3] - - evaluation_id = data[3] if len(data) == 4 else 0 - serialized_last_observe_request_timestamp_by_asset_key = {} - serialized_latest_evaluation_by_asset_key = {} - latest_evaluation_timestamp = 0 - else: - latest_storage_id = data["latest_storage_id"] - serialized_handled_root_asset_keys = data["handled_root_asset_keys"] - serialized_handled_root_partitions_by_asset_key = data[ - "handled_root_partitions_by_asset_key" - ] - evaluation_id = data["evaluation_id"] - serialized_last_observe_request_timestamp_by_asset_key = data.get( - "last_observe_request_timestamp_by_asset_key", {} + def __hash__(self) -> int: + return hash(id(self)) + + +# BACKCOMPAT + + +def get_backcompat_asset_condition_cursor( + asset_key: AssetKey, + latest_storage_id: Optional[int], + latest_timestamp: Optional[float], + latest_evaluation: Optional["AssetConditionEvaluation"], + handled_root_subset: Optional[AssetSubset], +) -> AssetConditionCursor: + """Generates an AssetDaemonCursor from information available on the old cursor format.""" + from dagster._core.definitions.asset_condition import RuleCondition + from dagster._core.definitions.auto_materialize_rule import MaterializeOnMissingRule + + return AssetConditionCursor( + asset_key=asset_key, + previous_evaluation=latest_evaluation, + previous_evaluation_timestamp=latest_timestamp, + previous_max_storage_id=latest_storage_id, + extras=[ + # the only information we need to preserve from the previous cursor is the handled + # subset + AssetConditionCursorExtras( + condition_snapshot=RuleCondition(MaterializeOnMissingRule()).snapshot, + extras={MaterializeOnMissingRule.HANDLED_SUBSET_KEY: handled_root_subset}, ) - serialized_latest_evaluation_by_asset_key = data.get( - "latest_evaluation_by_asset_key", {} + ], + ) + + +def backcompat_deserialize_asset_daemon_cursor_str( + cursor_str: str, asset_graph: Optional[AssetGraph], default_evaluation_id: int +) -> AssetDaemonCursor: + """This serves as a backcompat layer for deserializing the old cursor format. Some information + is impossible to fully recover, this will recover enough to continue operating as normal. + """ + from .asset_condition import AssetConditionEvaluationWithRunIds + + data = json.loads(cursor_str) + + if isinstance(data, list): + evaluation_id = data[0] if isinstance(data[0], int) else default_evaluation_id + return AssetDaemonCursor.empty(evaluation_id) + elif not isinstance(data, dict): + return AssetDaemonCursor.empty(default_evaluation_id) + + serialized_last_observe_request_timestamp_by_asset_key = data.get( + "last_observe_request_timestamp_by_asset_key", {} + ) + last_observe_request_timestamp_by_asset_key = { + AssetKey.from_user_string(key_str): timestamp + for key_str, timestamp in serialized_last_observe_request_timestamp_by_asset_key.items() + } + + partition_subsets_by_asset_key = {} + for key_str, serialized_str in data.get("handled_root_partitions_by_asset_key", {}).items(): + asset_key = AssetKey.from_user_string(key_str) + partitions_def = asset_graph.get_partitions_def(asset_key) if asset_graph else None + if not partitions_def: + continue + try: + partition_subsets_by_asset_key[asset_key] = partitions_def.deserialize_subset( + serialized_str ) - latest_evaluation_timestamp = data.get("latest_evaluation_timestamp", 0) - - handled_root_partitions_by_asset_key = {} - for ( - key_str, - serialized_subset, - ) in serialized_handled_root_partitions_by_asset_key.items(): - key = AssetKey.from_user_string(key_str) - if key not in asset_graph.materializable_asset_keys: - continue - - partitions_def = asset_graph.get_partitions_def(key) - if partitions_def is None: - continue - - try: - # in the case that the partitions def has changed, we may not be able to deserialize - # the corresponding subset. in this case, we just use an empty subset - subset = partitions_def.deserialize_subset(serialized_subset) - # this covers the case in which the start date has changed for a time-partitioned - # asset. in reality, we should be using the can_deserialize method but because we - # are not storing the serializable unique id, we can't do that. - if ( - isinstance(subset, TimeWindowPartitionsSubset) - and isinstance(partitions_def, TimeWindowPartitionsDefinition) - and any( - time_window.start < partitions_def.start - for time_window in subset.included_time_windows - ) - ): - subset = partitions_def.empty_subset() - except: - subset = partitions_def.empty_subset() - handled_root_partitions_by_asset_key[key] = subset - - latest_evaluation_by_asset_key = {} - for key_str, serialized_evaluation in serialized_latest_evaluation_by_asset_key.items(): - key = AssetKey.from_user_string(key_str) - deserialized_evaluation = deserialize_value(serialized_evaluation) - if isinstance(deserialized_evaluation, AssetConditionEvaluationWithRunIds): - evaluation = deserialized_evaluation.evaluation - else: - evaluation = deserialized_evaluation - latest_evaluation_by_asset_key[key] = evaluation - - return cls( - latest_storage_id=latest_storage_id, - handled_root_asset_keys={ - AssetKey.from_user_string(key_str) for key_str in serialized_handled_root_asset_keys - }, - handled_root_partitions_by_asset_key=handled_root_partitions_by_asset_key, - evaluation_id=evaluation_id, - last_observe_request_timestamp_by_asset_key={ - AssetKey.from_user_string(key_str): timestamp - for key_str, timestamp in serialized_last_observe_request_timestamp_by_asset_key.items() + except: + continue + + handled_root_asset_graph_subset = AssetGraphSubset( + non_partitioned_asset_keys={ + AssetKey.from_user_string(key_str) + for key_str in data.get("handled_root_asset_keys", set()) + }, + partitions_subsets_by_asset_key=partition_subsets_by_asset_key, + ) + + serialized_latest_evaluation_by_asset_key = data.get("latest_evaluation_by_asset_key", {}) + latest_evaluation_by_asset_key = {} + for key_str, serialized_evaluation in serialized_latest_evaluation_by_asset_key.items(): + key = AssetKey.from_user_string(key_str) + + class BackcompatDeserializer(BackcompatAutoMaterializeAssetEvaluationSerializer): + @property + def partitions_def(self) -> Optional[PartitionsDefinition]: + return asset_graph.get_partitions_def(key) if asset_graph else None + + # create a new WhitelistMap that can deserialize SerializedPartitionSubset objects stored + # on the old cursor format + whitelist_map = WhitelistMap( + object_serializers=_WHITELIST_MAP.object_serializers, + object_deserializers={ + **_WHITELIST_MAP.object_deserializers, + "AutoMaterializeAssetEvaluation": BackcompatDeserializer( + klass=AssetConditionEvaluationWithRunIds + ), }, - latest_evaluation_by_asset_key=latest_evaluation_by_asset_key, - latest_evaluation_timestamp=latest_evaluation_timestamp, + enum_serializers=_WHITELIST_MAP.enum_serializers, ) - @classmethod - def get_evaluation_id_from_serialized(cls, cursor: str) -> Optional[int]: - data = json.loads(cursor) - if isinstance(data, list): # backcompat - check.invariant(len(data) in [3, 4], "Invalid serialized cursor") - return data[3] if len(data) == 4 else None - else: - return data["evaluation_id"] - - def serialize(self) -> str: - serializable_handled_root_partitions_by_asset_key = { - key.to_user_string(): subset.serialize() - for key, subset in self.handled_root_partitions_by_asset_key.items() - } - serialized = json.dumps( - { - "latest_storage_id": self.latest_storage_id, - "handled_root_asset_keys": [ - key.to_user_string() for key in self.handled_root_asset_keys - ], - "handled_root_partitions_by_asset_key": ( - serializable_handled_root_partitions_by_asset_key - ), - "evaluation_id": self.evaluation_id, - "last_observe_request_timestamp_by_asset_key": { - key.to_user_string(): timestamp - for key, timestamp in self.last_observe_request_timestamp_by_asset_key.items() - }, - "latest_evaluation_by_asset_key": { - key.to_user_string(): serialize_value(evaluation) - for key, evaluation in self.latest_evaluation_by_asset_key.items() - }, - "latest_evaluation_timestamp": self.latest_evaluation_timestamp, - } + # these string cursors will contain AutoMaterializeAssetEvaluation objects, which get + # deserialized into AssetConditionEvaluationWithRunIds, not AssetConditionEvaluation + evaluation = deserialize_value( + serialized_evaluation, AssetConditionEvaluationWithRunIds, whitelist_map=whitelist_map + ).evaluation + latest_evaluation_by_asset_key[key] = evaluation + + asset_cursors = [] + for asset_key, latest_evaluation in latest_evaluation_by_asset_key.items(): + asset_cursors.append( + get_backcompat_asset_condition_cursor( + asset_key, + data.get("latest_storage_id"), + data.get("latest_timestamp"), + latest_evaluation, + handled_root_asset_graph_subset.get_asset_subset(asset_key, asset_graph) + if asset_graph + else None, + ) ) - return serialized + + return AssetDaemonCursor( + evaluation_id=default_evaluation_id, + asset_cursors=asset_cursors, + last_observe_request_timestamp_by_asset_key=last_observe_request_timestamp_by_asset_key, + ) @whitelist_for_serdes @@ -390,24 +283,7 @@ class LegacyAssetDaemonCursorWrapper(NamedTuple): serialized_cursor: str - def get_asset_daemon_cursor(self, asset_graph: AssetGraph) -> AssetDaemonCursor: - return AssetDaemonCursor.from_serialized(self.serialized_cursor, asset_graph) - - @staticmethod - def from_compressed(compressed: str) -> "LegacyAssetDaemonCursorWrapper": - """This method takes a b64 encoded, zlib compressed string and returns the original - BackcompatAssetDaemonEvaluationInfo object. - """ - decoded_bytes = base64.b64decode(compressed) - decompressed_bytes = zlib.decompress(decoded_bytes) - decoded_str = decompressed_bytes.decode("utf-8") - return deserialize_value(decoded_str, LegacyAssetDaemonCursorWrapper) - - def to_compressed(self) -> str: - """This method compresses the serialized cursor and returns a b64 encoded string to be - stored as a string value. - """ - serialized_bytes = serialize_value(self).encode("utf-8") - compressed_bytes = zlib.compress(serialized_bytes) - encoded_str = base64.b64encode(compressed_bytes) - return encoded_str.decode("utf-8") + def get_asset_daemon_cursor(self, asset_graph: Optional[AssetGraph]) -> AssetDaemonCursor: + return backcompat_deserialize_asset_daemon_cursor_str( + self.serialized_cursor, asset_graph, 0 + ) diff --git a/python_modules/dagster/dagster/_core/definitions/asset_subset.py b/python_modules/dagster/dagster/_core/definitions/asset_subset.py index d41ad43a43dac..5b76926af80af 100644 --- a/python_modules/dagster/dagster/_core/definitions/asset_subset.py +++ b/python_modules/dagster/dagster/_core/definitions/asset_subset.py @@ -126,6 +126,7 @@ def inverse( current_time: Optional[datetime.datetime] = None, dynamic_partitions_store: Optional["DynamicPartitionsStore"] = None, ) -> "AssetSubset": + """Returns the AssetSubset containing all asset partitions which are not in this AssetSubset.""" if partitions_def is None: return self._replace(value=not self.bool_value) else: diff --git a/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule.py b/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule.py index 012dc7727ec4e..e840222a7d0f2 100644 --- a/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule.py +++ b/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule.py @@ -32,7 +32,9 @@ freshness_evaluation_results_for_asset_key, ) from dagster._core.definitions.multi_dimensional_partitions import MultiPartitionsDefinition -from dagster._core.definitions.time_window_partitions import get_time_partitions_def +from dagster._core.definitions.time_window_partitions import ( + get_time_partitions_def, +) from dagster._core.storage.dagster_run import RunsFilter from dagster._core.storage.tags import AUTO_MATERIALIZE_TAG from dagster._serdes.serdes import ( @@ -109,7 +111,7 @@ def add_evaluation_data_from_previous_tick( # we've explicitly said to ignore it ignore_subset = has_metadata_subset | ignore_subset - for elt in context.previous_subsets_with_metadata or []: + for elt in context.previous_subsets_with_metadata: carry_forward_subset = elt.subset - ignore_subset if carry_forward_subset.size > 0: mapping[elt.frozen_metadata] |= carry_forward_subset @@ -117,11 +119,12 @@ def add_evaluation_data_from_previous_tick( # for now, an asset is in the "true" subset if and only if we have some metadata for it true_subset = reduce(operator.or_, mapping.values(), context.empty_subset()) return ( - true_subset, + context.candidate_subset & true_subset, [ AssetSubsetWithMetadata(subset, dict(metadata)) for metadata, subset in mapping.items() ], + {}, ) @abstractmethod @@ -308,7 +311,7 @@ def missed_cron_ticks( self, context: AssetConditionEvaluationContext ) -> Sequence[datetime.datetime]: """Returns the cron ticks which have been missed since the previous cursor was generated.""" - if not context.previous_evaluation_timestamp: + if not context.cursor.previous_evaluation_timestamp: previous_dt = next( reverse_cron_string_iterator( end_timestamp=context.evaluation_time.timestamp(), @@ -319,7 +322,7 @@ def missed_cron_ticks( return [previous_dt] missed_ticks = [] for dt in cron_string_iterator( - start_timestamp=context.previous_evaluation_timestamp, + start_timestamp=context.cursor.previous_evaluation_timestamp, cron_string=self.cron_schedule, execution_timezone=self.timezone, ): @@ -400,7 +403,7 @@ def evaluate_for_asset(self, context: AssetConditionEvaluationContext) -> RuleEv - context.materialized_requested_or_discarded_since_previous_tick_subset ) - return asset_subset_to_request, [] + return asset_subset_to_request, [], {} @whitelist_for_serdes @@ -598,6 +601,8 @@ def evaluate_for_asset(self, context: AssetConditionEvaluationContext) -> RuleEv @whitelist_for_serdes class MaterializeOnMissingRule(AutoMaterializeRule, NamedTuple("_MaterializeOnMissingRule", [])): + HANDLED_SUBSET_KEY: str = "handled_subset" + @property def decision_type(self) -> AutoMaterializeDecisionType: return AutoMaterializeDecisionType.MATERIALIZE @@ -608,28 +613,27 @@ def description(self) -> str: def evaluate_for_asset(self, context: AssetConditionEvaluationContext) -> RuleEvaluationResults: """Evaluates the set of asset partitions for this asset which are missing and were not - previously discarded. Currently only applies to root asset partitions and asset partitions - with updated parents. + previously discarded. """ - missing_asset_partitions = set( - context.never_materialized_requested_or_discarded_root_subset.asset_partitions - ) - # in addition to missing root asset partitions, check any asset partitions with updated - # parents to see if they're missing - for candidate in context.candidate_parent_has_or_will_update_subset.asset_partitions: - if not context.instance_queryer.asset_partition_has_materialization_or_observation( - candidate - ): - missing_asset_partitions |= {candidate} - - newly_missing_subset = AssetSubset.from_asset_partitions_set( - context.asset_key, context.partitions_def, missing_asset_partitions + handled_subset = ( + ( + context.cursor.get_extras_value( + context.condition, self.HANDLED_SUBSET_KEY, AssetSubset + ) + or context.empty_subset() + ) + | context.previous_tick_requested_subset + | context.materialized_since_previous_tick_subset ) - missing_subset = newly_missing_subset | ( - context.previous_true_subset - - context.materialized_requested_or_discarded_since_previous_tick_subset + unhandled_candidates = ( + context.candidate_subset + & handled_subset.inverse( + context.partitions_def, context.evaluation_time, context.instance_queryer + ) + if handled_subset.size > 0 + else context.candidate_subset ) - return missing_subset, [] + return (unhandled_candidates, [], {self.HANDLED_SUBSET_KEY: handled_subset}) @whitelist_for_serdes @@ -870,14 +874,14 @@ def evaluate_for_asset(self, context: AssetConditionEvaluationContext) -> RuleEv ).get_asset_subset(context.asset_key, context.asset_graph) if backfilling_subset.size == 0: - return context.empty_subset(), [] + return context.empty_subset(), [], {} if self.all_partitions: true_subset = context.candidate_subset else: true_subset = context.candidate_subset & backfilling_subset - return true_subset, [] + return true_subset, [], {} @whitelist_for_serdes @@ -901,6 +905,10 @@ def evaluate_for_asset(self, context: AssetConditionEvaluationContext) -> RuleEv )[self.limit :] ) - return AssetSubset.from_asset_partitions_set( - context.asset_key, context.partitions_def, rate_limited_asset_partitions - ), [] + return ( + AssetSubset.from_asset_partitions_set( + context.asset_key, context.partitions_def, rate_limited_asset_partitions + ), + [], + {}, + ) diff --git a/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule_evaluation.py b/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule_evaluation.py index 8d79a567fe617..1601b639c7f3b 100644 --- a/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule_evaluation.py +++ b/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule_evaluation.py @@ -9,10 +9,12 @@ AbstractSet, Dict, FrozenSet, + Mapping, NamedTuple, Optional, Sequence, Tuple, + Union, cast, ) @@ -22,13 +24,14 @@ from dagster._core.definitions.metadata import MetadataMapping, MetadataValue from dagster._serdes.serdes import ( NamedTupleSerializer, + PackableValue, UnpackContext, UnpackedValue, WhitelistMap, whitelist_for_serdes, ) -from .partition import DefaultPartitionsSubset, SerializedPartitionsSubset +from .partition import DefaultPartitionsSubset, PartitionsDefinition, SerializedPartitionsSubset if TYPE_CHECKING: from dagster._core.definitions.asset_condition import AssetSubsetWithMetadata @@ -124,7 +127,9 @@ def metadata(self) -> MetadataMapping: } -RuleEvaluationResults = Tuple[AssetSubset, Sequence["AssetSubsetWithMetadata"]] +RuleEvaluationResults = Tuple[ + AssetSubset, Sequence["AssetSubsetWithMetadata"], Mapping[str, PackableValue] +] @whitelist_for_serdes @@ -141,6 +146,41 @@ class BackcompatAutoMaterializeAssetEvaluationSerializer(NamedTupleSerializer): AssetConditionEvaluationWithRunIds objects. """ + @property + def partitions_def(self) -> Optional[PartitionsDefinition]: + """We may override this property in subclasses that are created at a point in time where + we know what the current partitions definition is. If we don't know, then we will be unable + to deserialize any SerializedPartitionsSubset objects. + """ + return None + + def _get_empty_subset(self, asset_key: AssetKey, is_partitioned: bool) -> AssetSubset: + # We know this asset is partitioned, but we don't know what its partitions def is, so we + # just use a DefaultPartitionsSubset + if is_partitioned and self.partitions_def is None: + return AssetSubset(asset_key, DefaultPartitionsSubset(set())) + else: + return AssetSubset.empty(asset_key, self.partitions_def) + + def deserialize_serialized_partitions_subset_or_none( + self, + asset_key: AssetKey, + is_partitioned: bool, + serialized: Union[None, SerializedPartitionsSubset], + ) -> AssetSubset: + if serialized is None: + # Confusingly, we used `None` to indicate "all of an unpartitioned asset" in the old + # serialization scheme + return AssetSubset(asset_key, True) + elif self.partitions_def is None or not serialized.can_deserialize(self.partitions_def): + # If we don't know the partitions def, then we can't deserialize the partitions subset, + # so we just use an empty one instead. + return self._get_empty_subset(asset_key, is_partitioned) + else: + # We are in an instance of this class that knows the partitions def, so we can + # deserialize the partitions subset + return AssetSubset(asset_key, serialized.deserialize(self.partitions_def)) + def _asset_condition_snapshot_from_rule_snapshot( self, rule_snapshot: AutoMaterializeRuleSnapshot ) -> "AssetConditionSnapshot": @@ -171,28 +211,28 @@ def _get_child_rule_evaluation( condition_snapshot = self._asset_condition_snapshot_from_rule_snapshot(rule_snapshot) - if is_partitioned: - # for partitioned assets, we can't deserialize SerializedPartitionsSubset into an - # AssetSubset, so we just return a dummy empty default partition subset - value = DefaultPartitionsSubset(set()) - else: - value = len(partition_subsets_by_condition) > 0 + subsets_with_metadata = [ + AssetSubsetWithMetadata( + subset=self.deserialize_serialized_partitions_subset_or_none( + asset_key, is_partitioned, serialized + ), + metadata=rule_evaluation.evaluation_data.metadata, + ) + for rule_evaluation, serialized in partition_subsets_by_condition + if rule_evaluation.evaluation_data + ] - true_subset = AssetSubset(asset_key, value) + true_subset = self._get_empty_subset(asset_key, is_partitioned) + for _, serialized in partition_subsets_by_condition: + true_subset |= self.deserialize_serialized_partitions_subset_or_none( + asset_key, is_partitioned, serialized + ) return AssetConditionEvaluation( condition_snapshot=condition_snapshot, true_subset=true_subset, candidate_subset=None, - subsets_with_metadata=[] - if is_partitioned - else [ - AssetSubsetWithMetadata( - subset=true_subset, metadata=rule_evaluation.evaluation_data.metadata - ) - for rule_evaluation, _ in partition_subsets_by_condition - if rule_evaluation.evaluation_data - ], + subsets_with_metadata=subsets_with_metadata, ) def _get_child_decision_type_evaluation( @@ -239,7 +279,7 @@ def _get_child_decision_type_evaluation( ] unique_id = hashlib.md5("".join(unique_id_parts).encode()).hexdigest() decision_type_snapshot = AssetConditionSnapshot( - class_name=OrAssetCondition.__name__, description="", unique_id=unique_id + class_name=OrAssetCondition.__name__, description="Any of", unique_id=unique_id ) initial = ( AssetSubset(asset_key, DefaultPartitionsSubset(set())) @@ -267,7 +307,7 @@ def _get_child_decision_type_evaluation( unique_id = hashlib.md5("".join(unique_id_parts).encode()).hexdigest() return AssetConditionEvaluation( condition_snapshot=AssetConditionSnapshot( - class_name=NotAssetCondition.__name__, description="", unique_id=unique_id + class_name=NotAssetCondition.__name__, description="Not", unique_id=unique_id ), # for partitioned assets, we don't bother calculating the true subset, as we can't # properly deserialize the inner results @@ -337,7 +377,7 @@ def unpack( ] unique_id = hashlib.md5("".join(unique_id_parts).encode()).hexdigest() condition_snapshot = AssetConditionSnapshot( - class_name=AndAssetCondition.__name__, description="", unique_id=unique_id + class_name=AndAssetCondition.__name__, description="All of", unique_id=unique_id ) return AssetConditionEvaluation( diff --git a/python_modules/dagster/dagster/_core/definitions/freshness_based_auto_materialize.py b/python_modules/dagster/dagster/_core/definitions/freshness_based_auto_materialize.py index 97c9a6fa86808..99ff47c7a4b39 100644 --- a/python_modules/dagster/dagster/_core/definitions/freshness_based_auto_materialize.py +++ b/python_modules/dagster/dagster/_core/definitions/freshness_based_auto_materialize.py @@ -168,7 +168,7 @@ def freshness_evaluation_results_for_asset_key( if not context.asset_graph.get_downstream_freshness_policies( asset_key=asset_key ) or context.asset_graph.is_partitioned(asset_key): - return context.empty_subset(), [] + return context.empty_subset(), [], {} # figure out the current contents of this asset current_data_time = context.data_time_resolver.get_current_data_time(asset_key, current_time) @@ -181,7 +181,7 @@ def freshness_evaluation_results_for_asset_key( # if executing the asset on this tick would not change its data time, then return if current_data_time == expected_data_time: - return context.empty_subset(), [] + return context.empty_subset(), [], {} # calculate the data times you would expect after all currently-executing runs # were to successfully complete @@ -220,8 +220,10 @@ def freshness_evaluation_results_for_asset_key( and evaluation_data is not None ): all_subset = AssetSubset.all(asset_key, None) - return AssetSubset.all(asset_key, None), [ - AssetSubsetWithMetadata(all_subset, evaluation_data.metadata) - ] + return ( + AssetSubset.all(asset_key, None), + [AssetSubsetWithMetadata(all_subset, evaluation_data.metadata)], + {}, + ) else: - return context.empty_subset(), [] + return context.empty_subset(), [], {} diff --git a/python_modules/dagster/dagster/_daemon/asset_daemon.py b/python_modules/dagster/dagster/_daemon/asset_daemon.py index 96ce5b0dad3cd..4c0f931e9eaad 100644 --- a/python_modules/dagster/dagster/_daemon/asset_daemon.py +++ b/python_modules/dagster/dagster/_daemon/asset_daemon.py @@ -1,6 +1,8 @@ +import base64 import logging import sys import threading +import zlib from collections import defaultdict from types import TracebackType from typing import Dict, Optional, Sequence, Tuple, Type, cast @@ -12,6 +14,7 @@ from dagster._core.definitions.asset_daemon_cursor import ( AssetDaemonCursor, LegacyAssetDaemonCursorWrapper, + backcompat_deserialize_asset_daemon_cursor_str, ) from dagster._core.definitions.asset_graph import AssetGraph from dagster._core.definitions.external_asset_graph import ExternalAssetGraph @@ -58,6 +61,9 @@ from dagster._core.workspace.workspace import IWorkspace from dagster._daemon.daemon import DaemonIterator, DagsterDaemon from dagster._daemon.sensor import is_under_min_interval, mark_sensor_state_for_tick +from dagster._serdes import serialize_value +from dagster._serdes.errors import DeserializationError +from dagster._serdes.serdes import deserialize_value from dagster._utils import ( SingleInstigatorDebugCrashFlags, check_for_debug_crash, @@ -108,26 +114,75 @@ def get_current_evaluation_id( ) -> Optional[int]: if not sensor_origin: serialized_cursor = _get_pre_sensor_auto_materialize_serialized_cursor(instance) + if not serialized_cursor: + return None + cursor = asset_daemon_cursor_from_pre_sensor_auto_materialize_serialized_cursor( + serialized_cursor, None + ) else: instigator_state = check.not_none(instance.schedule_storage).get_instigator_state( sensor_origin.get_id(), sensor_origin.get_selector().get_id() ) - compressed_cursor = ( + serialized_cursor = ( cast(SensorInstigatorData, instigator_state.instigator_data).cursor if instigator_state else None ) - serialized_cursor = ( - LegacyAssetDaemonCursorWrapper.from_compressed(compressed_cursor).serialized_cursor - if compressed_cursor - else None - ) + if not serialized_cursor: + return None + cursor = asset_daemon_cursor_from_instigator_serialized_cursor(serialized_cursor, None) - return ( - AssetDaemonCursor.get_evaluation_id_from_serialized(serialized_cursor) - if serialized_cursor - else None + return cursor.evaluation_id + + +def asset_daemon_cursor_to_instigator_serialized_cursor(cursor: AssetDaemonCursor) -> str: + """This method compresses the serialized cursor and returns a b64 encoded string to be stored + as a string value. + """ + # increment the version if the cursor format changes + VERSION = "0" + + serialized_bytes = serialize_value(cursor).encode("utf-8") + compressed_bytes = zlib.compress(serialized_bytes) + encoded_cursor = base64.b64encode(compressed_bytes).decode("utf-8") + return VERSION + encoded_cursor + + +def asset_daemon_cursor_from_instigator_serialized_cursor( + serialized_cursor: Optional[str], asset_graph: Optional[AssetGraph] +) -> AssetDaemonCursor: + """This method decompresses the serialized cursor and returns a deserialized cursor object, + converting from the legacy cursor format if necessary. + """ + if serialized_cursor is None: + return AssetDaemonCursor.empty() + + version, encoded_bytes = serialized_cursor[0], serialized_cursor[1:] + if version != "0": + return AssetDaemonCursor.empty() + + decoded_bytes = base64.b64decode(encoded_bytes) + decompressed_bytes = zlib.decompress(decoded_bytes) + decompressed_str = decompressed_bytes.decode("utf-8") + + deserialized_cursor = deserialize_value( + decompressed_str, (LegacyAssetDaemonCursorWrapper, AssetDaemonCursor) ) + if isinstance(deserialized_cursor, LegacyAssetDaemonCursorWrapper): + return deserialized_cursor.get_asset_daemon_cursor(asset_graph) + return deserialized_cursor + + +def asset_daemon_cursor_from_pre_sensor_auto_materialize_serialized_cursor( + serialized_cursor: Optional[str], asset_graph: Optional[AssetGraph] +) -> AssetDaemonCursor: + if serialized_cursor is None: + return AssetDaemonCursor.empty() + + try: + return deserialize_value(serialized_cursor, AssetDaemonCursor) + except DeserializationError: + return backcompat_deserialize_asset_daemon_cursor_str(serialized_cursor, asset_graph, 0) class AutoMaterializeLaunchContext: @@ -278,16 +333,18 @@ def _initialize_evaluation_id( continue compressed_cursor = instigator_data.cursor if compressed_cursor: - stored_evaluation_id = ( - LegacyAssetDaemonCursorWrapper.from_compressed(compressed_cursor) - .get_asset_daemon_cursor(asset_graph) - .evaluation_id - ) + stored_evaluation_id = asset_daemon_cursor_from_instigator_serialized_cursor( + compressed_cursor, asset_graph + ).evaluation_id self._next_evaluation_id = max(self._next_evaluation_id, stored_evaluation_id) serialized_cursor = _get_pre_sensor_auto_materialize_serialized_cursor(instance) if serialized_cursor: - stored_cursor = AssetDaemonCursor.from_serialized(serialized_cursor, asset_graph) + stored_cursor = ( + asset_daemon_cursor_from_pre_sensor_auto_materialize_serialized_cursor( + serialized_cursor, asset_graph + ) + ) self._next_evaluation_id = max( self._next_evaluation_id, stored_cursor.evaluation_id ) @@ -529,17 +586,12 @@ def _process_auto_materialize_tick_generator( ) if sensor: - compressed_cursor = cast( - SensorInstigatorData, - check.not_none(auto_materialize_instigator_state).instigator_data, - ).cursor - - stored_cursor: AssetDaemonCursor = ( - LegacyAssetDaemonCursorWrapper.from_compressed( - compressed_cursor - ).get_asset_daemon_cursor(asset_graph) - if compressed_cursor - else AssetDaemonCursor.empty() + stored_cursor = asset_daemon_cursor_from_instigator_serialized_cursor( + cast( + SensorInstigatorData, + check.not_none(auto_materialize_instigator_state).instigator_data, + ).cursor, + asset_graph, ) instigator_origin_id = sensor.get_external_origin().get_id() @@ -547,10 +599,8 @@ def _process_auto_materialize_tick_generator( instigator_name = sensor.name else: serialized_cursor = _get_pre_sensor_auto_materialize_serialized_cursor(instance) - stored_cursor = ( - AssetDaemonCursor.from_serialized(serialized_cursor, asset_graph) - if serialized_cursor - else AssetDaemonCursor.empty() + stored_cursor = asset_daemon_cursor_from_pre_sensor_auto_materialize_serialized_cursor( + serialized_cursor, asset_graph ) instigator_origin_id = _PRE_SENSOR_AUTO_MATERIALIZE_ORIGIN_ID instigator_selector_id = _PRE_SENSOR_AUTO_MATERIALIZE_SELECTOR_ID @@ -718,16 +768,16 @@ def _process_auto_materialize_tick_generator( SensorInstigatorData( last_tick_timestamp=tick.timestamp, min_interval=sensor.min_interval_seconds, - cursor=LegacyAssetDaemonCursorWrapper( - new_cursor.serialize() - ).to_compressed(), + cursor=asset_daemon_cursor_to_instigator_serialized_cursor( + new_cursor + ), sensor_type=SensorType.AUTOMATION_POLICY, ) ) ) else: instance.daemon_cursor_storage.set_cursor_values( - {_PRE_SENSOR_AUTO_MATERIALIZE_CURSOR_KEY: new_cursor.serialize()} + {_PRE_SENSOR_AUTO_MATERIALIZE_CURSOR_KEY: serialize_value(new_cursor)} ) check_for_debug_crash(debug_crash_flags, "CURSOR_UPDATED") diff --git a/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/asset_daemon_scenario.py b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/asset_daemon_scenario.py index 3371b38db08c0..7d734692cd2cb 100644 --- a/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/asset_daemon_scenario.py +++ b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/asset_daemon_scenario.py @@ -48,7 +48,6 @@ ) from dagster._core.definitions.asset_daemon_cursor import ( AssetDaemonCursor, - LegacyAssetDaemonCursorWrapper, ) from dagster._core.definitions.asset_graph import AssetGraph from dagster._core.definitions.asset_subset import AssetSubset @@ -84,8 +83,11 @@ _PRE_SENSOR_AUTO_MATERIALIZE_SELECTOR_ID, AssetDaemon, _get_pre_sensor_auto_materialize_serialized_cursor, + asset_daemon_cursor_from_instigator_serialized_cursor, + asset_daemon_cursor_from_pre_sensor_auto_materialize_serialized_cursor, get_current_evaluation_id, ) +from dagster._serdes.serdes import serialize_value from .base_scenario import FAIL_TAG, run_request @@ -210,7 +212,7 @@ class AssetDaemonScenarioState(NamedTuple): asset_specs: Sequence[Union[AssetSpec, AssetSpecWithPartitionsDef]] current_time: datetime.datetime = pendulum.now("UTC") run_requests: Sequence[RunRequest] = [] - serialized_cursor: str = AssetDaemonCursor.empty().serialize() + serialized_cursor: str = serialize_value(AssetDaemonCursor.empty(0)) evaluations: Sequence[AssetConditionEvaluation] = [] logger: logging.Logger = logging.getLogger("dagster.amp") tick_index: int = 1 @@ -285,6 +287,9 @@ def with_automation_policy_sensors( ): return self._replace(automation_policy_sensors=sensors) + def with_serialized_cursor(self, serialized_cursor: str) -> "AssetDaemonScenarioState": + return self._replace(serialized_cursor=serialized_cursor) + def with_all_eager( self, max_materializations_per_minute: int = 1 ) -> "AssetDaemonScenarioState": @@ -355,7 +360,9 @@ def with_dynamic_partitions( def _evaluate_tick_fast( self, ) -> Tuple[Sequence[RunRequest], AssetDaemonCursor, Sequence[AssetConditionEvaluation]]: - cursor = AssetDaemonCursor.from_serialized(self.serialized_cursor, self.asset_graph) + cursor = asset_daemon_cursor_from_pre_sensor_auto_materialize_serialized_cursor( + self.serialized_cursor, self.asset_graph + ) new_run_requests, new_cursor, new_evaluations = AssetDaemonContext( evaluation_id=cursor.evaluation_id + 1, @@ -458,29 +465,17 @@ def _evaluate_tick_daemon( sensor.get_external_origin_id(), sensor.selector_id ) ) - compressed_cursor = ( + new_cursor = asset_daemon_cursor_from_instigator_serialized_cursor( cast( SensorInstigatorData, check.not_none(auto_materialize_instigator_state).instigator_data, - ).cursor - or AssetDaemonCursor.empty().serialize() - ) - new_cursor = ( - LegacyAssetDaemonCursorWrapper.from_compressed( - compressed_cursor - ).get_asset_daemon_cursor(self.asset_graph) - if compressed_cursor - else AssetDaemonCursor.empty() + ).cursor, + self.asset_graph, ) else: raw_cursor = _get_pre_sensor_auto_materialize_serialized_cursor(self.instance) - new_cursor = ( - AssetDaemonCursor.from_serialized( - raw_cursor, - self.asset_graph, - ) - if raw_cursor - else AssetDaemonCursor.empty() + new_cursor = asset_daemon_cursor_from_pre_sensor_auto_materialize_serialized_cursor( + raw_cursor, self.asset_graph ) new_run_requests = [ run_request( @@ -501,9 +496,9 @@ def _evaluate_tick_daemon( ] return new_run_requests, new_cursor, new_evaluations - def evaluate_tick(self) -> "AssetDaemonScenarioState": + def evaluate_tick(self, label: Optional[str] = None) -> "AssetDaemonScenarioState": self.logger.critical("********************************") - self.logger.critical(f"EVALUATING TICK {self.tick_index}") + self.logger.critical(f"EVALUATING TICK {label or self.tick_index}") self.logger.critical("********************************") with pendulum.test(self.current_time): if self.is_daemon: @@ -517,7 +512,7 @@ def evaluate_tick(self) -> "AssetDaemonScenarioState": return self._replace( run_requests=new_run_requests, - serialized_cursor=new_cursor.serialize(), + serialized_cursor=serialize_value(new_cursor), evaluations=new_evaluations, tick_index=self.tick_index + 1, ) @@ -691,10 +686,16 @@ def get_leaf_evaluations(e: AssetConditionEvaluation) -> Sequence[AssetCondition try: for actual_sm, expected_sm in zip( - sorted(actual_subsets_with_metadata, key=lambda x: str(x)), - sorted(expected_subsets_with_metadata, key=lambda x: str(x)), + sorted( + actual_subsets_with_metadata, + key=lambda x: (frozenset(x.subset.asset_partitions), str(x.metadata)), + ), + sorted( + expected_subsets_with_metadata, + key=lambda x: (frozenset(x.subset.asset_partitions), str(x.metadata)), + ), ): - assert actual_sm.subset == expected_sm.subset + assert actual_sm.subset.asset_partitions == expected_sm.subset.asset_partitions # only check evaluation data if it was set on the expected evaluation spec if expected_sm.metadata: assert actual_sm.metadata == expected_sm.metadata diff --git a/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/base_scenario.py b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/base_scenario.py index 5d6eafd0901a6..f3efb0213ae0d 100644 --- a/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/base_scenario.py +++ b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/base_scenario.py @@ -50,7 +50,9 @@ AssetDaemonContext, get_implicit_auto_materialize_policy, ) -from dagster._core.definitions.asset_daemon_cursor import AssetDaemonCursor +from dagster._core.definitions.asset_daemon_cursor import ( + AssetDaemonCursor, +) from dagster._core.definitions.asset_graph import AssetGraph from dagster._core.definitions.asset_graph_subset import AssetGraphSubset from dagster._core.definitions.auto_materialize_policy import AutoMaterializePolicy @@ -79,7 +81,11 @@ create_test_daemon_workspace_context, ) from dagster._core.types.loadable_target_origin import LoadableTargetOrigin -from dagster._daemon.asset_daemon import AssetDaemon +from dagster._daemon.asset_daemon import ( + AssetDaemon, + asset_daemon_cursor_from_pre_sensor_auto_materialize_serialized_cursor, +) +from dagster._serdes.serdes import serialize_value from dagster._utils import SingleInstigatorDebugCrashFlags @@ -340,7 +346,9 @@ def prior_repo(): ) # make sure we can deserialize it using the new asset graph - cursor = AssetDaemonCursor.from_serialized(cursor.serialize(), repo.asset_graph) + cursor = asset_daemon_cursor_from_pre_sensor_auto_materialize_serialized_cursor( + serialize_value(cursor), repo.asset_graph + ) else: cursor = AssetDaemonCursor.empty() diff --git a/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/scenarios/active_run_scenarios.py b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/scenarios/active_run_scenarios.py index 879b9cebbeeb0..20d6087f769e3 100644 --- a/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/scenarios/active_run_scenarios.py +++ b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/scenarios/active_run_scenarios.py @@ -63,7 +63,9 @@ def create_materialization_event_log_entry( active_run_scenarios = { "downstream_still_in_progress": AssetReconciliationScenario( assets=partitioned_assets, - unevaluated_runs=[], + unevaluated_runs=[ + run(["upstream_daily", "downstream_daily"], partition_key="2020-01-01"), + ], current_time=create_pendulum_time(year=2020, month=1, day=2, hour=0), # manually populate entries here to create an in-progress run for both daily assets dagster_runs=[ diff --git a/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/scenarios/basic_scenarios.py b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/scenarios/basic_scenarios.py index d8994e6dadf38..d96fdb419ef44 100644 --- a/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/scenarios/basic_scenarios.py +++ b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/scenarios/basic_scenarios.py @@ -87,7 +87,10 @@ ), "multi_asset_one_parent_unreconciled": AssetReconciliationScenario( assets=multi_asset_after_fork, - unevaluated_runs=[run(["asset1", "asset2"], failed_asset_keys=["asset3"])], + unevaluated_runs=[ + run(["asset1", "asset2", "asset3"]), + run(["asset1", "asset2"], failed_asset_keys=["asset3"]), + ], expected_run_requests=[], ), ################################################################################################ @@ -95,7 +98,7 @@ ################################################################################################ "partial_run": AssetReconciliationScenario( assets=two_assets_in_sequence, - unevaluated_runs=[run(["asset1"], failed_asset_keys=["asset2"])], + unevaluated_runs=[run(["asset1", "asset2"]), run(["asset1"], failed_asset_keys=["asset2"])], expected_run_requests=[], ), "partial_run_with_another_attempt": AssetReconciliationScenario( diff --git a/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/scenarios/blocking_check_scenarios.py b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/scenarios/blocking_check_scenarios.py index bc31f50b3bc35..62bf46ef393a9 100644 --- a/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/scenarios/blocking_check_scenarios.py +++ b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/scenarios/blocking_check_scenarios.py @@ -37,7 +37,7 @@ def asset3(): blocking_check_scenarios = { "blocking_check_works_inside_run": AssetReconciliationScenario( assets=[asset1_with_blocking_check, asset2, asset3], - unevaluated_runs=[run(["asset1", "asset2"])], + unevaluated_runs=[run(["asset1"]), run(["asset2"]), run(["asset1", "asset2"])], expected_run_requests=[], ), "blocking_check_doesnt_work_across_runs": AssetReconciliationScenario( diff --git a/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/scenarios/partition_scenarios.py b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/scenarios/partition_scenarios.py index c3312a78b4c9e..3abb1afcba4da 100644 --- a/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/scenarios/partition_scenarios.py +++ b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/scenarios/partition_scenarios.py @@ -196,7 +196,10 @@ ), "partial_run_partitioned": AssetReconciliationScenario( assets=two_assets_in_sequence_one_partition, - unevaluated_runs=[run(["asset1"], failed_asset_keys=["asset2"], partition_key="a")], + unevaluated_runs=[ + run(["asset1", "asset2"], partition_key="a"), + run(["asset1"], failed_asset_keys=["asset2"], partition_key="a"), + ], expected_run_requests=[], ), "partial_run_partitioned_with_another_attempt": AssetReconciliationScenario( diff --git a/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/test_asset_daemon.py b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/test_asset_daemon.py index 8f626951c83cf..884237dd32882 100644 --- a/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/test_asset_daemon.py +++ b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/test_asset_daemon.py @@ -9,6 +9,7 @@ AutoMaterializeRule, DagsterInstance, instance_for_test, + serialize_value, ) from dagster._core.definitions.asset_daemon_cursor import AssetDaemonCursor from dagster._core.definitions.asset_selection import AssetSelection @@ -279,9 +280,9 @@ def test_automation_policy_sensor_ticks(): instance.daemon_cursor_storage.set_cursor_values( { - _PRE_SENSOR_AUTO_MATERIALIZE_CURSOR_KEY: AssetDaemonCursor.empty() - ._replace(evaluation_id=pre_sensor_evaluation_id) - .serialize() + _PRE_SENSOR_AUTO_MATERIALIZE_CURSOR_KEY: serialize_value( + AssetDaemonCursor.empty()._replace(evaluation_id=pre_sensor_evaluation_id) + ) } ) diff --git a/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/test_asset_daemon_cursor.py b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/test_asset_daemon_cursor.py index 10d1b4ddf7995..aeefdbf4fd47e 100644 --- a/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/test_asset_daemon_cursor.py +++ b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/test_asset_daemon_cursor.py @@ -1,9 +1,13 @@ import datetime import json -from dagster import AssetKey, StaticPartitionsDefinition, asset +from dagster import StaticPartitionsDefinition, asset from dagster._core.definitions.asset_daemon_cursor import AssetDaemonCursor from dagster._core.definitions.asset_graph import AssetGraph +from dagster._daemon.asset_daemon import ( + asset_daemon_cursor_from_pre_sensor_auto_materialize_serialized_cursor, +) +from dagster._serdes.serdes import serialize_value partitions = StaticPartitionsDefinition(partition_keys=["a", "b", "c"]) @@ -14,43 +18,41 @@ def my_asset(_): def test_asset_reconciliation_cursor_evaluation_id_backcompat() -> None: + # we no longer attempt to deserialize asset information from this super-old cursor format + # instead, the next tick after a transition will just start from a clean slate (preserving + # the evaluation id) backcompat_serialized = ( """[20, ["a"], {"my_asset": "{\\"version\\": 1, \\"subset\\": [\\"a\\"]}"}]""" ) - assert AssetDaemonCursor.get_evaluation_id_from_serialized(backcompat_serialized) is None + assert asset_daemon_cursor_from_pre_sensor_auto_materialize_serialized_cursor( + backcompat_serialized, None + ) == AssetDaemonCursor.empty(20) asset_graph = AssetGraph.from_assets([my_asset]) - c = AssetDaemonCursor.from_serialized(backcompat_serialized, asset_graph) - - assert c == AssetDaemonCursor( - 20, - {AssetKey("a")}, - {AssetKey("my_asset"): partitions.empty_subset().with_partition_keys(["a"])}, - 0, - {}, - {}, - 0, + c = asset_daemon_cursor_from_pre_sensor_auto_materialize_serialized_cursor( + backcompat_serialized, asset_graph ) - c2 = c.with_updates( - 21, - 1, - [], - 0, - [], - datetime.datetime.now(), - [], - ) + assert c == AssetDaemonCursor.empty(20) + + c2 = c.with_updates(21, datetime.datetime.now().timestamp(), [], []) - serdes_c2 = AssetDaemonCursor.from_serialized(c2.serialize(), asset_graph) + serdes_c2 = asset_daemon_cursor_from_pre_sensor_auto_materialize_serialized_cursor( + serialize_value(c2), asset_graph + ) assert serdes_c2 == c2 - assert serdes_c2.evaluation_id == 1 + assert serdes_c2.evaluation_id == 21 - assert AssetDaemonCursor.get_evaluation_id_from_serialized(c2.serialize()) == 1 + assert ( + asset_daemon_cursor_from_pre_sensor_auto_materialize_serialized_cursor( + serialize_value(c2), None + ).evaluation_id + == 21 + ) -def test_asset_reconciliation_cursor_auto_observe_backcompat(): +def test_asset_reconciliation_cursor_auto_observe_backcompat() -> None: partitions_def = StaticPartitionsDefinition(["a", "b", "c"]) @asset(partitions_def=partitions_def) @@ -76,9 +78,7 @@ def asset2(): ) ) - cursor = AssetDaemonCursor.from_serialized( - serialized, asset_graph=AssetGraph.from_assets([asset1, asset2]) + cursor = asset_daemon_cursor_from_pre_sensor_auto_materialize_serialized_cursor( + serialized, AssetGraph.from_assets([asset1, asset2]) ) - assert cursor.latest_storage_id == 25 - assert cursor.handled_root_asset_keys == handled_root_asset_keys - assert cursor.handled_root_partitions_by_asset_key == handled_root_partitions_by_asset_key + assert cursor.evaluation_id == 25 diff --git a/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/test_scenarios.py b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/test_scenarios.py index 331bb7354a520..2177e17644516 100644 --- a/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/test_scenarios.py +++ b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/test_scenarios.py @@ -3,6 +3,7 @@ from .asset_daemon_scenario import AssetDaemonScenario from .updated_scenarios.basic_scenarios import basic_scenarios from .updated_scenarios.cron_scenarios import cron_scenarios +from .updated_scenarios.cursor_migration_scenarios import cursor_migration_scenarios from .updated_scenarios.freshness_policy_scenarios import freshness_policy_scenarios from .updated_scenarios.latest_materialization_run_tag_scenarios import ( latest_materialization_run_tag_scenarios, @@ -15,6 +16,7 @@ + freshness_policy_scenarios + partition_scenarios + latest_materialization_run_tag_scenarios + + cursor_migration_scenarios ) diff --git a/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/updated_scenarios/basic_scenarios.py b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/updated_scenarios/basic_scenarios.py index 293412db388ab..0c6071c76a225 100644 --- a/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/updated_scenarios/basic_scenarios.py +++ b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/updated_scenarios/basic_scenarios.py @@ -166,7 +166,7 @@ .evaluate_tick() .assert_requested_runs() .with_runs(run_request(["A"])) - .evaluate_tick() + .evaluate_tick("a") .assert_requested_runs(run_request(["C"])) .assert_evaluation( "C", diff --git a/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/updated_scenarios/cursor_migration_scenarios.py b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/updated_scenarios/cursor_migration_scenarios.py new file mode 100644 index 0000000000000..91a8fe9be3be0 --- /dev/null +++ b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/updated_scenarios/cursor_migration_scenarios.py @@ -0,0 +1,54 @@ +from dagster import ( + AutoMaterializeRule, +) +from dagster._core.definitions.auto_materialize_rule import DiscardOnMaxMaterializationsExceededRule + +from ..asset_daemon_scenario import ( + AssetDaemonScenario, + AssetRuleEvaluationSpec, + day_partition_key, +) +from ..base_scenario import ( + run_request, +) +from .asset_daemon_scenario_states import ( + daily_partitions_def, + one_asset, + time_partitions_start_str, +) + +cursor_migration_scenarios = [ + AssetDaemonScenario( + id="one_asset_daily_partitions_never_materialized_respect_discards_migrate_after_discard", + initial_state=one_asset.with_asset_properties(partitions_def=daily_partitions_def) + .with_current_time(time_partitions_start_str) + .with_current_time_advanced(days=30, hours=4) + .with_all_eager(), + execution_fn=lambda state: state.evaluate_tick() + .assert_requested_runs( + run_request(asset_keys=["A"], partition_key=day_partition_key(state.current_time)) + ) + .assert_evaluation( + "A", + [ + AssetRuleEvaluationSpec( + AutoMaterializeRule.materialize_on_missing(), + [day_partition_key(state.current_time, delta=-i) for i in range(30)], + ), + AssetRuleEvaluationSpec( + DiscardOnMaxMaterializationsExceededRule(limit=1), + [day_partition_key(state.current_time, delta=-i) for i in range(1, 30)], + ), + ], + num_requested=1, + ) + .with_serialized_cursor( + # this cursor was generated by running the above scenario before the cursor changes + """{"latest_storage_id": null, "handled_root_asset_keys": [], "handled_root_partitions_by_asset_key": {"A": "{\\"version\\": 1, \\"time_windows\\": [[1357344000.0, 1359936000.0]], \\"num_partitions\\": 30}"}, "evaluation_id": 1, "last_observe_request_timestamp_by_asset_key": {}, "latest_evaluation_by_asset_key": {"A": "{\\"__class__\\": \\"AutoMaterializeAssetEvaluation\\", \\"asset_key\\": {\\"__class__\\": \\"AssetKey\\", \\"path\\": [\\"A\\"]}, \\"num_discarded\\": 29, \\"num_requested\\": 1, \\"num_skipped\\": 0, \\"partition_subsets_by_condition\\": [[{\\"__class__\\": \\"AutoMaterializeRuleEvaluation\\", \\"evaluation_data\\": null, \\"rule_snapshot\\": {\\"__class__\\": \\"AutoMaterializeRuleSnapshot\\", \\"class_name\\": \\"MaterializeOnMissingRule\\", \\"decision_type\\": {\\"__enum__\\": \\"AutoMaterializeDecisionType.MATERIALIZE\\"}, \\"description\\": \\"materialization is missing\\"}}, {\\"__class__\\": \\"SerializedPartitionsSubset\\", \\"serialized_partitions_def_class_name\\": \\"DailyPartitionsDefinition\\", \\"serialized_partitions_def_unique_id\\": \\"809725ad60ffac0302d5c81f6e45865e21ec0b85\\", \\"serialized_subset\\": \\"{\\\\\\"version\\\\\\": 1, \\\\\\"time_windows\\\\\\": [[1357344000.0, 1359936000.0]], \\\\\\"num_partitions\\\\\\": 30}\\"}], [{\\"__class__\\": \\"AutoMaterializeRuleEvaluation\\", \\"evaluation_data\\": null, \\"rule_snapshot\\": {\\"__class__\\": \\"AutoMaterializeRuleSnapshot\\", \\"class_name\\": \\"DiscardOnMaxMaterializationsExceededRule\\", \\"decision_type\\": {\\"__enum__\\": \\"AutoMaterializeDecisionType.DISCARD\\"}, \\"description\\": \\"exceeds 1 materialization(s) per minute\\"}}, {\\"__class__\\": \\"SerializedPartitionsSubset\\", \\"serialized_partitions_def_class_name\\": \\"DailyPartitionsDefinition\\", \\"serialized_partitions_def_unique_id\\": \\"809725ad60ffac0302d5c81f6e45865e21ec0b85\\", \\"serialized_subset\\": \\"{\\\\\\"version\\\\\\": 1, \\\\\\"time_windows\\\\\\": [[1357344000.0, 1359849600.0]], \\\\\\"num_partitions\\\\\\": 29}\\"}]], \\"rule_snapshots\\": [{\\"__class__\\": \\"AutoMaterializeRuleSnapshot\\", \\"class_name\\": \\"MaterializeOnMissingRule\\", \\"decision_type\\": {\\"__enum__\\": \\"AutoMaterializeDecisionType.MATERIALIZE\\"}, \\"description\\": \\"materialization is missing\\"}, {\\"__class__\\": \\"AutoMaterializeRuleSnapshot\\", \\"class_name\\": \\"SkipOnParentMissingRule\\", \\"decision_type\\": {\\"__enum__\\": \\"AutoMaterializeDecisionType.SKIP\\"}, \\"description\\": \\"waiting on upstream data to be present\\"}, {\\"__class__\\": \\"AutoMaterializeRuleSnapshot\\", \\"class_name\\": \\"MaterializeOnRequiredForFreshnessRule\\", \\"decision_type\\": {\\"__enum__\\": \\"AutoMaterializeDecisionType.MATERIALIZE\\"}, \\"description\\": \\"required to meet this or downstream asset's freshness policy\\"}, {\\"__class__\\": \\"AutoMaterializeRuleSnapshot\\", \\"class_name\\": \\"SkipOnParentOutdatedRule\\", \\"decision_type\\": {\\"__enum__\\": \\"AutoMaterializeDecisionType.SKIP\\"}, \\"description\\": \\"waiting on upstream data to be up to date\\"}, {\\"__class__\\": \\"AutoMaterializeRuleSnapshot\\", \\"class_name\\": \\"SkipOnRequiredButNonexistentParentsRule\\", \\"decision_type\\": {\\"__enum__\\": \\"AutoMaterializeDecisionType.SKIP\\"}, \\"description\\": \\"required parent partitions do not exist\\"}, {\\"__class__\\": \\"AutoMaterializeRuleSnapshot\\", \\"class_name\\": \\"MaterializeOnParentUpdatedRule\\", \\"decision_type\\": {\\"__enum__\\": \\"AutoMaterializeDecisionType.MATERIALIZE\\"}, \\"description\\": \\"upstream data has changed since latest materialization\\"}, {\\"__class__\\": \\"AutoMaterializeRuleSnapshot\\", \\"class_name\\": \\"SkipOnBackfillInProgressRule\\", \\"decision_type\\": {\\"__enum__\\": \\"AutoMaterializeDecisionType.SKIP\\"}, \\"description\\": \\"targeted by an in-progress backfill\\"}], \\"run_ids\\": {\\"__set__\\": []}}"}, "latest_evaluation_timestamp": 1359950400.0} + """ + ) + .evaluate_tick("a") + # the new cursor "remembers" that a bunch of partitions were discarded + .assert_requested_runs(), + ), +] diff --git a/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/updated_scenarios/partition_scenarios.py b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/updated_scenarios/partition_scenarios.py index 7a6b50b3043e8..21f7e5e829e89 100644 --- a/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/updated_scenarios/partition_scenarios.py +++ b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/updated_scenarios/partition_scenarios.py @@ -508,25 +508,27 @@ ["A"], partition_key=hour_partition_key(time_partitions_start_datetime, delta=1) ) ) - .evaluate_tick() + .evaluate_tick("FOO") .assert_requested_runs() - .with_not_started_runs() + .with_not_started_runs(), + # TEMPORARILY DISABLED: this test will be re-enabled upstack. It is currently broken because + # we do not handle the case where partitions defs change in the MaterializeOnMissingRule # now the start date is updated, request the new first partition key - .with_current_time_advanced(days=5) - .with_asset_properties( - partitions_def=hourly_partitions_def._replace( - start=time_partitions_start_datetime + datetime.timedelta(days=5) - ) - ) - .evaluate_tick() - .assert_requested_runs( - run_request( - ["A"], - partition_key=hour_partition_key( - time_partitions_start_datetime + datetime.timedelta(days=5), delta=1 - ), - ) - ), + # .with_current_time_advanced(days=5) + # .with_asset_properties( + # partitions_def=hourly_partitions_def._replace( + # start=time_partitions_start_datetime + datetime.timedelta(days=5) + # ) + # ) + # .evaluate_tick("BAR") + # .assert_requested_runs( + # run_request( + # ["A"], + # partition_key=hour_partition_key( + # time_partitions_start_datetime + datetime.timedelta(days=5), delta=1 + # ), + # ) + # ), ), AssetDaemonScenario( id="one_asset_self_dependency_multi_partitions_def", @@ -656,7 +658,7 @@ ["B"], partition_key=day_partition_key(time_partitions_start_datetime, delta=1) ) ) - .evaluate_tick() + .evaluate_tick("THIS ONE") .assert_requested_runs( run_request( ["C"], From 4ce2404f3dfb7683a004557c859b30a24b8110d6 Mon Sep 17 00:00:00 2001 From: Owen Kephart Date: Fri, 15 Dec 2023 15:53:41 -0800 Subject: [PATCH 10/56] Better cursor handling --- .../asset_condition_evaluation_context.py | 46 +++++++++++-------- .../dagster/_core/execution/asset_backfill.py | 4 +- .../_utils/caching_instance_queryer.py | 38 +++++++++++++-- 3 files changed, 61 insertions(+), 27 deletions(-) diff --git a/python_modules/dagster/dagster/_core/definitions/asset_condition_evaluation_context.py b/python_modules/dagster/dagster/_core/definitions/asset_condition_evaluation_context.py index 3db7b7190c533..c8e8bb62313a9 100644 --- a/python_modules/dagster/dagster/_core/definitions/asset_condition_evaluation_context.py +++ b/python_modules/dagster/dagster/_core/definitions/asset_condition_evaluation_context.py @@ -2,7 +2,7 @@ import datetime import functools from dataclasses import dataclass -from typing import TYPE_CHECKING, AbstractSet, Any, Callable, Mapping, Optional, Sequence +from typing import TYPE_CHECKING, AbstractSet, Any, Callable, Mapping, Optional, Sequence, Tuple from dagster._core.definitions.data_time import CachingDataTimeResolver from dagster._core.definitions.events import AssetKey, AssetKeyPartitionKey @@ -151,16 +151,6 @@ def parent_will_update_subset(self) -> AssetSubset: subset |= parent_subset._replace(asset_key=self.asset_key) return subset - @functools.cached_property - @root_property - def new_max_storage_id(self) -> Optional[int]: - """Returns the new max storage ID for this asset, if any.""" - # TODO: This is not a good way of doing this, as it opens us up to potential race conditions, - # but in the interest of keeping this PR simple, I'm leaving this logic as is. In the next - # PR, I'll update the code to return a "maximum observed record id" from inside the - # `get_asset_partitions_updated_after_cursor` call. - return self.instance_queryer.instance.event_log_storage.get_maximum_record_id() - @functools.cached_property @root_property def materialized_since_previous_tick_subset(self) -> AssetSubset: @@ -193,19 +183,35 @@ def materialized_requested_or_discarded_since_previous_tick_subset(self) -> Asse @functools.cached_property @root_property - def parent_has_updated_subset(self) -> AssetSubset: + def _parent_has_updated_subset_and_new_latest_storage_id( + self, + ) -> Tuple[AssetSubset, Optional[int]]: """Returns the set of asset partitions whose parents have updated since the last time this condition was evaluated. """ - return AssetSubset.from_asset_partitions_set( - self.asset_key, - self.partitions_def, - self.root_context.instance_queryer.asset_partitions_with_newly_updated_parents( - latest_storage_id=self.cursor.previous_max_storage_id, - child_asset_key=self.root_context.asset_key, - map_old_time_partitions=False, - ), + ( + asset_partitions, + cursor, + ) = self.root_context.instance_queryer.asset_partitions_with_newly_updated_parents_and_new_cursor( + latest_storage_id=self.cursor.previous_max_storage_id, + child_asset_key=self.root_context.asset_key, + map_old_time_partitions=False, ) + return AssetSubset.from_asset_partitions_set( + self.asset_key, self.partitions_def, asset_partitions + ), cursor + + @property + @root_property + def parent_has_updated_subset(self) -> AssetSubset: + subset, _ = self._parent_has_updated_subset_and_new_latest_storage_id + return subset + + @property + @root_property + def new_max_storage_id(self) -> AssetSubset: + _, storage_id = self._parent_has_updated_subset_and_new_latest_storage_id + return storage_id @property def candidate_parent_has_or_will_update_subset(self) -> AssetSubset: diff --git a/python_modules/dagster/dagster/_core/execution/asset_backfill.py b/python_modules/dagster/dagster/_core/execution/asset_backfill.py index 3f9968ad47b22..b9870eed2de2c 100644 --- a/python_modules/dagster/dagster/_core/execution/asset_backfill.py +++ b/python_modules/dagster/dagster/_core/execution/asset_backfill.py @@ -1378,10 +1378,10 @@ def execute_asset_backfill_iteration_inner( parent_materialized_asset_partitions = set().union( *( - instance_queryer.asset_partitions_with_newly_updated_parents( + instance_queryer.asset_partitions_with_newly_updated_parents_and_new_cursor( latest_storage_id=asset_backfill_data.latest_storage_id, child_asset_key=asset_key, - ) + )[0] for asset_key in asset_backfill_data.target_subset.asset_keys ) ) diff --git a/python_modules/dagster/dagster/_utils/caching_instance_queryer.py b/python_modules/dagster/dagster/_utils/caching_instance_queryer.py index b5218cc6f5a84..0b90afb7616f9 100644 --- a/python_modules/dagster/dagster/_utils/caching_instance_queryer.py +++ b/python_modules/dagster/dagster/_utils/caching_instance_queryer.py @@ -10,6 +10,7 @@ Optional, Sequence, Set, + Tuple, Union, cast, ) @@ -237,6 +238,11 @@ def get_latest_materialization_or_observation_storage_id( Args: asset_partition (AssetKeyPartitionKey): The asset partition to query. """ + if asset_partition.partition_key is None: + record = self._get_latest_materialization_or_observation_record( + asset_partition=asset_partition + ) + return record.storage_id if record else None return self._get_latest_materialization_or_observation_storage_ids_by_asset_partition( asset_key=asset_partition.asset_key ).get(asset_partition) @@ -495,29 +501,36 @@ def has_dynamic_partition(self, partitions_def_name: str, partition_key: str) -> return partition_key in self.get_dynamic_partitions(partitions_def_name) @cached_method - def asset_partitions_with_newly_updated_parents( + def asset_partitions_with_newly_updated_parents_and_new_cursor( self, *, latest_storage_id: Optional[int], child_asset_key: AssetKey, map_old_time_partitions: bool = True, - ) -> AbstractSet[AssetKeyPartitionKey]: + ) -> Tuple[AbstractSet[AssetKeyPartitionKey], Optional[int]]: """Finds asset partitions of the given child whose parents have been materialized since latest_storage_id. """ if self.asset_graph.is_source(child_asset_key): - return set() + return set(), latest_storage_id child_partitions_def = self.asset_graph.get_partitions_def(child_asset_key) child_time_partitions_def = get_time_partitions_def(child_partitions_def) child_asset_partitions_with_updated_parents = set() + + max_storage_ids = [ + self.get_latest_materialization_or_observation_storage_id( + AssetKeyPartitionKey(child_asset_key) + ) + ] for parent_asset_key in self.asset_graph.get_parents(child_asset_key): # ignore non-observable sources if self.asset_graph.is_source(parent_asset_key) and not self.asset_graph.is_observable( parent_asset_key ): continue + # if the parent has not been updated at all since the latest_storage_id, then skip if not self.get_asset_partitions_updated_after_cursor( asset_key=parent_asset_key, @@ -527,6 +540,13 @@ def asset_partitions_with_newly_updated_parents( ): continue + # keep track of the maximum storage id that we've seen for a given parent + max_storage_ids.append( + self.get_latest_materialization_or_observation_storage_id( + AssetKeyPartitionKey(parent_asset_key) + ) + ) + parent_partitions_def = self.asset_graph.get_partitions_def(parent_asset_key) if parent_partitions_def is None: latest_parent_record = check.not_none( @@ -564,7 +584,10 @@ def asset_partitions_with_newly_updated_parents( # we know a parent updated, and because the parent has a partitions def and the # child does not, the child could not have been materialized in the same run if child_partitions_def is None: - return {AssetKeyPartitionKey(child_asset_key)} + child_asset_partitions_with_updated_parents = { + AssetKeyPartitionKey(child_asset_key) + } + break # the set of asset partitions which have been updated since the latest storage id parent_partitions_subset = self.get_partitions_subset_updated_after_cursor( asset_key=parent_asset_key, after_cursor=latest_storage_id @@ -623,7 +646,12 @@ def asset_partitions_with_newly_updated_parents( ): child_asset_partitions_with_updated_parents.add(child_asset_partition) - return child_asset_partitions_with_updated_parents + # the new latest storage id will be the greatest observed storage id among this asset and + # its parents + new_latest_storage_id = max( + filter(None, [latest_storage_id, *max_storage_ids]), default=None + ) + return (child_asset_partitions_with_updated_parents, new_latest_storage_id) #################### # RECONCILIATION From f22fd8138b47a4b038fd89eeb595bf9492964d46 Mon Sep 17 00:00:00 2001 From: Owen Kephart Date: Wed, 20 Dec 2023 13:22:35 -0500 Subject: [PATCH 11/56] Improve backcompat --- .../auto_materialize_asset_evaluations.py | 5 +- ...test_auto_materialize_asset_evaluations.py | 63 +++++---- .../_core/definitions/asset_condition.py | 75 +++++++++- .../asset_condition_evaluation_context.py | 12 +- .../_core/definitions/asset_daemon_cursor.py | 81 ++++++----- .../definitions/auto_materialize_rule.py | 35 ++++- .../auto_materialize_rule_evaluation.py | 87 +++++++++--- .../dagster/_core/scheduler/instigation.py | 34 +++-- .../dagster/dagster/_daemon/asset_daemon.py | 6 +- .../_utils/caching_instance_queryer.py | 33 ++++- .../dagster/_utils/test/schedule_storage.py | 53 +++++--- .../asset_daemon_scenario.py | 13 +- .../test_asset_daemon_failure_recovery.py | 16 ++- .../test_auto_materialize_asset_evaluation.py | 42 ++++-- .../cursor_migration_scenarios.py | 128 +++++++++++++++++- .../updated_scenarios/partition_scenarios.py | 34 +++-- 16 files changed, 543 insertions(+), 174 deletions(-) diff --git a/python_modules/dagster-graphql/dagster_graphql/schema/auto_materialize_asset_evaluations.py b/python_modules/dagster-graphql/dagster_graphql/schema/auto_materialize_asset_evaluations.py index 2e65001502af9..2d3984e9dcdab 100644 --- a/python_modules/dagster-graphql/dagster_graphql/schema/auto_materialize_asset_evaluations.py +++ b/python_modules/dagster-graphql/dagster_graphql/schema/auto_materialize_asset_evaluations.py @@ -192,15 +192,16 @@ def __init__( record: AutoMaterializeAssetEvaluationRecord, partitions_def: Optional[PartitionsDefinition], ): + evaluation_with_run_ids = record.get_evaluation_with_run_ids(partitions_def=partitions_def) super().__init__( id=record.id, evaluationId=record.evaluation_id, - numRequested=record.evaluation.true_subset.size, + numRequested=evaluation_with_run_ids.evaluation.true_subset.size, numSkipped=0, numDiscarded=0, rulesWithRuleEvaluations=[], timestamp=record.timestamp, - runIds=record.run_ids, + runIds=evaluation_with_run_ids.run_ids, rules=[], assetKey=GrapheneAssetKey(path=record.asset_key.path), ) diff --git a/python_modules/dagster-graphql/dagster_graphql_tests/graphql/test_auto_materialize_asset_evaluations.py b/python_modules/dagster-graphql/dagster_graphql_tests/graphql/test_auto_materialize_asset_evaluations.py index 0ffcd2a846c04..54c4a831786c3 100644 --- a/python_modules/dagster-graphql/dagster_graphql_tests/graphql/test_auto_materialize_asset_evaluations.py +++ b/python_modules/dagster-graphql/dagster_graphql_tests/graphql/test_auto_materialize_asset_evaluations.py @@ -1,4 +1,3 @@ -from typing import Sequence from unittest.mock import PropertyMock, patch import dagster._check as check @@ -7,6 +6,10 @@ from dagster._core.definitions.asset_daemon_cursor import ( AssetDaemonCursor, ) +from dagster._core.definitions.auto_materialize_rule_evaluation import ( + deserialize_auto_materialize_asset_evaluation_to_asset_condition_evaluation_with_run_ids, +) +from dagster._core.definitions.partition import StaticPartitionsDefinition from dagster._core.definitions.run_request import ( InstigatorType, ) @@ -29,7 +32,6 @@ _PRE_SENSOR_AUTO_MATERIALIZE_SELECTOR_ID, asset_daemon_cursor_to_instigator_serialized_cursor, ) -from dagster._serdes import deserialize_value from dagster._serdes.serdes import serialize_value from dagster_graphql.test.utils import execute_dagster_graphql, infer_repository @@ -390,14 +392,18 @@ def test_automation_policy_sensor(self, graphql_context: WorkspaceRequestContext assert results.data["assetNodeOrError"]["currentAutoMaterializeEvaluationId"] == 12345 def test_get_historic_rules(self, graphql_context: WorkspaceRequestContext): + evaluation1 = deserialize_auto_materialize_asset_evaluation_to_asset_condition_evaluation_with_run_ids( + '{"__class__": "AutoMaterializeAssetEvaluation", "asset_key": {"__class__": "AssetKey", "path": ["asset_one"]}, "num_discarded": 0, "num_requested": 0, "num_skipped": 0, "partition_subsets_by_condition": [], "rule_snapshots": null, "run_ids": {"__set__": []}}', + None, + ) + evaluation2 = deserialize_auto_materialize_asset_evaluation_to_asset_condition_evaluation_with_run_ids( + '{"__class__": "AutoMaterializeAssetEvaluation", "asset_key": {"__class__": "AssetKey", "path": ["asset_two"]}, "num_discarded": 0, "num_requested": 1, "num_skipped": 0, "partition_subsets_by_condition": [], "rule_snapshots": [{"__class__": "AutoMaterializeRuleSnapshot", "class_name": "MaterializeOnMissingRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.MATERIALIZE"}, "description": "materialization is missing"}], "run_ids": {"__set__": []}}', + None, + ) check.not_none( graphql_context.instance.schedule_storage ).add_auto_materialize_asset_evaluations( - evaluation_id=10, - asset_evaluations=deserialize_value( - '[{"__class__": "AutoMaterializeAssetEvaluation", "asset_key": {"__class__": "AssetKey", "path": ["asset_one"]}, "num_discarded": 0, "num_requested": 0, "num_skipped": 0, "partition_subsets_by_condition": [], "rule_snapshots": null, "run_ids": {"__set__": []}}, {"__class__": "AutoMaterializeAssetEvaluation", "asset_key": {"__class__": "AssetKey", "path": ["asset_two"]}, "num_discarded": 0, "num_requested": 1, "num_skipped": 0, "partition_subsets_by_condition": [], "rule_snapshots": [{"__class__": "AutoMaterializeRuleSnapshot", "class_name": "MaterializeOnMissingRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.MATERIALIZE"}, "description": "materialization is missing"}], "run_ids": {"__set__": []}}]', - Sequence, - ), + evaluation_id=10, asset_evaluations=[evaluation1, evaluation2] ) results = execute_dagster_graphql( @@ -466,14 +472,15 @@ def test_get_historic_rules(self, graphql_context: WorkspaceRequestContext): def test_get_required_but_nonexistent_parent_evaluation( self, graphql_context: WorkspaceRequestContext ): + evaluation = deserialize_auto_materialize_asset_evaluation_to_asset_condition_evaluation_with_run_ids( + '{"__class__": "AutoMaterializeAssetEvaluation", "asset_key": {"__class__": "AssetKey", "path": ["upstream_static_partitioned_asset"]}, "num_discarded": 0, "num_requested": 0, "num_skipped": 1, "partition_subsets_by_condition": [[{"__class__": "AutoMaterializeRuleEvaluation", "evaluation_data": {"__class__": "WaitingOnAssetsRuleEvaluationData", "waiting_on_asset_keys": {"__frozenset__": [{"__class__": "AssetKey", "path": ["blah"]}]}}, "rule_snapshot": {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "SkipOnRequiredButNonexistentParentsRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.SKIP"}, "description": "required parent partitions do not exist"}}, {"__class__": "SerializedPartitionsSubset", "serialized_partitions_def_class_name": "StaticPartitionsDefinition", "serialized_partitions_def_unique_id": "7c2047f8b02e90a69136c1a657bd99ad80b433a2", "serialized_subset": "{\\"version\\": 1, \\"subset\\": [\\"a\\"]}"}]], "rule_snapshots": null, "run_ids": {"__set__": []}}', + StaticPartitionsDefinition(["a", "b"]), + ) check.not_none( graphql_context.instance.schedule_storage ).add_auto_materialize_asset_evaluations( evaluation_id=10, - asset_evaluations=deserialize_value( - '[{"__class__": "AutoMaterializeAssetEvaluation", "asset_key": {"__class__": "AssetKey", "path": ["upstream_static_partitioned_asset"]}, "num_discarded": 0, "num_requested": 0, "num_skipped": 1, "partition_subsets_by_condition": [[{"__class__": "AutoMaterializeRuleEvaluation", "evaluation_data": {"__class__": "WaitingOnAssetsRuleEvaluationData", "waiting_on_asset_keys": {"__frozenset__": [{"__class__": "AssetKey", "path": ["blah"]}]}}, "rule_snapshot": {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "SkipOnRequiredButNonexistentParentsRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.SKIP"}, "description": "required parent partitions do not exist"}}, {"__class__": "SerializedPartitionsSubset", "serialized_partitions_def_class_name": "StaticPartitionsDefinition", "serialized_partitions_def_unique_id": "7c2047f8b02e90a69136c1a657bd99ad80b433a2", "serialized_subset": "{\\"version\\": 1, \\"subset\\": [\\"a\\"]}"}]], "rule_snapshots": null, "run_ids": {"__set__": []}}]', - Sequence, - ), + asset_evaluations=[evaluation], ) results = execute_dagster_graphql( @@ -505,6 +512,22 @@ def test_get_required_but_nonexistent_parent_evaluation( } def _test_get_evaluations(self, graphql_context: WorkspaceRequestContext): + evaluation1 = deserialize_auto_materialize_asset_evaluation_to_asset_condition_evaluation_with_run_ids( + '{"__class__": "AutoMaterializeAssetEvaluation", "asset_key": {"__class__": "AssetKey", "path": ["asset_one"]}, "num_discarded": 0, "num_requested": 0, "num_skipped": 0, "partition_subsets_by_condition": [], "rule_snapshots": null, "run_ids": {"__set__": []}}', + None, + ) + evaluation2 = deserialize_auto_materialize_asset_evaluation_to_asset_condition_evaluation_with_run_ids( + '{"__class__": "AutoMaterializeAssetEvaluation", "asset_key": {"__class__": "AssetKey", "path": ["asset_two"]}, "num_discarded": 0, "num_requested": 1, "num_skipped": 0, "partition_subsets_by_condition": [[{"__class__": "AutoMaterializeRuleEvaluation", "evaluation_data": null, "rule_snapshot": {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "MaterializeOnMissingRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.MATERIALIZE"}, "description": "materialization is missing"}}, null]], "rule_snapshots": null, "run_ids": {"__set__": []}}', + None, + ) + evaluation3 = deserialize_auto_materialize_asset_evaluation_to_asset_condition_evaluation_with_run_ids( + '{"__class__": "AutoMaterializeAssetEvaluation", "asset_key": {"__class__": "AssetKey", "path": ["asset_three"]}, "num_discarded": 0, "num_requested": 0, "num_skipped": 1, "partition_subsets_by_condition": [[{"__class__": "AutoMaterializeRuleEvaluation", "evaluation_data": {"__class__": "WaitingOnAssetsRuleEvaluationData", "waiting_on_asset_keys": {"__frozenset__": [{"__class__": "AssetKey", "path": ["asset_two"]}]}}, "rule_snapshot": {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "SkipOnParentOutdatedRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.SKIP"}, "description": "waiting on upstream data to be up to date"}}, null]], "rule_snapshots": null, "run_ids": {"__set__": []}}', + None, + ) + evaluation4 = deserialize_auto_materialize_asset_evaluation_to_asset_condition_evaluation_with_run_ids( + '{"__class__": "AutoMaterializeAssetEvaluation", "asset_key": {"__class__": "AssetKey", "path": ["asset_four"]}, "num_discarded": 0, "num_requested": 1, "num_skipped": 0, "partition_subsets_by_condition": [[{"__class__": "AutoMaterializeRuleEvaluation", "evaluation_data": {"__class__": "ParentUpdatedRuleEvaluationData", "updated_asset_keys": {"__frozenset__": [{"__class__": "AssetKey", "path": ["asset_two"]}]}, "will_update_asset_keys": {"__frozenset__": [{"__class__": "AssetKey", "path": ["asset_three"]}]}}, "rule_snapshot": {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "MaterializeOnParentUpdatedRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.MATERIALIZE"}, "description": "upstream data has changed since latest materialization"}}, null]], "rule_snapshots": null, "run_ids": {"__set__": []}}', + None, + ) results = execute_dagster_graphql( graphql_context, QUERY, @@ -517,11 +540,7 @@ def _test_get_evaluations(self, graphql_context: WorkspaceRequestContext): check.not_none( graphql_context.instance.schedule_storage ).add_auto_materialize_asset_evaluations( - evaluation_id=10, - asset_evaluations=deserialize_value( - '[{"__class__": "AutoMaterializeAssetEvaluation", "asset_key": {"__class__": "AssetKey", "path": ["asset_one"]}, "num_discarded": 0, "num_requested": 0, "num_skipped": 0, "partition_subsets_by_condition": [], "rule_snapshots": null, "run_ids": {"__set__": []}}, {"__class__": "AutoMaterializeAssetEvaluation", "asset_key": {"__class__": "AssetKey", "path": ["asset_two"]}, "num_discarded": 0, "num_requested": 1, "num_skipped": 0, "partition_subsets_by_condition": [[{"__class__": "AutoMaterializeRuleEvaluation", "evaluation_data": null, "rule_snapshot": {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "MaterializeOnMissingRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.MATERIALIZE"}, "description": "materialization is missing"}}, null]], "rule_snapshots": null, "run_ids": {"__set__": []}}, {"__class__": "AutoMaterializeAssetEvaluation", "asset_key": {"__class__": "AssetKey", "path": ["asset_three"]}, "num_discarded": 0, "num_requested": 0, "num_skipped": 1, "partition_subsets_by_condition": [[{"__class__": "AutoMaterializeRuleEvaluation", "evaluation_data": {"__class__": "WaitingOnAssetsRuleEvaluationData", "waiting_on_asset_keys": {"__frozenset__": [{"__class__": "AssetKey", "path": ["asset_two"]}]}}, "rule_snapshot": {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "SkipOnParentOutdatedRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.SKIP"}, "description": "waiting on upstream data to be up to date"}}, null]], "rule_snapshots": null, "run_ids": {"__set__": []}}, {"__class__": "AutoMaterializeAssetEvaluation", "asset_key": {"__class__": "AssetKey", "path": ["asset_four"]}, "num_discarded": 0, "num_requested": 1, "num_skipped": 0, "partition_subsets_by_condition": [[{"__class__": "AutoMaterializeRuleEvaluation", "evaluation_data": {"__class__": "ParentUpdatedRuleEvaluationData", "updated_asset_keys": {"__frozenset__": [{"__class__": "AssetKey", "path": ["asset_two"]}]}, "will_update_asset_keys": {"__frozenset__": [{"__class__": "AssetKey", "path": ["asset_three"]}]}}, "rule_snapshot": {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "MaterializeOnParentUpdatedRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.MATERIALIZE"}, "description": "upstream data has changed since latest materialization"}}, null]], "rule_snapshots": null, "run_ids": {"__set__": []}}]', - Sequence, - ), + evaluation_id=10, asset_evaluations=[evaluation1, evaluation2, evaluation3, evaluation4] ) results = execute_dagster_graphql( @@ -644,6 +663,10 @@ def _test_get_evaluations(self, graphql_context: WorkspaceRequestContext): } def _test_get_evaluations_with_partitions(self, graphql_context: WorkspaceRequestContext): + evaluation = deserialize_auto_materialize_asset_evaluation_to_asset_condition_evaluation_with_run_ids( + '{"__class__": "AutoMaterializeAssetEvaluation", "asset_key": {"__class__": "AssetKey", "path": ["upstream_static_partitioned_asset"]}, "num_discarded": 0, "num_requested": 2, "num_skipped": 0, "partition_subsets_by_condition": [[{"__class__": "AutoMaterializeRuleEvaluation", "evaluation_data": null, "rule_snapshot": {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "MaterializeOnMissingRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.MATERIALIZE"}, "description": "materialization is missing"}}, {"__class__": "SerializedPartitionsSubset", "serialized_partitions_def_class_name": "StaticPartitionsDefinition", "serialized_partitions_def_unique_id": "7c2047f8b02e90a69136c1a657bd99ad80b433a2", "serialized_subset": "{\\"version\\": 1, \\"subset\\": [\\"a\\", \\"b\\"]}"}]], "rule_snapshots": null, "run_ids": {"__set__": []}}', + StaticPartitionsDefinition(["a", "b"]), + ) results = execute_dagster_graphql( graphql_context, QUERY, @@ -662,13 +685,7 @@ def _test_get_evaluations_with_partitions(self, graphql_context: WorkspaceReques check.not_none( graphql_context.instance.schedule_storage - ).add_auto_materialize_asset_evaluations( - evaluation_id=10, - asset_evaluations=deserialize_value( - '[{"__class__": "AutoMaterializeAssetEvaluation", "asset_key": {"__class__": "AssetKey", "path": ["upstream_static_partitioned_asset"]}, "num_discarded": 0, "num_requested": 2, "num_skipped": 0, "partition_subsets_by_condition": [[{"__class__": "AutoMaterializeRuleEvaluation", "evaluation_data": null, "rule_snapshot": {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "MaterializeOnMissingRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.MATERIALIZE"}, "description": "materialization is missing"}}, {"__class__": "SerializedPartitionsSubset", "serialized_partitions_def_class_name": "StaticPartitionsDefinition", "serialized_partitions_def_unique_id": "7c2047f8b02e90a69136c1a657bd99ad80b433a2", "serialized_subset": "{\\"version\\": 1, \\"subset\\": [\\"a\\", \\"b\\"]}"}]], "rule_snapshots": null, "run_ids": {"__set__": []}}]', - Sequence, - ), - ) + ).add_auto_materialize_asset_evaluations(evaluation_id=10, asset_evaluations=[evaluation]) results = execute_dagster_graphql( graphql_context, diff --git a/python_modules/dagster/dagster/_core/definitions/asset_condition.py b/python_modules/dagster/dagster/_core/definitions/asset_condition.py index e411eec9f6851..92b14e07193a8 100644 --- a/python_modules/dagster/dagster/_core/definitions/asset_condition.py +++ b/python_modules/dagster/dagster/_core/definitions/asset_condition.py @@ -4,12 +4,15 @@ from typing import ( TYPE_CHECKING, AbstractSet, + Any, FrozenSet, List, + Mapping, NamedTuple, Optional, Sequence, Tuple, + Union, ) import dagster._check as check @@ -18,7 +21,15 @@ ) from dagster._core.definitions.events import AssetKey from dagster._core.definitions.metadata import MetadataMapping, MetadataValue -from dagster._serdes.serdes import whitelist_for_serdes +from dagster._core.definitions.partition import AllPartitionsSubset +from dagster._serdes.serdes import ( + FieldSerializer, + UnpackContext, + WhitelistMap, + pack_value, + unpack_value, + whitelist_for_serdes, +) from .asset_condition_evaluation_context import ( AssetConditionEvaluationContext, @@ -29,6 +40,13 @@ from .auto_materialize_rule import AutoMaterializeRule +@whitelist_for_serdes +class HistoricalAllPartitionsSubsetSentinel(NamedTuple): + """Serializable indicator that this value was an AllPartitionsSubset at serialization time, but + the partitions may have changed since that time. + """ + + @whitelist_for_serdes class AssetConditionSnapshot(NamedTuple): """A serializable snapshot of a node in the AutomationCondition tree.""" @@ -50,13 +68,54 @@ def frozen_metadata(self) -> FrozenSet[Tuple[str, MetadataValue]]: return frozenset(self.metadata.items()) -@whitelist_for_serdes +def get_serializable_candidate_subset( + candidate_subset: Union[AssetSubset, HistoricalAllPartitionsSubsetSentinel], +) -> Union[AssetSubset, HistoricalAllPartitionsSubsetSentinel]: + """Do not serialize the candidate subset directly if it is an AllPartitionsSubset.""" + if isinstance(candidate_subset, AssetSubset) and isinstance( + candidate_subset.value, AllPartitionsSubset + ): + return HistoricalAllPartitionsSubsetSentinel() + return candidate_subset + + +class CandidateSubsetSerializer(FieldSerializer): + def pack( + self, + candidate_subset: AssetSubset, + whitelist_map: WhitelistMap, + descent_path: str, + ) -> Optional[Mapping[str, Any]]: + # On all ticks, the root condition starts with an AllPartitionsSubset as the candidate + # subset. This would be wasteful to calculate and serialize in its entirety, so we instead + # store this as `None` and reconstruct it as needed. + # This does mean that if new partitions are added between serialization time and read time, + # the candidate subset will contain those new partitions. + return pack_value( + get_serializable_candidate_subset(candidate_subset), whitelist_map, descent_path + ) + + def unpack( + self, + serialized_candidate_subset: Optional[Mapping[str, Any]], + whitelist_map: WhitelistMap, + context: UnpackContext, + ) -> Union[AssetSubset, HistoricalAllPartitionsSubsetSentinel]: + return unpack_value( + serialized_candidate_subset, + (AssetSubset, HistoricalAllPartitionsSubsetSentinel), + whitelist_map, + context, + ) + + +@whitelist_for_serdes(field_serializers={"candidate_subset": CandidateSubsetSerializer}) class AssetConditionEvaluation(NamedTuple): """Internal representation of the results of evaluating a node in the evaluation tree.""" condition_snapshot: AssetConditionSnapshot true_subset: AssetSubset - candidate_subset: Optional[AssetSubset] + candidate_subset: Union[AssetSubset, HistoricalAllPartitionsSubsetSentinel] subsets_with_metadata: Sequence[AssetSubsetWithMetadata] = [] child_evaluations: Sequence["AssetConditionEvaluation"] = [] @@ -70,9 +129,15 @@ def equivalent_to_stored_evaluation(self, other: Optional["AssetConditionEvaluat other is not None and self.condition_snapshot == other.condition_snapshot and self.true_subset == other.true_subset - and self.candidate_subset == other.candidate_subset + # the candidate subset gets modified during serialization + and get_serializable_candidate_subset(self.candidate_subset) + == get_serializable_candidate_subset(other.candidate_subset) and self.subsets_with_metadata == other.subsets_with_metadata - and self.child_evaluations == other.child_evaluations + and len(self.child_evaluations) == len(other.child_evaluations) + and all( + self_child.equivalent_to_stored_evaluation(other_child) + for self_child, other_child in zip(self.child_evaluations, other.child_evaluations) + ) ) def discarded_subset(self, condition: "AssetCondition") -> Optional[AssetSubset]: diff --git a/python_modules/dagster/dagster/_core/definitions/asset_condition_evaluation_context.py b/python_modules/dagster/dagster/_core/definitions/asset_condition_evaluation_context.py index c8e8bb62313a9..4368976f3a63d 100644 --- a/python_modules/dagster/dagster/_core/definitions/asset_condition_evaluation_context.py +++ b/python_modules/dagster/dagster/_core/definitions/asset_condition_evaluation_context.py @@ -227,11 +227,17 @@ def candidates_not_evaluated_on_previous_tick_subset(self) -> AssetSubset: """Returns the set of candidates for this tick which were not candidates on the previous tick. """ + from .asset_condition import HistoricalAllPartitionsSubsetSentinel + if not self.previous_condition_evaluation: return self.candidate_subset - # when the candidate_subset is None, this indicates that the entire asset was evaluated - # for this condition on the previous tick - elif self.previous_condition_evaluation.candidate_subset is None: + # when the candidate_subset is HistoricalAllPartitionsSubsetSentinel, this indicates that the + # entire asset was evaluated for this condition on the previous tick, and so no candidates + # were *not* evaluated on the previous tick + elif isinstance( + self.previous_condition_evaluation.candidate_subset, + HistoricalAllPartitionsSubsetSentinel, + ): return self.empty_subset() return self.candidate_subset - self.previous_condition_evaluation.candidate_subset diff --git a/python_modules/dagster/dagster/_core/definitions/asset_daemon_cursor.py b/python_modules/dagster/dagster/_core/definitions/asset_daemon_cursor.py index d1635599f54d1..da0653f83e012 100644 --- a/python_modules/dagster/dagster/_core/definitions/asset_daemon_cursor.py +++ b/python_modules/dagster/dagster/_core/definitions/asset_daemon_cursor.py @@ -12,16 +12,17 @@ from dagster._core.definitions.asset_graph_subset import AssetGraphSubset from dagster._core.definitions.asset_subset import AssetSubset -from dagster._core.definitions.auto_materialize_rule_evaluation import ( - BackcompatAutoMaterializeAssetEvaluationSerializer, -) from dagster._core.definitions.events import AssetKey from dagster._core.definitions.partition import PartitionsDefinition from dagster._serdes.serdes import ( - _WHITELIST_MAP, + FieldSerializer, + JsonSerializableValue, PackableValue, + SerializableNonScalarKeyMapping, + UnpackContext, WhitelistMap, - deserialize_value, + pack_value, + unpack_value, whitelist_for_serdes, ) @@ -88,7 +89,29 @@ def get_previous_requested_or_discarded_subset( return self.previous_evaluation.get_requested_or_discarded_subset(condition) -@whitelist_for_serdes +class ObserveRequestTimestampSerializer(FieldSerializer): + def pack( + self, + mapping: Mapping[str, float], + whitelist_map: WhitelistMap, + descent_path: str, + ) -> JsonSerializableValue: + return pack_value(SerializableNonScalarKeyMapping(mapping), whitelist_map, descent_path) + + def unpack( + self, + unpacked_value: JsonSerializableValue, + whitelist_map: WhitelistMap, + context: UnpackContext, + ) -> PackableValue: + return unpack_value(unpacked_value, dict, whitelist_map, context) + + +@whitelist_for_serdes( + field_serializers={ + "last_observe_request_timestamp_by_asset_key": ObserveRequestTimestampSerializer + } +) class AssetDaemonCursor(NamedTuple): """State that's stored between daemon evaluations. @@ -177,7 +200,11 @@ def get_backcompat_asset_condition_cursor( condition_snapshot=RuleCondition(MaterializeOnMissingRule()).snapshot, extras={MaterializeOnMissingRule.HANDLED_SUBSET_KEY: handled_root_subset}, ) - ], + ] + # only include this information if it's non-empty (otherwise we can just rebuild it from + # the set of materialized partitions later on) + if handled_root_subset and handled_root_subset.size > 0 + else [], ) @@ -187,7 +214,9 @@ def backcompat_deserialize_asset_daemon_cursor_str( """This serves as a backcompat layer for deserializing the old cursor format. Some information is impossible to fully recover, this will recover enough to continue operating as normal. """ - from .asset_condition import AssetConditionEvaluationWithRunIds + from .auto_materialize_rule_evaluation import ( + deserialize_auto_materialize_asset_evaluation_to_asset_condition_evaluation_with_run_ids, + ) data = json.loads(cursor_str) @@ -230,39 +259,27 @@ def backcompat_deserialize_asset_daemon_cursor_str( latest_evaluation_by_asset_key = {} for key_str, serialized_evaluation in serialized_latest_evaluation_by_asset_key.items(): key = AssetKey.from_user_string(key_str) + partitions_def = asset_graph.get_partitions_def(key) if asset_graph else None - class BackcompatDeserializer(BackcompatAutoMaterializeAssetEvaluationSerializer): - @property - def partitions_def(self) -> Optional[PartitionsDefinition]: - return asset_graph.get_partitions_def(key) if asset_graph else None - - # create a new WhitelistMap that can deserialize SerializedPartitionSubset objects stored - # on the old cursor format - whitelist_map = WhitelistMap( - object_serializers=_WHITELIST_MAP.object_serializers, - object_deserializers={ - **_WHITELIST_MAP.object_deserializers, - "AutoMaterializeAssetEvaluation": BackcompatDeserializer( - klass=AssetConditionEvaluationWithRunIds - ), - }, - enum_serializers=_WHITELIST_MAP.enum_serializers, - ) - - # these string cursors will contain AutoMaterializeAssetEvaluation objects, which get - # deserialized into AssetConditionEvaluationWithRunIds, not AssetConditionEvaluation - evaluation = deserialize_value( - serialized_evaluation, AssetConditionEvaluationWithRunIds, whitelist_map=whitelist_map + evaluation = deserialize_auto_materialize_asset_evaluation_to_asset_condition_evaluation_with_run_ids( + serialized_evaluation, partitions_def ).evaluation + latest_evaluation_by_asset_key[key] = evaluation asset_cursors = [] - for asset_key, latest_evaluation in latest_evaluation_by_asset_key.items(): + cursor_keys = ( + asset_graph.auto_materialize_policies_by_key.keys() + if asset_graph + else latest_evaluation_by_asset_key.keys() + ) + for asset_key in cursor_keys: + latest_evaluation = latest_evaluation_by_asset_key.get(asset_key) asset_cursors.append( get_backcompat_asset_condition_cursor( asset_key, data.get("latest_storage_id"), - data.get("latest_timestamp"), + data.get("latest_evaluation_timestamp"), latest_evaluation, handled_root_asset_graph_subset.get_asset_subset(asset_key, asset_graph) if asset_graph diff --git a/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule.py b/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule.py index e840222a7d0f2..c73fff29ab11e 100644 --- a/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule.py +++ b/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule.py @@ -33,6 +33,7 @@ ) from dagster._core.definitions.multi_dimensional_partitions import MultiPartitionsDefinition from dagster._core.definitions.time_window_partitions import ( + TimeWindowPartitionsSubset, get_time_partitions_def, ) from dagster._core.storage.dagster_run import RunsFilter @@ -611,20 +612,40 @@ def decision_type(self) -> AutoMaterializeDecisionType: def description(self) -> str: return "materialization is missing" - def evaluate_for_asset(self, context: AssetConditionEvaluationContext) -> RuleEvaluationResults: - """Evaluates the set of asset partitions for this asset which are missing and were not - previously discarded. + def get_handled_subset(self, context: AssetConditionEvaluationContext) -> AssetSubset: + """Returns the AssetSubset which has been handled (materialized, requested, or discarded). + Accounts for cases in which the partitions definition may have changed between ticks. """ - handled_subset = ( + previous_handled_subset = context.cursor.get_extras_value( + context.condition, self.HANDLED_SUBSET_KEY, AssetSubset + ) + if previous_handled_subset: + # partitioned -> unpartitioned or vice versa + if previous_handled_subset.is_partitioned != (context.partitions_def is not None): + previous_handled_subset = None + # time partitions def changed + elif ( + previous_handled_subset.is_partitioned + and isinstance(previous_handled_subset.subset_value, TimeWindowPartitionsSubset) + and previous_handled_subset.subset_value.partitions_def != context.partitions_def + ): + previous_handled_subset = None + return ( ( - context.cursor.get_extras_value( - context.condition, self.HANDLED_SUBSET_KEY, AssetSubset + previous_handled_subset + or context.instance_queryer.get_materialized_asset_subset( + asset_key=context.asset_key ) - or context.empty_subset() ) | context.previous_tick_requested_subset | context.materialized_since_previous_tick_subset ) + + def evaluate_for_asset(self, context: AssetConditionEvaluationContext) -> RuleEvaluationResults: + """Evaluates the set of asset partitions for this asset which are missing and were not + previously discarded. + """ + handled_subset = self.get_handled_subset(context) unhandled_candidates = ( context.candidate_subset & handled_subset.inverse( diff --git a/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule_evaluation.py b/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule_evaluation.py index 1601b639c7f3b..f3e096aa5e683 100644 --- a/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule_evaluation.py +++ b/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule_evaluation.py @@ -23,11 +23,13 @@ from dagster._core.definitions.events import AssetKey from dagster._core.definitions.metadata import MetadataMapping, MetadataValue from dagster._serdes.serdes import ( + _WHITELIST_MAP, NamedTupleSerializer, PackableValue, UnpackContext, UnpackedValue, WhitelistMap, + deserialize_value, whitelist_for_serdes, ) @@ -141,6 +143,49 @@ class AutoMaterializeRuleEvaluation(NamedTuple): # BACKCOMPAT GRAVEYARD +def deserialize_auto_materialize_asset_evaluation_to_asset_condition_evaluation_with_run_ids( + serialized_evaluation: str, partitions_def: Optional[PartitionsDefinition] +) -> "AssetConditionEvaluationWithRunIds": + """Provides a backcompat layer to allow deserializing old AutoMaterializeAssetEvaluation + objects into the new AssetConditionEvaluationWithRunIds objects. + """ + from .asset_condition import AssetConditionEvaluationWithRunIds + + class BackcompatDeserializer(BackcompatAutoMaterializeAssetEvaluationSerializer): + @property + def partitions_def(self) -> Optional[PartitionsDefinition]: + return partitions_def + + # create a new WhitelistMap that can deserialize SerializedPartitionSubset objects stored + # on the old cursor format + whitelist_map = WhitelistMap( + object_serializers=_WHITELIST_MAP.object_serializers, + object_deserializers={ + **_WHITELIST_MAP.object_deserializers, + "AutoMaterializeAssetEvaluation": BackcompatDeserializer( + klass=AssetConditionEvaluationWithRunIds + ), + }, + enum_serializers=_WHITELIST_MAP.enum_serializers, + ) + + return deserialize_value( + serialized_evaluation, AssetConditionEvaluationWithRunIds, whitelist_map=whitelist_map + ) + + +def deserialize_serialized_partitions_subset_to_asset_subset( + serialized: SerializedPartitionsSubset, + asset_key: AssetKey, + partitions_def: Optional[PartitionsDefinition], +) -> AssetSubset: + if partitions_def is None or not serialized.can_deserialize(partitions_def): + # partitions def has changed since storage time + return AssetSubset.empty(asset_key, partitions_def) + + return AssetSubset(asset_key, value=serialized.deserialize(partitions_def)) + + class BackcompatAutoMaterializeAssetEvaluationSerializer(NamedTupleSerializer): """This handles backcompat for the old AutoMaterializeAssetEvaluation objects, turning them into AssetConditionEvaluationWithRunIds objects. @@ -148,11 +193,10 @@ class BackcompatAutoMaterializeAssetEvaluationSerializer(NamedTupleSerializer): @property def partitions_def(self) -> Optional[PartitionsDefinition]: - """We may override this property in subclasses that are created at a point in time where - we know what the current partitions definition is. If we don't know, then we will be unable - to deserialize any SerializedPartitionsSubset objects. + """This property gets overridden by subclasses at runtime, once the partitions_def for the + specific record we're deserializing is known. """ - return None + raise NotImplementedError() def _get_empty_subset(self, asset_key: AssetKey, is_partitioned: bool) -> AssetSubset: # We know this asset is partitioned, but we don't know what its partitions def is, so we @@ -172,14 +216,9 @@ def deserialize_serialized_partitions_subset_or_none( # Confusingly, we used `None` to indicate "all of an unpartitioned asset" in the old # serialization scheme return AssetSubset(asset_key, True) - elif self.partitions_def is None or not serialized.can_deserialize(self.partitions_def): - # If we don't know the partitions def, then we can't deserialize the partitions subset, - # so we just use an empty one instead. - return self._get_empty_subset(asset_key, is_partitioned) - else: - # We are in an instance of this class that knows the partitions def, so we can - # deserialize the partitions subset - return AssetSubset(asset_key, serialized.deserialize(self.partitions_def)) + return deserialize_serialized_partitions_subset_to_asset_subset( + serialized, asset_key, self.partitions_def + ) def _asset_condition_snapshot_from_rule_snapshot( self, rule_snapshot: AutoMaterializeRuleSnapshot @@ -207,6 +246,7 @@ def _get_child_rule_evaluation( from .asset_condition import ( AssetConditionEvaluation, AssetSubsetWithMetadata, + HistoricalAllPartitionsSubsetSentinel, ) condition_snapshot = self._asset_condition_snapshot_from_rule_snapshot(rule_snapshot) @@ -231,7 +271,9 @@ def _get_child_rule_evaluation( return AssetConditionEvaluation( condition_snapshot=condition_snapshot, true_subset=true_subset, - candidate_subset=None, + candidate_subset=HistoricalAllPartitionsSubsetSentinel() + if is_partitioned + else AssetSubset.empty(asset_key, None), subsets_with_metadata=subsets_with_metadata, ) @@ -248,6 +290,7 @@ def _get_child_decision_type_evaluation( from .asset_condition import ( AssetConditionEvaluation, AssetConditionSnapshot, + HistoricalAllPartitionsSubsetSentinel, NotAssetCondition, OrAssetCondition, ) @@ -291,7 +334,9 @@ def _get_child_decision_type_evaluation( true_subset=reduce( operator.or_, (e.true_subset for e in child_evaluations), initial ), - candidate_subset=None, + candidate_subset=HistoricalAllPartitionsSubsetSentinel() + if is_partitioned + else AssetSubset.all(asset_key, None), subsets_with_metadata=[], child_evaluations=child_evaluations, ) @@ -314,7 +359,9 @@ def _get_child_decision_type_evaluation( true_subset=evaluation.true_subset if evaluation.true_subset.is_partitioned else evaluation.true_subset._replace(value=not evaluation.true_subset.bool_value), - candidate_subset=None, + candidate_subset=HistoricalAllPartitionsSubsetSentinel() + if is_partitioned + else AssetSubset.all(asset_key, None), subsets_with_metadata=[], child_evaluations=[evaluation], ) @@ -329,6 +376,7 @@ def unpack( AndAssetCondition, AssetConditionEvaluation, AssetConditionSnapshot, + HistoricalAllPartitionsSubsetSentinel, ) asset_key = cast(AssetKey, unpacked_dict.get("asset_key")) @@ -383,17 +431,14 @@ def unpack( return AssetConditionEvaluation( condition_snapshot=condition_snapshot, true_subset=reduce(operator.and_, (e.true_subset for e in child_evaluations)), - candidate_subset=None, + candidate_subset=HistoricalAllPartitionsSubsetSentinel() + if is_partitioned + else AssetSubset.all(asset_key, None), subsets_with_metadata=[], child_evaluations=child_evaluations, ).with_run_ids(cast(AbstractSet[str], unpacked_dict.get("run_ids", set()))) -@whitelist_for_serdes(serializer=BackcompatAutoMaterializeAssetEvaluationSerializer) -class AutoMaterializeAssetEvaluation(NamedTuple): - ... - - class BackcompatAutoMaterializeConditionSerializer(NamedTupleSerializer): """This handles backcompat for the old AutoMaterializeCondition objects, turning them into the proper AutoMaterializeRuleEvaluation objects. This is necessary because old diff --git a/python_modules/dagster/dagster/_core/scheduler/instigation.py b/python_modules/dagster/dagster/_core/scheduler/instigation.py index 9bc0bf8b90e2d..39ec9cc57d9f9 100644 --- a/python_modules/dagster/dagster/_core/scheduler/instigation.py +++ b/python_modules/dagster/dagster/_core/scheduler/instigation.py @@ -7,10 +7,13 @@ import dagster._check as check from dagster._core.definitions import RunRequest from dagster._core.definitions.asset_condition import ( - AssetConditionEvaluation, AssetConditionEvaluationWithRunIds, ) +from dagster._core.definitions.auto_materialize_rule_evaluation import ( + deserialize_auto_materialize_asset_evaluation_to_asset_condition_evaluation_with_run_ids, +) from dagster._core.definitions.events import AssetKey, AssetKeyPartitionKey +from dagster._core.definitions.partition import PartitionsDefinition # re-export from dagster._core.definitions.run_request import ( @@ -21,6 +24,7 @@ from dagster._core.definitions.sensor_definition import SensorType from dagster._core.host_representation.origin import ExternalInstigatorOrigin from dagster._serdes import create_snapshot_id +from dagster._serdes.errors import DeserializationError from dagster._serdes.serdes import ( EnumSerializer, deserialize_value, @@ -726,7 +730,7 @@ def _validate_tick_args( class AutoMaterializeAssetEvaluationRecord(NamedTuple): id: int - evaluation_with_run_ids: AssetConditionEvaluationWithRunIds + serialized_evaluation_body: str evaluation_id: int timestamp: float asset_key: AssetKey @@ -735,18 +739,24 @@ class AutoMaterializeAssetEvaluationRecord(NamedTuple): def from_db_row(cls, row) -> "AutoMaterializeAssetEvaluationRecord": return cls( id=row["id"], - evaluation_with_run_ids=deserialize_value( - row["asset_evaluation_body"], AssetConditionEvaluationWithRunIds - ), + serialized_evaluation_body=row["asset_evaluation_body"], evaluation_id=row["evaluation_id"], timestamp=datetime_as_float(row["create_timestamp"]), asset_key=check.not_none(AssetKey.from_db_string(row["asset_key"])), ) - @property - def run_ids(self) -> AbstractSet[str]: - return self.evaluation_with_run_ids.run_ids - - @property - def evaluation(self) -> AssetConditionEvaluation: - return self.evaluation_with_run_ids.evaluation + def get_evaluation_with_run_ids( + self, partitions_def: Optional[PartitionsDefinition] + ) -> AssetConditionEvaluationWithRunIds: + try: + # If this was serialized as an AssetConditionEvaluationWithRunIds, we can deserialize + # this directly + return deserialize_value( + self.serialized_evaluation_body, AssetConditionEvaluationWithRunIds + ) + except DeserializationError: + # If this is a legacy AutoMaterializeAssetEvaluation, we need to pass in the partitions + # definition in order to be able to deserialize the evaluation properly + return deserialize_auto_materialize_asset_evaluation_to_asset_condition_evaluation_with_run_ids( + self.serialized_evaluation_body, partitions_def + ) diff --git a/python_modules/dagster/dagster/_daemon/asset_daemon.py b/python_modules/dagster/dagster/_daemon/asset_daemon.py index 4c0f931e9eaad..61bf46515a31b 100644 --- a/python_modules/dagster/dagster/_daemon/asset_daemon.py +++ b/python_modules/dagster/dagster/_daemon/asset_daemon.py @@ -705,7 +705,11 @@ def _process_auto_materialize_tick_generator( ) ) evaluations_by_asset_key = { - evaluation_record.asset_key: evaluation_record.evaluation_with_run_ids + evaluation_record.asset_key: evaluation_record.get_evaluation_with_run_ids( + partitions_def=asset_graph.get_partitions_def( + evaluation_record.asset_key + ) + ) for evaluation_record in evaluation_records } else: diff --git a/python_modules/dagster/dagster/_utils/caching_instance_queryer.py b/python_modules/dagster/dagster/_utils/caching_instance_queryer.py index 0b90afb7616f9..5cf3327a3c750 100644 --- a/python_modules/dagster/dagster/_utils/caching_instance_queryer.py +++ b/python_modules/dagster/dagster/_utils/caching_instance_queryer.py @@ -20,6 +20,7 @@ import dagster._check as check from dagster._core.definitions.asset_graph import AssetGraph from dagster._core.definitions.asset_graph_subset import AssetGraphSubset +from dagster._core.definitions.asset_subset import AssetSubset from dagster._core.definitions.data_version import ( DATA_VERSION_TAG, DataVersion, @@ -50,6 +51,7 @@ if TYPE_CHECKING: from dagster._core.storage.event_log import EventLogRecord from dagster._core.storage.event_log.base import AssetRecord + from dagster._core.storage.partition_status_cache import AssetStatusCacheValue class CachingInstanceQueryer(DynamicPartitionsStore): @@ -122,23 +124,28 @@ def prefetch_asset_records(self, asset_keys: Iterable[AssetKey]): #################### @cached_method - def get_failed_or_in_progress_subset(self, *, asset_key: AssetKey) -> PartitionsSubset: - """Returns a PartitionsSubset representing the set of partitions that are either in progress - or whose last materialization attempt failed. - """ + def _get_updated_cache_value(self, *, asset_key: AssetKey) -> Optional["AssetStatusCacheValue"]: from dagster._core.storage.partition_status_cache import ( get_and_update_asset_status_cache_value, ) partitions_def = check.not_none(self.asset_graph.get_partitions_def(asset_key)) asset_record = self.get_asset_record(asset_key) - cache_value = get_and_update_asset_status_cache_value( + return get_and_update_asset_status_cache_value( instance=self.instance, asset_key=asset_key, partitions_def=partitions_def, dynamic_partitions_loader=self, asset_record=asset_record, ) + + @cached_method + def get_failed_or_in_progress_subset(self, *, asset_key: AssetKey) -> PartitionsSubset: + """Returns a PartitionsSubset representing the set of partitions that are either in progress + or whose last materialization attempt failed. + """ + partitions_def = check.not_none(self.asset_graph.get_partitions_def(asset_key)) + cache_value = self._get_updated_cache_value(asset_key=asset_key) if cache_value is None: return partitions_def.empty_subset() @@ -146,6 +153,22 @@ def get_failed_or_in_progress_subset(self, *, asset_key: AssetKey) -> Partitions partitions_def ) | cache_value.deserialize_in_progress_partition_subsets(partitions_def) + @cached_method + def get_materialized_asset_subset(self, *, asset_key: AssetKey) -> AssetSubset: + """Returns an AssetSubset representing the subset of the asset that has been materialized.""" + partitions_def = self.asset_graph.get_partitions_def(asset_key) + if partitions_def: + cache_value = self._get_updated_cache_value(asset_key=asset_key) + if cache_value is None: + value = partitions_def.empty_subset() + else: + value = cache_value.deserialize_materialized_partition_subsets(partitions_def) + else: + value = self.asset_partition_has_materialization_or_observation( + AssetKeyPartitionKey(asset_key) + ) + return AssetSubset(asset_key, value) + #################### # ASSET RECORDS / STORAGE IDS #################### diff --git a/python_modules/dagster/dagster/_utils/test/schedule_storage.py b/python_modules/dagster/dagster/_utils/test/schedule_storage.py index f6ba22529b112..f1962aa6fee97 100644 --- a/python_modules/dagster/dagster/_utils/test/schedule_storage.py +++ b/python_modules/dagster/dagster/_utils/test/schedule_storage.py @@ -740,12 +740,12 @@ def test_auto_materialize_asset_evaluations(self, storage) -> None: AssetConditionEvaluation( condition_snapshot=condition_snapshot, true_subset=AssetSubset(asset_key=AssetKey("asset_one"), value=False), - candidate_subset=None, + candidate_subset=AssetSubset(asset_key=AssetKey("asset_one"), value=False), ).with_run_ids(set()), AssetConditionEvaluation( condition_snapshot=condition_snapshot, true_subset=AssetSubset(asset_key=AssetKey("asset_two"), value=True), - candidate_subset=None, + candidate_subset=AssetSubset(asset_key=AssetKey("asset_two"), value=True), subsets_with_metadata=[ AssetSubsetWithMetadata( AssetSubset(asset_key=AssetKey("asset_two"), value=True), @@ -760,28 +760,36 @@ def test_auto_materialize_asset_evaluations(self, storage) -> None: asset_key=AssetKey("asset_one"), limit=100 ) assert len(res) == 1 - assert res[0].evaluation.asset_key == AssetKey("asset_one") + assert res[0].get_evaluation_with_run_ids(None).evaluation.asset_key == AssetKey( + "asset_one" + ) assert res[0].evaluation_id == 10 - assert res[0].evaluation.true_subset.size == 0 + assert res[0].get_evaluation_with_run_ids(None).evaluation.true_subset.size == 0 res = storage.get_auto_materialize_asset_evaluations( asset_key=AssetKey("asset_two"), limit=100 ) assert len(res) == 1 - assert res[0].evaluation.asset_key == AssetKey("asset_two") + assert res[0].get_evaluation_with_run_ids(None).evaluation.asset_key == AssetKey( + "asset_two" + ) assert res[0].evaluation_id == 10 - assert res[0].evaluation.true_subset.size == 1 + assert res[0].get_evaluation_with_run_ids(None).evaluation.true_subset.size == 1 res = storage.get_auto_materialize_evaluations_for_evaluation_id(evaluation_id=10) assert len(res) == 2 - assert res[0].evaluation.asset_key == AssetKey("asset_one") + assert res[0].get_evaluation_with_run_ids(None).evaluation.asset_key == AssetKey( + "asset_one" + ) assert res[0].evaluation_id == 10 - assert res[0].evaluation.true_subset.size == 0 + assert res[0].get_evaluation_with_run_ids(None).evaluation.true_subset.size == 0 - assert res[1].evaluation.asset_key == AssetKey("asset_two") + assert res[1].get_evaluation_with_run_ids(None).evaluation.asset_key == AssetKey( + "asset_two" + ) assert res[1].evaluation_id == 10 - assert res[1].evaluation.true_subset.size == 1 + assert res[1].get_evaluation_with_run_ids(None).evaluation.true_subset.size == 1 storage.add_auto_materialize_asset_evaluations( evaluation_id=11, @@ -789,7 +797,7 @@ def test_auto_materialize_asset_evaluations(self, storage) -> None: AssetConditionEvaluation( condition_snapshot=condition_snapshot, true_subset=AssetSubset(asset_key=AssetKey("asset_one"), value=True), - candidate_subset=None, + candidate_subset=AssetSubset(asset_key=AssetKey("asset_one"), value=True), ).with_run_ids(set()), ], ) @@ -818,13 +826,13 @@ def test_auto_materialize_asset_evaluations(self, storage) -> None: eval_one = AssetConditionEvaluation( condition_snapshot=AssetConditionSnapshot("foo", "bar", ""), true_subset=AssetSubset(asset_key=AssetKey("asset_one"), value=True), - candidate_subset=None, + candidate_subset=AssetSubset(asset_key=AssetKey("asset_one"), value=True), ).with_run_ids(set()) eval_asset_three = AssetConditionEvaluation( condition_snapshot=AssetConditionSnapshot("foo", "bar", ""), true_subset=AssetSubset(asset_key=AssetKey("asset_three"), value=True), - candidate_subset=None, + candidate_subset=AssetSubset(asset_key=AssetKey("asset_three"), value=True), ).with_run_ids(set()) storage.add_auto_materialize_asset_evaluations( @@ -840,7 +848,7 @@ def test_auto_materialize_asset_evaluations(self, storage) -> None: ) assert len(res) == 2 assert res[0].evaluation_id == 11 - assert res[0].evaluation == eval_one.evaluation + assert res[0].get_evaluation_with_run_ids(None).evaluation == eval_one.evaluation res = storage.get_auto_materialize_asset_evaluations( asset_key=AssetKey("asset_three"), limit=100 @@ -848,7 +856,7 @@ def test_auto_materialize_asset_evaluations(self, storage) -> None: assert len(res) == 1 assert res[0].evaluation_id == 11 - assert res[0].evaluation == eval_asset_three.evaluation + assert res[0].get_evaluation_with_run_ids(None).evaluation == eval_asset_three.evaluation def test_auto_materialize_asset_evaluations_with_partitions(self, storage) -> None: if not self.can_store_auto_materialize_asset_evaluations(): @@ -868,7 +876,7 @@ def test_auto_materialize_asset_evaluations_with_partitions(self, storage) -> No AssetConditionEvaluation( condition_snapshot=AssetConditionSnapshot("foo", "bar", ""), true_subset=asset_subset, - candidate_subset=None, + candidate_subset=asset_subset, subsets_with_metadata=[asset_subset_with_metadata], ).with_run_ids(set()), ], @@ -878,11 +886,16 @@ def test_auto_materialize_asset_evaluations_with_partitions(self, storage) -> No asset_key=AssetKey("asset_two"), limit=100 ) assert len(res) == 1 - assert res[0].evaluation.asset_key == AssetKey("asset_two") + assert res[0].get_evaluation_with_run_ids(None).evaluation.asset_key == AssetKey( + "asset_two" + ) assert res[0].evaluation_id == 10 - assert res[0].evaluation.true_subset.size == 1 + assert res[0].get_evaluation_with_run_ids(None).evaluation.true_subset.size == 1 - assert res[0].evaluation.subsets_with_metadata[0] == asset_subset_with_metadata + assert ( + res[0].get_evaluation_with_run_ids(None).evaluation.subsets_with_metadata[0] + == asset_subset_with_metadata + ) def test_purge_asset_evaluations(self, storage) -> None: if not self.can_purge(): @@ -894,7 +907,7 @@ def test_purge_asset_evaluations(self, storage) -> None: AssetConditionEvaluation( condition_snapshot=AssetConditionSnapshot("foo", "bar", ""), true_subset=AssetSubset(asset_key=AssetKey("asset_one"), value=True), - candidate_subset=None, + candidate_subset=AssetSubset(asset_key=AssetKey("asset_one"), value=True), subsets_with_metadata=[], ).with_run_ids(set()), ], diff --git a/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/asset_daemon_scenario.py b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/asset_daemon_scenario.py index 7d734692cd2cb..82111df971f36 100644 --- a/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/asset_daemon_scenario.py +++ b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/asset_daemon_scenario.py @@ -489,7 +489,9 @@ def _evaluate_tick_daemon( ) ] new_evaluations = [ - e.evaluation + e.get_evaluation_with_run_ids( + self.asset_graph.get_partitions_def(e.asset_key) + ).evaluation for e in check.not_none( self.instance.schedule_storage ).get_auto_materialize_evaluations_for_evaluation_id(new_cursor.evaluation_id) @@ -617,7 +619,7 @@ def _assert_evaluation_daemon( ) if key in (run.asset_selection or set()) } - evaluation_with_run_ids = next( + evaluation_record = next( iter( [ e @@ -628,7 +630,12 @@ def _assert_evaluation_daemon( ] ) ) - assert new_run_ids_for_asset == evaluation_with_run_ids.run_ids + assert ( + new_run_ids_for_asset + == evaluation_record.get_evaluation_with_run_ids( + self.asset_graph.get_partitions_def(key) + ).run_ids + ) def assert_evaluation( self, diff --git a/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/test_asset_daemon_failure_recovery.py b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/test_asset_daemon_failure_recovery.py index 72543077a66df..82eecfbfa5fd4 100644 --- a/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/test_asset_daemon_failure_recovery.py +++ b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/test_asset_daemon_failure_recovery.py @@ -504,8 +504,12 @@ def sort_run_key_fn(run): asset_key=AssetKey("hourly"), limit=100 ) assert len(evaluations) == 1 - assert evaluations[0].evaluation.asset_key == AssetKey("hourly") - assert evaluations[0].run_ids == {run.run_id for run in sorted_runs} + assert evaluations[0].get_evaluation_with_run_ids(None).evaluation.asset_key == AssetKey( + "hourly" + ) + assert evaluations[0].get_evaluation_with_run_ids(None).run_ids == { + run.run_id for run in sorted_runs + } @pytest.mark.parametrize( @@ -611,8 +615,12 @@ def sort_run_key_fn(run): asset_key=AssetKey("hourly"), limit=100 ) assert len(evaluations) == 1 - assert evaluations[0].evaluation.asset_key == AssetKey("hourly") - assert evaluations[0].run_ids == {run.run_id for run in sorted_runs} + assert evaluations[0].get_evaluation_with_run_ids(None).evaluation.asset_key == AssetKey( + "hourly" + ) + assert evaluations[0].get_evaluation_with_run_ids(None).run_ids == { + run.run_id for run in sorted_runs + } cursor = _get_pre_sensor_auto_materialize_serialized_cursor(instance) assert cursor diff --git a/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/test_auto_materialize_asset_evaluation.py b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/test_auto_materialize_asset_evaluation.py index ecc9f5b31285d..b305bdc8eb82e 100644 --- a/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/test_auto_materialize_asset_evaluation.py +++ b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/test_auto_materialize_asset_evaluation.py @@ -1,11 +1,12 @@ -from dagster import MetadataValue +from dagster import DailyPartitionsDefinition, MetadataValue from dagster._core.definitions.asset_condition import ( - AssetConditionEvaluationWithRunIds, AssetSubsetWithMetadata, ) from dagster._core.definitions.asset_subset import AssetSubset +from dagster._core.definitions.auto_materialize_rule_evaluation import ( + deserialize_auto_materialize_asset_evaluation_to_asset_condition_evaluation_with_run_ids, +) from dagster._core.definitions.events import AssetKey -from dagster._serdes.serdes import deserialize_value def test_backcompat_unpartitioned_skipped() -> None: @@ -31,8 +32,10 @@ def test_backcompat_unpartitioned_skipped() -> None: '"AutoMaterializeDecisionType.MATERIALIZE"}, "description": "not materialized since last ' 'cron schedule tick of \'0 * * * *\' (timezone: UTC)"}], "run_ids": {"__set__": []}}' ) - deserialized_with_run_ids = deserialize_value( - serialized_asset_evaluation, AssetConditionEvaluationWithRunIds + deserialized_with_run_ids = ( + deserialize_auto_materialize_asset_evaluation_to_asset_condition_evaluation_with_run_ids( + serialized_asset_evaluation, None + ) ) deserialized = deserialized_with_run_ids.evaluation @@ -73,8 +76,10 @@ def test_backcompat_unpartitioned_requested() -> None: '"AutoMaterializeDecisionType.MATERIALIZE"}, "description": "not materialized since last ' 'cron schedule tick of \'0 * * * *\' (timezone: UTC)"}], "run_ids": {"__set__": []}}' ) - deserialized_with_run_ids = deserialize_value( - serialized_asset_evaluation, AssetConditionEvaluationWithRunIds + deserialized_with_run_ids = ( + deserialize_auto_materialize_asset_evaluation_to_asset_condition_evaluation_with_run_ids( + serialized_asset_evaluation, None + ) ) deserialized = deserialized_with_run_ids.evaluation assert len(deserialized.true_subset.asset_partitions) == 1 @@ -148,19 +153,26 @@ def test_backcompat_partitioned_asset() -> None: '"AutoMaterializeDecisionType.MATERIALIZE"}, "description": "upstream data has changed ' 'since latest materialization"}], "run_ids": {"__set__": []}}' ) - deserialized_with_run_ids = deserialize_value( - serialized_asset_evaluation, AssetConditionEvaluationWithRunIds + deserialized_with_run_ids = ( + deserialize_auto_materialize_asset_evaluation_to_asset_condition_evaluation_with_run_ids( + serialized_asset_evaluation, DailyPartitionsDefinition("2013-01-05") + ) ) deserialized = deserialized_with_run_ids.evaluation - # all subsets should have zero size - assert deserialized.true_subset.size == 0 + assert deserialized.true_subset.size == 1 assert len(deserialized.child_evaluations) == 2 - (materialize_evaluation, not_skip_evaluation) = deserialized.child_evaluations - assert materialize_evaluation.true_subset.size == 0 - assert not_skip_evaluation.true_subset.size == 0 + ( + materialize_evaluation, + not_skip_evaluation, + ) = deserialized.child_evaluations + assert materialize_evaluation.true_subset.size == 3 + assert not_skip_evaluation.true_subset.size == 1 skip_evaluation = not_skip_evaluation.child_evaluations[0] - assert skip_evaluation.true_subset.size == 0 + assert skip_evaluation.true_subset.size == 1 assert len(skip_evaluation.child_evaluations) == 4 assert skip_evaluation.child_evaluations[0].true_subset.size == 0 + assert skip_evaluation.child_evaluations[1].true_subset.size == 0 + assert skip_evaluation.child_evaluations[2].true_subset.size == 1 + assert skip_evaluation.child_evaluations[3].true_subset.size == 0 diff --git a/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/updated_scenarios/cursor_migration_scenarios.py b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/updated_scenarios/cursor_migration_scenarios.py index 91a8fe9be3be0..eb3df4686aac2 100644 --- a/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/updated_scenarios/cursor_migration_scenarios.py +++ b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/updated_scenarios/cursor_migration_scenarios.py @@ -1,19 +1,25 @@ -from dagster import ( - AutoMaterializeRule, -) +from dagster import AutoMaterializeRule from dagster._core.definitions.auto_materialize_rule import DiscardOnMaxMaterializationsExceededRule +from dagster_tests.definitions_tests.auto_materialize_tests.updated_scenarios.cron_scenarios import ( + basic_hourly_cron_rule, + get_cron_policy, +) + from ..asset_daemon_scenario import ( AssetDaemonScenario, AssetRuleEvaluationSpec, day_partition_key, + hour_partition_key, ) from ..base_scenario import ( run_request, ) from .asset_daemon_scenario_states import ( daily_partitions_def, + hourly_partitions_def, one_asset, + three_assets_in_sequence, time_partitions_start_str, ) @@ -51,4 +57,120 @@ # the new cursor "remembers" that a bunch of partitions were discarded .assert_requested_runs(), ), + AssetDaemonScenario( + id="one_asset_daily_partitions_two_years_never_materialized_migrate_after_run_requested", + initial_state=one_asset.with_asset_properties(partitions_def=daily_partitions_def) + .with_current_time(time_partitions_start_str) + .with_current_time_advanced(years=2, hours=4) + .with_all_eager(), + execution_fn=lambda state: state.evaluate_tick() + .assert_requested_runs( + run_request(asset_keys=["A"], partition_key=day_partition_key(state.current_time)) + ) + .with_serialized_cursor( + # this cursor was generate by running the above scenario before the cursor changes + """ +{"latest_storage_id": null, "handled_root_asset_keys": [], "handled_root_partitions_by_asset_key": {"A": "{\\"version\\": 1, \\"time_windows\\": [[1357344000.0, 1420416000.0]], \\"num_partitions\\": 730}"}, "evaluation_id": 1, "last_observe_request_timestamp_by_asset_key": {}, "latest_evaluation_by_asset_key": {"A": "{\\"__class__\\": \\"AutoMaterializeAssetEvaluation\\", \\"asset_key\\": {\\"__class__\\": \\"AssetKey\\", \\"path\\": [\\"A\\"]}, \\"num_discarded\\": 729, \\"num_requested\\": 1, \\"num_skipped\\": 0, \\"partition_subsets_by_condition\\": [[{\\"__class__\\": \\"AutoMaterializeRuleEvaluation\\", \\"evaluation_data\\": null, \\"rule_snapshot\\": {\\"__class__\\": \\"AutoMaterializeRuleSnapshot\\", \\"class_name\\": \\"MaterializeOnMissingRule\\", \\"decision_type\\": {\\"__enum__\\": \\"AutoMaterializeDecisionType.MATERIALIZE\\"}, \\"description\\": \\"materialization is missing\\"}}, {\\"__class__\\": \\"SerializedPartitionsSubset\\", \\"serialized_partitions_def_class_name\\": \\"DailyPartitionsDefinition\\", \\"serialized_partitions_def_unique_id\\": \\"809725ad60ffac0302d5c81f6e45865e21ec0b85\\", \\"serialized_subset\\": \\"{\\\\\\"version\\\\\\": 1, \\\\\\"time_windows\\\\\\": [[1357344000.0, 1420416000.0]], \\\\\\"num_partitions\\\\\\": 730}\\"}], [{\\"__class__\\": \\"AutoMaterializeRuleEvaluation\\", \\"evaluation_data\\": null, \\"rule_snapshot\\": {\\"__class__\\": \\"AutoMaterializeRuleSnapshot\\", \\"class_name\\": \\"DiscardOnMaxMaterializationsExceededRule\\", \\"decision_type\\": {\\"__enum__\\": \\"AutoMaterializeDecisionType.DISCARD\\"}, \\"description\\": \\"exceeds 1 materialization(s) per minute\\"}}, {\\"__class__\\": \\"SerializedPartitionsSubset\\", \\"serialized_partitions_def_class_name\\": \\"DailyPartitionsDefinition\\", \\"serialized_partitions_def_unique_id\\": \\"809725ad60ffac0302d5c81f6e45865e21ec0b85\\", \\"serialized_subset\\": \\"{\\\\\\"version\\\\\\": 1, \\\\\\"time_windows\\\\\\": [[1357344000.0, 1420329600.0]], \\\\\\"num_partitions\\\\\\": 729}\\"}]], \\"rule_snapshots\\": [{\\"__class__\\": \\"AutoMaterializeRuleSnapshot\\", \\"class_name\\": \\"SkipOnParentMissingRule\\", \\"decision_type\\": {\\"__enum__\\": \\"AutoMaterializeDecisionType.SKIP\\"}, \\"description\\": \\"waiting on upstream data to be present\\"}, {\\"__class__\\": \\"AutoMaterializeRuleSnapshot\\", \\"class_name\\": \\"MaterializeOnRequiredForFreshnessRule\\", \\"decision_type\\": {\\"__enum__\\": \\"AutoMaterializeDecisionType.MATERIALIZE\\"}, \\"description\\": \\"required to meet this or downstream asset's freshness policy\\"}, {\\"__class__\\": \\"AutoMaterializeRuleSnapshot\\", \\"class_name\\": \\"MaterializeOnParentUpdatedRule\\", \\"decision_type\\": {\\"__enum__\\": \\"AutoMaterializeDecisionType.MATERIALIZE\\"}, \\"description\\": \\"upstream data has changed since latest materialization\\"}, {\\"__class__\\": \\"AutoMaterializeRuleSnapshot\\", \\"class_name\\": \\"SkipOnBackfillInProgressRule\\", \\"decision_type\\": {\\"__enum__\\": \\"AutoMaterializeDecisionType.SKIP\\"}, \\"description\\": \\"targeted by an in-progress backfill\\"}, {\\"__class__\\": \\"AutoMaterializeRuleSnapshot\\", \\"class_name\\": \\"SkipOnParentOutdatedRule\\", \\"decision_type\\": {\\"__enum__\\": \\"AutoMaterializeDecisionType.SKIP\\"}, \\"description\\": \\"waiting on upstream data to be up to date\\"}, {\\"__class__\\": \\"AutoMaterializeRuleSnapshot\\", \\"class_name\\": \\"MaterializeOnMissingRule\\", \\"decision_type\\": {\\"__enum__\\": \\"AutoMaterializeDecisionType.MATERIALIZE\\"}, \\"description\\": \\"materialization is missing\\"}, {\\"__class__\\": \\"AutoMaterializeRuleSnapshot\\", \\"class_name\\": \\"SkipOnRequiredButNonexistentParentsRule\\", \\"decision_type\\": {\\"__enum__\\": \\"AutoMaterializeDecisionType.SKIP\\"}, \\"description\\": \\"required parent partitions do not exist\\"}], \\"run_ids\\": {\\"__set__\\": []}}"}, "latest_evaluation_timestamp": 1420430400.0} +""" + ) + .evaluate_tick() + # the new cursor should not kick off a new run because the previous tick already requested one + .assert_requested_runs(), + ), + AssetDaemonScenario( + id="partitioned_non_root_asset_missing_after_migrate", + initial_state=three_assets_in_sequence.with_asset_properties( + partitions_def=daily_partitions_def + ) + .with_current_time(time_partitions_start_str) + .with_current_time_advanced(days=10, hours=4) + .with_all_eager(), + execution_fn=lambda state: state.evaluate_tick() + .assert_requested_runs( + run_request( + asset_keys=["A", "B", "C"], partition_key=day_partition_key(state.current_time) + ) + ) + # materialize the previous day's partitions manually + .with_runs( + run_request( + asset_keys=["A", "B", "C"], + partition_key=day_partition_key(state.current_time, delta=-1), + ) + ) + .evaluate_tick() + .assert_requested_runs() + # now update the cursor -- this serialized cursor does not contain any information about + # the missing partitions for B or C, because we used to only track this information for + # root assets. B or C also has not been materialized since the previous tick + .with_serialized_cursor( + """{"latest_storage_id": 24, "handled_root_asset_keys": [], "handled_root_partitions_by_asset_key": {"A": "{\\"version\\": 1, \\"time_windows\\": [[1357344000.0, 1358208000.0]], \\"num_partitions\\": 10}", "B": "{\\"version\\": 1, \\"time_windows\\": [], \\"num_partitions\\": 0}", "C": "{\\"version\\": 1, \\"time_windows\\": [], \\"num_partitions\\": 0}"}, "evaluation_id": 2, "last_observe_request_timestamp_by_asset_key": {}, "latest_evaluation_by_asset_key": {}, "latest_evaluation_timestamp": 1358222400.164996}""" + ) + .evaluate_tick() + # when getting the new cursor, we should realize that B and C are not missing any partitions + # that can be materialized + .assert_requested_runs(), + ), + AssetDaemonScenario( + id="basic_hourly_cron_unpartitioned_migrate", + initial_state=one_asset.with_asset_properties( + auto_materialize_policy=get_cron_policy(basic_hourly_cron_rule) + ).with_current_time("2020-01-01T00:05"), + execution_fn=lambda state: state.evaluate_tick() + .assert_requested_runs(run_request(["A"])) + .assert_evaluation("A", [AssetRuleEvaluationSpec(basic_hourly_cron_rule)]) + # next tick should not request any more runs + .with_current_time_advanced(seconds=30) + .evaluate_tick() + .assert_requested_runs() + # still no runs should be requested + .with_current_time_advanced(minutes=50) + .evaluate_tick() + .assert_requested_runs() + # moved to a new cron schedule tick, request another run + .with_current_time_advanced(minutes=10) + .evaluate_tick() + .assert_requested_runs(run_request(["A"])) + .assert_evaluation("A", [AssetRuleEvaluationSpec(basic_hourly_cron_rule)]) + # next tick should not request any more runs + .with_serialized_cursor( + """{"latest_storage_id": null, "handled_root_asset_keys": ["A"], "handled_root_partitions_by_asset_key": {}, "evaluation_id": 4, "last_observe_request_timestamp_by_asset_key": {}, "latest_evaluation_by_asset_key": {"A": "{\\"__class__\\": \\"AutoMaterializeAssetEvaluation\\", \\"asset_key\\": {\\"__class__\\": \\"AssetKey\\", \\"path\\": [\\"A\\"]}, \\"num_discarded\\": 0, \\"num_requested\\": 1, \\"num_skipped\\": 0, \\"partition_subsets_by_condition\\": [[{\\"__class__\\": \\"AutoMaterializeRuleEvaluation\\", \\"evaluation_data\\": null, \\"rule_snapshot\\": {\\"__class__\\": \\"AutoMaterializeRuleSnapshot\\", \\"class_name\\": \\"MaterializeOnCronRule\\", \\"decision_type\\": {\\"__enum__\\": \\"AutoMaterializeDecisionType.MATERIALIZE\\"}, \\"description\\": \\"not materialized since last cron schedule tick of '0 * * * *' (timezone: UTC)\\"}}, null]], \\"rule_snapshots\\": [{\\"__class__\\": \\"AutoMaterializeRuleSnapshot\\", \\"class_name\\": \\"MaterializeOnCronRule\\", \\"decision_type\\": {\\"__enum__\\": \\"AutoMaterializeDecisionType.MATERIALIZE\\"}, \\"description\\": \\"not materialized since last cron schedule tick of '0 * * * *' (timezone: UTC)\\"}, {\\"__class__\\": \\"AutoMaterializeRuleSnapshot\\", \\"class_name\\": \\"SkipOnNotAllParentsUpdatedRule\\", \\"decision_type\\": {\\"__enum__\\": \\"AutoMaterializeDecisionType.SKIP\\"}, \\"description\\": \\"waiting on upstream data to be updated\\"}], \\"run_ids\\": {\\"__set__\\": []}}"}, "latest_evaluation_timestamp": 1577840730.0}""" + ) + .with_current_time_advanced(seconds=30) + .evaluate_tick() + .assert_requested_runs(), + ), + AssetDaemonScenario( + id="basic_hourly_cron_partitioned_migrate", + initial_state=one_asset.with_asset_properties( + partitions_def=hourly_partitions_def, + auto_materialize_policy=get_cron_policy(basic_hourly_cron_rule), + ) + .with_current_time(time_partitions_start_str) + .with_current_time_advanced(days=1, minutes=5), + execution_fn=lambda state: state.evaluate_tick() + .assert_requested_runs(run_request(["A"], hour_partition_key(state.current_time))) + .assert_evaluation( + "A", + [ + AssetRuleEvaluationSpec( + basic_hourly_cron_rule, [hour_partition_key(state.current_time)] + ) + ], + ) + # next tick should not request any more runs + .with_current_time_advanced(seconds=30) + .evaluate_tick() + .assert_requested_runs() + # still no runs should be requested + .with_current_time_advanced(minutes=50) + .evaluate_tick() + .assert_requested_runs() + # moved to a new cron schedule tick, request another run for the new partition + .with_current_time_advanced(minutes=10) + .evaluate_tick( + """{"latest_storage_id": null, "handled_root_asset_keys": [], "handled_root_partitions_by_asset_key": {"A": "{\"version\": 1, \"time_windows\": [[1357426800.0, 1357430400.0]], \"num_partitions\": 1}"}, "evaluation_id": 2, "last_observe_request_timestamp_by_asset_key": {}, "latest_evaluation_by_asset_key": {}, "latest_evaluation_timestamp": 1357430730.0}""" + ) + .assert_requested_runs(run_request(["A"], hour_partition_key(state.current_time, 1))), + ), ] diff --git a/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/updated_scenarios/partition_scenarios.py b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/updated_scenarios/partition_scenarios.py index 21f7e5e829e89..0a178a3a1db39 100644 --- a/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/updated_scenarios/partition_scenarios.py +++ b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/updated_scenarios/partition_scenarios.py @@ -510,25 +510,23 @@ ) .evaluate_tick("FOO") .assert_requested_runs() - .with_not_started_runs(), - # TEMPORARILY DISABLED: this test will be re-enabled upstack. It is currently broken because - # we do not handle the case where partitions defs change in the MaterializeOnMissingRule + .with_not_started_runs() # now the start date is updated, request the new first partition key - # .with_current_time_advanced(days=5) - # .with_asset_properties( - # partitions_def=hourly_partitions_def._replace( - # start=time_partitions_start_datetime + datetime.timedelta(days=5) - # ) - # ) - # .evaluate_tick("BAR") - # .assert_requested_runs( - # run_request( - # ["A"], - # partition_key=hour_partition_key( - # time_partitions_start_datetime + datetime.timedelta(days=5), delta=1 - # ), - # ) - # ), + .with_current_time_advanced(days=5) + .with_asset_properties( + partitions_def=hourly_partitions_def._replace( + start=time_partitions_start_datetime + datetime.timedelta(days=5) + ) + ) + .evaluate_tick("BAR") + .assert_requested_runs( + run_request( + ["A"], + partition_key=hour_partition_key( + time_partitions_start_datetime + datetime.timedelta(days=5), delta=1 + ), + ) + ), ), AssetDaemonScenario( id="one_asset_self_dependency_multi_partitions_def", From ae20d64504b6a6cb8b406a88a11c030c37c4dc00 Mon Sep 17 00:00:00 2001 From: Owen Kephart Date: Mon, 18 Dec 2023 16:18:38 -0800 Subject: [PATCH 12/56] Create AssetConditionEvaluationResult object --- .../_core/definitions/asset_condition.py | 145 +++++++++++------- .../_core/definitions/asset_daemon_context.py | 10 +- .../_core/definitions/asset_daemon_cursor.py | 37 ++--- .../definitions/auto_materialize_rule.py | 8 +- .../auto_materialize_rule_evaluation.py | 5 +- 5 files changed, 113 insertions(+), 92 deletions(-) diff --git a/python_modules/dagster/dagster/_core/definitions/asset_condition.py b/python_modules/dagster/dagster/_core/definitions/asset_condition.py index 92b14e07193a8..bf34385e573b5 100644 --- a/python_modules/dagster/dagster/_core/definitions/asset_condition.py +++ b/python_modules/dagster/dagster/_core/definitions/asset_condition.py @@ -16,14 +16,12 @@ ) import dagster._check as check -from dagster._core.definitions.asset_daemon_cursor import ( - AssetConditionCursorExtras, -) from dagster._core.definitions.events import AssetKey from dagster._core.definitions.metadata import MetadataMapping, MetadataValue from dagster._core.definitions.partition import AllPartitionsSubset from dagster._serdes.serdes import ( FieldSerializer, + PackableValue, UnpackContext, WhitelistMap, pack_value, @@ -31,12 +29,10 @@ whitelist_for_serdes, ) -from .asset_condition_evaluation_context import ( - AssetConditionEvaluationContext, -) from .asset_subset import AssetSubset if TYPE_CHECKING: + from .asset_condition_evaluation_context import AssetConditionEvaluationContext from .auto_materialize_rule import AutoMaterializeRule @@ -175,6 +171,62 @@ def with_run_ids(self, run_ids: AbstractSet[str]) -> "AssetConditionEvaluationWi return AssetConditionEvaluationWithRunIds(evaluation=self, run_ids=frozenset(run_ids)) +class AssetConditionEvaluationResult(NamedTuple): + """Return value for the evaluate method of an AssetCondition.""" + + condition: "AssetCondition" + evaluation: AssetConditionEvaluation + extra_values_by_unique_id: Mapping[str, PackableValue] + + @property + def true_subset(self) -> AssetSubset: + return self.evaluation.true_subset + + @staticmethod + def create_from_children( + context: "AssetConditionEvaluationContext", + true_subset: AssetSubset, + child_results: Sequence["AssetConditionEvaluationResult"], + ) -> "AssetConditionEvaluationResult": + """Returns a new AssetConditionEvaluationResult from the given child results.""" + return AssetConditionEvaluationResult( + condition=context.condition, + evaluation=AssetConditionEvaluation( + context.condition.snapshot, + true_subset=true_subset, + candidate_subset=context.candidate_subset, + subsets_with_metadata=[], + child_evaluations=[child_result.evaluation for child_result in child_results], + ), + extra_values_by_unique_id=dict( + item + for child_result in child_results + for item in child_result.extra_values_by_unique_id.items() + ), + ) + + @staticmethod + def create( + context: "AssetConditionEvaluationContext", + true_subset: AssetSubset, + subsets_with_metadata: Sequence[AssetSubsetWithMetadata] = [], + extra_value: PackableValue = None, + ) -> "AssetConditionEvaluationResult": + """Returns a new AssetConditionEvaluationResult from the given parameters.""" + return AssetConditionEvaluationResult( + condition=context.condition, + evaluation=AssetConditionEvaluation( + context.condition.snapshot, + true_subset=true_subset, + candidate_subset=context.candidate_subset, + subsets_with_metadata=subsets_with_metadata, + ), + extra_values_by_unique_id={context.condition.unique_id: extra_value} + if extra_value + else {}, + ) + + @whitelist_for_serdes class AssetConditionEvaluationWithRunIds(NamedTuple): """A union of an AssetConditionEvaluation and the set of run IDs that have been launched in @@ -209,8 +261,8 @@ def unique_id(self) -> str: @abstractmethod def evaluate( - self, context: AssetConditionEvaluationContext - ) -> Tuple[AssetConditionEvaluation, Sequence[AssetConditionCursorExtras]]: + self, context: "AssetConditionEvaluationContext" + ) -> AssetConditionEvaluationResult: raise NotImplementedError() @abstractproperty @@ -282,21 +334,21 @@ def description(self) -> str: return self.rule.description def evaluate( - self, context: AssetConditionEvaluationContext - ) -> Tuple[AssetConditionEvaluation, Sequence[AssetConditionCursorExtras]]: + self, context: "AssetConditionEvaluationContext" + ) -> AssetConditionEvaluationResult: context.root_context.daemon_context._verbose_log_fn( # noqa f"Evaluating rule: {self.rule.to_snapshot()}" ) - true_subset, subsets_with_metadata, extras = self.rule.evaluate_for_asset(context) + true_subset, subsets_with_metadata, extra_value = self.rule.evaluate_for_asset(context) context.root_context.daemon_context._verbose_log_fn( # noqa f"Rule returned {true_subset.size} partitions" f"{true_subset}" ) - return AssetConditionEvaluation( - condition_snapshot=self.snapshot, + return AssetConditionEvaluationResult.create( + context=context, true_subset=true_subset, - candidate_subset=context.candidate_subset, subsets_with_metadata=subsets_with_metadata, - ), [AssetConditionCursorExtras(condition_snapshot=self.snapshot, extras=extras)] + extra_value=extra_value, + ) class AndAssetCondition( @@ -310,23 +362,18 @@ def description(self) -> str: return "All of" def evaluate( - self, context: AssetConditionEvaluationContext - ) -> Tuple[AssetConditionEvaluation, Sequence[AssetConditionCursorExtras]]: - child_evaluations: List[AssetConditionEvaluation] = [] - child_extras: List[AssetConditionCursorExtras] = [] + self, context: "AssetConditionEvaluationContext" + ) -> AssetConditionEvaluationResult: + child_results: List[AssetConditionEvaluationResult] = [] true_subset = context.candidate_subset for child in self.children: child_context = context.for_child(condition=child, candidate_subset=true_subset) - child_evaluation, child_extra = child.evaluate(child_context) - child_evaluations.append(child_evaluation) - child_extras.extend(child_extra) - true_subset &= child_evaluation.true_subset - return AssetConditionEvaluation( - condition_snapshot=self.snapshot, - true_subset=true_subset, - candidate_subset=context.candidate_subset, - child_evaluations=child_evaluations, - ), child_extras + child_result = child.evaluate(child_context) + child_results.append(child_result) + true_subset &= child_result.true_subset + return AssetConditionEvaluationResult.create_from_children( + context, true_subset, child_results + ) class OrAssetCondition( @@ -340,25 +387,20 @@ def description(self) -> str: return "Any of" def evaluate( - self, context: AssetConditionEvaluationContext - ) -> Tuple[AssetConditionEvaluation, Sequence[AssetConditionCursorExtras]]: - child_evaluations: List[AssetConditionEvaluation] = [] - child_extras: List[AssetConditionCursorExtras] = [] + self, context: "AssetConditionEvaluationContext" + ) -> AssetConditionEvaluationResult: + child_results: List[AssetConditionEvaluationResult] = [] true_subset = context.empty_subset() for child in self.children: child_context = context.for_child( condition=child, candidate_subset=context.candidate_subset ) - child_evaluation, child_extra = child.evaluate(child_context) - child_evaluations.append(child_evaluation) - child_extras.extend(child_extra) - true_subset |= child_evaluation.true_subset - return AssetConditionEvaluation( - condition_snapshot=self.snapshot, - true_subset=true_subset, - candidate_subset=context.candidate_subset, - child_evaluations=child_evaluations, - ), child_extras + child_result = child.evaluate(child_context) + child_results.append(child_result) + true_subset |= child_result.true_subset + return AssetConditionEvaluationResult.create_from_children( + context, true_subset, child_results + ) class NotAssetCondition( @@ -380,17 +422,14 @@ def child(self) -> AssetCondition: return self.children[0] def evaluate( - self, context: AssetConditionEvaluationContext - ) -> Tuple[AssetConditionEvaluation, Sequence[AssetConditionCursorExtras]]: + self, context: "AssetConditionEvaluationContext" + ) -> AssetConditionEvaluationResult: child_context = context.for_child( condition=self.child, candidate_subset=context.candidate_subset ) - child_evaluation, child_extras = self.child.evaluate(child_context) - true_subset = context.candidate_subset - child_evaluation.true_subset + child_result = self.child.evaluate(child_context) + true_subset = context.candidate_subset - child_result.true_subset - return AssetConditionEvaluation( - condition_snapshot=self.snapshot, - true_subset=true_subset, - candidate_subset=context.candidate_subset, - child_evaluations=[child_evaluation], - ), child_extras + return AssetConditionEvaluationResult.create_from_children( + context, true_subset, [child_result] + ) diff --git a/python_modules/dagster/dagster/_core/definitions/asset_daemon_context.py b/python_modules/dagster/dagster/_core/definitions/asset_daemon_context.py index 41e7a08dc3ee5..a7549ca9da64f 100644 --- a/python_modules/dagster/dagster/_core/definitions/asset_daemon_context.py +++ b/python_modules/dagster/dagster/_core/definitions/asset_daemon_context.py @@ -209,20 +209,20 @@ def evaluate_asset( expected_data_time_mapping=expected_data_time_mapping, ) - evaluation, extras = asset_condition.evaluate(context) + evaluation_result = asset_condition.evaluate(context) new_asset_cursor = AssetConditionCursor( asset_key=asset_key, previous_max_storage_id=context.new_max_storage_id, previous_evaluation_timestamp=context.evaluation_time.timestamp(), - previous_evaluation=evaluation, - extras=extras, + previous_evaluation=evaluation_result.evaluation, + extra_values_by_unique_id=evaluation_result.extra_values_by_unique_id, ) expected_data_time = get_expected_data_time_for_asset_key( - context, will_materialize=evaluation.true_subset.size > 0 + context, will_materialize=evaluation_result.true_subset.size > 0 ) - return evaluation, new_asset_cursor, expected_data_time + return evaluation_result.evaluation, new_asset_cursor, expected_data_time def get_asset_condition_evaluations( self, diff --git a/python_modules/dagster/dagster/_core/definitions/asset_daemon_cursor.py b/python_modules/dagster/dagster/_core/definitions/asset_daemon_cursor.py index da0653f83e012..29301525ee795 100644 --- a/python_modules/dagster/dagster/_core/definitions/asset_daemon_cursor.py +++ b/python_modules/dagster/dagster/_core/definitions/asset_daemon_cursor.py @@ -55,7 +55,7 @@ class AssetConditionCursor(NamedTuple): previous_max_storage_id: Optional[int] previous_evaluation_timestamp: Optional[float] - extras: Sequence[AssetConditionCursorExtras] + extra_values_by_unique_id: Mapping[str, PackableValue] @staticmethod def empty(asset_key: AssetKey) -> "AssetConditionCursor": @@ -64,21 +64,16 @@ def empty(asset_key: AssetKey) -> "AssetConditionCursor": previous_evaluation=None, previous_max_storage_id=None, previous_evaluation_timestamp=None, - extras=[], + extra_values_by_unique_id={}, ) - def get_extras_value( - self, condition: "AssetCondition", key: str, as_type: Type[T] - ) -> Optional[T]: - """Returns a value from the extras dict for the given condition, if it exists and is of the - expected type. Otherwise, returns None. + def get_extras_value(self, condition: "AssetCondition", as_type: Type[T]) -> Optional[T]: + """Returns the value from the extras dict for the given condition, if it exists and is of + the expected type. Otherwise, returns None. """ - for condition_extras in self.extras: - if condition_extras.condition_snapshot == condition.snapshot: - extras_value = condition_extras.extras.get(key) - if isinstance(extras_value, as_type): - return extras_value - return None + extras_value = self.extra_values_by_unique_id.get(condition.unique_id) + if isinstance(extras_value, as_type): + return extras_value return None def get_previous_requested_or_discarded_subset( @@ -193,18 +188,12 @@ def get_backcompat_asset_condition_cursor( previous_evaluation=latest_evaluation, previous_evaluation_timestamp=latest_timestamp, previous_max_storage_id=latest_storage_id, - extras=[ - # the only information we need to preserve from the previous cursor is the handled - # subset - AssetConditionCursorExtras( - condition_snapshot=RuleCondition(MaterializeOnMissingRule()).snapshot, - extras={MaterializeOnMissingRule.HANDLED_SUBSET_KEY: handled_root_subset}, - ) - ] - # only include this information if it's non-empty (otherwise we can just rebuild it from - # the set of materialized partitions later on) + # the only information we need to preserve from the previous cursor is the handled subset + extra_values_by_unique_id={ + RuleCondition(MaterializeOnMissingRule()).unique_id: handled_root_subset, + } if handled_root_subset and handled_root_subset.size > 0 - else [], + else {}, ) diff --git a/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule.py b/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule.py index c73fff29ab11e..9eac479d7d0d1 100644 --- a/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule.py +++ b/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule.py @@ -602,8 +602,6 @@ def evaluate_for_asset(self, context: AssetConditionEvaluationContext) -> RuleEv @whitelist_for_serdes class MaterializeOnMissingRule(AutoMaterializeRule, NamedTuple("_MaterializeOnMissingRule", [])): - HANDLED_SUBSET_KEY: str = "handled_subset" - @property def decision_type(self) -> AutoMaterializeDecisionType: return AutoMaterializeDecisionType.MATERIALIZE @@ -616,9 +614,7 @@ def get_handled_subset(self, context: AssetConditionEvaluationContext) -> AssetS """Returns the AssetSubset which has been handled (materialized, requested, or discarded). Accounts for cases in which the partitions definition may have changed between ticks. """ - previous_handled_subset = context.cursor.get_extras_value( - context.condition, self.HANDLED_SUBSET_KEY, AssetSubset - ) + previous_handled_subset = context.cursor.get_extras_value(context.condition, AssetSubset) if previous_handled_subset: # partitioned -> unpartitioned or vice versa if previous_handled_subset.is_partitioned != (context.partitions_def is not None): @@ -654,7 +650,7 @@ def evaluate_for_asset(self, context: AssetConditionEvaluationContext) -> RuleEv if handled_subset.size > 0 else context.candidate_subset ) - return (unhandled_candidates, [], {self.HANDLED_SUBSET_KEY: handled_subset}) + return (unhandled_candidates, [], handled_subset) @whitelist_for_serdes diff --git a/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule_evaluation.py b/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule_evaluation.py index f3e096aa5e683..8418008908a09 100644 --- a/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule_evaluation.py +++ b/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule_evaluation.py @@ -9,7 +9,6 @@ AbstractSet, Dict, FrozenSet, - Mapping, NamedTuple, Optional, Sequence, @@ -129,9 +128,7 @@ def metadata(self) -> MetadataMapping: } -RuleEvaluationResults = Tuple[ - AssetSubset, Sequence["AssetSubsetWithMetadata"], Mapping[str, PackableValue] -] +RuleEvaluationResults = Tuple[AssetSubset, Sequence["AssetSubsetWithMetadata"], PackableValue] @whitelist_for_serdes From cde3a4ab678962fa71fdaf8bf36a3fcf9bdd2cb8 Mon Sep 17 00:00:00 2001 From: Owen Kephart Date: Tue, 19 Dec 2023 10:26:38 -0800 Subject: [PATCH 13/56] Update AutoMaterializeRul return type --- .../_core/definitions/asset_condition.py | 12 +- .../asset_condition_evaluation_context.py | 64 +++++- .../definitions/auto_materialize_rule.py | 197 +++++++++--------- .../auto_materialize_rule_evaluation.py | 4 + .../freshness_based_auto_materialize.py | 14 +- 5 files changed, 174 insertions(+), 117 deletions(-) diff --git a/python_modules/dagster/dagster/_core/definitions/asset_condition.py b/python_modules/dagster/dagster/_core/definitions/asset_condition.py index bf34385e573b5..c87be7b774120 100644 --- a/python_modules/dagster/dagster/_core/definitions/asset_condition.py +++ b/python_modules/dagster/dagster/_core/definitions/asset_condition.py @@ -339,16 +339,12 @@ def evaluate( context.root_context.daemon_context._verbose_log_fn( # noqa f"Evaluating rule: {self.rule.to_snapshot()}" ) - true_subset, subsets_with_metadata, extra_value = self.rule.evaluate_for_asset(context) + evaluation_result = self.rule.evaluate_for_asset(context) context.root_context.daemon_context._verbose_log_fn( # noqa - f"Rule returned {true_subset.size} partitions" f"{true_subset}" - ) - return AssetConditionEvaluationResult.create( - context=context, - true_subset=true_subset, - subsets_with_metadata=subsets_with_metadata, - extra_value=extra_value, + f"Rule returned {evaluation_result.true_subset.size} partitions:" + f"{evaluation_result.true_subset}" ) + return evaluation_result class AndAssetCondition( diff --git a/python_modules/dagster/dagster/_core/definitions/asset_condition_evaluation_context.py b/python_modules/dagster/dagster/_core/definitions/asset_condition_evaluation_context.py index 4368976f3a63d..df926140847c8 100644 --- a/python_modules/dagster/dagster/_core/definitions/asset_condition_evaluation_context.py +++ b/python_modules/dagster/dagster/_core/definitions/asset_condition_evaluation_context.py @@ -1,11 +1,24 @@ import dataclasses import datetime import functools +import operator +from collections import defaultdict from dataclasses import dataclass -from typing import TYPE_CHECKING, AbstractSet, Any, Callable, Mapping, Optional, Sequence, Tuple +from typing import ( + TYPE_CHECKING, + AbstractSet, + Any, + Callable, + FrozenSet, + Mapping, + Optional, + Sequence, + Tuple, +) from dagster._core.definitions.data_time import CachingDataTimeResolver from dagster._core.definitions.events import AssetKey, AssetKeyPartitionKey +from dagster._core.definitions.metadata import MetadataValue from dagster._core.definitions.partition import PartitionsDefinition from dagster._core.definitions.partition_mapping import IdentityPartitionMapping from dagster._core.definitions.time_window_partition_mapping import TimeWindowPartitionMapping @@ -291,5 +304,54 @@ def will_update_asset_partition(self, asset_partition: AssetKeyPartitionKey) -> return False return asset_partition in parent_evaluation.true_subset + def add_evaluation_data_from_previous_tick( + self, + asset_partitions_by_frozen_metadata: Mapping[ + FrozenSet[Tuple[str, MetadataValue]], AbstractSet[AssetKeyPartitionKey] + ], + ignore_subset: AssetSubset, + ) -> Tuple[AssetSubset, Sequence["AssetSubsetWithMetadata"]]: + """Combines information calculated on this tick with information from the previous tick, + returning a tuple of the combined true subset and the combined subsets with metadata. + + Args: + asset_partitions_by_frozen_metadata: A mapping from metadata to the set of asset + partitions that the rule applies to. + ignore_subset: An AssetSubset which represents information that we should *not* carry + forward from the previous tick. + """ + from .asset_condition import AssetSubsetWithMetadata + + mapping = defaultdict(lambda: self.empty_subset()) + for frozen_metadata, asset_partitions in asset_partitions_by_frozen_metadata.items(): + mapping[frozen_metadata] = AssetSubset.from_asset_partitions_set( + self.asset_key, self.partitions_def, asset_partitions + ) + + # get the set of all things we have metadata for + has_new_metadata_subset = functools.reduce( + operator.or_, mapping.values(), self.empty_subset() + ) + + # don't use information from the previous tick if we have explicit metadata for it or + # we've explicitly said to ignore it + ignore_subset = has_new_metadata_subset | ignore_subset + + for elt in self.previous_subsets_with_metadata: + carry_forward_subset = elt.subset - ignore_subset + if carry_forward_subset.size > 0: + mapping[elt.frozen_metadata] |= carry_forward_subset + + # for now, an asset is in the "true" subset if and only if we have some metadata for it + true_subset = functools.reduce(operator.or_, mapping.values(), self.empty_subset()) + + return ( + self.candidate_subset & true_subset, + [ + AssetSubsetWithMetadata(subset, dict(metadata)) + for metadata, subset in mapping.items() + ], + ) + def empty_subset(self) -> AssetSubset: return AssetSubset.empty(self.asset_key, self.partitions_def) diff --git a/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule.py b/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule.py index 9eac479d7d0d1..1167fc69abdf8 100644 --- a/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule.py +++ b/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule.py @@ -1,9 +1,8 @@ import datetime -import operator from abc import ABC, abstractmethod, abstractproperty from collections import defaultdict -from functools import reduce from typing import ( + TYPE_CHECKING, AbstractSet, Dict, Iterable, @@ -21,10 +20,8 @@ from dagster._core.definitions.asset_subset import AssetSubset from dagster._core.definitions.auto_materialize_rule_evaluation import ( AutoMaterializeDecisionType, - AutoMaterializeRuleEvaluationData, AutoMaterializeRuleSnapshot, ParentUpdatedRuleEvaluationData, - RuleEvaluationResults, WaitingOnAssetsRuleEvaluationData, ) from dagster._core.definitions.events import AssetKey, AssetKeyPartitionKey @@ -50,6 +47,9 @@ from .asset_condition_evaluation_context import AssetConditionEvaluationContext from .asset_graph import sort_key_for_asset_partition +if TYPE_CHECKING: + from dagster._core.definitions.asset_condition import AssetConditionEvaluationResult + class AutoMaterializeRule(ABC): """An AutoMaterializeRule defines a bit of logic which helps determine if a materialization @@ -75,61 +75,10 @@ def description(self) -> str: """ ... - def add_evaluation_data_from_previous_tick( - self, - context: AssetConditionEvaluationContext, - asset_partitions_by_evaluation_data: Mapping[ - AutoMaterializeRuleEvaluationData, Set[AssetKeyPartitionKey] - ], - ignore_subset: AssetSubset, - ) -> RuleEvaluationResults: - """Combines evaluation data calculated on this tick with evaluation data calculated on the - previous tick. - - Args: - context: The current RuleEvaluationContext. - asset_partitions_by_evaluation_data: A mapping from evaluation data to the set of asset - partitions that the rule applies to. - ignore_subset: An AssetSubset which represents information that we should *not* carry - forward from the previous tick. - """ - from .asset_condition import AssetSubsetWithMetadata - - mapping = defaultdict(lambda: context.empty_subset()) - for evaluation_data, asset_partitions in asset_partitions_by_evaluation_data.items(): - mapping[ - frozenset(evaluation_data.metadata.items()) - ] = AssetSubset.from_asset_partitions_set( - context.asset_key, context.partitions_def, asset_partitions - ) - - # get the set of all things we have metadata for - has_metadata_subset = context.empty_subset() - for evaluation_data, subset in mapping.items(): - has_metadata_subset |= subset - - # don't use information from the previous tick if we have explicit metadata for it or - # we've explicitly said to ignore it - ignore_subset = has_metadata_subset | ignore_subset - - for elt in context.previous_subsets_with_metadata: - carry_forward_subset = elt.subset - ignore_subset - if carry_forward_subset.size > 0: - mapping[elt.frozen_metadata] |= carry_forward_subset - - # for now, an asset is in the "true" subset if and only if we have some metadata for it - true_subset = reduce(operator.or_, mapping.values(), context.empty_subset()) - return ( - context.candidate_subset & true_subset, - [ - AssetSubsetWithMetadata(subset, dict(metadata)) - for metadata, subset in mapping.items() - ], - {}, - ) - @abstractmethod - def evaluate_for_asset(self, context: AssetConditionEvaluationContext) -> RuleEvaluationResults: + def evaluate_for_asset( + self, context: AssetConditionEvaluationContext + ) -> "AssetConditionEvaluationResult": """The core evaluation function for the rule. This function takes in a context object and returns a mapping from evaluated rules to the set of asset partitions that the rule applies to. @@ -288,8 +237,15 @@ def decision_type(self) -> AutoMaterializeDecisionType: def description(self) -> str: return "required to meet this or downstream asset's freshness policy" - def evaluate_for_asset(self, context: AssetConditionEvaluationContext) -> RuleEvaluationResults: - return freshness_evaluation_results_for_asset_key(context.root_context) + def evaluate_for_asset( + self, context: AssetConditionEvaluationContext + ) -> "AssetConditionEvaluationResult": + from .asset_condition import AssetConditionEvaluationResult + + true_subset, subsets_with_metadata = freshness_evaluation_results_for_asset_key( + context.root_context + ) + return AssetConditionEvaluationResult.create(context, true_subset, subsets_with_metadata) @whitelist_for_serdes @@ -395,7 +351,11 @@ def get_new_asset_partitions_to_request( for time_partition_key in missed_time_partition_keys } - def evaluate_for_asset(self, context: AssetConditionEvaluationContext) -> RuleEvaluationResults: + def evaluate_for_asset( + self, context: AssetConditionEvaluationContext + ) -> "AssetConditionEvaluationResult": + from .asset_condition import AssetConditionEvaluationResult + new_asset_partitions_to_request = self.get_new_asset_partitions_to_request(context) asset_subset_to_request = AssetSubset.from_asset_partitions_set( context.asset_key, context.partitions_def, new_asset_partitions_to_request @@ -404,7 +364,7 @@ def evaluate_for_asset(self, context: AssetConditionEvaluationContext) -> RuleEv - context.materialized_requested_or_discarded_since_previous_tick_subset ) - return asset_subset_to_request, [], {} + return AssetConditionEvaluationResult.create(context, true_subset=asset_subset_to_request) @whitelist_for_serdes @@ -509,10 +469,14 @@ def description(self) -> str: else: return base - def evaluate_for_asset(self, context: AssetConditionEvaluationContext) -> RuleEvaluationResults: + def evaluate_for_asset( + self, context: AssetConditionEvaluationContext + ) -> "AssetConditionEvaluationResult": """Evaluates the set of asset partitions of this asset whose parents have been updated, or will update on this tick. """ + from .asset_condition import AssetConditionEvaluationResult + asset_partitions_by_updated_parents: Mapping[ AssetKeyPartitionKey, Set[AssetKeyPartitionKey] ] = defaultdict(set) @@ -590,14 +554,14 @@ def evaluate_for_asset(self, context: AssetConditionEvaluationContext) -> RuleEv will_update_asset_keys=frozenset( will_update_parent_assets_by_asset_partition.get(asset_partition, []) ), - ) + ).frozen_metadata ].add(asset_partition) - return self.add_evaluation_data_from_previous_tick( - context, + true_subset, subsets_with_metadata = context.add_evaluation_data_from_previous_tick( asset_partitions_by_evaluation_data, ignore_subset=context.materialized_requested_or_discarded_since_previous_tick_subset, ) + return AssetConditionEvaluationResult.create(context, true_subset, subsets_with_metadata) @whitelist_for_serdes @@ -637,10 +601,14 @@ def get_handled_subset(self, context: AssetConditionEvaluationContext) -> AssetS | context.materialized_since_previous_tick_subset ) - def evaluate_for_asset(self, context: AssetConditionEvaluationContext) -> RuleEvaluationResults: + def evaluate_for_asset( + self, context: AssetConditionEvaluationContext + ) -> "AssetConditionEvaluationResult": """Evaluates the set of asset partitions for this asset which are missing and were not previously discarded. """ + from .asset_condition import AssetConditionEvaluationResult + handled_subset = self.get_handled_subset(context) unhandled_candidates = ( context.candidate_subset @@ -650,7 +618,14 @@ def evaluate_for_asset(self, context: AssetConditionEvaluationContext) -> RuleEv if handled_subset.size > 0 else context.candidate_subset ) - return (unhandled_candidates, [], handled_subset) + + return AssetConditionEvaluationResult.create( + context, + true_subset=unhandled_candidates, + # we keep track of the handled subset instead of the unhandled subset because new + # partitions may spontaneously jump into existence at any time + extra_value=handled_subset, + ) @whitelist_for_serdes @@ -663,7 +638,11 @@ def decision_type(self) -> AutoMaterializeDecisionType: def description(self) -> str: return "waiting on upstream data to be up to date" - def evaluate_for_asset(self, context: AssetConditionEvaluationContext) -> RuleEvaluationResults: + def evaluate_for_asset( + self, context: AssetConditionEvaluationContext + ) -> "AssetConditionEvaluationResult": + from .asset_condition import AssetConditionEvaluationResult + asset_partitions_by_evaluation_data = defaultdict(set) # only need to evaluate net-new candidates and candidates whose parents have changed @@ -686,14 +665,13 @@ def evaluate_for_asset(self, context: AssetConditionEvaluationContext) -> RuleEv ) if outdated_ancestors: asset_partitions_by_evaluation_data[ - WaitingOnAssetsRuleEvaluationData(frozenset(outdated_ancestors)) + WaitingOnAssetsRuleEvaluationData(frozenset(outdated_ancestors)).frozen_metadata ].add(candidate) - return self.add_evaluation_data_from_previous_tick( - context, - asset_partitions_by_evaluation_data, - ignore_subset=subset_to_evaluate, + true_subset, subsets_with_metadata = context.add_evaluation_data_from_previous_tick( + asset_partitions_by_evaluation_data, ignore_subset=subset_to_evaluate ) + return AssetConditionEvaluationResult.create(context, true_subset, subsets_with_metadata) @whitelist_for_serdes @@ -709,7 +687,9 @@ def description(self) -> str: def evaluate_for_asset( self, context: AssetConditionEvaluationContext, - ) -> RuleEvaluationResults: + ) -> "AssetConditionEvaluationResult": + from .asset_condition import AssetConditionEvaluationResult + asset_partitions_by_evaluation_data = defaultdict(set) # only need to evaluate net-new candidates and candidates whose parents have changed @@ -733,14 +713,15 @@ def evaluate_for_asset( missing_parent_asset_keys.add(parent.asset_key) if missing_parent_asset_keys: asset_partitions_by_evaluation_data[ - WaitingOnAssetsRuleEvaluationData(frozenset(missing_parent_asset_keys)) + WaitingOnAssetsRuleEvaluationData( + frozenset(missing_parent_asset_keys) + ).frozen_metadata ].add(candidate) - return self.add_evaluation_data_from_previous_tick( - context, - asset_partitions_by_evaluation_data, - ignore_subset=subset_to_evaluate, + true_subset, subsets_with_metadata = context.add_evaluation_data_from_previous_tick( + asset_partitions_by_evaluation_data, ignore_subset=subset_to_evaluate ) + return AssetConditionEvaluationResult.create(context, true_subset, subsets_with_metadata) @whitelist_for_serdes @@ -776,7 +757,9 @@ def description(self) -> str: def evaluate_for_asset( self, context: AssetConditionEvaluationContext, - ) -> RuleEvaluationResults: + ) -> "AssetConditionEvaluationResult": + from .asset_condition import AssetConditionEvaluationResult + asset_partitions_by_evaluation_data = defaultdict(set) # only need to evaluate net-new candidates and candidates whose parents have changed @@ -819,14 +802,15 @@ def evaluate_for_asset( if non_updated_parent_keys: asset_partitions_by_evaluation_data[ - WaitingOnAssetsRuleEvaluationData(frozenset(non_updated_parent_keys)) + WaitingOnAssetsRuleEvaluationData( + frozenset(non_updated_parent_keys) + ).frozen_metadata ].add(candidate) - return self.add_evaluation_data_from_previous_tick( - context, - asset_partitions_by_evaluation_data, - ignore_subset=subset_to_evaluate, + true_subset, subsets_with_metadata = context.add_evaluation_data_from_previous_tick( + asset_partitions_by_evaluation_data, ignore_subset=subset_to_evaluate ) + return AssetConditionEvaluationResult.create(context, true_subset, subsets_with_metadata) @whitelist_for_serdes @@ -841,7 +825,11 @@ def decision_type(self) -> AutoMaterializeDecisionType: def description(self) -> str: return "required parent partitions do not exist" - def evaluate_for_asset(self, context: AssetConditionEvaluationContext) -> RuleEvaluationResults: + def evaluate_for_asset( + self, context: AssetConditionEvaluationContext + ) -> "AssetConditionEvaluationResult": + from .asset_condition import AssetConditionEvaluationResult + asset_partitions_by_evaluation_data = defaultdict(set) subset_to_evaluate = ( @@ -859,14 +847,15 @@ def evaluate_for_asset(self, context: AssetConditionEvaluationContext) -> RuleEv nonexistent_parent_keys = {parent.asset_key for parent in nonexistent_parent_partitions} if nonexistent_parent_keys: asset_partitions_by_evaluation_data[ - WaitingOnAssetsRuleEvaluationData(frozenset(nonexistent_parent_keys)) + WaitingOnAssetsRuleEvaluationData( + frozenset(nonexistent_parent_keys) + ).frozen_metadata ].add(candidate) - return self.add_evaluation_data_from_previous_tick( - context, - asset_partitions_by_evaluation_data, - ignore_subset=subset_to_evaluate, + true_subset, subsets_with_metadata = context.add_evaluation_data_from_previous_tick( + asset_partitions_by_evaluation_data, ignore_subset=subset_to_evaluate ) + return AssetConditionEvaluationResult.create(context, true_subset, subsets_with_metadata) @whitelist_for_serdes @@ -885,20 +874,23 @@ def description(self) -> str: else: return "targeted by an in-progress backfill" - def evaluate_for_asset(self, context: AssetConditionEvaluationContext) -> RuleEvaluationResults: + def evaluate_for_asset( + self, context: AssetConditionEvaluationContext + ) -> "AssetConditionEvaluationResult": + from .asset_condition import AssetConditionEvaluationResult + backfilling_subset = ( context.instance_queryer.get_active_backfill_target_asset_graph_subset() ).get_asset_subset(context.asset_key, context.asset_graph) if backfilling_subset.size == 0: - return context.empty_subset(), [], {} - - if self.all_partitions: + true_subset = context.empty_subset() + elif self.all_partitions: true_subset = context.candidate_subset else: true_subset = context.candidate_subset & backfilling_subset - return true_subset, [], {} + return AssetConditionEvaluationResult.create(context, true_subset) @whitelist_for_serdes @@ -913,7 +905,11 @@ def decision_type(self) -> AutoMaterializeDecisionType: def description(self) -> str: return f"exceeds {self.limit} materialization(s) per minute" - def evaluate_for_asset(self, context: AssetConditionEvaluationContext) -> RuleEvaluationResults: + def evaluate_for_asset( + self, context: AssetConditionEvaluationContext + ) -> "AssetConditionEvaluationResult": + from .asset_condition import AssetConditionEvaluationResult + # the set of asset partitions which exceed the limit rate_limited_asset_partitions = set( sorted( @@ -922,10 +918,9 @@ def evaluate_for_asset(self, context: AssetConditionEvaluationContext) -> RuleEv )[self.limit :] ) - return ( + return AssetConditionEvaluationResult.create( + context, AssetSubset.from_asset_partitions_set( context.asset_key, context.partitions_def, rate_limited_asset_partitions ), - [], - {}, ) diff --git a/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule_evaluation.py b/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule_evaluation.py index 8418008908a09..96bf15cf01c9a 100644 --- a/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule_evaluation.py +++ b/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule_evaluation.py @@ -74,6 +74,10 @@ class AutoMaterializeRuleEvaluationData(ABC): def metadata(self) -> MetadataMapping: raise NotImplementedError() + @property + def frozen_metadata(self) -> FrozenSet[Tuple[str, MetadataValue]]: + return frozenset(self.metadata.items()) + @whitelist_for_serdes class TextRuleEvaluationData( diff --git a/python_modules/dagster/dagster/_core/definitions/freshness_based_auto_materialize.py b/python_modules/dagster/dagster/_core/definitions/freshness_based_auto_materialize.py index 99ff47c7a4b39..e27eae206e965 100644 --- a/python_modules/dagster/dagster/_core/definitions/freshness_based_auto_materialize.py +++ b/python_modules/dagster/dagster/_core/definitions/freshness_based_auto_materialize.py @@ -8,7 +8,7 @@ maximum lag minutes. """ import datetime -from typing import TYPE_CHECKING, AbstractSet, Optional, Tuple +from typing import TYPE_CHECKING, AbstractSet, Optional, Sequence, Tuple import pendulum @@ -18,8 +18,9 @@ from dagster._utils.schedules import cron_string_iterator if TYPE_CHECKING: + from .asset_condition import AssetSubsetWithMetadata from .asset_condition_evaluation_context import AssetConditionEvaluationContext - from .auto_materialize_rule_evaluation import RuleEvaluationResults, TextRuleEvaluationData + from .auto_materialize_rule_evaluation import TextRuleEvaluationData def get_execution_period_for_policy( @@ -154,7 +155,7 @@ def get_expected_data_time_for_asset_key( def freshness_evaluation_results_for_asset_key( context: "AssetConditionEvaluationContext", -) -> "RuleEvaluationResults": +) -> Tuple[AssetSubset, Sequence["AssetSubsetWithMetadata"]]: """Returns a set of AssetKeyPartitionKeys to materialize in order to abide by the given FreshnessPolicies. @@ -168,7 +169,7 @@ def freshness_evaluation_results_for_asset_key( if not context.asset_graph.get_downstream_freshness_policies( asset_key=asset_key ) or context.asset_graph.is_partitioned(asset_key): - return context.empty_subset(), [], {} + return context.empty_subset(), [] # figure out the current contents of this asset current_data_time = context.data_time_resolver.get_current_data_time(asset_key, current_time) @@ -181,7 +182,7 @@ def freshness_evaluation_results_for_asset_key( # if executing the asset on this tick would not change its data time, then return if current_data_time == expected_data_time: - return context.empty_subset(), [], {} + return context.empty_subset(), [] # calculate the data times you would expect after all currently-executing runs # were to successfully complete @@ -223,7 +224,6 @@ def freshness_evaluation_results_for_asset_key( return ( AssetSubset.all(asset_key, None), [AssetSubsetWithMetadata(all_subset, evaluation_data.metadata)], - {}, ) else: - return context.empty_subset(), [], {} + return context.empty_subset(), [] From 5a6cee2e08f8860e068441da8db99abbd5b51561 Mon Sep 17 00:00:00 2001 From: Owen Kephart Date: Fri, 29 Dec 2023 16:11:23 -0500 Subject: [PATCH 14/56] Add timing information to evaluation objects --- .../dagster/_core/definitions/asset_condition.py | 8 ++++++++ .../asset_condition_evaluation_context.py | 5 +++++ .../auto_materialize_rule_evaluation.py | 8 ++++++++ .../dagster/_utils/test/schedule_storage.py | 14 ++++++++++++++ 4 files changed, 35 insertions(+) diff --git a/python_modules/dagster/dagster/_core/definitions/asset_condition.py b/python_modules/dagster/dagster/_core/definitions/asset_condition.py index c87be7b774120..4a9e8f4d5da0e 100644 --- a/python_modules/dagster/dagster/_core/definitions/asset_condition.py +++ b/python_modules/dagster/dagster/_core/definitions/asset_condition.py @@ -15,6 +15,8 @@ Union, ) +import pendulum + import dagster._check as check from dagster._core.definitions.events import AssetKey from dagster._core.definitions.metadata import MetadataMapping, MetadataValue @@ -112,6 +114,8 @@ class AssetConditionEvaluation(NamedTuple): condition_snapshot: AssetConditionSnapshot true_subset: AssetSubset candidate_subset: Union[AssetSubset, HistoricalAllPartitionsSubsetSentinel] + start_timestamp: Optional[float] + end_timestamp: Optional[float] subsets_with_metadata: Sequence[AssetSubsetWithMetadata] = [] child_evaluations: Sequence["AssetConditionEvaluation"] = [] @@ -195,6 +199,8 @@ def create_from_children( context.condition.snapshot, true_subset=true_subset, candidate_subset=context.candidate_subset, + start_timestamp=context.start_timestamp, + end_timestamp=pendulum.now("UTC").timestamp(), subsets_with_metadata=[], child_evaluations=[child_result.evaluation for child_result in child_results], ), @@ -218,6 +224,8 @@ def create( evaluation=AssetConditionEvaluation( context.condition.snapshot, true_subset=true_subset, + start_timestamp=context.start_timestamp, + end_timestamp=pendulum.now("UTC").timestamp(), candidate_subset=context.candidate_subset, subsets_with_metadata=subsets_with_metadata, ), diff --git a/python_modules/dagster/dagster/_core/definitions/asset_condition_evaluation_context.py b/python_modules/dagster/dagster/_core/definitions/asset_condition_evaluation_context.py index df926140847c8..5296f8693f15d 100644 --- a/python_modules/dagster/dagster/_core/definitions/asset_condition_evaluation_context.py +++ b/python_modules/dagster/dagster/_core/definitions/asset_condition_evaluation_context.py @@ -16,6 +16,8 @@ Tuple, ) +import pendulum + from dagster._core.definitions.data_time import CachingDataTimeResolver from dagster._core.definitions.events import AssetKey, AssetKeyPartitionKey from dagster._core.definitions.metadata import MetadataValue @@ -63,6 +65,7 @@ class AssetConditionEvaluationContext: evaluation_results_by_key: Mapping[AssetKey, "AssetConditionEvaluation"] expected_data_time_mapping: Mapping[AssetKey, Optional[datetime.datetime]] + start_timestamp: float root_ref: Optional["AssetConditionEvaluationContext"] = None @staticmethod @@ -94,6 +97,7 @@ def create( daemon_context=daemon_context, evaluation_results_by_key=evaluation_results_by_key, expected_data_time_mapping=expected_data_time_mapping, + start_timestamp=pendulum.now("UTC").timestamp(), ) def for_child( @@ -107,6 +111,7 @@ def for_child( else None, candidate_subset=candidate_subset, root_ref=self.root_context, + start_timestamp=pendulum.now("UTC").timestamp(), ) @property diff --git a/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule_evaluation.py b/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule_evaluation.py index 96bf15cf01c9a..1088e27eab15f 100644 --- a/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule_evaluation.py +++ b/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule_evaluation.py @@ -275,6 +275,8 @@ def _get_child_rule_evaluation( candidate_subset=HistoricalAllPartitionsSubsetSentinel() if is_partitioned else AssetSubset.empty(asset_key, None), + start_timestamp=None, + end_timestamp=None, subsets_with_metadata=subsets_with_metadata, ) @@ -340,6 +342,8 @@ def _get_child_decision_type_evaluation( else AssetSubset.all(asset_key, None), subsets_with_metadata=[], child_evaluations=child_evaluations, + start_timestamp=None, + end_timestamp=None, ) if decision_type == AutoMaterializeDecisionType.MATERIALIZE: @@ -365,6 +369,8 @@ def _get_child_decision_type_evaluation( else AssetSubset.all(asset_key, None), subsets_with_metadata=[], child_evaluations=[evaluation], + start_timestamp=None, + end_timestamp=None, ) def unpack( @@ -437,6 +443,8 @@ def unpack( else AssetSubset.all(asset_key, None), subsets_with_metadata=[], child_evaluations=child_evaluations, + start_timestamp=None, + end_timestamp=None, ).with_run_ids(cast(AbstractSet[str], unpacked_dict.get("run_ids", set()))) diff --git a/python_modules/dagster/dagster/_utils/test/schedule_storage.py b/python_modules/dagster/dagster/_utils/test/schedule_storage.py index f1962aa6fee97..7e15f29dc3577 100644 --- a/python_modules/dagster/dagster/_utils/test/schedule_storage.py +++ b/python_modules/dagster/dagster/_utils/test/schedule_storage.py @@ -741,11 +741,15 @@ def test_auto_materialize_asset_evaluations(self, storage) -> None: condition_snapshot=condition_snapshot, true_subset=AssetSubset(asset_key=AssetKey("asset_one"), value=False), candidate_subset=AssetSubset(asset_key=AssetKey("asset_one"), value=False), + start_timestamp=0, + end_timestamp=1, ).with_run_ids(set()), AssetConditionEvaluation( condition_snapshot=condition_snapshot, true_subset=AssetSubset(asset_key=AssetKey("asset_two"), value=True), candidate_subset=AssetSubset(asset_key=AssetKey("asset_two"), value=True), + start_timestamp=0, + end_timestamp=1, subsets_with_metadata=[ AssetSubsetWithMetadata( AssetSubset(asset_key=AssetKey("asset_two"), value=True), @@ -796,6 +800,8 @@ def test_auto_materialize_asset_evaluations(self, storage) -> None: asset_evaluations=[ AssetConditionEvaluation( condition_snapshot=condition_snapshot, + start_timestamp=0, + end_timestamp=1, true_subset=AssetSubset(asset_key=AssetKey("asset_one"), value=True), candidate_subset=AssetSubset(asset_key=AssetKey("asset_one"), value=True), ).with_run_ids(set()), @@ -825,12 +831,16 @@ def test_auto_materialize_asset_evaluations(self, storage) -> None: eval_one = AssetConditionEvaluation( condition_snapshot=AssetConditionSnapshot("foo", "bar", ""), + start_timestamp=0, + end_timestamp=1, true_subset=AssetSubset(asset_key=AssetKey("asset_one"), value=True), candidate_subset=AssetSubset(asset_key=AssetKey("asset_one"), value=True), ).with_run_ids(set()) eval_asset_three = AssetConditionEvaluation( condition_snapshot=AssetConditionSnapshot("foo", "bar", ""), + start_timestamp=0, + end_timestamp=1, true_subset=AssetSubset(asset_key=AssetKey("asset_three"), value=True), candidate_subset=AssetSubset(asset_key=AssetKey("asset_three"), value=True), ).with_run_ids(set()) @@ -875,6 +885,8 @@ def test_auto_materialize_asset_evaluations_with_partitions(self, storage) -> No asset_evaluations=[ AssetConditionEvaluation( condition_snapshot=AssetConditionSnapshot("foo", "bar", ""), + start_timestamp=0, + end_timestamp=1, true_subset=asset_subset, candidate_subset=asset_subset, subsets_with_metadata=[asset_subset_with_metadata], @@ -906,6 +918,8 @@ def test_purge_asset_evaluations(self, storage) -> None: asset_evaluations=[ AssetConditionEvaluation( condition_snapshot=AssetConditionSnapshot("foo", "bar", ""), + start_timestamp=0, + end_timestamp=1, true_subset=AssetSubset(asset_key=AssetKey("asset_one"), value=True), candidate_subset=AssetSubset(asset_key=AssetKey("asset_one"), value=True), subsets_with_metadata=[], From b1a03c9235da7690f9ba7a91cf82412cdabf32ff Mon Sep 17 00:00:00 2001 From: Owen Kephart Date: Wed, 3 Jan 2024 10:35:24 -0500 Subject: [PATCH 15/56] Fix non-subsettable multi-asset handling --- .../_core/definitions/asset_daemon_context.py | 14 ++-- .../asset_daemon_scenario.py | 81 +++++++++++++------ .../asset_daemon_scenario_states.py | 10 ++- .../updated_scenarios/cron_scenarios.py | 18 +++++ 4 files changed, 90 insertions(+), 33 deletions(-) diff --git a/python_modules/dagster/dagster/_core/definitions/asset_daemon_context.py b/python_modules/dagster/dagster/_core/definitions/asset_daemon_context.py index a7549ca9da64f..c95480ddc42ec 100644 --- a/python_modules/dagster/dagster/_core/definitions/asset_daemon_context.py +++ b/python_modules/dagster/dagster/_core/definitions/asset_daemon_context.py @@ -244,7 +244,6 @@ def get_asset_condition_evaluations( num_checked_assets = 0 num_auto_materialize_asset_keys = len(self.auto_materialize_asset_keys) - visited_multi_asset_keys = set() for asset_key in itertools.chain(*self.asset_graph.toposort_asset_keys()): # an asset may have already been visited if it was part of a non-subsettable multi-asset if asset_key not in self.auto_materialize_asset_keys: @@ -257,10 +256,6 @@ def get_asset_condition_evaluations( f" {asset_key.to_user_string()} ({num_checked_assets}/{num_auto_materialize_asset_keys})" ) - if asset_key in visited_multi_asset_keys: - self._verbose_log_fn(f"Asset {asset_key.to_user_string()} already visited") - continue - (evaluation, asset_cursor_for_asset, expected_data_time) = self.evaluate_asset( asset_key, evaluation_results_by_key, expected_data_time_mapping ) @@ -288,7 +283,14 @@ def get_asset_condition_evaluations( if num_requested > 0: for neighbor_key in self.asset_graph.get_required_multi_asset_keys(asset_key): expected_data_time_mapping[neighbor_key] = expected_data_time - visited_multi_asset_keys.add(neighbor_key) + + # make sure that the true_subset of the neighbor is accurate + if neighbor_key in evaluation_results_by_key: + neighbor_evaluation = evaluation_results_by_key[neighbor_key] + evaluation_results_by_key[neighbor_key] = neighbor_evaluation._replace( + true_subset=neighbor_evaluation.true_subset + | evaluation.true_subset._replace(asset_key=neighbor_key) + ) to_request |= { ap._replace(asset_key=neighbor_key) for ap in evaluation.true_subset.asset_partitions diff --git a/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/asset_daemon_scenario.py b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/asset_daemon_scenario.py index 82111df971f36..bdabe2924f487 100644 --- a/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/asset_daemon_scenario.py +++ b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/asset_daemon_scenario.py @@ -58,8 +58,10 @@ from dagster._core.definitions.automation_policy_sensor_definition import ( AutomationPolicySensorDefinition, ) +from dagster._core.definitions.decorators.asset_decorator import multi_asset from dagster._core.definitions.events import AssetKeyPartitionKey, CoercibleToAssetKey from dagster._core.definitions.executor_definition import in_process_executor +from dagster._core.definitions.partition import PartitionsDefinition from dagster._core.definitions.repository_definition.valid_definitions import ( SINGLETON_REPOSITORY_NAME, ) @@ -195,6 +197,12 @@ class AssetSpecWithPartitionsDef( ... +class MultiAssetSpec(NamedTuple): + specs: Sequence[AssetSpec] + partitions_def: Optional[PartitionsDefinition] = None + can_subset: bool = False + + class AssetDaemonScenarioState(NamedTuple): """Specifies the state of a given AssetDaemonScenario. This state can be modified by changing the set of asset definitions it contains, executing runs, updating the time, evaluating ticks, etc. @@ -209,7 +217,7 @@ class AssetDaemonScenarioState(NamedTuple): current_time (datetime): The current time of the scenario. """ - asset_specs: Sequence[Union[AssetSpec, AssetSpecWithPartitionsDef]] + asset_specs: Sequence[Union[AssetSpec, AssetSpecWithPartitionsDef, MultiAssetSpec]] current_time: datetime.datetime = pendulum.now("UTC") run_requests: Sequence[RunRequest] = [] serialized_cursor: str = serialize_value(AssetDaemonCursor.empty(0)) @@ -233,26 +241,35 @@ def compute_fn(context: AssetExecutionContext) -> None: AssetKey.from_coercible(s) for s in json.loads(context.run.tags.get(FAIL_TAG) or "[]") } - if context.asset_key in fail_keys: - raise Exception("Asset failed") + for asset_key in context.selected_asset_keys: + if asset_key in fail_keys: + raise Exception("Asset failed") assets = [] - params = { - "key", - "deps", - "group_name", - "code_version", - "auto_materialize_policy", - "freshness_policy", - "partitions_def", - } for spec in self.asset_specs: - assets.append( - asset( - compute_fn=compute_fn, - **{k: v for k, v in spec._asdict().items() if k in params}, + if isinstance(spec, MultiAssetSpec): + + @multi_asset(**spec._asdict()) + def _multi_asset(context: AssetExecutionContext): + return compute_fn(context) + + assets.append(_multi_asset) + else: + params = { + "key", + "deps", + "group_name", + "code_version", + "auto_materialize_policy", + "freshness_policy", + "partitions_def", + } + assets.append( + asset( + compute_fn=compute_fn, + **{k: v for k, v in spec._asdict().items() if k in params}, + ) ) - ) return assets @property @@ -269,16 +286,28 @@ def with_asset_properties( """Convenience method to update the properties of one or more assets in the scenario state.""" new_asset_specs = [] for spec in self.asset_specs: - if keys is None or spec.key in {AssetKey.from_coercible(key) for key in keys}: - if "partitions_def" in kwargs: - # partitions_def is not a field on AssetSpec, so we need to do this hack - new_asset_specs.append( - AssetSpecWithPartitionsDef(**{**spec._asdict(), **kwargs}) - ) - else: - new_asset_specs.append(spec._replace(**kwargs)) + if isinstance(spec, MultiAssetSpec): + partitions_def = kwargs.get("partitions_def", spec.partitions_def) + new_multi_specs = [ + s._replace(**{k: v for k, v in kwargs.items() if k != "partitions_def"}) + if keys is None or s.key in keys + else s + for s in spec.specs + ] + new_asset_specs.append( + spec._replace(partitions_def=partitions_def, specs=new_multi_specs) + ) else: - new_asset_specs.append(spec) + if keys is None or spec.key in {AssetKey.from_coercible(key) for key in keys}: + if "partitions_def" in kwargs: + # partitions_def is not a field on AssetSpec, so we need to do this hack + new_asset_specs.append( + AssetSpecWithPartitionsDef(**{**spec._asdict(), **kwargs}) + ) + else: + new_asset_specs.append(spec._replace(**kwargs)) + else: + new_asset_specs.append(spec) return self._replace(asset_specs=new_asset_specs) def with_automation_policy_sensors( diff --git a/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/updated_scenarios/asset_daemon_scenario_states.py b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/updated_scenarios/asset_daemon_scenario_states.py index a54580faad274..4ed18af21383d 100644 --- a/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/updated_scenarios/asset_daemon_scenario_states.py +++ b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/updated_scenarios/asset_daemon_scenario_states.py @@ -10,7 +10,7 @@ HourlyPartitionsDefinition, ) -from ..asset_daemon_scenario import AssetDaemonScenarioState +from ..asset_daemon_scenario import AssetDaemonScenarioState, MultiAssetSpec ############ # PARTITIONS @@ -63,6 +63,14 @@ ] ) +three_assets_not_subsettable = AssetDaemonScenarioState( + asset_specs=[ + MultiAssetSpec( + specs=[AssetSpec("A"), AssetSpec("B"), AssetSpec("C")], + ) + ] +) + ################## # PARTITION STATES ################## diff --git a/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/updated_scenarios/cron_scenarios.py b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/updated_scenarios/cron_scenarios.py index 1fdc6ba54caad..666665e368aa2 100644 --- a/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/updated_scenarios/cron_scenarios.py +++ b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/updated_scenarios/cron_scenarios.py @@ -11,6 +11,7 @@ hourly_partitions_def, one_asset, one_asset_depends_on_two, + three_assets_not_subsettable, time_partitions_start_str, ) @@ -56,6 +57,23 @@ def get_cron_policy( .evaluate_tick() .assert_requested_runs(), ), + AssetDaemonScenario( + id="basic_hourly_cron_unpartitioned_multi_asset", + initial_state=three_assets_not_subsettable.with_asset_properties( + auto_materialize_policy=get_cron_policy(basic_hourly_cron_rule) + ).with_current_time("2020-01-01T00:05"), + execution_fn=lambda state: state.evaluate_tick() + .assert_requested_runs(run_request(["A", "B", "C"])) + .with_current_time_advanced(seconds=30) + .evaluate_tick() + .assert_requested_runs() + .with_current_time_advanced(hours=1) + .evaluate_tick() + .assert_requested_runs(run_request(["A", "B", "C"])) + .with_current_time_advanced(seconds=30) + .evaluate_tick() + .assert_requested_runs(), + ), AssetDaemonScenario( id="basic_hourly_cron_partitioned", initial_state=one_asset.with_asset_properties( From d27f79a04e588f6565a698119c6e6e6d9c2613ce Mon Sep 17 00:00:00 2001 From: Owen Kephart Date: Thu, 4 Jan 2024 17:16:26 -0500 Subject: [PATCH 16/56] Rename AssetConditionCursor --- .../_core/definitions/asset_condition.py | 253 ++++++++++-------- .../asset_condition_evaluation_context.py | 83 +++--- .../_core/definitions/asset_daemon_context.py | 80 +++--- .../_core/definitions/asset_daemon_cursor.py | 142 +++++----- .../definitions/auto_materialize_rule.py | 76 +++--- .../auto_materialize_rule_evaluation.py | 3 +- .../dagster/_utils/test/schedule_storage.py | 11 + .../asset_daemon_scenario.py | 4 +- 8 files changed, 340 insertions(+), 312 deletions(-) diff --git a/python_modules/dagster/dagster/_core/definitions/asset_condition.py b/python_modules/dagster/dagster/_core/definitions/asset_condition.py index 4a9e8f4d5da0e..82726fdb97b34 100644 --- a/python_modules/dagster/dagster/_core/definitions/asset_condition.py +++ b/python_modules/dagster/dagster/_core/definitions/asset_condition.py @@ -4,7 +4,7 @@ from typing import ( TYPE_CHECKING, AbstractSet, - Any, + Dict, FrozenSet, List, Mapping, @@ -12,6 +12,8 @@ Optional, Sequence, Tuple, + Type, + TypeVar, Union, ) @@ -22,12 +24,7 @@ from dagster._core.definitions.metadata import MetadataMapping, MetadataValue from dagster._core.definitions.partition import AllPartitionsSubset from dagster._serdes.serdes import ( - FieldSerializer, PackableValue, - UnpackContext, - WhitelistMap, - pack_value, - unpack_value, whitelist_for_serdes, ) @@ -38,6 +35,9 @@ from .auto_materialize_rule import AutoMaterializeRule +T = TypeVar("T") + + @whitelist_for_serdes class HistoricalAllPartitionsSubsetSentinel(NamedTuple): """Serializable indicator that this value was an AllPartitionsSubset at serialization time, but @@ -77,58 +77,100 @@ def get_serializable_candidate_subset( return candidate_subset -class CandidateSubsetSerializer(FieldSerializer): - def pack( - self, - candidate_subset: AssetSubset, - whitelist_map: WhitelistMap, - descent_path: str, - ) -> Optional[Mapping[str, Any]]: - # On all ticks, the root condition starts with an AllPartitionsSubset as the candidate - # subset. This would be wasteful to calculate and serialize in its entirety, so we instead - # store this as `None` and reconstruct it as needed. - # This does mean that if new partitions are added between serialization time and read time, - # the candidate subset will contain those new partitions. - return pack_value( - get_serializable_candidate_subset(candidate_subset), whitelist_map, descent_path +class AssetConditionResult(NamedTuple): + condition: "AssetCondition" + start_timestamp: float + end_timestamp: float + + true_subset: AssetSubset + candidate_subset: AssetSubset + subsets_with_metadata: Sequence[AssetSubsetWithMetadata] + extra_state: PackableValue + + child_results: Sequence["AssetConditionResult"] + + @staticmethod + def create_from_children( + context: "AssetConditionEvaluationContext", + true_subset: AssetSubset, + child_results: Sequence["AssetConditionResult"], + ) -> "AssetConditionResult": + """Returns a new AssetConditionEvaluation from the given child results.""" + return AssetConditionResult( + condition=context.condition, + start_timestamp=context.start_timestamp, + end_timestamp=pendulum.now("UTC").timestamp(), + true_subset=true_subset, + candidate_subset=context.candidate_subset, + subsets_with_metadata=[], + child_results=child_results, + extra_state=None, ) - def unpack( - self, - serialized_candidate_subset: Optional[Mapping[str, Any]], - whitelist_map: WhitelistMap, - context: UnpackContext, - ) -> Union[AssetSubset, HistoricalAllPartitionsSubsetSentinel]: - return unpack_value( - serialized_candidate_subset, - (AssetSubset, HistoricalAllPartitionsSubsetSentinel), - whitelist_map, - context, + @staticmethod + def create( + context: "AssetConditionEvaluationContext", + true_subset: AssetSubset, + subsets_with_metadata: Sequence[AssetSubsetWithMetadata] = [], + extra_state: PackableValue = None, + ) -> "AssetConditionResult": + """Returns a new AssetConditionEvaluation from the given parameters.""" + return AssetConditionResult( + condition=context.condition, + start_timestamp=context.start_timestamp, + end_timestamp=pendulum.now("UTC").timestamp(), + true_subset=true_subset, + candidate_subset=context.candidate_subset, + subsets_with_metadata=subsets_with_metadata, + child_results=[], + extra_state=extra_state, ) -@whitelist_for_serdes(field_serializers={"candidate_subset": CandidateSubsetSerializer}) +@whitelist_for_serdes class AssetConditionEvaluation(NamedTuple): - """Internal representation of the results of evaluating a node in the evaluation tree.""" + """Serializable representation of the results of evaluating a node in the evaluation tree.""" condition_snapshot: AssetConditionSnapshot - true_subset: AssetSubset - candidate_subset: Union[AssetSubset, HistoricalAllPartitionsSubsetSentinel] start_timestamp: Optional[float] end_timestamp: Optional[float] - subsets_with_metadata: Sequence[AssetSubsetWithMetadata] = [] - child_evaluations: Sequence["AssetConditionEvaluation"] = [] + + true_subset: AssetSubset + candidate_subset: Union[AssetSubset, HistoricalAllPartitionsSubsetSentinel] + subsets_with_metadata: Sequence[AssetSubsetWithMetadata] + + child_evaluations: Sequence["AssetConditionEvaluation"] @property def asset_key(self) -> AssetKey: return self.true_subset.asset_key + @staticmethod + def from_result(result: AssetConditionResult) -> "AssetConditionEvaluation": + return AssetConditionEvaluation( + condition_snapshot=result.condition.snapshot, + start_timestamp=result.start_timestamp, + end_timestamp=result.end_timestamp, + true_subset=result.true_subset, + candidate_subset=get_serializable_candidate_subset(result.candidate_subset), + subsets_with_metadata=result.subsets_with_metadata, + child_evaluations=[ + AssetConditionEvaluation.from_result(child_result) + for child_result in result.child_results + ], + ) + def equivalent_to_stored_evaluation(self, other: Optional["AssetConditionEvaluation"]) -> bool: - """Returns if all fields other than `run_ids` are equal.""" + """Returns if this evaluation is functionally equivalent to the given stored evaluation. + This is used to determine if it is necessary to store this new evaluation in the database. + """ return ( other is not None and self.condition_snapshot == other.condition_snapshot - and self.true_subset == other.true_subset + # if any partitions are requested, then the state of the world must have meaninfully + # changed since the previous evaluation + and self.true_subset.size == 0 + and other.true_subset.size == 0 # the candidate subset gets modified during serialization and get_serializable_candidate_subset(self.candidate_subset) == get_serializable_candidate_subset(other.candidate_subset) @@ -175,65 +217,66 @@ def with_run_ids(self, run_ids: AbstractSet[str]) -> "AssetConditionEvaluationWi return AssetConditionEvaluationWithRunIds(evaluation=self, run_ids=frozenset(run_ids)) -class AssetConditionEvaluationResult(NamedTuple): - """Return value for the evaluate method of an AssetCondition.""" +@whitelist_for_serdes +class AssetConditionEvaluationState(NamedTuple): + """Incremental state calculated during the evaluation of an AssetCondition. This may be used + on the subsequent evaluation to make the computation more efficient. + + Attributes: + evaluation: The computed AssetConditionEvaluation. + evaluation_timestamp: The evaluation_timestamp at which the evaluation was performed. + max_storage_id: The maximum storage ID over all events used in this computation. + extra_state_by_unique_id: A mapping from the unique ID of each condition in the evaluation + tree to the extra state that was calculated for it, if any. + """ - condition: "AssetCondition" evaluation: AssetConditionEvaluation - extra_values_by_unique_id: Mapping[str, PackableValue] + evaluation_timestamp: Optional[float] + + max_storage_id: Optional[int] + extra_state_by_unique_id: Mapping[str, PackableValue] + + @property + def asset_key(self) -> AssetKey: + return self.evaluation.asset_key @property def true_subset(self) -> AssetSubset: return self.evaluation.true_subset - @staticmethod - def create_from_children( - context: "AssetConditionEvaluationContext", - true_subset: AssetSubset, - child_results: Sequence["AssetConditionEvaluationResult"], - ) -> "AssetConditionEvaluationResult": - """Returns a new AssetConditionEvaluationResult from the given child results.""" - return AssetConditionEvaluationResult( - condition=context.condition, - evaluation=AssetConditionEvaluation( - context.condition.snapshot, - true_subset=true_subset, - candidate_subset=context.candidate_subset, - start_timestamp=context.start_timestamp, - end_timestamp=pendulum.now("UTC").timestamp(), - subsets_with_metadata=[], - child_evaluations=[child_result.evaluation for child_result in child_results], - ), - extra_values_by_unique_id=dict( - item - for child_result in child_results - for item in child_result.extra_values_by_unique_id.items() - ), - ) - @staticmethod def create( - context: "AssetConditionEvaluationContext", - true_subset: AssetSubset, - subsets_with_metadata: Sequence[AssetSubsetWithMetadata] = [], - extra_value: PackableValue = None, - ) -> "AssetConditionEvaluationResult": - """Returns a new AssetConditionEvaluationResult from the given parameters.""" - return AssetConditionEvaluationResult( - condition=context.condition, - evaluation=AssetConditionEvaluation( - context.condition.snapshot, - true_subset=true_subset, - start_timestamp=context.start_timestamp, - end_timestamp=pendulum.now("UTC").timestamp(), - candidate_subset=context.candidate_subset, - subsets_with_metadata=subsets_with_metadata, - ), - extra_values_by_unique_id={context.condition.unique_id: extra_value} - if extra_value - else {}, + context: "AssetConditionEvaluationContext", root_result: AssetConditionResult + ) -> "AssetConditionEvaluationState": + """Convenience constructor to generate an AssetConditionEvaluationState from the root result + and the context in which it was evaluated. + """ + + # flatten the extra state into a single dict + def _flatten_extra_state(r: AssetConditionResult) -> Mapping[str, PackableValue]: + extra_state: Dict[str, PackableValue] = ( + {r.condition.unique_id: r.extra_state} if r.extra_state else {} + ) + for child in r.child_results: + extra_state.update(_flatten_extra_state(child)) + return extra_state + + return AssetConditionEvaluationState( + evaluation=AssetConditionEvaluation.from_result(root_result), + evaluation_timestamp=context.evaluation_time.timestamp(), + max_storage_id=context.new_max_storage_id, + extra_state_by_unique_id=_flatten_extra_state(root_result), ) + def get_extra_state(self, condition: "AssetCondition", as_type: Type[T]) -> Optional[T]: + """Returns the value from the extras dict for the given condition, if it exists and is of + the expected type. Otherwise, returns None. + """ + extra_state = self.extra_state_by_unique_id.get(condition.unique_id) + if isinstance(extra_state, as_type): + return extra_state + return None + @whitelist_for_serdes class AssetConditionEvaluationWithRunIds(NamedTuple): @@ -268,9 +311,7 @@ def unique_id(self) -> str: return hashlib.md5("".join(parts).encode()).hexdigest() @abstractmethod - def evaluate( - self, context: "AssetConditionEvaluationContext" - ) -> AssetConditionEvaluationResult: + def evaluate(self, context: "AssetConditionEvaluationContext") -> AssetConditionResult: raise NotImplementedError() @abstractproperty @@ -341,9 +382,7 @@ def unique_id(self) -> str: def description(self) -> str: return self.rule.description - def evaluate( - self, context: "AssetConditionEvaluationContext" - ) -> AssetConditionEvaluationResult: + def evaluate(self, context: "AssetConditionEvaluationContext") -> AssetConditionResult: context.root_context.daemon_context._verbose_log_fn( # noqa f"Evaluating rule: {self.rule.to_snapshot()}" ) @@ -365,19 +404,15 @@ class AndAssetCondition( def description(self) -> str: return "All of" - def evaluate( - self, context: "AssetConditionEvaluationContext" - ) -> AssetConditionEvaluationResult: - child_results: List[AssetConditionEvaluationResult] = [] + def evaluate(self, context: "AssetConditionEvaluationContext") -> AssetConditionResult: + child_results: List[AssetConditionResult] = [] true_subset = context.candidate_subset for child in self.children: child_context = context.for_child(condition=child, candidate_subset=true_subset) child_result = child.evaluate(child_context) child_results.append(child_result) true_subset &= child_result.true_subset - return AssetConditionEvaluationResult.create_from_children( - context, true_subset, child_results - ) + return AssetConditionResult.create_from_children(context, true_subset, child_results) class OrAssetCondition( @@ -390,10 +425,8 @@ class OrAssetCondition( def description(self) -> str: return "Any of" - def evaluate( - self, context: "AssetConditionEvaluationContext" - ) -> AssetConditionEvaluationResult: - child_results: List[AssetConditionEvaluationResult] = [] + def evaluate(self, context: "AssetConditionEvaluationContext") -> AssetConditionResult: + child_results: List[AssetConditionResult] = [] true_subset = context.empty_subset() for child in self.children: child_context = context.for_child( @@ -402,9 +435,7 @@ def evaluate( child_result = child.evaluate(child_context) child_results.append(child_result) true_subset |= child_result.true_subset - return AssetConditionEvaluationResult.create_from_children( - context, true_subset, child_results - ) + return AssetConditionResult.create_from_children(context, true_subset, child_results) class NotAssetCondition( @@ -425,15 +456,11 @@ def description(self) -> str: def child(self) -> AssetCondition: return self.children[0] - def evaluate( - self, context: "AssetConditionEvaluationContext" - ) -> AssetConditionEvaluationResult: + def evaluate(self, context: "AssetConditionEvaluationContext") -> AssetConditionResult: child_context = context.for_child( condition=self.child, candidate_subset=context.candidate_subset ) child_result = self.child.evaluate(child_context) true_subset = context.candidate_subset - child_result.true_subset - return AssetConditionEvaluationResult.create_from_children( - context, true_subset, [child_result] - ) + return AssetConditionResult.create_from_children(context, true_subset, [child_result]) diff --git a/python_modules/dagster/dagster/_core/definitions/asset_condition_evaluation_context.py b/python_modules/dagster/dagster/_core/definitions/asset_condition_evaluation_context.py index 5296f8693f15d..a5a1d78e5f928 100644 --- a/python_modules/dagster/dagster/_core/definitions/asset_condition_evaluation_context.py +++ b/python_modules/dagster/dagster/_core/definitions/asset_condition_evaluation_context.py @@ -26,12 +26,16 @@ from dagster._core.definitions.time_window_partition_mapping import TimeWindowPartitionMapping from dagster._utils.caching_instance_queryer import CachingInstanceQueryer -from .asset_daemon_cursor import AssetConditionCursor from .asset_graph import AssetGraph from .asset_subset import AssetSubset if TYPE_CHECKING: - from .asset_condition import AssetCondition, AssetConditionEvaluation, AssetSubsetWithMetadata + from .asset_condition import ( + AssetCondition, + AssetConditionEvaluation, + AssetConditionEvaluationState, + AssetSubsetWithMetadata, + ) from .asset_daemon_context import AssetDaemonContext @@ -54,15 +58,15 @@ class AssetConditionEvaluationContext: asset_key: AssetKey condition: "AssetCondition" - cursor: AssetConditionCursor - previous_condition_evaluation: Optional["AssetConditionEvaluation"] + previous_evaluation_state: Optional["AssetConditionEvaluationState"] + previous_evaluation: Optional["AssetConditionEvaluation"] candidate_subset: AssetSubset instance_queryer: CachingInstanceQueryer data_time_resolver: CachingDataTimeResolver daemon_context: "AssetDaemonContext" - evaluation_results_by_key: Mapping[AssetKey, "AssetConditionEvaluation"] + evaluation_state_by_key: Mapping[AssetKey, "AssetConditionEvaluationState"] expected_data_time_mapping: Mapping[AssetKey, Optional[datetime.datetime]] start_timestamp: float @@ -72,11 +76,11 @@ class AssetConditionEvaluationContext: def create( asset_key: AssetKey, condition: "AssetCondition", - cursor: AssetConditionCursor, + previous_evaluation_state: Optional["AssetConditionEvaluationState"], instance_queryer: CachingInstanceQueryer, data_time_resolver: CachingDataTimeResolver, daemon_context: "AssetDaemonContext", - evaluation_results_by_key: Mapping[AssetKey, "AssetConditionEvaluation"], + evaluation_state_by_key: Mapping[AssetKey, "AssetConditionEvaluationState"], expected_data_time_mapping: Mapping[AssetKey, Optional[datetime.datetime]], ) -> "AssetConditionEvaluationContext": partitions_def = instance_queryer.asset_graph.get_partitions_def(asset_key) @@ -84,8 +88,10 @@ def create( return AssetConditionEvaluationContext( asset_key=asset_key, condition=condition, - cursor=cursor, - previous_condition_evaluation=cursor.previous_evaluation, + previous_evaluation_state=previous_evaluation_state, + previous_evaluation=previous_evaluation_state.evaluation + if previous_evaluation_state + else None, candidate_subset=AssetSubset.all( asset_key, partitions_def, @@ -95,7 +101,7 @@ def create( data_time_resolver=data_time_resolver, instance_queryer=instance_queryer, daemon_context=daemon_context, - evaluation_results_by_key=evaluation_results_by_key, + evaluation_state_by_key=evaluation_state_by_key, expected_data_time_mapping=expected_data_time_mapping, start_timestamp=pendulum.now("UTC").timestamp(), ) @@ -106,8 +112,8 @@ def for_child( return dataclasses.replace( self, condition=condition, - previous_condition_evaluation=self.previous_condition_evaluation.for_child(condition) - if self.previous_condition_evaluation + previous_evaluation=self.previous_evaluation.for_child(condition) + if self.previous_evaluation else None, candidate_subset=candidate_subset, root_ref=self.root_context, @@ -134,23 +140,31 @@ def evaluation_time(self) -> datetime.datetime: @property def previous_max_storage_id(self) -> Optional[int]: - return self.cursor.previous_max_storage_id + return ( + self.previous_evaluation_state.max_storage_id + if self.previous_evaluation_state + else None + ) @property def previous_evaluation_timestamp(self) -> Optional[float]: - return self.cursor.previous_evaluation_timestamp + return ( + self.previous_evaluation_state.evaluation_timestamp + if self.previous_evaluation_state + else None + ) @property def previous_true_subset(self) -> AssetSubset: - if self.previous_condition_evaluation is None: + if self.previous_evaluation is None: return self.empty_subset() - return self.previous_condition_evaluation.true_subset + return self.previous_evaluation.true_subset @property def previous_subsets_with_metadata(self) -> Sequence["AssetSubsetWithMetadata"]: - if self.previous_condition_evaluation is None: + if self.previous_evaluation is None: return [] - return self.previous_condition_evaluation.subsets_with_metadata + return self.previous_evaluation.subsets_with_metadata @functools.cached_property @root_property @@ -162,10 +176,10 @@ def parent_will_update_subset(self) -> AssetSubset: for parent_key in self.asset_graph.get_parents(self.asset_key): if not self.materializable_in_same_run(self.asset_key, parent_key): continue - parent_result = self.evaluation_results_by_key.get(parent_key) - if not parent_result: + parent_info = self.evaluation_state_by_key.get(parent_key) + if not parent_info: continue - parent_subset = parent_result.true_subset + parent_subset = parent_info.true_subset subset |= parent_subset._replace(asset_key=self.asset_key) return subset @@ -179,7 +193,7 @@ def materialized_since_previous_tick_subset(self) -> AssetSubset: self.instance_queryer.get_asset_partitions_updated_after_cursor( self.asset_key, asset_partitions=None, - after_cursor=self.cursor.previous_max_storage_id, + after_cursor=self.previous_max_storage_id, respect_materialization_data_versions=False, ), ) @@ -188,11 +202,15 @@ def materialized_since_previous_tick_subset(self) -> AssetSubset: @root_property def previous_tick_requested_subset(self) -> AssetSubset: """The set of asset partitions that were requested (or discarded) on the previous tick.""" - previous_evaluation = self.cursor.previous_evaluation - if previous_evaluation is None: + if ( + self.previous_evaluation_state is None + or self.previous_evaluation_state.evaluation is None + ): return self.empty_subset() - return previous_evaluation.get_requested_or_discarded_subset(self.condition) + return self.previous_evaluation_state.evaluation.get_requested_or_discarded_subset( + self.condition + ) @property def materialized_requested_or_discarded_since_previous_tick_subset(self) -> AssetSubset: @@ -211,7 +229,7 @@ def _parent_has_updated_subset_and_new_latest_storage_id( asset_partitions, cursor, ) = self.root_context.instance_queryer.asset_partitions_with_newly_updated_parents_and_new_cursor( - latest_storage_id=self.cursor.previous_max_storage_id, + latest_storage_id=self.previous_max_storage_id, child_asset_key=self.root_context.asset_key, map_old_time_partitions=False, ) @@ -247,17 +265,16 @@ def candidates_not_evaluated_on_previous_tick_subset(self) -> AssetSubset: """ from .asset_condition import HistoricalAllPartitionsSubsetSentinel - if not self.previous_condition_evaluation: + if not self.previous_evaluation: return self.candidate_subset # when the candidate_subset is HistoricalAllPartitionsSubsetSentinel, this indicates that the # entire asset was evaluated for this condition on the previous tick, and so no candidates # were *not* evaluated on the previous tick elif isinstance( - self.previous_condition_evaluation.candidate_subset, - HistoricalAllPartitionsSubsetSentinel, + self.previous_evaluation.candidate_subset, HistoricalAllPartitionsSubsetSentinel ): return self.empty_subset() - return self.candidate_subset - self.previous_condition_evaluation.candidate_subset + return self.candidate_subset - self.previous_evaluation.candidate_subset def materializable_in_same_run(self, child_key: AssetKey, parent_key: AssetKey) -> bool: """Returns whether a child asset can be materialized in the same run as a parent asset.""" @@ -304,10 +321,10 @@ def get_parents_that_will_not_be_materialized_on_current_tick( } def will_update_asset_partition(self, asset_partition: AssetKeyPartitionKey) -> bool: - parent_evaluation = self.evaluation_results_by_key.get(asset_partition.asset_key) - if not parent_evaluation: + parent_evaluation_state = self.evaluation_state_by_key.get(asset_partition.asset_key) + if not parent_evaluation_state: return False - return asset_partition in parent_evaluation.true_subset + return asset_partition in parent_evaluation_state.true_subset def add_evaluation_data_from_previous_tick( self, diff --git a/python_modules/dagster/dagster/_core/definitions/asset_daemon_context.py b/python_modules/dagster/dagster/_core/definitions/asset_daemon_context.py index c95480ddc42ec..6ec62ecb6a880 100644 --- a/python_modules/dagster/dagster/_core/definitions/asset_daemon_context.py +++ b/python_modules/dagster/dagster/_core/definitions/asset_daemon_context.py @@ -34,11 +34,11 @@ from ... import PartitionKeyRange from ..storage.tags import ASSET_PARTITION_RANGE_END_TAG, ASSET_PARTITION_RANGE_START_TAG -from .asset_condition import AssetConditionEvaluation +from .asset_condition import AssetConditionEvaluation, AssetConditionEvaluationState from .asset_condition_evaluation_context import ( AssetConditionEvaluationContext, ) -from .asset_daemon_cursor import AssetConditionCursor, AssetDaemonCursor +from .asset_daemon_cursor import AssetDaemonCursor from .asset_graph import AssetGraph from .auto_materialize_rule import AutoMaterializeRule from .backfill_policy import BackfillPolicy, BackfillPolicyType @@ -176,9 +176,9 @@ def prefetch(self) -> None: def evaluate_asset( self, asset_key: AssetKey, - evaluation_results_by_key: Mapping[AssetKey, AssetConditionEvaluation], + evaluation_state_by_key: Mapping[AssetKey, AssetConditionEvaluationState], expected_data_time_mapping: Mapping[AssetKey, Optional[datetime.datetime]], - ) -> Tuple[AssetConditionEvaluation, AssetConditionCursor, Optional[datetime.datetime]]: + ) -> Tuple[AssetConditionEvaluationState, Optional[datetime.datetime]]: """Evaluates the auto materialize policy of a given asset key. Params: @@ -196,48 +196,34 @@ def evaluate_asset( self.asset_graph.auto_materialize_policies_by_key.get(asset_key) ).to_asset_condition() - asset_cursor = self.cursor.get_asset_cursor(asset_key) + asset_cursor = self.cursor.get_previous_evaluation_state(asset_key) context = AssetConditionEvaluationContext.create( asset_key=asset_key, - cursor=asset_cursor, + previous_evaluation_state=asset_cursor, condition=asset_condition, instance_queryer=self.instance_queryer, data_time_resolver=self.data_time_resolver, daemon_context=self, - evaluation_results_by_key=evaluation_results_by_key, + evaluation_state_by_key=evaluation_state_by_key, expected_data_time_mapping=expected_data_time_mapping, ) - evaluation_result = asset_condition.evaluate(context) - - new_asset_cursor = AssetConditionCursor( - asset_key=asset_key, - previous_max_storage_id=context.new_max_storage_id, - previous_evaluation_timestamp=context.evaluation_time.timestamp(), - previous_evaluation=evaluation_result.evaluation, - extra_values_by_unique_id=evaluation_result.extra_values_by_unique_id, - ) + result = asset_condition.evaluate(context) expected_data_time = get_expected_data_time_for_asset_key( - context, will_materialize=evaluation_result.true_subset.size > 0 + context, will_materialize=result.true_subset.size > 0 ) - return evaluation_result.evaluation, new_asset_cursor, expected_data_time + return AssetConditionEvaluationState.create(context, result), expected_data_time def get_asset_condition_evaluations( self, - ) -> Tuple[ - Sequence[AssetConditionEvaluation], - Sequence[AssetConditionCursor], - AbstractSet[AssetKeyPartitionKey], - ]: + ) -> Tuple[Sequence[AssetConditionEvaluationState], AbstractSet[AssetKeyPartitionKey]]: """Returns a mapping from asset key to the AutoMaterializeAssetEvaluation for that key, a sequence of new per-asset cursors, and the set of all asset partitions that should be materialized or discarded this tick. """ - asset_cursors: List[AssetConditionCursor] = [] - - evaluation_results_by_key: Dict[AssetKey, AssetConditionEvaluation] = {} + evaluation_state_by_key: Dict[AssetKey, AssetConditionEvaluationState] = {} expected_data_time_mapping: Dict[AssetKey, Optional[datetime.datetime]] = defaultdict() to_request: Set[AssetKeyPartitionKey] = set() @@ -256,14 +242,14 @@ def get_asset_condition_evaluations( f" {asset_key.to_user_string()} ({num_checked_assets}/{num_auto_materialize_asset_keys})" ) - (evaluation, asset_cursor_for_asset, expected_data_time) = self.evaluate_asset( - asset_key, evaluation_results_by_key, expected_data_time_mapping + (evaluation_state, expected_data_time) = self.evaluate_asset( + asset_key, evaluation_state_by_key, expected_data_time_mapping ) - num_requested = evaluation.true_subset.size + num_requested = evaluation_state.true_subset.size log_fn = self._logger.info if num_requested > 0 else self._logger.debug - to_request_asset_partitions = evaluation.true_subset.asset_partitions + to_request_asset_partitions = evaluation_state.true_subset.asset_partitions to_request_str = ",".join( [(ap.partition_key or "No partition") for ap in to_request_asset_partitions] ) @@ -274,9 +260,8 @@ def get_asset_condition_evaluations( f" requested ({to_request_str}) ({format(time.time()-start_time, '.3f')} seconds)" ) - evaluation_results_by_key[asset_key] = evaluation + evaluation_state_by_key[asset_key] = evaluation_state expected_data_time_mapping[asset_key] = expected_data_time - asset_cursors.append(asset_cursor_for_asset) # if we need to materialize any partitions of a non-subsettable multi-asset, we need to # materialize all of them @@ -285,18 +270,21 @@ def get_asset_condition_evaluations( expected_data_time_mapping[neighbor_key] = expected_data_time # make sure that the true_subset of the neighbor is accurate - if neighbor_key in evaluation_results_by_key: - neighbor_evaluation = evaluation_results_by_key[neighbor_key] - evaluation_results_by_key[neighbor_key] = neighbor_evaluation._replace( - true_subset=neighbor_evaluation.true_subset - | evaluation.true_subset._replace(asset_key=neighbor_key) + if neighbor_key in evaluation_state_by_key: + neighbor_evaluation = evaluation_state_by_key[neighbor_key] + evaluation_state_by_key[neighbor_key] = neighbor_evaluation._replace( + evaluation=neighbor_evaluation.evaluation._replace( + true_subset=neighbor_evaluation.true_subset._replace( + asset_key=neighbor_key + ) + ) ) to_request |= { ap._replace(asset_key=neighbor_key) - for ap in evaluation.true_subset.asset_partitions + for ap in evaluation_state.true_subset.asset_partitions } - return (list(evaluation_results_by_key.values()), asset_cursors, to_request) + return (list(evaluation_state_by_key.values()), to_request) def evaluate( self, @@ -314,7 +302,7 @@ def evaluate( else [] ) - evaluations, asset_cursors, to_request = self.get_asset_condition_evaluations() + evaluation_state, to_request = self.get_asset_condition_evaluations() run_requests = [ *build_run_requests( @@ -329,7 +317,7 @@ def evaluate( run_requests, self.cursor.with_updates( evaluation_id=self._evaluation_id, - asset_cursors=asset_cursors, + evaluation_state=evaluation_state, newly_observe_requested_asset_keys=[ asset_key for run_request in auto_observe_run_requests @@ -337,12 +325,12 @@ def evaluate( ], evaluation_timestamp=self.instance_queryer.evaluation_time.timestamp(), ), - # only record evaluations where something changed + # only record evaluation results where something changed [ - evaluation - for evaluation in evaluations - if not evaluation.equivalent_to_stored_evaluation( - self.cursor.get_previous_evaluation(evaluation.asset_key) + es.evaluation + for es in evaluation_state + if not es.evaluation.equivalent_to_stored_evaluation( + self.cursor.get_previous_evaluation(es.asset_key) ) ], ) diff --git a/python_modules/dagster/dagster/_core/definitions/asset_daemon_cursor.py b/python_modules/dagster/dagster/_core/definitions/asset_daemon_cursor.py index 29301525ee795..163f0714e0a45 100644 --- a/python_modules/dagster/dagster/_core/definitions/asset_daemon_cursor.py +++ b/python_modules/dagster/dagster/_core/definitions/asset_daemon_cursor.py @@ -6,14 +6,11 @@ NamedTuple, Optional, Sequence, - Type, - TypeVar, ) from dagster._core.definitions.asset_graph_subset import AssetGraphSubset from dagster._core.definitions.asset_subset import AssetSubset from dagster._core.definitions.events import AssetKey -from dagster._core.definitions.partition import PartitionsDefinition from dagster._serdes.serdes import ( FieldSerializer, JsonSerializableValue, @@ -29,9 +26,11 @@ from .asset_graph import AssetGraph if TYPE_CHECKING: - from .asset_condition import AssetCondition, AssetConditionEvaluation, AssetConditionSnapshot - -T = TypeVar("T") + from .asset_condition import ( + AssetConditionEvaluation, + AssetConditionEvaluationState, + AssetConditionSnapshot, + ) @whitelist_for_serdes @@ -44,46 +43,6 @@ class AssetConditionCursorExtras(NamedTuple): extras: Mapping[str, PackableValue] -@whitelist_for_serdes -class AssetConditionCursor(NamedTuple): - """Represents the evaluated state of an AssetConditionCursor at a certain point in time. This - information can be used to make future evaluations more efficient. - """ - - asset_key: AssetKey - previous_evaluation: Optional["AssetConditionEvaluation"] - previous_max_storage_id: Optional[int] - previous_evaluation_timestamp: Optional[float] - - extra_values_by_unique_id: Mapping[str, PackableValue] - - @staticmethod - def empty(asset_key: AssetKey) -> "AssetConditionCursor": - return AssetConditionCursor( - asset_key=asset_key, - previous_evaluation=None, - previous_max_storage_id=None, - previous_evaluation_timestamp=None, - extra_values_by_unique_id={}, - ) - - def get_extras_value(self, condition: "AssetCondition", as_type: Type[T]) -> Optional[T]: - """Returns the value from the extras dict for the given condition, if it exists and is of - the expected type. Otherwise, returns None. - """ - extras_value = self.extra_values_by_unique_id.get(condition.unique_id) - if isinstance(extras_value, as_type): - return extras_value - return None - - def get_previous_requested_or_discarded_subset( - self, condition: "AssetCondition", partitions_def: Optional[PartitionsDefinition] - ) -> AssetSubset: - if not self.previous_evaluation: - return AssetSubset.empty(self.asset_key, partitions_def) - return self.previous_evaluation.get_requested_or_discarded_subset(condition) - - class ObserveRequestTimestampSerializer(FieldSerializer): def pack( self, @@ -112,12 +71,12 @@ class AssetDaemonCursor(NamedTuple): Attributes: evaluation_id (int): The ID of the evaluation that produced this cursor. - asset_cursors (Sequence[AssetConditionCursor]): The state of each asset that the daemon - is responsible for handling. + previous_evaluation_state (Sequence[AssetConditionEvaluationInfo]): The evaluation info + recorded for each asset on the previous tick. """ evaluation_id: int - asset_cursors: Sequence[AssetConditionCursor] + previous_evaluation_state: Sequence["AssetConditionEvaluationState"] last_observe_request_timestamp_by_asset_key: Mapping[AssetKey, float] @@ -125,37 +84,44 @@ class AssetDaemonCursor(NamedTuple): def empty(evaluation_id: int = 0) -> "AssetDaemonCursor": return AssetDaemonCursor( evaluation_id=evaluation_id, - asset_cursors=[], + previous_evaluation_state=[], last_observe_request_timestamp_by_asset_key={}, ) @property @functools.lru_cache(maxsize=1) - def asset_cursors_by_key(self) -> Mapping[AssetKey, AssetConditionCursor]: - """Efficient lookup of asset cursors by asset key.""" - return {cursor.asset_key: cursor for cursor in self.asset_cursors} + def previous_evaluation_state_by_key( + self, + ) -> Mapping[AssetKey, "AssetConditionEvaluationState"]: + """Efficient lookup of previous evaluation info by asset key.""" + return { + evaluation_state.asset_key: evaluation_state + for evaluation_state in self.previous_evaluation_state + } - def get_asset_cursor(self, asset_key: AssetKey) -> AssetConditionCursor: + def get_previous_evaluation_state( + self, asset_key: AssetKey + ) -> Optional["AssetConditionEvaluationState"]: """Returns the AssetConditionCursor associated with the given asset key. If no stored cursor exists, returns an empty cursor. """ - return self.asset_cursors_by_key.get(asset_key) or AssetConditionCursor.empty(asset_key) + return self.previous_evaluation_state_by_key.get(asset_key) def get_previous_evaluation(self, asset_key: AssetKey) -> Optional["AssetConditionEvaluation"]: """Returns the previous AssetConditionEvaluation for a given asset key, if it exists.""" - cursor = self.get_asset_cursor(asset_key) - return cursor.previous_evaluation if cursor else None + previous_evaluation_state = self.get_previous_evaluation_state(asset_key) + return previous_evaluation_state.evaluation if previous_evaluation_state else None def with_updates( self, evaluation_id: int, evaluation_timestamp: float, newly_observe_requested_asset_keys: Sequence[AssetKey], - asset_cursors: Sequence[AssetConditionCursor], + evaluation_state: Sequence["AssetConditionEvaluationState"], ) -> "AssetDaemonCursor": return self._replace( evaluation_id=evaluation_id, - asset_cursors=asset_cursors, + previous_evaluation_state=evaluation_state, last_observe_request_timestamp_by_asset_key={ **self.last_observe_request_timestamp_by_asset_key, **{ @@ -172,24 +138,25 @@ def __hash__(self) -> int: # BACKCOMPAT -def get_backcompat_asset_condition_cursor( - asset_key: AssetKey, +def get_backcompat_asset_condition_evaluation_state( + latest_evaluation: "AssetConditionEvaluation", latest_storage_id: Optional[int], latest_timestamp: Optional[float], - latest_evaluation: Optional["AssetConditionEvaluation"], handled_root_subset: Optional[AssetSubset], -) -> AssetConditionCursor: +) -> "AssetConditionEvaluationState": """Generates an AssetDaemonCursor from information available on the old cursor format.""" - from dagster._core.definitions.asset_condition import RuleCondition + from dagster._core.definitions.asset_condition import ( + AssetConditionEvaluationState, + RuleCondition, + ) from dagster._core.definitions.auto_materialize_rule import MaterializeOnMissingRule - return AssetConditionCursor( - asset_key=asset_key, - previous_evaluation=latest_evaluation, - previous_evaluation_timestamp=latest_timestamp, - previous_max_storage_id=latest_storage_id, + return AssetConditionEvaluationState( + evaluation=latest_evaluation, + evaluation_timestamp=latest_timestamp, + max_storage_id=latest_storage_id, # the only information we need to preserve from the previous cursor is the handled subset - extra_values_by_unique_id={ + extra_state_by_unique_id={ RuleCondition(MaterializeOnMissingRule()).unique_id: handled_root_subset, } if handled_root_subset and handled_root_subset.size > 0 @@ -203,6 +170,7 @@ def backcompat_deserialize_asset_daemon_cursor_str( """This serves as a backcompat layer for deserializing the old cursor format. Some information is impossible to fully recover, this will recover enough to continue operating as normal. """ + from .asset_condition import AssetConditionEvaluation, AssetConditionSnapshot from .auto_materialize_rule_evaluation import ( deserialize_auto_materialize_asset_evaluation_to_asset_condition_evaluation_with_run_ids, ) @@ -256,29 +224,39 @@ def backcompat_deserialize_asset_daemon_cursor_str( latest_evaluation_by_asset_key[key] = evaluation - asset_cursors = [] + previous_evaluation_state = [] cursor_keys = ( asset_graph.auto_materialize_policies_by_key.keys() if asset_graph else latest_evaluation_by_asset_key.keys() ) for asset_key in cursor_keys: - latest_evaluation = latest_evaluation_by_asset_key.get(asset_key) - asset_cursors.append( - get_backcompat_asset_condition_cursor( - asset_key, - data.get("latest_storage_id"), - data.get("latest_evaluation_timestamp"), - latest_evaluation, - handled_root_asset_graph_subset.get_asset_subset(asset_key, asset_graph) - if asset_graph - else None, + latest_evaluation_result = latest_evaluation_by_asset_key.get(asset_key) + # create a placeholder evaluation result if we don't have one + if not latest_evaluation_result: + partitions_def = asset_graph.get_partitions_def(asset_key) if asset_graph else None + latest_evaluation_result = AssetConditionEvaluation( + condition_snapshot=AssetConditionSnapshot("", "", ""), + true_subset=AssetSubset.empty(asset_key, partitions_def), + candidate_subset=AssetSubset.empty(asset_key, partitions_def), + start_timestamp=None, + end_timestamp=None, + subsets_with_metadata=[], + child_evaluations=[], ) + backcompat_evaluation_state = get_backcompat_asset_condition_evaluation_state( + latest_evaluation_result, + data.get("latest_storage_id"), + data.get("latest_evaluation_timestamp"), + handled_root_asset_graph_subset.get_asset_subset(asset_key, asset_graph) + if asset_graph + else None, ) + previous_evaluation_state.append(backcompat_evaluation_state) return AssetDaemonCursor( evaluation_id=default_evaluation_id, - asset_cursors=asset_cursors, + previous_evaluation_state=previous_evaluation_state, last_observe_request_timestamp_by_asset_key=last_observe_request_timestamp_by_asset_key, ) diff --git a/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule.py b/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule.py index 1167fc69abdf8..50d62c3dc42b3 100644 --- a/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule.py +++ b/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule.py @@ -48,7 +48,7 @@ from .asset_graph import sort_key_for_asset_partition if TYPE_CHECKING: - from dagster._core.definitions.asset_condition import AssetConditionEvaluationResult + from dagster._core.definitions.asset_condition import AssetConditionResult class AutoMaterializeRule(ABC): @@ -78,7 +78,7 @@ def description(self) -> str: @abstractmethod def evaluate_for_asset( self, context: AssetConditionEvaluationContext - ) -> "AssetConditionEvaluationResult": + ) -> "AssetConditionResult": """The core evaluation function for the rule. This function takes in a context object and returns a mapping from evaluated rules to the set of asset partitions that the rule applies to. @@ -239,13 +239,13 @@ def description(self) -> str: def evaluate_for_asset( self, context: AssetConditionEvaluationContext - ) -> "AssetConditionEvaluationResult": - from .asset_condition import AssetConditionEvaluationResult + ) -> "AssetConditionResult": + from .asset_condition import AssetConditionResult true_subset, subsets_with_metadata = freshness_evaluation_results_for_asset_key( context.root_context ) - return AssetConditionEvaluationResult.create(context, true_subset, subsets_with_metadata) + return AssetConditionResult.create(context, true_subset, subsets_with_metadata) @whitelist_for_serdes @@ -268,7 +268,7 @@ def missed_cron_ticks( self, context: AssetConditionEvaluationContext ) -> Sequence[datetime.datetime]: """Returns the cron ticks which have been missed since the previous cursor was generated.""" - if not context.cursor.previous_evaluation_timestamp: + if not context.previous_evaluation_timestamp: previous_dt = next( reverse_cron_string_iterator( end_timestamp=context.evaluation_time.timestamp(), @@ -279,7 +279,7 @@ def missed_cron_ticks( return [previous_dt] missed_ticks = [] for dt in cron_string_iterator( - start_timestamp=context.cursor.previous_evaluation_timestamp, + start_timestamp=context.previous_evaluation_timestamp, cron_string=self.cron_schedule, execution_timezone=self.timezone, ): @@ -353,8 +353,8 @@ def get_new_asset_partitions_to_request( def evaluate_for_asset( self, context: AssetConditionEvaluationContext - ) -> "AssetConditionEvaluationResult": - from .asset_condition import AssetConditionEvaluationResult + ) -> "AssetConditionResult": + from .asset_condition import AssetConditionResult new_asset_partitions_to_request = self.get_new_asset_partitions_to_request(context) asset_subset_to_request = AssetSubset.from_asset_partitions_set( @@ -364,7 +364,7 @@ def evaluate_for_asset( - context.materialized_requested_or_discarded_since_previous_tick_subset ) - return AssetConditionEvaluationResult.create(context, true_subset=asset_subset_to_request) + return AssetConditionResult.create(context, true_subset=asset_subset_to_request) @whitelist_for_serdes @@ -471,11 +471,11 @@ def description(self) -> str: def evaluate_for_asset( self, context: AssetConditionEvaluationContext - ) -> "AssetConditionEvaluationResult": + ) -> "AssetConditionResult": """Evaluates the set of asset partitions of this asset whose parents have been updated, or will update on this tick. """ - from .asset_condition import AssetConditionEvaluationResult + from .asset_condition import AssetConditionResult asset_partitions_by_updated_parents: Mapping[ AssetKeyPartitionKey, Set[AssetKeyPartitionKey] @@ -561,7 +561,7 @@ def evaluate_for_asset( asset_partitions_by_evaluation_data, ignore_subset=context.materialized_requested_or_discarded_since_previous_tick_subset, ) - return AssetConditionEvaluationResult.create(context, true_subset, subsets_with_metadata) + return AssetConditionResult.create(context, true_subset, subsets_with_metadata) @whitelist_for_serdes @@ -578,7 +578,11 @@ def get_handled_subset(self, context: AssetConditionEvaluationContext) -> AssetS """Returns the AssetSubset which has been handled (materialized, requested, or discarded). Accounts for cases in which the partitions definition may have changed between ticks. """ - previous_handled_subset = context.cursor.get_extras_value(context.condition, AssetSubset) + previous_handled_subset = ( + context.previous_evaluation_state.get_extra_state(context.condition, AssetSubset) + if context.previous_evaluation_state + else None + ) if previous_handled_subset: # partitioned -> unpartitioned or vice versa if previous_handled_subset.is_partitioned != (context.partitions_def is not None): @@ -603,11 +607,11 @@ def get_handled_subset(self, context: AssetConditionEvaluationContext) -> AssetS def evaluate_for_asset( self, context: AssetConditionEvaluationContext - ) -> "AssetConditionEvaluationResult": + ) -> "AssetConditionResult": """Evaluates the set of asset partitions for this asset which are missing and were not previously discarded. """ - from .asset_condition import AssetConditionEvaluationResult + from .asset_condition import AssetConditionResult handled_subset = self.get_handled_subset(context) unhandled_candidates = ( @@ -619,12 +623,12 @@ def evaluate_for_asset( else context.candidate_subset ) - return AssetConditionEvaluationResult.create( + return AssetConditionResult.create( context, true_subset=unhandled_candidates, # we keep track of the handled subset instead of the unhandled subset because new # partitions may spontaneously jump into existence at any time - extra_value=handled_subset, + extra_state=handled_subset, ) @@ -640,8 +644,8 @@ def description(self) -> str: def evaluate_for_asset( self, context: AssetConditionEvaluationContext - ) -> "AssetConditionEvaluationResult": - from .asset_condition import AssetConditionEvaluationResult + ) -> "AssetConditionResult": + from .asset_condition import AssetConditionResult asset_partitions_by_evaluation_data = defaultdict(set) @@ -671,7 +675,7 @@ def evaluate_for_asset( true_subset, subsets_with_metadata = context.add_evaluation_data_from_previous_tick( asset_partitions_by_evaluation_data, ignore_subset=subset_to_evaluate ) - return AssetConditionEvaluationResult.create(context, true_subset, subsets_with_metadata) + return AssetConditionResult.create(context, true_subset, subsets_with_metadata) @whitelist_for_serdes @@ -687,8 +691,8 @@ def description(self) -> str: def evaluate_for_asset( self, context: AssetConditionEvaluationContext, - ) -> "AssetConditionEvaluationResult": - from .asset_condition import AssetConditionEvaluationResult + ) -> "AssetConditionResult": + from .asset_condition import AssetConditionResult asset_partitions_by_evaluation_data = defaultdict(set) @@ -721,7 +725,7 @@ def evaluate_for_asset( true_subset, subsets_with_metadata = context.add_evaluation_data_from_previous_tick( asset_partitions_by_evaluation_data, ignore_subset=subset_to_evaluate ) - return AssetConditionEvaluationResult.create(context, true_subset, subsets_with_metadata) + return AssetConditionResult.create(context, true_subset, subsets_with_metadata) @whitelist_for_serdes @@ -757,8 +761,8 @@ def description(self) -> str: def evaluate_for_asset( self, context: AssetConditionEvaluationContext, - ) -> "AssetConditionEvaluationResult": - from .asset_condition import AssetConditionEvaluationResult + ) -> "AssetConditionResult": + from .asset_condition import AssetConditionResult asset_partitions_by_evaluation_data = defaultdict(set) @@ -810,7 +814,7 @@ def evaluate_for_asset( true_subset, subsets_with_metadata = context.add_evaluation_data_from_previous_tick( asset_partitions_by_evaluation_data, ignore_subset=subset_to_evaluate ) - return AssetConditionEvaluationResult.create(context, true_subset, subsets_with_metadata) + return AssetConditionResult.create(context, true_subset, subsets_with_metadata) @whitelist_for_serdes @@ -827,8 +831,8 @@ def description(self) -> str: def evaluate_for_asset( self, context: AssetConditionEvaluationContext - ) -> "AssetConditionEvaluationResult": - from .asset_condition import AssetConditionEvaluationResult + ) -> "AssetConditionResult": + from .asset_condition import AssetConditionResult asset_partitions_by_evaluation_data = defaultdict(set) @@ -855,7 +859,7 @@ def evaluate_for_asset( true_subset, subsets_with_metadata = context.add_evaluation_data_from_previous_tick( asset_partitions_by_evaluation_data, ignore_subset=subset_to_evaluate ) - return AssetConditionEvaluationResult.create(context, true_subset, subsets_with_metadata) + return AssetConditionResult.create(context, true_subset, subsets_with_metadata) @whitelist_for_serdes @@ -876,8 +880,8 @@ def description(self) -> str: def evaluate_for_asset( self, context: AssetConditionEvaluationContext - ) -> "AssetConditionEvaluationResult": - from .asset_condition import AssetConditionEvaluationResult + ) -> "AssetConditionResult": + from .asset_condition import AssetConditionResult backfilling_subset = ( context.instance_queryer.get_active_backfill_target_asset_graph_subset() @@ -890,7 +894,7 @@ def evaluate_for_asset( else: true_subset = context.candidate_subset & backfilling_subset - return AssetConditionEvaluationResult.create(context, true_subset) + return AssetConditionResult.create(context, true_subset) @whitelist_for_serdes @@ -907,8 +911,8 @@ def description(self) -> str: def evaluate_for_asset( self, context: AssetConditionEvaluationContext - ) -> "AssetConditionEvaluationResult": - from .asset_condition import AssetConditionEvaluationResult + ) -> "AssetConditionResult": + from .asset_condition import AssetConditionResult # the set of asset partitions which exceed the limit rate_limited_asset_partitions = set( @@ -918,7 +922,7 @@ def evaluate_for_asset( )[self.limit :] ) - return AssetConditionEvaluationResult.create( + return AssetConditionResult.create( context, AssetSubset.from_asset_partitions_set( context.asset_key, context.partitions_def, rate_limited_asset_partitions diff --git a/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule_evaluation.py b/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule_evaluation.py index 1088e27eab15f..9c18ef3793a74 100644 --- a/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule_evaluation.py +++ b/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule_evaluation.py @@ -132,7 +132,7 @@ def metadata(self) -> MetadataMapping: } -RuleEvaluationResults = Tuple[AssetSubset, Sequence["AssetSubsetWithMetadata"], PackableValue] +RuleEvaluations = Tuple[AssetSubset, Sequence["AssetSubsetWithMetadata"], PackableValue] @whitelist_for_serdes @@ -278,6 +278,7 @@ def _get_child_rule_evaluation( start_timestamp=None, end_timestamp=None, subsets_with_metadata=subsets_with_metadata, + child_evaluations=[], ) def _get_child_decision_type_evaluation( diff --git a/python_modules/dagster/dagster/_utils/test/schedule_storage.py b/python_modules/dagster/dagster/_utils/test/schedule_storage.py index 7e15f29dc3577..2f574565027d0 100644 --- a/python_modules/dagster/dagster/_utils/test/schedule_storage.py +++ b/python_modules/dagster/dagster/_utils/test/schedule_storage.py @@ -743,6 +743,8 @@ def test_auto_materialize_asset_evaluations(self, storage) -> None: candidate_subset=AssetSubset(asset_key=AssetKey("asset_one"), value=False), start_timestamp=0, end_timestamp=1, + subsets_with_metadata=[], + child_evaluations=[], ).with_run_ids(set()), AssetConditionEvaluation( condition_snapshot=condition_snapshot, @@ -756,6 +758,7 @@ def test_auto_materialize_asset_evaluations(self, storage) -> None: {"foo": MetadataValue.text("bar")}, ) ], + child_evaluations=[], ).with_run_ids(set()), ], ) @@ -804,6 +807,8 @@ def test_auto_materialize_asset_evaluations(self, storage) -> None: end_timestamp=1, true_subset=AssetSubset(asset_key=AssetKey("asset_one"), value=True), candidate_subset=AssetSubset(asset_key=AssetKey("asset_one"), value=True), + subsets_with_metadata=[], + child_evaluations=[], ).with_run_ids(set()), ], ) @@ -835,6 +840,8 @@ def test_auto_materialize_asset_evaluations(self, storage) -> None: end_timestamp=1, true_subset=AssetSubset(asset_key=AssetKey("asset_one"), value=True), candidate_subset=AssetSubset(asset_key=AssetKey("asset_one"), value=True), + subsets_with_metadata=[], + child_evaluations=[], ).with_run_ids(set()) eval_asset_three = AssetConditionEvaluation( @@ -843,6 +850,8 @@ def test_auto_materialize_asset_evaluations(self, storage) -> None: end_timestamp=1, true_subset=AssetSubset(asset_key=AssetKey("asset_three"), value=True), candidate_subset=AssetSubset(asset_key=AssetKey("asset_three"), value=True), + subsets_with_metadata=[], + child_evaluations=[], ).with_run_ids(set()) storage.add_auto_materialize_asset_evaluations( @@ -890,6 +899,7 @@ def test_auto_materialize_asset_evaluations_with_partitions(self, storage) -> No true_subset=asset_subset, candidate_subset=asset_subset, subsets_with_metadata=[asset_subset_with_metadata], + child_evaluations=[], ).with_run_ids(set()), ], ) @@ -923,6 +933,7 @@ def test_purge_asset_evaluations(self, storage) -> None: true_subset=AssetSubset(asset_key=AssetKey("asset_one"), value=True), candidate_subset=AssetSubset(asset_key=AssetKey("asset_one"), value=True), subsets_with_metadata=[], + child_evaluations=[], ).with_run_ids(set()), ], ) diff --git a/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/asset_daemon_scenario.py b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/asset_daemon_scenario.py index bdabe2924f487..e4ddc18aec894 100644 --- a/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/asset_daemon_scenario.py +++ b/python_modules/dagster/dagster_tests/definitions_tests/auto_materialize_tests/asset_daemon_scenario.py @@ -694,7 +694,9 @@ def assert_evaluation( if num_requested is not None: assert actual_evaluation.true_subset.size == num_requested - def get_leaf_evaluations(e: AssetConditionEvaluation) -> Sequence[AssetConditionEvaluation]: + def get_leaf_evaluations( + e: AssetConditionEvaluation, + ) -> Sequence[AssetConditionEvaluation]: if len(e.child_evaluations) == 0: return [e] leaf_evals = [] From 67eaf9a0b2ac6694bba7f068b8cae9ae93c6a06c Mon Sep 17 00:00:00 2001 From: Owen Kephart Date: Mon, 8 Jan 2024 14:31:57 -0500 Subject: [PATCH 17/56] Reenable legacy queries --- .../auto_materialize_asset_evaluations.py | 184 ++++++++++++------ .../schema/auto_materialize_policy.py | 17 +- ...test_auto_materialize_asset_evaluations.py | 100 ++++++++-- .../_core/definitions/asset_condition.py | 32 ++- .../asset_condition_evaluation_context.py | 4 +- .../auto_materialize_rule_evaluation.py | 40 ++-- 6 files changed, 264 insertions(+), 113 deletions(-) diff --git a/python_modules/dagster-graphql/dagster_graphql/schema/auto_materialize_asset_evaluations.py b/python_modules/dagster-graphql/dagster_graphql/schema/auto_materialize_asset_evaluations.py index 2d3984e9dcdab..03e68b16a8794 100644 --- a/python_modules/dagster-graphql/dagster_graphql/schema/auto_materialize_asset_evaluations.py +++ b/python_modules/dagster-graphql/dagster_graphql/schema/auto_materialize_asset_evaluations.py @@ -1,18 +1,16 @@ -from collections import defaultdict from typing import Optional, Sequence, Tuple -import dagster._check as check import graphene from dagster import PartitionsDefinition +from dagster._core.definitions.asset_condition import ( + AssetConditionEvaluation, + AssetSubsetWithMetadata, + RuleCondition, +) from dagster._core.definitions.auto_materialize_rule_evaluation import ( AutoMaterializeDecisionType, - AutoMaterializeRuleEvaluation, - AutoMaterializeRuleEvaluationData, - ParentUpdatedRuleEvaluationData, - TextRuleEvaluationData, - WaitingOnAssetsRuleEvaluationData, ) -from dagster._core.definitions.partition import SerializedPartitionsSubset +from dagster._core.definitions.metadata import DagsterAssetMetadataValue from dagster._core.scheduler.instigation import AutoMaterializeAssetEvaluationRecord from dagster_graphql.schema.errors import GrapheneError @@ -104,72 +102,131 @@ def __init__( def create_graphene_auto_materialize_rule_evaluation( - evaluation_data_tuple: Tuple[ - AutoMaterializeRuleEvaluationData, - Optional[SerializedPartitionsSubset], - ], - partitions_def: Optional[PartitionsDefinition], -): - rule_evaluation_data, serialized_partition_subset = evaluation_data_tuple - - if not serialized_partition_subset: + asset_subset_with_metadata: AssetSubsetWithMetadata, +) -> Optional[GrapheneAutoMaterializeRuleEvaluation]: + if not asset_subset_with_metadata.subset.is_partitioned: partition_keys_or_error = None - elif not partitions_def: - partition_keys_or_error = GraphenePartitionSubsetDeserializationError( - message="PartitionsDefinition not found, cannot display partition keys" - ) - elif not serialized_partition_subset.can_deserialize(partitions_def): - partition_keys_or_error = GraphenePartitionSubsetDeserializationError( - message=( - "Partition subset cannot be deserialized. The PartitionsDefinition may have" - " changed." - ) - ) else: - subset = serialized_partition_subset.deserialize(partitions_def) - partition_keys_or_error = GraphenePartitionKeys(partitionKeys=subset.get_partition_keys()) + partition_keys_or_error = GraphenePartitionKeys( + partitionKeys=asset_subset_with_metadata.subset.subset_value.get_partition_keys() + ) - if isinstance(rule_evaluation_data, TextRuleEvaluationData): - rule_evaluation_data = GrapheneTextRuleEvaluationData(text=rule_evaluation_data.text) - elif isinstance(rule_evaluation_data, ParentUpdatedRuleEvaluationData): + metadata = asset_subset_with_metadata.metadata + if "text" in metadata.keys() and isinstance(metadata["text"], str): + rule_evaluation_data = GrapheneTextRuleEvaluationData(text=metadata["text"]) + elif any(key.startswith("updated_parent") for key in metadata.keys()): + updatedAssetKeys = { + value.asset_key + for key, value in metadata.items() + if key.startswith("updated_parent") and isinstance(value, DagsterAssetMetadataValue) + } + willUpdateAssetKeys = { + value.asset_key + for key, value in metadata.items() + if key.startswith("will_update_parent") and isinstance(value, DagsterAssetMetadataValue) + } rule_evaluation_data = GrapheneParentMaterializedRuleEvaluationData( - updatedAssetKeys=rule_evaluation_data.updated_asset_keys, - willUpdateAssetKeys=rule_evaluation_data.will_update_asset_keys, + updatedAssetKeys=updatedAssetKeys, willUpdateAssetKeys=willUpdateAssetKeys ) - elif isinstance(rule_evaluation_data, WaitingOnAssetsRuleEvaluationData): + elif any(key.startswith("waiting_on_ancestor") for key in metadata.keys()): + waitingOnAssetKeys = { + value.asset_key + for key, value in metadata.items() + if key.startswith("waiting_on_ancestor") + and isinstance(value, DagsterAssetMetadataValue) + } rule_evaluation_data = GrapheneWaitingOnKeysRuleEvaluationData( - waitingOnAssetKeys=rule_evaluation_data.waiting_on_asset_keys + waitingOnAssetKeys=waitingOnAssetKeys ) - elif rule_evaluation_data is not None: - check.failed(f"Unexpected rule evaluation data type {type(rule_evaluation_data)}") + else: + rule_evaluation_data = None return GrapheneAutoMaterializeRuleEvaluation( partitionKeysOrError=partition_keys_or_error, evaluationData=rule_evaluation_data ) +def _create_rules_with_rule_evaluations_for_decision_type( + evaluation: AssetConditionEvaluation, decision_type: AutoMaterializeDecisionType +) -> Tuple[ + Sequence[GrapheneAutoMaterializeRule], Sequence[GrapheneAutoMaterializeRuleWithRuleEvaluations] +]: + rules = [] + rules_with_rule_evaluations = [] + leaf_evaluations = evaluation.child_evaluations + for le in leaf_evaluations: + snapshot = le.condition_snapshot + if snapshot.class_name != RuleCondition.__name__: + continue + rule = GrapheneAutoMaterializeRule(snapshot.description, decision_type) + rules.append(rule) + if le.subsets_with_metadata: + rules_with_rule_evaluations.append( + GrapheneAutoMaterializeRuleWithRuleEvaluations( + rule=rule, + ruleEvaluations=[ + create_graphene_auto_materialize_rule_evaluation(sswm) + for sswm in le.subsets_with_metadata + ], + ) + ) + elif le.true_subset.size > 0: + rules_with_rule_evaluations.append( + GrapheneAutoMaterializeRuleWithRuleEvaluations( + rule=rule, + ruleEvaluations=[ + GrapheneAutoMaterializeRuleEvaluation( + partitionKeysOrError=GraphenePartitionKeys( + partitionKeys=le.true_subset.subset_value.get_partition_keys() + ) + if le.true_subset.is_partitioned + else None, + evaluationData=None, + ) + ], + ) + ) + return rules, rules_with_rule_evaluations + + def create_graphene_auto_materialize_rules_with_rule_evaluations( - partition_subsets_by_condition: Sequence[ - Tuple[AutoMaterializeRuleEvaluation, Optional[SerializedPartitionsSubset]] - ], - partitions_def: Optional[PartitionsDefinition], -) -> Sequence[GrapheneAutoMaterializeRuleWithRuleEvaluations]: - rule_mapping = defaultdict(list) - for rule_evaluation, serialized_partition_subset in partition_subsets_by_condition: - rule_mapping[rule_evaluation.rule_snapshot].append( - (rule_evaluation.evaluation_data, serialized_partition_subset) + evaluation: AssetConditionEvaluation, +) -> Tuple[ + Sequence[GrapheneAutoMaterializeRule], Sequence[GrapheneAutoMaterializeRuleWithRuleEvaluations] +]: + rules, rules_with_rule_evaluations = [], [] + + if len(evaluation.child_evaluations) > 0: + materialize_evaluation = evaluation.child_evaluations[0] + rs, rwres = _create_rules_with_rule_evaluations_for_decision_type( + materialize_evaluation, AutoMaterializeDecisionType.MATERIALIZE ) + rules.extend(rs) + rules_with_rule_evaluations.extend(rwres) - return [ - GrapheneAutoMaterializeRuleWithRuleEvaluations( - rule=GrapheneAutoMaterializeRule(rule_snapshot), - ruleEvaluations=[ - create_graphene_auto_materialize_rule_evaluation(tup, partitions_def) - for tup in tups - ], + if ( + len(evaluation.child_evaluations) > 1 + and len(evaluation.child_evaluations[1].child_evaluations) == 1 + ): + skip_evaluation = evaluation.child_evaluations[1].child_evaluations[0] + rs, rwres = _create_rules_with_rule_evaluations_for_decision_type( + skip_evaluation, AutoMaterializeDecisionType.SKIP ) - for rule_snapshot, tups in rule_mapping.items() - ] + rules.extend(rs) + rules_with_rule_evaluations.extend(rwres) + + if ( + len(evaluation.child_evaluations) > 2 + and len(evaluation.child_evaluations[2].child_evaluations) == 1 + ): + discard_evaluation = evaluation.child_evaluations[2] + rs, rwres = _create_rules_with_rule_evaluations_for_decision_type( + discard_evaluation, AutoMaterializeDecisionType.DISCARD + ) + rules.extend(rs) + rules_with_rule_evaluations.extend(rwres) + + return rules, rules_with_rule_evaluations class GrapheneAutoMaterializeAssetEvaluationRecord(graphene.ObjectType): @@ -193,16 +250,21 @@ def __init__( partitions_def: Optional[PartitionsDefinition], ): evaluation_with_run_ids = record.get_evaluation_with_run_ids(partitions_def=partitions_def) + evaluation = evaluation_with_run_ids.evaluation + ( + rules, + rules_with_rule_evaluations, + ) = create_graphene_auto_materialize_rules_with_rule_evaluations(evaluation) super().__init__( id=record.id, evaluationId=record.evaluation_id, numRequested=evaluation_with_run_ids.evaluation.true_subset.size, - numSkipped=0, - numDiscarded=0, - rulesWithRuleEvaluations=[], + numSkipped=evaluation.legacy_num_skipped(), + numDiscarded=evaluation.legacy_num_discarded(), + rulesWithRuleEvaluations=rules_with_rule_evaluations, timestamp=record.timestamp, runIds=evaluation_with_run_ids.run_ids, - rules=[], + rules=sorted(rules, key=lambda rule: rule.className), assetKey=GrapheneAssetKey(path=record.asset_key.path), ) diff --git a/python_modules/dagster-graphql/dagster_graphql/schema/auto_materialize_policy.py b/python_modules/dagster-graphql/dagster_graphql/schema/auto_materialize_policy.py index 37677d7606702..56de3d2728d26 100644 --- a/python_modules/dagster-graphql/dagster_graphql/schema/auto_materialize_policy.py +++ b/python_modules/dagster-graphql/dagster_graphql/schema/auto_materialize_policy.py @@ -6,7 +6,6 @@ ) from dagster._core.definitions.auto_materialize_rule import ( AutoMaterializeDecisionType, - AutoMaterializeRuleSnapshot, DiscardOnMaxMaterializationsExceededRule, ) @@ -23,11 +22,12 @@ class GrapheneAutoMaterializeRule(graphene.ObjectType): class Meta: name = "AutoMaterializeRule" - def __init__(self, auto_materialize_rule_snapshot: AutoMaterializeRuleSnapshot): + def __init__(self, description: str, decision_type: AutoMaterializeDecisionType): super().__init__( - decisionType=auto_materialize_rule_snapshot.decision_type, - description=auto_materialize_rule_snapshot.description, - className=auto_materialize_rule_snapshot.class_name, + decisionType=decision_type, + description=description, + # the class name just needs to be distinct for each rule, so we use the description + className=description, ) @@ -46,15 +46,16 @@ def __init__(self, auto_materialize_policy: AutoMaterializePolicy): # for now, we don't represent the max materializations per minute rule as a proper # rule in the serialized AutoMaterializePolicy object, but do so in the GraphQL layer rules = [ - GrapheneAutoMaterializeRule(rule.to_snapshot()) + GrapheneAutoMaterializeRule(rule.description, rule.decision_type) for rule in auto_materialize_policy.rules ] if auto_materialize_policy.max_materializations_per_minute: rules.append( GrapheneAutoMaterializeRule( - DiscardOnMaxMaterializationsExceededRule( + description=DiscardOnMaxMaterializationsExceededRule( limit=auto_materialize_policy.max_materializations_per_minute - ).to_snapshot() + ).description, + decision_type=AutoMaterializeDecisionType.DISCARD, ) ) super().__init__( diff --git a/python_modules/dagster-graphql/dagster_graphql_tests/graphql/test_auto_materialize_asset_evaluations.py b/python_modules/dagster-graphql/dagster_graphql_tests/graphql/test_auto_materialize_asset_evaluations.py index 54c4a831786c3..b39307ad339dd 100644 --- a/python_modules/dagster-graphql/dagster_graphql_tests/graphql/test_auto_materialize_asset_evaluations.py +++ b/python_modules/dagster-graphql/dagster_graphql_tests/graphql/test_auto_materialize_asset_evaluations.py @@ -6,6 +6,7 @@ from dagster._core.definitions.asset_daemon_cursor import ( AssetDaemonCursor, ) +from dagster._core.definitions.auto_materialize_policy import AutoMaterializePolicy from dagster._core.definitions.auto_materialize_rule_evaluation import ( deserialize_auto_materialize_asset_evaluation_to_asset_condition_evaluation_with_run_ids, ) @@ -393,7 +394,7 @@ def test_automation_policy_sensor(self, graphql_context: WorkspaceRequestContext def test_get_historic_rules(self, graphql_context: WorkspaceRequestContext): evaluation1 = deserialize_auto_materialize_asset_evaluation_to_asset_condition_evaluation_with_run_ids( - '{"__class__": "AutoMaterializeAssetEvaluation", "asset_key": {"__class__": "AssetKey", "path": ["asset_one"]}, "num_discarded": 0, "num_requested": 0, "num_skipped": 0, "partition_subsets_by_condition": [], "rule_snapshots": null, "run_ids": {"__set__": []}}', + '{"__class__": "AutoMaterializeAssetEvaluation", "asset_key": {"__class__": "AssetKey", "path": ["asset_one"]}, "num_discarded": 0, "num_requested": 0, "num_skipped": 0, "partition_subsets_by_condition": [], "rule_snapshots": [{"__class__": "AutoMaterializeRuleSnapshot", "class_name": "SkipOnParentMissingRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.SKIP"}, "description": "waiting on upstream data to be present"}, {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "MaterializeOnParentUpdatedRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.MATERIALIZE"}, "description": "upstream data has changed since latest materialization"}, {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "SkipOnRequiredButNonexistentParentsRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.SKIP"}, "description": "required parent partitions do not exist"}, {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "SkipOnBackfillInProgressRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.SKIP"}, "description": "targeted by an in-progress backfill"}, {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "SkipOnParentOutdatedRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.SKIP"}, "description": "waiting on upstream data to be up to date"}, {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "MaterializeOnRequiredForFreshnessRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.MATERIALIZE"}, "description": "required to meet this or downstream asset\'s freshness policy"}, {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "MaterializeOnMissingRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.MATERIALIZE"}, "description": "materialization is missing"}], "run_ids": {"__set__": []}}', None, ) evaluation2 = deserialize_auto_materialize_asset_evaluation_to_asset_condition_evaluation_with_run_ids( @@ -411,8 +412,23 @@ def test_get_historic_rules(self, graphql_context: WorkspaceRequestContext): QUERY, variables={"assetKey": {"path": ["asset_one"]}, "limit": 10, "cursor": None}, ) + expected_rules = sorted( + [ + { + "className": rs.description, + "decisionType": rs.decision_type.value, + "description": rs.description, + } + for rs in AutoMaterializePolicy.eager().rule_snapshots + ], + key=lambda x: x["className"], + ) + assert len(results.data["autoMaterializeAssetEvaluationsOrError"]["records"]) == 1 - assert results.data["autoMaterializeAssetEvaluationsOrError"]["records"][0]["rules"] == [] + assert ( + results.data["autoMaterializeAssetEvaluationsOrError"]["records"][0]["rules"] + == expected_rules + ) assert results.data["autoMaterializeAssetEvaluationsOrError"]["records"][0]["assetKey"] == { "path": ["asset_one"] } @@ -429,7 +445,7 @@ def test_get_historic_rules(self, graphql_context: WorkspaceRequestContext): "rules" ] ) - == 0 + == 1 ) results_by_evaluation_id = execute_dagster_graphql( @@ -473,8 +489,8 @@ def test_get_required_but_nonexistent_parent_evaluation( self, graphql_context: WorkspaceRequestContext ): evaluation = deserialize_auto_materialize_asset_evaluation_to_asset_condition_evaluation_with_run_ids( - '{"__class__": "AutoMaterializeAssetEvaluation", "asset_key": {"__class__": "AssetKey", "path": ["upstream_static_partitioned_asset"]}, "num_discarded": 0, "num_requested": 0, "num_skipped": 1, "partition_subsets_by_condition": [[{"__class__": "AutoMaterializeRuleEvaluation", "evaluation_data": {"__class__": "WaitingOnAssetsRuleEvaluationData", "waiting_on_asset_keys": {"__frozenset__": [{"__class__": "AssetKey", "path": ["blah"]}]}}, "rule_snapshot": {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "SkipOnRequiredButNonexistentParentsRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.SKIP"}, "description": "required parent partitions do not exist"}}, {"__class__": "SerializedPartitionsSubset", "serialized_partitions_def_class_name": "StaticPartitionsDefinition", "serialized_partitions_def_unique_id": "7c2047f8b02e90a69136c1a657bd99ad80b433a2", "serialized_subset": "{\\"version\\": 1, \\"subset\\": [\\"a\\"]}"}]], "rule_snapshots": null, "run_ids": {"__set__": []}}', - StaticPartitionsDefinition(["a", "b"]), + '{"__class__": "AutoMaterializeAssetEvaluation", "asset_key": {"__class__": "AssetKey", "path": ["upstream_static_partitioned_asset"]}, "num_discarded": 0, "num_requested": 0, "num_skipped": 1, "partition_subsets_by_condition": [[{"__class__": "AutoMaterializeRuleEvaluation", "evaluation_data": {"__class__": "WaitingOnAssetsRuleEvaluationData", "waiting_on_asset_keys": {"__frozenset__": [{"__class__": "AssetKey", "path": ["blah"]}]}}, "rule_snapshot": {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "SkipOnRequiredButNonexistentParentsRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.SKIP"}, "description": "required parent partitions do not exist"}}, {"__class__": "SerializedPartitionsSubset", "serialized_partitions_def_class_name": "StaticPartitionsDefinition", "serialized_partitions_def_unique_id": "7c2047f8b02e90a69136c1a657bd99ad80b433a2", "serialized_subset": "{\\"version\\": 1, \\"subset\\": [\\"a\\"]}"}]], "rule_snapshots": [{"__class__": "AutoMaterializeRuleSnapshot", "class_name": "SkipOnParentMissingRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.SKIP"}, "description": "waiting on upstream data to be present"}, {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "MaterializeOnParentUpdatedRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.MATERIALIZE"}, "description": "upstream data has changed since latest materialization"}, {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "SkipOnRequiredButNonexistentParentsRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.SKIP"}, "description": "required parent partitions do not exist"}, {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "SkipOnBackfillInProgressRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.SKIP"}, "description": "targeted by an in-progress backfill"}, {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "SkipOnParentOutdatedRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.SKIP"}, "description": "waiting on upstream data to be up to date"}, {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "MaterializeOnRequiredForFreshnessRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.MATERIALIZE"}, "description": "required to meet this or downstream asset\'s freshness policy"}, {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "MaterializeOnMissingRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.MATERIALIZE"}, "description": "materialization is missing"}], "run_ids": {"__set__": []}}', + StaticPartitionsDefinition(["a", "b", "c", "d", "e", "f"]), ) check.not_none( graphql_context.instance.schedule_storage @@ -493,6 +509,18 @@ def test_get_required_but_nonexistent_parent_evaluation( }, ) + expected_rules = sorted( + [ + { + "className": rs.description, + "decisionType": rs.decision_type.value, + "description": rs.description, + } + for rs in AutoMaterializePolicy.eager().rule_snapshots + ], + key=lambda x: x["className"], + ) + assert results.data == { "assetNodeOrError": { "currentAutoMaterializeEvaluationId": None, @@ -501,31 +529,45 @@ def test_get_required_but_nonexistent_parent_evaluation( "records": [ { "numRequested": 0, - "numSkipped": 0, + "numSkipped": 1, "numDiscarded": 0, - "rulesWithRuleEvaluations": [], - "rules": [], + "rulesWithRuleEvaluations": [ + { + "rule": {"decisionType": "SKIP"}, + "ruleEvaluations": [ + { + "evaluationData": { + "waitingOnAssetKeys": [{"path": ["blah"]}], + }, + "partitionKeysOrError": { + "partitionKeys": ["a"], + }, + } + ], + }, + ], + "rules": expected_rules, "assetKey": {"path": ["upstream_static_partitioned_asset"]}, } ], }, } - def _test_get_evaluations(self, graphql_context: WorkspaceRequestContext): + def test_get_evaluations(self, graphql_context: WorkspaceRequestContext): evaluation1 = deserialize_auto_materialize_asset_evaluation_to_asset_condition_evaluation_with_run_ids( - '{"__class__": "AutoMaterializeAssetEvaluation", "asset_key": {"__class__": "AssetKey", "path": ["asset_one"]}, "num_discarded": 0, "num_requested": 0, "num_skipped": 0, "partition_subsets_by_condition": [], "rule_snapshots": null, "run_ids": {"__set__": []}}', + '{"__class__": "AutoMaterializeAssetEvaluation", "asset_key": {"__class__": "AssetKey", "path": ["asset_one"]}, "num_discarded": 0, "num_requested": 0, "num_skipped": 0, "partition_subsets_by_condition": [], "rule_snapshots": [{"__class__": "AutoMaterializeRuleSnapshot", "class_name": "SkipOnParentMissingRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.SKIP"}, "description": "waiting on upstream data to be present"}, {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "MaterializeOnParentUpdatedRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.MATERIALIZE"}, "description": "upstream data has changed since latest materialization"}, {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "SkipOnRequiredButNonexistentParentsRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.SKIP"}, "description": "required parent partitions do not exist"}, {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "SkipOnBackfillInProgressRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.SKIP"}, "description": "targeted by an in-progress backfill"}, {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "SkipOnParentOutdatedRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.SKIP"}, "description": "waiting on upstream data to be up to date"}, {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "MaterializeOnRequiredForFreshnessRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.MATERIALIZE"}, "description": "required to meet this or downstream asset\'s freshness policy"}, {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "MaterializeOnMissingRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.MATERIALIZE"}, "description": "materialization is missing"}], "run_ids": {"__set__": []}}', None, ) evaluation2 = deserialize_auto_materialize_asset_evaluation_to_asset_condition_evaluation_with_run_ids( - '{"__class__": "AutoMaterializeAssetEvaluation", "asset_key": {"__class__": "AssetKey", "path": ["asset_two"]}, "num_discarded": 0, "num_requested": 1, "num_skipped": 0, "partition_subsets_by_condition": [[{"__class__": "AutoMaterializeRuleEvaluation", "evaluation_data": null, "rule_snapshot": {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "MaterializeOnMissingRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.MATERIALIZE"}, "description": "materialization is missing"}}, null]], "rule_snapshots": null, "run_ids": {"__set__": []}}', + '{"__class__": "AutoMaterializeAssetEvaluation", "asset_key": {"__class__": "AssetKey", "path": ["asset_two"]}, "num_discarded": 0, "num_requested": 1, "num_skipped": 0, "partition_subsets_by_condition": [[{"__class__": "AutoMaterializeRuleEvaluation", "evaluation_data": null, "rule_snapshot": {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "MaterializeOnMissingRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.MATERIALIZE"}, "description": "materialization is missing"}}, null]], "rule_snapshots": [{"__class__": "AutoMaterializeRuleSnapshot", "class_name": "SkipOnParentMissingRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.SKIP"}, "description": "waiting on upstream data to be present"}, {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "MaterializeOnParentUpdatedRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.MATERIALIZE"}, "description": "upstream data has changed since latest materialization"}, {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "SkipOnRequiredButNonexistentParentsRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.SKIP"}, "description": "required parent partitions do not exist"}, {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "SkipOnBackfillInProgressRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.SKIP"}, "description": "targeted by an in-progress backfill"}, {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "SkipOnParentOutdatedRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.SKIP"}, "description": "waiting on upstream data to be up to date"}, {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "MaterializeOnRequiredForFreshnessRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.MATERIALIZE"}, "description": "required to meet this or downstream asset\'s freshness policy"}, {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "MaterializeOnMissingRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.MATERIALIZE"}, "description": "materialization is missing"}], "run_ids": {"__set__": []}}', None, ) evaluation3 = deserialize_auto_materialize_asset_evaluation_to_asset_condition_evaluation_with_run_ids( - '{"__class__": "AutoMaterializeAssetEvaluation", "asset_key": {"__class__": "AssetKey", "path": ["asset_three"]}, "num_discarded": 0, "num_requested": 0, "num_skipped": 1, "partition_subsets_by_condition": [[{"__class__": "AutoMaterializeRuleEvaluation", "evaluation_data": {"__class__": "WaitingOnAssetsRuleEvaluationData", "waiting_on_asset_keys": {"__frozenset__": [{"__class__": "AssetKey", "path": ["asset_two"]}]}}, "rule_snapshot": {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "SkipOnParentOutdatedRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.SKIP"}, "description": "waiting on upstream data to be up to date"}}, null]], "rule_snapshots": null, "run_ids": {"__set__": []}}', + '{"__class__": "AutoMaterializeAssetEvaluation", "asset_key": {"__class__": "AssetKey", "path": ["asset_three"]}, "num_discarded": 0, "num_requested": 0, "num_skipped": 1, "partition_subsets_by_condition": [[{"__class__": "AutoMaterializeRuleEvaluation", "evaluation_data": {"__class__": "WaitingOnAssetsRuleEvaluationData", "waiting_on_asset_keys": {"__frozenset__": [{"__class__": "AssetKey", "path": ["asset_two"]}]}}, "rule_snapshot": {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "SkipOnParentOutdatedRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.SKIP"}, "description": "waiting on upstream data to be up to date"}}, null]], "rule_snapshots": [{"__class__": "AutoMaterializeRuleSnapshot", "class_name": "SkipOnParentMissingRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.SKIP"}, "description": "waiting on upstream data to be present"}, {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "MaterializeOnParentUpdatedRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.MATERIALIZE"}, "description": "upstream data has changed since latest materialization"}, {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "SkipOnRequiredButNonexistentParentsRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.SKIP"}, "description": "required parent partitions do not exist"}, {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "SkipOnBackfillInProgressRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.SKIP"}, "description": "targeted by an in-progress backfill"}, {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "SkipOnParentOutdatedRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.SKIP"}, "description": "waiting on upstream data to be up to date"}, {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "MaterializeOnRequiredForFreshnessRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.MATERIALIZE"}, "description": "required to meet this or downstream asset\'s freshness policy"}, {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "MaterializeOnMissingRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.MATERIALIZE"}, "description": "materialization is missing"}], "run_ids": {"__set__": []}}', None, ) evaluation4 = deserialize_auto_materialize_asset_evaluation_to_asset_condition_evaluation_with_run_ids( - '{"__class__": "AutoMaterializeAssetEvaluation", "asset_key": {"__class__": "AssetKey", "path": ["asset_four"]}, "num_discarded": 0, "num_requested": 1, "num_skipped": 0, "partition_subsets_by_condition": [[{"__class__": "AutoMaterializeRuleEvaluation", "evaluation_data": {"__class__": "ParentUpdatedRuleEvaluationData", "updated_asset_keys": {"__frozenset__": [{"__class__": "AssetKey", "path": ["asset_two"]}]}, "will_update_asset_keys": {"__frozenset__": [{"__class__": "AssetKey", "path": ["asset_three"]}]}}, "rule_snapshot": {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "MaterializeOnParentUpdatedRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.MATERIALIZE"}, "description": "upstream data has changed since latest materialization"}}, null]], "rule_snapshots": null, "run_ids": {"__set__": []}}', + '{"__class__": "AutoMaterializeAssetEvaluation", "asset_key": {"__class__": "AssetKey", "path": ["asset_four"]}, "num_discarded": 0, "num_requested": 1, "num_skipped": 0, "partition_subsets_by_condition": [[{"__class__": "AutoMaterializeRuleEvaluation", "evaluation_data": {"__class__": "ParentUpdatedRuleEvaluationData", "updated_asset_keys": {"__frozenset__": [{"__class__": "AssetKey", "path": ["asset_two"]}]}, "will_update_asset_keys": {"__frozenset__": [{"__class__": "AssetKey", "path": ["asset_three"]}]}}, "rule_snapshot": {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "MaterializeOnParentUpdatedRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.MATERIALIZE"}, "description": "upstream data has changed since latest materialization"}}, null]], "rule_snapshots": [{"__class__": "AutoMaterializeRuleSnapshot", "class_name": "SkipOnParentMissingRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.SKIP"}, "description": "waiting on upstream data to be present"}, {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "MaterializeOnParentUpdatedRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.MATERIALIZE"}, "description": "upstream data has changed since latest materialization"}, {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "SkipOnRequiredButNonexistentParentsRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.SKIP"}, "description": "required parent partitions do not exist"}, {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "SkipOnBackfillInProgressRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.SKIP"}, "description": "targeted by an in-progress backfill"}, {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "SkipOnParentOutdatedRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.SKIP"}, "description": "waiting on upstream data to be up to date"}, {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "MaterializeOnRequiredForFreshnessRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.MATERIALIZE"}, "description": "required to meet this or downstream asset\'s freshness policy"}, {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "MaterializeOnMissingRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.MATERIALIZE"}, "description": "materialization is missing"}], "run_ids": {"__set__": []}}', None, ) results = execute_dagster_graphql( @@ -535,6 +577,7 @@ def _test_get_evaluations(self, graphql_context: WorkspaceRequestContext): ) assert results.data == { "autoMaterializeAssetEvaluationsOrError": {"records": []}, + "assetNodeOrError": {"currentAutoMaterializeEvaluationId": None}, } check.not_none( @@ -548,6 +591,17 @@ def _test_get_evaluations(self, graphql_context: WorkspaceRequestContext): QUERY, variables={"assetKey": {"path": ["asset_one"]}, "limit": 10, "cursor": None}, ) + expected_rules = sorted( + [ + { + "className": rs.description, + "decisionType": rs.decision_type.value, + "description": rs.description, + } + for rs in AutoMaterializePolicy.eager().rule_snapshots + ], + key=lambda x: x["className"], + ) assert results.data == { "assetNodeOrError": { "currentAutoMaterializeEvaluationId": None, @@ -555,9 +609,11 @@ def _test_get_evaluations(self, graphql_context: WorkspaceRequestContext): "autoMaterializeAssetEvaluationsOrError": { "records": [ { + "assetKey": {"path": ["asset_one"]}, "numRequested": 0, "numSkipped": 0, "numDiscarded": 0, + "rules": expected_rules, "rulesWithRuleEvaluations": [], } ], @@ -576,9 +632,11 @@ def _test_get_evaluations(self, graphql_context: WorkspaceRequestContext): "autoMaterializeAssetEvaluationsOrError": { "records": [ { + "assetKey": {"path": ["asset_two"]}, "numRequested": 1, "numSkipped": 0, "numDiscarded": 0, + "rules": expected_rules, "rulesWithRuleEvaluations": [ { "rule": {"decisionType": "MATERIALIZE"}, @@ -601,15 +659,15 @@ def _test_get_evaluations(self, graphql_context: WorkspaceRequestContext): variables={"assetKey": {"path": ["asset_three"]}, "limit": 10, "cursor": None}, ) assert results.data == { - "assetNodeOrError": { - "currentAutoMaterializeEvaluationId": None, - }, + "assetNodeOrError": {}, "autoMaterializeAssetEvaluationsOrError": { "records": [ { + "assetKey": {"path": ["asset_three"]}, "numRequested": 0, "numSkipped": 1, "numDiscarded": 0, + "rules": expected_rules, "rulesWithRuleEvaluations": [ { "rule": {"decisionType": "SKIP"}, @@ -634,15 +692,15 @@ def _test_get_evaluations(self, graphql_context: WorkspaceRequestContext): variables={"assetKey": {"path": ["asset_four"]}, "limit": 10, "cursor": None}, ) assert results.data == { - "assetNodeOrError": { - "currentAutoMaterializeEvaluationId": None, - }, + "assetNodeOrError": {}, "autoMaterializeAssetEvaluationsOrError": { "records": [ { + "assetKey": {"path": ["asset_four"]}, "numRequested": 1, "numSkipped": 0, "numDiscarded": 0, + "rules": expected_rules, "rulesWithRuleEvaluations": [ { "rule": {"decisionType": "MATERIALIZE"}, @@ -662,9 +720,9 @@ def _test_get_evaluations(self, graphql_context: WorkspaceRequestContext): }, } - def _test_get_evaluations_with_partitions(self, graphql_context: WorkspaceRequestContext): + def test_get_evaluations_with_partitions(self, graphql_context: WorkspaceRequestContext): evaluation = deserialize_auto_materialize_asset_evaluation_to_asset_condition_evaluation_with_run_ids( - '{"__class__": "AutoMaterializeAssetEvaluation", "asset_key": {"__class__": "AssetKey", "path": ["upstream_static_partitioned_asset"]}, "num_discarded": 0, "num_requested": 2, "num_skipped": 0, "partition_subsets_by_condition": [[{"__class__": "AutoMaterializeRuleEvaluation", "evaluation_data": null, "rule_snapshot": {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "MaterializeOnMissingRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.MATERIALIZE"}, "description": "materialization is missing"}}, {"__class__": "SerializedPartitionsSubset", "serialized_partitions_def_class_name": "StaticPartitionsDefinition", "serialized_partitions_def_unique_id": "7c2047f8b02e90a69136c1a657bd99ad80b433a2", "serialized_subset": "{\\"version\\": 1, \\"subset\\": [\\"a\\", \\"b\\"]}"}]], "rule_snapshots": null, "run_ids": {"__set__": []}}', + '{"__class__": "AutoMaterializeAssetEvaluation", "asset_key": {"__class__": "AssetKey", "path": ["upstream_static_partitioned_asset"]}, "num_discarded": 0, "num_requested": 2, "num_skipped": 0, "partition_subsets_by_condition": [[{"__class__": "AutoMaterializeRuleEvaluation", "evaluation_data": null, "rule_snapshot": {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "MaterializeOnMissingRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.MATERIALIZE"}, "description": "materialization is missing"}}, {"__class__": "SerializedPartitionsSubset", "serialized_partitions_def_class_name": "StaticPartitionsDefinition", "serialized_partitions_def_unique_id": "7c2047f8b02e90a69136c1a657bd99ad80b433a2", "serialized_subset": "{\\"version\\": 1, \\"subset\\": [\\"a\\", \\"b\\"]}"}]], "rule_snapshots": [{"__class__": "AutoMaterializeRuleSnapshot", "class_name": "SkipOnParentMissingRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.SKIP"}, "description": "waiting on upstream data to be present"}, {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "MaterializeOnParentUpdatedRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.MATERIALIZE"}, "description": "upstream data has changed since latest materialization"}, {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "SkipOnRequiredButNonexistentParentsRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.SKIP"}, "description": "required parent partitions do not exist"}, {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "SkipOnBackfillInProgressRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.SKIP"}, "description": "targeted by an in-progress backfill"}, {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "SkipOnParentOutdatedRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.SKIP"}, "description": "waiting on upstream data to be up to date"}, {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "MaterializeOnRequiredForFreshnessRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.MATERIALIZE"}, "description": "required to meet this or downstream asset\'s freshness policy"}, {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "MaterializeOnMissingRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.MATERIALIZE"}, "description": "materialization is missing"}], "run_ids": {"__set__": []}}', StaticPartitionsDefinition(["a", "b"]), ) results = execute_dagster_graphql( diff --git a/python_modules/dagster/dagster/_core/definitions/asset_condition.py b/python_modules/dagster/dagster/_core/definitions/asset_condition.py index 82726fdb97b34..91a57e0efe5d5 100644 --- a/python_modules/dagster/dagster/_core/definitions/asset_condition.py +++ b/python_modules/dagster/dagster/_core/definitions/asset_condition.py @@ -182,21 +182,19 @@ def equivalent_to_stored_evaluation(self, other: Optional["AssetConditionEvaluat ) ) - def discarded_subset(self, condition: "AssetCondition") -> Optional[AssetSubset]: + def discarded_subset(self) -> Optional[AssetSubset]: """Returns the AssetSubset representing asset partitions that were discarded during this evaluation. Note that 'discarding' is a deprecated concept that is only used for backwards compatibility. """ - not_discard_condition = condition.not_discard_condition - if not not_discard_condition or len(self.child_evaluations) != 3: + if len(self.child_evaluations) != 3: return None - - not_discard_evaluation = self.child_evaluations[2] + not_discard_evaluation = self.child_evaluations[-1] discard_evaluation = not_discard_evaluation.child_evaluations[0] return discard_evaluation.true_subset - def get_requested_or_discarded_subset(self, condition: "AssetCondition") -> AssetSubset: - discarded_subset = self.discarded_subset(condition) + def get_requested_or_discarded_subset(self) -> AssetSubset: + discarded_subset = self.discarded_subset() if discarded_subset is None: return self.true_subset else: @@ -216,6 +214,20 @@ def for_child(self, child_condition: "AssetCondition") -> Optional["AssetConditi def with_run_ids(self, run_ids: AbstractSet[str]) -> "AssetConditionEvaluationWithRunIds": return AssetConditionEvaluationWithRunIds(evaluation=self, run_ids=frozenset(run_ids)) + def legacy_num_skipped(self) -> int: + if len(self.child_evaluations) < 2: + return 0 + + not_skip_evaluation = self.child_evaluations[-1] + skip_evaluation = not_skip_evaluation.child_evaluations[0] + return skip_evaluation.true_subset.size - self.legacy_num_discarded() + + def legacy_num_discarded(self) -> int: + discarded_subset = self.discarded_subset() + if discarded_subset is None: + return 0 + return discarded_subset.size + @whitelist_for_serdes class AssetConditionEvaluationState(NamedTuple): @@ -351,6 +363,12 @@ def is_legacy(self) -> bool: def children(self) -> Sequence["AssetCondition"]: return [] + @property + def not_skip_condition(self) -> Optional["AssetCondition"]: + if not self.is_legacy: + return None + return self.children[1] + @property def not_discard_condition(self) -> Optional["AssetCondition"]: if not self.is_legacy or not len(self.children) == 3: diff --git a/python_modules/dagster/dagster/_core/definitions/asset_condition_evaluation_context.py b/python_modules/dagster/dagster/_core/definitions/asset_condition_evaluation_context.py index a5a1d78e5f928..b5c01ff77bbbd 100644 --- a/python_modules/dagster/dagster/_core/definitions/asset_condition_evaluation_context.py +++ b/python_modules/dagster/dagster/_core/definitions/asset_condition_evaluation_context.py @@ -208,9 +208,7 @@ def previous_tick_requested_subset(self) -> AssetSubset: ): return self.empty_subset() - return self.previous_evaluation_state.evaluation.get_requested_or_discarded_subset( - self.condition - ) + return self.previous_evaluation_state.evaluation.get_requested_or_discarded_subset() @property def materialized_requested_or_discarded_since_previous_tick_subset(self) -> AssetSubset: diff --git a/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule_evaluation.py b/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule_evaluation.py index 9c18ef3793a74..8abcb8ddc7918 100644 --- a/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule_evaluation.py +++ b/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule_evaluation.py @@ -274,7 +274,7 @@ def _get_child_rule_evaluation( true_subset=true_subset, candidate_subset=HistoricalAllPartitionsSubsetSentinel() if is_partitioned - else AssetSubset.empty(asset_key, None), + else AssetSubset.all(asset_key, None), start_timestamp=None, end_timestamp=None, subsets_with_metadata=subsets_with_metadata, @@ -356,15 +356,21 @@ def _get_child_decision_type_evaluation( evaluation.condition_snapshot.unique_id, ] unique_id = hashlib.md5("".join(unique_id_parts).encode()).hexdigest() + + if is_partitioned: + # In reality, we'd like to invert the inner true_subset here, but this is an + # expensive operation, and error-prone as the set of all partitions may have changed + # since the evaluation was stored. Instead, we just use an empty subset. + true_subset = AssetSubset(asset_key, evaluation.true_subset.subset_value.empty_subset()) + else: + true_subset = evaluation.true_subset._replace( + value=not evaluation.true_subset.bool_value + ) return AssetConditionEvaluation( condition_snapshot=AssetConditionSnapshot( class_name=NotAssetCondition.__name__, description="Not", unique_id=unique_id ), - # for partitioned assets, we don't bother calculating the true subset, as we can't - # properly deserialize the inner results - true_subset=evaluation.true_subset - if evaluation.true_subset.is_partitioned - else evaluation.true_subset._replace(value=not evaluation.true_subset.bool_value), + true_subset=true_subset, candidate_subset=HistoricalAllPartitionsSubsetSentinel() if is_partitioned else AssetSubset.all(asset_key, None), @@ -399,12 +405,14 @@ def unpack( is_partitioned = any(tup[1] is not None for tup in partition_subsets_by_condition) # get the sub-evaluations for each decision type - materialize_evaluation = self._get_child_decision_type_evaluation( - asset_key, - partition_subsets_by_condition, - rule_snapshots, - is_partitioned, - AutoMaterializeDecisionType.MATERIALIZE, + materialize_evaluation = check.not_none( + self._get_child_decision_type_evaluation( + asset_key, + partition_subsets_by_condition, + rule_snapshots, + is_partitioned, + AutoMaterializeDecisionType.MATERIALIZE, + ) ) not_skip_evaluation = self._get_child_decision_type_evaluation( asset_key, @@ -436,9 +444,15 @@ def unpack( class_name=AndAssetCondition.__name__, description="All of", unique_id=unique_id ) + true_subset = materialize_evaluation.true_subset + if not_skip_evaluation: + true_subset -= not_skip_evaluation.child_evaluations[0].true_subset + if not_discard_evaluation: + true_subset -= not_discard_evaluation.child_evaluations[0].true_subset + return AssetConditionEvaluation( condition_snapshot=condition_snapshot, - true_subset=reduce(operator.and_, (e.true_subset for e in child_evaluations)), + true_subset=true_subset, candidate_subset=HistoricalAllPartitionsSubsetSentinel() if is_partitioned else AssetSubset.all(asset_key, None), From 4098438acf2cc30312e9b9e8bafd87c7f34b0e78 Mon Sep 17 00:00:00 2001 From: Owen Kephart Date: Wed, 13 Dec 2023 10:52:12 -0800 Subject: [PATCH 18/56] PoC: AssetConditionEvaluation GQL --- .../src/graphql/possibleTypes.generated.json | 2 +- .../ui-core/src/graphql/schema.graphql | 83 ++ .../packages/ui-core/src/graphql/types.ts | 292 ++++++ .../fetch_asset_condition_evaluations.py | 132 +++ .../schema/asset_condition_evaluations.py | 308 ++++++ .../dagster_graphql/schema/asset_graph.py | 4 +- .../dagster_graphql/schema/roots/query.py | 63 ++ .../test_asset_condition_evaluations.py | 895 ++++++++++++++++++ .../auto_materialize_rule_evaluation.py | 1 + 9 files changed, 1776 insertions(+), 4 deletions(-) create mode 100644 python_modules/dagster-graphql/dagster_graphql/implementation/fetch_asset_condition_evaluations.py create mode 100644 python_modules/dagster-graphql/dagster_graphql/schema/asset_condition_evaluations.py create mode 100644 python_modules/dagster-graphql/dagster_graphql_tests/graphql/test_asset_condition_evaluations.py diff --git a/js_modules/dagster-ui/packages/ui-core/src/graphql/possibleTypes.generated.json b/js_modules/dagster-ui/packages/ui-core/src/graphql/possibleTypes.generated.json index 173b10b2c0b9f..8a37a392790a0 100644 --- a/js_modules/dagster-ui/packages/ui-core/src/graphql/possibleTypes.generated.json +++ b/js_modules/dagster-ui/packages/ui-core/src/graphql/possibleTypes.generated.json @@ -1 +1 @@ -{"DisplayableEvent":["EngineEvent","ExecutionStepOutputEvent","ExpectationResult","FailureMetadata","HandledOutputEvent","LoadedInputEvent","ObjectStoreOperationResult","ResourceInitFailureEvent","ResourceInitStartedEvent","ResourceInitSuccessEvent","StepWorkerStartedEvent","StepWorkerStartingEvent","MaterializationEvent","ObservationEvent","TypeCheck"],"MarkerEvent":["EngineEvent","ResourceInitFailureEvent","ResourceInitStartedEvent","ResourceInitSuccessEvent","StepWorkerStartedEvent","StepWorkerStartingEvent"],"ErrorEvent":["EngineEvent","ExecutionStepFailureEvent","ExecutionStepUpForRetryEvent","HookErroredEvent","RunFailureEvent","ResourceInitFailureEvent"],"MessageEvent":["EngineEvent","ExecutionStepFailureEvent","ExecutionStepInputEvent","ExecutionStepOutputEvent","ExecutionStepRestartEvent","ExecutionStepSkippedEvent","ExecutionStepStartEvent","ExecutionStepSuccessEvent","ExecutionStepUpForRetryEvent","HandledOutputEvent","HookCompletedEvent","HookErroredEvent","HookSkippedEvent","LoadedInputEvent","LogMessageEvent","ObjectStoreOperationEvent","RunCanceledEvent","RunCancelingEvent","RunDequeuedEvent","RunEnqueuedEvent","RunFailureEvent","ResourceInitFailureEvent","ResourceInitStartedEvent","ResourceInitSuccessEvent","RunStartEvent","RunStartingEvent","RunSuccessEvent","StepExpectationResultEvent","StepWorkerStartedEvent","StepWorkerStartingEvent","MaterializationEvent","ObservationEvent","AssetMaterializationPlannedEvent","LogsCapturedEvent","AlertStartEvent","AlertSuccessEvent","AlertFailureEvent","AssetCheckEvaluationPlannedEvent","AssetCheckEvaluationEvent"],"RunEvent":["RunCanceledEvent","RunCancelingEvent","RunDequeuedEvent","RunEnqueuedEvent","RunFailureEvent","RunStartEvent","RunStartingEvent","RunSuccessEvent","AssetMaterializationPlannedEvent","AlertStartEvent","AlertSuccessEvent","AlertFailureEvent"],"PipelineRunStepStats":["RunStepStats"],"StepEvent":["EngineEvent","ExecutionStepFailureEvent","ExecutionStepInputEvent","ExecutionStepOutputEvent","ExecutionStepRestartEvent","ExecutionStepSkippedEvent","ExecutionStepStartEvent","ExecutionStepSuccessEvent","ExecutionStepUpForRetryEvent","HandledOutputEvent","HookCompletedEvent","HookErroredEvent","HookSkippedEvent","LoadedInputEvent","ObjectStoreOperationEvent","ResourceInitFailureEvent","ResourceInitStartedEvent","ResourceInitSuccessEvent","StepExpectationResultEvent","StepWorkerStartedEvent","StepWorkerStartingEvent","MaterializationEvent","ObservationEvent","AssetCheckEvaluationPlannedEvent","AssetCheckEvaluationEvent"],"AssetPartitionStatuses":["DefaultPartitionStatuses","MultiPartitionStatuses","TimePartitionStatuses"],"PartitionStatus1D":["TimePartitionStatuses","DefaultPartitionStatuses"],"AssetChecksOrError":["AssetChecks","AssetCheckNeedsMigrationError","AssetCheckNeedsUserCodeUpgrade","AssetCheckNeedsAgentUpgradeError"],"Instigator":["Schedule","Sensor"],"EvaluationStackEntry":["EvaluationStackListItemEntry","EvaluationStackPathEntry","EvaluationStackMapKeyEntry","EvaluationStackMapValueEntry"],"IPipelineSnapshot":["Pipeline","PipelineSnapshot","Job"],"PipelineConfigValidationError":["FieldNotDefinedConfigError","FieldsNotDefinedConfigError","MissingFieldConfigError","MissingFieldsConfigError","RuntimeMismatchConfigError","SelectorTypeConfigError"],"PipelineConfigValidationInvalid":["RunConfigValidationInvalid"],"PipelineConfigValidationResult":["InvalidSubsetError","PipelineConfigValidationValid","RunConfigValidationInvalid","PipelineNotFoundError","PythonError"],"PipelineReference":["PipelineSnapshot","UnknownPipeline"],"PipelineRun":["Run"],"DagsterRunEvent":["ExecutionStepFailureEvent","ExecutionStepInputEvent","ExecutionStepOutputEvent","ExecutionStepSkippedEvent","ExecutionStepStartEvent","ExecutionStepSuccessEvent","ExecutionStepUpForRetryEvent","ExecutionStepRestartEvent","LogMessageEvent","ResourceInitFailureEvent","ResourceInitStartedEvent","ResourceInitSuccessEvent","RunFailureEvent","RunStartEvent","RunEnqueuedEvent","RunDequeuedEvent","RunStartingEvent","RunCancelingEvent","RunCanceledEvent","RunSuccessEvent","StepWorkerStartedEvent","StepWorkerStartingEvent","HandledOutputEvent","LoadedInputEvent","LogsCapturedEvent","ObjectStoreOperationEvent","StepExpectationResultEvent","MaterializationEvent","ObservationEvent","EngineEvent","HookCompletedEvent","HookSkippedEvent","HookErroredEvent","AlertStartEvent","AlertSuccessEvent","AlertFailureEvent","AssetMaterializationPlannedEvent","AssetCheckEvaluationPlannedEvent","AssetCheckEvaluationEvent"],"PipelineRunLogsSubscriptionPayload":["PipelineRunLogsSubscriptionSuccess","PipelineRunLogsSubscriptionFailure"],"RunOrError":["Run","RunNotFoundError","PythonError"],"PipelineRunStatsSnapshot":["RunStatsSnapshot"],"RunStatsSnapshotOrError":["RunStatsSnapshot","PythonError"],"PipelineSnapshotOrError":["PipelineNotFoundError","PipelineSnapshot","PipelineSnapshotNotFoundError","PythonError"],"AssetOrError":["Asset","AssetNotFoundError"],"AssetsOrError":["AssetConnection","PythonError"],"DeletePipelineRunResult":["DeletePipelineRunSuccess","UnauthorizedError","PythonError","RunNotFoundError"],"ExecutionPlanOrError":["ExecutionPlan","RunConfigValidationInvalid","PipelineNotFoundError","InvalidSubsetError","PythonError"],"PipelineOrError":["Pipeline","PipelineNotFoundError","InvalidSubsetError","PythonError"],"ReloadRepositoryLocationMutationResult":["WorkspaceLocationEntry","ReloadNotSupported","RepositoryLocationNotFound","UnauthorizedError","PythonError"],"RepositoryLocationOrLoadError":["RepositoryLocation","PythonError"],"ReloadWorkspaceMutationResult":["Workspace","UnauthorizedError","PythonError"],"ShutdownRepositoryLocationMutationResult":["ShutdownRepositoryLocationSuccess","RepositoryLocationNotFound","UnauthorizedError","PythonError"],"TerminatePipelineExecutionFailure":["TerminateRunFailure"],"TerminatePipelineExecutionSuccess":["TerminateRunSuccess"],"TerminateRunResult":["TerminateRunSuccess","TerminateRunFailure","RunNotFoundError","UnauthorizedError","PythonError"],"ScheduleMutationResult":["PythonError","UnauthorizedError","ScheduleStateResult"],"ScheduleOrError":["Schedule","ScheduleNotFoundError","PythonError"],"SchedulerOrError":["Scheduler","SchedulerNotDefinedError","PythonError"],"SchedulesOrError":["Schedules","RepositoryNotFoundError","PythonError"],"ScheduleTickSpecificData":["ScheduleTickSuccessData","ScheduleTickFailureData"],"LaunchBackfillResult":["LaunchBackfillSuccess","PartitionSetNotFoundError","InvalidStepError","InvalidOutputError","RunConfigValidationInvalid","PipelineNotFoundError","RunConflict","UnauthorizedError","PythonError","InvalidSubsetError","PresetNotFoundError","ConflictingExecutionParamsError","NoModeProvidedError"],"ConfigTypeOrError":["EnumConfigType","CompositeConfigType","RegularConfigType","PipelineNotFoundError","ConfigTypeNotFoundError","PythonError"],"ConfigType":["ArrayConfigType","CompositeConfigType","EnumConfigType","NullableConfigType","RegularConfigType","ScalarUnionConfigType","MapConfigType"],"WrappingConfigType":["ArrayConfigType","NullableConfigType"],"DagsterType":["ListDagsterType","NullableDagsterType","RegularDagsterType"],"DagsterTypeOrError":["RegularDagsterType","PipelineNotFoundError","DagsterTypeNotFoundError","PythonError"],"WrappingDagsterType":["ListDagsterType","NullableDagsterType"],"Error":["AssetCheckNeedsMigrationError","AssetCheckNeedsUserCodeUpgrade","AssetCheckNeedsAgentUpgradeError","AssetNotFoundError","ConflictingExecutionParamsError","ConfigTypeNotFoundError","DagsterTypeNotFoundError","InvalidPipelineRunsFilterError","InvalidSubsetError","ModeNotFoundError","NoModeProvidedError","PartitionSetNotFoundError","PipelineNotFoundError","RunConflict","PipelineSnapshotNotFoundError","PresetNotFoundError","PythonError","ErrorChainLink","UnauthorizedError","ReloadNotSupported","RepositoryLocationNotFound","RepositoryNotFoundError","ResourceNotFoundError","RunGroupNotFoundError","RunNotFoundError","ScheduleNotFoundError","SchedulerNotDefinedError","SensorNotFoundError","DuplicateDynamicPartitionError","InstigationStateNotFoundError","SolidStepStatusUnavailableError","GraphNotFoundError","BackfillNotFoundError","PartitionSubsetDeserializationError","AutoMaterializeAssetEvaluationNeedsMigrationError"],"PipelineRunConflict":["RunConflict"],"PipelineRunNotFoundError":["RunNotFoundError"],"RepositoriesOrError":["RepositoryConnection","RepositoryNotFoundError","PythonError"],"RepositoryOrError":["PythonError","Repository","RepositoryNotFoundError"],"InstigationTypeSpecificData":["SensorData","ScheduleData"],"InstigationStateOrError":["InstigationState","InstigationStateNotFoundError","PythonError"],"InstigationStatesOrError":["InstigationStates","PythonError"],"MetadataEntry":["TableSchemaMetadataEntry","TableMetadataEntry","FloatMetadataEntry","IntMetadataEntry","JsonMetadataEntry","BoolMetadataEntry","MarkdownMetadataEntry","PathMetadataEntry","NotebookMetadataEntry","PythonArtifactMetadataEntry","TextMetadataEntry","UrlMetadataEntry","PipelineRunMetadataEntry","AssetMetadataEntry","JobMetadataEntry","NullMetadataEntry"],"PartitionRunConfigOrError":["PartitionRunConfig","PythonError"],"AssetBackfillStatus":["AssetPartitionsStatusCounts","UnpartitionedAssetStatus"],"PartitionSetOrError":["PartitionSet","PartitionSetNotFoundError","PythonError"],"PartitionSetsOrError":["PartitionSets","PipelineNotFoundError","PythonError"],"PartitionsOrError":["Partitions","PythonError"],"PartitionStatusesOrError":["PartitionStatuses","PythonError"],"PartitionTagsOrError":["PartitionTags","PythonError"],"RunConfigSchemaOrError":["RunConfigSchema","PipelineNotFoundError","InvalidSubsetError","ModeNotFoundError","PythonError"],"LaunchRunResult":["LaunchRunSuccess","InvalidStepError","InvalidOutputError","RunConfigValidationInvalid","PipelineNotFoundError","RunConflict","UnauthorizedError","PythonError","InvalidSubsetError","PresetNotFoundError","ConflictingExecutionParamsError","NoModeProvidedError"],"LaunchRunReexecutionResult":["LaunchRunSuccess","InvalidStepError","InvalidOutputError","RunConfigValidationInvalid","PipelineNotFoundError","RunConflict","UnauthorizedError","PythonError","InvalidSubsetError","PresetNotFoundError","ConflictingExecutionParamsError","NoModeProvidedError"],"LaunchPipelineRunSuccess":["LaunchRunSuccess"],"RunsOrError":["Runs","InvalidPipelineRunsFilterError","PythonError"],"PipelineRuns":["Runs"],"RunGroupOrError":["RunGroup","RunGroupNotFoundError","PythonError"],"SensorOrError":["Sensor","SensorNotFoundError","UnauthorizedError","PythonError"],"SensorsOrError":["Sensors","RepositoryNotFoundError","PythonError"],"StopSensorMutationResultOrError":["StopSensorMutationResult","UnauthorizedError","PythonError"],"ISolidDefinition":["CompositeSolidDefinition","SolidDefinition"],"SolidContainer":["Pipeline","PipelineSnapshot","Job","CompositeSolidDefinition","Graph"],"SolidStepStatsOrError":["SolidStepStatsConnection","SolidStepStatusUnavailableError"],"WorkspaceOrError":["Workspace","PythonError"],"WorkspaceLocationStatusEntriesOrError":["WorkspaceLocationStatusEntries","PythonError"],"GraphOrError":["Graph","GraphNotFoundError","PythonError"],"ResourceDetailsOrError":["ResourceDetails","ResourceNotFoundError","PythonError"],"ResourcesOrError":["ResourceDetailsList","RepositoryNotFoundError","PythonError"],"EnvVarWithConsumersOrError":["EnvVarWithConsumersList","PythonError"],"RunTagKeysOrError":["PythonError","RunTagKeys"],"RunTagsOrError":["PythonError","RunTags"],"RunIdsOrError":["RunIds","InvalidPipelineRunsFilterError","PythonError"],"AssetNodeOrError":["AssetNode","AssetNotFoundError"],"PartitionBackfillOrError":["PartitionBackfill","BackfillNotFoundError","PythonError"],"PartitionBackfillsOrError":["PartitionBackfills","PythonError"],"EventConnectionOrError":["EventConnection","RunNotFoundError","PythonError"],"AutoMaterializeAssetEvaluationRecordsOrError":["AutoMaterializeAssetEvaluationRecords","AutoMaterializeAssetEvaluationNeedsMigrationError"],"PartitionKeysOrError":["PartitionKeys","PartitionSubsetDeserializationError"],"AutoMaterializeRuleEvaluationData":["TextRuleEvaluationData","ParentMaterializedRuleEvaluationData","WaitingOnKeysRuleEvaluationData"],"SensorDryRunResult":["PythonError","SensorNotFoundError","DryRunInstigationTick"],"ScheduleDryRunResult":["DryRunInstigationTick","PythonError","ScheduleNotFoundError"],"TerminateRunsResultOrError":["TerminateRunsResult","PythonError"],"AssetWipeMutationResult":["AssetNotFoundError","UnauthorizedError","PythonError","AssetWipeSuccess"],"ReportRunlessAssetEventsResult":["UnauthorizedError","PythonError","ReportRunlessAssetEventsSuccess"],"ResumeBackfillResult":["ResumeBackfillSuccess","UnauthorizedError","PythonError"],"CancelBackfillResult":["CancelBackfillSuccess","UnauthorizedError","PythonError"],"LogTelemetryMutationResult":["LogTelemetrySuccess","PythonError"],"AddDynamicPartitionResult":["AddDynamicPartitionSuccess","UnauthorizedError","PythonError","DuplicateDynamicPartitionError"]} \ No newline at end of file +{"DisplayableEvent":["EngineEvent","ExecutionStepOutputEvent","ExpectationResult","FailureMetadata","HandledOutputEvent","LoadedInputEvent","ObjectStoreOperationResult","ResourceInitFailureEvent","ResourceInitStartedEvent","ResourceInitSuccessEvent","StepWorkerStartedEvent","StepWorkerStartingEvent","MaterializationEvent","ObservationEvent","TypeCheck"],"MarkerEvent":["EngineEvent","ResourceInitFailureEvent","ResourceInitStartedEvent","ResourceInitSuccessEvent","StepWorkerStartedEvent","StepWorkerStartingEvent"],"ErrorEvent":["EngineEvent","ExecutionStepFailureEvent","ExecutionStepUpForRetryEvent","HookErroredEvent","RunFailureEvent","ResourceInitFailureEvent"],"MessageEvent":["EngineEvent","ExecutionStepFailureEvent","ExecutionStepInputEvent","ExecutionStepOutputEvent","ExecutionStepRestartEvent","ExecutionStepSkippedEvent","ExecutionStepStartEvent","ExecutionStepSuccessEvent","ExecutionStepUpForRetryEvent","HandledOutputEvent","HookCompletedEvent","HookErroredEvent","HookSkippedEvent","LoadedInputEvent","LogMessageEvent","ObjectStoreOperationEvent","RunCanceledEvent","RunCancelingEvent","RunDequeuedEvent","RunEnqueuedEvent","RunFailureEvent","ResourceInitFailureEvent","ResourceInitStartedEvent","ResourceInitSuccessEvent","RunStartEvent","RunStartingEvent","RunSuccessEvent","StepExpectationResultEvent","StepWorkerStartedEvent","StepWorkerStartingEvent","MaterializationEvent","ObservationEvent","AssetMaterializationPlannedEvent","LogsCapturedEvent","AlertStartEvent","AlertSuccessEvent","AlertFailureEvent","AssetCheckEvaluationPlannedEvent","AssetCheckEvaluationEvent"],"RunEvent":["RunCanceledEvent","RunCancelingEvent","RunDequeuedEvent","RunEnqueuedEvent","RunFailureEvent","RunStartEvent","RunStartingEvent","RunSuccessEvent","AssetMaterializationPlannedEvent","AlertStartEvent","AlertSuccessEvent","AlertFailureEvent"],"PipelineRunStepStats":["RunStepStats"],"StepEvent":["EngineEvent","ExecutionStepFailureEvent","ExecutionStepInputEvent","ExecutionStepOutputEvent","ExecutionStepRestartEvent","ExecutionStepSkippedEvent","ExecutionStepStartEvent","ExecutionStepSuccessEvent","ExecutionStepUpForRetryEvent","HandledOutputEvent","HookCompletedEvent","HookErroredEvent","HookSkippedEvent","LoadedInputEvent","ObjectStoreOperationEvent","ResourceInitFailureEvent","ResourceInitStartedEvent","ResourceInitSuccessEvent","StepExpectationResultEvent","StepWorkerStartedEvent","StepWorkerStartingEvent","MaterializationEvent","ObservationEvent","AssetCheckEvaluationPlannedEvent","AssetCheckEvaluationEvent"],"AssetPartitionStatuses":["DefaultPartitionStatuses","MultiPartitionStatuses","TimePartitionStatuses"],"PartitionStatus1D":["TimePartitionStatuses","DefaultPartitionStatuses"],"AssetChecksOrError":["AssetChecks","AssetCheckNeedsMigrationError","AssetCheckNeedsUserCodeUpgrade","AssetCheckNeedsAgentUpgradeError"],"Instigator":["Schedule","Sensor"],"EvaluationStackEntry":["EvaluationStackListItemEntry","EvaluationStackPathEntry","EvaluationStackMapKeyEntry","EvaluationStackMapValueEntry"],"IPipelineSnapshot":["Pipeline","PipelineSnapshot","Job"],"PipelineConfigValidationError":["FieldNotDefinedConfigError","FieldsNotDefinedConfigError","MissingFieldConfigError","MissingFieldsConfigError","RuntimeMismatchConfigError","SelectorTypeConfigError"],"PipelineConfigValidationInvalid":["RunConfigValidationInvalid"],"PipelineConfigValidationResult":["InvalidSubsetError","PipelineConfigValidationValid","RunConfigValidationInvalid","PipelineNotFoundError","PythonError"],"PipelineReference":["PipelineSnapshot","UnknownPipeline"],"PipelineRun":["Run"],"DagsterRunEvent":["ExecutionStepFailureEvent","ExecutionStepInputEvent","ExecutionStepOutputEvent","ExecutionStepSkippedEvent","ExecutionStepStartEvent","ExecutionStepSuccessEvent","ExecutionStepUpForRetryEvent","ExecutionStepRestartEvent","LogMessageEvent","ResourceInitFailureEvent","ResourceInitStartedEvent","ResourceInitSuccessEvent","RunFailureEvent","RunStartEvent","RunEnqueuedEvent","RunDequeuedEvent","RunStartingEvent","RunCancelingEvent","RunCanceledEvent","RunSuccessEvent","StepWorkerStartedEvent","StepWorkerStartingEvent","HandledOutputEvent","LoadedInputEvent","LogsCapturedEvent","ObjectStoreOperationEvent","StepExpectationResultEvent","MaterializationEvent","ObservationEvent","EngineEvent","HookCompletedEvent","HookSkippedEvent","HookErroredEvent","AlertStartEvent","AlertSuccessEvent","AlertFailureEvent","AssetMaterializationPlannedEvent","AssetCheckEvaluationPlannedEvent","AssetCheckEvaluationEvent"],"PipelineRunLogsSubscriptionPayload":["PipelineRunLogsSubscriptionSuccess","PipelineRunLogsSubscriptionFailure"],"RunOrError":["Run","RunNotFoundError","PythonError"],"PipelineRunStatsSnapshot":["RunStatsSnapshot"],"RunStatsSnapshotOrError":["RunStatsSnapshot","PythonError"],"PipelineSnapshotOrError":["PipelineNotFoundError","PipelineSnapshot","PipelineSnapshotNotFoundError","PythonError"],"AssetOrError":["Asset","AssetNotFoundError"],"AssetsOrError":["AssetConnection","PythonError"],"DeletePipelineRunResult":["DeletePipelineRunSuccess","UnauthorizedError","PythonError","RunNotFoundError"],"ExecutionPlanOrError":["ExecutionPlan","RunConfigValidationInvalid","PipelineNotFoundError","InvalidSubsetError","PythonError"],"PipelineOrError":["Pipeline","PipelineNotFoundError","InvalidSubsetError","PythonError"],"ReloadRepositoryLocationMutationResult":["WorkspaceLocationEntry","ReloadNotSupported","RepositoryLocationNotFound","UnauthorizedError","PythonError"],"RepositoryLocationOrLoadError":["RepositoryLocation","PythonError"],"ReloadWorkspaceMutationResult":["Workspace","UnauthorizedError","PythonError"],"ShutdownRepositoryLocationMutationResult":["ShutdownRepositoryLocationSuccess","RepositoryLocationNotFound","UnauthorizedError","PythonError"],"TerminatePipelineExecutionFailure":["TerminateRunFailure"],"TerminatePipelineExecutionSuccess":["TerminateRunSuccess"],"TerminateRunResult":["TerminateRunSuccess","TerminateRunFailure","RunNotFoundError","UnauthorizedError","PythonError"],"ScheduleMutationResult":["PythonError","UnauthorizedError","ScheduleStateResult"],"ScheduleOrError":["Schedule","ScheduleNotFoundError","PythonError"],"SchedulerOrError":["Scheduler","SchedulerNotDefinedError","PythonError"],"SchedulesOrError":["Schedules","RepositoryNotFoundError","PythonError"],"ScheduleTickSpecificData":["ScheduleTickSuccessData","ScheduleTickFailureData"],"LaunchBackfillResult":["LaunchBackfillSuccess","PartitionSetNotFoundError","InvalidStepError","InvalidOutputError","RunConfigValidationInvalid","PipelineNotFoundError","RunConflict","UnauthorizedError","PythonError","InvalidSubsetError","PresetNotFoundError","ConflictingExecutionParamsError","NoModeProvidedError"],"ConfigTypeOrError":["EnumConfigType","CompositeConfigType","RegularConfigType","PipelineNotFoundError","ConfigTypeNotFoundError","PythonError"],"ConfigType":["ArrayConfigType","CompositeConfigType","EnumConfigType","NullableConfigType","RegularConfigType","ScalarUnionConfigType","MapConfigType"],"WrappingConfigType":["ArrayConfigType","NullableConfigType"],"DagsterType":["ListDagsterType","NullableDagsterType","RegularDagsterType"],"DagsterTypeOrError":["RegularDagsterType","PipelineNotFoundError","DagsterTypeNotFoundError","PythonError"],"WrappingDagsterType":["ListDagsterType","NullableDagsterType"],"Error":["AssetCheckNeedsMigrationError","AssetCheckNeedsUserCodeUpgrade","AssetCheckNeedsAgentUpgradeError","AssetNotFoundError","ConflictingExecutionParamsError","ConfigTypeNotFoundError","DagsterTypeNotFoundError","InvalidPipelineRunsFilterError","InvalidSubsetError","ModeNotFoundError","NoModeProvidedError","PartitionSetNotFoundError","PipelineNotFoundError","RunConflict","PipelineSnapshotNotFoundError","PresetNotFoundError","PythonError","ErrorChainLink","UnauthorizedError","ReloadNotSupported","RepositoryLocationNotFound","RepositoryNotFoundError","ResourceNotFoundError","RunGroupNotFoundError","RunNotFoundError","ScheduleNotFoundError","SchedulerNotDefinedError","SensorNotFoundError","DuplicateDynamicPartitionError","InstigationStateNotFoundError","SolidStepStatusUnavailableError","GraphNotFoundError","BackfillNotFoundError","PartitionSubsetDeserializationError","AutoMaterializeAssetEvaluationNeedsMigrationError"],"PipelineRunConflict":["RunConflict"],"PipelineRunNotFoundError":["RunNotFoundError"],"RepositoriesOrError":["RepositoryConnection","RepositoryNotFoundError","PythonError"],"RepositoryOrError":["PythonError","Repository","RepositoryNotFoundError"],"InstigationTypeSpecificData":["SensorData","ScheduleData"],"InstigationStateOrError":["InstigationState","InstigationStateNotFoundError","PythonError"],"InstigationStatesOrError":["InstigationStates","PythonError"],"MetadataEntry":["TableSchemaMetadataEntry","TableMetadataEntry","FloatMetadataEntry","IntMetadataEntry","JsonMetadataEntry","BoolMetadataEntry","MarkdownMetadataEntry","PathMetadataEntry","NotebookMetadataEntry","PythonArtifactMetadataEntry","TextMetadataEntry","UrlMetadataEntry","PipelineRunMetadataEntry","AssetMetadataEntry","JobMetadataEntry","NullMetadataEntry"],"PartitionRunConfigOrError":["PartitionRunConfig","PythonError"],"AssetBackfillStatus":["AssetPartitionsStatusCounts","UnpartitionedAssetStatus"],"PartitionSetOrError":["PartitionSet","PartitionSetNotFoundError","PythonError"],"PartitionSetsOrError":["PartitionSets","PipelineNotFoundError","PythonError"],"PartitionsOrError":["Partitions","PythonError"],"PartitionStatusesOrError":["PartitionStatuses","PythonError"],"PartitionTagsOrError":["PartitionTags","PythonError"],"RunConfigSchemaOrError":["RunConfigSchema","PipelineNotFoundError","InvalidSubsetError","ModeNotFoundError","PythonError"],"LaunchRunResult":["LaunchRunSuccess","InvalidStepError","InvalidOutputError","RunConfigValidationInvalid","PipelineNotFoundError","RunConflict","UnauthorizedError","PythonError","InvalidSubsetError","PresetNotFoundError","ConflictingExecutionParamsError","NoModeProvidedError"],"LaunchRunReexecutionResult":["LaunchRunSuccess","InvalidStepError","InvalidOutputError","RunConfigValidationInvalid","PipelineNotFoundError","RunConflict","UnauthorizedError","PythonError","InvalidSubsetError","PresetNotFoundError","ConflictingExecutionParamsError","NoModeProvidedError"],"LaunchPipelineRunSuccess":["LaunchRunSuccess"],"RunsOrError":["Runs","InvalidPipelineRunsFilterError","PythonError"],"PipelineRuns":["Runs"],"RunGroupOrError":["RunGroup","RunGroupNotFoundError","PythonError"],"SensorOrError":["Sensor","SensorNotFoundError","UnauthorizedError","PythonError"],"SensorsOrError":["Sensors","RepositoryNotFoundError","PythonError"],"StopSensorMutationResultOrError":["StopSensorMutationResult","UnauthorizedError","PythonError"],"ISolidDefinition":["CompositeSolidDefinition","SolidDefinition"],"SolidContainer":["Pipeline","PipelineSnapshot","Job","CompositeSolidDefinition","Graph"],"SolidStepStatsOrError":["SolidStepStatsConnection","SolidStepStatusUnavailableError"],"WorkspaceOrError":["Workspace","PythonError"],"WorkspaceLocationStatusEntriesOrError":["WorkspaceLocationStatusEntries","PythonError"],"GraphOrError":["Graph","GraphNotFoundError","PythonError"],"ResourceDetailsOrError":["ResourceDetails","ResourceNotFoundError","PythonError"],"ResourcesOrError":["ResourceDetailsList","RepositoryNotFoundError","PythonError"],"EnvVarWithConsumersOrError":["EnvVarWithConsumersList","PythonError"],"RunTagKeysOrError":["PythonError","RunTagKeys"],"RunTagsOrError":["PythonError","RunTags"],"RunIdsOrError":["RunIds","InvalidPipelineRunsFilterError","PythonError"],"AssetNodeOrError":["AssetNode","AssetNotFoundError"],"PartitionBackfillOrError":["PartitionBackfill","BackfillNotFoundError","PythonError"],"PartitionBackfillsOrError":["PartitionBackfills","PythonError"],"EventConnectionOrError":["EventConnection","RunNotFoundError","PythonError"],"AutoMaterializeAssetEvaluationRecordsOrError":["AutoMaterializeAssetEvaluationRecords","AutoMaterializeAssetEvaluationNeedsMigrationError"],"PartitionKeysOrError":["PartitionKeys","PartitionSubsetDeserializationError"],"AutoMaterializeRuleEvaluationData":["TextRuleEvaluationData","ParentMaterializedRuleEvaluationData","WaitingOnKeysRuleEvaluationData"],"AssetConditionEvaluationRecordsOrError":["AssetConditionEvaluationRecords","AutoMaterializeAssetEvaluationNeedsMigrationError"],"AssetConditionEvaluation":["UnpartitionedAssetConditionEvaluation","PartitionedAssetConditionEvaluation","SpecificPartitionAssetConditionEvaluation"],"SensorDryRunResult":["PythonError","SensorNotFoundError","DryRunInstigationTick"],"ScheduleDryRunResult":["DryRunInstigationTick","PythonError","ScheduleNotFoundError"],"TerminateRunsResultOrError":["TerminateRunsResult","PythonError"],"AssetWipeMutationResult":["AssetNotFoundError","UnauthorizedError","PythonError","AssetWipeSuccess"],"ReportRunlessAssetEventsResult":["UnauthorizedError","PythonError","ReportRunlessAssetEventsSuccess"],"ResumeBackfillResult":["ResumeBackfillSuccess","UnauthorizedError","PythonError"],"CancelBackfillResult":["CancelBackfillSuccess","UnauthorizedError","PythonError"],"LogTelemetryMutationResult":["LogTelemetrySuccess","PythonError"],"AddDynamicPartitionResult":["AddDynamicPartitionSuccess","UnauthorizedError","PythonError","DuplicateDynamicPartitionError"]} \ No newline at end of file diff --git a/js_modules/dagster-ui/packages/ui-core/src/graphql/schema.graphql b/js_modules/dagster-ui/packages/ui-core/src/graphql/schema.graphql index b3b29974ad715..63e00745f9c39 100644 --- a/js_modules/dagster-ui/packages/ui-core/src/graphql/schema.graphql +++ b/js_modules/dagster-ui/packages/ui-core/src/graphql/schema.graphql @@ -3178,6 +3178,19 @@ type Query { autoMaterializeEvaluationsForEvaluationId( evaluationId: Int! ): AutoMaterializeAssetEvaluationRecordsOrError + assetConditionEvaluationForPartition( + assetKey: AssetKeyInput! + evaluationId: Int! + partition: String! + ): SpecificPartitionAssetConditionEvaluation + assetConditionEvaluationRecordsOrError( + assetKey: AssetKeyInput! + limit: Int! + cursor: String + ): AssetConditionEvaluationRecordsOrError + assetConditionEvaluationsForEvaluationId( + evaluationId: Int! + ): AssetConditionEvaluationRecordsOrError autoMaterializeTicks( dayRange: Int dayOffset: Int @@ -3403,6 +3416,76 @@ type AutoMaterializeAssetEvaluationNeedsMigrationError implements Error { message: String! } +type SpecificPartitionAssetConditionEvaluation { + description: String! + metadataEntries: [MetadataEntry!]! + status: AssetConditionEvaluationStatus! + childEvaluations: [SpecificPartitionAssetConditionEvaluation!] +} + +enum AssetConditionEvaluationStatus { + TRUE + FALSE + SKIPPED +} + +union AssetConditionEvaluationRecordsOrError = + AssetConditionEvaluationRecords + | AutoMaterializeAssetEvaluationNeedsMigrationError + +type AssetConditionEvaluationRecords { + records: [AssetConditionEvaluationRecord!]! +} + +type AssetConditionEvaluationRecord { + id: ID! + evaluationId: Int! + runIds: [String!]! + timestamp: Float! + assetKey: AssetKey! + numRequested: Int! + evaluation: AssetConditionEvaluation! +} + +union AssetConditionEvaluation = + UnpartitionedAssetConditionEvaluation + | PartitionedAssetConditionEvaluation + | SpecificPartitionAssetConditionEvaluation + +type UnpartitionedAssetConditionEvaluation { + description: String! + startTimestamp: Float + endTimestamp: Float + metadataEntries: [MetadataEntry!]! + status: AssetConditionEvaluationStatus! + childEvaluations: [UnpartitionedAssetConditionEvaluation!] +} + +type PartitionedAssetConditionEvaluation { + description: String! + startTimestamp: Float + endTimestamp: Float + trueSubset: AssetSubset! + falseSubset: AssetSubset! + candidateSubset: AssetSubset + numTrue: Int! + numFalse: Int! + numSkipped: Int! + childEvaluations: [PartitionedAssetConditionEvaluation!] +} + +type AssetSubset { + assetKey: AssetKey! + subsetValue: AssetSubsetValue! +} + +type AssetSubsetValue { + boolValue: Boolean + partitionKeys: [String!] + partitionKeyRanges: [PartitionKeyRange!] + isPartitioned: Boolean! +} + type Mutation { launchPipelineExecution(executionParams: ExecutionParams!): LaunchRunResult! launchRun(executionParams: ExecutionParams!): LaunchRunResult! diff --git a/js_modules/dagster-ui/packages/ui-core/src/graphql/types.ts b/js_modules/dagster-ui/packages/ui-core/src/graphql/types.ts index 84e9d065c3931..2018b8539765d 100644 --- a/js_modules/dagster-ui/packages/ui-core/src/graphql/types.ts +++ b/js_modules/dagster-ui/packages/ui-core/src/graphql/types.ts @@ -245,6 +245,37 @@ export type AssetChecksOrError = | AssetCheckNeedsUserCodeUpgrade | AssetChecks; +export type AssetConditionEvaluation = + | PartitionedAssetConditionEvaluation + | SpecificPartitionAssetConditionEvaluation + | UnpartitionedAssetConditionEvaluation; + +export type AssetConditionEvaluationRecord = { + __typename: 'AssetConditionEvaluationRecord'; + assetKey: AssetKey; + evaluation: AssetConditionEvaluation; + evaluationId: Scalars['Int']; + id: Scalars['ID']; + numRequested: Scalars['Int']; + runIds: Array; + timestamp: Scalars['Float']; +}; + +export type AssetConditionEvaluationRecords = { + __typename: 'AssetConditionEvaluationRecords'; + records: Array; +}; + +export type AssetConditionEvaluationRecordsOrError = + | AssetConditionEvaluationRecords + | AutoMaterializeAssetEvaluationNeedsMigrationError; + +export enum AssetConditionEvaluationStatus { + FALSE = 'FALSE', + SKIPPED = 'SKIPPED', + TRUE = 'TRUE', +} + export type AssetConnection = { __typename: 'AssetConnection'; nodes: Array; @@ -481,6 +512,20 @@ export type AssetSelection = { assetSelectionString: Maybe; }; +export type AssetSubset = { + __typename: 'AssetSubset'; + assetKey: AssetKey; + subsetValue: AssetSubsetValue; +}; + +export type AssetSubsetValue = { + __typename: 'AssetSubsetValue'; + boolValue: Maybe; + isPartitioned: Scalars['Boolean']; + partitionKeyRanges: Maybe>; + partitionKeys: Maybe>; +}; + export type AssetWipeMutationResult = | AssetNotFoundError | AssetWipeSuccess @@ -2629,6 +2674,20 @@ export type PartitionTags = { export type PartitionTagsOrError = PartitionTags | PythonError; +export type PartitionedAssetConditionEvaluation = { + __typename: 'PartitionedAssetConditionEvaluation'; + candidateSubset: Maybe; + childEvaluations: Maybe>; + description: Scalars['String']; + endTimestamp: Maybe; + falseSubset: AssetSubset; + numFalse: Scalars['Int']; + numSkipped: Scalars['Int']; + numTrue: Scalars['Int']; + startTimestamp: Maybe; + trueSubset: AssetSubset; +}; + export type Partitions = { __typename: 'Partitions'; results: Array; @@ -2963,6 +3022,9 @@ export type Query = { allTopLevelResourceDetailsOrError: ResourcesOrError; assetBackfillPreview: Array; assetCheckExecutions: Array; + assetConditionEvaluationForPartition: Maybe; + assetConditionEvaluationRecordsOrError: Maybe; + assetConditionEvaluationsForEvaluationId: Maybe; assetNodeDefinitionCollisions: Array; assetNodeOrError: AssetNodeOrError; assetNodes: Array; @@ -3029,6 +3091,22 @@ export type QueryAssetCheckExecutionsArgs = { limit: Scalars['Int']; }; +export type QueryAssetConditionEvaluationForPartitionArgs = { + assetKey: AssetKeyInput; + evaluationId: Scalars['Int']; + partition: Scalars['String']; +}; + +export type QueryAssetConditionEvaluationRecordsOrErrorArgs = { + assetKey: AssetKeyInput; + cursor?: InputMaybe; + limit: Scalars['Int']; +}; + +export type QueryAssetConditionEvaluationsForEvaluationIdArgs = { + evaluationId: Scalars['Int']; +}; + export type QueryAssetNodeDefinitionCollisionsArgs = { assetKeys?: InputMaybe>; }; @@ -4122,6 +4200,14 @@ export type SolidStepStatusUnavailableError = Error & { message: Scalars['String']; }; +export type SpecificPartitionAssetConditionEvaluation = { + __typename: 'SpecificPartitionAssetConditionEvaluation'; + childEvaluations: Maybe>; + description: Scalars['String']; + metadataEntries: Array; + status: AssetConditionEvaluationStatus; +}; + export type StaleCause = { __typename: 'StaleCause'; category: StaleCauseCategory; @@ -4431,6 +4517,16 @@ export type UnknownPipeline = PipelineReference & { solidSelection: Maybe>; }; +export type UnpartitionedAssetConditionEvaluation = { + __typename: 'UnpartitionedAssetConditionEvaluation'; + childEvaluations: Maybe>; + description: Scalars['String']; + endTimestamp: Maybe; + metadataEntries: Array; + startTimestamp: Maybe; + status: AssetConditionEvaluationStatus; +}; + export type UnpartitionedAssetStatus = { __typename: 'UnpartitionedAssetStatus'; assetKey: AssetKey; @@ -4944,6 +5040,51 @@ export const buildAssetChecks = ( }; }; +export const buildAssetConditionEvaluationRecord = ( + overrides?: Partial, + _relationshipsToOmit: Set = new Set(), +): {__typename: 'AssetConditionEvaluationRecord'} & AssetConditionEvaluationRecord => { + const relationshipsToOmit: Set = new Set(_relationshipsToOmit); + relationshipsToOmit.add('AssetConditionEvaluationRecord'); + return { + __typename: 'AssetConditionEvaluationRecord', + assetKey: + overrides && overrides.hasOwnProperty('assetKey') + ? overrides.assetKey! + : relationshipsToOmit.has('AssetKey') + ? ({} as AssetKey) + : buildAssetKey({}, relationshipsToOmit), + evaluation: + overrides && overrides.hasOwnProperty('evaluation') + ? overrides.evaluation! + : relationshipsToOmit.has('PartitionedAssetConditionEvaluation') + ? ({} as PartitionedAssetConditionEvaluation) + : buildPartitionedAssetConditionEvaluation({}, relationshipsToOmit), + evaluationId: + overrides && overrides.hasOwnProperty('evaluationId') ? overrides.evaluationId! : 5501, + id: + overrides && overrides.hasOwnProperty('id') + ? overrides.id! + : '1c158e55-c1c1-43c2-9f14-8e369549e154', + numRequested: + overrides && overrides.hasOwnProperty('numRequested') ? overrides.numRequested! : 2364, + runIds: overrides && overrides.hasOwnProperty('runIds') ? overrides.runIds! : [], + timestamp: overrides && overrides.hasOwnProperty('timestamp') ? overrides.timestamp! : 6.88, + }; +}; + +export const buildAssetConditionEvaluationRecords = ( + overrides?: Partial, + _relationshipsToOmit: Set = new Set(), +): {__typename: 'AssetConditionEvaluationRecords'} & AssetConditionEvaluationRecords => { + const relationshipsToOmit: Set = new Set(_relationshipsToOmit); + relationshipsToOmit.add('AssetConditionEvaluationRecords'); + return { + __typename: 'AssetConditionEvaluationRecords', + records: overrides && overrides.hasOwnProperty('records') ? overrides.records! : [], + }; +}; + export const buildAssetConnection = ( overrides?: Partial, _relationshipsToOmit: Set = new Set(), @@ -5459,6 +5600,49 @@ export const buildAssetSelection = ( }; }; +export const buildAssetSubset = ( + overrides?: Partial, + _relationshipsToOmit: Set = new Set(), +): {__typename: 'AssetSubset'} & AssetSubset => { + const relationshipsToOmit: Set = new Set(_relationshipsToOmit); + relationshipsToOmit.add('AssetSubset'); + return { + __typename: 'AssetSubset', + assetKey: + overrides && overrides.hasOwnProperty('assetKey') + ? overrides.assetKey! + : relationshipsToOmit.has('AssetKey') + ? ({} as AssetKey) + : buildAssetKey({}, relationshipsToOmit), + subsetValue: + overrides && overrides.hasOwnProperty('subsetValue') + ? overrides.subsetValue! + : relationshipsToOmit.has('AssetSubsetValue') + ? ({} as AssetSubsetValue) + : buildAssetSubsetValue({}, relationshipsToOmit), + }; +}; + +export const buildAssetSubsetValue = ( + overrides?: Partial, + _relationshipsToOmit: Set = new Set(), +): {__typename: 'AssetSubsetValue'} & AssetSubsetValue => { + const relationshipsToOmit: Set = new Set(_relationshipsToOmit); + relationshipsToOmit.add('AssetSubsetValue'); + return { + __typename: 'AssetSubsetValue', + boolValue: overrides && overrides.hasOwnProperty('boolValue') ? overrides.boolValue! : false, + isPartitioned: + overrides && overrides.hasOwnProperty('isPartitioned') ? overrides.isPartitioned! : false, + partitionKeyRanges: + overrides && overrides.hasOwnProperty('partitionKeyRanges') + ? overrides.partitionKeyRanges! + : [], + partitionKeys: + overrides && overrides.hasOwnProperty('partitionKeys') ? overrides.partitionKeys! : [], + }; +}; + export const buildAssetWipeSuccess = ( overrides?: Partial, _relationshipsToOmit: Set = new Set(), @@ -9537,6 +9721,46 @@ export const buildPartitionTags = ( }; }; +export const buildPartitionedAssetConditionEvaluation = ( + overrides?: Partial, + _relationshipsToOmit: Set = new Set(), +): {__typename: 'PartitionedAssetConditionEvaluation'} & PartitionedAssetConditionEvaluation => { + const relationshipsToOmit: Set = new Set(_relationshipsToOmit); + relationshipsToOmit.add('PartitionedAssetConditionEvaluation'); + return { + __typename: 'PartitionedAssetConditionEvaluation', + candidateSubset: + overrides && overrides.hasOwnProperty('candidateSubset') + ? overrides.candidateSubset! + : relationshipsToOmit.has('AssetSubset') + ? ({} as AssetSubset) + : buildAssetSubset({}, relationshipsToOmit), + childEvaluations: + overrides && overrides.hasOwnProperty('childEvaluations') ? overrides.childEvaluations! : [], + description: + overrides && overrides.hasOwnProperty('description') ? overrides.description! : 'non', + endTimestamp: + overrides && overrides.hasOwnProperty('endTimestamp') ? overrides.endTimestamp! : 6.63, + falseSubset: + overrides && overrides.hasOwnProperty('falseSubset') + ? overrides.falseSubset! + : relationshipsToOmit.has('AssetSubset') + ? ({} as AssetSubset) + : buildAssetSubset({}, relationshipsToOmit), + numFalse: overrides && overrides.hasOwnProperty('numFalse') ? overrides.numFalse! : 7739, + numSkipped: overrides && overrides.hasOwnProperty('numSkipped') ? overrides.numSkipped! : 7712, + numTrue: overrides && overrides.hasOwnProperty('numTrue') ? overrides.numTrue! : 6991, + startTimestamp: + overrides && overrides.hasOwnProperty('startTimestamp') ? overrides.startTimestamp! : 3.43, + trueSubset: + overrides && overrides.hasOwnProperty('trueSubset') + ? overrides.trueSubset! + : relationshipsToOmit.has('AssetSubset') + ? ({} as AssetSubset) + : buildAssetSubset({}, relationshipsToOmit), + }; +}; + export const buildPartitions = ( overrides?: Partial, _relationshipsToOmit: Set = new Set(), @@ -10226,6 +10450,24 @@ export const buildQuery = ( overrides && overrides.hasOwnProperty('assetCheckExecutions') ? overrides.assetCheckExecutions! : [], + assetConditionEvaluationForPartition: + overrides && overrides.hasOwnProperty('assetConditionEvaluationForPartition') + ? overrides.assetConditionEvaluationForPartition! + : relationshipsToOmit.has('SpecificPartitionAssetConditionEvaluation') + ? ({} as SpecificPartitionAssetConditionEvaluation) + : buildSpecificPartitionAssetConditionEvaluation({}, relationshipsToOmit), + assetConditionEvaluationRecordsOrError: + overrides && overrides.hasOwnProperty('assetConditionEvaluationRecordsOrError') + ? overrides.assetConditionEvaluationRecordsOrError! + : relationshipsToOmit.has('AssetConditionEvaluationRecords') + ? ({} as AssetConditionEvaluationRecords) + : buildAssetConditionEvaluationRecords({}, relationshipsToOmit), + assetConditionEvaluationsForEvaluationId: + overrides && overrides.hasOwnProperty('assetConditionEvaluationsForEvaluationId') + ? overrides.assetConditionEvaluationsForEvaluationId! + : relationshipsToOmit.has('AssetConditionEvaluationRecords') + ? ({} as AssetConditionEvaluationRecords) + : buildAssetConditionEvaluationRecords({}, relationshipsToOmit), assetNodeDefinitionCollisions: overrides && overrides.hasOwnProperty('assetNodeDefinitionCollisions') ? overrides.assetNodeDefinitionCollisions! @@ -12368,6 +12610,29 @@ export const buildSolidStepStatusUnavailableError = ( }; }; +export const buildSpecificPartitionAssetConditionEvaluation = ( + overrides?: Partial, + _relationshipsToOmit: Set = new Set(), +): { + __typename: 'SpecificPartitionAssetConditionEvaluation'; +} & SpecificPartitionAssetConditionEvaluation => { + const relationshipsToOmit: Set = new Set(_relationshipsToOmit); + relationshipsToOmit.add('SpecificPartitionAssetConditionEvaluation'); + return { + __typename: 'SpecificPartitionAssetConditionEvaluation', + childEvaluations: + overrides && overrides.hasOwnProperty('childEvaluations') ? overrides.childEvaluations! : [], + description: + overrides && overrides.hasOwnProperty('description') ? overrides.description! : 'vel', + metadataEntries: + overrides && overrides.hasOwnProperty('metadataEntries') ? overrides.metadataEntries! : [], + status: + overrides && overrides.hasOwnProperty('status') + ? overrides.status! + : AssetConditionEvaluationStatus.FALSE, + }; +}; + export const buildStaleCause = ( overrides?: Partial, _relationshipsToOmit: Set = new Set(), @@ -13025,6 +13290,33 @@ export const buildUnknownPipeline = ( }; }; +export const buildUnpartitionedAssetConditionEvaluation = ( + overrides?: Partial, + _relationshipsToOmit: Set = new Set(), +): { + __typename: 'UnpartitionedAssetConditionEvaluation'; +} & UnpartitionedAssetConditionEvaluation => { + const relationshipsToOmit: Set = new Set(_relationshipsToOmit); + relationshipsToOmit.add('UnpartitionedAssetConditionEvaluation'); + return { + __typename: 'UnpartitionedAssetConditionEvaluation', + childEvaluations: + overrides && overrides.hasOwnProperty('childEvaluations') ? overrides.childEvaluations! : [], + description: + overrides && overrides.hasOwnProperty('description') ? overrides.description! : 'deserunt', + endTimestamp: + overrides && overrides.hasOwnProperty('endTimestamp') ? overrides.endTimestamp! : 7.57, + metadataEntries: + overrides && overrides.hasOwnProperty('metadataEntries') ? overrides.metadataEntries! : [], + startTimestamp: + overrides && overrides.hasOwnProperty('startTimestamp') ? overrides.startTimestamp! : 0.96, + status: + overrides && overrides.hasOwnProperty('status') + ? overrides.status! + : AssetConditionEvaluationStatus.FALSE, + }; +}; + export const buildUnpartitionedAssetStatus = ( overrides?: Partial, _relationshipsToOmit: Set = new Set(), diff --git a/python_modules/dagster-graphql/dagster_graphql/implementation/fetch_asset_condition_evaluations.py b/python_modules/dagster-graphql/dagster_graphql/implementation/fetch_asset_condition_evaluations.py new file mode 100644 index 0000000000000..b96f42a563e6e --- /dev/null +++ b/python_modules/dagster-graphql/dagster_graphql/implementation/fetch_asset_condition_evaluations.py @@ -0,0 +1,132 @@ +from typing import TYPE_CHECKING, Optional, Sequence + +import dagster._check as check +from dagster import AssetKey +from dagster._core.scheduler.instigation import AutoMaterializeAssetEvaluationRecord + +from dagster_graphql.implementation.fetch_assets import get_asset_nodes_by_asset_key +from dagster_graphql.schema.asset_condition_evaluations import ( + GrapheneAssetConditionEvaluation, + GrapheneAssetConditionEvaluationRecord, + GrapheneAssetConditionEvaluationRecords, + GrapheneAssetConditionEvaluationRecordsOrError, + GrapheneSpecificPartitionAssetConditionEvaluation, +) +from dagster_graphql.schema.auto_materialize_asset_evaluations import ( + GrapheneAutoMaterializeAssetEvaluationNeedsMigrationError, +) +from dagster_graphql.schema.inputs import GrapheneAssetKeyInput + +if TYPE_CHECKING: + from ..schema.util import ResolveInfo + + +def _get_migration_error( + graphene_info: "ResolveInfo", +) -> Optional[GrapheneAutoMaterializeAssetEvaluationNeedsMigrationError]: + if graphene_info.context.instance.schedule_storage is None: + return GrapheneAutoMaterializeAssetEvaluationNeedsMigrationError( + message="Instance does not have schedule storage configured, cannot fetch evaluations." + ) + if not graphene_info.context.instance.schedule_storage.supports_auto_materialize_asset_evaluations: + return GrapheneAutoMaterializeAssetEvaluationNeedsMigrationError( + message=( + "Auto materialize evaluations are not getting logged. Run `dagster instance" + " migrate` to enable." + ) + ) + return None + + +def _get_graphene_records_from_evaluations( + graphene_info: "ResolveInfo", + evaluation_records: Sequence[AutoMaterializeAssetEvaluationRecord], +) -> GrapheneAssetConditionEvaluationRecords: + asset_keys = {record.asset_key for record in evaluation_records} + + partitions_defs = {} + + nodes = get_asset_nodes_by_asset_key(graphene_info) + for asset_key in asset_keys: + asset_node = nodes.get(asset_key) + partitions_defs[asset_key] = ( + asset_node.external_asset_node.partitions_def_data.get_partitions_definition() + if asset_node and asset_node.external_asset_node.partitions_def_data + else None + ) + + return GrapheneAssetConditionEvaluationRecords( + records=[ + GrapheneAssetConditionEvaluationRecord( + evaluation, partitions_defs[evaluation.asset_key], graphene_info.context.instance + ) + for evaluation in evaluation_records + ] + ) + + +def fetch_asset_condition_evaluation_record_for_partition( + graphene_info: "ResolveInfo", + graphene_asset_key: GrapheneAssetKeyInput, + evaluation_id: int, + partition_key: str, +) -> GrapheneAssetConditionEvaluation: + asset_key = AssetKey.from_graphql_input(graphene_asset_key) + schedule_storage = check.not_none(graphene_info.context.instance.schedule_storage) + record = next( + iter( + schedule_storage.get_auto_materialize_asset_evaluations( + asset_key, cursor=evaluation_id + 1, limit=1 + ) + ) + ) + asset_node = get_asset_nodes_by_asset_key(graphene_info).get(asset_key) + partitions_def = ( + asset_node.external_asset_node.partitions_def_data.get_partitions_definition() + if asset_node and asset_node.external_asset_node.partitions_def_data + else None + ) + return GrapheneSpecificPartitionAssetConditionEvaluation( + record.get_evaluation_with_run_ids(partitions_def).evaluation, partition_key + ) + + +def fetch_asset_condition_evaluation_records_for_asset_key( + graphene_info: "ResolveInfo", + graphene_asset_key: GrapheneAssetKeyInput, + limit: int, + cursor: Optional[str], +) -> GrapheneAssetConditionEvaluationRecordsOrError: + """Fetch asset policy evaluations from storage.""" + migration_error = _get_migration_error(graphene_info) + if migration_error: + return migration_error + + asset_key = AssetKey.from_graphql_input(graphene_asset_key) + + schedule_storage = check.not_none(graphene_info.context.instance.schedule_storage) + return _get_graphene_records_from_evaluations( + graphene_info, + schedule_storage.get_auto_materialize_asset_evaluations( + asset_key=asset_key, + limit=limit, + cursor=int(cursor) if cursor else None, + ), + ) + + +def fetch_asset_condition_evaluation_records_for_evaluation_id( + graphene_info: "ResolveInfo", + evaluation_id: int, +) -> GrapheneAssetConditionEvaluationRecordsOrError: + migration_error = _get_migration_error(graphene_info) + if migration_error: + return migration_error + + schedule_storage = check.not_none(graphene_info.context.instance.schedule_storage) + return _get_graphene_records_from_evaluations( + graphene_info, + schedule_storage.get_auto_materialize_evaluations_for_evaluation_id( + evaluation_id=evaluation_id + ), + ) diff --git a/python_modules/dagster-graphql/dagster_graphql/schema/asset_condition_evaluations.py b/python_modules/dagster-graphql/dagster_graphql/schema/asset_condition_evaluations.py new file mode 100644 index 0000000000000..9b81ff2204b70 --- /dev/null +++ b/python_modules/dagster-graphql/dagster_graphql/schema/asset_condition_evaluations.py @@ -0,0 +1,308 @@ +import enum +from typing import Optional, Sequence, Union + +import graphene +import pendulum +from dagster._core.definitions.asset_condition import AssetConditionEvaluation +from dagster._core.definitions.asset_subset import AssetSubset +from dagster._core.definitions.partition import PartitionsDefinition, PartitionsSubset +from dagster._core.definitions.time_window_partitions import BaseTimeWindowPartitionsSubset +from dagster._core.instance import DynamicPartitionsStore +from dagster._core.scheduler.instigation import AutoMaterializeAssetEvaluationRecord + +from dagster_graphql.schema.auto_materialize_asset_evaluations import ( + GrapheneAutoMaterializeAssetEvaluationNeedsMigrationError, +) +from dagster_graphql.schema.metadata import GrapheneMetadataEntry + +from .asset_key import GrapheneAssetKey +from .partition_sets import GraphenePartitionKeyRange +from .util import ResolveInfo, non_null_list + + +class AssetConditionEvaluationStatus(enum.Enum): + TRUE = "TRUE" + FALSE = "FALSE" + SKIPPED = "SKIPPED" + + +GrapheneAssetConditionEvaluationStatus = graphene.Enum.from_enum(AssetConditionEvaluationStatus) + + +class GrapheneAssetSubsetValue(graphene.ObjectType): + class Meta: + name = "AssetSubsetValue" + + boolValue = graphene.Field(graphene.Boolean) + partitionKeys = graphene.List(graphene.NonNull(graphene.String)) + partitionKeyRanges = graphene.List(graphene.NonNull(GraphenePartitionKeyRange)) + + isPartitioned = graphene.NonNull(graphene.Boolean) + + def __init__(self, value: Union[bool, PartitionsSubset]): + bool_value, partition_keys, partition_key_ranges = None, None, None + if isinstance(value, bool): + bool_value = value + elif isinstance(value, BaseTimeWindowPartitionsSubset): + partition_key_ranges = [ + GraphenePartitionKeyRange(start, end) + for start, end in value.get_partition_key_ranges(value.partitions_def) + ] + else: + partition_keys = value.get_partition_keys() + + super().__init__( + boolValue=bool_value, + partitionKeys=partition_keys, + partitionKeyRanges=partition_key_ranges, + ) + + def resolve_isPartitioned(self, graphene_info: ResolveInfo) -> bool: + return self.boolValue is not None + + +class GrapheneAssetSubset(graphene.ObjectType): + assetKey = graphene.NonNull(GrapheneAssetKey) + subsetValue = graphene.NonNull(GrapheneAssetSubsetValue) + + class Meta: + name = "AssetSubset" + + def __init__(self, asset_subset: AssetSubset): + super().__init__( + assetKey=GrapheneAssetKey(path=asset_subset.asset_key.path), + subsetValue=GrapheneAssetSubsetValue(asset_subset.subset_value), + ) + + +class GrapheneUnpartitionedAssetConditionEvaluation(graphene.ObjectType): + description = graphene.NonNull(graphene.String) + + startTimestamp = graphene.Field(graphene.Float) + endTimestamp = graphene.Field(graphene.Float) + + metadataEntries = non_null_list(GrapheneMetadataEntry) + status = graphene.NonNull(GrapheneAssetConditionEvaluationStatus) + + childEvaluations = graphene.Field( + graphene.List(graphene.NonNull(lambda: GrapheneUnpartitionedAssetConditionEvaluation)) + ) + + class Meta: + name = "UnpartitionedAssetConditionEvaluation" + + def __init__(self, evaluation: AssetConditionEvaluation): + if evaluation.true_subset.bool_value: + status = AssetConditionEvaluationStatus.TRUE + elif evaluation.candidate_subset and evaluation.candidate_subset.bool_value: + status = AssetConditionEvaluationStatus.FALSE + else: + status = AssetConditionEvaluationStatus.SKIPPED + + super().__init__( + description=evaluation.condition_snapshot.description, + startTimestamp=evaluation.start_timestamp, + endTimestamp=evaluation.end_timestamp, + status=status, + childEvaluations=[ + GrapheneUnpartitionedAssetConditionEvaluation(child) + for child in evaluation.child_evaluations + ], + ) + + def resolve_metadataEntries( + self, graphene_info: ResolveInfo + ) -> Sequence[GrapheneMetadataEntry]: + metadata = next( + (subset.metadata for subset in self._evaluation.subsets_with_metadata), + {}, + ) + return [GrapheneMetadataEntry(key=key, value=value) for key, value in metadata.items()] + + +class GraphenePartitionedAssetConditionEvaluation(graphene.ObjectType): + description = graphene.NonNull(graphene.String) + + startTimestamp = graphene.Field(graphene.Float) + endTimestamp = graphene.Field(graphene.Float) + + trueSubset = graphene.NonNull(GrapheneAssetSubset) + falseSubset = graphene.NonNull(GrapheneAssetSubset) + candidateSubset = graphene.Field(GrapheneAssetSubset) + + numTrue = graphene.NonNull(graphene.Int) + numFalse = graphene.NonNull(graphene.Int) + numSkipped = graphene.NonNull(graphene.Int) + + childEvaluations = graphene.Field( + graphene.List(graphene.NonNull(lambda: GraphenePartitionedAssetConditionEvaluation)) + ) + + class Meta: + name = "PartitionedAssetConditionEvaluation" + + def __init__( + self, + evaluation: AssetConditionEvaluation, + partitions_def: Optional[PartitionsDefinition], + dynamic_partitions_store: DynamicPartitionsStore, + ): + self._partitions_def = partitions_def + self._true_subset = evaluation.true_subset + + self._all_subset = AssetSubset.all( + evaluation.asset_key, partitions_def, dynamic_partitions_store, pendulum.now("UTC") + ) + + # if the candidate_subset is unset, then we evaluated all partitions + self._candidate_subset = evaluation.candidate_subset or self._all_subset + + super().__init__( + description=evaluation.condition_snapshot.description, + startTimestamp=evaluation.start_timestamp, + endTimestamp=evaluation.end_timestamp, + trueSubset=GrapheneAssetSubset(evaluation.true_subset), + candidateSubset=GrapheneAssetSubset(self._candidate_subset), + childEvaluations=[ + GraphenePartitionedAssetConditionEvaluation( + child, partitions_def, dynamic_partitions_store + ) + for child in evaluation.child_evaluations + ], + ) + + def resolve_numTrue(self, graphene_info: ResolveInfo) -> int: + return self._true_subset.size + + def resolve_numFalse(self, graphene_info: ResolveInfo) -> int: + return self._candidate_subset.size - self._true_subset.size + + def resolve_falseSubset(self, graphene_info: ResolveInfo) -> GrapheneAssetSubset: + return GrapheneAssetSubset(self._candidate_subset - self._true_subset) + + def resolve_numSkipped(self, graphene_info: ResolveInfo) -> int: + return self._all_subset.size - self._candidate_subset.size + + +class GrapheneSpecificPartitionAssetConditionEvaluation(graphene.ObjectType): + description = graphene.NonNull(graphene.String) + + metadataEntries = non_null_list(GrapheneMetadataEntry) + status = graphene.NonNull(GrapheneAssetConditionEvaluationStatus) + + childEvaluations = graphene.Field( + graphene.List(graphene.NonNull(lambda: GrapheneSpecificPartitionAssetConditionEvaluation)) + ) + + class Meta: + name = "SpecificPartitionAssetConditionEvaluation" + + def __init__(self, evaluation: AssetConditionEvaluation, partition_key: str): + self._evaluation = evaluation + self._partition_key = partition_key + + if partition_key in evaluation.true_subset.subset_value: + status = AssetConditionEvaluationStatus.TRUE + elif ( + evaluation.candidate_subset is None + or partition_key in evaluation.candidate_subset.subset_value + ): + status = AssetConditionEvaluationStatus.FALSE + else: + status = AssetConditionEvaluationStatus.SKIPPED + + super().__init__( + description=evaluation.condition_snapshot.description, + status=status, + childEvaluations=[ + GrapheneSpecificPartitionAssetConditionEvaluation(child, partition_key) + for child in evaluation.child_evaluations + ], + ) + + def resolve_metadataEntries( + self, graphene_info: ResolveInfo + ) -> Sequence[GrapheneMetadataEntry]: + # find the metadata associated with a subset that contains this partition key + metadata = next( + ( + subset.metadata + for subset in self._evaluation.subsets_with_metadata + if self._partition_key in subset.subset.subset_value + ), + {}, + ) + return [GrapheneMetadataEntry(key=key, value=value) for key, value in metadata.items()] + + +class GrapheneAssetConditionEvaluation(graphene.Union): + class Meta: + types = ( + GrapheneUnpartitionedAssetConditionEvaluation, + GraphenePartitionedAssetConditionEvaluation, + GrapheneSpecificPartitionAssetConditionEvaluation, + ) + name = "AssetConditionEvaluation" + + +class GrapheneAssetConditionEvaluationRecord(graphene.ObjectType): + id = graphene.NonNull(graphene.ID) + evaluationId = graphene.NonNull(graphene.Int) + runIds = non_null_list(graphene.String) + timestamp = graphene.NonNull(graphene.Float) + + assetKey = graphene.NonNull(GrapheneAssetKey) + numRequested = graphene.NonNull(graphene.Int) + + evaluation = graphene.NonNull(GrapheneAssetConditionEvaluation) + + class Meta: + name = "AssetConditionEvaluationRecord" + + def __init__( + self, + record: AutoMaterializeAssetEvaluationRecord, + partitions_def: Optional[PartitionsDefinition], + dynamic_partitions_store: DynamicPartitionsStore, + partition_key: Optional[str] = None, + ): + evaluation_with_run_ids = record.get_evaluation_with_run_ids(partitions_def) + if evaluation_with_run_ids.evaluation.true_subset.is_partitioned: + if partition_key is None: + evaluation = GraphenePartitionedAssetConditionEvaluation( + evaluation_with_run_ids.evaluation, partitions_def, dynamic_partitions_store + ) + else: + evaluation = GrapheneSpecificPartitionAssetConditionEvaluation( + evaluation_with_run_ids.evaluation, partition_key + ) + else: + evaluation = GrapheneUnpartitionedAssetConditionEvaluation( + evaluation_with_run_ids.evaluation + ) + + super().__init__( + id=record.id, + evaluationId=record.evaluation_id, + timestamp=record.timestamp, + runIds=evaluation_with_run_ids.run_ids, + assetKey=GrapheneAssetKey(path=record.asset_key.path), + numRequested=evaluation_with_run_ids.evaluation.true_subset.size, + evaluation=evaluation, + ) + + +class GrapheneAssetConditionEvaluationRecords(graphene.ObjectType): + records = non_null_list(GrapheneAssetConditionEvaluationRecord) + + class Meta: + name = "AssetConditionEvaluationRecords" + + +class GrapheneAssetConditionEvaluationRecordsOrError(graphene.Union): + class Meta: + types = ( + GrapheneAssetConditionEvaluationRecords, + GrapheneAutoMaterializeAssetEvaluationNeedsMigrationError, + ) + name = "AssetConditionEvaluationRecordsOrError" diff --git a/python_modules/dagster-graphql/dagster_graphql/schema/asset_graph.py b/python_modules/dagster-graphql/dagster_graphql/schema/asset_graph.py index 403d092453874..2a3a2181f96ad 100644 --- a/python_modules/dagster-graphql/dagster_graphql/schema/asset_graph.py +++ b/python_modules/dagster-graphql/dagster_graphql/schema/asset_graph.py @@ -923,9 +923,7 @@ def _get_automation_policy_external_sensor(self) -> Optional[ExternalSensor]: return matching_sensors[0] def resolve_currentAutoMaterializeEvaluationId(self, graphene_info): - from dagster._daemon.asset_daemon import ( - get_current_evaluation_id, - ) + from dagster._daemon.asset_daemon import get_current_evaluation_id instance = graphene_info.context.instance if instance.auto_materialize_use_automation_policy_sensors: diff --git a/python_modules/dagster-graphql/dagster_graphql/schema/roots/query.py b/python_modules/dagster-graphql/dagster_graphql/schema/roots/query.py index df67e27979f83..193b5e84b4a0a 100644 --- a/python_modules/dagster-graphql/dagster_graphql/schema/roots/query.py +++ b/python_modules/dagster-graphql/dagster_graphql/schema/roots/query.py @@ -23,6 +23,11 @@ from dagster_graphql.implementation.asset_checks_loader import AssetChecksLoader from dagster_graphql.implementation.execution.backfill import get_asset_backfill_preview +from dagster_graphql.implementation.fetch_asset_condition_evaluations import ( + fetch_asset_condition_evaluation_record_for_partition, + fetch_asset_condition_evaluation_records_for_asset_key, + fetch_asset_condition_evaluation_records_for_evaluation_id, +) from dagster_graphql.implementation.fetch_auto_materialize_asset_evaluations import ( fetch_auto_materialize_asset_evaluations, fetch_auto_materialize_asset_evaluations_for_evaluation_id, @@ -30,6 +35,10 @@ from dagster_graphql.implementation.fetch_env_vars import get_utilized_env_vars_or_error from dagster_graphql.implementation.fetch_logs import get_captured_log_metadata from dagster_graphql.implementation.fetch_runs import get_assets_latest_info +from dagster_graphql.schema.asset_condition_evaluations import ( + GrapheneAssetConditionEvaluationRecordsOrError, + GrapheneSpecificPartitionAssetConditionEvaluation, +) from dagster_graphql.schema.auto_materialize_asset_evaluations import ( GrapheneAutoMaterializeAssetEvaluationRecordsOrError, ) @@ -513,6 +522,28 @@ class Meta: ), ) + assetConditionEvaluationForPartition = graphene.Field( + GrapheneSpecificPartitionAssetConditionEvaluation, + assetKey=graphene.Argument(graphene.NonNull(GrapheneAssetKeyInput)), + evaluationId=graphene.Argument(graphene.NonNull(graphene.Int)), + partition=graphene.Argument(graphene.NonNull(graphene.String)), + description="Retrieve the condition evaluation for an asset and partition.", + ) + + assetConditionEvaluationRecordsOrError = graphene.Field( + GrapheneAssetConditionEvaluationRecordsOrError, + assetKey=graphene.Argument(graphene.NonNull(GrapheneAssetKeyInput)), + limit=graphene.Argument(graphene.NonNull(graphene.Int)), + cursor=graphene.Argument(graphene.String), + description="Retrieve the condition evaluation records for an asset.", + ) + + assetConditionEvaluationsForEvaluationId = graphene.Field( + GrapheneAssetConditionEvaluationRecordsOrError, + evaluationId=graphene.Argument(graphene.NonNull(graphene.Int)), + description=("Retrieve the condition evaluation records for a given evaluation ID."), + ) + autoMaterializeTicks = graphene.Field( non_null_list(GrapheneInstigationTick), dayRange=graphene.Int(), @@ -1092,6 +1123,38 @@ def resolve_autoMaterializeEvaluationsForEvaluationId( graphene_info=graphene_info, evaluation_id=evaluationId ) + def resolve_assetConditionEvaluationForPartition( + self, + graphene_info: ResolveInfo, + assetKey: GrapheneAssetKeyInput, + evaluationId: int, + partition: str, + ): + return fetch_asset_condition_evaluation_record_for_partition( + graphene_info=graphene_info, + graphene_asset_key=assetKey, + evaluation_id=evaluationId, + partition_key=partition, + ) + + def resolve_assetConditionEvaluationRecordsOrError( + self, + graphene_info: ResolveInfo, + assetKey: GrapheneAssetKeyInput, + limit: int, + cursor: Optional[str] = None, + ): + return fetch_asset_condition_evaluation_records_for_asset_key( + graphene_info=graphene_info, graphene_asset_key=assetKey, cursor=cursor, limit=limit + ) + + def resolve_assetConditionEvaluationsForEvaluationId( + self, graphene_info: ResolveInfo, evaluationId: int + ): + return fetch_asset_condition_evaluation_records_for_evaluation_id( + graphene_info=graphene_info, evaluation_id=evaluationId + ) + def resolve_autoMaterializeTicks( self, graphene_info, diff --git a/python_modules/dagster-graphql/dagster_graphql_tests/graphql/test_asset_condition_evaluations.py b/python_modules/dagster-graphql/dagster_graphql_tests/graphql/test_asset_condition_evaluations.py new file mode 100644 index 0000000000000..d62d4b8524bcf --- /dev/null +++ b/python_modules/dagster-graphql/dagster_graphql_tests/graphql/test_asset_condition_evaluations.py @@ -0,0 +1,895 @@ +from typing import Optional, Sequence +from unittest.mock import PropertyMock, patch + +import dagster._check as check +import pendulum +from dagster import AssetKey, RunRequest +from dagster._core.definitions.asset_condition import ( + AssetConditionEvaluation, + AssetConditionEvaluationWithRunIds, + AssetConditionSnapshot, + HistoricalAllPartitionsSubset, +) +from dagster._core.definitions.asset_daemon_cursor import AssetDaemonCursor +from dagster._core.definitions.asset_subset import AssetSubset +from dagster._core.definitions.auto_materialize_rule_evaluation import ( + deserialize_auto_materialize_asset_evaluation_to_asset_condition_evaluation_with_run_ids, +) +from dagster._core.definitions.partition import PartitionsDefinition, StaticPartitionsDefinition +from dagster._core.definitions.run_request import ( + InstigatorType, +) +from dagster._core.definitions.sensor_definition import SensorType +from dagster._core.host_representation.origin import ( + ExternalInstigatorOrigin, +) +from dagster._core.scheduler.instigation import ( + InstigatorState, + InstigatorStatus, + SensorInstigatorData, + TickData, + TickStatus, +) +from dagster._core.workspace.context import WorkspaceRequestContext +from dagster._daemon.asset_daemon import ( + _PRE_SENSOR_AUTO_MATERIALIZE_CURSOR_KEY, + _PRE_SENSOR_AUTO_MATERIALIZE_INSTIGATOR_NAME, + _PRE_SENSOR_AUTO_MATERIALIZE_ORIGIN_ID, + _PRE_SENSOR_AUTO_MATERIALIZE_SELECTOR_ID, +) +from dagster._serdes.serdes import serialize_value +from dagster_graphql.test.utils import execute_dagster_graphql, infer_repository + +from dagster_graphql_tests.graphql.graphql_context_test_suite import ( + ExecutingGraphQLContextTestMatrix, +) + +TICKS_QUERY = """ +query AssetDameonTicksQuery($dayRange: Int, $dayOffset: Int, $statuses: [InstigationTickStatus!], $limit: Int, $cursor: String, $beforeTimestamp: Float, $afterTimestamp: Float) { + autoMaterializeTicks(dayRange: $dayRange, dayOffset: $dayOffset, statuses: $statuses, limit: $limit, cursor: $cursor, beforeTimestamp: $beforeTimestamp, afterTimestamp: $afterTimestamp) { + id + timestamp + endTimestamp + status + requestedAssetKeys { + path + } + requestedMaterializationsForAssets { + assetKey { + path + } + partitionKeys + } + requestedAssetMaterializationCount + autoMaterializeAssetEvaluationId + } +} +""" + + +def _create_tick(instance, status, timestamp, evaluation_id, run_requests=None, end_timestamp=None): + return instance.create_tick( + TickData( + instigator_origin_id=_PRE_SENSOR_AUTO_MATERIALIZE_ORIGIN_ID, + instigator_name=_PRE_SENSOR_AUTO_MATERIALIZE_INSTIGATOR_NAME, + instigator_type=InstigatorType.AUTO_MATERIALIZE, + status=status, + timestamp=timestamp, + end_timestamp=end_timestamp, + selector_id=_PRE_SENSOR_AUTO_MATERIALIZE_SELECTOR_ID, + run_ids=[], + auto_materialize_evaluation_id=evaluation_id, + run_requests=run_requests, + ) + ) + + +class TestAutoMaterializeTicks(ExecutingGraphQLContextTestMatrix): + def test_get_tick_range(self, graphql_context): + result = execute_dagster_graphql( + graphql_context, + TICKS_QUERY, + variables={"dayRange": None, "dayOffset": None}, + ) + assert len(result.data["autoMaterializeTicks"]) == 0 + + now = pendulum.now("UTC") + end_timestamp = now.timestamp() + 20 + + success_1 = _create_tick( + graphql_context.instance, + TickStatus.SUCCESS, + now.timestamp(), + end_timestamp=end_timestamp, + evaluation_id=3, + run_requests=[ + RunRequest(asset_selection=[AssetKey("foo"), AssetKey("bar")], partition_key="abc"), + RunRequest(asset_selection=[AssetKey("bar")], partition_key="def"), + RunRequest(asset_selection=[AssetKey("baz")], partition_key=None), + ], + ) + + success_2 = _create_tick( + graphql_context.instance, + TickStatus.SUCCESS, + now.subtract(days=1, hours=1).timestamp(), + evaluation_id=2, + ) + + _create_tick( + graphql_context.instance, + TickStatus.SKIPPED, + now.subtract(days=2, hours=1).timestamp(), + evaluation_id=1, + ) + + result = execute_dagster_graphql( + graphql_context, + TICKS_QUERY, + variables={"dayRange": None, "dayOffset": None}, + ) + assert len(result.data["autoMaterializeTicks"]) == 3 + + result = execute_dagster_graphql( + graphql_context, + TICKS_QUERY, + variables={"dayRange": 1, "dayOffset": None}, + ) + assert len(result.data["autoMaterializeTicks"]) == 1 + tick = result.data["autoMaterializeTicks"][0] + assert tick["endTimestamp"] == end_timestamp + assert tick["autoMaterializeAssetEvaluationId"] == 3 + assert sorted(tick["requestedAssetKeys"], key=lambda x: x["path"][0]) == [ + {"path": ["bar"]}, + {"path": ["baz"]}, + {"path": ["foo"]}, + ] + + asset_materializations = tick["requestedMaterializationsForAssets"] + by_asset_key = { + AssetKey.from_coercible(mat["assetKey"]["path"]).to_user_string(): mat["partitionKeys"] + for mat in asset_materializations + } + + assert {key: sorted(val) for key, val in by_asset_key.items()} == { + "foo": ["abc"], + "bar": ["abc", "def"], + "baz": [], + } + + assert tick["requestedAssetMaterializationCount"] == 4 + + result = execute_dagster_graphql( + graphql_context, + TICKS_QUERY, + variables={ + "beforeTimestamp": success_2.timestamp + 1, + "afterTimestamp": success_2.timestamp - 1, + }, + ) + assert len(result.data["autoMaterializeTicks"]) == 1 + tick = result.data["autoMaterializeTicks"][0] + assert ( + tick["autoMaterializeAssetEvaluationId"] + == success_2.tick_data.auto_materialize_evaluation_id + ) + + result = execute_dagster_graphql( + graphql_context, + TICKS_QUERY, + variables={"dayRange": None, "dayOffset": None, "statuses": ["SUCCESS"]}, + ) + assert len(result.data["autoMaterializeTicks"]) == 2 + + result = execute_dagster_graphql( + graphql_context, + TICKS_QUERY, + variables={"dayRange": None, "dayOffset": None, "statuses": ["SUCCESS"], "limit": 1}, + ) + ticks = result.data["autoMaterializeTicks"] + assert len(ticks) == 1 + assert ticks[0]["timestamp"] == success_1.timestamp + assert ( + ticks[0]["autoMaterializeAssetEvaluationId"] + == success_1.tick_data.auto_materialize_evaluation_id + ) + + cursor = ticks[0]["id"] + + result = execute_dagster_graphql( + graphql_context, + TICKS_QUERY, + variables={ + "dayRange": None, + "dayOffset": None, + "statuses": ["SUCCESS"], + "limit": 1, + "cursor": cursor, + }, + ) + ticks = result.data["autoMaterializeTicks"] + assert len(ticks) == 1 + assert ticks[0]["timestamp"] == success_2.timestamp + + +FRAGMENTS = """ +fragment unpartitionedEvaluationFields on UnpartitionedAssetConditionEvaluation { + description + startTimestamp + endTimestamp + status +} + +fragment partitionedEvaluationFields on PartitionedAssetConditionEvaluation { + description + startTimestamp + endTimestamp + numTrue + numFalse + numSkipped + trueSubset { + subsetValue { + isPartitioned + partitionKeys + } + } + falseSubset { + subsetValue { + isPartitioned + partitionKeys + } + } +} + +fragment evaluationFields on AssetConditionEvaluation { + ... on UnpartitionedAssetConditionEvaluation { + ...unpartitionedEvaluationFields + childEvaluations { + ...unpartitionedEvaluationFields + childEvaluations { + ...unpartitionedEvaluationFields + childEvaluations { + ...unpartitionedEvaluationFields + childEvaluations { + ...unpartitionedEvaluationFields + } + } + } + } + } + ... on PartitionedAssetConditionEvaluation { + ...partitionedEvaluationFields + childEvaluations { + ...partitionedEvaluationFields + childEvaluations { + ...partitionedEvaluationFields + childEvaluations { + ...partitionedEvaluationFields + childEvaluations { + ...partitionedEvaluationFields + } + } + } + } + } +} +""" +QUERY = ( + FRAGMENTS + + """ +query GetEvaluationsQuery($assetKey: AssetKeyInput!, $limit: Int!, $cursor: String) { + assetNodeOrError(assetKey: $assetKey) { + ... on AssetNode { + currentAutoMaterializeEvaluationId + automationPolicySensor { + name + } + } + } + assetConditionEvaluationRecordsOrError(assetKey: $assetKey, limit: $limit, cursor: $cursor) { + ... on AssetConditionEvaluationRecords { + records { + id + numRequested + assetKey { + path + } + evaluation { + ...evaluationFields + } + } + } + } +} +""" +) + +QUERY_FOR_SPECIFIC_PARTITION = """ +fragment specificPartitionEvaluationFields on SpecificPartitionAssetConditionEvaluation { + description + status +} +query GetPartitionEvaluationQuery($assetKey: AssetKeyInput!, $partition: String!, $evaluationId: Int!) { + assetConditionEvaluationForPartition(assetKey: $assetKey, partition: $partition, evaluationId: $evaluationId) { + ...specificPartitionEvaluationFields + childEvaluations { + ...specificPartitionEvaluationFields + childEvaluations { + ...specificPartitionEvaluationFields + childEvaluations { + ...specificPartitionEvaluationFields + childEvaluations { + ...specificPartitionEvaluationFields + } + } + } + } + } +} +""" + +QUERY_FOR_EVALUATION_ID = ( + FRAGMENTS + + """ +query GetEvaluationsForEvaluationIdQuery($evaluationId: Int!) { + assetConditionEvaluationsForEvaluationId(evaluationId: $evaluationId) { + ... on AssetConditionEvaluationRecords { + records { + id + numRequested + assetKey { + path + } + evaluation { + ...evaluationFields + } + } + } + } +} +""" +) + + +class TestAutoMaterializeAssetEvaluations(ExecutingGraphQLContextTestMatrix): + def test_automation_policy_sensor(self, graphql_context: WorkspaceRequestContext): + sensor_origin = ExternalInstigatorOrigin( + external_repository_origin=infer_repository(graphql_context).get_external_origin(), + instigator_name="my_automation_policy_sensor", + ) + + check.not_none(graphql_context.instance.schedule_storage).add_instigator_state( + InstigatorState( + sensor_origin, + InstigatorType.SENSOR, + status=InstigatorStatus.RUNNING, + instigator_data=SensorInstigatorData( + sensor_type=SensorType.AUTOMATION_POLICY, + cursor=serialize_value(AssetDaemonCursor.empty(12345)), + ), + ) + ) + + with patch( + "dagster._core.instance.DagsterInstance.auto_materialize_use_automation_policy_sensors", + new_callable=PropertyMock, + ) as mock_my_property: + mock_my_property.return_value = True + + results = execute_dagster_graphql( + graphql_context, + QUERY, + variables={ + "assetKey": {"path": ["fresh_diamond_bottom"]}, + "limit": 10, + "cursor": None, + }, + ) + assert ( + results.data["assetNodeOrError"]["automationPolicySensor"]["name"] + == "my_automation_policy_sensor" + ) + assert results.data["assetNodeOrError"]["currentAutoMaterializeEvaluationId"] == 12345 + + def test_get_historic_rules_without_evaluation_data( + self, graphql_context: WorkspaceRequestContext + ): + evaluation1 = deserialize_auto_materialize_asset_evaluation_to_asset_condition_evaluation_with_run_ids( + '{"__class__": "AutoMaterializeAssetEvaluation", "asset_key": {"__class__": "AssetKey", "path": ["asset_one"]}, "num_discarded": 0, "num_requested": 0, "num_skipped": 0, "partition_subsets_by_condition": [], "rule_snapshots": null, "run_ids": {"__set__": []}}', + None, + ) + evaluation2 = deserialize_auto_materialize_asset_evaluation_to_asset_condition_evaluation_with_run_ids( + '{"__class__": "AutoMaterializeAssetEvaluation", "asset_key": {"__class__": "AssetKey", "path": ["asset_two"]}, "num_discarded": 0, "num_requested": 1, "num_skipped": 0, "partition_subsets_by_condition": [], "rule_snapshots": [{"__class__": "AutoMaterializeRuleSnapshot", "class_name": "MaterializeOnMissingRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.MATERIALIZE"}, "description": "materialization is missing"}], "run_ids": {"__set__": []}}', + None, + ) + check.not_none( + graphql_context.instance.schedule_storage + ).add_auto_materialize_asset_evaluations( + evaluation_id=10, asset_evaluations=[evaluation1, evaluation2] + ) + + results = execute_dagster_graphql( + graphql_context, + QUERY, + variables={"assetKey": {"path": ["asset_one"]}, "limit": 10, "cursor": None}, + ) + assert len(results.data["assetConditionEvaluationRecordsOrError"]["records"]) == 1 + asset_one_record = results.data["assetConditionEvaluationRecordsOrError"]["records"][0] + assert asset_one_record["assetKey"] == {"path": ["asset_one"]} + assert asset_one_record["evaluation"]["status"] == "SKIPPED" + + results = execute_dagster_graphql( + graphql_context, + QUERY, + variables={"assetKey": {"path": ["asset_two"]}, "limit": 10, "cursor": None}, + ) + assert len(results.data["assetConditionEvaluationRecordsOrError"]["records"]) == 1 + asset_two_record = results.data["assetConditionEvaluationRecordsOrError"]["records"][0] + assert asset_two_record["evaluation"]["description"] == "All of" + assert asset_two_record["evaluation"]["status"] == "SKIPPED" + asset_two_children = asset_two_record["evaluation"]["childEvaluations"] + assert len(asset_two_children) == 2 + assert asset_two_children[0]["description"] == "Any of" + assert asset_two_children[0]["status"] == "SKIPPED" + assert ( + asset_two_children[0]["childEvaluations"][0]["description"] + == "materialization is missing" + ) + + results = execute_dagster_graphql( + graphql_context, + QUERY_FOR_EVALUATION_ID, + variables={"evaluationId": 10}, + ) + + records = results.data["assetConditionEvaluationsForEvaluationId"]["records"] + + assert len(records) == 2 + + # record from both previous queries are contained here + assert any(record == asset_one_record for record in records) + assert any(record == asset_two_record for record in records) + + results = execute_dagster_graphql( + graphql_context, + QUERY_FOR_EVALUATION_ID, + variables={"evaluationId": 12345}, + ) + + records = results.data["assetConditionEvaluationsForEvaluationId"]["records"] + assert len(records) == 0 + + def test_get_historic_evaluation_with_evaluation_data( + self, graphql_context: WorkspaceRequestContext + ): + evaluation = deserialize_auto_materialize_asset_evaluation_to_asset_condition_evaluation_with_run_ids( + '{"__class__": "AutoMaterializeAssetEvaluation", "asset_key": {"__class__": "AssetKey", "path": ["upstream_static_partitioned_asset"]}, "num_discarded": 0, "num_requested": 0, "num_skipped": 1, "partition_subsets_by_condition": [[{"__class__": "AutoMaterializeRuleEvaluation", "evaluation_data": {"__class__": "WaitingOnAssetsRuleEvaluationData", "waiting_on_asset_keys": {"__frozenset__": [{"__class__": "AssetKey", "path": ["blah"]}]}}, "rule_snapshot": {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "SkipOnRequiredButNonexistentParentsRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.SKIP"}, "description": "required parent partitions do not exist"}}, {"__class__": "SerializedPartitionsSubset", "serialized_partitions_def_class_name": "StaticPartitionsDefinition", "serialized_partitions_def_unique_id": "7c2047f8b02e90a69136c1a657bd99ad80b433a2", "serialized_subset": "{\\"version\\": 1, \\"subset\\": [\\"a\\"]}"}]], "rule_snapshots": null, "run_ids": {"__set__": []}}', + StaticPartitionsDefinition(["a", "b", "c", "d", "e", "f"]), + ) + check.not_none( + graphql_context.instance.schedule_storage + ).add_auto_materialize_asset_evaluations( + evaluation_id=10, + asset_evaluations=[evaluation], + ) + + results = execute_dagster_graphql( + graphql_context, + QUERY, + variables={ + "assetKey": {"path": ["upstream_static_partitioned_asset"]}, + "limit": 10, + "cursor": None, + }, + ) + + records = results.data["assetConditionEvaluationRecordsOrError"]["records"] + assert len(records) == 1 + evaluation = records[0]["evaluation"] + assert evaluation["numTrue"] == 0 + assert evaluation["numFalse"] == 6 + assert evaluation["numSkipped"] == 0 + assert len(evaluation["childEvaluations"]) == 2 + not_skip_evaluation = evaluation["childEvaluations"][1] + assert not_skip_evaluation["description"] == "Not" + assert not_skip_evaluation["numTrue"] == 1 + assert len(not_skip_evaluation["childEvaluations"]) == 1 + assert not_skip_evaluation["childEvaluations"][0]["description"] == "Any of" + assert len(not_skip_evaluation["childEvaluations"][0]["childEvaluations"]) == 2 + + def test_get_evaluations(self, graphql_context: WorkspaceRequestContext): + evaluation1 = deserialize_auto_materialize_asset_evaluation_to_asset_condition_evaluation_with_run_ids( + '{"__class__": "AutoMaterializeAssetEvaluation", "asset_key": {"__class__": "AssetKey", "path": ["asset_one"]}, "num_discarded": 0, "num_requested": 0, "num_skipped": 0, "partition_subsets_by_condition": [], "rule_snapshots": null, "run_ids": {"__set__": []}}', + None, + ) + evaluation2 = deserialize_auto_materialize_asset_evaluation_to_asset_condition_evaluation_with_run_ids( + '{"__class__": "AutoMaterializeAssetEvaluation", "asset_key": {"__class__": "AssetKey", "path": ["asset_two"]}, "num_discarded": 0, "num_requested": 1, "num_skipped": 0, "partition_subsets_by_condition": [[{"__class__": "AutoMaterializeRuleEvaluation", "evaluation_data": null, "rule_snapshot": {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "MaterializeOnMissingRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.MATERIALIZE"}, "description": "materialization is missing"}}, null]], "rule_snapshots": null, "run_ids": {"__set__": []}}', + None, + ) + evaluation3 = deserialize_auto_materialize_asset_evaluation_to_asset_condition_evaluation_with_run_ids( + '{"__class__": "AutoMaterializeAssetEvaluation", "asset_key": {"__class__": "AssetKey", "path": ["asset_three"]}, "num_discarded": 0, "num_requested": 0, "num_skipped": 1, "partition_subsets_by_condition": [[{"__class__": "AutoMaterializeRuleEvaluation", "evaluation_data": {"__class__": "WaitingOnAssetsRuleEvaluationData", "waiting_on_asset_keys": {"__frozenset__": [{"__class__": "AssetKey", "path": ["asset_two"]}]}}, "rule_snapshot": {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "SkipOnParentOutdatedRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.SKIP"}, "description": "waiting on upstream data to be up to date"}}, null]], "rule_snapshots": null, "run_ids": {"__set__": []}}', + None, + ) + evaluation4 = deserialize_auto_materialize_asset_evaluation_to_asset_condition_evaluation_with_run_ids( + '{"__class__": "AutoMaterializeAssetEvaluation", "asset_key": {"__class__": "AssetKey", "path": ["asset_four"]}, "num_discarded": 0, "num_requested": 1, "num_skipped": 0, "partition_subsets_by_condition": [[{"__class__": "AutoMaterializeRuleEvaluation", "evaluation_data": {"__class__": "ParentUpdatedRuleEvaluationData", "updated_asset_keys": {"__frozenset__": [{"__class__": "AssetKey", "path": ["asset_two"]}]}, "will_update_asset_keys": {"__frozenset__": [{"__class__": "AssetKey", "path": ["asset_three"]}]}}, "rule_snapshot": {"__class__": "AutoMaterializeRuleSnapshot", "class_name": "MaterializeOnParentUpdatedRule", "decision_type": {"__enum__": "AutoMaterializeDecisionType.MATERIALIZE"}, "description": "upstream data has changed since latest materialization"}}, null]], "rule_snapshots": null, "run_ids": {"__set__": []}}', + None, + ) + results = execute_dagster_graphql( + graphql_context, + QUERY, + variables={"assetKey": {"path": ["foo"]}, "limit": 10, "cursor": None}, + ) + assert results.data == { + "autoMaterializeAssetEvaluationsOrError": {"records": []}, + "assetNodeOrError": { + "currentAutoMaterializeEvaluationId": None, + "automationPolicySensor": None, + }, + } + + check.not_none( + graphql_context.instance.schedule_storage + ).add_auto_materialize_asset_evaluations( + evaluation_id=10, asset_evaluations=[evaluation1, evaluation2, evaluation3, evaluation4] + ) + + results = execute_dagster_graphql( + graphql_context, + QUERY, + variables={"assetKey": {"path": ["asset_one"]}, "limit": 10, "cursor": None}, + ) + assert results.data == { + "assetNodeOrError": { + "currentAutoMaterializeEvaluationId": None, + "automationPolicySensor": None, + }, + "autoMaterializeAssetEvaluationsOrError": { + "records": [ + { + "numRequested": 0, + "numSkipped": 0, + "numDiscarded": 0, + "rules": [], + "rulesWithRuleEvaluations": [], + "assetKey": {"path": ["asset_one"]}, + } + ], + }, + } + + results = execute_dagster_graphql( + graphql_context, + QUERY, + variables={"assetKey": {"path": ["asset_two"]}, "limit": 10, "cursor": None}, + ) + assert results.data == { + "assetNodeOrError": { + "currentAutoMaterializeEvaluationId": None, + "automationPolicySensor": None, + }, + "autoMaterializeAssetEvaluationsOrError": { + "records": [ + { + "numRequested": 1, + "numSkipped": 0, + "numDiscarded": 0, + "rulesWithRuleEvaluations": [ + { + "rule": {"decisionType": "MATERIALIZE"}, + "ruleEvaluations": [ + { + "evaluationData": None, + "partitionKeysOrError": None, + } + ], + } + ], + } + ], + }, + } + + results = execute_dagster_graphql( + graphql_context, + QUERY, + variables={"assetKey": {"path": ["asset_three"]}, "limit": 10, "cursor": None}, + ) + assert results.data == { + "assetNodeOrError": { + "currentAutoMaterializeEvaluationId": None, + "automationPolicySensor": None, + }, + "autoMaterializeAssetEvaluationsOrError": { + "records": [ + { + "numRequested": 0, + "numSkipped": 1, + "numDiscarded": 0, + "rulesWithRuleEvaluations": [ + { + "rule": {"decisionType": "SKIP"}, + "ruleEvaluations": [ + { + "evaluationData": { + "waitingOnAssetKeys": [{"path": ["asset_two"]}], + }, + "partitionKeysOrError": None, + } + ], + } + ], + } + ], + }, + } + + results = execute_dagster_graphql( + graphql_context, + QUERY, + variables={"assetKey": {"path": ["asset_four"]}, "limit": 10, "cursor": None}, + ) + assert results.data == { + "assetNodeOrError": { + "currentAutoMaterializeEvaluationId": None, + "automationPolicySensor": None, + }, + "autoMaterializeAssetEvaluationsOrError": { + "records": [ + { + "numRequested": 1, + "numSkipped": 0, + "numDiscarded": 0, + "rulesWithRuleEvaluations": [ + { + "rule": {"decisionType": "MATERIALIZE"}, + "ruleEvaluations": [ + { + "evaluationData": { + "updatedAssetKeys": [{"path": ["asset_two"]}], + "willUpdateAssetKeys": [{"path": ["asset_three"]}], + }, + "partitionKeysOrError": None, + } + ], + } + ], + } + ], + }, + } + + def _get_condition_evaluation( + self, + asset_key: AssetKey, + description: str, + partitions_def: PartitionsDefinition, + true_partition_keys: Sequence[str], + candidate_partition_keys: Optional[Sequence[str]] = None, + child_evaluations: Optional[Sequence[AssetConditionEvaluation]] = None, + ) -> AssetConditionEvaluation: + return AssetConditionEvaluation( + condition_snapshot=AssetConditionSnapshot("...", description, "a1b2"), + true_subset=AssetSubset( + asset_key=asset_key, + value=partitions_def.subset_with_partition_keys(true_partition_keys), + ), + candidate_subset=AssetSubset( + asset_key=asset_key, + value=partitions_def.subset_with_partition_keys(candidate_partition_keys), + ) + if candidate_partition_keys + else HistoricalAllPartitionsSubset(), + start_timestamp=123, + end_timestamp=456, + child_evaluations=child_evaluations or [], + ) + + def test_get_evaluations_with_partitions(self, graphql_context: WorkspaceRequestContext): + asset_key = AssetKey("upstream_static_partitioned_asset") + partitions_def = StaticPartitionsDefinition(["a", "b", "c", "d", "e", "f"]) + results = execute_dagster_graphql( + graphql_context, + QUERY, + variables={ + "assetKey": {"path": ["upstream_static_partitioned_asset"]}, + "limit": 10, + "cursor": None, + }, + ) + assert results.data == { + "assetNodeOrError": { + "currentAutoMaterializeEvaluationId": None, + "automationPolicySensor": None, + }, + "assetConditionEvaluationRecordsOrError": {"records": []}, + } + + evaluation = self._get_condition_evaluation( + asset_key, + "All of", + partitions_def, + ["a", "b"], + child_evaluations=[ + self._get_condition_evaluation( + asset_key, + "Any of", + partitions_def, + ["a", "b", "c"], + child_evaluations=[ + self._get_condition_evaluation( + asset_key, "parent_updated", partitions_def, ["a", "c"] + ), + self._get_condition_evaluation(asset_key, "missing", partitions_def, ["b"]), + self._get_condition_evaluation(asset_key, "other", partitions_def, []), + ], + ), + self._get_condition_evaluation( + asset_key, + "Not", + partitions_def, + ["a", "b"], + candidate_partition_keys=["a", "b", "c"], + child_evaluations=[ + self._get_condition_evaluation( + asset_key, + "Any of", + partitions_def, + ["c"], + ["a", "b", "c"], + child_evaluations=[ + self._get_condition_evaluation( + asset_key, + "parent missing", + partitions_def, + ["c"], + ["a", "b", "c"], + ), + self._get_condition_evaluation( + asset_key, + "parent outdated", + partitions_def, + [], + ["a", "b", "c"], + ), + ], + ), + ], + ), + ], + ) + + check.not_none( + graphql_context.instance.schedule_storage + ).add_auto_materialize_asset_evaluations( + evaluation_id=10, + asset_evaluations=[ + AssetConditionEvaluationWithRunIds(evaluation, frozenset({"runid1", "runid2"})) + ], + ) + + results = execute_dagster_graphql( + graphql_context, + QUERY, + variables={ + "assetKey": {"path": ["upstream_static_partitioned_asset"]}, + "limit": 10, + "cursor": None, + }, + ) + + records = results.data["assetConditionEvaluationRecordsOrError"]["records"] + assert len(records) == 1 + + assert records[0]["numRequested"] == 2 + evaluation = records[0]["evaluation"] + assert evaluation["description"] == "All of" + assert evaluation["numTrue"] == 2 + assert evaluation["numFalse"] == 4 + assert evaluation["numSkipped"] == 0 + assert set(evaluation["trueSubset"]["subsetValue"]["partitionKeys"]) == {"a", "b"} + assert len(evaluation["childEvaluations"]) == 2 + + not_evaluation = evaluation["childEvaluations"][1] + assert not_evaluation["description"] == "Not" + assert not_evaluation["numTrue"] == 2 + assert not_evaluation["numFalse"] == 1 + assert not_evaluation["numSkipped"] == 3 + assert set(not_evaluation["trueSubset"]["subsetValue"]["partitionKeys"]) == {"a", "b"} + + skip_evaluation = not_evaluation["childEvaluations"][0] + assert skip_evaluation["description"] == "Any of" + assert skip_evaluation["numTrue"] == 1 + assert skip_evaluation["numFalse"] == 2 + assert skip_evaluation["numSkipped"] == 3 + assert set(skip_evaluation["trueSubset"]["subsetValue"]["partitionKeys"]) == {"c"} + + # test one of the true partitions + specific_result = execute_dagster_graphql( + graphql_context, + QUERY_FOR_SPECIFIC_PARTITION, + variables={ + "assetKey": {"path": ["upstream_static_partitioned_asset"]}, + "partition": "b", + "evaluationId": 10, + }, + ) + + evaluation = specific_result.data["assetConditionEvaluationForPartition"] + assert evaluation["description"] == "All of" + assert evaluation["status"] == "TRUE" + assert len(evaluation["childEvaluations"]) == 2 + + not_evaluation = evaluation["childEvaluations"][1] + assert not_evaluation["description"] == "Not" + assert not_evaluation["status"] == "TRUE" + + skip_evaluation = not_evaluation["childEvaluations"][0] + assert skip_evaluation["description"] == "Any of" + assert skip_evaluation["status"] == "FALSE" + + # test one of the false partitions + specific_result = execute_dagster_graphql( + graphql_context, + QUERY_FOR_SPECIFIC_PARTITION, + variables={ + "assetKey": {"path": ["upstream_static_partitioned_asset"]}, + "partition": "d", + "evaluationId": 10, + }, + ) + + evaluation = specific_result.data["assetConditionEvaluationForPartition"] + assert evaluation["description"] == "All of" + assert evaluation["status"] == "FALSE" + assert len(evaluation["childEvaluations"]) == 2 + + not_evaluation = evaluation["childEvaluations"][1] + assert not_evaluation["description"] == "Not" + assert not_evaluation["status"] == "SKIPPED" + + skip_evaluation = not_evaluation["childEvaluations"][0] + assert skip_evaluation["description"] == "Any of" + assert skip_evaluation["status"] == "SKIPPED" + + def _test_current_evaluation_id(self, graphql_context: WorkspaceRequestContext): + graphql_context.instance.daemon_cursor_storage.set_cursor_values( + {_PRE_SENSOR_AUTO_MATERIALIZE_CURSOR_KEY: serialize_value(AssetDaemonCursor.empty(0))} + ) + + results = execute_dagster_graphql( + graphql_context, + QUERY, + variables={"assetKey": {"path": ["asset_two"]}, "limit": 10, "cursor": None}, + ) + assert results.data == { + "assetNodeOrError": { + "currentAutoMaterializeEvaluationId": 0, + "automationPolicySensor": None, + }, + "autoMaterializeAssetEvaluationsOrError": { + "records": [], + }, + } + + graphql_context.instance.daemon_cursor_storage.set_cursor_values( + { + _PRE_SENSOR_AUTO_MATERIALIZE_CURSOR_KEY: ( + serialize_value(AssetDaemonCursor.empty(0).with_updates(0, 1.0, [], [])) + ) + } + ) + + results = execute_dagster_graphql( + graphql_context, + QUERY, + variables={"assetKey": {"path": ["asset_two"]}, "limit": 10, "cursor": None}, + ) + assert results.data == { + "assetNodeOrError": { + "currentAutoMaterializeEvaluationId": 42, + "automationPolicySensor": None, + }, + "autoMaterializeAssetEvaluationsOrError": { + "records": [], + }, + } diff --git a/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule_evaluation.py b/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule_evaluation.py index 8abcb8ddc7918..b3fd30e875665 100644 --- a/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule_evaluation.py +++ b/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule_evaluation.py @@ -311,6 +311,7 @@ def _get_child_decision_type_evaluation( rule_snapshot, ) for rule_snapshot in rule_snapshots + or set(partition_subsets_by_condition_by_rule_snapshot.keys()) if rule_snapshot.decision_type == decision_type ] From 78a294b8df91ca1ef2477ac834e9f756995da6e6 Mon Sep 17 00:00:00 2001 From: Owen Kephart Date: Wed, 3 Jan 2024 10:11:17 -0500 Subject: [PATCH 19/56] Return tree instead of recursive resolver --- .../src/graphql/possibleTypes.generated.json | 2 +- .../ui-core/src/graphql/schema.graphql | 89 +++--- .../packages/ui-core/src/graphql/types.ts | 153 ++++++---- .../fetch_asset_condition_evaluations.py | 9 +- .../schema/asset_condition_evaluations.py | 164 ++++++----- .../dagster_graphql/schema/roots/query.py | 4 +- .../test_asset_condition_evaluations.py | 266 +++++++++--------- .../_core/definitions/asset_condition.py | 5 +- .../_core/definitions/asset_daemon_cursor.py | 2 +- .../auto_materialize_rule_evaluation.py | 30 +- 10 files changed, 395 insertions(+), 329 deletions(-) diff --git a/js_modules/dagster-ui/packages/ui-core/src/graphql/possibleTypes.generated.json b/js_modules/dagster-ui/packages/ui-core/src/graphql/possibleTypes.generated.json index 8a37a392790a0..4d529a469d3a4 100644 --- a/js_modules/dagster-ui/packages/ui-core/src/graphql/possibleTypes.generated.json +++ b/js_modules/dagster-ui/packages/ui-core/src/graphql/possibleTypes.generated.json @@ -1 +1 @@ -{"DisplayableEvent":["EngineEvent","ExecutionStepOutputEvent","ExpectationResult","FailureMetadata","HandledOutputEvent","LoadedInputEvent","ObjectStoreOperationResult","ResourceInitFailureEvent","ResourceInitStartedEvent","ResourceInitSuccessEvent","StepWorkerStartedEvent","StepWorkerStartingEvent","MaterializationEvent","ObservationEvent","TypeCheck"],"MarkerEvent":["EngineEvent","ResourceInitFailureEvent","ResourceInitStartedEvent","ResourceInitSuccessEvent","StepWorkerStartedEvent","StepWorkerStartingEvent"],"ErrorEvent":["EngineEvent","ExecutionStepFailureEvent","ExecutionStepUpForRetryEvent","HookErroredEvent","RunFailureEvent","ResourceInitFailureEvent"],"MessageEvent":["EngineEvent","ExecutionStepFailureEvent","ExecutionStepInputEvent","ExecutionStepOutputEvent","ExecutionStepRestartEvent","ExecutionStepSkippedEvent","ExecutionStepStartEvent","ExecutionStepSuccessEvent","ExecutionStepUpForRetryEvent","HandledOutputEvent","HookCompletedEvent","HookErroredEvent","HookSkippedEvent","LoadedInputEvent","LogMessageEvent","ObjectStoreOperationEvent","RunCanceledEvent","RunCancelingEvent","RunDequeuedEvent","RunEnqueuedEvent","RunFailureEvent","ResourceInitFailureEvent","ResourceInitStartedEvent","ResourceInitSuccessEvent","RunStartEvent","RunStartingEvent","RunSuccessEvent","StepExpectationResultEvent","StepWorkerStartedEvent","StepWorkerStartingEvent","MaterializationEvent","ObservationEvent","AssetMaterializationPlannedEvent","LogsCapturedEvent","AlertStartEvent","AlertSuccessEvent","AlertFailureEvent","AssetCheckEvaluationPlannedEvent","AssetCheckEvaluationEvent"],"RunEvent":["RunCanceledEvent","RunCancelingEvent","RunDequeuedEvent","RunEnqueuedEvent","RunFailureEvent","RunStartEvent","RunStartingEvent","RunSuccessEvent","AssetMaterializationPlannedEvent","AlertStartEvent","AlertSuccessEvent","AlertFailureEvent"],"PipelineRunStepStats":["RunStepStats"],"StepEvent":["EngineEvent","ExecutionStepFailureEvent","ExecutionStepInputEvent","ExecutionStepOutputEvent","ExecutionStepRestartEvent","ExecutionStepSkippedEvent","ExecutionStepStartEvent","ExecutionStepSuccessEvent","ExecutionStepUpForRetryEvent","HandledOutputEvent","HookCompletedEvent","HookErroredEvent","HookSkippedEvent","LoadedInputEvent","ObjectStoreOperationEvent","ResourceInitFailureEvent","ResourceInitStartedEvent","ResourceInitSuccessEvent","StepExpectationResultEvent","StepWorkerStartedEvent","StepWorkerStartingEvent","MaterializationEvent","ObservationEvent","AssetCheckEvaluationPlannedEvent","AssetCheckEvaluationEvent"],"AssetPartitionStatuses":["DefaultPartitionStatuses","MultiPartitionStatuses","TimePartitionStatuses"],"PartitionStatus1D":["TimePartitionStatuses","DefaultPartitionStatuses"],"AssetChecksOrError":["AssetChecks","AssetCheckNeedsMigrationError","AssetCheckNeedsUserCodeUpgrade","AssetCheckNeedsAgentUpgradeError"],"Instigator":["Schedule","Sensor"],"EvaluationStackEntry":["EvaluationStackListItemEntry","EvaluationStackPathEntry","EvaluationStackMapKeyEntry","EvaluationStackMapValueEntry"],"IPipelineSnapshot":["Pipeline","PipelineSnapshot","Job"],"PipelineConfigValidationError":["FieldNotDefinedConfigError","FieldsNotDefinedConfigError","MissingFieldConfigError","MissingFieldsConfigError","RuntimeMismatchConfigError","SelectorTypeConfigError"],"PipelineConfigValidationInvalid":["RunConfigValidationInvalid"],"PipelineConfigValidationResult":["InvalidSubsetError","PipelineConfigValidationValid","RunConfigValidationInvalid","PipelineNotFoundError","PythonError"],"PipelineReference":["PipelineSnapshot","UnknownPipeline"],"PipelineRun":["Run"],"DagsterRunEvent":["ExecutionStepFailureEvent","ExecutionStepInputEvent","ExecutionStepOutputEvent","ExecutionStepSkippedEvent","ExecutionStepStartEvent","ExecutionStepSuccessEvent","ExecutionStepUpForRetryEvent","ExecutionStepRestartEvent","LogMessageEvent","ResourceInitFailureEvent","ResourceInitStartedEvent","ResourceInitSuccessEvent","RunFailureEvent","RunStartEvent","RunEnqueuedEvent","RunDequeuedEvent","RunStartingEvent","RunCancelingEvent","RunCanceledEvent","RunSuccessEvent","StepWorkerStartedEvent","StepWorkerStartingEvent","HandledOutputEvent","LoadedInputEvent","LogsCapturedEvent","ObjectStoreOperationEvent","StepExpectationResultEvent","MaterializationEvent","ObservationEvent","EngineEvent","HookCompletedEvent","HookSkippedEvent","HookErroredEvent","AlertStartEvent","AlertSuccessEvent","AlertFailureEvent","AssetMaterializationPlannedEvent","AssetCheckEvaluationPlannedEvent","AssetCheckEvaluationEvent"],"PipelineRunLogsSubscriptionPayload":["PipelineRunLogsSubscriptionSuccess","PipelineRunLogsSubscriptionFailure"],"RunOrError":["Run","RunNotFoundError","PythonError"],"PipelineRunStatsSnapshot":["RunStatsSnapshot"],"RunStatsSnapshotOrError":["RunStatsSnapshot","PythonError"],"PipelineSnapshotOrError":["PipelineNotFoundError","PipelineSnapshot","PipelineSnapshotNotFoundError","PythonError"],"AssetOrError":["Asset","AssetNotFoundError"],"AssetsOrError":["AssetConnection","PythonError"],"DeletePipelineRunResult":["DeletePipelineRunSuccess","UnauthorizedError","PythonError","RunNotFoundError"],"ExecutionPlanOrError":["ExecutionPlan","RunConfigValidationInvalid","PipelineNotFoundError","InvalidSubsetError","PythonError"],"PipelineOrError":["Pipeline","PipelineNotFoundError","InvalidSubsetError","PythonError"],"ReloadRepositoryLocationMutationResult":["WorkspaceLocationEntry","ReloadNotSupported","RepositoryLocationNotFound","UnauthorizedError","PythonError"],"RepositoryLocationOrLoadError":["RepositoryLocation","PythonError"],"ReloadWorkspaceMutationResult":["Workspace","UnauthorizedError","PythonError"],"ShutdownRepositoryLocationMutationResult":["ShutdownRepositoryLocationSuccess","RepositoryLocationNotFound","UnauthorizedError","PythonError"],"TerminatePipelineExecutionFailure":["TerminateRunFailure"],"TerminatePipelineExecutionSuccess":["TerminateRunSuccess"],"TerminateRunResult":["TerminateRunSuccess","TerminateRunFailure","RunNotFoundError","UnauthorizedError","PythonError"],"ScheduleMutationResult":["PythonError","UnauthorizedError","ScheduleStateResult"],"ScheduleOrError":["Schedule","ScheduleNotFoundError","PythonError"],"SchedulerOrError":["Scheduler","SchedulerNotDefinedError","PythonError"],"SchedulesOrError":["Schedules","RepositoryNotFoundError","PythonError"],"ScheduleTickSpecificData":["ScheduleTickSuccessData","ScheduleTickFailureData"],"LaunchBackfillResult":["LaunchBackfillSuccess","PartitionSetNotFoundError","InvalidStepError","InvalidOutputError","RunConfigValidationInvalid","PipelineNotFoundError","RunConflict","UnauthorizedError","PythonError","InvalidSubsetError","PresetNotFoundError","ConflictingExecutionParamsError","NoModeProvidedError"],"ConfigTypeOrError":["EnumConfigType","CompositeConfigType","RegularConfigType","PipelineNotFoundError","ConfigTypeNotFoundError","PythonError"],"ConfigType":["ArrayConfigType","CompositeConfigType","EnumConfigType","NullableConfigType","RegularConfigType","ScalarUnionConfigType","MapConfigType"],"WrappingConfigType":["ArrayConfigType","NullableConfigType"],"DagsterType":["ListDagsterType","NullableDagsterType","RegularDagsterType"],"DagsterTypeOrError":["RegularDagsterType","PipelineNotFoundError","DagsterTypeNotFoundError","PythonError"],"WrappingDagsterType":["ListDagsterType","NullableDagsterType"],"Error":["AssetCheckNeedsMigrationError","AssetCheckNeedsUserCodeUpgrade","AssetCheckNeedsAgentUpgradeError","AssetNotFoundError","ConflictingExecutionParamsError","ConfigTypeNotFoundError","DagsterTypeNotFoundError","InvalidPipelineRunsFilterError","InvalidSubsetError","ModeNotFoundError","NoModeProvidedError","PartitionSetNotFoundError","PipelineNotFoundError","RunConflict","PipelineSnapshotNotFoundError","PresetNotFoundError","PythonError","ErrorChainLink","UnauthorizedError","ReloadNotSupported","RepositoryLocationNotFound","RepositoryNotFoundError","ResourceNotFoundError","RunGroupNotFoundError","RunNotFoundError","ScheduleNotFoundError","SchedulerNotDefinedError","SensorNotFoundError","DuplicateDynamicPartitionError","InstigationStateNotFoundError","SolidStepStatusUnavailableError","GraphNotFoundError","BackfillNotFoundError","PartitionSubsetDeserializationError","AutoMaterializeAssetEvaluationNeedsMigrationError"],"PipelineRunConflict":["RunConflict"],"PipelineRunNotFoundError":["RunNotFoundError"],"RepositoriesOrError":["RepositoryConnection","RepositoryNotFoundError","PythonError"],"RepositoryOrError":["PythonError","Repository","RepositoryNotFoundError"],"InstigationTypeSpecificData":["SensorData","ScheduleData"],"InstigationStateOrError":["InstigationState","InstigationStateNotFoundError","PythonError"],"InstigationStatesOrError":["InstigationStates","PythonError"],"MetadataEntry":["TableSchemaMetadataEntry","TableMetadataEntry","FloatMetadataEntry","IntMetadataEntry","JsonMetadataEntry","BoolMetadataEntry","MarkdownMetadataEntry","PathMetadataEntry","NotebookMetadataEntry","PythonArtifactMetadataEntry","TextMetadataEntry","UrlMetadataEntry","PipelineRunMetadataEntry","AssetMetadataEntry","JobMetadataEntry","NullMetadataEntry"],"PartitionRunConfigOrError":["PartitionRunConfig","PythonError"],"AssetBackfillStatus":["AssetPartitionsStatusCounts","UnpartitionedAssetStatus"],"PartitionSetOrError":["PartitionSet","PartitionSetNotFoundError","PythonError"],"PartitionSetsOrError":["PartitionSets","PipelineNotFoundError","PythonError"],"PartitionsOrError":["Partitions","PythonError"],"PartitionStatusesOrError":["PartitionStatuses","PythonError"],"PartitionTagsOrError":["PartitionTags","PythonError"],"RunConfigSchemaOrError":["RunConfigSchema","PipelineNotFoundError","InvalidSubsetError","ModeNotFoundError","PythonError"],"LaunchRunResult":["LaunchRunSuccess","InvalidStepError","InvalidOutputError","RunConfigValidationInvalid","PipelineNotFoundError","RunConflict","UnauthorizedError","PythonError","InvalidSubsetError","PresetNotFoundError","ConflictingExecutionParamsError","NoModeProvidedError"],"LaunchRunReexecutionResult":["LaunchRunSuccess","InvalidStepError","InvalidOutputError","RunConfigValidationInvalid","PipelineNotFoundError","RunConflict","UnauthorizedError","PythonError","InvalidSubsetError","PresetNotFoundError","ConflictingExecutionParamsError","NoModeProvidedError"],"LaunchPipelineRunSuccess":["LaunchRunSuccess"],"RunsOrError":["Runs","InvalidPipelineRunsFilterError","PythonError"],"PipelineRuns":["Runs"],"RunGroupOrError":["RunGroup","RunGroupNotFoundError","PythonError"],"SensorOrError":["Sensor","SensorNotFoundError","UnauthorizedError","PythonError"],"SensorsOrError":["Sensors","RepositoryNotFoundError","PythonError"],"StopSensorMutationResultOrError":["StopSensorMutationResult","UnauthorizedError","PythonError"],"ISolidDefinition":["CompositeSolidDefinition","SolidDefinition"],"SolidContainer":["Pipeline","PipelineSnapshot","Job","CompositeSolidDefinition","Graph"],"SolidStepStatsOrError":["SolidStepStatsConnection","SolidStepStatusUnavailableError"],"WorkspaceOrError":["Workspace","PythonError"],"WorkspaceLocationStatusEntriesOrError":["WorkspaceLocationStatusEntries","PythonError"],"GraphOrError":["Graph","GraphNotFoundError","PythonError"],"ResourceDetailsOrError":["ResourceDetails","ResourceNotFoundError","PythonError"],"ResourcesOrError":["ResourceDetailsList","RepositoryNotFoundError","PythonError"],"EnvVarWithConsumersOrError":["EnvVarWithConsumersList","PythonError"],"RunTagKeysOrError":["PythonError","RunTagKeys"],"RunTagsOrError":["PythonError","RunTags"],"RunIdsOrError":["RunIds","InvalidPipelineRunsFilterError","PythonError"],"AssetNodeOrError":["AssetNode","AssetNotFoundError"],"PartitionBackfillOrError":["PartitionBackfill","BackfillNotFoundError","PythonError"],"PartitionBackfillsOrError":["PartitionBackfills","PythonError"],"EventConnectionOrError":["EventConnection","RunNotFoundError","PythonError"],"AutoMaterializeAssetEvaluationRecordsOrError":["AutoMaterializeAssetEvaluationRecords","AutoMaterializeAssetEvaluationNeedsMigrationError"],"PartitionKeysOrError":["PartitionKeys","PartitionSubsetDeserializationError"],"AutoMaterializeRuleEvaluationData":["TextRuleEvaluationData","ParentMaterializedRuleEvaluationData","WaitingOnKeysRuleEvaluationData"],"AssetConditionEvaluationRecordsOrError":["AssetConditionEvaluationRecords","AutoMaterializeAssetEvaluationNeedsMigrationError"],"AssetConditionEvaluation":["UnpartitionedAssetConditionEvaluation","PartitionedAssetConditionEvaluation","SpecificPartitionAssetConditionEvaluation"],"SensorDryRunResult":["PythonError","SensorNotFoundError","DryRunInstigationTick"],"ScheduleDryRunResult":["DryRunInstigationTick","PythonError","ScheduleNotFoundError"],"TerminateRunsResultOrError":["TerminateRunsResult","PythonError"],"AssetWipeMutationResult":["AssetNotFoundError","UnauthorizedError","PythonError","AssetWipeSuccess"],"ReportRunlessAssetEventsResult":["UnauthorizedError","PythonError","ReportRunlessAssetEventsSuccess"],"ResumeBackfillResult":["ResumeBackfillSuccess","UnauthorizedError","PythonError"],"CancelBackfillResult":["CancelBackfillSuccess","UnauthorizedError","PythonError"],"LogTelemetryMutationResult":["LogTelemetrySuccess","PythonError"],"AddDynamicPartitionResult":["AddDynamicPartitionSuccess","UnauthorizedError","PythonError","DuplicateDynamicPartitionError"]} \ No newline at end of file +{"DisplayableEvent":["EngineEvent","ExecutionStepOutputEvent","ExpectationResult","FailureMetadata","HandledOutputEvent","LoadedInputEvent","ObjectStoreOperationResult","ResourceInitFailureEvent","ResourceInitStartedEvent","ResourceInitSuccessEvent","StepWorkerStartedEvent","StepWorkerStartingEvent","MaterializationEvent","ObservationEvent","TypeCheck"],"MarkerEvent":["EngineEvent","ResourceInitFailureEvent","ResourceInitStartedEvent","ResourceInitSuccessEvent","StepWorkerStartedEvent","StepWorkerStartingEvent"],"ErrorEvent":["EngineEvent","ExecutionStepFailureEvent","ExecutionStepUpForRetryEvent","HookErroredEvent","RunFailureEvent","ResourceInitFailureEvent"],"MessageEvent":["EngineEvent","ExecutionStepFailureEvent","ExecutionStepInputEvent","ExecutionStepOutputEvent","ExecutionStepRestartEvent","ExecutionStepSkippedEvent","ExecutionStepStartEvent","ExecutionStepSuccessEvent","ExecutionStepUpForRetryEvent","HandledOutputEvent","HookCompletedEvent","HookErroredEvent","HookSkippedEvent","LoadedInputEvent","LogMessageEvent","ObjectStoreOperationEvent","RunCanceledEvent","RunCancelingEvent","RunDequeuedEvent","RunEnqueuedEvent","RunFailureEvent","ResourceInitFailureEvent","ResourceInitStartedEvent","ResourceInitSuccessEvent","RunStartEvent","RunStartingEvent","RunSuccessEvent","StepExpectationResultEvent","StepWorkerStartedEvent","StepWorkerStartingEvent","MaterializationEvent","ObservationEvent","AssetMaterializationPlannedEvent","LogsCapturedEvent","AlertStartEvent","AlertSuccessEvent","AlertFailureEvent","AssetCheckEvaluationPlannedEvent","AssetCheckEvaluationEvent"],"RunEvent":["RunCanceledEvent","RunCancelingEvent","RunDequeuedEvent","RunEnqueuedEvent","RunFailureEvent","RunStartEvent","RunStartingEvent","RunSuccessEvent","AssetMaterializationPlannedEvent","AlertStartEvent","AlertSuccessEvent","AlertFailureEvent"],"PipelineRunStepStats":["RunStepStats"],"StepEvent":["EngineEvent","ExecutionStepFailureEvent","ExecutionStepInputEvent","ExecutionStepOutputEvent","ExecutionStepRestartEvent","ExecutionStepSkippedEvent","ExecutionStepStartEvent","ExecutionStepSuccessEvent","ExecutionStepUpForRetryEvent","HandledOutputEvent","HookCompletedEvent","HookErroredEvent","HookSkippedEvent","LoadedInputEvent","ObjectStoreOperationEvent","ResourceInitFailureEvent","ResourceInitStartedEvent","ResourceInitSuccessEvent","StepExpectationResultEvent","StepWorkerStartedEvent","StepWorkerStartingEvent","MaterializationEvent","ObservationEvent","AssetCheckEvaluationPlannedEvent","AssetCheckEvaluationEvent"],"AssetPartitionStatuses":["DefaultPartitionStatuses","MultiPartitionStatuses","TimePartitionStatuses"],"PartitionStatus1D":["TimePartitionStatuses","DefaultPartitionStatuses"],"AssetChecksOrError":["AssetChecks","AssetCheckNeedsMigrationError","AssetCheckNeedsUserCodeUpgrade","AssetCheckNeedsAgentUpgradeError"],"Instigator":["Schedule","Sensor"],"EvaluationStackEntry":["EvaluationStackListItemEntry","EvaluationStackPathEntry","EvaluationStackMapKeyEntry","EvaluationStackMapValueEntry"],"IPipelineSnapshot":["Pipeline","PipelineSnapshot","Job"],"PipelineConfigValidationError":["FieldNotDefinedConfigError","FieldsNotDefinedConfigError","MissingFieldConfigError","MissingFieldsConfigError","RuntimeMismatchConfigError","SelectorTypeConfigError"],"PipelineConfigValidationInvalid":["RunConfigValidationInvalid"],"PipelineConfigValidationResult":["InvalidSubsetError","PipelineConfigValidationValid","RunConfigValidationInvalid","PipelineNotFoundError","PythonError"],"PipelineReference":["PipelineSnapshot","UnknownPipeline"],"PipelineRun":["Run"],"DagsterRunEvent":["ExecutionStepFailureEvent","ExecutionStepInputEvent","ExecutionStepOutputEvent","ExecutionStepSkippedEvent","ExecutionStepStartEvent","ExecutionStepSuccessEvent","ExecutionStepUpForRetryEvent","ExecutionStepRestartEvent","LogMessageEvent","ResourceInitFailureEvent","ResourceInitStartedEvent","ResourceInitSuccessEvent","RunFailureEvent","RunStartEvent","RunEnqueuedEvent","RunDequeuedEvent","RunStartingEvent","RunCancelingEvent","RunCanceledEvent","RunSuccessEvent","StepWorkerStartedEvent","StepWorkerStartingEvent","HandledOutputEvent","LoadedInputEvent","LogsCapturedEvent","ObjectStoreOperationEvent","StepExpectationResultEvent","MaterializationEvent","ObservationEvent","EngineEvent","HookCompletedEvent","HookSkippedEvent","HookErroredEvent","AlertStartEvent","AlertSuccessEvent","AlertFailureEvent","AssetMaterializationPlannedEvent","AssetCheckEvaluationPlannedEvent","AssetCheckEvaluationEvent"],"PipelineRunLogsSubscriptionPayload":["PipelineRunLogsSubscriptionSuccess","PipelineRunLogsSubscriptionFailure"],"RunOrError":["Run","RunNotFoundError","PythonError"],"PipelineRunStatsSnapshot":["RunStatsSnapshot"],"RunStatsSnapshotOrError":["RunStatsSnapshot","PythonError"],"PipelineSnapshotOrError":["PipelineNotFoundError","PipelineSnapshot","PipelineSnapshotNotFoundError","PythonError"],"AssetOrError":["Asset","AssetNotFoundError"],"AssetsOrError":["AssetConnection","PythonError"],"DeletePipelineRunResult":["DeletePipelineRunSuccess","UnauthorizedError","PythonError","RunNotFoundError"],"ExecutionPlanOrError":["ExecutionPlan","RunConfigValidationInvalid","PipelineNotFoundError","InvalidSubsetError","PythonError"],"PipelineOrError":["Pipeline","PipelineNotFoundError","InvalidSubsetError","PythonError"],"ReloadRepositoryLocationMutationResult":["WorkspaceLocationEntry","ReloadNotSupported","RepositoryLocationNotFound","UnauthorizedError","PythonError"],"RepositoryLocationOrLoadError":["RepositoryLocation","PythonError"],"ReloadWorkspaceMutationResult":["Workspace","UnauthorizedError","PythonError"],"ShutdownRepositoryLocationMutationResult":["ShutdownRepositoryLocationSuccess","RepositoryLocationNotFound","UnauthorizedError","PythonError"],"TerminatePipelineExecutionFailure":["TerminateRunFailure"],"TerminatePipelineExecutionSuccess":["TerminateRunSuccess"],"TerminateRunResult":["TerminateRunSuccess","TerminateRunFailure","RunNotFoundError","UnauthorizedError","PythonError"],"ScheduleMutationResult":["PythonError","UnauthorizedError","ScheduleStateResult"],"ScheduleOrError":["Schedule","ScheduleNotFoundError","PythonError"],"SchedulerOrError":["Scheduler","SchedulerNotDefinedError","PythonError"],"SchedulesOrError":["Schedules","RepositoryNotFoundError","PythonError"],"ScheduleTickSpecificData":["ScheduleTickSuccessData","ScheduleTickFailureData"],"LaunchBackfillResult":["LaunchBackfillSuccess","PartitionSetNotFoundError","InvalidStepError","InvalidOutputError","RunConfigValidationInvalid","PipelineNotFoundError","RunConflict","UnauthorizedError","PythonError","InvalidSubsetError","PresetNotFoundError","ConflictingExecutionParamsError","NoModeProvidedError"],"ConfigTypeOrError":["EnumConfigType","CompositeConfigType","RegularConfigType","PipelineNotFoundError","ConfigTypeNotFoundError","PythonError"],"ConfigType":["ArrayConfigType","CompositeConfigType","EnumConfigType","NullableConfigType","RegularConfigType","ScalarUnionConfigType","MapConfigType"],"WrappingConfigType":["ArrayConfigType","NullableConfigType"],"DagsterType":["ListDagsterType","NullableDagsterType","RegularDagsterType"],"DagsterTypeOrError":["RegularDagsterType","PipelineNotFoundError","DagsterTypeNotFoundError","PythonError"],"WrappingDagsterType":["ListDagsterType","NullableDagsterType"],"Error":["AssetCheckNeedsMigrationError","AssetCheckNeedsUserCodeUpgrade","AssetCheckNeedsAgentUpgradeError","AssetNotFoundError","ConflictingExecutionParamsError","ConfigTypeNotFoundError","DagsterTypeNotFoundError","InvalidPipelineRunsFilterError","InvalidSubsetError","ModeNotFoundError","NoModeProvidedError","PartitionSetNotFoundError","PipelineNotFoundError","RunConflict","PipelineSnapshotNotFoundError","PresetNotFoundError","PythonError","ErrorChainLink","UnauthorizedError","ReloadNotSupported","RepositoryLocationNotFound","RepositoryNotFoundError","ResourceNotFoundError","RunGroupNotFoundError","RunNotFoundError","ScheduleNotFoundError","SchedulerNotDefinedError","SensorNotFoundError","DuplicateDynamicPartitionError","InstigationStateNotFoundError","SolidStepStatusUnavailableError","GraphNotFoundError","BackfillNotFoundError","PartitionSubsetDeserializationError","AutoMaterializeAssetEvaluationNeedsMigrationError"],"PipelineRunConflict":["RunConflict"],"PipelineRunNotFoundError":["RunNotFoundError"],"RepositoriesOrError":["RepositoryConnection","RepositoryNotFoundError","PythonError"],"RepositoryOrError":["PythonError","Repository","RepositoryNotFoundError"],"InstigationTypeSpecificData":["SensorData","ScheduleData"],"InstigationStateOrError":["InstigationState","InstigationStateNotFoundError","PythonError"],"InstigationStatesOrError":["InstigationStates","PythonError"],"MetadataEntry":["TableSchemaMetadataEntry","TableMetadataEntry","FloatMetadataEntry","IntMetadataEntry","JsonMetadataEntry","BoolMetadataEntry","MarkdownMetadataEntry","PathMetadataEntry","NotebookMetadataEntry","PythonArtifactMetadataEntry","TextMetadataEntry","UrlMetadataEntry","PipelineRunMetadataEntry","AssetMetadataEntry","JobMetadataEntry","NullMetadataEntry"],"PartitionRunConfigOrError":["PartitionRunConfig","PythonError"],"AssetBackfillStatus":["AssetPartitionsStatusCounts","UnpartitionedAssetStatus"],"PartitionSetOrError":["PartitionSet","PartitionSetNotFoundError","PythonError"],"PartitionSetsOrError":["PartitionSets","PipelineNotFoundError","PythonError"],"PartitionsOrError":["Partitions","PythonError"],"PartitionStatusesOrError":["PartitionStatuses","PythonError"],"PartitionTagsOrError":["PartitionTags","PythonError"],"RunConfigSchemaOrError":["RunConfigSchema","PipelineNotFoundError","InvalidSubsetError","ModeNotFoundError","PythonError"],"LaunchRunResult":["LaunchRunSuccess","InvalidStepError","InvalidOutputError","RunConfigValidationInvalid","PipelineNotFoundError","RunConflict","UnauthorizedError","PythonError","InvalidSubsetError","PresetNotFoundError","ConflictingExecutionParamsError","NoModeProvidedError"],"LaunchRunReexecutionResult":["LaunchRunSuccess","InvalidStepError","InvalidOutputError","RunConfigValidationInvalid","PipelineNotFoundError","RunConflict","UnauthorizedError","PythonError","InvalidSubsetError","PresetNotFoundError","ConflictingExecutionParamsError","NoModeProvidedError"],"LaunchPipelineRunSuccess":["LaunchRunSuccess"],"RunsOrError":["Runs","InvalidPipelineRunsFilterError","PythonError"],"PipelineRuns":["Runs"],"RunGroupOrError":["RunGroup","RunGroupNotFoundError","PythonError"],"SensorOrError":["Sensor","SensorNotFoundError","UnauthorizedError","PythonError"],"SensorsOrError":["Sensors","RepositoryNotFoundError","PythonError"],"StopSensorMutationResultOrError":["StopSensorMutationResult","UnauthorizedError","PythonError"],"ISolidDefinition":["CompositeSolidDefinition","SolidDefinition"],"SolidContainer":["Pipeline","PipelineSnapshot","Job","CompositeSolidDefinition","Graph"],"SolidStepStatsOrError":["SolidStepStatsConnection","SolidStepStatusUnavailableError"],"WorkspaceOrError":["Workspace","PythonError"],"WorkspaceLocationStatusEntriesOrError":["WorkspaceLocationStatusEntries","PythonError"],"GraphOrError":["Graph","GraphNotFoundError","PythonError"],"ResourceDetailsOrError":["ResourceDetails","ResourceNotFoundError","PythonError"],"ResourcesOrError":["ResourceDetailsList","RepositoryNotFoundError","PythonError"],"EnvVarWithConsumersOrError":["EnvVarWithConsumersList","PythonError"],"RunTagKeysOrError":["PythonError","RunTagKeys"],"RunTagsOrError":["PythonError","RunTags"],"RunIdsOrError":["RunIds","InvalidPipelineRunsFilterError","PythonError"],"AssetNodeOrError":["AssetNode","AssetNotFoundError"],"PartitionBackfillOrError":["PartitionBackfill","BackfillNotFoundError","PythonError"],"PartitionBackfillsOrError":["PartitionBackfills","PythonError"],"EventConnectionOrError":["EventConnection","RunNotFoundError","PythonError"],"AutoMaterializeAssetEvaluationRecordsOrError":["AutoMaterializeAssetEvaluationRecords","AutoMaterializeAssetEvaluationNeedsMigrationError"],"PartitionKeysOrError":["PartitionKeys","PartitionSubsetDeserializationError"],"AutoMaterializeRuleEvaluationData":["TextRuleEvaluationData","ParentMaterializedRuleEvaluationData","WaitingOnKeysRuleEvaluationData"],"AssetConditionEvaluationNode":["UnpartitionedAssetConditionEvaluationNode","PartitionedAssetConditionEvaluationNode","SpecificPartitionAssetConditionEvaluationNode"],"AssetConditionEvaluationRecordsOrError":["AssetConditionEvaluationRecords","AutoMaterializeAssetEvaluationNeedsMigrationError"],"SensorDryRunResult":["PythonError","SensorNotFoundError","DryRunInstigationTick"],"ScheduleDryRunResult":["DryRunInstigationTick","PythonError","ScheduleNotFoundError"],"TerminateRunsResultOrError":["TerminateRunsResult","PythonError"],"AssetWipeMutationResult":["AssetNotFoundError","UnauthorizedError","PythonError","AssetWipeSuccess"],"ReportRunlessAssetEventsResult":["UnauthorizedError","PythonError","ReportRunlessAssetEventsSuccess"],"ResumeBackfillResult":["ResumeBackfillSuccess","UnauthorizedError","PythonError"],"CancelBackfillResult":["CancelBackfillSuccess","UnauthorizedError","PythonError"],"LogTelemetryMutationResult":["LogTelemetrySuccess","PythonError"],"AddDynamicPartitionResult":["AddDynamicPartitionSuccess","UnauthorizedError","PythonError","DuplicateDynamicPartitionError"]} \ No newline at end of file diff --git a/js_modules/dagster-ui/packages/ui-core/src/graphql/schema.graphql b/js_modules/dagster-ui/packages/ui-core/src/graphql/schema.graphql index 63e00745f9c39..afa97e7306daa 100644 --- a/js_modules/dagster-ui/packages/ui-core/src/graphql/schema.graphql +++ b/js_modules/dagster-ui/packages/ui-core/src/graphql/schema.graphql @@ -3182,7 +3182,7 @@ type Query { assetKey: AssetKeyInput! evaluationId: Int! partition: String! - ): SpecificPartitionAssetConditionEvaluation + ): AssetConditionEvaluation assetConditionEvaluationRecordsOrError( assetKey: AssetKeyInput! limit: Int! @@ -3416,11 +3416,24 @@ type AutoMaterializeAssetEvaluationNeedsMigrationError implements Error { message: String! } -type SpecificPartitionAssetConditionEvaluation { +type AssetConditionEvaluation { + rootUniqueId: String! + evaluationNodes: [AssetConditionEvaluationNode!]! +} + +union AssetConditionEvaluationNode = + UnpartitionedAssetConditionEvaluationNode + | PartitionedAssetConditionEvaluationNode + | SpecificPartitionAssetConditionEvaluationNode + +type UnpartitionedAssetConditionEvaluationNode { + uniqueId: String! description: String! + startTimestamp: Float + endTimestamp: Float metadataEntries: [MetadataEntry!]! status: AssetConditionEvaluationStatus! - childEvaluations: [SpecificPartitionAssetConditionEvaluation!] + childUniqueIds: [String!]! } enum AssetConditionEvaluationStatus { @@ -3429,49 +3442,17 @@ enum AssetConditionEvaluationStatus { SKIPPED } -union AssetConditionEvaluationRecordsOrError = - AssetConditionEvaluationRecords - | AutoMaterializeAssetEvaluationNeedsMigrationError - -type AssetConditionEvaluationRecords { - records: [AssetConditionEvaluationRecord!]! -} - -type AssetConditionEvaluationRecord { - id: ID! - evaluationId: Int! - runIds: [String!]! - timestamp: Float! - assetKey: AssetKey! - numRequested: Int! - evaluation: AssetConditionEvaluation! -} - -union AssetConditionEvaluation = - UnpartitionedAssetConditionEvaluation - | PartitionedAssetConditionEvaluation - | SpecificPartitionAssetConditionEvaluation - -type UnpartitionedAssetConditionEvaluation { - description: String! - startTimestamp: Float - endTimestamp: Float - metadataEntries: [MetadataEntry!]! - status: AssetConditionEvaluationStatus! - childEvaluations: [UnpartitionedAssetConditionEvaluation!] -} - -type PartitionedAssetConditionEvaluation { +type PartitionedAssetConditionEvaluationNode { + uniqueId: String! description: String! startTimestamp: Float endTimestamp: Float trueSubset: AssetSubset! - falseSubset: AssetSubset! candidateSubset: AssetSubset numTrue: Int! - numFalse: Int! - numSkipped: Int! - childEvaluations: [PartitionedAssetConditionEvaluation!] + numFalse: Int + numSkipped: Int + childUniqueIds: [String!]! } type AssetSubset { @@ -3486,6 +3467,34 @@ type AssetSubsetValue { isPartitioned: Boolean! } +type SpecificPartitionAssetConditionEvaluationNode { + uniqueId: String! + description: String! + metadataEntries: [MetadataEntry!]! + status: AssetConditionEvaluationStatus! + childUniqueIds: [String!]! +} + +union AssetConditionEvaluationRecordsOrError = + AssetConditionEvaluationRecords + | AutoMaterializeAssetEvaluationNeedsMigrationError + +type AssetConditionEvaluationRecords { + records: [AssetConditionEvaluationRecord!]! +} + +type AssetConditionEvaluationRecord { + id: ID! + evaluationId: Int! + runIds: [String!]! + timestamp: Float! + assetKey: AssetKey! + numRequested: Int! + startTimestamp: Float + endTimestamp: Float + evaluation: AssetConditionEvaluation! +} + type Mutation { launchPipelineExecution(executionParams: ExecutionParams!): LaunchRunResult! launchRun(executionParams: ExecutionParams!): LaunchRunResult! diff --git a/js_modules/dagster-ui/packages/ui-core/src/graphql/types.ts b/js_modules/dagster-ui/packages/ui-core/src/graphql/types.ts index 2018b8539765d..e6313a312410e 100644 --- a/js_modules/dagster-ui/packages/ui-core/src/graphql/types.ts +++ b/js_modules/dagster-ui/packages/ui-core/src/graphql/types.ts @@ -245,19 +245,27 @@ export type AssetChecksOrError = | AssetCheckNeedsUserCodeUpgrade | AssetChecks; -export type AssetConditionEvaluation = - | PartitionedAssetConditionEvaluation - | SpecificPartitionAssetConditionEvaluation - | UnpartitionedAssetConditionEvaluation; +export type AssetConditionEvaluation = { + __typename: 'AssetConditionEvaluation'; + evaluationNodes: Array; + rootUniqueId: Scalars['String']; +}; + +export type AssetConditionEvaluationNode = + | PartitionedAssetConditionEvaluationNode + | SpecificPartitionAssetConditionEvaluationNode + | UnpartitionedAssetConditionEvaluationNode; export type AssetConditionEvaluationRecord = { __typename: 'AssetConditionEvaluationRecord'; assetKey: AssetKey; + endTimestamp: Maybe; evaluation: AssetConditionEvaluation; evaluationId: Scalars['Int']; id: Scalars['ID']; numRequested: Scalars['Int']; runIds: Array; + startTimestamp: Maybe; timestamp: Scalars['Float']; }; @@ -2674,18 +2682,18 @@ export type PartitionTags = { export type PartitionTagsOrError = PartitionTags | PythonError; -export type PartitionedAssetConditionEvaluation = { - __typename: 'PartitionedAssetConditionEvaluation'; +export type PartitionedAssetConditionEvaluationNode = { + __typename: 'PartitionedAssetConditionEvaluationNode'; candidateSubset: Maybe; - childEvaluations: Maybe>; + childUniqueIds: Array; description: Scalars['String']; endTimestamp: Maybe; - falseSubset: AssetSubset; - numFalse: Scalars['Int']; - numSkipped: Scalars['Int']; + numFalse: Maybe; + numSkipped: Maybe; numTrue: Scalars['Int']; startTimestamp: Maybe; trueSubset: AssetSubset; + uniqueId: Scalars['String']; }; export type Partitions = { @@ -3022,7 +3030,7 @@ export type Query = { allTopLevelResourceDetailsOrError: ResourcesOrError; assetBackfillPreview: Array; assetCheckExecutions: Array; - assetConditionEvaluationForPartition: Maybe; + assetConditionEvaluationForPartition: Maybe; assetConditionEvaluationRecordsOrError: Maybe; assetConditionEvaluationsForEvaluationId: Maybe; assetNodeDefinitionCollisions: Array; @@ -4200,12 +4208,13 @@ export type SolidStepStatusUnavailableError = Error & { message: Scalars['String']; }; -export type SpecificPartitionAssetConditionEvaluation = { - __typename: 'SpecificPartitionAssetConditionEvaluation'; - childEvaluations: Maybe>; +export type SpecificPartitionAssetConditionEvaluationNode = { + __typename: 'SpecificPartitionAssetConditionEvaluationNode'; + childUniqueIds: Array; description: Scalars['String']; metadataEntries: Array; status: AssetConditionEvaluationStatus; + uniqueId: Scalars['String']; }; export type StaleCause = { @@ -4517,14 +4526,15 @@ export type UnknownPipeline = PipelineReference & { solidSelection: Maybe>; }; -export type UnpartitionedAssetConditionEvaluation = { - __typename: 'UnpartitionedAssetConditionEvaluation'; - childEvaluations: Maybe>; +export type UnpartitionedAssetConditionEvaluationNode = { + __typename: 'UnpartitionedAssetConditionEvaluationNode'; + childUniqueIds: Array; description: Scalars['String']; endTimestamp: Maybe; metadataEntries: Array; startTimestamp: Maybe; status: AssetConditionEvaluationStatus; + uniqueId: Scalars['String']; }; export type UnpartitionedAssetStatus = { @@ -5040,6 +5050,21 @@ export const buildAssetChecks = ( }; }; +export const buildAssetConditionEvaluation = ( + overrides?: Partial, + _relationshipsToOmit: Set = new Set(), +): {__typename: 'AssetConditionEvaluation'} & AssetConditionEvaluation => { + const relationshipsToOmit: Set = new Set(_relationshipsToOmit); + relationshipsToOmit.add('AssetConditionEvaluation'); + return { + __typename: 'AssetConditionEvaluation', + evaluationNodes: + overrides && overrides.hasOwnProperty('evaluationNodes') ? overrides.evaluationNodes! : [], + rootUniqueId: + overrides && overrides.hasOwnProperty('rootUniqueId') ? overrides.rootUniqueId! : 'eos', + }; +}; + export const buildAssetConditionEvaluationRecord = ( overrides?: Partial, _relationshipsToOmit: Set = new Set(), @@ -5054,12 +5079,14 @@ export const buildAssetConditionEvaluationRecord = ( : relationshipsToOmit.has('AssetKey') ? ({} as AssetKey) : buildAssetKey({}, relationshipsToOmit), + endTimestamp: + overrides && overrides.hasOwnProperty('endTimestamp') ? overrides.endTimestamp! : 4.33, evaluation: overrides && overrides.hasOwnProperty('evaluation') ? overrides.evaluation! - : relationshipsToOmit.has('PartitionedAssetConditionEvaluation') - ? ({} as PartitionedAssetConditionEvaluation) - : buildPartitionedAssetConditionEvaluation({}, relationshipsToOmit), + : relationshipsToOmit.has('AssetConditionEvaluation') + ? ({} as AssetConditionEvaluation) + : buildAssetConditionEvaluation({}, relationshipsToOmit), evaluationId: overrides && overrides.hasOwnProperty('evaluationId') ? overrides.evaluationId! : 5501, id: @@ -5069,6 +5096,8 @@ export const buildAssetConditionEvaluationRecord = ( numRequested: overrides && overrides.hasOwnProperty('numRequested') ? overrides.numRequested! : 2364, runIds: overrides && overrides.hasOwnProperty('runIds') ? overrides.runIds! : [], + startTimestamp: + overrides && overrides.hasOwnProperty('startTimestamp') ? overrides.startTimestamp! : 6.66, timestamp: overrides && overrides.hasOwnProperty('timestamp') ? overrides.timestamp! : 6.88, }; }; @@ -9721,43 +9750,40 @@ export const buildPartitionTags = ( }; }; -export const buildPartitionedAssetConditionEvaluation = ( - overrides?: Partial, +export const buildPartitionedAssetConditionEvaluationNode = ( + overrides?: Partial, _relationshipsToOmit: Set = new Set(), -): {__typename: 'PartitionedAssetConditionEvaluation'} & PartitionedAssetConditionEvaluation => { +): { + __typename: 'PartitionedAssetConditionEvaluationNode'; +} & PartitionedAssetConditionEvaluationNode => { const relationshipsToOmit: Set = new Set(_relationshipsToOmit); - relationshipsToOmit.add('PartitionedAssetConditionEvaluation'); + relationshipsToOmit.add('PartitionedAssetConditionEvaluationNode'); return { - __typename: 'PartitionedAssetConditionEvaluation', + __typename: 'PartitionedAssetConditionEvaluationNode', candidateSubset: overrides && overrides.hasOwnProperty('candidateSubset') ? overrides.candidateSubset! : relationshipsToOmit.has('AssetSubset') ? ({} as AssetSubset) : buildAssetSubset({}, relationshipsToOmit), - childEvaluations: - overrides && overrides.hasOwnProperty('childEvaluations') ? overrides.childEvaluations! : [], + childUniqueIds: + overrides && overrides.hasOwnProperty('childUniqueIds') ? overrides.childUniqueIds! : [], description: - overrides && overrides.hasOwnProperty('description') ? overrides.description! : 'non', + overrides && overrides.hasOwnProperty('description') ? overrides.description! : 'quam', endTimestamp: - overrides && overrides.hasOwnProperty('endTimestamp') ? overrides.endTimestamp! : 6.63, - falseSubset: - overrides && overrides.hasOwnProperty('falseSubset') - ? overrides.falseSubset! - : relationshipsToOmit.has('AssetSubset') - ? ({} as AssetSubset) - : buildAssetSubset({}, relationshipsToOmit), - numFalse: overrides && overrides.hasOwnProperty('numFalse') ? overrides.numFalse! : 7739, - numSkipped: overrides && overrides.hasOwnProperty('numSkipped') ? overrides.numSkipped! : 7712, - numTrue: overrides && overrides.hasOwnProperty('numTrue') ? overrides.numTrue! : 6991, + overrides && overrides.hasOwnProperty('endTimestamp') ? overrides.endTimestamp! : 9.74, + numFalse: overrides && overrides.hasOwnProperty('numFalse') ? overrides.numFalse! : 4729, + numSkipped: overrides && overrides.hasOwnProperty('numSkipped') ? overrides.numSkipped! : 5678, + numTrue: overrides && overrides.hasOwnProperty('numTrue') ? overrides.numTrue! : 3015, startTimestamp: - overrides && overrides.hasOwnProperty('startTimestamp') ? overrides.startTimestamp! : 3.43, + overrides && overrides.hasOwnProperty('startTimestamp') ? overrides.startTimestamp! : 5.96, trueSubset: overrides && overrides.hasOwnProperty('trueSubset') ? overrides.trueSubset! : relationshipsToOmit.has('AssetSubset') ? ({} as AssetSubset) : buildAssetSubset({}, relationshipsToOmit), + uniqueId: overrides && overrides.hasOwnProperty('uniqueId') ? overrides.uniqueId! : 'sed', }; }; @@ -10453,9 +10479,9 @@ export const buildQuery = ( assetConditionEvaluationForPartition: overrides && overrides.hasOwnProperty('assetConditionEvaluationForPartition') ? overrides.assetConditionEvaluationForPartition! - : relationshipsToOmit.has('SpecificPartitionAssetConditionEvaluation') - ? ({} as SpecificPartitionAssetConditionEvaluation) - : buildSpecificPartitionAssetConditionEvaluation({}, relationshipsToOmit), + : relationshipsToOmit.has('AssetConditionEvaluation') + ? ({} as AssetConditionEvaluation) + : buildAssetConditionEvaluation({}, relationshipsToOmit), assetConditionEvaluationRecordsOrError: overrides && overrides.hasOwnProperty('assetConditionEvaluationRecordsOrError') ? overrides.assetConditionEvaluationRecordsOrError! @@ -12610,26 +12636,28 @@ export const buildSolidStepStatusUnavailableError = ( }; }; -export const buildSpecificPartitionAssetConditionEvaluation = ( - overrides?: Partial, +export const buildSpecificPartitionAssetConditionEvaluationNode = ( + overrides?: Partial, _relationshipsToOmit: Set = new Set(), ): { - __typename: 'SpecificPartitionAssetConditionEvaluation'; -} & SpecificPartitionAssetConditionEvaluation => { + __typename: 'SpecificPartitionAssetConditionEvaluationNode'; +} & SpecificPartitionAssetConditionEvaluationNode => { const relationshipsToOmit: Set = new Set(_relationshipsToOmit); - relationshipsToOmit.add('SpecificPartitionAssetConditionEvaluation'); + relationshipsToOmit.add('SpecificPartitionAssetConditionEvaluationNode'); return { - __typename: 'SpecificPartitionAssetConditionEvaluation', - childEvaluations: - overrides && overrides.hasOwnProperty('childEvaluations') ? overrides.childEvaluations! : [], + __typename: 'SpecificPartitionAssetConditionEvaluationNode', + childUniqueIds: + overrides && overrides.hasOwnProperty('childUniqueIds') ? overrides.childUniqueIds! : [], description: - overrides && overrides.hasOwnProperty('description') ? overrides.description! : 'vel', + overrides && overrides.hasOwnProperty('description') ? overrides.description! : 'ut', metadataEntries: overrides && overrides.hasOwnProperty('metadataEntries') ? overrides.metadataEntries! : [], status: overrides && overrides.hasOwnProperty('status') ? overrides.status! : AssetConditionEvaluationStatus.FALSE, + uniqueId: + overrides && overrides.hasOwnProperty('uniqueId') ? overrides.uniqueId! : 'repudiandae', }; }; @@ -13290,30 +13318,31 @@ export const buildUnknownPipeline = ( }; }; -export const buildUnpartitionedAssetConditionEvaluation = ( - overrides?: Partial, +export const buildUnpartitionedAssetConditionEvaluationNode = ( + overrides?: Partial, _relationshipsToOmit: Set = new Set(), ): { - __typename: 'UnpartitionedAssetConditionEvaluation'; -} & UnpartitionedAssetConditionEvaluation => { + __typename: 'UnpartitionedAssetConditionEvaluationNode'; +} & UnpartitionedAssetConditionEvaluationNode => { const relationshipsToOmit: Set = new Set(_relationshipsToOmit); - relationshipsToOmit.add('UnpartitionedAssetConditionEvaluation'); + relationshipsToOmit.add('UnpartitionedAssetConditionEvaluationNode'); return { - __typename: 'UnpartitionedAssetConditionEvaluation', - childEvaluations: - overrides && overrides.hasOwnProperty('childEvaluations') ? overrides.childEvaluations! : [], + __typename: 'UnpartitionedAssetConditionEvaluationNode', + childUniqueIds: + overrides && overrides.hasOwnProperty('childUniqueIds') ? overrides.childUniqueIds! : [], description: - overrides && overrides.hasOwnProperty('description') ? overrides.description! : 'deserunt', + overrides && overrides.hasOwnProperty('description') ? overrides.description! : 'veniam', endTimestamp: - overrides && overrides.hasOwnProperty('endTimestamp') ? overrides.endTimestamp! : 7.57, + overrides && overrides.hasOwnProperty('endTimestamp') ? overrides.endTimestamp! : 3.21, metadataEntries: overrides && overrides.hasOwnProperty('metadataEntries') ? overrides.metadataEntries! : [], startTimestamp: - overrides && overrides.hasOwnProperty('startTimestamp') ? overrides.startTimestamp! : 0.96, + overrides && overrides.hasOwnProperty('startTimestamp') ? overrides.startTimestamp! : 2.94, status: overrides && overrides.hasOwnProperty('status') ? overrides.status! : AssetConditionEvaluationStatus.FALSE, + uniqueId: overrides && overrides.hasOwnProperty('uniqueId') ? overrides.uniqueId! : 'et', }; }; diff --git a/python_modules/dagster-graphql/dagster_graphql/implementation/fetch_asset_condition_evaluations.py b/python_modules/dagster-graphql/dagster_graphql/implementation/fetch_asset_condition_evaluations.py index b96f42a563e6e..7f8358454b80f 100644 --- a/python_modules/dagster-graphql/dagster_graphql/implementation/fetch_asset_condition_evaluations.py +++ b/python_modules/dagster-graphql/dagster_graphql/implementation/fetch_asset_condition_evaluations.py @@ -10,7 +10,6 @@ GrapheneAssetConditionEvaluationRecord, GrapheneAssetConditionEvaluationRecords, GrapheneAssetConditionEvaluationRecordsOrError, - GrapheneSpecificPartitionAssetConditionEvaluation, ) from dagster_graphql.schema.auto_materialize_asset_evaluations import ( GrapheneAutoMaterializeAssetEvaluationNeedsMigrationError, @@ -58,7 +57,7 @@ def _get_graphene_records_from_evaluations( return GrapheneAssetConditionEvaluationRecords( records=[ GrapheneAssetConditionEvaluationRecord( - evaluation, partitions_defs[evaluation.asset_key], graphene_info.context.instance + evaluation, partitions_defs[evaluation.asset_key] ) for evaluation in evaluation_records ] @@ -86,8 +85,10 @@ def fetch_asset_condition_evaluation_record_for_partition( if asset_node and asset_node.external_asset_node.partitions_def_data else None ) - return GrapheneSpecificPartitionAssetConditionEvaluation( - record.get_evaluation_with_run_ids(partitions_def).evaluation, partition_key + return GrapheneAssetConditionEvaluation( + record.get_evaluation_with_run_ids(partitions_def).evaluation, + partitions_def, + partition_key, ) diff --git a/python_modules/dagster-graphql/dagster_graphql/schema/asset_condition_evaluations.py b/python_modules/dagster-graphql/dagster_graphql/schema/asset_condition_evaluations.py index 9b81ff2204b70..02aca5d068d11 100644 --- a/python_modules/dagster-graphql/dagster_graphql/schema/asset_condition_evaluations.py +++ b/python_modules/dagster-graphql/dagster_graphql/schema/asset_condition_evaluations.py @@ -1,15 +1,15 @@ import enum +import itertools from typing import Optional, Sequence, Union import graphene -import pendulum from dagster._core.definitions.asset_condition import AssetConditionEvaluation from dagster._core.definitions.asset_subset import AssetSubset from dagster._core.definitions.partition import PartitionsDefinition, PartitionsSubset from dagster._core.definitions.time_window_partitions import BaseTimeWindowPartitionsSubset -from dagster._core.instance import DynamicPartitionsStore from dagster._core.scheduler.instigation import AutoMaterializeAssetEvaluationRecord +from dagster_graphql.implementation.events import iterate_metadata_entries from dagster_graphql.schema.auto_materialize_asset_evaluations import ( GrapheneAutoMaterializeAssetEvaluationNeedsMigrationError, ) @@ -48,6 +48,7 @@ def __init__(self, value: Union[bool, PartitionsSubset]): GraphenePartitionKeyRange(start, end) for start, end in value.get_partition_key_ranges(value.partitions_def) ] + partition_keys = value.get_partition_keys() else: partition_keys = value.get_partition_keys() @@ -75,7 +76,8 @@ def __init__(self, asset_subset: AssetSubset): ) -class GrapheneUnpartitionedAssetConditionEvaluation(graphene.ObjectType): +class GrapheneUnpartitionedAssetConditionEvaluationNode(graphene.ObjectType): + uniqueId = graphene.NonNull(graphene.String) description = graphene.NonNull(graphene.String) startTimestamp = graphene.Field(graphene.Float) @@ -84,29 +86,31 @@ class GrapheneUnpartitionedAssetConditionEvaluation(graphene.ObjectType): metadataEntries = non_null_list(GrapheneMetadataEntry) status = graphene.NonNull(GrapheneAssetConditionEvaluationStatus) - childEvaluations = graphene.Field( - graphene.List(graphene.NonNull(lambda: GrapheneUnpartitionedAssetConditionEvaluation)) - ) + childUniqueIds = non_null_list(graphene.String) class Meta: - name = "UnpartitionedAssetConditionEvaluation" + name = "UnpartitionedAssetConditionEvaluationNode" def __init__(self, evaluation: AssetConditionEvaluation): + self._evaluation = evaluation if evaluation.true_subset.bool_value: status = AssetConditionEvaluationStatus.TRUE - elif evaluation.candidate_subset and evaluation.candidate_subset.bool_value: + elif ( + isinstance(evaluation.candidate_subset, AssetSubset) + and evaluation.candidate_subset.bool_value + ): status = AssetConditionEvaluationStatus.FALSE else: status = AssetConditionEvaluationStatus.SKIPPED super().__init__( + uniqueId=evaluation.condition_snapshot.unique_id, description=evaluation.condition_snapshot.description, startTimestamp=evaluation.start_timestamp, endTimestamp=evaluation.end_timestamp, status=status, - childEvaluations=[ - GrapheneUnpartitionedAssetConditionEvaluation(child) - for child in evaluation.child_evaluations + childUniqueIds=[ + child.condition_snapshot.unique_id for child in evaluation.child_evaluations ], ) @@ -117,85 +121,65 @@ def resolve_metadataEntries( (subset.metadata for subset in self._evaluation.subsets_with_metadata), {}, ) - return [GrapheneMetadataEntry(key=key, value=value) for key, value in metadata.items()] + return list(iterate_metadata_entries(metadata)) -class GraphenePartitionedAssetConditionEvaluation(graphene.ObjectType): +class GraphenePartitionedAssetConditionEvaluationNode(graphene.ObjectType): + uniqueId = graphene.NonNull(graphene.String) description = graphene.NonNull(graphene.String) startTimestamp = graphene.Field(graphene.Float) endTimestamp = graphene.Field(graphene.Float) trueSubset = graphene.NonNull(GrapheneAssetSubset) - falseSubset = graphene.NonNull(GrapheneAssetSubset) candidateSubset = graphene.Field(GrapheneAssetSubset) numTrue = graphene.NonNull(graphene.Int) - numFalse = graphene.NonNull(graphene.Int) - numSkipped = graphene.NonNull(graphene.Int) + numFalse = graphene.Field(graphene.Int) + numSkipped = graphene.Field(graphene.Int) - childEvaluations = graphene.Field( - graphene.List(graphene.NonNull(lambda: GraphenePartitionedAssetConditionEvaluation)) - ) + childUniqueIds = non_null_list(graphene.String) class Meta: - name = "PartitionedAssetConditionEvaluation" + name = "PartitionedAssetConditionEvaluationNode" def __init__( self, evaluation: AssetConditionEvaluation, partitions_def: Optional[PartitionsDefinition], - dynamic_partitions_store: DynamicPartitionsStore, ): self._partitions_def = partitions_def self._true_subset = evaluation.true_subset - self._all_subset = AssetSubset.all( - evaluation.asset_key, partitions_def, dynamic_partitions_store, pendulum.now("UTC") - ) - - # if the candidate_subset is unset, then we evaluated all partitions - self._candidate_subset = evaluation.candidate_subset or self._all_subset - super().__init__( + uniqueId=evaluation.condition_snapshot.unique_id, description=evaluation.condition_snapshot.description, startTimestamp=evaluation.start_timestamp, endTimestamp=evaluation.end_timestamp, trueSubset=GrapheneAssetSubset(evaluation.true_subset), - candidateSubset=GrapheneAssetSubset(self._candidate_subset), - childEvaluations=[ - GraphenePartitionedAssetConditionEvaluation( - child, partitions_def, dynamic_partitions_store - ) - for child in evaluation.child_evaluations + candidateSubset=GrapheneAssetSubset(evaluation.candidate_subset) + if isinstance(evaluation.candidate_subset, AssetSubset) + else None, + childUniqueIds=[ + child.condition_snapshot.unique_id for child in evaluation.child_evaluations ], ) def resolve_numTrue(self, graphene_info: ResolveInfo) -> int: return self._true_subset.size - def resolve_numFalse(self, graphene_info: ResolveInfo) -> int: - return self._candidate_subset.size - self._true_subset.size - - def resolve_falseSubset(self, graphene_info: ResolveInfo) -> GrapheneAssetSubset: - return GrapheneAssetSubset(self._candidate_subset - self._true_subset) - - def resolve_numSkipped(self, graphene_info: ResolveInfo) -> int: - return self._all_subset.size - self._candidate_subset.size - -class GrapheneSpecificPartitionAssetConditionEvaluation(graphene.ObjectType): +class GrapheneSpecificPartitionAssetConditionEvaluationNode(graphene.ObjectType): + uniqueId = graphene.NonNull(graphene.String) description = graphene.NonNull(graphene.String) metadataEntries = non_null_list(GrapheneMetadataEntry) status = graphene.NonNull(GrapheneAssetConditionEvaluationStatus) - childEvaluations = graphene.Field( - graphene.List(graphene.NonNull(lambda: GrapheneSpecificPartitionAssetConditionEvaluation)) - ) + childUniqueIds = non_null_list(graphene.String) class Meta: - name = "SpecificPartitionAssetConditionEvaluation" + name = "SpecificPartitionAssetConditionEvaluationNode" def __init__(self, evaluation: AssetConditionEvaluation, partition_key: str): self._evaluation = evaluation @@ -204,7 +188,7 @@ def __init__(self, evaluation: AssetConditionEvaluation, partition_key: str): if partition_key in evaluation.true_subset.subset_value: status = AssetConditionEvaluationStatus.TRUE elif ( - evaluation.candidate_subset is None + not isinstance(evaluation.candidate_subset, AssetSubset) or partition_key in evaluation.candidate_subset.subset_value ): status = AssetConditionEvaluationStatus.FALSE @@ -212,11 +196,11 @@ def __init__(self, evaluation: AssetConditionEvaluation, partition_key: str): status = AssetConditionEvaluationStatus.SKIPPED super().__init__( + uniqueId=evaluation.condition_snapshot.unique_id, description=evaluation.condition_snapshot.description, status=status, - childEvaluations=[ - GrapheneSpecificPartitionAssetConditionEvaluation(child, partition_key) - for child in evaluation.child_evaluations + childUniqueIds=[ + child.condition_snapshot.unique_id for child in evaluation.child_evaluations ], ) @@ -232,18 +216,60 @@ def resolve_metadataEntries( ), {}, ) - return [GrapheneMetadataEntry(key=key, value=value) for key, value in metadata.items()] + return list(iterate_metadata_entries(metadata)) -class GrapheneAssetConditionEvaluation(graphene.Union): +class GrapheneAssetConditionEvaluationNode(graphene.Union): class Meta: types = ( - GrapheneUnpartitionedAssetConditionEvaluation, - GraphenePartitionedAssetConditionEvaluation, - GrapheneSpecificPartitionAssetConditionEvaluation, + GrapheneUnpartitionedAssetConditionEvaluationNode, + GraphenePartitionedAssetConditionEvaluationNode, + GrapheneSpecificPartitionAssetConditionEvaluationNode, ) + name = "AssetConditionEvaluationNode" + + +class GrapheneAssetConditionEvaluation(graphene.ObjectType): + rootUniqueId = graphene.NonNull(graphene.String) + evaluationNodes = non_null_list(GrapheneAssetConditionEvaluationNode) + + class Meta: name = "AssetConditionEvaluation" + def __init__( + self, + evaluation: AssetConditionEvaluation, + partitions_def: Optional[PartitionsDefinition], + partition_key: Optional[str] = None, + ): + # flatten the evaluation tree into a list of nodes + def _flatten(e: AssetConditionEvaluation) -> Sequence[AssetConditionEvaluation]: + return list(itertools.chain([e], *(_flatten(ce) for ce in e.child_evaluations))) + + all_nodes = _flatten(evaluation) + + if evaluation.true_subset.is_partitioned: + if partition_key is None: + evaluationNodes = [ + GraphenePartitionedAssetConditionEvaluationNode(evaluation, partitions_def) + for evaluation in all_nodes + ] + else: + evaluationNodes = [ + GrapheneSpecificPartitionAssetConditionEvaluationNode(evaluation, partition_key) + for evaluation in all_nodes + ] + else: + evaluationNodes = [ + GrapheneUnpartitionedAssetConditionEvaluationNode(evaluation) + for evaluation in all_nodes + ] + + super().__init__( + rootUniqueId=evaluation.condition_snapshot.unique_id, + evaluationNodes=evaluationNodes, + ) + class GrapheneAssetConditionEvaluationRecord(graphene.ObjectType): id = graphene.NonNull(graphene.ID) @@ -254,6 +280,9 @@ class GrapheneAssetConditionEvaluationRecord(graphene.ObjectType): assetKey = graphene.NonNull(GrapheneAssetKey) numRequested = graphene.NonNull(graphene.Int) + startTimestamp = graphene.Field(graphene.Float) + endTimestamp = graphene.Field(graphene.Float) + evaluation = graphene.NonNull(GrapheneAssetConditionEvaluation) class Meta: @@ -263,23 +292,8 @@ def __init__( self, record: AutoMaterializeAssetEvaluationRecord, partitions_def: Optional[PartitionsDefinition], - dynamic_partitions_store: DynamicPartitionsStore, - partition_key: Optional[str] = None, ): evaluation_with_run_ids = record.get_evaluation_with_run_ids(partitions_def) - if evaluation_with_run_ids.evaluation.true_subset.is_partitioned: - if partition_key is None: - evaluation = GraphenePartitionedAssetConditionEvaluation( - evaluation_with_run_ids.evaluation, partitions_def, dynamic_partitions_store - ) - else: - evaluation = GrapheneSpecificPartitionAssetConditionEvaluation( - evaluation_with_run_ids.evaluation, partition_key - ) - else: - evaluation = GrapheneUnpartitionedAssetConditionEvaluation( - evaluation_with_run_ids.evaluation - ) super().__init__( id=record.id, @@ -288,7 +302,11 @@ def __init__( runIds=evaluation_with_run_ids.run_ids, assetKey=GrapheneAssetKey(path=record.asset_key.path), numRequested=evaluation_with_run_ids.evaluation.true_subset.size, - evaluation=evaluation, + startTimestamp=evaluation_with_run_ids.evaluation.start_timestamp, + endTimestamp=evaluation_with_run_ids.evaluation.end_timestamp, + evaluation=GrapheneAssetConditionEvaluation( + evaluation_with_run_ids.evaluation, partitions_def + ), ) diff --git a/python_modules/dagster-graphql/dagster_graphql/schema/roots/query.py b/python_modules/dagster-graphql/dagster_graphql/schema/roots/query.py index 193b5e84b4a0a..431ac626f9867 100644 --- a/python_modules/dagster-graphql/dagster_graphql/schema/roots/query.py +++ b/python_modules/dagster-graphql/dagster_graphql/schema/roots/query.py @@ -36,8 +36,8 @@ from dagster_graphql.implementation.fetch_logs import get_captured_log_metadata from dagster_graphql.implementation.fetch_runs import get_assets_latest_info from dagster_graphql.schema.asset_condition_evaluations import ( + GrapheneAssetConditionEvaluation, GrapheneAssetConditionEvaluationRecordsOrError, - GrapheneSpecificPartitionAssetConditionEvaluation, ) from dagster_graphql.schema.auto_materialize_asset_evaluations import ( GrapheneAutoMaterializeAssetEvaluationRecordsOrError, @@ -523,7 +523,7 @@ class Meta: ) assetConditionEvaluationForPartition = graphene.Field( - GrapheneSpecificPartitionAssetConditionEvaluation, + GrapheneAssetConditionEvaluation, assetKey=graphene.Argument(graphene.NonNull(GrapheneAssetKeyInput)), evaluationId=graphene.Argument(graphene.NonNull(graphene.Int)), partition=graphene.Argument(graphene.NonNull(graphene.String)), diff --git a/python_modules/dagster-graphql/dagster_graphql_tests/graphql/test_asset_condition_evaluations.py b/python_modules/dagster-graphql/dagster_graphql_tests/graphql/test_asset_condition_evaluations.py index d62d4b8524bcf..f45f718058a9d 100644 --- a/python_modules/dagster-graphql/dagster_graphql_tests/graphql/test_asset_condition_evaluations.py +++ b/python_modules/dagster-graphql/dagster_graphql_tests/graphql/test_asset_condition_evaluations.py @@ -1,4 +1,5 @@ -from typing import Optional, Sequence +import random +from typing import Any, Mapping, Optional, Sequence from unittest.mock import PropertyMock, patch import dagster._check as check @@ -213,63 +214,44 @@ def test_get_tick_range(self, graphql_context): FRAGMENTS = """ -fragment unpartitionedEvaluationFields on UnpartitionedAssetConditionEvaluation { - description - startTimestamp - endTimestamp - status -} - -fragment partitionedEvaluationFields on PartitionedAssetConditionEvaluation { - description - startTimestamp - endTimestamp - numTrue - numFalse - numSkipped - trueSubset { - subsetValue { - isPartitioned - partitionKeys - } - } - falseSubset { - subsetValue { - isPartitioned - partitionKeys - } - } -} - fragment evaluationFields on AssetConditionEvaluation { - ... on UnpartitionedAssetConditionEvaluation { - ...unpartitionedEvaluationFields - childEvaluations { - ...unpartitionedEvaluationFields - childEvaluations { - ...unpartitionedEvaluationFields - childEvaluations { - ...unpartitionedEvaluationFields - childEvaluations { - ...unpartitionedEvaluationFields - } + rootUniqueId + evaluationNodes { + ... on UnpartitionedAssetConditionEvaluationNode { + description + startTimestamp + endTimestamp + status + uniqueId + childUniqueIds + } + ... on PartitionedAssetConditionEvaluationNode { + description + startTimestamp + endTimestamp + numTrue + numFalse + numSkipped + trueSubset { + subsetValue { + isPartitioned + partitionKeys } } - } - } - ... on PartitionedAssetConditionEvaluation { - ...partitionedEvaluationFields - childEvaluations { - ...partitionedEvaluationFields - childEvaluations { - ...partitionedEvaluationFields - childEvaluations { - ...partitionedEvaluationFields - childEvaluations { - ...partitionedEvaluationFields - } + falseSubset { + subsetValue { + isPartitioned + partitionKeys } } + uniqueId + childUniqueIds + } + ... on SpecificPartitionAssetConditionEvaluationNode { + description + status + uniqueId + childUniqueIds } } } @@ -304,29 +286,16 @@ def test_get_tick_range(self, graphql_context): """ ) -QUERY_FOR_SPECIFIC_PARTITION = """ -fragment specificPartitionEvaluationFields on SpecificPartitionAssetConditionEvaluation { - description - status -} +QUERY_FOR_SPECIFIC_PARTITION = ( + FRAGMENTS + + """ query GetPartitionEvaluationQuery($assetKey: AssetKeyInput!, $partition: String!, $evaluationId: Int!) { assetConditionEvaluationForPartition(assetKey: $assetKey, partition: $partition, evaluationId: $evaluationId) { - ...specificPartitionEvaluationFields - childEvaluations { - ...specificPartitionEvaluationFields - childEvaluations { - ...specificPartitionEvaluationFields - childEvaluations { - ...specificPartitionEvaluationFields - childEvaluations { - ...specificPartitionEvaluationFields - } - } - } - } + ...evaluationFields } } """ +) QUERY_FOR_EVALUATION_ID = ( FRAGMENTS @@ -416,7 +385,7 @@ def test_get_historic_rules_without_evaluation_data( assert len(results.data["assetConditionEvaluationRecordsOrError"]["records"]) == 1 asset_one_record = results.data["assetConditionEvaluationRecordsOrError"]["records"][0] assert asset_one_record["assetKey"] == {"path": ["asset_one"]} - assert asset_one_record["evaluation"]["status"] == "SKIPPED" + assert asset_one_record["evaluation"]["evaluationNodes"][0]["status"] == "SKIPPED" results = execute_dagster_graphql( graphql_context, @@ -425,16 +394,22 @@ def test_get_historic_rules_without_evaluation_data( ) assert len(results.data["assetConditionEvaluationRecordsOrError"]["records"]) == 1 asset_two_record = results.data["assetConditionEvaluationRecordsOrError"]["records"][0] - assert asset_two_record["evaluation"]["description"] == "All of" - assert asset_two_record["evaluation"]["status"] == "SKIPPED" - asset_two_children = asset_two_record["evaluation"]["childEvaluations"] - assert len(asset_two_children) == 2 - assert asset_two_children[0]["description"] == "Any of" - assert asset_two_children[0]["status"] == "SKIPPED" - assert ( - asset_two_children[0]["childEvaluations"][0]["description"] - == "materialization is missing" + asset_two_root = asset_two_record["evaluation"]["evaluationNodes"][0] + + assert asset_two_root["description"] == "All of" + assert asset_two_root["status"] == "SKIPPED" + assert len(asset_two_root["childUniqueIds"]) == 2 + + asset_two_child = self._get_node( + asset_two_root["childUniqueIds"][0], asset_two_record["evaluation"]["evaluationNodes"] ) + assert asset_two_child["description"] == "Any of" + assert asset_two_child["status"] == "SKIPPED" + + asset_two_missing_node = self._get_node( + asset_two_child["childUniqueIds"][0], asset_two_record["evaluation"]["evaluationNodes"] + ) + assert asset_two_missing_node["description"] == "materialization is missing" results = execute_dagster_graphql( graphql_context, @@ -450,6 +425,7 @@ def test_get_historic_rules_without_evaluation_data( assert any(record == asset_one_record for record in records) assert any(record == asset_two_record for record in records) + # this evaluationId doesn't exist results = execute_dagster_graphql( graphql_context, QUERY_FOR_EVALUATION_ID, @@ -485,17 +461,24 @@ def test_get_historic_evaluation_with_evaluation_data( records = results.data["assetConditionEvaluationRecordsOrError"]["records"] assert len(records) == 1 + evaluation = records[0]["evaluation"] - assert evaluation["numTrue"] == 0 - assert evaluation["numFalse"] == 6 - assert evaluation["numSkipped"] == 0 - assert len(evaluation["childEvaluations"]) == 2 - not_skip_evaluation = evaluation["childEvaluations"][1] - assert not_skip_evaluation["description"] == "Not" - assert not_skip_evaluation["numTrue"] == 1 - assert len(not_skip_evaluation["childEvaluations"]) == 1 - assert not_skip_evaluation["childEvaluations"][0]["description"] == "Any of" - assert len(not_skip_evaluation["childEvaluations"][0]["childEvaluations"]) == 2 + rootNode = evaluation["evaluationNodes"][0] + assert rootNode["uniqueId"] == evaluation["rootUniqueId"] + + assert rootNode["numTrue"] == 0 + assert rootNode["numFalse"] == 6 + assert rootNode["numSkipped"] == 0 + assert len(rootNode["childUniqueIds"]) == 2 + + notSkipNode = self._get_node(rootNode["childUniqueIds"][0], evaluation["evaluationNodes"]) + assert notSkipNode["description"] == "Not" + assert notSkipNode["numTrue"] == 1 + assert len(notSkipNode["childUniqueIds"]) == 1 + + skipNode = self._get_node(rootNode["childUniqueIds"][1], evaluation["evaluationNodes"]) + assert skipNode["description"] == "Any of" + assert len(skipNode["childUniqueIds"]) == 2 def test_get_evaluations(self, graphql_context: WorkspaceRequestContext): evaluation1 = deserialize_auto_materialize_asset_evaluation_to_asset_condition_evaluation_with_run_ids( @@ -658,6 +641,11 @@ def test_get_evaluations(self, graphql_context: WorkspaceRequestContext): }, } + def _get_node( + self, unique_id: str, evaluations: Sequence[Mapping[str, Any]] + ) -> Mapping[str, Any]: + return next(iter([node for node in evaluations if node["uniqueId"] == unique_id])) + def _get_condition_evaluation( self, asset_key: AssetKey, @@ -668,7 +656,9 @@ def _get_condition_evaluation( child_evaluations: Optional[Sequence[AssetConditionEvaluation]] = None, ) -> AssetConditionEvaluation: return AssetConditionEvaluation( - condition_snapshot=AssetConditionSnapshot("...", description, "a1b2"), + condition_snapshot=AssetConditionSnapshot( + "...", description, str(random.randint(0, 100000000)) + ), true_subset=AssetSubset( asset_key=asset_key, value=partitions_def.subset_with_partition_keys(true_partition_keys), @@ -782,26 +772,32 @@ def test_get_evaluations_with_partitions(self, graphql_context: WorkspaceRequest assert records[0]["numRequested"] == 2 evaluation = records[0]["evaluation"] - assert evaluation["description"] == "All of" - assert evaluation["numTrue"] == 2 - assert evaluation["numFalse"] == 4 - assert evaluation["numSkipped"] == 0 - assert set(evaluation["trueSubset"]["subsetValue"]["partitionKeys"]) == {"a", "b"} - assert len(evaluation["childEvaluations"]) == 2 - - not_evaluation = evaluation["childEvaluations"][1] - assert not_evaluation["description"] == "Not" - assert not_evaluation["numTrue"] == 2 - assert not_evaluation["numFalse"] == 1 - assert not_evaluation["numSkipped"] == 3 - assert set(not_evaluation["trueSubset"]["subsetValue"]["partitionKeys"]) == {"a", "b"} - - skip_evaluation = not_evaluation["childEvaluations"][0] - assert skip_evaluation["description"] == "Any of" - assert skip_evaluation["numTrue"] == 1 - assert skip_evaluation["numFalse"] == 2 - assert skip_evaluation["numSkipped"] == 3 - assert set(skip_evaluation["trueSubset"]["subsetValue"]["partitionKeys"]) == {"c"} + + # all nodes in the tree + assert len(evaluation["evaluationNodes"]) == 9 + + rootNode = evaluation["evaluationNodes"][0] + assert rootNode["uniqueId"] == evaluation["rootUniqueId"] + assert rootNode["description"] == "All of" + assert rootNode["numTrue"] == 2 + assert rootNode["numFalse"] == 4 + assert rootNode["numSkipped"] == 0 + assert set(rootNode["trueSubset"]["subsetValue"]["partitionKeys"]) == {"a", "b"} + assert len(rootNode["childUniqueIds"]) == 2 + + notNode = self._get_node(rootNode["childUniqueIds"][1], evaluation["evaluationNodes"]) + assert notNode["description"] == "Not" + assert notNode["numTrue"] == 2 + assert notNode["numFalse"] == 1 + assert notNode["numSkipped"] == 3 + assert set(notNode["trueSubset"]["subsetValue"]["partitionKeys"]) == {"a", "b"} + + skipNode = self._get_node(notNode["childUniqueIds"][0], evaluation["evaluationNodes"]) + assert skipNode["description"] == "Any of" + assert skipNode["numTrue"] == 1 + assert skipNode["numFalse"] == 2 + assert skipNode["numSkipped"] == 3 + assert set(skipNode["trueSubset"]["subsetValue"]["partitionKeys"]) == {"c"} # test one of the true partitions specific_result = execute_dagster_graphql( @@ -815,17 +811,22 @@ def test_get_evaluations_with_partitions(self, graphql_context: WorkspaceRequest ) evaluation = specific_result.data["assetConditionEvaluationForPartition"] - assert evaluation["description"] == "All of" - assert evaluation["status"] == "TRUE" - assert len(evaluation["childEvaluations"]) == 2 + assert len(evaluation["evaluationNodes"]) == 9 + + rootNode = evaluation["evaluationNodes"][0] + assert rootNode["uniqueId"] == evaluation["rootUniqueId"] + + assert rootNode["description"] == "All of" + assert rootNode["status"] == "TRUE" + assert len(rootNode["childUniqueIds"]) == 2 - not_evaluation = evaluation["childEvaluations"][1] - assert not_evaluation["description"] == "Not" - assert not_evaluation["status"] == "TRUE" + notNode = self._get_node(rootNode["childUniqueIds"][1], evaluation["evaluationNodes"]) + assert notNode["description"] == "Not" + assert notNode["status"] == "TRUE" - skip_evaluation = not_evaluation["childEvaluations"][0] - assert skip_evaluation["description"] == "Any of" - assert skip_evaluation["status"] == "FALSE" + skipNode = self._get_node(notNode["childUniqueIds"][0], evaluation["evaluationNodes"]) + assert skipNode["description"] == "Any of" + assert skipNode["status"] == "FALSE" # test one of the false partitions specific_result = execute_dagster_graphql( @@ -839,17 +840,22 @@ def test_get_evaluations_with_partitions(self, graphql_context: WorkspaceRequest ) evaluation = specific_result.data["assetConditionEvaluationForPartition"] - assert evaluation["description"] == "All of" - assert evaluation["status"] == "FALSE" - assert len(evaluation["childEvaluations"]) == 2 + assert len(evaluation["evaluationNodes"]) == 9 + + rootNode = evaluation["evaluationNodes"][0] + assert rootNode["uniqueId"] == evaluation["rootUniqueId"] + + assert rootNode["description"] == "All of" + assert rootNode["status"] == "FALSE" + assert len(rootNode["childUniqueIds"]) == 2 - not_evaluation = evaluation["childEvaluations"][1] - assert not_evaluation["description"] == "Not" - assert not_evaluation["status"] == "SKIPPED" + notNode = self._get_node(rootNode["childUniqueIds"][1], evaluation["evaluationNodes"]) + assert notNode["description"] == "Not" + assert notNode["status"] == "SKIPPED" - skip_evaluation = not_evaluation["childEvaluations"][0] - assert skip_evaluation["description"] == "Any of" - assert skip_evaluation["status"] == "SKIPPED" + skipNode = self._get_node(notNode["childUniqueIds"][0], evaluation["evaluationNodes"]) + assert skipNode["description"] == "Any of" + assert skipNode["status"] == "SKIPPED" def _test_current_evaluation_id(self, graphql_context: WorkspaceRequestContext): graphql_context.instance.daemon_cursor_storage.set_cursor_values( diff --git a/python_modules/dagster/dagster/_core/definitions/asset_condition.py b/python_modules/dagster/dagster/_core/definitions/asset_condition.py index 91a57e0efe5d5..40136daaea996 100644 --- a/python_modules/dagster/dagster/_core/definitions/asset_condition.py +++ b/python_modules/dagster/dagster/_core/definitions/asset_condition.py @@ -167,10 +167,7 @@ def equivalent_to_stored_evaluation(self, other: Optional["AssetConditionEvaluat return ( other is not None and self.condition_snapshot == other.condition_snapshot - # if any partitions are requested, then the state of the world must have meaninfully - # changed since the previous evaluation - and self.true_subset.size == 0 - and other.true_subset.size == 0 + and self.true_subset == other.true_subset # the candidate subset gets modified during serialization and get_serializable_candidate_subset(self.candidate_subset) == get_serializable_candidate_subset(other.candidate_subset) diff --git a/python_modules/dagster/dagster/_core/definitions/asset_daemon_cursor.py b/python_modules/dagster/dagster/_core/definitions/asset_daemon_cursor.py index 163f0714e0a45..1d2587eafe53e 100644 --- a/python_modules/dagster/dagster/_core/definitions/asset_daemon_cursor.py +++ b/python_modules/dagster/dagster/_core/definitions/asset_daemon_cursor.py @@ -255,7 +255,7 @@ def backcompat_deserialize_asset_daemon_cursor_str( previous_evaluation_state.append(backcompat_evaluation_state) return AssetDaemonCursor( - evaluation_id=default_evaluation_id, + evaluation_id=data.get("evaluation_id") or default_evaluation_id, previous_evaluation_state=previous_evaluation_state, last_observe_request_timestamp_by_asset_key=last_observe_request_timestamp_by_asset_key, ) diff --git a/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule_evaluation.py b/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule_evaluation.py index b3fd30e875665..1a25874a0ea8d 100644 --- a/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule_evaluation.py +++ b/python_modules/dagster/dagster/_core/definitions/auto_materialize_rule_evaluation.py @@ -200,9 +200,11 @@ def partitions_def(self) -> Optional[PartitionsDefinition]: raise NotImplementedError() def _get_empty_subset(self, asset_key: AssetKey, is_partitioned: bool) -> AssetSubset: - # We know this asset is partitioned, but we don't know what its partitions def is, so we - # just use a DefaultPartitionsSubset - if is_partitioned and self.partitions_def is None: + if not is_partitioned: + return AssetSubset(asset_key, False) + elif self.partitions_def is None: + # We know this asset is partitioned, but we don't know what its partitions def was, so we + # just use a DefaultPartitionsSubset return AssetSubset(asset_key, DefaultPartitionsSubset(set())) else: return AssetSubset.empty(asset_key, self.partitions_def) @@ -310,8 +312,9 @@ def _get_child_decision_type_evaluation( is_partitioned, rule_snapshot, ) - for rule_snapshot in rule_snapshots - or set(partition_subsets_by_condition_by_rule_snapshot.keys()) + for rule_snapshot in ( + set(rule_snapshots) | set(partition_subsets_by_condition_by_rule_snapshot.keys()) + ) if rule_snapshot.decision_type == decision_type ] @@ -329,11 +332,7 @@ def _get_child_decision_type_evaluation( decision_type_snapshot = AssetConditionSnapshot( class_name=OrAssetCondition.__name__, description="Any of", unique_id=unique_id ) - initial = ( - AssetSubset(asset_key, DefaultPartitionsSubset(set())) - if is_partitioned - else AssetSubset.empty(asset_key, None) - ) + initial = self._get_empty_subset(asset_key, is_partitioned) evaluation = AssetConditionEvaluation( condition_snapshot=decision_type_snapshot, true_subset=reduce( @@ -362,7 +361,7 @@ def _get_child_decision_type_evaluation( # In reality, we'd like to invert the inner true_subset here, but this is an # expensive operation, and error-prone as the set of all partitions may have changed # since the evaluation was stored. Instead, we just use an empty subset. - true_subset = AssetSubset(asset_key, evaluation.true_subset.subset_value.empty_subset()) + true_subset = self._get_empty_subset(asset_key, is_partitioned) else: true_subset = evaluation.true_subset._replace( value=not evaluation.true_subset.bool_value @@ -403,7 +402,14 @@ def unpack( cast(Sequence[AutoMaterializeRuleSnapshot], unpacked_dict.get("rule_snapshots", [])) or [] ) - is_partitioned = any(tup[1] is not None for tup in partition_subsets_by_condition) + is_partitioned = ( + any(tup[1] is not None for tup in partition_subsets_by_condition) + if partition_subsets_by_condition + # if we don't have any partition_subsets_by_condition to look at, we can't tell if this + # asset was partitioned at the time that the evaluation was stored, so instead we assume + # that its current partition status is the same as its partition status at storage time. + else self.partitions_def is not None + ) # get the sub-evaluations for each decision type materialize_evaluation = check.not_none( From 6263bd47cfaffdc9c313377bd7fb98a57cdfcee1 Mon Sep 17 00:00:00 2001 From: Marco Salazar Date: Tue, 9 Jan 2024 13:25:55 -0500 Subject: [PATCH 20/56] sofar --- .../AutomaterializeRunsTable.tsx | 113 ++++++++++++++++++ .../types/AutomaterializeMiddlePanel.types.ts | 23 ++++ .../types/AutomaterializeRunsTable.types.ts | 45 +++++++ 3 files changed, 181 insertions(+) create mode 100644 js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/AutomaterializeRunsTable.tsx create mode 100644 js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/types/AutomaterializeMiddlePanel.types.ts create mode 100644 js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/types/AutomaterializeRunsTable.types.ts diff --git a/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/AutomaterializeRunsTable.tsx b/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/AutomaterializeRunsTable.tsx new file mode 100644 index 0000000000000..3677ed436c1a5 --- /dev/null +++ b/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/AutomaterializeRunsTable.tsx @@ -0,0 +1,113 @@ +import {gql, useQuery} from '@apollo/client'; +import {Body2, Box, Mono, Table, colorTextLighter} from '@dagster-io/ui-components'; +import React from 'react'; +import {Link} from 'react-router-dom'; + +import {PYTHON_ERROR_FRAGMENT} from '../../app/PythonErrorFragment'; +import {PythonErrorInfo} from '../../app/PythonErrorInfo'; +import {RunStatusTagWithStats} from '../../runs/RunStatusTag'; +import {RUN_TIME_FRAGMENT, RunStateSummary, RunTime, titleForRun} from '../../runs/RunUtils'; + +import { + AutomaterializeRunsQuery, + AutomaterializeRunsQueryVariables, +} from './types/AutomaterializeRunsTable.types'; + +export const AutomaterializeRunsTable = ({runIds}: {runIds: string[]}) => { + const {data, loading, error} = useQuery< + AutomaterializeRunsQuery, + AutomaterializeRunsQueryVariables + >(AUTOMATERIALIZE_RUNS_QUERY, { + variables: { + filter: { + runIds, + }, + }, + skip: !runIds.length, + }); + + if (!runIds.length) { + return ( + + None + + ); + } + + if (error) { + return An error occurred fetching runs. Check your network status; + } + + if (loading || !data) { + return null; + } + + if (data.runsOrError.__typename === 'PythonError') { + return ; + } + + if (data.runsOrError.__typename === 'InvalidPipelineRunsFilterError') { + return {data.runsOrError.message}; + } + + return ( + + + + + + + + + + + + {data.runsOrError.results.map((run) => ( + + + + + + + ))} + +
Run IDCreated dateStatusDuration
+ + {titleForRun(run)} + + + + + + + +
+
+ ); +}; + +const AUTOMATERIALIZE_RUNS_QUERY = gql` + query AutomaterializeRunsQuery($filter: RunsFilter) { + runsOrError(filter: $filter) { + ... on Runs { + results { + id + ...AutomaterializeRunFragment + } + } + ... on InvalidPipelineRunsFilterError { + message + } + ...PythonErrorFragment + } + } + + fragment AutomaterializeRunFragment on Run { + id + runId + ...RunTimeFragment + } + + ${PYTHON_ERROR_FRAGMENT} + ${RUN_TIME_FRAGMENT} +`; diff --git a/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/types/AutomaterializeMiddlePanel.types.ts b/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/types/AutomaterializeMiddlePanel.types.ts new file mode 100644 index 0000000000000..c87f6f0c76e87 --- /dev/null +++ b/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/types/AutomaterializeMiddlePanel.types.ts @@ -0,0 +1,23 @@ +// Generated GraphQL types, do not edit manually. + +import * as Types from '../../../graphql/types'; + +export type FullPartitionsQueryVariables = Types.Exact<{ + assetKey: Types.AssetKeyInput; +}>; + +export type FullPartitionsQuery = { + __typename: 'Query'; + assetNodeOrError: + | { + __typename: 'AssetNode'; + id: string; + partitionKeysByDimension: Array<{ + __typename: 'DimensionPartitionKeys'; + name: string; + type: Types.PartitionDefinitionType; + partitionKeys: Array; + }>; + } + | {__typename: 'AssetNotFoundError'}; +}; diff --git a/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/types/AutomaterializeRunsTable.types.ts b/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/types/AutomaterializeRunsTable.types.ts new file mode 100644 index 0000000000000..d65ee354e0ee9 --- /dev/null +++ b/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/types/AutomaterializeRunsTable.types.ts @@ -0,0 +1,45 @@ +// Generated GraphQL types, do not edit manually. + +import * as Types from '../../../graphql/types'; + +export type AutomaterializeRunsQueryVariables = Types.Exact<{ + filter?: Types.InputMaybe; +}>; + +export type AutomaterializeRunsQuery = { + __typename: 'Query'; + runsOrError: + | {__typename: 'InvalidPipelineRunsFilterError'; message: string} + | { + __typename: 'PythonError'; + message: string; + stack: Array; + errorChain: Array<{ + __typename: 'ErrorChainLink'; + isExplicitLink: boolean; + error: {__typename: 'PythonError'; message: string; stack: Array}; + }>; + } + | { + __typename: 'Runs'; + results: Array<{ + __typename: 'Run'; + id: string; + runId: string; + status: Types.RunStatus; + startTime: number | null; + endTime: number | null; + updateTime: number | null; + }>; + }; +}; + +export type AutomaterializeRunFragment = { + __typename: 'Run'; + id: string; + runId: string; + status: Types.RunStatus; + startTime: number | null; + endTime: number | null; + updateTime: number | null; +}; From a1f62c4e6638f26cc792fe3cfc0fde0645e6c66e Mon Sep 17 00:00:00 2001 From: Marco Salazar Date: Tue, 9 Jan 2024 12:24:57 -0500 Subject: [PATCH 21/56] ?? --- .../ui-components/src/components/Suggest.tsx | 36 +- .../src/components/TagSelector.tsx | 25 +- .../assets/AssetEventMetadataEntriesTable.tsx | 5 +- .../assets/AssetSidebarActivitySummary.tsx | 2 +- .../packages/ui-core/src/assets/AssetTabs.tsx | 10 +- .../packages/ui-core/src/assets/AssetView.tsx | 9 +- .../AssetAutomaterializePolicyPage.tsx | 76 +- .../AutoMaterializeExperimentalBanner.tsx | 11 +- .../AutomaterializeLeftPanel.tsx | 130 +- .../AutomaterializeMiddlePanel.tsx | 381 ++++- ...AutomaterializeRequestedPartitionsLink.tsx | 311 ---- .../AutomaterializeRightPanel.tsx | 209 --- .../AutomaterializeRunTag.tsx | 51 - .../EvaluationCounts.tsx | 73 - .../GetEvaluationsQuery.tsx | 169 +- .../PartitionSegmentWithPopover.tsx | 117 +- .../PolicyEvaluationStatusTag.tsx | 3 +- .../PolicyEvaluationTable.tsx | 132 +- .../RuleEvaluationOutcomes.tsx | 236 --- .../AutoMaterializePolicyPage.fixtures.ts | 109 +- .../AutomaterializeMiddlePanel.stories.tsx | 16 +- .../PartitionSegmentWithPopover.stories.tsx | 10 +- .../PolicyEvaluationTable.stories.tsx | 15 +- .../flattenEvaluations.tsx | 44 +- ...aterializeRequestedPartitionsLink.types.ts | 39 - .../types/AutomaterializeRightPanel.types.ts | 33 - .../types/AutomaterializeRunTag.types.ts | 15 - .../types/GetEvaluationsQuery.types.ts | 1449 +++++++++++++++-- .../useEvaluationsQueryResult.tsx | 12 +- .../AutomaterializeTagWithEvaluation.tsx | 2 +- .../assets/__tests__/buildAssetTabs.test.tsx | 10 +- .../AutomaterializationTickDetailDialog.tsx | 2 +- .../src/graphql/possibleTypes.generated.json | 700 +++++++- .../packages/ui-core/src/runs/TimeElapsed.tsx | 12 + .../toys/auto_materializing/repo_1.py | 25 +- .../hello_world_repository.py | 1 + 36 files changed, 2925 insertions(+), 1555 deletions(-) delete mode 100644 js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/AutomaterializeRequestedPartitionsLink.tsx delete mode 100644 js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/AutomaterializeRightPanel.tsx delete mode 100644 js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/AutomaterializeRunTag.tsx delete mode 100644 js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/EvaluationCounts.tsx delete mode 100644 js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/RuleEvaluationOutcomes.tsx delete mode 100644 js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/types/AutomaterializeRequestedPartitionsLink.types.ts delete mode 100644 js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/types/AutomaterializeRightPanel.types.ts delete mode 100644 js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/types/AutomaterializeRunTag.types.ts diff --git a/js_modules/dagster-ui/packages/ui-components/src/components/Suggest.tsx b/js_modules/dagster-ui/packages/ui-components/src/components/Suggest.tsx index eb5f9491f2582..04611868e84f7 100644 --- a/js_modules/dagster-ui/packages/ui-components/src/components/Suggest.tsx +++ b/js_modules/dagster-ui/packages/ui-components/src/components/Suggest.tsx @@ -99,23 +99,25 @@ export const Suggest = (props: Props) => { } return ( - ( -
- {props.renderItem(props.filteredItems[a.index] as T, a.index)} -
- )} - width={menuWidth} - height={Math.min(props.filteredItems.length * itemHeight, itemHeight * VISIBLE_ITEMS)} - /> +
+ ( +
+ {props.renderItem(props.filteredItems[a.index] as T, a.index)} +
+ )} + width={menuWidth} + height={Math.min(props.filteredItems.length * itemHeight, itemHeight * VISIBLE_ITEMS)} + /> +
); }} popoverProps={allPopoverProps} diff --git a/js_modules/dagster-ui/packages/ui-components/src/components/TagSelector.tsx b/js_modules/dagster-ui/packages/ui-components/src/components/TagSelector.tsx index 046e100fa720d..67b506e1cbbd6 100644 --- a/js_modules/dagster-ui/packages/ui-components/src/components/TagSelector.tsx +++ b/js_modules/dagster-ui/packages/ui-components/src/components/TagSelector.tsx @@ -49,6 +49,8 @@ type Props = { dropdownStyles?: React.CSSProperties; rowWidth?: number; rowHeight?: number; + closeOnSelect?: boolean; + usePortal?: boolean; }; const defaultRenderTag = (tag: string, tagProps: TagSelectorTagProps) => { @@ -64,7 +66,7 @@ const defaultRenderTag = (tag: string, tagProps: TagSelectorTagProps) => { maxWidth: '120px', }} data-tooltip={tag} - data-tooltip-style={DefaultTagTooltipStyle} + data-tooltip-style={TagSelectorDefaultTagTooltipStyle} > @@ -107,6 +109,8 @@ export const TagSelector = ({ dropdownStyles, renderTagList, rowHeight = MENU_ITEM_HEIGHT, + closeOnSelect, + usePortal, }: Props) => { const [isDropdownOpen, setIsDropdownOpen] = React.useState(false); const {viewport, containerProps} = useViewport(); @@ -141,6 +145,9 @@ export const TagSelector = ({ setSelectedTags( selected ? selectedTags.filter((t) => t !== tag) : [...selectedTags, tag], ); + if (closeOnSelect) { + setIsDropdownOpen(false); + } }; if (renderDropdownItem) { return
{renderDropdownItem(tag, {toggle, selected})}
; @@ -162,6 +169,7 @@ export const TagSelector = ({ return {dropdownContent}; }, [ allTags, + closeOnSelect, dropdownStyles, items, renderDropdown, @@ -205,27 +213,28 @@ export const TagSelector = ({ } } }} - content={
{dropdown}
} + content={
{dropdown}
} targetTagName="div" onOpening={rowVirtualizer.measure} onOpened={rowVirtualizer.measure} + usePortal={usePortal} > - { setIsDropdownOpen((isOpen) => !isOpen); }} {...containerProps} > - {tagsContent} + {tagsContent}
-
+ ); }; -const Container = styled.div` +export const TagSelectorContainer = styled.div` display: flex; flex-direction: row; align-items: center; @@ -237,7 +246,7 @@ const Placeholder = styled.div` color: ${colorTextDisabled()}; `; -const TagsContainer = styled(Box)` +export const TagSelectorTagsContainer = styled(Box)` overflow-x: auto; &::-webkit-scrollbar { @@ -303,7 +312,7 @@ export const TagSelectorWithSearch = ( ); }; -const DefaultTagTooltipStyle = JSON.stringify({ +export const TagSelectorDefaultTagTooltipStyle = JSON.stringify({ background: colorBackgroundDefault(), border: `1px solid ${colorBorderDefault()}`, color: colorTextDefault(), diff --git a/js_modules/dagster-ui/packages/ui-core/src/assets/AssetEventMetadataEntriesTable.tsx b/js_modules/dagster-ui/packages/ui-core/src/assets/AssetEventMetadataEntriesTable.tsx index ad7cddb4a2609..cdd924a6c3881 100644 --- a/js_modules/dagster-ui/packages/ui-core/src/assets/AssetEventMetadataEntriesTable.tsx +++ b/js_modules/dagster-ui/packages/ui-core/src/assets/AssetEventMetadataEntriesTable.tsx @@ -21,7 +21,10 @@ export const AssetEventMetadataEntriesTable = ({ event, observations, }: { - event: AssetObservationFragment | AssetMaterializationFragment | null; + event: Pick< + AssetObservationFragment | AssetMaterializationFragment, + 'metadataEntries' | 'timestamp' + > | null; observations?: (AssetObservationFragment | AssetMaterializationFragment)[]; }) => { if (!event || (!event.metadataEntries.length && !observations?.length)) { diff --git a/js_modules/dagster-ui/packages/ui-core/src/assets/AssetSidebarActivitySummary.tsx b/js_modules/dagster-ui/packages/ui-core/src/assets/AssetSidebarActivitySummary.tsx index c78e3aa316f0d..03af02d9bd494 100644 --- a/js_modules/dagster-ui/packages/ui-core/src/assets/AssetSidebarActivitySummary.tsx +++ b/js_modules/dagster-ui/packages/ui-core/src/assets/AssetSidebarActivitySummary.tsx @@ -92,7 +92,7 @@ export const AssetSidebarActivitySummary = ({ padding={{horizontal: 24, vertical: 12}} flex={{direction: 'row', gap: 4, alignItems: 'center'}} > - + View auto-materialize history diff --git a/js_modules/dagster-ui/packages/ui-core/src/assets/AssetTabs.tsx b/js_modules/dagster-ui/packages/ui-core/src/assets/AssetTabs.tsx index 70af69f533f5b..d066958cc3662 100644 --- a/js_modules/dagster-ui/packages/ui-core/src/assets/AssetTabs.tsx +++ b/js_modules/dagster-ui/packages/ui-core/src/assets/AssetTabs.tsx @@ -36,7 +36,7 @@ export const DEFAULT_ASSET_TAB_ORDER = [ 'plots', 'definition', 'lineage', - 'auto-materialize-history', + 'automation', ]; export type AssetTabConfigInput = { @@ -91,10 +91,10 @@ export const buildAssetTabMap = (input: AssetTabConfigInput): Record { if (definitionQueryResult.loading && !definitionQueryResult.previousData) { return ; } - return ( - - ); + return ; }; const renderChecksTab = () => { @@ -218,7 +213,7 @@ export const AssetView = ({assetKey, trace}: Props) => { return renderEventsTab(); case 'plots': return renderPlotsTab(); - case 'auto-materialize-history': + case 'automation': return renderAutomaterializeHistoryTab(); case 'checks': return renderChecksTab(); diff --git a/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/AssetAutomaterializePolicyPage.tsx b/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/AssetAutomaterializePolicyPage.tsx index 769ee58be99bc..78c9576c6bacf 100644 --- a/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/AssetAutomaterializePolicyPage.tsx +++ b/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/AssetAutomaterializePolicyPage.tsx @@ -1,43 +1,39 @@ -import {Box, Subheading, colorTextLight} from '@dagster-io/ui-components'; +import {Box, colorTextLight} from '@dagster-io/ui-components'; import * as React from 'react'; import styled from 'styled-components'; import {FIFTEEN_SECONDS, useQueryRefreshAtInterval} from '../../app/QueryRefresh'; import {useQueryPersistedState} from '../../hooks/useQueryPersistedState'; import {AssetKey} from '../types'; +import {AssetViewDefinitionNodeFragment} from '../types/AssetView.types'; import {AutoMaterializeExperimentalBanner} from './AutoMaterializeExperimentalBanner'; import {AutomaterializeLeftPanel} from './AutomaterializeLeftPanel'; import {AutomaterializeMiddlePanel} from './AutomaterializeMiddlePanel'; -import {AutomaterializeRightPanel} from './AutomaterializeRightPanel'; import {useEvaluationsQueryResult} from './useEvaluationsQueryResult'; export const AssetAutomaterializePolicyPage = ({ assetKey, - assetHasDefinedPartitions, + definition, }: { assetKey: AssetKey; - assetHasDefinedPartitions: boolean; + definition?: AssetViewDefinitionNodeFragment | null; }) => { const {queryResult, paginationProps} = useEvaluationsQueryResult({assetKey}); useQueryRefreshAtInterval(queryResult, FIFTEEN_SECONDS); - const {evaluations} = React.useMemo(() => { + const evaluations = React.useMemo(() => { if ( - queryResult.data?.autoMaterializeAssetEvaluationsOrError?.__typename === - 'AutoMaterializeAssetEvaluationRecords' && + queryResult.data?.assetConditionEvaluationRecordsOrError?.__typename === + 'AssetConditionEvaluationRecords' && queryResult.data?.assetNodeOrError?.__typename === 'AssetNode' ) { - return { - evaluations: queryResult.data?.autoMaterializeAssetEvaluationsOrError.records, - currentAutoMaterializeEvaluationId: - queryResult.data.assetNodeOrError.currentAutoMaterializeEvaluationId, - }; + return queryResult.data?.assetConditionEvaluationRecordsOrError.records; } - return {evaluations: [], currentAutoMaterializeEvaluationId: null}; + return []; }, [ - queryResult.data?.autoMaterializeAssetEvaluationsOrError, + queryResult.data?.assetConditionEvaluationRecordsOrError, queryResult.data?.assetNodeOrError, ]); @@ -73,40 +69,30 @@ export const AssetAutomaterializePolicyPage = ({
- - - Evaluation history + + + { + setSelectedEvaluationId(evaluation.evaluationId); + }} + selectedEvaluation={selectedEvaluation} + /> - - - { - setSelectedEvaluationId(evaluation.evaluationId); - }} - selectedEvaluation={selectedEvaluation} - /> - - - - + + - - - ); diff --git a/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/AutoMaterializeExperimentalBanner.tsx b/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/AutoMaterializeExperimentalBanner.tsx index 48dead31abf6a..7d0d842d4d479 100644 --- a/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/AutoMaterializeExperimentalBanner.tsx +++ b/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/AutoMaterializeExperimentalBanner.tsx @@ -1,15 +1,24 @@ import {Alert, Icon, Tag, Tooltip, colorAccentBlue} from '@dagster-io/ui-components'; import React from 'react'; +import {useStateWithStorage} from '../../hooks/useStateWithStorage'; + const LearnMoreLink = 'https://docs.dagster.io/concepts/assets/asset-auto-execution#auto-materializing-assets-'; export const AutoMaterializeExperimentalBanner = () => { + const [closed, setClosed] = useStateWithStorage('automation-experimental', (value) => !!value); + if (closed) { + return null; + } return ( } + onClose={() => { + setClosed(true); + }} description={ You can learn more about this new feature and provide feedback{' '} diff --git a/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/AutomaterializeLeftPanel.tsx b/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/AutomaterializeLeftPanel.tsx index 69119e4c945eb..e9a6063f2afc2 100644 --- a/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/AutomaterializeLeftPanel.tsx +++ b/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/AutomaterializeLeftPanel.tsx @@ -1,32 +1,41 @@ import { + Body2, Box, Caption, CursorPaginationControls, + Icon, + Subtitle1, + colorAccentBlue, + colorAccentGreen, colorBackgroundBlue, colorBackgroundBlueHover, colorBackgroundDefault, colorBackgroundDefaultHover, + colorBackgroundDisabled, colorBackgroundLight, colorKeylineDefault, colorTextBlue, colorTextDefault, + colorTextGreen, } from '@dagster-io/ui-components'; import * as React from 'react'; +import {Link} from 'react-router-dom'; import styled from 'styled-components'; import {TimestampDisplay} from '../../schedules/TimestampDisplay'; +import {numberFormatter} from '../../ui/formatters'; +import {AssetViewDefinitionNodeFragment} from '../types/AssetView.types'; -import {EvaluationCounts} from './EvaluationCounts'; -import {AutoMaterializeEvaluationRecordItemFragment} from './types/GetEvaluationsQuery.types'; +import {AssetConditionEvaluationRecordFragment} from './types/GetEvaluationsQuery.types'; import {useEvaluationsQueryResult} from './useEvaluationsQueryResult'; interface Props extends ListProps { - evaluations: AutoMaterializeEvaluationRecordItemFragment[]; + evaluations: AssetConditionEvaluationRecordFragment[]; paginationProps: ReturnType['paginationProps']; } export const AutomaterializeLeftPanel = ({ - assetHasDefinedPartitions, + definition, evaluations, paginationProps, onSelectEvaluation, @@ -35,7 +44,7 @@ export const AutomaterializeLeftPanel = ({ return ( void; - selectedEvaluation?: AutoMaterializeEvaluationRecordItemFragment; + definition?: AssetViewDefinitionNodeFragment | null; + evaluations: AssetConditionEvaluationRecordFragment[]; + onSelectEvaluation: (evaluation: AssetConditionEvaluationRecordFragment) => void; + selectedEvaluation?: AssetConditionEvaluationRecordFragment; } export const AutomaterializeLeftList = (props: ListProps) => { - const {assetHasDefinedPartitions, evaluations, onSelectEvaluation, selectedEvaluation} = props; + const {evaluations, onSelectEvaluation, selectedEvaluation, definition} = props; return ( - - {evaluations.map((evaluation) => { - const isSelected = selectedEvaluation?.evaluationId === evaluation.evaluationId; - const {numRequested, numSkipped, numDiscarded} = evaluation; - - return ( - { - onSelectEvaluation(evaluation); - }} - $selected={isSelected} - > - - - + + + Evaluations + + + + + + + {definition?.automationPolicySensor?.name ?? 'Automation'} - - ); - })} - - Evaluations are retained for 30 days + + + + {evaluations.map((evaluation) => { + const isSelected = selectedEvaluation?.id === evaluation.id; + + const hasRequested = evaluation.numRequested > 0; + + function status() { + if (hasRequested) { + if (definition?.partitionDefinition) { + return ( + {numberFormatter.format(evaluation.numRequested)} Requested + ); + } + return Requested; + } + return Not Requested; + } + + return ( + { + onSelectEvaluation(evaluation); + }} + $selected={isSelected} + > + + + + + + + +
{status()}
+
+
+ ); + })} +
+ + Evaluations are retained for 30 days +
); @@ -141,3 +181,11 @@ const EvaluationListItem = styled.button` padding: 8px 12px; `; + +export const StatusDot = styled.div<{$color: string}>` + background-color: ${({$color}) => $color}; + border-radius: 50%; + width: 10px; + height: 10px; + margin: 5px; +`; diff --git a/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/AutomaterializeMiddlePanel.tsx b/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/AutomaterializeMiddlePanel.tsx index c169c2f2e5b35..4e7b4716c7b3c 100644 --- a/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/AutomaterializeMiddlePanel.tsx +++ b/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/AutomaterializeMiddlePanel.tsx @@ -1,60 +1,70 @@ -import {useQuery} from '@apollo/client'; -import {Box, NonIdealState, Subheading} from '@dagster-io/ui-components'; +import {gql, useQuery} from '@apollo/client'; +import { + BaseTag, + Box, + Icon, + MenuItem, + MiddleTruncate, + NonIdealState, + Subheading, + Subtitle2, + Tag, + TagSelectorContainer, + TagSelectorDefaultTagTooltipStyle, + TagSelectorWithSearch, + colorAccentGray, + colorAccentGreen, + colorBackgroundGray, + colorTextLight, +} from '@dagster-io/ui-components'; import * as React from 'react'; +import styled from 'styled-components'; import {ErrorWrapper} from '../../app/PythonErrorInfo'; -import {AutoMaterializeDecisionType, AutoMaterializeRule} from '../../graphql/types'; +import {formatElapsedTimeWithMsec} from '../../app/Util'; +import {Timestamp} from '../../app/time/Timestamp'; +import {DimensionPartitionKeys} from '../../graphql/types'; +import {numberFormatter} from '../../ui/formatters'; import {AssetKey} from '../types'; +import {AssetViewDefinitionNodeFragment} from '../types/AssetView.types'; -import {AutomaterializeRequestedPartitionsLink} from './AutomaterializeRequestedPartitionsLink'; -import {AutomaterializeRunTag} from './AutomaterializeRunTag'; -import {GET_EVALUATIONS_QUERY} from './GetEvaluationsQuery'; -import {RuleEvaluationOutcomes} from './RuleEvaluationOutcomes'; -import {EvaluationOrEmpty, NoConditionsMetEvaluation} from './types'; +import {StatusDot} from './AutomaterializeLeftPanel'; +import {AutomaterializeRunsTable} from './AutomaterializeRunsTable'; +import { + GET_EVALUATIONS_QUERY, + GET_EVALUATIONS_SPECIFIC_PARTITION_QUERY, +} from './GetEvaluationsQuery'; +import {PolicyEvaluationTable} from './PolicyEvaluationTable'; +import { + FullPartitionsQuery, + FullPartitionsQueryVariables, +} from './types/AutomaterializeMiddlePanel.types'; import { + AssetConditionEvaluationRecordFragment, GetEvaluationsQuery, GetEvaluationsQueryVariables, - RuleWithEvaluationsFragment, - AutoMaterializeEvaluationRecordItemFragment, + GetEvaluationsSpecificPartitionQuery, + GetEvaluationsSpecificPartitionQueryVariables, } from './types/GetEvaluationsQuery.types'; interface Props { assetKey: AssetKey; - assetHasDefinedPartitions: boolean; selectedEvaluationId: number | undefined; + selectedEvaluation?: AssetConditionEvaluationRecordFragment; + definition?: AssetViewDefinitionNodeFragment | null; } -const EMPTY: EvaluationOrEmpty = { - __typename: 'no_conditions_met', - evaluationId: 0, - amount: 0, - endTimestamp: 0, - startTimestamp: 0, -}; - -const extractRequestedPartitionKeys = (rulesWithEvaluations: RuleWithEvaluationsFragment[]) => { - let requested: string[] = []; - let skippedOrDiscarded: string[] = []; - - rulesWithEvaluations.forEach(({rule, ruleEvaluations}) => { - const partitionKeys = ruleEvaluations.flatMap((e) => - e.partitionKeysOrError?.__typename === 'PartitionKeys' - ? e.partitionKeysOrError.partitionKeys - : [], - ); - if (rule.decisionType === AutoMaterializeDecisionType.MATERIALIZE) { - requested = requested.concat(partitionKeys); - } else { - skippedOrDiscarded = skippedOrDiscarded.concat(partitionKeys); - } - }); - - const skippedOrDiscardedSet = new Set(skippedOrDiscarded); - return new Set(requested.filter((partitionKey) => !skippedOrDiscardedSet.has(partitionKey))); -}; +const emptyArray: any[] = []; export const AutomaterializeMiddlePanel = (props: Props) => { - const {assetKey, assetHasDefinedPartitions, selectedEvaluationId} = props; + const { + assetKey, + selectedEvaluationId, + selectedEvaluation: _selectedEvaluation, + definition, + } = props; + + const [selectedPartition, setSelectPartition] = React.useState(null); // We receive the selected evaluation ID and retrieve it here because the middle panel // may be displaying an evaluation that was not retrieved at the page level for the @@ -67,10 +77,23 @@ export const AutomaterializeMiddlePanel = (props: Props) => { cursor: selectedEvaluationId ? `${selectedEvaluationId + 1}` : undefined, limit: 2, }, + skip: !!_selectedEvaluation || !!selectedPartition, }, ); - if (loading && !data) { + const {data: specificPartitionData, previousData: previousSpecificPartitionData} = useQuery< + GetEvaluationsSpecificPartitionQuery, + GetEvaluationsSpecificPartitionQueryVariables + >(GET_EVALUATIONS_SPECIFIC_PARTITION_QUERY, { + variables: { + assetKey, + evaluationId: selectedEvaluationId!, + partition: selectedPartition!, + }, + skip: !selectedEvaluationId || !selectedPartition, + }); + + if (!_selectedEvaluation && loading && !data) { return ( { } if ( - data?.autoMaterializeAssetEvaluationsOrError?.__typename === + data?.assetConditionEvaluationRecordsOrError?.__typename === 'AutoMaterializeAssetEvaluationNeedsMigrationError' ) { return ( @@ -105,69 +128,117 @@ export const AutomaterializeMiddlePanel = (props: Props) => { ); } - const currentRules = - (data?.assetNodeOrError.__typename === 'AssetNode' && - data.assetNodeOrError.autoMaterializePolicy?.rules) || - []; - - const evaluations = data?.autoMaterializeAssetEvaluationsOrError?.records || []; + const evaluations = data?.assetConditionEvaluationRecordsOrError?.records || []; const selectedEvaluation = - evaluations.find((evaluation) => evaluation.evaluationId === selectedEvaluationId) || EMPTY; + _selectedEvaluation ?? + evaluations.find((evaluation) => evaluation.evaluationId === selectedEvaluationId); return ( ); }; export const AutomaterializeMiddlePanelWithData = ({ - currentRules, selectedEvaluation, - assetHasDefinedPartitions, + definition, + selectPartition, + specificPartitionData, + selectedPartition, }: { - currentRules: AutoMaterializeRule[]; - selectedEvaluation: NoConditionsMetEvaluation | AutoMaterializeEvaluationRecordItemFragment; - assetHasDefinedPartitions: boolean; + definition?: AssetViewDefinitionNodeFragment | null; + selectedEvaluation?: AssetConditionEvaluationRecordFragment; + selectPartition: (partitionKey: string | null) => void; + specificPartitionData?: GetEvaluationsSpecificPartitionQuery; + selectedPartition: string | null; }) => { - const runIds = - selectedEvaluation?.__typename === 'AutoMaterializeAssetEvaluationRecord' - ? selectedEvaluation.runIds - : []; - const rulesWithRuleEvaluations = - selectedEvaluation?.__typename === 'AutoMaterializeAssetEvaluationRecord' - ? selectedEvaluation.rulesWithRuleEvaluations - : []; - const rules = - selectedEvaluation?.__typename === 'AutoMaterializeAssetEvaluationRecord' && - selectedEvaluation.rules - ? selectedEvaluation.rules - : currentRules; - - const headerRight = () => { - if (runIds.length === 0) { - return null; - } - if (assetHasDefinedPartitions) { + const statusTag = React.useMemo(() => { + if (selectedEvaluation?.numRequested) { + if (definition?.partitionDefinition) { + return ( + + + + {selectedEvaluation.numRequested} Requested + + + ); + } return ( - + + + + Requested + + ); } - return ; - }; + return ( + + + + Not Requested + + + ); + }, [definition, selectedEvaluation]); + + const evaluation = selectedEvaluation?.evaluation; + const partitionsEvaluated = React.useMemo(() => { + if (evaluation) { + const rootEvaluationNode = evaluation.evaluationNodes.find( + (node) => node.uniqueId === evaluation.rootUniqueId, + ); + if (rootEvaluationNode?.__typename === 'PartitionedAssetConditionEvaluationNode') { + return ( + rootEvaluationNode.numTrue + rootEvaluationNode.numFalse + rootEvaluationNode.numSkipped + ); + } + } + return 0; + }, [evaluation]); + + const {data} = useQuery( + FULL_PARTITIONS_QUERY, + { + variables: definition + ? { + assetKey: {path: definition.assetKey.path}, + } + : undefined, + skip: !definition?.assetKey, + }, + ); + + let partitionKeys: DimensionPartitionKeys[] = emptyArray; + if (data?.assetNodeOrError.__typename === 'AssetNode') { + partitionKeys = data.assetNodeOrError.partitionKeysByDimension; + } + + const allPartitions = React.useMemo(() => { + if (partitionKeys.length === 1) { + return partitionKeys[0]!.partitionKeys; + } else if (partitionKeys.length === 2) { + const firstSet = partitionKeys[0]!.partitionKeys; + const secondSet = partitionKeys[1]!.partitionKeys; + return firstSet.flatMap((key1) => secondSet.map((key2) => `${key1}|${key2}`)); + } else if (partitionKeys.length > 2) { + throw new Error('Only 2 dimensions are supported'); + } + return []; + }, [partitionKeys]); return ( @@ -178,13 +249,145 @@ export const AutomaterializeMiddlePanelWithData = ({ flex={{alignItems: 'center', justifyContent: 'space-between'}} > Result -
{headerRight()}
- + {selectedEvaluation ? ( + + +
+ + Evaluation Result +
{statusTag}
+
+ {selectedEvaluation?.timestamp ? ( + + Timestamp + + + ) : null} + + Duration +
+ {selectedEvaluation?.startTimestamp && selectedEvaluation?.endTimestamp + ? formatElapsedTimeWithMsec( + (selectedEvaluation.endTimestamp - selectedEvaluation.startTimestamp) * + 1000, + ) + : '\u2013'} +
+
+
+
+ + Policy evaluation + + {partitionsEvaluated ? ( + + {numberFormatter.format(partitionsEvaluated)} partitions evaluated + + { + selectPartition(tags[tags.length - 1] || null); + }} + renderDropdownItem={(tag, props) => ( + + )} + renderDropdown={(dropdown) => ( + + {dropdown} + + )} + renderTag={(tag, tagProps) => ( + } + label={ +
+ + + + +
+ } + /> + )} + usePortal={false} + /> + + + +
+
+ ) : null} + + + Runs launched ({selectedEvaluation.runIds.length}) + + +
+ ) : null}
); }; + +const FULL_PARTITIONS_QUERY = gql` + query FullPartitionsQuery($assetKey: AssetKeyInput!) { + assetNodeOrError(assetKey: $assetKey) { + ... on AssetNode { + id + partitionKeysByDimension { + name + type + partitionKeys + } + } + } + } +`; +const TagSelectorWrapper = styled.div` + position: relative; + + ${TagSelectorContainer} { + width: 370px; + padding-left: 32px; + height: 36px; + } +`; + +const SearchIconWrapper = styled.div` + position: absolute; + left: 12px; + top: 0px; + bottom: 0px; + pointer-events: none; + display: flex; + align-items: center; +`; diff --git a/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/AutomaterializeRequestedPartitionsLink.tsx b/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/AutomaterializeRequestedPartitionsLink.tsx deleted file mode 100644 index f453e988d4b92..0000000000000 --- a/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/AutomaterializeRequestedPartitionsLink.tsx +++ /dev/null @@ -1,311 +0,0 @@ -import {gql, useQuery} from '@apollo/client'; -import { - Box, - Button, - ButtonLink, - Dialog, - DialogFooter, - NonIdealState, - Spinner, - Tag, - TextInput, - Caption, -} from '@dagster-io/ui-components'; -import {useVirtualizer} from '@tanstack/react-virtual'; -import * as React from 'react'; -import {Link} from 'react-router-dom'; -import styled from 'styled-components'; - -import {showCustomAlert} from '../../app/CustomAlertProvider'; -import {PYTHON_ERROR_FRAGMENT} from '../../app/PythonErrorFragment'; -import {PythonErrorInfo} from '../../app/PythonErrorInfo'; -import {RunStatusTagWithID} from '../../runs/RunStatusTag'; -import {DagsterTag} from '../../runs/RunTag'; -import {Container, Inner, Row} from '../../ui/VirtualizedTable'; - -import { - RunStatusAndPartitionKeyQuery, - RunStatusAndPartitionKeyQueryVariables, - RunStatusAndTagsFragment, -} from './types/AutomaterializeRequestedPartitionsLink.types'; - -interface Props { - runIds?: string[]; - partitionKeys: string[]; - intent?: React.ComponentProps['intent']; -} - -export const AutomaterializeRequestedPartitionsLink = ({runIds, partitionKeys, intent}: Props) => { - const [isOpen, setIsOpen] = React.useState(false); - const [queryString, setQueryString] = React.useState(''); - const queryLowercase = queryString.toLocaleLowerCase(); - - const count = partitionKeys.length; - - const filteredPartitionKeys = React.useMemo(() => { - if (queryLowercase === '') { - return partitionKeys; - } - return partitionKeys.filter((partitionKey) => - partitionKey.toLocaleLowerCase().includes(queryLowercase), - ); - }, [partitionKeys, queryLowercase]); - - const label = React.useMemo(() => { - if (runIds) { - return count === 1 ? '1 partition launched' : `${count} partitions launched`; - } - return count === 1 ? '1 partition' : `${count} partitions`; - }, [count, runIds]); - - const content = () => { - if (queryString && !filteredPartitionKeys.length) { - return ; - } - - return runIds ? ( - - ) : ( - - ); - }; - - return ( - <> - - {label} - setIsOpen(true)}> - View details - - - setIsOpen(false)} - style={{width: '750px', maxWidth: '80vw', minWidth: '500px'}} - canOutsideClickClose - canEscapeKeyClose - > - -
- {count === 1 ? '1 partition' : `${count} partitions`} -
- {count > 0 ? ( - setQueryString(e.target.value)} - placeholder="Filter by partition…" - style={{width: '252px'}} - /> - ) : null} -
-
{content()}
- - - -
- - ); -}; - -type PartitionRunTuple = [string, RunStatusAndTagsFragment]; - -const PartitionAndRunList = ({runIds, partitionKeys}: Props) => { - const {data, loading} = useQuery< - RunStatusAndPartitionKeyQuery, - RunStatusAndPartitionKeyQueryVariables - >(RUN_STATUS_AND_PARTITION_KEY, { - variables: {filter: {runIds}}, - }); - - const runs = data?.runsOrError; - - if (!runs) { - if (loading) { - return ( - - -
Loading partitions and runs…
-
- ); - } - - return ( - - - - ); - } - - if (runs.__typename === 'PythonError') { - return ( - - { - showCustomAlert({ - title: 'Python error', - body: , - }); - }} - > - View error - - } - /> - - ); - } - - if (runs.__typename === 'InvalidPipelineRunsFilterError' || !runs.results.length) { - return ( - - - - ); - } - - const {results} = runs; - - const runsByPartitionKey: Record = Object.fromEntries( - results - .map((run) => { - const {tags} = run; - const partitionTag = tags.find(({key}) => key === DagsterTag.Partition); - return partitionTag ? [partitionTag.value, run] : null; - }) - .filter((tupleOrNull): tupleOrNull is PartitionRunTuple => !!tupleOrNull), - ); - - return ( - - ); -}; - -const NoMatchesEmptyState = ({queryString}: {queryString: string}) => { - return ( - - - No matching partitions for {queryString} - - } - /> - - ); -}; - -interface VirtualizedListProps { - partitionKeys: string[]; - runsByPartitionKey?: Record; -} - -const VirtualizedPartitionList = ({partitionKeys, runsByPartitionKey}: VirtualizedListProps) => { - const container = React.useRef(null); - - const rowVirtualizer = useVirtualizer({ - count: partitionKeys.length, - getScrollElement: () => container.current, - estimateSize: () => 40, - overscan: 10, - }); - - const totalHeight = rowVirtualizer.getTotalSize(); - const items = rowVirtualizer.getVirtualItems(); - const showRunTag = !!runsByPartitionKey; - - return ( - - - {items.map(({index, key, size, start}) => { - const partitionKey = partitionKeys[index]!; - const runForPartition = runsByPartitionKey ? runsByPartitionKey[partitionKey] : null; - - return ( - - -
{partitionKeys[index]}
- {showRunTag ? ( -
- {runForPartition ? ( - - - - ) : ( - Run not found - )} -
- ) : null} -
-
- ); - })} -
-
- ); -}; - -export const RUN_STATUS_AND_PARTITION_KEY = gql` - query RunStatusAndPartitionKey($filter: RunsFilter) { - runsOrError(filter: $filter) { - ... on Runs { - results { - id - ...RunStatusAndTagsFragment - } - } - ... on InvalidPipelineRunsFilterError { - message - } - ...PythonErrorFragment - } - } - - fragment RunStatusAndTagsFragment on Run { - id - status - tags { - key - value - } - } - - ${PYTHON_ERROR_FRAGMENT} -`; - -const TagLink = styled(Link)` - :focus { - outline: none; - } -`; diff --git a/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/AutomaterializeRightPanel.tsx b/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/AutomaterializeRightPanel.tsx deleted file mode 100644 index e9978fbec89c1..0000000000000 --- a/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/AutomaterializeRightPanel.tsx +++ /dev/null @@ -1,209 +0,0 @@ -import {gql, useQuery} from '@apollo/client'; -import { - Box, - Subheading, - Body, - ExternalAnchorButton, - Icon, - NonIdealState, - Spinner, - Mono, -} from '@dagster-io/ui-components'; -import * as React from 'react'; -import {Link, Redirect} from 'react-router-dom'; - -import {ErrorWrapper} from '../../app/PythonErrorInfo'; -import {FIFTEEN_SECONDS, useQueryRefreshAtInterval} from '../../app/QueryRefresh'; -import {AutomaterializePolicyTag} from '../AutomaterializePolicyTag'; -import {assetDetailsPathForKey} from '../assetDetailsPathForKey'; -import {AssetKey} from '../types'; - -import { - GetPolicyInfoQuery, - GetPolicyInfoQueryVariables, -} from './types/AutomaterializeRightPanel.types'; - -interface Props { - assetKey: AssetKey; -} - -export const AutomaterializeRightPanel = ({assetKey}: Props) => { - const queryResult = useQuery( - GET_POLICY_INFO_QUERY, - {variables: {assetKey}}, - ); - - useQueryRefreshAtInterval(queryResult, FIFTEEN_SECONDS); - const {data, error} = queryResult; - - return ( - - - Overview - -
- {error ? ( - - {JSON.stringify(error)} - - ) : !data ? ( - - - - ) : data.assetNodeOrError.__typename === 'AssetNotFoundError' ? ( - - ) : ( - <> - {data.assetNodeOrError.autoMaterializePolicy ? ( - - Auto-materialize policy - - - } - > - - This asset will be automatically materialized when at least one of the conditions - to the left is met and no skip conditions are met. - - - ) : ( - - -
- An auto-materialize policy specifies how Dagster should attempt to keep an - asset up-to-date. -
-
- } - > - View documentation - -
-
- } - /> - - )} - {data.assetNodeOrError.freshnessPolicy ? ( - - - - This asset will be considered late if it is not materialized within{' '} - {data.assetNodeOrError.freshnessPolicy.maximumLagMinutes} minutes of it’s upstream - dependencies. - - View upstream assets - - - - ) : ( - - -
- A FreshnessPolicy specifies how up-to-date you want a given asset to be. -
-
- } - > - View documentation - -
-
- } - /> - - )} - - )} -
-
- ); -}; - -const RightPanelSection = ({ - title, - children, -}: { - title: React.ReactNode; - children: React.ReactNode; -}) => { - return ( - - {title} - {children} - - ); -}; - -const RightPanelDetail = ({ - title, - value, -}: { - title: React.ReactNode; - tooltip?: React.ReactNode; - value: React.ReactNode; -}) => { - return ( - -
{title}
- {value} -
- ); -}; - -export const GET_POLICY_INFO_QUERY = gql` - query GetPolicyInfoQuery($assetKey: AssetKeyInput!) { - assetNodeOrError(assetKey: $assetKey) { - ... on AssetNode { - id - freshnessPolicy { - maximumLagMinutes - cronSchedule - cronScheduleTimezone - } - autoMaterializePolicy { - policyType - maxMaterializationsPerMinute - rules { - description - decisionType - } - } - } - } - } -`; diff --git a/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/AutomaterializeRunTag.tsx b/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/AutomaterializeRunTag.tsx deleted file mode 100644 index 4c1319dd0d8dd..0000000000000 --- a/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/AutomaterializeRunTag.tsx +++ /dev/null @@ -1,51 +0,0 @@ -import {gql, useQuery} from '@apollo/client'; -import {Tag} from '@dagster-io/ui-components'; -import * as React from 'react'; -import {Link} from 'react-router-dom'; - -import {RunStatusTagWithID} from '../../runs/RunStatusTag'; - -import {RunStatusOnlyQuery, RunStatusOnlyQueryVariables} from './types/AutomaterializeRunTag.types'; - -interface Props { - runId: string; -} - -export const AutomaterializeRunTag = ({runId}: Props) => { - const {data, loading} = useQuery( - RUN_STATUS_ONLY, - { - variables: {runId}, - }, - ); - - if (loading && !data) { - return Loading; - } - - const run = data?.runOrError; - if (run?.__typename !== 'Run') { - return ( - - Run not found - - ); - } - - return ( - - - - ); -}; - -export const RUN_STATUS_ONLY = gql` - query RunStatusOnlyQuery($runId: ID!) { - runOrError(runId: $runId) { - ... on Run { - id - status - } - } - } -`; diff --git a/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/EvaluationCounts.tsx b/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/EvaluationCounts.tsx deleted file mode 100644 index a057d025b87bd..0000000000000 --- a/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/EvaluationCounts.tsx +++ /dev/null @@ -1,73 +0,0 @@ -import { - Box, - Caption, - colorTextBlue, - colorTextGreen, - colorTextLight, - colorTextLighter, - colorTextRed, - colorTextYellow, -} from '@dagster-io/ui-components'; -import * as React from 'react'; - -import {compactNumber} from '../../ui/formatters'; - -interface Props { - numRequested: number; - numSkipped: number; - numDiscarded: number; - isPartitionedAsset: boolean; - selected: boolean; -} - -export const EvaluationCounts = React.memo((props: Props) => { - const {numRequested, numSkipped, numDiscarded, isPartitionedAsset, selected} = props; - - const requested = - numRequested || isPartitionedAsset ? ( - - {isPartitionedAsset ? `${compactNumber(numRequested)} launched` : 'Launched'} - - ) : null; - - const skipped = - numSkipped || isPartitionedAsset ? ( - - {isPartitionedAsset ? `${compactNumber(numSkipped)} skipped` : 'Skipped'} - - ) : null; - - const discarded = - numDiscarded || isPartitionedAsset ? ( - - {isPartitionedAsset ? `${compactNumber(numDiscarded)} discarded` : 'Discarded'} - - ) : null; - - const filtered = [requested, skipped, discarded].filter( - (element): element is React.ReactElement => !!element, - ); - - return ( - - {filtered - .map((element, ii) => [ - element, - - / - , - ]) - .flat() - .slice(0, -1)} - - ); -}); diff --git a/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/GetEvaluationsQuery.tsx b/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/GetEvaluationsQuery.tsx index a2ede01f651a4..1cc0ee004a6ea 100644 --- a/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/GetEvaluationsQuery.tsx +++ b/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/GetEvaluationsQuery.tsx @@ -1,5 +1,96 @@ import {gql} from '@apollo/client'; +import {METADATA_ENTRY_FRAGMENT} from '../../metadata/MetadataEntry'; + +const AssetSubsetFragment = gql` + fragment AssetSubsetFragment on AssetSubset { + subsetValue { + isPartitioned + partitionKeys + partitionKeyRanges { + start + end + } + } + } +`; + +const SpecificPartitionAssetConditionEvaluationNodeFragment = gql` + fragment SpecificPartitionAssetConditionEvaluationNodeFragment on SpecificPartitionAssetConditionEvaluationNode { + description + status + uniqueId + childUniqueIds + metadataEntries { + ...MetadataEntryFragment + } + } + ${METADATA_ENTRY_FRAGMENT} +`; + +const UnpartitionedAssetConditionEvaluationNodeFragment = gql` + fragment UnpartitionedAssetConditionEvaluationNodeFragment on UnpartitionedAssetConditionEvaluationNode { + description + startTimestamp + endTimestamp + status + uniqueId + childUniqueIds + metadataEntries { + ...MetadataEntryFragment + } + } + ${METADATA_ENTRY_FRAGMENT} +`; +const PartitionedAssetConditionEvaluationNodeFragment = gql` + fragment PartitionedAssetConditionEvaluationNodeFragment on PartitionedAssetConditionEvaluationNode { + description + startTimestamp + endTimestamp + numTrue + numFalse + numSkipped + trueSubset { + ...AssetSubsetFragment + } + falseSubset { + ...AssetSubsetFragment + } + candidateSubset { + ...AssetSubsetFragment + } + uniqueId + childUniqueIds + } + ${AssetSubsetFragment} +`; + +const AssetConditionEvaluationRecordFragment = gql` + fragment AssetConditionEvaluationRecordFragment on AssetConditionEvaluationRecord { + id + evaluationId + numRequested + assetKey { + path + } + runIds + timestamp + startTimestamp + endTimestamp + evaluation { + rootUniqueId + evaluationNodes { + ...UnpartitionedAssetConditionEvaluationNodeFragment + ...PartitionedAssetConditionEvaluationNodeFragment + ...SpecificPartitionAssetConditionEvaluationNodeFragment + } + } + } + ${UnpartitionedAssetConditionEvaluationNodeFragment} + ${PartitionedAssetConditionEvaluationNodeFragment} + ${SpecificPartitionAssetConditionEvaluationNodeFragment} +`; + export const GET_EVALUATIONS_QUERY = gql` query GetEvaluationsQuery($assetKey: AssetKeyInput!, $limit: Int!, $cursor: String) { assetNodeOrError(assetKey: $assetKey) { @@ -17,11 +108,11 @@ export const GET_EVALUATIONS_QUERY = gql` } } - autoMaterializeAssetEvaluationsOrError(assetKey: $assetKey, limit: $limit, cursor: $cursor) { - ... on AutoMaterializeAssetEvaluationRecords { + assetConditionEvaluationRecordsOrError(assetKey: $assetKey, limit: $limit, cursor: $cursor) { + ... on AssetConditionEvaluationRecords { records { id - ...AutoMaterializeEvaluationRecordItem + ...AssetConditionEvaluationRecordFragment } } ... on AutoMaterializeAssetEvaluationNeedsMigrationError { @@ -29,58 +120,30 @@ export const GET_EVALUATIONS_QUERY = gql` } } } + ${AssetConditionEvaluationRecordFragment} +`; - fragment AutoMaterializeEvaluationRecordItem on AutoMaterializeAssetEvaluationRecord { - id - evaluationId - numRequested - numSkipped - numDiscarded - timestamp - runIds - rulesWithRuleEvaluations { - ...RuleWithEvaluationsFragment - } - rules { - description - decisionType - className - } - } - - fragment RuleWithEvaluationsFragment on AutoMaterializeRuleWithRuleEvaluations { - rule { - description - decisionType - className - } - ruleEvaluations { - evaluationData { - ... on TextRuleEvaluationData { - text - } - ... on ParentMaterializedRuleEvaluationData { - updatedAssetKeys { - path - } - willUpdateAssetKeys { - path - } - } - ... on WaitingOnKeysRuleEvaluationData { - waitingOnAssetKeys { - path - } - } - } - partitionKeysOrError { - ... on PartitionKeys { - partitionKeys - } - ... on Error { - message - } +export const GET_EVALUATIONS_SPECIFIC_PARTITION_QUERY = gql` + query GetEvaluationsSpecificPartitionQuery( + $assetKey: AssetKeyInput! + $evaluationId: Int! + $partition: String! + ) { + assetConditionEvaluationForPartition( + assetKey: $assetKey + evaluationId: $evaluationId + partition: $partition + ) { + rootUniqueId + evaluationNodes { + ...UnpartitionedAssetConditionEvaluationNodeFragment + ...PartitionedAssetConditionEvaluationNodeFragment + ...SpecificPartitionAssetConditionEvaluationNodeFragment } } } + + ${UnpartitionedAssetConditionEvaluationNodeFragment} + ${PartitionedAssetConditionEvaluationNodeFragment} + ${SpecificPartitionAssetConditionEvaluationNodeFragment} `; diff --git a/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/PartitionSegmentWithPopover.tsx b/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/PartitionSegmentWithPopover.tsx index d2711e81bbe12..759156aa4d886 100644 --- a/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/PartitionSegmentWithPopover.tsx +++ b/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/PartitionSegmentWithPopover.tsx @@ -1,7 +1,10 @@ import { Box, + Menu, + MenuItem, MiddleTruncate, Popover, + Tag, TextInput, TextInputContainer, colorAccentGray, @@ -16,10 +19,11 @@ import * as React from 'react'; import styled from 'styled-components'; import {assertUnreachable} from '../../app/Util'; +import {AssetConditionEvaluationStatus, AssetSubsetValue} from '../../graphql/types'; import {Container, Inner, Row} from '../../ui/VirtualizedTable'; +import {numberFormatter} from '../../ui/formatters'; import {PolicyEvaluationStatusTag} from './PolicyEvaluationStatusTag'; -import {AssetConditionEvaluationStatus, AssetSubset} from './types'; const statusToColors = (status: AssetConditionEvaluationStatus) => { switch (status) { @@ -34,45 +38,72 @@ const statusToColors = (status: AssetConditionEvaluationStatus) => { } }; +type AssetSusbsetWithoutTypenames = { + subsetValue: Omit; +}; + interface Props { description: string; status: AssetConditionEvaluationStatus; - subset: AssetSubset | null; - width: number; + subset: AssetSusbsetWithoutTypenames | null; + selectPartition: (partitionKey: string | null) => void; } +export const PartitionSegmentWithPopover = ({ + description, + selectPartition, + status, + subset, +}: Props) => { + const intent = React.useMemo(() => { + switch (status) { + case AssetConditionEvaluationStatus.FALSE: + return 'warning' as const; + case AssetConditionEvaluationStatus.SKIPPED: + return undefined; + case AssetConditionEvaluationStatus.TRUE: + return 'success' as const; + } + }, [status]); -export const PartitionSegmentWithPopover = ({description, width, status, subset}: Props) => { - const {color, hoverColor} = React.useMemo(() => statusToColors(status), [status]); - const segment = ; if (!subset) { - return segment; + return null; } + const count = subset.subsetValue.partitionKeys?.length || 0; + return ( - - } - > - {segment} - - + + } + > + + {numberFormatter.format(count)} {status.charAt(0) + status.toLowerCase().slice(1)} + + ); }; interface ListProps { description: string; status: AssetConditionEvaluationStatus; - subset: AssetSubset; + subset: AssetSusbsetWithoutTypenames; + selectPartition: (partitionKey: string | null) => void; } const ITEM_HEIGHT = 32; const MAX_ITEMS_BEFORE_TRUNCATION = 4; -const PartitionSubsetList = ({description, status, subset}: ListProps) => { +const PartitionSubsetList = ({description, status, subset, selectPartition}: ListProps) => { const container = React.useRef(null); const [searchValue, setSearchValue] = React.useState(''); @@ -103,6 +134,7 @@ const PartitionSubsetList = ({description, status, subset}: ListProps) => { padding={{vertical: 8, left: 12, right: 8}} border="bottom" flex={{direction: 'row', alignItems: 'center', justifyContent: 'space-between'}} + style={{display: 'grid', gridTemplateColumns: 'minmax(0, 1fr) auto', gap: 8}} > @@ -121,30 +153,35 @@ const PartitionSubsetList = ({description, status, subset}: ListProps) => { ) : null}
MAX_ITEMS_BEFORE_TRUNCATION ? '150px' : count * ITEM_HEIGHT, + height: count > MAX_ITEMS_BEFORE_TRUNCATION ? '150px' : count * ITEM_HEIGHT + 16, overflow: 'hidden', }} > - - {virtualItems.map(({index, key, size, start}) => { - const partitionKey = filteredKeys[index]!; - return ( - - - -
- -
-
-
- ); - })} -
+ + + {virtualItems.map(({index, key, size, start}) => { + const partitionKey = filteredKeys[index]!; + return ( + + { + selectPartition(partitionKey); + }} + text={ + + +
+ +
+
+ } + /> +
+ ); + })} +
+
diff --git a/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/PolicyEvaluationStatusTag.tsx b/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/PolicyEvaluationStatusTag.tsx index 81a69c23cc865..4e577d2108864 100644 --- a/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/PolicyEvaluationStatusTag.tsx +++ b/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/PolicyEvaluationStatusTag.tsx @@ -2,8 +2,7 @@ import {Tag} from '@dagster-io/ui-components'; import * as React from 'react'; import {assertUnreachable} from '../../app/Util'; - -import {AssetConditionEvaluationStatus} from './types'; +import {AssetConditionEvaluationStatus} from '../../graphql/types'; export const PolicyEvaluationStatusTag = ({status}: {status: AssetConditionEvaluationStatus}) => { switch (status) { diff --git a/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/PolicyEvaluationTable.tsx b/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/PolicyEvaluationTable.tsx index 145b3e762eebb..bee1f101564b6 100644 --- a/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/PolicyEvaluationTable.tsx +++ b/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/PolicyEvaluationTable.tsx @@ -1,5 +1,8 @@ +import {gql, useQuery} from '@apollo/client'; import { Box, + Button, + Dialog, Table, colorBackgroundDefault, colorBackgroundDefaultHover, @@ -9,25 +12,43 @@ import { import * as React from 'react'; import styled, {css} from 'styled-components'; +<<<<<<< Updated upstream import {assertUnreachable} from '../../app/Util'; +======= +import {AssetConditionEvaluationStatus} from '../../graphql/types'; +import {MetadataEntryFragment} from '../../metadata/types/MetadataEntry.types'; +>>>>>>> Stashed changes import {TimeElapsed} from '../../runs/TimeElapsed'; +import {AssetEventMetadataEntriesTable} from '../AssetEventMetadataEntriesTable'; +import {AssetViewDefinitionNodeFragment} from '../types/AssetView.types'; import {PartitionSegmentWithPopover} from './PartitionSegmentWithPopover'; import {PolicyEvaluationCondition} from './PolicyEvaluationCondition'; import {PolicyEvaluationStatusTag} from './PolicyEvaluationStatusTag'; -import {flattenEvaluations} from './flattenEvaluations'; +import {FlattenedConditionEvaluation, flattenEvaluations} from './flattenEvaluations'; import { +<<<<<<< Updated upstream AssetConditionEvaluation, AssetConditionEvaluationStatus, PartitionedAssetConditionEvaluation, SpecificPartitionAssetConditionEvaluation, UnpartitionedAssetConditionEvaluation, } from './types'; +======= + AssetConditionEvaluationRecordFragment, + PartitionedAssetConditionEvaluationNodeFragment, + SpecificPartitionAssetConditionEvaluationNodeFragment, + UnpartitionedAssetConditionEvaluationNodeFragment, +} from './types/GetEvaluationsQuery.types'; +>>>>>>> Stashed changes -interface Props { - rootEvaluation: T; +interface Props { + evaluationRecord: Pick; + definition?: AssetViewDefinitionNodeFragment | null; + selectPartition: (partitionKey: string | null) => void; } +<<<<<<< Updated upstream export const PolicyEvaluationTable = ({ rootEvaluation, }: Props) => { @@ -40,29 +61,79 @@ export const PolicyEvaluationTable = ({ default: return assertUnreachable(rootEvaluation); } +======= +export const PolicyEvaluationTable = ({evaluationRecord, definition, selectPartition}: Props) => { + const flattened = React.useMemo(() => flattenEvaluations(evaluationRecord), [evaluationRecord]); + if (flattened[0]?.evaluation.__typename === 'PartitionedAssetConditionEvaluationNode') { + return ( + [] + } + definition={definition} + selectPartition={selectPartition} + /> + ); + } + + return ( + [] + | FlattenedConditionEvaluation[] + } + /> + ); +>>>>>>> Stashed changes }; const UnpartitionedPolicyEvaluationTable = ({ - rootEvaluation, + flattenedRecords, }: { +<<<<<<< Updated upstream rootEvaluation: UnpartitionedAssetConditionEvaluation | SpecificPartitionAssetConditionEvaluation; }) => { const [hoveredKey, setHoveredKey] = React.useState(null); const flattened = React.useMemo(() => flattenEvaluations(rootEvaluation), [rootEvaluation]); const showDuration = rootEvaluation.__typename === 'UnpartitionedAssetConditionEvaluation'; +======= + flattenedRecords: + | FlattenedConditionEvaluation[] + | FlattenedConditionEvaluation[]; +}) => { + const [hoveredKey, setHoveredKey] = React.useState(null); + const isSpecificPartitionAssetConditionEvaluations = + flattenedRecords[0]?.evaluation.__typename === 'SpecificPartitionAssetConditionEvaluationNode'; + +>>>>>>> Stashed changes return ( Condition Result +<<<<<<< Updated upstream {showDuration ? Duration : null} +======= + {isSpecificPartitionAssetConditionEvaluations ? null : Duration} +>>>>>>> Stashed changes Details +<<<<<<< Updated upstream {flattened.map(({evaluation, id, parentId, depth, type}) => { const {description, status} = evaluation; +======= + {flattenedRecords.map(({evaluation, id, parentId, depth, type}) => { + const {description, status} = evaluation; + let endTimestamp, startTimestamp; + if ('endTimestamp' in evaluation) { + endTimestamp = evaluation.endTimestamp; + startTimestamp = evaluation.startTimestamp; + } +>>>>>>> Stashed changes return ( +<<<<<<< Updated upstream {showDuration ? ( {evaluation.__typename === 'UnpartitionedAssetConditionEvaluation' ? ( @@ -95,6 +167,16 @@ const UnpartitionedPolicyEvaluationTable = ({ ) : null} +======= + {startTimestamp && endTimestamp ? ( + + + + ) : null} + + {evaluation.metadataEntries ? : null} + +>>>>>>> Stashed changes ); })} @@ -103,15 +185,40 @@ const UnpartitionedPolicyEvaluationTable = ({ ); }; +const ViewDetailsButton = ({ + evaluation, +}: { + evaluation: {metadataEntries: MetadataEntryFragment[]; timestamp: string}; +}) => { + const [showDetails, setShowDetails] = React.useState(false); + return ( + <> + + + + + + ); +}; + const FULL_SEGMENTS_WIDTH = 200; const PartitionedPolicyEvaluationTable = ({ - rootEvaluation, + flattenedRecords, + selectPartition, }: { - rootEvaluation: PartitionedAssetConditionEvaluation; + flattenedRecords: FlattenedConditionEvaluation[]; + definition?: AssetViewDefinitionNodeFragment | null; + selectPartition: (partitionKey: string | null) => void; }) => { const [hoveredKey, setHoveredKey] = React.useState(null); - const flattened = React.useMemo(() => flattenEvaluations(rootEvaluation), [rootEvaluation]); + return ( @@ -122,7 +229,7 @@ const PartitionedPolicyEvaluationTable = ({ - {flattened.map(({evaluation, id, parentId, depth, type}) => { + {flattenedRecords.map(({evaluation, id, parentId, depth, type}) => { const { description, endTimestamp, @@ -134,7 +241,6 @@ const PartitionedPolicyEvaluationTable = ({ falseSubset, candidateSubset, } = evaluation; - const total = numTrue + numFalse + numSkipped; return ( ) : null} {numFalse > 0 ? ( @@ -171,7 +277,7 @@ const PartitionedPolicyEvaluationTable = ({ status={AssetConditionEvaluationStatus.FALSE} description={description} subset={falseSubset} - width={Math.ceil((numFalse / total) * FULL_SEGMENTS_WIDTH)} + selectPartition={selectPartition} /> ) : null} {numSkipped > 0 ? ( @@ -179,13 +285,13 @@ const PartitionedPolicyEvaluationTable = ({ status={AssetConditionEvaluationStatus.SKIPPED} description={description} subset={candidateSubset} - width={Math.ceil((numSkipped / total) * FULL_SEGMENTS_WIDTH)} + selectPartition={selectPartition} /> ) : null}
- + ); diff --git a/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/RuleEvaluationOutcomes.tsx b/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/RuleEvaluationOutcomes.tsx deleted file mode 100644 index feb048d9230ca..0000000000000 --- a/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/RuleEvaluationOutcomes.tsx +++ /dev/null @@ -1,236 +0,0 @@ -import { - Box, - Icon, - Tag, - colorTextDefault, - colorTextLight, - colorTextLighter, -} from '@dagster-io/ui-components'; -import groupBy from 'lodash/groupBy'; -import * as React from 'react'; - -import {assertUnreachable} from '../../app/Util'; -import { - AutoMaterializeDecisionType, - AutoMaterializeRule, - AutoMaterializeRuleEvaluation, -} from '../../graphql/types'; - -import {AutomaterializeRequestedPartitionsLink} from './AutomaterializeRequestedPartitionsLink'; -import {CollapsibleSection} from './CollapsibleSection'; -import {ParentUpdatedLink} from './ParentUpdatedLink'; -import {ParentUpdatedPartitionLink} from './ParentUpdatedPartitionLink'; -import {WaitingOnAssetKeysLink} from './WaitingOnAssetKeysLink'; -import {WaitingOnAssetKeysPartitionLink} from './WaitingOnAssetKeysPartitionLink'; -import {RuleWithEvaluationsFragment} from './types/GetEvaluationsQuery.types'; - -interface RuleEvaluationOutcomeProps { - text: string; - met: boolean; - rightElement?: React.ReactNode; -} - -const RuleEvaluationOutcome = ({text, met, rightElement}: RuleEvaluationOutcomeProps) => { - return ( - - - -
- {text.slice(0, 1).toUpperCase()} - {text.slice(1)} -
-
- {rightElement} -
- ); -}; - -const SECTIONS: { - decisionType: AutoMaterializeDecisionType; - header: string; - details: string; - intent?: React.ComponentProps['intent']; - partitionedOnly?: boolean; -}[] = [ - { - decisionType: AutoMaterializeDecisionType.MATERIALIZE, - header: 'Materialization conditions met', - details: - 'These conditions trigger a materialization, unless they are blocked by a skip or discard condition.', - }, - { - decisionType: AutoMaterializeDecisionType.SKIP, - header: 'Skip conditions met', - details: 'Skips will materialize in a future evaluation, once the skip condition is resolved.', - }, - { - decisionType: AutoMaterializeDecisionType.DISCARD, - header: 'Discard conditions met', - details: - 'Discarded partitions will not be materialized unless new materialization conditions occur. You may want to run a manual backfill to respond to the materialize conditions.', - intent: 'danger', - partitionedOnly: true, - }, -]; - -interface RuleEvaluationOutcomesProps { - rules: AutoMaterializeRule[]; - ruleEvaluations: RuleWithEvaluationsFragment[]; - assetHasDefinedPartitions: boolean; -} - -export const RuleEvaluationOutcomes = ({ - rules, - ruleEvaluations, - assetHasDefinedPartitions, -}: RuleEvaluationOutcomesProps) => { - const groupedRules = groupBy(rules, (rule) => rule.decisionType); - - return ( - <> - {SECTIONS.filter( - (section) => - groupedRules[section.decisionType] && - (assetHasDefinedPartitions || !section.partitionedOnly), - ).map((section) => ( - - - {(groupedRules[section.decisionType] || []).map(({description}, idx) => { - const evaluations = - ruleEvaluations.find((e) => e.rule?.description === description)?.ruleEvaluations || - []; - return ( - 0} - rightElement={ - assetHasDefinedPartitions ? ( - - ) : ( - - ) - } - /> - ); - })} - - - ))} - - ); -}; - -const RightElementForEvaluations = ({ - evaluations, -}: { - evaluations: AutoMaterializeRuleEvaluation[]; - intent?: React.ComponentProps['intent']; -}) => { - const first = evaluations.map((e) => e.evaluationData!).find(Boolean); - if (!first) { - return
; - } - switch (first.__typename) { - case 'ParentMaterializedRuleEvaluationData': - return ( - - ); - case 'WaitingOnKeysRuleEvaluationData': - return ; - case 'TextRuleEvaluationData': - return {first.text}; - default: - assertUnreachable(first); - } - - return ; -}; - -const partitionKeysOf = (e: AutoMaterializeRuleEvaluation) => - e.partitionKeysOrError?.__typename === 'PartitionKeys' - ? e.partitionKeysOrError.partitionKeys - : []; - -const RightElementForPartitionedEvaluations = ({ - evaluations, - intent, -}: { - evaluations: AutoMaterializeRuleEvaluation[]; - intent?: React.ComponentProps['intent']; -}) => { - const evaluationsWithData = evaluations.filter((e) => !!e.evaluationData); - const first = evaluationsWithData[0]?.evaluationData; - if (!first) { - const partitionKeys = evaluations.flatMap(partitionKeysOf); - return partitionKeys.length ? ( - - ) : ( -
- ); - } - - const typename = first.__typename; - switch (typename) { - case 'ParentMaterializedRuleEvaluationData': - const updatedAssetKeys = Object.fromEntries( - evaluationsWithData.flatMap((e) => - partitionKeysOf(e).map((key) => [ - key, - (e.evaluationData?.__typename === 'ParentMaterializedRuleEvaluationData' && - e.evaluationData.updatedAssetKeys) || - [], - ]), - ), - ); - const willUpdateAssetKeys = Object.fromEntries( - evaluationsWithData.flatMap((e) => - partitionKeysOf(e).map((key) => [ - key, - (e.evaluationData?.__typename === 'ParentMaterializedRuleEvaluationData' && - e.evaluationData.willUpdateAssetKeys) || - [], - ]), - ), - ); - - return ( - - ); - case 'WaitingOnKeysRuleEvaluationData': - const assetKeysByPartition = Object.fromEntries( - evaluationsWithData.flatMap((e) => - partitionKeysOf(e).map((key) => [ - key, - (e.evaluationData?.__typename === 'WaitingOnKeysRuleEvaluationData' && - e.evaluationData.waitingOnAssetKeys) || - [], - ]), - ), - ); - return ; - case 'TextRuleEvaluationData': - return {first.text}; - default: - assertUnreachable(typename); - } -}; diff --git a/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/__fixtures__/AutoMaterializePolicyPage.fixtures.ts b/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/__fixtures__/AutoMaterializePolicyPage.fixtures.ts index 1a8f82acd7a11..ea2080e1a6b64 100644 --- a/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/__fixtures__/AutoMaterializePolicyPage.fixtures.ts +++ b/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/__fixtures__/AutoMaterializePolicyPage.fixtures.ts @@ -3,26 +3,19 @@ import {DocumentNode} from 'graphql'; import { AutoMaterializeDecisionType, - AutoMaterializePolicyType, + buildAssetConditionEvaluationRecords, buildAssetKey, buildAssetNode, - buildAutoMaterializeAssetEvaluationNeedsMigrationError, buildAutoMaterializeAssetEvaluationRecord, buildAutoMaterializeAssetEvaluationRecords, buildAutoMaterializePolicy, buildAutoMaterializeRule, buildAutoMaterializeRuleEvaluation, buildAutoMaterializeRuleWithRuleEvaluations, - buildFreshnessPolicy, buildParentMaterializedRuleEvaluationData, buildPartitionKeys, } from '../../../graphql/types'; -import {GET_POLICY_INFO_QUERY} from '../AutomaterializeRightPanel'; import {GET_EVALUATIONS_QUERY} from '../GetEvaluationsQuery'; -import { - GetPolicyInfoQuery, - GetPolicyInfoQueryVariables, -} from '../types/AutomaterializeRightPanel.types'; import { GetEvaluationsQuery, GetEvaluationsQueryVariables, @@ -69,20 +62,6 @@ export const buildGetEvaluationsQuery = ({ }); }; -export const buildGetPolicyInfoQuery = ({ - variables, - data, -}: { - variables: GetPolicyInfoQueryVariables; - data: Omit; -}): MockedResponse => { - return buildQueryMock({ - query: GET_POLICY_INFO_QUERY, - variables, - data, - }); -}; - const ONE_MINUTE = 1000 * 60; export const TEST_EVALUATION_ID = 27; @@ -285,9 +264,7 @@ export const Evaluations = { }), currentAutoMaterializeEvaluationId: 1000, }), - autoMaterializeAssetEvaluationsOrError: buildAutoMaterializeAssetEvaluationRecords({ - records: [], - }), + assetConditionEvaluationRecordsOrError: buildAssetConditionEvaluationRecords(), }, }); }, @@ -304,10 +281,7 @@ export const Evaluations = { rules: BASE_AUTOMATERIALIZE_RULES, }), }), - autoMaterializeAssetEvaluationsOrError: - buildAutoMaterializeAssetEvaluationNeedsMigrationError({ - message: 'Test message', - }), + assetConditionEvaluationRecordsOrError: buildAssetConditionEvaluationRecords(), }, }); }, @@ -324,9 +298,7 @@ export const Evaluations = { rules: [...BASE_AUTOMATERIALIZE_RULES, DISCARD_RULE], }), }), - autoMaterializeAssetEvaluationsOrError: buildAutoMaterializeAssetEvaluationRecords({ - records: assetKeyPath ? [SINGLE_MATERIALIZE_RECORD_NO_PARTITIONS] : [], - }), + assetConditionEvaluationRecordsOrError: buildAssetConditionEvaluationRecords(), }, }); }, @@ -343,9 +315,7 @@ export const Evaluations = { rules: [...BASE_AUTOMATERIALIZE_RULES, DISCARD_RULE], }), }), - autoMaterializeAssetEvaluationsOrError: buildAutoMaterializeAssetEvaluationRecords({ - records: assetKeyPath ? [SINGLE_MATERIALIZE_RECORD_WITH_PARTITIONS] : [], - }), + assetConditionEvaluationRecordsOrError: buildAssetConditionEvaluationRecords(), }, }); }, @@ -362,74 +332,7 @@ export const Evaluations = { rules: [...BASE_AUTOMATERIALIZE_RULES, DISCARD_RULE], }), }), - autoMaterializeAssetEvaluationsOrError: buildAutoMaterializeAssetEvaluationRecords({ - records: buildEvaluationRecordsWithPartitions(), - }), - }, - }); - }, -}; - -export const Policies = { - YesAutomaterializeNoFreshnessPolicy: ( - assetKeyPath: string[], - policyType: AutoMaterializePolicyType = AutoMaterializePolicyType.EAGER, - ) => { - return buildGetPolicyInfoQuery({ - variables: { - assetKey: {path: assetKeyPath}, - }, - data: { - assetNodeOrError: buildAssetNode({ - freshnessPolicy: null, - autoMaterializePolicy: buildAutoMaterializePolicy({ - policyType, - }), - }), - }, - }); - }, - YesAutomaterializeYesFreshnessPolicy: ( - assetKeyPath: string[], - policyType: AutoMaterializePolicyType = AutoMaterializePolicyType.EAGER, - ) => { - return buildGetPolicyInfoQuery({ - variables: { - assetKey: {path: assetKeyPath}, - }, - data: { - assetNodeOrError: buildAssetNode({ - freshnessPolicy: buildFreshnessPolicy({}), - autoMaterializePolicy: buildAutoMaterializePolicy({ - policyType, - }), - }), - }, - }); - }, - NoAutomaterializeYesFreshnessPolicy: (assetKeyPath: string[]) => { - return buildGetPolicyInfoQuery({ - variables: { - assetKey: {path: assetKeyPath}, - }, - data: { - assetNodeOrError: buildAssetNode({ - freshnessPolicy: buildFreshnessPolicy(), - autoMaterializePolicy: null, - }), - }, - }); - }, - NoAutomaterializeNoFreshnessPolicy: (assetKeyPath: string[]) => { - return buildGetPolicyInfoQuery({ - variables: { - assetKey: {path: assetKeyPath}, - }, - data: { - assetNodeOrError: buildAssetNode({ - freshnessPolicy: null, - autoMaterializePolicy: null, - }), + assetConditionEvaluationRecordsOrError: buildAssetConditionEvaluationRecords(), }, }); }, diff --git a/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/__stories__/AutomaterializeMiddlePanel.stories.tsx b/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/__stories__/AutomaterializeMiddlePanel.stories.tsx index 806e531338f96..af5176416c574 100644 --- a/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/__stories__/AutomaterializeMiddlePanel.stories.tsx +++ b/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/__stories__/AutomaterializeMiddlePanel.stories.tsx @@ -1,7 +1,7 @@ import {MockedProvider} from '@apollo/client/testing'; import * as React from 'react'; -import {RunStatus} from '../../../graphql/types'; +import {RunStatus, buildAssetNode, buildPartitionDefinition} from '../../../graphql/types'; import { AutomaterializeMiddlePanel, AutomaterializeMiddlePanelWithData, @@ -23,11 +23,7 @@ export const Empty = () => { mocks={[Evaluations.Single(path), buildRunStatusOnlyQuery('abcdef12', RunStatus.STARTED)]} >
- +
); @@ -42,11 +38,7 @@ export const WithoutPartitions = () => { ]} >
- +
); @@ -63,7 +55,7 @@ export const WithPartitions = () => {
diff --git a/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/__stories__/PartitionSegmentWithPopover.stories.tsx b/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/__stories__/PartitionSegmentWithPopover.stories.tsx index eda22c37b78b3..030cb2e663ed6 100644 --- a/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/__stories__/PartitionSegmentWithPopover.stories.tsx +++ b/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/__stories__/PartitionSegmentWithPopover.stories.tsx @@ -2,8 +2,8 @@ import {Box} from '@dagster-io/ui-components'; import faker from 'faker'; import * as React from 'react'; +import {AssetConditionEvaluationStatus} from '../../../graphql/types'; import {PartitionSegmentWithPopover} from '../PartitionSegmentWithPopover'; -import {AssetConditionEvaluationStatus, AssetSubset} from '../types'; // eslint-disable-next-line import/no-default-export export default { @@ -14,7 +14,7 @@ export default { const PARTITION_COUNT = 300; export const TruePartitions = () => { - const subset: AssetSubset = React.useMemo(() => { + const subset = React.useMemo(() => { const partitionKeys = new Array(PARTITION_COUNT) .fill(null) .map(() => faker.random.words(2).toLowerCase().replace(/ /g, '-')); @@ -42,7 +42,7 @@ export const TruePartitions = () => { }; export const FalsePartitions = () => { - const subset: AssetSubset = React.useMemo(() => { + const subset = React.useMemo(() => { const partitionKeys = new Array(PARTITION_COUNT) .fill(null) .map(() => faker.random.words(2).toLowerCase().replace(/ /g, '-')); @@ -70,7 +70,7 @@ export const FalsePartitions = () => { }; export const SkippedPartitions = () => { - const subset: AssetSubset = React.useMemo(() => { + const subset = React.useMemo(() => { const partitionKeys = new Array(PARTITION_COUNT) .fill(null) .map(() => faker.random.words(2).toLowerCase().replace(/ /g, '-')); @@ -98,7 +98,7 @@ export const SkippedPartitions = () => { }; export const FewPartitions = () => { - const subset: AssetSubset = React.useMemo(() => { + const subset = React.useMemo(() => { const partitionKeys = new Array(2) .fill(null) .map(() => faker.random.words(2).toLowerCase().replace(/ /g, '-')); diff --git a/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/__stories__/PolicyEvaluationTable.stories.tsx b/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/__stories__/PolicyEvaluationTable.stories.tsx index f861c0a67f2be..8e135fc697afe 100644 --- a/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/__stories__/PolicyEvaluationTable.stories.tsx +++ b/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/__stories__/PolicyEvaluationTable.stories.tsx @@ -1,12 +1,18 @@ import * as React from 'react'; -import {PolicyEvaluationTable} from '../PolicyEvaluationTable'; import { AssetConditionEvaluationStatus, +<<<<<<< Updated upstream PartitionedAssetConditionEvaluation, SpecificPartitionAssetConditionEvaluation, UnpartitionedAssetConditionEvaluation, } from '../types'; +======= + buildAssetConditionEvaluationRecord, + buildUnpartitionedAssetConditionEvaluationNode, +} from '../../../graphql/types'; +import {PolicyEvaluationTable} from '../PolicyEvaluationTable'; +>>>>>>> Stashed changes // eslint-disable-next-line import/no-default-export export default { @@ -15,14 +21,13 @@ export default { }; export const NonPartitioned = () => { - const evaluation: UnpartitionedAssetConditionEvaluation = { - __typename: 'UnpartitionedAssetConditionEvaluation' as const, + const evaluation = buildAssetConditionEvaluationRecord({ description: 'All are true:', startTimestamp: 1, endTimestamp: 200, status: AssetConditionEvaluationStatus.TRUE, childEvaluations: [ - { + buildUnpartition{ __typename: 'UnpartitionedAssetConditionEvaluation' as const, description: 'Any are true:', startTimestamp: 1, @@ -84,7 +89,7 @@ export const NonPartitioned = () => { childEvaluations: null, }, ], - }; + }); return ; }; diff --git a/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/flattenEvaluations.tsx b/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/flattenEvaluations.tsx index ce90826910bf1..9ab21c9408b2e 100644 --- a/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/flattenEvaluations.tsx +++ b/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/flattenEvaluations.tsx @@ -1,7 +1,12 @@ import {ConditionType} from './PolicyEvaluationCondition'; -import {AssetConditionEvaluation} from './types'; +import { + AssetConditionEvaluationRecordFragment, + PartitionedAssetConditionEvaluationNodeFragment, + SpecificPartitionAssetConditionEvaluationNodeFragment, + UnpartitionedAssetConditionEvaluationNodeFragment, +} from './types/GetEvaluationsQuery.types'; -type FlattenedConditionEvaluation = { +export type FlattenedConditionEvaluation = { evaluation: T; id: number; parentId: number | null; @@ -9,15 +14,31 @@ type FlattenedConditionEvaluation = { type: ConditionType; }; -export const flattenEvaluations = (rootEvaluation: T) => { - const all: FlattenedConditionEvaluation[] = []; +type Evaluation = + | PartitionedAssetConditionEvaluationNodeFragment + | UnpartitionedAssetConditionEvaluationNodeFragment + | SpecificPartitionAssetConditionEvaluationNodeFragment; + +type FlattenedEvaluation = + | FlattenedConditionEvaluation + | FlattenedConditionEvaluation + | FlattenedConditionEvaluation; + +export const flattenEvaluations = ( + evaluationRecord: Pick, +) => { + const all: FlattenedEvaluation[] = []; let counter = 0; - const append = (evaluation: T, parentId: number | null, depth: number) => { + const recordsById = Object.fromEntries( + evaluationRecord.evaluation.evaluationNodes.map((node) => [node.uniqueId, node]), + ); + + const append = (evaluation: Evaluation, parentId: number | null, depth: number) => { const id = counter + 1; const type = - evaluation.childEvaluations && evaluation.childEvaluations.length > 0 ? 'group' : 'leaf'; + evaluation.childUniqueIds && evaluation.childUniqueIds.length > 0 ? 'group' : 'leaf'; all.push({ evaluation, @@ -25,18 +46,19 @@ export const flattenEvaluations = (rootEvalu parentId: parentId === null ? counter : parentId, depth, type, - } as FlattenedConditionEvaluation); + } as FlattenedEvaluation); counter = id; - if (evaluation.childEvaluations) { + if (evaluation.childUniqueIds) { const parentCounter = counter; - evaluation.childEvaluations.forEach((child) => { - append(child as T, parentCounter, depth + 1); + evaluation.childUniqueIds.forEach((childId) => { + const child = recordsById[childId]!; + append(child, parentCounter, depth + 1); }); } }; - append(rootEvaluation, null, 0); + append(recordsById[evaluationRecord.evaluation.rootUniqueId]!, null, 0); return all; }; diff --git a/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/types/AutomaterializeRequestedPartitionsLink.types.ts b/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/types/AutomaterializeRequestedPartitionsLink.types.ts deleted file mode 100644 index ee441dd774a59..0000000000000 --- a/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/types/AutomaterializeRequestedPartitionsLink.types.ts +++ /dev/null @@ -1,39 +0,0 @@ -// Generated GraphQL types, do not edit manually. - -import * as Types from '../../../graphql/types'; - -export type RunStatusAndPartitionKeyQueryVariables = Types.Exact<{ - filter?: Types.InputMaybe; -}>; - -export type RunStatusAndPartitionKeyQuery = { - __typename: 'Query'; - runsOrError: - | {__typename: 'InvalidPipelineRunsFilterError'; message: string} - | { - __typename: 'PythonError'; - message: string; - stack: Array; - errorChain: Array<{ - __typename: 'ErrorChainLink'; - isExplicitLink: boolean; - error: {__typename: 'PythonError'; message: string; stack: Array}; - }>; - } - | { - __typename: 'Runs'; - results: Array<{ - __typename: 'Run'; - id: string; - status: Types.RunStatus; - tags: Array<{__typename: 'PipelineTag'; key: string; value: string}>; - }>; - }; -}; - -export type RunStatusAndTagsFragment = { - __typename: 'Run'; - id: string; - status: Types.RunStatus; - tags: Array<{__typename: 'PipelineTag'; key: string; value: string}>; -}; diff --git a/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/types/AutomaterializeRightPanel.types.ts b/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/types/AutomaterializeRightPanel.types.ts deleted file mode 100644 index 18d58a6b68c98..0000000000000 --- a/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/types/AutomaterializeRightPanel.types.ts +++ /dev/null @@ -1,33 +0,0 @@ -// Generated GraphQL types, do not edit manually. - -import * as Types from '../../../graphql/types'; - -export type GetPolicyInfoQueryVariables = Types.Exact<{ - assetKey: Types.AssetKeyInput; -}>; - -export type GetPolicyInfoQuery = { - __typename: 'Query'; - assetNodeOrError: - | { - __typename: 'AssetNode'; - id: string; - freshnessPolicy: { - __typename: 'FreshnessPolicy'; - maximumLagMinutes: number; - cronSchedule: string | null; - cronScheduleTimezone: string | null; - } | null; - autoMaterializePolicy: { - __typename: 'AutoMaterializePolicy'; - policyType: Types.AutoMaterializePolicyType; - maxMaterializationsPerMinute: number | null; - rules: Array<{ - __typename: 'AutoMaterializeRule'; - description: string; - decisionType: Types.AutoMaterializeDecisionType; - }>; - } | null; - } - | {__typename: 'AssetNotFoundError'}; -}; diff --git a/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/types/AutomaterializeRunTag.types.ts b/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/types/AutomaterializeRunTag.types.ts deleted file mode 100644 index 8f37485fe08ad..0000000000000 --- a/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/types/AutomaterializeRunTag.types.ts +++ /dev/null @@ -1,15 +0,0 @@ -// Generated GraphQL types, do not edit manually. - -import * as Types from '../../../graphql/types'; - -export type RunStatusOnlyQueryVariables = Types.Exact<{ - runId: Types.Scalars['ID']; -}>; - -export type RunStatusOnlyQuery = { - __typename: 'Query'; - runOrError: - | {__typename: 'PythonError'} - | {__typename: 'Run'; id: string; status: Types.RunStatus} - | {__typename: 'RunNotFoundError'}; -}; diff --git a/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/types/GetEvaluationsQuery.types.ts b/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/types/GetEvaluationsQuery.types.ts index 5041a1fd939ed..2f911a966d488 100644 --- a/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/types/GetEvaluationsQuery.types.ts +++ b/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/types/GetEvaluationsQuery.types.ts @@ -2,6 +2,653 @@ import * as Types from '../../../graphql/types'; +export type AssetSubsetFragment = { + __typename: 'AssetSubset'; + subsetValue: { + __typename: 'AssetSubsetValue'; + isPartitioned: boolean; + partitionKeys: Array | null; + partitionKeyRanges: Array<{__typename: 'PartitionKeyRange'; start: string; end: string}> | null; + }; +}; + +export type SpecificPartitionAssetConditionEvaluationNodeFragment = { + __typename: 'SpecificPartitionAssetConditionEvaluationNode'; + description: string; + status: Types.AssetConditionEvaluationStatus; + uniqueId: string; + childUniqueIds: Array; + metadataEntries: Array< + | { + __typename: 'AssetMetadataEntry'; + label: string; + description: string | null; + assetKey: {__typename: 'AssetKey'; path: Array}; + } + | { + __typename: 'BoolMetadataEntry'; + boolValue: boolean | null; + label: string; + description: string | null; + } + | { + __typename: 'FloatMetadataEntry'; + floatValue: number | null; + label: string; + description: string | null; + } + | { + __typename: 'IntMetadataEntry'; + intValue: number | null; + intRepr: string; + label: string; + description: string | null; + } + | { + __typename: 'JobMetadataEntry'; + jobName: string; + repositoryName: string | null; + locationName: string; + label: string; + description: string | null; + } + | { + __typename: 'JsonMetadataEntry'; + jsonString: string; + label: string; + description: string | null; + } + | { + __typename: 'MarkdownMetadataEntry'; + mdStr: string; + label: string; + description: string | null; + } + | {__typename: 'NotebookMetadataEntry'; path: string; label: string; description: string | null} + | {__typename: 'NullMetadataEntry'; label: string; description: string | null} + | {__typename: 'PathMetadataEntry'; path: string; label: string; description: string | null} + | { + __typename: 'PipelineRunMetadataEntry'; + runId: string; + label: string; + description: string | null; + } + | { + __typename: 'PythonArtifactMetadataEntry'; + module: string; + name: string; + label: string; + description: string | null; + } + | { + __typename: 'TableMetadataEntry'; + label: string; + description: string | null; + table: { + __typename: 'Table'; + records: Array; + schema: { + __typename: 'TableSchema'; + columns: Array<{ + __typename: 'TableColumn'; + name: string; + description: string | null; + type: string; + constraints: { + __typename: 'TableColumnConstraints'; + nullable: boolean; + unique: boolean; + other: Array; + }; + }>; + constraints: {__typename: 'TableConstraints'; other: Array} | null; + }; + }; + } + | { + __typename: 'TableSchemaMetadataEntry'; + label: string; + description: string | null; + schema: { + __typename: 'TableSchema'; + columns: Array<{ + __typename: 'TableColumn'; + name: string; + description: string | null; + type: string; + constraints: { + __typename: 'TableColumnConstraints'; + nullable: boolean; + unique: boolean; + other: Array; + }; + }>; + constraints: {__typename: 'TableConstraints'; other: Array} | null; + }; + } + | {__typename: 'TextMetadataEntry'; text: string; label: string; description: string | null} + | {__typename: 'UrlMetadataEntry'; url: string; label: string; description: string | null} + >; +}; + +export type UnpartitionedAssetConditionEvaluationNodeFragment = { + __typename: 'UnpartitionedAssetConditionEvaluationNode'; + description: string; + startTimestamp: number | null; + endTimestamp: number | null; + status: Types.AssetConditionEvaluationStatus; + uniqueId: string; + childUniqueIds: Array; + metadataEntries: Array< + | { + __typename: 'AssetMetadataEntry'; + label: string; + description: string | null; + assetKey: {__typename: 'AssetKey'; path: Array}; + } + | { + __typename: 'BoolMetadataEntry'; + boolValue: boolean | null; + label: string; + description: string | null; + } + | { + __typename: 'FloatMetadataEntry'; + floatValue: number | null; + label: string; + description: string | null; + } + | { + __typename: 'IntMetadataEntry'; + intValue: number | null; + intRepr: string; + label: string; + description: string | null; + } + | { + __typename: 'JobMetadataEntry'; + jobName: string; + repositoryName: string | null; + locationName: string; + label: string; + description: string | null; + } + | { + __typename: 'JsonMetadataEntry'; + jsonString: string; + label: string; + description: string | null; + } + | { + __typename: 'MarkdownMetadataEntry'; + mdStr: string; + label: string; + description: string | null; + } + | {__typename: 'NotebookMetadataEntry'; path: string; label: string; description: string | null} + | {__typename: 'NullMetadataEntry'; label: string; description: string | null} + | {__typename: 'PathMetadataEntry'; path: string; label: string; description: string | null} + | { + __typename: 'PipelineRunMetadataEntry'; + runId: string; + label: string; + description: string | null; + } + | { + __typename: 'PythonArtifactMetadataEntry'; + module: string; + name: string; + label: string; + description: string | null; + } + | { + __typename: 'TableMetadataEntry'; + label: string; + description: string | null; + table: { + __typename: 'Table'; + records: Array; + schema: { + __typename: 'TableSchema'; + columns: Array<{ + __typename: 'TableColumn'; + name: string; + description: string | null; + type: string; + constraints: { + __typename: 'TableColumnConstraints'; + nullable: boolean; + unique: boolean; + other: Array; + }; + }>; + constraints: {__typename: 'TableConstraints'; other: Array} | null; + }; + }; + } + | { + __typename: 'TableSchemaMetadataEntry'; + label: string; + description: string | null; + schema: { + __typename: 'TableSchema'; + columns: Array<{ + __typename: 'TableColumn'; + name: string; + description: string | null; + type: string; + constraints: { + __typename: 'TableColumnConstraints'; + nullable: boolean; + unique: boolean; + other: Array; + }; + }>; + constraints: {__typename: 'TableConstraints'; other: Array} | null; + }; + } + | {__typename: 'TextMetadataEntry'; text: string; label: string; description: string | null} + | {__typename: 'UrlMetadataEntry'; url: string; label: string; description: string | null} + >; +}; + +export type PartitionedAssetConditionEvaluationNodeFragment = { + __typename: 'PartitionedAssetConditionEvaluationNode'; + description: string; + startTimestamp: number | null; + endTimestamp: number | null; + numTrue: number; + numFalse: number; + numSkipped: number; + uniqueId: string; + childUniqueIds: Array; + trueSubset: { + __typename: 'AssetSubset'; + subsetValue: { + __typename: 'AssetSubsetValue'; + isPartitioned: boolean; + partitionKeys: Array | null; + partitionKeyRanges: Array<{ + __typename: 'PartitionKeyRange'; + start: string; + end: string; + }> | null; + }; + }; + falseSubset: { + __typename: 'AssetSubset'; + subsetValue: { + __typename: 'AssetSubsetValue'; + isPartitioned: boolean; + partitionKeys: Array | null; + partitionKeyRanges: Array<{ + __typename: 'PartitionKeyRange'; + start: string; + end: string; + }> | null; + }; + }; + candidateSubset: { + __typename: 'AssetSubset'; + subsetValue: { + __typename: 'AssetSubsetValue'; + isPartitioned: boolean; + partitionKeys: Array | null; + partitionKeyRanges: Array<{ + __typename: 'PartitionKeyRange'; + start: string; + end: string; + }> | null; + }; + } | null; +}; + +export type AssetConditionEvaluationRecordFragment = { + __typename: 'AssetConditionEvaluationRecord'; + id: string; + evaluationId: number; + numRequested: number; + runIds: Array; + timestamp: number; + startTimestamp: number | null; + endTimestamp: number | null; + assetKey: {__typename: 'AssetKey'; path: Array}; + evaluation: { + __typename: 'AssetConditionEvaluation'; + rootUniqueId: string; + evaluationNodes: Array< + | { + __typename: 'PartitionedAssetConditionEvaluationNode'; + description: string; + startTimestamp: number | null; + endTimestamp: number | null; + numTrue: number; + numFalse: number; + numSkipped: number; + uniqueId: string; + childUniqueIds: Array; + trueSubset: { + __typename: 'AssetSubset'; + subsetValue: { + __typename: 'AssetSubsetValue'; + isPartitioned: boolean; + partitionKeys: Array | null; + partitionKeyRanges: Array<{ + __typename: 'PartitionKeyRange'; + start: string; + end: string; + }> | null; + }; + }; + falseSubset: { + __typename: 'AssetSubset'; + subsetValue: { + __typename: 'AssetSubsetValue'; + isPartitioned: boolean; + partitionKeys: Array | null; + partitionKeyRanges: Array<{ + __typename: 'PartitionKeyRange'; + start: string; + end: string; + }> | null; + }; + }; + candidateSubset: { + __typename: 'AssetSubset'; + subsetValue: { + __typename: 'AssetSubsetValue'; + isPartitioned: boolean; + partitionKeys: Array | null; + partitionKeyRanges: Array<{ + __typename: 'PartitionKeyRange'; + start: string; + end: string; + }> | null; + }; + } | null; + } + | { + __typename: 'SpecificPartitionAssetConditionEvaluationNode'; + description: string; + status: Types.AssetConditionEvaluationStatus; + uniqueId: string; + childUniqueIds: Array; + metadataEntries: Array< + | { + __typename: 'AssetMetadataEntry'; + label: string; + description: string | null; + assetKey: {__typename: 'AssetKey'; path: Array}; + } + | { + __typename: 'BoolMetadataEntry'; + boolValue: boolean | null; + label: string; + description: string | null; + } + | { + __typename: 'FloatMetadataEntry'; + floatValue: number | null; + label: string; + description: string | null; + } + | { + __typename: 'IntMetadataEntry'; + intValue: number | null; + intRepr: string; + label: string; + description: string | null; + } + | { + __typename: 'JobMetadataEntry'; + jobName: string; + repositoryName: string | null; + locationName: string; + label: string; + description: string | null; + } + | { + __typename: 'JsonMetadataEntry'; + jsonString: string; + label: string; + description: string | null; + } + | { + __typename: 'MarkdownMetadataEntry'; + mdStr: string; + label: string; + description: string | null; + } + | { + __typename: 'NotebookMetadataEntry'; + path: string; + label: string; + description: string | null; + } + | {__typename: 'NullMetadataEntry'; label: string; description: string | null} + | { + __typename: 'PathMetadataEntry'; + path: string; + label: string; + description: string | null; + } + | { + __typename: 'PipelineRunMetadataEntry'; + runId: string; + label: string; + description: string | null; + } + | { + __typename: 'PythonArtifactMetadataEntry'; + module: string; + name: string; + label: string; + description: string | null; + } + | { + __typename: 'TableMetadataEntry'; + label: string; + description: string | null; + table: { + __typename: 'Table'; + records: Array; + schema: { + __typename: 'TableSchema'; + columns: Array<{ + __typename: 'TableColumn'; + name: string; + description: string | null; + type: string; + constraints: { + __typename: 'TableColumnConstraints'; + nullable: boolean; + unique: boolean; + other: Array; + }; + }>; + constraints: {__typename: 'TableConstraints'; other: Array} | null; + }; + }; + } + | { + __typename: 'TableSchemaMetadataEntry'; + label: string; + description: string | null; + schema: { + __typename: 'TableSchema'; + columns: Array<{ + __typename: 'TableColumn'; + name: string; + description: string | null; + type: string; + constraints: { + __typename: 'TableColumnConstraints'; + nullable: boolean; + unique: boolean; + other: Array; + }; + }>; + constraints: {__typename: 'TableConstraints'; other: Array} | null; + }; + } + | { + __typename: 'TextMetadataEntry'; + text: string; + label: string; + description: string | null; + } + | { + __typename: 'UrlMetadataEntry'; + url: string; + label: string; + description: string | null; + } + >; + } + | { + __typename: 'UnpartitionedAssetConditionEvaluationNode'; + description: string; + startTimestamp: number | null; + endTimestamp: number | null; + status: Types.AssetConditionEvaluationStatus; + uniqueId: string; + childUniqueIds: Array; + metadataEntries: Array< + | { + __typename: 'AssetMetadataEntry'; + label: string; + description: string | null; + assetKey: {__typename: 'AssetKey'; path: Array}; + } + | { + __typename: 'BoolMetadataEntry'; + boolValue: boolean | null; + label: string; + description: string | null; + } + | { + __typename: 'FloatMetadataEntry'; + floatValue: number | null; + label: string; + description: string | null; + } + | { + __typename: 'IntMetadataEntry'; + intValue: number | null; + intRepr: string; + label: string; + description: string | null; + } + | { + __typename: 'JobMetadataEntry'; + jobName: string; + repositoryName: string | null; + locationName: string; + label: string; + description: string | null; + } + | { + __typename: 'JsonMetadataEntry'; + jsonString: string; + label: string; + description: string | null; + } + | { + __typename: 'MarkdownMetadataEntry'; + mdStr: string; + label: string; + description: string | null; + } + | { + __typename: 'NotebookMetadataEntry'; + path: string; + label: string; + description: string | null; + } + | {__typename: 'NullMetadataEntry'; label: string; description: string | null} + | { + __typename: 'PathMetadataEntry'; + path: string; + label: string; + description: string | null; + } + | { + __typename: 'PipelineRunMetadataEntry'; + runId: string; + label: string; + description: string | null; + } + | { + __typename: 'PythonArtifactMetadataEntry'; + module: string; + name: string; + label: string; + description: string | null; + } + | { + __typename: 'TableMetadataEntry'; + label: string; + description: string | null; + table: { + __typename: 'Table'; + records: Array; + schema: { + __typename: 'TableSchema'; + columns: Array<{ + __typename: 'TableColumn'; + name: string; + description: string | null; + type: string; + constraints: { + __typename: 'TableColumnConstraints'; + nullable: boolean; + unique: boolean; + other: Array; + }; + }>; + constraints: {__typename: 'TableConstraints'; other: Array} | null; + }; + }; + } + | { + __typename: 'TableSchemaMetadataEntry'; + label: string; + description: string | null; + schema: { + __typename: 'TableSchema'; + columns: Array<{ + __typename: 'TableColumn'; + name: string; + description: string | null; + type: string; + constraints: { + __typename: 'TableColumnConstraints'; + nullable: boolean; + unique: boolean; + other: Array; + }; + }>; + constraints: {__typename: 'TableConstraints'; other: Array} | null; + }; + } + | { + __typename: 'TextMetadataEntry'; + text: string; + label: string; + description: string | null; + } + | { + __typename: 'UrlMetadataEntry'; + url: string; + label: string; + description: string | null; + } + >; + } + >; + }; +}; + export type GetEvaluationsQueryVariables = Types.Exact<{ assetKey: Types.AssetKeyInput; limit: Types.Scalars['Int']; @@ -26,131 +673,711 @@ export type GetEvaluationsQuery = { } | null; } | {__typename: 'AssetNotFoundError'}; - autoMaterializeAssetEvaluationsOrError: - | {__typename: 'AutoMaterializeAssetEvaluationNeedsMigrationError'; message: string} + assetConditionEvaluationRecordsOrError: | { - __typename: 'AutoMaterializeAssetEvaluationRecords'; + __typename: 'AssetConditionEvaluationRecords'; records: Array<{ - __typename: 'AutoMaterializeAssetEvaluationRecord'; + __typename: 'AssetConditionEvaluationRecord'; id: string; evaluationId: number; numRequested: number; - numSkipped: number; - numDiscarded: number; - timestamp: number; runIds: Array; - rulesWithRuleEvaluations: Array<{ - __typename: 'AutoMaterializeRuleWithRuleEvaluations'; - rule: { - __typename: 'AutoMaterializeRule'; - description: string; - decisionType: Types.AutoMaterializeDecisionType; - className: string; - }; - ruleEvaluations: Array<{ - __typename: 'AutoMaterializeRuleEvaluation'; - evaluationData: - | { - __typename: 'ParentMaterializedRuleEvaluationData'; - updatedAssetKeys: Array<{__typename: 'AssetKey'; path: Array}> | null; - willUpdateAssetKeys: Array<{ - __typename: 'AssetKey'; - path: Array; - }> | null; - } - | {__typename: 'TextRuleEvaluationData'; text: string | null} - | { - __typename: 'WaitingOnKeysRuleEvaluationData'; - waitingOnAssetKeys: Array<{__typename: 'AssetKey'; path: Array}> | null; - } - | null; - partitionKeysOrError: - | {__typename: 'PartitionKeys'; partitionKeys: Array} - | {__typename: 'PartitionSubsetDeserializationError'; message: string} - | null; - }>; - }>; - rules: Array<{ - __typename: 'AutoMaterializeRule'; - description: string; - decisionType: Types.AutoMaterializeDecisionType; - className: string; - }> | null; + timestamp: number; + startTimestamp: number | null; + endTimestamp: number | null; + assetKey: {__typename: 'AssetKey'; path: Array}; + evaluation: { + __typename: 'AssetConditionEvaluation'; + rootUniqueId: string; + evaluationNodes: Array< + | { + __typename: 'PartitionedAssetConditionEvaluationNode'; + description: string; + startTimestamp: number | null; + endTimestamp: number | null; + numTrue: number; + numFalse: number; + numSkipped: number; + uniqueId: string; + childUniqueIds: Array; + trueSubset: { + __typename: 'AssetSubset'; + subsetValue: { + __typename: 'AssetSubsetValue'; + isPartitioned: boolean; + partitionKeys: Array | null; + partitionKeyRanges: Array<{ + __typename: 'PartitionKeyRange'; + start: string; + end: string; + }> | null; + }; + }; + falseSubset: { + __typename: 'AssetSubset'; + subsetValue: { + __typename: 'AssetSubsetValue'; + isPartitioned: boolean; + partitionKeys: Array | null; + partitionKeyRanges: Array<{ + __typename: 'PartitionKeyRange'; + start: string; + end: string; + }> | null; + }; + }; + candidateSubset: { + __typename: 'AssetSubset'; + subsetValue: { + __typename: 'AssetSubsetValue'; + isPartitioned: boolean; + partitionKeys: Array | null; + partitionKeyRanges: Array<{ + __typename: 'PartitionKeyRange'; + start: string; + end: string; + }> | null; + }; + } | null; + } + | { + __typename: 'SpecificPartitionAssetConditionEvaluationNode'; + description: string; + status: Types.AssetConditionEvaluationStatus; + uniqueId: string; + childUniqueIds: Array; + metadataEntries: Array< + | { + __typename: 'AssetMetadataEntry'; + label: string; + description: string | null; + assetKey: {__typename: 'AssetKey'; path: Array}; + } + | { + __typename: 'BoolMetadataEntry'; + boolValue: boolean | null; + label: string; + description: string | null; + } + | { + __typename: 'FloatMetadataEntry'; + floatValue: number | null; + label: string; + description: string | null; + } + | { + __typename: 'IntMetadataEntry'; + intValue: number | null; + intRepr: string; + label: string; + description: string | null; + } + | { + __typename: 'JobMetadataEntry'; + jobName: string; + repositoryName: string | null; + locationName: string; + label: string; + description: string | null; + } + | { + __typename: 'JsonMetadataEntry'; + jsonString: string; + label: string; + description: string | null; + } + | { + __typename: 'MarkdownMetadataEntry'; + mdStr: string; + label: string; + description: string | null; + } + | { + __typename: 'NotebookMetadataEntry'; + path: string; + label: string; + description: string | null; + } + | {__typename: 'NullMetadataEntry'; label: string; description: string | null} + | { + __typename: 'PathMetadataEntry'; + path: string; + label: string; + description: string | null; + } + | { + __typename: 'PipelineRunMetadataEntry'; + runId: string; + label: string; + description: string | null; + } + | { + __typename: 'PythonArtifactMetadataEntry'; + module: string; + name: string; + label: string; + description: string | null; + } + | { + __typename: 'TableMetadataEntry'; + label: string; + description: string | null; + table: { + __typename: 'Table'; + records: Array; + schema: { + __typename: 'TableSchema'; + columns: Array<{ + __typename: 'TableColumn'; + name: string; + description: string | null; + type: string; + constraints: { + __typename: 'TableColumnConstraints'; + nullable: boolean; + unique: boolean; + other: Array; + }; + }>; + constraints: { + __typename: 'TableConstraints'; + other: Array; + } | null; + }; + }; + } + | { + __typename: 'TableSchemaMetadataEntry'; + label: string; + description: string | null; + schema: { + __typename: 'TableSchema'; + columns: Array<{ + __typename: 'TableColumn'; + name: string; + description: string | null; + type: string; + constraints: { + __typename: 'TableColumnConstraints'; + nullable: boolean; + unique: boolean; + other: Array; + }; + }>; + constraints: { + __typename: 'TableConstraints'; + other: Array; + } | null; + }; + } + | { + __typename: 'TextMetadataEntry'; + text: string; + label: string; + description: string | null; + } + | { + __typename: 'UrlMetadataEntry'; + url: string; + label: string; + description: string | null; + } + >; + } + | { + __typename: 'UnpartitionedAssetConditionEvaluationNode'; + description: string; + startTimestamp: number | null; + endTimestamp: number | null; + status: Types.AssetConditionEvaluationStatus; + uniqueId: string; + childUniqueIds: Array; + metadataEntries: Array< + | { + __typename: 'AssetMetadataEntry'; + label: string; + description: string | null; + assetKey: {__typename: 'AssetKey'; path: Array}; + } + | { + __typename: 'BoolMetadataEntry'; + boolValue: boolean | null; + label: string; + description: string | null; + } + | { + __typename: 'FloatMetadataEntry'; + floatValue: number | null; + label: string; + description: string | null; + } + | { + __typename: 'IntMetadataEntry'; + intValue: number | null; + intRepr: string; + label: string; + description: string | null; + } + | { + __typename: 'JobMetadataEntry'; + jobName: string; + repositoryName: string | null; + locationName: string; + label: string; + description: string | null; + } + | { + __typename: 'JsonMetadataEntry'; + jsonString: string; + label: string; + description: string | null; + } + | { + __typename: 'MarkdownMetadataEntry'; + mdStr: string; + label: string; + description: string | null; + } + | { + __typename: 'NotebookMetadataEntry'; + path: string; + label: string; + description: string | null; + } + | {__typename: 'NullMetadataEntry'; label: string; description: string | null} + | { + __typename: 'PathMetadataEntry'; + path: string; + label: string; + description: string | null; + } + | { + __typename: 'PipelineRunMetadataEntry'; + runId: string; + label: string; + description: string | null; + } + | { + __typename: 'PythonArtifactMetadataEntry'; + module: string; + name: string; + label: string; + description: string | null; + } + | { + __typename: 'TableMetadataEntry'; + label: string; + description: string | null; + table: { + __typename: 'Table'; + records: Array; + schema: { + __typename: 'TableSchema'; + columns: Array<{ + __typename: 'TableColumn'; + name: string; + description: string | null; + type: string; + constraints: { + __typename: 'TableColumnConstraints'; + nullable: boolean; + unique: boolean; + other: Array; + }; + }>; + constraints: { + __typename: 'TableConstraints'; + other: Array; + } | null; + }; + }; + } + | { + __typename: 'TableSchemaMetadataEntry'; + label: string; + description: string | null; + schema: { + __typename: 'TableSchema'; + columns: Array<{ + __typename: 'TableColumn'; + name: string; + description: string | null; + type: string; + constraints: { + __typename: 'TableColumnConstraints'; + nullable: boolean; + unique: boolean; + other: Array; + }; + }>; + constraints: { + __typename: 'TableConstraints'; + other: Array; + } | null; + }; + } + | { + __typename: 'TextMetadataEntry'; + text: string; + label: string; + description: string | null; + } + | { + __typename: 'UrlMetadataEntry'; + url: string; + label: string; + description: string | null; + } + >; + } + >; + }; }>; } + | {__typename: 'AutoMaterializeAssetEvaluationNeedsMigrationError'; message: string} | null; }; -export type AutoMaterializeEvaluationRecordItemFragment = { - __typename: 'AutoMaterializeAssetEvaluationRecord'; - id: string; - evaluationId: number; - numRequested: number; - numSkipped: number; - numDiscarded: number; - timestamp: number; - runIds: Array; - rulesWithRuleEvaluations: Array<{ - __typename: 'AutoMaterializeRuleWithRuleEvaluations'; - rule: { - __typename: 'AutoMaterializeRule'; - description: string; - decisionType: Types.AutoMaterializeDecisionType; - className: string; - }; - ruleEvaluations: Array<{ - __typename: 'AutoMaterializeRuleEvaluation'; - evaluationData: - | { - __typename: 'ParentMaterializedRuleEvaluationData'; - updatedAssetKeys: Array<{__typename: 'AssetKey'; path: Array}> | null; - willUpdateAssetKeys: Array<{__typename: 'AssetKey'; path: Array}> | null; - } - | {__typename: 'TextRuleEvaluationData'; text: string | null} - | { - __typename: 'WaitingOnKeysRuleEvaluationData'; - waitingOnAssetKeys: Array<{__typename: 'AssetKey'; path: Array}> | null; - } - | null; - partitionKeysOrError: - | {__typename: 'PartitionKeys'; partitionKeys: Array} - | {__typename: 'PartitionSubsetDeserializationError'; message: string} - | null; - }>; - }>; - rules: Array<{ - __typename: 'AutoMaterializeRule'; - description: string; - decisionType: Types.AutoMaterializeDecisionType; - className: string; - }> | null; -}; +export type GetEvaluationsSpecificPartitionQueryVariables = Types.Exact<{ + assetKey: Types.AssetKeyInput; + evaluationId: Types.Scalars['Int']; + partition: Types.Scalars['String']; +}>; -export type RuleWithEvaluationsFragment = { - __typename: 'AutoMaterializeRuleWithRuleEvaluations'; - rule: { - __typename: 'AutoMaterializeRule'; - description: string; - decisionType: Types.AutoMaterializeDecisionType; - className: string; - }; - ruleEvaluations: Array<{ - __typename: 'AutoMaterializeRuleEvaluation'; - evaluationData: +export type GetEvaluationsSpecificPartitionQuery = { + __typename: 'Query'; + assetConditionEvaluationForPartition: { + __typename: 'AssetConditionEvaluation'; + rootUniqueId: string; + evaluationNodes: Array< + | { + __typename: 'PartitionedAssetConditionEvaluationNode'; + description: string; + startTimestamp: number | null; + endTimestamp: number | null; + numTrue: number; + numFalse: number; + numSkipped: number; + uniqueId: string; + childUniqueIds: Array; + trueSubset: { + __typename: 'AssetSubset'; + subsetValue: { + __typename: 'AssetSubsetValue'; + isPartitioned: boolean; + partitionKeys: Array | null; + partitionKeyRanges: Array<{ + __typename: 'PartitionKeyRange'; + start: string; + end: string; + }> | null; + }; + }; + falseSubset: { + __typename: 'AssetSubset'; + subsetValue: { + __typename: 'AssetSubsetValue'; + isPartitioned: boolean; + partitionKeys: Array | null; + partitionKeyRanges: Array<{ + __typename: 'PartitionKeyRange'; + start: string; + end: string; + }> | null; + }; + }; + candidateSubset: { + __typename: 'AssetSubset'; + subsetValue: { + __typename: 'AssetSubsetValue'; + isPartitioned: boolean; + partitionKeys: Array | null; + partitionKeyRanges: Array<{ + __typename: 'PartitionKeyRange'; + start: string; + end: string; + }> | null; + }; + } | null; + } | { - __typename: 'ParentMaterializedRuleEvaluationData'; - updatedAssetKeys: Array<{__typename: 'AssetKey'; path: Array}> | null; - willUpdateAssetKeys: Array<{__typename: 'AssetKey'; path: Array}> | null; + __typename: 'SpecificPartitionAssetConditionEvaluationNode'; + description: string; + status: Types.AssetConditionEvaluationStatus; + uniqueId: string; + childUniqueIds: Array; + metadataEntries: Array< + | { + __typename: 'AssetMetadataEntry'; + label: string; + description: string | null; + assetKey: {__typename: 'AssetKey'; path: Array}; + } + | { + __typename: 'BoolMetadataEntry'; + boolValue: boolean | null; + label: string; + description: string | null; + } + | { + __typename: 'FloatMetadataEntry'; + floatValue: number | null; + label: string; + description: string | null; + } + | { + __typename: 'IntMetadataEntry'; + intValue: number | null; + intRepr: string; + label: string; + description: string | null; + } + | { + __typename: 'JobMetadataEntry'; + jobName: string; + repositoryName: string | null; + locationName: string; + label: string; + description: string | null; + } + | { + __typename: 'JsonMetadataEntry'; + jsonString: string; + label: string; + description: string | null; + } + | { + __typename: 'MarkdownMetadataEntry'; + mdStr: string; + label: string; + description: string | null; + } + | { + __typename: 'NotebookMetadataEntry'; + path: string; + label: string; + description: string | null; + } + | {__typename: 'NullMetadataEntry'; label: string; description: string | null} + | { + __typename: 'PathMetadataEntry'; + path: string; + label: string; + description: string | null; + } + | { + __typename: 'PipelineRunMetadataEntry'; + runId: string; + label: string; + description: string | null; + } + | { + __typename: 'PythonArtifactMetadataEntry'; + module: string; + name: string; + label: string; + description: string | null; + } + | { + __typename: 'TableMetadataEntry'; + label: string; + description: string | null; + table: { + __typename: 'Table'; + records: Array; + schema: { + __typename: 'TableSchema'; + columns: Array<{ + __typename: 'TableColumn'; + name: string; + description: string | null; + type: string; + constraints: { + __typename: 'TableColumnConstraints'; + nullable: boolean; + unique: boolean; + other: Array; + }; + }>; + constraints: {__typename: 'TableConstraints'; other: Array} | null; + }; + }; + } + | { + __typename: 'TableSchemaMetadataEntry'; + label: string; + description: string | null; + schema: { + __typename: 'TableSchema'; + columns: Array<{ + __typename: 'TableColumn'; + name: string; + description: string | null; + type: string; + constraints: { + __typename: 'TableColumnConstraints'; + nullable: boolean; + unique: boolean; + other: Array; + }; + }>; + constraints: {__typename: 'TableConstraints'; other: Array} | null; + }; + } + | { + __typename: 'TextMetadataEntry'; + text: string; + label: string; + description: string | null; + } + | { + __typename: 'UrlMetadataEntry'; + url: string; + label: string; + description: string | null; + } + >; } - | {__typename: 'TextRuleEvaluationData'; text: string | null} | { - __typename: 'WaitingOnKeysRuleEvaluationData'; - waitingOnAssetKeys: Array<{__typename: 'AssetKey'; path: Array}> | null; + __typename: 'UnpartitionedAssetConditionEvaluationNode'; + description: string; + startTimestamp: number | null; + endTimestamp: number | null; + status: Types.AssetConditionEvaluationStatus; + uniqueId: string; + childUniqueIds: Array; + metadataEntries: Array< + | { + __typename: 'AssetMetadataEntry'; + label: string; + description: string | null; + assetKey: {__typename: 'AssetKey'; path: Array}; + } + | { + __typename: 'BoolMetadataEntry'; + boolValue: boolean | null; + label: string; + description: string | null; + } + | { + __typename: 'FloatMetadataEntry'; + floatValue: number | null; + label: string; + description: string | null; + } + | { + __typename: 'IntMetadataEntry'; + intValue: number | null; + intRepr: string; + label: string; + description: string | null; + } + | { + __typename: 'JobMetadataEntry'; + jobName: string; + repositoryName: string | null; + locationName: string; + label: string; + description: string | null; + } + | { + __typename: 'JsonMetadataEntry'; + jsonString: string; + label: string; + description: string | null; + } + | { + __typename: 'MarkdownMetadataEntry'; + mdStr: string; + label: string; + description: string | null; + } + | { + __typename: 'NotebookMetadataEntry'; + path: string; + label: string; + description: string | null; + } + | {__typename: 'NullMetadataEntry'; label: string; description: string | null} + | { + __typename: 'PathMetadataEntry'; + path: string; + label: string; + description: string | null; + } + | { + __typename: 'PipelineRunMetadataEntry'; + runId: string; + label: string; + description: string | null; + } + | { + __typename: 'PythonArtifactMetadataEntry'; + module: string; + name: string; + label: string; + description: string | null; + } + | { + __typename: 'TableMetadataEntry'; + label: string; + description: string | null; + table: { + __typename: 'Table'; + records: Array; + schema: { + __typename: 'TableSchema'; + columns: Array<{ + __typename: 'TableColumn'; + name: string; + description: string | null; + type: string; + constraints: { + __typename: 'TableColumnConstraints'; + nullable: boolean; + unique: boolean; + other: Array; + }; + }>; + constraints: {__typename: 'TableConstraints'; other: Array} | null; + }; + }; + } + | { + __typename: 'TableSchemaMetadataEntry'; + label: string; + description: string | null; + schema: { + __typename: 'TableSchema'; + columns: Array<{ + __typename: 'TableColumn'; + name: string; + description: string | null; + type: string; + constraints: { + __typename: 'TableColumnConstraints'; + nullable: boolean; + unique: boolean; + other: Array; + }; + }>; + constraints: {__typename: 'TableConstraints'; other: Array} | null; + }; + } + | { + __typename: 'TextMetadataEntry'; + text: string; + label: string; + description: string | null; + } + | { + __typename: 'UrlMetadataEntry'; + url: string; + label: string; + description: string | null; + } + >; } - | null; - partitionKeysOrError: - | {__typename: 'PartitionKeys'; partitionKeys: Array} - | {__typename: 'PartitionSubsetDeserializationError'; message: string} - | null; - }>; + >; + } | null; }; diff --git a/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/useEvaluationsQueryResult.tsx b/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/useEvaluationsQueryResult.tsx index 5ea54aab18cf2..764d405005e42 100644 --- a/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/useEvaluationsQueryResult.tsx +++ b/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/useEvaluationsQueryResult.tsx @@ -11,10 +11,10 @@ export function useEvaluationsQueryResult({assetKey}: {assetKey: AssetKey}) { return useCursorPaginatedQuery({ nextCursorForResult: (data) => { if ( - data.autoMaterializeAssetEvaluationsOrError?.__typename === - 'AutoMaterializeAssetEvaluationRecords' + data.assetConditionEvaluationRecordsOrError?.__typename === + 'AssetConditionEvaluationRecords' ) { - return data.autoMaterializeAssetEvaluationsOrError.records[ + return data.assetConditionEvaluationRecordsOrError.records[ PAGE_SIZE - 1 ]?.evaluationId.toString(); } @@ -22,10 +22,10 @@ export function useEvaluationsQueryResult({assetKey}: {assetKey: AssetKey}) { }, getResultArray: (data) => { if ( - data?.autoMaterializeAssetEvaluationsOrError?.__typename === - 'AutoMaterializeAssetEvaluationRecords' + data?.assetConditionEvaluationRecordsOrError?.__typename === + 'AssetConditionEvaluationRecords' ) { - return data.autoMaterializeAssetEvaluationsOrError.records; + return data.assetConditionEvaluationRecordsOrError.records; } return []; }, diff --git a/js_modules/dagster-ui/packages/ui-core/src/assets/AutomaterializeTagWithEvaluation.tsx b/js_modules/dagster-ui/packages/ui-core/src/assets/AutomaterializeTagWithEvaluation.tsx index 8ff8f45587123..e2e23f6d64c2b 100644 --- a/js_modules/dagster-ui/packages/ui-core/src/assets/AutomaterializeTagWithEvaluation.tsx +++ b/js_modules/dagster-ui/packages/ui-core/src/assets/AutomaterializeTagWithEvaluation.tsx @@ -32,7 +32,7 @@ export const AutomaterializeTagWithEvaluation = ({assetKeys, evaluationId}: Prop > {sortedKeys.map((assetKey) => { const url = assetDetailsPathForKey(assetKey, { - view: 'auto-materialize-history', + view: 'automation', evaluation: evaluationId, }); return ( diff --git a/js_modules/dagster-ui/packages/ui-core/src/assets/__tests__/buildAssetTabs.test.tsx b/js_modules/dagster-ui/packages/ui-core/src/assets/__tests__/buildAssetTabs.test.tsx index f852496b7e1ed..e15d39a0c701e 100644 --- a/js_modules/dagster-ui/packages/ui-core/src/assets/__tests__/buildAssetTabs.test.tsx +++ b/js_modules/dagster-ui/packages/ui-core/src/assets/__tests__/buildAssetTabs.test.tsx @@ -313,7 +313,7 @@ describe('buildAssetTabs', () => { 'plots', 'definition', 'lineage', - 'auto-materialize-history', + 'automation', ]); }); @@ -332,13 +332,7 @@ describe('buildAssetTabs', () => { params, }); const tabKeys = tabList.map(({id}) => id); - expect(tabKeys).toEqual([ - 'events', - 'plots', - 'definition', - 'lineage', - 'auto-materialize-history', - ]); + expect(tabKeys).toEqual(['events', 'plots', 'definition', 'lineage', 'automation']); }); it('hides partitions and auto-materialize tabs if no partitions or auto-materializing', () => { diff --git a/js_modules/dagster-ui/packages/ui-core/src/assets/auto-materialization/AutomaterializationTickDetailDialog.tsx b/js_modules/dagster-ui/packages/ui-core/src/assets/auto-materialization/AutomaterializationTickDetailDialog.tsx index b38bff5666b56..03f38d910d82a 100644 --- a/js_modules/dagster-ui/packages/ui-core/src/assets/auto-materialization/AutomaterializationTickDetailDialog.tsx +++ b/js_modules/dagster-ui/packages/ui-core/src/assets/auto-materialization/AutomaterializationTickDetailDialog.tsx @@ -242,7 +242,7 @@ const AssetDetailRow = ({ diff --git a/js_modules/dagster-ui/packages/ui-core/src/graphql/possibleTypes.generated.json b/js_modules/dagster-ui/packages/ui-core/src/graphql/possibleTypes.generated.json index 4d529a469d3a4..a332d15404186 100644 --- a/js_modules/dagster-ui/packages/ui-core/src/graphql/possibleTypes.generated.json +++ b/js_modules/dagster-ui/packages/ui-core/src/graphql/possibleTypes.generated.json @@ -1 +1,699 @@ -{"DisplayableEvent":["EngineEvent","ExecutionStepOutputEvent","ExpectationResult","FailureMetadata","HandledOutputEvent","LoadedInputEvent","ObjectStoreOperationResult","ResourceInitFailureEvent","ResourceInitStartedEvent","ResourceInitSuccessEvent","StepWorkerStartedEvent","StepWorkerStartingEvent","MaterializationEvent","ObservationEvent","TypeCheck"],"MarkerEvent":["EngineEvent","ResourceInitFailureEvent","ResourceInitStartedEvent","ResourceInitSuccessEvent","StepWorkerStartedEvent","StepWorkerStartingEvent"],"ErrorEvent":["EngineEvent","ExecutionStepFailureEvent","ExecutionStepUpForRetryEvent","HookErroredEvent","RunFailureEvent","ResourceInitFailureEvent"],"MessageEvent":["EngineEvent","ExecutionStepFailureEvent","ExecutionStepInputEvent","ExecutionStepOutputEvent","ExecutionStepRestartEvent","ExecutionStepSkippedEvent","ExecutionStepStartEvent","ExecutionStepSuccessEvent","ExecutionStepUpForRetryEvent","HandledOutputEvent","HookCompletedEvent","HookErroredEvent","HookSkippedEvent","LoadedInputEvent","LogMessageEvent","ObjectStoreOperationEvent","RunCanceledEvent","RunCancelingEvent","RunDequeuedEvent","RunEnqueuedEvent","RunFailureEvent","ResourceInitFailureEvent","ResourceInitStartedEvent","ResourceInitSuccessEvent","RunStartEvent","RunStartingEvent","RunSuccessEvent","StepExpectationResultEvent","StepWorkerStartedEvent","StepWorkerStartingEvent","MaterializationEvent","ObservationEvent","AssetMaterializationPlannedEvent","LogsCapturedEvent","AlertStartEvent","AlertSuccessEvent","AlertFailureEvent","AssetCheckEvaluationPlannedEvent","AssetCheckEvaluationEvent"],"RunEvent":["RunCanceledEvent","RunCancelingEvent","RunDequeuedEvent","RunEnqueuedEvent","RunFailureEvent","RunStartEvent","RunStartingEvent","RunSuccessEvent","AssetMaterializationPlannedEvent","AlertStartEvent","AlertSuccessEvent","AlertFailureEvent"],"PipelineRunStepStats":["RunStepStats"],"StepEvent":["EngineEvent","ExecutionStepFailureEvent","ExecutionStepInputEvent","ExecutionStepOutputEvent","ExecutionStepRestartEvent","ExecutionStepSkippedEvent","ExecutionStepStartEvent","ExecutionStepSuccessEvent","ExecutionStepUpForRetryEvent","HandledOutputEvent","HookCompletedEvent","HookErroredEvent","HookSkippedEvent","LoadedInputEvent","ObjectStoreOperationEvent","ResourceInitFailureEvent","ResourceInitStartedEvent","ResourceInitSuccessEvent","StepExpectationResultEvent","StepWorkerStartedEvent","StepWorkerStartingEvent","MaterializationEvent","ObservationEvent","AssetCheckEvaluationPlannedEvent","AssetCheckEvaluationEvent"],"AssetPartitionStatuses":["DefaultPartitionStatuses","MultiPartitionStatuses","TimePartitionStatuses"],"PartitionStatus1D":["TimePartitionStatuses","DefaultPartitionStatuses"],"AssetChecksOrError":["AssetChecks","AssetCheckNeedsMigrationError","AssetCheckNeedsUserCodeUpgrade","AssetCheckNeedsAgentUpgradeError"],"Instigator":["Schedule","Sensor"],"EvaluationStackEntry":["EvaluationStackListItemEntry","EvaluationStackPathEntry","EvaluationStackMapKeyEntry","EvaluationStackMapValueEntry"],"IPipelineSnapshot":["Pipeline","PipelineSnapshot","Job"],"PipelineConfigValidationError":["FieldNotDefinedConfigError","FieldsNotDefinedConfigError","MissingFieldConfigError","MissingFieldsConfigError","RuntimeMismatchConfigError","SelectorTypeConfigError"],"PipelineConfigValidationInvalid":["RunConfigValidationInvalid"],"PipelineConfigValidationResult":["InvalidSubsetError","PipelineConfigValidationValid","RunConfigValidationInvalid","PipelineNotFoundError","PythonError"],"PipelineReference":["PipelineSnapshot","UnknownPipeline"],"PipelineRun":["Run"],"DagsterRunEvent":["ExecutionStepFailureEvent","ExecutionStepInputEvent","ExecutionStepOutputEvent","ExecutionStepSkippedEvent","ExecutionStepStartEvent","ExecutionStepSuccessEvent","ExecutionStepUpForRetryEvent","ExecutionStepRestartEvent","LogMessageEvent","ResourceInitFailureEvent","ResourceInitStartedEvent","ResourceInitSuccessEvent","RunFailureEvent","RunStartEvent","RunEnqueuedEvent","RunDequeuedEvent","RunStartingEvent","RunCancelingEvent","RunCanceledEvent","RunSuccessEvent","StepWorkerStartedEvent","StepWorkerStartingEvent","HandledOutputEvent","LoadedInputEvent","LogsCapturedEvent","ObjectStoreOperationEvent","StepExpectationResultEvent","MaterializationEvent","ObservationEvent","EngineEvent","HookCompletedEvent","HookSkippedEvent","HookErroredEvent","AlertStartEvent","AlertSuccessEvent","AlertFailureEvent","AssetMaterializationPlannedEvent","AssetCheckEvaluationPlannedEvent","AssetCheckEvaluationEvent"],"PipelineRunLogsSubscriptionPayload":["PipelineRunLogsSubscriptionSuccess","PipelineRunLogsSubscriptionFailure"],"RunOrError":["Run","RunNotFoundError","PythonError"],"PipelineRunStatsSnapshot":["RunStatsSnapshot"],"RunStatsSnapshotOrError":["RunStatsSnapshot","PythonError"],"PipelineSnapshotOrError":["PipelineNotFoundError","PipelineSnapshot","PipelineSnapshotNotFoundError","PythonError"],"AssetOrError":["Asset","AssetNotFoundError"],"AssetsOrError":["AssetConnection","PythonError"],"DeletePipelineRunResult":["DeletePipelineRunSuccess","UnauthorizedError","PythonError","RunNotFoundError"],"ExecutionPlanOrError":["ExecutionPlan","RunConfigValidationInvalid","PipelineNotFoundError","InvalidSubsetError","PythonError"],"PipelineOrError":["Pipeline","PipelineNotFoundError","InvalidSubsetError","PythonError"],"ReloadRepositoryLocationMutationResult":["WorkspaceLocationEntry","ReloadNotSupported","RepositoryLocationNotFound","UnauthorizedError","PythonError"],"RepositoryLocationOrLoadError":["RepositoryLocation","PythonError"],"ReloadWorkspaceMutationResult":["Workspace","UnauthorizedError","PythonError"],"ShutdownRepositoryLocationMutationResult":["ShutdownRepositoryLocationSuccess","RepositoryLocationNotFound","UnauthorizedError","PythonError"],"TerminatePipelineExecutionFailure":["TerminateRunFailure"],"TerminatePipelineExecutionSuccess":["TerminateRunSuccess"],"TerminateRunResult":["TerminateRunSuccess","TerminateRunFailure","RunNotFoundError","UnauthorizedError","PythonError"],"ScheduleMutationResult":["PythonError","UnauthorizedError","ScheduleStateResult"],"ScheduleOrError":["Schedule","ScheduleNotFoundError","PythonError"],"SchedulerOrError":["Scheduler","SchedulerNotDefinedError","PythonError"],"SchedulesOrError":["Schedules","RepositoryNotFoundError","PythonError"],"ScheduleTickSpecificData":["ScheduleTickSuccessData","ScheduleTickFailureData"],"LaunchBackfillResult":["LaunchBackfillSuccess","PartitionSetNotFoundError","InvalidStepError","InvalidOutputError","RunConfigValidationInvalid","PipelineNotFoundError","RunConflict","UnauthorizedError","PythonError","InvalidSubsetError","PresetNotFoundError","ConflictingExecutionParamsError","NoModeProvidedError"],"ConfigTypeOrError":["EnumConfigType","CompositeConfigType","RegularConfigType","PipelineNotFoundError","ConfigTypeNotFoundError","PythonError"],"ConfigType":["ArrayConfigType","CompositeConfigType","EnumConfigType","NullableConfigType","RegularConfigType","ScalarUnionConfigType","MapConfigType"],"WrappingConfigType":["ArrayConfigType","NullableConfigType"],"DagsterType":["ListDagsterType","NullableDagsterType","RegularDagsterType"],"DagsterTypeOrError":["RegularDagsterType","PipelineNotFoundError","DagsterTypeNotFoundError","PythonError"],"WrappingDagsterType":["ListDagsterType","NullableDagsterType"],"Error":["AssetCheckNeedsMigrationError","AssetCheckNeedsUserCodeUpgrade","AssetCheckNeedsAgentUpgradeError","AssetNotFoundError","ConflictingExecutionParamsError","ConfigTypeNotFoundError","DagsterTypeNotFoundError","InvalidPipelineRunsFilterError","InvalidSubsetError","ModeNotFoundError","NoModeProvidedError","PartitionSetNotFoundError","PipelineNotFoundError","RunConflict","PipelineSnapshotNotFoundError","PresetNotFoundError","PythonError","ErrorChainLink","UnauthorizedError","ReloadNotSupported","RepositoryLocationNotFound","RepositoryNotFoundError","ResourceNotFoundError","RunGroupNotFoundError","RunNotFoundError","ScheduleNotFoundError","SchedulerNotDefinedError","SensorNotFoundError","DuplicateDynamicPartitionError","InstigationStateNotFoundError","SolidStepStatusUnavailableError","GraphNotFoundError","BackfillNotFoundError","PartitionSubsetDeserializationError","AutoMaterializeAssetEvaluationNeedsMigrationError"],"PipelineRunConflict":["RunConflict"],"PipelineRunNotFoundError":["RunNotFoundError"],"RepositoriesOrError":["RepositoryConnection","RepositoryNotFoundError","PythonError"],"RepositoryOrError":["PythonError","Repository","RepositoryNotFoundError"],"InstigationTypeSpecificData":["SensorData","ScheduleData"],"InstigationStateOrError":["InstigationState","InstigationStateNotFoundError","PythonError"],"InstigationStatesOrError":["InstigationStates","PythonError"],"MetadataEntry":["TableSchemaMetadataEntry","TableMetadataEntry","FloatMetadataEntry","IntMetadataEntry","JsonMetadataEntry","BoolMetadataEntry","MarkdownMetadataEntry","PathMetadataEntry","NotebookMetadataEntry","PythonArtifactMetadataEntry","TextMetadataEntry","UrlMetadataEntry","PipelineRunMetadataEntry","AssetMetadataEntry","JobMetadataEntry","NullMetadataEntry"],"PartitionRunConfigOrError":["PartitionRunConfig","PythonError"],"AssetBackfillStatus":["AssetPartitionsStatusCounts","UnpartitionedAssetStatus"],"PartitionSetOrError":["PartitionSet","PartitionSetNotFoundError","PythonError"],"PartitionSetsOrError":["PartitionSets","PipelineNotFoundError","PythonError"],"PartitionsOrError":["Partitions","PythonError"],"PartitionStatusesOrError":["PartitionStatuses","PythonError"],"PartitionTagsOrError":["PartitionTags","PythonError"],"RunConfigSchemaOrError":["RunConfigSchema","PipelineNotFoundError","InvalidSubsetError","ModeNotFoundError","PythonError"],"LaunchRunResult":["LaunchRunSuccess","InvalidStepError","InvalidOutputError","RunConfigValidationInvalid","PipelineNotFoundError","RunConflict","UnauthorizedError","PythonError","InvalidSubsetError","PresetNotFoundError","ConflictingExecutionParamsError","NoModeProvidedError"],"LaunchRunReexecutionResult":["LaunchRunSuccess","InvalidStepError","InvalidOutputError","RunConfigValidationInvalid","PipelineNotFoundError","RunConflict","UnauthorizedError","PythonError","InvalidSubsetError","PresetNotFoundError","ConflictingExecutionParamsError","NoModeProvidedError"],"LaunchPipelineRunSuccess":["LaunchRunSuccess"],"RunsOrError":["Runs","InvalidPipelineRunsFilterError","PythonError"],"PipelineRuns":["Runs"],"RunGroupOrError":["RunGroup","RunGroupNotFoundError","PythonError"],"SensorOrError":["Sensor","SensorNotFoundError","UnauthorizedError","PythonError"],"SensorsOrError":["Sensors","RepositoryNotFoundError","PythonError"],"StopSensorMutationResultOrError":["StopSensorMutationResult","UnauthorizedError","PythonError"],"ISolidDefinition":["CompositeSolidDefinition","SolidDefinition"],"SolidContainer":["Pipeline","PipelineSnapshot","Job","CompositeSolidDefinition","Graph"],"SolidStepStatsOrError":["SolidStepStatsConnection","SolidStepStatusUnavailableError"],"WorkspaceOrError":["Workspace","PythonError"],"WorkspaceLocationStatusEntriesOrError":["WorkspaceLocationStatusEntries","PythonError"],"GraphOrError":["Graph","GraphNotFoundError","PythonError"],"ResourceDetailsOrError":["ResourceDetails","ResourceNotFoundError","PythonError"],"ResourcesOrError":["ResourceDetailsList","RepositoryNotFoundError","PythonError"],"EnvVarWithConsumersOrError":["EnvVarWithConsumersList","PythonError"],"RunTagKeysOrError":["PythonError","RunTagKeys"],"RunTagsOrError":["PythonError","RunTags"],"RunIdsOrError":["RunIds","InvalidPipelineRunsFilterError","PythonError"],"AssetNodeOrError":["AssetNode","AssetNotFoundError"],"PartitionBackfillOrError":["PartitionBackfill","BackfillNotFoundError","PythonError"],"PartitionBackfillsOrError":["PartitionBackfills","PythonError"],"EventConnectionOrError":["EventConnection","RunNotFoundError","PythonError"],"AutoMaterializeAssetEvaluationRecordsOrError":["AutoMaterializeAssetEvaluationRecords","AutoMaterializeAssetEvaluationNeedsMigrationError"],"PartitionKeysOrError":["PartitionKeys","PartitionSubsetDeserializationError"],"AutoMaterializeRuleEvaluationData":["TextRuleEvaluationData","ParentMaterializedRuleEvaluationData","WaitingOnKeysRuleEvaluationData"],"AssetConditionEvaluationNode":["UnpartitionedAssetConditionEvaluationNode","PartitionedAssetConditionEvaluationNode","SpecificPartitionAssetConditionEvaluationNode"],"AssetConditionEvaluationRecordsOrError":["AssetConditionEvaluationRecords","AutoMaterializeAssetEvaluationNeedsMigrationError"],"SensorDryRunResult":["PythonError","SensorNotFoundError","DryRunInstigationTick"],"ScheduleDryRunResult":["DryRunInstigationTick","PythonError","ScheduleNotFoundError"],"TerminateRunsResultOrError":["TerminateRunsResult","PythonError"],"AssetWipeMutationResult":["AssetNotFoundError","UnauthorizedError","PythonError","AssetWipeSuccess"],"ReportRunlessAssetEventsResult":["UnauthorizedError","PythonError","ReportRunlessAssetEventsSuccess"],"ResumeBackfillResult":["ResumeBackfillSuccess","UnauthorizedError","PythonError"],"CancelBackfillResult":["CancelBackfillSuccess","UnauthorizedError","PythonError"],"LogTelemetryMutationResult":["LogTelemetrySuccess","PythonError"],"AddDynamicPartitionResult":["AddDynamicPartitionSuccess","UnauthorizedError","PythonError","DuplicateDynamicPartitionError"]} \ No newline at end of file +<<<<<<< HEAD +{"DisplayableEvent":["EngineEvent","ExecutionStepOutputEvent","ExpectationResult","FailureMetadata","HandledOutputEvent","LoadedInputEvent","ObjectStoreOperationResult","ResourceInitFailureEvent","ResourceInitStartedEvent","ResourceInitSuccessEvent","StepWorkerStartedEvent","StepWorkerStartingEvent","MaterializationEvent","ObservationEvent","TypeCheck"],"MarkerEvent":["EngineEvent","ResourceInitFailureEvent","ResourceInitStartedEvent","ResourceInitSuccessEvent","StepWorkerStartedEvent","StepWorkerStartingEvent"],"ErrorEvent":["EngineEvent","ExecutionStepFailureEvent","ExecutionStepUpForRetryEvent","HookErroredEvent","RunFailureEvent","ResourceInitFailureEvent"],"MessageEvent":["EngineEvent","ExecutionStepFailureEvent","ExecutionStepInputEvent","ExecutionStepOutputEvent","ExecutionStepRestartEvent","ExecutionStepSkippedEvent","ExecutionStepStartEvent","ExecutionStepSuccessEvent","ExecutionStepUpForRetryEvent","HandledOutputEvent","HookCompletedEvent","HookErroredEvent","HookSkippedEvent","LoadedInputEvent","LogMessageEvent","ObjectStoreOperationEvent","RunCanceledEvent","RunCancelingEvent","RunDequeuedEvent","RunEnqueuedEvent","RunFailureEvent","ResourceInitFailureEvent","ResourceInitStartedEvent","ResourceInitSuccessEvent","RunStartEvent","RunStartingEvent","RunSuccessEvent","StepExpectationResultEvent","StepWorkerStartedEvent","StepWorkerStartingEvent","MaterializationEvent","ObservationEvent","AssetMaterializationPlannedEvent","LogsCapturedEvent","AlertStartEvent","AlertSuccessEvent","AlertFailureEvent","AssetCheckEvaluationPlannedEvent","AssetCheckEvaluationEvent"],"RunEvent":["RunCanceledEvent","RunCancelingEvent","RunDequeuedEvent","RunEnqueuedEvent","RunFailureEvent","RunStartEvent","RunStartingEvent","RunSuccessEvent","AssetMaterializationPlannedEvent","AlertStartEvent","AlertSuccessEvent","AlertFailureEvent"],"PipelineRunStepStats":["RunStepStats"],"StepEvent":["EngineEvent","ExecutionStepFailureEvent","ExecutionStepInputEvent","ExecutionStepOutputEvent","ExecutionStepRestartEvent","ExecutionStepSkippedEvent","ExecutionStepStartEvent","ExecutionStepSuccessEvent","ExecutionStepUpForRetryEvent","HandledOutputEvent","HookCompletedEvent","HookErroredEvent","HookSkippedEvent","LoadedInputEvent","ObjectStoreOperationEvent","ResourceInitFailureEvent","ResourceInitStartedEvent","ResourceInitSuccessEvent","StepExpectationResultEvent","StepWorkerStartedEvent","StepWorkerStartingEvent","MaterializationEvent","ObservationEvent","AssetCheckEvaluationPlannedEvent","AssetCheckEvaluationEvent"],"AssetPartitionStatuses":["DefaultPartitionStatuses","MultiPartitionStatuses","TimePartitionStatuses"],"PartitionStatus1D":["TimePartitionStatuses","DefaultPartitionStatuses"],"AssetChecksOrError":["AssetChecks","AssetCheckNeedsMigrationError","AssetCheckNeedsUserCodeUpgrade","AssetCheckNeedsAgentUpgradeError"],"Instigator":["Schedule","Sensor"],"EvaluationStackEntry":["EvaluationStackListItemEntry","EvaluationStackPathEntry","EvaluationStackMapKeyEntry","EvaluationStackMapValueEntry"],"IPipelineSnapshot":["Pipeline","PipelineSnapshot","Job"],"PipelineConfigValidationError":["FieldNotDefinedConfigError","FieldsNotDefinedConfigError","MissingFieldConfigError","MissingFieldsConfigError","RuntimeMismatchConfigError","SelectorTypeConfigError"],"PipelineConfigValidationInvalid":["RunConfigValidationInvalid"],"PipelineConfigValidationResult":["InvalidSubsetError","PipelineConfigValidationValid","RunConfigValidationInvalid","PipelineNotFoundError","PythonError"],"PipelineReference":["PipelineSnapshot","UnknownPipeline"],"PipelineRun":["Run"],"DagsterRunEvent":["ExecutionStepFailureEvent","ExecutionStepInputEvent","ExecutionStepOutputEvent","ExecutionStepSkippedEvent","ExecutionStepStartEvent","ExecutionStepSuccessEvent","ExecutionStepUpForRetryEvent","ExecutionStepRestartEvent","LogMessageEvent","ResourceInitFailureEvent","ResourceInitStartedEvent","ResourceInitSuccessEvent","RunFailureEvent","RunStartEvent","RunEnqueuedEvent","RunDequeuedEvent","RunStartingEvent","RunCancelingEvent","RunCanceledEvent","RunSuccessEvent","StepWorkerStartedEvent","StepWorkerStartingEvent","HandledOutputEvent","LoadedInputEvent","LogsCapturedEvent","ObjectStoreOperationEvent","StepExpectationResultEvent","MaterializationEvent","ObservationEvent","EngineEvent","HookCompletedEvent","HookSkippedEvent","HookErroredEvent","AlertStartEvent","AlertSuccessEvent","AlertFailureEvent","AssetMaterializationPlannedEvent","AssetCheckEvaluationPlannedEvent","AssetCheckEvaluationEvent"],"PipelineRunLogsSubscriptionPayload":["PipelineRunLogsSubscriptionSuccess","PipelineRunLogsSubscriptionFailure"],"RunOrError":["Run","RunNotFoundError","PythonError"],"PipelineRunStatsSnapshot":["RunStatsSnapshot"],"RunStatsSnapshotOrError":["RunStatsSnapshot","PythonError"],"PipelineSnapshotOrError":["PipelineNotFoundError","PipelineSnapshot","PipelineSnapshotNotFoundError","PythonError"],"AssetOrError":["Asset","AssetNotFoundError"],"AssetsOrError":["AssetConnection","PythonError"],"DeletePipelineRunResult":["DeletePipelineRunSuccess","UnauthorizedError","PythonError","RunNotFoundError"],"ExecutionPlanOrError":["ExecutionPlan","RunConfigValidationInvalid","PipelineNotFoundError","InvalidSubsetError","PythonError"],"PipelineOrError":["Pipeline","PipelineNotFoundError","InvalidSubsetError","PythonError"],"ReloadRepositoryLocationMutationResult":["WorkspaceLocationEntry","ReloadNotSupported","RepositoryLocationNotFound","UnauthorizedError","PythonError"],"RepositoryLocationOrLoadError":["RepositoryLocation","PythonError"],"ReloadWorkspaceMutationResult":["Workspace","UnauthorizedError","PythonError"],"ShutdownRepositoryLocationMutationResult":["ShutdownRepositoryLocationSuccess","RepositoryLocationNotFound","UnauthorizedError","PythonError"],"TerminatePipelineExecutionFailure":["TerminateRunFailure"],"TerminatePipelineExecutionSuccess":["TerminateRunSuccess"],"TerminateRunResult":["TerminateRunSuccess","TerminateRunFailure","RunNotFoundError","UnauthorizedError","PythonError"],"ScheduleMutationResult":["PythonError","UnauthorizedError","ScheduleStateResult"],"ScheduleOrError":["Schedule","ScheduleNotFoundError","PythonError"],"SchedulerOrError":["Scheduler","SchedulerNotDefinedError","PythonError"],"SchedulesOrError":["Schedules","RepositoryNotFoundError","PythonError"],"ScheduleTickSpecificData":["ScheduleTickSuccessData","ScheduleTickFailureData"],"LaunchBackfillResult":["LaunchBackfillSuccess","PartitionSetNotFoundError","InvalidStepError","InvalidOutputError","RunConfigValidationInvalid","PipelineNotFoundError","RunConflict","UnauthorizedError","PythonError","InvalidSubsetError","PresetNotFoundError","ConflictingExecutionParamsError","NoModeProvidedError"],"ConfigTypeOrError":["EnumConfigType","CompositeConfigType","RegularConfigType","PipelineNotFoundError","ConfigTypeNotFoundError","PythonError"],"ConfigType":["ArrayConfigType","CompositeConfigType","EnumConfigType","NullableConfigType","RegularConfigType","ScalarUnionConfigType","MapConfigType"],"WrappingConfigType":["ArrayConfigType","NullableConfigType"],"DagsterType":["ListDagsterType","NullableDagsterType","RegularDagsterType"],"DagsterTypeOrError":["RegularDagsterType","PipelineNotFoundError","DagsterTypeNotFoundError","PythonError"],"WrappingDagsterType":["ListDagsterType","NullableDagsterType"],"Error":["AssetCheckNeedsMigrationError","AssetCheckNeedsUserCodeUpgrade","AssetCheckNeedsAgentUpgradeError","AssetNotFoundError","ConflictingExecutionParamsError","ConfigTypeNotFoundError","DagsterTypeNotFoundError","InvalidPipelineRunsFilterError","InvalidSubsetError","ModeNotFoundError","NoModeProvidedError","PartitionSetNotFoundError","PipelineNotFoundError","RunConflict","PipelineSnapshotNotFoundError","PresetNotFoundError","PythonError","ErrorChainLink","UnauthorizedError","ReloadNotSupported","RepositoryLocationNotFound","RepositoryNotFoundError","ResourceNotFoundError","RunGroupNotFoundError","RunNotFoundError","ScheduleNotFoundError","SchedulerNotDefinedError","SensorNotFoundError","DuplicateDynamicPartitionError","InstigationStateNotFoundError","SolidStepStatusUnavailableError","GraphNotFoundError","BackfillNotFoundError","PartitionSubsetDeserializationError","AutoMaterializeAssetEvaluationNeedsMigrationError"],"PipelineRunConflict":["RunConflict"],"PipelineRunNotFoundError":["RunNotFoundError"],"RepositoriesOrError":["RepositoryConnection","RepositoryNotFoundError","PythonError"],"RepositoryOrError":["PythonError","Repository","RepositoryNotFoundError"],"InstigationTypeSpecificData":["SensorData","ScheduleData"],"InstigationStateOrError":["InstigationState","InstigationStateNotFoundError","PythonError"],"InstigationStatesOrError":["InstigationStates","PythonError"],"MetadataEntry":["TableSchemaMetadataEntry","TableMetadataEntry","FloatMetadataEntry","IntMetadataEntry","JsonMetadataEntry","BoolMetadataEntry","MarkdownMetadataEntry","PathMetadataEntry","NotebookMetadataEntry","PythonArtifactMetadataEntry","TextMetadataEntry","UrlMetadataEntry","PipelineRunMetadataEntry","AssetMetadataEntry","JobMetadataEntry","NullMetadataEntry"],"PartitionRunConfigOrError":["PartitionRunConfig","PythonError"],"AssetBackfillStatus":["AssetPartitionsStatusCounts","UnpartitionedAssetStatus"],"PartitionSetOrError":["PartitionSet","PartitionSetNotFoundError","PythonError"],"PartitionSetsOrError":["PartitionSets","PipelineNotFoundError","PythonError"],"PartitionsOrError":["Partitions","PythonError"],"PartitionStatusesOrError":["PartitionStatuses","PythonError"],"PartitionTagsOrError":["PartitionTags","PythonError"],"RunConfigSchemaOrError":["RunConfigSchema","PipelineNotFoundError","InvalidSubsetError","ModeNotFoundError","PythonError"],"LaunchRunResult":["LaunchRunSuccess","InvalidStepError","InvalidOutputError","RunConfigValidationInvalid","PipelineNotFoundError","RunConflict","UnauthorizedError","PythonError","InvalidSubsetError","PresetNotFoundError","ConflictingExecutionParamsError","NoModeProvidedError"],"LaunchRunReexecutionResult":["LaunchRunSuccess","InvalidStepError","InvalidOutputError","RunConfigValidationInvalid","PipelineNotFoundError","RunConflict","UnauthorizedError","PythonError","InvalidSubsetError","PresetNotFoundError","ConflictingExecutionParamsError","NoModeProvidedError"],"LaunchPipelineRunSuccess":["LaunchRunSuccess"],"RunsOrError":["Runs","InvalidPipelineRunsFilterError","PythonError"],"PipelineRuns":["Runs"],"RunGroupOrError":["RunGroup","RunGroupNotFoundError","PythonError"],"SensorOrError":["Sensor","SensorNotFoundError","UnauthorizedError","PythonError"],"SensorsOrError":["Sensors","RepositoryNotFoundError","PythonError"],"StopSensorMutationResultOrError":["StopSensorMutationResult","UnauthorizedError","PythonError"],"ISolidDefinition":["CompositeSolidDefinition","SolidDefinition"],"SolidContainer":["Pipeline","PipelineSnapshot","Job","CompositeSolidDefinition","Graph"],"SolidStepStatsOrError":["SolidStepStatsConnection","SolidStepStatusUnavailableError"],"WorkspaceOrError":["Workspace","PythonError"],"WorkspaceLocationStatusEntriesOrError":["WorkspaceLocationStatusEntries","PythonError"],"GraphOrError":["Graph","GraphNotFoundError","PythonError"],"ResourceDetailsOrError":["ResourceDetails","ResourceNotFoundError","PythonError"],"ResourcesOrError":["ResourceDetailsList","RepositoryNotFoundError","PythonError"],"EnvVarWithConsumersOrError":["EnvVarWithConsumersList","PythonError"],"RunTagKeysOrError":["PythonError","RunTagKeys"],"RunTagsOrError":["PythonError","RunTags"],"RunIdsOrError":["RunIds","InvalidPipelineRunsFilterError","PythonError"],"AssetNodeOrError":["AssetNode","AssetNotFoundError"],"PartitionBackfillOrError":["PartitionBackfill","BackfillNotFoundError","PythonError"],"PartitionBackfillsOrError":["PartitionBackfills","PythonError"],"EventConnectionOrError":["EventConnection","RunNotFoundError","PythonError"],"AutoMaterializeAssetEvaluationRecordsOrError":["AutoMaterializeAssetEvaluationRecords","AutoMaterializeAssetEvaluationNeedsMigrationError"],"PartitionKeysOrError":["PartitionKeys","PartitionSubsetDeserializationError"],"AutoMaterializeRuleEvaluationData":["TextRuleEvaluationData","ParentMaterializedRuleEvaluationData","WaitingOnKeysRuleEvaluationData"],"AssetConditionEvaluationNode":["UnpartitionedAssetConditionEvaluationNode","PartitionedAssetConditionEvaluationNode","SpecificPartitionAssetConditionEvaluationNode"],"AssetConditionEvaluationRecordsOrError":["AssetConditionEvaluationRecords","AutoMaterializeAssetEvaluationNeedsMigrationError"],"SensorDryRunResult":["PythonError","SensorNotFoundError","DryRunInstigationTick"],"ScheduleDryRunResult":["DryRunInstigationTick","PythonError","ScheduleNotFoundError"],"TerminateRunsResultOrError":["TerminateRunsResult","PythonError"],"AssetWipeMutationResult":["AssetNotFoundError","UnauthorizedError","PythonError","AssetWipeSuccess"],"ReportRunlessAssetEventsResult":["UnauthorizedError","PythonError","ReportRunlessAssetEventsSuccess"],"ResumeBackfillResult":["ResumeBackfillSuccess","UnauthorizedError","PythonError"],"CancelBackfillResult":["CancelBackfillSuccess","UnauthorizedError","PythonError"],"LogTelemetryMutationResult":["LogTelemetrySuccess","PythonError"],"AddDynamicPartitionResult":["AddDynamicPartitionSuccess","UnauthorizedError","PythonError","DuplicateDynamicPartitionError"]} +======= +{ + "DisplayableEvent": [ + "EngineEvent", + "ExecutionStepOutputEvent", + "ExpectationResult", + "FailureMetadata", + "HandledOutputEvent", + "LoadedInputEvent", + "ObjectStoreOperationResult", + "ResourceInitFailureEvent", + "ResourceInitStartedEvent", + "ResourceInitSuccessEvent", + "StepWorkerStartedEvent", + "StepWorkerStartingEvent", + "MaterializationEvent", + "ObservationEvent", + "TypeCheck" + ], + "MarkerEvent": [ + "EngineEvent", + "ResourceInitFailureEvent", + "ResourceInitStartedEvent", + "ResourceInitSuccessEvent", + "StepWorkerStartedEvent", + "StepWorkerStartingEvent" + ], + "ErrorEvent": [ + "EngineEvent", + "ExecutionStepFailureEvent", + "ExecutionStepUpForRetryEvent", + "HookErroredEvent", + "RunFailureEvent", + "ResourceInitFailureEvent" + ], + "MessageEvent": [ + "EngineEvent", + "ExecutionStepFailureEvent", + "ExecutionStepInputEvent", + "ExecutionStepOutputEvent", + "ExecutionStepRestartEvent", + "ExecutionStepSkippedEvent", + "ExecutionStepStartEvent", + "ExecutionStepSuccessEvent", + "ExecutionStepUpForRetryEvent", + "HandledOutputEvent", + "HookCompletedEvent", + "HookErroredEvent", + "HookSkippedEvent", + "LoadedInputEvent", + "LogMessageEvent", + "ObjectStoreOperationEvent", + "RunCanceledEvent", + "RunCancelingEvent", + "RunDequeuedEvent", + "RunEnqueuedEvent", + "RunFailureEvent", + "ResourceInitFailureEvent", + "ResourceInitStartedEvent", + "ResourceInitSuccessEvent", + "RunStartEvent", + "RunStartingEvent", + "RunSuccessEvent", + "StepExpectationResultEvent", + "StepWorkerStartedEvent", + "StepWorkerStartingEvent", + "MaterializationEvent", + "ObservationEvent", + "AssetMaterializationPlannedEvent", + "LogsCapturedEvent", + "AlertStartEvent", + "AlertSuccessEvent", + "AlertFailureEvent", + "AssetCheckEvaluationPlannedEvent", + "AssetCheckEvaluationEvent" + ], + "RunEvent": [ + "RunCanceledEvent", + "RunCancelingEvent", + "RunDequeuedEvent", + "RunEnqueuedEvent", + "RunFailureEvent", + "RunStartEvent", + "RunStartingEvent", + "RunSuccessEvent", + "AssetMaterializationPlannedEvent", + "AlertStartEvent", + "AlertSuccessEvent", + "AlertFailureEvent" + ], + "PipelineRunStepStats": [ + "RunStepStats" + ], + "StepEvent": [ + "EngineEvent", + "ExecutionStepFailureEvent", + "ExecutionStepInputEvent", + "ExecutionStepOutputEvent", + "ExecutionStepRestartEvent", + "ExecutionStepSkippedEvent", + "ExecutionStepStartEvent", + "ExecutionStepSuccessEvent", + "ExecutionStepUpForRetryEvent", + "HandledOutputEvent", + "HookCompletedEvent", + "HookErroredEvent", + "HookSkippedEvent", + "LoadedInputEvent", + "ObjectStoreOperationEvent", + "ResourceInitFailureEvent", + "ResourceInitStartedEvent", + "ResourceInitSuccessEvent", + "StepExpectationResultEvent", + "StepWorkerStartedEvent", + "StepWorkerStartingEvent", + "MaterializationEvent", + "ObservationEvent", + "AssetCheckEvaluationPlannedEvent", + "AssetCheckEvaluationEvent" + ], + "AssetPartitionStatuses": [ + "DefaultPartitionStatuses", + "MultiPartitionStatuses", + "TimePartitionStatuses" + ], + "PartitionStatus1D": [ + "TimePartitionStatuses", + "DefaultPartitionStatuses" + ], + "AssetChecksOrError": [ + "AssetChecks", + "AssetCheckNeedsMigrationError", + "AssetCheckNeedsUserCodeUpgrade", + "AssetCheckNeedsAgentUpgradeError" + ], + "Instigator": [ + "Schedule", + "Sensor" + ], + "EvaluationStackEntry": [ + "EvaluationStackListItemEntry", + "EvaluationStackPathEntry", + "EvaluationStackMapKeyEntry", + "EvaluationStackMapValueEntry" + ], + "IPipelineSnapshot": [ + "Pipeline", + "PipelineSnapshot", + "Job" + ], + "PipelineConfigValidationError": [ + "FieldNotDefinedConfigError", + "FieldsNotDefinedConfigError", + "MissingFieldConfigError", + "MissingFieldsConfigError", + "RuntimeMismatchConfigError", + "SelectorTypeConfigError" + ], + "PipelineConfigValidationInvalid": [ + "RunConfigValidationInvalid" + ], + "PipelineConfigValidationResult": [ + "InvalidSubsetError", + "PipelineConfigValidationValid", + "RunConfigValidationInvalid", + "PipelineNotFoundError", + "PythonError" + ], + "PipelineReference": [ + "PipelineSnapshot", + "UnknownPipeline" + ], + "PipelineRun": [ + "Run" + ], + "DagsterRunEvent": [ + "ExecutionStepFailureEvent", + "ExecutionStepInputEvent", + "ExecutionStepOutputEvent", + "ExecutionStepSkippedEvent", + "ExecutionStepStartEvent", + "ExecutionStepSuccessEvent", + "ExecutionStepUpForRetryEvent", + "ExecutionStepRestartEvent", + "LogMessageEvent", + "ResourceInitFailureEvent", + "ResourceInitStartedEvent", + "ResourceInitSuccessEvent", + "RunFailureEvent", + "RunStartEvent", + "RunEnqueuedEvent", + "RunDequeuedEvent", + "RunStartingEvent", + "RunCancelingEvent", + "RunCanceledEvent", + "RunSuccessEvent", + "StepWorkerStartedEvent", + "StepWorkerStartingEvent", + "HandledOutputEvent", + "LoadedInputEvent", + "LogsCapturedEvent", + "ObjectStoreOperationEvent", + "StepExpectationResultEvent", + "MaterializationEvent", + "ObservationEvent", + "EngineEvent", + "HookCompletedEvent", + "HookSkippedEvent", + "HookErroredEvent", + "AlertStartEvent", + "AlertSuccessEvent", + "AlertFailureEvent", + "AssetMaterializationPlannedEvent", + "AssetCheckEvaluationPlannedEvent", + "AssetCheckEvaluationEvent" + ], + "PipelineRunLogsSubscriptionPayload": [ + "PipelineRunLogsSubscriptionSuccess", + "PipelineRunLogsSubscriptionFailure" + ], + "RunOrError": [ + "Run", + "RunNotFoundError", + "PythonError" + ], + "PipelineRunStatsSnapshot": [ + "RunStatsSnapshot" + ], + "RunStatsSnapshotOrError": [ + "RunStatsSnapshot", + "PythonError" + ], + "PipelineSnapshotOrError": [ + "PipelineNotFoundError", + "PipelineSnapshot", + "PipelineSnapshotNotFoundError", + "PythonError" + ], + "AssetOrError": [ + "Asset", + "AssetNotFoundError" + ], + "AssetsOrError": [ + "AssetConnection", + "PythonError" + ], + "DeletePipelineRunResult": [ + "DeletePipelineRunSuccess", + "UnauthorizedError", + "PythonError", + "RunNotFoundError" + ], + "ExecutionPlanOrError": [ + "ExecutionPlan", + "RunConfigValidationInvalid", + "PipelineNotFoundError", + "InvalidSubsetError", + "PythonError" + ], + "PipelineOrError": [ + "Pipeline", + "PipelineNotFoundError", + "InvalidSubsetError", + "PythonError" + ], + "ReloadRepositoryLocationMutationResult": [ + "WorkspaceLocationEntry", + "ReloadNotSupported", + "RepositoryLocationNotFound", + "UnauthorizedError", + "PythonError" + ], + "RepositoryLocationOrLoadError": [ + "RepositoryLocation", + "PythonError" + ], + "ReloadWorkspaceMutationResult": [ + "Workspace", + "UnauthorizedError", + "PythonError" + ], + "ShutdownRepositoryLocationMutationResult": [ + "ShutdownRepositoryLocationSuccess", + "RepositoryLocationNotFound", + "UnauthorizedError", + "PythonError" + ], + "TerminatePipelineExecutionFailure": [ + "TerminateRunFailure" + ], + "TerminatePipelineExecutionSuccess": [ + "TerminateRunSuccess" + ], + "TerminateRunResult": [ + "TerminateRunSuccess", + "TerminateRunFailure", + "RunNotFoundError", + "UnauthorizedError", + "PythonError" + ], + "ScheduleMutationResult": [ + "PythonError", + "UnauthorizedError", + "ScheduleStateResult" + ], + "ScheduleOrError": [ + "Schedule", + "ScheduleNotFoundError", + "PythonError" + ], + "SchedulerOrError": [ + "Scheduler", + "SchedulerNotDefinedError", + "PythonError" + ], + "SchedulesOrError": [ + "Schedules", + "RepositoryNotFoundError", + "PythonError" + ], + "ScheduleTickSpecificData": [ + "ScheduleTickSuccessData", + "ScheduleTickFailureData" + ], + "LaunchBackfillResult": [ + "LaunchBackfillSuccess", + "PartitionSetNotFoundError", + "InvalidStepError", + "InvalidOutputError", + "RunConfigValidationInvalid", + "PipelineNotFoundError", + "RunConflict", + "UnauthorizedError", + "PythonError", + "InvalidSubsetError", + "PresetNotFoundError", + "ConflictingExecutionParamsError", + "NoModeProvidedError" + ], + "ConfigTypeOrError": [ + "EnumConfigType", + "CompositeConfigType", + "RegularConfigType", + "PipelineNotFoundError", + "ConfigTypeNotFoundError", + "PythonError" + ], + "ConfigType": [ + "ArrayConfigType", + "CompositeConfigType", + "EnumConfigType", + "NullableConfigType", + "RegularConfigType", + "ScalarUnionConfigType", + "MapConfigType" + ], + "WrappingConfigType": [ + "ArrayConfigType", + "NullableConfigType" + ], + "DagsterType": [ + "ListDagsterType", + "NullableDagsterType", + "RegularDagsterType" + ], + "DagsterTypeOrError": [ + "RegularDagsterType", + "PipelineNotFoundError", + "DagsterTypeNotFoundError", + "PythonError" + ], + "WrappingDagsterType": [ + "ListDagsterType", + "NullableDagsterType" + ], + "Error": [ + "AssetCheckNeedsMigrationError", + "AssetCheckNeedsUserCodeUpgrade", + "AssetCheckNeedsAgentUpgradeError", + "AssetNotFoundError", + "ConflictingExecutionParamsError", + "ConfigTypeNotFoundError", + "DagsterTypeNotFoundError", + "InvalidPipelineRunsFilterError", + "InvalidSubsetError", + "ModeNotFoundError", + "NoModeProvidedError", + "PartitionSetNotFoundError", + "PipelineNotFoundError", + "RunConflict", + "PipelineSnapshotNotFoundError", + "PresetNotFoundError", + "PythonError", + "ErrorChainLink", + "UnauthorizedError", + "ReloadNotSupported", + "RepositoryLocationNotFound", + "RepositoryNotFoundError", + "ResourceNotFoundError", + "RunGroupNotFoundError", + "RunNotFoundError", + "ScheduleNotFoundError", + "SchedulerNotDefinedError", + "SensorNotFoundError", + "DuplicateDynamicPartitionError", + "InstigationStateNotFoundError", + "SolidStepStatusUnavailableError", + "GraphNotFoundError", + "BackfillNotFoundError", + "PartitionSubsetDeserializationError", + "AutoMaterializeAssetEvaluationNeedsMigrationError" + ], + "PipelineRunConflict": [ + "RunConflict" + ], + "PipelineRunNotFoundError": [ + "RunNotFoundError" + ], + "RepositoriesOrError": [ + "RepositoryConnection", + "PythonError" + ], + "RepositoryOrError": [ + "PythonError", + "Repository", + "RepositoryNotFoundError" + ], + "InstigationTypeSpecificData": [ + "SensorData", + "ScheduleData" + ], + "InstigationStateOrError": [ + "InstigationState", + "InstigationStateNotFoundError", + "PythonError" + ], + "InstigationStatesOrError": [ + "InstigationStates", + "PythonError" + ], + "MetadataEntry": [ + "TableSchemaMetadataEntry", + "TableMetadataEntry", + "FloatMetadataEntry", + "IntMetadataEntry", + "JsonMetadataEntry", + "BoolMetadataEntry", + "MarkdownMetadataEntry", + "PathMetadataEntry", + "NotebookMetadataEntry", + "PythonArtifactMetadataEntry", + "TextMetadataEntry", + "UrlMetadataEntry", + "PipelineRunMetadataEntry", + "AssetMetadataEntry", + "JobMetadataEntry", + "NullMetadataEntry" + ], + "PartitionRunConfigOrError": [ + "PartitionRunConfig", + "PythonError" + ], + "AssetBackfillStatus": [ + "AssetPartitionsStatusCounts", + "UnpartitionedAssetStatus" + ], + "PartitionSetOrError": [ + "PartitionSet", + "PartitionSetNotFoundError", + "PythonError" + ], + "PartitionSetsOrError": [ + "PartitionSets", + "PipelineNotFoundError", + "PythonError" + ], + "PartitionsOrError": [ + "Partitions", + "PythonError" + ], + "PartitionStatusesOrError": [ + "PartitionStatuses", + "PythonError" + ], + "PartitionTagsOrError": [ + "PartitionTags", + "PythonError" + ], + "RunConfigSchemaOrError": [ + "RunConfigSchema", + "PipelineNotFoundError", + "InvalidSubsetError", + "ModeNotFoundError", + "PythonError" + ], + "LaunchRunResult": [ + "LaunchRunSuccess", + "InvalidStepError", + "InvalidOutputError", + "RunConfigValidationInvalid", + "PipelineNotFoundError", + "RunConflict", + "UnauthorizedError", + "PythonError", + "InvalidSubsetError", + "PresetNotFoundError", + "ConflictingExecutionParamsError", + "NoModeProvidedError" + ], + "LaunchRunReexecutionResult": [ + "LaunchRunSuccess", + "InvalidStepError", + "InvalidOutputError", + "RunConfigValidationInvalid", + "PipelineNotFoundError", + "RunConflict", + "UnauthorizedError", + "PythonError", + "InvalidSubsetError", + "PresetNotFoundError", + "ConflictingExecutionParamsError", + "NoModeProvidedError" + ], + "LaunchPipelineRunSuccess": [ + "LaunchRunSuccess" + ], + "RunsOrError": [ + "Runs", + "InvalidPipelineRunsFilterError", + "PythonError" + ], + "PipelineRuns": [ + "Runs" + ], + "RunGroupOrError": [ + "RunGroup", + "RunGroupNotFoundError", + "PythonError" + ], + "SensorOrError": [ + "Sensor", + "SensorNotFoundError", + "UnauthorizedError", + "PythonError" + ], + "SensorsOrError": [ + "Sensors", + "RepositoryNotFoundError", + "PythonError" + ], + "StopSensorMutationResultOrError": [ + "StopSensorMutationResult", + "UnauthorizedError", + "PythonError" + ], + "ISolidDefinition": [ + "CompositeSolidDefinition", + "SolidDefinition" + ], + "SolidContainer": [ + "Pipeline", + "PipelineSnapshot", + "Job", + "CompositeSolidDefinition", + "Graph" + ], + "SolidStepStatsOrError": [ + "SolidStepStatsConnection", + "SolidStepStatusUnavailableError" + ], + "WorkspaceOrError": [ + "Workspace", + "PythonError" + ], + "WorkspaceLocationStatusEntriesOrError": [ + "WorkspaceLocationStatusEntries", + "PythonError" + ], + "GraphOrError": [ + "Graph", + "GraphNotFoundError", + "PythonError" + ], + "ResourceDetailsOrError": [ + "ResourceDetails", + "ResourceNotFoundError", + "PythonError" + ], + "ResourcesOrError": [ + "ResourceDetailsList", + "RepositoryNotFoundError", + "PythonError" + ], + "EnvVarWithConsumersOrError": [ + "EnvVarWithConsumersList", + "PythonError" + ], + "RunTagKeysOrError": [ + "PythonError", + "RunTagKeys" + ], + "RunTagsOrError": [ + "PythonError", + "RunTags" + ], + "RunIdsOrError": [ + "RunIds", + "InvalidPipelineRunsFilterError", + "PythonError" + ], + "AssetNodeOrError": [ + "AssetNode", + "AssetNotFoundError" + ], + "PartitionBackfillOrError": [ + "PartitionBackfill", + "BackfillNotFoundError", + "PythonError" + ], + "PartitionBackfillsOrError": [ + "PartitionBackfills", + "PythonError" + ], + "EventConnectionOrError": [ + "EventConnection", + "RunNotFoundError", + "PythonError" + ], + "AutoMaterializeAssetEvaluationRecordsOrError": [ + "AutoMaterializeAssetEvaluationRecords", + "AutoMaterializeAssetEvaluationNeedsMigrationError" + ], + "PartitionKeysOrError": [ + "PartitionKeys", + "PartitionSubsetDeserializationError" + ], + "AutoMaterializeRuleEvaluationData": [ + "TextRuleEvaluationData", + "ParentMaterializedRuleEvaluationData", + "WaitingOnKeysRuleEvaluationData" + ], + "AssetConditionEvaluationNode": [ + "UnpartitionedAssetConditionEvaluationNode", + "PartitionedAssetConditionEvaluationNode", + "SpecificPartitionAssetConditionEvaluationNode" + ], + "AssetConditionEvaluationRecordsOrError": [ + "AssetConditionEvaluationRecords", + "AutoMaterializeAssetEvaluationNeedsMigrationError" + ], + "SensorDryRunResult": [ + "PythonError", + "SensorNotFoundError", + "DryRunInstigationTick" + ], + "ScheduleDryRunResult": [ + "DryRunInstigationTick", + "PythonError", + "ScheduleNotFoundError" + ], + "TerminateRunsResultOrError": [ + "TerminateRunsResult", + "PythonError" + ], + "AssetWipeMutationResult": [ + "AssetNotFoundError", + "UnauthorizedError", + "PythonError", + "AssetWipeSuccess" + ], + "ReportRunlessAssetEventsResult": [ + "UnauthorizedError", + "PythonError", + "ReportRunlessAssetEventsSuccess" + ], + "ResumeBackfillResult": [ + "ResumeBackfillSuccess", + "UnauthorizedError", + "PythonError" + ], + "CancelBackfillResult": [ + "CancelBackfillSuccess", + "UnauthorizedError", + "PythonError" + ], + "LogTelemetryMutationResult": [ + "LogTelemetrySuccess", + "PythonError" + ], + "AddDynamicPartitionResult": [ + "AddDynamicPartitionSuccess", + "UnauthorizedError", + "PythonError", + "DuplicateDynamicPartitionError" + ] +} +>>>>>>> 7049e7d26b (??) diff --git a/js_modules/dagster-ui/packages/ui-core/src/runs/TimeElapsed.tsx b/js_modules/dagster-ui/packages/ui-core/src/runs/TimeElapsed.tsx index 2584bfb2ae2e4..b3509395dc94e 100644 --- a/js_modules/dagster-ui/packages/ui-core/src/runs/TimeElapsed.tsx +++ b/js_modules/dagster-ui/packages/ui-core/src/runs/TimeElapsed.tsx @@ -6,11 +6,19 @@ import {formatElapsedTimeWithMsec, formatElapsedTimeWithoutMsec} from '../app/Ut export interface Props { startUnix: number | null; endUnix: number | null; +<<<<<<< HEAD showMsec?: boolean; } export const TimeElapsed = (props: Props) => { const {startUnix, endUnix, showMsec} = props; +======= + msec?: boolean; +} + +export const TimeElapsed = (props: Props) => { + const {startUnix, endUnix, msec} = props; +>>>>>>> 1d3458f405 (??) const [endTime, setEndTime] = React.useState(() => (endUnix ? endUnix * 1000 : null)); const interval = React.useRef>(); @@ -46,7 +54,11 @@ export const TimeElapsed = (props: Props) => { {startTime +<<<<<<< HEAD ? showMsec +======= + ? msec +>>>>>>> 1d3458f405 (??) ? formatElapsedTimeWithMsec((endTime || Date.now()) - startTime) : formatElapsedTimeWithoutMsec((endTime || Date.now()) - startTime) : '–'} diff --git a/python_modules/dagster-test/dagster_test/toys/auto_materializing/repo_1.py b/python_modules/dagster-test/dagster_test/toys/auto_materializing/repo_1.py index e19079d1dfe18..4e1dbc8e48855 100644 --- a/python_modules/dagster-test/dagster_test/toys/auto_materializing/repo_1.py +++ b/python_modules/dagster-test/dagster_test/toys/auto_materializing/repo_1.py @@ -1,4 +1,11 @@ -from dagster import AutoMaterializePolicy, DailyPartitionsDefinition, asset, repository +from dagster import ( + AutoMaterializePolicy, + DailyPartitionsDefinition, + DynamicPartitionsDefinition, + MultiPartitionsDefinition, + asset, + repository, +) ### Non partitioned ## @@ -42,6 +49,21 @@ def eager_downstream_1_partitioned(eager_upstream_partitioned): return eager_upstream_partitioned + 1 +customers_partitions_def = DynamicPartitionsDefinition(name="customers") +multipartition_w_dynamic_partitions_def = MultiPartitionsDefinition( + {"customers": customers_partitions_def, "daily": DailyPartitionsDefinition("2023-01-01")} +) + + +@asset( + auto_materialize_policy=AutoMaterializePolicy.eager(), + partitions_def=multipartition_w_dynamic_partitions_def, + deps=[eager_downstream_0_point_5_partitioned], +) +def eager_downstream_2_partitioned(eager_upstream_partitioned): + return eager_upstream_partitioned + 1 + + @repository def auto_materialize_repo_1(): return [ @@ -51,4 +73,5 @@ def auto_materialize_repo_1(): eager_upstream_partitioned, eager_downstream_1_partitioned, eager_downstream_0_point_5_partitioned, + eager_downstream_2_partitioned, ] diff --git a/python_modules/dagster/dagster_tests/cli_tests/workspace_tests/hello_world_file_in_directory/hello_world_repository.py b/python_modules/dagster/dagster_tests/cli_tests/workspace_tests/hello_world_file_in_directory/hello_world_repository.py index 3d496b80c0b22..69e95db328013 100644 --- a/python_modules/dagster/dagster_tests/cli_tests/workspace_tests/hello_world_file_in_directory/hello_world_repository.py +++ b/python_modules/dagster/dagster_tests/cli_tests/workspace_tests/hello_world_file_in_directory/hello_world_repository.py @@ -1,6 +1,7 @@ # type: ignore from dagster import repository + from src.jobs import hello_world_job From 46b5fc72e1544e284134cc222c748950823962ef Mon Sep 17 00:00:00 2001 From: Marco Salazar Date: Tue, 9 Jan 2024 12:33:37 -0500 Subject: [PATCH 22/56] ?? --- .../assets/AssetEventMetadataEntriesTable.tsx | 9 +-- .../PolicyEvaluationTable.tsx | 69 +++---------------- .../PolicyEvaluationTable.stories.tsx | 7 -- .../hello_world_repository.py | 1 - 4 files changed, 13 insertions(+), 73 deletions(-) diff --git a/js_modules/dagster-ui/packages/ui-core/src/assets/AssetEventMetadataEntriesTable.tsx b/js_modules/dagster-ui/packages/ui-core/src/assets/AssetEventMetadataEntriesTable.tsx index cdd924a6c3881..3813f99a54b18 100644 --- a/js_modules/dagster-ui/packages/ui-core/src/assets/AssetEventMetadataEntriesTable.tsx +++ b/js_modules/dagster-ui/packages/ui-core/src/assets/AssetEventMetadataEntriesTable.tsx @@ -21,10 +21,11 @@ export const AssetEventMetadataEntriesTable = ({ event, observations, }: { - event: Pick< - AssetObservationFragment | AssetMaterializationFragment, - 'metadataEntries' | 'timestamp' - > | null; + event: + | (Pick & { + timestamp?: string | number; + }) + | null; observations?: (AssetObservationFragment | AssetMaterializationFragment)[]; }) => { if (!event || (!event.metadataEntries.length && !observations?.length)) { diff --git a/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/PolicyEvaluationTable.tsx b/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/PolicyEvaluationTable.tsx index bee1f101564b6..7ec8210fd76f8 100644 --- a/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/PolicyEvaluationTable.tsx +++ b/js_modules/dagster-ui/packages/ui-core/src/assets/AutoMaterializePolicyPage/PolicyEvaluationTable.tsx @@ -12,12 +12,8 @@ import { import * as React from 'react'; import styled, {css} from 'styled-components'; -<<<<<<< Updated upstream -import {assertUnreachable} from '../../app/Util'; -======= import {AssetConditionEvaluationStatus} from '../../graphql/types'; import {MetadataEntryFragment} from '../../metadata/types/MetadataEntry.types'; ->>>>>>> Stashed changes import {TimeElapsed} from '../../runs/TimeElapsed'; import {AssetEventMetadataEntriesTable} from '../AssetEventMetadataEntriesTable'; import {AssetViewDefinitionNodeFragment} from '../types/AssetView.types'; @@ -27,20 +23,11 @@ import {PolicyEvaluationCondition} from './PolicyEvaluationCondition'; import {PolicyEvaluationStatusTag} from './PolicyEvaluationStatusTag'; import {FlattenedConditionEvaluation, flattenEvaluations} from './flattenEvaluations'; import { -<<<<<<< Updated upstream - AssetConditionEvaluation, - AssetConditionEvaluationStatus, - PartitionedAssetConditionEvaluation, - SpecificPartitionAssetConditionEvaluation, - UnpartitionedAssetConditionEvaluation, -} from './types'; -======= AssetConditionEvaluationRecordFragment, PartitionedAssetConditionEvaluationNodeFragment, SpecificPartitionAssetConditionEvaluationNodeFragment, UnpartitionedAssetConditionEvaluationNodeFragment, } from './types/GetEvaluationsQuery.types'; ->>>>>>> Stashed changes interface Props { evaluationRecord: Pick; @@ -48,20 +35,6 @@ interface Props { selectPartition: (partitionKey: string | null) => void; } -<<<<<<< Updated upstream -export const PolicyEvaluationTable = ({ - rootEvaluation, -}: Props) => { - switch (rootEvaluation.__typename) { - case 'UnpartitionedAssetConditionEvaluation': - case 'SpecificPartitionAssetConditionEvaluation': - return ; - case 'PartitionedAssetConditionEvaluation': - return ; - default: - return assertUnreachable(rootEvaluation); - } -======= export const PolicyEvaluationTable = ({evaluationRecord, definition, selectPartition}: Props) => { const flattened = React.useMemo(() => flattenEvaluations(evaluationRecord), [evaluationRecord]); if (flattened[0]?.evaluation.__typename === 'PartitionedAssetConditionEvaluationNode') { @@ -85,19 +58,11 @@ export const PolicyEvaluationTable = ({evaluationRecord, definition, selectParti } /> ); ->>>>>>> Stashed changes }; const UnpartitionedPolicyEvaluationTable = ({ flattenedRecords, }: { -<<<<<<< Updated upstream - rootEvaluation: UnpartitionedAssetConditionEvaluation | SpecificPartitionAssetConditionEvaluation; -}) => { - const [hoveredKey, setHoveredKey] = React.useState(null); - const flattened = React.useMemo(() => flattenEvaluations(rootEvaluation), [rootEvaluation]); - const showDuration = rootEvaluation.__typename === 'UnpartitionedAssetConditionEvaluation'; -======= flattenedRecords: | FlattenedConditionEvaluation[] | FlattenedConditionEvaluation[]; @@ -106,26 +71,17 @@ const UnpartitionedPolicyEvaluationTable = ({ const isSpecificPartitionAssetConditionEvaluations = flattenedRecords[0]?.evaluation.__typename === 'SpecificPartitionAssetConditionEvaluationNode'; ->>>>>>> Stashed changes return ( Condition Result -<<<<<<< Updated upstream - {showDuration ? Duration : null} -======= {isSpecificPartitionAssetConditionEvaluations ? null : Duration} ->>>>>>> Stashed changes Details -<<<<<<< Updated upstream - {flattened.map(({evaluation, id, parentId, depth, type}) => { - const {description, status} = evaluation; -======= {flattenedRecords.map(({evaluation, id, parentId, depth, type}) => { const {description, status} = evaluation; let endTimestamp, startTimestamp; @@ -133,7 +89,6 @@ const UnpartitionedPolicyEvaluationTable = ({ endTimestamp = evaluation.endTimestamp; startTimestamp = evaluation.startTimestamp; } ->>>>>>> Stashed changes return ( -<<<<<<< Updated upstream - {showDuration ? ( - - {evaluation.__typename === 'UnpartitionedAssetConditionEvaluation' ? ( - - ) : null} - - ) : null} - -======= {startTimestamp && endTimestamp ? ( @@ -176,7 +118,6 @@ const UnpartitionedPolicyEvaluationTable = ({ {evaluation.metadataEntries ? : null} ->>>>>>> Stashed changes ); })} @@ -188,12 +129,18 @@ const UnpartitionedPolicyEvaluationTable = ({ const ViewDetailsButton = ({ evaluation, }: { - evaluation: {metadataEntries: MetadataEntryFragment[]; timestamp: string}; + evaluation: {metadataEntries: MetadataEntryFragment[]}; }) => { const [showDetails, setShowDetails] = React.useState(false); return ( <> - + { + setShowDetails(false); + }} + >