From a750a7501673a6e8357ef2e3f22140476f83d8ec Mon Sep 17 00:00:00 2001 From: Oded Messer Date: Mon, 26 Jun 2023 04:53:39 +0300 Subject: [PATCH] Stable experiment range sorting (#9660) * secondary sorting by rev alphanumerically. to introduce determinism * add UT --- dvc/repo/experiments/collect.py | 10 +-- tests/unit/repo/experiments/test_collect.py | 69 +++++++++++++++++++++ 2 files changed, 75 insertions(+), 4 deletions(-) create mode 100644 tests/unit/repo/experiments/test_collect.py diff --git a/dvc/repo/experiments/collect.py b/dvc/repo/experiments/collect.py index a2c032c790..b652cf2f76 100644 --- a/dvc/repo/experiments/collect.py +++ b/dvc/repo/experiments/collect.py @@ -10,6 +10,7 @@ Iterator, List, Optional, + Tuple, Union, ) @@ -369,12 +370,13 @@ def _describe( def _sorted_ranges(exp_ranges: Iterable["ExpRange"]) -> List["ExpRange"]: - """Return list of ExpRange sorted by timestamp.""" + """Return list of ExpRange sorted by (timestamp, rev).""" - def _head_timestamp(exp_range: "ExpRange") -> datetime: + def _head_timestamp(exp_range: "ExpRange") -> Tuple[datetime, str]: head_exp = first(exp_range.revs) if head_exp and head_exp.data and head_exp.data.timestamp: - return head_exp.data.timestamp - return datetime.fromtimestamp(0) + return head_exp.data.timestamp, head_exp.rev + + return datetime.fromtimestamp(0), "" return sorted(exp_ranges, key=_head_timestamp, reverse=True) diff --git a/tests/unit/repo/experiments/test_collect.py b/tests/unit/repo/experiments/test_collect.py new file mode 100644 index 0000000000..478809f947 --- /dev/null +++ b/tests/unit/repo/experiments/test_collect.py @@ -0,0 +1,69 @@ +import datetime +import random +from typing import Dict, List + +import pytest + +from dvc.repo.experiments.collect import ExpRange, ExpState, SerializableExp, collect + + +@pytest.mark.vscode +def test_collect_stable_sorting(dvc, scm, mocker): + """ + Check that output is deterministically sorted even for + commits with the same timestamp. This affects the experience + in vs-code to avoid experiments "bouncing around" when "exp show" + is called repeatedly + """ + expected_revs = [ + "c" * 40, + "b" * 40, + "a" * 40, + "7" * 40, + ] + + def collect_queued_patched(_, baseline_revs) -> Dict[str, List["ExpRange"]]: + single_timestamp = datetime.datetime(2023, 6, 20, 0, 0, 0) + + exp_ranges = [ + ExpRange( + revs=[ + ExpState( + rev=rev, + name=f"exp-state-{rev[0]}", + data=SerializableExp(rev=rev, timestamp=single_timestamp), + ) + ], + name=f"exp-range-{rev[0]}", + ) + for rev in expected_revs + ] + + # shuffle collection order + random.shuffle(exp_ranges) + + return {baseline_rev: exp_ranges for baseline_rev in baseline_revs} + + mocker.patch("dvc.repo.experiments.collect.collect_queued", collect_queued_patched) + mocker.patch("dvc.repo.experiments.collect.collect_active", return_value={}) + mocker.patch("dvc.repo.experiments.collect.collect_failed", return_value={}) + mocker.patch("dvc.repo.experiments.collect.collect_successful", return_value={}) + + # repeat (shuffling collection order in collect_queued_patched) + for _ in range(20): + collected = collect(repo=dvc, all_commits=True) + assert collected[0].rev == "workspace" + assert collected[0].experiments is None + assert collected[1].rev == scm.get_rev() + _assert_experiment_rev_order(collected[1].experiments, expected_revs) + + +def _assert_experiment_rev_order( + actual: List["ExpRange"], + expected_revs: List[str], +): + expected_revs = expected_revs.copy() + + for actual_exp_range in actual: + for exp_state in actual_exp_range.revs: + assert exp_state.rev == expected_revs.pop(0)