Skip to content

Commit

Permalink
Stable experiment range sorting (#9660)
Browse files Browse the repository at this point in the history
* secondary sorting by rev alphanumerically. to introduce determinism

* add UT
  • Loading branch information
omesser authored Jun 26, 2023
1 parent cf976f1 commit a750a75
Show file tree
Hide file tree
Showing 2 changed files with 75 additions and 4 deletions.
10 changes: 6 additions & 4 deletions dvc/repo/experiments/collect.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
Iterator,
List,
Optional,
Tuple,
Union,
)

Expand Down Expand Up @@ -369,12 +370,13 @@ def _describe(


def _sorted_ranges(exp_ranges: Iterable["ExpRange"]) -> List["ExpRange"]:
"""Return list of ExpRange sorted by timestamp."""
"""Return list of ExpRange sorted by (timestamp, rev)."""

def _head_timestamp(exp_range: "ExpRange") -> datetime:
def _head_timestamp(exp_range: "ExpRange") -> Tuple[datetime, str]:
head_exp = first(exp_range.revs)
if head_exp and head_exp.data and head_exp.data.timestamp:
return head_exp.data.timestamp
return datetime.fromtimestamp(0)
return head_exp.data.timestamp, head_exp.rev

return datetime.fromtimestamp(0), ""

return sorted(exp_ranges, key=_head_timestamp, reverse=True)
69 changes: 69 additions & 0 deletions tests/unit/repo/experiments/test_collect.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
import datetime
import random
from typing import Dict, List

import pytest

from dvc.repo.experiments.collect import ExpRange, ExpState, SerializableExp, collect


@pytest.mark.vscode
def test_collect_stable_sorting(dvc, scm, mocker):
"""
Check that output is deterministically sorted even for
commits with the same timestamp. This affects the experience
in vs-code to avoid experiments "bouncing around" when "exp show"
is called repeatedly
"""
expected_revs = [
"c" * 40,
"b" * 40,
"a" * 40,
"7" * 40,
]

def collect_queued_patched(_, baseline_revs) -> Dict[str, List["ExpRange"]]:
single_timestamp = datetime.datetime(2023, 6, 20, 0, 0, 0)

exp_ranges = [
ExpRange(
revs=[
ExpState(
rev=rev,
name=f"exp-state-{rev[0]}",
data=SerializableExp(rev=rev, timestamp=single_timestamp),
)
],
name=f"exp-range-{rev[0]}",
)
for rev in expected_revs
]

# shuffle collection order
random.shuffle(exp_ranges)

return {baseline_rev: exp_ranges for baseline_rev in baseline_revs}

mocker.patch("dvc.repo.experiments.collect.collect_queued", collect_queued_patched)
mocker.patch("dvc.repo.experiments.collect.collect_active", return_value={})
mocker.patch("dvc.repo.experiments.collect.collect_failed", return_value={})
mocker.patch("dvc.repo.experiments.collect.collect_successful", return_value={})

# repeat (shuffling collection order in collect_queued_patched)
for _ in range(20):
collected = collect(repo=dvc, all_commits=True)
assert collected[0].rev == "workspace"
assert collected[0].experiments is None
assert collected[1].rev == scm.get_rev()
_assert_experiment_rev_order(collected[1].experiments, expected_revs)


def _assert_experiment_rev_order(
actual: List["ExpRange"],
expected_revs: List[str],
):
expected_revs = expected_revs.copy()

for actual_exp_range in actual:
for exp_state in actual_exp_range.revs:
assert exp_state.rev == expected_revs.pop(0)

0 comments on commit a750a75

Please sign in to comment.