From 9f66897bf78a57396c3bd11176bb62b3ec392234 Mon Sep 17 00:00:00 2001 From: Matt Seddon <37993418+mattseddon@users.noreply.github.com> Date: Tue, 27 Feb 2024 06:54:20 +1100 Subject: [PATCH] plots: support x-dict in nested dvc.yaml (#10318) --- dvc/repo/plots/__init__.py | 16 ++- tests/func/plots/test_show.py | 81 ++++++++++++++++ tests/integration/plots/test_plots.py | 135 ++++++++++++++++++++++++++ 3 files changed, 227 insertions(+), 5 deletions(-) diff --git a/dvc/repo/plots/__init__.py b/dvc/repo/plots/__init__.py index ee783d25d8..0976acccea 100644 --- a/dvc/repo/plots/__init__.py +++ b/dvc/repo/plots/__init__.py @@ -423,11 +423,17 @@ def _id_is_path(plot_props=None): def _adjust_sources(fs, plot_props, config_dir): new_plot_props = deepcopy(plot_props) - old_y = new_plot_props.pop("y", {}) - new_y = {} - for filepath, val in old_y.items(): - new_y[_normpath(fs.join(config_dir, filepath))] = val - new_plot_props["y"] = new_y + for axis in ["x", "y"]: + x_is_inferred = axis == "x" and ( + axis not in new_plot_props or isinstance(new_plot_props[axis], str) + ) + if x_is_inferred: + continue + old = new_plot_props.pop(axis, {}) + new = {} + for filepath, val in old.items(): + new[_normpath(fs.join(config_dir, filepath))] = val + new_plot_props[axis] = new return new_plot_props diff --git a/tests/func/plots/test_show.py b/tests/func/plots/test_show.py index 515f075123..526a390408 100644 --- a/tests/func/plots/test_show.py +++ b/tests/func/plots/test_show.py @@ -172,6 +172,87 @@ def test_plots_show_overlap(tmp_dir, dvc, run_copy_metrics, clear_before_run): ) +def test_plots_show_nested_x_dict(tmp_dir, dvc, scm): + rel_pipeline_dir = "pipelines/data-increment" + + pipeline_rel_dvclive_metrics_dir = "dvclive/plots/metrics" + dvc_rel_dvclive_metrics_dir = ( + f"{rel_pipeline_dir}/{pipeline_rel_dvclive_metrics_dir}" + ) + + pipeline_dir = tmp_dir / rel_pipeline_dir + dvclive_metrics_dir = pipeline_dir / pipeline_rel_dvclive_metrics_dir + dvclive_metrics_dir.mkdir(parents=True) + + def _get_plot_defn(rel_dir: str) -> dict: + return { + "template": "simple", + "x": {f"{rel_dir}/Max_Leaf_Nodes.tsv": "Max_Leaf_Nodes"}, + "y": {f"{rel_dir}/Error.tsv": "Error"}, + } + + (pipeline_dir / "dvc.yaml").dump( + { + "plots": [ + { + "Error vs max_leaf_nodes": _get_plot_defn( + pipeline_rel_dvclive_metrics_dir + ) + }, + ] + }, + ) + + dvclive_metrics_dir.gen( + { + "Error.tsv": "step\tError\n" "0\t0.11\n" "1\t0.22\n" "2\t0.44\n", + "Max_Leaf_Nodes.tsv": "step\tMax_Leaf_Nodes\n" + "0\t5\n" + "1\t50\n" + "2\t500\n", + } + ) + + scm.commit("add dvc.yaml and dvclive metrics") + + result = dvc.plots.show() + assert result == { + "workspace": { + "definitions": { + "data": { + f"{rel_pipeline_dir}/dvc.yaml": { + "data": { + "Error vs max_leaf_nodes": _get_plot_defn( + dvc_rel_dvclive_metrics_dir + ) + }, + } + } + }, + "sources": { + "data": { + f"{dvc_rel_dvclive_metrics_dir}/Error.tsv": { + "data": [ + {"Error": "0.11", "step": "0"}, + {"Error": "0.22", "step": "1"}, + {"Error": "0.44", "step": "2"}, + ], + "props": {}, + }, + f"{dvc_rel_dvclive_metrics_dir}/Max_Leaf_Nodes.tsv": { + "data": [ + {"Max_Leaf_Nodes": "5", "step": "0"}, + {"Max_Leaf_Nodes": "50", "step": "1"}, + {"Max_Leaf_Nodes": "500", "step": "2"}, + ], + "props": {}, + }, + } + }, + } + } + + def test_dir_plots(tmp_dir, dvc, run_copy_metrics): subdir = tmp_dir / "subdir" subdir.mkdir() diff --git a/tests/integration/plots/test_plots.py b/tests/integration/plots/test_plots.py index 000b71064f..e3d5f189d9 100644 --- a/tests/integration/plots/test_plots.py +++ b/tests/integration/plots/test_plots.py @@ -481,3 +481,138 @@ def test_repo_with_dvclive_plots(tmp_dir, capsys, repo_with_dvclive_plots): } assert json_result == expected_result assert split_json_result == expected_result + + +@pytest.mark.vscode +def test_nested_x_defn_collection(tmp_dir, dvc, scm, capsys): + rel_pipeline_dir = "pipelines/data-increment" + pipeline_rel_dvclive_metrics_dir = "dvclive/plots/metrics" + pipeline_rel_other_logger_dir = "other/logger" + + dvc_rel_dvclive_metrics_dir = ( + f"{rel_pipeline_dir}/{pipeline_rel_dvclive_metrics_dir}" + ) + dvc_rel_other_logger_dir = f"{rel_pipeline_dir}/{pipeline_rel_other_logger_dir}" + + pipeline_dir = tmp_dir / rel_pipeline_dir + dvclive_metrics_dir = pipeline_dir / pipeline_rel_dvclive_metrics_dir + dvclive_metrics_dir.mkdir(parents=True) + other_logger_dir = pipeline_dir / pipeline_rel_other_logger_dir + other_logger_dir.mkdir(parents=True) + + (pipeline_dir / "dvc.yaml").dump( + { + "plots": [ + { + "Error vs max_leaf_nodes": { + "template": "simple", + "x": { + f"{pipeline_rel_dvclive_metrics_dir}" + "/Max_Leaf_Nodes.tsv": "Max_Leaf_Nodes" + }, + "y": {f"{pipeline_rel_dvclive_metrics_dir}/Error.tsv": "Error"}, + } + }, + { + f"{pipeline_rel_other_logger_dir}/multiple_metrics.json": { + "x": "x", + "y": ["y1", "y2"], + }, + }, + { + f"{pipeline_rel_dvclive_metrics_dir}/Error.tsv": {"y": ["Error"]}, + }, + { + "max leaf nodes": { + "y": { + f"{pipeline_rel_dvclive_metrics_dir}" + "/Max_Leaf_Nodes.tsv": "Max_Leaf_Nodes" + } + }, + }, + ] + }, + ) + dvclive_metrics_dir.gen( + { + "Error.tsv": "step\tError\n" "0\t0.11\n" "1\t0.22\n" "2\t0.44\n", + "Max_Leaf_Nodes.tsv": "step\tMax_Leaf_Nodes\n" + "0\t5\n" + "1\t50\n" + "2\t500\n", + } + ) + (other_logger_dir / "multiple_metrics.json").dump( + [ + {"x": 0, "y1": 0.1, "y2": 10}, + {"x": 1, "y1": 0.2, "y2": 22}, + ] + ) + + scm.commit("add dvc.yaml and metrics") + + _, _, split_json_result = call(capsys, subcommand="diff") + assert len(split_json_result.keys()) == 1 + assert len(split_json_result["data"].keys()) == 4 + + separate_x_file = split_json_result["data"]["Error vs max_leaf_nodes"][0] + + assert separate_x_file["anchor_definitions"][""] == [ + {"Error": "0.11", "Max_Leaf_Nodes": "5", "step": "0", "rev": "workspace"}, + {"Error": "0.22", "Max_Leaf_Nodes": "50", "step": "1", "rev": "workspace"}, + {"Error": "0.44", "Max_Leaf_Nodes": "500", "step": "2", "rev": "workspace"}, + ] + + same_x_file = split_json_result["data"][ + f"{dvc_rel_other_logger_dir}/multiple_metrics.json" + ][0] + assert same_x_file["anchor_definitions"][""] == [ + { + "x": 0, + "y1": 0.1, + "y2": 10, + "dvc_inferred_y_value": 0.1, + "field": "y1", + "rev": "workspace", + }, + { + "x": 1, + "y1": 0.2, + "y2": 22, + "dvc_inferred_y_value": 0.2, + "field": "y1", + "rev": "workspace", + }, + { + "x": 0, + "y1": 0.1, + "y2": 10, + "dvc_inferred_y_value": 10, + "field": "y2", + "rev": "workspace", + }, + { + "x": 1, + "y1": 0.2, + "y2": 22, + "dvc_inferred_y_value": 22, + "field": "y2", + "rev": "workspace", + }, + ] + + inferred_x_from_str = split_json_result["data"][ + f"{dvc_rel_dvclive_metrics_dir}/Error.tsv" + ][0] + assert inferred_x_from_str["anchor_definitions"][""] == [ + {"step": 0, "Error": "0.11", "rev": "workspace"}, + {"step": 1, "Error": "0.22", "rev": "workspace"}, + {"step": 2, "Error": "0.44", "rev": "workspace"}, + ] + + inferred_x_from_dict = split_json_result["data"]["max leaf nodes"][0] + assert inferred_x_from_dict["anchor_definitions"][""] == [ + {"step": 0, "Max_Leaf_Nodes": "5", "rev": "workspace"}, + {"step": 1, "Max_Leaf_Nodes": "50", "rev": "workspace"}, + {"step": 2, "Max_Leaf_Nodes": "500", "rev": "workspace"}, + ]