From 7476325ed93f4af588380f2a99f48ff7677cf0bd Mon Sep 17 00:00:00 2001 From: Wenqi Li Date: Sat, 22 Jul 2023 10:31:45 +0100 Subject: [PATCH] enhances auto3dseg data analyzer Signed-off-by: Wenqi Li --- monai/apps/auto3dseg/data_analyzer.py | 14 ++++++++------ monai/bundle/config_parser.py | 4 ++-- tests/test_auto3dseg.py | 2 +- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/monai/apps/auto3dseg/data_analyzer.py b/monai/apps/auto3dseg/data_analyzer.py index ded6390601..350bb61a34 100644 --- a/monai/apps/auto3dseg/data_analyzer.py +++ b/monai/apps/auto3dseg/data_analyzer.py @@ -70,7 +70,7 @@ class DataAnalyzer: the DataAnalyzer will skip looking for labels and all label-related operations. hist_bins: bins to compute histogram for each image channel. hist_range: ranges to compute histogram for each image channel. - fmt: format used to save the analysis results. Defaults to "yaml". + fmt: format used to save the analysis results. Currently support ``"json"`` and ``"yaml"``, defaults to "yaml". histogram_only: whether to only compute histograms. Defaults to False. extra_params: other optional arguments. Currently supported arguments are : 'allowed_shape_difference' (default 5) can be used to change the default tolerance of @@ -164,6 +164,7 @@ def _check_data_uniformity(keys: list[str], result: dict) -> bool: constant_props = [result[DataStatsKeys.SUMMARY][DataStatsKeys.IMAGE_STATS][key] for key in keys] for prop in constant_props: if "stdev" in prop and np.any(prop["stdev"]): + logger.debug(f"summary image_stats {prop} has non-zero stdev {prop['stdev']}.") return False return True @@ -242,15 +243,16 @@ def get_all_case_stats(self, key="training", transform_list=None): if not self._check_data_uniformity([ImageStatsKeys.SPACING], result): logger.info("Data spacing is not completely uniform. MONAI transforms may provide unexpected result") if self.output_path: + logger.info(f"Writing data stats to {self.output_path}.") ConfigParser.export_config_file( result, self.output_path, fmt=self.fmt, default_flow_style=None, sort_keys=False ) + by_case_path = self.output_path.replace(f".{self.fmt}", f"_by_case.{self.fmt}") + if by_case_path == self.output_path: # self.output_path not ended with self.fmt? + by_case_path += f".by_case.{self.fmt}" + logger.info(f"Writing by-case data stats to {by_case_path}, this may take a while.") ConfigParser.export_config_file( - result_bycase, - self.output_path.replace(".yaml", "_by_case.yaml"), - fmt=self.fmt, - default_flow_style=None, - sort_keys=False, + result_bycase, by_case_path, fmt=self.fmt, default_flow_style=None, sort_keys=False ) # release memory if self.device.type == "cuda": diff --git a/monai/bundle/config_parser.py b/monai/bundle/config_parser.py index d03ca8e43b..e2553a5ffd 100644 --- a/monai/bundle/config_parser.py +++ b/monai/bundle/config_parser.py @@ -438,12 +438,12 @@ def export_config_file(cls, config: dict, filepath: PathLike, fmt: str = "json", """ _filepath: str = str(Path(filepath)) - writer = look_up_option(fmt.lower(), {"json", "yaml"}) + writer = look_up_option(fmt.lower(), {"json", "yaml", "yml"}) with open(_filepath, "w") as f: if writer == "json": json.dump(config, f, **kwargs) return - if writer == "yaml": + if writer == "yaml" or writer == "yml": return yaml.safe_dump(config, f, **kwargs) raise ValueError(f"only support JSON or YAML config file so far, got {writer}.") diff --git a/tests/test_auto3dseg.py b/tests/test_auto3dseg.py index 53f25051ec..272fb52f1a 100644 --- a/tests/test_auto3dseg.py +++ b/tests/test_auto3dseg.py @@ -170,7 +170,7 @@ def setUp(self): work_dir = self.test_dir.name self.dataroot_dir = os.path.join(work_dir, "sim_dataroot") self.datalist_file = os.path.join(work_dir, "sim_datalist.json") - self.datastat_file = os.path.join(work_dir, "datastats.yaml") + self.datastat_file = os.path.join(work_dir, "datastats.yml") ConfigParser.export_config_file(sim_datalist, self.datalist_file) @parameterized.expand(SIM_CPU_TEST_CASES)