From 4b82a4f477434d644d0717579966d8f93035f76f Mon Sep 17 00:00:00 2001 From: TjarkMiener Date: Fri, 25 Oct 2024 17:52:33 +0200 Subject: [PATCH 01/17] added stats calc tool --- pyproject.toml | 1 + src/ctapipe/tools/stats_calculation.py | 171 +++++++++++++++++++++++++ 2 files changed, 172 insertions(+) create mode 100644 src/ctapipe/tools/stats_calculation.py diff --git a/pyproject.toml b/pyproject.toml index 27c6cf0b7fc..6382a94dedc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -107,6 +107,7 @@ ctapipe-process = "ctapipe.tools.process:main" ctapipe-merge = "ctapipe.tools.merge:main" ctapipe-fileinfo = "ctapipe.tools.fileinfo:main" ctapipe-quickstart = "ctapipe.tools.quickstart:main" +ctapipe-stats-calculation = "ctapipe.tools.stats_calculation:main" ctapipe-train-energy-regressor = "ctapipe.tools.train_energy_regressor:main" ctapipe-train-particle-classifier = "ctapipe.tools.train_particle_classifier:main" ctapipe-train-disp-reconstructor = "ctapipe.tools.train_disp_reconstructor:main" diff --git a/src/ctapipe/tools/stats_calculation.py b/src/ctapipe/tools/stats_calculation.py new file mode 100644 index 00000000000..080d1b74c45 --- /dev/null +++ b/src/ctapipe/tools/stats_calculation.py @@ -0,0 +1,171 @@ +""" +Perform statistics calculation from DL1 data +""" + +import pathlib + +import numpy as np +from astropy.table import vstack + +from ctapipe.core import Tool +from ctapipe.core.tool import ToolConfigurationError +from ctapipe.core.traits import ( + Bool, + CaselessStrEnum, + Path, + Unicode, + classes_with_traits, +) +from ctapipe.instrument import SubarrayDescription +from ctapipe.io import write_table +from ctapipe.io.tableloader import TableLoader +from ctapipe.monitoring.calculator import PixelStatisticsCalculator + + +class StatisticsCalculatorTool(Tool): + """ + Perform statistics calculation for DL1 data + """ + + name = "StatisticsCalculatorTool" + description = "Perform statistics calculation for DL1 data" + + examples = """ + To calculate statistics of DL1 data files: + + > ctapipe-stats-calculation --input_url input.dl1.h5 --output_path /path/monitoring.h5 --overwrite + + """ + + input_url = Path( + help="Input CTA HDF5 files for DL1 data", + allow_none=True, + exists=True, + directory_ok=False, + file_ok=True, + ).tag(config=True) + + dl1_column_name = CaselessStrEnum( + ["image", "peak_time", "variance"], + default_value="image", + allow_none=False, + help="Column name of the DL1 data to calculate statistics", + ).tag(config=True) + + output_column_name = Unicode( + default_value="statistics", + allow_none=False, + help="Column name of the output statistics", + ).tag(config=True) + + output_path = Path( + help="Output filename", default_value=pathlib.Path("monitoring.h5") + ).tag(config=True) + + overwrite = Bool(help="Overwrite output file if it exists").tag(config=True) + + aliases = { + ("i", "input_url"): "StatisticsCalculatorTool.input_url", + ("o", "output_path"): "StatisticsCalculatorTool.output_path", + } + + flags = { + "overwrite": ( + {"StatisticsCalculatorTool": {"overwrite": True}}, + "Overwrite existing files", + ), + } + + classes = classes_with_traits(PixelStatisticsCalculator) + + def setup(self): + # Check that the input and output files are not the same + if self.input_url == self.output_path: + raise ToolConfigurationError( + "Input and output files are same. Fix your configuration / cli arguments." + ) + + # Load the subarray description from the input file + subarray = SubarrayDescription.from_hdf(self.input_url) + if subarray.n_tels != 1: + raise IOError( + "This tool is only intended for single telescope data." + "Please provide a file with only one telescope." + ) + self.tel_id = subarray.tel_ids[0] + # Set the table name for the output file + self.table_name = ( + f"/dl1/monitoring/telescope/{self.output_column_name}/tel_{self.tel_id:03d}" + ) + # Initialization of the statistics calculator + self.stats_calculator = PixelStatisticsCalculator( + parent=self, subarray=subarray + ) + # Read the whole dl1 images with the 'TableLoader' + input_data = TableLoader(input_url=self.input_url) + self.dl1_table = input_data.read_telescope_events_by_id( + dl1_images=True, + dl1_parameters=False, + dl1_muons=False, + dl2=False, + simulated=False, + true_images=False, + true_parameters=False, + instrument=False, + pointing=False, + )[self.tel_id] + + def start(self): + # Perform the first pass of the statistics calculation + aggregated_stats = self.stats_calculator.first_pass( + table=self.dl1_table, + tel_id=self.tel_id, + col_name=self.dl1_column_name, + ) + # Check if 'chunk_shift' is selected + if self.stats_calculator.chunk_shift is not None: + # Check if there are any faulty chunks to perform a second pass over the data + if np.any(~aggregated_stats["is_valid"].data): + # Perform the second pass of the statistics calculation + aggregated_stats_secondpass = self.stats_calculator.second_pass( + table=self.dl1_table, + valid_chunks=aggregated_stats["is_valid"].data, + tel_id=self.tel_id, + col_name=self.dl1_column_name, + ) + # Stack the statistic values from the first and second pass + aggregated_stats = vstack( + [aggregated_stats, aggregated_stats_secondpass] + ) + # Sort the stacked aggregated statistic values by starting time + aggregated_stats.sort(["time_start"]) + else: + self.log.info( + "No faulty chunks found for telescope 'tel_id=%d'. Skipping second pass.", + self.tel_id, + ) + # Write the aggregated statistics and their outlier mask to the output file + write_table( + aggregated_stats, + self.output_path, + self.table_name, + overwrite=self.overwrite, + ) + + def finish(self): + self.log.info( + "DL1 monitoring data was stored in '%s' under '%s'", + self.output_path, + self.table_name, + ) + self.log.info("Tool is shutting down") + + +def main(): + # Run the tool + tool = StatisticsCalculatorTool() + tool.run() + + +if __name__ == "main": + main() From 09e49c8b125a391190adbf6e6a96e32c38d06a78 Mon Sep 17 00:00:00 2001 From: TjarkMiener Date: Fri, 25 Oct 2024 18:02:19 +0200 Subject: [PATCH 02/17] added example config for stats calc --- src/ctapipe/resources/stats_calc_config.yaml | 36 ++++++++++++++++++++ src/ctapipe/tools/quickstart.py | 1 + 2 files changed, 37 insertions(+) create mode 100644 src/ctapipe/resources/stats_calc_config.yaml diff --git a/src/ctapipe/resources/stats_calc_config.yaml b/src/ctapipe/resources/stats_calc_config.yaml new file mode 100644 index 00000000000..59cc601e468 --- /dev/null +++ b/src/ctapipe/resources/stats_calc_config.yaml @@ -0,0 +1,36 @@ +StatisticsCalculatorTool: + dl1_column_name: "image" + output_column_name: "statistics" + +PixelStatisticsCalculator: + stats_aggregator_type: [["id", 1, "SigmaClippingAggregator"]] + chunk_shift: 1000 + faulty_pixels_fraction: 0.1 + outlier_detector_list: [ + { + "apply_to": "median", + "name": "MedianOutlierDetector", + "config": { + "median_range_factors": [-15, 15], + }, + }, + { + "apply_to": "median", + "name": "RangeOutlierDetector", + "config": { + "validity_range": [-20, 120], + } + } + { + "apply_to": "std", + "name": "StdOutlierDetector", + "config": { + "std_range_factors": [-15, 15], + }, + } + ] + +SigmaClippingAggregator: + chunk_size: 2500 + max_sigma: 4 + iterations: 5 diff --git a/src/ctapipe/tools/quickstart.py b/src/ctapipe/tools/quickstart.py index 67410458d95..9cab9c97d99 100644 --- a/src/ctapipe/tools/quickstart.py +++ b/src/ctapipe/tools/quickstart.py @@ -15,6 +15,7 @@ "stage1_config.yaml", "stage2_config.yaml", "ml_preprocessing_config.yaml", + "stats_calc_config.yaml", "train_energy_regressor.yaml", "train_particle_classifier.yaml", "train_disp_reconstructor.yaml", From 1eefbffb1ab0b85ea8be7c92356cc4395caf712b Mon Sep 17 00:00:00 2001 From: TjarkMiener Date: Mon, 28 Oct 2024 09:55:45 +0100 Subject: [PATCH 03/17] allow to process multiple tels Since we should also support the processing of MCs, we might want to run the stats calc tool over multiple tels. --- src/ctapipe/resources/stats_calc_config.yaml | 7 +- src/ctapipe/tools/stats_calculation.py | 120 ++++++++++--------- 2 files changed, 68 insertions(+), 59 deletions(-) diff --git a/src/ctapipe/resources/stats_calc_config.yaml b/src/ctapipe/resources/stats_calc_config.yaml index 59cc601e468..965bd6dae68 100644 --- a/src/ctapipe/resources/stats_calc_config.yaml +++ b/src/ctapipe/resources/stats_calc_config.yaml @@ -1,9 +1,10 @@ StatisticsCalculatorTool: - dl1_column_name: "image" + allowed_tels: [1,2,3,4] + dl1a_column_name: "image" output_column_name: "statistics" PixelStatisticsCalculator: - stats_aggregator_type: [["id", 1, "SigmaClippingAggregator"]] + stats_aggregator_type: [["type", "*", "SigmaClippingAggregator"]] chunk_shift: 1000 faulty_pixels_fraction: 0.1 outlier_detector_list: [ @@ -20,7 +21,7 @@ PixelStatisticsCalculator: "config": { "validity_range": [-20, 120], } - } + }, { "apply_to": "std", "name": "StdOutlierDetector", diff --git a/src/ctapipe/tools/stats_calculation.py b/src/ctapipe/tools/stats_calculation.py index 080d1b74c45..b472ba461ae 100644 --- a/src/ctapipe/tools/stats_calculation.py +++ b/src/ctapipe/tools/stats_calculation.py @@ -1,5 +1,5 @@ """ -Perform statistics calculation from DL1 data +Perform statistics calculation from DL1a image data """ import pathlib @@ -12,7 +12,9 @@ from ctapipe.core.traits import ( Bool, CaselessStrEnum, + CInt, Path, + Set, Unicode, classes_with_traits, ) @@ -24,32 +26,42 @@ class StatisticsCalculatorTool(Tool): """ - Perform statistics calculation for DL1 data + Perform statistics calculation for DL1a image data """ name = "StatisticsCalculatorTool" - description = "Perform statistics calculation for DL1 data" + description = "Perform statistics calculation for DL1a image data" examples = """ - To calculate statistics of DL1 data files: + To calculate statistics of DL1a image data files: > ctapipe-stats-calculation --input_url input.dl1.h5 --output_path /path/monitoring.h5 --overwrite """ input_url = Path( - help="Input CTA HDF5 files for DL1 data", + help="Input CTA HDF5 files including DL1a image data", allow_none=True, exists=True, directory_ok=False, file_ok=True, ).tag(config=True) - dl1_column_name = CaselessStrEnum( + allowed_tels = Set( + trait=CInt(), + default_value=None, + allow_none=True, + help=( + "List of allowed tel_ids, others will be ignored. " + "If None, all telescopes in the input stream will be included." + ), + ).tag(config=True) + + dl1a_column_name = CaselessStrEnum( ["image", "peak_time", "variance"], default_value="image", allow_none=False, - help="Column name of the DL1 data to calculate statistics", + help="Column name of the DL1a image data to calculate statistics", ).tag(config=True) output_column_name = Unicode( @@ -87,23 +99,17 @@ def setup(self): # Load the subarray description from the input file subarray = SubarrayDescription.from_hdf(self.input_url) - if subarray.n_tels != 1: - raise IOError( - "This tool is only intended for single telescope data." - "Please provide a file with only one telescope." - ) - self.tel_id = subarray.tel_ids[0] - # Set the table name for the output file - self.table_name = ( - f"/dl1/monitoring/telescope/{self.output_column_name}/tel_{self.tel_id:03d}" - ) # Initialization of the statistics calculator self.stats_calculator = PixelStatisticsCalculator( parent=self, subarray=subarray ) - # Read the whole dl1 images with the 'TableLoader' + # Read the input data with the 'TableLoader' input_data = TableLoader(input_url=self.input_url) - self.dl1_table = input_data.read_telescope_events_by_id( + # Get the telescope ids from the input data or use the allowed_tels configuration + tel_ids = subarray.tel_ids if self.allowed_tels is None else self.allowed_tels + # Read the whole dl1 images + self.dl1_tables = input_data.read_telescope_events_by_id( + telescopes=tel_ids, dl1_images=True, dl1_parameters=False, dl1_muons=False, @@ -113,50 +119,52 @@ def setup(self): true_parameters=False, instrument=False, pointing=False, - )[self.tel_id] + ) def start(self): - # Perform the first pass of the statistics calculation - aggregated_stats = self.stats_calculator.first_pass( - table=self.dl1_table, - tel_id=self.tel_id, - col_name=self.dl1_column_name, - ) - # Check if 'chunk_shift' is selected - if self.stats_calculator.chunk_shift is not None: - # Check if there are any faulty chunks to perform a second pass over the data - if np.any(~aggregated_stats["is_valid"].data): - # Perform the second pass of the statistics calculation - aggregated_stats_secondpass = self.stats_calculator.second_pass( - table=self.dl1_table, - valid_chunks=aggregated_stats["is_valid"].data, - tel_id=self.tel_id, - col_name=self.dl1_column_name, - ) - # Stack the statistic values from the first and second pass - aggregated_stats = vstack( - [aggregated_stats, aggregated_stats_secondpass] - ) - # Sort the stacked aggregated statistic values by starting time - aggregated_stats.sort(["time_start"]) - else: - self.log.info( - "No faulty chunks found for telescope 'tel_id=%d'. Skipping second pass.", - self.tel_id, - ) - # Write the aggregated statistics and their outlier mask to the output file - write_table( - aggregated_stats, - self.output_path, - self.table_name, - overwrite=self.overwrite, - ) + # Iterate over the telescope ids and their corresponding dl1 tables + for tel_id, dl1_table in self.dl1_tables.items(): + # Perform the first pass of the statistics calculation + aggregated_stats = self.stats_calculator.first_pass( + table=dl1_table, + tel_id=tel_id, + col_name=self.dl1a_column_name, + ) + # Check if 'chunk_shift' is selected + if self.stats_calculator.chunk_shift is not None: + # Check if there are any faulty chunks to perform a second pass over the data + if np.any(~aggregated_stats["is_valid"].data): + # Perform the second pass of the statistics calculation + aggregated_stats_secondpass = self.stats_calculator.second_pass( + table=dl1_table, + valid_chunks=aggregated_stats["is_valid"].data, + tel_id=tel_id, + col_name=self.dl1a_column_name, + ) + # Stack the statistic values from the first and second pass + aggregated_stats = vstack( + [aggregated_stats, aggregated_stats_secondpass] + ) + # Sort the stacked aggregated statistic values by starting time + aggregated_stats.sort(["time_start"]) + else: + self.log.info( + "No faulty chunks found for telescope 'tel_id=%d'. Skipping second pass.", + tel_id, + ) + # Write the aggregated statistics and their outlier mask to the output file + write_table( + aggregated_stats, + self.output_path, + f"/dl1/monitoring/telescope/{self.output_column_name}/tel_{tel_id:03d}", + overwrite=self.overwrite, + ) def finish(self): self.log.info( "DL1 monitoring data was stored in '%s' under '%s'", self.output_path, - self.table_name, + f"/dl1/monitoring/telescope/{self.output_column_name}", ) self.log.info("Tool is shutting down") From 18b98a0a22a55f4dad4fc9a4904a5a2a546ced35 Mon Sep 17 00:00:00 2001 From: TjarkMiener Date: Mon, 28 Oct 2024 09:58:54 +0100 Subject: [PATCH 04/17] added unit test for stats calc tool --- src/ctapipe/tools/tests/test_stats_calc.py | 57 ++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 src/ctapipe/tools/tests/test_stats_calc.py diff --git a/src/ctapipe/tools/tests/test_stats_calc.py b/src/ctapipe/tools/tests/test_stats_calc.py new file mode 100644 index 00000000000..97157ace955 --- /dev/null +++ b/src/ctapipe/tools/tests/test_stats_calc.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python3 +""" +Test ctapipe-stats-calculation tool +""" + +from traitlets.config.loader import Config + +from ctapipe.core import run_tool +from ctapipe.io import read_table +from ctapipe.tools.stats_calculation import StatisticsCalculatorTool + + +def test_stats_calc_tool(tmp_path, dl1_image_file): + """check statistics calculation from DL1a files""" + + # Create a configuration suitable for the test + tel_id = 3 + config = Config( + { + "StatisticsCalculatorTool": { + "allowed_tels": [tel_id], + "dl1a_column_name": "image", + "output_column_name": "statistics", + }, + "PixelStatisticsCalculator": { + "stats_aggregator_type": [ + ("id", tel_id, "PlainAggregator"), + ], + }, + "PlainAggregator": { + "chunk_size": 1, + }, + } + ) + # Set the output file path + monitoring_file = tmp_path / "monitoring.dl1.h5" + # Run the tool with the configuration and the input file + run_tool( + StatisticsCalculatorTool(config=config), + argv=[ + f"--input_url={dl1_image_file}", + f"--output_path={monitoring_file}", + "--overwrite", + ], + cwd=tmp_path, + raises=True, + ) + # Check that the output file has been created + assert monitoring_file.exists() + # Check that the output file is not empty + assert ( + read_table( + monitoring_file, + path=f"/dl1/monitoring/telescope/statistics/tel_{tel_id:03d}", + )["mean"] + is not None + ) From 82a4641f7c6f605aa2ff6634f88bd32d622afd21 Mon Sep 17 00:00:00 2001 From: TjarkMiener Date: Mon, 28 Oct 2024 10:05:23 +0100 Subject: [PATCH 05/17] add changelog --- docs/changes/2628.features.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 docs/changes/2628.features.rst diff --git a/docs/changes/2628.features.rst b/docs/changes/2628.features.rst new file mode 100644 index 00000000000..f57b32854af --- /dev/null +++ b/docs/changes/2628.features.rst @@ -0,0 +1 @@ +Add a generic stats-calculation tool utilizing the PixelStatisticsCalculator. From cb2e3fefac8863193d90aeb21ff23eaa9a0f83ee Mon Sep 17 00:00:00 2001 From: TjarkMiener Date: Mon, 28 Oct 2024 10:26:04 +0100 Subject: [PATCH 06/17] polish docs --- docs/user-guide/tools.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/user-guide/tools.rst b/docs/user-guide/tools.rst index 619b08bf793..f9c6c1b36cd 100644 --- a/docs/user-guide/tools.rst +++ b/docs/user-guide/tools.rst @@ -17,6 +17,7 @@ Data Processing Tools * ``ctapipe-quickstart``: create some default analysis configurations and a working directory * ``ctapipe-process``: Process event data in any supported format from R0/R1/DL0 to DL1 or DL2 HDF5 files. * ``ctapipe-apply-models``: Tool to apply machine learning models in bulk (as opposed to event by event). +* ``ctapipe-stats-calculation``: Tool to aggregate statistics and detect outliers from DL1a image data. * ``ctapipe-train-disp-reconstructor`` : Train the ML models for the `ctapipe.reco.DispReconstructor` (monoscopic reconstruction) * ``ctapipe-train-energy-regressor``: Train the ML models for the `ctapipe.reco.EnergyRegressor` (energy estimation) * ``ctapipe-train-particle-classifier``: Train the ML models for the `ctapipe.reco.ParticleClassifier` (gamma-hadron separation) From ba7bb0ecc2d4729981bb6733bc7bcc34412c7ab4 Mon Sep 17 00:00:00 2001 From: TjarkMiener Date: Mon, 28 Oct 2024 14:44:35 +0100 Subject: [PATCH 07/17] include first round of comments rename the tool and file name only keep dl1 table of the particular telescope into RAM added tests for tool config errors rename input col name adopt yaml syntax in example config for stats calculation --- docs/user-guide/tools.rst | 2 +- pyproject.toml | 2 +- .../resources/calculate_pixel_stats.yaml | 32 ++++++ src/ctapipe/resources/stats_calc_config.yaml | 37 ------- ...alculation.py => calculate_pixel_stats.py} | 62 ++++++----- .../tools/tests/test_calculate_pixel_stats.py | 102 ++++++++++++++++++ src/ctapipe/tools/tests/test_stats_calc.py | 57 ---------- 7 files changed, 170 insertions(+), 124 deletions(-) create mode 100644 src/ctapipe/resources/calculate_pixel_stats.yaml delete mode 100644 src/ctapipe/resources/stats_calc_config.yaml rename src/ctapipe/tools/{stats_calculation.py => calculate_pixel_stats.py} (72%) create mode 100644 src/ctapipe/tools/tests/test_calculate_pixel_stats.py delete mode 100644 src/ctapipe/tools/tests/test_stats_calc.py diff --git a/docs/user-guide/tools.rst b/docs/user-guide/tools.rst index f9c6c1b36cd..1a0b2320d9b 100644 --- a/docs/user-guide/tools.rst +++ b/docs/user-guide/tools.rst @@ -17,7 +17,7 @@ Data Processing Tools * ``ctapipe-quickstart``: create some default analysis configurations and a working directory * ``ctapipe-process``: Process event data in any supported format from R0/R1/DL0 to DL1 or DL2 HDF5 files. * ``ctapipe-apply-models``: Tool to apply machine learning models in bulk (as opposed to event by event). -* ``ctapipe-stats-calculation``: Tool to aggregate statistics and detect outliers from DL1a image data. +* ``ctapipe-calculate-pixel-statistics``: Tool to aggregate statistics and detect outliers from pixel-wise image data. * ``ctapipe-train-disp-reconstructor`` : Train the ML models for the `ctapipe.reco.DispReconstructor` (monoscopic reconstruction) * ``ctapipe-train-energy-regressor``: Train the ML models for the `ctapipe.reco.EnergyRegressor` (energy estimation) * ``ctapipe-train-particle-classifier``: Train the ML models for the `ctapipe.reco.ParticleClassifier` (gamma-hadron separation) diff --git a/pyproject.toml b/pyproject.toml index 6382a94dedc..fc0fbbc4bab 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -107,7 +107,7 @@ ctapipe-process = "ctapipe.tools.process:main" ctapipe-merge = "ctapipe.tools.merge:main" ctapipe-fileinfo = "ctapipe.tools.fileinfo:main" ctapipe-quickstart = "ctapipe.tools.quickstart:main" -ctapipe-stats-calculation = "ctapipe.tools.stats_calculation:main" +ctapipe-calculate-pixel-statistics = "ctapipe.tools.calculate_pixel_stats:main" ctapipe-train-energy-regressor = "ctapipe.tools.train_energy_regressor:main" ctapipe-train-particle-classifier = "ctapipe.tools.train_particle_classifier:main" ctapipe-train-disp-reconstructor = "ctapipe.tools.train_disp_reconstructor:main" diff --git a/src/ctapipe/resources/calculate_pixel_stats.yaml b/src/ctapipe/resources/calculate_pixel_stats.yaml new file mode 100644 index 00000000000..e4d1f0b3866 --- /dev/null +++ b/src/ctapipe/resources/calculate_pixel_stats.yaml @@ -0,0 +1,32 @@ +StatisticsCalculatorTool: + allowed_tels: [1,2,3,4] + input_column_name: image + output_column_name: statistics + +PixelStatisticsCalculator: + stats_aggregator_type: + - ["type", "LST*", "SigmaClippingAggregator"], + - ["type", "MST*", "PlainAggregator"], + + chunk_shift: 1000 + faulty_pixels_fraction: 0.1 + outlier_detector_list: + - name: MedianOutlierDetector + apply_to: median + config: + median_range_factors: [-15, 15] + - name: RangeOutlierDetector + apply_to: median + config: + validity_range: [-20, 120] + - name: StdOutlierDetector + apply_to: std + config: + std_range_factors: [-15, 15] + +SigmaClippingAggregator: + chunk_size: 2500 + max_sigma: 4 + iterations: 5 +PlainAggregator: + chunk_size: 2500 diff --git a/src/ctapipe/resources/stats_calc_config.yaml b/src/ctapipe/resources/stats_calc_config.yaml deleted file mode 100644 index 965bd6dae68..00000000000 --- a/src/ctapipe/resources/stats_calc_config.yaml +++ /dev/null @@ -1,37 +0,0 @@ -StatisticsCalculatorTool: - allowed_tels: [1,2,3,4] - dl1a_column_name: "image" - output_column_name: "statistics" - -PixelStatisticsCalculator: - stats_aggregator_type: [["type", "*", "SigmaClippingAggregator"]] - chunk_shift: 1000 - faulty_pixels_fraction: 0.1 - outlier_detector_list: [ - { - "apply_to": "median", - "name": "MedianOutlierDetector", - "config": { - "median_range_factors": [-15, 15], - }, - }, - { - "apply_to": "median", - "name": "RangeOutlierDetector", - "config": { - "validity_range": [-20, 120], - } - }, - { - "apply_to": "std", - "name": "StdOutlierDetector", - "config": { - "std_range_factors": [-15, 15], - }, - } - ] - -SigmaClippingAggregator: - chunk_size: 2500 - max_sigma: 4 - iterations: 5 diff --git a/src/ctapipe/tools/stats_calculation.py b/src/ctapipe/tools/calculate_pixel_stats.py similarity index 72% rename from src/ctapipe/tools/stats_calculation.py rename to src/ctapipe/tools/calculate_pixel_stats.py index b472ba461ae..4616c70b4e9 100644 --- a/src/ctapipe/tools/stats_calculation.py +++ b/src/ctapipe/tools/calculate_pixel_stats.py @@ -1,5 +1,5 @@ """ -Perform statistics calculation from DL1a image data +Perform statistics calculation from pixel-wise image data """ import pathlib @@ -11,7 +11,6 @@ from ctapipe.core.tool import ToolConfigurationError from ctapipe.core.traits import ( Bool, - CaselessStrEnum, CInt, Path, Set, @@ -26,21 +25,21 @@ class StatisticsCalculatorTool(Tool): """ - Perform statistics calculation for DL1a image data + Perform statistics calculation for pixel-wise image data """ name = "StatisticsCalculatorTool" - description = "Perform statistics calculation for DL1a image data" + description = "Perform statistics calculation for pixel-wise image data" examples = """ - To calculate statistics of DL1a image data files: + To calculate statistics of pixel-wise image data files: - > ctapipe-stats-calculation --input_url input.dl1.h5 --output_path /path/monitoring.h5 --overwrite + > ctapipe-calculate-pixel-statistics --input_url input.dl1.h5 --output_path /path/monitoring.h5 --overwrite """ input_url = Path( - help="Input CTA HDF5 files including DL1a image data", + help="Input CTA HDF5 files including pixel-wise image data", allow_none=True, exists=True, directory_ok=False, @@ -57,11 +56,10 @@ class StatisticsCalculatorTool(Tool): ), ).tag(config=True) - dl1a_column_name = CaselessStrEnum( - ["image", "peak_time", "variance"], + input_column_name = Unicode( default_value="image", allow_none=False, - help="Column name of the DL1a image data to calculate statistics", + help="Column name of the pixel-wise image data to calculate statistics", ).tag(config=True) output_column_name = Unicode( @@ -104,31 +102,39 @@ def setup(self): parent=self, subarray=subarray ) # Read the input data with the 'TableLoader' - input_data = TableLoader(input_url=self.input_url) + self.input_data = TableLoader(input_url=self.input_url) # Get the telescope ids from the input data or use the allowed_tels configuration - tel_ids = subarray.tel_ids if self.allowed_tels is None else self.allowed_tels - # Read the whole dl1 images - self.dl1_tables = input_data.read_telescope_events_by_id( - telescopes=tel_ids, - dl1_images=True, - dl1_parameters=False, - dl1_muons=False, - dl2=False, - simulated=False, - true_images=False, - true_parameters=False, - instrument=False, - pointing=False, + self.tel_ids = ( + subarray.tel_ids if self.allowed_tels is None else self.allowed_tels ) def start(self): - # Iterate over the telescope ids and their corresponding dl1 tables - for tel_id, dl1_table in self.dl1_tables.items(): + # Iterate over the telescope ids and calculate the statistics + for tel_id in self.tel_ids: + # Read the whole dl1 images for one particular telescope + dl1_table = self.input_data.read_telescope_events_by_id( + telescopes=tel_id, + dl1_images=True, + dl1_parameters=False, + dl1_muons=False, + dl2=False, + simulated=False, + true_images=False, + true_parameters=False, + instrument=False, + pointing=False, + )[tel_id] + # Check if the input column name is in the table + if self.input_column_name not in dl1_table.colnames: + raise ToolConfigurationError( + f"Column '{self.input_column_name}' not found " + f"in the input data for telescope 'tel_id={tel_id}'." + ) # Perform the first pass of the statistics calculation aggregated_stats = self.stats_calculator.first_pass( table=dl1_table, tel_id=tel_id, - col_name=self.dl1a_column_name, + col_name=self.input_column_name, ) # Check if 'chunk_shift' is selected if self.stats_calculator.chunk_shift is not None: @@ -139,7 +145,7 @@ def start(self): table=dl1_table, valid_chunks=aggregated_stats["is_valid"].data, tel_id=tel_id, - col_name=self.dl1a_column_name, + col_name=self.input_column_name, ) # Stack the statistic values from the first and second pass aggregated_stats = vstack( diff --git a/src/ctapipe/tools/tests/test_calculate_pixel_stats.py b/src/ctapipe/tools/tests/test_calculate_pixel_stats.py new file mode 100644 index 00000000000..38e61354b65 --- /dev/null +++ b/src/ctapipe/tools/tests/test_calculate_pixel_stats.py @@ -0,0 +1,102 @@ +#!/usr/bin/env python3 +""" +Test ctapipe-calculate-pixel-statistics tool +""" + +import pytest +from traitlets.config.loader import Config + +from ctapipe.core import run_tool +from ctapipe.core.tool import ToolConfigurationError +from ctapipe.io import read_table +from ctapipe.tools.calculate_pixel_stats import StatisticsCalculatorTool + + +def test_calculate_pixel_stats_tool(tmp_path, dl1_image_file): + """check statistics calculation from pixel-wise image data files""" + + # Create a configuration suitable for the test + tel_id = 3 + config = Config( + { + "StatisticsCalculatorTool": { + "allowed_tels": [tel_id], + "input_column_name": "image", + "output_column_name": "statistics", + }, + "PixelStatisticsCalculator": { + "stats_aggregator_type": [ + ("id", tel_id, "PlainAggregator"), + ], + }, + "PlainAggregator": { + "chunk_size": 1, + }, + } + ) + # Set the output file path + monitoring_file = tmp_path / "monitoring.dl1.h5" + # Run the tool with the configuration and the input file + run_tool( + StatisticsCalculatorTool(config=config), + argv=[ + f"--input_url={dl1_image_file}", + f"--output_path={monitoring_file}", + "--overwrite", + ], + cwd=tmp_path, + raises=True, + ) + # Check that the output file has been created + assert monitoring_file.exists() + # Check that the output file is not empty + assert ( + read_table( + monitoring_file, + path=f"/dl1/monitoring/telescope/statistics/tel_{tel_id:03d}", + )["mean"] + is not None + ) + + +def test_tool_config_error(tmp_path, dl1_image_file): + """check tool configuration error""" + + # Run the tool with the configuration and the input file + config = Config( + { + "StatisticsCalculatorTool": { + "allowed_tels": [3], + "input_column_name": "image_charges", + "output_column_name": "statistics", + } + } + ) + # Set the output file path + monitoring_file = tmp_path / "monitoring.dl1.h5" + # Check if ToolConfigurationError is raised + # when the column name of the pixel-wise image data is not correct + with pytest.raises(ToolConfigurationError): + run_tool( + StatisticsCalculatorTool(config=config), + argv=[ + f"--input_url={dl1_image_file}", + f"--output_path={monitoring_file}", + "--overwrite", + ], + cwd=tmp_path, + raises=True, + ) + # Check if ToolConfigurationError is raised + # when the input and output files are the same + with pytest.raises(ToolConfigurationError): + run_tool( + StatisticsCalculatorTool(), + argv=[ + f"--input_url={dl1_image_file}", + f"--output_path={dl1_image_file}", + "--overwrite", + ], + cwd=tmp_path, + raises=True, + ) diff --git a/src/ctapipe/tools/tests/test_stats_calc.py b/src/ctapipe/tools/tests/test_stats_calc.py deleted file mode 100644 index 97157ace955..00000000000 --- a/src/ctapipe/tools/tests/test_stats_calc.py +++ /dev/null @@ -1,57 +0,0 @@ -#!/usr/bin/env python3 -""" -Test ctapipe-stats-calculation tool -""" - -from traitlets.config.loader import Config - -from ctapipe.core import run_tool -from ctapipe.io import read_table -from ctapipe.tools.stats_calculation import StatisticsCalculatorTool - - -def test_stats_calc_tool(tmp_path, dl1_image_file): - """check statistics calculation from DL1a files""" - - # Create a configuration suitable for the test - tel_id = 3 - config = Config( - { - "StatisticsCalculatorTool": { - "allowed_tels": [tel_id], - "dl1a_column_name": "image", - "output_column_name": "statistics", - }, - "PixelStatisticsCalculator": { - "stats_aggregator_type": [ - ("id", tel_id, "PlainAggregator"), - ], - }, - "PlainAggregator": { - "chunk_size": 1, - }, - } - ) - # Set the output file path - monitoring_file = tmp_path / "monitoring.dl1.h5" - # Run the tool with the configuration and the input file - run_tool( - StatisticsCalculatorTool(config=config), - argv=[ - f"--input_url={dl1_image_file}", - f"--output_path={monitoring_file}", - "--overwrite", - ], - cwd=tmp_path, - raises=True, - ) - # Check that the output file has been created - assert monitoring_file.exists() - # Check that the output file is not empty - assert ( - read_table( - monitoring_file, - path=f"/dl1/monitoring/telescope/statistics/tel_{tel_id:03d}", - )["mean"] - is not None - ) From cb94b5426a87558730df43661701fceab2c763a3 Mon Sep 17 00:00:00 2001 From: TjarkMiener Date: Mon, 28 Oct 2024 14:59:48 +0100 Subject: [PATCH 08/17] rename config file also in quickstart tool --- src/ctapipe/tools/quickstart.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ctapipe/tools/quickstart.py b/src/ctapipe/tools/quickstart.py index 9cab9c97d99..f8dfaff0d3c 100644 --- a/src/ctapipe/tools/quickstart.py +++ b/src/ctapipe/tools/quickstart.py @@ -12,10 +12,10 @@ CONFIGS_TO_WRITE = [ "base_config.yaml", + "calculate_pixel_stats.yaml", "stage1_config.yaml", "stage2_config.yaml", "ml_preprocessing_config.yaml", - "stats_calc_config.yaml", "train_energy_regressor.yaml", "train_particle_classifier.yaml", "train_disp_reconstructor.yaml", From 82afdae9a0d0116a7393722c5d5ed50180da9226 Mon Sep 17 00:00:00 2001 From: TjarkMiener Date: Mon, 28 Oct 2024 16:28:16 +0100 Subject: [PATCH 09/17] remove redundant , in stats calc example config --- src/ctapipe/resources/calculate_pixel_stats.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ctapipe/resources/calculate_pixel_stats.yaml b/src/ctapipe/resources/calculate_pixel_stats.yaml index e4d1f0b3866..48e262d3ab2 100644 --- a/src/ctapipe/resources/calculate_pixel_stats.yaml +++ b/src/ctapipe/resources/calculate_pixel_stats.yaml @@ -5,8 +5,8 @@ StatisticsCalculatorTool: PixelStatisticsCalculator: stats_aggregator_type: - - ["type", "LST*", "SigmaClippingAggregator"], - - ["type", "MST*", "PlainAggregator"], + - ["type", "LST*", "SigmaClippingAggregator"] + - ["type", "MST*", "PlainAggregator"] chunk_shift: 1000 faulty_pixels_fraction: 0.1 From d62982fa4c7cab64395887decea0ed6168872da0 Mon Sep 17 00:00:00 2001 From: TjarkMiener Date: Thu, 14 Nov 2024 13:11:21 +0100 Subject: [PATCH 10/17] use TableLoader for input handling --- src/ctapipe/tools/calculate_pixel_stats.py | 39 ++++++++++------------ 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/src/ctapipe/tools/calculate_pixel_stats.py b/src/ctapipe/tools/calculate_pixel_stats.py index 4616c70b4e9..89bdcb40065 100644 --- a/src/ctapipe/tools/calculate_pixel_stats.py +++ b/src/ctapipe/tools/calculate_pixel_stats.py @@ -34,18 +34,10 @@ class StatisticsCalculatorTool(Tool): examples = """ To calculate statistics of pixel-wise image data files: - > ctapipe-calculate-pixel-statistics --input_url input.dl1.h5 --output_path /path/monitoring.h5 --overwrite + > ctapipe-calculate-pixel-statistics --TableLoader.input_url input.dl1.h5 --output_path /path/monitoring.h5 --overwrite """ - input_url = Path( - help="Input CTA HDF5 files including pixel-wise image data", - allow_none=True, - exists=True, - directory_ok=False, - file_ok=True, - ).tag(config=True) - allowed_tels = Set( trait=CInt(), default_value=None, @@ -75,7 +67,7 @@ class StatisticsCalculatorTool(Tool): overwrite = Bool(help="Overwrite output file if it exists").tag(config=True) aliases = { - ("i", "input_url"): "StatisticsCalculatorTool.input_url", + ("i", "input_url"): "TableLoader.input_url", ("o", "output_path"): "StatisticsCalculatorTool.output_path", } @@ -86,34 +78,39 @@ class StatisticsCalculatorTool(Tool): ), } - classes = classes_with_traits(PixelStatisticsCalculator) + classes = [ + TableLoader, + ] + classes_with_traits(PixelStatisticsCalculator) def setup(self): + # Read the input data with the 'TableLoader' + self.input_data = TableLoader( + parent=self, + ) # Check that the input and output files are not the same - if self.input_url == self.output_path: + if self.input_data.input_url == self.output_path: raise ToolConfigurationError( "Input and output files are same. Fix your configuration / cli arguments." ) - # Load the subarray description from the input file - subarray = SubarrayDescription.from_hdf(self.input_url) - # Initialization of the statistics calculator - self.stats_calculator = PixelStatisticsCalculator( - parent=self, subarray=subarray - ) - # Read the input data with the 'TableLoader' - self.input_data = TableLoader(input_url=self.input_url) + subarray = SubarrayDescription.from_hdf(self.input_data.input_url) # Get the telescope ids from the input data or use the allowed_tels configuration self.tel_ids = ( subarray.tel_ids if self.allowed_tels is None else self.allowed_tels ) + # Initialization of the statistics calculator + self.stats_calculator = PixelStatisticsCalculator( + parent=self, subarray=subarray + ) def start(self): # Iterate over the telescope ids and calculate the statistics for tel_id in self.tel_ids: # Read the whole dl1 images for one particular telescope dl1_table = self.input_data.read_telescope_events_by_id( - telescopes=tel_id, + telescopes=[ + tel_id, + ], dl1_images=True, dl1_parameters=False, dl1_muons=False, From cca519726a045dbb8052c0a12467c83f9015dedf Mon Sep 17 00:00:00 2001 From: TjarkMiener Date: Thu, 14 Nov 2024 13:17:03 +0100 Subject: [PATCH 11/17] fix changelog filename --- docs/changes/2628.features.rst | 1 - 1 file changed, 1 deletion(-) delete mode 100644 docs/changes/2628.features.rst diff --git a/docs/changes/2628.features.rst b/docs/changes/2628.features.rst deleted file mode 100644 index f57b32854af..00000000000 --- a/docs/changes/2628.features.rst +++ /dev/null @@ -1 +0,0 @@ -Add a generic stats-calculation tool utilizing the PixelStatisticsCalculator. From 3d910286687d815be36b3a6565778af030670f0d Mon Sep 17 00:00:00 2001 From: TjarkMiener Date: Thu, 14 Nov 2024 13:19:46 +0100 Subject: [PATCH 12/17] add changelog file --- docs/changes/2628.feature.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 docs/changes/2628.feature.rst diff --git a/docs/changes/2628.feature.rst b/docs/changes/2628.feature.rst new file mode 100644 index 00000000000..f57b32854af --- /dev/null +++ b/docs/changes/2628.feature.rst @@ -0,0 +1 @@ +Add a generic stats-calculation tool utilizing the PixelStatisticsCalculator. From 092c1e1ea2ecb3f0954c03d6052a03749b26b2d0 Mon Sep 17 00:00:00 2001 From: TjarkMiener Date: Thu, 14 Nov 2024 13:52:55 +0100 Subject: [PATCH 13/17] add proper ToolConfigurationError if chunk size is too large --- src/ctapipe/tools/calculate_pixel_stats.py | 9 +++++++++ .../tools/tests/test_calculate_pixel_stats.py | 15 +++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/src/ctapipe/tools/calculate_pixel_stats.py b/src/ctapipe/tools/calculate_pixel_stats.py index 89bdcb40065..5603575a342 100644 --- a/src/ctapipe/tools/calculate_pixel_stats.py +++ b/src/ctapipe/tools/calculate_pixel_stats.py @@ -121,6 +121,15 @@ def start(self): instrument=False, pointing=False, )[tel_id] + # Check if the chunk size does not exceed the table length of the input data + if self.stats_calculator.stats_aggregators[ + self.stats_calculator.stats_aggregator_type.tel[tel_id] + ].chunk_size > len(dl1_table): + raise ToolConfigurationError( + f"Change --StatisticsAggregator.chunk_size to decrease the chunk size " + f"of the aggregation to at least '{len(dl1_table)}' (table length of the " + f"input data for telescope 'tel_id={tel_id}')." + ) # Check if the input column name is in the table if self.input_column_name not in dl1_table.colnames: raise ToolConfigurationError( diff --git a/src/ctapipe/tools/tests/test_calculate_pixel_stats.py b/src/ctapipe/tools/tests/test_calculate_pixel_stats.py index 38e61354b65..9a8c3416847 100644 --- a/src/ctapipe/tools/tests/test_calculate_pixel_stats.py +++ b/src/ctapipe/tools/tests/test_calculate_pixel_stats.py @@ -100,3 +100,18 @@ def test_tool_config_error(tmp_path, dl1_image_file): cwd=tmp_path, raises=True, ) + # Check if ToolConfigurationError is raised + # when the chunk size is larger than the number of events in the input file + with pytest.raises(ToolConfigurationError): + run_tool( + StatisticsCalculatorTool(), + argv=[ + f"--input_url={dl1_image_file}", + f"--output_path={monitoring_file}", + "--StatisticsCalculatorTool.allowed_tels=3", + "--StatisticsAggregator.chunk_size=2500", + "--overwrite", + ], + cwd=tmp_path, + raises=True, + ) From 3548c096cd6db38b2e0a2a51a8d6bdc56c92103f Mon Sep 17 00:00:00 2001 From: TjarkMiener Date: Thu, 14 Nov 2024 14:27:08 +0100 Subject: [PATCH 14/17] added metadata renamed output_column_name to output_table_name --- src/ctapipe/tools/calculate_pixel_stats.py | 11 +++++++---- src/ctapipe/tools/tests/test_calculate_pixel_stats.py | 4 ++-- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/ctapipe/tools/calculate_pixel_stats.py b/src/ctapipe/tools/calculate_pixel_stats.py index 5603575a342..aff8b32e417 100644 --- a/src/ctapipe/tools/calculate_pixel_stats.py +++ b/src/ctapipe/tools/calculate_pixel_stats.py @@ -54,10 +54,10 @@ class StatisticsCalculatorTool(Tool): help="Column name of the pixel-wise image data to calculate statistics", ).tag(config=True) - output_column_name = Unicode( + output_table_name = Unicode( default_value="statistics", allow_none=False, - help="Column name of the output statistics", + help="Table name of the output statistics", ).tag(config=True) output_path = Path( @@ -164,11 +164,14 @@ def start(self): "No faulty chunks found for telescope 'tel_id=%d'. Skipping second pass.", tel_id, ) + # Add metadata to the aggregated statistics + aggregated_stats.meta["input_url"] = self.input_data.input_url + aggregated_stats.meta["input_column_name"] = self.input_column_name # Write the aggregated statistics and their outlier mask to the output file write_table( aggregated_stats, self.output_path, - f"/dl1/monitoring/telescope/{self.output_column_name}/tel_{tel_id:03d}", + f"/dl1/monitoring/telescope/{self.output_table_name}/tel_{tel_id:03d}", overwrite=self.overwrite, ) @@ -176,7 +179,7 @@ def finish(self): self.log.info( "DL1 monitoring data was stored in '%s' under '%s'", self.output_path, - f"/dl1/monitoring/telescope/{self.output_column_name}", + f"/dl1/monitoring/telescope/{self.output_table_name}", ) self.log.info("Tool is shutting down") diff --git a/src/ctapipe/tools/tests/test_calculate_pixel_stats.py b/src/ctapipe/tools/tests/test_calculate_pixel_stats.py index 9a8c3416847..d64660c687a 100644 --- a/src/ctapipe/tools/tests/test_calculate_pixel_stats.py +++ b/src/ctapipe/tools/tests/test_calculate_pixel_stats.py @@ -22,7 +22,7 @@ def test_calculate_pixel_stats_tool(tmp_path, dl1_image_file): "StatisticsCalculatorTool": { "allowed_tels": [tel_id], "input_column_name": "image", - "output_column_name": "statistics", + "output_table_name": "statistics", }, "PixelStatisticsCalculator": { "stats_aggregator_type": [ @@ -68,7 +68,7 @@ def test_tool_config_error(tmp_path, dl1_image_file): "StatisticsCalculatorTool": { "allowed_tels": [3], "input_column_name": "image_charges", - "output_column_name": "statistics", + "output_table_name": "statistics", } } ) From b0c197c7513d2f2d027e2f1382bde31746c744ad Mon Sep 17 00:00:00 2001 From: TjarkMiener Date: Thu, 14 Nov 2024 14:30:00 +0100 Subject: [PATCH 15/17] polish error message --- src/ctapipe/tools/calculate_pixel_stats.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ctapipe/tools/calculate_pixel_stats.py b/src/ctapipe/tools/calculate_pixel_stats.py index aff8b32e417..aa8b15629f3 100644 --- a/src/ctapipe/tools/calculate_pixel_stats.py +++ b/src/ctapipe/tools/calculate_pixel_stats.py @@ -127,7 +127,7 @@ def start(self): ].chunk_size > len(dl1_table): raise ToolConfigurationError( f"Change --StatisticsAggregator.chunk_size to decrease the chunk size " - f"of the aggregation to at least '{len(dl1_table)}' (table length of the " + f"of the aggregation to a maximum of '{len(dl1_table)}' (table length of the " f"input data for telescope 'tel_id={tel_id}')." ) # Check if the input column name is in the table From d3d3304b981cd02581c973aec168b4fd04efc379 Mon Sep 17 00:00:00 2001 From: Tjark Miener Date: Wed, 18 Dec 2024 14:27:01 +0100 Subject: [PATCH 16/17] added match in pytest raise --- .../tools/tests/test_calculate_pixel_stats.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/ctapipe/tools/tests/test_calculate_pixel_stats.py b/src/ctapipe/tools/tests/test_calculate_pixel_stats.py index d64660c687a..bcd3af2a66d 100644 --- a/src/ctapipe/tools/tests/test_calculate_pixel_stats.py +++ b/src/ctapipe/tools/tests/test_calculate_pixel_stats.py @@ -76,12 +76,15 @@ def test_tool_config_error(tmp_path, dl1_image_file): monitoring_file = tmp_path / "monitoring.dl1.h5" # Check if ToolConfigurationError is raised # when the column name of the pixel-wise image data is not correct - with pytest.raises(ToolConfigurationError): + with pytest.raises( + ToolConfigurationError, match="Column 'image_charges' not found" + ): run_tool( StatisticsCalculatorTool(config=config), argv=[ f"--input_url={dl1_image_file}", f"--output_path={monitoring_file}", + "--StatisticsAggregator.chunk_size=1", "--overwrite", ], cwd=tmp_path, @@ -89,7 +92,9 @@ def test_tool_config_error(tmp_path, dl1_image_file): ) # Check if ToolConfigurationError is raised # when the input and output files are the same - with pytest.raises(ToolConfigurationError): + with pytest.raises( + ToolConfigurationError, match="Input and output files are same." + ): run_tool( StatisticsCalculatorTool(), argv=[ @@ -102,7 +107,9 @@ def test_tool_config_error(tmp_path, dl1_image_file): ) # Check if ToolConfigurationError is raised # when the chunk size is larger than the number of events in the input file - with pytest.raises(ToolConfigurationError): + with pytest.raises( + ToolConfigurationError, match="Change --StatisticsAggregator.chunk_size" + ): run_tool( StatisticsCalculatorTool(), argv=[ From b6587ec3e8c4ede04ffc727d7f9fc39e7fbad585 Mon Sep 17 00:00:00 2001 From: Tjark Miener Date: Thu, 19 Dec 2024 10:52:53 +0100 Subject: [PATCH 17/17] add the event type to the meta data remove the input url since it is irrelevant --- src/ctapipe/tools/calculate_pixel_stats.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ctapipe/tools/calculate_pixel_stats.py b/src/ctapipe/tools/calculate_pixel_stats.py index aa8b15629f3..0b9de78d6e0 100644 --- a/src/ctapipe/tools/calculate_pixel_stats.py +++ b/src/ctapipe/tools/calculate_pixel_stats.py @@ -165,7 +165,7 @@ def start(self): tel_id, ) # Add metadata to the aggregated statistics - aggregated_stats.meta["input_url"] = self.input_data.input_url + aggregated_stats.meta["event_type"] = dl1_table["event_type"][0] aggregated_stats.meta["input_column_name"] = self.input_column_name # Write the aggregated statistics and their outlier mask to the output file write_table(