From e58833597b590c77f6faf4895804726f6441bf67 Mon Sep 17 00:00:00 2001 From: "Jack Y. Araz" Date: Mon, 15 Jul 2024 10:29:36 -0400 Subject: [PATCH 01/13] update missing channel handle --- src/spey_pyhf/simplify.py | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/src/spey_pyhf/simplify.py b/src/spey_pyhf/simplify.py index ef7478b..3611782 100644 --- a/src/spey_pyhf/simplify.py +++ b/src/spey_pyhf/simplify.py @@ -1,7 +1,8 @@ """Interface to convert pyhf likelihoods to simplified likelihood framework""" import copy +import logging import warnings -from typing import Callable, List, Optional, Text, Union, Literal +from typing import Callable, List, Literal, Optional, Text, Union import numpy as np import spey @@ -18,6 +19,9 @@ def __dir__(): return [] +log = logging.getLogger("Spey") + + class ConversionError(Exception): """Conversion error class""" @@ -175,6 +179,7 @@ def __call__( }[fittype] interpreter = WorkspaceInterpreter(bkgonly_model) + bin_map = interpreter.bin_map # configure signal patch map with respect to channel names signal_patch_map = interpreter.patch_to_map(signal_patch) @@ -190,13 +195,14 @@ def __call__( ) for channel in interpreter.get_channels(control_region_indices): - interpreter.inject_signal( - channel, - [0.0] * len(signal_patch_map[channel]["data"]), - signal_patch_map[channel]["modifiers"] - if include_modifiers_in_control_model - else None, - ) + if channel in signal_patch_map: + interpreter.inject_signal( + channel, + [0.0] * bin_map[channel], + signal_patch_map[channel]["modifiers"] + if include_modifiers_in_control_model + else None, + ) pdf_wrapper = spey.get_backend("pyhf") control_model = pdf_wrapper( @@ -325,7 +331,15 @@ def __call__( # yields needs to be reordered properly before constructing the simplified likelihood signal_yields = [] for channel_name in stat_model_pyhf.config.channels: - signal_yields += signal_patch_map[channel_name]["data"] + try: + signal_yields += signal_patch_map[channel_name]["data"] + except KeyError: + log.warning( + f"Channel `{channel_name}` does not exist in the signal patch," + " yields will be set to zero." + ) + signal_yields += [0.0] * bin_map[channel_name] + # NOTE background yields are first moments in simplified framework not the yield values # in the full statistical model! background_yields = np.mean(samples, axis=0) From cad165f7ce499c1e8689b07928049277f8830864 Mon Sep 17 00:00:00 2001 From: "Jack Y. Araz" Date: Mon, 15 Jul 2024 10:34:26 -0400 Subject: [PATCH 02/13] bump version --- .zenodo.json | 6 +++--- src/spey_pyhf/_version.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.zenodo.json b/.zenodo.json index c3c3bd5..c635095 100644 --- a/.zenodo.json +++ b/.zenodo.json @@ -1,8 +1,8 @@ { "description": "pyhf plug-in for spey package", "license": "MIT", - "title": "SpeysideHEP/spey-pyhf: v0.1.5", - "version": "v0.1.5", + "title": "SpeysideHEP/spey-pyhf: v0.1.6", + "version": "v0.1.6", "upload_type": "software", "creators": [ { @@ -29,7 +29,7 @@ }, { "scheme": "url", - "identifier": "https://github.com/SpeysideHEP/spey-pyhf/tree/v0.1.5", + "identifier": "https://github.com/SpeysideHEP/spey-pyhf/tree/v0.1.6", "relation": "isSupplementTo" }, { diff --git a/src/spey_pyhf/_version.py b/src/spey_pyhf/_version.py index 51ce7ae..5260485 100644 --- a/src/spey_pyhf/_version.py +++ b/src/spey_pyhf/_version.py @@ -1,3 +1,3 @@ """Version of the spey - pyhf plugin""" -__version__ = "0.1.5" +__version__ = "0.1.6" From 3fa0a00967a37610f4ab1df5426eec25eb81e5b3 Mon Sep 17 00:00:00 2001 From: "Jack Y. Araz" Date: Mon, 15 Jul 2024 10:34:34 -0400 Subject: [PATCH 03/13] update changelog --- docs/releases/changelog-v0.1.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/releases/changelog-v0.1.md b/docs/releases/changelog-v0.1.md index dce9b3f..7f2c08d 100644 --- a/docs/releases/changelog-v0.1.md +++ b/docs/releases/changelog-v0.1.md @@ -24,6 +24,9 @@ * Improve undefined channel handling in the patchset ([#12](https://github.com/SpeysideHEP/spey-pyhf/pull/12)) +* Improve undefined channel handling in the patchset for full likelihood simplification. + ([#13](https://github.com/SpeysideHEP/spey-pyhf/pull/13)) + ## Bug fixes * Bugfix in `simplify` module, where signal injector was not initiated properly. From a44e817e1e5f5a129354fd0d6e22a7902e3741fc Mon Sep 17 00:00:00 2001 From: "Jack Y. Araz" Date: Mon, 15 Jul 2024 10:43:31 -0400 Subject: [PATCH 04/13] add debug message --- src/spey_pyhf/simplify.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/spey_pyhf/simplify.py b/src/spey_pyhf/simplify.py index 3611782..6eb06d0 100644 --- a/src/spey_pyhf/simplify.py +++ b/src/spey_pyhf/simplify.py @@ -310,13 +310,16 @@ def __call__( # Some of the samples can lead to problems while sampling from a poisson distribution. # e.g. poisson requires positive lambda values to sample from. If sample leads to a negative # lambda value continue sampling to avoid that point. + log.debug("Problem with the sample generation") + log.debug( + f"Nuisance parameters: {current_nui_params if new_params is None else new_params}" + ) continue if len(warnings_list) > 0: - warnings.warn( - message=f"{len(warnings_list)} warning(s) generated during sampling." - " This might be due to edge cases in nuisance parameter sampling.", - category=RuntimeWarning, + log.warning( + f"{len(warnings_list)} warning(s) generated during sampling." + " This might be due to edge cases in nuisance parameter sampling." ) samples = np.vstack(samples) From 55a8b829320e075a2ef8c47cf886be2c07ceb0c7 Mon Sep 17 00:00:00 2001 From: "Jack Y. Araz" Date: Tue, 16 Jul 2024 10:52:24 -0400 Subject: [PATCH 05/13] update setup --- setup.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/setup.py b/setup.py index b59c4dd..16d08d0 100644 --- a/setup.py +++ b/setup.py @@ -30,6 +30,10 @@ url="https://github.com/SpeysideHEP/spey-pyhf", project_urls={ "Bug Tracker": "https://github.com/SpeysideHEP/spey-pyhf/issues", + "Documentation": "https://spey-pyhf.readthedocs.io", + "Repository": "https://github.com/SpeysideHEP/spey-pyhf", + "Homepage": "https://github.com/SpeysideHEP/spey-pyhf", + "Download": f"https://github.com/SpeysideHEP/spey-pyhf/archive/refs/tags/v{version}.tar.gz", }, download_url=f"https://github.com/SpeysideHEP/spey-pyhf/archive/refs/tags/v{version}.tar.gz", author="Jack Y. Araz", From a45d398c394b22f51c0d29cadd988c1ce0e83976 Mon Sep 17 00:00:00 2001 From: "Jack Y. Araz" Date: Wed, 17 Jul 2024 10:54:14 -0400 Subject: [PATCH 06/13] minor bugfixes --- src/spey_pyhf/helper_functions.py | 28 +++++++++++++++------------- src/spey_pyhf/simplify.py | 8 +++++--- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/src/spey_pyhf/helper_functions.py b/src/spey_pyhf/helper_functions.py index ba9c88e..646bd28 100644 --- a/src/spey_pyhf/helper_functions.py +++ b/src/spey_pyhf/helper_functions.py @@ -9,6 +9,8 @@ def __dir__(): return __all__ +# pylint: disable=W1203, W1201, C0103 + log = logging.getLogger("Spey") @@ -237,7 +239,7 @@ def reset_signal(self) -> None: def add_patch(self, signal_patch: List[Dict]) -> None: """Inject signal patch""" - self._signal_dict, self._to_remove = self.patch_to_map( + self._signal_dict, self._signal_modifiers, self._to_remove = self.patch_to_map( signal_patch=signal_patch, return_remove_list=True ) @@ -272,7 +274,10 @@ def remove_list(self) -> List[Text]: def patch_to_map( self, signal_patch: List[Dict], return_remove_list: bool = False - ) -> Union[Tuple[Dict[Text, Dict], List[Text]], Dict[Text, Dict]]: + ) -> Union[ + Tuple[Dict[Text, Dict], Dict[Text, Dict], List[Text]], + Tuple[Dict[Text, Dict], Dict[Text, Dict]], + ]: """ Convert JSONPatch into signal map @@ -288,23 +293,20 @@ def patch_to_map( .. versionadded:: 0.1.5 Returns: - ``Tuple[Dict[Text, Dict], List[Text]]`` or ``Dict[Text, Dict]``: + ``Tuple[Dict[Text, Dict], Dict[Text, Dict], List[Text]]`` or ``Tuple[Dict[Text, Dict], Dict[Text, Dict]]``: signal map including the data and modifiers and the list of channels to be removed. """ - signal_map = {} - to_remove = [] + signal_map, modifier_map, to_remove = {}, {}, [] for item in signal_patch: path = int(item["path"].split("/")[2]) channel_name = self["channels"][path]["name"] if item["op"] == "add": - signal_map[channel_name] = { - "data": item["value"]["data"], - "modifiers": item["value"].get( - "modifiers", _default_modifiers(poi_name=self.poi_name[0][1]) - ), - } + signal_map[channel_name] = item["value"]["data"] + modifier_map[channel_name] = item["value"].get( + "modifiers", _default_modifiers(poi_name=self.poi_name[0][1]) + ) elif item["op"] == "remove": to_remove.append(channel_name) if return_remove_list: - return signal_map, to_remove - return signal_map + return signal_map, modifier_map, to_remove + return signal_map, modifier_map diff --git a/src/spey_pyhf/simplify.py b/src/spey_pyhf/simplify.py index 6eb06d0..c7e9123 100644 --- a/src/spey_pyhf/simplify.py +++ b/src/spey_pyhf/simplify.py @@ -19,6 +19,8 @@ def __dir__(): return [] +# pylint: disable=W1203 + log = logging.getLogger("Spey") @@ -182,7 +184,7 @@ def __call__( bin_map = interpreter.bin_map # configure signal patch map with respect to channel names - signal_patch_map = interpreter.patch_to_map(signal_patch) + signal_patch_map, signal_modifiers_map = interpreter.patch_to_map(signal_patch) # Prepare a JSON patch to separate control and validation regions # These regions are generally marked as CR and VR @@ -195,11 +197,11 @@ def __call__( ) for channel in interpreter.get_channels(control_region_indices): - if channel in signal_patch_map: + if channel in signal_patch_map and channel in signal_modifiers_map: interpreter.inject_signal( channel, [0.0] * bin_map[channel], - signal_patch_map[channel]["modifiers"] + signal_modifiers_map[channel] if include_modifiers_in_control_model else None, ) From 6175fdc8460340493eb8c21801947ca333971e27 Mon Sep 17 00:00:00 2001 From: "Jack Y. Araz" Date: Wed, 24 Jul 2024 10:06:14 -0400 Subject: [PATCH 07/13] add logging manager --- src/spey_pyhf/simplify.py | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/src/spey_pyhf/simplify.py b/src/spey_pyhf/simplify.py index c7e9123..29b5298 100644 --- a/src/spey_pyhf/simplify.py +++ b/src/spey_pyhf/simplify.py @@ -2,6 +2,7 @@ import copy import logging import warnings +from contextlib import contextmanager from typing import Callable, List, Literal, Optional, Text, Union import numpy as np @@ -19,7 +20,7 @@ def __dir__(): return [] -# pylint: disable=W1203 +# pylint: disable=W1203, R0903 log = logging.getLogger("Spey") @@ -47,6 +48,22 @@ def func(vector: np.ndarray) -> float: return func +@contextmanager +def _disable_logging(highest_level: int = logging.CRITICAL): + """ + Temporary disable logging implementation, this should move into Spey + + Args: + highest_level (``int``, default ``logging.CRITICAL``): highest level to be set in logging + """ + previous_level = logging.root.manager.disable + logging.disable(highest_level) + try: + yield + finally: + logging.disable(previous_level) + + class Simplify(spey.ConverterBase): r""" An interface to convert pyhf full statistical model prescription into simplified likelihood @@ -207,9 +224,10 @@ def __call__( ) pdf_wrapper = spey.get_backend("pyhf") - control_model = pdf_wrapper( - background_only_model=bkgonly_model, signal_patch=interpreter.make_patch() - ) + with _disable_logging(): + control_model = pdf_wrapper( + background_only_model=bkgonly_model, signal_patch=interpreter.make_patch() + ) # Extract the nuisance parameters that maximises the likelihood at mu=0 fit_opts = control_model.prepare_for_fit(expected=expected) From 24c348ae042df27d58104b6ce2844d77d2ee8324 Mon Sep 17 00:00:00 2001 From: "Jack Y. Araz" Date: Wed, 24 Jul 2024 10:34:37 -0400 Subject: [PATCH 08/13] update setup --- setup.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 16d08d0..f42c304 100644 --- a/setup.py +++ b/setup.py @@ -54,8 +54,14 @@ "Intended Audience :: Science/Research", "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", - "Programming Language :: Python :: 3", "Topic :: Scientific/Engineering :: Physics", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", ], extras_require={ "dev": ["pytest>=7.1.2", "pytest-cov>=3.0.0", "twine>=3.7.1", "wheel>=0.37.1"], From acea9e8d23b96de762b32b9629e75aa037ab1250 Mon Sep 17 00:00:00 2001 From: "Jack Y. Araz" Date: Thu, 25 Jul 2024 09:24:28 -0400 Subject: [PATCH 09/13] bugfix --- src/spey_pyhf/simplify.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spey_pyhf/simplify.py b/src/spey_pyhf/simplify.py index 29b5298..7efec54 100644 --- a/src/spey_pyhf/simplify.py +++ b/src/spey_pyhf/simplify.py @@ -355,7 +355,7 @@ def __call__( signal_yields = [] for channel_name in stat_model_pyhf.config.channels: try: - signal_yields += signal_patch_map[channel_name]["data"] + signal_yields += signal_patch_map[channel_name] except KeyError: log.warning( f"Channel `{channel_name}` does not exist in the signal patch," From 4ee5c39c182374ed142a6217d63fd8ddab540241 Mon Sep 17 00:00:00 2001 From: "Jack Y. Araz" Date: Thu, 25 Jul 2024 10:56:35 -0400 Subject: [PATCH 10/13] add bounds and include debug messages --- src/spey_pyhf/simplify.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/spey_pyhf/simplify.py b/src/spey_pyhf/simplify.py index 7efec54..fb4a474 100644 --- a/src/spey_pyhf/simplify.py +++ b/src/spey_pyhf/simplify.py @@ -234,7 +234,6 @@ def __call__( _, fit_param = fit( **fit_opts, initial_parameters=None, - bounds=None, fixed_poi_value=0.0, ) @@ -267,6 +266,10 @@ def __call__( ) fit_opts = statistical_model.prepare_for_fit(expected=expected) suggested_fixed = fit_opts["model_configuration"].suggested_fixed + log.debug( + "Number of parameters to be fitted during the scan: " + f"{fit_opts['model_configuration'].npar - len(fit_param)}" + ) samples = [] warnings_list = [] @@ -316,7 +319,9 @@ def __call__( _, new_params = fit( **current_fit_opts, initial_parameters=init_params.tolist(), - bounds=None, + bounds=current_fit_opts[ + "model_configuration" + ].suggested_bounds, ) warnings_list += w From b2cf4f2d0c4e5addd1cbdd6a04351f2adc8d68bf Mon Sep 17 00:00:00 2001 From: "Jack Y. Araz" Date: Tue, 17 Sep 2024 10:12:26 -0400 Subject: [PATCH 11/13] update signal injection including modifier check --- src/spey_pyhf/helper_functions.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/spey_pyhf/helper_functions.py b/src/spey_pyhf/helper_functions.py index 646bd28..694e060 100644 --- a/src/spey_pyhf/helper_functions.py +++ b/src/spey_pyhf/helper_functions.py @@ -170,6 +170,7 @@ def inject_signal( Args: channel (``Text``): channel name data (``List[float]``): signal yields + modifiers (``List[Dict]``): uncertainties. If None, default modifiers will be added. Raises: ``ValueError``: If channel does not exist or number of yields does not match @@ -186,9 +187,20 @@ def inject_signal( f"{self.bin_map[channel]} expected, {len(data)} received." ) + default_modifiers = _default_modifiers(self.poi_name[0][1]) + if modifiers is not None: + for mod in default_modifiers: + if mod not in modifiers: + log.warning( + f"Modifier `{mod['name']}` with type `{mod['type']}` is missing" + f" from the input. Adding `{mod['name']}`" + ) + log.debug(f"Adding modifier: {mod}") + modifiers.append(mod) + self._signal_dict[channel] = data self._signal_modifiers[channel] = ( - _default_modifiers(self.poi_name[0][1]) if modifiers is None else modifiers + default_modifiers if modifiers is None else modifiers ) @property From abadaae328165086d53767b00df73eb810ca878c Mon Sep 17 00:00:00 2001 From: "Jack Y. Araz" Date: Tue, 17 Sep 2024 10:14:14 -0400 Subject: [PATCH 12/13] update changelog --- docs/releases/changelog-v0.1.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/releases/changelog-v0.1.md b/docs/releases/changelog-v0.1.md index 7f2c08d..cdc9eb0 100644 --- a/docs/releases/changelog-v0.1.md +++ b/docs/releases/changelog-v0.1.md @@ -27,6 +27,9 @@ * Improve undefined channel handling in the patchset for full likelihood simplification. ([#13](https://github.com/SpeysideHEP/spey-pyhf/pull/13)) +* Add modifier check to signal injection. + ([#13](https://github.com/SpeysideHEP/spey-pyhf/pull/13)) + ## Bug fixes * Bugfix in `simplify` module, where signal injector was not initiated properly. From 61fd5a82331b8494aca36ba4c24223830e7984e1 Mon Sep 17 00:00:00 2001 From: "Jack Y. Araz" Date: Thu, 19 Sep 2024 13:04:51 -0400 Subject: [PATCH 13/13] update nuisance parameter handle --- src/spey_pyhf/simplify.py | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/src/spey_pyhf/simplify.py b/src/spey_pyhf/simplify.py index fb4a474..a15589b 100644 --- a/src/spey_pyhf/simplify.py +++ b/src/spey_pyhf/simplify.py @@ -259,7 +259,23 @@ def __call__( ) # Retreive pyhf models and compare parameter maps - stat_model_pyhf = statistical_model.backend.model()[1] + if include_modifiers_in_control_model: + stat_model_pyhf = statistical_model.backend.model()[1] + else: + # Remove the nuisance parameters from the signal patch + # Note that even if the signal yields are zero, nuisance parameters + # do contribute to the statistical model and some models may be highly + # sensitive to the shape and size of the nuisance parameters. + with _disable_logging(): + tmp_interpreter = copy.deepcopy(interpreter) + for channel, data in signal_patch_map.items(): + tmp_interpreter.inject_signal(channel=channel, data=data) + tmp_model = spey.get_backend("pyhf")( + background_only_model=bkgonly_model, + signal_patch=tmp_interpreter.make_patch(), + ) + stat_model_pyhf = tmp_model.backend.model()[1] + del tmp_model, tmp_interpreter control_model_pyhf = control_model.backend.model()[1] is_nuisance_map_different = ( stat_model_pyhf.config.par_map != control_model_pyhf.config.par_map @@ -357,16 +373,18 @@ def __call__( # NOTE: model spec might be modified within the pyhf workspace, thus # yields needs to be reordered properly before constructing the simplified likelihood - signal_yields = [] + signal_yields, missing_channels = [], [] for channel_name in stat_model_pyhf.config.channels: try: signal_yields += signal_patch_map[channel_name] except KeyError: - log.warning( - f"Channel `{channel_name}` does not exist in the signal patch," - " yields will be set to zero." - ) + missing_channels.append(channel_name) signal_yields += [0.0] * bin_map[channel_name] + if len(missing_channels) > 0: + log.warning( + "Following channels are not in the signal patch," + f" will be set to zero: {', '.join(missing_channels)}" + ) # NOTE background yields are first moments in simplified framework not the yield values # in the full statistical model!