From 11ffe3164ed3d9da6f376a52137cce82d11d2922 Mon Sep 17 00:00:00 2001
From: Dacheng Xu
Date: Mon, 29 Apr 2024 12:15:27 -0400
Subject: [PATCH] Apply Shadow Matching to `PeaksPaired` (#51)

Closes: https://github.com/dachengx/axidence/issues/33
---
 axidence/plugins/pairing/peaks_paired.py | 259 +++++++++++++++++++++--
 pyproject.toml                           |   1 +
 2 files changed, 242 insertions(+), 18 deletions(-)

diff --git a/axidence/plugins/pairing/peaks_paired.py b/axidence/plugins/pairing/peaks_paired.py
index f17db38..2846a40 100644
--- a/axidence/plugins/pairing/peaks_paired.py
+++ b/axidence/plugins/pairing/peaks_paired.py
@@ -1,11 +1,13 @@
 import warnings
 from immutabledict import immutabledict
 import numpy as np
+from scipy.stats import poisson
 import strax
 from strax import Plugin, DownChunkingPlugin
 import straxen
 from straxen import units
 from straxen import PeakProximity
+import GOFevaluation as ge

 from ...utils import copy_dtype
 from ...dtypes import peak_positions_dtype
@@ -14,7 +16,7 @@ class PeaksPaired(ExhaustPlugin, DownChunkingPlugin):
     __version__ = "0.0.0"
-    depends_on = ("isolated_s1", "isolated_s2", "cut_event_building_salted", "event_shadow_salted")
+    depends_on = ("isolated_s1", "isolated_s2", "peaks_salted", "peak_shadow_salted")
     provides = ("peaks_paired", "truth_paired")
     data_kind = immutabledict(zip(provides, provides))
     save_when = immutabledict(zip(provides, [strax.SaveWhen.EXPLICIT, strax.SaveWhen.ALWAYS]))
@@ -44,12 +46,18 @@ class PeaksPaired(ExhaustPlugin, DownChunkingPlugin):
         help="Vertical electron drift velocity in cm/ns (1e4 m/ms)",
     )

-    n_drift_time_bin = straxen.URLConfig(
+    n_drift_time_bins = straxen.URLConfig(
         default=50,
         type=int,
         help="Bin number of drift time",
     )

+    max_n_shadow_bins = straxen.URLConfig(
+        default=30,
+        type=int,
+        help="Maximum bin number per dimension of the 2D shadow matching",
+    )
+
     isolated_s1_rate_correction = straxen.URLConfig(
         default=True,
         type=bool,
@@ -76,6 +84,10 @@ class PeaksPaired(ExhaustPlugin, DownChunkingPlugin):
         help="Whether shift drift time when performing shadow matching",
     )

+    shadow_deltatime_exponent = straxen.URLConfig(
+        default=-1.0, type=float, track=True, help="The exponent of delta t when calculating shadow"
+    )
+
     fixed_drift_time = straxen.URLConfig(
         default=None,
         type=(int, float, None),
@@ -145,6 +157,8 @@ def preprocess_isolated_s2(s2):
     def simple_pairing(
         s1,
         s2,
+        s1_rate,
+        s2_rate,
         run_time,
         max_drift_time,
         min_drift_time,
@@ -153,13 +167,6 @@
         fixed_drift_time,
         rng,
     ):
-        s1_rate = len(s1) / run_time
-        s2_rate = len(s2) / run_time
-        print(f"There are {len(s1)} S1 peaks group")
-        print(f"S1 rate is {s1_rate:.3f}Hz")
-        print(f"There are {len(s2)} S2 peaks group")
-        print(f"S2 rate is {s2_rate * 1e3:.3f}mHz")
-
         paring_rate_full = (
             s1_rate * s2_rate * (max_drift_time - min_drift_time) / units.s / paring_rate_correction
         )
@@ -173,6 +180,203 @@
             drift_time = np.full(n_events, fixed_drift_time)
         return paring_rate_full, s1_group_number, s2_group_number, drift_time

+    def shadow_reference_selection(self, peaks_salted):
+        """Select the salted S2 peaks as the reference sample for shadow matching."""
+        return peaks_salted[peaks_salted["type"] == 2]
+
+    @staticmethod
+    def digitize2d(data_sample, bin_edges, n_bins):
+        """Get indices of the 2D bins to which each value in the input array
+        belongs.
+
+        Args:
+            data_sample: array of shape (n, 2), the data to be binned
+            bin_edges: irregular bin edges as returned by
+                ge.equiprobable_histogram; bin_edges[0] holds the x edges and
+                bin_edges[1][i] the y edges within the i-th x bin
+            n_bins: number of bins per dimension
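+
+        A minimal illustration with hand-made edges (these example edges are
+        not from the original code; the layout is assumed from how this
+        method is called on ``ge.equiprobable_histogram`` output):
+
+        >>> import numpy as np
+        >>> edges = [
+        ...     np.array([0.0, 1.0, 2.0]),
+        ...     np.array([[0.0, 5.0, 10.0], [0.0, 2.0, 10.0]]),
+        ... ]
+        >>> PeaksPaired.digitize2d(np.array([[0.5, 6.0], [1.5, 1.0]]), edges, 2)
+        array([1, 2])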
+        """
+        digit = np.zeros(len(data_sample), dtype=int)
+        # `x_dig` is within [0, len(bin_edges[0])-1]
+        x_dig = np.digitize(data_sample[:, 0], bin_edges[0][1:])
+        for xd in np.unique(x_dig):
+            digit[x_dig == xd] = (
+                np.digitize(data_sample[:, 1][x_dig == xd], bin_edges[1][xd][1:]) + xd * n_bins
+            )
+        return digit
+
+    @staticmethod
+    def shadow_matching(
+        s1,
+        s2,
+        shadow_reference,
+        shadow_deltatime_exponent,
+        max_n_shadow_bins,
+        run_time,
+        max_drift_time,
+        min_drift_time,
+        n_drift_time_bins,
+        shift_dt_shadow_matching,
+        paring_rate_correction,
+        paring_rate_bootstrap_factor,
+        rng,
+        onlyrate=False,
+    ):
+        # perform the shadow matching technique,
+        # see details in xenon:xenonnt:ac:prediction:summary#fake-plugin_simulation
+
+        # 2D equiprobable binning:
+        # prepare the 2D space, where x is log(S2/dt) and y is (log(S2)**2+log(dt)**2)**0.5,
+        # because these two dimensions are orthogonal
+        x = np.log10(shadow_reference["shadow_s2_time_shadow"])
+        y = np.sqrt(
+            np.log10(
+                shadow_reference["shadow_s2_time_shadow"]
+                * shadow_reference["dt_s2_time_shadow"] ** -shadow_deltatime_exponent
+            )
+            ** 2
+            + np.log10(shadow_reference["dt_s2_time_shadow"]) ** 2
+        )
+        sampled_correlation = np.stack([x, y]).T
+
+        x = np.log10(s1["shadow_s2_time_shadow"])
+        y = np.sqrt(
+            np.log10(
+                s1["shadow_s2_time_shadow"] * s1["dt_s2_time_shadow"] ** -shadow_deltatime_exponent
+            )
+            ** 2
+            + np.log10(s1["dt_s2_time_shadow"]) ** 2
+        )
+        s1_sample = np.stack([x, y]).T
+
+        # use the (x, y) distribution of isolated S1 as reference,
+        # because it is more densely populated where the shadow (S2/dt) is large
+        reference_sample = s1_sample
+        ge.check_sample_sanity(reference_sample)
+        # largest bin number in the x and y dimensions
+        n_shadow_bins = max_n_shadow_bins
+        # find a suitable bin number: we do not allow a binning that yields an
+        # empty bin (count = 0) in the shadow (x, y) histogram of the salted
+        # reference sample, because it enters as a denominator
+        bins_found = False
+        while not bins_found:
+            try:
+                # the binning order is [0, 1]; important, please do not change this order
+                # (entries piled up at the minimum x are excluded from the binning)
+                _, bin_edges = ge.equiprobable_histogram(
+                    data_sample=reference_sample[
+                        reference_sample[:, 0] != reference_sample[:, 0].min()
+                    ],
+                    reference_sample=reference_sample[
+                        reference_sample[:, 0] != reference_sample[:, 0].min()
+                    ],
+                    n_partitions=[n_shadow_bins, n_shadow_bins],
+                )
+                if np.any(
+                    ge.apply_irregular_binning(data_sample=sampled_correlation, bin_edges=bin_edges)
+                    <= 0
+                ):
+                    raise ValueError(
+                        f"Found an empty bin when the bin number is {n_shadow_bins}!"
+                    )
+                bins_found = True
+            except Exception:
+                # the binning failed or produced an empty bin; retry with fewer bins
+                n_shadow_bins -= 1
+                assert n_shadow_bins > 0, "No suitable binning found"
+        print(f"Shadow bin number is {n_shadow_bins}")
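+        # `bin_edges` now defines an equiprobable 2D binning of the isolated
+        # S1 (x, y) sample in which every bin also contains at least one
+        # entry of the salted reference sample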
+        # apply the binning
+        s1_shadow_count = ge.apply_irregular_binning(data_sample=s1_sample, bin_edges=bin_edges)
+        # s2_shadow_count = ge.apply_irregular_binning(data_sample=s2_sample, bin_edges=bin_edges)
+        sampled_shadow_count = ge.apply_irregular_binning(
+            data_sample=sampled_correlation, bin_edges=bin_edges
+        )
+        # shadow_ratio = s1_shadow_count / sampled_shadow_count
+        # effective live time assigned to each (x, y) bin
+        shadow_run_time = sampled_shadow_count / sampled_shadow_count.sum() * run_time
+        if not onlyrate:
+            # get indices of the 2D bins
+            s1_digit = PeaksPaired.digitize2d(s1_sample, bin_edges, n_shadow_bins)
+            _s1_group_number = np.arange(len(s1))
+            s1_group_number_list = [
+                _s1_group_number[s1_digit == xd].tolist()
+                for xd in range(n_shadow_bins * n_shadow_bins)
+            ]
+
+        drift_time_bins = np.linspace(min_drift_time, max_drift_time, n_drift_time_bins + 1)
+        drift_time_bin_center = (drift_time_bins[:-1] + drift_time_bins[1:]) / 2
+
+        group_number_list = []
+        _paring_rate_full = np.zeros(len(drift_time_bin_center))
+        for i in range(len(drift_time_bin_center)):
+            if shift_dt_shadow_matching:
+                # a different drift time between isolated S1 and S2 implies a
+                # different shadow matching arrangement
+                delta_t = drift_time_bin_center[i]
+            else:
+                delta_t = 0
+            # shift the S2 shadow delta t and floor it at 1 to keep the log well-defined
+            s2_shadow_dt_request = np.clip(s2["dt_s2_time_shadow"] - delta_t, 1, np.inf)
+            # recover the area of the preceding S2 from the shadow definition
+            pre_area_s2 = (
+                s2["shadow_s2_time_shadow"] * s2["dt_s2_time_shadow"] ** -shadow_deltatime_exponent
+            )
+            x = np.log10(pre_area_s2 * s2_shadow_dt_request**shadow_deltatime_exponent)
+            y = np.sqrt(np.log10(pre_area_s2) ** 2 + np.log10(s2_shadow_dt_request) ** 2)
+            data_sample = np.stack([x, y]).T
+            ge.check_sample_sanity(data_sample)
+            # apply the binning to (x, y)
+            s2_shadow_count = ge.apply_irregular_binning(
+                data_sample=data_sample, bin_edges=bin_edges
+            )
+            # the conditional rate is the AC rate in each (x, y) bin
+            ac_rate_conditional = (
+                s1_shadow_count / shadow_run_time * s2_shadow_count / shadow_run_time
+            )
+            ac_rate_conditional *= (drift_time_bins[i + 1] - drift_time_bins[i]) / units.s
+            ac_rate_conditional *= sampled_shadow_count / sampled_shadow_count.sum()
+            ac_rate_conditional /= paring_rate_correction
+            _paring_rate_full[i] = ac_rate_conditional.sum()
+            if not onlyrate:
+                # expected number of AC events in each bin in this run
+                mu_shadow = ac_rate_conditional * run_time * paring_rate_bootstrap_factor
+                # draw a Poisson count of pairs in each (x, y) bin
+                count_pairing = np.zeros_like(mu_shadow, dtype=int)
+                for ii in range(mu_shadow.shape[0]):
+                    for jj in range(mu_shadow.shape[1]):
+                        count_pairing[ii, jj] = poisson.rvs(mu=mu_shadow[ii, jj])
+                count_pairing = count_pairing.flatten()
+                # count_pairing = poisson.rvs(mu=mu_shadow).flatten()
+                if count_pairing.max() == 0:
+                    # guarantee at least one pair, placed at the most probable bin
+                    count_pairing[mu_shadow.argmax()] = 1
+                s2_digit = PeaksPaired.digitize2d(data_sample, bin_edges, n_shadow_bins)
+                _s2_group_number = np.arange(len(s2))
+                s2_group_number_list = [
+                    _s2_group_number[s2_digit == xd].tolist()
+                    for xd in range(n_shadow_bins * n_shadow_bins)
+                ]
+                # randomly sample isolated S1 and S2 group numbers
+                _s1_group_number = np.hstack(
+                    [
+                        rng.choice(
+                            s1_group_number_list[xd],
+                            size=count_pairing[xd],
+                        )
+                        for xd in range(n_shadow_bins * n_shadow_bins)
+                    ]
+                )
+                _s2_group_number = np.hstack(
+                    [
+                        rng.choice(
+                            s2_group_number_list[xd],
+                            size=count_pairing[xd],
+                        )
+                        for xd in range(n_shadow_bins * n_shadow_bins)
+                    ]
+                )
+                # uniformly sample the drift time within this bin
+                _drift_time = rng.choice(
+                    round(drift_time_bins[i + 1] - drift_time_bins[i]),
+                    size=count_pairing.sum(),
+                ) + round(drift_time_bins[i])
+                group_number_list.append([_s1_group_number, _s2_group_number, _drift_time])
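+        # combine the results of all drift time bins: the total AC rate is
+        # the sum over bins, and the sampled group numbers and drift times
+        # are concatenated below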
+        paring_rate_full = _paring_rate_full.sum()
+        if not onlyrate:
+            s1_group_number = np.hstack([group[0] for group in group_number_list]).astype(int)
+            s2_group_number = np.hstack([group[1] for group in group_number_list]).astype(int)
+            drift_time = np.hstack([group[2] for group in group_number_list]).astype(int)
+            assert len(s1_group_number) == len(s2_group_number)
+        return paring_rate_full, s1_group_number, s2_group_number, drift_time
+
     def split_chunks(self, n_peaks):
         # divide results into chunks
         # max peaks number in left_i chunk
@@ -305,7 +509,7 @@ def build_arrays(

         return peaks_arrays, truth_arrays

-    def compute(self, isolated_s1, isolated_s2, events_salted, start, end):
+    def compute(self, isolated_s1, isolated_s2, peaks_salted, start, end):
         for i, s in enumerate([isolated_s1, isolated_s2]):
             if np.any(np.diff(s["group_number"]) < 0):
                 raise ValueError(f"Group number is not sorted in isolated S{i}!")
@@ -319,21 +523,40 @@ def compute(self, isolated_s1, isolated_s2, events_salted, start, end):
             raise NotImplementedError("AC rate correction for isolated S1 is not implemented yet!")
         else:
             paring_rate_correction = 1
-        print(f"Input S1 correction factor is {paring_rate_correction:.3f}")
+        print(f"Isolated S1 correction factor is {paring_rate_correction:.3f}")
+        run_time = (end - start) / units.s
+        s1_rate = len(isolated_s1) / run_time
+        s2_rate = len(main_isolated_s2) / run_time
+        print(f"There are {len(isolated_s1)} isolated S1 peak groups")
+        print(f"S1 rate is {s1_rate:.3f} Hz")
+        print(f"There are {len(main_isolated_s2)} isolated S2 peak groups")
+        print(f"S2 rate is {s2_rate * 1e3:.3f} mHz")

         if self.apply_shadow_matching:
             # simulate AC's drift time bin by bin
-            # print(f"Drift time bins number is {self.n_drift_time_bin}")
-            # drift_time_bins = np.linspace(
-            #     self.min_drift_time, self.max_drift_time, self.n_drift_time_bin + 1
-            # )
-            # drift_time_bin_center = (drift_time_bins[:-1] + drift_time_bins[1:]) / 2
-            raise NotImplementedError("Shadow matching is not implemented yet!")
+            shadow_reference = self.shadow_reference_selection(peaks_salted)
+            paring_rate_full, s1_group_number, s2_group_number, drift_time = self.shadow_matching(
+                isolated_s1,
+                main_isolated_s2,
+                shadow_reference,
+                self.shadow_deltatime_exponent,
+                self.max_n_shadow_bins,
+                run_time,
+                self.max_drift_time,
+                self.min_drift_time,
+                self.n_drift_time_bins,
+                self.shift_dt_shadow_matching,
+                paring_rate_correction,
+                self.paring_rate_bootstrap_factor,
+                self.rng,
+            )
         else:
             paring_rate_full, s1_group_number, s2_group_number, drift_time = self.simple_pairing(
                 isolated_s1,
                 main_isolated_s2,
-                (end - start) / units.s,
+                s1_rate,
+                s2_rate,
+                run_time,
                 self.max_drift_time,
                 self.min_drift_time,
                 paring_rate_correction,
diff --git a/pyproject.toml b/pyproject.toml
index 3f42d8a..ca5abb7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -24,6 +24,7 @@ documentation = "https://readthedocs.org/projects/axidence/"
 [tool.poetry.dependencies]
 strax = ">=1.6.2"
 straxen = ">=2.2.1"
+GOFevaluation = ">=0.1.4"

 [build-system]
 requires = ["poetry-core>=1.0.8", "setuptools>=61.0"]