diff --git a/strax/dtypes.py b/strax/dtypes.py
index 359b2573..b6664446 100644
--- a/strax/dtypes.py
+++ b/strax/dtypes.py
@@ -161,7 +161,12 @@ def hitlet_with_data_dtype(n_samples=2):
 
 
 def peak_dtype(
-    n_channels=100, n_sum_wv_samples=200, n_widths=11, digitize_top=True, hits_timing=True
+    n_channels=100,
+    n_sum_wv_samples=200,
+    n_widths=11,
+    digitize_top=True,
+    hits_timing=True,
+    save_waveform_start=True,
 ):
     """Data type for peaks - ranges across all channels in a detector Remember to set channel to
     -1 (todo: make enum)"""
@@ -206,6 +211,19 @@ def peak_dtype(
             n_sum_wv_samples,
         )
         dtype.insert(9, top_field)
+
+    if save_waveform_start:
+        dtype += [
+            (
+                (
+                    "Waveform data in PE/sample (not PE/ns!), first samples of the waveform, not downsampled",
+                    "data_start",
+                ),
+                np.float32,
+                n_sum_wv_samples,
+            )
+        ]
+
     return dtype
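For orientation (not part of the patch itself): the new field appends n_sum_wv_samples float32 values to every peak, i.e. 800 bytes per peak with the defaults. A minimal sketch checking the dtype, assuming just this patch on top of strax:

```python
import numpy as np
import strax

with_start = np.dtype(strax.peak_dtype(save_waveform_start=True))
without_start = np.dtype(strax.peak_dtype(save_waveform_start=False))

assert "data_start" in with_start.names
assert "data_start" not in without_start.names
# 200 extra float32 samples -> 800 bytes per peak with the defaults
assert with_start.itemsize - without_start.itemsize == 200 * 4
```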
diff --git a/strax/processing/peak_building.py b/strax/processing/peak_building.py
index f40f57ee..d880beea 100644
--- a/strax/processing/peak_building.py
+++ b/strax/processing/peak_building.py
@@ -138,7 +138,12 @@ def find_peaks(
 @export
 @numba.jit(nopython=True, nogil=True, cache=True)
 def store_downsampled_waveform(
-    p, wv_buffer, store_in_data_top=False, wv_buffer_top=np.ones(1, dtype=np.float32)
+    p,
+    wv_buffer,
+    store_in_data_top=False,
+    store_waveform_start=False,
+    max_downsample_factor_waveform_start=2,
+    wv_buffer_top=np.ones(1, dtype=np.float32),
 ):
     """Downsample the waveform in buffer and store it in p['data'] and in p['data_top'] if
     indicated to do so.
@@ -150,6 +155,11 @@ def store_downsampled_waveform(
     :param store_in_data_top: Boolean which indicates whether to also store into p['data_top']
     When downsampling results in a fractional number of samples, the peak is shortened rather than
     extended. This causes data loss, but it is necessary to prevent overlaps between peaks.
+    :param store_waveform_start: Boolean which indicates whether to store the first samples of the
+        waveform in the peak. The first samples are only stored if the waveform is downsampled and
+        the downsample factor is smaller than or equal to max_downsample_factor_waveform_start.
+    :param max_downsample_factor_waveform_start: Maximum downsample factor for storing the first
+        samples of the waveform. It should cover essentially all S1s while keeping disk usage low.
 
     """
@@ -170,6 +180,14 @@ def store_downsampled_waveform(
             wv_buffer[: p["length"] * downsample_factor].reshape(-1, downsample_factor).sum(axis=1)
         )
         p["dt"] *= downsample_factor
+
+        # If the waveform is downsampled, also store its first samples undistorted
+        if store_waveform_start & (downsample_factor <= max_downsample_factor_waveform_start):
+            if p["length"] > len(p["data_start"]):
+                p["data_start"] = wv_buffer[: len(p["data_start"])]
+            else:
+                p["data_start"][: p["length"]] = wv_buffer[: p["length"]]
+
     else:
         if store_in_data_top:
             p["data_top"][: p["length"]] = wv_buffer_top[: p["length"]]
@@ -229,7 +247,15 @@ def _simple_summed_waveform(records, containers, touching_windows, to_pe):
 @export
 @numba.jit(nopython=True, nogil=True, cache=True)
 def sum_waveform(
-    peaks, hits, records, record_links, adc_to_pe, n_top_channels=0, select_peaks_indices=None
+    peaks,
+    hits,
+    records,
+    record_links,
+    adc_to_pe,
+    n_top_channels=0,
+    select_peaks_indices=None,
+    save_waveform_start=False,
+    max_downsample_factor_waveform_start=2,
 ):
     """Compute sum waveforms for all peaks in peaks. Only builds summed waveform other regions in
     which hits were found. This is required to avoid any bias due to zero-padding and baselining.
@@ -243,6 +269,11 @@ def sum_waveform(
     :param select_peaks_indices: Indices of the peaks for partial processing. In the form of
         np.array([np.int, np.int, ..]). If None (default), all the peaks are used for the
         summation. Assumes all peaks AND pulses have the same dt!
+    :param save_waveform_start: Boolean which indicates whether to store the first samples of the
+        waveform in the peak. The first samples are only stored if the waveform is downsampled and
+        the downsample factor is smaller than or equal to max_downsample_factor_waveform_start.
+    :param max_downsample_factor_waveform_start: Maximum downsample factor for storing the first
+        samples of the waveform. It should cover essentially all S1s while keeping disk usage low.
 
     """
     if not len(records):
@@ -357,9 +388,18 @@ def sum_waveform(
         p["area"] += area_pe
 
         if n_top_channels > 0:
-            store_downsampled_waveform(p, swv_buffer, True, twv_buffer)
+            store_downsampled_waveform(
+                p,
+                swv_buffer,
+                True,
+                save_waveform_start,
+                max_downsample_factor_waveform_start,
+                twv_buffer,
+            )
         else:
-            store_downsampled_waveform(p, swv_buffer)
+            store_downsampled_waveform(
+                p, swv_buffer, False, save_waveform_start, max_downsample_factor_waveform_start
+            )
 
         p["n_saturated_channels"] = p["saturated_channel"].sum()
         p["area_per_channel"][:] = area_per_channel
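Note the argument order: the two new parameters sit before wv_buffer_top, so positional callers have to pass the top-array buffer last, as the updated sum_waveform calls above do. A minimal runnable sketch of the new branch, with made-up peak contents (a 300-sample peak and 200-sample data fields, hence a downsample factor of 2):

```python
import numpy as np
import strax

p = np.zeros(1, dtype=strax.peak_dtype(save_waveform_start=True))[0]
p["length"] = 300
p["dt"] = 10
swv_buffer = np.ones(300, dtype=np.float32)  # made-up sum waveform
twv_buffer = np.ones(300, dtype=np.float32)  # made-up top-array waveform

# wv_buffer_top is now the *last* positional argument:
strax.store_downsampled_waveform(p, swv_buffer, True, True, 2, twv_buffer)

assert p["dt"] == 20 and p["length"] == 150
assert p["data"][0] == 2.0        # PE per downsampled (two-sample) bin
assert p["data_start"][0] == 1.0  # PE per raw sample, undistorted
```

The asserts also show the units: data holds PE per downsampled bin, while data_start keeps the first samples at the raw sampling.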
""" assert len(start_merge_at) == len(end_merge_at) @@ -85,7 +97,14 @@ def merge_peaks(peaks, start_merge_at, end_merge_at, max_buffer=int(1e5)): # Downsample the buffers into new_p['data'], new_p['data_top'], # and new_p['data_bot'] - strax.store_downsampled_waveform(new_p, buffer, True, buffer_top) + strax.store_downsampled_waveform( + new_p, + buffer, + True, + save_waveform_start, + max_downsample_factor_waveform_start, + buffer_top, + ) new_p["n_saturated_channels"] = new_p["saturated_channel"].sum() diff --git a/strax/processing/peak_splitting.py b/strax/processing/peak_splitting.py index b04b0467..8ba2a7cf 100644 --- a/strax/processing/peak_splitting.py +++ b/strax/processing/peak_splitting.py @@ -15,6 +15,8 @@ def split_peaks( algorithm="local_minimum", data_type="peaks", n_top_channels=0, + save_waveform_start=False, + max_downsample_factor_waveform_start=2, **kwargs, ): """Return peaks split according to algorithm, with waveforms summed and widths computed. @@ -37,6 +39,11 @@ def split_peaks( the new split peaks/hitlets. :param n_top_channels: Number of top array channels. :param result_dtype: dtype of the result. + :param save_waveform_start: Boolean which indicates whether to store the first samples of the + waveform in the peak. It will only store the first samples if the waveform is downsampled + and the downsample factor is smaller equal to max_downsample_factor_waveform_start. + :param max_downsample_factor_waveform_start: Maximum downsample factor for storing the first + samples of the waveform. It should cover basically all S1s while keeping the disk usage low. Any other options are passed to the algorithm. @@ -49,7 +56,16 @@ def split_peaks( if data_type_is_not_supported: raise TypeError(f'Data_type "{data_type}" is not supported.') return splitter( - peaks, hits, records, rlinks, to_pe, data_type, n_top_channels=n_top_channels, **kwargs + peaks, + hits, + records, + rlinks, + to_pe, + data_type, + n_top_channels=n_top_channels, + save_waveform_start=save_waveform_start, + max_downsample_factor_waveform_start=max_downsample_factor_waveform_start, + **kwargs, ) @@ -72,6 +88,11 @@ class PeakSplitter: implemented in each subclass defines the algorithm, which takes in a peak's waveform and returns the index to split the peak at, if a split point is found. Otherwise NO_MORE_SPLITS is returned and the peak is left as is. + :param save_waveform_start: Boolean which indicates whether to store the first samples of the + waveform in the peak. It will only store the first samples if the waveform is downsampled + and the downsample factor is smaller equal to max_downsample_factor_waveform_start. + :param max_downsample_factor_waveform_start: Maximum downsample factor for storing the first + samples of the waveform. It should cover basically all S1s while keeping the disk usage low. 
""" @@ -88,6 +109,8 @@ def __call__( do_iterations=1, min_area=0, n_top_channels=0, + save_waveform_start=False, + max_downsample_factor_waveform_start=None, **kwargs, ): if not len(records) or not len(peaks) or not do_iterations: @@ -127,7 +150,16 @@ def __call__( if is_split.sum() != 0: # Found new peaks: compute basic properties if data_type == "peaks": - strax.sum_waveform(new_peaks, hits, records, rlinks, to_pe, n_top_channels) + strax.sum_waveform( + new_peaks, + hits, + records, + rlinks, + to_pe, + n_top_channels, + save_waveform_start=save_waveform_start, + max_downsample_factor_waveform_start=max_downsample_factor_waveform_start, + ) strax.compute_widths(new_peaks) elif data_type == "hitlets": # Add record fields here diff --git a/tests/test_peak_processing.py b/tests/test_peak_processing.py index 2c2b1e5c..a0fde4b4 100644 --- a/tests/test_peak_processing.py +++ b/tests/test_peak_processing.py @@ -316,6 +316,7 @@ def test_simple_summed_waveform(pulses): fake_event_dtype = strax.time_dt_fields + [ ("data", np.float32, 200), ("data_top", np.float32, 200), + ("data_start", np.float32, 200), ] records = np.zeros(len(pulses), dtype=strax.record_dtype())