From 960581432ae66499ffbb489b78f4bad056cb2bde Mon Sep 17 00:00:00 2001 From: Dacheng Xu Date: Sun, 5 May 2024 13:00:38 -0400 Subject: [PATCH] Make chunk size smaller (#69) --- axidence/plugins/pairing/peaks_paired.py | 4 +++- axidence/samplers.py | 7 ++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/axidence/plugins/pairing/peaks_paired.py b/axidence/plugins/pairing/peaks_paired.py index 0183197..412265a 100644 --- a/axidence/plugins/pairing/peaks_paired.py +++ b/axidence/plugins/pairing/peaks_paired.py @@ -465,7 +465,9 @@ def shadow_matching( def split_chunks(self, n_peaks): # divide results into chunks # max peaks number in left_i chunk - max_in_chunk = round(self.chunk_target_size_mb * 1e6 / self.dtype["peaks_paired"].itemsize) + max_in_chunk = round( + 0.9 * self.chunk_target_size_mb * 1e6 / self.dtype["peaks_paired"].itemsize + ) _n_peaks = n_peaks.copy() if _n_peaks.max() > max_in_chunk: raise ValueError("Can not fit a single paired event in a chunk!") diff --git a/axidence/samplers.py b/axidence/samplers.py index a22efeb..117b997 100644 --- a/axidence/samplers.py +++ b/axidence/samplers.py @@ -34,13 +34,14 @@ def bins(self): raise ValueError(f"n_bins must be int, not {type(self.n_bins)}, got {self.n_bins}!") return self.inverse_transform(np.linspace(*self.transform(self.interval), self.n_bins + 1)) - def reweight(self, x, reference): + def reweight(self, x, reference, reference_weights=None): h_x = np.histogram(x, bins=self.bins)[0] - h_reference = np.histogram(reference, bins=self.bins)[0] + h_reference = np.histogram(reference, bins=self.bins, weights=reference_weights)[0] _weights = h_reference / h_x + indices = np.clip(np.digitize(x, self.bins) - 1, 0, len(self.bins) - 2) weights = np.where( (x > self.bins[0]) & (x < self.bins[-1]), - _weights[np.digitize(np.clip(x, self.bins[0], self.bins[-1]), self.bins) - 1], + _weights[indices], 0.0, ) return weights