Skip to content

Commit

Permalink
Make chunk size smaller (#69)
Browse files Browse the repository at this point in the history
  • Loading branch information
dachengx authored May 5, 2024
1 parent b489fa9 commit 9605814
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 4 deletions.
4 changes: 3 additions & 1 deletion axidence/plugins/pairing/peaks_paired.py
Original file line number Diff line number Diff line change
Expand Up @@ -465,7 +465,9 @@ def shadow_matching(
def split_chunks(self, n_peaks):
# divide results into chunks
# max peaks number in left_i chunk
max_in_chunk = round(self.chunk_target_size_mb * 1e6 / self.dtype["peaks_paired"].itemsize)
max_in_chunk = round(
0.9 * self.chunk_target_size_mb * 1e6 / self.dtype["peaks_paired"].itemsize
)
_n_peaks = n_peaks.copy()
if _n_peaks.max() > max_in_chunk:
raise ValueError("Can not fit a single paired event in a chunk!")
Expand Down
7 changes: 4 additions & 3 deletions axidence/samplers.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,14 @@ def bins(self):
raise ValueError(f"n_bins must be int, not {type(self.n_bins)}, got {self.n_bins}!")
return self.inverse_transform(np.linspace(*self.transform(self.interval), self.n_bins + 1))

def reweight(self, x, reference):
def reweight(self, x, reference, reference_weights=None):
h_x = np.histogram(x, bins=self.bins)[0]
h_reference = np.histogram(reference, bins=self.bins)[0]
h_reference = np.histogram(reference, bins=self.bins, weights=reference_weights)[0]
_weights = h_reference / h_x
indices = np.clip(np.digitize(x, self.bins) - 1, 0, len(self.bins) - 2)
weights = np.where(
(x > self.bins[0]) & (x < self.bins[-1]),
_weights[np.digitize(np.clip(x, self.bins[0], self.bins[-1]), self.bins) - 1],
_weights[indices],
0.0,
)
return weights
Expand Down

0 comments on commit 9605814

Please sign in to comment.