diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 461841a..3ae37f2 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,6 @@ [bumpversion] -current_version = 0.3.3 +current_version = 0.3.8 files = setup.py epix/__init__.py commit = True tag = True + diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index b398fba..01cf977 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -1,33 +1,33 @@ # This workflow will upload epix to PyPi using Twine when a release is created # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries -name: Upload Python Package +name: PyPI on: - release: - types: [created] - # Allow manual build if tests are failing first time workflow_dispatch: + release: + types: [ created ] jobs: - deploy: - + build: runs-on: ubuntu-latest - + permissions: + id-token: write steps: - - uses: actions/checkout@v2 - - name: Set up Python - uses: actions/setup-python@v2 - with: - python-version: '3.8' - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install setuptools wheel twine - - name: Build and publish - env: - TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} - TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} - run: | - python setup.py sdist bdist_wheel - twine upload dist/* + # Setup steps + - name: Setup python + uses: actions/setup-python@v5 + with: + python-version: '3.9' + + - name: Checkout repo + uses: actions/checkout@v3 + + - name: Install dependencies + run: pip install wheel + + - name: Build package + run: python setup.py sdist bdist_wheel + + - name: Publish a Python distribution to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/HISTORY.md b/HISTORY.md index df4b6f6..a449f79 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,7 +1,31 @@ +0.3.8 (2024-02-23) +================== +* Propagate defaults for energy range arguments (#82) + +0.3.7 (2024-02-08) +================== +* Selection based on total energy deposit in sensitive volume (#76) +* Fix cut_by_eventid again (#80) + +0.3.6 (2024-01-15) +================== +* Fix G4 event number selection with cut_by_eventid (#75) +* `np.int` is deprecated (#78) +* Fix epix to allow version bump (#77) + +0.3.5 (2023-09-13) +================== +* Custom microclustering and beta-yields generator (#73) + +0.3.4 (2023-03-28) +================== +* Fix calculation for cluster weighted average time (#65) +* Add zenodo badge + 0.3.3 (2023-01-09) ================== -* Specify not working numba in req. file by @JoranAngevaare in https://github.com/XENONnT/epix/pull/62 -* Install scikit-learn by @JoranAngevaare in https://github.com/XENONnT/epix/pull/63 +* Specify not working numba in req. 
file (#62) +* Install scikit-learn (#63) 0.3.2 (2022-08-15) ================== diff --git a/README.md b/README.md index 26cf816..e857dab 100644 --- a/README.md +++ b/README.md @@ -39,7 +39,10 @@ The other keyword arguments are: | `--MicroSeparationTime` | Clustering time (ns) | `10` | | `--TagClusterBy` | decide if you tag the cluster (particle type, energy depositing process) according to first interaction in it (`time`) or most energetic (`energy`) | `energy` | | `--MaxDelay` | Time after which we cut the rest of the event (ns) | `1e7` | -| `--YieldModel` | Model for yield/quanta generationg (nest or bbf) | `nest` | +| `--MaxEnergy` | Upper range for energy selection _in sensitive volume_ (keV) | `inf` | +| `--MinEnergy` | Lower range for energy selection _in sensitive volume_ (keV) | `-1` | +| `--YieldModel` | Model for yield/quanta generation (nest / bbf / beta) | `nest` | +| `--ClusterMethod` | Microclustering method (dbscan / betadecay / brem) | `dbscan` | | `--SourceRate` | Event rate for event separation
- `0` for no time shift (G4 time remains)
- `-1` for clean time shift between events
- `>0` (Hz) for random spacing | `0` | | `--CutNeutron` | Add if you want to filter only nuclear recoil events (maximum ER energy deposit 10 keV) | `false` | | `--JobNumber` | Job number in full chain simulation. Offset is computed as `JobNumber * n_simulated_events/SourceRate`, where `n_simulated_events` is read from file. | `0` | diff --git a/bin/run_epix b/bin/run_epix index c9a5fc1..d757086 100755 --- a/bin/run_epix +++ b/bin/run_epix @@ -48,7 +48,17 @@ def pars_args(): help='Event rate for event separation. 0 (default) means no time shift is applied for the ' 'different events. Use -1 for clean spacing or give a rate >0 to space events randomly.') parser.add_argument('--YieldModel', dest='yield', type=str, - default='nest', help = 'Switch between BBF and NEST yield model') + default='nest', help = 'Switch between NEST, BBF, and BETA yield model, ' + '[ nest | bbf | beta ]') + parser.add_argument('--ClusterMethod', dest='cluster_method', type=str, + default='dbscan', help = 'Switch between DBSCAN, BETA-DECAY, ' + 'and BREMSSTRAHLUNG microclustering, [ dbscan | betadecay | brem ]') + parser.add_argument('--MaxEnergy', dest='max_energy', type=float, + action='store', default=float('inf'), #keV + help='Maximum energy in sensitive volume to be stored [keV].') + parser.add_argument('--MinEnergy', dest='min_energy', type=float, + action='store', default=-1, #keV + help='Minimum energy in sensitive volume to be stored [keV].') parser.add_argument('--JobNumber', dest='job_number', type=int, action='store', default=0, help='Job number in full chain simulation. Offset is computed as ' diff --git a/epix/__init__.py b/epix/__init__.py index b52ba09..76ce76b 100644 --- a/epix/__init__.py +++ b/epix/__init__.py @@ -1,4 +1,4 @@ -__version__ = '0.3.3' +__version__ = '0.3.8' from .common import * from .io import * @@ -10,3 +10,5 @@ from .detectors import * from .run_epix import * from .quantgen_wrapper import BBF_quanta_generator +from .quantgen_wrapper import BETA_quanta_generator +from .quantgen_wrapper import NEST_quanta_generator diff --git a/epix/betadecay_cluster.py b/epix/betadecay_cluster.py new file mode 100755 index 0000000..b9d09c6 --- /dev/null +++ b/epix/betadecay_cluster.py @@ -0,0 +1,40 @@ +import numpy as np +import pandas as pd +import uproot +import time +import os +import re +import awkward as ak +import collections + + +class BetaDecayClustering(): + + def __init__(self): + pass + + def cluster(self, event_tracks_df): + betagamma_df = event_tracks_df[ + ((event_tracks_df.type == 'e-') & (event_tracks_df.creaproc == 'RadioactiveDecayBase')) + | ((event_tracks_df.type == 'gamma') & (event_tracks_df.creaproc == 'RadioactiveDecayBase'))] + betagamma_df = betagamma_df[ + ~((betagamma_df.edproc == "Rayl") | (betagamma_df.edproc == "Transportation"))] + + betagamma_df['ed'] = betagamma_df.PreStepEnergy - betagamma_df.PostStepEnergy + betagamma_df.loc[betagamma_df.type == 'e-', 'ed'] = betagamma_df.loc[ + betagamma_df.type == 'e-', 'PreStepEnergy'] + betagamma_df = betagamma_df[betagamma_df.ed > 1e-6] + + beta_trackid = [] + for ind, row in betagamma_df.iterrows(): + if row['type'] == 'gamma': + pass + elif row['type'] == 'e-': + if row['trackid'] in beta_trackid: + betagamma_df.drop(ind, inplace=True) + else: + beta_trackid.append(row['trackid']) + else: + betagamma_df.drop(ind, inplace=True) + + return betagamma_df diff --git a/epix/brem_cluster.py b/epix/brem_cluster.py new file mode 100755 index 0000000..ecef900 --- /dev/null +++ b/epix/brem_cluster.py @@ -0,0 +1,373 @@ +import numpy as 
np +import pandas as pd +import uproot +import time +import os +import re +import awkward as ak +import collections + + +class BremClustering(): + + def __init__(self): + self.lineage_dict = {'trackid': 0, 'parentid': 1, 'type': 2, 'creaproc': 3, 'edproc': 4, 'PreStepEnergy': 5} + + self.min_dE = 1e-6 + self.max_lineage_depth = 1000 # safeguard for the lineage loop + + g4_keys = ['evtid', 'trackid', 'parentid', 'parenttype', + 'ed', 'edproc', 'type', 'creaproc', + 'PreStepEnergy', 'PostStepEnergy', + 'x', 'y', 'z', 'x_pri', 'y_pri', 'z_pri', 't'] + self.agg_beta = {x: 'first' for x in g4_keys} + self.agg_beta['PostStepEnergy'] = 'last' + self.agg_beta['ed'] = 'sum' + self.agg_beta['dE'] = 'sum' + + self.agg_brem = {x: 'last' for x in g4_keys} + self.agg_brem['PreStepEnergy'] = 'first' + + self.agg_escaped = {x: 'last' for x in g4_keys} + + def get_clean_gamma_df(self, df): + # In the final gamma_df we will remove compt steps since they will be taken + # into account in the electron_df. We keep them now to see all steps + # in the lineages of gammas during correction calculation. + gamma_df = df[(df.type == 'gamma') & ((df.edproc == 'phot') | (df.edproc == 'compt'))] + # Exclude gamma from e- via eIoni since they are accounted in the e- eDep + # and their max energy is 35 keV + gamma_df = gamma_df[gamma_df.creaproc != 'eIoni'] + gamma_df = gamma_df[gamma_df.dE > self.min_dE] + return gamma_df + + def get_clean_electron_df(self, df): + em_df = df[(df.type == 'e-') + & ((df.creaproc == 'phot') | (df.creaproc == 'conv') | (df.creaproc == 'compt'))] + em_df = em_df[em_df.dE > self.min_dE] + return em_df.groupby(['trackid'], as_index=False).aggregate(self.agg_beta) + + def get_clean_positron_df(self, df): + ep_df = df[(df.type == 'e+')] + ep_df = ep_df[ep_df.edproc != 'annihil'] + ep_df = ep_df[ep_df.dE > self.min_dE] + return ep_df.groupby(['trackid'], as_index=False).aggregate(self.agg_beta) + + def get_brem_event_df(self, event_df): + brem_df = event_df[(event_df.type == 'gamma') & (event_df.creaproc == 'eBrem')] + brem_df = brem_df[brem_df.edproc != 'Transportation'] + brem_df = brem_df.groupby('trackid', as_index=False).aggregate(self.agg_brem) + + brem_df['corrected'] = False + + return brem_df + + def get_escaped_event_df(self, event_df): + # Before we clean the event_df, we check if there're any particles + # that escaped LXe volume in the event + + # Remove nu/anti_nu + event_df_no_nu = event_df[~(event_df.type.str.contains('nu'))] + + escaped_df = event_df_no_nu.groupby('trackid', as_index=False).aggregate(self.agg_escaped) + # escaped_df = escaped_df[escaped_df.edproc=='Transportation'] + escaped_df = escaped_df[escaped_df.PostStepEnergy > self.min_dE] + + escaped_df['corrected'] = False + + return escaped_df + + def get_clean_event_df(self, event_df): + _df = event_df[(event_df.edproc != 'Transportation')] # ignore due to zero deposited energy + _df = _df[(_df.edproc != 'Scintillation')] # ignore last step due to zero particle energy + _df = _df[(_df.edproc != 'Rayl')] # ignore due to zero deposited energy + + _df['dE'] = _df.PreStepEnergy - _df.PostStepEnergy + + clean_df = pd.DataFrame() + clean_df = pd.concat((clean_df, self.get_clean_gamma_df(_df))) + clean_df = pd.concat((clean_df, self.get_clean_electron_df(_df))) + clean_df = pd.concat((clean_df, self.get_clean_positron_df(_df))) + + clean_df['corr'] = 0.0 + clean_df['dE_corr'] = 0.0 + + return clean_df + + def get_tracks_array(self, clean_event_df): + df = clean_event_df.groupby(['trackid'], as_index=False).aggregate( + {k: 
'first' for k in self.lineage_dict.keys()}) + return df.to_numpy() + + def get_lineage(self, data, parentid): + datalist = [] + + def get_node(data, parentid): + newdata = [] + for d in data: + if d[self.lineage_dict['parentid']] == parentid: + newdata.append(d) + for d in newdata: + datalist.append(d) + get_node(data, d[self.lineage_dict['trackid']]) + + get_node(data, parentid) + return np.array(datalist) + + def get_reverse_lineage(self, data, trackid): + datalist = [] + current_trackid = trackid + current_parentid = -1 + + for i in range(self.max_lineage_depth): + try: + _track = np.array([None for k in self.lineage_dict.values()]) + for k in self.lineage_dict.keys(): + _track[self.lineage_dict[k]] = data.loc[(data.trackid == current_trackid)][k].to_numpy()[0] + if current_trackid != trackid: + datalist.append(_track) + current_trackid = _track[self.lineage_dict['parentid']] + except KeyError: + break + except IndexError: + break + + return np.array(datalist) + + def get_gamma_df(self, event_df, + tracks_arr, clean_event_df, + phot_gamma_correction=False, + verbose=False): + phot_gamma_df = clean_event_df[clean_event_df.type == 'gamma'] + phot_gamma_df = phot_gamma_df[(phot_gamma_df.edproc == 'phot') | (phot_gamma_df.edproc == 'eIoni')] + + if not phot_gamma_correction: + phot_gamma_df = phot_gamma_df[phot_gamma_df.creaproc != 'phot'] + + phot_gamma_df['dE_corr'] = phot_gamma_df['dE'] + phot_gamma_df['corr'] + return phot_gamma_df + + def filter_electron_tracks(self, tracks_arr, clean_event_df, + exclude_e_from_phot_gamma=True): + ## exclude_e_from_phot_gamma: remove e- which come from the lineage of the gamma created by phot + + non_e_arr = tracks_arr[tracks_arr[:, self.lineage_dict['type']] != 'e-'] + + electron_arr = tracks_arr[tracks_arr[:, self.lineage_dict['type']] == 'e-'] + electron_arr = electron_arr[electron_arr[:, self.lineage_dict['creaproc']] != 'eIoni'] + + # Remove phot-e since their eDep are already included in the phot-gamma eDep + electron_arr = electron_arr[electron_arr[:, self.lineage_dict['creaproc']] != 'phot'] + + if exclude_e_from_phot_gamma: + # Remove e- which come from the lineage of the gamma created by phot + # since we disregard these type of gamma in gamma_df + phot_gamma_trackid = clean_event_df[ + (clean_event_df.type == 'gamma') & (clean_event_df.creaproc == 'phot')].trackid.unique() + e_from_phot_gamma_trackid = [] + for g in phot_gamma_trackid: + lineage = self.get_lineage(tracks_arr, g) + if len(lineage) > 0: + e_from_phot_gamma_trackid.extend(np.unique(lineage[:, self.lineage_dict['trackid']])) + + e_from_phot_gamma_trackid = list(set(e_from_phot_gamma_trackid)) + electron_arr = electron_arr[ + ~np.isin(electron_arr[:, self.lineage_dict['trackid']], e_from_phot_gamma_trackid)] + + clean_tracks_arr = np.concatenate((electron_arr, non_e_arr)) + + return clean_tracks_arr + + def remove_e_from_eIoni_gamma(self, event_df, e_df): + e_from_eIoni_gamma = [] + + for e_trackid in e_df.trackid.unique(): + e_rlin = self.get_reverse_lineage(event_df, trackid=e_trackid) + for step in e_rlin: + try: + if step[self.lineage_dict['type']] == 'gamma' and step[self.lineage_dict['creaproc']] == 'eIoni': + e_from_eIoni_gamma.append(e_trackid) + break + except IndexError: + print('IndexError') + + e_df = e_df[~e_df.trackid.isin(e_from_eIoni_gamma)] + return e_df + + def get_e_df(self, event_df, + tracks_arr, clean_event_df, e_type, + exclude_e_from_phot_gamma=True, + exclude_e_from_eIoni_gamma=True, + verbose=False): + clean_tracks_arr = 
self.filter_electron_tracks(tracks_arr, clean_event_df, exclude_e_from_phot_gamma) + e_tracks = clean_tracks_arr[clean_tracks_arr[:, self.lineage_dict['type']] == e_type] + e_trackid = np.unique(e_tracks[:, self.lineage_dict['trackid']]) + e_df = clean_event_df[clean_event_df.trackid.isin(e_trackid)] + + if exclude_e_from_eIoni_gamma: + e_df = self.remove_e_from_eIoni_gamma(event_df, e_df) + + try: + if e_type == 'e-': + e_df = e_df[(e_df.creaproc == 'compt') | (e_df.creaproc == 'conv') | (e_df.parentid == 0)] + e_df['dE_corr'] = e_df['dE'] + e_df['corr'] + except AttributeError: + pass + + return e_df + + def correct_for_escaped(self, track_df, escaped_df, escaped_rlineage_dict, mode): + corrected_escaped_trackid = [] + + for escaped_trackid, escaped_rlineage in escaped_rlineage_dict.items(): + for step in escaped_rlineage: + if np.isin(step[self.lineage_dict['trackid']], track_df.trackid.to_numpy()): + try: + _corr = escaped_df[escaped_df.trackid == escaped_trackid].PostStepEnergy.to_numpy()[0] + track_df.loc[track_df.trackid == step[self.lineage_dict['trackid']], 'corr'] += -1.0 * _corr + track_df.loc[track_df.trackid == step[self.lineage_dict['trackid']], 'dE_corr'] += -1.0 * _corr + escaped_df.loc[escaped_df.trackid == escaped_trackid, 'corrected'] = True + corrected_escaped_trackid.append(escaped_trackid) + break + except IndexError: + print('IndexError') + + for corr_trackid in corrected_escaped_trackid: + escaped_rlineage_dict.pop(corr_trackid, None) + + return track_df, escaped_rlineage_dict + + def escaped_secondaries_correction(self, event_df, escaped_df, + gamma_df, e_df, p_df, + brem_gamma_added_to_corr, ): + # Check if we have some of the eBrem gamma which have already been accounted for + # among the escaped secondaries + escaped_df.loc[(escaped_df.trackid.isin(brem_gamma_added_to_corr)), 'corrected'] = True + + # Check if we have compt-e- which are already in the e- table + # among the escaped secondaries + escaped_df.loc[(escaped_df.trackid.isin(e_df.trackid)), 'corrected'] = True + + # Check if we have gamma created by annihil or RDB + # among the escaped secondaries + escaped_df.loc[(escaped_df.creaproc == 'annihil'), 'corrected'] = True + escaped_df.loc[(escaped_df.creaproc == 'RadioactiveDecayBase'), 'corrected'] = True + + # Add reverse lineages of the escaped secondaries + esc_rlin_dict = {} + + for escaped_trackid in escaped_df[~escaped_df.corrected].trackid.to_numpy(): + esc_rlin_dict[escaped_trackid] = self.get_reverse_lineage(event_df, trackid=escaped_trackid) + + # Correction for the escaped secondaries, starting with compt e- + e_df, esc_rlin_dict = self.correct_for_escaped(e_df, escaped_df, esc_rlin_dict, mode='e-') + p_df, esc_rlin_dict = self.correct_for_escaped(p_df, escaped_df, esc_rlin_dict, mode='e+') + gamma_df, esc_rlin_dict = self.correct_for_escaped(gamma_df, escaped_df, esc_rlin_dict, mode='gamma') + + return gamma_df, e_df, p_df + + def correct_for_brem(self, track_df, brem_df, brem_rlineage_dict, mode): + corrected_brem_trackid = [] + + for brem_trackid, brem_rlineage in brem_rlineage_dict.items(): + for step in brem_rlineage: + if np.isin(step[self.lineage_dict['trackid']], track_df.trackid.to_numpy()): + try: + _corr = brem_df[brem_df.trackid == brem_trackid].PreStepEnergy.to_numpy()[0] + track_df.loc[track_df.trackid == step[self.lineage_dict['trackid']], 'corr'] += -1.0 * _corr + track_df.loc[track_df.trackid == step[self.lineage_dict['trackid']], 'dE_corr'] += -1.0 * _corr + brem_df.loc[brem_df.trackid == brem_trackid, 'corrected'] = True + 
corrected_brem_trackid.append(brem_trackid) + break + except IndexError: + print('IndexError') + + for corr_trackid in corrected_brem_trackid: + brem_rlineage_dict[corr_trackid] = [] + + return track_df, brem_rlineage_dict + + def brem_gamma_correction(self, event_df, brem_df, + gamma_df, e_df, p_df): + # Add reverse lineages of the Brem gamma + brem_rlin_dict = {} + + for brem_trackid in brem_df[~brem_df.corrected].trackid.to_numpy(): + brem_rlin_dict[brem_trackid] = self.get_reverse_lineage(event_df, trackid=brem_trackid) + + # Correction for the Brem gamma, starting with compt e- + e_df, brem_rlin_dict = self.correct_for_brem(e_df, brem_df, brem_rlin_dict, mode='e-') + p_df, brem_rlin_dict = self.correct_for_brem(p_df, brem_df, brem_rlin_dict, mode='e+') + gamma_df, brem_rlin_dict = self.correct_for_brem(gamma_df, brem_df, brem_rlin_dict, mode='gamma') + + brem_gamma_added_to_corr = [k for k in brem_rlin_dict.keys() if len(brem_rlin_dict[k]) == 0] + + return gamma_df, e_df, p_df, brem_gamma_added_to_corr + + def check_dE_corr(self, gamma_df, e_df, p_df, event_df): + # Positron table - check if there was a local eDep during annihilation step; + # add it as a correction + + for p_id in p_df.trackid.unique(): + _df = event_df[(event_df.trackid == p_id)] + _df = _df[_df.edproc == 'annihil'] + + try: + if len(_df) > 0: + _corr = _df.ed.sum() + p_df.loc[p_df['trackid'] == p_id, 'corr'] += _corr + p_df.loc[p_df['trackid'] == p_id, 'dE_corr'] += _corr + except IndexError: + print('IndexError') + + # Safeguard - check if there are any negative dE_corr values in the output + for df in [gamma_df, e_df, p_df]: + df.loc[df['dE_corr'] < self.min_dE, 'dE_corr'] *= 0.0 + + return gamma_df, e_df, p_df + + def cluster(self, event_tracks_df, verbose=False): + gamma_df = pd.DataFrame([]) + e_df = pd.DataFrame([]) + p_df = pd.DataFrame([]) + event_df = event_tracks_df.copy() + + event_empty = False + event_types = event_df.type.unique() + if 'gamma' not in event_types \ + and 'e-' not in event_types \ + and 'e+' not in event_types: + event_empty = True + + if not event_empty: + event_df['dE'] = event_df.PreStepEnergy - event_df.PostStepEnergy + + escaped_df = self.get_escaped_event_df(event_df) + brem_df = self.get_brem_event_df(event_df) + clean_event_df = self.get_clean_event_df(event_df) + event_tracks_array = self.get_tracks_array(clean_event_df) + + gamma_df = self.get_gamma_df(event_df=event_df, + tracks_arr=event_tracks_array, + clean_event_df=clean_event_df, + verbose=verbose) + e_df = self.get_e_df(event_df=event_df, + tracks_arr=event_tracks_array, + clean_event_df=clean_event_df, + e_type='e-', + verbose=verbose) + p_df = self.get_e_df(event_df=event_df, + tracks_arr=event_tracks_array, + clean_event_df=clean_event_df, + e_type='e+', + verbose=verbose) + + gamma_df, e_df, p_df, \ + brem_gamma_added_to_corr = self.brem_gamma_correction(event_df, brem_df, + gamma_df, e_df, p_df) + gamma_df, e_df, p_df = self.escaped_secondaries_correction(event_df, escaped_df, + gamma_df, e_df, p_df, + brem_gamma_added_to_corr) + gamma_df, e_df, p_df = self.check_dE_corr(gamma_df, e_df, p_df, event_df) + + return gamma_df, e_df, p_df diff --git a/epix/clustering.py b/epix/clustering.py index 2acd7b0..9c58803 100755 --- a/epix/clustering.py +++ b/epix/clustering.py @@ -2,17 +2,99 @@ import pandas as pd import numba import awkward as ak -from .common import reshape_awkward +from .common import reshape_awkward, awkwardify_df from sklearn.cluster import DBSCAN +from .brem_cluster import BremClustering +from 
.betadecay_cluster import BetaDecayClustering + + +def find_brem_cluster(interactions): + df = ak.to_pandas(interactions) + df_clustered = pd.DataFrame(columns=interactions.fields) + + if df.empty: + # TPC interaction is empty + return interactions + + bremClustering = BremClustering() + + for evt in df.index.get_level_values(0).unique(): + gamma_df, e_df, ep_df = bremClustering.cluster(df.loc[evt], verbose=False) + + dte = pd.DataFrame() + dte = pd.concat((dte, gamma_df), ignore_index=True) + dte = pd.concat((dte, e_df), ignore_index=True) + dte = pd.concat((dte, ep_df), ignore_index=True) + + if len(dte) > 0: + dte['cluster_ids'] = np.arange(len(dte)) + df_clustered = pd.concat((df_clustered, dte), ignore_index=True) + + try: + df_clustered.ed = df_clustered.dE_corr + df_clustered.drop(['corr', 'dE', 'dE_corr'], axis=1, inplace=True) + except AttributeError: + pass + + for c in ['trackid', 'parentid', 'evtid']: + df_clustered[c] = df_clustered[c].astype(int) + + interactions = awkwardify_df(df_clustered) + + if len(df_clustered)>0: + ci = df_clustered.loc[:, 'cluster_ids'].values + offsets = ak.num(interactions['x']) + interactions['cluster_ids'] = reshape_awkward(ci, offsets) + + return interactions + + +def find_betadecay_cluster(interactions, ): + """ + Function which finds clusters within an event + using beta-decay clustering approach: . . . + + Args: + x (pandas.DataFrame): Subentries of event must contain all fields + of interactions type returned by epix.file_loader + + Returns: + awkward.array: Modified interactions array with applied beta-decay clustering and added cluster_ids. + """ + + df = ak.to_pandas(interactions) + df_clustered = pd.DataFrame(columns=interactions.fields) + + if df.empty: + # TPC interaction is empty + return interactions + + betadecayClustering = BetaDecayClustering() + + for evt in df.index.get_level_values(0).unique(): + dte = betadecayClustering.cluster(df.loc[evt]) + if len(dte) > 0: + dte['cluster_ids'] = np.arange(len(dte)) + df_clustered = pd.concat((df_clustered, dte), ignore_index=True) + + interactions = awkwardify_df(df_clustered) + + if len(df_clustered)>0: + ci = df_clustered.loc[:, 'cluster_ids'].values + offsets = ak.num(interactions['x']) + interactions['cluster_ids'] = reshape_awkward(ci, offsets) + + return interactions def find_cluster(interactions, cluster_size_space, cluster_size_time): """ - Function which finds cluster within a event. + Function which finds clusters within an event using temporal + and spatial DBSCAN clustering. Args: x (pandas.DataFrame): Subentries of event must contain the - fields, x,y,z,time + fields: x,y,z,ed,time cluster_size_space (float): Max spatial distance between two points to be inside a cluster [cm]. cluster_size_time (float): Max time distance between two points to be @@ -24,7 +106,7 @@ def find_cluster(interactions, cluster_size_space, cluster_size_time): # TODO is there a better way to get the df? 
df = [] for key in ['x', 'y', 'z', 'ed', 't']: - df.append(ak.to_pandas(interactions[key], anonymous=key)) + df.append(ak.to_dataframe(interactions[key], anonymous=key)) df = pd.concat(df, axis=1) if df.empty: @@ -37,7 +119,7 @@ def find_cluster(interactions, cluster_size_space, cluster_size_time): df["time_cluster"] = np.concatenate(groups.apply(lambda x: simple_1d_clustering(x.t.values, cluster_size_time))) # Splitting into individual events and time cluster and apply space clustering space: - df['cluster_id'] = np.zeros(len(df.index), dtype=np.int) + df['cluster_id'] = np.zeros(len(df.index), dtype=int) for evt in df.index.get_level_values(0).unique(): _df_evt = df.loc[evt] @@ -46,13 +128,13 @@ def find_cluster(interactions, cluster_size_space, cluster_size_time): for _t in _t_clusters: _cl = _find_cluster(_df_evt[_df_evt.time_cluster == _t], cluster_size_space=cluster_size_space) - df.loc[(df.time_cluster == _t)&(df.index.get_level_values(0)==evt), 'cluster_id'] = _cl + add_to_cluster + df.loc[(df.time_cluster == _t) & (df.index.get_level_values(0) == evt), 'cluster_id'] = _cl + add_to_cluster add_to_cluster = max(_cl) + add_to_cluster + 1 ci = df.loc[:, 'cluster_id'].values offsets = ak.num(interactions['x']) interactions['cluster_ids'] = reshape_awkward(ci, offsets) - + return interactions @@ -69,14 +151,14 @@ def simple_1d_clustering(data, scale): Returns: clusters_undo_sort (np.array): Cluster Labels """ - + idx_sort = np.argsort(data) idx_undo_sort = np.argsort(idx_sort) data_sorted = data[idx_sort] diff = data_sorted[1:] - data_sorted[:-1] - + clusters = [0] c = 0 for value in diff: @@ -87,7 +169,7 @@ def simple_1d_clustering(data, scale): clusters.append(c) clusters_undo_sort = np.array(clusters)[idx_undo_sort] - + return clusters_undo_sort @@ -103,7 +185,7 @@ def _find_cluster(x, cluster_size_space): functon: to be used in groupby.apply. """ db_cluster = DBSCAN(eps=cluster_size_space, min_samples=1) - xprime = x[['x', 'y', 'z']].values + xprime = x[['x', 'y', 'z']].values return db_cluster.fit_predict(xprime) @@ -184,6 +266,7 @@ def _cluster(x, y, z, ed, time, ci, z_mean = 0 t_mean = 0 ed_tot = 0 + event_time_min = min(time[ei]) current_ci = 0 # Current cluster id i_class = 0 # Index for classification (depends on users requirement) @@ -208,8 +291,9 @@ def _cluster(x, y, z, ed, time, ci, edproc[ei][i_class]) # Write result, simple but extensive with awkward... 
- _write_result(res, x_mean, y_mean, z_mean, - ed_tot, t_mean, A, Z, nestid) + if ed_tot > 1e-6: + _write_result(res, x_mean, y_mean, z_mean, + ed_tot, t_mean, event_time_min, A, Z, nestid) # Update cluster id and empty buffer current_ci = ci[ei][ii] @@ -227,7 +311,7 @@ def _cluster(x, y, z, ed, time, ci, # We have to gather information of current cluster: e = ed[ei][ii] - t = time[ei][ii] + t = time[ei][ii] - event_time_min x_mean += x[ei][ii] * e y_mean += y[ei][ii] * e z_mean += z[ei][ii] * e @@ -251,8 +335,9 @@ def _cluster(x, y, z, ed, time, ci, parenttype[ei][i_class], creaproc[ei][i_class], edproc[ei][i_class]) - _write_result(res, x_mean, y_mean, z_mean, - ed_tot, t_mean, A, Z, nestid) + if ed_tot>1e-6: + _write_result(res, x_mean, y_mean, z_mean, + ed_tot, t_mean, event_time_min, A, Z, nestid) res.end_list() @@ -266,14 +351,16 @@ def _cluster(x, y, z, ed, time, ci, (('Atomic number', 'Z'), np.int16), (('Nest Id for qunata generation', 'nestid'), np.int16)] classifier = np.zeros(7, dtype=classifier_dtype) -classifier['types'] = ['None', 'neutron', 'alpha', 'None','None', 'gamma', 'e-'] -classifier['parenttype'] = ['None', 'None', 'None', 'Kr83[9.405]','Kr83[41.557]', 'None', 'None'] -classifier['creaproc'] = ['None', 'None', 'None', 'None', 'None','None', 'None'] -classifier['edproc'] = ['ionIoni', 'hadElastic', 'None', 'None','None', 'None', 'None'] +classifier['types'] = ['None', 'neutron', 'alpha', 'None', 'None', 'gamma', 'e-'] +classifier['parenttype'] = ['None', 'None', 'None', 'Kr83[9.405]', 'Kr83[41.557]', 'None', 'None'] +classifier['creaproc'] = ['None', 'None', 'None', 'None', 'None', 'None', 'None'] +classifier['edproc'] = ['ionIoni', 'hadElastic', 'None', 'None', 'None', 'None', 'None'] classifier['A'] = [0, 0, 4, infinity, infinity, 0, 0] classifier['Z'] = [0, 0, 2, 0, 0, 0, 0] classifier['nestid'] = [0, 0, 6, 11, 11, 7, 8] +# quick fix for https://github.com/XENONnT/epix/issues/69 +classifier = classifier[::-1] @numba.njit def classify(types, parenttype, creaproc, edproc): @@ -294,7 +381,7 @@ def classify(types, parenttype, creaproc, edproc): @numba.njit def _write_result(res, x_mean, y_mean, z_mean, - ed_tot, t_mean, A, Z, nestid): + ed_tot, t_mean, event_time_min, A, Z, nestid): """ Helper to write result into record array. 
""" @@ -306,7 +393,7 @@ def _write_result(res, x_mean, y_mean, z_mean, res.field('z') res.real(z_mean / ed_tot) res.field('t') - res.real(t_mean / ed_tot) + res.real((t_mean / ed_tot) + event_time_min) res.field('ed') res.real(ed_tot) res.field('nestid') diff --git a/epix/common.py b/epix/common.py index b3d4251..eb6a8bf 100755 --- a/epix/common.py +++ b/epix/common.py @@ -3,6 +3,40 @@ import awkward as ak +def awkwardify_df(df): + if 'evtid' in df.keys(): + df_eventid_key = 'evtid' + else: + df_eventid_key = 'eventid' + + _, evt_offsets = np.unique(df[df_eventid_key], return_counts=True) + + dictionary = {"x": reshape_awkward(df["x"].values, evt_offsets), + "y": reshape_awkward(df["y"].values, evt_offsets), + "z": reshape_awkward(df["z"].values, evt_offsets), + "x_pri": reshape_awkward(df["x_pri"].values, evt_offsets), + "y_pri": reshape_awkward(df["y_pri"].values, evt_offsets), + "z_pri": reshape_awkward(df["z_pri"].values, evt_offsets), + + "t": reshape_awkward(df["t"].values, evt_offsets), + "ed": reshape_awkward(df["ed"].values, evt_offsets), + "PreStepEnergy": reshape_awkward(df["PreStepEnergy"].values, evt_offsets), + "PostStepEnergy": reshape_awkward(df["PostStepEnergy"].values, evt_offsets), + "type": reshape_awkward(np.array(df["type"], dtype=str), evt_offsets), + "trackid": reshape_awkward(np.array(df["trackid"].values, dtype=int), evt_offsets), + "parenttype": reshape_awkward(np.array(df["parenttype"], dtype=str), evt_offsets), + "parentid": reshape_awkward(np.array(df["parentid"].values, dtype=int), evt_offsets), + "creaproc": reshape_awkward(np.array(df["creaproc"], dtype=str), evt_offsets), + "edproc": reshape_awkward(np.array(df["edproc"], dtype=str), evt_offsets), + "evtid": reshape_awkward(np.array(df[df_eventid_key].values, dtype=int), evt_offsets), + } + + if 'r' in df.keys(): + dictionary["r"] = reshape_awkward(df["r"].values, evt_offsets) + + return ak.Array(dictionary) + + def reshape_awkward(array, offset): """ Function which reshapes an array of strings or numbers according @@ -17,7 +51,7 @@ def reshape_awkward(array, offset): res: awkward1.ArrayBuilder object. 
""" res = ak.ArrayBuilder() - if (array.dtype == np.int) or (array.dtype == np.float64) or (array.dtype == np.float32): + if (array.dtype == int) or (array.dtype == np.float64) or (array.dtype == np.float32): _reshape_awkward_number(array, offset, res) else: _reshape_awkward_string(array, offset, res) @@ -48,6 +82,7 @@ def _reshape_awkward_number(array, offsets, res): res.end_list() start = end + def _reshape_awkward_string(array, offsets, res): """ Function which reshapes an array of strings according @@ -71,6 +106,7 @@ def _reshape_awkward_string(array, offsets, res): res.end_list() start = end + def awkward_to_flat_numpy(array): if len(array) == 0: return ak.to_numpy(array) @@ -82,10 +118,11 @@ def mulit_range(offsets): res = np.zeros(np.sum(offsets), dtype=np.int32) i = 0 for o in offsets: - res[i:i+o] = np.arange(0, o, dtype=np.int32) + res[i:i + o] = np.arange(0, o, dtype=np.int32) i += o return res + @numba.njit def offset_range(offsets): """ @@ -101,7 +138,7 @@ def offset_range(offsets): res = np.zeros(np.sum(offsets), dtype=np.int32) i = 0 for ind, o in enumerate(offsets): - res[i:i+o] = ind + res[i:i + o] = ind i += o return res @@ -142,3 +179,35 @@ def apply_time_offset(result, dt): if len(result) == 0: return result return result['t'][:, :] + dt + + +def calc_tot_e_dep(df): + """ + Calculate total energy deposit in the sensitive volume + :param df: dataframe + :return: total energy deposit in each g4 event + """ + # Merge all energy deposits in single G4 events + g4id = df['g4id'] + u, s = np.unique(g4id, return_index=True) + # Split with event_number + edeps= np.split(df['e_dep'], s[1:]) + # Calculate tot_edep + tot_edep = np.asarray(list(map(lambda x:np.sum(x), edeps))) # Sum up the e_deps + num_edep = np.asarray(list(map(lambda x:len(x), edeps))) # Get number of e_deps + return np.repeat(tot_edep,num_edep) + + +def apply_energy_selection(instr,e_range): + """ + Apply energy selection with total energy deposit in the sensitive volume + :param instr: dataframe + :param e_range: (minimum E, maximum E) + :return: dataframe after the energy selection + """ + minE, maxE = e_range[0], e_range[1] + # Merge all energy deposits in a single G4 event + instr['tot_e'] = calc_tot_e_dep(instr)/2. #devide by 2 (S1, S2) + instr_aft_selection = instr[(instr['tot_e']>=minE) & (instr['tot_e']<=maxE)] + return instr_aft_selection + diff --git a/epix/data_files/cs1_beta.pkl b/epix/data_files/cs1_beta.pkl new file mode 100644 index 0000000..f9e7b0c Binary files /dev/null and b/epix/data_files/cs1_beta.pkl differ diff --git a/epix/data_files/cs2_beta.pkl b/epix/data_files/cs2_beta.pkl new file mode 100644 index 0000000..95272ba Binary files /dev/null and b/epix/data_files/cs2_beta.pkl differ diff --git a/epix/detector_volumes.py b/epix/detector_volumes.py index af2c5dc..772f57f 100755 --- a/epix/detector_volumes.py +++ b/epix/detector_volumes.py @@ -183,20 +183,21 @@ def in_sensitive_volume(events, sensitive_volumes): vol.volume_id, vol.xe_density, vol.create_S2, - res) + res) if ind: + new_results = res.snapshot() # Convert ArrayBuilder into true ak.Array # Now we add the other results, but first test if # volumes overlap. Only possible if we explicitly loop # over everything. This reduces performance but adds layer of # safety. 
- m = (result['vol_id'] > 0) & (res['vol_id'] == vol.volume_id) + m = (result['vol_id'] > 0) & (new_results['vol_id'] == vol.volume_id) if ak.any(m): overlapping_id = result[m][0] # Get volume name: name = [vol.name for vol in sensitive_volumes if vol.volume_id == overlapping_id][0] raise ValueError(f'The volume {vol.name} is overlapping with' f' volume {name}!') - new_results = res.snapshot() + for field in result.fields: # Workaround since we cannot sum up records-arrays anymore result[field] = result[field] + new_results[field] diff --git a/epix/io.py b/epix/io.py index 74a06cb..eaaca89 100755 --- a/epix/io.py +++ b/epix/io.py @@ -8,7 +8,7 @@ import wfsim import configparser -from .common import awkward_to_flat_numpy, offset_range, reshape_awkward +from .common import awkward_to_flat_numpy, offset_range, reshape_awkward, awkwardify_df SUPPORTED_OPTION = {'to_be_stored': 'getboolean', 'electric_field': ('getfloat', 'get'), @@ -29,7 +29,7 @@ def load_config(config_file_path): config.read(config_file_path) sections = config.sections() if not len(sections): - raise ValueError(f'Cannot load sections from config file "{config_file_path}".' + raise ValueError(f'Cannot load sections from config file "{config_file_path}".' 'Have you specified a wrong file?') settings = {} for s in sections: @@ -84,14 +84,14 @@ class file_loader(): """ def __init__(self, - directory, - file_name, - arg_debug=False, - outer_cylinder=None, - kwargs={}, - cut_by_eventid=False, - cut_nr_only=False, - ): + directory, + file_name, + arg_debug=False, + outer_cylinder=None, + kwargs={}, + cut_by_eventid=False, + cut_nr_only=False, + ): self.directory = directory self.file_name = file_name @@ -107,12 +107,12 @@ def __init__(self, "t", "ed", "type", "trackid", "parenttype", "parentid", - "creaproc", "edproc"] + "creaproc", "edproc", "PreStepEnergy", "PostStepEnergy"] - #Prepare cut for root and csv case + # Prepare cut for root and csv case if self.outer_cylinder: self.cut_string = (f'(r < {self.outer_cylinder["max_r"]})' - f' & ((zp >= {self.outer_cylinder["min_z"] * 10}) & (zp < {self.outer_cylinder["max_z"] * 10}))') + f' & ((zp >= {self.outer_cylinder["min_z"] * 10}) & (zp < {self.outer_cylinder["max_z"] * 10}))') else: self.cut_string = None @@ -146,10 +146,11 @@ def load_file(self): interactions = interactions[m] if self.cut_nr_only: - m = ((interactions['type'] == "neutron")&(interactions['edproc'] == "hadElastic")) | (interactions['edproc'] == "ionIoni") + m = ((interactions['type'] == "neutron") & (interactions['edproc'] == "hadElastic")) | ( + interactions['edproc'] == "ionIoni") e_dep_er = ak.sum(interactions[~m]['ed'], axis=1) e_dep_nr = ak.sum(interactions[m]['ed'], axis=1) - interactions = interactions[(e_dep_er<10) & (e_dep_nr>0)] + interactions = interactions[(e_dep_er < 10) & (e_dep_nr > 0)] # Removing all events with no interactions: m = ak.num(interactions['ed']) > 0 @@ -173,9 +174,9 @@ def _load_root_file(self): if self.arg_debug: print(f'Total entries in input file = {ttree.num_entries}') - cutby_string='output file entry' + cutby_string = 'output file entry' if self.cut_by_eventid: - cutby_string='g4 eventid' + cutby_string = 'g4 eventid' if self.kwargs['entry_start'] is not None: print(f'Starting to read from {cutby_string} {self.kwargs["entry_start"]}') @@ -184,22 +185,17 @@ def _load_root_file(self): # If user specified entry start/stop we have to update number of # events for source rate computation: - if self.kwargs['entry_start'] is not None: + if self.kwargs['entry_start'] is not None and 
self.cut_by_eventid: start = self.kwargs['entry_start'] else: start = 0 - - if self.kwargs['entry_stop'] is not None: + if self.kwargs['entry_stop'] is not None and self.cut_by_eventid: stop = self.kwargs['entry_stop'] else: stop = n_simulated_events - n_simulated_events = stop - start - if self.cut_by_eventid: - # Start/stop refers to eventid so drop start drop from kwargs - # dict if specified, otherwise we cut again on rows. - self.kwargs.pop('entry_start', None) - self.kwargs.pop('entry_stop', None) + self.kwargs.pop('entry_start', None) + self.kwargs.pop('entry_stop', None) # Conversions and parameters to be computed: alias = {'x': 'xp/10', # converting "geant4" mm to "straxen" cm @@ -207,7 +203,7 @@ def _load_root_file(self): 'z': 'zp/10', 'r': 'sqrt(x**2 + y**2)', 't': 'time*10**9' - } + } # Read in data, convert mm to cm and perform a first cut if specified: interactions = ttree.arrays(self.column_names, @@ -219,16 +215,23 @@ def _load_root_file(self): interactions['evtid'] = eventids xyz_pri = ttree.arrays(['x_pri', 'y_pri', 'z_pri'], - aliases={'x_pri': 'xp_pri/10', - 'y_pri': 'yp_pri/10', - 'z_pri': 'zp_pri/10' - }, - **self.kwargs) + aliases={'x_pri': 'xp_pri/10', + 'y_pri': 'yp_pri/10', + 'z_pri': 'zp_pri/10' + }, + **self.kwargs) interactions['x_pri'] = ak.broadcast_arrays(xyz_pri['x_pri'], interactions['x'])[0] interactions['y_pri'] = ak.broadcast_arrays(xyz_pri['y_pri'], interactions['x'])[0] interactions['z_pri'] = ak.broadcast_arrays(xyz_pri['z_pri'], interactions['x'])[0] + if self.cut_by_eventid: + first_evtid = ak.min(interactions['evtid']) + start += first_evtid + stop += first_evtid + + n_simulated_events = stop - start + return interactions, n_simulated_events, start, stop def _load_csv_file(self): @@ -244,20 +247,20 @@ def _load_csv_file(self): """ print("Load instructions from a csv file!") - - instr_df = pd.read_csv(self.file) - - #unit conversion similar to root case - instr_df["x"] = instr_df["xp"]/10 - instr_df["y"] = instr_df["yp"]/10 - instr_df["z"] = instr_df["zp"]/10 - instr_df["x_pri"] = instr_df["xp_pri"]/10 - instr_df["y_pri"] = instr_df["yp_pri"]/10 - instr_df["z_pri"] = instr_df["zp_pri"]/10 - instr_df["r"] = np.sqrt(instr_df["x"]**2 + instr_df["y"]**2) - instr_df["t"] = instr_df["time"]*10**9 - - #Check if all needed columns are in place: + + instr_df = pd.read_csv(self.file) + + # unit conversion similar to root case + instr_df["x"] = instr_df["xp"] / 10 + instr_df["y"] = instr_df["yp"] / 10 + instr_df["z"] = instr_df["zp"] / 10 + instr_df["x_pri"] = instr_df["xp_pri"] / 10 + instr_df["y_pri"] = instr_df["yp_pri"] / 10 + instr_df["z_pri"] = instr_df["zp_pri"] / 10 + instr_df["r"] = np.sqrt(instr_df["x"] ** 2 + instr_df["y"] ** 2) + instr_df["t"] = instr_df["time"] * 10 ** 9 + + # Check if all needed columns are in place: if not set(self.column_names).issubset(instr_df.columns): warnings.warn("Not all needed columns provided!") @@ -266,13 +269,13 @@ def _load_csv_file(self): if self.outer_cylinder: instr_df = instr_df.query(self.cut_string) - interactions = self._awkwardify_df(instr_df) + interactions = awkwardify_df(instr_df) - #Use always all events in the csv file + # Use always all events in the csv file start = 0 stop = n_simulated_events - return interactions, n_simulated_events, start, stop + return interactions, n_simulated_events, start, stop def _get_ttree(self): """ @@ -297,43 +300,10 @@ def _get_ttree(self): if v == 'TTree': ttrees.append(k) raise ValueError(f'Cannot find ttree object of "{file_name}".' 
- 'I tried to search in events and events/events.' - f'Found a ttree in {ttrees}?') + 'I tried to search in events and events/events.' + f'Found a ttree in {ttrees}?') return ttree, n_simulated_events - def _awkwardify_df(self, df): - """ - Function which builds an jagged awkward array from pandas dataframe. - - Args: - df: Pandas Dataframe - - Returns: - ak.Array(dictionary): awkward array - - """ - - _, evt_offsets = np.unique(df["eventid"], return_counts = True) - - dictionary = {"x": reshape_awkward(df["x"].values , evt_offsets), - "y": reshape_awkward(df["y"].values , evt_offsets), - "z": reshape_awkward(df["z"].values , evt_offsets), - "x_pri": reshape_awkward(df["x_pri"].values, evt_offsets), - "y_pri": reshape_awkward(df["y_pri"].values, evt_offsets), - "z_pri": reshape_awkward(df["z_pri"].values, evt_offsets), - "r": reshape_awkward(df["r"].values , evt_offsets), - "t": reshape_awkward(df["t"].values , evt_offsets), - "ed": reshape_awkward(df["ed"].values , evt_offsets), - "type":reshape_awkward(np.array(df["type"], dtype=str) , evt_offsets), - "trackid": reshape_awkward(df["trackid"].values , evt_offsets), - "parenttype": reshape_awkward(np.array(df["parenttype"], dtype=str) , evt_offsets), - "parentid": reshape_awkward(df["parentid"].values , evt_offsets), - "creaproc": reshape_awkward(np.array(df["creaproc"], dtype=str) , evt_offsets), - "edproc": reshape_awkward(np.array(df["edproc"], dtype=str) , evt_offsets), - "evtid": reshape_awkward(df["eventid"].values , evt_offsets), - } - - return ak.Array(dictionary) # ---------------------- # Outputing wfsim instructions: @@ -384,3 +354,6 @@ def awkward_to_wfsim_row_style(interactions): # Remove entries with no quanta res = res[res['amp'] > 0] return res + +def empty_wfsim_instructions(): + return np.empty(0, dtype=wfsim.instruction_dtype) diff --git a/epix/quantgen_wrapper.py b/epix/quantgen_wrapper.py index 257bb1b..465bef9 100644 --- a/epix/quantgen_wrapper.py +++ b/epix/quantgen_wrapper.py @@ -1,5 +1,8 @@ import numpy as np import nestpy +import pickle + + class BBF_quanta_generator: def __init__(self): self.er_par_dict = { @@ -12,42 +15,43 @@ def __init__(self): 'py4': 0.5864910020458629, 'rf0': 0.029414125811261564, 'rf1': 0.2571929264699089, - 'fano' : 0.059 + 'fano': 0.059 } self.nr_par_dict = { - "W": 0.01374615297291325, - "alpha": 0.9376149722771664, + "W": 0.01374615297291325, + "alpha": 0.9376149722771664, "zeta": 0.0472, - "beta": 311.86846286764376, - "gamma": 0.015772527423653895, + "beta": 311.86846286764376, + "gamma": 0.015772527423653895, "delta": 0.0620, - "kappa": 0.13762801393921467, - "eta": 6.387273512457444, + "kappa": 0.13762801393921467, + "eta": 6.387273512457444, "lambda": 1.4102590741165675, - "fano" : 0.059 + "fano": 0.059 } - self.ERs = [7,8,11] - self.NRs = [0,1] + self.ERs = [7, 8, 11] + self.NRs = [0, 1] self.unknown = [12] self.get_quanta_vectorized = np.vectorize(self.get_quanta, excluded="self") - + def update_ER_params(self, new_params): self.er_par_dict.update(new_params) + def update_NR_params(self, new_params): self.nr_par_dict.update(new_params) def get_quanta(self, interaction, energy, field): - if int(interaction) in self.ERs: + if int(interaction) in self.ERs: return self.get_ER_quanta(energy, field, self.er_par_dict) elif int(interaction) in self.NRs: return self.get_NR_quanta(energy, field, self.nr_par_dict) elif int(interaction) in self.unknown: - return 0,0,0 + return 0, 0, 0 else: raise RuntimeError("Unknown nest ID: {:d}, {:s}".format( - int(interaction), - 
str(nestpy.INTERACTION_TYPE(int(interaction))))) - + int(interaction), + str(nestpy.INTERACTION_TYPE(int(interaction))))) + #### def ER_recomb(self, energy, field, par_dict): W = par_dict['W'] @@ -57,14 +61,15 @@ def ER_recomb(self, energy, field, par_dict): Ni = Nq / (1. + ExIonRatio) Nex = Nq - Ni - TI = par_dict['py0'] * np.exp(-energy/par_dict['py1']) * field**par_dict['py2'] - Recomb = 1. - np.log(1. + TI*Ni/4.) / (TI*Ni/4.) - FD = 1. / (1. + np.exp(-(energy-par_dict['py3'])/par_dict['py4'])) + TI = par_dict['py0'] * np.exp(-energy / par_dict['py1']) * field ** par_dict['py2'] + Recomb = 1. - np.log(1. + TI * Ni / 4.) / (TI * Ni / 4.) + FD = 1. / (1. + np.exp(-(energy - par_dict['py3']) / par_dict['py4'])) return Recomb * FD + def ER_drecomb(self, energy, par_dict): - return par_dict['rf0'] * (1. - np.exp(-energy/par_dict['py1'])) - + return par_dict['rf0'] * (1. - np.exp(-energy / par_dict['py1'])) + def NR_quenching(self, energy, par_dict): alpha = par_dict['alpha'] beta = par_dict['beta'] @@ -75,10 +80,11 @@ def NR_quenching(self, energy, par_dict): lam = par_dict['lambda'] zeta = par_dict['zeta'] - e = 11.5 * energy * 54.**(-7./3.) - g = 3. * e**0.15 + 0.7 * e**0.6 + e + e = 11.5 * energy * 54. ** (-7. / 3.) + g = 3. * e ** 0.15 + 0.7 * e ** 0.6 + e + + return kappa * g / (1. + kappa * g) - return kappa*g / (1. + kappa*g) def NR_ExIonRatio(self, energy, field, par_dict): alpha = par_dict['alpha'] beta = par_dict['beta'] @@ -89,9 +95,10 @@ def NR_ExIonRatio(self, energy, field, par_dict): lam = par_dict['lambda'] zeta = par_dict['zeta'] - e = 11.5 * energy * 54.**(-7./3.) + e = 11.5 * energy * 54. ** (-7. / 3.) + + return alpha * field ** (-zeta) * (1. - np.exp(-beta * e)) - return alpha * field**(-zeta) * (1. - np.exp(-beta*e)) def NR_Penning_quenching(self, energy, par_dict): alpha = par_dict['alpha'] beta = par_dict['beta'] @@ -102,10 +109,11 @@ def NR_Penning_quenching(self, energy, par_dict): lam = par_dict['lambda'] zeta = par_dict['zeta'] - e = 11.5 * energy * 54.**(-7./3.) - g = 3. * e**0.15 + 0.7 * e**0.6 + e + e = 11.5 * energy * 54. ** (-7. / 3.) + g = 3. * e ** 0.15 + 0.7 * e ** 0.6 + e + + return 1. / (1. + eta * e ** lam) - return 1. / (1. + eta * e**lam) def NR_recomb(self, energy, field, par_dict): alpha = par_dict['alpha'] beta = par_dict['beta'] @@ -116,34 +124,37 @@ def NR_recomb(self, energy, field, par_dict): lam = par_dict['lambda'] zeta = par_dict['zeta'] - e = 11.5 * energy * 54.**(-7./3.) - g = 3. * e**0.15 + 0.7 * e**0.6 + e + e = 11.5 * energy * 54. ** (-7. / 3.) + g = 3. * e ** 0.15 + 0.7 * e ** 0.6 + e HeatQuenching = self.NR_quenching(energy, par_dict) PenningQuenching = self.NR_Penning_quenching(energy, par_dict) ExIonRatio = self.NR_ExIonRatio(energy, field, par_dict) - xi = gamma * field**(-delta) + xi = gamma * field ** (-delta) Nq = energy * HeatQuenching / par_dict['W'] - Ni = Nq / (1.+ ExIonRatio) + Ni = Nq / (1. + ExIonRatio) + + return 1. - np.log(1. + Ni * xi) / (Ni * xi) - return 1. - np.log(1. + Ni*xi) / (Ni*xi) ### def get_ER_quanta(self, energy, field, par_dict): Nq_mean = energy / par_dict['W'] - Nq = np.clip(np.round(np.random.normal(Nq_mean, np.sqrt(Nq_mean * par_dict['fano']))), 0, np.inf).astype(np.int64) + Nq = np.clip(np.round(np.random.normal(Nq_mean, np.sqrt(Nq_mean * par_dict['fano']))), 0, np.inf).astype( + np.int64) - Ni = np.random.binomial(Nq, 1./(1.+par_dict['Nex/Ni'])) + Ni = np.random.binomial(Nq, 1. / (1. 
+ par_dict['Nex/Ni'])) - recomb = self.ER_recomb(energy,field, par_dict) + recomb = self.ER_recomb(energy, field, par_dict) drecomb = self.ER_drecomb(energy, par_dict) true_recomb = np.clip(np.random.normal(recomb, drecomb), 0., 1.) - Ne = np.random.binomial(Ni, 1.-true_recomb) + Ne = np.random.binomial(Ni, 1. - true_recomb) Nph = Nq - Ne Nex = Nq - Ni return Nph, Ne, Nex + def get_NR_quanta(self, energy, field, par_dict): Nq_mean = energy / par_dict['W'] Nq = np.round(np.random.normal(Nq_mean, np.sqrt(Nq_mean * par_dict['fano']))).astype(np.int64) @@ -151,8 +162,8 @@ def get_NR_quanta(self, energy, field, par_dict): quenching = self.NR_quenching(energy, par_dict) Nq = np.random.binomial(Nq, quenching) - ExIonRatio = self.NR_ExIonRatio(energy, field,par_dict) - Ni = np.random.binomial(Nq, ExIonRatio/(1.+ExIonRatio)) + ExIonRatio = self.NR_ExIonRatio(energy, field, par_dict) + Ni = np.random.binomial(Nq, ExIonRatio / (1. + ExIonRatio)) penning_quenching = self.NR_Penning_quenching(energy, par_dict) Nex = np.random.binomial(Nq - Ni, penning_quenching) @@ -161,23 +172,49 @@ def get_NR_quanta(self, energy, field, par_dict): if recomb < 0 or recomb > 1: return None, None - Ne = np.random.binomial(Ni, 1.-recomb) + Ne = np.random.binomial(Ni, 1. - recomb) Nph = Ni + Nex - Ne return Nph, Ne, Nex - + + class NEST_quanta_generator: - + def __init__(self): self.nc = nestpy.NESTcalc(nestpy.DetectorExample_XENON10()) ## not sure if nestpy RNG issue was solved, so randomize NEST internal state for i in range(np.random.randint(100)): - self.nc.GetQuanta(self.nc.GetYields(energy=np.random.uniform(10,100))) - + self.nc.GetQuanta(self.nc.GetYields(energy=np.random.uniform(10, 100))) + def get_quanta(self, interaction, energy, field): - y=self.nc.GetYields( - interaction = nestpy.INTERACTION_TYPE(interaction), - energy=energy, - drift_field =field, - ) + y = self.nc.GetYields( + interaction=nestpy.INTERACTION_TYPE(interaction), + energy=energy, + drift_field=field, + ) q_ = self.nc.GetQuanta(y) - return q_.photons,q_.electrons, q_.excitons + return q_.photons, q_.electrons, q_.excitons + + +class BETA_quanta_generator: + + def __init__(self): + # ToDo: Should user be able to set custom beta-generator input files? + cs1_spline_path = 'epix/data_files/cs1_beta.pkl' + cs2_spline_path = 'epix/data_files/cs2_beta.pkl' + with open(cs1_spline_path, 'rb') as f: + self.cs1_spline = pickle.load(f) + with open(cs2_spline_path, 'rb') as f: + self.cs2_spline = pickle.load(f) + + # ToDo: Should the numbers be in the config file? 
+ self.XENONnT_g1 = 0.151 ## v5 + self.XENONnT_g2 = 16.450 ## v5 + + self.get_quanta_vectorized = np.vectorize(self.get_quanta, excluded="self") + + def get_quanta(self, interaction, energy, field): + beta_photons = self.cs1_spline(energy) / self.XENONnT_g1 + beta_electrons = self.cs2_spline(energy) / self.XENONnT_g2 + beta_excitons = 0.0 + + return beta_photons, beta_electrons, beta_excitons diff --git a/epix/run_epix.py b/epix/run_epix.py index 479579e..773be72 100644 --- a/epix/run_epix.py +++ b/epix/run_epix.py @@ -6,7 +6,7 @@ import warnings import epix -from .common import ak_num, calc_dt, apply_time_offset +from .common import ak_num, calc_dt, apply_time_offset, apply_energy_selection def main(args, return_df=False, return_wfsim_instructions=False, strax=False): @@ -34,10 +34,20 @@ def main(args, return_df=False, return_wfsim_instructions=False, strax=False): if args['debug']: tnow = monitor_time(tnow, 'load data.') print(f"Finding clusters of interactions with a dr = {args['micro_separation']} mm" - f" and dt = {args['micro_separation_time']} ns") - - # Cluster finding and clustering (convert micro_separation mm -> cm): - inter = epix.find_cluster(inter, args['micro_separation']/10, args['micro_separation_time']) + f" and dt = {args['micro_separation_time']} ns") + + if 'cluster_method' in args and args['cluster_method'] == 'betadecay': + print("Running BETA-DECAY clustering...") + inter = epix.find_betadecay_cluster(inter) + elif 'cluster_method' in args and args['cluster_method'] == 'brem': + print("Running BREM clustering...") + inter = epix.find_brem_cluster(inter) + else: + if 'cluster_method' in args and args['cluster_method'] == 'dbscan': + print(f"Running default DBSCAN microclustering...") + else: + print(f"Clustering method is not recognized, using the default DBSCAN microclustering...") + inter = epix.find_cluster(inter, args['micro_separation'] / 10, args['micro_separation_time']) if args['debug']: tnow = monitor_time(tnow, 'find clusters.') @@ -47,13 +57,14 @@ def main(args, return_df=False, return_wfsim_instructions=False, strax=False): if args['debug']: tnow = monitor_time(tnow, 'merge clusters.') - # Add eventid again: - result['evtid'] = ak.broadcast_arrays(inter['evtid'][:, 0], result['ed'])[0] + if len(result)>0: + # Add eventid again: + result['evtid'] = ak.broadcast_arrays(inter['evtid'][:, 0], result['ed'])[0] - # Add x_pri, y_pri, z_pri again: - result['x_pri'] = ak.broadcast_arrays(inter['x_pri'][:, 0], result['ed'])[0] - result['y_pri'] = ak.broadcast_arrays(inter['y_pri'][:, 0], result['ed'])[0] - result['z_pri'] = ak.broadcast_arrays(inter['z_pri'][:, 0], result['ed'])[0] + # Add x_pri, y_pri, z_pri again: + result['x_pri'] = ak.broadcast_arrays(inter['x_pri'][:, 0], result['ed'])[0] + result['y_pri'] = ak.broadcast_arrays(inter['y_pri'][:, 0], result['ed'])[0] + result['z_pri'] = ak.broadcast_arrays(inter['z_pri'][:, 0], result['ed'])[0] # Sort detector volumes and keep interactions in selected ones: if args['debug']: @@ -107,26 +118,36 @@ def main(args, return_df=False, return_wfsim_instructions=False, strax=False): print('Generating photons and electrons for events') # Generate quanta: if len(result) > 0: - if not ('yield' in args.keys()): + if not ('yield' in args.keys()): print("No yield is provided! 
Forcing nest") - args['yield']="nest" - if args['yield'].lower()=="nest": + args['yield'] = "nest" + if args['yield'].lower() == "nest": + print("Using NEST quanta generator...") photons, electrons, excitons = epix.quanta_from_NEST(epix.awkward_to_flat_numpy(result['ed']), epix.awkward_to_flat_numpy(result['nestid']), epix.awkward_to_flat_numpy(result['e_field']), epix.awkward_to_flat_numpy(result['A']), epix.awkward_to_flat_numpy(result['Z']), epix.awkward_to_flat_numpy(result['create_S2']), - density=epix.awkward_to_flat_numpy(result['xe_density'])) - elif args['yield'].lower()=="bbf": - bbfyields=epix.BBF_quanta_generator() + density=epix.awkward_to_flat_numpy( + result['xe_density'])) + elif args['yield'].lower() == "bbf": + print("Using BBF quanta generator...") + bbfyields = epix.BBF_quanta_generator() photons, electrons, excitons = bbfyields.get_quanta_vectorized( - energy=epix.awkward_to_flat_numpy(result['ed']), - interaction=epix.awkward_to_flat_numpy(result['nestid']), - field=epix.awkward_to_flat_numpy(result['e_field']) - ) + energy=epix.awkward_to_flat_numpy(result['ed']), + interaction=epix.awkward_to_flat_numpy(result['nestid']), + field=epix.awkward_to_flat_numpy(result['e_field']) + ) + elif args['yield'].lower() == "beta": + print("Using BETA quanta generator...") + betayields = epix.BETA_quanta_generator() + photons, electrons, excitons = betayields.get_quanta_vectorized( + energy=epix.awkward_to_flat_numpy(result['ed']), + interaction=epix.awkward_to_flat_numpy(result['nestid']), + field=epix.awkward_to_flat_numpy(result['e_field'])) else: - raise RuntimeError("Unknown yield model: ", args['yields']) + raise RuntimeError("Unknown yield model: ", args['yield']) result['photons'] = epix.reshape_awkward(photons, ak_num(result['ed'])) result['electrons'] = epix.reshape_awkward(electrons, ak_num(result['ed'])) result['excitons'] = epix.reshape_awkward(excitons, ak_num(result['ed'])) @@ -153,7 +174,7 @@ def main(args, return_df=False, return_wfsim_instructions=False, strax=False): else: # Rate offset computed based on the specified rate and job_id. # Assumes all jobs were started with the same number of events. - offset = (args['job_number']*n_simulated_events)/args['source_rate'] + offset = (args['job_number'] * n_simulated_events) / args['source_rate'] dt = epix.times_from_fixed_rate(args['source_rate'], number_of_events, n_simulated_events, @@ -168,6 +189,15 @@ def main(args, return_df=False, return_wfsim_instructions=False, strax=False): if args['debug'] & (len(result) == 0): warnings.warn('No interactions left, return empty DataFrame.') instructions = epix.awkward_to_wfsim_row_style(result) + # Apply energy range selection for instructions if required + selected_min_energy = args.get('min_energy', -1) + selected_max_energy = args.get('max_energy', float('inf')) + if (selected_min_energy > 0.0) or (selected_max_energy < float('inf')): + instructions = apply_energy_selection(instructions, [selected_min_energy, selected_max_energy]) + if len(instructions) == 0: + instructions = epix.empty_wfsim_instructions() + if args['debug']: + warnings.warn('No interactions left after the energy selection, return empty DataFrame.') if args['source_rate'] != 0: # Only sort by time again if source rates were applied, otherwise # things are already sorted within the events and should stay this way. 
diff --git a/setup.py b/setup.py index b2c3ec1..9e3d4be 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ setuptools.setup( name='epix', - version='0.3.3', + version='0.3.8', description='Electron and Photon Instructions generator for XENON', author='epix contributors, the XENON collaboration', url='https://github.com/XENONnT/epix',
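
For the new `BetaDecayClustering.cluster`, a minimal pandas sketch of the selection it applies, on toy rows (hypothetical values; only the columns the method actually touches):

```python
import pandas as pd

# Toy tracks for one event: all created by RadioactiveDecayBase.
df = pd.DataFrame({
    'type':           ['e-', 'gamma', 'gamma', 'alpha'],
    'creaproc':       ['RadioactiveDecayBase'] * 4,
    'edproc':         ['eIoni', 'phot', 'Rayl', 'ionIoni'],
    'trackid':        [1, 2, 3, 4],
    'PreStepEnergy':  [100., 50., 40., 5000.],
    'PostStepEnergy': [0., 10., 40., 4000.],
})

# Keep decay e-/gamma, drop Rayleigh scattering and transportation steps:
keep = df[df.type.isin(['e-', 'gamma'])
          & (df.creaproc == 'RadioactiveDecayBase')
          & ~df.edproc.isin(['Rayl', 'Transportation'])]

# As in the patch: gamma deposit = PreStep - PostStep, e- deposit = PreStepEnergy.
keep = keep.assign(ed=keep.PreStepEnergy - keep.PostStepEnergy)
keep.loc[keep.type == 'e-', 'ed'] = keep.loc[keep.type == 'e-', 'PreStepEnergy']
print(keep[['trackid', 'type', 'ed']])  # trackids 1 and 2 survive
```
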
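The grouping trick inside the new `calc_tot_e_dep` helper, shown on toy numbers; like the helper itself, this assumes the rows are already ordered by `g4id`:

```python
import numpy as np

# Hypothetical instruction rows, grouped by G4 event id:
g4id  = np.array([0, 0, 0, 1, 1, 2])
e_dep = np.array([1., 2., 3., 4., 5., 6.])  # keV

_, first = np.unique(g4id, return_index=True)  # first row of each event
per_event = np.split(e_dep, first[1:])         # [[1,2,3], [4,5], [6]]
tot = np.array([c.sum() for c in per_event])   # [6., 9., 6.]
n   = np.array([len(c) for c in per_event])    # [3, 2, 1]
print(np.repeat(tot, n))                       # [6. 6. 6. 9. 9. 6.]
# The patch then halves this per-row total, since each deposit
# shows up as both an S1 and an S2 instruction row.
```
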
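The BETA yield model reduces to evaluating the two pickled splines and scaling by the detector gains; a self-contained sketch with stand-in splines (the linear toy curves are placeholders, not the real `cs1_beta.pkl`/`cs2_beta.pkl` content):

```python
import numpy as np
from scipy.interpolate import interp1d

# Stand-in splines; the assumed shape is energy [keV] -> expected cS1/cS2 [PE].
en = np.linspace(1., 100., 100)
cs1_spline = interp1d(en, 5.0 * en)    # toy cS1 curve
cs2_spline = interp1d(en, 300.0 * en)  # toy cS2 curve

g1, g2 = 0.151, 16.450  # XENONnT v5 values, hard-coded in the patch

def beta_quanta(energy):
    # Mirrors BETA_quanta_generator.get_quanta: photons, electrons, excitons (always 0)
    return cs1_spline(energy) / g1, cs2_spline(energy) / g2, 0.0

print(beta_quanta(25.0))
```
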
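One plausible reading of the `event_time_min` change in `_cluster`/`_write_result`: the energy-weighted mean time is accumulated on small relative values and the offset is restored at the end, which protects the mean from float round-off when absolute G4 times are large. A toy illustration:

```python
import numpy as np

t = 1.0e16 + np.array([0.0, 5.0, 10.0])  # absolute G4 times [ns], toy values
e = np.array([1.0, 2.0, 3.0])            # deposit energies [keV]

t0 = t.min()                               # event_time_min
rel = ((t - t0) * e).sum() / e.sum() + t0  # patched order: average small numbers first
naive = (t * e).sum() / e.sum()            # pre-patch order: average huge products
print(rel - t0, naive - t0)                # the two weighted means can differ at the ns level
```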