Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Speedup 10: 25 % quicker _get_array_dicts and 10 % quicker _prep_data_for_correlation #536

Merged
2 changes: 2 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
- `_prep_data_for_correlation`: 3x speedup for filling NaN-traces in templates
- New function `quick_trace_select` for a very efficient selection of traces
by seed ID without wildcards (4x speedup).
* utils.correlate
- 25 % speedup for `_get_array_dicts` with quicker access to properties.
* utils.catalog_to_dd
- ._prepare_stream
- Now more consistently slices templates to length = extract_len * samp_rate
Expand Down
23 changes: 15 additions & 8 deletions eqcorrscan/utils/correlate.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@
import math
from packaging import version

from obspy import UTCDateTime

from eqcorrscan.utils.libnames import _load_cdll
from eqcorrscan.utils import FMF_INSTALLED
from eqcorrscan.utils.pre_processing import _stream_quick_select
Expand Down Expand Up @@ -1119,13 +1121,15 @@ def _get_array_dicts(templates, stream, stack, copy_streams=True):
stream.sort(['network', 'station', 'location', 'channel'])
for template in templates:
template.sort(['network', 'station', 'location', 'channel'])
t_starts.append(min([tr.stats.starttime for tr in template]))
t_starts.append(
UTCDateTime(ns=min([tr.stats.starttime.__dict__['_UTCDateTime__ns']
for tr in template])))
stream_start = min([tr.stats.starttime for tr in stream])
# get seed ids, make sure these are collected on sorted streams
seed_ids = [tr.id + '_' + str(i) for i, tr in enumerate(templates[0])]
# pull common channels out of streams and templates and put in dicts
for i, seed_id in enumerate(seed_ids):
temps_with_seed = [template[i].data for template in templates]
temps_with_seed = [template.traces[i].data for template in templates]
t_ar = np.array(temps_with_seed).astype(np.float32)
template_dict.update({seed_id: t_ar})
stream_channel = _stream_quick_select(stream, seed_id.split('_')[0])[0]
Expand All @@ -1139,15 +1143,18 @@ def _get_array_dicts(templates, stream, stack, copy_streams=True):
# pad_list can become 0. 0-1 = -1; which is problematic.
stream_offset = int(
math.floor(stream_channel.stats.sampling_rate *
(stream_channel.stats.starttime - stream_start)))
(stream_channel.stats.starttime - stream_start)))
if stack:
pad_list = [
int(round(template[i].stats.sampling_rate *
(template[i].stats.starttime -
t_starts[j]))) - stream_offset
for j, template in zip(range(len(templates)), templates)]
int(round(
template.traces[i].stats.__dict__['sampling_rate'] *
(template.traces[i].stats.starttime.__dict__[
'_UTCDateTime__ns'] -
t_starts[j].__dict__['_UTCDateTime__ns']) / 1e9)) -
stream_offset
for j, template in enumerate(templates)]
else:
pad_list = [0 for _ in range(len(templates))]
pad_list = [0 for _ in templates]
pad_dict.update({seed_id: pad_list})

return stream_dict, template_dict, pad_dict, seed_ids
Expand Down
22 changes: 12 additions & 10 deletions eqcorrscan/utils/pre_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -999,18 +999,20 @@ def _prep_data_for_correlation(stream, templates, template_names=None,
template.traces[idx] for idx in stream_trace_id_dict[seed_id]])
if len(template_channel) <= channel_index:
# out_template[channel_number].data = nan_channel # quicker:
out_template[channel_number].__dict__['data'] = copy.deepcopy(
nan_channel)
out_template[channel_number].stats.__dict__['npts'] = \
template_length
out_template[channel_number].stats.__dict__['starttime'] = \
template_starttime
out_template[channel_number].stats.__dict__['endtime'] = \
UTCDateTime(ns=int(
out_template.traces[channel_number].__dict__[
'data'] = copy.deepcopy(nan_channel)
calum-chamberlain marked this conversation as resolved.
Show resolved Hide resolved
out_template.traces[channel_number].stats.__dict__[
'npts'] = template_length
out_template.traces[channel_number].stats.__dict__[
'starttime'] = template_starttime
out_template.traces[channel_number].stats.__dict__[
'endtime'] = UTCDateTime(ns=int(
round(template_starttime.ns
+ (template_length * samp_rate) * 1e9)))
+ (template_length / samp_rate) * 1e9)))
else:
out_template[channel_number] = template_channel[channel_index]
out_template.traces[channel_number] = template_channel.traces[
channel_index]

# If a template-trace matches a NaN-trace in the stream, then set
# template-trace to NaN so that this trace does not appear in channel-
# list of detections.
Expand Down