diff --git a/CHANGES.md b/CHANGES.md index 386758278..2bf621883 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -12,6 +12,8 @@ - `_prep_data_for_correlation`: 3x speedup for filling NaN-traces in templates - New function ``quick_trace_select` for a very efficient selection of trace by seed ID without wildcards (4x speedup). +* utils.correlate + - 25 % speedup for `_get_array_dicts` with quicker access to properties. * utils.catalog_to_dd - ._prepare_stream - Now more consistently slices templates to length = extract_len * samp_rate diff --git a/eqcorrscan/utils/correlate.py b/eqcorrscan/utils/correlate.py index 08553b82d..0d4860e0f 100644 --- a/eqcorrscan/utils/correlate.py +++ b/eqcorrscan/utils/correlate.py @@ -30,6 +30,8 @@ import math from packaging import version +from obspy import UTCDateTime + from eqcorrscan.utils.libnames import _load_cdll from eqcorrscan.utils import FMF_INSTALLED from eqcorrscan.utils.pre_processing import _stream_quick_select @@ -1119,13 +1121,15 @@ def _get_array_dicts(templates, stream, stack, copy_streams=True): stream.sort(['network', 'station', 'location', 'channel']) for template in templates: template.sort(['network', 'station', 'location', 'channel']) - t_starts.append(min([tr.stats.starttime for tr in template])) + t_starts.append( + UTCDateTime(ns=min([tr.stats.starttime.__dict__['_UTCDateTime__ns'] + for tr in template]))) stream_start = min([tr.stats.starttime for tr in stream]) # get seed ids, make sure these are collected on sorted streams seed_ids = [tr.id + '_' + str(i) for i, tr in enumerate(templates[0])] # pull common channels out of streams and templates and put in dicts for i, seed_id in enumerate(seed_ids): - temps_with_seed = [template[i].data for template in templates] + temps_with_seed = [template.traces[i].data for template in templates] t_ar = np.array(temps_with_seed).astype(np.float32) template_dict.update({seed_id: t_ar}) stream_channel = _stream_quick_select(stream, seed_id.split('_')[0])[0] @@ -1139,15 +1143,18 @@ def _get_array_dicts(templates, stream, stack, copy_streams=True): # pad_list can become 0. 0-1 = -1; which is problematic. stream_offset = int( math.floor(stream_channel.stats.sampling_rate * - (stream_channel.stats.starttime - stream_start))) + (stream_channel.stats.starttime - stream_start))) if stack: pad_list = [ - int(round(template[i].stats.sampling_rate * - (template[i].stats.starttime - - t_starts[j]))) - stream_offset - for j, template in zip(range(len(templates)), templates)] + int(round( + template.traces[i].stats.__dict__['sampling_rate'] * + (template.traces[i].stats.starttime.__dict__[ + '_UTCDateTime__ns'] - + t_starts[j].__dict__['_UTCDateTime__ns']) / 1e9)) - + stream_offset + for j, template in enumerate(templates)] else: - pad_list = [0 for _ in range(len(templates))] + pad_list = [0 for _ in templates] pad_dict.update({seed_id: pad_list}) return stream_dict, template_dict, pad_dict, seed_ids diff --git a/eqcorrscan/utils/pre_processing.py b/eqcorrscan/utils/pre_processing.py index 0e877f201..7c37bc3c2 100644 --- a/eqcorrscan/utils/pre_processing.py +++ b/eqcorrscan/utils/pre_processing.py @@ -999,18 +999,20 @@ def _prep_data_for_correlation(stream, templates, template_names=None, template.traces[idx] for idx in stream_trace_id_dict[seed_id]]) if len(template_channel) <= channel_index: # out_template[channel_number].data = nan_channel # quicker: - out_template[channel_number].__dict__['data'] = copy.deepcopy( - nan_channel) - out_template[channel_number].stats.__dict__['npts'] = \ - template_length - out_template[channel_number].stats.__dict__['starttime'] = \ - template_starttime - out_template[channel_number].stats.__dict__['endtime'] = \ - UTCDateTime(ns=int( + out_template.traces[channel_number].__dict__[ + 'data'] = np.copy(nan_channel) + out_template.traces[channel_number].stats.__dict__[ + 'npts'] = template_length + out_template.traces[channel_number].stats.__dict__[ + 'starttime'] = template_starttime + out_template.traces[channel_number].stats.__dict__[ + 'endtime'] = UTCDateTime(ns=int( round(template_starttime.ns - + (template_length * samp_rate) * 1e9))) + + (template_length / samp_rate) * 1e9))) else: - out_template[channel_number] = template_channel[channel_index] + out_template.traces[channel_number] = template_channel.traces[ + channel_index] + # If a template-trace matches a NaN-trace in the stream , then set # template-trace to NaN so that this trace does not appear in channel- # list of detections.