From ebb6a8125d554e28dcc32d1110a1da754ad7fdb3 Mon Sep 17 00:00:00 2001 From: flixha Date: Thu, 5 Jan 2023 18:49:34 +0100 Subject: [PATCH 1/6] speed up _get_array_dicts with direct access to UTCDateTime.__dict__['_UTCDateTime__ns'] --- eqcorrscan/utils/correlate.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/eqcorrscan/utils/correlate.py b/eqcorrscan/utils/correlate.py index 08553b82d..021b38d38 100644 --- a/eqcorrscan/utils/correlate.py +++ b/eqcorrscan/utils/correlate.py @@ -30,6 +30,8 @@ import math from packaging import version +from obspy import UTCDateTime + from eqcorrscan.utils.libnames import _load_cdll from eqcorrscan.utils import FMF_INSTALLED from eqcorrscan.utils.pre_processing import _stream_quick_select @@ -1119,7 +1121,9 @@ def _get_array_dicts(templates, stream, stack, copy_streams=True): stream.sort(['network', 'station', 'location', 'channel']) for template in templates: template.sort(['network', 'station', 'location', 'channel']) - t_starts.append(min([tr.stats.starttime for tr in template])) + t_starts.append( + UTCDateTime(ns=min([tr.stats.starttime.__dict__['_UTCDateTime__ns'] + for tr in template]))) stream_start = min([tr.stats.starttime for tr in stream]) # get seed ids, make sure these are collected on sorted streams seed_ids = [tr.id + '_' + str(i) for i, tr in enumerate(templates[0])] @@ -1142,12 +1146,14 @@ def _get_array_dicts(templates, stream, stack, copy_streams=True): (stream_channel.stats.starttime - stream_start))) if stack: pad_list = [ - int(round(template[i].stats.sampling_rate * - (template[i].stats.starttime - - t_starts[j]))) - stream_offset - for j, template in zip(range(len(templates)), templates)] + int(round( + template[i].stats.sampling_rate * + (template[i].stats.starttime.__dict__['_UTCDateTime__ns'] - + t_starts[j].__dict__['_UTCDateTime__ns']) / 1e9)) - + stream_offset + for j, template in enumerate(templates)] else: - pad_list = [0 for _ in range(len(templates))] + pad_list = [0 for _ in templates] pad_dict.update({seed_id: pad_list}) return stream_dict, template_dict, pad_dict, seed_ids From 13e696f163f10034e4e0f42f4a6d2d93f072fad7 Mon Sep 17 00:00:00 2001 From: flixha Date: Mon, 9 Jan 2023 16:17:48 +0100 Subject: [PATCH 2/6] speed up trace preparation with direct access to stream.traces with index --- eqcorrscan/utils/correlate.py | 9 +++++---- eqcorrscan/utils/pre_processing.py | 20 +++++++++++--------- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/eqcorrscan/utils/correlate.py b/eqcorrscan/utils/correlate.py index 021b38d38..0d4860e0f 100644 --- a/eqcorrscan/utils/correlate.py +++ b/eqcorrscan/utils/correlate.py @@ -1129,7 +1129,7 @@ def _get_array_dicts(templates, stream, stack, copy_streams=True): seed_ids = [tr.id + '_' + str(i) for i, tr in enumerate(templates[0])] # pull common channels out of streams and templates and put in dicts for i, seed_id in enumerate(seed_ids): - temps_with_seed = [template[i].data for template in templates] + temps_with_seed = [template.traces[i].data for template in templates] t_ar = np.array(temps_with_seed).astype(np.float32) template_dict.update({seed_id: t_ar}) stream_channel = _stream_quick_select(stream, seed_id.split('_')[0])[0] @@ -1143,12 +1143,13 @@ def _get_array_dicts(templates, stream, stack, copy_streams=True): # pad_list can become 0. 0-1 = -1; which is problematic. stream_offset = int( math.floor(stream_channel.stats.sampling_rate * - (stream_channel.stats.starttime - stream_start))) + (stream_channel.stats.starttime - stream_start))) if stack: pad_list = [ int(round( - template[i].stats.sampling_rate * - (template[i].stats.starttime.__dict__['_UTCDateTime__ns'] - + template.traces[i].stats.__dict__['sampling_rate'] * + (template.traces[i].stats.starttime.__dict__[ + '_UTCDateTime__ns'] - t_starts[j].__dict__['_UTCDateTime__ns']) / 1e9)) - stream_offset for j, template in enumerate(templates)] diff --git a/eqcorrscan/utils/pre_processing.py b/eqcorrscan/utils/pre_processing.py index 0e877f201..758410b70 100644 --- a/eqcorrscan/utils/pre_processing.py +++ b/eqcorrscan/utils/pre_processing.py @@ -999,18 +999,20 @@ def _prep_data_for_correlation(stream, templates, template_names=None, template.traces[idx] for idx in stream_trace_id_dict[seed_id]]) if len(template_channel) <= channel_index: # out_template[channel_number].data = nan_channel # quicker: - out_template[channel_number].__dict__['data'] = copy.deepcopy( - nan_channel) - out_template[channel_number].stats.__dict__['npts'] = \ - template_length - out_template[channel_number].stats.__dict__['starttime'] = \ - template_starttime - out_template[channel_number].stats.__dict__['endtime'] = \ - UTCDateTime(ns=int( + out_template.traces[channel_number].__dict__[ + 'data'] = copy.deepcopy(nan_channel) + out_template.traces[channel_number].stats.__dict__[ + 'npts'] = template_length + out_template.traces[channel_number].stats.__dict__[ + 'starttime'] = template_starttime + out_template.traces[channel_number].stats.__dict__[ + 'endtime'] = UTCDateTime(ns=int( round(template_starttime.ns + (template_length * samp_rate) * 1e9))) else: - out_template[channel_number] = template_channel[channel_index] + out_template.traces[channel_number] = template_channel.traces[ + channel_index] + # If a template-trace matches a NaN-trace in the stream , then set # template-trace to NaN so that this trace does not appear in channel- # list of detections. From a38ab893e0526655e2ea88b772ff32015e60156e Mon Sep 17 00:00:00 2001 From: flixha Date: Mon, 9 Jan 2023 19:33:36 +0100 Subject: [PATCH 3/6] add changelog entry --- CHANGES.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index 77034a314..8187b6b6a 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -12,6 +12,8 @@ - `_prep_data_for_correlation`: 3x speedup for filling NaN-traces in templates - New function ``quick_trace_select` for a very efficient selection of trace by seed ID without wildcards (4x speedup). +* utils.correlate + - 25 % speedup for `_get_array_dicts` with quicker access to properties. * utils.catalog_to_dd._prepare_stream - Now more consistently slices templates to length = extract_len * samp_rate so that user receives less warnings about insufficient data. From 25f8392d32734447ff3b480de3651cf558aa179e Mon Sep 17 00:00:00 2001 From: flixha Date: Tue, 10 Jan 2023 12:22:22 +0100 Subject: [PATCH 4/6] fix issue in setting endtime manually (had no effect on results) --- eqcorrscan/utils/pre_processing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eqcorrscan/utils/pre_processing.py b/eqcorrscan/utils/pre_processing.py index 758410b70..b1bbfd034 100644 --- a/eqcorrscan/utils/pre_processing.py +++ b/eqcorrscan/utils/pre_processing.py @@ -1008,7 +1008,7 @@ def _prep_data_for_correlation(stream, templates, template_names=None, out_template.traces[channel_number].stats.__dict__[ 'endtime'] = UTCDateTime(ns=int( round(template_starttime.ns - + (template_length * samp_rate) * 1e9))) + + (template_length * samp_rate) / 1e9))) else: out_template.traces[channel_number] = template_channel.traces[ channel_index] From 41d4adcdb5802f62f5ce9c563a189c3dc18aa420 Mon Sep 17 00:00:00 2001 From: flixha Date: Tue, 10 Jan 2023 12:25:47 +0100 Subject: [PATCH 5/6] fix issue in setting endtime manually (had no effect on results) --- eqcorrscan/utils/pre_processing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eqcorrscan/utils/pre_processing.py b/eqcorrscan/utils/pre_processing.py index b1bbfd034..d0b257a88 100644 --- a/eqcorrscan/utils/pre_processing.py +++ b/eqcorrscan/utils/pre_processing.py @@ -1008,7 +1008,7 @@ def _prep_data_for_correlation(stream, templates, template_names=None, out_template.traces[channel_number].stats.__dict__[ 'endtime'] = UTCDateTime(ns=int( round(template_starttime.ns - + (template_length * samp_rate) / 1e9))) + + (template_length / samp_rate) * 1e9))) else: out_template.traces[channel_number] = template_channel.traces[ channel_index] From d1afd5f99aa3abc2a8d5bf1112e9400ce017fa07 Mon Sep 17 00:00:00 2001 From: Calum Chamberlain Date: Thu, 16 Mar 2023 16:11:30 +1300 Subject: [PATCH 6/6] Use np.copy instead of deepcopy very minor speed advantage --- eqcorrscan/utils/pre_processing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eqcorrscan/utils/pre_processing.py b/eqcorrscan/utils/pre_processing.py index d0b257a88..7c37bc3c2 100644 --- a/eqcorrscan/utils/pre_processing.py +++ b/eqcorrscan/utils/pre_processing.py @@ -1000,7 +1000,7 @@ def _prep_data_for_correlation(stream, templates, template_names=None, if len(template_channel) <= channel_index: # out_template[channel_number].data = nan_channel # quicker: out_template.traces[channel_number].__dict__[ - 'data'] = copy.deepcopy(nan_channel) + 'data'] = np.copy(nan_channel) out_template.traces[channel_number].stats.__dict__[ 'npts'] = template_length out_template.traces[channel_number].stats.__dict__[