From ebb6a8125d554e28dcc32d1110a1da754ad7fdb3 Mon Sep 17 00:00:00 2001
From: flixha <felix.halpaap@uib.no>
Date: Thu, 5 Jan 2023 18:49:34 +0100
Subject: [PATCH 1/6] speed up _get_array_dicts with direct access to
 UTCDateTime.__dict__['_UTCDateTime__ns']

---
 eqcorrscan/utils/correlate.py | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/eqcorrscan/utils/correlate.py b/eqcorrscan/utils/correlate.py
index 08553b82d..021b38d38 100644
--- a/eqcorrscan/utils/correlate.py
+++ b/eqcorrscan/utils/correlate.py
@@ -30,6 +30,8 @@
 import math
 from packaging import version
 
+from obspy import UTCDateTime
+
 from eqcorrscan.utils.libnames import _load_cdll
 from eqcorrscan.utils import FMF_INSTALLED
 from eqcorrscan.utils.pre_processing import _stream_quick_select
@@ -1119,7 +1121,9 @@ def _get_array_dicts(templates, stream, stack, copy_streams=True):
     stream.sort(['network', 'station', 'location', 'channel'])
     for template in templates:
         template.sort(['network', 'station', 'location', 'channel'])
-        t_starts.append(min([tr.stats.starttime for tr in template]))
+        t_starts.append(
+            UTCDateTime(ns=min([tr.stats.starttime.__dict__['_UTCDateTime__ns']
+                                for tr in template])))
     stream_start = min([tr.stats.starttime for tr in stream])
     # get seed ids, make sure these are collected on sorted streams
     seed_ids = [tr.id + '_' + str(i) for i, tr in enumerate(templates[0])]
@@ -1142,12 +1146,14 @@ def _get_array_dicts(templates, stream, stack, copy_streams=True):
                   (stream_channel.stats.starttime - stream_start)))
         if stack:
             pad_list = [
-                int(round(template[i].stats.sampling_rate *
-                          (template[i].stats.starttime -
-                           t_starts[j]))) - stream_offset
-                for j, template in zip(range(len(templates)), templates)]
+                int(round(
+                    template[i].stats.sampling_rate *
+                    (template[i].stats.starttime.__dict__['_UTCDateTime__ns'] -
+                     t_starts[j].__dict__['_UTCDateTime__ns']) / 1e9)) -
+                stream_offset
+                for j, template in enumerate(templates)]
         else:
-            pad_list = [0 for _ in range(len(templates))]
+            pad_list = [0 for _ in templates]
         pad_dict.update({seed_id: pad_list})
 
     return stream_dict, template_dict, pad_dict, seed_ids

From 13e696f163f10034e4e0f42f4a6d2d93f072fad7 Mon Sep 17 00:00:00 2001
From: flixha <felix.halpaap@uib.no>
Date: Mon, 9 Jan 2023 16:17:48 +0100
Subject: [PATCH 2/6] speed up trace preparation with direct access to
 stream.traces with index

---
 eqcorrscan/utils/correlate.py      |  9 +++++----
 eqcorrscan/utils/pre_processing.py | 20 +++++++++++---------
 2 files changed, 16 insertions(+), 13 deletions(-)

diff --git a/eqcorrscan/utils/correlate.py b/eqcorrscan/utils/correlate.py
index 021b38d38..0d4860e0f 100644
--- a/eqcorrscan/utils/correlate.py
+++ b/eqcorrscan/utils/correlate.py
@@ -1129,7 +1129,7 @@ def _get_array_dicts(templates, stream, stack, copy_streams=True):
     seed_ids = [tr.id + '_' + str(i) for i, tr in enumerate(templates[0])]
     # pull common channels out of streams and templates and put in dicts
     for i, seed_id in enumerate(seed_ids):
-        temps_with_seed = [template[i].data for template in templates]
+        temps_with_seed = [template.traces[i].data for template in templates]
         t_ar = np.array(temps_with_seed).astype(np.float32)
         template_dict.update({seed_id: t_ar})
         stream_channel = _stream_quick_select(stream, seed_id.split('_')[0])[0]
@@ -1143,12 +1143,13 @@ def _get_array_dicts(templates, stream, stack, copy_streams=True):
         # pad_list can become 0. 0-1 = -1; which is problematic.
         stream_offset = int(
             math.floor(stream_channel.stats.sampling_rate *
-                  (stream_channel.stats.starttime - stream_start)))
+                       (stream_channel.stats.starttime - stream_start)))
         if stack:
             pad_list = [
                 int(round(
-                    template[i].stats.sampling_rate *
-                    (template[i].stats.starttime.__dict__['_UTCDateTime__ns'] -
+                    template.traces[i].stats.__dict__['sampling_rate'] *
+                    (template.traces[i].stats.starttime.__dict__[
+                        '_UTCDateTime__ns'] -
                      t_starts[j].__dict__['_UTCDateTime__ns']) / 1e9)) -
                 stream_offset
                 for j, template in enumerate(templates)]
diff --git a/eqcorrscan/utils/pre_processing.py b/eqcorrscan/utils/pre_processing.py
index 0e877f201..758410b70 100644
--- a/eqcorrscan/utils/pre_processing.py
+++ b/eqcorrscan/utils/pre_processing.py
@@ -999,18 +999,20 @@ def _prep_data_for_correlation(stream, templates, template_names=None,
                 template.traces[idx] for idx in stream_trace_id_dict[seed_id]])
             if len(template_channel) <= channel_index:
                 # out_template[channel_number].data = nan_channel  # quicker:
-                out_template[channel_number].__dict__['data'] = copy.deepcopy(
-                    nan_channel)
-                out_template[channel_number].stats.__dict__['npts'] = \
-                    template_length
-                out_template[channel_number].stats.__dict__['starttime'] = \
-                    template_starttime
-                out_template[channel_number].stats.__dict__['endtime'] = \
-                    UTCDateTime(ns=int(
+                out_template.traces[channel_number].__dict__[
+                    'data'] = copy.deepcopy(nan_channel)
+                out_template.traces[channel_number].stats.__dict__[
+                    'npts'] = template_length
+                out_template.traces[channel_number].stats.__dict__[
+                    'starttime'] = template_starttime
+                out_template.traces[channel_number].stats.__dict__[
+                    'endtime'] = UTCDateTime(ns=int(
                         round(template_starttime.ns
                               + (template_length * samp_rate) * 1e9)))
             else:
-                out_template[channel_number] = template_channel[channel_index]
+                out_template.traces[channel_number] = template_channel.traces[
+                    channel_index]
+
         # If a template-trace matches a NaN-trace in the stream , then set
         # template-trace to NaN so that this trace does not appear in channel-
         # list of detections.

From a38ab893e0526655e2ea88b772ff32015e60156e Mon Sep 17 00:00:00 2001
From: flixha <felix.halpaap@uib.no>
Date: Mon, 9 Jan 2023 19:33:36 +0100
Subject: [PATCH 3/6] add changelog entry

---
 CHANGES.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/CHANGES.md b/CHANGES.md
index 77034a314..8187b6b6a 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -12,6 +12,8 @@
   - `_prep_data_for_correlation`: 3x speedup for filling NaN-traces in templates
   - New function ``quick_trace_select` for a very efficient selection of trace
     by seed ID without wildcards (4x speedup).
+* utils.correlate
+  - 25 % speedup for `_get_array_dicts` with quicker access to properties.
 * utils.catalog_to_dd._prepare_stream
   - Now more consistently slices templates to length = extract_len * samp_rate
     so that user receives less warnings about insufficient data.

From 25f8392d32734447ff3b480de3651cf558aa179e Mon Sep 17 00:00:00 2001
From: flixha <felix.halpaap@uib.no>
Date: Tue, 10 Jan 2023 12:22:22 +0100
Subject: [PATCH 4/6] first attempt at fixing manually set endtime (had no
 effect on results; superseded by the next patch)

---
 eqcorrscan/utils/pre_processing.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/eqcorrscan/utils/pre_processing.py b/eqcorrscan/utils/pre_processing.py
index 758410b70..b1bbfd034 100644
--- a/eqcorrscan/utils/pre_processing.py
+++ b/eqcorrscan/utils/pre_processing.py
@@ -1008,7 +1008,7 @@ def _prep_data_for_correlation(stream, templates, template_names=None,
                 out_template.traces[channel_number].stats.__dict__[
                     'endtime'] = UTCDateTime(ns=int(
                         round(template_starttime.ns
-                              + (template_length * samp_rate) * 1e9)))
+                              + (template_length * samp_rate) / 1e9)))
             else:
                 out_template.traces[channel_number] = template_channel.traces[
                     channel_index]

From 41d4adcdb5802f62f5ce9c563a189c3dc18aa420 Mon Sep 17 00:00:00 2001
From: flixha <felix.halpaap@uib.no>
Date: Tue, 10 Jan 2023 12:25:47 +0100
Subject: [PATCH 5/6] fix manually set endtime of NaN-filled template traces:
 offset is (template_length / samp_rate) * 1e9 ns (had no effect on results)

---
 eqcorrscan/utils/pre_processing.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/eqcorrscan/utils/pre_processing.py b/eqcorrscan/utils/pre_processing.py
index b1bbfd034..d0b257a88 100644
--- a/eqcorrscan/utils/pre_processing.py
+++ b/eqcorrscan/utils/pre_processing.py
@@ -1008,7 +1008,7 @@ def _prep_data_for_correlation(stream, templates, template_names=None,
                 out_template.traces[channel_number].stats.__dict__[
                     'endtime'] = UTCDateTime(ns=int(
                         round(template_starttime.ns
-                              + (template_length * samp_rate) / 1e9)))
+                              + (template_length / samp_rate) * 1e9)))
             else:
                 out_template.traces[channel_number] = template_channel.traces[
                     channel_index]

From d1afd5f99aa3abc2a8d5bf1112e9400ce017fa07 Mon Sep 17 00:00:00 2001
From: Calum Chamberlain <calum.chamberlain@vuw.ac.nz>
Date: Thu, 16 Mar 2023 16:11:30 +1300
Subject: [PATCH 6/6] Use np.copy instead of deepcopy

very minor speed advantage
---
 eqcorrscan/utils/pre_processing.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/eqcorrscan/utils/pre_processing.py b/eqcorrscan/utils/pre_processing.py
index d0b257a88..7c37bc3c2 100644
--- a/eqcorrscan/utils/pre_processing.py
+++ b/eqcorrscan/utils/pre_processing.py
@@ -1000,7 +1000,7 @@ def _prep_data_for_correlation(stream, templates, template_names=None,
             if len(template_channel) <= channel_index:
                 # out_template[channel_number].data = nan_channel  # quicker:
                 out_template.traces[channel_number].__dict__[
-                    'data'] = copy.deepcopy(nan_channel)
+                    'data'] = np.copy(nan_channel)
                 out_template.traces[channel_number].stats.__dict__[
                     'npts'] = template_length
                 out_template.traces[channel_number].stats.__dict__[