Skip to content

Commit

Permalink
Merge branch 'develop' into length-sanitation
Browse files Browse the repository at this point in the history
  • Loading branch information
calum-chamberlain authored Jul 25, 2024
2 parents 28505a7 + 3aab7e8 commit 79765b1
Show file tree
Hide file tree
Showing 12 changed files with 116 additions and 277 deletions.
6 changes: 3 additions & 3 deletions eqcorrscan/core/match_filter/family.py
Original file line number Diff line number Diff line change
Expand Up @@ -565,8 +565,9 @@ def lag_calc(self, stream, pre_processed, shift_len=0.2, min_cc=0.4,
`cores`).
:type ignore_length: bool
:param ignore_length:
If using daylong=True, then dayproc will try check that the data
are there for at least 80% of the day, if you don't want this check
Processing functions will check that the data are there for at
least 80% of the required length and raise an error if not.
If you don't want this check
(which will raise an error if too much data are missing) then set
ignore_length=True. This is not recommended!
:type ignore_bad_data: bool
Expand Down Expand Up @@ -770,7 +771,6 @@ def _process_streams(self, stream, pre_processed, process_cores=1,
parallel=parallel,
cores=process_cores,
stream=template_stream.merge().copy(),
daylong=False,
ignore_length=ignore_length,
overlap=0.0, ignore_bad_data=ignore_bad_data)
processed_stream = Stream()
Expand Down
8 changes: 2 additions & 6 deletions eqcorrscan/core/match_filter/helpers/processes.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,6 @@ def _get_detection_stream(
pre_process: bool = False,
parallel_process: bool = True,
process_cores: int = None,
daylong: bool = False,
overlap: Union[str, float] = "calculate",
ignore_length: bool = False,
ignore_bad_data: bool = False,
Expand Down Expand Up @@ -232,7 +231,6 @@ def _get_detection_stream(
Whether to process data in parallel (uses multi-threading)
:param process_cores:
Maximum number of cores to use for parallel processing
:param daylong: See utils.pre_processing.multi_process
:param overlap: See core.match_filter.tribe.detect
:param ignore_length: See utils.pre_processing.multi_process
:param ignore_bad_data: See utils.pre_processing.multi_process
Expand Down Expand Up @@ -289,7 +287,7 @@ def _get_detection_stream(
lowcut=lowcut, samp_rate=samp_rate,
process_length=process_length,
parallel=parallel_process, cores=process_cores,
daylong=daylong, ignore_length=ignore_length,
ignore_length=ignore_length,
overlap=overlap, ignore_bad_data=ignore_bad_data)
# We don't need to hold on to st!
del st
Expand Down Expand Up @@ -341,7 +339,6 @@ def _pre_processor(
process_length: float,
parallel: bool,
cores: int,
daylong: bool,
ignore_length: bool,
overlap: float,
ignore_bad_data: bool,
Expand Down Expand Up @@ -373,7 +370,6 @@ def _pre_processor(
:param process_length: See utils.pre_processing.multi_process
:param parallel: See utils.pre_processing.multi_process
:param cores: See utils.pre_processing.multi_process
:param daylong: See utils.pre_processing.multi_process
:param ignore_length: See utils.pre_processing.multi_process
:param overlap: See core.match_filter.tribe.detect
:param ignore_bad_data: See utils.pre_processing.multi_process
Expand Down Expand Up @@ -406,7 +402,7 @@ def _pre_processor(
try:
st_chunks = _pre_process(
st, template_ids, pre_processed, filt_order, highcut, lowcut,
samp_rate, process_length, parallel, cores, daylong,
samp_rate, process_length, parallel, cores,
ignore_length, ignore_bad_data, overlap)
for chunk in st_chunks:
if not os.path.isdir(temp_stream_dir):
Expand Down
3 changes: 0 additions & 3 deletions eqcorrscan/core/match_filter/helpers/tribe.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,6 @@ def _pre_process(
process_length: float,
parallel: bool,
cores: int,
daylong: bool,
ignore_length: bool,
ignore_bad_data: bool,
overlap: float, **kwargs
Expand All @@ -218,7 +217,6 @@ def _pre_process(
:param process_length: See utils.pre_processing.multi_process
:param parallel: See utils.pre_processing.multi_process
:param cores: See utils.pre_processing.multi_process
:param daylong: See utils.pre_processing.multi_process
:param ignore_length: See utils.pre_processing.multi_process
:param overlap: See core.match_filter.tribe.detect
:param ignore_bad_data: See utils.pre_processing.multi_process
Expand Down Expand Up @@ -247,7 +245,6 @@ def _pre_process(
parallel=parallel,
cores=cores,
stream=st,
daylong=daylong,
ignore_length=ignore_length,
overlap=overlap,
ignore_bad_data=ignore_bad_data)
Expand Down
2 changes: 1 addition & 1 deletion eqcorrscan/core/match_filter/matched_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,7 +313,7 @@ def match_filter(template_names, template_list, st, threshold,
# Data must be pre-processed
party = tribe.detect(
stream=st, threshold=threshold, threshold_type=threshold_type,
trig_int=trig_int, plot=plot, plotdir=plotdir, daylong=False,
trig_int=trig_int, plot=plot, plotdir=plotdir,
parallel_process=False, xcorr_func=xcorr_func, concurrency=concurrency,
cores=cores, ignore_length=True, ignore_bad_data=True, group_size=None,
overlap="calculate", full_peaks=full_peaks, save_progress=False,
Expand Down
10 changes: 8 additions & 2 deletions eqcorrscan/core/match_filter/party.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import tempfile
import logging
from os.path import join
import warnings

import numpy as np
from obspy import Catalog, read_events, Stream
Expand Down Expand Up @@ -927,8 +928,9 @@ def lag_calc(self, stream, pre_processed, shift_len=0.2, min_cc=0.4,
`cores`).
:type ignore_length: bool
:param ignore_length:
If using daylong=True, then dayproc will try check that the data
are there for at least 80% of the day, if you don't want this check
Processing functions will check that the data are there for at
least 80% of the required length and raise an error if not.
If you don't want this check
(which will raise an error if too much data are missing) then set
ignore_length=True. This is not recommended!
:type ignore_bad_data: bool
Expand Down Expand Up @@ -961,6 +963,10 @@ def lag_calc(self, stream, pre_processed, shift_len=0.2, min_cc=0.4,
.. Note::
Picks are corrected for the template pre-pick time.
"""
# Cope with daylong deprecation
daylong = kwargs.pop("daylong", None)
if daylong:
warnings.warn("daylong argument deprecated - will be ignored")
process_cores = process_cores or cores
template_groups = group_templates(
[_f.template for _f in self.families
Expand Down
15 changes: 5 additions & 10 deletions eqcorrscan/core/match_filter/template.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,7 @@ def read(self, filename):
return self

def detect(self, stream, threshold, threshold_type, trig_int,
plot=False, plotdir=None, pre_processed=False, daylong=False,
plot=False, plotdir=None, pre_processed=False,
parallel_process=True, xcorr_func=None, concurrency=None,
cores=None, ignore_length=False, overlap="calculate",
full_peaks=False, **kwargs):
Expand Down Expand Up @@ -428,12 +428,6 @@ def detect(self, stream, threshold, threshold_type, trig_int,
Defaults to False, which will use the
:mod:`eqcorrscan.utils.pre_processing` routines to resample and
filter the continuous data.
:type daylong: bool
:param daylong:
Set to True to use the
:func:`eqcorrscan.utils.pre_processing.dayproc` routine, which
preforms additional checks and is more efficient for day-long data
over other methods.
:type parallel_process: bool
:param parallel_process:
:type xcorr_func: str or callable
Expand All @@ -450,8 +444,9 @@ def detect(self, stream, threshold, threshold_type, trig_int,
:param cores: Number of workers for processing and detection.
:type ignore_length: bool
:param ignore_length:
If using daylong=True, then dayproc will try check that the data
are there for at least 80% of the day, if you don't want this check
Processing functions will check that the data are there for at
least 80% of the required length and raise an error if not.
If you don't want this check
(which will raise an error if too much data are missing) then set
ignore_length=True. This is not recommended!
:type overlap: float
Expand Down Expand Up @@ -537,7 +532,7 @@ def detect(self, stream, threshold, threshold_type, trig_int,
party = Tribe(templates=[self]).detect(
stream=stream, threshold=threshold,
threshold_type=threshold_type, trig_int=trig_int, plotdir=plotdir,
plot=plot, pre_processed=pre_processed, daylong=daylong,
plot=plot, pre_processed=pre_processed,
parallel_process=parallel_process, xcorr_func=xcorr_func,
concurrency=concurrency, cores=cores, ignore_length=ignore_length,
overlap=overlap, full_peaks=full_peaks, **kwargs)
Expand Down
51 changes: 26 additions & 25 deletions eqcorrscan/core/match_filter/tribe.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import traceback
import uuid
import logging
import warnings

from multiprocessing import Process, Queue, cpu_count
from queue import Empty
Expand Down Expand Up @@ -652,7 +653,7 @@ def cluster(self, method, **kwargs):
return tribes

def detect(self, stream, threshold, threshold_type, trig_int, plot=False,
plotdir=None, daylong=False, parallel_process=True,
plotdir=None, parallel_process=True,
xcorr_func=None, concurrency=None, cores=None,
concurrent_processing=False, ignore_length=False,
ignore_bad_data=False, group_size=None, overlap="calculate",
Expand Down Expand Up @@ -685,12 +686,6 @@ def detect(self, stream, threshold, threshold_type, trig_int, plot=False,
:param plotdir:
The path to save plots to. If `plotdir=None` (default) then the
figure will be shown on screen.
:type daylong: bool
:param daylong:
Set to True to use the
:func:`eqcorrscan.utils.pre_processing.dayproc` routine, which
preforms additional checks and is more efficient for day-long data
over other methods.
:type parallel_process: bool
:param parallel_process:
:type xcorr_func: str or callable
Expand All @@ -712,8 +707,9 @@ def detect(self, stream, threshold, threshold_type, trig_int, plot=False,
benchmarking.
:type ignore_length: bool
:param ignore_length:
If using daylong=True, then dayproc will try check that the data
are there for at least 80% of the day, if you don't want this check
Processing functions will check that the data are there for at
least 80% of the required length and raise an error if the data
are not long enough. If you don't want this check
(which will raise an error if too much data are missing) then set
ignore_length=True. This is not recommended!
:type ignore_bad_data: bool
Expand Down Expand Up @@ -832,6 +828,11 @@ def detect(self, stream, threshold, threshold_type, trig_int, plot=False,
# We should not need to copy the stream, it is copied in chunks by
# _group_process

# Cope with daylong deprecation
daylong = kwargs.pop("daylong", None)
if daylong:
warnings.warn("daylong argument deprecated - will be ignored")

# Argument handling
if overlap is None:
overlap = 0.0
Expand Down Expand Up @@ -871,7 +872,7 @@ def detect(self, stream, threshold, threshold_type, trig_int, plot=False,
tr.id for template in self.templates for tr in template.st)

args = (stream, template_ids, pre_processed, parallel_process,
process_cores, daylong, ignore_length, overlap,
process_cores, ignore_length, overlap,
ignore_bad_data, group_size, groups, sampling_rate, threshold,
threshold_type, save_progress, xcorr_func, concurrency, cores,
export_cccsums, parallel, peak_cores, trig_int, full_peaks,
Expand Down Expand Up @@ -899,7 +900,7 @@ def detect(self, stream, threshold, threshold_type, trig_int, plot=False,

def _detect_serial(
self, stream, template_ids, pre_processed, parallel_process,
process_cores, daylong, ignore_length, overlap, ignore_bad_data,
process_cores, ignore_length, overlap, ignore_bad_data,
group_size, groups, sampling_rate, threshold, threshold_type,
save_progress, xcorr_func, concurrency, cores, export_cccsums,
parallel, peak_cores, trig_int, full_peaks, plot, plotdir, plot_format,
Expand All @@ -923,7 +924,7 @@ def _detect_serial(
lowcut=self.templates[0].lowcut,
samp_rate=self.templates[0].samp_rate,
process_length=self.templates[0].process_length,
parallel=parallel_process, cores=process_cores, daylong=daylong,
parallel=parallel_process, cores=process_cores,
ignore_length=ignore_length, ignore_bad_data=ignore_bad_data,
overlap=overlap, **kwargs)

Expand Down Expand Up @@ -990,7 +991,7 @@ def _detect_serial(

def _detect_concurrent(
self, stream, template_ids, pre_processed, parallel_process,
process_cores, daylong, ignore_length, overlap, ignore_bad_data,
process_cores, ignore_length, overlap, ignore_bad_data,
group_size, groups, sampling_rate, threshold, threshold_type,
save_progress, xcorr_func, concurrency, cores, export_cccsums,
parallel, peak_cores, trig_int, full_peaks, plot, plotdir, plot_format,
Expand Down Expand Up @@ -1050,7 +1051,6 @@ def _detect_concurrent(
process_length=self.templates[0].process_length,
parallel=parallel_process,
cores=process_cores,
daylong=daylong,
ignore_length=ignore_length,
overlap=overlap,
ignore_bad_data=ignore_bad_data,
Expand Down Expand Up @@ -1228,7 +1228,7 @@ def _detect_concurrent(

def client_detect(self, client, starttime, endtime, threshold,
threshold_type, trig_int, plot=False, plotdir=None,
min_gap=None, daylong=False, parallel_process=True,
min_gap=None, parallel_process=True,
xcorr_func=None, concurrency=None, cores=None,
concurrent_processing=False, ignore_length=False,
ignore_bad_data=False, group_size=None,
Expand Down Expand Up @@ -1269,12 +1269,6 @@ def client_detect(self, client, starttime, endtime, threshold,
:param min_gap:
Minimum gap allowed in data - use to remove traces with known
issues
:type daylong: bool
:param daylong:
Set to True to use the
:func:`eqcorrscan.utils.pre_processing.dayproc` routine, which
preforms additional checks and is more efficient for day-long data
over other methods.
:type parallel_process: bool
:param parallel_process:
:type xcorr_func: str or callable
Expand All @@ -1296,8 +1290,9 @@ def client_detect(self, client, starttime, endtime, threshold,
benchmarking.
:type ignore_length: bool
:param ignore_length:
If using daylong=True, then dayproc will try check that the data
are there for at least 80% of the day, if you don't want this check
Processing functions will check that the data are there for at
least 80% of the required length and raise an error if not.
If you don't want this check
(which will raise an error if too much data are missing) then set
ignore_length=True. This is not recommended!
:type ignore_bad_data: bool
Expand Down Expand Up @@ -1389,6 +1384,11 @@ def client_detect(self, client, starttime, endtime, threshold,
from eqcorrscan.core.match_filter.helpers.processes import (
_get_detection_stream)

# Cope with daylong deprecation
daylong = kwargs.pop("daylong", None)
if daylong:
warnings.warn("daylong argument deprecated - will be ignored")

# This uses get_waveforms_bulk to get data - not all client types have
# this, so we check and monkey patch here.
if not hasattr(client, "get_waveforms_bulk"):
Expand Down Expand Up @@ -1433,7 +1433,7 @@ def client_detect(self, client, starttime, endtime, threshold,
detector_kwargs = dict(
threshold=threshold, threshold_type=threshold_type,
trig_int=trig_int, plot=plot, plotdir=plotdir,
daylong=daylong, parallel_process=parallel_process,
parallel_process=parallel_process,
xcorr_func=xcorr_func, concurrency=concurrency, cores=cores,
ignore_length=ignore_length, ignore_bad_data=ignore_bad_data,
group_size=group_size, overlap=None, full_peaks=full_peaks,
Expand All @@ -1442,6 +1442,7 @@ def client_detect(self, client, starttime, endtime, threshold,
poison_queue=poison_queue, shutdown=False,
concurrent_processing=concurrent_processing, groups=groups,
make_events=make_events, min_stations=min_stations)
detector_kwargs.update(kwargs)

if not concurrent_processing:
Logger.warning("Using concurrent_processing=True can be faster if"
Expand Down Expand Up @@ -1486,7 +1487,7 @@ def client_detect(self, client, starttime, endtime, threshold,
temp_stream_dir=self._stream_dir,
full_stream_dir=full_stream_dir,
pre_process=True, parallel_process=parallel_process,
process_cores=process_cores, daylong=daylong,
process_cores=process_cores,
overlap=0.0, ignore_length=ignore_length,
ignore_bad_data=ignore_bad_data,
filt_order=self.templates[0].filt_order,
Expand Down
26 changes: 7 additions & 19 deletions eqcorrscan/core/template_gen.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,12 @@ def template_gen(method, lowcut, highcut, samp_rate, filt_order,
>>> print(len(templates[0]))
15
"""
# Cope with daylong deprecation
daylong = kwargs.pop("daylong", None)
if daylong:
warnings.warn(
"daylong argument deprecated - process-len will be set to 86400")
process_len = 86400.0
client_map = {'from_client': 'fdsn'}
assert method in ('from_client', 'from_meta_file', 'from_sac')
if not isinstance(swin, list):
Expand Down Expand Up @@ -319,22 +325,6 @@ def template_gen(method, lowcut, highcut, samp_rate, filt_order,
Logger.info("No data")
continue
if process:
data_len = max([len(tr.data) / tr.stats.sampling_rate
for tr in st])
if 80000 < data_len < 90000:
daylong = True
starttime = min([tr.stats.starttime for tr in st])
min_delta = min([tr.stats.delta for tr in st])
# Cope with the common starttime less than 1 sample before the
# start of day.
if (starttime + min_delta).date > starttime.date:
starttime = (starttime + min_delta)
# Check if this is stupid:
if abs(starttime - UTCDateTime(starttime.date)) > 600:
daylong = False
starttime = starttime.date
else:
daylong = False
# Check if the required amount of data have been downloaded - skip
# channels if arg set.
for tr in st:
Expand All @@ -356,9 +346,7 @@ def template_gen(method, lowcut, highcut, samp_rate, filt_order,
kwargs = dict(
st=st, lowcut=lowcut, highcut=highcut,
filt_order=filt_order, samp_rate=samp_rate,
parallel=parallel, num_cores=num_cores, daylong=daylong)
if daylong:
kwargs.update(dict(starttime=UTCDateTime(starttime)))
parallel=parallel, num_cores=num_cores)
st = pre_processing.multi_process(**kwargs)
data_start = min([tr.stats.starttime for tr in st])
data_end = max([tr.stats.endtime for tr in st])
Expand Down
Loading

0 comments on commit 79765b1

Please sign in to comment.