diff --git a/.github/test_conda_env.yml b/.github/test_conda_env.yml index 31666f8a4..4a562975d 100644 --- a/.github/test_conda_env.yml +++ b/.github/test_conda_env.yml @@ -3,6 +3,7 @@ channels: - conda-forge - defaults dependencies: + - conda-forge::compilers - numpy>=1.12 - matplotlib>=1.3.0 - scipy diff --git a/.github/test_conda_env_macOS.yml b/.github/test_conda_env_macOS.yml deleted file mode 100644 index 62bc683cd..000000000 --- a/.github/test_conda_env_macOS.yml +++ /dev/null @@ -1,36 +0,0 @@ -name: eqcorrscan-test -channels: - - conda-forge -dependencies: - # Compiler bits to match conda-forge build env - - cctools_osx-64 - - clang=14 - - clang-14 - - clang_osx-64 - - llvm-openmp - - clangxx=14 - - compiler-rt - - compiler-rt_osx-64 - - ld64_osx-64 - - libclang-cpp14 - - libcxx=14 - - libllvm14 - - llvm-tools - # Normal requirements - - numpy>=1.12 - - matplotlib>=1.3.0 - - scipy - - mock - - obspy>=1.4.0 - - h5py>3.3 # Issue with dep resolution: https://github.com/conda-forge/h5py-feedstock/issues/92 - - pyyaml - - bottleneck - - fftw - - pytest>=2.0.0 - - pytest-cov - - pytest-pep8 - - pytest-xdist - - pytest-rerunfailures - - pytest-mpl<0.16.0 - - codecov - - boto3 diff --git a/.github/workflows/runtest.yml b/.github/workflows/runtest.yml index 17337e5fa..8506e1ed3 100644 --- a/.github/workflows/runtest.yml +++ b/.github/workflows/runtest.yml @@ -13,26 +13,27 @@ jobs: # continue-on-error: true steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - - name: Get conda env file - shell: bash -l {0} - run: | - if [ "$RUNNER_OS" == "macOS" ]; then - cp .github/test_conda_env_macOS.yml .github/test_conda_env.yml - fi + # Not needed as using the generic "compilers" + # - name: Get conda env file + # shell: bash -l {0} + # run: | + # if [ "$RUNNER_OS" == "macOS" ]; then + # cp .github/test_conda_env_macOS.yml .github/test_conda_env.yml + # fi - name: Setup conda - uses: conda-incubator/setup-miniconda@v2.1.1 + uses: 
conda-incubator/setup-miniconda@v3 with: - miniforge-variant: Mambaforge - miniforge-version: latest + # miniforge-variant: Mambaforge + # miniforge-version: latest python-version: ${{ matrix.python-version }} activate-environment: eqcorrscan-test - use-mamba: true + # use-mamba: true - name: Update Env - run: mamba env update -n eqcorrscan-test -f .github/test_conda_env.yml + run: conda env update -n eqcorrscan-test -f .github/test_conda_env.yml - name: install eqcorrscan shell: bash -l {0} @@ -90,19 +91,19 @@ jobs: fail-fast: false steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup conda - uses: conda-incubator/setup-miniconda@v2.1.1 + uses: conda-incubator/setup-miniconda@v3 with: - miniforge-variant: Mambaforge - miniforge-version: latest + # miniforge-variant: Mambaforge + # miniforge-version: latest python-version: ${{ matrix.python-version }} activate-environment: eqcorrscan-test - use-mamba: true + # use-mamba: true - name: Update Env - run: mamba env update -n eqcorrscan-test -f .github/test_conda_env.yml + run: conda env update -n eqcorrscan-test -f .github/test_conda_env.yml - name: install eqcorrscan shell: bash -l {0} @@ -148,19 +149,19 @@ jobs: fail-fast: false steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup conda - uses: conda-incubator/setup-miniconda@v2.1.1 + uses: conda-incubator/setup-miniconda@v3 with: - miniforge-variant: Mambaforge - miniforge-version: latest + # miniforge-variant: Mambaforge + # miniforge-version: latest python-version: ${{ matrix.python-version }} activate-environment: eqcorrscan-test - use-mamba: true + # use-mamba: true - name: Update Env - run: mamba env update -n eqcorrscan-test -f .github/test_conda_env.yml + run: conda env update -n eqcorrscan-test -f .github/test_conda_env.yml - name: install fmf shell: bash -l {0} diff --git a/eqcorrscan/core/lag_calc.py b/eqcorrscan/core/lag_calc.py index eb69385e6..1ac361d8e 100644 --- a/eqcorrscan/core/lag_calc.py +++ 
b/eqcorrscan/core/lag_calc.py @@ -361,10 +361,10 @@ def xcorr_pick_family(family, stream, shift_len=0.2, min_cc=0.4, phase = phase_hints.get(tr.id, None) if phase is None and stachan.channel[1][-1] in vertical_chans: phase = 'P' - Logger.warning(f"Unknown phase hint for {tr.id} - assigning P") + Logger.debug(f"Unknown phase hint for {tr.id} - assigning P") elif phase is None and stachan.channel[1][-1] in horizontal_chans: phase = 'S' - Logger.warning(f"Unknown phase hint for {tr.id} - assigning S") + Logger.debug(f"Unknown phase hint for {tr.id} - assigning S") _waveform_id = WaveformStreamID(seed_string=tr.id) event.picks.append(Pick( waveform_id=_waveform_id, time=picktime, diff --git a/eqcorrscan/core/match_filter/detection.py b/eqcorrscan/core/match_filter/detection.py index 4b11f3807..aba3e917f 100644 --- a/eqcorrscan/core/match_filter/detection.py +++ b/eqcorrscan/core/match_filter/detection.py @@ -301,7 +301,7 @@ def _calculate_event(self, template=None, template_st=None, template_pick, key=lambda p: p.time)[_index].phase_hint except IndexError: - Logger.error(f"No pick for trace: {tr.id}") + Logger.debug(f"No pick for trace: {tr.id}") ev.picks.append(new_pick) if estimate_origin and template is not None\ and template.event is not None: @@ -397,7 +397,7 @@ def extract_stream(self, stream, length, prepick, all_vert=False, pick = [p for p in pick if p.waveform_id.channel_code == channel] if len(pick) == 0: - Logger.info( + Logger.debug( "No pick for {0}.{1}".format(station, channel)) continue elif len(pick) > 1: @@ -430,7 +430,7 @@ def extract_stream(self, stream, length, prepick, all_vert=False, length) < tr.stats.delta: cut_stream += _tr else: - Logger.info( + Logger.debug( "Insufficient data length for {0}".format(tr.id)) return cut_stream diff --git a/eqcorrscan/core/match_filter/family.py b/eqcorrscan/core/match_filter/family.py index 1660aefa5..24874663e 100644 --- a/eqcorrscan/core/match_filter/family.py +++ b/eqcorrscan/core/match_filter/family.py @@ 
-606,6 +606,18 @@ def lag_calc(self, stream, pre_processed, shift_len=0.2, min_cc=0.4, """ from eqcorrscan.core.lag_calc import xcorr_pick_family + # We should make sure we have events calculated for all detections + # we should clean out anything that was there before + # (and warn the user) + _overwritten_warning = False + for d in self.detections: + if len(d.event.picks): + _overwritten_warning = True + d._calculate_event(template=self.template) + if _overwritten_warning: + Logger.warning("Old events in family have been overwritten to " + "ensure lag-calc runs as expected") + processed_stream = self._process_streams( stream=stream, pre_processed=pre_processed, process_cores=process_cores, parallel=parallel, diff --git a/eqcorrscan/core/match_filter/helpers/tribe.py b/eqcorrscan/core/match_filter/helpers/tribe.py index da493b329..3cd0eaa57 100644 --- a/eqcorrscan/core/match_filter/helpers/tribe.py +++ b/eqcorrscan/core/match_filter/helpers/tribe.py @@ -82,7 +82,7 @@ def _download_st( """ Helper to download a stream from a client for a given start and end time. - Applies `buff` to extend download to (heopfully) ensure all data are + Applies `buff` to extend download to (hopefully) ensure all data are provided. Retries download up to `retries` times, and discards data with large gaps. 
diff --git a/eqcorrscan/core/match_filter/party.py b/eqcorrscan/core/match_filter/party.py index 1141f56f6..1a0544950 100644 --- a/eqcorrscan/core/match_filter/party.py +++ b/eqcorrscan/core/match_filter/party.py @@ -21,7 +21,6 @@ import logging from os.path import join import warnings -from tempfile import template import numpy as np from obspy import Catalog, read_events, Stream @@ -924,16 +923,28 @@ def client_lag_calc( template_channel_ids = { tuple(tr.id.split('.')) for f in chunk for tr in f.template.st} st = _download_st( - starttime=chunk_start, endtime=chunk_end, buff=300.0, + starttime=chunk_start, endtime=chunk_end + pad, buff=300.0, min_gap=min_gap, template_channel_ids=template_channel_ids, client=client, retries=retries) Logger.info(f"Downloaded data for {len(st)} channels, expected " f"{len(template_channel_ids)} channels") + if len(st) == 0: + Logger.warning( + f"No data meeting standards available between " + f"{chunk_start} and {chunk_end}, skipping {len(chunk)} " + f"detections") + chunk_start += (chunk_length - chunk_overlap) + chunk_end = chunk_start + chunk_length + # catalog += chunk.get_catalog() + continue # 3. Run lag-calc - catalog += chunk.lag_calc( + _picked_catalog = chunk.lag_calc( stream=st, pre_processed=False, shift_len=shift_len, *args, **kwargs) + Logger.debug(f"For {len(chunk)} a catalog of " + f"{len(_picked_catalog)} was returned") + catalog += _picked_catalog # 4. 
Run next group detections_run.update({d.id for f in chunk for d in f}) @@ -1059,19 +1070,21 @@ def lag_calc(self, stream, pre_processed, shift_len=0.2, min_cc=0.4, for template_group in template_groups: family = [_f for _f in self.families if _f.template == template_group[0]][0] - group_seed_ids = {tr.id for template in template_group - for tr in template.st} + group_seed_ids = {tr.id for _template in template_group + for tr in _template.st} template_stream = Stream() for seed_id in group_seed_ids: net, sta, loc, chan = seed_id.split('.') template_stream += stream.select( network=net, station=sta, location=loc, channel=chan) # Process once and only once for each group. + Logger.info("Processing stream for group") processed_stream = family._process_streams( stream=template_stream, pre_processed=pre_processed, process_cores=process_cores, parallel=parallel, ignore_bad_data=ignore_bad_data, ignore_length=ignore_length, select_used_chans=False) + Logger.info(f"Processed_stream: {processed_stream}") for template in template_group: family = [_f for _f in self.families if _f.template == template][0] diff --git a/eqcorrscan/tests/pre_processing_test.py b/eqcorrscan/tests/pre_processing_test.py index 344694439..6d42d3e9f 100644 --- a/eqcorrscan/tests/pre_processing_test.py +++ b/eqcorrscan/tests/pre_processing_test.py @@ -269,6 +269,31 @@ def test_process_bad_data(self): filt_order=3, samp_rate=1, starttime=False, seisan_chan_names=True, ignore_length=False) + def test_process_partial_bad_data(self): + """ + Check that processing works as expected when we have + partial bad data. 
Issue #592 + """ + bad_data = Stream([self.st[0].slice( + self.st[0].stats.starttime, + self.st[0].stats.starttime + 20).copy()]) + bad_data += self.st[1].copy() + with self.assertRaises(NotImplementedError): + multi_process( + st=bad_data, lowcut=0.1, highcut=0.4, + filt_order=3, samp_rate=1, + starttime=bad_data[1].stats.starttime, + endtime=bad_data[1].stats.endtime, + seisan_chan_names=True, ignore_length=False) + # Check that it just drops that trace with ignore_bad_data + st_processed = multi_process( + st=bad_data, lowcut=0.1, highcut=0.4, + filt_order=3, samp_rate=1, + starttime=bad_data[1].stats.starttime, + endtime=bad_data[1].stats.endtime, + seisan_chan_names=True, ignore_bad_data=True) + self.assertEqual(len(st_processed), 1) + def test_short_data_fail(self): """Check that we don't allow too much missing data.""" with self.assertRaises(NotImplementedError): @@ -283,9 +308,9 @@ def test_short_data_fail(self): def test_short_data_pass(self): """Check that we do allow missing data if ignore_length is True.""" processed = multi_process( - st=self.st[0].copy().trim(endtime=self. - st[0].stats.endtime - 18000), lowcut=0.1, - highcut=0.4, filt_order=3, samp_rate=1, + st=self.st[0].copy().trim( + endtime=self.st[0].stats.endtime - 18000), + lowcut=0.1, highcut=0.4, filt_order=3, samp_rate=1, starttime=self.day_start, endtime=UTCDateTime(self.day_start) + 86400., seisan_chan_names=True, ignore_length=True) diff --git a/eqcorrscan/utils/pre_processing.py b/eqcorrscan/utils/pre_processing.py index d40293320..bade71441 100644 --- a/eqcorrscan/utils/pre_processing.py +++ b/eqcorrscan/utils/pre_processing.py @@ -275,6 +275,7 @@ def multi_process(st, lowcut, highcut, filt_order, samp_rate, parallel=False, Logger.info( f"{trace_id} not found in {set(tr.id for tr in st)}," f" ignoring") + Logger.info(f"Stream after QC is: {st}") # 3. 
Detrend # ~ 2x speedup for 50 100 Hz daylong traces on 12 threads @@ -288,21 +289,26 @@ def multi_process(st, lowcut, highcut, filt_order, samp_rate, parallel=False, for i, _ in enumerate(st): if float(st[i].stats.npts / st[i].stats.sampling_rate) != length: Logger.info( - 'Data for {0} are not long-enough, will zero pad'.format( - st[i].id)) + 'Data for {0} are not long-enough, will try to ' + 'zero pad'.format(st[i].id)) st[i], padded[st[i].id] = _length_check( st[i], starttime=starttime, length=length, ignore_length=ignore_length, ignore_bad_data=ignore_bad_data) # Remove None traces that might be returned from length checking - st.traces = [tr for tr in st if tr is not None] + st.traces = [tr for tr in st if tr is not None and tr.stats.npts] # Check that we actually still have some data if not _stream_has_data(st): if tracein: - return st[0] + if len(st): + return st[0] + else: + return Trace() return st + Logger.info(f"Stream after length check and padding is: {st}") + # 5. Resample # ~ 3.25x speedup for 50 100 Hz daylong traces on 12 threads st = _multi_resample( @@ -802,6 +808,7 @@ def _group_process(filt_order, highcut, lowcut, samp_rate, process_length, starttime = starttimes[0] endtime = endtimes[-1] + stream_endtime = copy.deepcopy(endtime) data_len_samps = round((endtime - starttime) * samp_rate) + 1 assert overlap < process_length, "Overlap must be less than process length" chunk_len_samps = (process_length - overlap) * samp_rate @@ -883,11 +890,11 @@ def _group_process(filt_order, highcut, lowcut, samp_rate, process_length, f" and {_endtime}") continue - if _endtime < stream[0].stats.endtime: + if _endtime < stream_endtime: Logger.warning( "Last bit of data between {0} and {1} will go unused " "because it is shorter than a chunk of {2} s".format( - _endtime, stream[0].stats.endtime, process_length)) + _endtime, stream_endtime, process_length)) return processed_streams