Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Allow compute to return a generator instead of chunks #751

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 15 additions & 4 deletions strax/plugins/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import numpy as np
from copy import copy, deepcopy
import strax
import types

export, __all__ = strax.exporter()

Expand Down Expand Up @@ -484,10 +485,14 @@ class IterDone(Exception):
inputs_merged = {
kind: strax.Chunk.merge([inputs[d] for d in deps_of_kind])
for kind, deps_of_kind in self.dependencies_by_kind().items()}

# Submit the computation
# print(f"{self} calling with {inputs_merged}")
if self.parallel and executor is not None:
if inspect.isgeneratorfunction(self.compute):
raise NotImplementedError(
f'Plugin "{self.__class__.__name__}" uses an iterator as compute method. '
'This is not supported in multi-threading/processing.')
new_future = executor.submit(
self.do_compute,
chunk_i=chunk_i,
Expand All @@ -496,7 +501,11 @@ class IterDone(Exception):
pending_futures = [f for f in pending_futures if not f.done()]
yield new_future
else:
yield self.do_compute(chunk_i=chunk_i, **inputs_merged)
chunk = self.do_compute(chunk_i=chunk_i, **inputs_merged)
if isinstance(chunk, types.GeneratorType):
yield from chunk
else:
yield chunk

except IterDone:
# Check all sources are exhausted.
Expand Down Expand Up @@ -605,9 +614,11 @@ def do_compute(self, chunk_i=None, **kwargs):
if self.compute_takes_start_end:
kwargs['start'] = start
kwargs['end'] = end

result = self.compute(**kwargs)

return self._fix_output(result, start, end)
if isinstance(result, types.GeneratorType):
return result
return self._fix_output(result, start, end)

def _fix_output(self, result, start, end, _dtype=None):
if self.multi_output and _dtype is None:
Expand Down
70 changes: 70 additions & 0 deletions strax/testutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,76 @@ def compute(self, peaks):
return dict(peak_classification=p,
lone_hits=lh)



# Plugins with time structure within chunks,
# used to test down chunking within plugin compute.
@strax.takes_config(
    strax.Option('n_chunks', type=int, default=10, track=False),
    strax.Option('recs_per_chunk', type=int, default=10, track=False),
)
class RecordsWithTimeStructure(strax.Plugin):
    """Mock source plugin whose records have time structure inside each
    chunk: records start 5 ns after the chunk start and the chunk end
    is padded 10 ns past the last record's start.

    Used to test down-chunking within a plugin's compute.
    """
    provides = 'records'
    parallel = 'process'
    depends_on = tuple()
    dtype = strax.record_dtype()

    rechunk_on_save = False

    def source_finished(self):
        # The mock source always reports itself finished.
        return True

    def is_ready(self, chunk_i):
        # Emit exactly n_chunks chunks, then stop.
        return chunk_i < self.config['n_chunks']

    def setup(self):
        # End time of the previously emitted chunk; chunks are contiguous.
        self.last_end = 0

    def compute(self, chunk_i):
        n_recs = self.config['recs_per_chunk']

        records = np.zeros(n_recs, self.dtype)
        # Offset record times 5 ns into the chunk.
        records['time'] = self.last_end + 5 + np.arange(n_recs)
        records['length'] = records['dt'] = 1
        records['channel'] = np.arange(n_recs)

        chunk_end = self.last_end + n_recs + 10
        result = self.chunk(start=self.last_end, end=chunk_end, data=records)
        self.last_end = chunk_end
        return result


class DownSampleRecords(strax.Plugin):
    """Plugin to test the down-chunking of chunks during compute.
    Needed for simulations.

    Splits each incoming chunk at the sixth record and yields the two
    pieces as separate, contiguous chunks via a generator compute.
    """

    provides = 'records_down_chunked'
    depends_on = 'records'
    dtype = strax.record_dtype()
    rechunk_on_save = False
    parallel = 'process'

    def compute(self, records, start, end):
        split_at = 0
        boundary = start

        index = 0
        for index, _ in enumerate(records):
            if index == 5:
                # First five records become their own chunk, ending at
                # the latest endtime among them.
                head = records[split_at:index]
                head_end = np.max(strax.endtime(head))
                split_at = index
                yield self.chunk(start=boundary, end=head_end, data=head)
                boundary = head_end

        # Remaining records fill the rest of the original chunk span.
        tail = records[split_at:index + 1]
        yield self.chunk(start=boundary, end=end, data=tail)



# Used in test_core.py
run_id = '0'

Expand Down
34 changes: 31 additions & 3 deletions tests/test_context.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import strax
from strax.testutils import Records, Peaks, PeaksWoPerRunDefault, PeakClassification, run_id
from strax.testutils import Records, Peaks, PeaksWoPerRunDefault, PeakClassification, RecordsWithTimeStructure, DownSampleRecords, run_id
import tempfile
import numpy as np
from hypothesis import given, settings
Expand Down Expand Up @@ -215,6 +215,33 @@ def tearDown(self):
if os.path.exists(self.tempdir):
shutil.rmtree(self.tempdir)

def test_down_chunking(self):
    """Chunks produced by DownSampleRecords must be twice as numerous
    as the input chunks and cover the run without gaps or overlaps.
    """
    st = self.get_context(False)
    st.register(RecordsWithTimeStructure)
    st.register(DownSampleRecords)

    st.make(run_id, 'records')
    st.make(run_id, 'records_down_chunked')

    chunks_records = st.get_meta(run_id, 'records')['chunks']
    chunks_down = st.get_meta(run_id, 'records_down_chunked')['chunks']

    # DownSampleRecords splits every input chunk into two output chunks.
    _chunks_are_downsampled = len(chunks_records) * 2 == len(chunks_down)
    assert _chunks_are_downsampled

    # Consecutive output chunks must touch: end of one == start of next.
    # (Fixed: was misspelled "_chunks_are_continues"; use builtin all()
    # on a generator instead of np.all over a materialized list.)
    _chunks_are_contiguous = all(
        chunks_down[i]['end'] == chunks_down[i + 1]['start']
        for i in range(len(chunks_down) - 1))
    assert _chunks_are_contiguous

def test_down_chunking_multi_processing(self):
    """A generator-based compute (down chunking) is not supported with
    multiprocessing and must raise NotImplementedError.
    """
    # Note: get_context(False) already sets use_per_run_defaults=False,
    # so the previously duplicated set_context_config call is removed.
    st = self.get_context(False, allow_multiprocess=True)
    st.register(RecordsWithTimeStructure)
    st.register(DownSampleRecords)

    # A single worker does not parallelize, so this must still work.
    st.make(run_id, 'records', max_workers=1)
    # With >1 worker the generator compute must be rejected.
    with self.assertRaises(NotImplementedError):
        st.make(run_id, 'records_down_chunked', max_workers=2)

def test_get_plugins_with_cache(self):
st = self.get_context(False)
st.register(Records)
Expand Down Expand Up @@ -283,11 +310,12 @@ def test_deregister(self):
st.deregister_plugins_with_missing_dependencies()
assert st._plugin_class_registry.pop('peaks', None) is None

def get_context(self, use_defaults):
def get_context(self, use_defaults, **kwargs):
"""Get simple context where we have one mock run in the only storage frontend"""
assert isinstance(use_defaults, bool)
st = strax.Context(storage=self.get_mock_sf(),
check_available=('records',)
check_available=('records',),
**kwargs
)
st.set_context_config({'use_per_run_defaults': use_defaults})
return st
Expand Down