From 3e11535c32057c004f0f02ec82adaffa6d9a8bb7 Mon Sep 17 00:00:00 2001
From: Lorenzo Principe <28869147+lorenzomag@users.noreply.github.com>
Date: Sun, 1 Sep 2024 23:01:06 -0500
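Subject: [PATCH 1/7] Allow get_df on all data_types

Context.get_df previously raised ValueError for data types with array
fields. Build the DataFrame with the new helper
strax.convert_structured_array_to_df instead, which stores each row of a
multi-dimensional field as its own ndarray.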
---
 strax/context.py | 12 +++---------
 strax/utils.py   | 20 ++++++++++++++++++++
 2 files changed, 23 insertions(+), 9 deletions(-)

diff --git a/strax/context.py b/strax/context.py
index dc1c4864..fbd18e42 100644
--- a/strax/context.py
+++ b/strax/context.py
@@ -1910,15 +1910,9 @@ def get_df(
 
         """
         df = self.get_array(run_id, targets, save=save, max_workers=max_workers, **kwargs)
-        try:
-            return pd.DataFrame.from_records(df)
-        except Exception as e:
-            if "Data must be 1-dimensional" in str(e):
-                raise ValueError(
-                    f"Cannot load '{targets}' as a dataframe because it has "
-                    "array fields. Please use get_array."
-                )
-            raise
+
+        return strax.convert_structured_array_to_df(df)
+
 
     def get_zarr(
         self,
diff --git a/strax/utils.py b/strax/utils.py
index 95decf85..e4f11088 100644
--- a/strax/utils.py
+++ b/strax/utils.py
@@ -813,3 +813,23 @@ def convert_tuple_to_list(init_func_input):
     else:
         # if not a container, return. i.e. int, float, bytes, str etc.
         return func_input
+
+@export
+def convert_structured_array_to_df(structured_array):
+    """
+    Convert a structured numpy array to a pandas DataFrame.
+    Parameters:
+    structured_array (numpy.ndarray): The structured array to be converted.
+    Returns:
+    pandas.DataFrame: The converted DataFrame.
+    """
+    
+    data_dict = {}
+    for name in structured_array.dtype.names:
+        col = structured_array[name]
+        if col.ndim > 1:
+            # Convert n-dimensional columns to lists of ndarrays
+            data_dict[name] = [np.array(row) for row in col]
+        else:
+            data_dict[name] = col
+    return pd.DataFrame(data_dict)
\ No newline at end of file

From 908fb5d8631bc9091793405e16f65a642210f8ff Mon Sep 17 00:00:00 2001
From: Lorenzo Principe <28869147+lorenzomag@users.noreply.github.com>
Date: Mon, 2 Sep 2024 02:05:30 -0500
Subject: [PATCH 2/7] Add basic test for get_df on peaks data

---
 tests/test_core.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/tests/test_core.py b/tests/test_core.py
index 605275ab..acc9d282 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -32,6 +32,23 @@ def test_core(allow_multiprocess, max_workers, processor):
     assert bla.dtype == strax.peak_dtype()
 
 
+@processing_conditions
+def test_core_df(allow_multiprocess, max_workers, processor):
+    """Test that get_df works with N-dimensional data"""
+    mystrax = strax.Context(
+        storage=[],
+        register=[Records, Peaks],
+        processors=[processor],
+        allow_multiprocess=allow_multiprocess,
+        use_per_run_defaults=True,
+    )
+
+    df = mystrax.get_df(run_id=run_id, targets="peaks", max_workers=max_workers)
+    p = mystrax.get_single_plugin(run_id, "records")
+    assert len(df.loc[0, "data"]) == 200
+    assert len(df) == p.config["recs_per_chunk"] * p.config["n_chunks"]
+
+
 def test_post_office_state():
     mystrax = strax.Context(
         storage=[],

From de4c778638dfbcb8e25d7f52160d35b5787b0a0b Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 2 Sep 2024 07:16:19 +0000
Subject: [PATCH 3/7] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 strax/context.py   |  1 -
 strax/utils.py     | 10 ++++++----
 tests/test_core.py |  2 +-
 3 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/strax/context.py b/strax/context.py
index fbd18e42..b022053a 100644
--- a/strax/context.py
+++ b/strax/context.py
@@ -1913,7 +1913,6 @@ def get_df(
 
         return strax.convert_structured_array_to_df(df)
 
-
     def get_zarr(
         self,
         run_ids,
diff --git a/strax/utils.py b/strax/utils.py
index e4f11088..bd9a491a 100644
--- a/strax/utils.py
+++ b/strax/utils.py
@@ -814,16 +814,18 @@ def convert_tuple_to_list(init_func_input):
         # if not a container, return. i.e. int, float, bytes, str etc.
         return func_input
 
+
 @export
 def convert_structured_array_to_df(structured_array):
-    """
-    Convert a structured numpy array to a pandas DataFrame.
+    """Convert a structured numpy array to a pandas DataFrame.
+
     Parameters:
     structured_array (numpy.ndarray): The structured array to be converted.
     Returns:
     pandas.DataFrame: The converted DataFrame.
+
     """
-    
+
     data_dict = {}
     for name in structured_array.dtype.names:
         col = structured_array[name]
@@ -832,4 +834,4 @@ def convert_structured_array_to_df(structured_array):
             data_dict[name] = [np.array(row) for row in col]
         else:
             data_dict[name] = col
-    return pd.DataFrame(data_dict)
\ No newline at end of file
+    return pd.DataFrame(data_dict)
diff --git a/tests/test_core.py b/tests/test_core.py
index acc9d282..62d14c9b 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -34,7 +34,7 @@ def test_core(allow_multiprocess, max_workers, processor):
 
 @processing_conditions
 def test_core_df(allow_multiprocess, max_workers, processor):
-    """Test that get_df works with N-dimensional data"""
+    """Test that get_df works with N-dimensional data."""
     mystrax = strax.Context(
         storage=[],
         register=[Records, Peaks],

From 38c48deada47c882d7b2084552a800fdb37d1391 Mon Sep 17 00:00:00 2001
From: Lorenzo Principe <28869147+lorenzomag@users.noreply.github.com>
Date: Thu, 19 Sep 2024 23:30:56 -0500
Subject: [PATCH 4/7] Add warning message for non-scalar DataFrame entries

---
 strax/context.py   |  2 +-
 strax/utils.py     | 20 +++++++++++++++++---
 tests/test_core.py |  5 +++--
 3 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/strax/context.py b/strax/context.py
index e83c0d9e..193353e8 100644
--- a/strax/context.py
+++ b/strax/context.py
@@ -1909,7 +1909,7 @@ def get_df(
         """
         df = self.get_array(run_id, targets, save=save, max_workers=max_workers, **kwargs)
 
-        return strax.convert_structured_array_to_df(df)
+        return strax.convert_structured_array_to_df(df, log=self.log)
 
     def get_zarr(
         self,
diff --git a/strax/utils.py b/strax/utils.py
index 01af8667..df13d9bf 100644
--- a/strax/utils.py
+++ b/strax/utils.py
@@ -807,9 +807,9 @@ def convert_tuple_to_list(init_func_input):
 
 
 @export
-def convert_structured_array_to_df(structured_array):
-    """Convert a structured numpy array to a pandas DataFrame.
-
+def convert_structured_array_to_df(structured_array, log=None):
+    """
+    Convert a structured numpy array to a pandas DataFrame.
     Parameters:
     structured_array (numpy.ndarray): The structured array to be converted.
     Returns:
@@ -817,12 +817,26 @@ def convert_structured_array_to_df(structured_array):
 
     """
 
+    if log is None:
+        import logging
+
+        log = logging.getLogger("strax_array_to_df")
+    
     data_dict = {}
+    converted_cols = []
     for name in structured_array.dtype.names:
         col = structured_array[name]
         if col.ndim > 1:
             # Convert n-dimensional columns to lists of ndarrays
             data_dict[name] = [np.array(row) for row in col]
+            converted_cols.append(name)
         else:
             data_dict[name] = col
+
+    if converted_cols:
+        log.warning(
+            f"Columns {converted_cols} contain non-scalar entries. "
+            "Some pandas functions (e.g., groupby, apply) might not perform as expected on these columns."
+        )
+
     return pd.DataFrame(data_dict)
diff --git a/tests/test_core.py b/tests/test_core.py
index 62d14c9b..c09cb907 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -33,8 +33,8 @@ def test_core(allow_multiprocess, max_workers, processor):
 
 
 @processing_conditions
-def test_core_df(allow_multiprocess, max_workers, processor):
-    """Test that get_df works with N-dimensional data."""
+def test_core_df(allow_multiprocess, max_workers, processor, caplog):
+    """Test that get_df works with N-dimensional data"""
     mystrax = strax.Context(
         storage=[],
         register=[Records, Peaks],
@@ -47,6 +47,7 @@ def test_core_df(allow_multiprocess, max_workers, processor):
     p = mystrax.get_single_plugin(run_id, "records")
     assert len(df.loc[0, "data"]) == 200
     assert len(df) == p.config["recs_per_chunk"] * p.config["n_chunks"]
+    assert "contain non-scalar entries. Some pandas functions (e.g., groupby, apply) might not perform as expected on these columns." in caplog.text
 
 
 def test_post_office_state():

From 9d06ad11ec3efda9aa05d72abe1492533286e8fd Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 20 Sep 2024 04:40:23 +0000
Subject: [PATCH 5/7] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 strax/utils.py     | 6 +++---
 tests/test_core.py | 7 +++++--
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/strax/utils.py b/strax/utils.py
index df13d9bf..49971807 100644
--- a/strax/utils.py
+++ b/strax/utils.py
@@ -808,8 +808,8 @@ def convert_tuple_to_list(init_func_input):
 
 @export
 def convert_structured_array_to_df(structured_array, log=None):
-    """
-    Convert a structured numpy array to a pandas DataFrame.
+    """Convert a structured numpy array to a pandas DataFrame.
+
     Parameters:
     structured_array (numpy.ndarray): The structured array to be converted.
     Returns:
@@ -821,7 +821,7 @@ def convert_structured_array_to_df(structured_array, log=None):
         import logging
 
         log = logging.getLogger("strax_array_to_df")
-    
+
     data_dict = {}
     converted_cols = []
     for name in structured_array.dtype.names:
diff --git a/tests/test_core.py b/tests/test_core.py
index c09cb907..3ce990b3 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -34,7 +34,7 @@ def test_core(allow_multiprocess, max_workers, processor):
 
 @processing_conditions
 def test_core_df(allow_multiprocess, max_workers, processor, caplog):
-    """Test that get_df works with N-dimensional data"""
+    """Test that get_df works with N-dimensional data."""
     mystrax = strax.Context(
         storage=[],
         register=[Records, Peaks],
@@ -47,7 +47,10 @@ def test_core_df(allow_multiprocess, max_workers, processor, caplog):
     p = mystrax.get_single_plugin(run_id, "records")
     assert len(df.loc[0, "data"]) == 200
     assert len(df) == p.config["recs_per_chunk"] * p.config["n_chunks"]
-    assert "contain non-scalar entries. Some pandas functions (e.g., groupby, apply) might not perform as expected on these columns." in caplog.text
+    assert (
+        "contain non-scalar entries. Some pandas functions (e.g., groupby, apply) might not perform as expected on these columns."
+        in caplog.text
+    )
 
 
 def test_post_office_state():

From b7c981a9ca4e0feaf6ac7a3d1752cde3706a8548 Mon Sep 17 00:00:00 2001
From: Lorenzo Principe <28869147+lorenzomag@users.noreply.github.com>
Date: Fri, 20 Sep 2024 00:19:47 -0500
Subject: [PATCH 6/7] Reduce line length

---
 strax/utils.py     | 3 ++-
 tests/test_core.py | 4 +++-
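 2 files changed, 5 insertions(+), 2 deletions(-)

Review note: the first tests/test_core.py hunk below adds a second,
identical docstring line to test_core_df (a no-op string statement).
No later patch in this series removes it; drop it in a follow-up.
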
diff --git a/strax/utils.py b/strax/utils.py
index 49971807..430c0e48 100644
--- a/strax/utils.py
+++ b/strax/utils.py
@@ -836,7 +836,8 @@ def convert_structured_array_to_df(structured_array, log=None):
     if converted_cols:
         log.warning(
             f"Columns {converted_cols} contain non-scalar entries. "
-            "Some pandas functions (e.g., groupby, apply) might not perform as expected on these columns."
+            "Some pandas functions (e.g., groupby, apply) might "
+            "not perform as expected on these columns."
         )
 
     return pd.DataFrame(data_dict)
diff --git a/tests/test_core.py b/tests/test_core.py
index 3ce990b3..9590a8b9 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -34,6 +34,7 @@ def test_core(allow_multiprocess, max_workers, processor):
 
 @processing_conditions
 def test_core_df(allow_multiprocess, max_workers, processor, caplog):
+    """Test that get_df works with N-dimensional data."""
     """Test that get_df works with N-dimensional data."""
     mystrax = strax.Context(
         storage=[],
@@ -48,7 +49,8 @@ def test_core_df(allow_multiprocess, max_workers, processor, caplog):
     assert len(df.loc[0, "data"]) == 200
     assert len(df) == p.config["recs_per_chunk"] * p.config["n_chunks"]
     assert (
-        "contain non-scalar entries. Some pandas functions (e.g., groupby, apply) might not perform as expected on these columns."
+        "contain non-scalar entries. Some pandas functions (e.g., groupby, apply)"
+        " might not perform as expected on these columns." 
         in caplog.text
     )
 

From 471b2357c6441601d0e43b93fe26992e038f2d2e Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 20 Sep 2024 05:22:22 +0000
Subject: [PATCH 7/7] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 tests/test_core.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tests/test_core.py b/tests/test_core.py
index 9590a8b9..fbee3bc4 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -50,8 +50,7 @@ def test_core_df(allow_multiprocess, max_workers, processor, caplog):
     assert len(df) == p.config["recs_per_chunk"] * p.config["n_chunks"]
     assert (
         "contain non-scalar entries. Some pandas functions (e.g., groupby, apply)"
-        " might not perform as expected on these columns." 
-        in caplog.text
+        " might not perform as expected on these columns." in caplog.text
     )