simw · simw · Nov 14, 2023 · Nov 14, 2023
diff --git a/pyproject.toml b/pyproject.toml
@@ -5,7 +5,7 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
 name = "pipedata"
-version = "0.2.1"
+version = "0.2.2"
 description = "Framework for building pipelines for data processing"
 authors = ["Simon Wicks <[email protected]>"]
 readme = "README.md"

diff --git a/src/pipedata/__init__.py b/src/pipedata/__init__.py
@@ -1,4 +1,4 @@
-__version__ = "0.2.1"
+__version__ = "0.2.2"
 
 __all__ = [
     "__version__",

diff --git a/src/pipedata/ops/files.py b/src/pipedata/ops/files.py
@@ -64,7 +64,12 @@ def parquet_batch_reader(
         for file_ref in file_refs:
             logger.info(f"Reading parquet file {file_ref}")
             ds = pa_dataset.dataset(file_ref, format="parquet")
-            for batch in ds.to_batches(columns=columns, batch_size=batch_size):
+            for i, batch in enumerate(
+                ds.to_batches(columns=columns, batch_size=batch_size)
+            ):
+                logger.info(
+                    f"Processing batch {i} (length {len(batch)}) from {file_ref}"
+                )
                 if return_as == "recordbatch":
                     yield batch
                 elif return_as == "record":