From d3a8fc00090c1ee6d74ee01012091cf8f4b43731 Mon Sep 17 00:00:00 2001 From: Tobias Sargeant Date: Wed, 28 Aug 2024 16:18:32 +0100 Subject: [PATCH 1/3] Apply normalization consistently in VLenBytes None and 0 are treated like a zero-length string when computing lengths, and the same normalization should be applied to the value passed to PyBytes_AS_STRING. If this is not done, an assertion is hit in the Python runtime (when compiled in debug mode). --- numcodecs/vlen.pyx | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/numcodecs/vlen.pyx b/numcodecs/vlen.pyx index e1e149ee..9c1223fe 100644 --- a/numcodecs/vlen.pyx +++ b/numcodecs/vlen.pyx @@ -250,7 +250,10 @@ class VLenBytes(Codec): l = lengths[i] store_le32(data, l) data += 4 - encv = PyBytes_AS_STRING(values[i]) + b = values[i] + if b is None or b == 0: # treat these as missing value, normalize + b = b'' + encv = PyBytes_AS_STRING(b) memcpy(data, encv, l) data += l From 9efe051b457dcd0af670c3923d3f3d0fa678b32c Mon Sep 17 00:00:00 2001 From: Tobias Sargeant Date: Thu, 29 Aug 2024 08:37:01 +0100 Subject: [PATCH 2/3] Alternative: skip bytes to string conversion and memcpy if zero length. 
--- numcodecs/vlen.pyx | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/numcodecs/vlen.pyx b/numcodecs/vlen.pyx index 9c1223fe..64fe0c11 100644 --- a/numcodecs/vlen.pyx +++ b/numcodecs/vlen.pyx @@ -250,12 +250,10 @@ class VLenBytes(Codec): l = lengths[i] store_le32(data, l) data += 4 - b = values[i] - if b is None or b == 0: # treat these as missing value, normalize - b = b'' - encv = PyBytes_AS_STRING(b) - memcpy(data, encv, l) - data += l + if l > 0: + encv = PyBytes_AS_STRING(b) + memcpy(data, encv, l) + data += l return out From 19cf050d839b7644633bfb0e39a082eba2d911bb Mon Sep 17 00:00:00 2001 From: Tobias Sargeant Date: Thu, 29 Aug 2024 14:26:45 +0100 Subject: [PATCH 3/3] Update vlen.pyx --- numcodecs/vlen.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/numcodecs/vlen.pyx b/numcodecs/vlen.pyx index 64fe0c11..cf01a4fb 100644 --- a/numcodecs/vlen.pyx +++ b/numcodecs/vlen.pyx @@ -251,7 +251,7 @@ class VLenBytes(Codec): store_le32(data, l) data += 4 if l > 0: - encv = PyBytes_AS_STRING(b) + encv = PyBytes_AS_STRING(values[i]) memcpy(data, encv, l) data += l