From dbe14a8f860f2aecfdd34bce802cde03db6be000 Mon Sep 17 00:00:00 2001 From: Lili Karashchuk Date: Fri, 20 Oct 2023 10:38:25 -0700 Subject: [PATCH] Store HDF5 videos in int8 format (#1559) * make the HDF5 video dataset a proper fixed-width int8 array by zero-padding each encoded frame to the longest frame's length * add gzip compression --- AUTHORS | 2 ++ sleap/io/video.py | 20 +++++++++++++++----- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/AUTHORS b/AUTHORS index e6a78d2ba..11e40e839 100644 --- a/AUTHORS +++ b/AUTHORS @@ -11,3 +11,5 @@ John Smith Example Inc. Jeremy Delahanty The Salk Institute for Biological Studies + +Lili Karashchuk Allen Institute of Neural Dynamics diff --git a/sleap/io/video.py b/sleap/io/video.py index 4953d2f69..39de1972f 100644 --- a/sleap/io/video.py +++ b/sleap/io/video.py @@ -1443,19 +1443,29 @@ def to_hdf5( def encode(img): _, encoded = cv2.imencode("." + format, img) - return np.squeeze(encoded) + return np.squeeze(encoded).astype("int8") + + # pad with zeroes to guarantee int8 type in hdf5 file + frames = [] + for i in range(len(frame_numbers)): + frames.append(encode(frame_data[i])) + + max_frame_size = max([len(x) for x in frames]) - dtype = h5.special_dtype(vlen=np.dtype("int8")) dset = f.create_dataset( - dataset + "/video", (len(frame_numbers),), dtype=dtype + dataset + "/video", + (len(frame_numbers), max_frame_size), + dtype="int8", + compression="gzip", ) dset.attrs["format"] = format dset.attrs["channels"] = self.channels dset.attrs["height"] = self.height dset.attrs["width"] = self.width - for i in range(len(frame_numbers)): - dset[i] = encode(frame_data[i]) + for i, frame in enumerate(frames): + dset[i, 0 : len(frame)] = frame + else: f.create_dataset( dataset + "/video",