From a03fe06ebfaf5fe942c342145d35cfe64a16d2f8 Mon Sep 17 00:00:00 2001 From: Stuart Read Date: Thu, 7 Mar 2024 14:15:24 -0600 Subject: [PATCH] Fix NaN handling for strings (ref #6670) In fixing this, switched string handling from fixed-length to variable length https://docs.h5py.org/en/stable/special.html#variable-length-strings --- Orange/data/io.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Orange/data/io.py b/Orange/data/io.py index 4c3de870043..8de8a08031d 100644 --- a/Orange/data/io.py +++ b/Orange/data/io.py @@ -21,6 +21,7 @@ import h5py import numpy as np +import pandas as pd import xlrd import xlsxwriter @@ -601,6 +602,8 @@ def parse(attr): f.create_dataset("Y", data=data.Y) if data.metas.size: for i, attr in enumerate(data.domain.metas): - col_type = 'S' if isinstance(attr, StringVariable) else 'f' + col_type = h5py.string_dtype() if isinstance(attr, StringVariable) else 'f' col_data = data.metas[:, [i]].astype(col_type) - f.create_dataset(f'metas/{i}', data=col_data) + if isinstance(attr, StringVariable): + col_data[pd.isnull(col_data)] = "" + f.create_dataset(f'metas/{i}', data=col_data, dtype=col_type)