Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HDF5Base can identify empty objects #330

Merged
merged 8 commits into from
Jun 5, 2023
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions tdms/include/hdf5_io/hdf5_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,4 +90,22 @@ class HDF5Base {
* @return false Otherwise.
*/
bool is_ok() const;

/**
* @brief Returns true if the object under /object_path is flagged as
* MATLAB_empty.
* @details Naturally, MATLAB does not save empty arrays or structs as objects
* with no elements or size, instead it saves them as 2-by-1 arrays with 0's
* populating the data. This means that a simple comparison against the
* number of elements or members does not provide the correct information when
* attempting to determine whether an empty input has been passed.
*
* MATLAB _does_ however attach an attribute to any object that it marks as
* empty, MATLAB_empty. This uint8 is set to 1 if the object is indeed an
* empty array, so _that_ is what we will use to distinguish.
* @param object_path Path to the object under file root; /object_path.
* @return true The object is flagged as being the empty MATLAB array.
* @return false Otherwise.
willGraham01 marked this conversation as resolved.
Show resolved Hide resolved
*/
bool flagged_MATLAB_empty(const std::string &object_path) const;
};
53 changes: 52 additions & 1 deletion tdms/src/hdf5_io/hdf5_base.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include "hdf5_io/hdf5_base.h"

#include <iostream>
#include <stdexcept>

#include <H5Cpp.h>
#include <H5public.h>
Expand Down Expand Up @@ -49,8 +50,58 @@ H5Dimension HDF5Base::shape_of(const std::string &group_name,
}

bool HDF5Base::is_ok() const {
// TODO: check for file health might be unnessicary given we've constructed
// TODO: check for file health might be unnecessary given we've constructed
// the object.
return file_->isHdf5(filename_);
// return file_->isAccessible(filename_) && file_->isHdf5(filename_);
}

bool HDF5Base::flagged_MATLAB_empty(const std::string &object_path) const {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe is_flagged_as_MATLAB_empty(object_path)?
I don't insist though.

// Can't check anything if there's no file
if (file_ == nullptr) { throw std::runtime_error("No file opened"); }
willGraham01 marked this conversation as resolved.
Show resolved Hide resolved

// This will point to the MATLAB_empty attribute
H5::Attribute empty_attribute;

// Attempt to fetch the object requested
if (!file_->exists(object_path)) {
throw std::runtime_error(filename_ + " has no object " + object_path);
}
hid_t object_reference = file_->getObjId(object_path);

// The object could be a group or a dataset, so we need to account for this
H5I_type_t object_type = file_->getHDFObjType(object_reference);
if (object_type == H5I_GROUP) {
// Dealing with a group
H5::Group object = file_->openGroup(object_path);
willGraham01 marked this conversation as resolved.
Show resolved Hide resolved
if (object.attrExists("MATLAB_empty")) {
empty_attribute = object.openAttribute("MATLAB_empty");
} else {
// Object is not flagged as being empty
return false;
}
object.close();
} else if (object_type == H5I_DATASET) {
// Dealing with a dataset
H5::DataSet object = file_->openDataSet(object_path);
if (object.attrExists("MATLAB_empty")) {
empty_attribute = object.openAttribute("MATLAB_empty");
} else {
// Object is not flagged as being empty
return false;
}
object.close();
} else {
// No other objects should be the result of MATLAB saving an empty object,
// so throw error
throw std::runtime_error(object_path + " is not a Group or a DataSet");
}

// Having extracted the dataset, attempt to read the attribute value
uint8_t empty_bool[1];
empty_attribute.read(H5::PredType::NATIVE_UINT8, empty_bool);
empty_attribute.close();

// And finally return whether this is flagged as true
return empty_bool[0] == 1;
}
23 changes: 20 additions & 3 deletions tdms/tests/unit/benchmark_scripts/create_hdf5_test_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,11 @@ def create_hdf5_test_file() -> None:
os.mkdir(os.path.dirname(FNAME_TO_CREATE))
file = h5py.File(FNAME_TO_CREATE, "w")

# Create a group under root
read_in_test = file.require_group("read_in_test")

# Create test data to read in
consecutive_numbers = np.arange(0, 12, dtype=float)

# Create a group under root
read_in_test = file.require_group("read_in_test")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This, however, does need a comment because it's weird to me that require_group creates a group.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

#libhdf5-world-problems

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's h5py for you 😅

# Populate group with test data
read_in_test.create_dataset(
"vector_int", data=consecutive_numbers, shape=(12,), dtype=int
Expand All @@ -33,6 +32,24 @@ def create_hdf5_test_file() -> None:
"tensor_double", data=consecutive_numbers, shape=(2, 3, 2), dtype=float
)

# Create & populate the group that mimics MATLAB empty arrays
# Deliberately include some data here to stress that emptiness is based off the presence of an attribute
flag_as_empty = file.require_group("flag_as_empty")
flag_as_empty.attrs["MATLAB_empty"] = np.array(1, dtype=np.uint8)
flag_as_empty.create_dataset(
"consecutive_numbers", data=consecutive_numbers[0:3], shape=(3,), dtype=float
)

# Create a top-level dataset that will be empty, and give it the MATLAB_empty attribute.
# But set this to be false, to emphasise dependence on this value.
not_marked_empty = file.create_dataset(
"not_marked_empty", data=consecutive_numbers[0:0], shape=(0,), dtype=float
)
not_marked_empty.attrs["MATLAB_empty"] = np.array(0, dtype=np.uint8)

# Create an attribute at the root of the file for debugging and testing purposes
file.attrs["file_attribute"] = 1

file.close()
return

Expand Down
7 changes: 7 additions & 0 deletions tdms/tests/unit/benchmark_scripts/create_structure_array.m
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,13 @@
read_in_test.matrix = reshape(0:11, 2, 6);
read_in_test.tensor = reshape(0:11, 2, 3, 2);

%% The two forms of the empty struct / matrix that we might come across
empty_struct = struct([]);
empty_array = [];

%% A non-empty standalone array that we can attempt to read from
nonempty_dataset = 0:4;

%% save variables to the file we need
% Save the files to the expected filename for the unit tests to read the
% data back in.
Expand Down
Binary file not shown.
61 changes: 61 additions & 0 deletions tdms/tests/unit/hdf5_io_tests/test_hdf5_base.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#include <catch2/catch_test_macros.hpp>
#include <spdlog/spdlog.h>

#include "hdf5_io/hdf5_reader.h"
#include "unit_test_utils.h"

using tdms_unit_test_data::hdf5_test_file;
using tdms_unit_test_data::struct_testdata;

// HDF5Base cannot be instantiated, so we use HDF5Reader just to avoid
// accidental writes.
TEST_CASE("HDF5Base: flagged_MATLAB_empty()") {

HDF5Reader matlab_file(struct_testdata);
HDF5Reader hdf5_file(hdf5_test_file);

SECTION("Point to empty objects") {
spdlog::info("HDF5Base::flagged_MATLAB_empty() [Empty objects]");
// Point to empty dataset (array)
REQUIRE(matlab_file.flagged_MATLAB_empty("empty_array"));
// Point to empty group (struct)
REQUIRE(matlab_file.flagged_MATLAB_empty("empty_struct"));
}

SECTION("Point to non-empty objects") {
spdlog::info("HDF5Base::flagged_MATLAB_empty() [Non-empty objects]");
// Point to nonempty dataset (array)
REQUIRE(!matlab_file.flagged_MATLAB_empty("nonempty_dataset"));
// Point to nonempty group (struct)
REQUIRE(!matlab_file.flagged_MATLAB_empty("example_struct"));
// Point to a nonempty dataset within a group
REQUIRE(!matlab_file.flagged_MATLAB_empty("read_in_test/vector"));
// Point to nonempty group not created by MATLAB (so the attribute should
// not be flagged)
REQUIRE(!hdf5_file.flagged_MATLAB_empty("read_in_test"));
}

SECTION("Exception cases") {
spdlog::info("HDF5Base::flagged_MATLAB_empty() [Exceptions]");
// Object not present in the file
REQUIRE_THROWS_AS(
matlab_file.flagged_MATLAB_empty("no_object_with_this_name"),
std::runtime_error);
REQUIRE_THROWS_AS(
hdf5_file.flagged_MATLAB_empty("no_object_with_this_name"),
std::runtime_error);

// Object is not a group or dataset
REQUIRE_THROWS_AS(hdf5_file.flagged_MATLAB_empty("file_attribute"),
std::runtime_error);
}

SECTION("False positives") {
spdlog::info("HDF5Base::flagged_MATLAB_empty() [False positives]");
// Object has an attribute MATLAB_empty, but still contains data
REQUIRE(hdf5_file.flagged_MATLAB_empty("flag_as_empty"));
// Object is an empty array, has the MATLAB_empty flag, but this is set to
// false and thus it should come back as not being empty
REQUIRE(!hdf5_file.flagged_MATLAB_empty("not_marked_empty"));
}
}