From 9e4ba604c59447815e985d73c5cdd9ed0c228a3b Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Wed, 10 Nov 2021 18:06:29 -0800 Subject: [PATCH 01/19] Add first at reading sidecar modifications --- src/hdmf/backends/hdf5/h5tools.py | 62 ++++++++++- tests/unit/io_tests/__init__.py | 0 tests/unit/io_tests/test_sidecar.py | 162 ++++++++++++++++++++++++++++ 3 files changed, 223 insertions(+), 1 deletion(-) create mode 100644 tests/unit/io_tests/__init__.py create mode 100644 tests/unit/io_tests/test_sidecar.py diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py index 9567a8054..d7b9a31c4 100644 --- a/src/hdmf/backends/hdf5/h5tools.py +++ b/src/hdmf/backends/hdf5/h5tools.py @@ -1,3 +1,4 @@ +import json import logging import os.path import warnings @@ -18,7 +19,8 @@ from ...container import Container from ...data_utils import AbstractDataChunkIterator from ...spec import RefSpec, DtypeSpec, NamespaceCatalog, GroupSpec, NamespaceBuilder -from ...utils import docval, getargs, popargs, call_docval_func, get_data_shape, fmt_docval_args, get_docval, StrDataset +from ...utils import (docval, getargs, popargs, call_docval_func, get_data_shape, fmt_docval_args, get_docval, + StrDataset, LabelledDict) ROOT_NAME = 'root' SPEC_LOC_ATTR = '.specloc' @@ -514,6 +516,7 @@ def read_builder(self): if f_builder is None: f_builder = self.__read_group(self.__file, ROOT_NAME, ignore=ignore) self.__read[self.__file] = f_builder + self.update_builder_from_sidecar(f_builder) return f_builder def __set_written(self, builder): @@ -1549,3 +1552,60 @@ def set_dataio(cls, **kwargs): """ cargs, ckwargs = fmt_docval_args(H5DataIO.__init__, kwargs) return H5DataIO(*cargs, **ckwargs) + + @docval( + {'name': 'f_builder', 'type': GroupBuilder, 'doc': 'A GroupBuilder representing the main file object.'}, + returns='The same input GroupBuilder, now modified.', + rtype='GroupBuilder' + ) + def update_builder_from_sidecar(self, **kwargs): + # in-place update of the builder + # the sidecar json will have the same name as the file but have suffix .json + f_builder = getargs('f_builder', kwargs) + sidecar_filename = Path(self.__file.filename).with_suffix('.json') + f = open(sidecar_filename, 'r') + versions = json.load(f)['versions'] + + builder_map = self.__get_object_id_map(f_builder) + for version_dict in versions: + for change_dict in version_dict.get('changes'): + object_id = change_dict['object_id'] + relative_path = change_dict.get('relative_path') + new_value = change_dict['new_value'] + + builder = builder_map[object_id] + if relative_path in builder.attributes: + # TODO handle different dtypes + builder.attributes[relative_path] = new_value + elif isinstance(builder, GroupBuilder): + obj = builder.get(relative_path) + if isinstance(obj, DatasetBuilder): # update data in sub-DatasetBuilder + self.__update_dataset_builder(obj, new_value) + else: + raise ValueError("Relative path '%s' not recognized as a dataset or attribute") + else: # DatasetBuilder has object_id + if not relative_path: # update data + self.__update_dataset_builder(builder, new_value) + else: + raise ValueError("Relative path '%s' not recognized as None or attribute") + # TODO handle compound dtypes + + return f_builder + + def __update_dataset_builder(self, dset_builder, value): + # TODO handle different dtypes + dset_builder['data'] = value + + def __get_object_id_map(self, builder): + stack = [builder] + ret = dict() + while len(stack): + b = stack.pop() + if 'object_id' in b.attributes: + ret[b.attributes['object_id']] = b + if 
isinstance(b, GroupBuilder): + for g in b.groups.values(): + stack.append(g) + for d in b.datasets.values(): + stack.append(d) + return ret diff --git a/tests/unit/io_tests/__init__.py b/tests/unit/io_tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/unit/io_tests/test_sidecar.py b/tests/unit/io_tests/test_sidecar.py new file mode 100644 index 000000000..2a9d2deae --- /dev/null +++ b/tests/unit/io_tests/test_sidecar.py @@ -0,0 +1,162 @@ +import json +import os + +from hdmf import Container +from hdmf.backends.hdf5.h5tools import HDF5IO +from hdmf.build import BuildManager, TypeMap, ObjectMapper +from hdmf.spec import AttributeSpec, DatasetSpec, GroupSpec, SpecCatalog, SpecNamespace, NamespaceCatalog +from hdmf.testing import TestCase +from hdmf.utils import getargs, docval + + +class TestBasic(TestCase): + + def setUp(self): + self.h5_path = "./tests/unit/io_tests/test_sidecar.h5" + foo2 = Foo('sub_foo', [-1, -2, -3], 'OLD', [-17]) + foo1 = Foo('foo1', [1, 2, 3], 'old', [17], foo2) + with HDF5IO(self.h5_path, manager=_get_manager(), mode='w') as io: + io.write(foo1) + + version2 = { + "label": "version 2", + "description": "change attr1 from 'old' to 'my experiment' and my_data from [1, 2, 3] to [4, 5, 6, 7]", + "changes": [ + { + "object_id": foo1.object_id, + "relative_path": "attr1", + "new_value": "my experiment" + }, + { + "object_id": foo1.object_id, + "relative_path": "my_data", + "new_value": [4, 5, 6, 7] + } + ] + } + + version3 = { + "label": "version 3", + "description": "change sub_foo/my_data from [-1, -2, -3] to [[0]]", + "changes": [ + { + "object_id": foo2.object_id, + "relative_path": "my_data", + "new_value": [[0]] + } + ] + } + + sidecar = dict() + sidecar["versions"] = [version2, version3] + + self.json_path = "./tests/unit/io_tests/test_sidecar.json" + with open(self.json_path, 'w') as outfile: + json.dump(sidecar, outfile) + + def tearDown(self): + if os.path.exists(self.h5_path): + os.remove(self.h5_path) + if os.path.exists(self.json_path): + os.remove(self.json_path) + + def test_update_builder(self): + io = HDF5IO(self.h5_path, 'r', manager=_get_manager()) + foo1 = io.read() + assert foo1.attr1 == "my experiment" + assert foo1.my_data == [4, 5, 6, 7] + assert foo1.sub_foo.my_data == [[0]] + + +class Foo(Container): + + @docval({'name': 'name', 'type': str, 'doc': 'the name of this Foo'}, + {'name': 'my_data', 'type': ('array_data', 'data'), 'doc': 'a 1-D integer dataset'}, + {'name': 'attr1', 'type': str, 'doc': 'a string attribute'}, + {'name': 'attr2', 'type': ('array_data', 'data'), 'doc': 'a 1-D integer attribute'}, + {'name': 'sub_foo', 'type': 'Foo', 'doc': 'a child Foo', 'default': None}) + def __init__(self, **kwargs): + name, my_data, attr1, attr2, sub_foo = getargs('name', 'my_data', 'attr1', 'attr2', 'sub_foo', kwargs) + super().__init__(name=name) + self.__data = my_data + self.__attr1 = attr1 + self.__attr2 = attr2 + self.__sub_foo = sub_foo + if sub_foo is not None: + assert sub_foo.name == 'sub_foo' # on read mapping will not work otherwise + self.__sub_foo.parent = self + + @property + def my_data(self): + return self.__data + + @property + def attr1(self): + return self.__attr1 + + @property + def attr2(self): + return self.__attr2 + + @property + def sub_foo(self): + return self.__sub_foo + + +def _get_manager(): + foo_spec = GroupSpec( + doc='A test group specification with a data type', + data_type_def='Foo', + groups=[ + GroupSpec( + doc='a child Foo', + data_type_inc='Foo', + name='sub_foo', + quantity='?', 
+ ) + ], + datasets=[ + DatasetSpec( + doc='a 1-D integer dataset', + dtype='int', + name='my_data', + shape=[None, ], + attributes=[ + AttributeSpec( + name='attr2', + doc='a 1-D integer attribute', + dtype='int', + shape=[None, ], + ) + ] + ) + ], + attributes=[ + AttributeSpec(name='attr1', doc='a string attribute', dtype='text'), + ] + ) + + class FooMapper(ObjectMapper): + """Remap 'attr2' attribute on Foo container to 'my_data' dataset spec > 'attr2' attribute spec.""" + def __init__(self, spec): + super().__init__(spec) + my_data_spec = spec.get_dataset('my_data') + self.map_spec('attr2', my_data_spec.get_attribute('attr2')) + + spec_catalog = SpecCatalog() + spec_catalog.register_spec(foo_spec, 'test.yaml') + namespace_name = 'test_core' + namespace = SpecNamespace( + doc='a test namespace', + name=namespace_name, + schema=[{'source': 'test.yaml'}], + version='0.1.0', + catalog=spec_catalog + ) + namespace_catalog = NamespaceCatalog() + namespace_catalog.add_namespace(namespace_name, namespace) + type_map = TypeMap(namespace_catalog) + type_map.register_container_type(namespace_name, 'Foo', Foo) + type_map.register_map(Foo, FooMapper) + manager = BuildManager(type_map) + return manager From 1f539190aea84053565f3f60793e1bb37af2c18e Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Wed, 10 Nov 2021 18:33:23 -0800 Subject: [PATCH 02/19] Pretty-print json --- tests/unit/io_tests/test_sidecar.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/io_tests/test_sidecar.py b/tests/unit/io_tests/test_sidecar.py index 2a9d2deae..a3421c678 100644 --- a/tests/unit/io_tests/test_sidecar.py +++ b/tests/unit/io_tests/test_sidecar.py @@ -52,7 +52,7 @@ def setUp(self): self.json_path = "./tests/unit/io_tests/test_sidecar.json" with open(self.json_path, 'w') as outfile: - json.dump(sidecar, outfile) + json.dump(sidecar, outfile, indent=4) def tearDown(self): if os.path.exists(self.h5_path): From dafc650c4ecaeea3847d50033a08a8a94de0c953 Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Wed, 10 Nov 2021 18:43:11 -0800 Subject: [PATCH 03/19] Update to work if json is not present --- src/hdmf/backends/hdf5/h5tools.py | 58 ++++++++++++++++--------------- 1 file changed, 30 insertions(+), 28 deletions(-) diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py index d7b9a31c4..5923f848a 100644 --- a/src/hdmf/backends/hdf5/h5tools.py +++ b/src/hdmf/backends/hdf5/h5tools.py @@ -19,8 +19,7 @@ from ...container import Container from ...data_utils import AbstractDataChunkIterator from ...spec import RefSpec, DtypeSpec, NamespaceCatalog, GroupSpec, NamespaceBuilder -from ...utils import (docval, getargs, popargs, call_docval_func, get_data_shape, fmt_docval_args, get_docval, - StrDataset, LabelledDict) +from ...utils import docval, getargs, popargs, call_docval_func, get_data_shape, fmt_docval_args, get_docval, StrDataset ROOT_NAME = 'root' SPEC_LOC_ATTR = '.specloc' @@ -1562,32 +1561,35 @@ def update_builder_from_sidecar(self, **kwargs): # in-place update of the builder # the sidecar json will have the same name as the file but have suffix .json f_builder = getargs('f_builder', kwargs) - sidecar_filename = Path(self.__file.filename).with_suffix('.json') - f = open(sidecar_filename, 'r') - versions = json.load(f)['versions'] - - builder_map = self.__get_object_id_map(f_builder) - for version_dict in versions: - for change_dict in version_dict.get('changes'): - object_id = change_dict['object_id'] - relative_path = change_dict.get('relative_path') - new_value = 
change_dict['new_value'] - - builder = builder_map[object_id] - if relative_path in builder.attributes: - # TODO handle different dtypes - builder.attributes[relative_path] = new_value - elif isinstance(builder, GroupBuilder): - obj = builder.get(relative_path) - if isinstance(obj, DatasetBuilder): # update data in sub-DatasetBuilder - self.__update_dataset_builder(obj, new_value) - else: - raise ValueError("Relative path '%s' not recognized as a dataset or attribute") - else: # DatasetBuilder has object_id - if not relative_path: # update data - self.__update_dataset_builder(builder, new_value) - else: - raise ValueError("Relative path '%s' not recognized as None or attribute") + sidecar_path = Path(self.__file.filename).with_suffix('.json') + if not sidecar_path.is_file(): + return + + with open(sidecar_path, 'r') as f: + versions = json.load(f)['versions'] + + builder_map = self.__get_object_id_map(f_builder) + for version_dict in versions: + for change_dict in version_dict.get('changes'): + object_id = change_dict['object_id'] + relative_path = change_dict.get('relative_path') + new_value = change_dict['new_value'] + + builder = builder_map[object_id] + if relative_path in builder.attributes: + # TODO handle different dtypes + builder.attributes[relative_path] = new_value + elif isinstance(builder, GroupBuilder): + obj = builder.get(relative_path) + if isinstance(obj, DatasetBuilder): # update data in sub-DatasetBuilder + self.__update_dataset_builder(obj, new_value) + else: + raise ValueError("Relative path '%s' not recognized as a dataset or attribute") + else: # DatasetBuilder has object_id + if not relative_path: # update data + self.__update_dataset_builder(builder, new_value) + else: + raise ValueError("Relative path '%s' not recognized as None or attribute") # TODO handle compound dtypes return f_builder From de5fefe9c1b11e9e6f6101ab9cdf0c635589abeb Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Thu, 11 Nov 2021 10:13:03 -0800 Subject: [PATCH 04/19] Refactor BuilderUpdater functionality to sep class --- src/hdmf/backends/builderupdater.py | 73 +++++++++++++++++++++++++++++ src/hdmf/backends/hdf5/h5tools.py | 62 ------------------------ src/hdmf/backends/io.py | 2 + 3 files changed, 75 insertions(+), 62 deletions(-) create mode 100644 src/hdmf/backends/builderupdater.py diff --git a/src/hdmf/backends/builderupdater.py b/src/hdmf/backends/builderupdater.py new file mode 100644 index 000000000..051e0e5ce --- /dev/null +++ b/src/hdmf/backends/builderupdater.py @@ -0,0 +1,73 @@ +import json +from pathlib import Path + +from ..build import GroupBuilder, DatasetBuilder +from ..utils import docval, getargs + + +class BuilderUpdater: + + @classmethod + @docval( + {'name': 'file_builder', 'type': GroupBuilder, 'doc': 'A GroupBuilder representing the main file object.'}, + {'name': 'file_path', 'type': str, + 'doc': 'Path to the data file. 
The sidecar file is assumed to have the same base name but have suffix .json.'}, + returns='The same input GroupBuilder, now modified.', + rtype='GroupBuilder' + ) + def update_from_sidecar_json(cls, **kwargs): + # in-place update of the builder + # the sidecar json will have the same name as the file but have suffix .json + f_builder, path = getargs('file_builder', 'file_path', kwargs) + sidecar_path = Path(path).with_suffix('.json') + if not sidecar_path.is_file(): + return + + with open(sidecar_path, 'r') as f: + versions = json.load(f)['versions'] + + builder_map = cls.__get_object_id_map(f_builder) + for version_dict in versions: + for change_dict in version_dict.get('changes'): + object_id = change_dict['object_id'] + relative_path = change_dict.get('relative_path') + new_value = change_dict['new_value'] + + builder = builder_map[object_id] + if relative_path in builder.attributes: + # TODO handle different dtypes + builder.attributes[relative_path] = new_value + elif isinstance(builder, GroupBuilder): + obj = builder.get(relative_path) + if isinstance(obj, DatasetBuilder): # update data in sub-DatasetBuilder + cls.__update_dataset_builder(obj, new_value) + else: + raise ValueError("Relative path '%s' not recognized as a dataset or attribute") + else: # DatasetBuilder has object_id + if not relative_path: # update data + cls.__update_dataset_builder(builder, new_value) + else: + raise ValueError("Relative path '%s' not recognized as None or attribute") + # TODO handle compound dtypes + + return f_builder + + @classmethod + def __update_dataset_builder(cls, dset_builder, value): + # TODO handle different dtypes + dset_builder['data'] = value + + @classmethod + def __get_object_id_map(cls, builder): + stack = [builder] + ret = dict() + while len(stack): + b = stack.pop() + if 'object_id' in b.attributes: + ret[b.attributes['object_id']] = b + if isinstance(b, GroupBuilder): + for g in b.groups.values(): + stack.append(g) + for d in b.datasets.values(): + stack.append(d) + return ret diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py index 5923f848a..9567a8054 100644 --- a/src/hdmf/backends/hdf5/h5tools.py +++ b/src/hdmf/backends/hdf5/h5tools.py @@ -1,4 +1,3 @@ -import json import logging import os.path import warnings @@ -515,7 +514,6 @@ def read_builder(self): if f_builder is None: f_builder = self.__read_group(self.__file, ROOT_NAME, ignore=ignore) self.__read[self.__file] = f_builder - self.update_builder_from_sidecar(f_builder) return f_builder def __set_written(self, builder): @@ -1551,63 +1549,3 @@ def set_dataio(cls, **kwargs): """ cargs, ckwargs = fmt_docval_args(H5DataIO.__init__, kwargs) return H5DataIO(*cargs, **ckwargs) - - @docval( - {'name': 'f_builder', 'type': GroupBuilder, 'doc': 'A GroupBuilder representing the main file object.'}, - returns='The same input GroupBuilder, now modified.', - rtype='GroupBuilder' - ) - def update_builder_from_sidecar(self, **kwargs): - # in-place update of the builder - # the sidecar json will have the same name as the file but have suffix .json - f_builder = getargs('f_builder', kwargs) - sidecar_path = Path(self.__file.filename).with_suffix('.json') - if not sidecar_path.is_file(): - return - - with open(sidecar_path, 'r') as f: - versions = json.load(f)['versions'] - - builder_map = self.__get_object_id_map(f_builder) - for version_dict in versions: - for change_dict in version_dict.get('changes'): - object_id = change_dict['object_id'] - relative_path = change_dict.get('relative_path') - new_value = 
change_dict['new_value'] - - builder = builder_map[object_id] - if relative_path in builder.attributes: - # TODO handle different dtypes - builder.attributes[relative_path] = new_value - elif isinstance(builder, GroupBuilder): - obj = builder.get(relative_path) - if isinstance(obj, DatasetBuilder): # update data in sub-DatasetBuilder - self.__update_dataset_builder(obj, new_value) - else: - raise ValueError("Relative path '%s' not recognized as a dataset or attribute") - else: # DatasetBuilder has object_id - if not relative_path: # update data - self.__update_dataset_builder(builder, new_value) - else: - raise ValueError("Relative path '%s' not recognized as None or attribute") - # TODO handle compound dtypes - - return f_builder - - def __update_dataset_builder(self, dset_builder, value): - # TODO handle different dtypes - dset_builder['data'] = value - - def __get_object_id_map(self, builder): - stack = [builder] - ret = dict() - while len(stack): - b = stack.pop() - if 'object_id' in b.attributes: - ret[b.attributes['object_id']] = b - if isinstance(b, GroupBuilder): - for g in b.groups.values(): - stack.append(g) - for d in b.datasets.values(): - stack.append(d) - return ret diff --git a/src/hdmf/backends/io.py b/src/hdmf/backends/io.py index d5fb6a889..a60891fd7 100644 --- a/src/hdmf/backends/io.py +++ b/src/hdmf/backends/io.py @@ -4,6 +4,7 @@ from ..build import BuildManager, GroupBuilder from ..container import Container from ..utils import docval, getargs, popargs +from .builderupdater import BuilderUpdater class HDMFIO(metaclass=ABCMeta): @@ -38,6 +39,7 @@ def read(self, **kwargs): if all(len(v) == 0 for v in f_builder.values()): # TODO also check that the keys are appropriate. print a better error message raise UnsupportedOperation('Cannot build data. 
There are no values.') + BuilderUpdater.update_from_sidecar_json(f_builder, self.__source) container = self.__manager.construct(f_builder) return container From 036fa1ef47f2da3a15d3d019773c2af0197e26d7 Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Tue, 30 Nov 2021 12:23:14 -0800 Subject: [PATCH 05/19] Handle changing sub-dataset attr, add sidecar fields --- src/hdmf/backends/builderupdater.py | 28 +++++++++++++--------- src/hdmf/build/builders.py | 27 ++++++++++++++++++++++ tests/unit/io_tests/test_sidecar.py | 36 ++++++++++++++++++++++------- 3 files changed, 72 insertions(+), 19 deletions(-) diff --git a/src/hdmf/backends/builderupdater.py b/src/hdmf/backends/builderupdater.py index 051e0e5ce..ebb64515f 100644 --- a/src/hdmf/backends/builderupdater.py +++ b/src/hdmf/backends/builderupdater.py @@ -31,30 +31,36 @@ def update_from_sidecar_json(cls, **kwargs): for change_dict in version_dict.get('changes'): object_id = change_dict['object_id'] relative_path = change_dict.get('relative_path') - new_value = change_dict['new_value'] + new_value = change_dict['value'] builder = builder_map[object_id] + # TODO handle paths to links + # TODO handle object references if relative_path in builder.attributes: - # TODO handle different dtypes + # TODO handle different dtypes including compound dtypes builder.attributes[relative_path] = new_value - elif isinstance(builder, GroupBuilder): - obj = builder.get(relative_path) - if isinstance(obj, DatasetBuilder): # update data in sub-DatasetBuilder - cls.__update_dataset_builder(obj, new_value) - else: - raise ValueError("Relative path '%s' not recognized as a dataset or attribute") + elif isinstance(builder, GroupBuilder): # GroupBuilder has object_id + sub_dset_builder, attr_name = builder.get_subbuilder(relative_path) + if sub_dset_builder is None: + raise ValueError("Relative path '%s' not recognized as a dataset or attribute" + % relative_path) + if attr_name is None: # update data in sub-DatasetBuilder + cls.__update_dataset_builder(sub_dset_builder, new_value) + else: # update attribute + sub_dset_builder.attributes[attr_name] = new_value + else: # DatasetBuilder has object_id if not relative_path: # update data cls.__update_dataset_builder(builder, new_value) else: - raise ValueError("Relative path '%s' not recognized as None or attribute") - # TODO handle compound dtypes + raise ValueError("Relative path '%s' not recognized as None or attribute" % relative_path) return f_builder @classmethod def __update_dataset_builder(cls, dset_builder, value): - # TODO handle different dtypes + # TODO handle different dtypes including compound dtypes + # TODO consider replacing slices of a dataset or attribute dset_builder['data'] = value @classmethod diff --git a/src/hdmf/build/builders.py b/src/hdmf/build/builders.py index 72a1bbe4f..2274f9569 100644 --- a/src/hdmf/build/builders.py +++ b/src/hdmf/build/builders.py @@ -277,6 +277,33 @@ def __get_rec(self, key_ar): else: if key_ar[0] in self.groups: return self.groups[key_ar[0]].__get_rec(key_ar[1:]) + elif key_ar[0] in self.datasets: # "dset/x" must be an attribute on dset + assert len(key_ar) == 2 + return self.datasets[key_ar[0]].attributes[key_ar[1]] + raise KeyError(key_ar[0]) + + def get_subbuilder(self, key, default=None): + """Like dict.get, but looks in groups and datasets sub-dictionaries and ignores trailing attributes and links. 
+ """ + try: + key_ar = _posixpath.normpath(key).split('/') + return self.__get_subbuilder_rec(key_ar) + except KeyError: + return default + + def __get_subbuilder_rec(self, key_ar): + # recursive helper for get_subbuilder + # returns sub-builder and attribute name if present + if key_ar[0] in self.groups: + if len(key_ar) == 1: + return self.groups[key_ar[0]], None + elif key_ar[1] in (self.groups + self.datasets): + return self.groups[key_ar[0]].__get_subbuilder_rec(key_ar[1:]) + elif key_ar[0] in self.datasets: # "dset/x" must be an attribute on dset + if len(key_ar) == 1: + return self.datasets[key_ar[0]], None + assert len(key_ar) == 2 + return self.datasets[key_ar[0]], key_ar[1] raise KeyError(key_ar[0]) def __setitem__(self, args, val): diff --git a/tests/unit/io_tests/test_sidecar.py b/tests/unit/io_tests/test_sidecar.py index a3421c678..fef0442c1 100644 --- a/tests/unit/io_tests/test_sidecar.py +++ b/tests/unit/io_tests/test_sidecar.py @@ -20,35 +20,53 @@ def setUp(self): version2 = { "label": "version 2", - "description": "change attr1 from 'old' to 'my experiment' and my_data from [1, 2, 3] to [4, 5, 6, 7]", + "description": "change attr1 from 'old' to 'my experiment' and my_data from [1, 2, 3] to [4, 5]", + "datetime": "2020-10-29T19:15:15.789Z", + "agent": "John Doe", "changes": [ { "object_id": foo1.object_id, "relative_path": "attr1", - "new_value": "my experiment" + "value": "my experiment" }, { "object_id": foo1.object_id, "relative_path": "my_data", - "new_value": [4, 5, 6, 7] + "value": [4, 5], + "dtype": "int32" } ] } version3 = { "label": "version 3", - "description": "change sub_foo/my_data from [-1, -2, -3] to [[0]]", + "description": ("change sub_foo/my_data from [-1, -2, -3] to [[0]], delete my_data/attr2, and change " + "my_data from [4, 5] to [6, 7]"), + "datetime": "2021-11-30T20:16:16.790Z", + "agent": "Jane Doe", "changes": [ { "object_id": foo2.object_id, "relative_path": "my_data", - "new_value": [[0]] - } + "value": [[0]] + }, + { + "object_id": foo1.object_id, + "relative_path": "my_data/attr2", + "value": None # will be encoded on disk as null + }, + { + "object_id": foo1.object_id, + "relative_path": "my_data", + "value": [6, 7], + "dtype": "int8" + }, ] } sidecar = dict() sidecar["versions"] = [version2, version3] + sidecar["schema_version"] = "0.1.0" self.json_path = "./tests/unit/io_tests/test_sidecar.json" with open(self.json_path, 'w') as outfile: @@ -64,8 +82,9 @@ def test_update_builder(self): io = HDF5IO(self.h5_path, 'r', manager=_get_manager()) foo1 = io.read() assert foo1.attr1 == "my experiment" - assert foo1.my_data == [4, 5, 6, 7] + assert foo1.my_data == [6, 7] # TODO test dtype assert foo1.sub_foo.my_data == [[0]] + assert foo1.attr2 is None class Foo(Container): @@ -73,7 +92,7 @@ class Foo(Container): @docval({'name': 'name', 'type': str, 'doc': 'the name of this Foo'}, {'name': 'my_data', 'type': ('array_data', 'data'), 'doc': 'a 1-D integer dataset'}, {'name': 'attr1', 'type': str, 'doc': 'a string attribute'}, - {'name': 'attr2', 'type': ('array_data', 'data'), 'doc': 'a 1-D integer attribute'}, + {'name': 'attr2', 'type': ('array_data', 'data'), 'doc': 'a 1-D integer attribute', 'default': None}, {'name': 'sub_foo', 'type': 'Foo', 'doc': 'a child Foo', 'default': None}) def __init__(self, **kwargs): name, my_data, attr1, attr2, sub_foo = getargs('name', 'my_data', 'attr1', 'attr2', 'sub_foo', kwargs) @@ -127,6 +146,7 @@ def _get_manager(): doc='a 1-D integer attribute', dtype='int', shape=[None, ], + required=False ) ] ) From 
b4b5419ef801d802a3c6710ad84ebb1b158969b1 Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Tue, 30 Nov 2021 12:27:45 -0800 Subject: [PATCH 06/19] Use semantic versioning in version label --- src/hdmf/backends/builderupdater.py | 2 +- tests/unit/io_tests/test_sidecar.py | 26 ++++++++++++++++++++------ 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/src/hdmf/backends/builderupdater.py b/src/hdmf/backends/builderupdater.py index ebb64515f..30935c62c 100644 --- a/src/hdmf/backends/builderupdater.py +++ b/src/hdmf/backends/builderupdater.py @@ -1,7 +1,7 @@ import json from pathlib import Path -from ..build import GroupBuilder, DatasetBuilder +from ..build import GroupBuilder from ..utils import docval, getargs diff --git a/tests/unit/io_tests/test_sidecar.py b/tests/unit/io_tests/test_sidecar.py index fef0442c1..0a9b747f1 100644 --- a/tests/unit/io_tests/test_sidecar.py +++ b/tests/unit/io_tests/test_sidecar.py @@ -18,8 +18,8 @@ def setUp(self): with HDF5IO(self.h5_path, manager=_get_manager(), mode='w') as io: io.write(foo1) - version2 = { - "label": "version 2", + version2_0_0 = { + "label": "2.0.0", "description": "change attr1 from 'old' to 'my experiment' and my_data from [1, 2, 3] to [4, 5]", "datetime": "2020-10-29T19:15:15.789Z", "agent": "John Doe", @@ -38,10 +38,10 @@ def setUp(self): ] } - version3 = { - "label": "version 3", + version3_0_0 = { + "label": "3.0.0", "description": ("change sub_foo/my_data from [-1, -2, -3] to [[0]], delete my_data/attr2, and change " - "my_data from [4, 5] to [6, 7]"), + "dtype of my_data"), "datetime": "2021-11-30T20:16:16.790Z", "agent": "Jane Doe", "changes": [ @@ -64,8 +64,22 @@ def setUp(self): ] } + version3_0_1 = { + "label": "3.0.1", + "description": "change my_data from [4, 5] to [6, 7]", + "datetime": "2021-11-30T20:17:16.790Z", + "agent": "Jane Doe", + "changes": [ + { + "object_id": foo1.object_id, + "relative_path": "my_data", + "value": [6, 7], + }, + ] + } + sidecar = dict() - sidecar["versions"] = [version2, version3] + sidecar["versions"] = [version2_0_0, version3_0_0, version3_0_1] sidecar["schema_version"] = "0.1.0" self.json_path = "./tests/unit/io_tests/test_sidecar.json" From 151c69d5c6cbb963ac50f0687b76f180a1f264fc Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Wed, 1 Dec 2021 11:55:17 -0800 Subject: [PATCH 07/19] Add jsonschema for sidecar json --- sidecar.schema.json | 138 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 138 insertions(+) create mode 100644 sidecar.schema.json diff --git a/sidecar.schema.json b/sidecar.schema.json new file mode 100644 index 000000000..72d168b19 --- /dev/null +++ b/sidecar.schema.json @@ -0,0 +1,138 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "sidecar.schema.json", + "title": "Schema for the sidecar JSON file", + "description": "A schema for validating HDMF sidecar JSON files", + "version": "0.1.0", + "type": "object", + "additionalProperties": false, + "properties": { + "versions": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": false, + "required": [ + "label", + "description", + "datetime", + "agent", + "changes" + ], + "properties": { + "label": { + "description": "Version label. 
Must conform to Semantic Versioning v2.0", + "type": "string", + "pattern": "^(0|[1-9]\\d*)\\.(0|[1-9]\\d*)\\.(0|[1-9]\\d*)(?:-((?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\\.(?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\\+([0-9a-zA-Z-]+(?:\\.[0-9a-zA-Z-]+)*))?$" + }, + "description": {"type": "string"}, + "datetime": {"type": "string", "format": "date"}, + "agent": {"type": "string"}, + "changes": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": false, + "required": [ + "object_id", + "relative_path", + "value" + ], + "properties": { + "object_id": { + "description": "Object ID. Must conform to UUID-4 with dashes", + "type": "string", + "pattern": "^[0-9a-f]{8}\\-[0-9a-f]{4}\\-4[0-9a-f]{3}\\-[89ab][0-9a-f]{3}\\-[0-9a-f]{12}$" + }, + "relative_path": {"type": "string"}, + "value": { + "description": "New value. If null, then the value is removed.", + "member_region": { "type": ["array", "string", "number", "boolean", "null"] } + }, + "dtype": {"$ref": "#/definitions/dtype"} + } + } + } + } + } + }, + "schema_version": { + "description": "The version of the sidecar JSON schema that the file conforms to. Must confirm to Semantic Versioning v2.0", + "type": "string", + "pattern": "^(0|[1-9]\\d*)\\.(0|[1-9]\\d*)\\.(0|[1-9]\\d*)(?:-((?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\\.(?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\\+([0-9a-zA-Z-]+(?:\\.[0-9a-zA-Z-]+)*))?$" + } + }, + "definitions": { + "protectedString": { + "type": "string", + "pattern": "^[A-Za-z_][A-Za-z0-9_]*$" + }, + "dtype": { + "anyOf": [ + {"$ref": "#/definitions/flat_dtype"}, + {"$ref": "#/definitions/compound_dtype"} + ] + }, + "flat_dtype": { + "description": "Required string describing the data type of the attribute", + "anyOf": [ + { + "type": "string", + "enum": [ + "float", + "float32", + "double", + "float64", + "long", + "int64", + "int", + "int32", + "int16", + "int8", + "uint", + "uint32", + "uint16", + "uint8", + "uint64", + "numeric", + "text", + "utf", + "utf8", + "utf-8", + "ascii", + "bool", + "isodatetime" + ] + }, + {"$ref": "#/definitions/ref_dtype"} + ] + }, + "ref_dtype": { + "type": "object", + "required": ["target_type", "reftype"], + "properties": { + "target_type": { + "description": "Describes the neurodata_type of the target that the reference points to", + "type": "string" + }, + "reftype": { + "description": "describes the kind of reference", + "type": "string", + "enum": ["ref", "reference", "object", "region"] + } + } + }, + "compound_dtype": { + "type": "array", + "items": { + "type": "object", + "required": ["name", "doc", "dtype"], + "properties": { + "name": {"$ref": "#/definitions/protectedString"}, + "doc": {"type": "string"}, + "dtype": {"$ref": "#/definitions/flat_dtype"} + } + } + } + } +} From 32d1397a8a629e0b5095ac17a55963ecf36ed35c Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Wed, 1 Dec 2021 12:25:49 -0800 Subject: [PATCH 08/19] Add validation to read --- sidecar.schema.json | 4 ++++ src/hdmf/backends/__init__.py | 1 + src/hdmf/backends/builderupdater.py | 33 +++++++++++++++++++++++++---- tests/unit/io_tests/test_sidecar.py | 33 +++++++++++++++++++++++++++-- 4 files changed, 65 insertions(+), 6 deletions(-) diff --git a/sidecar.schema.json b/sidecar.schema.json index 72d168b19..ff0225f70 100644 --- a/sidecar.schema.json +++ b/sidecar.schema.json @@ -6,6 +6,10 @@ "version": "0.1.0", "type": "object", "additionalProperties": false, + "required": [ + "versions", + "schema_version" + ], "properties": { "versions": { "type": "array", diff --git 
a/src/hdmf/backends/__init__.py b/src/hdmf/backends/__init__.py index 030a2daee..1750af2b1 100644 --- a/src/hdmf/backends/__init__.py +++ b/src/hdmf/backends/__init__.py @@ -1 +1,2 @@ from . import hdf5 +from .builderupdater import SidecarValidationError diff --git a/src/hdmf/backends/builderupdater.py b/src/hdmf/backends/builderupdater.py index 30935c62c..5039b98d9 100644 --- a/src/hdmf/backends/builderupdater.py +++ b/src/hdmf/backends/builderupdater.py @@ -1,4 +1,5 @@ import json +import jsonschema from pathlib import Path from ..build import GroupBuilder @@ -7,25 +8,44 @@ class BuilderUpdater: + json_schema_file = 'sidecar.schema.json' + + @classmethod + def get_json_schema(cls): + """Load the sidecar JSON schema.""" + with open(cls.json_schema_file, 'r') as file: + schema = json.load(file) + return schema + + @classmethod + def validate_sidecar(cls, sidecar_dict): + """Validate the sidecar JSON dict with the sidecar JSON schema.""" + try: + jsonschema.validate(instance=sidecar_dict, schema=cls.get_json_schema()) + except jsonschema.exceptions.ValidationError as e: + raise SidecarValidationError() from e + @classmethod @docval( {'name': 'file_builder', 'type': GroupBuilder, 'doc': 'A GroupBuilder representing the main file object.'}, {'name': 'file_path', 'type': str, - 'doc': 'Path to the data file. The sidecar file is assumed to have the same base name but have suffix .json.'}, + 'doc': 'Path to the data file. The sidecar file is assumed to have the same base name but with suffix .json.'}, returns='The same input GroupBuilder, now modified.', rtype='GroupBuilder' ) def update_from_sidecar_json(cls, **kwargs): - # in-place update of the builder - # the sidecar json will have the same name as the file but have suffix .json + """Update the file builder in-place with the values specified in the sidecar JSON.""" + # the sidecar json must have the same name as the file but with suffix .json f_builder, path = getargs('file_builder', 'file_path', kwargs) sidecar_path = Path(path).with_suffix('.json') if not sidecar_path.is_file(): return with open(sidecar_path, 'r') as f: - versions = json.load(f)['versions'] + sidecar_dict = json.load(f) + cls.validate_sidecar(sidecar_dict) + versions = sidecar_dict['versions'] builder_map = cls.__get_object_id_map(f_builder) for version_dict in versions: for change_dict in version_dict.get('changes'): @@ -77,3 +97,8 @@ def __get_object_id_map(cls, builder): for d in b.datasets.values(): stack.append(d) return ret + + +class SidecarValidationError(Exception): + """Error raised when a sidecar file fails validation with the JSON schema.""" + pass diff --git a/tests/unit/io_tests/test_sidecar.py b/tests/unit/io_tests/test_sidecar.py index 0a9b747f1..c0359d598 100644 --- a/tests/unit/io_tests/test_sidecar.py +++ b/tests/unit/io_tests/test_sidecar.py @@ -3,6 +3,7 @@ from hdmf import Container from hdmf.backends.hdf5.h5tools import HDF5IO +from hdmf.backends import SidecarValidationError from hdmf.build import BuildManager, TypeMap, ObjectMapper from hdmf.spec import AttributeSpec, DatasetSpec, GroupSpec, SpecCatalog, SpecNamespace, NamespaceCatalog from hdmf.testing import TestCase @@ -93,14 +94,42 @@ def tearDown(self): os.remove(self.json_path) def test_update_builder(self): - io = HDF5IO(self.h5_path, 'r', manager=_get_manager()) - foo1 = io.read() + with HDF5IO(self.h5_path, 'r', manager=_get_manager()) as io: + foo1 = io.read() assert foo1.attr1 == "my experiment" assert foo1.my_data == [6, 7] # TODO test dtype assert foo1.sub_foo.my_data == [[0]] assert 
foo1.attr2 is None +class TestFailValidation(TestCase): + + def setUp(self): + self.h5_path = "./tests/unit/io_tests/test_sidecar_fail.h5" + foo2 = Foo('sub_foo', [-1, -2, -3], 'OLD', [-17]) + foo1 = Foo('foo1', [1, 2, 3], 'old', [17], foo2) + with HDF5IO(self.h5_path, manager=_get_manager(), mode='w') as io: + io.write(foo1) + + sidecar = dict() + sidecar["versions"] = [] + + self.json_path = "./tests/unit/io_tests/test_sidecar_fail.json" + with open(self.json_path, 'w') as outfile: + json.dump(sidecar, outfile, indent=4) + + def tearDown(self): + if os.path.exists(self.h5_path): + os.remove(self.h5_path) + if os.path.exists(self.json_path): + os.remove(self.json_path) + + def test_simple(self): + with HDF5IO(self.h5_path, 'r', manager=_get_manager()) as io: + with self.assertRaises(SidecarValidationError): + io.read() + + class Foo(Container): @docval({'name': 'name', 'type': str, 'doc': 'the name of this Foo'}, From 933ef40b5377d64761049932832a90046af3122b Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Mon, 6 Dec 2021 19:31:21 -0800 Subject: [PATCH 09/19] Update to use new schema. More tests needed --- sidecar.schema.json | 95 +++++++++----- src/hdmf/backends/builderupdater.py | 194 +++++++++++++++++++++++----- src/hdmf/build/__init__.py | 2 +- src/hdmf/build/builders.py | 1 + src/hdmf/build/objectmapper.py | 44 ++++--- src/hdmf/spec/spec.py | 2 +- tests/unit/io_tests/test_sidecar.py | 169 +++++++++++++----------- 7 files changed, 344 insertions(+), 163 deletions(-) diff --git a/sidecar.schema.json b/sidecar.schema.json index ff0225f70..850f47de0 100644 --- a/sidecar.schema.json +++ b/sidecar.schema.json @@ -7,57 +7,83 @@ "type": "object", "additionalProperties": false, "required": [ - "versions", + "operations", "schema_version" ], "properties": { - "versions": { + "operations": { "type": "array", "items": { "type": "object", "additionalProperties": false, "required": [ - "label", + "type", "description", - "datetime", - "agent", - "changes" + "object_id", + "relative_path" ], "properties": { - "label": { - "description": "Version label. Must conform to Semantic Versioning v2.0", - "type": "string", - "pattern": "^(0|[1-9]\\d*)\\.(0|[1-9]\\d*)\\.(0|[1-9]\\d*)(?:-((?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\\.(?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\\+([0-9a-zA-Z-]+(?:\\.[0-9a-zA-Z-]+)*))?$" + "type": { + "description": "The type of modification to perform on the field of a neurodata type.", + "member_region": { "type": ["replace", "delete", "create_attribute", "change_dtype"] } }, "description": {"type": "string"}, - "datetime": {"type": "string", "format": "date"}, - "agent": {"type": "string"}, - "changes": { - "type": "array", - "items": { - "type": "object", - "additionalProperties": false, - "required": [ - "object_id", - "relative_path", - "value" - ], + "object_id": { + "description": "Object ID. 
Must conform to UUID-4 with dashes", + "type": "string", + "pattern": "^[0-9a-f]{8}\\-[0-9a-f]{4}\\-4[0-9a-f]{3}\\-[89ab][0-9a-f]{3}\\-[0-9a-f]{12}$" + }, + "relative_path": {"type": "string"}, + "element_type": { + "anyOf": [ + { + "type": "string", + "enum": [ + "group", + "dataset", + "attribute" + ] + } + ] + }, + "value": { + "description": "New value.", + "member_region": { "type": ["array", "string", "number", "boolean", "null"] } + }, + "dtype": {"$ref": "#/definitions/dtype"} + }, + "allOf": [ + { + "description": "if type==replace, then value is required", + "if": { + "properties": { "type": { "const": "replace" } } + }, + "then": { + "required": [ "value" ] + } + }, + { + "description": "if type==delete, then value and dtype are not allowed", + "if": { + "properties": { "type": { "const": "delete" } } + }, + "then": { "properties": { - "object_id": { - "description": "Object ID. Must conform to UUID-4 with dashes", - "type": "string", - "pattern": "^[0-9a-f]{8}\\-[0-9a-f]{4}\\-4[0-9a-f]{3}\\-[89ab][0-9a-f]{3}\\-[0-9a-f]{12}$" - }, - "relative_path": {"type": "string"}, - "value": { - "description": "New value. If null, then the value is removed.", - "member_region": { "type": ["array", "string", "number", "boolean", "null"] } - }, - "dtype": {"$ref": "#/definitions/dtype"} + "value": false, + "dtype": false } } + }, + { + "description": "if type==create, then element_type is required", + "if": { + "properties": { "type": { "const": "create" } } + }, + "then": { + "required": [ "element_type" ] + } } - } + ] } }, "schema_version": { @@ -98,7 +124,6 @@ "uint16", "uint8", "uint64", - "numeric", "text", "utf", "utf8", diff --git a/src/hdmf/backends/builderupdater.py b/src/hdmf/backends/builderupdater.py index 5039b98d9..fe85248aa 100644 --- a/src/hdmf/backends/builderupdater.py +++ b/src/hdmf/backends/builderupdater.py @@ -1,8 +1,9 @@ import json import jsonschema +import numpy as np from pathlib import Path -from ..build import GroupBuilder +from ..build import GroupBuilder, ObjectMapper, unicode, ascii from ..utils import docval, getargs @@ -25,6 +26,64 @@ def validate_sidecar(cls, sidecar_dict): except jsonschema.exceptions.ValidationError as e: raise SidecarValidationError() from e + @classmethod + def convert_dtype_using_map(cls, value, dtype_str): + dtype = ObjectMapper.get_dtype_mapping(dtype_str) + if isinstance(value, (list, np.ndarray)): # array + if dtype is unicode: + dtype = 'U' + elif dtype is ascii: + dtype = 'S' + else: + dtype = np.dtype(dtype) + if isinstance(value, list): + ret = np.array(value, dtype=dtype) + else: + ret = value.astype(dtype) + else: # scalar + if dtype in (unicode, ascii): + ret = dtype(value) + else: + ret = np.dtype(dtype)(value) + return ret + + @classmethod + def convert_dtype_helper(cls, value, dtype): + # dtype comes from datasetbuilder or attribute + if isinstance(value, (list, np.ndarray)): # array + if dtype is str: + dtype = 'U' + elif dtype is bytes: + dtype = 'S' + else: + dtype = np.dtype(dtype) + if isinstance(value, list): + ret = np.array(value, dtype=dtype) + else: + ret = value.astype(dtype) + else: # scalar + if dtype in (str, bytes): + ret = dtype(value) + else: + ret = dtype.type(value) + return ret + + @classmethod + def convert_dtype(cls, value, dtype, old_value): + if value is None: + return None + # TODO handle compound dtypes + if dtype is not None: + new_value = cls.convert_dtype_using_map(value, dtype) + else: # keep the same dtype + if isinstance(old_value, np.ndarray): + new_value = cls.convert_dtype_helper(value, 
old_value.dtype) + else: + assert old_value is not None, \ + "Cannot convert new value to dtype without specifying new dtype or old value." + new_value = cls.convert_dtype_helper(value, type(old_value)) + return new_value + @classmethod @docval( {'name': 'file_builder', 'type': GroupBuilder, 'doc': 'A GroupBuilder representing the main file object.'}, @@ -45,43 +104,112 @@ def update_from_sidecar_json(cls, **kwargs): sidecar_dict = json.load(f) cls.validate_sidecar(sidecar_dict) - versions = sidecar_dict['versions'] - builder_map = cls.__get_object_id_map(f_builder) - for version_dict in versions: - for change_dict in version_dict.get('changes'): - object_id = change_dict['object_id'] - relative_path = change_dict.get('relative_path') - new_value = change_dict['value'] - - builder = builder_map[object_id] - # TODO handle paths to links - # TODO handle object references - if relative_path in builder.attributes: - # TODO handle different dtypes including compound dtypes - builder.attributes[relative_path] = new_value - elif isinstance(builder, GroupBuilder): # GroupBuilder has object_id - sub_dset_builder, attr_name = builder.get_subbuilder(relative_path) - if sub_dset_builder is None: - raise ValueError("Relative path '%s' not recognized as a dataset or attribute" - % relative_path) - if attr_name is None: # update data in sub-DatasetBuilder - cls.__update_dataset_builder(sub_dset_builder, new_value) - else: # update attribute - sub_dset_builder.attributes[attr_name] = new_value - - else: # DatasetBuilder has object_id - if not relative_path: # update data - cls.__update_dataset_builder(builder, new_value) - else: - raise ValueError("Relative path '%s' not recognized as None or attribute" % relative_path) + operations = sidecar_dict['operations'] + for operation in operations: + object_id = operation['object_id'] + relative_path = operation['relative_path'] + operation_type = operation['type'] + new_value = operation.get('value') + new_dtype = operation.get('dtype') + + builder_map = cls.__get_object_id_map(f_builder) + builder = builder_map[object_id] + # TODO handle paths to links + # TODO handle object references + if operation_type == 'replace' or operation_type == 'delete': + cls.__replace(builder, relative_path, new_value, new_dtype) + elif operation_type == 'change_dtype': + assert new_value is None + cls.__change_dtype(builder, relative_path, new_dtype) + elif operation_type == 'create_attribute': + cls.__create_attribute(builder, relative_path, new_value, new_dtype) + else: + raise ValueError("Operation type: '%s' not supported." % operation_type) return f_builder @classmethod - def __update_dataset_builder(cls, dset_builder, value): - # TODO handle different dtypes including compound dtypes + def __replace(cls, builder, relative_path, new_value, new_dtype): + if relative_path in builder.attributes: + cls.__replace_attribute(builder, relative_path, new_value, new_dtype) + elif isinstance(builder, GroupBuilder): # GroupBuilder has object_id + sub_dset_builder, attr_name = builder.get_subbuilder(relative_path) + if sub_dset_builder is None: + raise ValueError("Relative path '%s' not recognized as a group or dataset." 
+ % relative_path) + if attr_name is None: + cls.__replace_dataset_data(sub_dset_builder, new_value, new_dtype) + else: + cls.__replace_attribute(sub_dset_builder, attr_name, new_value, new_dtype) + else: # DatasetBuilder has object_id + if not relative_path: + cls.__replace_dataset_data(builder, new_value, new_dtype) + else: + raise ValueError("Relative path '%s' not recognized as None or attribute." % relative_path) + + @classmethod + def __change_dtype(cls, builder, relative_path, new_dtype): + if relative_path in builder.attributes: + cls.__change_dtype_attribute(builder, relative_path, new_dtype) + elif isinstance(builder, GroupBuilder): # GroupBuilder has object_id + sub_dset_builder, attr_name = builder.get_subbuilder(relative_path) + if sub_dset_builder is None: + raise ValueError("Relative path '%s' not recognized as a group or dataset." + % relative_path) + if attr_name is None: # update data in sub-DatasetBuilder + cls.__change_dtype_dataset_data(sub_dset_builder, new_dtype) + else: + cls.__change_dtype_attribute(sub_dset_builder, attr_name, new_dtype) + else: # DatasetBuilder has object_id + if not relative_path: + cls.__change_dtype_dataset_data(builder, new_dtype) + else: + raise ValueError("Relative path '%s' not recognized as None or attribute." % relative_path) + + @classmethod + def __create_attribute(cls, builder, relative_path, new_value, new_dtype): + # TODO validate in jsonschema that the relative path cannot start or end with '/' + if '/' in relative_path: # GroupBuilder has object_id + assert isinstance(builder, GroupBuilder), \ + "Relative path '%s' can include '/' only if the object is a group." % relative_path + sub_dset_builder, attr_name = builder.get_subbuilder(relative_path) + if sub_dset_builder is None: + raise ValueError("Relative path '%s' not recognized as a sub-group or sub-dataset." + % relative_path) + if attr_name in sub_dset_builder.attributes: + raise ValueError("Attribute '%s' already exists. Cannot create attribute." + % relative_path) + cls.__create_builder_attribute(sub_dset_builder, attr_name, new_value, new_dtype) + elif relative_path in builder.attributes: + raise ValueError("Attribute '%s' already exists. Cannot create attribute." 
% relative_path) + else: + cls.__create_builder_attribute(builder, attr_name, new_value, new_dtype) + + @classmethod + def __replace_dataset_data(cls, dset_builder, value, dtype): # TODO consider replacing slices of a dataset or attribute - dset_builder['data'] = value + new_value = cls.convert_dtype(value, dtype, dset_builder['data']) + dset_builder['data'] = new_value + + @classmethod + def __replace_attribute(cls, builder, attr_name, value, dtype): + new_value = cls.convert_dtype(value, dtype, builder.attributes[attr_name]) + builder.attributes[attr_name] = new_value + + @classmethod + def __change_dtype_dataset_data(cls, dset_builder, dtype): + new_value = cls.convert_dtype(dset_builder['data'], dtype, dset_builder['data']) + dset_builder['data'] = new_value + + @classmethod + def __change_dtype_attribute(cls, builder, attr_name, dtype): + new_value = cls.convert_dtype(builder.attributes[attr_name], dtype, builder.attributes[attr_name]) + builder.attributes[attr_name] = new_value + + @classmethod + def __create_builder_attribute(cls, builder, attr_name, value, dtype): + new_value = cls.convert_dtype(value, dtype, None) + builder.attributes[attr_name] = new_value @classmethod def __get_object_id_map(cls, builder): diff --git a/src/hdmf/build/__init__.py b/src/hdmf/build/__init__.py index ea5d21152..33a36a2bb 100644 --- a/src/hdmf/build/__init__.py +++ b/src/hdmf/build/__init__.py @@ -3,6 +3,6 @@ from .errors import (BuildError, OrphanContainerBuildError, ReferenceTargetNotBuiltError, ContainerConfigurationError, ConstructError) from .manager import BuildManager, TypeMap -from .objectmapper import ObjectMapper +from .objectmapper import ObjectMapper, unicode, ascii from .warnings import (BuildWarning, MissingRequiredBuildWarning, DtypeConversionWarning, IncorrectQuantityBuildWarning, MissingRequiredWarning, OrphanContainerWarning) diff --git a/src/hdmf/build/builders.py b/src/hdmf/build/builders.py index 2274f9569..54d11ed30 100644 --- a/src/hdmf/build/builders.py +++ b/src/hdmf/build/builders.py @@ -284,6 +284,7 @@ def __get_rec(self, key_ar): def get_subbuilder(self, key, default=None): """Like dict.get, but looks in groups and datasets sub-dictionaries and ignores trailing attributes and links. 
+ Returns two values -- the sub-group or sub-dataset and the name of the attribute if present """ try: key_ar = _posixpath.normpath(key).split('/') diff --git a/src/hdmf/build/objectmapper.py b/src/hdmf/build/objectmapper.py index 070d0187d..ebc2372f8 100644 --- a/src/hdmf/build/objectmapper.py +++ b/src/hdmf/build/objectmapper.py @@ -64,7 +64,7 @@ def _dec(func): return _dec -def _unicode(s): +def unicode(s): """ A helper function for converting to Unicode """ @@ -76,7 +76,7 @@ def _unicode(s): raise ValueError("Expected unicode or ascii string, got %s" % type(s)) -def _ascii(s): +def ascii(s): """ A helper function for converting to ASCII """ @@ -95,7 +95,7 @@ class ObjectMapper(metaclass=ExtenderMeta): # mapping from spec dtypes to numpy dtypes or functions for conversion of values to spec dtypes # make sure keys are consistent between hdmf.spec.spec.DtypeHelper.primary_dtype_synonyms, - # hdmf.build.objectmapper.ObjectMapper.__dtypes, hdmf.build.manager.TypeMap._spec_dtype_map, + # hdmf.build.objectmapper.ObjectMapper.__dtypes, hdmf.build.classgenerator.CustomClassGenerator._spec_dtype_map, # hdmf.validate.validator.__allowable, and backend dtype maps __dtypes = { "float": np.float32, @@ -115,18 +115,22 @@ class ObjectMapper(metaclass=ExtenderMeta): "uint16": np.uint16, "uint8": np.uint8, "bool": np.bool_, - "text": _unicode, - "utf": _unicode, - "utf8": _unicode, - "utf-8": _unicode, - "ascii": _ascii, - "bytes": _ascii, - "isodatetime": _ascii, - "datetime": _ascii, + "text": unicode, + "utf": unicode, + "utf8": unicode, + "utf-8": unicode, + "ascii": ascii, + "bytes": ascii, + "isodatetime": ascii, + "datetime": ascii, } __no_convert = set() + @classmethod + def get_dtype_mapping(cls, dtype_str): + return cls.__dtypes[dtype_str] + @classmethod def __resolve_numeric_dtype(cls, given, specified): """ @@ -201,14 +205,14 @@ def convert_dtype(cls, spec, value, spec_dtype=None): # noqa: C901 ret, ret_dtype = cls.__check_edgecases(spec, value, spec_dtype) if ret is not None or ret_dtype is not None: return ret, ret_dtype - # spec_dtype is a string, spec_dtype_type is a type or the conversion helper functions _unicode or _ascii + # spec_dtype is a string, spec_dtype_type is a type or the conversion helper functions unicode or ascii spec_dtype_type = cls.__dtypes[spec_dtype] warning_msg = None if isinstance(value, np.ndarray): - if spec_dtype_type is _unicode: + if spec_dtype_type is unicode: ret = value.astype('U') ret_dtype = "utf8" - elif spec_dtype_type is _ascii: + elif spec_dtype_type is ascii: ret = value.astype('S') ret_dtype = "ascii" else: @@ -220,9 +224,9 @@ def convert_dtype(cls, spec, value, spec_dtype=None): # noqa: C901 ret_dtype = ret.dtype.type elif isinstance(value, (tuple, list)): if len(value) == 0: - if spec_dtype_type is _unicode: + if spec_dtype_type is unicode: ret_dtype = 'utf8' - elif spec_dtype_type is _ascii: + elif spec_dtype_type is ascii: ret_dtype = 'ascii' else: ret_dtype = spec_dtype_type @@ -235,16 +239,16 @@ def convert_dtype(cls, spec, value, spec_dtype=None): # noqa: C901 ret_dtype = tmp_dtype elif isinstance(value, AbstractDataChunkIterator): ret = value - if spec_dtype_type is _unicode: + if spec_dtype_type is unicode: ret_dtype = "utf8" - elif spec_dtype_type is _ascii: + elif spec_dtype_type is ascii: ret_dtype = "ascii" else: ret_dtype, warning_msg = cls.__resolve_numeric_dtype(value.dtype, spec_dtype_type) else: - if spec_dtype_type in (_unicode, _ascii): + if spec_dtype_type in (unicode, ascii): ret_dtype = 'ascii' - if spec_dtype_type is 
_unicode: + if spec_dtype_type is unicode: ret_dtype = 'utf8' ret = spec_dtype_type(value) else: diff --git a/src/hdmf/spec/spec.py b/src/hdmf/spec/spec.py index c892cabf5..27c61644d 100644 --- a/src/hdmf/spec/spec.py +++ b/src/hdmf/spec/spec.py @@ -21,7 +21,7 @@ class DtypeHelper: # Dict where the keys are the primary data type and the values are list of strings with synonyms for the dtype # make sure keys are consistent between hdmf.spec.spec.DtypeHelper.primary_dtype_synonyms, - # hdmf.build.objectmapper.ObjectMapper.__dtypes, hdmf.build.manager.TypeMap._spec_dtype_map, + # hdmf.build.objectmapper.ObjectMapper.__dtypes, hdmf.build.classgenerator.CustomClassGenerator._spec_dtype_map, # hdmf.validate.validator.__allowable, and backend dtype maps # see https://hdmf-schema-language.readthedocs.io/en/latest/description.html#dtype primary_dtype_synonyms = { diff --git a/tests/unit/io_tests/test_sidecar.py b/tests/unit/io_tests/test_sidecar.py index c0359d598..7ea52bfcf 100644 --- a/tests/unit/io_tests/test_sidecar.py +++ b/tests/unit/io_tests/test_sidecar.py @@ -1,4 +1,5 @@ import json +import numpy as np import os from hdmf import Container @@ -14,73 +15,59 @@ class TestBasic(TestCase): def setUp(self): self.h5_path = "./tests/unit/io_tests/test_sidecar.h5" - foo2 = Foo('sub_foo', [-1, -2, -3], 'OLD', [-17]) - foo1 = Foo('foo1', [1, 2, 3], 'old', [17], foo2) + sub_foo = Foo(name='sub_foo', my_data=[-1, -2, -3], attr1='OLD') + foo1 = Foo(name='foo1', my_data=[1, 2, 3], attr1='old', attr2=[17], sub_foo=sub_foo) with HDF5IO(self.h5_path, manager=_get_manager(), mode='w') as io: io.write(foo1) - version2_0_0 = { - "label": "2.0.0", - "description": "change attr1 from 'old' to 'my experiment' and my_data from [1, 2, 3] to [4, 5]", - "datetime": "2020-10-29T19:15:15.789Z", - "agent": "John Doe", - "changes": [ - { - "object_id": foo1.object_id, - "relative_path": "attr1", - "value": "my experiment" - }, - { - "object_id": foo1.object_id, - "relative_path": "my_data", - "value": [4, 5], - "dtype": "int32" - } - ] - } - - version3_0_0 = { - "label": "3.0.0", - "description": ("change sub_foo/my_data from [-1, -2, -3] to [[0]], delete my_data/attr2, and change " - "dtype of my_data"), - "datetime": "2021-11-30T20:16:16.790Z", - "agent": "Jane Doe", - "changes": [ - { - "object_id": foo2.object_id, - "relative_path": "my_data", - "value": [[0]] - }, - { - "object_id": foo1.object_id, - "relative_path": "my_data/attr2", - "value": None # will be encoded on disk as null - }, - { - "object_id": foo1.object_id, - "relative_path": "my_data", - "value": [6, 7], - "dtype": "int8" - }, - ] - } - - version3_0_1 = { - "label": "3.0.1", - "description": "change my_data from [4, 5] to [6, 7]", - "datetime": "2021-11-30T20:17:16.790Z", - "agent": "Jane Doe", - "changes": [ - { - "object_id": foo1.object_id, - "relative_path": "my_data", - "value": [6, 7], - }, - ] - } + operations = [ + { + "type": "replace", + "description": "change foo1/attr1 from 'old' to 'my experiment' (same dtype)", + "object_id": foo1.object_id, + "relative_path": "attr1", + "value": "my experiment" + }, + { + "type": "replace", + "description": "change foo1/my_data from [1, 2, 3] to [4, 5] (int16)", + "object_id": foo1.object_id, + "relative_path": "my_data", + "value": [4, 5], + "dtype": "int16" + }, + { + "type": "replace", + "description": "change sub_foo/my_data from [-1, -2, -3] to [[0]] (same dtype)", + "object_id": sub_foo.object_id, + "relative_path": "my_data", + "value": [[0]] + }, + { + "type": "create_attribute", + 
"description": "create sub_foo/my_data/attr2 and set it to [1] (int16)", + "object_id": sub_foo.object_id, + "relative_path": "my_data/attr2", + "value": [1], + "dtype": "int16" + }, + { + "type": "delete", + "description": "delete foo1/my_data/attr2", + "object_id": foo1.object_id, + "relative_path": "my_data/attr2", + }, + { + "type": "change_dtype", + "description": "change dtype of foo1/my_data from int16 to int8", + "object_id": foo1.object_id, + "relative_path": "my_data", + "dtype": "int8" + } + ] sidecar = dict() - sidecar["versions"] = [version2_0_0, version3_0_0, version3_0_1] + sidecar["operations"] = operations sidecar["schema_version"] = "0.1.0" self.json_path = "./tests/unit/io_tests/test_sidecar.json" @@ -97,8 +84,10 @@ def test_update_builder(self): with HDF5IO(self.h5_path, 'r', manager=_get_manager()) as io: foo1 = io.read() assert foo1.attr1 == "my experiment" - assert foo1.my_data == [6, 7] # TODO test dtype - assert foo1.sub_foo.my_data == [[0]] + assert isinstance(foo1.my_data, np.ndarray) + np.testing.assert_array_equal(foo1.my_data, np.array([4, 5], dtype=np.dtype('int8'))) # TODO make sure this checks dtype + np.testing.assert_array_equal(foo1.sub_foo.my_data, np.array([[0]])) + np.testing.assert_array_equal(foo1.sub_foo.attr2, np.array([1], dtype=np.dtype('int16'))) assert foo1.attr2 is None @@ -106,13 +95,12 @@ class TestFailValidation(TestCase): def setUp(self): self.h5_path = "./tests/unit/io_tests/test_sidecar_fail.h5" - foo2 = Foo('sub_foo', [-1, -2, -3], 'OLD', [-17]) - foo1 = Foo('foo1', [1, 2, 3], 'old', [17], foo2) + foo2 = Foo(name='sub_foo', my_data=[-1, -2, -3], attr1='OLD') + foo1 = Foo(name='foo1', my_data=[1, 2, 3], attr1='old', attr2=[17], sub_foo=foo2) with HDF5IO(self.h5_path, manager=_get_manager(), mode='w') as io: io.write(foo1) sidecar = dict() - sidecar["versions"] = [] self.json_path = "./tests/unit/io_tests/test_sidecar_fail.json" with open(self.json_path, 'w') as outfile: @@ -133,16 +121,23 @@ def test_simple(self): class Foo(Container): @docval({'name': 'name', 'type': str, 'doc': 'the name of this Foo'}, - {'name': 'my_data', 'type': ('array_data', 'data'), 'doc': 'a 1-D integer dataset'}, + {'name': 'my_data', 'type': ('array_data', 'data'), 'doc': 'a 1-D or 2-D integer dataset', + 'shape': ((None, ), (None, None))}, {'name': 'attr1', 'type': str, 'doc': 'a string attribute'}, - {'name': 'attr2', 'type': ('array_data', 'data'), 'doc': 'a 1-D integer attribute', 'default': None}, + {'name': 'attr2', 'type': ('array_data', 'data'), 'doc': 'a 1-D integer attribute', 'shape': (None, ), + 'default': None}, + {'name': 'opt_data', 'type': ('array_data', 'data'), 'doc': 'a 1-D integer dataset', 'default': None}, + {'name': 'attr3', 'type': ('array_data', 'data'), 'doc': 'a 1-D integer attribute', 'default': None}, {'name': 'sub_foo', 'type': 'Foo', 'doc': 'a child Foo', 'default': None}) def __init__(self, **kwargs): - name, my_data, attr1, attr2, sub_foo = getargs('name', 'my_data', 'attr1', 'attr2', 'sub_foo', kwargs) + name, my_data, attr1, attr2, opt_data, attr3, sub_foo = getargs('name', 'my_data', 'attr1', 'attr2', 'opt_data', + 'attr3', 'sub_foo', kwargs) super().__init__(name=name) self.__data = my_data self.__attr1 = attr1 self.__attr2 = attr2 + self.__opt_data = opt_data + self.__attr3 = attr3 self.__sub_foo = sub_foo if sub_foo is not None: assert sub_foo.name == 'sub_foo' # on read mapping will not work otherwise @@ -160,6 +155,14 @@ def attr1(self): def attr2(self): return self.__attr2 + @property + def opt_data(self): + return 
self.__opt_data + + @property + def attr3(self): + return self.__attr3 + @property def sub_foo(self): return self.__sub_foo @@ -179,10 +182,10 @@ def _get_manager(): ], datasets=[ DatasetSpec( - doc='a 1-D integer dataset', + doc='a 1-D or 2-D integer dataset', dtype='int', name='my_data', - shape=[None, ], + shape=[[None, ], [None, None]], attributes=[ AttributeSpec( name='attr2', @@ -192,6 +195,22 @@ def _get_manager(): required=False ) ] + ), + DatasetSpec( + doc='an optional 1-D integer dataset', + dtype='int', + name='opt_data', + shape=[None, ], + quantity='?', + attributes=[ + AttributeSpec( + name='attr3', + doc='a 1-D integer attribute', + dtype='int', + shape=[None, ], + required=False + ) + ] ) ], attributes=[ @@ -200,11 +219,15 @@ def _get_manager(): ) class FooMapper(ObjectMapper): - """Remap 'attr2' attribute on Foo container to 'my_data' dataset spec > 'attr2' attribute spec.""" + """Remap 'attr2' attribute on Foo container to 'my_data' dataset spec > 'attr2' attribute spec and + remap 'attr3' attribute on Foo container to 'opt_data' dataset spec > 'attr3' attribute spec. + """ def __init__(self, spec): super().__init__(spec) my_data_spec = spec.get_dataset('my_data') self.map_spec('attr2', my_data_spec.get_attribute('attr2')) + opt_data_spec = spec.get_dataset('opt_data') + self.map_spec('attr3', opt_data_spec.get_attribute('attr3')) spec_catalog = SpecCatalog() spec_catalog.register_spec(foo_spec, 'test.yaml') From 393e5b315d553bf956f99928f3886bfe62cdc436 Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Wed, 8 Dec 2021 00:48:36 -0800 Subject: [PATCH 10/19] Update tests (more to do) --- src/hdmf/backends/builderupdater.py | 11 +- src/hdmf/build/builders.py | 31 +- tests/unit/io_tests/test_sidecar.py | 455 ++++++++++++++++++++++------ 3 files changed, 395 insertions(+), 102 deletions(-) diff --git a/src/hdmf/backends/builderupdater.py b/src/hdmf/backends/builderupdater.py index fe85248aa..dfe417dcf 100644 --- a/src/hdmf/backends/builderupdater.py +++ b/src/hdmf/backends/builderupdater.py @@ -34,17 +34,12 @@ def convert_dtype_using_map(cls, value, dtype_str): dtype = 'U' elif dtype is ascii: dtype = 'S' - else: - dtype = np.dtype(dtype) if isinstance(value, list): ret = np.array(value, dtype=dtype) else: ret = value.astype(dtype) else: # scalar - if dtype in (unicode, ascii): - ret = dtype(value) - else: - ret = np.dtype(dtype)(value) + ret = dtype(value) return ret @classmethod @@ -135,7 +130,7 @@ def __replace(cls, builder, relative_path, new_value, new_dtype): elif isinstance(builder, GroupBuilder): # GroupBuilder has object_id sub_dset_builder, attr_name = builder.get_subbuilder(relative_path) if sub_dset_builder is None: - raise ValueError("Relative path '%s' not recognized as a group or dataset." + raise ValueError("Relative path '%s' not recognized as a group, dataset, or attribute." % relative_path) if attr_name is None: cls.__replace_dataset_data(sub_dset_builder, new_value, new_dtype) @@ -154,7 +149,7 @@ def __change_dtype(cls, builder, relative_path, new_dtype): elif isinstance(builder, GroupBuilder): # GroupBuilder has object_id sub_dset_builder, attr_name = builder.get_subbuilder(relative_path) if sub_dset_builder is None: - raise ValueError("Relative path '%s' not recognized as a group or dataset." + raise ValueError("Relative path '%s' not recognized as a group, dataset, or attribute." 
% relative_path) if attr_name is None: # update data in sub-DatasetBuilder cls.__change_dtype_dataset_data(sub_dset_builder, new_dtype) diff --git a/src/hdmf/build/builders.py b/src/hdmf/build/builders.py index 54d11ed30..0d1ca2e34 100644 --- a/src/hdmf/build/builders.py +++ b/src/hdmf/build/builders.py @@ -282,7 +282,7 @@ def __get_rec(self, key_ar): return self.datasets[key_ar[0]].attributes[key_ar[1]] raise KeyError(key_ar[0]) - def get_subbuilder(self, key, default=None): + def get_subbuilder(self, key, default=(None, None)): """Like dict.get, but looks in groups and datasets sub-dictionaries and ignores trailing attributes and links. Returns two values -- the sub-group or sub-dataset and the name of the attribute if present """ @@ -295,16 +295,29 @@ def get_subbuilder(self, key, default=None): def __get_subbuilder_rec(self, key_ar): # recursive helper for get_subbuilder # returns sub-builder and attribute name if present - if key_ar[0] in self.groups: - if len(key_ar) == 1: + # possibilities are: + # group + # group/subgroup or group/subgroup/... + # group/dataset or group/dataset/... + # group/attribute + # dataset + # dataset/attribute + if len(key_ar) == 1: + if key_ar[0] in self.groups: # group return self.groups[key_ar[0]], None - elif key_ar[1] in (self.groups + self.datasets): - return self.groups[key_ar[0]].__get_subbuilder_rec(key_ar[1:]) - elif key_ar[0] in self.datasets: # "dset/x" must be an attribute on dset - if len(key_ar) == 1: + elif key_ar[0] in self.datasets: # dataset return self.datasets[key_ar[0]], None - assert len(key_ar) == 2 - return self.datasets[key_ar[0]], key_ar[1] + elif len(key_ar) == 2: + if key_ar[0] in self.groups: + if key_ar[1] in self.groups[key_ar[0]].attributes: # group/attribute + return self.groups[key_ar[0]], key_ar[1] + else: # group/subgroup or group/dataset + return self.groups[key_ar[0]].__get_subbuilder_rec(key_ar[1:]) + elif key_ar[0] in self.datasets: + if key_ar[1] in self.datasets[key_ar[0]].attributes: # dataset/attribute + return self.datasets[key_ar[0]], key_ar[1] + elif len(key_ar) > 2 and key_ar[0] in self.groups: + return self.groups[key_ar[0]].__get_subbuilder_rec(key_ar[1:]) raise KeyError(key_ar[0]) def __setitem__(self, args, val): diff --git a/tests/unit/io_tests/test_sidecar.py b/tests/unit/io_tests/test_sidecar.py index 7ea52bfcf..089013647 100644 --- a/tests/unit/io_tests/test_sidecar.py +++ b/tests/unit/io_tests/test_sidecar.py @@ -2,7 +2,7 @@ import numpy as np import os -from hdmf import Container +from hdmf import Container, Data from hdmf.backends.hdf5.h5tools import HDF5IO from hdmf.backends import SidecarValidationError from hdmf.build import BuildManager, TypeMap, ObjectMapper @@ -15,88 +15,246 @@ class TestBasic(TestCase): def setUp(self): self.h5_path = "./tests/unit/io_tests/test_sidecar.h5" - sub_foo = Foo(name='sub_foo', my_data=[-1, -2, -3], attr1='OLD') - foo1 = Foo(name='foo1', my_data=[1, 2, 3], attr1='old', attr2=[17], sub_foo=sub_foo) + self.foo_data1 = FooData(name='foodata1', data=[1], data_attr1=2, data_attr2=['a']) + self.foo2 = Foo(name='foo2', my_data=[1, 2, 3], my_sub_data=[1, 2, 3], attr1='old') + self.sub_foo = Foo(name='sub_foo', my_data=[-1, -2, -3], my_sub_data=[-1, -2, -3], attr1='OLD') + self.foo_data = FooData(name='my_foo_data', data=[0, 1], data_attr1=1, data_attr2=['a']) + self.foo1 = Foo(name='foo1', my_data=[1, 2, 3], my_sub_data=[1, 2, 3], attr1='old', attr2=[17], + sub_foo=self.sub_foo, foo_holder_foos=[self.foo2], my_foo_data=self.foo_data, + attr3=[1], attr4=[1], attr5=[1]) 
with HDF5IO(self.h5_path, manager=_get_manager(), mode='w') as io: - io.write(foo1) + io.write(self.foo1) + + # operations = [ + # { + # "type": "create_attribute", + # "description": "create sub_foo/my_data/attr2 and set it to [1] (int16)", + # "object_id": sub_foo.object_id, + # "relative_path": "my_data/attr2", + # "value": [1], + # "dtype": "int16" + # }, + # { + # "type": "delete", + # "description": "delete foo1/my_data/attr2", + # "object_id": foo1.object_id, + # "relative_path": "my_data/attr2", + # }, + # { + # "type": "change_dtype", + # "description": "change dtype of foo1/my_data from int16 to int8", + # "object_id": foo1.object_id, + # "relative_path": "my_data", + # "dtype": "int8" + # } + # ] + # + # sidecar = dict() + # sidecar["operations"] = operations + # sidecar["schema_version"] = "0.1.0" + # + # self.json_path = "./tests/unit/io_tests/test_sidecar.json" + # with open(self.json_path, 'w') as outfile: + # json.dump(sidecar, outfile, indent=4) + + def tearDown(self): + if os.path.exists(self.h5_path): + os.remove(self.h5_path) + if os.path.exists(self.json_path): + os.remove(self.json_path) + + def _write_test_sidecar(self, operations): + sidecar = dict() + sidecar["operations"] = operations + sidecar["schema_version"] = "0.1.0" + + self.json_path = "./tests/unit/io_tests/test_sidecar.json" + with open(self.json_path, 'w') as outfile: + json.dump(sidecar, outfile, indent=4) + def test_replace_typed_group_dataset_data(self): + """Test replacing the data of a dataset in a typed group.""" operations = [ { "type": "replace", - "description": "change foo1/attr1 from 'old' to 'my experiment' (same dtype)", - "object_id": foo1.object_id, - "relative_path": "attr1", - "value": "my experiment" + "description": "change foo1/my_data data from [1, 2, 3] to [4, 5] (int8)", + "object_id": self.foo1.object_id, + "relative_path": "my_data", + "value": [4, 5], + "dtype": "int8" }, + ] + self._write_test_sidecar(operations) + with HDF5IO(self.h5_path, 'r', manager=_get_manager()) as io: + read_foo1 = io.read() + # TODO make sure this checks dtype + np.testing.assert_array_equal(read_foo1.my_data, np.array([4, 5], dtype=np.dtype('int8'))) + + def test_replace_subgroup_dataset_data(self): + """Test replacing the data of a dataset in a subgroup of a typed group.""" + operations = [ { "type": "replace", - "description": "change foo1/my_data from [1, 2, 3] to [4, 5] (int16)", - "object_id": foo1.object_id, - "relative_path": "my_data", + "description": "change foo1/foo_holder_foos/my_sub_data data from [1, 2, 3] to [4, 5] (int8)", + "object_id": self.foo1.object_id, + "relative_path": "foo_holder/my_sub_data", "value": [4, 5], - "dtype": "int16" + "dtype": "int8" + }, + ] + self._write_test_sidecar(operations) + with HDF5IO(self.h5_path, 'r', manager=_get_manager()) as io: + read_foo1 = io.read() + # TODO make sure this checks dtype + np.testing.assert_array_equal(read_foo1.my_sub_data, np.array([4, 5], dtype=np.dtype('int8'))) + + def test_replace_typed_dataset_data(self): + """Test replacing the data of a typed dataset.""" + operations = [ + { + "type": "replace", + "description": "change my_foo_data data from [0, 1] to [2, 3] (int8)", + "object_id": self.foo_data.object_id, + "relative_path": "", + "value": [2, 3], + "dtype": "int8" }, + ] + self._write_test_sidecar(operations) + with HDF5IO(self.h5_path, 'r', manager=_get_manager()) as io: + read_foo1 = io.read() + # TODO make sure this checks dtype + np.testing.assert_array_equal(read_foo1.my_foo_data.data, np.array([2, 3], 
dtype=np.dtype('int8'))) + + def test_replace_typed_group_attribute(self): + """Test replacing the attribute of a typed group.""" + operations = [ { "type": "replace", - "description": "change sub_foo/my_data from [-1, -2, -3] to [[0]] (same dtype)", - "object_id": sub_foo.object_id, - "relative_path": "my_data", - "value": [[0]] + "description": "change foo1/attr1 from 'old' to 'new' (same dtype)", + "object_id": self.foo1.object_id, + "relative_path": "attr1", + "value": "new" }, + ] + self._write_test_sidecar(operations) + with HDF5IO(self.h5_path, 'r', manager=_get_manager()) as io: + read_foo1 = io.read() + assert read_foo1.attr1 == "new" + + def test_replace_subgroup_attribute(self): + """Test replacing the attribute of an untyped group in a typed group.""" + operations = [ { - "type": "create_attribute", - "description": "create sub_foo/my_data/attr2 and set it to [1] (int16)", - "object_id": sub_foo.object_id, - "relative_path": "my_data/attr2", - "value": [1], - "dtype": "int16" + "type": "replace", + "description": "change foo1/foo_holder/attr4 from [1] to [2] (int8)", + "object_id": self.foo1.object_id, + "relative_path": "foo_holder/attr4", + "value": [2], + "dtype": "int8" }, + ] + self._write_test_sidecar(operations) + with HDF5IO(self.h5_path, 'r', manager=_get_manager()) as io: + read_foo1 = io.read() + # TODO make sure this checks dtype + np.testing.assert_array_equal(read_foo1.attr4, np.array([2], dtype=np.dtype('int8'))) + + def test_replace_typed_group_dataset_attribute(self): + """Test replacing the attribute of an untyped dataset in a typed group.""" + operations = [ { - "type": "delete", - "description": "delete foo1/my_data/attr2", - "object_id": foo1.object_id, + "type": "replace", + "description": "change foo1/my_data/attr2 from [1] to [2] (int8)", + "object_id": self.foo1.object_id, "relative_path": "my_data/attr2", + "value": [2], + "dtype": "int8" }, + ] + self._write_test_sidecar(operations) + with HDF5IO(self.h5_path, 'r', manager=_get_manager()) as io: + read_foo1 = io.read() + # TODO make sure this checks dtype + np.testing.assert_array_equal(read_foo1.attr2, np.array([2], dtype=np.dtype('int8'))) + + def test_replace_subgroup_dataset_attribute(self): + """Test replacing the attribute of an untyped dataset in an untyped group in a typed group.""" + operations = [ { - "type": "change_dtype", - "description": "change dtype of foo1/my_data from int16 to int8", - "object_id": foo1.object_id, - "relative_path": "my_data", + "type": "replace", + "description": "change foo1/foo_holder/my_sub_data/attr5 from [1] to [2] (int8)", + "object_id": self.foo1.object_id, + "relative_path": "foo_holder/my_sub_data/attr5", + "value": [2], + "dtype": "int8" + }, + ] + self._write_test_sidecar(operations) + with HDF5IO(self.h5_path, 'r', manager=_get_manager()) as io: + read_foo1 = io.read() + # TODO make sure this checks dtype + np.testing.assert_array_equal(read_foo1.attr5, np.array([2], dtype=np.dtype('int8'))) + + def test_replace_typed_dataset_attribute(self): + """Test replacing the attribute of a typed dataset.""" + operations = [ + { + "type": "replace", + "description": "change foo1/my_foo_data/data_attr1 from 1 to 2 (int8)", + "object_id": self.foo_data.object_id, + "relative_path": "data_attr1", + "value": 2, "dtype": "int8" - } + }, ] + self._write_test_sidecar(operations) + with HDF5IO(self.h5_path, 'r', manager=_get_manager()) as io: + read_foo1 = io.read() + assert read_foo1.my_foo_data.data_attr1 == 2 - sidecar = dict() - sidecar["operations"] = operations - 
sidecar["schema_version"] = "0.1.0" + def test_delete_typed_group_req_attribute(self): + pass - self.json_path = "./tests/unit/io_tests/test_sidecar.json" - with open(self.json_path, 'w') as outfile: - json.dump(sidecar, outfile, indent=4) + def test_delete_typed_group_opt_attribute(self): + pass - def tearDown(self): - if os.path.exists(self.h5_path): - os.remove(self.h5_path) - if os.path.exists(self.json_path): - os.remove(self.json_path) + def test_delete_subgroup_req_attribute(self): + pass - def test_update_builder(self): - with HDF5IO(self.h5_path, 'r', manager=_get_manager()) as io: - foo1 = io.read() - assert foo1.attr1 == "my experiment" - assert isinstance(foo1.my_data, np.ndarray) - np.testing.assert_array_equal(foo1.my_data, np.array([4, 5], dtype=np.dtype('int8'))) # TODO make sure this checks dtype - np.testing.assert_array_equal(foo1.sub_foo.my_data, np.array([[0]])) - np.testing.assert_array_equal(foo1.sub_foo.attr2, np.array([1], dtype=np.dtype('int16'))) - assert foo1.attr2 is None + def test_delete_subgroup_opt_attribute(self): + pass + + def test_delete_typed_group_dataset_req_attribute(self): + pass + + def test_delete_typed_group_dataset_opt_attribute(self): + pass + + def test_delete_subgroup_dataset_req_attribute(self): + pass + + def test_delete_subgroup_dataset_opt_attribute(self): + pass + + def test_delete_dataset_req_attribute(self): + pass + + def test_delete_dataset_opt_attribute(self): + pass + + def test_cannot_delete_group(self): + pass + + def test_cannot_delete_dataset(self): + pass class TestFailValidation(TestCase): def setUp(self): self.h5_path = "./tests/unit/io_tests/test_sidecar_fail.h5" - foo2 = Foo(name='sub_foo', my_data=[-1, -2, -3], attr1='OLD') - foo1 = Foo(name='foo1', my_data=[1, 2, 3], attr1='old', attr2=[17], sub_foo=foo2) + foo1 = Foo(name='foo1', my_data=[1, 2, 3], my_sub_data=[1, 2, 3], attr1='old', attr2=[17]) with HDF5IO(self.h5_path, manager=_get_manager(), mode='w') as io: io.write(foo1) @@ -123,52 +281,118 @@ class Foo(Container): @docval({'name': 'name', 'type': str, 'doc': 'the name of this Foo'}, {'name': 'my_data', 'type': ('array_data', 'data'), 'doc': 'a 1-D or 2-D integer dataset', 'shape': ((None, ), (None, None))}, + {'name': 'my_sub_data', 'type': ('array_data', 'data'), 'doc': 'a 1-D or 2-D integer dataset', + 'shape': ((None, ), (None, None))}, {'name': 'attr1', 'type': str, 'doc': 'a string attribute'}, + {'name': 'optional_data', 'type': ('array_data', 'data'), 'doc': 'a 1-D integer dataset', 'default': None}, {'name': 'attr2', 'type': ('array_data', 'data'), 'doc': 'a 1-D integer attribute', 'shape': (None, ), 'default': None}, - {'name': 'opt_data', 'type': ('array_data', 'data'), 'doc': 'a 1-D integer dataset', 'default': None}, {'name': 'attr3', 'type': ('array_data', 'data'), 'doc': 'a 1-D integer attribute', 'default': None}, - {'name': 'sub_foo', 'type': 'Foo', 'doc': 'a child Foo', 'default': None}) + {'name': 'attr4', 'type': ('array_data', 'data'), 'doc': 'a 1-D integer attribute', 'default': None}, + {'name': 'attr5', 'type': ('array_data', 'data'), 'doc': 'a 1-D integer attribute', 'default': None}, + {'name': 'sub_foo', 'type': 'Foo', 'doc': 'a child Foo', 'default': None}, + {'name': 'my_foo_data', 'type': 'FooData', 'doc': 'a child Foo', 'default': None}, + {'name': 'foo_holder_foos', 'type': ('array_data', 'data'), 'doc': 'child Foos', 'default': None}, + {'name': 'foo_holder_foo_data', 'type': ('array_data', 'data'), 'doc': 'child FooData', 'default': None}) def __init__(self, **kwargs): - name, 
my_data, attr1, attr2, opt_data, attr3, sub_foo = getargs('name', 'my_data', 'attr1', 'attr2', 'opt_data', - 'attr3', 'sub_foo', kwargs) + name, my_data, my_sub_data, attr1, = getargs('name', 'my_data', 'my_sub_data', 'attr1', kwargs) + optional_data, attr2, attr3, attr4, attr5 = getargs('optional_data', 'attr2', 'attr3', 'attr4', 'attr5', kwargs) + sub_foo, my_foo_data = getargs('sub_foo', 'my_foo_data', kwargs) + foo_holder_foos, foo_holder_foo_data = getargs('foo_holder_foos', 'foo_holder_foo_data', kwargs) super().__init__(name=name) - self.__data = my_data - self.__attr1 = attr1 - self.__attr2 = attr2 - self.__opt_data = opt_data - self.__attr3 = attr3 - self.__sub_foo = sub_foo + self.my_data = my_data + self.my_sub_data = my_sub_data + self.attr1 = attr1 + self.attr2 = attr2 + self.attr3 = attr3 + self.attr4 = attr4 + self.attr5 = attr5 + self.optional_data = optional_data + self.sub_foo = sub_foo if sub_foo is not None: assert sub_foo.name == 'sub_foo' # on read mapping will not work otherwise - self.__sub_foo.parent = self + self.sub_foo.parent = self + self.my_foo_data = my_foo_data + if my_foo_data is not None: + assert my_foo_data.name == 'my_foo_data' # on read mapping will not work otherwise + self.my_foo_data.parent = self + self.foo_holder_foos = foo_holder_foos + if foo_holder_foos is not None: + for f in foo_holder_foos: + f.parent = self + self.foo_holder_foo_data = foo_holder_foo_data + if foo_holder_foo_data is not None: + for f in foo_holder_foo_data: + f.parent = self - @property - def my_data(self): - return self.__data - @property - def attr1(self): - return self.__attr1 +class FooData(Data): - @property - def attr2(self): - return self.__attr2 - - @property - def opt_data(self): - return self.__opt_data + @docval({'name': 'name', 'type': str, 'doc': 'the name of this Foo'}, + {'name': 'data', 'type': ('array_data', 'data'), 'doc': 'a 1-D or 2-D integer dataset', + 'shape': ((None, ), (None, None))}, + {'name': 'data_attr1', 'type': int, 'doc': 'an integer attribute'}, + {'name': 'data_attr2', 'type': ('array_data', 'data'), 'doc': 'a 1-D text attribute', 'shape': (None, ), + 'default': None}) + def __init__(self, **kwargs): + name, data, data_attr1, data_attr2 = getargs('name', 'data', 'data_attr1', 'data_attr2', kwargs) + super().__init__(name=name, data=data) + self.data_attr1 = data_attr1 + self.data_attr2 = data_attr2 - @property - def attr3(self): - return self.__attr3 - @property - def sub_foo(self): - return self.__sub_foo +def _get_manager(): + # Foo (group with data_type) has: + # - groups: + # - sub_foo (Foo), 0 or 1 + # - foo_holder (untyped), required + # - groups: + # - (Foo), 0 to many, remapped to foo_holder_foos + # - datasets: + # - my_sub_data (int, 1-D or 2-D), required + # - attributes: + # - attr5 (int, 1-D), optional, remapped to attr5 + # - (FooData), 0 to many, remapped to foo_holder_foo_data + # - attributes: + # - attr4 (int, 1-D), optional, remapped to attr4 + # - datasets: + # - my_data (int, 1-D or 2-D), required + # - attributes: + # - attr2 (int, 1-D), optional, remapped to attr2 + # - opt_data (int, 1-D or 2-D), 0 or 1, remapped to optional_data + # - attributes: + # - attr3 (int, 1-D), optional, remapped to attr3 + # - my_foo_data (FooData), optional + # - attributes: + # - attr1 (string, scalar), required + # + # FooData (dataset with data_type) has: + # - int, 1D + # - attributes: + # - data_attr1 (int, scalar), required + # - data_attr2 (text, 1-D), optional + foo_data_spec = DatasetSpec( + doc='A test dataset specification with 
a data type', + data_type_def='FooData', + dtype='int', + shape=[None, ], + attributes=[ + AttributeSpec( + name='data_attr1', + doc='a scalar integer attribute', + dtype='int', + ), + AttributeSpec( + name='data_attr2', + doc='a 1-D text attribute', + dtype='text', + shape=[None, ], + required=False + ) + ] + ) -def _get_manager(): foo_spec = GroupSpec( doc='A test group specification with a data type', data_type_def='Foo', @@ -178,6 +402,49 @@ def _get_manager(): data_type_inc='Foo', name='sub_foo', quantity='?', + ), + GroupSpec( + doc='an untyped group of Foos', + name='foo_holder', + quantity='?', + groups=[ + GroupSpec( + doc='child Foos', + data_type_inc='Foo', + quantity='*', + ) + ], + datasets=[ + DatasetSpec( + doc='a 1-D or 2-D integer dataset', + dtype='int', + name='my_sub_data', + shape=[[None, ], [None, None]], + attributes=[ + AttributeSpec( + name='attr5', + doc='a 1-D integer attribute', + dtype='int', + shape=[None, ], + required=False + ) + ] + ), + DatasetSpec( + doc='child FooData', + data_type_inc='FooData', + quantity='*', + ) + ], + attributes=[ + AttributeSpec( + name='attr4', + doc='a 1-D integer attribute', + dtype='int', + shape=[None, ], + required=False + ) + ] ) ], datasets=[ @@ -211,6 +478,12 @@ def _get_manager(): required=False ) ] + ), + DatasetSpec( + doc='child FooData', + name='my_foo_data', + data_type_inc='FooData', + quantity='?', ) ], attributes=[ @@ -219,8 +492,10 @@ def _get_manager(): ) class FooMapper(ObjectMapper): - """Remap 'attr2' attribute on Foo container to 'my_data' dataset spec > 'attr2' attribute spec and - remap 'attr3' attribute on Foo container to 'opt_data' dataset spec > 'attr3' attribute spec. + """Remap spec fields to Container attributes. + - 'attr2' attribute on Foo container to 'my_data' dataset spec > 'attr2' attribute spec + - 'attr3' attribute on Foo container to 'opt_data' dataset spec > 'attr3' attribute spec. 
+ - TODO fill me in """ def __init__(self, spec): super().__init__(spec) @@ -228,9 +503,18 @@ def __init__(self, spec): self.map_spec('attr2', my_data_spec.get_attribute('attr2')) opt_data_spec = spec.get_dataset('opt_data') self.map_spec('attr3', opt_data_spec.get_attribute('attr3')) + self.map_spec('optional_data', opt_data_spec) + foo_holder_spec = spec.get_group('foo_holder') + self.map_spec('attr4', foo_holder_spec.get_attribute('attr4')) + self.map_spec('foo_holder_foos', foo_holder_spec.get_data_type('Foo')) + self.map_spec('foo_holder_foo_data', foo_holder_spec.get_data_type('FooData')) + my_sub_data_spec = foo_holder_spec.get_dataset('my_sub_data') + self.map_spec('my_sub_data', my_sub_data_spec) + self.map_spec('attr5', my_sub_data_spec.get_attribute('attr5')) spec_catalog = SpecCatalog() spec_catalog.register_spec(foo_spec, 'test.yaml') + spec_catalog.register_spec(foo_data_spec, 'test.yaml') namespace_name = 'test_core' namespace = SpecNamespace( doc='a test namespace', @@ -243,6 +527,7 @@ def __init__(self, spec): namespace_catalog.add_namespace(namespace_name, namespace) type_map = TypeMap(namespace_catalog) type_map.register_container_type(namespace_name, 'Foo', Foo) + type_map.register_container_type(namespace_name, 'FooData', FooData) type_map.register_map(Foo, FooMapper) manager = BuildManager(type_map) return manager From 2fda06d73cc3659462e742bebc105b5e69af7fd7 Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Wed, 8 Dec 2021 10:09:36 -0800 Subject: [PATCH 11/19] Add description, author, and contact to sidecar JSON, fix tests --- sidecar.schema.json | 15 +++++++++++++++ tests/unit/io_tests/test_sidecar.py | 5 ++++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/sidecar.schema.json b/sidecar.schema.json index 850f47de0..5010bfbf3 100644 --- a/sidecar.schema.json +++ b/sidecar.schema.json @@ -7,10 +7,25 @@ "type": "object", "additionalProperties": false, "required": [ + "description", + "author", + "contact", "operations", "schema_version" ], "properties": { + "description": {"type": "string"}, + "author": { + "type": "array", + "items": {"type": "string"} + }, + "contact": { + "type": "array", + "items": { + "type": "string", + "pattern": "^.*@.*$" + } + }, "operations": { "type": "array", "items": { diff --git a/tests/unit/io_tests/test_sidecar.py b/tests/unit/io_tests/test_sidecar.py index 089013647..8aae8d65c 100644 --- a/tests/unit/io_tests/test_sidecar.py +++ b/tests/unit/io_tests/test_sidecar.py @@ -14,6 +14,7 @@ class TestBasic(TestCase): def setUp(self): + self.json_path = "./tests/unit/io_tests/test_sidecar.json" self.h5_path = "./tests/unit/io_tests/test_sidecar.h5" self.foo_data1 = FooData(name='foodata1', data=[1], data_attr1=2, data_attr2=['a']) self.foo2 = Foo(name='foo2', my_data=[1, 2, 3], my_sub_data=[1, 2, 3], attr1='old') @@ -65,10 +66,12 @@ def tearDown(self): def _write_test_sidecar(self, operations): sidecar = dict() + sidecar["description"] = "Summary of changes" + sidecar["author"] = ["The NWB Team"] + sidecar["contact"] = ["contact@nwb.org"] sidecar["operations"] = operations sidecar["schema_version"] = "0.1.0" - self.json_path = "./tests/unit/io_tests/test_sidecar.json" with open(self.json_path, 'w') as outfile: json.dump(sidecar, outfile, indent=4) From 729e9893ea482142c84ae4fdb0c4750bcdadd388 Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Tue, 12 Apr 2022 01:08:55 -0700 Subject: [PATCH 12/19] Update documentation, refactor, and add test cases --- docs/source/sidecar.rst | 110 ++++++++++ sidecar.schema.json | 50 +++-- 
src/hdmf/backends/builderupdater.py | 55 +++-- src/hdmf/build/builders.py | 31 +++ src/hdmf/build/errors.py | 8 + tests/unit/io_tests/test_sidecar.py | 316 +++++++++++++++++++--------- 6 files changed, 443 insertions(+), 127 deletions(-) create mode 100644 docs/source/sidecar.rst diff --git a/docs/source/sidecar.rst b/docs/source/sidecar.rst new file mode 100644 index 000000000..523596328 --- /dev/null +++ b/docs/source/sidecar.rst @@ -0,0 +1,110 @@ +.. _modifying_with_sidecar: + +Modifying an HDMF file with a sidecar JSON file +=============================================== + +Users may want to update part of an HDMF file without rewriting the entire file. +To do so, HDMF supports the use of a "sidecar" JSON file that lives adjacent to the HDMF file on disk and +specifies modifications to the HDMF file. Only a limited set of modifications are supported; for example, users can +delete a dataset or attribute but cannot create a new dataset or attribute. +When HDMF reads an HDMF file, if the corresponding sidecar JSON file exists, it is +automatically read and the modifications that it specifies are automatically applied. + +.. note:: + + This default behavior can be changed such that the corresponding sidecar JSON file is ignored when the HDMF file + is read by passing ``load_sidecar=False`` to the instance of `HDMFIO` used to read the HDMF file. + +Allowed modifications +--------------------- + +Only the following modifications to an HDMF file are supported in the sidecar JSON file: + +- Replace the values of a dataset or attribute with a scalar or 1-D array +- Delete a dataset or attribute + +.. note:: + + Replacing the values of a dataset or attribute with a very large 1-D array using the sidecar JSON file may not + be efficient and is discouraged. Users should instead consider rewriting the HDMF file with the + updated values. + +Specification for the sidecar JSON file +--------------------------------------- + +The sidecar JSON file can be validated using the ``sidecar.schema.json`` JSON schema file +located at the root of the HDMF repository. + +The sidecar JSON file must contain the following top-level keys: + +- "description": A free-form string describing the modifications specified in this file. +- "author": A list of free-form strings containing the names of the people who created this file. +- "contact": A list of email addresses for the people who created this file. Each author listed in the "author" key + *should* have a corresponding email address. +- "operations": A list of operations to perform on the data in the file, as specified below. +- "schema_version": The version of the sidecar JSON schema that the file conforms to. + +Specification for operations +---------------------------- + +All operations are required to have the following keys: + +- "type": The type of modification to perform. Only "replace" and "delete" are supported currently. +- "description": A description of the specified modification. +- "object_id": The object ID (UUID) of the data type that is closest in the file hierarchy to the + field being modified. +- "relative_path": The relative path from the data type with the given object ID to the field being modified. + +Operations can result in invalid files, i.e., files that do not conform to the specification. It is strongly +recommended that the file is validated against the schema after loading the sidecar JSON. In some cases, the +file cannot be read because the file is invalid. 
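
For illustration, a minimal sidecar file might be built as in the following sketch, which mirrors how the unit tests assemble their sidecars with Python's ``json`` module. The object ID, author, contact, file name, and field names ("my_data", "attr2") here are placeholders for this example only; a real sidecar must use the object IDs and paths stored in the HDMF file being modified::

    import json

    # Placeholder object ID (UUID-4 format); a real sidecar must use the object ID
    # of the data type closest in the file hierarchy to the fields being modified.
    object_id = "12345678-1234-4123-8123-123456789abc"

    sidecar = {
        "description": "Replace my_data and delete the optional attribute my_data/attr2",
        "author": ["Jane Doe"],
        "contact": ["jane.doe@example.com"],
        "operations": [
            {
                "type": "replace",
                "description": "replace my_data with [4, 5] stored as int8",
                "object_id": object_id,
                "relative_path": "my_data",
                "value": [4, 5],
                "dtype": "int8"
            },
            {
                # "delete" operations take no "value" or "dtype" keys
                "type": "delete",
                "description": "delete the optional attribute my_data/attr2",
                "object_id": object_id,
                "relative_path": "my_data/attr2"
            }
        ],
        "schema_version": "0.1.0"
    }

    # The sidecar is saved adjacent to the HDMF file; the tests use the same base
    # name as the HDMF file with a .json suffix (e.g., my_file.json next to my_file.h5).
    with open("my_file.json", "w") as f:
        json.dump(sidecar, f, indent=4)
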
+ +Replacing values of a dataset/attribute with a scalar or 1-D array +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Specify ``"type": "replace"`` to replace the values of a dataset/attribute from the associated HDMF file +as specified by the ``object_id`` and ``relative_path``. + +The operation specification must have the following keys: + +- "value": The new value for the dataset/attribute. Only scalar and 1-dimensional arrays can be + specified as a replacement value. + +The operation specification may also have the following keys: + +- "dtype": String representing the dtype of the new value. If this key is not present, then the dtype of the + existing value for the dataset/attribute is used. Allowed dtypes are listed in the + `HDMF schema language docs for dtype `_. + +.. note:: + + Replacing the values of datasets or attributes with object references or a compound data type is not yet supported. + +Deleting a dataset/attribute +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Specify ``"type": "delete"`` to delete (ignore) a dataset/attribute from the associated HDMF file +as specified by the ``object_id`` and ``relative_path``. + +The operation specification does not use any additional keys. + +Future changes +-------------- + +The HDMF team is considering supporting additional operations and expanding support for current operations +specified in the sidecar JSON file, such as: + +- Add rows to a ``DynamicTable`` (column-based) +- Add rows to a ``Table`` (row-based) +- Add a new group +- Add a new dataset +- Add a new attribute +- Add a new link +- Replace a dataset or attribute with object references +- Replace a dataset or attribute with a compound data type +- Replace selected slices of a dataset or attribute +- Delete a group +- Delete a link + +Please provide feedback on which operations are useful to you for HDMF to support in this +`issue ticket `_. diff --git a/sidecar.schema.json b/sidecar.schema.json index 5010bfbf3..fcb2e254d 100644 --- a/sidecar.schema.json +++ b/sidecar.schema.json @@ -14,12 +14,17 @@ "schema_version" ], "properties": { - "description": {"type": "string"}, + "description": { + "description": "A free-form string describing the modifications specified in this file.", + "type": "string" + }, "author": { + "description": "A list of free-form strings containing the names of the people who created this file.", "type": "array", "items": {"type": "string"} }, "contact": { + "description": "A list of email addresses for the people who created this file. Each author listed in the 'author' key *should* have a corresponding email address.", "type": "array", "items": { "type": "string", @@ -27,6 +32,7 @@ } }, "operations": { + "description": "A list of operations to perform on the data in the file.", "type": "array", "items": { "type": "object", @@ -39,16 +45,24 @@ ], "properties": { "type": { - "description": "The type of modification to perform on the field of a neurodata type.", - "member_region": { "type": ["replace", "delete", "create_attribute", "change_dtype"] } + "description": "The type of modification to perform.", + "member_region": { + "type": ["replace", "delete"] + } + }, + "description": { + "description": "A description of the specified modification.", + "type": "string" }, - "description": {"type": "string"}, "object_id": { - "description": "Object ID. Must conform to UUID-4 with dashes", + "description": "The object ID (UUID) of the data type that is closest in the file hierarchy to the field being modified. 
Must be in the UUID-4 format with hyphens.", "type": "string", "pattern": "^[0-9a-f]{8}\\-[0-9a-f]{4}\\-4[0-9a-f]{3}\\-[89ab][0-9a-f]{3}\\-[0-9a-f]{12}$" }, - "relative_path": {"type": "string"}, + "relative_path": { + "description": " The relative path from the data type with the given object ID to the field being modified.", + "type": "string" + }, "element_type": { "anyOf": [ { @@ -62,14 +76,16 @@ ] }, "value": { - "description": "New value.", - "member_region": { "type": ["array", "string", "number", "boolean", "null"] } + "description": "The new value for the dataset/attribute.", + "member_region": { + "type": ["array", "string", "number", "boolean", "null"] + } }, "dtype": {"$ref": "#/definitions/dtype"} }, "allOf": [ { - "description": "if type==replace, then value is required", + "description": "if type==replace, then value is required.", "if": { "properties": { "type": { "const": "replace" } } }, @@ -78,7 +94,7 @@ } }, { - "description": "if type==delete, then value and dtype are not allowed", + "description": "if type==delete, then value and dtype are not allowed.", "if": { "properties": { "type": { "const": "delete" } } }, @@ -90,7 +106,7 @@ } }, { - "description": "if type==create, then element_type is required", + "description": "if type==create, then element_type is required.", "if": { "properties": { "type": { "const": "create" } } }, @@ -102,16 +118,12 @@ } }, "schema_version": { - "description": "The version of the sidecar JSON schema that the file conforms to. Must confirm to Semantic Versioning v2.0", + "description": "The version of the sidecar JSON schema that the file conforms to. Must confirm to Semantic Versioning v2.0.", "type": "string", "pattern": "^(0|[1-9]\\d*)\\.(0|[1-9]\\d*)\\.(0|[1-9]\\d*)(?:-((?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\\.(?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\\+([0-9a-zA-Z-]+(?:\\.[0-9a-zA-Z-]+)*))?$" } }, "definitions": { - "protectedString": { - "type": "string", - "pattern": "^[A-Za-z_][A-Za-z0-9_]*$" - }, "dtype": { "anyOf": [ {"$ref": "#/definitions/flat_dtype"}, @@ -119,7 +131,7 @@ ] }, "flat_dtype": { - "description": "Required string describing the data type of the attribute", + "description": "String describing the data type of the dataset or attribute.", "anyOf": [ { "type": "string", @@ -156,11 +168,11 @@ "required": ["target_type", "reftype"], "properties": { "target_type": { - "description": "Describes the neurodata_type of the target that the reference points to", + "description": "Describes the data_type of the target that the reference points to", "type": "string" }, "reftype": { - "description": "describes the kind of reference", + "description": "Describes the kind of reference", "type": "string", "enum": ["ref", "reference", "object", "region"] } diff --git a/src/hdmf/backends/builderupdater.py b/src/hdmf/backends/builderupdater.py index dfe417dcf..03dd20aeb 100644 --- a/src/hdmf/backends/builderupdater.py +++ b/src/hdmf/backends/builderupdater.py @@ -111,13 +111,15 @@ def update_from_sidecar_json(cls, **kwargs): builder = builder_map[object_id] # TODO handle paths to links # TODO handle object references - if operation_type == 'replace' or operation_type == 'delete': + if operation_type == 'replace': cls.__replace(builder, relative_path, new_value, new_dtype) - elif operation_type == 'change_dtype': - assert new_value is None - cls.__change_dtype(builder, relative_path, new_dtype) - elif operation_type == 'create_attribute': - cls.__create_attribute(builder, relative_path, new_value, new_dtype) + elif 
operation_type == 'delete': + cls.__delete(builder, relative_path) + # elif operation_type == 'change_dtype': + # assert new_value is None + # cls.__change_dtype(builder, relative_path, new_dtype) + # elif operation_type == 'create_attribute': + # cls.__create_attribute(builder, relative_path, new_value, new_dtype) else: raise ValueError("Operation type: '%s' not supported." % operation_type) @@ -127,22 +129,43 @@ def update_from_sidecar_json(cls, **kwargs): def __replace(cls, builder, relative_path, new_value, new_dtype): if relative_path in builder.attributes: cls.__replace_attribute(builder, relative_path, new_value, new_dtype) - elif isinstance(builder, GroupBuilder): # GroupBuilder has object_id - sub_dset_builder, attr_name = builder.get_subbuilder(relative_path) - if sub_dset_builder is None: + elif isinstance(builder, GroupBuilder): # object_id points to GroupBuilder + sub_builder, attr_name = builder.get_subbuilder(relative_path) + if sub_builder is None: raise ValueError("Relative path '%s' not recognized as a group, dataset, or attribute." % relative_path) if attr_name is None: - cls.__replace_dataset_data(sub_dset_builder, new_value, new_dtype) + cls.__replace_dataset_data(sub_builder, new_value, new_dtype) else: - cls.__replace_attribute(sub_dset_builder, attr_name, new_value, new_dtype) - else: # DatasetBuilder has object_id + cls.__replace_attribute(sub_builder, attr_name, new_value, new_dtype) + else: # object_id points to DatasetBuilder if not relative_path: cls.__replace_dataset_data(builder, new_value, new_dtype) else: raise ValueError("Relative path '%s' not recognized as None or attribute." % relative_path) @classmethod + def __delete(cls, builder, relative_path): + if relative_path in builder.attributes: # relative_path is name of attribute in GroupBuilder/DatasetBuilder + cls.__delete_attribute(builder, relative_path) + elif isinstance(builder, GroupBuilder): # object_id points to GroupBuilder + sub_builder, attr_name = builder.get_subbuilder(relative_path) + if sub_builder is None: + raise ValueError("Relative path '%s' not recognized as a group, dataset, or attribute." + % relative_path) + if attr_name is None: + # delete the DatasetBuilder from its parent + cls.__delete_builder(sub_builder.parent, sub_builder) + else: + cls.__delete_attribute(sub_builder, attr_name) # delete the attribute from the sub-Builder + else: # object_id points to DatasetBuilder + if not relative_path: + cls.__delete_builder(builder.parent, builder) # delete the DatasetBuilder from its parent + else: + raise ValueError("Relative path '%s' not recognized as None or attribute." 
% relative_path) + + @classmethod + # NOTE this function is currently unused def __change_dtype(cls, builder, relative_path, new_dtype): if relative_path in builder.attributes: cls.__change_dtype_attribute(builder, relative_path, new_dtype) @@ -191,6 +214,14 @@ def __replace_attribute(cls, builder, attr_name, value, dtype): new_value = cls.convert_dtype(value, dtype, builder.attributes[attr_name]) builder.attributes[attr_name] = new_value + @classmethod + def __delete_attribute(cls, builder, attr_name): + builder.remove_attribute(attr_name) + + @classmethod + def __delete_builder(cls, parent_builder, child_builder): + parent_builder.remove_child(child_builder) + @classmethod def __change_dtype_dataset_data(cls, dset_builder, dtype): new_value = cls.convert_dtype(dset_builder['data'], dtype, dset_builder['data']) diff --git a/src/hdmf/build/builders.py b/src/hdmf/build/builders.py index 9487d73d4..603369515 100644 --- a/src/hdmf/build/builders.py +++ b/src/hdmf/build/builders.py @@ -68,6 +68,13 @@ def parent(self, p): if self.__source is None: self.source = p.source + def reset_parent(self): + """Reset the parent of this Container to None. + + Use with caution. This can result in orphaned builders and broken links. Does not reset source. + """ + self.__parent = None + def __repr__(self): ret = "%s %s %s" % (self.path, self.__class__.__name__, super().__repr__()) return ret @@ -111,6 +118,10 @@ def set_attribute(self, **kwargs): name, value = getargs('name', 'value', kwargs) self.attributes[name] = value + def remove_attribute(self, name): + """Remove a child attribute from this builder. Use with caution. Intended for internal use.""" + del self.attributes[name] + class GroupBuilder(BaseBuilder): # sub-dictionary keys. subgroups go in super().__getitem__(GroupBuilder.__group) @@ -244,6 +255,26 @@ def __set_builder(self, builder, obj_type): if builder.parent is None: builder.parent = self + def remove_attribute(self, name): + """Remove a child attribute from this builder. Use with caution. Intended for internal use.""" + del self.obj_type[name] + super().remove_attribute(name) + + def remove_child(self, child): + """Remove a child builder from this builder. Use with caution. Intended for internal use.""" + if isinstance(child, GroupBuilder): + obj_type = GroupBuilder.__group + elif isinstance(child, DatasetBuilder): + obj_type = GroupBuilder.__dataset + elif isinstance(child, LinkBuilder): + obj_type = GroupBuilder.__link + else: # pragma: no cover + raise ValueError("child is expected to be a GroupBuilder, DatasetBuilder, or LinkBuilder, not a %s" + % type(child)) + del super().__getitem__(obj_type)[child.name] + del self.obj_type[child.name] + child.reset_parent() + def is_empty(self): """Returns true if there are no datasets, links, attributes, and non-empty subgroups. 
False otherwise.""" if len(self.datasets) or len(self.links) or len(self.attributes): diff --git a/src/hdmf/build/errors.py b/src/hdmf/build/errors.py index ec31ef5ba..66f3b63d6 100644 --- a/src/hdmf/build/errors.py +++ b/src/hdmf/build/errors.py @@ -47,3 +47,11 @@ class ContainerConfigurationError(Exception): class ConstructError(Exception): """Error raised when constructing a container from a builder.""" + + @docval({'name': 'builder', 'type': Builder, 'doc': 'the builder that cannot be constructed'}, + {'name': 'reason', 'type': str, 'doc': 'the reason for the error'}) + def __init__(self, **kwargs): + self.__builder = getargs('builder', kwargs) + self.__reason = getargs('reason', kwargs) + self.__message = "%s (%s): %s" % (self.__builder.name, self.__builder.path, self.__reason) + super().__init__(self.__message) diff --git a/tests/unit/io_tests/test_sidecar.py b/tests/unit/io_tests/test_sidecar.py index 8aae8d65c..177d670e2 100644 --- a/tests/unit/io_tests/test_sidecar.py +++ b/tests/unit/io_tests/test_sidecar.py @@ -5,7 +5,7 @@ from hdmf import Container, Data from hdmf.backends.hdf5.h5tools import HDF5IO from hdmf.backends import SidecarValidationError -from hdmf.build import BuildManager, TypeMap, ObjectMapper +from hdmf.build import BuildManager, TypeMap, ObjectMapper, ConstructError from hdmf.spec import AttributeSpec, DatasetSpec, GroupSpec, SpecCatalog, SpecNamespace, NamespaceCatalog from hdmf.testing import TestCase from hdmf.utils import getargs, docval @@ -20,44 +20,12 @@ def setUp(self): self.foo2 = Foo(name='foo2', my_data=[1, 2, 3], my_sub_data=[1, 2, 3], attr1='old') self.sub_foo = Foo(name='sub_foo', my_data=[-1, -2, -3], my_sub_data=[-1, -2, -3], attr1='OLD') self.foo_data = FooData(name='my_foo_data', data=[0, 1], data_attr1=1, data_attr2=['a']) - self.foo1 = Foo(name='foo1', my_data=[1, 2, 3], my_sub_data=[1, 2, 3], attr1='old', attr2=[17], + self.foo1 = Foo(name='foo1', my_data=[1, 2, 3], my_sub_data=[1, 2, 3], attr1='old', attr2='old', attr3=[17], sub_foo=self.sub_foo, foo_holder_foos=[self.foo2], my_foo_data=self.foo_data, - attr3=[1], attr4=[1], attr5=[1]) + attr4=[1], attr5=[1], attr6=[1], optional_data=[1, 2, 3]) with HDF5IO(self.h5_path, manager=_get_manager(), mode='w') as io: io.write(self.foo1) - # operations = [ - # { - # "type": "create_attribute", - # "description": "create sub_foo/my_data/attr2 and set it to [1] (int16)", - # "object_id": sub_foo.object_id, - # "relative_path": "my_data/attr2", - # "value": [1], - # "dtype": "int16" - # }, - # { - # "type": "delete", - # "description": "delete foo1/my_data/attr2", - # "object_id": foo1.object_id, - # "relative_path": "my_data/attr2", - # }, - # { - # "type": "change_dtype", - # "description": "change dtype of foo1/my_data from int16 to int8", - # "object_id": foo1.object_id, - # "relative_path": "my_data", - # "dtype": "int8" - # } - # ] - # - # sidecar = dict() - # sidecar["operations"] = operations - # sidecar["schema_version"] = "0.1.0" - # - # self.json_path = "./tests/unit/io_tests/test_sidecar.json" - # with open(self.json_path, 'w') as outfile: - # json.dump(sidecar, outfile, indent=4) - def tearDown(self): if os.path.exists(self.h5_path): os.remove(self.h5_path) @@ -90,8 +58,8 @@ def test_replace_typed_group_dataset_data(self): self._write_test_sidecar(operations) with HDF5IO(self.h5_path, 'r', manager=_get_manager()) as io: read_foo1 = io.read() - # TODO make sure this checks dtype - np.testing.assert_array_equal(read_foo1.my_data, np.array([4, 5], dtype=np.dtype('int8'))) + 
np.testing.assert_array_equal(read_foo1.my_data, np.array([4, 5])) + assert read_foo1.my_data.dtype is np.dtype('int8') def test_replace_subgroup_dataset_data(self): """Test replacing the data of a dataset in a subgroup of a typed group.""" @@ -108,8 +76,8 @@ def test_replace_subgroup_dataset_data(self): self._write_test_sidecar(operations) with HDF5IO(self.h5_path, 'r', manager=_get_manager()) as io: read_foo1 = io.read() - # TODO make sure this checks dtype - np.testing.assert_array_equal(read_foo1.my_sub_data, np.array([4, 5], dtype=np.dtype('int8'))) + np.testing.assert_array_equal(read_foo1.my_sub_data, np.array([4, 5])) + assert read_foo1.my_sub_data.dtype is np.dtype('int8') def test_replace_typed_dataset_data(self): """Test replacing the data of a typed dataset.""" @@ -126,8 +94,8 @@ def test_replace_typed_dataset_data(self): self._write_test_sidecar(operations) with HDF5IO(self.h5_path, 'r', manager=_get_manager()) as io: read_foo1 = io.read() - # TODO make sure this checks dtype - np.testing.assert_array_equal(read_foo1.my_foo_data.data, np.array([2, 3], dtype=np.dtype('int8'))) + np.testing.assert_array_equal(read_foo1.my_foo_data.data, np.array([2, 3])) + assert read_foo1.my_foo_data.data.dtype is np.dtype('int8') def test_replace_typed_group_attribute(self): """Test replacing the attribute of a typed group.""" @@ -150,9 +118,9 @@ def test_replace_subgroup_attribute(self): operations = [ { "type": "replace", - "description": "change foo1/foo_holder/attr4 from [1] to [2] (int8)", + "description": "change foo1/foo_holder/attr5 from [1] to [2] (int8)", "object_id": self.foo1.object_id, - "relative_path": "foo_holder/attr4", + "relative_path": "foo_holder/attr5", "value": [2], "dtype": "int8" }, @@ -160,17 +128,17 @@ def test_replace_subgroup_attribute(self): self._write_test_sidecar(operations) with HDF5IO(self.h5_path, 'r', manager=_get_manager()) as io: read_foo1 = io.read() - # TODO make sure this checks dtype - np.testing.assert_array_equal(read_foo1.attr4, np.array([2], dtype=np.dtype('int8'))) + np.testing.assert_array_equal(read_foo1.attr5, np.array([2])) + assert read_foo1.attr5.dtype is np.dtype('int8') def test_replace_typed_group_dataset_attribute(self): """Test replacing the attribute of an untyped dataset in a typed group.""" operations = [ { "type": "replace", - "description": "change foo1/my_data/attr2 from [1] to [2] (int8)", + "description": "change foo1/my_data/attr3 from [1] to [2] (int8)", "object_id": self.foo1.object_id, - "relative_path": "my_data/attr2", + "relative_path": "my_data/attr3", "value": [2], "dtype": "int8" }, @@ -178,17 +146,17 @@ def test_replace_typed_group_dataset_attribute(self): self._write_test_sidecar(operations) with HDF5IO(self.h5_path, 'r', manager=_get_manager()) as io: read_foo1 = io.read() - # TODO make sure this checks dtype - np.testing.assert_array_equal(read_foo1.attr2, np.array([2], dtype=np.dtype('int8'))) + np.testing.assert_array_equal(read_foo1.attr3, np.array([2])) + assert read_foo1.attr3.dtype is np.dtype('int8') def test_replace_subgroup_dataset_attribute(self): """Test replacing the attribute of an untyped dataset in an untyped group in a typed group.""" operations = [ { "type": "replace", - "description": "change foo1/foo_holder/my_sub_data/attr5 from [1] to [2] (int8)", + "description": "change foo1/foo_holder/my_sub_data/attr6 from [1] to [2] (int8)", "object_id": self.foo1.object_id, - "relative_path": "foo_holder/my_sub_data/attr5", + "relative_path": "foo_holder/my_sub_data/attr6", "value": [2], "dtype": "int8" 
}, @@ -196,8 +164,8 @@ def test_replace_subgroup_dataset_attribute(self): self._write_test_sidecar(operations) with HDF5IO(self.h5_path, 'r', manager=_get_manager()) as io: read_foo1 = io.read() - # TODO make sure this checks dtype - np.testing.assert_array_equal(read_foo1.attr5, np.array([2], dtype=np.dtype('int8'))) + np.testing.assert_array_equal(read_foo1.attr6, np.array([2])) + assert read_foo1.attr6.dtype is np.dtype('int8') def test_replace_typed_dataset_attribute(self): """Test replacing the attribute of a typed dataset.""" @@ -216,48 +184,200 @@ def test_replace_typed_dataset_attribute(self): read_foo1 = io.read() assert read_foo1.my_foo_data.data_attr1 == 2 - def test_delete_typed_group_req_attribute(self): - pass + def test_delete_typed_group_required_dataset(self): + """Test deleting a required dataset in a typed group.""" + operations = [ + { + "type": "delete", + "description": "delete foo1/my_data", + "object_id": self.foo1.object_id, + "relative_path": "my_data", + }, + ] + self._write_test_sidecar(operations) + with HDF5IO(self.h5_path, 'r', manager=_get_manager()) as io: + msg = "root (root): Could not construct Foo object due to: Foo.__init__: missing argument 'my_data'" + with self.assertRaisesWith(ConstructError, msg): + io.read() - def test_delete_typed_group_opt_attribute(self): - pass + def test_delete_typed_group_optional_dataset(self): + """Test deleting an optional dataset in a typed group.""" + operations = [ + { + "type": "delete", + "description": "delete foo1/opt_data", + "object_id": self.foo1.object_id, + "relative_path": "opt_data", + }, + ] + self._write_test_sidecar(operations) + with HDF5IO(self.h5_path, 'r', manager=_get_manager()) as io: + read_foo1 = io.read() + assert read_foo1.optional_data is None - def test_delete_subgroup_req_attribute(self): - pass + def test_delete_typed_group_optional_typed_dataset(self): + """Test deleting an optional typed dataset in a typed group by providing the group object ID. - def test_delete_subgroup_opt_attribute(self): - pass + Note that my_foo_data has an object ID which can be used instead of this method. 
+ """ + operations = [ + { + "type": "delete", + "description": "delete foo1/my_foo_data", + "object_id": self.foo1.object_id, + "relative_path": "my_foo_data", + }, + ] + self._write_test_sidecar(operations) + with HDF5IO(self.h5_path, 'r', manager=_get_manager()) as io: + read_foo1 = io.read() + assert read_foo1.my_foo_data is None - def test_delete_typed_group_dataset_req_attribute(self): - pass + def test_delete_typed_dataset(self): + """Test deleting an optional typed dataset by providing the dataset object ID.""" + operations = [ + { + "type": "delete", + "description": "delete my_foo_data from its parent", + "object_id": self.foo_data.object_id, + "relative_path": "", + }, + ] + self._write_test_sidecar(operations) + with HDF5IO(self.h5_path, 'r', manager=_get_manager()) as io: + read_foo1 = io.read() + assert read_foo1.my_foo_data is None - def test_delete_typed_group_dataset_opt_attribute(self): - pass + def test_delete_subgroup_required_dataset(self): + """Test deleting a required dataset in a subgroup of a typed group.""" + operations = [ + { + "type": "delete", + "description": "delete foo1/foo_holder_foos/my_sub_data", + "object_id": self.foo1.object_id, + "relative_path": "foo_holder/my_sub_data" + }, + ] + self._write_test_sidecar(operations) + with HDF5IO(self.h5_path, 'r', manager=_get_manager()) as io: + msg = "root (root): Could not construct Foo object due to: Foo.__init__: missing argument 'my_sub_data'" + with self.assertRaisesWith(ConstructError, msg): + io.read() - def test_delete_subgroup_dataset_req_attribute(self): - pass + def test_delete_typed_group_req_attribute(self): + """Test deleting a required attribute of a typed group.""" + operations = [ + { + "type": "delete", + "description": "delete foo1/attr1", + "object_id": self.foo1.object_id, + "relative_path": "attr1", + }, + ] + self._write_test_sidecar(operations) + with HDF5IO(self.h5_path, 'r', manager=_get_manager()) as io: + msg = "root (root): Could not construct Foo object due to: Foo.__init__: missing argument 'attr1'" + with self.assertRaisesWith(ConstructError, msg): + io.read() - def test_delete_subgroup_dataset_opt_attribute(self): - pass + def test_delete_typed_group_opt_attribute(self): + """Test deleting an optional attribute of a typed group.""" + operations = [ + { + "type": "delete", + "description": "delete foo1/attr2", + "object_id": self.foo1.object_id, + "relative_path": "attr2", + }, + ] + self._write_test_sidecar(operations) + with HDF5IO(self.h5_path, 'r', manager=_get_manager()) as io: + read_foo1 = io.read() + assert read_foo1.attr2 is None # the default value when attr2 is not provided + + def test_delete_subgroup_opt_attribute(self): + """Test deleting an optional attribute of a subgroup.""" + operations = [ + { + "type": "delete", + "description": "delete foo1/foo_holder/attr5", + "object_id": self.foo1.object_id, + "relative_path": "foo_holder/attr5", + }, + ] + self._write_test_sidecar(operations) + with HDF5IO(self.h5_path, 'r', manager=_get_manager()) as io: + read_foo1 = io.read() + assert read_foo1.attr5 is None - def test_delete_dataset_req_attribute(self): - pass + def test_delete_typed_group_dataset_opt_attribute(self): + """Test deleting an optional attribute of a dataset in a typed group.""" + operations = [ + { + "type": "delete", + "description": "delete foo1/my_data/attr3", + "object_id": self.foo1.object_id, + "relative_path": "my_data/attr3", + }, + ] + self._write_test_sidecar(operations) + with HDF5IO(self.h5_path, 'r', manager=_get_manager()) as io: + read_foo1 
= io.read() + assert read_foo1.attr3 is None - def test_delete_dataset_opt_attribute(self): - pass + def test_delete_subgroup_dataset_opt_attribute(self): + """Test deleting an optional attribute of a dataset in a subgroup.""" + operations = [ + { + "type": "delete", + "description": "delete foo1/foo_holder/my_sub_data/attr6", + "object_id": self.foo1.object_id, + "relative_path": "foo_holder/my_sub_data/attr6", + }, + ] + self._write_test_sidecar(operations) + with HDF5IO(self.h5_path, 'r', manager=_get_manager()) as io: + read_foo1 = io.read() + assert read_foo1.attr6 is None - def test_cannot_delete_group(self): - pass + def test_delete_typed_dataset_req_attribute(self): + """Test deleting a required attribute of a typed dataset.""" + operations = [ + { + "type": "delete", + "description": "delete my_foo_data/data_attr1", + "object_id": self.foo_data.object_id, + "relative_path": "data_attr1", + }, + ] + self._write_test_sidecar(operations) + with HDF5IO(self.h5_path, 'r', manager=_get_manager()) as io: + msg = ("my_foo_data (root/my_foo_data): Could not construct FooData object due to: FooData.__init__: " + "missing argument 'data_attr1'") + with self.assertRaisesWith(ConstructError, msg): + io.read() - def test_cannot_delete_dataset(self): - pass + def test_delete_typed_dataset_opt_attribute(self): + """Test deleting an optional attribute of a typed dataset.""" + operations = [ + { + "type": "delete", + "description": "delete my_foo_data/data_attr2", + "object_id": self.foo_data.object_id, + "relative_path": "data_attr2", + }, + ] + self._write_test_sidecar(operations) + with HDF5IO(self.h5_path, 'r', manager=_get_manager()) as io: + read_foo1 = io.read() + assert read_foo1.my_foo_data.data_attr2 is None class TestFailValidation(TestCase): def setUp(self): self.h5_path = "./tests/unit/io_tests/test_sidecar_fail.h5" - foo1 = Foo(name='foo1', my_data=[1, 2, 3], my_sub_data=[1, 2, 3], attr1='old', attr2=[17]) + foo1 = Foo(name='foo1', my_data=[1, 2, 3], my_sub_data=[1, 2, 3], attr1='old', attr3=[17]) with HDF5IO(self.h5_path, manager=_get_manager(), mode='w') as io: io.write(foo1) @@ -287,19 +407,20 @@ class Foo(Container): {'name': 'my_sub_data', 'type': ('array_data', 'data'), 'doc': 'a 1-D or 2-D integer dataset', 'shape': ((None, ), (None, None))}, {'name': 'attr1', 'type': str, 'doc': 'a string attribute'}, + {'name': 'attr2', 'type': str, 'doc': 'a string attribute', 'default': None}, {'name': 'optional_data', 'type': ('array_data', 'data'), 'doc': 'a 1-D integer dataset', 'default': None}, - {'name': 'attr2', 'type': ('array_data', 'data'), 'doc': 'a 1-D integer attribute', 'shape': (None, ), + {'name': 'attr3', 'type': ('array_data', 'data'), 'doc': 'a 1-D integer attribute', 'shape': (None, ), 'default': None}, - {'name': 'attr3', 'type': ('array_data', 'data'), 'doc': 'a 1-D integer attribute', 'default': None}, {'name': 'attr4', 'type': ('array_data', 'data'), 'doc': 'a 1-D integer attribute', 'default': None}, {'name': 'attr5', 'type': ('array_data', 'data'), 'doc': 'a 1-D integer attribute', 'default': None}, + {'name': 'attr6', 'type': ('array_data', 'data'), 'doc': 'a 1-D integer attribute', 'default': None}, {'name': 'sub_foo', 'type': 'Foo', 'doc': 'a child Foo', 'default': None}, {'name': 'my_foo_data', 'type': 'FooData', 'doc': 'a child Foo', 'default': None}, {'name': 'foo_holder_foos', 'type': ('array_data', 'data'), 'doc': 'child Foos', 'default': None}, {'name': 'foo_holder_foo_data', 'type': ('array_data', 'data'), 'doc': 'child FooData', 'default': None}) def 
__init__(self, **kwargs): - name, my_data, my_sub_data, attr1, = getargs('name', 'my_data', 'my_sub_data', 'attr1', kwargs) - optional_data, attr2, attr3, attr4, attr5 = getargs('optional_data', 'attr2', 'attr3', 'attr4', 'attr5', kwargs) + name, my_data, my_sub_data, attr1, attr2 = getargs('name', 'my_data', 'my_sub_data', 'attr1', 'attr2', kwargs) + optional_data, attr3, attr4, attr5, attr6 = getargs('optional_data', 'attr3', 'attr4', 'attr5', 'attr6', kwargs) sub_foo, my_foo_data = getargs('sub_foo', 'my_foo_data', kwargs) foo_holder_foos, foo_holder_foo_data = getargs('foo_holder_foos', 'foo_holder_foo_data', kwargs) super().__init__(name=name) @@ -310,6 +431,7 @@ def __init__(self, **kwargs): self.attr3 = attr3 self.attr4 = attr4 self.attr5 = attr5 + self.attr6 = attr6 self.optional_data = optional_data self.sub_foo = sub_foo if sub_foo is not None: @@ -354,20 +476,21 @@ def _get_manager(): # - datasets: # - my_sub_data (int, 1-D or 2-D), required # - attributes: - # - attr5 (int, 1-D), optional, remapped to attr5 + # - attr6 (int, 1-D), optional, remapped to attr6 # - (FooData), 0 to many, remapped to foo_holder_foo_data # - attributes: - # - attr4 (int, 1-D), optional, remapped to attr4 + # - attr5 (int, 1-D), optional, remapped to attr5 # - datasets: # - my_data (int, 1-D or 2-D), required # - attributes: - # - attr2 (int, 1-D), optional, remapped to attr2 + # - attr3 (int, 1-D), optional, remapped to attr3 # - opt_data (int, 1-D or 2-D), 0 or 1, remapped to optional_data # - attributes: - # - attr3 (int, 1-D), optional, remapped to attr3 + # - attr4 (int, 1-D), optional, remapped to attr4 # - my_foo_data (FooData), optional # - attributes: # - attr1 (string, scalar), required + # - attr2 (string, scalar), optional # # FooData (dataset with data_type) has: # - int, 1D @@ -425,7 +548,7 @@ def _get_manager(): shape=[[None, ], [None, None]], attributes=[ AttributeSpec( - name='attr5', + name='attr6', doc='a 1-D integer attribute', dtype='int', shape=[None, ], @@ -441,7 +564,7 @@ def _get_manager(): ], attributes=[ AttributeSpec( - name='attr4', + name='attr5', doc='a 1-D integer attribute', dtype='int', shape=[None, ], @@ -458,7 +581,7 @@ def _get_manager(): shape=[[None, ], [None, None]], attributes=[ AttributeSpec( - name='attr2', + name='attr3', doc='a 1-D integer attribute', dtype='int', shape=[None, ], @@ -474,7 +597,7 @@ def _get_manager(): quantity='?', attributes=[ AttributeSpec( - name='attr3', + name='attr4', doc='a 1-D integer attribute', dtype='int', shape=[None, ], @@ -491,29 +614,30 @@ def _get_manager(): ], attributes=[ AttributeSpec(name='attr1', doc='a string attribute', dtype='text'), + AttributeSpec(name='attr2', doc='a string attribute', dtype='text', required=False), ] ) class FooMapper(ObjectMapper): """Remap spec fields to Container attributes. - - 'attr2' attribute on Foo container to 'my_data' dataset spec > 'attr2' attribute spec - - 'attr3' attribute on Foo container to 'opt_data' dataset spec > 'attr3' attribute spec. + - 'attr3' attribute on Foo container to 'my_data' dataset spec > 'attr3' attribute spec + - 'attr4' attribute on Foo container to 'opt_data' dataset spec > 'attr4' attribute spec. 
- TODO fill me in """ def __init__(self, spec): super().__init__(spec) my_data_spec = spec.get_dataset('my_data') - self.map_spec('attr2', my_data_spec.get_attribute('attr2')) + self.map_spec('attr3', my_data_spec.get_attribute('attr3')) opt_data_spec = spec.get_dataset('opt_data') - self.map_spec('attr3', opt_data_spec.get_attribute('attr3')) + self.map_spec('attr4', opt_data_spec.get_attribute('attr4')) self.map_spec('optional_data', opt_data_spec) foo_holder_spec = spec.get_group('foo_holder') - self.map_spec('attr4', foo_holder_spec.get_attribute('attr4')) + self.map_spec('attr5', foo_holder_spec.get_attribute('attr5')) self.map_spec('foo_holder_foos', foo_holder_spec.get_data_type('Foo')) self.map_spec('foo_holder_foo_data', foo_holder_spec.get_data_type('FooData')) my_sub_data_spec = foo_holder_spec.get_dataset('my_sub_data') self.map_spec('my_sub_data', my_sub_data_spec) - self.map_spec('attr5', my_sub_data_spec.get_attribute('attr5')) + self.map_spec('attr6', my_sub_data_spec.get_attribute('attr6')) spec_catalog = SpecCatalog() spec_catalog.register_spec(foo_spec, 'test.yaml') From ecd244d1b319e1e102d8cb73da295418deee600e Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Tue, 12 Apr 2022 01:19:28 -0700 Subject: [PATCH 13/19] Update --- docs/source/index.rst | 1 + docs/source/sidecar.rst | 26 +++++++++++++------------- src/hdmf/backends/io.py | 11 +++++++++-- tests/unit/io_tests/test_sidecar.py | 17 +++++++++++++++++ 4 files changed, 40 insertions(+), 15 deletions(-) diff --git a/docs/source/index.rst b/docs/source/index.rst index b07ece740..3c6556574 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -45,6 +45,7 @@ If you use HDMF in your research, please use the following citation: building_api export validation + sidecar .. toctree:: :hidden: diff --git a/docs/source/sidecar.rst b/docs/source/sidecar.rst index 523596328..4d5ea8346 100644 --- a/docs/source/sidecar.rst +++ b/docs/source/sidecar.rst @@ -1,6 +1,6 @@ .. _modifying_with_sidecar: -Modifying an HDMF file with a sidecar JSON file +Modifying an HDMF File with a Sidecar JSON File =============================================== Users may want to update part of an HDMF file without rewriting the entire file. @@ -13,7 +13,7 @@ automatically read and the modifications that it specifies are automatically app .. note:: This default behavior can be changed such that the corresponding sidecar JSON file is ignored when the HDMF file - is read by passing ``load_sidecar=False`` to the instance of `HDMFIO` used to read the HDMF file. + is read by passing ``load_sidecar=False`` to ``HDMFIO.read()`` on the ``HDMFIO`` object used to read the HDMF file. Allowed modifications --------------------- @@ -37,23 +37,23 @@ located at the root of the HDMF repository. The sidecar JSON file must contain the following top-level keys: -- "description": A free-form string describing the modifications specified in this file. -- "author": A list of free-form strings containing the names of the people who created this file. -- "contact": A list of email addresses for the people who created this file. Each author listed in the "author" key +- ``"description"``: A free-form string describing the modifications specified in this file. +- ``"author"``: A list of free-form strings containing the names of the people who created this file. +- ``"contact"``: A list of email addresses for the people who created this file. Each author listed in the "author" key *should* have a corresponding email address. 
-- "operations": A list of operations to perform on the data in the file, as specified below. -- "schema_version": The version of the sidecar JSON schema that the file conforms to. +- ``"operations"``: A list of operations to perform on the data in the file, as specified below. +- ``"schema_version"``: The version of the sidecar JSON schema that the file conforms to. Specification for operations ---------------------------- All operations are required to have the following keys: -- "type": The type of modification to perform. Only "replace" and "delete" are supported currently. -- "description": A description of the specified modification. -- "object_id": The object ID (UUID) of the data type that is closest in the file hierarchy to the +- ``"type"``: The type of modification to perform. Only "replace" and "delete" are supported currently. +- ``"description"``: A description of the specified modification. +- ``"object_id"``: The object ID (UUID) of the data type that is closest in the file hierarchy to the field being modified. -- "relative_path": The relative path from the data type with the given object ID to the field being modified. +- ``"relative_path"``: The relative path from the data type with the given object ID to the field being modified. Operations can result in invalid files, i.e., files that do not conform to the specification. It is strongly recommended that the file is validated against the schema after loading the sidecar JSON. In some cases, the @@ -67,12 +67,12 @@ as specified by the ``object_id`` and ``relative_path``. The operation specification must have the following keys: -- "value": The new value for the dataset/attribute. Only scalar and 1-dimensional arrays can be +- ``"value"``: The new value for the dataset/attribute. Only scalar and 1-dimensional arrays can be specified as a replacement value. The operation specification may also have the following keys: -- "dtype": String representing the dtype of the new value. If this key is not present, then the dtype of the +- ``"dtype"``: String representing the dtype of the new value. If this key is not present, then the dtype of the existing value for the dataset/attribute is used. Allowed dtypes are listed in the `HDMF schema language docs for dtype `_. diff --git a/src/hdmf/backends/io.py b/src/hdmf/backends/io.py index b7965a62b..f6a3174d6 100644 --- a/src/hdmf/backends/io.py +++ b/src/hdmf/backends/io.py @@ -33,14 +33,21 @@ def source(self): '''The source of the container being read/written i.e. file path''' return self.__source - @docval(returns='the Container object that was read in', rtype=Container) + @docval({'name': 'load_sidecar', 'type': 'bool', + 'doc': ('Whether to load the sidecar JSON file and use the modifications specified in that file, if ' + 'the file exists. The file must have the same base name as the original source file but have ' + 'the suffix .json.'), + 'default': True}, + returns='the Container object that was read in', rtype=Container) def read(self, **kwargs): """Read a container from the IO source.""" + load_sidecar = popargs('load_sidecar', kwargs) f_builder = self.read_builder() if all(len(v) == 0 for v in f_builder.values()): # TODO also check that the keys are appropriate. print a better error message raise UnsupportedOperation('Cannot build data. 
There are no values.') - BuilderUpdater.update_from_sidecar_json(f_builder, self.__source) + if load_sidecar: + BuilderUpdater.update_from_sidecar_json(f_builder, self.__source) container = self.__manager.construct(f_builder) return container diff --git a/tests/unit/io_tests/test_sidecar.py b/tests/unit/io_tests/test_sidecar.py index 177d670e2..257a0aead 100644 --- a/tests/unit/io_tests/test_sidecar.py +++ b/tests/unit/io_tests/test_sidecar.py @@ -43,6 +43,23 @@ def _write_test_sidecar(self, operations): with open(self.json_path, 'w') as outfile: json.dump(sidecar, outfile, indent=4) + def test_load_sidecar_false(self): + """Test that the sidecar JSON file is ignored when read() is called with load_sidecar=False.""" + operations = [ + { + "type": "replace", + "description": "change foo1/my_data data from [1, 2, 3] to [4, 5] (int8)", + "object_id": self.foo1.object_id, + "relative_path": "my_data", + "value": [4, 5], + "dtype": "int8" + }, + ] + self._write_test_sidecar(operations) + with HDF5IO(self.h5_path, 'r', manager=_get_manager()) as io: + read_foo1 = io.read(load_sidecar=False) + np.testing.assert_array_equal(read_foo1.my_data, np.array([1, 2, 3])) + def test_replace_typed_group_dataset_data(self): """Test replacing the data of a dataset in a typed group.""" operations = [ From 168f4a9954edc7851f86e20b8e4cf68128655661 Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Tue, 12 Apr 2022 01:25:24 -0700 Subject: [PATCH 14/19] Add link to sidecar json schema --- docs/source/sidecar.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/source/sidecar.rst b/docs/source/sidecar.rst index 4d5ea8346..ed1efe669 100644 --- a/docs/source/sidecar.rst +++ b/docs/source/sidecar.rst @@ -42,7 +42,9 @@ The sidecar JSON file must contain the following top-level keys: - ``"contact"``: A list of email addresses for the people who created this file. Each author listed in the "author" key *should* have a corresponding email address. - ``"operations"``: A list of operations to perform on the data in the file, as specified below. -- ``"schema_version"``: The version of the sidecar JSON schema that the file conforms to. +- ``"schema_version"``: The version of the sidecar JSON schema that the file conforms to, e.g., "0.1.0". + View the current version of this file here: + `sidecar.schema.json `_ Specification for operations ---------------------------- From 1c57573bfe3c16520c039cb31cda5c4430c68e83 Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Tue, 12 Apr 2022 01:35:47 -0700 Subject: [PATCH 15/19] Add examples to doc --- docs/source/sidecar.rst | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/docs/source/sidecar.rst b/docs/source/sidecar.rst index ed1efe669..a0543b64c 100644 --- a/docs/source/sidecar.rst +++ b/docs/source/sidecar.rst @@ -46,6 +46,40 @@ The sidecar JSON file must contain the following top-level keys: View the current version of this file here: `sidecar.schema.json `_ +Here is an example sidecar JSON file: + +.. 
code:: javascript + + { + "description": "Summary of changes", + "author": [ + "The NWB Team" + ], + "contact": [ + "contact@nwb.org" + ], + "operations": [ + { + "type": "replace", + "description": "change foo1/my_data data from [1, 2, 3] to [4, 5] (int8)", + "object_id": "e0449bb5-2b53-48c1-b04e-85a9a4631655", + "relative_path": "my_data", + "value": [ + 4, + 5 + ], + "dtype": "int8" + }, + { + "type": "delete", + "description": "delete foo1/foo_holder/my_sub_data/attr6", + "object_id": "993fef27-680c-457a-af4d-b1d2725fcca9", + "relative_path": "foo_holder/my_sub_data/attr6" + } + ], + "schema_version": "0.1.0" + } + Specification for operations ---------------------------- @@ -78,6 +112,9 @@ The operation specification may also have the following keys: existing value for the dataset/attribute is used. Allowed dtypes are listed in the `HDMF schema language docs for dtype `_. +In the example sidecar JSON file above, the first operation specifies that the value of dataset "my_data" under +group "foo1", which has the specified object ID, should be replaced with the 1-D array [4, 5] (dtype: int8). + .. note:: Replacing the values of datasets or attributes with object references or a compound data type is not yet supported. @@ -90,6 +127,10 @@ as specified by the ``object_id`` and ``relative_path``. The operation specification does not use any additional keys. +In the example sidecar JSON file above, the second operation specifies that attribute "attr6" under +group "foo1", which has the specified object ID, at relative path "foo_holder/my_sub_data/attr6" should be deleted. +If "attr6" is a required attribute, this is likely to result in an invalid file that cannot be read by HDMF. + Future changes -------------- From 62ed248e870640fe8504549ac532c90123d39b4b Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Tue, 12 Apr 2022 01:40:01 -0700 Subject: [PATCH 16/19] Update sidecar.rst --- docs/source/sidecar.rst | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/source/sidecar.rst b/docs/source/sidecar.rst index a0543b64c..ea4ffe220 100644 --- a/docs/source/sidecar.rst +++ b/docs/source/sidecar.rst @@ -112,7 +112,7 @@ The operation specification may also have the following keys: existing value for the dataset/attribute is used. Allowed dtypes are listed in the `HDMF schema language docs for dtype `_. -In the example sidecar JSON file above, the first operation specifies that the value of dataset "my_data" under +In the example sidecar JSON file above, the first operation specifies that the value of dataset "my_data" in group "foo1", which has the specified object ID, should be replaced with the 1-D array [4, 5] (dtype: int8). .. note:: @@ -127,8 +127,9 @@ as specified by the ``object_id`` and ``relative_path``. The operation specification does not use any additional keys. -In the example sidecar JSON file above, the second operation specifies that attribute "attr6" under -group "foo1", which has the specified object ID, at relative path "foo_holder/my_sub_data/attr6" should be deleted. +In the example sidecar JSON file above, the second operation specifies that attribute "attr6" +at relative path "foo_holder/my_sub_data/attr6" from group "foo1", which has the specified object ID, +should be deleted. If "attr6" is a required attribute, this is likely to result in an invalid file that cannot be read by HDMF. 
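
For readers who want to produce a sidecar file like the example above from a script, the following is a minimal sketch using only the Python standard library. The HDMF file path, description, author, contact, and object ID are placeholders for illustration; the structural requirements shown (the five top-level keys, and a JSON file that shares the HDMF file's base name) are the ones stated in the specification above.

.. code:: python

    import json
    from pathlib import Path

    # Placeholder path to an existing HDMF file; the sidecar must share its base name.
    hdmf_file_path = Path("my_experiment.h5")

    sidecar = {
        "description": "Example corrections to my_experiment.h5",
        "author": ["A. Researcher"],              # placeholder author name
        "contact": ["a.researcher@example.org"],  # placeholder contact email
        "operations": [
            {
                "type": "replace",
                "description": "change foo1/my_data data from [1, 2, 3] to [4, 5] (int8)",
                "object_id": "e0449bb5-2b53-48c1-b04e-85a9a4631655",  # object ID of foo1
                "relative_path": "my_data",
                "value": [4, 5],
                "dtype": "int8",
            },
        ],
        "schema_version": "0.1.0",
    }

    # my_experiment.h5 is paired with my_experiment.json in the same directory.
    with open(hdmf_file_path.with_suffix(".json"), "w") as f:
        json.dump(sidecar, f, indent=4)
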
Future changes From 9faf7a22f167c79f8613c64d092de7287989de0f Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Thu, 21 Apr 2022 00:58:13 -0700 Subject: [PATCH 17/19] Update sidecar.rst --- docs/source/sidecar.rst | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/docs/source/sidecar.rst b/docs/source/sidecar.rst index ea4ffe220..40f8a08a2 100644 --- a/docs/source/sidecar.rst +++ b/docs/source/sidecar.rst @@ -3,10 +3,11 @@ Modifying an HDMF File with a Sidecar JSON File =============================================== -Users may want to update part of an HDMF file without rewriting the entire file. +Users may want to make small updates or corrections to an HDMF file without rewriting the entire file. To do so, HDMF supports the use of a "sidecar" JSON file that lives adjacent to the HDMF file on disk and specifies modifications to the HDMF file. Only a limited set of modifications are supported; for example, users can -delete a dataset or attribute but cannot create a new dataset or attribute. +hide a dataset or attribute so that it will not be read by HDMF, but cannot create a new dataset, attribute, group, +or link. When HDMF reads an HDMF file, if the corresponding sidecar JSON file exists, it is automatically read and the modifications that it specifies are automatically applied. @@ -33,7 +34,8 @@ Specification for the sidecar JSON file --------------------------------------- The sidecar JSON file can be validated using the ``sidecar.schema.json`` JSON schema file -located at the root of the HDMF repository. +located at the root of the HDMF repository. When an HDMF file is read, if a sidecar JSON file +is present, it is automatically read and validated against this JSON schema file. The sidecar JSON file must contain the following top-level keys: From 827d61d017091c75d5ca900a7cbfa027a6840322 Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Thu, 21 Apr 2022 00:58:53 -0700 Subject: [PATCH 18/19] Update docs/source/sidecar.rst Co-authored-by: Oliver Ruebel --- docs/source/sidecar.rst | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/docs/source/sidecar.rst b/docs/source/sidecar.rst index 40f8a08a2..ed5c25352 100644 --- a/docs/source/sidecar.rst +++ b/docs/source/sidecar.rst @@ -93,9 +93,13 @@ All operations are required to have the following keys: field being modified. - ``"relative_path"``: The relative path from the data type with the given object ID to the field being modified. -Operations can result in invalid files, i.e., files that do not conform to the specification. It is strongly -recommended that the file is validated against the schema after loading the sidecar JSON. In some cases, the -file cannot be read because the file is invalid. +.. warning:: + + Modifying a file via a sidecar file can result in files that are no longer compliant with the format + specification of the file. E.g., we may ``delete`` a required dataset via a sidecar operation, resulting + in an invalid file that in the worst case, may no longer be readable because required arguments are missing. + It is strongly recommended that the file is validated against the schema after loading the sidecar JSON. 
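
As a complement to the automatic schema check on read described above, a sidecar file can also be validated by hand before it is used. The following is a minimal sketch, assuming the third-party ``jsonschema`` package is installed and a local copy of ``sidecar.schema.json`` (from the root of the HDMF repository) is available; the sidecar file name is a placeholder.

.. code:: python

    import json

    from jsonschema import validate  # third-party package: pip install jsonschema

    # Placeholder paths; sidecar.schema.json lives at the root of the HDMF repository.
    with open("sidecar.schema.json") as f:
        schema = json.load(f)
    with open("my_experiment.json") as f:
        sidecar = json.load(f)

    # Raises jsonschema.exceptions.ValidationError if the sidecar does not conform to the schema.
    validate(instance=sidecar, schema=schema)
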
+ Replacing values of a dataset/attribute with a scalar or 1-D array ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ From ef22dc56db3b878b24a09b3855c33c1a4abfa26e Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Thu, 21 Apr 2022 01:06:10 -0700 Subject: [PATCH 19/19] Update sidecar.rst --- docs/source/sidecar.rst | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/docs/source/sidecar.rst b/docs/source/sidecar.rst index ed5c25352..9c02f7adc 100644 --- a/docs/source/sidecar.rst +++ b/docs/source/sidecar.rst @@ -22,7 +22,7 @@ Allowed modifications Only the following modifications to an HDMF file are supported in the sidecar JSON file: - Replace the values of a dataset or attribute with a scalar or 1-D array -- Delete a dataset or attribute +- Hide a dataset or attribute .. note:: @@ -95,10 +95,11 @@ All operations are required to have the following keys: .. warning: - Modifying a file via a sidecar file can result in files that are no longer compliant with the format - specification of the file. E.g., we may ``delete`` a required dataset via a sidecar operation, resulting - in an invalid file that in the worst case, may longer be readable because required arguments are missing. - It is strongly recommended that the file is validated against the schema after loading the sidecar JSON. + Modifying a file via a sidecar file can result in a file that is no longer compliant with the format + specification of the file. For example, we may hide a required dataset via a sidecar operation, resulting + in an invalid file that, in the worst case, may longer be readable because required arguments are missing. + It is strongly recommended that the file is validated against the schema after creating, modifying, + and loading the sidecar JSON file. Replacing values of a dataset/attribute with a scalar or 1-D array