diff --git a/docs/source/index.rst b/docs/source/index.rst index b07ece740..3c6556574 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -45,6 +45,7 @@ If you use HDMF in your research, please use the following citation: building_api export validation + sidecar .. toctree:: :hidden: diff --git a/docs/source/sidecar.rst b/docs/source/sidecar.rst new file mode 100644 index 000000000..9c02f7adc --- /dev/null +++ b/docs/source/sidecar.rst @@ -0,0 +1,161 @@ +.. _modifying_with_sidecar: + +Modifying an HDMF File with a Sidecar JSON File +=============================================== + +Users may want to make small updates or corrections to an HDMF file without rewriting the entire file. +To do so, HDMF supports the use of a "sidecar" JSON file that lives adjacent to the HDMF file on disk and +specifies modifications to the HDMF file. Only a limited set of modifications are supported; for example, users can +hide a dataset or attribute so that it will not be read by HDMF, but cannot create a new dataset, attribute, group, +or link. +When HDMF reads an HDMF file, if the corresponding sidecar JSON file exists, it is +automatically read and the modifications that it specifies are automatically applied. + +.. note:: + + This default behavior can be changed such that the corresponding sidecar JSON file is ignored when the HDMF file + is read by passing ``load_sidecar=False`` to ``HDMFIO.read()`` on the ``HDMFIO`` object used to read the HDMF file. + +Allowed modifications +--------------------- + +Only the following modifications to an HDMF file are supported in the sidecar JSON file: + +- Replace the values of a dataset or attribute with a scalar or 1-D array +- Hide a dataset or attribute + +.. note:: + + Replacing the values of a dataset or attribute with a very large 1-D array using the sidecar JSON file may not + be efficient and is discouraged. Users should instead consider rewriting the HDMF file with the + updated values. + +Specification for the sidecar JSON file +--------------------------------------- + +The sidecar JSON file can be validated using the ``sidecar.schema.json`` JSON schema file +located at the root of the HDMF repository. When an HDMF file is read, if a sidecar JSON file +is present, it is automatically read and validated against this JSON schema file. + +The sidecar JSON file must contain the following top-level keys: + +- ``"description"``: A free-form string describing the modifications specified in this file. +- ``"author"``: A list of free-form strings containing the names of the people who created this file. +- ``"contact"``: A list of email addresses for the people who created this file. Each author listed in the "author" key + *should* have a corresponding email address. +- ``"operations"``: A list of operations to perform on the data in the file, as specified below. +- ``"schema_version"``: The version of the sidecar JSON schema that the file conforms to, e.g., "0.1.0". + View the current version of this file here: + `sidecar.schema.json `_ + +Here is an example sidecar JSON file: + +.. 
code:: javascript
+
+    {
+        "description": "Summary of changes",
+        "author": [
+            "The NWB Team"
+        ],
+        "contact": [
+            "contact@nwb.org"
+        ],
+        "operations": [
+            {
+                "type": "replace",
+                "description": "change foo1/my_data data from [1, 2, 3] to [4, 5] (int8)",
+                "object_id": "e0449bb5-2b53-48c1-b04e-85a9a4631655",
+                "relative_path": "my_data",
+                "value": [
+                    4,
+                    5
+                ],
+                "dtype": "int8"
+            },
+            {
+                "type": "delete",
+                "description": "delete foo1/foo_holder/my_sub_data/attr6",
+                "object_id": "993fef27-680c-457a-af4d-b1d2725fcca9",
+                "relative_path": "foo_holder/my_sub_data/attr6"
+            }
+        ],
+        "schema_version": "0.1.0"
+    }
+
+Specification for operations
+----------------------------
+
+All operations are required to have the following keys:
+
+- ``"type"``: The type of modification to perform. Only "replace" and "delete" are currently supported.
+- ``"description"``: A description of the specified modification.
+- ``"object_id"``: The object ID (UUID) of the data type that is closest in the file hierarchy to the
+  field being modified.
+- ``"relative_path"``: The relative path from the data type with the given object ID to the field being modified.
+
+.. warning::
+
+    Modifying a file via a sidecar file can result in a file that is no longer compliant with the format
+    specification of the file. For example, hiding a required dataset via a sidecar operation produces
+    an invalid file that, in the worst case, may no longer be readable because required arguments are missing.
+    It is strongly recommended that the file be validated against the schema after creating, modifying,
+    or loading the sidecar JSON file.
+
+
+Replacing values of a dataset/attribute with a scalar or 1-D array
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Specify ``"type": "replace"`` to replace the values of a dataset/attribute in the associated HDMF file,
+as specified by the ``object_id`` and ``relative_path``.
+
+The operation specification must have the following key:
+
+- ``"value"``: The new value for the dataset/attribute. Only scalars and 1-dimensional arrays can be
+  specified as a replacement value.
+
+The operation specification may also have the following key:
+
+- ``"dtype"``: String representing the dtype of the new value. If this key is not present, then the dtype of the
+  existing value for the dataset/attribute is used. Allowed dtypes are listed in the
+  `HDMF schema language docs for dtype <https://hdmf-schema-language.readthedocs.io/en/latest/description.html#dtype>`_.
+
+In the example sidecar JSON file above, the first operation specifies that the value of dataset "my_data" in
+group "foo1", which has the specified object ID, should be replaced with the 1-D array [4, 5] (dtype: int8).
+
+.. note::
+
+    Replacing the values of datasets or attributes with object references or a compound data type is not yet supported.
+
+Deleting a dataset/attribute
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Specify ``"type": "delete"`` to delete (ignore) a dataset/attribute in the associated HDMF file,
+as specified by the ``object_id`` and ``relative_path``.
+
+The operation specification does not use any additional keys.
+
+In the example sidecar JSON file above, the second operation specifies that attribute "attr6"
+at relative path "foo_holder/my_sub_data/attr6" from group "foo1", which has the specified object ID,
+should be deleted.
+If "attr6" is a required attribute, this is likely to result in an invalid file that cannot be read by HDMF.
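+
+For illustration, below is a minimal sketch of the end-to-end workflow: write a sidecar JSON file containing a
+single delete operation next to an existing HDMF HDF5 file, validate it, and then read the file with and without
+the sidecar applied. The file names, object ID, and relative path are placeholders, and the sketch assumes the
+file contains only hdmf-common data types so that ``hdmf.common.get_manager()`` provides a suitable build manager.
+
+.. code:: python
+
+    import json
+
+    import jsonschema
+
+    from hdmf.backends.hdf5.h5tools import HDF5IO
+    from hdmf.common import get_manager
+
+    # the sidecar must have the same base name as the data file but with the suffix .json
+    data_path = "my_file.h5"       # placeholder path to an existing HDMF HDF5 file
+    sidecar_path = "my_file.json"
+
+    sidecar = {
+        "description": "Hide an optional attribute",
+        "author": ["Jane Doe"],
+        "contact": ["jane.doe@example.com"],
+        "operations": [
+            {
+                "type": "delete",
+                "description": "hide the optional attribute 'attr2' of the group with this object ID",
+                "object_id": "e0449bb5-2b53-48c1-b04e-85a9a4631655",  # placeholder object ID
+                "relative_path": "attr2"
+            }
+        ],
+        "schema_version": "0.1.0"
+    }
+
+    # write the sidecar file next to the data file
+    with open(sidecar_path, "w") as f:
+        json.dump(sidecar, f, indent=4)
+
+    # optional: validate the sidecar against the JSON schema at the root of the HDMF repository
+    with open("sidecar.schema.json") as f:
+        jsonschema.validate(instance=sidecar, schema=json.load(f))
+
+    # the sidecar is detected and applied automatically on read ...
+    with HDF5IO(data_path, "r", manager=get_manager()) as io:
+        container = io.read()
+
+    # ... unless load_sidecar=False is passed to read()
+    with HDF5IO(data_path, "r", manager=get_manager()) as io:
+        container = io.read(load_sidecar=False)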
+ +Future changes +-------------- + +The HDMF team is considering supporting additional operations and expanding support for current operations +specified in the sidecar JSON file, such as: + +- Add rows to a ``DynamicTable`` (column-based) +- Add rows to a ``Table`` (row-based) +- Add a new group +- Add a new dataset +- Add a new attribute +- Add a new link +- Replace a dataset or attribute with object references +- Replace a dataset or attribute with a compound data type +- Replace selected slices of a dataset or attribute +- Delete a group +- Delete a link + +Please provide feedback on which operations are useful to you for HDMF to support in this +`issue ticket `_. diff --git a/sidecar.schema.json b/sidecar.schema.json new file mode 100644 index 000000000..fcb2e254d --- /dev/null +++ b/sidecar.schema.json @@ -0,0 +1,194 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "sidecar.schema.json", + "title": "Schema for the sidecar JSON file", + "description": "A schema for validating HDMF sidecar JSON files", + "version": "0.1.0", + "type": "object", + "additionalProperties": false, + "required": [ + "description", + "author", + "contact", + "operations", + "schema_version" + ], + "properties": { + "description": { + "description": "A free-form string describing the modifications specified in this file.", + "type": "string" + }, + "author": { + "description": "A list of free-form strings containing the names of the people who created this file.", + "type": "array", + "items": {"type": "string"} + }, + "contact": { + "description": "A list of email addresses for the people who created this file. Each author listed in the 'author' key *should* have a corresponding email address.", + "type": "array", + "items": { + "type": "string", + "pattern": "^.*@.*$" + } + }, + "operations": { + "description": "A list of operations to perform on the data in the file.", + "type": "array", + "items": { + "type": "object", + "additionalProperties": false, + "required": [ + "type", + "description", + "object_id", + "relative_path" + ], + "properties": { + "type": { + "description": "The type of modification to perform.", + "member_region": { + "type": ["replace", "delete"] + } + }, + "description": { + "description": "A description of the specified modification.", + "type": "string" + }, + "object_id": { + "description": "The object ID (UUID) of the data type that is closest in the file hierarchy to the field being modified. 
Must be in the UUID-4 format with hyphens.", + "type": "string", + "pattern": "^[0-9a-f]{8}\\-[0-9a-f]{4}\\-4[0-9a-f]{3}\\-[89ab][0-9a-f]{3}\\-[0-9a-f]{12}$" + }, + "relative_path": { + "description": " The relative path from the data type with the given object ID to the field being modified.", + "type": "string" + }, + "element_type": { + "anyOf": [ + { + "type": "string", + "enum": [ + "group", + "dataset", + "attribute" + ] + } + ] + }, + "value": { + "description": "The new value for the dataset/attribute.", + "member_region": { + "type": ["array", "string", "number", "boolean", "null"] + } + }, + "dtype": {"$ref": "#/definitions/dtype"} + }, + "allOf": [ + { + "description": "if type==replace, then value is required.", + "if": { + "properties": { "type": { "const": "replace" } } + }, + "then": { + "required": [ "value" ] + } + }, + { + "description": "if type==delete, then value and dtype are not allowed.", + "if": { + "properties": { "type": { "const": "delete" } } + }, + "then": { + "properties": { + "value": false, + "dtype": false + } + } + }, + { + "description": "if type==create, then element_type is required.", + "if": { + "properties": { "type": { "const": "create" } } + }, + "then": { + "required": [ "element_type" ] + } + } + ] + } + }, + "schema_version": { + "description": "The version of the sidecar JSON schema that the file conforms to. Must confirm to Semantic Versioning v2.0.", + "type": "string", + "pattern": "^(0|[1-9]\\d*)\\.(0|[1-9]\\d*)\\.(0|[1-9]\\d*)(?:-((?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\\.(?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\\+([0-9a-zA-Z-]+(?:\\.[0-9a-zA-Z-]+)*))?$" + } + }, + "definitions": { + "dtype": { + "anyOf": [ + {"$ref": "#/definitions/flat_dtype"}, + {"$ref": "#/definitions/compound_dtype"} + ] + }, + "flat_dtype": { + "description": "String describing the data type of the dataset or attribute.", + "anyOf": [ + { + "type": "string", + "enum": [ + "float", + "float32", + "double", + "float64", + "long", + "int64", + "int", + "int32", + "int16", + "int8", + "uint", + "uint32", + "uint16", + "uint8", + "uint64", + "text", + "utf", + "utf8", + "utf-8", + "ascii", + "bool", + "isodatetime" + ] + }, + {"$ref": "#/definitions/ref_dtype"} + ] + }, + "ref_dtype": { + "type": "object", + "required": ["target_type", "reftype"], + "properties": { + "target_type": { + "description": "Describes the data_type of the target that the reference points to", + "type": "string" + }, + "reftype": { + "description": "Describes the kind of reference", + "type": "string", + "enum": ["ref", "reference", "object", "region"] + } + } + }, + "compound_dtype": { + "type": "array", + "items": { + "type": "object", + "required": ["name", "doc", "dtype"], + "properties": { + "name": {"$ref": "#/definitions/protectedString"}, + "doc": {"type": "string"}, + "dtype": {"$ref": "#/definitions/flat_dtype"} + } + } + } + } +} diff --git a/src/hdmf/backends/__init__.py b/src/hdmf/backends/__init__.py index 030a2daee..1750af2b1 100644 --- a/src/hdmf/backends/__init__.py +++ b/src/hdmf/backends/__init__.py @@ -1 +1,2 @@ from . 
import hdf5 +from .builderupdater import SidecarValidationError diff --git a/src/hdmf/backends/builderupdater.py b/src/hdmf/backends/builderupdater.py new file mode 100644 index 000000000..03dd20aeb --- /dev/null +++ b/src/hdmf/backends/builderupdater.py @@ -0,0 +1,258 @@ +import json +import jsonschema +import numpy as np +from pathlib import Path + +from ..build import GroupBuilder, ObjectMapper, unicode, ascii +from ..utils import docval, getargs + + +class BuilderUpdater: + + json_schema_file = 'sidecar.schema.json' + + @classmethod + def get_json_schema(cls): + """Load the sidecar JSON schema.""" + with open(cls.json_schema_file, 'r') as file: + schema = json.load(file) + return schema + + @classmethod + def validate_sidecar(cls, sidecar_dict): + """Validate the sidecar JSON dict with the sidecar JSON schema.""" + try: + jsonschema.validate(instance=sidecar_dict, schema=cls.get_json_schema()) + except jsonschema.exceptions.ValidationError as e: + raise SidecarValidationError() from e + + @classmethod + def convert_dtype_using_map(cls, value, dtype_str): + dtype = ObjectMapper.get_dtype_mapping(dtype_str) + if isinstance(value, (list, np.ndarray)): # array + if dtype is unicode: + dtype = 'U' + elif dtype is ascii: + dtype = 'S' + if isinstance(value, list): + ret = np.array(value, dtype=dtype) + else: + ret = value.astype(dtype) + else: # scalar + ret = dtype(value) + return ret + + @classmethod + def convert_dtype_helper(cls, value, dtype): + # dtype comes from datasetbuilder or attribute + if isinstance(value, (list, np.ndarray)): # array + if dtype is str: + dtype = 'U' + elif dtype is bytes: + dtype = 'S' + else: + dtype = np.dtype(dtype) + if isinstance(value, list): + ret = np.array(value, dtype=dtype) + else: + ret = value.astype(dtype) + else: # scalar + if dtype in (str, bytes): + ret = dtype(value) + else: + ret = dtype.type(value) + return ret + + @classmethod + def convert_dtype(cls, value, dtype, old_value): + if value is None: + return None + # TODO handle compound dtypes + if dtype is not None: + new_value = cls.convert_dtype_using_map(value, dtype) + else: # keep the same dtype + if isinstance(old_value, np.ndarray): + new_value = cls.convert_dtype_helper(value, old_value.dtype) + else: + assert old_value is not None, \ + "Cannot convert new value to dtype without specifying new dtype or old value." + new_value = cls.convert_dtype_helper(value, type(old_value)) + return new_value + + @classmethod + @docval( + {'name': 'file_builder', 'type': GroupBuilder, 'doc': 'A GroupBuilder representing the main file object.'}, + {'name': 'file_path', 'type': str, + 'doc': 'Path to the data file. 
The sidecar file is assumed to have the same base name but with suffix .json.'}, + returns='The same input GroupBuilder, now modified.', + rtype='GroupBuilder' + ) + def update_from_sidecar_json(cls, **kwargs): + """Update the file builder in-place with the values specified in the sidecar JSON.""" + # the sidecar json must have the same name as the file but with suffix .json + f_builder, path = getargs('file_builder', 'file_path', kwargs) + sidecar_path = Path(path).with_suffix('.json') + if not sidecar_path.is_file(): + return + + with open(sidecar_path, 'r') as f: + sidecar_dict = json.load(f) + cls.validate_sidecar(sidecar_dict) + + operations = sidecar_dict['operations'] + for operation in operations: + object_id = operation['object_id'] + relative_path = operation['relative_path'] + operation_type = operation['type'] + new_value = operation.get('value') + new_dtype = operation.get('dtype') + + builder_map = cls.__get_object_id_map(f_builder) + builder = builder_map[object_id] + # TODO handle paths to links + # TODO handle object references + if operation_type == 'replace': + cls.__replace(builder, relative_path, new_value, new_dtype) + elif operation_type == 'delete': + cls.__delete(builder, relative_path) + # elif operation_type == 'change_dtype': + # assert new_value is None + # cls.__change_dtype(builder, relative_path, new_dtype) + # elif operation_type == 'create_attribute': + # cls.__create_attribute(builder, relative_path, new_value, new_dtype) + else: + raise ValueError("Operation type: '%s' not supported." % operation_type) + + return f_builder + + @classmethod + def __replace(cls, builder, relative_path, new_value, new_dtype): + if relative_path in builder.attributes: + cls.__replace_attribute(builder, relative_path, new_value, new_dtype) + elif isinstance(builder, GroupBuilder): # object_id points to GroupBuilder + sub_builder, attr_name = builder.get_subbuilder(relative_path) + if sub_builder is None: + raise ValueError("Relative path '%s' not recognized as a group, dataset, or attribute." + % relative_path) + if attr_name is None: + cls.__replace_dataset_data(sub_builder, new_value, new_dtype) + else: + cls.__replace_attribute(sub_builder, attr_name, new_value, new_dtype) + else: # object_id points to DatasetBuilder + if not relative_path: + cls.__replace_dataset_data(builder, new_value, new_dtype) + else: + raise ValueError("Relative path '%s' not recognized as None or attribute." % relative_path) + + @classmethod + def __delete(cls, builder, relative_path): + if relative_path in builder.attributes: # relative_path is name of attribute in GroupBuilder/DatasetBuilder + cls.__delete_attribute(builder, relative_path) + elif isinstance(builder, GroupBuilder): # object_id points to GroupBuilder + sub_builder, attr_name = builder.get_subbuilder(relative_path) + if sub_builder is None: + raise ValueError("Relative path '%s' not recognized as a group, dataset, or attribute." + % relative_path) + if attr_name is None: + # delete the DatasetBuilder from its parent + cls.__delete_builder(sub_builder.parent, sub_builder) + else: + cls.__delete_attribute(sub_builder, attr_name) # delete the attribute from the sub-Builder + else: # object_id points to DatasetBuilder + if not relative_path: + cls.__delete_builder(builder.parent, builder) # delete the DatasetBuilder from its parent + else: + raise ValueError("Relative path '%s' not recognized as None or attribute." 
% relative_path) + + @classmethod + # NOTE this function is currently unused + def __change_dtype(cls, builder, relative_path, new_dtype): + if relative_path in builder.attributes: + cls.__change_dtype_attribute(builder, relative_path, new_dtype) + elif isinstance(builder, GroupBuilder): # GroupBuilder has object_id + sub_dset_builder, attr_name = builder.get_subbuilder(relative_path) + if sub_dset_builder is None: + raise ValueError("Relative path '%s' not recognized as a group, dataset, or attribute." + % relative_path) + if attr_name is None: # update data in sub-DatasetBuilder + cls.__change_dtype_dataset_data(sub_dset_builder, new_dtype) + else: + cls.__change_dtype_attribute(sub_dset_builder, attr_name, new_dtype) + else: # DatasetBuilder has object_id + if not relative_path: + cls.__change_dtype_dataset_data(builder, new_dtype) + else: + raise ValueError("Relative path '%s' not recognized as None or attribute." % relative_path) + + @classmethod + def __create_attribute(cls, builder, relative_path, new_value, new_dtype): + # TODO validate in jsonschema that the relative path cannot start or end with '/' + if '/' in relative_path: # GroupBuilder has object_id + assert isinstance(builder, GroupBuilder), \ + "Relative path '%s' can include '/' only if the object is a group." % relative_path + sub_dset_builder, attr_name = builder.get_subbuilder(relative_path) + if sub_dset_builder is None: + raise ValueError("Relative path '%s' not recognized as a sub-group or sub-dataset." + % relative_path) + if attr_name in sub_dset_builder.attributes: + raise ValueError("Attribute '%s' already exists. Cannot create attribute." + % relative_path) + cls.__create_builder_attribute(sub_dset_builder, attr_name, new_value, new_dtype) + elif relative_path in builder.attributes: + raise ValueError("Attribute '%s' already exists. Cannot create attribute." 
% relative_path) + else: + cls.__create_builder_attribute(builder, attr_name, new_value, new_dtype) + + @classmethod + def __replace_dataset_data(cls, dset_builder, value, dtype): + # TODO consider replacing slices of a dataset or attribute + new_value = cls.convert_dtype(value, dtype, dset_builder['data']) + dset_builder['data'] = new_value + + @classmethod + def __replace_attribute(cls, builder, attr_name, value, dtype): + new_value = cls.convert_dtype(value, dtype, builder.attributes[attr_name]) + builder.attributes[attr_name] = new_value + + @classmethod + def __delete_attribute(cls, builder, attr_name): + builder.remove_attribute(attr_name) + + @classmethod + def __delete_builder(cls, parent_builder, child_builder): + parent_builder.remove_child(child_builder) + + @classmethod + def __change_dtype_dataset_data(cls, dset_builder, dtype): + new_value = cls.convert_dtype(dset_builder['data'], dtype, dset_builder['data']) + dset_builder['data'] = new_value + + @classmethod + def __change_dtype_attribute(cls, builder, attr_name, dtype): + new_value = cls.convert_dtype(builder.attributes[attr_name], dtype, builder.attributes[attr_name]) + builder.attributes[attr_name] = new_value + + @classmethod + def __create_builder_attribute(cls, builder, attr_name, value, dtype): + new_value = cls.convert_dtype(value, dtype, None) + builder.attributes[attr_name] = new_value + + @classmethod + def __get_object_id_map(cls, builder): + stack = [builder] + ret = dict() + while len(stack): + b = stack.pop() + if 'object_id' in b.attributes: + ret[b.attributes['object_id']] = b + if isinstance(b, GroupBuilder): + for g in b.groups.values(): + stack.append(g) + for d in b.datasets.values(): + stack.append(d) + return ret + + +class SidecarValidationError(Exception): + """Error raised when a sidecar file fails validation with the JSON schema.""" + pass diff --git a/src/hdmf/backends/io.py b/src/hdmf/backends/io.py index 5908d9051..f6a3174d6 100644 --- a/src/hdmf/backends/io.py +++ b/src/hdmf/backends/io.py @@ -5,6 +5,7 @@ from ..container import Container from .errors import UnsupportedOperation from ..utils import docval, getargs, popargs +from .builderupdater import BuilderUpdater class HDMFIO(metaclass=ABCMeta): @@ -32,13 +33,21 @@ def source(self): '''The source of the container being read/written i.e. file path''' return self.__source - @docval(returns='the Container object that was read in', rtype=Container) + @docval({'name': 'load_sidecar', 'type': 'bool', + 'doc': ('Whether to load the sidecar JSON file and use the modifications specified in that file, if ' + 'the file exists. The file must have the same base name as the original source file but have ' + 'the suffix .json.'), + 'default': True}, + returns='the Container object that was read in', rtype=Container) def read(self, **kwargs): """Read a container from the IO source.""" + load_sidecar = popargs('load_sidecar', kwargs) f_builder = self.read_builder() if all(len(v) == 0 for v in f_builder.values()): # TODO also check that the keys are appropriate. print a better error message raise UnsupportedOperation('Cannot build data. 
There are no values.') + if load_sidecar: + BuilderUpdater.update_from_sidecar_json(f_builder, self.__source) container = self.__manager.construct(f_builder) return container diff --git a/src/hdmf/build/__init__.py b/src/hdmf/build/__init__.py index ea5d21152..33a36a2bb 100644 --- a/src/hdmf/build/__init__.py +++ b/src/hdmf/build/__init__.py @@ -3,6 +3,6 @@ from .errors import (BuildError, OrphanContainerBuildError, ReferenceTargetNotBuiltError, ContainerConfigurationError, ConstructError) from .manager import BuildManager, TypeMap -from .objectmapper import ObjectMapper +from .objectmapper import ObjectMapper, unicode, ascii from .warnings import (BuildWarning, MissingRequiredBuildWarning, DtypeConversionWarning, IncorrectQuantityBuildWarning, MissingRequiredWarning, OrphanContainerWarning) diff --git a/src/hdmf/build/builders.py b/src/hdmf/build/builders.py index f96e6016a..603369515 100644 --- a/src/hdmf/build/builders.py +++ b/src/hdmf/build/builders.py @@ -68,6 +68,13 @@ def parent(self, p): if self.__source is None: self.source = p.source + def reset_parent(self): + """Reset the parent of this Container to None. + + Use with caution. This can result in orphaned builders and broken links. Does not reset source. + """ + self.__parent = None + def __repr__(self): ret = "%s %s %s" % (self.path, self.__class__.__name__, super().__repr__()) return ret @@ -111,6 +118,10 @@ def set_attribute(self, **kwargs): name, value = getargs('name', 'value', kwargs) self.attributes[name] = value + def remove_attribute(self, name): + """Remove a child attribute from this builder. Use with caution. Intended for internal use.""" + del self.attributes[name] + class GroupBuilder(BaseBuilder): # sub-dictionary keys. subgroups go in super().__getitem__(GroupBuilder.__group) @@ -244,6 +255,26 @@ def __set_builder(self, builder, obj_type): if builder.parent is None: builder.parent = self + def remove_attribute(self, name): + """Remove a child attribute from this builder. Use with caution. Intended for internal use.""" + del self.obj_type[name] + super().remove_attribute(name) + + def remove_child(self, child): + """Remove a child builder from this builder. Use with caution. Intended for internal use.""" + if isinstance(child, GroupBuilder): + obj_type = GroupBuilder.__group + elif isinstance(child, DatasetBuilder): + obj_type = GroupBuilder.__dataset + elif isinstance(child, LinkBuilder): + obj_type = GroupBuilder.__link + else: # pragma: no cover + raise ValueError("child is expected to be a GroupBuilder, DatasetBuilder, or LinkBuilder, not a %s" + % type(child)) + del super().__getitem__(obj_type)[child.name] + del self.obj_type[child.name] + child.reset_parent() + def is_empty(self): """Returns true if there are no datasets, links, attributes, and non-empty subgroups. False otherwise.""" if len(self.datasets) or len(self.links) or len(self.attributes): @@ -282,6 +313,47 @@ def __get_rec(self, key_ar): else: if key_ar[0] in self.groups: return self.groups[key_ar[0]].__get_rec(key_ar[1:]) + elif key_ar[0] in self.datasets: # "dset/x" must be an attribute on dset + assert len(key_ar) == 2 + return self.datasets[key_ar[0]].attributes[key_ar[1]] + raise KeyError(key_ar[0]) + + def get_subbuilder(self, key, default=(None, None)): + """Like dict.get, but looks in groups and datasets sub-dictionaries and ignores trailing attributes and links. 
+ Returns two values -- the sub-group or sub-dataset and the name of the attribute if present + """ + try: + key_ar = _posixpath.normpath(key).split('/') + return self.__get_subbuilder_rec(key_ar) + except KeyError: + return default + + def __get_subbuilder_rec(self, key_ar): + # recursive helper for get_subbuilder + # returns sub-builder and attribute name if present + # possibilities are: + # group + # group/subgroup or group/subgroup/... + # group/dataset or group/dataset/... + # group/attribute + # dataset + # dataset/attribute + if len(key_ar) == 1: + if key_ar[0] in self.groups: # group + return self.groups[key_ar[0]], None + elif key_ar[0] in self.datasets: # dataset + return self.datasets[key_ar[0]], None + elif len(key_ar) == 2: + if key_ar[0] in self.groups: + if key_ar[1] in self.groups[key_ar[0]].attributes: # group/attribute + return self.groups[key_ar[0]], key_ar[1] + else: # group/subgroup or group/dataset + return self.groups[key_ar[0]].__get_subbuilder_rec(key_ar[1:]) + elif key_ar[0] in self.datasets: + if key_ar[1] in self.datasets[key_ar[0]].attributes: # dataset/attribute + return self.datasets[key_ar[0]], key_ar[1] + elif len(key_ar) > 2 and key_ar[0] in self.groups: + return self.groups[key_ar[0]].__get_subbuilder_rec(key_ar[1:]) raise KeyError(key_ar[0]) def __setitem__(self, args, val): diff --git a/src/hdmf/build/errors.py b/src/hdmf/build/errors.py index ec31ef5ba..66f3b63d6 100644 --- a/src/hdmf/build/errors.py +++ b/src/hdmf/build/errors.py @@ -47,3 +47,11 @@ class ContainerConfigurationError(Exception): class ConstructError(Exception): """Error raised when constructing a container from a builder.""" + + @docval({'name': 'builder', 'type': Builder, 'doc': 'the builder that cannot be constructed'}, + {'name': 'reason', 'type': str, 'doc': 'the reason for the error'}) + def __init__(self, **kwargs): + self.__builder = getargs('builder', kwargs) + self.__reason = getargs('reason', kwargs) + self.__message = "%s (%s): %s" % (self.__builder.name, self.__builder.path, self.__reason) + super().__init__(self.__message) diff --git a/src/hdmf/build/objectmapper.py b/src/hdmf/build/objectmapper.py index a9e3cf8e6..94fccacd5 100644 --- a/src/hdmf/build/objectmapper.py +++ b/src/hdmf/build/objectmapper.py @@ -64,7 +64,7 @@ def _dec(func): return _dec -def _unicode(s): +def unicode(s): """ A helper function for converting to Unicode """ @@ -76,7 +76,7 @@ def _unicode(s): raise ValueError("Expected unicode or ascii string, got %s" % type(s)) -def _ascii(s): +def ascii(s): """ A helper function for converting to ASCII """ @@ -95,7 +95,7 @@ class ObjectMapper(metaclass=ExtenderMeta): # mapping from spec dtypes to numpy dtypes or functions for conversion of values to spec dtypes # make sure keys are consistent between hdmf.spec.spec.DtypeHelper.primary_dtype_synonyms, - # hdmf.build.objectmapper.ObjectMapper.__dtypes, hdmf.build.manager.TypeMap._spec_dtype_map, + # hdmf.build.objectmapper.ObjectMapper.__dtypes, hdmf.build.classgenerator.CustomClassGenerator._spec_dtype_map, # hdmf.validate.validator.__allowable, and backend dtype maps __dtypes = { "float": np.float32, @@ -115,18 +115,22 @@ class ObjectMapper(metaclass=ExtenderMeta): "uint16": np.uint16, "uint8": np.uint8, "bool": np.bool_, - "text": _unicode, - "utf": _unicode, - "utf8": _unicode, - "utf-8": _unicode, - "ascii": _ascii, - "bytes": _ascii, - "isodatetime": _ascii, - "datetime": _ascii, + "text": unicode, + "utf": unicode, + "utf8": unicode, + "utf-8": unicode, + "ascii": ascii, + "bytes": ascii, + "isodatetime": 
ascii, + "datetime": ascii, } __no_convert = set() + @classmethod + def get_dtype_mapping(cls, dtype_str): + return cls.__dtypes[dtype_str] + @classmethod def __resolve_numeric_dtype(cls, given, specified): """ @@ -201,16 +205,16 @@ def convert_dtype(cls, spec, value, spec_dtype=None): # noqa: C901 ret, ret_dtype = cls.__check_edgecases(spec, value, spec_dtype) if ret is not None or ret_dtype is not None: return ret, ret_dtype - # spec_dtype is a string, spec_dtype_type is a type or the conversion helper functions _unicode or _ascii + # spec_dtype is a string, spec_dtype_type is a type or the conversion helper functions unicode or ascii spec_dtype_type = cls.__dtypes[spec_dtype] warning_msg = None # Numpy Array or Zarr array if (isinstance(value, np.ndarray) or (hasattr(value, 'astype') and hasattr(value, 'dtype'))): - if spec_dtype_type is _unicode: + if spec_dtype_type is unicode: ret = value.astype('U') ret_dtype = "utf8" - elif spec_dtype_type is _ascii: + elif spec_dtype_type is ascii: ret = value.astype('S') ret_dtype = "ascii" else: @@ -223,9 +227,9 @@ def convert_dtype(cls, spec, value, spec_dtype=None): # noqa: C901 # Tuple or list elif isinstance(value, (tuple, list)): if len(value) == 0: - if spec_dtype_type is _unicode: + if spec_dtype_type is unicode: ret_dtype = 'utf8' - elif spec_dtype_type is _ascii: + elif spec_dtype_type is ascii: ret_dtype = 'ascii' else: ret_dtype = spec_dtype_type @@ -239,16 +243,16 @@ def convert_dtype(cls, spec, value, spec_dtype=None): # noqa: C901 # Any DataChunkIterator elif isinstance(value, AbstractDataChunkIterator): ret = value - if spec_dtype_type is _unicode: + if spec_dtype_type is unicode: ret_dtype = "utf8" - elif spec_dtype_type is _ascii: + elif spec_dtype_type is ascii: ret_dtype = "ascii" else: ret_dtype, warning_msg = cls.__resolve_numeric_dtype(value.dtype, spec_dtype_type) else: - if spec_dtype_type in (_unicode, _ascii): + if spec_dtype_type in (unicode, ascii): ret_dtype = 'ascii' - if spec_dtype_type is _unicode: + if spec_dtype_type is unicode: ret_dtype = 'utf8' ret = spec_dtype_type(value) else: diff --git a/src/hdmf/spec/spec.py b/src/hdmf/spec/spec.py index b56404235..9dab980f1 100644 --- a/src/hdmf/spec/spec.py +++ b/src/hdmf/spec/spec.py @@ -21,7 +21,7 @@ class DtypeHelper: # Dict where the keys are the primary data type and the values are list of strings with synonyms for the dtype # make sure keys are consistent between hdmf.spec.spec.DtypeHelper.primary_dtype_synonyms, - # hdmf.build.objectmapper.ObjectMapper.__dtypes, hdmf.build.manager.TypeMap._spec_dtype_map, + # hdmf.build.objectmapper.ObjectMapper.__dtypes, hdmf.build.classgenerator.CustomClassGenerator._spec_dtype_map, # hdmf.validate.validator.__allowable, and backend dtype maps # see https://hdmf-schema-language.readthedocs.io/en/latest/description.html#dtype primary_dtype_synonyms = { diff --git a/tests/unit/io_tests/__init__.py b/tests/unit/io_tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/unit/io_tests/test_sidecar.py b/tests/unit/io_tests/test_sidecar.py new file mode 100644 index 000000000..257a0aead --- /dev/null +++ b/tests/unit/io_tests/test_sidecar.py @@ -0,0 +1,677 @@ +import json +import numpy as np +import os + +from hdmf import Container, Data +from hdmf.backends.hdf5.h5tools import HDF5IO +from hdmf.backends import SidecarValidationError +from hdmf.build import BuildManager, TypeMap, ObjectMapper, ConstructError +from hdmf.spec import AttributeSpec, DatasetSpec, GroupSpec, SpecCatalog, SpecNamespace, 
NamespaceCatalog +from hdmf.testing import TestCase +from hdmf.utils import getargs, docval + + +class TestBasic(TestCase): + + def setUp(self): + self.json_path = "./tests/unit/io_tests/test_sidecar.json" + self.h5_path = "./tests/unit/io_tests/test_sidecar.h5" + self.foo_data1 = FooData(name='foodata1', data=[1], data_attr1=2, data_attr2=['a']) + self.foo2 = Foo(name='foo2', my_data=[1, 2, 3], my_sub_data=[1, 2, 3], attr1='old') + self.sub_foo = Foo(name='sub_foo', my_data=[-1, -2, -3], my_sub_data=[-1, -2, -3], attr1='OLD') + self.foo_data = FooData(name='my_foo_data', data=[0, 1], data_attr1=1, data_attr2=['a']) + self.foo1 = Foo(name='foo1', my_data=[1, 2, 3], my_sub_data=[1, 2, 3], attr1='old', attr2='old', attr3=[17], + sub_foo=self.sub_foo, foo_holder_foos=[self.foo2], my_foo_data=self.foo_data, + attr4=[1], attr5=[1], attr6=[1], optional_data=[1, 2, 3]) + with HDF5IO(self.h5_path, manager=_get_manager(), mode='w') as io: + io.write(self.foo1) + + def tearDown(self): + if os.path.exists(self.h5_path): + os.remove(self.h5_path) + if os.path.exists(self.json_path): + os.remove(self.json_path) + + def _write_test_sidecar(self, operations): + sidecar = dict() + sidecar["description"] = "Summary of changes" + sidecar["author"] = ["The NWB Team"] + sidecar["contact"] = ["contact@nwb.org"] + sidecar["operations"] = operations + sidecar["schema_version"] = "0.1.0" + + with open(self.json_path, 'w') as outfile: + json.dump(sidecar, outfile, indent=4) + + def test_load_sidecar_false(self): + """Test replacing the data of a dataset in a typed group.""" + operations = [ + { + "type": "replace", + "description": "change foo1/my_data data from [1, 2, 3] to [4, 5] (int8)", + "object_id": self.foo1.object_id, + "relative_path": "my_data", + "value": [4, 5], + "dtype": "int8" + }, + ] + self._write_test_sidecar(operations) + with HDF5IO(self.h5_path, 'r', manager=_get_manager()) as io: + read_foo1 = io.read(load_sidecar=False) + np.testing.assert_array_equal(read_foo1.my_data, np.array([1, 2, 3])) + + def test_replace_typed_group_dataset_data(self): + """Test replacing the data of a dataset in a typed group.""" + operations = [ + { + "type": "replace", + "description": "change foo1/my_data data from [1, 2, 3] to [4, 5] (int8)", + "object_id": self.foo1.object_id, + "relative_path": "my_data", + "value": [4, 5], + "dtype": "int8" + }, + ] + self._write_test_sidecar(operations) + with HDF5IO(self.h5_path, 'r', manager=_get_manager()) as io: + read_foo1 = io.read() + np.testing.assert_array_equal(read_foo1.my_data, np.array([4, 5])) + assert read_foo1.my_data.dtype is np.dtype('int8') + + def test_replace_subgroup_dataset_data(self): + """Test replacing the data of a dataset in a subgroup of a typed group.""" + operations = [ + { + "type": "replace", + "description": "change foo1/foo_holder_foos/my_sub_data data from [1, 2, 3] to [4, 5] (int8)", + "object_id": self.foo1.object_id, + "relative_path": "foo_holder/my_sub_data", + "value": [4, 5], + "dtype": "int8" + }, + ] + self._write_test_sidecar(operations) + with HDF5IO(self.h5_path, 'r', manager=_get_manager()) as io: + read_foo1 = io.read() + np.testing.assert_array_equal(read_foo1.my_sub_data, np.array([4, 5])) + assert read_foo1.my_sub_data.dtype is np.dtype('int8') + + def test_replace_typed_dataset_data(self): + """Test replacing the data of a typed dataset.""" + operations = [ + { + "type": "replace", + "description": "change my_foo_data data from [0, 1] to [2, 3] (int8)", + "object_id": self.foo_data.object_id, + "relative_path": "", + 
"value": [2, 3], + "dtype": "int8" + }, + ] + self._write_test_sidecar(operations) + with HDF5IO(self.h5_path, 'r', manager=_get_manager()) as io: + read_foo1 = io.read() + np.testing.assert_array_equal(read_foo1.my_foo_data.data, np.array([2, 3])) + assert read_foo1.my_foo_data.data.dtype is np.dtype('int8') + + def test_replace_typed_group_attribute(self): + """Test replacing the attribute of a typed group.""" + operations = [ + { + "type": "replace", + "description": "change foo1/attr1 from 'old' to 'new' (same dtype)", + "object_id": self.foo1.object_id, + "relative_path": "attr1", + "value": "new" + }, + ] + self._write_test_sidecar(operations) + with HDF5IO(self.h5_path, 'r', manager=_get_manager()) as io: + read_foo1 = io.read() + assert read_foo1.attr1 == "new" + + def test_replace_subgroup_attribute(self): + """Test replacing the attribute of an untyped group in a typed group.""" + operations = [ + { + "type": "replace", + "description": "change foo1/foo_holder/attr5 from [1] to [2] (int8)", + "object_id": self.foo1.object_id, + "relative_path": "foo_holder/attr5", + "value": [2], + "dtype": "int8" + }, + ] + self._write_test_sidecar(operations) + with HDF5IO(self.h5_path, 'r', manager=_get_manager()) as io: + read_foo1 = io.read() + np.testing.assert_array_equal(read_foo1.attr5, np.array([2])) + assert read_foo1.attr5.dtype is np.dtype('int8') + + def test_replace_typed_group_dataset_attribute(self): + """Test replacing the attribute of an untyped dataset in a typed group.""" + operations = [ + { + "type": "replace", + "description": "change foo1/my_data/attr3 from [1] to [2] (int8)", + "object_id": self.foo1.object_id, + "relative_path": "my_data/attr3", + "value": [2], + "dtype": "int8" + }, + ] + self._write_test_sidecar(operations) + with HDF5IO(self.h5_path, 'r', manager=_get_manager()) as io: + read_foo1 = io.read() + np.testing.assert_array_equal(read_foo1.attr3, np.array([2])) + assert read_foo1.attr3.dtype is np.dtype('int8') + + def test_replace_subgroup_dataset_attribute(self): + """Test replacing the attribute of an untyped dataset in an untyped group in a typed group.""" + operations = [ + { + "type": "replace", + "description": "change foo1/foo_holder/my_sub_data/attr6 from [1] to [2] (int8)", + "object_id": self.foo1.object_id, + "relative_path": "foo_holder/my_sub_data/attr6", + "value": [2], + "dtype": "int8" + }, + ] + self._write_test_sidecar(operations) + with HDF5IO(self.h5_path, 'r', manager=_get_manager()) as io: + read_foo1 = io.read() + np.testing.assert_array_equal(read_foo1.attr6, np.array([2])) + assert read_foo1.attr6.dtype is np.dtype('int8') + + def test_replace_typed_dataset_attribute(self): + """Test replacing the attribute of a typed dataset.""" + operations = [ + { + "type": "replace", + "description": "change foo1/my_foo_data/data_attr1 from 1 to 2 (int8)", + "object_id": self.foo_data.object_id, + "relative_path": "data_attr1", + "value": 2, + "dtype": "int8" + }, + ] + self._write_test_sidecar(operations) + with HDF5IO(self.h5_path, 'r', manager=_get_manager()) as io: + read_foo1 = io.read() + assert read_foo1.my_foo_data.data_attr1 == 2 + + def test_delete_typed_group_required_dataset(self): + """Test deleting a required dataset in a typed group.""" + operations = [ + { + "type": "delete", + "description": "delete foo1/my_data", + "object_id": self.foo1.object_id, + "relative_path": "my_data", + }, + ] + self._write_test_sidecar(operations) + with HDF5IO(self.h5_path, 'r', manager=_get_manager()) as io: + msg = "root (root): Could not 
construct Foo object due to: Foo.__init__: missing argument 'my_data'" + with self.assertRaisesWith(ConstructError, msg): + io.read() + + def test_delete_typed_group_optional_dataset(self): + """Test deleting an optional dataset in a typed group.""" + operations = [ + { + "type": "delete", + "description": "delete foo1/opt_data", + "object_id": self.foo1.object_id, + "relative_path": "opt_data", + }, + ] + self._write_test_sidecar(operations) + with HDF5IO(self.h5_path, 'r', manager=_get_manager()) as io: + read_foo1 = io.read() + assert read_foo1.optional_data is None + + def test_delete_typed_group_optional_typed_dataset(self): + """Test deleting an optional typed dataset in a typed group by providing the group object ID. + + Note that my_foo_data has an object ID which can be used instead of this method. + """ + operations = [ + { + "type": "delete", + "description": "delete foo1/my_foo_data", + "object_id": self.foo1.object_id, + "relative_path": "my_foo_data", + }, + ] + self._write_test_sidecar(operations) + with HDF5IO(self.h5_path, 'r', manager=_get_manager()) as io: + read_foo1 = io.read() + assert read_foo1.my_foo_data is None + + def test_delete_typed_dataset(self): + """Test deleting an optional typed dataset by providing the dataset object ID.""" + operations = [ + { + "type": "delete", + "description": "delete my_foo_data from its parent", + "object_id": self.foo_data.object_id, + "relative_path": "", + }, + ] + self._write_test_sidecar(operations) + with HDF5IO(self.h5_path, 'r', manager=_get_manager()) as io: + read_foo1 = io.read() + assert read_foo1.my_foo_data is None + + def test_delete_subgroup_required_dataset(self): + """Test deleting a required dataset in a subgroup of a typed group.""" + operations = [ + { + "type": "delete", + "description": "delete foo1/foo_holder_foos/my_sub_data", + "object_id": self.foo1.object_id, + "relative_path": "foo_holder/my_sub_data" + }, + ] + self._write_test_sidecar(operations) + with HDF5IO(self.h5_path, 'r', manager=_get_manager()) as io: + msg = "root (root): Could not construct Foo object due to: Foo.__init__: missing argument 'my_sub_data'" + with self.assertRaisesWith(ConstructError, msg): + io.read() + + def test_delete_typed_group_req_attribute(self): + """Test deleting a required attribute of a typed group.""" + operations = [ + { + "type": "delete", + "description": "delete foo1/attr1", + "object_id": self.foo1.object_id, + "relative_path": "attr1", + }, + ] + self._write_test_sidecar(operations) + with HDF5IO(self.h5_path, 'r', manager=_get_manager()) as io: + msg = "root (root): Could not construct Foo object due to: Foo.__init__: missing argument 'attr1'" + with self.assertRaisesWith(ConstructError, msg): + io.read() + + def test_delete_typed_group_opt_attribute(self): + """Test deleting an optional attribute of a typed group.""" + operations = [ + { + "type": "delete", + "description": "delete foo1/attr2", + "object_id": self.foo1.object_id, + "relative_path": "attr2", + }, + ] + self._write_test_sidecar(operations) + with HDF5IO(self.h5_path, 'r', manager=_get_manager()) as io: + read_foo1 = io.read() + assert read_foo1.attr2 is None # the default value when attr2 is not provided + + def test_delete_subgroup_opt_attribute(self): + """Test deleting an optional attribute of a subgroup.""" + operations = [ + { + "type": "delete", + "description": "delete foo1/foo_holder/attr5", + "object_id": self.foo1.object_id, + "relative_path": "foo_holder/attr5", + }, + ] + self._write_test_sidecar(operations) + with 
HDF5IO(self.h5_path, 'r', manager=_get_manager()) as io: + read_foo1 = io.read() + assert read_foo1.attr5 is None + + def test_delete_typed_group_dataset_opt_attribute(self): + """Test deleting an optional attribute of a dataset in a typed group.""" + operations = [ + { + "type": "delete", + "description": "delete foo1/my_data/attr3", + "object_id": self.foo1.object_id, + "relative_path": "my_data/attr3", + }, + ] + self._write_test_sidecar(operations) + with HDF5IO(self.h5_path, 'r', manager=_get_manager()) as io: + read_foo1 = io.read() + assert read_foo1.attr3 is None + + def test_delete_subgroup_dataset_opt_attribute(self): + """Test deleting an optional attribute of a dataset in a subgroup.""" + operations = [ + { + "type": "delete", + "description": "delete foo1/foo_holder/my_sub_data/attr6", + "object_id": self.foo1.object_id, + "relative_path": "foo_holder/my_sub_data/attr6", + }, + ] + self._write_test_sidecar(operations) + with HDF5IO(self.h5_path, 'r', manager=_get_manager()) as io: + read_foo1 = io.read() + assert read_foo1.attr6 is None + + def test_delete_typed_dataset_req_attribute(self): + """Test deleting a required attribute or a typed dataset.""" + operations = [ + { + "type": "delete", + "description": "delete my_foo_data/data_attr1", + "object_id": self.foo_data.object_id, + "relative_path": "data_attr1", + }, + ] + self._write_test_sidecar(operations) + with HDF5IO(self.h5_path, 'r', manager=_get_manager()) as io: + msg = ("my_foo_data (root/my_foo_data): Could not construct FooData object due to: FooData.__init__: " + "missing argument 'data_attr1'") + with self.assertRaisesWith(ConstructError, msg): + io.read() + + def test_delete_typed_dataset_opt_attribute(self): + """Test deleting a required attribute or a typed dataset.""" + operations = [ + { + "type": "delete", + "description": "delete my_foo_data/data_attr2", + "object_id": self.foo_data.object_id, + "relative_path": "data_attr2", + }, + ] + self._write_test_sidecar(operations) + with HDF5IO(self.h5_path, 'r', manager=_get_manager()) as io: + read_foo1 = io.read() + assert read_foo1.my_foo_data.data_attr2 is None + + +class TestFailValidation(TestCase): + + def setUp(self): + self.h5_path = "./tests/unit/io_tests/test_sidecar_fail.h5" + foo1 = Foo(name='foo1', my_data=[1, 2, 3], my_sub_data=[1, 2, 3], attr1='old', attr3=[17]) + with HDF5IO(self.h5_path, manager=_get_manager(), mode='w') as io: + io.write(foo1) + + sidecar = dict() + + self.json_path = "./tests/unit/io_tests/test_sidecar_fail.json" + with open(self.json_path, 'w') as outfile: + json.dump(sidecar, outfile, indent=4) + + def tearDown(self): + if os.path.exists(self.h5_path): + os.remove(self.h5_path) + if os.path.exists(self.json_path): + os.remove(self.json_path) + + def test_simple(self): + with HDF5IO(self.h5_path, 'r', manager=_get_manager()) as io: + with self.assertRaises(SidecarValidationError): + io.read() + + +class Foo(Container): + + @docval({'name': 'name', 'type': str, 'doc': 'the name of this Foo'}, + {'name': 'my_data', 'type': ('array_data', 'data'), 'doc': 'a 1-D or 2-D integer dataset', + 'shape': ((None, ), (None, None))}, + {'name': 'my_sub_data', 'type': ('array_data', 'data'), 'doc': 'a 1-D or 2-D integer dataset', + 'shape': ((None, ), (None, None))}, + {'name': 'attr1', 'type': str, 'doc': 'a string attribute'}, + {'name': 'attr2', 'type': str, 'doc': 'a string attribute', 'default': None}, + {'name': 'optional_data', 'type': ('array_data', 'data'), 'doc': 'a 1-D integer dataset', 'default': None}, + {'name': 'attr3', 
'type': ('array_data', 'data'), 'doc': 'a 1-D integer attribute', 'shape': (None, ), + 'default': None}, + {'name': 'attr4', 'type': ('array_data', 'data'), 'doc': 'a 1-D integer attribute', 'default': None}, + {'name': 'attr5', 'type': ('array_data', 'data'), 'doc': 'a 1-D integer attribute', 'default': None}, + {'name': 'attr6', 'type': ('array_data', 'data'), 'doc': 'a 1-D integer attribute', 'default': None}, + {'name': 'sub_foo', 'type': 'Foo', 'doc': 'a child Foo', 'default': None}, + {'name': 'my_foo_data', 'type': 'FooData', 'doc': 'a child Foo', 'default': None}, + {'name': 'foo_holder_foos', 'type': ('array_data', 'data'), 'doc': 'child Foos', 'default': None}, + {'name': 'foo_holder_foo_data', 'type': ('array_data', 'data'), 'doc': 'child FooData', 'default': None}) + def __init__(self, **kwargs): + name, my_data, my_sub_data, attr1, attr2 = getargs('name', 'my_data', 'my_sub_data', 'attr1', 'attr2', kwargs) + optional_data, attr3, attr4, attr5, attr6 = getargs('optional_data', 'attr3', 'attr4', 'attr5', 'attr6', kwargs) + sub_foo, my_foo_data = getargs('sub_foo', 'my_foo_data', kwargs) + foo_holder_foos, foo_holder_foo_data = getargs('foo_holder_foos', 'foo_holder_foo_data', kwargs) + super().__init__(name=name) + self.my_data = my_data + self.my_sub_data = my_sub_data + self.attr1 = attr1 + self.attr2 = attr2 + self.attr3 = attr3 + self.attr4 = attr4 + self.attr5 = attr5 + self.attr6 = attr6 + self.optional_data = optional_data + self.sub_foo = sub_foo + if sub_foo is not None: + assert sub_foo.name == 'sub_foo' # on read mapping will not work otherwise + self.sub_foo.parent = self + self.my_foo_data = my_foo_data + if my_foo_data is not None: + assert my_foo_data.name == 'my_foo_data' # on read mapping will not work otherwise + self.my_foo_data.parent = self + self.foo_holder_foos = foo_holder_foos + if foo_holder_foos is not None: + for f in foo_holder_foos: + f.parent = self + self.foo_holder_foo_data = foo_holder_foo_data + if foo_holder_foo_data is not None: + for f in foo_holder_foo_data: + f.parent = self + + +class FooData(Data): + + @docval({'name': 'name', 'type': str, 'doc': 'the name of this Foo'}, + {'name': 'data', 'type': ('array_data', 'data'), 'doc': 'a 1-D or 2-D integer dataset', + 'shape': ((None, ), (None, None))}, + {'name': 'data_attr1', 'type': int, 'doc': 'an integer attribute'}, + {'name': 'data_attr2', 'type': ('array_data', 'data'), 'doc': 'a 1-D text attribute', 'shape': (None, ), + 'default': None}) + def __init__(self, **kwargs): + name, data, data_attr1, data_attr2 = getargs('name', 'data', 'data_attr1', 'data_attr2', kwargs) + super().__init__(name=name, data=data) + self.data_attr1 = data_attr1 + self.data_attr2 = data_attr2 + + +def _get_manager(): + # Foo (group with data_type) has: + # - groups: + # - sub_foo (Foo), 0 or 1 + # - foo_holder (untyped), required + # - groups: + # - (Foo), 0 to many, remapped to foo_holder_foos + # - datasets: + # - my_sub_data (int, 1-D or 2-D), required + # - attributes: + # - attr6 (int, 1-D), optional, remapped to attr6 + # - (FooData), 0 to many, remapped to foo_holder_foo_data + # - attributes: + # - attr5 (int, 1-D), optional, remapped to attr5 + # - datasets: + # - my_data (int, 1-D or 2-D), required + # - attributes: + # - attr3 (int, 1-D), optional, remapped to attr3 + # - opt_data (int, 1-D or 2-D), 0 or 1, remapped to optional_data + # - attributes: + # - attr4 (int, 1-D), optional, remapped to attr4 + # - my_foo_data (FooData), optional + # - attributes: + # - attr1 (string, scalar), required + # - 
attr2 (string, scalar), optional + # + # FooData (dataset with data_type) has: + # - int, 1D + # - attributes: + # - data_attr1 (int, scalar), required + # - data_attr2 (text, 1-D), optional + + foo_data_spec = DatasetSpec( + doc='A test dataset specification with a data type', + data_type_def='FooData', + dtype='int', + shape=[None, ], + attributes=[ + AttributeSpec( + name='data_attr1', + doc='a scalar integer attribute', + dtype='int', + ), + AttributeSpec( + name='data_attr2', + doc='a 1-D text attribute', + dtype='text', + shape=[None, ], + required=False + ) + ] + ) + + foo_spec = GroupSpec( + doc='A test group specification with a data type', + data_type_def='Foo', + groups=[ + GroupSpec( + doc='a child Foo', + data_type_inc='Foo', + name='sub_foo', + quantity='?', + ), + GroupSpec( + doc='an untyped group of Foos', + name='foo_holder', + quantity='?', + groups=[ + GroupSpec( + doc='child Foos', + data_type_inc='Foo', + quantity='*', + ) + ], + datasets=[ + DatasetSpec( + doc='a 1-D or 2-D integer dataset', + dtype='int', + name='my_sub_data', + shape=[[None, ], [None, None]], + attributes=[ + AttributeSpec( + name='attr6', + doc='a 1-D integer attribute', + dtype='int', + shape=[None, ], + required=False + ) + ] + ), + DatasetSpec( + doc='child FooData', + data_type_inc='FooData', + quantity='*', + ) + ], + attributes=[ + AttributeSpec( + name='attr5', + doc='a 1-D integer attribute', + dtype='int', + shape=[None, ], + required=False + ) + ] + ) + ], + datasets=[ + DatasetSpec( + doc='a 1-D or 2-D integer dataset', + dtype='int', + name='my_data', + shape=[[None, ], [None, None]], + attributes=[ + AttributeSpec( + name='attr3', + doc='a 1-D integer attribute', + dtype='int', + shape=[None, ], + required=False + ) + ] + ), + DatasetSpec( + doc='an optional 1-D integer dataset', + dtype='int', + name='opt_data', + shape=[None, ], + quantity='?', + attributes=[ + AttributeSpec( + name='attr4', + doc='a 1-D integer attribute', + dtype='int', + shape=[None, ], + required=False + ) + ] + ), + DatasetSpec( + doc='child FooData', + name='my_foo_data', + data_type_inc='FooData', + quantity='?', + ) + ], + attributes=[ + AttributeSpec(name='attr1', doc='a string attribute', dtype='text'), + AttributeSpec(name='attr2', doc='a string attribute', dtype='text', required=False), + ] + ) + + class FooMapper(ObjectMapper): + """Remap spec fields to Container attributes. + - 'attr3' attribute on Foo container to 'my_data' dataset spec > 'attr3' attribute spec + - 'attr4' attribute on Foo container to 'opt_data' dataset spec > 'attr4' attribute spec. 
+ - TODO fill me in + """ + def __init__(self, spec): + super().__init__(spec) + my_data_spec = spec.get_dataset('my_data') + self.map_spec('attr3', my_data_spec.get_attribute('attr3')) + opt_data_spec = spec.get_dataset('opt_data') + self.map_spec('attr4', opt_data_spec.get_attribute('attr4')) + self.map_spec('optional_data', opt_data_spec) + foo_holder_spec = spec.get_group('foo_holder') + self.map_spec('attr5', foo_holder_spec.get_attribute('attr5')) + self.map_spec('foo_holder_foos', foo_holder_spec.get_data_type('Foo')) + self.map_spec('foo_holder_foo_data', foo_holder_spec.get_data_type('FooData')) + my_sub_data_spec = foo_holder_spec.get_dataset('my_sub_data') + self.map_spec('my_sub_data', my_sub_data_spec) + self.map_spec('attr6', my_sub_data_spec.get_attribute('attr6')) + + spec_catalog = SpecCatalog() + spec_catalog.register_spec(foo_spec, 'test.yaml') + spec_catalog.register_spec(foo_data_spec, 'test.yaml') + namespace_name = 'test_core' + namespace = SpecNamespace( + doc='a test namespace', + name=namespace_name, + schema=[{'source': 'test.yaml'}], + version='0.1.0', + catalog=spec_catalog + ) + namespace_catalog = NamespaceCatalog() + namespace_catalog.add_namespace(namespace_name, namespace) + type_map = TypeMap(namespace_catalog) + type_map.register_container_type(namespace_name, 'Foo', Foo) + type_map.register_container_type(namespace_name, 'FooData', FooData) + type_map.register_map(Foo, FooMapper) + manager = BuildManager(type_map) + return manager