diff --git a/docs/source/config_syntax.md b/docs/source/config_syntax.md index 24e7265523..c1e3d5cbe9 100644 --- a/docs/source/config_syntax.md +++ b/docs/source/config_syntax.md @@ -76,14 +76,15 @@ A few characters and keywords are interpreted beyond the plain texts, here are e ### To reference Python objects in configurations ```json -"@preprocessing#transforms#keys" +"@preprocessing::transforms::keys" ``` -_Description:_ `@` character indicates a reference to another configuration value defined at `preprocessing#transforms#keys`. -where `#` indicates a sub-structure of this configuration file. +_Description:_ `@` character indicates a reference to another configuration value defined at `preprocessing::transforms::keys`. +where `::` indicates a sub-structure of this configuration file. (`#` is a synonym for `::`, `preprocessing#transforms#keys` +refers to the same object.) ```json -"@preprocessing#1" +"@preprocessing::1" ``` _Description:_ `1` is referencing as an integer, which is used to index (zero-based indexing) the `preprocessing` sub-structure. @@ -122,10 +123,10 @@ It's therefore possible to modify the Python objects within an expression, for e ### To textually replace configuration elements ```json -"%demo_config.json#demo_net#in_channels" +"%demo_config.json::demo_net::in_channels" ``` -_Description:_ `%` character indicates a macro to replace the current configuration element with the texts at `demo_net#in_channels` in the +_Description:_ `%` character indicates a macro to replace the current configuration element with the texts at `demo_net::in_channels` in the `demo_config.json` file. The replacement is done before instantiating or evaluating the components. ### Instantiate a Python object @@ -203,6 +204,6 @@ Details on the CLI argument parsing is provided in the simple structures with sparse uses of expressions or references are preferred. 
- For `$import <module>` in the configuration, please make sure there are instructions for the users to install the `<module>` if it is not a (optional) dependency of MONAI. -- As "#" and "$" might be interpreted differently by the `shell` or `CLI` tools, may need to add escape characters +- As `#`, `::`, and `$` might be interpreted differently by the `shell` or `CLI` tools, you may need to add escape characters or quotes for them in the command line, like: `"\$torch.device('cuda:1')"`, `"'train_part#trainer'"`. - For more details and examples, please see [the tutorials](https://github.com/Project-MONAI/tutorials/tree/main/bundle). diff --git a/monai/bundle/config_parser.py b/monai/bundle/config_parser.py index e2553a5ffd..9d69318e99 100644 --- a/monai/bundle/config_parser.py +++ b/monai/bundle/config_parser.py @@ -141,16 +141,16 @@ def __getitem__(self, id: str | int) -> Any: Get the config by id. Args: - id: id of the ``ConfigItem``, ``"#"`` in id are interpreted as special characters to + id: id of the ``ConfigItem``, ``"::"`` (or ``"#"``) in id are interpreted as special characters to go one level further into the nested structures. Use digits indexing from "0" for list or other strings for dict. - For example: ``"xform#5"``, ``"net#channels"``. ``""`` indicates the entire ``self.config``. + For example: ``"xform::5"``, ``"net::channels"``. ``""`` indicates the entire ``self.config``. """ if id == "": return self.config config = self.config - for k in str(id).split(ID_SEP_KEY): + for k in ReferenceResolver.split_id(id): if not isinstance(config, (dict, list)): raise ValueError(f"config must be dict or list for key `{k}`, but got {type(config)}: {config}.") try: @@ -167,10 +167,10 @@ def __setitem__(self, id: str | int, config: Any) -> None: to ensure the updates are included in the parsed content. 
Args: - id: id of the ``ConfigItem``, ``"#"`` in id are interpreted as special characters to + id: id of the ``ConfigItem``, ``"::"`` (or ``"#"``) in id are interpreted as special characters to go one level further into the nested structures. Use digits indexing from "0" for list or other strings for dict. - For example: ``"xform#5"``, ``"net#channels"``. ``""`` indicates the entire ``self.config``. + For example: ``"xform::5"``, ``"net::channels"``. ``""`` indicates the entire ``self.config``. config: config to set at location ``id``. """ @@ -178,12 +178,11 @@ def __setitem__(self, id: str | int, config: Any) -> None: self.config = config self.ref_resolver.reset() return - keys = str(id).split(ID_SEP_KEY) + last_id, base_id = ReferenceResolver.split_id(id, last=True) # get the last parent level config item and replace it - last_id = ID_SEP_KEY.join(keys[:-1]) conf_ = self[last_id] - indexing = keys[-1] if isinstance(conf_, dict) else int(keys[-1]) + indexing = base_id if isinstance(conf_, dict) else int(base_id) conf_[indexing] = config self.ref_resolver.reset() return @@ -213,7 +212,7 @@ def set(self, config: Any, id: str = "", recursive: bool = True) -> None: default to `True`. for the nested id, only support `dict` for the missing section. """ - keys = str(id).split(ID_SEP_KEY) + keys = ReferenceResolver.split_id(id) conf_ = self.get() if recursive: if conf_ is None: @@ -222,12 +221,12 @@ def set(self, config: Any, id: str = "", recursive: bool = True) -> None: if isinstance(conf_, dict) and k not in conf_: conf_[k] = {} conf_ = conf_[k if isinstance(conf_, dict) else int(k)] - self[id] = config + self[ReferenceResolver.normalize_id(id)] = config def update(self, pairs: dict[str, Any]) -> None: """ Set the ``id`` and the corresponding config content in pairs, see also :py:meth:`__setitem__`. 
- For example, ``parser.update({"train#epoch": 100, "train#lr": 0.02})`` + For example, ``parser.update({"train::epoch": 100, "train::lr": 0.02})`` Args: pairs: dictionary of `id` and config pairs. @@ -272,10 +271,10 @@ def get_parsed_content(self, id: str = "", **kwargs: Any) -> Any: - Else, the result is the configuration content of `ConfigItem`. Args: - id: id of the ``ConfigItem``, ``"#"`` in id are interpreted as special characters to + id: id of the ``ConfigItem``, ``"::"`` (or ``"#"``) in id are interpreted as special characters to go one level further into the nested structures. Use digits indexing from "0" for list or other strings for dict. - For example: ``"xform#5"``, ``"net#channels"``. ``""`` indicates the entire ``self.config``. + For example: ``"xform::5"``, ``"net::channels"``. ``""`` indicates the entire ``self.config``. kwargs: additional keyword arguments to be passed to ``_resolve_one_item``. Currently support ``lazy`` (whether to retain the current config cache, default to `True`), ``instantiate`` (whether to instantiate the `ConfigComponent`, default to `True`) and @@ -330,16 +329,15 @@ def _do_resolve(self, config: Any, id: str = "") -> Any: Args: config: input config file to resolve. - id: id of the ``ConfigItem``, ``"#"`` in id are interpreted as special characters to + id: id of the ``ConfigItem``, ``"::"`` (or ``"#"``) in id are interpreted as special characters to go one level further into the nested structures. Use digits indexing from "0" for list or other strings for dict. - For example: ``"xform#5"``, ``"net#channels"``. ``""`` indicates the entire ``self.config``. + For example: ``"xform::5"``, ``"net::channels"``. ``""`` indicates the entire ``self.config``. 
""" if isinstance(config, (dict, list)): - for k, v in enumerate(config) if isinstance(config, list) else config.items(): - sub_id = f"{id}{ID_SEP_KEY}{k}" if id != "" else k - config[k] = self._do_resolve(v, sub_id) + for k, sub_id, v in self.ref_resolver.iter_subconfigs(id=id, config=config): + config[k] = self._do_resolve(v, sub_id) # type: ignore if isinstance(config, str): config = self.resolve_relative_ids(id, config) if config.startswith(MACRO_KEY): @@ -354,7 +352,7 @@ def resolve_macro_and_relative_ids(self): Recursively resolve `self.config` to replace the relative ids with absolute ids, for example, `@##A` means `A` in the upper level. and replace the macro tokens with target content, The macro tokens are marked as starting with "%", can be from another structured file, like: - ``"%default_net"``, ``"%/data/config.json#net"``. + ``"%default_net"``, ``"%/data/config.json::net"``. """ self.set(self._do_resolve(config=self.get())) @@ -365,15 +363,14 @@ def _do_parse(self, config: Any, id: str = "") -> None: Args: config: config source to parse. - id: id of the ``ConfigItem``, ``"#"`` in id are interpreted as special characters to + id: id of the ``ConfigItem``, ``"::"`` (or ``"#"``) in id are interpreted as special characters to go one level further into the nested structures. Use digits indexing from "0" for list or other strings for dict. - For example: ``"xform#5"``, ``"net#channels"``. ``""`` indicates the entire ``self.config``. + For example: ``"xform::5"``, ``"net::channels"``. ``""`` indicates the entire ``self.config``. 
""" if isinstance(config, (dict, list)): - for k, v in enumerate(config) if isinstance(config, list) else config.items(): - sub_id = f"{id}{ID_SEP_KEY}{k}" if id != "" else k + for _, sub_id, v in self.ref_resolver.iter_subconfigs(id=id, config=config): self._do_parse(config=v, id=sub_id) if ConfigComponent.is_instantiable(config): @@ -410,7 +407,7 @@ def load_config_files(cls, files: PathLike | Sequence[PathLike] | dict, **kwargs """ Load config files into a single config dict. The latter config file in the list will override or add the former config file. - ``"#"`` in the config keys are interpreted as special characters to go one level + ``"::"`` (or ``"#"``) in the config keys are interpreted as special characters to go one level further into the nested structures. Args: @@ -451,13 +448,14 @@ def export_config_file(cls, config: dict, filepath: PathLike, fmt: str = "json", def split_path_id(cls, src: str) -> tuple[str, str]: """ Split `src` string into two parts: a config file path and component id. - The file path should end with `(json|yaml|yml)`. The component id should be separated by `#` if it exists. + The file path should end with `(json|yaml|yml)`. The component id should be separated by `::` if it exists. If no path or no id, return "". Args: src: source string to split. 
""" + src = ReferenceResolver.normalize_id(src) result = re.compile(rf"({cls.suffix_match}(?=(?:{ID_SEP_KEY}.*)|$))", re.IGNORECASE).findall(src) if not result: return "", src # the src is a pure id @@ -488,6 +486,7 @@ def resolve_relative_ids(cls, id: str, value: str) -> str: """ # get the prefixes like: "@####", "%###", "@#" + value = ReferenceResolver.normalize_id(value) prefixes = sorted(set().union(cls.relative_id_prefix.findall(value)), reverse=True) current_id = id.split(ID_SEP_KEY) diff --git a/monai/bundle/reference_resolver.py b/monai/bundle/reference_resolver.py index 9dfe82a992..b36f2cc4a5 100644 --- a/monai/bundle/reference_resolver.py +++ b/monai/bundle/reference_resolver.py @@ -14,7 +14,7 @@ import re import warnings from collections.abc import Sequence -from typing import Any +from typing import Any, Iterator from monai.bundle.config_item import ConfigComponent, ConfigExpression, ConfigItem from monai.bundle.utils import ID_REF_KEY, ID_SEP_KEY @@ -31,7 +31,7 @@ class ReferenceResolver: The IDs must be unique within this set. A string in ``ConfigItem`` starting with ``@`` will be treated as a reference to other ``ConfigItem`` objects by ID. Since ``ConfigItem`` may have a nested dictionary or list structure, - the reference string may also contain a ``#`` character to refer to a substructure by + the reference string may also contain the separator ``::`` to refer to a substructure by key indexing for a dictionary or integer indexing for a list. In this class, resolving references is essentially substitution of the reference strings with the @@ -52,7 +52,7 @@ class ReferenceResolver: _vars = "__local_refs" sep = ID_SEP_KEY # separator for key indexing ref = ID_REF_KEY # reference prefix - # match a reference string, e.g. "@id#key", "@id#key#0", "@_target_#key" + # match a reference string, e.g. 
"@id::key", "@id::key::0", "@_target_::key" id_matcher = re.compile(rf"{ref}(?:\w*)(?:{sep}\w*)*") # if `allow_missing_reference` and can't find a reference ID, will just raise a warning and don't update the config allow_missing_reference = allow_missing_reference @@ -99,6 +99,7 @@ def get_item(self, id: str, resolve: bool = False, **kwargs: Any) -> ConfigItem kwargs: keyword arguments to pass to ``_resolve_one_item()``. Currently support ``instantiate`` and ``eval_expr``. Both are defaulting to True. """ + id = self.normalize_id(id) if resolve and id not in self.resolved_content: self._resolve_one_item(id=id, **kwargs) return self.items.get(id) @@ -121,6 +122,7 @@ def _resolve_one_item( if the `id` is not in the config content, must be a `ConfigItem` object. """ + id = self.normalize_id(id) if id in self.resolved_content: return self.resolved_content[id] try: @@ -190,18 +192,56 @@ def get_resolved_content(self, id: str, **kwargs: Any) -> ConfigExpression | str """ return self._resolve_one_item(id=id, **kwargs) + @classmethod + def normalize_id(cls, id: str | int) -> str: + """ + Normalize the id string to consistently use `cls.sep`. + + Args: + id: id string to be normalized. + """ + return str(id).replace("#", cls.sep) # backward compatibility `#` is the old separator + + @classmethod + def split_id(cls, id: str | int, last: bool = False) -> list[str]: + """ + Split the id string into a list of strings by `cls.sep`. + + Args: + id: id string to be split. + last: whether to split the rightmost part of the id. default is False (split all parts). + """ + if not last: + return cls.normalize_id(id).split(cls.sep) + res = cls.normalize_id(id).rsplit(cls.sep, 1) + return ["".join(res[:-1]), res[-1]] + + @classmethod + def iter_subconfigs(cls, id: str, config: Any) -> Iterator[tuple[str, str, Any]]: + """ + Iterate over the sub-configs of the input config, the output `sub_id` uses `cls.sep` to denote substructure. + + Args: + id: id string of the current input config. 
+ config: input config to be iterated. + """ + for k, v in config.items() if isinstance(config, dict) else enumerate(config): + sub_id = f"{id}{cls.sep}{k}" if id != "" else f"{k}" + yield k, sub_id, v + @classmethod def match_refs_pattern(cls, value: str) -> dict[str, int]: """ Match regular expression for the input string to find the references. - The reference string starts with ``"@"``, like: ``"@XXX#YYY#ZZZ"``. + The reference string starts with ``"@"``, like: ``"@XXX::YYY::ZZZ"``. Args: value: input value to match regular expression. """ refs: dict[str, int] = {} - # regular expression pattern to match "@XXX" or "@XXX#YYY" + # regular expression pattern to match "@XXX" or "@XXX::YYY" + value = cls.normalize_id(value) result = cls.id_matcher.findall(value) value_is_expr = ConfigExpression.is_expression(value) for item in result: @@ -215,7 +255,7 @@ def match_refs_pattern(cls, value: str) -> dict[str, int]: def update_refs_pattern(cls, value: str, refs: dict) -> str: """ Match regular expression for the input string to update content with the references. - The reference part starts with ``"@"``, like: ``"@XXX#YYY#ZZZ"``. + The reference part starts with ``"@"``, like: ``"@XXX::YYY::ZZZ"``. References dictionary must contain the referring IDs as keys. Args: @@ -223,7 +263,8 @@ def update_refs_pattern(cls, value: str, refs: dict) -> str: refs: all the referring components with ids as keys, default to `None`. 
""" - # regular expression pattern to match "@XXX" or "@XXX#YYY" + # regular expression pattern to match "@XXX" or "@XXX::YYY" + value = cls.normalize_id(value) result = cls.id_matcher.findall(value) # reversely sort the matched references by length # and handle the longer first in case a reference item is substring of another longer item @@ -235,11 +276,10 @@ def update_refs_pattern(cls, value: str, refs: dict) -> str: ref_id = item[len(cls.ref) :] # remove the ref prefix "@" if ref_id not in refs: msg = f"can not find expected ID '{ref_id}' in the references." - if cls.allow_missing_reference: - warnings.warn(msg) - continue - else: + if not cls.allow_missing_reference: raise KeyError(msg) + warnings.warn(msg) + continue if value_is_expr: # replace with local code, `{"__local_refs": self.resolved_content}` will be added to # the `globals` argument of python `eval` in the `evaluate` @@ -265,12 +305,11 @@ def find_refs_in_config(cls, config: Any, id: str, refs: dict[str, int] | None = """ refs_: dict[str, int] = refs or {} if isinstance(config, str): - for id, count in cls.match_refs_pattern(value=config).items(): + for id, count in cls.match_refs_pattern(value=config).items(): # ref count is not currently used refs_[id] = refs_.get(id, 0) + count if not isinstance(config, (list, dict)): return refs_ - for k, v in config.items() if isinstance(config, dict) else enumerate(config): - sub_id = f"{id}{cls.sep}{k}" if id != "" else f"{k}" + for _, sub_id, v in cls.iter_subconfigs(id, config): if ConfigComponent.is_instantiable(v) or ConfigExpression.is_expression(v) and sub_id not in refs_: refs_[sub_id] = 1 refs_ = cls.find_refs_in_config(v, sub_id, refs_) @@ -294,8 +333,7 @@ def update_config_with_refs(cls, config: Any, id: str, refs: dict | None = None) if not isinstance(config, (list, dict)): return config ret = type(config)() - for idx, v in config.items() if isinstance(config, dict) else enumerate(config): - sub_id = f"{id}{cls.sep}{idx}" if id != "" else f"{idx}" 
+ for idx, sub_id, v in cls.iter_subconfigs(id, config): if ConfigComponent.is_instantiable(v) or ConfigExpression.is_expression(v): updated = refs_[sub_id] if ConfigComponent.is_instantiable(v) and updated is None: diff --git a/monai/bundle/utils.py b/monai/bundle/utils.py index 62d4975d94..b187159c89 100644 --- a/monai/bundle/utils.py +++ b/monai/bundle/utils.py @@ -24,7 +24,7 @@ __all__ = ["ID_REF_KEY", "ID_SEP_KEY", "EXPR_KEY", "MACRO_KEY", "DEFAULT_MLFLOW_SETTINGS", "DEFAULT_EXP_MGMT_SETTINGS"] ID_REF_KEY = "@" # start of a reference to a ConfigItem -ID_SEP_KEY = "#" # separator for the ID of a ConfigItem +ID_SEP_KEY = "::" # separator for the ID of a ConfigItem EXPR_KEY = "$" # start of a ConfigExpression MACRO_KEY = "%" # start of a macro of a config diff --git a/tests/test_bundle_verify_net.py b/tests/test_bundle_verify_net.py index 95b9bd11eb..6f516fdd48 100644 --- a/tests/test_bundle_verify_net.py +++ b/tests/test_bundle_verify_net.py @@ -37,7 +37,7 @@ def test_verify(self, meta_file, config_file): cmd = ["coverage", "run", "-m", "monai.bundle", "verify_net_in_out", "network_def", "--meta_file"] cmd += [meta_file, "--config_file", config_file, "-n", "4", "--any", "16", "--args_file", def_args_file] - cmd += ["--device", "cpu", "--_meta_#network_data_format#inputs#image#spatial_shape", "[16,'*','2**p*n']"] + cmd += ["--device", "cpu", "--_meta_::network_data_format::inputs#image#spatial_shape", "[16,'*','2**p*n']"] command_line_tests(cmd) @parameterized.expand([TEST_CASE_1]) @@ -52,7 +52,7 @@ def test_verify_fp16(self, meta_file, config_file): cmd += [meta_file, "--config_file", config_file, "-n", "4", "--any", "16", "--args_file", def_args_file] cmd += ["--device", "cuda", "--_meta_#network_data_format#inputs#image#spatial_shape", "[16,'*','2**p*n']"] cmd += ["--_meta_#network_data_format#inputs#image#dtype", "float16"] - cmd += ["--_meta_#network_data_format#outputs#pred#dtype", "float16"] + cmd += ["--_meta_::network_data_format::outputs::pred::dtype", 
"float16"] command_line_tests(cmd) @parameterized.expand([TEST_CASE_1]) diff --git a/tests/test_config_parser.py b/tests/test_config_parser.py index 8cecfe87cf..63254e7336 100644 --- a/tests/test_config_parser.py +++ b/tests/test_config_parser.py @@ -314,6 +314,12 @@ def test_builtin(self): config = {"import statements": "$import math", "calc": {"_target_": "math.isclose", "a": 0.001, "b": 0.001}} self.assertEqual(ConfigParser(config).calc, True) + def test_slicing(self): + config = {"test": [1, 2, 3, 4], "test1": "$@test[::]", "test2": "$@test[::-1]", "st": "aten::relu"} + self.assertEqual(ConfigParser(config).test1, [1, 2, 3, 4]) + self.assertEqual(ConfigParser(config).test2, [4, 3, 2, 1]) + self.assertEqual(ConfigParser(config).st, "aten::relu") + @parameterized.expand([TEST_CASE_5]) def test_substring_reference(self, config, expected): parser = ConfigParser(config=config) diff --git a/tests/test_reference_resolver.py b/tests/test_reference_resolver.py index 2ec5425258..07d56a16df 100644 --- a/tests/test_reference_resolver.py +++ b/tests/test_reference_resolver.py @@ -58,11 +58,11 @@ TEST_CASE_3 = [ { # all the recursively parsed config items - "transform#1": {"_target_": "RandTorchVisiond", "keys": "image", "name": "ColorJitter", "brightness": 0.25}, - "transform#1#_target_": "RandTorchVisiond", - "transform#1#keys": "image", - "transform#1#name": "ColorJitter", - "transform#1#brightness": 0.25, + "transform::1": {"_target_": "RandTorchVisiond", "keys": "image", "name": "ColorJitter", "brightness": 0.25}, + "transform#1::_target_": "RandTorchVisiond", + "transform::1::keys": "image", + "transform::1#name": "ColorJitter", + "transform::1::brightness": 0.25, }, "transform#1", RandTorchVisiond, @@ -76,6 +76,7 @@ def test_resolve(self, configs, expected_id, output_type): resolver = ReferenceResolver() # add items to resolver for k, v in configs.items(): + k = k.replace("#", "::") if ConfigComponent.is_instantiable(v): resolver.add_item(ConfigComponent(config=v, 
id=k, locator=locator)) elif ConfigExpression.is_expression(v):