diff --git a/.github/workflows/python-publish-to-prod-pypi.yml b/.github/workflows/python-publish-to-prod-pypi.yml index bf1ed5fe..22950e5b 100644 --- a/.github/workflows/python-publish-to-prod-pypi.yml +++ b/.github/workflows/python-publish-to-prod-pypi.yml @@ -9,10 +9,49 @@ on: - v* jobs: + validate: + name: Code Quality Assessment + runs-on: ubuntu-latest + strategy: + matrix: + python-version: [3.6, 3.7, 3.8, 3.9] + + steps: + - uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Set Environment Variables + run: | + echo "${HOME}/.gem/ruby/2.7.0/bin" >> $GITHUB_PATH + - name: Install dependencies + run: | + gem install --user-install hiera-eyaml -v 2.1.0 + python -m pip install --upgrade pip + python -m pip install --upgrade setuptools + python -m pip install --upgrade wheel + python -m pip install --upgrade mypy pytest pytest-cov pytest-console-scripts pylint coveralls pep257 + python -m pip install --editable . 
+      - name: Validate Compliance with PEP257
+        run: |
+          pep257 yamlpath
+      - name: Validate Compliance with MyPY
+        run: |
+          mypy yamlpath
+      - name: Lint with pylint
+        run: |
+          pylint yamlpath
+      - name: Unit Test with pytest
+        run: |
+          pytest --verbose --cov=yamlpath --cov-report=term-missing --cov-fail-under=100 --script-launch-mode=subprocess tests
+
   publish:
     name: Publish to Production PyPI
+    if: ${{ success() }}
     runs-on: ubuntu-latest
     environment: 'PyPI: Production'
+    needs: validate
 
     steps:
       - uses: actions/checkout@v2
diff --git a/.github/workflows/python-publish-to-test-pypi.yml b/.github/workflows/python-publish-to-test-pypi.yml
index fa08467b..afd9b2e8 100644
--- a/.github/workflows/python-publish-to-test-pypi.yml
+++ b/.github/workflows/python-publish-to-test-pypi.yml
@@ -16,10 +16,48 @@ on:
       - release/*
 
 jobs:
-  test-publish:
+  validate:
+    name: Code Quality Assessment
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: [3.6, 3.7, 3.8, 3.9]
+
+    steps:
+      - uses: actions/checkout@v2
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v2
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Set Environment Variables
+        run: |
+          echo "${HOME}/.gem/ruby/2.7.0/bin" >> $GITHUB_PATH
+      - name: Install dependencies
+        run: |
+          gem install --user-install hiera-eyaml -v 2.1.0
+          python -m pip install --upgrade pip
+          python -m pip install --upgrade setuptools
+          python -m pip install --upgrade wheel
+          python -m pip install --upgrade mypy pytest pytest-cov pytest-console-scripts pylint coveralls pep257
+          python -m pip install --editable . 
+ - name: Validate Compliance with PEP257 + run: | + pep257 yamlpath + - name: Validate Compliance with MyPY + run: | + mypy yamlpath + - name: Lint with pylint + run: | + pylint yamlpath + - name: Unit Test with pytest + run: | + pytest --verbose --cov=yamlpath --cov-report=term-missing --cov-fail-under=100 --script-launch-mode=subprocess tests + + publish: name: Publish to TEST PyPI runs-on: ubuntu-latest environment: 'PyPI: Test' + needs: validate steps: - uses: actions/checkout@v2 @@ -33,7 +71,7 @@ jobs: python -m pip install --upgrade setuptools wheel - name: Build Artifacts run: | - sed -r -e "s/(^__version__[[:space:]]*=[[:space:]]*)("'"'"[[:digit:]](\.[[:digit:]])+)"'"'"/\1\2.RC$(date "+%Y%m%d%H%M%S")"'"'"/" yamlpath/__init__.py + sed -i -r -e "s/(^__version__[[:space:]]*=[[:space:]]*)("'"'"[[:digit:]](\.[[:digit:]])+)"'"'"/\1\2.RC$(date "+%Y%m%d%H%M%S")"'"'"/" yamlpath/__init__.py python setup.py sdist bdist_wheel - name: Publish Artifacts uses: pypa/gh-action-pypi-publish@v1.4.2 diff --git a/CHANGES b/CHANGES index bb4d5bb2..0092b687 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,49 @@ +3.5.0: +Bug Fixes: +* Search expressions against Boolean values, [key=True] and [key=False], were + impossible. Now, they are working and are not case-sensitive, so [key=True], + [key=true], [key=TRUE], and such all work as expected. +* When null values were present, Deep Traversal (**) segments would always + return every node with a null value even when they would not match filter + conditions after the ** segment. When mustexist=False, this would also cause + a YAMLPathException. +* Descendent searches were considering only the first child of the search + ancestor. Now, ANY matching descendent node will correctly yield the + ancestor. +* Some Python-generated complex data types were escaping JSONification, + leading to unexpected stack-dumps when writing out JSON data for data types + like date and datetime. 
+
+Enhancements:
+* An entirely new segment type has been added to YAML Path and is now supported
+  by the library and reference implementation command-line tools: Keyword
+  Searches.  Similar to programming language keywords, these reserved Keywords
+  work much like functions, accepting parameters and performing algorithmic
+  operations or returning data not otherwise accessible to other YAML Path
+  segment types.  These new capabilities -- explored on the project Wiki --
+  include:
+  * [has_child(NAME)]
+  * [name()]
+  * [max(NAME)]
+  * [min(NAME)]
+  * [parent([STEPS])]
+* When stringified, YAML Paths with a solitary * wildcard segment were printed
+  using their internal RegEx variant, [.=~/.*/].  They are now printed as they
+  are entered, using a solitary *.  As a consequence, any deliberate RegEx of
+  [.=~/.*/] is also printed as its equivalent solitary *.
+* The yaml-paths command now allows printing YAML Paths without protective
+  escape symbols via a new --noescape option.  While this makes the output more
+  human-friendly, the unescaped paths will not be suitable for use as YAML Path
+  input to other YAML Path processors where special symbols require escaping.
+* [API] The NodeCoords class now tracks ancestry and the last YAML Path segment
+  responsible for triggering its generation.  The ancestry stack --
+  List[AncestryEntry] -- was necessary to support the [parent()] Search
+  Keyword.  The responsible YAML Path segment tracking was necessary to enable
+  Hash/map/dict key renaming via the [name()] Search Keyword.  These optional
+  attributes may be set when the NodeCoords is generated.
+* [API] YAMLPath instances now have a pop() method.  This mutates the YAMLPath
+  by popping off its last segment, returning that segment.
+ 3.4.1: Bug Fixes: * yaml-set (and the underlying Processor class) were unable to change nodes diff --git a/README.md b/README.md index 0dab0349..5b3c89e4 100644 --- a/README.md +++ b/README.md @@ -228,6 +228,18 @@ YAML Path understands these segment types: * When another segment follows, it matches every node within the remainder of the document's tree for which the following (and subsequent) segments match: `/shows/**/name/Star*` +* Search Keywords: Advanced search capabilities not otherwise possible using + other YAML Path segments. Taking the form of `[KEYWORD(PARAMETERS)]`, these + keywords are + [deeply explored on the Wiki](https://github.com/wwkimball/yamlpath/wiki/Search-Keywords) + and include: + * `[has_child(NAME)]`: Match nodes having a named child key + * `[max([NAME])]`: Match nodes having the maximum value + * `[min([NAME])]`: Match nodes having the minimum value + * `[name()]`: Match only the name of the present node, discarding all + children + * `[parent([STEPS])]`, Step up 1-N levels in the document from the present + node * Collectors: Placing any portion of the YAML Path within parenthesis defines a virtual list collector, like `(YAML Path)`; concatenation and exclusion operators are supported -- `+` and `-`, respectively -- along with nesting, @@ -266,37 +278,71 @@ versions of `pip` or its own dependency, *setuptools*. ### Using pip -Each published version of this project and its dependencies can be installed -from [PyPI](https://pypi.org/) using `pip`. Note that on systems with more than -one version of Python, you will probably need to use `pip3`, or equivalent -(e.g.: Cygwin users may need to use `pip3.6`, `pip3.9`, or such). +Like most others, this Python project is published to [PyPI](https://pypi.org/) +so that it can be easily installed via Python's `pip` command (or its +version-specific `pip3`, `pip3.7`, and such depending on how your Python was +installed). + +Python's `pip` command is ever-changing. It is updated very frequently. 
This +command further depends on other libraries to do its job, namely *setuptools*. +It so happens that *setuptools* is also updated very frequently. Both of these +are separate from Python itself, despite versions of them being pre-installed +with Python. It is your responsibility to keep `pip` and *setuptools* +up-to-date. When `pip` or *setuptools* become outdated, _you will experience +errors_ when trying to install newer Python packages like *yamlpath* **unless +you preinstall such packages' dependencies**. In the case of *yamlpath*, this +means you'd need to preinstall *ruamel.yaml* if you cannot or choose not to +upgrade `pip` and/or *setuptools*. + +As long as your `pip` and *setuptools* are up-to-date, installing *yamlpath* is +as simple as a single command (the "3.7" suffix to the `pip` command is +optional, depending on how your Python 3 was installed): ```shell -pip3 install yamlpath +pip3.7 install yamlpath ``` #### Very Old Versions of pip or its setuptools Dependency Very old versions of Python 3 ship with seriously outdated versions of `pip` and its *setuptools* dependency. When using versions of `pip` older than **18.1** -or *setuptools* older than version **46.4.0**, you may not be able to install -yamlpath with a single command. In this case, you have two options: either +or *setuptools* older than version **46.4.0**, you will not be able to install +*yamlpath* with a single command. In this case, you have two options: either pre-install *ruamel.yaml* before installing *yamlpath* or update `pip` and/or *setuptools* to at least the minimum required versions so `pip` can -auto-determine and install dependencies. This issue is not unique to yamlpath -because Python's ever-growing capabilities simply require periodic updates to -access. +auto-determine and install dependencies. This issue is not unique to +*yamlpath*. 
-When you cannot update `pip` or *setuptools*, just pre-install *ruamel.yaml*
-before yamlpath, like so:
+Upgrading `pip` and *setuptools* is trivially simple as long as you have
+sufficient access rights to do so on your local machine.  Depending on your
+situation, you may need to prefix these with `sudo` and/or you may need to
+substitute `python3` and `pip3` for `python` and `pip`, or even `python3.7` and
+`pip3.7` (or another specific version of Python 3), respectively.  To reiterate
+that this project requires Python 3, these sample commands will be
+demonstrated using such prefixes:
 
 ```shell
-# In this edge-case, these commands CANNOT be joined, like:
-# pip3.6 install ruamel.yaml yamlpath
-pip3.6 install ruamel.yaml
-pip3.6 install yamlpath
+python3.7 -m pip install --upgrade pip
+pip3.7 install --upgrade setuptools
 ```
 
+When you cannot or will not update `pip` or *setuptools*, just pre-install
+*ruamel.yaml* before yamlpath.  Each must be installed separately and in order,
+like this (you **cannot** combine these installations into a single command):
+
+```shell
+pip3.7 install ruamel.yaml
+pip3.7 install yamlpath
+```
+
+The downside to choosing this manual installation path is that you may end up
+with an incompatible version of *ruamel.yaml*.  This will manifest either as an
+inability to install *yamlpath* at all, or only certain versions of *yamlpath*,
+or *yamlpath* may experience unexpected errors caused by the incompatible code.
+For the best experience, you are strongly encouraged to just keep `pip` and
+*setuptools* up-to-date, particularly as a routine part of installing any new
+Python packages.
+
 ### Installing EYAML (Optional)
 
 EYAML support is entirely optional. You do not need EYAML to use YAML Path.
@@ -593,9 +639,9 @@ optional arguments: ```text usage: yaml-paths [-h] [-V] -s EXPRESSION [-c EXPRESSION] [-m] [-L] [-F] [-X] - [-P] [-t ['.', '/', 'auto', 'dot', 'fslash']] [-i | -k | -K] - [-a] [-A | -Y | -y | -l] [-e] [-x EYAML] [-r PRIVATEKEY] - [-u PUBLICKEY] [-S] [-d | -v | -q] + [-P] [-n] [-t ['.', '/', 'auto', 'dot', 'fslash']] + [-i | -k | -K] [-a] [-A | -Y | -y | -l] [-e] [-x EYAML] + [-r PRIVATEKEY] [-u PUBLICKEY] [-S] [-d | -v | -q] [YAML_FILE [YAML_FILE ...]] Returns zero or more YAML Paths indicating where in given YAML/JSON/Compatible @@ -640,6 +686,11 @@ result printing options: or to indicate whether a file has any matches without printing them all, perhaps especially with --noexpression) + -n, --noescape omit escape characters from special characters in + printed YAML Paths; this is unsafe for feeding the + resulting YAML Paths into other YAML Path commands + because the symbols that would be escaped have special + meaning to YAML Path processors key name searching options: -i, --ignorekeynames (default) do not search key names @@ -683,7 +734,9 @@ EYAML options: A search or exception EXPRESSION takes the form of a YAML Path search operator -- %, $, =, ^, >, <, >=, <=, =~, or ! -- followed by the search term, omitting the left-hand operand. For more information about YAML Paths, please visit -https://github.com/wwkimball/yamlpath. +https://github.com/wwkimball/yamlpath/wiki. To report issues with this tool or +to request enhancements, please visit +https://github.com/wwkimball/yamlpath/issues. 
``` * [yaml-set](yamlpath/commands/yaml_set.py) @@ -1167,7 +1220,7 @@ from yamlpath.exceptions import YAMLPathException yaml_path = YAMLPath("see.documentation.above.for.many.samples") try: - for node_coordinate in processor.get_nodes(yaml_path): + for node_coordinate in processor.get_nodes(yaml_path, mustexist=True): log.debug("Got {} from '{}'.".format(node_coordinate, yaml_path)) # Do something with each node_coordinate.node (the actual data) except YAMLPathException as ex: diff --git a/setup.py b/setup.py index 9415f315..ea8f7de6 100644 --- a/setup.py +++ b/setup.py @@ -20,6 +20,8 @@ "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Operating System :: OS Independent", + "Environment :: Console", + "Topic :: Utilities", "Topic :: Software Development :: Libraries :: Python Modules", ], url="https://github.com/wwkimball/yamlpath", diff --git a/tests/test_commands_yaml_get.py b/tests/test_commands_yaml_get.py index c5da59da..31e5f2b0 100644 --- a/tests/test_commands_yaml_get.py +++ b/tests/test_commands_yaml_get.py @@ -174,3 +174,267 @@ def test_get_every_data_type(self, script_runner, tmp_path_factory): for line in result.stdout.splitlines(): assert line == results[match_index] match_index += 1 + + def test_get_only_aoh_nodes_without_named_child(self, script_runner, tmp_path_factory): + content = """--- +items: + - - alpha + - bravo + - charlie + - - alpha + - charlie + - delta + - - alpha + - bravo + - delta + - - bravo + - charlie + - delta +""" + results = [ + "alpha", + "charlie", + "delta" + ] + + yaml_file = create_temp_yaml_file(tmp_path_factory, content) + result = script_runner.run(self.command, "--query=/items/*[!has_child(bravo)]*", yaml_file) + assert result.success, result.stderr + + match_index = 0 + for line in result.stdout.splitlines(): + assert line == results[match_index] + match_index += 1 + + def test_get_only_aoh_nodes_with_named_child(self, script_runner, tmp_path_factory): + content = """--- +products: + - 
name: something + price: 0.99 + weight: 0.75 + recalled: false + - name: other + price: 9.99 + weight: 2.25 + dimensions: + width: 1 + height: 1 + depth: 1 + - name: moar + weight: 100 + dimensions: + width: 100 + height: 100 + depth: 100 + - name: less + price: 5 + dimensions: + width: 5 + height: 5 + - name: bad + price: 0 + weight: 4 + dimensions: + width: 13 + height: 4 + depth: 7 + recalled: true +""" + results = [ + "something", + "bad", + ] + + yaml_file = create_temp_yaml_file(tmp_path_factory, content) + result = script_runner.run(self.command, "--query=/products[has_child(recalled)]/name", yaml_file) + assert result.success, result.stderr + + match_index = 0 + for line in result.stdout.splitlines(): + assert line == results[match_index] + match_index += 1 + + @pytest.mark.parametrize("query,output", [ + ("/items/*[!has_child(bravo)][2][parent(0)]", ['delta']), + ("/items/*[!has_child(bravo)][2][parent()]", ['["alpha", "charlie", "delta"]']), + ("/items/*[!has_child(bravo)][2][parent(2)]", ['[["alpha", "bravo", "charlie"], ["alpha", "charlie", "delta"], ["alpha", "bravo", "delta"], ["bravo", "charlie", "delta"]]']), + ("/prices_hash/*[has_child(price)][name()]", ['doohickey', 'whatchamacallit', 'widget']), + ("/prices_hash/*[!has_child(price)][name()]", ['unknown']), + ]) + def test_get_parent_nodes(self, script_runner, tmp_path_factory, query, output): + content = """--- +items: + - - alpha + - bravo + - charlie + - - alpha + - charlie + - delta + - - alpha + - bravo + - delta + - - bravo + - charlie + - delta + +prices_hash: + doohickey: + price: 4.99 + whatchamacallit: + price: 9.95 + widget: + price: 0.98 + unknown: +""" + + yaml_file = create_temp_yaml_file(tmp_path_factory, content) + result = script_runner.run(self.command, "--query={}".format(query), yaml_file) + assert result.success, result.stderr + + match_index = 0 + for line in result.stdout.splitlines(): + assert line == output[match_index] + match_index += 1 + + 
@pytest.mark.parametrize("query,output", [ + ("svcs.*[name()]", ['coolserver', 'logsender']), + ("svcs.*[enabled=false][parent()][name()]", ['logsender']), + ("svcs[name()]", ['svcs']), + ("indexes[.^Item][name()]", ['0', '1', '3']), + ]) + def test_get_node_names(self, script_runner, tmp_path_factory, query, output): + # Contributed by https://github.com/AndydeCleyre + content = """--- +svcs: + coolserver: + enabled: true + exec: ./coolserver.py + logsender: + enabled: false + exec: remote_syslog -D +indexes: + - Item 1 + - Item 2 + - Disabled 3 + - Item 4 +""" + + yaml_file = create_temp_yaml_file(tmp_path_factory, content) + result = script_runner.run(self.command, "--query={}".format(query), yaml_file) + assert result.success, result.stderr + + match_index = 0 + for line in result.stdout.splitlines(): + assert line == output[match_index] + match_index += 1 + + @pytest.mark.parametrize("query,output", [ + ("prices_aoh[max(price)].product", ["whatchamacallit"]), + ("prices_aoh[!max(price)].price", ["4.99", "4.99", "0.98"]), + ("/prices_hash[max(price)][name()]", ["whatchamacallit"]), + ("/prices_hash[!max(price)][name()]", ["doohickey", "fob", "widget", "unknown"]), + ("(prices_hash.*.price)[max()]", ["9.95"]), + ("(prices_hash.*.price)[!max()]", ["4.99", "4.99", "0.98"]), + ("/prices_array[max()]", ["9.95"]), + ("/prices_array[!max()]", ["4.99", "4.99", "0.98", "\x00"]), + ("bare[max()]", ["value"]), + ("/bad_prices_aoh[max(price)]/product", ["fob"]), + ("/bad_prices_aoh[!max(price)]/price", ["4.99", "9.95", "True"]), + ("bad_prices_hash[max(price)][name()]", ["fob"]), + ("bad_prices_hash[!max(price)][name()]", ["doohickey", "whatchamacallit", "widget", "unknown"]), + ("(/bad_prices_hash/*/price)[max()]", ["not set"]), + ("(/bad_prices_hash/*/price)[!max()]", ["4.99", "9.95", "True"]), + ("bad_prices_array[max()]", ["not set"]), + ("bad_prices_array[!max()]", ["4.99", "9.95", "0.98", "\x00"]), + + ("prices_aoh[min(price)].product", ["widget"]), + 
("prices_aoh[!min(price)].price", ["9.95", "4.99", "4.99"]), + ("/prices_hash[min(price)][name()]", ["widget"]), + ("/prices_hash[!min(price)][name()]", ["whatchamacallit", "doohickey", "fob", "unknown"]), + ("(prices_hash.*.price)[min()]", ["0.98"]), + ("(prices_hash.*.price)[!min()]", ["9.95", "4.99", "4.99"]), + ("/prices_array[min()]", ["0.98"]), + ("/prices_array[!min()]", ["9.95", "4.99", "4.99", "\x00"]), + ("bare[min()]", ["value"]), + ("/bad_prices_aoh[min(price)]/product", ["widget"]), + ("/bad_prices_aoh[!min(price)]/price", ["not set", "9.95", "4.99"]), + ("bad_prices_hash[min(price)][name()]", ["widget"]), + ("bad_prices_hash[!min(price)][name()]", ["fob", "whatchamacallit", "doohickey", "unknown"]), + ("(/bad_prices_hash/*/price)[min()]", ["4.99"]), + ("(/bad_prices_hash/*/price)[!min()]", ["not set", "9.95", "True"]), + ("bad_prices_array[min()]", ["0.98"]), + ("bad_prices_array[!min()]", ["not set", "9.95", "4.99", "\x00"]), + ]) + def test_get_min_max_nodes(self, script_runner, tmp_path_factory, query, output): + content = """--- +# Consistent Data Types +prices_aoh: + - product: doohickey + price: 4.99 + - product: fob + price: 4.99 + - product: whatchamacallit + price: 9.95 + - product: widget + price: 0.98 + - product: unknown + +prices_hash: + doohickey: + price: 4.99 + fob: + price: 4.99 + whatchamacallit: + price: 9.95 + widget: + price: 0.98 + unknown: + +prices_array: + - 4.99 + - 4.99 + - 9.95 + - 0.98 + - null + +# Inconsistent Data Types +bare: value + +bad_prices_aoh: + - product: doohickey + price: 4.99 + - product: fob + price: not set + - product: whatchamacallit + price: 9.95 + - product: widget + price: true + - product: unknown + +bad_prices_hash: + doohickey: + price: 4.99 + fob: + price: not set + whatchamacallit: + price: 9.95 + widget: + price: true + unknown: + +bad_prices_array: + - 4.99 + - not set + - 9.95 + - 0.98 + - null +""" + + yaml_file = create_temp_yaml_file(tmp_path_factory, content) + result = 
script_runner.run(self.command, "--query={}".format(query), yaml_file) + assert result.success, result.stderr + + match_index = 0 + for line in result.stdout.splitlines(): + assert line == output[match_index] + match_index += 1 diff --git a/tests/test_commands_yaml_paths.py b/tests/test_commands_yaml_paths.py index 82b5c939..81361a6f 100644 --- a/tests/test_commands_yaml_paths.py +++ b/tests/test_commands_yaml_paths.py @@ -2,6 +2,7 @@ from tests.conftest import create_temp_yaml_file +from yamlpath.enums import PathSeperators class Test_yaml_paths(): """Tests the yaml-paths command-line interface.""" @@ -989,3 +990,55 @@ def test_too_many_stdins(self, script_runner): result = script_runner.run(self.command, "--search", "=nothing", "-", "-") assert not result.success, result.stderr assert "Only one YAML_FILE may be the - pseudo-file" in result.stderr + + @pytest.mark.parametrize("pathsep,output", [ + (PathSeperators.AUTO, [ + 'foo.x: 12', + 'foo.y: hello world', + "foo.ip_range['initial']: 1.2.3.4", + 'foo.ip_range[]: tba', + "foo.array['first']: Cluster1", + 'array2[]: bar', + ]), + (PathSeperators.DOT, [ + 'foo.x: 12', + 'foo.y: hello world', + "foo.ip_range['initial']: 1.2.3.4", + 'foo.ip_range[]: tba', + "foo.array['first']: Cluster1", + 'array2[]: bar', + ]), + (PathSeperators.FSLASH, [ + '/foo/x: 12', + '/foo/y: hello world', + "/foo/ip_range['initial']: 1.2.3.4", + '/foo/ip_range[]: tba', + "/foo/array['first']: Cluster1", + '/array2[]: bar', + ]), + ]) + def test_unescaped_paths(self, script_runner, tmp_path_factory, pathsep, output): + # Credit: https://stackoverflow.com/questions/62155284/trying-to-get-all-paths-in-a-yaml-file + content = """--- +# sample set of lines +foo: + x: 12 + y: hello world + ip_range['initial']: 1.2.3.4 + ip_range[]: tba + array['first']: Cluster1 + +array2[]: bar +""" + yaml_file = create_temp_yaml_file(tmp_path_factory, content) + result = script_runner.run( + self.command, + "--nostdin", "--nofile", + "--expand", "--noescape", + 
"--keynames", "--values", + "--search", "=~/.*/", + "--pathsep", str(pathsep), + yaml_file + ) + assert result.success, result.stderr + assert "\n".join(output) + "\n" == result.stdout diff --git a/tests/test_commands_yaml_set.py b/tests/test_commands_yaml_set.py index 1759a639..ba28005b 100644 --- a/tests/test_commands_yaml_set.py +++ b/tests/test_commands_yaml_set.py @@ -1242,3 +1242,116 @@ def test_change_null(self, script_runner, tmp_path_factory): with open(yaml_file, 'r') as fhnd: filedat = fhnd.read() assert filedat == yamlout + + def test_assign_to_nonexistent_nodes(self, script_runner, tmp_path_factory): + # Contributed By: https://github.com/dwapstra + yamlin = """--- +devices: + R1: + os: ios + type: router + platform: asr1k + R2: + type: switch + platform: cat3k + R3: + type: access-point + platform: wrt + os: + R4: + type: tablet + os: null + platform: java + R5: + type: tablet + os: "" + platform: objective-c +""" + yamlout = """--- +devices: + R1: + os: ios + type: router + platform: asr1k + R2: + type: switch + platform: cat3k + os: generic + R3: + type: access-point + platform: wrt + os: + R4: + type: tablet + os: + platform: java + R5: + type: tablet + os: "" + platform: objective-c +""" + yaml_file = create_temp_yaml_file(tmp_path_factory, yamlin) + result = script_runner.run( + self.command, + "--change=/devices/*[!has_child(os)]/os", + "--value=generic", + yaml_file + ) + assert result.success, result.stderr + + with open(yaml_file, 'r') as fhnd: + filedat = fhnd.read() + assert filedat == yamlout + + def test_change_key_name_good(self, script_runner, tmp_path_factory): + yamlin = """--- +key: value +""" + yamlout = """--- +renamed_key: value +""" + yaml_file = create_temp_yaml_file(tmp_path_factory, yamlin) + result = script_runner.run( + self.command, + "--change=/key[name()]", + "--value=renamed_key", + yaml_file + ) + assert result.success, result.stderr + + with open(yaml_file, 'r') as fhnd: + filedat = fhnd.read() + assert filedat == 
yamlout + + def test_change_key_name_maps_only(self, script_runner, tmp_path_factory): + yamlin = """--- +items: + - one + - two +""" + + yaml_file = create_temp_yaml_file(tmp_path_factory, yamlin) + result = script_runner.run( + self.command, + "--change=/items[0][name()]", + "--value=2", + yaml_file + ) + assert not result.success, result.stdout + assert "Keys can be renamed only in Hash/map/dict" in result.stderr + + def test_change_key_name_unique_only(self, script_runner, tmp_path_factory): + yamlin = """--- +key: value +another_key: value +""" + + yaml_file = create_temp_yaml_file(tmp_path_factory, yamlin) + result = script_runner.run( + self.command, + "--change=another_key[name()]", + "--value=key", + yaml_file + ) + assert not result.success, result.stdout + assert "already exists at the same document level" in result.stderr diff --git a/tests/test_common_keywordsearches.py b/tests/test_common_keywordsearches.py new file mode 100644 index 00000000..34e9bea8 --- /dev/null +++ b/tests/test_common_keywordsearches.py @@ -0,0 +1,221 @@ +import pytest + +import ruamel.yaml as ry + +from yamlpath.enums import PathSearchKeywords +from yamlpath.path import SearchKeywordTerms +from yamlpath.common import KeywordSearches +from yamlpath.exceptions import YAMLPathException +from yamlpath import YAMLPath + +class Test_common_keywordsearches(): + """Tests for the KeywordSearches helper class.""" + + ### + # search_matches + ### + def test_unknown_search_keyword(self): + with pytest.raises(YAMLPathException) as ex: + nodes = list(KeywordSearches.search_matches( + SearchKeywordTerms(False, None, ""), + {}, + YAMLPath("/") + )) + assert -1 < str(ex.value).find("Unsupported search keyword") + + + ### + # has_child + ### + def test_has_child_invalid_param_count(self): + with pytest.raises(YAMLPathException) as ex: + nodes = list(KeywordSearches.search_matches( + SearchKeywordTerms(False, PathSearchKeywords.HAS_CHILD, []), + {}, + YAMLPath("/") + )) + assert -1 < 
str(ex.value).find("Invalid parameter count to ") + + def test_has_child_invalid_node(self): + with pytest.raises(YAMLPathException) as ex: + nodes = list(KeywordSearches.has_child( + "abc: xyz", + False, + ["wwk"], + YAMLPath("") + )) + assert -1 < str(ex.value).find("has no child nodes") + + + ### + # name + ### + def test_name_invalid_param_count(self): + with pytest.raises(YAMLPathException) as ex: + nodes = list(KeywordSearches.name( + False, + ["1", "2"], + YAMLPath("/") + )) + assert -1 < str(ex.value).find("Invalid parameter count to ") + + def test_name_invalid_inversion(self): + with pytest.raises(YAMLPathException) as ex: + nodes = list(KeywordSearches.name( + True, + [], + YAMLPath("/") + )) + assert -1 < str(ex.value).find("Inversion is meaningless to ") + + + ### + # max + ### + def test_max_invalid_param_count(self): + with pytest.raises(YAMLPathException) as ex: + nodes = list(KeywordSearches.max( + {}, + False, + ["1", "2"], + YAMLPath("/") + )) + assert -1 < str(ex.value).find("Invalid parameter count to ") + + def test_max_missing_aoh_param(self): + with pytest.raises(YAMLPathException) as ex: + nodes = list(KeywordSearches.max( + [{'a': 1},{'a': 2}], + False, + [], + YAMLPath("/") + )) + assert -1 < str(ex.value).find("when evaluating an Array-of-Hashes") + + def test_max_missing_hash_param(self): + with pytest.raises(YAMLPathException) as ex: + nodes = list(KeywordSearches.max( + {'a': {'b': 1}, 'c': {'d': 2}}, + False, + [], + YAMLPath("/") + )) + assert -1 < str(ex.value).find("when comparing Hash/map/dict children") + + def test_max_invalid_array_param(self): + with pytest.raises(YAMLPathException) as ex: + nodes = list(KeywordSearches.max( + [1, 2, 3], + False, + ['3'], + YAMLPath("/") + )) + assert -1 < str(ex.value).find("when comparing Array/sequence/list elements to one another") + + def test_max_incorrect_node(self): + with pytest.raises(YAMLPathException) as ex: + nodes = list(KeywordSearches.max( + {'b': 2}, + False, + ['b'], + 
YAMLPath("/*[max(b)]") + )) + assert -1 < str(ex.value).find("operates against collections of data") + + + ### + # min + ### + def test_min_invalid_param_count(self): + with pytest.raises(YAMLPathException) as ex: + nodes = list(KeywordSearches.min( + {}, + False, + ["1", "2"], + YAMLPath("/") + )) + assert -1 < str(ex.value).find("Invalid parameter count to ") + + def test_min_missing_aoh_param(self): + with pytest.raises(YAMLPathException) as ex: + nodes = list(KeywordSearches.min( + [{'a': 1},{'a': 2}], + False, + [], + YAMLPath("/") + )) + assert -1 < str(ex.value).find("when evaluating an Array-of-Hashes") + + def test_min_missing_hash_param(self): + with pytest.raises(YAMLPathException) as ex: + nodes = list(KeywordSearches.min( + {'a': {'b': 1}, 'c': {'d': 2}}, + False, + [], + YAMLPath("/") + )) + assert -1 < str(ex.value).find("when comparing Hash/map/dict children") + + def test_min_invalid_array_param(self): + with pytest.raises(YAMLPathException) as ex: + nodes = list(KeywordSearches.min( + [1, 2, 3], + False, + ['3'], + YAMLPath("/") + )) + assert -1 < str(ex.value).find("when comparing Array/sequence/list elements to one another") + + def test_min_incorrect_node(self): + with pytest.raises(YAMLPathException) as ex: + nodes = list(KeywordSearches.min( + {'b': 2}, + False, + ['b'], + YAMLPath("/*[max(b)]") + )) + assert -1 < str(ex.value).find("operates against collections of data") + + + ### + # parent + ### + def test_parent_invalid_param_count(self): + with pytest.raises(YAMLPathException) as ex: + nodes = list(KeywordSearches.parent( + {}, + False, + ["1", "2"], + YAMLPath("/") + )) + assert -1 < str(ex.value).find("Invalid parameter count to ") + + def test_parent_invalid_inversion(self): + with pytest.raises(YAMLPathException) as ex: + nodes = list(KeywordSearches.parent( + {}, + True, + [], + YAMLPath("/") + )) + assert -1 < str(ex.value).find("Inversion is meaningless to ") + + def test_parent_invalid_parameter(self): + with 
pytest.raises(YAMLPathException) as ex: + nodes = list(KeywordSearches.parent( + {}, + False, + ["abc"], + YAMLPath("/") + )) + assert -1 < str(ex.value).find("Invalid parameter passed to ") + + def test_parent_invalid_step_count(self): + with pytest.raises(YAMLPathException) as ex: + nodes = list(KeywordSearches.parent( + {}, + False, + ["5"], + YAMLPath("/") + )) + assert -1 < str(ex.value).find("higher than the document root") diff --git a/tests/test_common_nodes.py b/tests/test_common_nodes.py index b0ecfa6b..9e532ed2 100644 --- a/tests/test_common_nodes.py +++ b/tests/test_common_nodes.py @@ -57,3 +57,19 @@ def test_delete_tag(self): ### def test_tagless_value_syntax_error(self): assert "[abc" == Nodes.tagless_value("[abc") + + + ### + # node_is_aoh + ### + def test_aoh_node_is_none(self): + assert False == Nodes.node_is_aoh(None) + + def test_aoh_node_is_not_list(self): + assert False == Nodes.node_is_aoh({"key": "value"}) + + def test_aoh_is_inconsistent(self): + assert False == Nodes.node_is_aoh([ + {"key": "value"}, + None + ]) diff --git a/tests/test_common_parsers.py b/tests/test_common_parsers.py index a4776abe..7cdb59c0 100644 --- a/tests/test_common_parsers.py +++ b/tests/test_common_parsers.py @@ -78,7 +78,7 @@ def test_stringify_complex_data_with_dates(self): ### # jsonify_yaml_data ### - def test_jsonify_complex_data(self): + def test_jsonify_complex_ruamel_data(self): tagged_tag = "!tagged" tagged_value = "tagged value" tagged_scalar = ry.scalarstring.PlainScalarString(tagged_value) @@ -101,3 +101,14 @@ def test_jsonify_complex_data(self): assert jdata["null"] == null_value assert jdata["dates"][0] == "2020-10-31" assert jdata["dates"][1] == "2020-11-03" + + def test_jsonify_complex_python_data(self): + cdata = { + "dates": [ + dt.date(2020, 10, 31), + dt.date(2020, 11, 3) + ] + } + jdata = Parsers.jsonify_yaml_data(cdata) + assert jdata["dates"][0] == "2020-10-31" + assert jdata["dates"][1] == "2020-11-03" diff --git 
a/tests/test_common_searches.py b/tests/test_common_searches.py index fcaa0f27..873172d3 100644 --- a/tests/test_common_searches.py +++ b/tests/test_common_searches.py @@ -12,12 +12,33 @@ class Test_common_searches(): ### # search_matches ### - def test_search_matches(self): - method = PathSearchMethods.CONTAINS - needle = "a" - haystack = "parents" - assert Searches.search_matches(method, needle, haystack) == True - + @pytest.mark.parametrize("match, method, needle, haystack", [ + (True, PathSearchMethods.CONTAINS, "a", "parents"), + (True, PathSearchMethods.ENDS_WITH, "ts", "parents"), + (True, PathSearchMethods.EQUALS, "parents", "parents"), + (True, PathSearchMethods.EQUALS, 42, 42), + (True, PathSearchMethods.EQUALS, "42", 42), + (True, PathSearchMethods.EQUALS, 3.14159265385, 3.14159265385), + (True, PathSearchMethods.EQUALS, "3.14159265385", 3.14159265385), + (True, PathSearchMethods.EQUALS, True, True), + (True, PathSearchMethods.EQUALS, "True", True), + (True, PathSearchMethods.EQUALS, "true", True), + (True, PathSearchMethods.EQUALS, False, False), + (True, PathSearchMethods.EQUALS, "False", False), + (True, PathSearchMethods.EQUALS, "false", False), + (True, PathSearchMethods.GREATER_THAN, 2, 4), + (True, PathSearchMethods.GREATER_THAN, "2", 4), + (True, PathSearchMethods.GREATER_THAN_OR_EQUAL, 2, 4), + (True, PathSearchMethods.GREATER_THAN_OR_EQUAL, "2", 4), + (True, PathSearchMethods.LESS_THAN, 4, 2), + (True, PathSearchMethods.LESS_THAN, "4", 2), + (True, PathSearchMethods.LESS_THAN_OR_EQUAL, 4, 2), + (True, PathSearchMethods.LESS_THAN_OR_EQUAL, "4", 2), + (True, PathSearchMethods.REGEX, ".+", "a"), + (True, PathSearchMethods.STARTS_WITH, "p", "parents") + ]) + def test_search_matches(self, match, method, needle, haystack): + assert match == Searches.search_matches(method, needle, haystack) ### # search_anchor diff --git a/tests/test_path_searchkeywordterms.py b/tests/test_path_searchkeywordterms.py new file mode 100644 index 00000000..a4334eea --- 
/dev/null +++ b/tests/test_path_searchkeywordterms.py @@ -0,0 +1,42 @@ +import pytest + +from yamlpath.enums import PathSearchKeywords +from yamlpath.path import SearchKeywordTerms + +class Test_path_SearchKeywordTerms(): + """Tests for the SearchKeywordTerms class.""" + + @pytest.mark.parametrize("invert,keyword,parameters,output", [ + (True, PathSearchKeywords.HAS_CHILD, "abc", "[!has_child(abc)]"), + (False, PathSearchKeywords.HAS_CHILD, "abc", "[has_child(abc)]"), + (False, PathSearchKeywords.HAS_CHILD, "abc\\,def", "[has_child(abc\\,def)]"), + (False, PathSearchKeywords.HAS_CHILD, "abc, def", "[has_child(abc, def)]"), + (False, PathSearchKeywords.HAS_CHILD, "abc,' def'", "[has_child(abc,' def')]"), + ]) + def test_str(self, invert, keyword, parameters, output): + assert output == str(SearchKeywordTerms(invert, keyword, parameters)) + + @pytest.mark.parametrize("parameters,output", [ + ("abc", ["abc"]), + ("abc\\,def", ["abc,def"]), + ("abc, def", ["abc", "def"]), + ("abc,' def'", ["abc", " def"]), + ("1,'1', 1, '1', 1 , ' 1', '1 ', ' 1 '", ["1", "1", "1", "1", "1", " 1", "1 ", " 1 "]), + ("true, False,'True','false'", ["true", "False", "True", "false"]), + ("'',,\"\", '', ,,\"\\'\",'\\\"'", ["", "", "", "", "", "", "'", "\""]), + ("'And then, she said, \"Quote!\"'", ["And then, she said, \"Quote!\""]), + (None, []), + ]) + def test_parameter_parsing(self, parameters, output): + skt = SearchKeywordTerms(False, PathSearchKeywords.HAS_CHILD, parameters) + assert output == skt.parameters + + @pytest.mark.parametrize("parameters", [ + ("','a'"), + ("a,\"b,"), + ]) + def test_unmatched_demarcation(self, parameters): + skt = SearchKeywordTerms(False, PathSearchKeywords.HAS_CHILD, parameters) + with pytest.raises(ValueError) as ex: + parmlist = skt.parameters + assert -1 < str(ex.value).find("one or more unmatched demarcation symbol") diff --git a/tests/test_processor.py b/tests/test_processor.py index 2607c73d..3ac32846 100644 --- a/tests/test_processor.py +++ 
b/tests/test_processor.py @@ -315,6 +315,8 @@ def test_set_value_in_none_data(self, capsys, quiet_logger): ("/top_hash/positive_float", -2.71828, 1, True, YAMLValueFormats.FLOAT, PathSeperators.FSLASH), ("/top_hash/negative_float", 5283.4, 1, True, YAMLValueFormats.FLOAT, PathSeperators.FSLASH), ("/null_value", "No longer null", 1, True, YAMLValueFormats.DEFAULT, PathSeperators.FSLASH), + ("(top_array[0])+(top_hash.negative_float)+(/null_value)", "REPLACEMENT", 3, True, YAMLValueFormats.DEFAULT, PathSeperators.FSLASH), + ("(((top_array[0])+(top_hash.negative_float))+(/null_value))", "REPLACEMENT", 3, False, YAMLValueFormats.DEFAULT, PathSeperators.FSLASH), ]) def test_set_value(self, quiet_logger, yamlpath, value, tally, mustexist, vformat, pathsep): yamldata = """--- @@ -338,7 +340,13 @@ def test_set_value(self, quiet_logger, yamlpath, value, tally, mustexist, vforma processor.set_value(yamlpath, value, mustexist=mustexist, value_format=vformat, pathsep=pathsep) matchtally = 0 for node in processor.get_nodes(yamlpath, mustexist=mustexist): - assert unwrap_node_coords(node) == value + changed_value = unwrap_node_coords(node) + if isinstance(changed_value, list): + for result in changed_value: + assert result == value + matchtally += 1 + continue + assert changed_value == value matchtally += 1 assert matchtally == tally @@ -910,3 +918,46 @@ def test_tag_nodes(self, quiet_logger, yaml_path, tag, pathseperator): assert isinstance(data['key'], TaggedScalar) assert data['key'].tag.value == tag + + @pytest.mark.parametrize("yaml_path,value,old_data,new_data", [ + (YAMLPath("/key[name()]"), "renamed_key", {'key': 'value'}, {'renamed_key': 'value'}), + ]) + def test_rename_dict_key(self, quiet_logger, yaml_path, value, old_data, new_data): + processor = Processor(quiet_logger, old_data) + processor.set_value(yaml_path, value) + assert new_data == old_data + + @pytest.mark.parametrize("yaml_path,value,old_data", [ + (YAMLPath("/key[name()]"), "renamed_key", {'key': 'value', 
'renamed_key': 'value'}), + ]) + def test_rename_dict_key_cannot_overwrite(self, quiet_logger, yaml_path, value, old_data): + processor = Processor(quiet_logger, old_data) + with pytest.raises(YAMLPathException) as ex: + processor.set_value(yaml_path, value) + assert -1 < str(ex.value).find("already exists at the same document level") + + def test_traverse_with_null(self, quiet_logger): + # Contributed by https://github.com/rbordelo + yamldata = """--- +Things: + - name: first thing + rank: 42 + - name: second thing + rank: 5 + - name: third thing + rank: null + - name: fourth thing + rank: 1 +""" + + results = ["first thing", "second thing", "third thing", "fourth thing"] + + yaml = YAML() + data = yaml.load(yamldata) + processor = Processor(quiet_logger, data) + yamlpath = YAMLPath("/**/name") + + match_index = 0 + for node in processor.get_nodes(yamlpath): + assert unwrap_node_coords(node) == results[match_index] + match_index += 1 diff --git a/tests/test_wrappers_consoleprinter.py b/tests/test_wrappers_consoleprinter.py index e6590ffc..0d94225d 100644 --- a/tests/test_wrappers_consoleprinter.py +++ b/tests/test_wrappers_consoleprinter.py @@ -5,8 +5,8 @@ from ruamel.yaml.comments import CommentedMap, CommentedSeq, TaggedScalar from ruamel.yaml.scalarstring import PlainScalarString, FoldedScalarString -from yamlpath.wrappers import NodeCoords -from yamlpath.wrappers import ConsolePrinter +from yamlpath.enums import PathSegmentTypes +from yamlpath.wrappers import NodeCoords, ConsolePrinter from yamlpath import YAMLPath class Test_wrappers_ConsolePrinter(): @@ -162,16 +162,30 @@ def test_debug_noisy(self, capsys): "DEBUG: [tagged_array][1]b", ]) - nc = NodeCoords("value", dict(key="value"), "key", YAMLPath("key")) + nc = NodeCoords( + "value", + dict(key="value"), + "key", + YAMLPath("doc_root.key"), + [ (dict(doc_root=dict(key="value")), "doc_root"), + (dict(key="value"), "key")], + (PathSegmentTypes.KEY, "key") + ) logger.debug( "A node coordinate:", 
prefix="test_debug_noisy: ", data=nc) console = capsys.readouterr() assert "\n".join([ "DEBUG: test_debug_noisy: A node coordinate:", - "DEBUG: test_debug_noisy: (path)key", + "DEBUG: test_debug_noisy: (path)doc_root.key", + "DEBUG: test_debug_noisy: (segment)[0]PathSegmentTypes.KEY", + "DEBUG: test_debug_noisy: (segment)[1]key", "DEBUG: test_debug_noisy: (node)value", "DEBUG: test_debug_noisy: (parent)[key]value", "DEBUG: test_debug_noisy: (parentref)key", + "DEBUG: test_debug_noisy: (ancestry)[0][0][doc_root][key]value", + "DEBUG: test_debug_noisy: (ancestry)[0][1]doc_root", + "DEBUG: test_debug_noisy: (ancestry)[1][0][key]value", + "DEBUG: test_debug_noisy: (ancestry)[1][1]key", ]) + "\n" == console.out logger.debug(foldedval) diff --git a/tests/test_wrappers_nodecoords.py b/tests/test_wrappers_nodecoords.py index 82d2c78d..f851f55c 100644 --- a/tests/test_wrappers_nodecoords.py +++ b/tests/test_wrappers_nodecoords.py @@ -15,3 +15,23 @@ def test_repr(self): def test_str(self): node_coord = NodeCoords([], None, None) assert str(node_coord) == "[]" + + def test_gt(self): + lhs_nc = NodeCoords(5, None, None) + rhs_nc = NodeCoords(3, None, None) + assert lhs_nc > rhs_nc + + def test_null_gt(self): + lhs_nc = NodeCoords(5, None, None) + rhs_nc = NodeCoords(None, None, None) + assert not lhs_nc > rhs_nc + + def test_lt(self): + lhs_nc = NodeCoords(5, None, None) + rhs_nc = NodeCoords(7, None, None) + assert lhs_nc < rhs_nc + + def test_null_lt(self): + lhs_nc = NodeCoords(5, None, None) + rhs_nc = NodeCoords(None, None, None) + assert not lhs_nc < rhs_nc diff --git a/tests/test_yamlpath.py b/tests/test_yamlpath.py index c12e1266..ecae5b87 100644 --- a/tests/test_yamlpath.py +++ b/tests/test_yamlpath.py @@ -4,7 +4,7 @@ from yamlpath.enums import PathSegmentTypes, PathSeperators from yamlpath import YAMLPath -class Test_path_Path(): +class Test_YAMLPath(): """Tests for the Path class.""" @pytest.mark.parametrize("yamlpath,pathsep,output", [ @@ -32,8 +32,8 @@ class 
Test_path_Path(): ("a*f", PathSeperators.AUTO, "[.=~/^a.*f$/]"), ("a*f*z", PathSeperators.AUTO, "[.=~/^a.*f.*z$/]"), ("a*f*z*", PathSeperators.AUTO, "[.=~/^a.*f.*z.*$/]"), - ("*", PathSeperators.AUTO, "[.=~/.*/]"), - ("*.*", PathSeperators.AUTO, "[.=~/.*/][.=~/.*/]"), + ("*", PathSeperators.AUTO, "*"), + ("*.*", PathSeperators.AUTO, "*.*"), ("**", PathSeperators.AUTO, "**"), ("/**/def", PathSeperators.AUTO, "/**/def"), ("abc.**.def", PathSeperators.AUTO, "abc.**.def"), @@ -186,3 +186,35 @@ def test_parse_meaningless_traversal(self): with pytest.raises(YAMLPathException) as ex: str(YAMLPath("abc**")) assert -1 < str(ex.value).find("The ** traversal operator has no meaning when combined with other characters") + + def test_parse_bad_following_char(self): + with pytest.raises(YAMLPathException) as ex: + str(YAMLPath("abc[has_child(def)ghi]")) + assert -1 < str(ex.value).find("Invalid YAML Path at g, which must be ]") + + def test_parse_unknown_search_keyword(self): + with pytest.raises(YAMLPathException) as ex: + str(YAMLPath("abc[unknown_keyword()]")) + assert -1 < str(ex.value).find("Unknown search keyword, unknown_keyword") + + @pytest.mark.parametrize("path,pops,results", [ + ("/abc", 1, [(PathSegmentTypes.KEY, "abc")]), + ("abc", 1, [(PathSegmentTypes.KEY, "abc")]), + ("/abc/def", 2, [(PathSegmentTypes.KEY, "def"), (PathSegmentTypes.KEY, "abc")]), + ("abc.def", 2, [(PathSegmentTypes.KEY, "def"), (PathSegmentTypes.KEY, "abc")]), + ("/abc/def[3]", 3, [(PathSegmentTypes.INDEX, 3), (PathSegmentTypes.KEY, "def"), (PathSegmentTypes.KEY, "abc")]), + ("abc.def[3]", 3, [(PathSegmentTypes.INDEX, 3), (PathSegmentTypes.KEY, "def"), (PathSegmentTypes.KEY, "abc")]), + ("/abc/def[3][1]", 4, [(PathSegmentTypes.INDEX, 1), (PathSegmentTypes.INDEX, 3), (PathSegmentTypes.KEY, "def"), (PathSegmentTypes.KEY, "abc")]), + ("abc.def[3][1]", 4, [(PathSegmentTypes.INDEX, 1), (PathSegmentTypes.INDEX, 3), (PathSegmentTypes.KEY, "def"), (PathSegmentTypes.KEY, "abc")]), + ]) + def 
test_pop_segments(self, path, pops, results): + yp = YAMLPath(path) + for pop in range(pops): + assert results[pop] == yp.pop() + + def test_pop_too_many(self): + yp = YAMLPath("abc.def") + with pytest.raises(YAMLPathException) as ex: + for _ in range(5): + yp.pop() + assert -1 < str(ex.value).find("Cannot pop when") diff --git a/yamlpath/__init__.py b/yamlpath/__init__.py index 5962188e..8b0a6021 100644 --- a/yamlpath/__init__.py +++ b/yamlpath/__init__.py @@ -1,6 +1,6 @@ """Core YAML Path classes.""" # Establish the version number common to all components -__version__ = "3.4.1" +__version__ = "3.5.0" from yamlpath.yamlpath import YAMLPath from yamlpath.processor import Processor diff --git a/yamlpath/commands/yaml_paths.py b/yamlpath/commands/yaml_paths.py index 8e4040c5..c4b0afe4 100644 --- a/yamlpath/commands/yaml_paths.py +++ b/yamlpath/commands/yaml_paths.py @@ -92,6 +92,13 @@ def processcli(): help="omit YAML Paths from the output (useful with --values or to\ indicate whether a file has any matches without printing them\ all, perhaps especially with --noexpression)") + valdump_group.add_argument( + "-n", "--noescape", + action="store_true", + help="omit escape characters from special characters in printed YAML\ + Paths; this is unsafe for feeding the resulting YAML Paths into\ + other YAML Path commands because the symbols that would be\ + escaped have special meaning to YAML Path processors") parser.add_argument( "-t", "--pathsep", @@ -688,7 +695,16 @@ def print_results( resline += buffers[0] if print_yaml_path: - resline += "{}".format(result) + if args.noescape: + use_flash = args.pathsep is PathSeperators.FSLASH + seglines = [] + join_mark = "/" if use_flash else "." 
+ path_prefix = "/" if use_flash else "" + for (_, segment) in result.escaped: + seglines.append(str(segment)) + resline += "{}{}".format(path_prefix, join_mark.join(seglines)) + else: + resline += "{}".format(result) resline += buffers[1] if print_value: diff --git a/yamlpath/commands/yaml_set.py b/yamlpath/commands/yaml_set.py index b9c80e04..1e4a307d 100644 --- a/yamlpath/commands/yaml_set.py +++ b/yamlpath/commands/yaml_set.py @@ -391,6 +391,7 @@ def _get_nodes(log, processor, yaml_path, **kwargs): """Gather requested nodes.""" must_exist = kwargs.pop("must_exist", False) default_value = kwargs.pop("default_value", " ") + ignore_fail = kwargs.pop("ignore_fail", False) gathered_nodes = [] try: @@ -402,6 +403,11 @@ def _get_nodes(log, processor, yaml_path, **kwargs): data=node_coordinate, prefix="yaml_set::_get_nodes: ") gathered_nodes.append(node_coordinate) except YAMLPathException as ex: + if ignore_fail: + log.debug( + "Ignoring failure to gather nodes due to: {}".format(ex), + prefix="yaml_set::_get_nodes: ") + return [] log.critical(ex, 1) log.debug( @@ -477,8 +483,9 @@ def main(): processor = EYAMLProcessor( log, yaml_data, binary=args.eyaml, publickey=args.publickey, privatekey=args.privatekey) + ignore_fail = not must_exist change_node_coordinates = _get_nodes( - log, processor, change_path, must_exist=must_exist, + log, processor, change_path, must_exist=True, ignore_fail=ignore_fail, default_value=("" if new_value else " ")) old_format = YAMLValueFormats.DEFAULT diff --git a/yamlpath/common/__init__.py b/yamlpath/common/__init__.py index 3270d771..765d000d 100644 --- a/yamlpath/common/__init__.py +++ b/yamlpath/common/__init__.py @@ -1,5 +1,6 @@ """Common library methods.""" from .anchors import Anchors +from .keywordsearches import KeywordSearches from .nodes import Nodes from .parsers import Parsers from .searches import Searches diff --git a/yamlpath/common/anchors.py b/yamlpath/common/anchors.py index 0e59dfeb..13973d73 100644 --- 
a/yamlpath/common/anchors.py +++ b/yamlpath/common/anchors.py @@ -90,8 +90,14 @@ def replace_merge_anchor(data: Any, old_node: Any, repl_node: Any) -> None: data.merge[midx] = (data.merge[midx][0], repl_node) @staticmethod - def combine_merge_anchors(lhs: CommentedMap, rhs: CommentedMap): - """Merge YAML merge keys.""" + def combine_merge_anchors(lhs: CommentedMap, rhs: CommentedMap) -> None: + """ + Merge YAML merge keys. + + Parameters: + 1. lhs (CommentedMap) The map to merge into + 2. rhs (CommentedMap) The map to merge from + """ for mele in rhs.merge: lhs.add_yaml_merge([mele]) @@ -172,7 +178,14 @@ def generate_unique_anchor_name( @staticmethod def get_node_anchor(node: Any) -> Optional[str]: - """Return a node's Anchor/Alias name or None wheh there isn't one.""" + """ + Return a node's Anchor/Alias name or None when there isn't one. + + Parameters: + 1. node (Any) The node to evaluate + + Returns: (str) The node's Anchor/Alias name or None when unset + """ if ( not hasattr(node, "anchor") or node.anchor is None diff --git a/yamlpath/common/keywordsearches.py b/yamlpath/common/keywordsearches.py new file mode 100644 index 00000000..e9c9bab8 --- /dev/null +++ b/yamlpath/common/keywordsearches.py @@ -0,0 +1,707 @@ +""" +Implement KeywordSearches. + +This is a static library of generally-useful code for searching data based on +pre-defined keywords (in the programming language sense). + +Copyright 2020 William W. Kimball, Jr. 
MBA MSIS +""" +from typing import Any, Generator, List + +from yamlpath.types import AncestryEntry, PathSegment +from yamlpath.enums import PathSearchKeywords, PathSearchMethods +from yamlpath.path import SearchKeywordTerms +from yamlpath.exceptions import YAMLPathException +from yamlpath.wrappers import NodeCoords +from yamlpath import YAMLPath +import yamlpath.common + +class KeywordSearches: + """Helper methods for common data searching operations.""" + + @staticmethod + def search_matches( + terms: SearchKeywordTerms, haystack: Any, yaml_path: YAMLPath, + **kwargs: Any + ) -> Generator[NodeCoords, None, None]: + """ + Perform a keyword search. + + Parameters: + 1. terms (SearchKeywordTerms) The search operation to perform + 2. haystack (Any) The data to evaluate + 3. yaml_path (YAMLPath) YAML Path containing this search keyword + + Keyword Arguments: See each of the called KeywordSearches methods + + Returns: (Generator[NodeCoords, None, None]) Matching data as it is + generated + """ + invert: bool = terms.inverted + keyword: PathSearchKeywords = terms.keyword + parameters: List[str] = terms.parameters + nc_matches: Generator[NodeCoords, None, None] + + if keyword is PathSearchKeywords.HAS_CHILD: + nc_matches = KeywordSearches.has_child( + haystack, invert, parameters, yaml_path, **kwargs) + elif keyword is PathSearchKeywords.NAME: + nc_matches = KeywordSearches.name( + invert, parameters, yaml_path, **kwargs) + elif keyword is PathSearchKeywords.MAX: + nc_matches = KeywordSearches.max( + haystack, invert, parameters, yaml_path, **kwargs) + elif keyword is PathSearchKeywords.MIN: + nc_matches = KeywordSearches.min( + haystack, invert, parameters, yaml_path, **kwargs) + elif keyword is PathSearchKeywords.PARENT: + nc_matches = KeywordSearches.parent( + haystack, invert, parameters, yaml_path, **kwargs) + else: + raise YAMLPathException( + "Unsupported search keyword {} in".format(keyword), + str(yaml_path)) + + for nc_match in nc_matches: + yield nc_match + + 
@staticmethod + # pylint: disable=locally-disabled,too-many-locals + def has_child( + data: Any, invert: bool, parameters: List[str], yaml_path: YAMLPath, + **kwargs: Any + ) -> Generator[NodeCoords, None, None]: + """ + Indicate whether data has a named child. + + Parameters: + 1. data (Any) The data to evaluate + 2. invert (bool) Invert the evaluation + 3. parameters (List[str]) Parsed parameters + 4. yaml_path (YAMLPath) YAML Path begetting this operation + + Keyword Arguments: + * parent (ruamel.yaml node) The parent node from which this query + originates + * parentref (Any) The Index or Key of data within parent + * relay_segment (PathSegment) YAML Path segment presently under + evaluation + * translated_path (YAMLPath) YAML Path indicating precisely which node + is being evaluated + * ancestry (List[AncestryEntry]) Stack of ancestors preceding the + present node under evaluation + + Returns: (Generator[NodeCoords, None, None]) each result as it is + generated + """ + parent: Any = kwargs.pop("parent", None) + parentref: Any = kwargs.pop("parentref", None) + translated_path: YAMLPath = kwargs.pop("translated_path", YAMLPath("")) + ancestry: List[AncestryEntry] = kwargs.pop("ancestry", []) + relay_segment: PathSegment = kwargs.pop("relay_segment", None) + + # There must be exactly one parameter + param_count = len(parameters) + if param_count != 1: + raise YAMLPathException( + ("Invalid parameter count to {}; {} required, got {} in" + " YAML Path").format( + PathSearchKeywords.HAS_CHILD, 1, param_count), + str(yaml_path)) + match_key = parameters[0] + + # Against a map, this will return nodes which have an immediate + # child key exactly named as per parameters. When inverted, only + # parents with no such key are yielded. 
+ if isinstance(data, dict): + child_present = data is not None and match_key in data + if ( + (invert and not child_present) or + (child_present and not invert) + ): + yield NodeCoords( + data, parent, parentref, translated_path, ancestry, + relay_segment) + + # Against a list, this will merely require an exact match between + # parameters and any list elements. When inverted, every + # non-matching element is yielded. + elif isinstance(data, list): + # Against an AoH, this will scan each element's immediate children, + # treating and yielding as if this search were performed directly + # against each map in the list. + if yamlpath.common.Nodes.node_is_aoh(data): + for idx, ele in enumerate(data): + next_path = translated_path.append("[{}]".format(str(idx))) + for aoh_match in KeywordSearches.has_child( + ele, invert, parameters, yaml_path, + parent=data, parentref=idx, translated_path=next_path + ): + yield aoh_match + return + + child_present = match_key in data + if ( + (invert and not child_present) or + (child_present and not invert) + ): + yield NodeCoords( + data, parent, parentref, translated_path, ancestry, + relay_segment) + + elif data is None: + if invert: + yield NodeCoords( + data, parent, parentref, translated_path, ancestry, + relay_segment) + + else: + raise YAMLPathException( + ("{} data has no child nodes in YAML Path").format(type(data)), + str(yaml_path)) + + @staticmethod + # pylint: disable=locally-disabled,too-many-locals + def name( + invert: bool, parameters: List[str], yaml_path: YAMLPath, + **kwargs: Any + ) -> Generator[NodeCoords, None, None]: + """ + Match only the key-name of the present node. + + Parameters: + 1. invert (bool) Invert the evaluation + 2. parameters (List[str]) Parsed parameters + 3. 
yaml_path (YAMLPath) YAML Path begetting this operation + + Keyword Arguments: + * parent (ruamel.yaml node) The parent node from which this query + originates + * parentref (Any) The Index or Key of data within parent + * relay_segment (PathSegment) YAML Path segment presently under + evaluation + * translated_path (YAMLPath) YAML Path indicating precisely which node + is being evaluated + * ancestry (List[AncestryEntry]) Stack of ancestors preceding the + present node under evaluation + + Returns: (Generator[NodeCoords, None, None]) each result as it is + generated + """ + parent: Any = kwargs.pop("parent", None) + parentref: Any = kwargs.pop("parentref", None) + translated_path: YAMLPath = kwargs.pop("translated_path", YAMLPath("")) + ancestry: List[AncestryEntry] = kwargs.pop("ancestry", []) + relay_segment: PathSegment = kwargs.pop("relay_segment", None) + + # There are no parameters + param_count = len(parameters) + if param_count > 1: + raise YAMLPathException(( + "Invalid parameter count to {}(); {} are permitted, " + " got {} in YAML Path" + ).format(PathSearchKeywords.NAME, 0, param_count), + str(yaml_path)) + + if invert: + raise YAMLPathException(( + "Inversion is meaningless to {}()" + ).format(PathSearchKeywords.NAME), + str(yaml_path)) + + yield NodeCoords( + parentref, parent, parentref, translated_path, ancestry, + relay_segment) + + @staticmethod + # pylint: disable=locally-disabled,too-many-locals,too-many-branches,too-many-statements + def max( + data: Any, invert: bool, parameters: List[str], yaml_path: YAMLPath, + **kwargs: Any + ) -> Generator[NodeCoords, None, None]: + """ + Find whichever nodes/elements have a maximum value. + + Parameters: + 1. data (Any) The data to evaluate + 2. invert (bool) Invert the evaluation + 3. parameters (List[str]) Parsed parameters + 4. 
yaml_path (YAMLPath) YAML Path begetting this operation + + Keyword Arguments: + * parent (ruamel.yaml node) The parent node from which this query + originates + * parentref (Any) The Index or Key of data within parent + * relay_segment (PathSegment) YAML Path segment presently under + evaluation + * translated_path (YAMLPath) YAML Path indicating precisely which node + is being evaluated + * ancestry (List[AncestryEntry]) Stack of ancestors preceding the + present node under evaluation + + Returns: (Generator[NodeCoords, None, None]) each result as it is + generated + """ + parent: Any = kwargs.pop("parent", None) + parentref: Any = kwargs.pop("parentref", None) + translated_path: YAMLPath = kwargs.pop("translated_path", YAMLPath("")) + ancestry: List[AncestryEntry] = kwargs.pop("ancestry", []) + relay_segment: PathSegment = kwargs.pop("relay_segment", None) + + # There may be 0 or 1 parameters + param_count = len(parameters) + if param_count > 1: + raise YAMLPathException(( + "Invalid parameter count to {}([NAME]); up to {} permitted, " + " got {} in YAML Path" + ).format(PathSearchKeywords.MAX, 1, param_count), + str(yaml_path)) + + scan_node = parameters[0] if param_count > 0 else None + match_value: Any = None + match_nodes: List[NodeCoords] = [] + discard_nodes: List[NodeCoords] = [] + if yamlpath.common.Nodes.node_is_aoh(data): + # A named child node is mandatory + if scan_node is None: + raise YAMLPathException(( + "The {}([NAME]) Search Keyword requires a key name to scan" + " when evaluating an Array-of-Hashes in YAML Path" + ).format(PathSearchKeywords.MAX), + str(yaml_path)) + + for idx, ele in enumerate(data): + next_path = translated_path + "[{}]".format(idx) + next_ancestry = ancestry + [(data, idx)] + if scan_node in ele: + eval_val = ele[scan_node] + if (match_value is None + or yamlpath.common.Searches.search_matches( + PathSearchMethods.GREATER_THAN, match_value, + eval_val) + ): + match_value = eval_val + discard_nodes.extend(match_nodes) + 
match_nodes = [ + NodeCoords( + ele, data, idx, next_path, next_ancestry, + relay_segment) + ] + continue + + if (match_value is None + or yamlpath.common.Searches.search_matches( + PathSearchMethods.EQUALS, match_value, + eval_val) + ): + match_nodes.append(NodeCoords( + ele, data, idx, next_path, next_ancestry, + relay_segment)) + continue + + discard_nodes.append(NodeCoords( + ele, data, idx, next_path, next_ancestry, + relay_segment)) + + elif isinstance(data, dict): + # A named child node is mandatory + if scan_node is None: + raise YAMLPathException(( + "The {}([NAME]) Search Keyword requires a key name to scan" + " when comparing Hash/map/dict children in YAML Path" + ).format(PathSearchKeywords.MAX), + str(yaml_path)) + + for key, val in data.items(): + if isinstance(val, dict): + if val is not None and scan_node in val: + eval_val = val[scan_node] + next_path = ( + translated_path + YAMLPath.escape_path_section( + key, translated_path.seperator)) + next_ancestry = ancestry + [(data, key)] + if (match_value is None + or yamlpath.common.Searches.search_matches( + PathSearchMethods.GREATER_THAN, match_value, + eval_val) + ): + match_value = eval_val + discard_nodes.extend(match_nodes) + match_nodes = [ + NodeCoords( + val, data, key, next_path, next_ancestry, + relay_segment) + ] + continue + + if (match_value is None + or yamlpath.common.Searches.search_matches( + PathSearchMethods.EQUALS, match_value, + eval_val) + ): + match_nodes.append(NodeCoords( + val, data, key, next_path, next_ancestry, + relay_segment)) + continue + + elif scan_node in data: + # The user probably meant to operate against the parent + raise YAMLPathException(( + "The {}([NAME]) Search Keyword operates against" + " collections of data which share a common attribute" + " yet there is only a single node to consider. Did" + " you mean to evaluate the parent of the selected" + " node? 
Please review your YAML Path" + ).format(PathSearchKeywords.MAX), + str(yaml_path)) + + discard_nodes.append(NodeCoords( + val, data, key, next_path, next_ancestry, + relay_segment)) + + elif isinstance(data, list): + # A named child node is useless + if scan_node is not None: + raise YAMLPathException(( + "The {}([NAME]) Search Keyword cannot utilize a key name" + " when comparing Array/sequence/list elements to one" + " another in YAML Path" + ).format(PathSearchKeywords.MAX), + str(yaml_path)) + + for idx, ele in enumerate(data): + next_path = translated_path + "[{}]".format(idx) + next_ancestry = ancestry + [(data, idx)] + if (ele is not None + and ( + match_value is None or + yamlpath.common.Searches.search_matches( + PathSearchMethods.GREATER_THAN, match_value, + ele) + )): + match_value = ele + discard_nodes.extend(match_nodes) + match_nodes = [ + NodeCoords( + ele, data, idx, next_path, next_ancestry, + relay_segment) + ] + continue + + if (ele is not None + and yamlpath.common.Searches.search_matches( + PathSearchMethods.EQUALS, match_value, + ele) + ): + match_nodes.append(NodeCoords( + ele, data, idx, next_path, next_ancestry, + relay_segment)) + continue + + discard_nodes.append(NodeCoords( + ele, data, idx, next_path, next_ancestry, + relay_segment)) + + else: + # Non-complex data is always its own maximum and does not invert + match_value = data + match_nodes = [ + NodeCoords( + data, parent, parentref, translated_path, ancestry, + relay_segment) + ] + + yield_nodes = discard_nodes if invert else match_nodes + for node_coord in yield_nodes: + yield node_coord + + + @staticmethod + # pylint: disable=locally-disabled,too-many-locals,too-many-branches,too-many-statements + def min( + data: Any, invert: bool, parameters: List[str], yaml_path: YAMLPath, + **kwargs: Any + ) -> Generator[NodeCoords, None, None]: + """ + Find whichever nodes/elements have a minimum value. + + Parameters: + 1. data (Any) The data to evaluate + 2. 
invert (bool) Invert the evaluation + 3. parameters (List[str]) Parsed parameters + 4. yaml_path (YAMLPath) YAML Path begetting this operation + + Keyword Arguments: + * parent (ruamel.yaml node) The parent node from which this query + originates + * parentref (Any) The Index or Key of data within parent + * relay_segment (PathSegment) YAML Path segment presently under + evaluation + * translated_path (YAMLPath) YAML Path indicating precisely which node + is being evaluated + * ancestry (List[AncestryEntry]) Stack of ancestors preceding the + present node under evaluation + + Returns: (Generator[NodeCoords, None, None]) each result as it is + generated + """ + parent: Any = kwargs.pop("parent", None) + parentref: Any = kwargs.pop("parentref", None) + translated_path: YAMLPath = kwargs.pop("translated_path", YAMLPath("")) + ancestry: List[AncestryEntry] = kwargs.pop("ancestry", []) + relay_segment: PathSegment = kwargs.pop("relay_segment", None) + + # There may be 0 or 1 parameters + param_count = len(parameters) + if param_count > 1: + raise YAMLPathException(( + "Invalid parameter count to {}([NAME]); up to {} permitted, " + " got {} in YAML Path" + ).format(PathSearchKeywords.MIN, 1, param_count), + str(yaml_path)) + + scan_node = parameters[0] if param_count > 0 else None + match_value: Any = None + match_nodes: List[NodeCoords] = [] + discard_nodes: List[NodeCoords] = [] + if yamlpath.common.Nodes.node_is_aoh(data): + # A named child node is mandatory + if scan_node is None: + raise YAMLPathException(( + "The {}([NAME]) Search Keyword requires a key name to scan" + " when evaluating an Array-of-Hashes in YAML Path" + ).format(PathSearchKeywords.MIN), + str(yaml_path)) + + for idx, ele in enumerate(data): + next_path = translated_path + "[{}]".format(idx) + next_ancestry = ancestry + [(data, idx)] + if scan_node in ele: + eval_val = ele[scan_node] + if (match_value is None + or yamlpath.common.Searches.search_matches( + PathSearchMethods.LESS_THAN, match_value, 
+ eval_val) + ): + match_value = eval_val + discard_nodes.extend(match_nodes) + match_nodes = [ + NodeCoords( + ele, data, idx, next_path, next_ancestry, + relay_segment) + ] + continue + + if (match_value is None + or yamlpath.common.Searches.search_matches( + PathSearchMethods.EQUALS, match_value, + eval_val) + ): + match_nodes.append(NodeCoords( + ele, data, idx, next_path, next_ancestry, + relay_segment)) + continue + + discard_nodes.append(NodeCoords( + ele, data, idx, next_path, next_ancestry, + relay_segment)) + + elif isinstance(data, dict): + # A named child node is mandatory + if scan_node is None: + raise YAMLPathException(( + "The {}([NAME]) Search Keyword requires a key name to scan" + " when comparing Hash/map/dict children in YAML Path" + ).format(PathSearchKeywords.MIN), + str(yaml_path)) + + for key, val in data.items(): + if isinstance(val, dict): + if val is not None and scan_node in val: + eval_val = val[scan_node] + next_path = ( + translated_path + YAMLPath.escape_path_section( + key, translated_path.seperator)) + next_ancestry = ancestry + [(data, key)] + if (match_value is None + or yamlpath.common.Searches.search_matches( + PathSearchMethods.LESS_THAN, match_value, + eval_val) + ): + match_value = eval_val + discard_nodes.extend(match_nodes) + match_nodes = [ + NodeCoords( + val, data, key, next_path, next_ancestry, + relay_segment) + ] + continue + + if (match_value is None + or yamlpath.common.Searches.search_matches( + PathSearchMethods.EQUALS, match_value, + eval_val) + ): + match_nodes.append(NodeCoords( + val, data, key, next_path, next_ancestry, + relay_segment)) + continue + + elif scan_node in data: + # The user probably meant to operate against the parent + raise YAMLPathException(( + "The {}([NAME]) Search Keyword operates against" + " collections of data which share a common attribute" + " yet there is only a single node to consider. Did" + " you mean to evaluate the parent of the selected" + " node? 
Please review your YAML Path" + ).format(PathSearchKeywords.MIN), + str(yaml_path)) + + discard_nodes.append(NodeCoords( + val, data, key, next_path, next_ancestry, + relay_segment)) + + elif isinstance(data, list): + # A named child node is useless + if scan_node is not None: + raise YAMLPathException(( + "The {}([NAME]) Search Keyword cannot utilize a key name" + " when comparing Array/sequence/list elements to one" + " another in YAML Path" + ).format(PathSearchKeywords.MIN), + str(yaml_path)) + + for idx, ele in enumerate(data): + next_path = translated_path + "[{}]".format(idx) + next_ancestry = ancestry + [(data, idx)] + if (ele is not None + and ( + match_value is None or + yamlpath.common.Searches.search_matches( + PathSearchMethods.LESS_THAN, match_value, + ele) + )): + match_value = ele + discard_nodes.extend(match_nodes) + match_nodes = [ + NodeCoords( + ele, data, idx, next_path, next_ancestry, + relay_segment) + ] + continue + + if (ele is not None + and yamlpath.common.Searches.search_matches( + PathSearchMethods.EQUALS, match_value, + ele) + ): + match_nodes.append(NodeCoords( + ele, data, idx, next_path, next_ancestry, + relay_segment)) + continue + + discard_nodes.append(NodeCoords( + ele, data, idx, next_path, next_ancestry, + relay_segment)) + + else: + # Non-complex data is always its own maximum and does not invert + match_value = data + match_nodes = [ + NodeCoords( + data, parent, parentref, translated_path, ancestry, + relay_segment) + ] + + yield_nodes = discard_nodes if invert else match_nodes + for node_coord in yield_nodes: + yield node_coord + + @staticmethod + # pylint: disable=locally-disabled,too-many-locals + def parent( + data: Any, invert: bool, parameters: List[str], yaml_path: YAMLPath, + **kwargs: Any + ) -> Generator[NodeCoords, None, None]: + """ + Climb back up N parent levels in the data hierarchy. + + Parameters: + 1. data (Any) The data to evaluate + 2. invert (bool) Invert the evaluation; not possible for parent() + 3. 
parameters (List[str]) Parsed parameters + 4. yaml_path (YAMLPath) YAML Path begetting this operation + + Keyword Arguments: + * parent (ruamel.yaml node) The parent node from which this query + originates + * parentref (Any) The Index or Key of data within parent + * relay_segment (PathSegment) YAML Path segment presently under + evaluation + * translated_path (YAMLPath) YAML Path indicating precisely which node + is being evaluated + * ancestry (List[AncestryEntry]) Stack of ancestors preceding the + present node under evaluation + + Returns: (Generator[NodeCoords, None, None]) each result as it is + generated + """ + parent: Any = kwargs.pop("parent", None) + parentref: Any = kwargs.pop("parentref", None) + translated_path: YAMLPath = kwargs.pop("translated_path", YAMLPath("")) + ancestry: List[AncestryEntry] = kwargs.pop("ancestry", []) + relay_segment: PathSegment = kwargs.pop("relay_segment", None) + + # There may be 0 or 1 parameters + param_count = len(parameters) + if param_count > 1: + raise YAMLPathException(( + "Invalid parameter count to {}([STEPS]); up to {} permitted, " + " got {} in YAML Path" + ).format(PathSearchKeywords.PARENT, 1, param_count), + str(yaml_path)) + + if invert: + raise YAMLPathException(( + "Inversion is meaningless to {}([STEPS])" + ).format(PathSearchKeywords.PARENT), + str(yaml_path)) + + parent_levels: int = 1 + ancestry_len: int = len(ancestry) + steps_max = ancestry_len + if param_count > 0: + try: + parent_levels = int(parameters[0]) + except ValueError as ex: + raise YAMLPathException(( + "Invalid parameter passed to {}([STEPS]), {}; must be" + " unset or an integer number indicating how may parent" + " STEPS to climb in YAML Path" + ).format(PathSearchKeywords.PARENT, parameters[0]), + str(yaml_path)) from ex + + if parent_levels > steps_max: + raise YAMLPathException(( + "Cannot {}([STEPS]) higher than the document root. 
{} steps" + " requested when {} available in YAML Path" + ).format(PathSearchKeywords.PARENT, parent_levels, steps_max), + str(yaml_path)) + + if parent_levels < 1: + # parent(0) is the present node + yield NodeCoords( + data, parent, parentref, translated_path, ancestry, + relay_segment) + else: + for _ in range(parent_levels): + translated_path.pop() + (data, _) = ancestry.pop() + ancestry_len -= 1 + + parentref = ancestry[-1][1] if ancestry_len > 0 else None + parent = ancestry[-1][0] if ancestry_len > 0 else None + yield NodeCoords( + data, parent, parentref, translated_path, ancestry, + relay_segment) diff --git a/yamlpath/common/nodes.py b/yamlpath/common/nodes.py index 637283ef..d69caa0c 100644 --- a/yamlpath/common/nodes.py +++ b/yamlpath/common/nodes.py @@ -259,7 +259,10 @@ def wrap_type(value: Any) -> Any: wrapped_value = value try: - ast_value = ast.literal_eval(value) + cased_value = value + if str(value).lower() in ("true", "false"): + cased_value = str(value).title() + ast_value = ast.literal_eval(cased_value) except ValueError: ast_value = value except SyntaxError: @@ -282,8 +285,9 @@ def wrap_type(value: Any) -> Any: return wrapped_value @staticmethod - def build_next_node(yaml_path: YAMLPath, depth: int, - value: Any = None) -> Any: + def build_next_node( + yaml_path: YAMLPath, depth: int, value: Any = None + ) -> Any: """ Get the best default value for the next entry in a YAML Path. @@ -310,8 +314,9 @@ def build_next_node(yaml_path: YAMLPath, depth: int, return default_value @staticmethod - def append_list_element(data: Any, value: Any = None, - anchor: str = None) -> Any: + def append_list_element( + data: Any, value: Any = None, anchor: str = None + ) -> Any: """ Append a new element to an ruamel.yaml List. @@ -366,7 +371,7 @@ def apply_yaml_tag(node: Any, value_tag: str) -> Any: 3. value_tag (str) Tag to apply (or None to remove) Returns: (Any) the updated node; may be new data, so replace your node - with this returned value! 
+ with this returned value! """ if value_tag is None: return node @@ -390,12 +395,48 @@ def apply_yaml_tag(node: Any, value_tag: str) -> Any: @staticmethod def node_is_leaf(node: Any) -> bool: - """Indicate whether a node is a leaf (Scalar data).""" + """ + Indicate whether a node is a leaf (Scalar data). + + Parameters: + 1. node (Any) The node to evaluate + + Returns: (bool) True = node is a leaf; False, otherwise + """ return not isinstance(node, (dict, list, set)) + @staticmethod + def node_is_aoh(node: Any) -> bool: + """ + Indicate whether a node is an Array-of-Hashes (List of Dicts). + + Parameters: + 1. node (Any) The node under evaluation + + Returns: (bool) True = node is a `list` comprised **only** of `dict`s + """ + if node is None: + return False + + if not isinstance(node, (list, set)): + return False + + for ele in node: + if not isinstance(ele, dict): + return False + + return True + @staticmethod def tagless_elements(data: list) -> list: - """Get a copy of a list with all elements stripped of YAML Tags.""" + """ + Get a copy of a list with all elements stripped of YAML Tags. + + Parameters: + 1. data (list) The list to strip of YAML Tags + + Returns: (list) De-tagged version of `data` + """ detagged = [] for ele in data: if isinstance(ele, TaggedScalar): @@ -406,7 +447,14 @@ def tagless_elements(data: list) -> list: @staticmethod def tagless_value(value: Any) -> Any: - """Get a value in its true data-type, stripped of any YAML Tag.""" + """ + Get a value in its true data-type, stripped of any YAML Tag. + + Parameters: + 1. value (Any) The value to de-tag + + Returns: (Any) The de-tagged value + """ evalue = value if isinstance(value, TaggedScalar): evalue = value.value diff --git a/yamlpath/common/parsers.py b/yamlpath/common/parsers.py index e45b8fdc..b1feb94d 100644 --- a/yamlpath/common/parsers.py +++ b/yamlpath/common/parsers.py @@ -79,7 +79,7 @@ def get_yaml_data( 3. 
source (str) The source file or serialized literal to load; can be - for reading from STDIN (implies literal=True) - Keyword Parameters: + Keyword Arguments: * literal (bool) `source` is literal serialized YAML data rather than a file-spec, so load it directly @@ -177,7 +177,7 @@ def get_yaml_multidoc_data( 3. source (str) The source file to load; can be - for reading from STDIN - Keyword Parameters: + Keyword Arguments: * literal (bool) `source` is literal serialized YAML data rather than a file-spec, so load it directly @@ -317,7 +317,10 @@ def jsonify_yaml_data(data: Any) -> Any: for key, val in data.items(): data[key] = Parsers.jsonify_yaml_data(val) - elif isinstance(data, CommentedSeq): + elif isinstance(data, dict): + for key, val in data.items(): + data[key] = Parsers.jsonify_yaml_data(val) + elif isinstance(data, (list, CommentedSeq)): for idx, ele in enumerate(data): data[idx] = Parsers.jsonify_yaml_data(ele) elif isinstance(data, TaggedScalar): diff --git a/yamlpath/common/searches.py b/yamlpath/common/searches.py index b495e2df..5f912ae6 100644 --- a/yamlpath/common/searches.py +++ b/yamlpath/common/searches.py @@ -4,6 +4,7 @@ Copyright 2020 William W. Kimball, Jr. MBA MSIS """ import re +from ast import literal_eval from typing import Any, List from yamlpath.enums import ( @@ -23,22 +24,41 @@ class Searches: def search_matches( method: PathSearchMethods, needle: str, haystack: Any ) -> bool: - """Perform a search.""" + """ + Perform a search comparison. + + NOTE: For less-than, greather-than and related operations, the test is + whether `haystack` is less/greater-than `needle`. + + Parameters: + 1. method (PathSearchMethods) The search method to employ + 2. needle (str) The value to look for. + 3. haystack (Any) The value to look in. + + Returns: (bool) True = comparision passes; False = comparison fails. 
+ """ + try: + cased_needle = needle + lower_needle = str(needle).lower() + if lower_needle in ("true", "false"): + cased_needle = str(needle).title() + typed_needle = literal_eval(cased_needle) + except ValueError: + typed_needle = needle + except SyntaxError: + typed_needle = needle + needle_type = type(typed_needle) matches: bool = False if method is PathSearchMethods.EQUALS: - if isinstance(haystack, int): - try: - matches = haystack == int(needle) - except ValueError: - matches = False - elif isinstance(haystack, float): - try: - matches = haystack == float(needle) - except ValueError: - matches = False + if isinstance(haystack, bool) and needle_type is bool: + matches = haystack == typed_needle + elif isinstance(haystack, int) and needle_type is int: + matches = haystack == typed_needle + elif isinstance(haystack, float) and needle_type is float: + matches = haystack == typed_needle else: - matches = haystack == needle + matches = str(haystack) == str(needle) elif method is PathSearchMethods.STARTS_WITH: matches = str(haystack).startswith(needle) elif method is PathSearchMethods.ENDS_WITH: @@ -57,7 +77,7 @@ def search_matches( except ValueError: matches = False else: - matches = haystack > needle + matches = str(haystack) > str(needle) elif method is PathSearchMethods.LESS_THAN: if isinstance(haystack, int): try: @@ -70,7 +90,7 @@ def search_matches( except ValueError: matches = False else: - matches = haystack < needle + matches = str(haystack) < str(needle) elif method is PathSearchMethods.GREATER_THAN_OR_EQUAL: if isinstance(haystack, int): try: @@ -83,7 +103,7 @@ def search_matches( except ValueError: matches = False else: - matches = haystack >= needle + matches = str(haystack) >= str(needle) elif method is PathSearchMethods.LESS_THAN_OR_EQUAL: if isinstance(haystack, int): try: @@ -96,7 +116,7 @@ def search_matches( except ValueError: matches = False else: - matches = haystack <= needle + matches = str(haystack) <= str(needle) elif method == 
PathSearchMethods.REGEX: matcher = re.compile(needle) matches = matcher.search(str(haystack)) is not None @@ -161,7 +181,14 @@ def search_anchor( def create_searchterms_from_pathattributes( rhs: PathAttributes ) -> SearchTerms: - """Convert a PathAttributes instance to a SearchTerms instance.""" + """ + Convert a PathAttributes instance to a SearchTerms instance. + + Parameters: + 1. rhs (PathAttributes) PathAttributes instance to convert + + Returns: (SearchTerms) SearchTerms extracted from `rhs` + """ if isinstance(rhs, SearchTerms): newinst: SearchTerms = SearchTerms( rhs.inverted, rhs.method, rhs.attribute, rhs.term diff --git a/yamlpath/differ/diffentry.py b/yamlpath/differ/diffentry.py index 22652cdc..383c6265 100644 --- a/yamlpath/differ/diffentry.py +++ b/yamlpath/differ/diffentry.py @@ -21,7 +21,27 @@ def __init__( self, action: DiffActions, path: YAMLPath, lhs: Any, rhs: Any, **kwargs ): - """Initiate a new DiffEntry.""" + """ + Instantiate a new DiffEntry. + + Parameters: + 1. action (DiffAction) The action taken for one document to become the + next + 2. path (YAMLPath) Location within the LHS document which changes to + becomes the RHS document + 3. lhs (Any) The Left-Hand-Side (original) document + 4. 
rhs (Any) The Right-Hand-Side (altered) document + + Keyword Arguments: + * lhs_iteration (Any) "Rough" position of the original element within + its document before it was changed + * lhs_parent (Any) Parent of the original data element + * rhs_iteration (Any) "Rough" position of the changed element within + its document, if it existed before the change (otherwise it'll be 0s) + * rhs_parent (Any) Parent of the changed data element + + Returns: N/A + """ self._action: DiffActions = action self._path: YAMLPath = path self._lhs: Any = lhs @@ -30,8 +50,24 @@ def __init__( self._set_index(lhs, rhs, **kwargs) self._verbose = False - def _set_index(self, lhs: Any, rhs: Any, **kwargs) -> Any: - """Build the sortable index for this entry.""" + def _set_index(self, lhs: Any, rhs: Any, **kwargs) -> None: + """ + Build the sortable index for this entry. + + Parameters: + 1. lhs (Any) The Left-Hand-Side (original) document + 2. rhs (Any) The Right-Hand-Side (altered) document + + Keyword Arguments: + * lhs_iteration (Any) "Rough" position of the original element within + its document before it was changed + * lhs_parent (Any) Parent of the original data element + * rhs_iteration (Any) "Rough" position of the changed element within + its document, if it existed before the change (otherwise it'll be 0s) + * rhs_parent (Any) Parent of the changed data element + + Returns: N/A + """ lhs_lc = DiffEntry._get_index(lhs, kwargs.pop("lhs_parent", None)) rhs_lc = DiffEntry._get_index(rhs, kwargs.pop("rhs_parent", None)) lhs_iteration = kwargs.pop("lhs_iteration", 0) @@ -136,8 +172,8 @@ def _present_data(cls, data: Any, prefix: str) -> str: formatted_data = json_safe_data if isinstance(json_safe_data, str): formatted_data = json_safe_data.strip() - json_data = json.dumps(formatted_data).replace( - "\\n", "\n{} ".format(prefix)) + json_data = json.dumps( + formatted_data).replace("\\n", "\n{} ".format(prefix)) data_tag = "" if isinstance(data, TaggedScalar) and data.tag.value: data_tag = 
"{} ".format(data.tag.value) diff --git a/yamlpath/differ/differ.py b/yamlpath/differ/differ.py index 49ac55f4..74b68ffb 100644 --- a/yamlpath/differ/differ.py +++ b/yamlpath/differ/differ.py @@ -30,6 +30,10 @@ def __init__( 1. logger (ConsolePrinter) Instance of ConsoleWriter or subclass 2. document (Any) The basis document + Keyword Arguments: + * ignore_eyaml_values (bool) Do not decrypt encrypted YAML value for + comparison + Returns: N/A Raises: N/A @@ -46,20 +50,41 @@ def __init__( else EYAMLProcessor(logger, document, **kwargs)) def compare_to(self, document: Any) -> None: - """Perform the diff calculation.""" + """ + Perform the diff calculation. + + Parameers: + 1. document (Any) The document to compare against + + Returns: N/A + """ self._diffs.clear() self.config.prepare(document) self._diff_between(YAMLPath(), self._data, document) def get_report(self) -> Generator[DiffEntry, None, None]: - """Get the diff report.""" + """ + Get the diff report. + + Parameters: N/A + + Returns: (Generator[DiffEntry, None, None]) Sorted DiffEntry records + """ for entry in sorted( self._diffs, key=lambda e: [int(i) for i in e.index.split('.')] ): yield entry - def _purge_document(self, path: YAMLPath, data: Any): - """Delete every node in the document.""" + def _purge_document(self, path: YAMLPath, data: Any) -> None: + """ + Record changes necessary to delete every node in the document. + + Parameters: + 1. path (YAMLPath) YAML Path to the document element under evaluation + 2. data (Any) The DOM element under evaluation + + Returns: N/A + """ if isinstance(data, CommentedMap): lhs_iteration = -1 for key, val in data.items(): @@ -84,7 +109,15 @@ def _purge_document(self, path: YAMLPath, data: Any): ) def _add_everything(self, path: YAMLPath, data: Any) -> None: - """Add every node in the document.""" + """ + Record changes necessary to add every node in the document. + + Parameters: + 1. path (YAMLPath) YAML Path to the document element under evaluation + 2. 
data (Any) The DOM element under evaluation + + Returns: N/A + """ if isinstance(data, CommentedMap): rhs_iteration = -1 for key, val in data.items(): @@ -111,7 +144,18 @@ def _add_everything(self, path: YAMLPath, data: Any) -> None: def _diff_scalars( self, path: YAMLPath, lhs: Any, rhs: Any, **kwargs ) -> None: - """Diff two Scalar values.""" + """ + Diff two Scalar values. + + Parameters: + 1. path (YAMLPath) YAML Path to the document element under evaluation + 2. lhs (Any) The left-hand-side (original) document + 3. rhs (Any) The right-hand-side (altered) document + + Keyword Arguments: See `DiffEntry` + + Returns: N/A + """ self.logger.debug( "Comparing LHS:", prefix="Differ::_diff_scalars: ", @@ -145,7 +189,14 @@ def _diff_scalars( def _diff_dicts( self, path: YAMLPath, lhs: CommentedMap, rhs: CommentedMap ) -> None: - """Diff two dicts.""" + """ + Diff two dicts. + + Parameters: + 1. path (YAMLPath) YAML Path to the document element under evaluation + 2. lhs (Any) The left-hand-side (original) document + 3. rhs (Any) The right-hand-side (altered) document + """ self.logger.debug( "Comparing LHS:", prefix="Differ::_diff_dicts: ", @@ -223,7 +274,20 @@ def _diff_dicts( def _diff_synced_lists( self, path: YAMLPath, lhs: CommentedSeq, rhs: CommentedSeq ) -> None: - """Diff two synchronized lists.""" + """ + Diff two synchronized lists. + + A "synchronized" list -- in this context -- is one in which all + elements that are identical to those of its exemplar list are + (re)positioned to identical index. + + Parameters: + 1. path (YAMLPath) YAML Path to the document element under evaluation + 2. lhs (Any) The left-hand-side (original) document + 3. 
rhs (Any) The right-hand-side (altered) document + + Returns: N/A + """ self.logger.debug("Differ::_diff_synced_lists: Starting...") self.logger.debug( "Synchronizing LHS Array elements at YAML Path, {}:" @@ -286,7 +350,19 @@ def _diff_arrays_of_scalars( self, path: YAMLPath, lhs: CommentedSeq, rhs: CommentedSeq, node_coord: NodeCoords, **kwargs ) -> None: - """Diff two lists of scalars.""" + """ + Diff two lists of scalars. + + Parameters: + 1. path (YAMLPath) YAML Path to the document element under evaluation + 2. lhs (Any) The left-hand-side (original) document + 3. rhs (Any) The right-hand-side (altered) document + 4. node_coord (NodeCoords) The node being evaluated + + Keyword Arguments: + * diff_deeply (bool) True = Deeply traverse complex elements; False = + compare complex elements as-is + """ self.logger.debug( "Comparing LHS:", prefix="Differ::_diff_arrays_of_scalars: ", @@ -336,7 +412,17 @@ def _diff_arrays_of_hashes( self, path: YAMLPath, lhs: CommentedSeq, rhs: CommentedSeq, node_coord: NodeCoords ) -> None: - """Diff two lists-of-dictionaries.""" + """ + Diff two lists-of-dictionaries. + + Parameters: + 1. path (YAMLPath) YAML Path to the document element under evaluation + 2. lhs (Any) The left-hand-side (original) document + 3. rhs (Any) The right-hand-side (altered) document + 4. node_coord (NodeCoords) The node being evaluated + + Returns: N/A + """ self.logger.debug( "Comparing LHS:", prefix="Differ::_diff_arrays_of_hashes: ", @@ -418,7 +504,20 @@ def _diff_arrays_of_hashes( def _diff_lists( self, path: YAMLPath, lhs: CommentedSeq, rhs: CommentedSeq, **kwargs ) -> None: - """Diff two lists.""" + """ + Diff two lists. + + Parameters: + 1. path (YAMLPath) YAML Path to the document element under evaluation + 2. lhs (Any) The left-hand-side (original) document + 3. 
rhs (Any) The right-hand-side (altered) document + + Keyword Arguments: + * rhs_parent (Any) Parent data node of rhs + * parentref (Any) Reference indicating rhs within rhs_parent + + Returns: N/A + """ self.logger.debug( "Comparing LHS:", prefix="Differ::_diff_lists: ", @@ -442,7 +541,16 @@ def _diff_lists( def _diff_between( self, path: YAMLPath, lhs: Any, rhs: Any, **kwargs ) -> None: - """Calculate the differences between two document nodes.""" + """ + Calculate the differences between two document nodes. + + Parameters: + 1. path (YAMLPath) YAML Path to the document element under evaluation + 2. lhs (Any) The left-hand-side (original) document + 3. rhs (Any) The right-hand-side (altered) document + + Keyword Arguments: See _diff_lists() and _diff_scalars() + """ self.logger.debug( "Comparing LHS:", prefix="Differ::_diff_between: ", @@ -481,7 +589,17 @@ def synchronize_lists_by_value( ) -> List[Tuple[ Optional[int], Optional[Any], Optional[int], Optional[Any] ]]: - """Synchronize two lists by value.""" + """ + Synchronize two lists by value. + + Parameters: + 1. lhs (Any) The left-hand-side (original) document + 2. rhs (Any) The right-hand-side (altered) document + + Returns: (List[Tuple[ + Optional[int], Optional[Any], Optional[int], Optional[Any] + ]]) List with LHS and RHS elements at identical elements + """ # Build a parallel index array to track the original RHS element # indexes of any surviving elements. rhs_reduced = [] @@ -517,7 +635,19 @@ def synchronize_lods_by_key( ) -> List[Tuple[ Optional[int], Optional[Any], Optional[int], Optional[Any] ]]: - """Synchronize two lists-of-dictionaries by identity key.""" + """ + Synchronize two lists-of-dictionaries by identity key. + + Parameters: + 1. path (YAMLPath) YAML Path to the document element under evaluation + 2. lhs (Any) The left-hand-side (original) document + 3. 
rhs (Any) The right-hand-side (altered) document + + Returns: (List[Tuple[ + Optional[int], Optional[Any], Optional[int], Optional[Any] + ]]) List of identical LHS and RHS elements in the same element + positions + """ # Build a parallel index array to track the original RHS element # indexes of any surviving elements. key_attr: str = "" @@ -591,7 +721,17 @@ def synchronize_lods_by_key( @classmethod def _get_key_indicies(cls, data: CommentedMap) -> Dict[Any, int]: - """Get a dictionary mapping of keys to relative positions.""" + """ + Get a dictionary mapping of keys to relative positions. + + Parameters: + 1. data (CommentedMap) The Hash whose keys are to be mapped to + their relative element positions; non-Hash data produces an + empty mapping + + Returns: (Dict[Any, int]) Dictionary indicating the element position + of each key + """ key_map = {} if isinstance(data, CommentedMap): for idx, key in enumerate(data.keys()): diff --git a/yamlpath/differ/differconfig.py b/yamlpath/differ/differconfig.py index fda1b295..ab8249a2 100644 --- a/yamlpath/differ/differconfig.py +++ b/yamlpath/differ/differconfig.py @@ -1,5 +1,5 @@ """ -Config file processor for the Differ. +Implement DifferConfig. Copyright 2020 William W. Kimball, Jr. 
MBA MSIS """ diff --git a/yamlpath/enums/__init__.py b/yamlpath/enums/__init__.py index 9420583d..2050c804 100644 --- a/yamlpath/enums/__init__.py +++ b/yamlpath/enums/__init__.py @@ -2,6 +2,7 @@ from .anchormatches import AnchorMatches from .collectoroperators import CollectorOperators from .includealiases import IncludeAliases +from .pathsearchkeywords import PathSearchKeywords from .pathsearchmethods import PathSearchMethods from .pathsegmenttypes import PathSegmentTypes from .pathseperators import PathSeperators diff --git a/yamlpath/enums/pathsearchkeywords.py b/yamlpath/enums/pathsearchkeywords.py new file mode 100644 index 00000000..de9af170 --- /dev/null +++ b/yamlpath/enums/pathsearchkeywords.py @@ -0,0 +1,70 @@ +""" +Implements the PathSearchKeywords enumeration. + +Copyright 2021 William W. Kimball, Jr. MBA MSIS +""" +from enum import Enum, auto +from typing import List + + +class PathSearchKeywords(Enum): + """ + Supported keyword methods for searching YAML Path segments. + + These include: + + `HAS_CHILD` + Matches when the node has a direct child with a given name. + + `NAME` + Matches only the key-name or element-index of the present node, + discarding any and all child node data. Can be used to rename the + matched key as long as the new name is unique within the parent, lest + the preexisting node be overwritten. Cannot be used to reassign an + Array/sequence/list element to another position. + + `MAX` + Matches whichever node(s) has/have the maximum value for a named child + key or the maximum value within an Array/sequence/list. When used + against a scalar value, that value is always its own maximum. + + `MIN` + Matches whichever node(s) has/have the minimum value for a named child + key or the minimum value within an Array/sequence/list. When used + against a scalar value, that value is always its own minimum. + + `PARENT` + Access the parent(s) of the present node. 
+ """ + + HAS_CHILD = auto() + NAME = auto() + MAX = auto() + MIN = auto() + PARENT = auto() + + def __str__(self) -> str: + """Get a String representation of an employed value of this enum.""" + keyword = '' + if self is PathSearchKeywords.HAS_CHILD: + keyword = 'has_child' + elif self is PathSearchKeywords.NAME: + keyword = 'name' + elif self is PathSearchKeywords.MAX: + keyword = 'max' + elif self is PathSearchKeywords.MIN: + keyword = 'min' + elif self is PathSearchKeywords.PARENT: + keyword = 'parent' + + return keyword + + @staticmethod + def get_keywords() -> List[str]: + """Return the full list of supported search keywords.""" + return [str(o).lower() for o in PathSearchKeywords] + + @staticmethod + def is_keyword(keyword: str) -> bool: + """Indicate whether keyword is known.""" + return keyword in PathSearchKeywords.get_keywords() diff --git a/yamlpath/enums/pathsearchmethods.py b/yamlpath/enums/pathsearchmethods.py index edf181fb..9e6246ad 100644 --- a/yamlpath/enums/pathsearchmethods.py +++ b/yamlpath/enums/pathsearchmethods.py @@ -9,7 +9,7 @@ class PathSearchMethods(Enum): """ - Supported selfs for searching YAML Path segments. + Supported methods for searching YAML Path segments. These include: @@ -77,7 +77,7 @@ def __str__(self) -> str: @staticmethod def get_operators() -> List[str]: - """Return the full list of suppoerted symbolic search operators.""" + """Return the full list of supported symbolic search operators.""" return [str(o) for o in PathSearchMethods] @staticmethod diff --git a/yamlpath/enums/pathsegmenttypes.py b/yamlpath/enums/pathsegmenttypes.py index ee21f8c0..489d9e00 100644 --- a/yamlpath/enums/pathsegmenttypes.py +++ b/yamlpath/enums/pathsegmenttypes.py @@ -23,6 +23,9 @@ class PathSegmentTypes(Enum): `INDEX` A list element index. + `KEYWORD_SEARCH` + A search based on PathSearchKeywords. + `KEY` A dictionary key name. 
@@ -41,3 +44,4 @@ class PathSegmentTypes(Enum): KEY = auto() SEARCH = auto() TRAVERSE = auto() + KEYWORD_SEARCH = auto() diff --git a/yamlpath/exceptions/yamlpathexception.py b/yamlpath/exceptions/yamlpathexception.py index 762d0b81..57e496c7 100644 --- a/yamlpath/exceptions/yamlpathexception.py +++ b/yamlpath/exceptions/yamlpathexception.py @@ -1,5 +1,5 @@ """ -Express an issue with a YAML Path. +Implement YAMLPathException. Copyright 2019, 2020 William W. Kimball, Jr. MBA MSIS """ @@ -14,8 +14,9 @@ class YAMLPathException(Exception): YAML node. """ - def __init__(self, user_message: str, yaml_path: str, - segment: Optional[str] = None) -> None: + def __init__( + self, user_message: str, yaml_path: str, segment: Optional[str] = None + ) -> None: """ Initialize this Exception with all pertinent data. diff --git a/yamlpath/eyaml/eyamlprocessor.py b/yamlpath/eyaml/eyamlprocessor.py index 826bd259..3e652a6a 100644 --- a/yamlpath/eyaml/eyamlprocessor.py +++ b/yamlpath/eyaml/eyamlprocessor.py @@ -22,8 +22,9 @@ class EYAMLProcessor(Processor): """Extend Processor to understand EYAML values.""" - def __init__(self, logger: ConsolePrinter, data: Any, - **kwargs: Optional[str]) -> None: + def __init__( + self, logger: ConsolePrinter, data: Any, **kwargs: Optional[str] + ) -> None: """ Instantiate an EYAMLProcessor. @@ -35,15 +36,15 @@ def __init__(self, logger: ConsolePrinter, data: Any, Parameters: 1. logger (ConsolePrinter) Instance of ConsolePrinter or subclass 2. data (Any) Parsed YAML data - 3. **kwargs (Optional[str]) can contain the following keyword - parameters: - * binary (str) The external eyaml command to use when performing - data encryption or decryption; if no path is provided, the - command will be sought on the system PATH. 
Defaut="eyaml" - * publickey (Optional[str]) Fully-qualified path to the public key - for use with data encryption - * privatekey (Optional[str]) Fully-qualified path to the public key - for use with data decryption + + Keyword Arguments: + * binary (str) The external eyaml command to use when performing + data encryption or decryption; if no path is provided, the + command will be sought on the system PATH. Defaut="eyaml" + * publickey (Optional[str]) Fully-qualified path to the public key + for use with data encryption + * privatekey (Optional[str]) Fully-qualified path to the public key + for use with data decryption Returns: N/A @@ -56,7 +57,7 @@ def __init__(self, logger: ConsolePrinter, data: Any, # pylint: disable=locally-disabled,too-many-branches def _find_eyaml_paths( - self, data: Any, build_path: str = "" + self, data: Any, build_path: str = "" ) -> Generator[YAMLPath, None, None]: """ Find every encrypted value and report each as a YAML Path. @@ -65,8 +66,8 @@ def _find_eyaml_paths( leading to an EYAML value within the evaluated YAML data. Parameters: - 1. data (Any) The parsed YAML data to process - 2. build_path (str) A YAML Path under construction + 1. data (Any) The parsed YAML data to process + 2. build_path (str) A YAML Path under construction Returns: (Generator[Path, None, None]) each YAML Path entry as they are discovered @@ -122,10 +123,10 @@ def decrypt_eyaml(self, value: str) -> str: 1. value (str) The EYAML value to decrypt Returns: (str) The decrypted value or the original value if it was not - actually encrypted. + actually encrypted. 
Raises: - - `EYAMLCommandException` when the eyaml binary cannot be utilized + - `EYAMLCommandException` when the eyaml binary cannot be utilized """ if not self.is_eyaml_value(value): return value @@ -176,9 +177,10 @@ def decrypt_eyaml(self, value: str) -> str: return retval - def encrypt_eyaml(self, value: str, - output: EYAMLOutputFormats = EYAMLOutputFormats.STRING - ) -> str: + def encrypt_eyaml( + self, value: str, + output: EYAMLOutputFormats = EYAMLOutputFormats.STRING + ) -> str: """ Encrypt a value via EYAML. @@ -187,10 +189,10 @@ def encrypt_eyaml(self, value: str, 2. output (EYAMLOutputFormats) the output format of the encryption Returns: (str) The encrypted result or the original value if it was - already an EYAML encryption. + already an EYAML encryption. Raises: - - `EYAMLCommandException` when the eyaml binary cannot be utilized. + - `EYAMLCommandException` when the eyaml binary cannot be utilized. """ if self.is_eyaml_value(value): return value @@ -248,9 +250,11 @@ def encrypt_eyaml(self, value: str, ) return retval - def set_eyaml_value(self, yaml_path: YAMLPath, value: str, - output: EYAMLOutputFormats = EYAMLOutputFormats.STRING, - mustexist: bool = False) -> None: + def set_eyaml_value( + self, yaml_path: YAMLPath, value: str, + output: EYAMLOutputFormats = EYAMLOutputFormats.STRING, + mustexist: bool = False + ) -> None: """ Encrypt and store a value where specified via YAML Path. @@ -265,7 +269,7 @@ def set_eyaml_value(self, yaml_path: YAMLPath, value: str, Returns: N/A Raises: - - `YAMLPathException` when YAML Path is invalid + - `YAMLPathException` when YAML Path is invalid """ self.logger.verbose( "Encrypting value(s) for {}." 
@@ -283,9 +287,10 @@ def set_eyaml_value(self, yaml_path: YAMLPath, value: str, value_format=emit_format ) - def get_eyaml_values(self, yaml_path: YAMLPath, mustexist: bool = False, - default_value: str = "" - ) -> Generator[str, None, None]: + def get_eyaml_values( + self, yaml_path: YAMLPath, mustexist: bool = False, + default_value: str = "" + ) -> Generator[str, None, None]: """ Retrieve and decrypt all EYAML nodes identified via a YAML Path. @@ -302,7 +307,7 @@ def get_eyaml_values(self, yaml_path: YAMLPath, mustexist: bool = False, specifies a non-existant node Raises: - - `YAMLPathException` when YAML Path is invalid + - `YAMLPathException` when YAML Path is invalid """ self.logger.verbose( "Decrypting value(s) at {}.".format(yaml_path) @@ -319,7 +324,7 @@ def _can_run_eyaml(self) -> bool: Parameters: N/A Returns: (bool) True when the present eyaml property indicates an - executable; False, otherwise + executable; False, otherwise Raises: N/A """ diff --git a/yamlpath/merger/exceptions/mergeexception.py b/yamlpath/merger/exceptions/mergeexception.py index b2eacfde..aac817fc 100644 --- a/yamlpath/merger/exceptions/mergeexception.py +++ b/yamlpath/merger/exceptions/mergeexception.py @@ -11,8 +11,10 @@ class MergeException(Exception): """Express an issue with a document merge.""" - def __init__(self, user_message: str, - yaml_path: Optional[Union[YAMLPath, str]] = None) -> None: + def __init__( + self, user_message: str, + yaml_path: Optional[Union[YAMLPath, str]] = None + ) -> None: """ Initialize this Exception with all pertinent data. diff --git a/yamlpath/merger/merger.py b/yamlpath/merger/merger.py index cab4ee6d..0c36e90b 100644 --- a/yamlpath/merger/merger.py +++ b/yamlpath/merger/merger.py @@ -28,14 +28,14 @@ class Merger: """Performs YAML document merges.""" - DEPRECATION_WARNING = ("WARNING: Deprecated methods will be removed in" - " the next major release of yamlpath. 
Please refer" - " to the CHANGES file for more information (and how" - " to get rid of this message).") + DEPRECATION_WARNING = ( + "WARNING: Deprecated methods will be removed in the next major" + " release of yamlpath. Please refer to the CHANGES file for more" + " information (and how to get rid of this message).") depwarn_printed = False def __init__( - self, logger: ConsolePrinter, lhs: Any, config: MergerConfig + self, logger: ConsolePrinter, lhs: Any, config: MergerConfig ) -> None: """ Instantiate this class into an object. @@ -107,7 +107,7 @@ def _merge_dicts( 3. path (YAMLPath) Location within the DOM where this merge is taking place. - Keyword Parameters: + Keyword Arguments: * parent (Any) Parent node of `rhs` * parentref (Any) Child Key or Index of `rhs` within `parent`. @@ -264,7 +264,7 @@ def _merge_simple_lists( 4. node_coord (NodeCoords) The RHS root node, its parent, and reference within its parent; used for config lookups. - Returns: (list) The merged result. + Returns: (CommentedSeq) The merged result. Raises: - `MergeException` when a clean merge is impossible. @@ -409,7 +409,7 @@ def _merge_lists( 2. rhs (CommentedSeq) The list to merge from. 3. path (YAMLPath) Location of the `rhs` source list within its DOM. - Keyword Parameters: + Keyword Arguments: * parent (Any) Parent node of `rhs` * parentref (Any) Child Key or Index of `rhs` within `parent`. diff --git a/yamlpath/merger/mergerconfig.py b/yamlpath/merger/mergerconfig.py index 0ef81c3e..8c9a187e 100644 --- a/yamlpath/merger/mergerconfig.py +++ b/yamlpath/merger/mergerconfig.py @@ -1,5 +1,5 @@ """ -Config file processor for the Merger. +Implement MergerConfig. Copyright 2020 William W. Kimball, Jr. MBA MSIS """ @@ -41,7 +41,14 @@ def __init__(self, logger: ConsolePrinter, args: Namespace) -> None: self._load_config() def anchor_merge_mode(self) -> AnchorConflictResolutions: - """Get Anchor merge mode.""" + """ + Get Anchor merge mode. 
+ + Parameters: N/A + + Returns: (AnchorConflictResolutions) Resolved method for handling + YAML Anchor conflicts. + """ # Precedence: CLI > config[defaults] > default if hasattr(self.args, "anchors") and self.args.anchors: return AnchorConflictResolutions.from_str(self.args.anchors) @@ -178,13 +185,26 @@ def prepare(self, data: Any) -> None: self._prepare_user_rules(proc, merge_path, "keys", self.keys) def get_insertion_point(self) -> YAMLPath: - """Get the YAML Path at which merging shall be performed.""" + """ + Get the YAML Path at which merging shall be performed. + + Parameters: N/A + + Returns: (YAMLPath) User-specified point(s) within the document where + the RHS document is directed to be merged-in. + """ if hasattr(self.args, "mergeat"): return YAMLPath(self.args.mergeat) return YAMLPath("/") def get_document_format(self) -> OutputDocTypes: - """Get the user-desired output format.""" + """ + Get the user-desired output format. + + Parameters: N/A + + Returns: (OutputDocTypes) The destination document type + """ if hasattr(self.args, "document_format"): return OutputDocTypes.from_str(self.args.document_format) return OutputDocTypes.AUTO diff --git a/yamlpath/path/__init__.py b/yamlpath/path/__init__.py index d68851ed..7d06a926 100644 --- a/yamlpath/path/__init__.py +++ b/yamlpath/path/__init__.py @@ -1,3 +1,4 @@ """Make all of the YAML Path components available.""" from .collectorterms import CollectorTerms +from .searchkeywordterms import SearchKeywordTerms from .searchterms import SearchTerms diff --git a/yamlpath/path/collectorterms.py b/yamlpath/path/collectorterms.py index 409635bf..f49f77ee 100644 --- a/yamlpath/path/collectorterms.py +++ b/yamlpath/path/collectorterms.py @@ -9,9 +9,10 @@ class CollectorTerms: """YAML Path Collector segment terms.""" - def __init__(self, expression: str, - operation: CollectorOperators = CollectorOperators.NONE - ) -> None: + def __init__( + self, expression: str, + operation: CollectorOperators = 
CollectorOperators.NONE + ) -> None: """ Instantiate a Collector Term. diff --git a/yamlpath/path/searchkeywordterms.py b/yamlpath/path/searchkeywordterms.py new file mode 100644 index 00000000..a80700c5 --- /dev/null +++ b/yamlpath/path/searchkeywordterms.py @@ -0,0 +1,139 @@ +""" +Implement SearchKeywordTerms. + +Copyright 2021 William W. Kimball, Jr. MBA MSIS +""" +from typing import List + +from yamlpath.enums import PathSearchKeywords + + +class SearchKeywordTerms: + """YAML path Search Keyword segment terms.""" + + def __init__( + self, inverted: bool, keyword: PathSearchKeywords, parameters: str + ) -> None: + """ + Instantiate a Keyword Search Term segment. + + Parameters: + 1. inverted (bool) true = invert the search operation; false, otherwise + 2. keyword (PathSearchKeywords) the search keyword + 3. parameters (str) the parameters to the keyword-named operation + """ + self._inverted: bool = inverted + self._keyword: PathSearchKeywords = keyword + self._parameters: str = parameters + self._lparameters: List[str] = [] + self._parameters_parsed: bool = False + + def __str__(self) -> str: + """Get a String representation of this Keyword Search Term.""" + return ( + "[" + + ("!" if self._inverted else "") + + str(self._keyword) + + "(" + + self._parameters + + ")]" + ) + + @property + def inverted(self) -> bool: + """ + Access the inversion flag for this Keyword Search. + + This indicates whether the search logic is to be inverted. + """ + return self._inverted + + @property + def keyword(self) -> PathSearchKeywords: + """ + Access the search keyword. + + This indicates what kind of search logic is to be performed. 
+ """ + return self._keyword + + @property + # pylint: disable=locally-disabled,too-many-branches + def parameters(self) -> List[str]: + """Accessor for the parameters being fed to the search operation.""" + if self._parameters_parsed: + return self._lparameters + + if self._parameters is None: + self._parameters_parsed = True + self._lparameters = [] + return self._lparameters + + param: str = "" + params: List[str] = [] + escape_next: bool = False + demarc_stack: List[str] = [] + demarc_count: int = 0 + + # pylint: disable=locally-disabled,too-many-nested-blocks + for char in self._parameters: + demarc_count = len(demarc_stack) + + if escape_next: + # Pass-through; capture this escaped character + escape_next = False + + elif char == "\\": + escape_next = True + continue + + elif ( + char == " " + and (demarc_count < 1) + ): + # Ignore unescaped, non-demarcated whitespace + continue + + elif char in ['"', "'"]: + # Found a string demarcation mark + if demarc_count > 0: + # Already appending to an ongoing demarcated value + if char == demarc_stack[-1]: + # Close a matching pair + demarc_stack.pop() + demarc_count -= 1 + + if demarc_count < 1: + # Final close; seek the next delimiter + continue + + else: + # Embed a nested, demarcated component + demarc_stack.append(char) + demarc_count += 1 + else: + # Fresh demarcated value + demarc_stack.append(char) + demarc_count += 1 + continue + + elif demarc_count < 1 and char == ",": + params.append(param) + param = "" + continue + + param = param + char + + # Check for mismatched demarcations + if demarc_count > 0: + raise ValueError( + "Keyword search parameters contain one or more unmatched" + " demarcation symbol(s): {}".format(" ".join(demarc_stack))) + + # Add the last parameter, if there is one + if param: + params.append(param) + + self._lparameters = params + self._parameters_parsed = True + return self._lparameters diff --git a/yamlpath/path/searchterms.py b/yamlpath/path/searchterms.py index e5cd665e..3b35b001 
100644 --- a/yamlpath/path/searchterms.py +++ b/yamlpath/path/searchterms.py @@ -9,8 +9,10 @@ class SearchTerms: """YAML path Search segment terms.""" - def __init__(self, inverted: bool, method: PathSearchMethods, - attribute: str, term: str) -> None: + def __init__( + self, inverted: bool, method: PathSearchMethods, attribute: str, + term: str + ) -> None: """ Instantiate a Search Term. diff --git a/yamlpath/processor.py b/yamlpath/processor.py index 8c888e9a..df125a34 100644 --- a/yamlpath/processor.py +++ b/yamlpath/processor.py @@ -2,18 +2,22 @@ """ YAML Path processor based on ruamel.yaml. -Copyright 2018, 2019, 2020 William W. Kimball, Jr. MBA MSIS +Copyright 2018, 2019, 2020, 2021 William W. Kimball, Jr. MBA MSIS """ from typing import Any, Dict, Generator, List, Union -from yamlpath.common import Anchors, Nodes, Searches +from ruamel.yaml.comments import CommentedMap + +from yamlpath.types import AncestryEntry, PathAttributes, PathSegment +from yamlpath.common import Anchors, KeywordSearches, Nodes, Searches from yamlpath import YAMLPath -from yamlpath.path import SearchTerms, CollectorTerms +from yamlpath.path import SearchKeywordTerms, SearchTerms, CollectorTerms from yamlpath.wrappers import ConsolePrinter, NodeCoords from yamlpath.exceptions import YAMLPathException from yamlpath.enums import ( YAMLValueFormats, PathSegmentTypes, + PathSearchKeywords, CollectorOperators, PathSeperators, ) @@ -37,15 +41,16 @@ def __init__(self, logger: ConsolePrinter, data: Any) -> None: self.logger: ConsolePrinter = logger self.data: Any = data - def get_nodes(self, yaml_path: Union[YAMLPath, str], - **kwargs: Any) -> Generator[Any, None, None]: + def get_nodes( + self, yaml_path: Union[YAMLPath, str], **kwargs: Any + ) -> Generator[Any, None, None]: """ Get nodes at YAML Path in data. Parameters: 1. 
yaml_path (Union[YAMLPath, str]) The YAML Path to evaluate - Keyword Parameters: + Keyword Arguments: * mustexist (bool) Indicate whether yaml_path must exist in data prior to this query (lest an Exception be raised); default=False @@ -76,6 +81,13 @@ def get_nodes(self, yaml_path: Union[YAMLPath, str], elif pathsep is not PathSeperators.AUTO: yaml_path.seperator = pathsep + self.logger.debug( + "Processing YAML Path:", + prefix="Processor::get_nodes: ", data={ + 'path': yaml_path, + 'segments': yaml_path.escaped + }) + if mustexist: matched_nodes: int = 0 for node_coords in self._get_required_nodes(self.data, yaml_path): @@ -99,8 +111,9 @@ def get_nodes(self, yaml_path: Union[YAMLPath, str], prefix="Processor::get_nodes: ", data=opt_node) yield opt_node - def set_value(self, yaml_path: Union[YAMLPath, str], - value: Any, **kwargs) -> None: + def set_value( + self, yaml_path: Union[YAMLPath, str], value: Any, **kwargs + ) -> None: """ Set the value of zero or more nodes at YAML Path in YAML data. @@ -108,7 +121,7 @@ def set_value(self, yaml_path: Union[YAMLPath, str], 1. yaml_path (Union[Path, str]) The YAML Path to evaluate 2. value (Any) The value to set - Keyword Parameters: + Keyword Arguments: * mustexist (bool) Indicate whether yaml_path must exist in data prior to this query (lest an Exception be raised); default=False @@ -150,15 +163,8 @@ def set_value(self, yaml_path: Union[YAMLPath, str], found_nodes: int = 0 for req_node in self._get_required_nodes(self.data, yaml_path): found_nodes += 1 - try: - self._update_node( - req_node.parent, req_node.parentref, value, - value_format, tag) - except ValueError as vex: - raise YAMLPathException( - "Impossible to write '{}' as {}. 
The error was: {}" - .format(value, value_format, str(vex)) - , str(yaml_path)) from vex + self._apply_change(yaml_path, req_node, value, + value_format=value_format, tag=tag) if found_nodes < 1: raise YAMLPathException( @@ -173,23 +179,115 @@ def set_value(self, yaml_path: Union[YAMLPath, str], for node_coord in self._get_optional_nodes( self.data, yaml_path, value ): + self._apply_change(yaml_path, node_coord, value, + value_format=value_format, tag=tag) + + # pylint: disable=locally-disabled,too-many-locals,too-many-branches + def _apply_change( + self, yaml_path: YAMLPath, node_coord: NodeCoords, value: Any, + **kwargs: Any + ) -> None: + """ + Apply a controlled change to the document via gathered NodeCoords. + + Parameters: + 1. yaml_path (YAMLPath) The YAML Path causing this change. + 2. node_coord (NodeCoords) The data node to affect. + 3. value (Any) The value to apply. + + Keyword Arguments: + * value_format (YAMLValueFormats) The demarcation or visual + representation to use when writing the data; + default=YAMLValueFormats.DEFAULT + * tag (str) Custom data-type tag to assign + + Returns: N/A + + Raises: + - YAMLPathException when the attempted change is impossible + """ + value_format: YAMLValueFormats = kwargs.pop("value_format", + YAMLValueFormats.DEFAULT) + tag: str = kwargs.pop("tag", None) + + self.logger.debug(( + "Attempting to change a node coordinate of type {} to value with" + " format <{}>:" + ).format(type(node_coord), value_format), + data={ + "value": value, + "node_coord": node_coord + }, prefix="Processor::_apply_change: ") + + if isinstance(node_coord.node, NodeCoords): + self.logger.debug( + "Unpacked Collector results to apply change:" + , data=node_coord.node + , prefix="Processor::_apply_change: ") + self._apply_change(yaml_path, node_coord.node, value, **kwargs) + + if (isinstance(node_coord.node, list) + and len(node_coord.node) > 0 + and isinstance(node_coord.node[0], NodeCoords) + ): + for collector_node in node_coord.node: 
self.logger.debug( - "Matched optional node coordinate:" - , data=node_coord - , prefix="Processor::set_value: ") - self.logger.debug( - "Setting its value with format {} to:".format(value_format) - , data=value - , prefix="Processor::set_value: ") - try: - self._update_node( - node_coord.parent, node_coord.parentref, value, - value_format, tag) - except ValueError as vex: - raise YAMLPathException( - "Impossible to write '{}' as {}. The error was: {}" - .format(value, value_format, str(vex)) - , str(yaml_path)) from vex + "Expanded collected Collector results to apply change:" + , data=collector_node + , prefix="Processor::_apply_change: ") + self._apply_change(yaml_path, collector_node, value, **kwargs) + return + + last_segment = node_coord.path_segment + if last_segment is not None: + (_, segment_value) = last_segment + if ( + isinstance(segment_value, SearchKeywordTerms) + and segment_value.keyword is PathSearchKeywords.NAME + ): + # Rename a key; the new name must not already exist in its + # parent. + parent = node_coord.parent + parentref = node_coord.parentref + if isinstance(parent, CommentedMap): + if value in parent: + raise YAMLPathException(( + "Key, {}, already exists at the same document" + " level in YAML Path" + ).format(value), str(yaml_path)) + + for i, k in [ + (idx, key) for idx, key + in enumerate(parent.keys()) + if key == parentref + ]: + parent.insert(i, value, parent.pop(k)) + break + elif isinstance(parent, dict): + if value in parent: + raise YAMLPathException(( + "Key, {}, already exists at the same document" + " level in YAML Path" + ).format(value), str(yaml_path)) + + parent[value] = parent[parentref] + del parent[parentref] + else: + raise YAMLPathException(( + "Keys can be renamed only in Hash/map/dict" + " data; got a {}, instead." 
+ ).format(type(parent)), str(yaml_path)) + return + + try: + self._update_node( + node_coord.parent, node_coord.parentref, value, + value_format, tag) + except ValueError as vex: + raise YAMLPathException( + "Impossible to write '{}' as {}. The error was: {}" + .format(value, value_format, str(vex)) + , str(yaml_path)) from vex def _get_anchor_node( self, anchor_path: Union[YAMLPath, str], **kwargs: Any @@ -203,7 +301,7 @@ def _get_anchor_node( will result in a YAMLPathException because YAML does not define Aliases for more than one Anchor. - Keyword Parameters: + Keyword Arguments: * anchor_name (str) Alternate name to use for the YAML Anchor and its Aliases. @@ -278,7 +376,7 @@ def alias_nodes( will result in a YAMLPathException because YAML does not define Aliases for more than one Anchor. - Keyword Parameters: + Keyword Arguments: * pathsep (PathSeperators) Forced YAML Path segment seperator; set only when automatic inference fails; default = PathSeperators.AUTO @@ -326,7 +424,18 @@ def alias_gathered_nodes( Assign a YAML Anchor to zero or more YAML Alias nodes. Parameters: - 1. gathered_nodes (List[NodeCoords]) The pre-gathered nodes to assign. + 1. gathered_nodes (List[NodeCoords]) The pre-gathered nodes to assign + 2. anchor_path (Union[YAMLPath, str]) YAML Path to the source Anchor + + Keyword Arguments: + * pathsep (PathSeperators) Forced YAML Path segment seperator; set + only when automatic inference fails; + default = PathSeperators.AUTO + * anchor_name (str) Override automatic anchor name; use this, instead + + Returns: N/A + + Raises: N/A """ pathsep: PathSeperators = kwargs.pop("pathsep", PathSeperators.AUTO) anchor_name: str = kwargs.pop("anchor_name", "") @@ -344,7 +453,7 @@ def alias_gathered_nodes( self._alias_nodes(gathered_nodes, anchor_node) def _alias_nodes( - self, gathered_nodes: List[NodeCoords], anchor_node: Any + self, gathered_nodes: List[NodeCoords], anchor_node: Any ) -> None: """ Assign a YAML Anchor to its various YAML Alias nodes. 
@@ -374,7 +483,7 @@ def tag_nodes( 1. yaml_path (Union[YAMLPath, str]) The YAML Path to evaluate 2. tag (str) The tag to assign - Keyword Parameters: + Keyword Arguments: * pathsep (PathSeperators) Forced YAML Path segment seperator; set only when automatic inference fails; default = PathSeperators.AUTO @@ -382,7 +491,7 @@ def tag_nodes( Returns: N/A Raises: - - `YAMLPathException` when YAML Path is invalid + - `YAMLPathException` when YAML Path is invalid """ pathsep: PathSeperators = kwargs.pop("pathsep", PathSeperators.AUTO) @@ -410,7 +519,15 @@ def tag_nodes( def tag_gathered_nodes( self, gathered_nodes: List[NodeCoords], tag: str ) -> None: - """Assign a data-type tag to a set of nodes.""" + """ + Assign a data-type tag to a set of nodes. + + Parameters: + 1. gathered_nodes (List[NodeCoords]) The nodes to affect + 2. tag (str) The tag to assign + + Returns: N/A + """ # A YAML tag must be prefixed via at least one bang (!) if tag and not tag[0] == "!": tag = "!{}".format(tag) @@ -427,15 +544,16 @@ def tag_gathered_nodes( self.data, old_node, node_coord.parent[node_coord.parentref]) - def delete_nodes(self, yaml_path: Union[YAMLPath, str], - **kwargs: Any) -> Generator[NodeCoords, None, None]: + def delete_nodes( + self, yaml_path: Union[YAMLPath, str], **kwargs: Any + ) -> Generator[NodeCoords, None, None]: """ Gather and delete nodes at YAML Path in data. Parameters: 1. yaml_path (Union[YAMLPath, str]) The YAML Path to evaluate - Keyword Parameters: + Keyword Arguments: * pathsep (PathSeperators) Forced YAML Path segment seperator; set only when automatic inference fails; default = PathSeperators.AUTO @@ -489,8 +607,8 @@ def _delete_nodes(self, delete_nodes: List[NodeCoords]) -> None: 1. delete_nodes (List[NodeCoords]) The nodes to delete. 
Raises: - - `YAMLPathException` when the operation would destroy the entire - document + - `YAMLPathException` when the operation would destroy the entire + document """ for delete_nc in reversed(delete_nodes): node = delete_nc.node @@ -526,10 +644,9 @@ def _delete_nodes(self, delete_nodes: List[NodeCoords]) -> None: ) # pylint: disable=locally-disabled,too-many-branches,too-many-locals - def _get_nodes_by_path_segment(self, data: Any, - yaml_path: YAMLPath, segment_index: int, - **kwargs: Any - ) -> Generator[Any, None, None]: + def _get_nodes_by_path_segment( + self, data: Any, yaml_path: YAMLPath, segment_index: int, **kwargs: Any + ) -> Generator[Any, None, None]: """ Get nodes identified by their YAML Path segment. @@ -547,6 +664,10 @@ def _get_nodes_by_path_segment(self, data: Any, * parentref (Any) The Index or Key of data within parent * traverse_lists (Boolean) Indicate whether KEY searches against lists are permitted to automatically traverse into the list; Default=True + * translated_path (YAMLPath) YAML Path indicating precisely which node + is being evaluated + * ancestry (List[AncestryEntry]) Stack of ancestors preceding the + present node under evaluation Returns: (Generator[Any, None, None]) Each node coordinate or list of node coordinates as they are matched. You must check with isinstance() @@ -554,21 +675,14 @@ def _get_nodes_by_path_segment(self, data: Any, List[NodeCoords]. Raises: - - `NotImplementedError` when the segment indicates an unknown - PathSegmentTypes value. + - `NotImplementedError` when the segment indicates an unknown + PathSegmentTypes value. 
""" - parent = kwargs.pop("parent", None) - parentref = kwargs.pop("parentref", None) - traverse_lists = kwargs.pop("traverse_lists", True) - translated_path = kwargs.pop("translated_path", YAMLPath("")) - if data is None: - self.logger.debug( - "Bailing out on None data at parentref, {}, of parent:" - .format(parentref), - prefix="Processor::_get_nodes_by_path_segment: ", - data=parent) - return - + parent: Any = kwargs.pop("parent", None) + parentref: Any = kwargs.pop("parentref", None) + traverse_lists: bool = kwargs.pop("traverse_lists", True) + translated_path: YAMLPath = kwargs.pop("translated_path", YAMLPath("")) + ancestry: List[AncestryEntry] = kwargs.pop("ancestry", []) segments = yaml_path.escaped if not (segments and len(segments) > segment_index): self.logger.debug( @@ -578,8 +692,9 @@ def _get_nodes_by_path_segment(self, data: Any, data=segments) return + pathseg: PathSegment = yaml_path.unescaped[segment_index] + (unesc_type, unesc_attrs) = pathseg (segment_type, stripped_attrs) = segments[segment_index] - (unesc_type, unesc_attrs) = yaml_path.unescaped[segment_index] # Disallow traversal recursion (because it creates a denial-of-service) if segment_index > 0 and segment_type == PathSegmentTypes.TRAVERSE: @@ -595,33 +710,45 @@ def _get_nodes_by_path_segment(self, data: Any, if segment_type == PathSegmentTypes.KEY: node_coords = self._get_nodes_by_key( data, yaml_path, segment_index, traverse_lists=traverse_lists, - translated_path=translated_path) + translated_path=translated_path, ancestry=ancestry) elif segment_type == PathSegmentTypes.INDEX: node_coords = self._get_nodes_by_index( data, yaml_path, segment_index, - translated_path=translated_path) + translated_path=translated_path, ancestry=ancestry) elif segment_type == PathSegmentTypes.ANCHOR: node_coords = self._get_nodes_by_anchor( data, yaml_path, segment_index, - translated_path=translated_path) + translated_path=translated_path, ancestry=ancestry) + elif ( + segment_type == 
PathSegmentTypes.KEYWORD_SEARCH + and isinstance(stripped_attrs, SearchKeywordTerms) + ): + node_coords = self._get_nodes_by_keyword_search( + data, yaml_path, stripped_attrs, parent=parent, + parentref=parentref, traverse_lists=traverse_lists, + translated_path=translated_path, ancestry=ancestry, + relay_segment=pathseg) elif ( segment_type == PathSegmentTypes.SEARCH and isinstance(stripped_attrs, SearchTerms) ): node_coords = self._get_nodes_by_search( data, stripped_attrs, parent=parent, parentref=parentref, - traverse_lists=traverse_lists, translated_path=translated_path) + traverse_lists=traverse_lists, translated_path=translated_path, + ancestry=ancestry) elif ( unesc_type == PathSegmentTypes.COLLECTOR and isinstance(unesc_attrs, CollectorTerms) ): node_coords = self._get_nodes_by_collector( data, yaml_path, segment_index, unesc_attrs, parent=parent, - parentref=parentref, translated_path=translated_path) + parentref=parentref, translated_path=translated_path, + ancestry=ancestry) elif segment_type == PathSegmentTypes.TRAVERSE: node_coords = self._get_nodes_by_traversal( data, yaml_path, segment_index, parent=parent, - parentref=parentref, translated_path=translated_path) + parentref=parentref, translated_path=translated_path, + ancestry=ancestry) else: raise NotImplementedError @@ -629,8 +756,7 @@ def _get_nodes_by_path_segment(self, data: Any, yield node_coord def _get_nodes_by_key( - self, data: Any, yaml_path: YAMLPath, segment_index: int, - **kwargs: Any + self, data: Any, yaml_path: YAMLPath, segment_index: int, **kwargs: Any ) -> Generator[NodeCoords, None, None]: """ Get nodes from a Hash by their unique key name. 
@@ -646,16 +772,22 @@ def _get_nodes_by_key( Keyword Arguments: * traverse_lists (Boolean) Indicate whether KEY searches against lists are permitted to automatically traverse into the list; Default=True + * translated_path (YAMLPath) YAML Path indicating precisely which node + is being evaluated + * ancestry (List[AncestryEntry]) Stack of ancestors preceding the + present node under evaluation Returns: (Generator[NodeCoords, None, None]) Each NodeCoords as they are matched Raises: N/A """ - traverse_lists = kwargs.pop("traverse_lists", True) - translated_path = kwargs.pop("translated_path", YAMLPath("")) + traverse_lists: bool = kwargs.pop("traverse_lists", True) + translated_path: YAMLPath = kwargs.pop("translated_path", YAMLPath("")) + ancestry: List[AncestryEntry] = kwargs.pop("ancestry", []) - (_, stripped_attrs) = yaml_path.escaped[segment_index] + pathseg: PathSegment = yaml_path.escaped[segment_index] + (_, stripped_attrs) = pathseg str_stripped = str(stripped_attrs) self.logger.debug( @@ -666,6 +798,8 @@ def _get_nodes_by_key( next_translated_path = (translated_path + YAMLPath.escape_path_section( str_stripped, translated_path.seperator)) + next_ancestry: List[AncestryEntry] = ancestry + [ + (data, stripped_attrs)] if stripped_attrs in data: self.logger.debug( "Processor::_get_nodes_by_key: FOUND key node by name at" @@ -673,14 +807,15 @@ def _get_nodes_by_key( .format(str_stripped)) yield NodeCoords( data[stripped_attrs], data, stripped_attrs, - next_translated_path) + next_translated_path, next_ancestry, pathseg) else: # Check for a string/int type mismatch try: intkey = int(str_stripped) if intkey in data: yield NodeCoords( - data[intkey], data, intkey, next_translated_path) + data[intkey], data, intkey, next_translated_path, + ancestry + [(data, intkey)], pathseg) except ValueError: pass elif isinstance(data, list): @@ -694,7 +829,8 @@ def _get_nodes_by_key( .format(str_stripped)) yield NodeCoords( data[idx], data, idx, - translated_path + 
"[{}]".format(idx)) + translated_path + "[{}]".format(idx), + ancestry + [(data, idx)], pathseg) except ValueError: # Pass-through search against possible Array-of-Hashes, if # allowed. @@ -707,10 +843,12 @@ def _get_nodes_by_key( for eleidx, element in enumerate(data): next_translated_path = translated_path + "[{}]".format( eleidx) + next_ancestry = ancestry + [(data, stripped_attrs)] for node_coord in self._get_nodes_by_path_segment( element, yaml_path, segment_index, parent=data, parentref=eleidx, traverse_lists=traverse_lists, - translated_path=next_translated_path): + translated_path=next_translated_path, + ancestry=next_ancestry): self.logger.debug( "Processor::_get_nodes_by_key: FOUND key node " " via pass-through Array-of-Hashes search at {}." @@ -719,7 +857,7 @@ def _get_nodes_by_key( # pylint: disable=locally-disabled,too-many-locals def _get_nodes_by_index( - self, data: Any, yaml_path: YAMLPath, segment_index: int, **kwargs + self, data: Any, yaml_path: YAMLPath, segment_index: int, **kwargs ) -> Generator[NodeCoords, None, None]: """ Get nodes from a List by their index. @@ -730,18 +868,27 @@ def _get_nodes_by_index( Parameters: 1. data (Any) The parsed YAML data to process - 2. yaml_path (Path) The YAML Path being processed + 2. yaml_path (YAMLPath) The YAML Path being processed 3. 
segment_index (int) Segment index of the YAML Path to process + Keyword Arguments: + * translated_path (YAMLPath) YAML Path indicating precisely which node + is being evaluated + * ancestry (List[AncestryEntry]) Stack of ancestors preceding the + present node under evaluation + Returns: (Generator[NodeCoords, None, None]) Each NodeCoords as they - are matched + are matched Raises: N/A """ - (_, stripped_attrs) = yaml_path.escaped[segment_index] + translated_path: YAMLPath = kwargs.pop("translated_path", YAMLPath("")) + ancestry: List[AncestryEntry] = kwargs.pop("ancestry", []) + + pathseg: PathSegment = yaml_path.escaped[segment_index] + (_, stripped_attrs) = pathseg (_, unstripped_attrs) = yaml_path.unescaped[segment_index] str_stripped = str(stripped_attrs) - translated_path = kwargs.pop("translated_path", YAMLPath("")) self.logger.debug( "Processor::_get_nodes_by_index: Seeking INDEX node at {}." @@ -767,16 +914,19 @@ def _get_nodes_by_index( if intmin == intmax and len(data) > intmin: yield NodeCoords( [data[intmin]], data, intmin, - translated_path + "[{}]".format(intmin)) + translated_path + "[{}]".format(intmin), + ancestry + [(data, intmin)], pathseg) else: sliced_elements = [] for slice_index in range(intmin, intmax): sliced_elements.append(NodeCoords( data[slice_index], data, intmin, - translated_path + "[{}]".format(slice_index))) + translated_path + "[{}]".format(slice_index), + ancestry + [(data, slice_index)], pathseg)) yield NodeCoords( sliced_elements, data, intmin, - translated_path + "[{}:{}]".format(intmin, intmax)) + translated_path + "[{}:{}]".format(intmin, intmax), + ancestry + [(data, intmin)], pathseg) elif isinstance(data, dict): for key, val in data.items(): @@ -784,7 +934,8 @@ def _get_nodes_by_index( yield NodeCoords( val, data, key, translated_path + YAMLPath.escape_path_section( - key, translated_path.seperator)) + key, translated_path.seperator), + ancestry + [(data, key)], pathseg) else: try: idx: int = int(str_stripped) @@ -798,10 
+949,11 @@ def _get_nodes_by_index( if isinstance(data, list) and len(data) > idx: yield NodeCoords( - data[idx], data, idx, translated_path + "[{}]".format(idx)) + data[idx], data, idx, translated_path + "[{}]".format(idx), + ancestry + [(data, idx)], pathseg) def _get_nodes_by_anchor( - self, data: Any, yaml_path: YAMLPath, segment_index: int, **kwargs + self, data: Any, yaml_path: YAMLPath, segment_index: int, **kwargs ) -> Generator[NodeCoords, None, None]: """ Get nodes matching an Anchor name. @@ -811,16 +963,25 @@ def _get_nodes_by_anchor( Parameters: 1. data (Any) The parsed YAML data to process - 2. yaml_path (Path) The YAML Path being processed + 2. yaml_path (YAMLPath) The YAML Path being processed 3. segment_index (int) Segment index of the YAML Path to process + Keyword Arguments: + * translated_path (YAMLPath) YAML Path indicating precisely which node + is being evaluated + * ancestry (List[AncestryEntry]) Stack of ancestors preceding the + present node under evaluation + Returns: (Generator[NodeCoords, None, None]) Each NodeCoords as they are matched Raises: N/A """ - (_, stripped_attrs) = yaml_path.escaped[segment_index] - translated_path = kwargs.pop("translated_path", YAMLPath("")) + translated_path: YAMLPath = kwargs.pop("translated_path", YAMLPath("")) + ancestry: List[AncestryEntry] = kwargs.pop("ancestry", []) + + pathseg: PathSegment = yaml_path.escaped[segment_index] + (_, stripped_attrs) = pathseg next_translated_path = translated_path + "[&{}]".format( YAMLPath.escape_path_section( str(stripped_attrs), translated_path.seperator)) @@ -833,19 +994,67 @@ def _get_nodes_by_anchor( for lstidx, ele in enumerate(data): if (hasattr(ele, "anchor") and stripped_attrs == ele.anchor.value): - yield NodeCoords(ele, data, lstidx, next_translated_path) + yield NodeCoords(ele, data, lstidx, next_translated_path, + ancestry + [(data, lstidx)], pathseg) elif isinstance(data, dict): for key, val in data.items(): + next_ancestry = ancestry + [(data, key)] if 
(hasattr(key, "anchor") and stripped_attrs == key.anchor.value): - yield NodeCoords(val, data, key, next_translated_path) + yield NodeCoords( + val, data, key, next_translated_path, + next_ancestry, pathseg) elif (hasattr(val, "anchor") and stripped_attrs == val.anchor.value): - yield NodeCoords(val, data, key, next_translated_path) + yield NodeCoords( + val, data, key, next_translated_path, + next_ancestry, pathseg) + + def _get_nodes_by_keyword_search( + self, data: Any, yaml_path: YAMLPath, terms: SearchKeywordTerms, + **kwargs: Any + ) -> Generator[NodeCoords, None, None]: + """ + Perform a search identified by a keyword and its parameters. + + Parameters: + 1. data (Any) The parsed YAML data to process + 2. yaml_path (YAMLPath) The YAML Path being processed + 3. terms (SearchKeywordTerms) The keyword search terms + + Keyword Arguments: + * parent (ruamel.yaml node) The parent node from which this query + originates + * parentref (Any) The Index or Key of data within parent + * traverse_lists (Boolean) Indicate whether searches against lists are + permitted to automatically traverse into the list; Default=True + * translated_path (YAMLPath) YAML Path indicating precisely which node + is being evaluated + * ancestry (List[AncestryEntry]) Stack of ancestors preceding the + present node under evaluation + + Returns: (Generator[NodeCoords, None, None]) Each NodeCoords as they + are matched + + Raises: N/A + """ + self.logger.debug( + "Seeking KEYWORD_SEARCH nodes matching {} in data:".format(terms), + data=data, + prefix="Processor::_get_nodes_by_keyword_search: ") + + for res_nc in KeywordSearches.search_matches( + terms, data, yaml_path, **kwargs + ): + self.logger.debug( + "Yielding keyword search match:", + data=res_nc, + prefix="Processor::_get_nodes_by_keyword_search: ") + yield res_nc # pylint: disable=too-many-statements def _get_nodes_by_search( - self, data: Any, terms: SearchTerms, **kwargs: Any + self, data: Any, terms: SearchTerms, **kwargs: Any ) -> 
Generator[NodeCoords, None, None]: """ Get nodes matching a search expression. @@ -863,6 +1072,10 @@ def _get_nodes_by_search( * parentref (Any) The Index or Key of data within parent * traverse_lists (Boolean) Indicate whether searches against lists are permitted to automatically traverse into the list; Default=True + * translated_path (YAMLPath) YAML Path indicating precisely which node + is being evaluated + * ancestry (List[AncestryEntry]) Stack of ancestors preceding the + present node under evaluation Returns: (Generator[NodeCoords, None, None]) Each NodeCoords as they are matched @@ -874,16 +1087,20 @@ def _get_nodes_by_search( data=data, prefix="Processor::_get_nodes_by_search: ") - parent = kwargs.pop("parent", None) - parentref = kwargs.pop("parentref", None) - traverse_lists = kwargs.pop("traverse_lists", True) - translated_path = kwargs.pop("translated_path", YAMLPath("")) + parent: Any = kwargs.pop("parent", None) + parentref: Any = kwargs.pop("parentref", None) + traverse_lists: bool = kwargs.pop("traverse_lists", True) + translated_path: YAMLPath = kwargs.pop("translated_path", YAMLPath("")) + pathseg: PathSegment = (PathSegmentTypes.SEARCH, terms) + ancestry: List[AncestryEntry] = kwargs.pop("ancestry", []) + invert = terms.inverted method = terms.method attr = terms.attribute term = terms.term matches = False desc_path = YAMLPath(attr) + debug_matched = "NO MATCHES YIELDED" if isinstance(data, list): if not traverse_lists: self.logger.debug( @@ -900,28 +1117,37 @@ def _get_nodes_by_search( # Attempt a descendant search next_translated_path = translated_path + "[{}]".format( lstidx) + next_ancestry = ancestry + [(data, lstidx)] for desc_node in self._get_required_nodes( - ele, desc_path, 0, translated_path=next_translated_path + ele, desc_path, 0, + translated_path=next_translated_path, + ancestry=next_ancestry, relay_segment=pathseg ): matches = Searches.search_matches( method, term, desc_node.node) break if (matches and not invert) or (invert and 
not matches): + debug_matched = "one list match yielded" self.logger.debug( "Yielding list match at index {}:".format(lstidx), data=ele, prefix="Processor::_get_nodes_by_search: ") yield NodeCoords( ele, data, lstidx, - translated_path + "[{}]".format(lstidx)) + translated_path + "[{}]".format(lstidx), + ancestry + [(data, lstidx)], pathseg) elif isinstance(data, dict): # Allow . to mean "each key's name" if attr == '.': + self.logger.debug( + "Scanning every key's name...", + prefix="Processor::_get_nodes_by_search: ") for key, val in data.items(): matches = Searches.search_matches(method, term, key) if (matches and not invert) or (invert and not matches): + debug_matched = "one dictionary key name match yielded" self.logger.debug( "Yielding dictionary key name match against '{}':" .format(key), @@ -930,12 +1156,18 @@ def _get_nodes_by_search( yield NodeCoords( val, data, key, translated_path + YAMLPath.escape_path_section( - key, translated_path.seperator)) + key, translated_path.seperator), + ancestry + [(data, key)], pathseg) elif attr in data: value = data[attr] matches = Searches.search_matches(method, term, value) + self.logger.debug( + "Scanning for an attribute match against {}, which {}." + .format(attr, "matches" if matches else "does not match"), + prefix="Processor::_get_nodes_by_search: ") if (matches and not invert) or (invert and not matches): + debug_matched = "one dictionary attribute match yielded" self.logger.debug( "Yielding dictionary attribute match against '{}':" .format(attr), @@ -944,34 +1176,70 @@ def _get_nodes_by_search( yield NodeCoords( value, data, attr, translated_path + YAMLPath.escape_path_section( - attr, translated_path.seperator)) + attr, translated_path.seperator), + ancestry + [(data, attr)], pathseg) else: - # Attempt a descendant search + # Attempt a descendant search; return every node which has ANY + # descendent matching the search expression. 
+ self.logger.debug(( + "Attempting a descendant search against data at" + " desc_path={}, translated_path={}:" + ).format(desc_path, translated_path), + prefix="Processor::_get_nodes_by_search: ", + data=data) for desc_node in self._get_required_nodes( data, desc_path, 0, parent=parent, parentref=parentref, - translated_path=translated_path + translated_path=translated_path, ancestry=ancestry, + relay_segment=pathseg ): matches = Searches.search_matches( method, term, desc_node.node) - break + + if (matches and not invert) or (invert and not matches): + # Search no further because the parent node of this + # search has at least one matching descendent. + self.logger.debug(( + "BREAKING OUT of descendent search with matches={}" + " and invert={}").format( + "matching" if matches else "NOT matching", + "yes" if invert else "no"), + prefix="Processor::_get_nodes_by_search: ") + break if (matches and not invert) or (invert and not matches): - yield NodeCoords(data, parent, parentref, translated_path) + debug_matched = "one descendant search match yielded" + self.logger.debug( + "Yielding descendant match against '{}':" + .format(attr), + data=data, + prefix="Processor::_get_nodes_by_search: ") + yield NodeCoords( + data, parent, parentref, translated_path, ancestry, + pathseg) else: # Check the passed data itself for a match matches = Searches.search_matches(method, term, data) if (matches and not invert) or (invert and not matches): + debug_matched = "query source data itself yielded" self.logger.debug( "Yielding the queried data itself because it matches.", prefix="Processor::_get_nodes_by_search: ") - yield NodeCoords(data, parent, parentref, translated_path) + yield NodeCoords( + data, parent, parentref, translated_path, ancestry, + pathseg) + + self.logger.debug( + "Finished seeking SEARCH nodes matching {} in data with {}:" + .format(terms, debug_matched), + data=data, + prefix="Processor::_get_nodes_by_search: ") # pylint: disable=locally-disabled def 
_get_nodes_by_collector( - self, data: Any, yaml_path: YAMLPath, segment_index: int, - terms: CollectorTerms, **kwargs: Any + self, data: Any, yaml_path: YAMLPath, segment_index: int, + terms: CollectorTerms, **kwargs: Any ) -> Generator[List[NodeCoords], None, None]: """ Generate List of nodes gathered via a Collector. @@ -982,7 +1250,7 @@ def _get_nodes_by_collector( Parameters: 1. data (ruamel.yaml data) The parsed YAML data to process - 2. yaml_path (Path) The YAML Path being processed + 2. yaml_path (YAMLPath) The YAML Path being processed 3. segment_index (int) Segment index of the YAML Path to process 4. terms (CollectorTerms) The collector terms @@ -990,6 +1258,10 @@ def _get_nodes_by_collector( * parent (ruamel.yaml node) The parent node from which this query originates * parentref (Any) The Index or Key of data within parent + * translated_path (YAMLPath) YAML Path indicating precisely which node + is being evaluated + * ancestry (List[AncestryEntry]) Stack of ancestors preceding the + present node under evaluation Returns: (Generator[List[NodeCoords], None, None]) Each list of NodeCoords as they are matched (the result is always a list) @@ -1000,16 +1272,23 @@ def _get_nodes_by_collector( yield data return - parent = kwargs.pop("parent", None) - parentref = kwargs.pop("parentref", None) - translated_path = kwargs.pop("translated_path", YAMLPath("")) - node_coords = [] # A list of NodeCoords + parent: Any = kwargs.pop("parent", None) + parentref: Any = kwargs.pop("parentref", None) + translated_path: YAMLPath = kwargs.pop("translated_path", YAMLPath("")) + ancestry: List[AncestryEntry] = kwargs.pop("ancestry", []) + + node_coords: List[NodeCoords] = [] + segments = yaml_path.escaped + next_segment_idx = segment_index + 1 + pathseg: PathSegment = segments[segment_index] + self.logger.debug( "Processor::_get_nodes_by_collector: Getting required nodes" " matching search expression: {}".format(terms.expression)) for node_coord in self._get_required_nodes( 
data, YAMLPath(terms.expression), 0, parent=parent, - parentref=parentref, translated_path=translated_path): + parentref=parentref, translated_path=translated_path, + ancestry=ancestry, relay_segment=pathseg): node_coords.append(node_coord) # This may end up being a bad idea for some cases, but this method will @@ -1030,17 +1309,15 @@ def _get_nodes_by_collector( flat_nodes.append( NodeCoords( flatten_node, node_coord.parent, flatten_idx, - node_coord.path)) + node_coord.path, node_coord.ancestry, pathseg)) node_coords = flat_nodes # As long as each next segment is an ADDITION or SUBTRACTION # COLLECTOR, keep combining the results. - segments = yaml_path.escaped - next_segment_idx = segment_index + 1 - # pylint: disable=too-many-nested-blocks while next_segment_idx < len(segments): - (peek_type, peek_attrs) = segments[next_segment_idx] + peekseg: PathSegment = segments[next_segment_idx] + (peek_type, peek_attrs) = peekseg if ( peek_type is PathSegmentTypes.COLLECTOR and isinstance(peek_attrs, CollectorTerms) @@ -1050,7 +1327,8 @@ def _get_nodes_by_collector( for node_coord in self._get_required_nodes( data, peek_path, 0, parent=parent, parentref=parentref, - translated_path=translated_path): + translated_path=translated_path, + ancestry=ancestry, relay_segment=peekseg): if (isinstance(node_coord, NodeCoords) and isinstance(node_coord.node, list)): for coord_idx, coord in enumerate(node_coord.node): @@ -1060,9 +1338,12 @@ def _get_nodes_by_collector( next_translated_path = ( next_translated_path + "[{}]".format(coord_idx)) + next_ancestry = ancestry + [( + node_coord.node, coord_idx)] coord = NodeCoords( coord, node_coord.node, coord_idx, - next_translated_path) + next_translated_path, + next_ancestry, peekseg) node_coords.append(coord) else: node_coords.append(node_coord) @@ -1071,7 +1352,8 @@ def _get_nodes_by_collector( for node_coord in self._get_required_nodes( data, peek_path, 0, parent=parent, parentref=parentref, - translated_path=translated_path): + 
translated_path=translated_path, + ancestry=ancestry, relay_segment=peekseg): unwrapped_data = NodeCoords.unwrap_node_coords( node_coord) if isinstance(unwrapped_data, list): @@ -1097,12 +1379,16 @@ def _get_nodes_by_collector( # yield only when there are results if node_coords: + self.logger.debug(( + "Yielding collected node list:"), + prefix="Processor::_get_nodes_by_collector: ", + data=node_coords) yield node_coords # pylint: disable=locally-disabled,too-many-branches - def _get_nodes_by_traversal(self, data: Any, yaml_path: YAMLPath, - segment_index: int, **kwargs: Any - ) -> Generator[Any, None, None]: + def _get_nodes_by_traversal( + self, data: Any, yaml_path: YAMLPath, segment_index: int, **kwargs: Any + ) -> Generator[Any, None, None]: """ Deeply traverse the document tree, returning all or filtered nodes. @@ -1111,41 +1397,53 @@ def _get_nodes_by_traversal(self, data: Any, yaml_path: YAMLPath, 2. yaml_path (yamlpath.Path) The YAML Path being processed 3. segment_index (int) Segment index of the YAML Path to process - Keyword Parameters: + Keyword Arguments: * parent (ruamel.yaml node) The parent node from which this query originates * parentref (Any) The Index or Key of data within parent + * translated_path (YAMLPath) YAML Path indicating precisely which node + is being evaluated + * ancestry (List[AncestryEntry]) Stack of ancestors preceding the + present node under evaluation Returns: (Generator[Any, None, None]) Each node coordinate as they are matched. 
""" - parent = kwargs.pop("parent", None) - parentref = kwargs.pop("parentref", None) - translated_path = kwargs.pop("translated_path", YAMLPath("")) + parent: Any = kwargs.pop("parent", None) + parentref: Any = kwargs.pop("parentref", None) + translated_path: YAMLPath = kwargs.pop("translated_path", YAMLPath("")) + ancestry: List[AncestryEntry] = kwargs.pop("ancestry", []) + + segments = yaml_path.escaped + pathseg: PathSegment = segments[segment_index] + next_segment_idx: int = segment_index + 1 self.logger.debug( "TRAVERSING the tree at parentref:", prefix="Processor::_get_nodes_by_traversal: ", data=parentref) - if data is None: - self.logger.debug( - "Processor::_get_nodes_by_traversal: Yielding a None node.") - yield NodeCoords(None, parent, parentref) - return - # Is there a next segment? - segments = yaml_path.escaped - if segment_index + 1 == len(segments): + if next_segment_idx == len(segments): # This traversal is gathering every leaf node + if data is None: + self.logger.debug(( + "Yielding a None node."), + prefix="Processor::_get_nodes_by_traversal: ") + yield NodeCoords(None, parent, parentref, translated_path, + ancestry, pathseg) + return + if isinstance(data, dict): for key, val in data.items(): next_translated_path = ( translated_path + YAMLPath.escape_path_section( key, translated_path.seperator)) + next_ancestry = ancestry + [(data, key)] for node_coord in self._get_nodes_by_traversal( val, yaml_path, segment_index, parent=data, parentref=key, - translated_path=next_translated_path + translated_path=next_translated_path, + ancestry=next_ancestry ): self.logger.debug( "Yielding unfiltered Hash value:", @@ -1169,7 +1467,9 @@ def _get_nodes_by_traversal(self, data: Any, yaml_path: YAMLPath, self.logger.debug( "Yielding unfiltered Scalar value:", prefix="Processor::_get_nodes_by_traversal: ", data=data) - yield NodeCoords(data, parent, parentref, translated_path) + yield NodeCoords( + data, parent, parentref, translated_path, ancestry, + pathseg) 
else: # There is a filter in the next segment; recurse data, comparing # every child against the following segment until there are no more @@ -1182,17 +1482,21 @@ def _get_nodes_by_traversal(self, data: Any, yaml_path: YAMLPath, self.logger.debug( "Processor::_get_nodes_by_traversal: Checking the DIRECT node" " for a next-segment match at {}...".format(parentref)) + + peekseg: PathSegment = segments[next_segment_idx] for node_coord in self._get_nodes_by_path_segment( - data, yaml_path, segment_index + 1, parent=parent, + data, yaml_path, next_segment_idx, parent=parent, parentref=parentref, traverse_lists=False, - translated_path=translated_path + translated_path=translated_path, ancestry=ancestry ): self.logger.debug( "Yielding filtered DIRECT node at parentref {} of coord:" .format(parentref), prefix="Processor::_get_nodes_by_traversal: ", data=node_coord) - yield NodeCoords(data, parent, parentref, translated_path) + yield NodeCoords( + data, parent, parentref, translated_path, ancestry, + peekseg) # Then, recurse into each child to perform the same test. if isinstance(data, dict): @@ -1204,10 +1508,12 @@ def _get_nodes_by_traversal(self, data: Any, yaml_path: YAMLPath, next_translated_path = ( translated_path + YAMLPath.escape_path_section( key, translated_path.seperator)) + next_ancestry = ancestry + [(data, key)] for node_coord in self._get_nodes_by_traversal( val, yaml_path, segment_index, parent=data, parentref=key, - translated_path=next_translated_path + translated_path=next_translated_path, + ancestry=next_ancestry ): self.logger.debug( "Yielding filtered indirect Hash value from KEY" @@ -1222,10 +1528,12 @@ def _get_nodes_by_traversal(self, data: Any, yaml_path: YAMLPath, " INDEX '{}' at ref '{}' for next-segment matches..." 
.format(idx, parentref)) next_translated_path = translated_path + "[{}]".format(idx) + next_ancestry = ancestry + [(data, idx)] for node_coord in self._get_nodes_by_traversal( ele, yaml_path, segment_index, parent=data, parentref=idx, - translated_path=next_translated_path + translated_path=next_translated_path, + ancestry=next_ancestry ): self.logger.debug( "Yielding filtered indirect Array value from INDEX" @@ -1234,40 +1542,46 @@ def _get_nodes_by_traversal(self, data: Any, yaml_path: YAMLPath, data=node_coord) yield node_coord - def _get_required_nodes(self, data: Any, yaml_path: YAMLPath, - depth: int = 0, **kwargs: Any - ) -> Generator[NodeCoords, None, None]: + def _get_required_nodes( + self, data: Any, yaml_path: YAMLPath, depth: int = 0, **kwargs: Any + ) -> Generator[NodeCoords, None, None]: """ Generate pre-existing NodeCoords from YAML data matching a YAML Path. Parameters: 1. data (Any) The parsed YAML data to process - 2. yaml_path (Path) The pre-parsed YAML Path to follow + 2. yaml_path (YAMLPath) The pre-parsed YAML Path to follow 3. depth (int) Index within yaml_path to process; default=0 4. parent (ruamel.yaml node) The parent node from which this query originates 5. 
parentref (Any) Key or Index of data within parent + Keyword Arguments: + * parent (ruamel.yaml node) The parent node from which this query + originates + * parentref (Any) The Index or Key of data within parent + * relay_segment (PathSegment) YAML Path segment presently under + evaluation + * translated_path (YAMLPath) YAML Path indicating precisely which node + is being evaluated + * ancestry (List[AncestryEntry]) Stack of ancestors preceding the + present node under evaluation + Returns: (Generator[NodeCoords, None, None]) The requested NodeCoords - as they are matched + as they are matched Raises: N/A """ - parent = kwargs.pop("parent", None) - parentref = kwargs.pop("parentref", None) - translated_path = kwargs.pop("translated_path", YAMLPath("")) - - if data is None: - self.logger.debug( - "Bailing out on None data at parentref, {}, of parent:" - .format(parentref), - prefix="Processor::_get_required_nodes: ", - data=parent) - return + parent: Any = kwargs.pop("parent", None) + parentref: Any = kwargs.pop("parentref", None) + relay_segment: PathSegment = kwargs.pop("relay_segment", None) + translated_path: YAMLPath = kwargs.pop("translated_path", YAMLPath("")) + ancestry: List[AncestryEntry] = kwargs.pop("ancestry", []) segments = yaml_path.escaped if segments and len(segments) > depth: - (segment_type, unstripped_attrs) = yaml_path.unescaped[depth] + pathseg: PathSegment = yaml_path.unescaped[depth] + (segment_type, unstripped_attrs) = pathseg except_segment = str(unstripped_attrs) self.logger.debug( "Seeking segment <{}>{} in data of type {}:" @@ -1277,11 +1591,10 @@ def _get_required_nodes(self, data: Any, yaml_path: YAMLPath, for segment_node_coords in self._get_nodes_by_path_segment( data, yaml_path, depth, parent=parent, parentref=parentref, - translated_path=translated_path + translated_path=translated_path, ancestry=ancestry ): self.logger.debug( - "Found node of type {} at <{}>{} in the data and recursing" - " into it..." 
+ "Got data of type {} at <{}>{} in the data." .format( type(segment_node_coords.node if hasattr(segment_node_coords, "node") @@ -1291,29 +1604,32 @@ def _get_required_nodes(self, data: Any, yaml_path: YAMLPath, prefix="Processor::_get_required_nodes: ", data=segment_node_coords) - if (segment_node_coords is None - or (hasattr(segment_node_coords, "node") - and segment_node_coords.node is None) - ): - self.logger.debug( - "Processor::_get_required_nodes: Yielding null.") - yield segment_node_coords - elif isinstance(segment_node_coords, list): + if isinstance(segment_node_coords, list): # Most likely the output of a Collector, this list will be # of NodeCoords rather than an actual DOM reference. As # such, it must be treated as a virtual DOM element that # cannot itself be parented to the real DOM, though each # of its elements has a real parent. + self.logger.debug( + "Processor::_get_required_nodes: Got a list:", + data=segment_node_coords) for subnode_coord in self._get_required_nodes( segment_node_coords, yaml_path, depth + 1, - translated_path=translated_path): + parent=parent, parentref=parentref, + translated_path=translated_path, + ancestry=ancestry, relay_segment=pathseg): yield subnode_coord else: + self.logger.debug( + "Recursing into the retrieved data...", + prefix="Processor::_get_required_nodes: ") for subnode_coord in self._get_required_nodes( segment_node_coords.node, yaml_path, depth + 1, parent=segment_node_coords.parent, parentref=segment_node_coords.parentref, - translated_path=segment_node_coords.path): + translated_path=segment_node_coords.path, + ancestry=segment_node_coords.ancestry, + relay_segment=pathseg): self.logger.debug( "Finally returning segment data of type {} at" " parentref {}:" @@ -1328,12 +1644,14 @@ def _get_required_nodes(self, data: Any, yaml_path: YAMLPath, .format(type(data), parentref), prefix="Processor::_get_required_nodes: ", data=data, footer=" ") - yield NodeCoords(data, parent, parentref, translated_path) + yield 
NodeCoords( + data, parent, parentref, translated_path, ancestry, + relay_segment) # pylint: disable=locally-disabled,too-many-statements def _get_optional_nodes( - self, data: Any, yaml_path: YAMLPath, value: Any = None, - depth: int = 0, **kwargs: Any + self, data: Any, yaml_path: YAMLPath, value: Any = None, + depth: int = 0, **kwargs: Any ) -> Generator[NodeCoords, None, None]: """ Return zero or more pre-existing NodeCoords matching a YAML Path. @@ -1343,13 +1661,21 @@ def _get_optional_nodes( Parameters: 1. data (Any) The parsed YAML data to process - 2. yaml_path (Path) The pre-parsed YAML Path to follow + 2. yaml_path (YAMLPath) The pre-parsed YAML Path to follow 3. value (Any) The value to assign to the element 4. depth (int) For recursion, this identifies which segment of yaml_path to evaluate; default=0 - 5. parent (ruamel.yaml node) The parent node from which this query - originates - 6. parentref (Any) Index or Key of data within parent + + Keyword Arguments: + * parent (ruamel.yaml node) The parent node from which this query + originates + * parentref (Any) The Index or Key of data within parent + * relay_segment (PathSegment) YAML Path segment presently under + evaluation + * translated_path (YAMLPath) YAML Path indicating precisely which node + is being evaluated + * ancestry (List[AncestryEntry]) Stack of ancestors preceding the + present node under evaluation Returns: (Generator[NodeCoords, None, None]) The requested NodeCoords as they are matched @@ -1360,20 +1686,18 @@ def _get_optional_nodes( an element that does not exist in data and this code isn't yet prepared to add it. 
""" - parent = kwargs.pop("parent", None) - parentref = kwargs.pop("parentref", None) - translated_path = kwargs.pop("translated_path", YAMLPath("")) + parent: Any = kwargs.pop("parent", None) + parentref: Any = kwargs.pop("parentref", None) + relay_segment: PathSegment = kwargs.pop("relay_segment", None) + translated_path: YAMLPath = kwargs.pop("translated_path", YAMLPath("")) + ancestry: List[AncestryEntry] = kwargs.pop("ancestry", []) segments = yaml_path.escaped # pylint: disable=locally-disabled,too-many-nested-blocks if segments and len(segments) > depth: - (segment_type, unstripped_attrs) = yaml_path.unescaped[depth] - stripped_attrs: Union[ - str, - int, - SearchTerms, - CollectorTerms - ] = segments[depth][1] + pathseg: PathSegment = yaml_path.unescaped[depth] + (segment_type, unstripped_attrs) = pathseg + stripped_attrs: PathAttributes = segments[depth][1] except_segment = str(unstripped_attrs) self.logger.debug( @@ -1386,31 +1710,69 @@ def _get_optional_nodes( matched_nodes = 0 for next_coord in self._get_nodes_by_path_segment( data, yaml_path, depth, parent=parent, parentref=parentref, - translated_path=translated_path + translated_path=translated_path, ancestry=ancestry ): matched_nodes += 1 - self.logger.debug( - ("Processor::_get_optional_nodes: Found element <{}>{} in" - + " the data; recursing into it..." - ).format(segment_type, except_segment) + if isinstance(next_coord, list): + # Drill into Collector results + for node_coord in self._get_optional_nodes( + next_coord, yaml_path, value, depth + 1, + parent=parent, parentref=parentref, + translated_path=translated_path, + ancestry=ancestry, + relay_segment=pathseg + ): + self.logger.debug(( + "Relaying a drilled-into Collector node:"), + prefix="Processor::_get_optional_nodes: ", + data={ + "node": node_coord, + "parent": parent, + "parentref": parentref + } + ) + yield node_coord + continue + + if next_coord.node is None: + self.logger.debug(( + "Relaying a None element <{}>{} from the data." 
+ ).format(segment_type, except_segment), + prefix="Processor::_get_optional_nodes: ", + data=next_coord + ) + yield next_coord + continue + + self.logger.debug(( + "Found element <{}>{} in the data; recursing into it..." + ).format(segment_type, except_segment), + prefix="Processor::_get_optional_nodes: ", + data=next_coord ) + for node_coord in self._get_optional_nodes( next_coord.node, yaml_path, value, depth + 1, parent=next_coord.parent, parentref=next_coord.parentref, - translated_path=next_coord.path + translated_path=next_coord.path, + ancestry=next_coord.ancestry, + relay_segment=pathseg ): yield node_coord if ( matched_nodes < 1 and segment_type is not PathSegmentTypes.SEARCH + and segment_type is not PathSegmentTypes.KEYWORD_SEARCH ): # Add the missing element self.logger.debug( ("Processor::_get_optional_nodes: Element <{}>{} is" - + " unknown in the data! Applying default, <{}>{}." - ).format(segment_type, except_segment, type(value), value) + " unknown in the data! Applying default, <{}>{} to" + " data:" + ).format(segment_type, except_segment, type(value), value), + data=data ) if isinstance(data, list): self.logger.debug( @@ -1429,10 +1791,12 @@ def _get_optional_nodes( new_idx = len(data) - 1 next_translated_path = translated_path + "[{}]".format( new_idx) + next_ancestry = ancestry + [(data, new_idx)] for node_coord in self._get_optional_nodes( new_ele, yaml_path, value, depth + 1, parent=data, parentref=new_idx, - translated_path=next_translated_path + translated_path=next_translated_path, + ancestry=next_ancestry, relay_segment=pathseg ): matched_nodes += 1 yield node_coord @@ -1461,10 +1825,12 @@ def _get_optional_nodes( Nodes.append_list_element(data, next_node) next_translated_path = translated_path + "[{}]".format( newidx) + next_ancestry = ancestry + [(data, newidx)] for node_coord in self._get_optional_nodes( data[newidx], yaml_path, value, depth + 1, parent=data, parentref=newidx, - translated_path=next_translated_path + 
translated_path=next_translated_path, + ancestry=next_ancestry, relay_segment=pathseg ): matched_nodes += 1 yield node_coord @@ -1494,11 +1860,13 @@ def _get_optional_nodes( translated_path + YAMLPath.escape_path_section( str(stripped_attrs), translated_path.seperator)) + next_ancestry = ancestry + [(data, stripped_attrs)] for node_coord in self._get_optional_nodes( data[stripped_attrs], yaml_path, value, depth + 1, parent=data, parentref=stripped_attrs, - translated_path=next_translated_path + translated_path=next_translated_path, + ancestry=next_ancestry, relay_segment=pathseg ): matched_nodes += 1 yield node_coord @@ -1509,7 +1877,16 @@ def _get_optional_nodes( str(yaml_path), except_segment ) + else: + self.logger.debug( + "Assuming data is scalar and cannot receive a {}" + " subreference at {} ({}/{}):".format( + str(segment_type), str(yaml_path), str(depth + 1), + str(len(yaml_path))), + prefix="Processor::_get_optional_nodes: ", + data={"data": data, "parent": parent, + "parentref": parentref, "(default_)value": value}) raise YAMLPathException( "Cannot add {} subreference to scalars".format( str(segment_type) @@ -1523,7 +1900,9 @@ def _get_optional_nodes( "Finally returning data of type {}:" .format(type(data)), prefix="Processor::_get_optional_nodes: ", data=data) - yield NodeCoords(data, parent, parentref, translated_path) + yield NodeCoords( + data, parent, parentref, translated_path, ancestry, + relay_segment) # pylint: disable=too-many-arguments def _update_node( @@ -1576,7 +1955,7 @@ def recurse(data, parent, parentref, reference_node, replacement_node): replacement_node) elif isinstance(data, list): for idx, item in enumerate(data): - if item is reference_node: + if data is parent and item is reference_node: data[idx] = replacement_node else: recurse(item, parent, parentref, reference_node, diff --git a/yamlpath/types/__init__.py b/yamlpath/types/__init__.py index 8cca559a..abf21409 100644 --- a/yamlpath/types/__init__.py +++ 
b/yamlpath/types/__init__.py @@ -1,3 +1,4 @@ """Make all custom types available.""" +from .ancestryentry import AncestryEntry from .pathattributes import PathAttributes from .pathsegment import PathSegment diff --git a/yamlpath/types/ancestryentry.py b/yamlpath/types/ancestryentry.py new file mode 100644 index 00000000..3ea90450 --- /dev/null +++ b/yamlpath/types/ancestryentry.py @@ -0,0 +1,8 @@ +""" +Defines a custom type for data ancestry (parent, parentref). + +Copyright 2021 William W. Kimball, Jr. MBA MSIS +""" +from typing import Any, Tuple + +AncestryEntry = Tuple[Any, Any] diff --git a/yamlpath/types/pathattributes.py b/yamlpath/types/pathattributes.py index 004aa304..ff0b0fcc 100644 --- a/yamlpath/types/pathattributes.py +++ b/yamlpath/types/pathattributes.py @@ -1,8 +1,12 @@ -"""Defines a custom type for YAML Path segment attributes.""" +""" +Defines a custom type for YAML Path segment attributes. + +Copyright 2020 William W. Kimball, Jr. MBA MSIS +""" from typing import Union from yamlpath.path import CollectorTerms import yamlpath.path.searchterms as searchterms -PathAttributes = Union[str, CollectorTerms, searchterms.SearchTerms] +PathAttributes = Union[str, int, CollectorTerms, searchterms.SearchTerms, None] diff --git a/yamlpath/types/pathsegment.py b/yamlpath/types/pathsegment.py index 1350ea3e..f598cf18 100644 --- a/yamlpath/types/pathsegment.py +++ b/yamlpath/types/pathsegment.py @@ -1,4 +1,8 @@ -"""Defines a custom type for YAML Path segments.""" +""" +Defines a custom type for YAML Path segments. + +Copyright 2020 William W. Kimball, Jr. MBA MSIS +""" from typing import Tuple from yamlpath.enums import PathSegmentTypes diff --git a/yamlpath/wrappers/consoleprinter.py b/yamlpath/wrappers/consoleprinter.py index 55e9151b..27d13555 100644 --- a/yamlpath/wrappers/consoleprinter.py +++ b/yamlpath/wrappers/consoleprinter.py @@ -11,10 +11,11 @@ verbose: allows output from ConsolePrinter::verbose(). debug: allows output from ConsolePrinter::debug(). 
-Copyright 2018, 2019, 2020 William W. Kimball, Jr. MBA MSIS +Copyright 2018, 2019, 2020, 2021 William W. Kimball, Jr. MBA MSIS """ import sys -from typing import Any, Dict, Generator, List, Set, Tuple, Union +from collections import deque +from typing import Any, Deque, Dict, Generator, List, Set, Tuple, Union from ruamel.yaml.comments import ( CommentedBase, @@ -231,7 +232,7 @@ def _debug_dump(data: Any, **kwargs) -> Generator[str, None, None]: data, prefix=prefix, **kwargs ): yield line - elif isinstance(data, (list, set, tuple)): + elif isinstance(data, (list, set, tuple, deque)): for line in ConsolePrinter._debug_list( data, prefix=prefix, **kwargs ): @@ -287,13 +288,20 @@ def _debug_node_coord( """Helper method for debug.""" prefix = kwargs.pop("prefix", "") path_prefix = "{}(path)".format(prefix) + segment_prefix = "{}(segment)".format(prefix) node_prefix = "{}(node)".format(prefix) parent_prefix = "{}(parent)".format(prefix) parentref_prefix = "{}(parentref)".format(prefix) + ancestry_prefix = "{}(ancestry)".format(prefix) for line in ConsolePrinter._debug_dump(data.path, prefix=path_prefix): yield line + for line in ConsolePrinter._debug_dump( + data.path_segment, prefix=segment_prefix + ): + yield line + for line in ConsolePrinter._debug_dump(data.node, prefix=node_prefix): yield line @@ -307,9 +315,14 @@ def _debug_node_coord( ): yield line + for line in ConsolePrinter._debug_dump( + data.ancestry, prefix=ancestry_prefix + ): + yield line + @staticmethod def _debug_list( - data: Union[List[Any], Tuple[Any, ...], Set[Any]], **kwargs + data: Union[List[Any], Set[Any], Tuple[Any, ...], Deque[Any]], **kwargs ) -> Generator[str, None, None]: """Helper for debug.""" prefix = kwargs.pop("prefix", "") diff --git a/yamlpath/wrappers/nodecoords.py b/yamlpath/wrappers/nodecoords.py index 2a049edf..9a3b0447 100644 --- a/yamlpath/wrappers/nodecoords.py +++ b/yamlpath/wrappers/nodecoords.py @@ -1,20 +1,33 @@ -"""Wrap a node along with its relative coordinates within 
its DOM.""" -from typing import Any +""" +Implement NodeCoords. +Copyright 2020, 2021 William W. Kimball, Jr. MBA MSIS +""" +from typing import Any, List, Optional + +from yamlpath.types import AncestryEntry, PathSegment from yamlpath import YAMLPath class NodeCoords: """ - Initialize a new NodeCoords. + Wrap a node's data along with its relative coordinates within its DOM. + + A node's "coordinates" includes these properties: + 1. Reference to the node itself, + 2. Immediate parent node of the wrapped node, + 3. Index or Key of the node within its immediate parent - A node's coordinates track these properties: - 1. Reference-to-the-Node-Itself, - 2. Immediate-Parent-Node-of-the-Node, - 3. Index-or-Key-of-the-Node-Within-Its-Immediate-Parent + Additional, optional data can be wrapped along with the node's coordinates + to facilitate other specific operations upon the node/DOM. See the + `__init__` method for details. """ + # pylint: disable=locally-disabled,too-many-arguments def __init__( - self, node: Any, parent: Any, parentref: Any, path: YAMLPath = None + self, node: Any, parent: Any, parentref: Any, + path: Optional[YAMLPath] = None, + ancestry: Optional[List[AncestryEntry]] = None, + path_segment: Optional[PathSegment] = None ) -> None: """ Initialize a new NodeCoords. @@ -26,15 +39,24 @@ def __init__( within `parent` the `node` is located 4. path (YAMLPath) The YAML Path for this node, as reported by its creator process + 5. ancestry (List[AncestryEntry]) Stack of AncestryEntry (parent, + parentref) tracking the hierarchical ancestry of this node through + its parent document + 6. 
path_segment (PathSegment) The YAML Path segment which most directly + caused the generation of this NodeCoords Returns: N/A Raises: N/A """ - self.node = node - self.parent = parent - self.parentref = parentref - self.path = path + self.node: Any = node + self.parent: Any = parent + self.parentref: Any = parentref + self.path: Optional[YAMLPath] = path + self.ancestry: List[AncestryEntry] = ([] + if ancestry is None + else ancestry) + self.path_segment: Optional[PathSegment] = path_segment def __str__(self) -> str: """Get a String representation of this object.""" @@ -50,6 +72,18 @@ def __repr__(self) -> str: self.__class__.__name__, self.node, self.parent, self.parentref)) + def __gt__(self, rhs: "NodeCoords") -> Any: + """Indicate whether this node's data is greater-than another's.""" + if self.node is None or rhs.node is None: + return False + return self.node > rhs.node + + def __lt__(self, rhs: "NodeCoords") -> Any: + """Indicate whether this node's data is less-than another's.""" + if self.node is None or rhs.node is None: + return False + return self.node < rhs.node + @staticmethod def unwrap_node_coords(data: Any) -> Any: """ diff --git a/yamlpath/yamlpath.py b/yamlpath/yamlpath.py index a1c586eb..da0db42a 100644 --- a/yamlpath/yamlpath.py +++ b/yamlpath/yamlpath.py @@ -1,20 +1,21 @@ """ Implement YAML Path. -Copyright 2019, 2020 William W. Kimball, Jr. MBA MSIS +Copyright 2019, 2020, 2021 William W. Kimball, Jr. 
MBA MSIS """ from collections import deque from typing import Deque, List, Optional, Union -from yamlpath.types import PathSegment +from yamlpath.types import PathAttributes, PathSegment from yamlpath.exceptions import YAMLPathException from yamlpath.enums import ( PathSegmentTypes, + PathSearchKeywords, PathSearchMethods, PathSeperators, CollectorOperators, ) -from yamlpath.path import SearchTerms, CollectorTerms +from yamlpath.path import SearchKeywordTerms, SearchTerms, CollectorTerms class YAMLPath: @@ -78,6 +79,10 @@ def __eq__(self, other: object) -> bool: """ Indicate equivalence of two YAMLPaths. + The path seperator is ignored for this comparison. This is deliberate + and allows "some.path[1]" == "/some/path[1]" because both forms of the + same path yield exactly the same data. + Parameters: 1. other (object) The other YAMLPath to compare against. @@ -126,6 +131,41 @@ def append(self, segment: str) -> "YAMLPath": self.original += "{}{}".format(seperator, segment) return self + def pop(self) -> PathSegment: + """ + Pop the last segment off this YAML Path. + + This mutates the YAML Path and returns the removed segment PathSegment. 
+ + Returns: (PathSegment) The removed segment + """ + segments: Deque[PathSegment] = self.unescaped + if len(segments) < 1: + raise YAMLPathException( + "Cannot pop when there are no segments to pop from", + str(self)) + + popped_queue: Deque = deque() + popped_segment: PathSegment = segments.pop() + popped_queue.append(popped_segment) + removable_segment = YAMLPath._stringify_yamlpath_segments( + popped_queue, self.seperator) + prefixed_segment = "{}{}".format(self.seperator, removable_segment) + path_now = self.original + + if path_now.endswith(prefixed_segment): + self.original = path_now[0:len(path_now) - len(prefixed_segment)] + elif path_now.endswith(removable_segment): + self.original = path_now[0:len(path_now) - len(removable_segment)] + elif ( + self.seperator == PathSeperators.FSLASH + and path_now.endswith(removable_segment[1:]) + ): + self.original = path_now[ + 0:len(path_now) - len(removable_segment) + 1] + + return popped_segment + @property def original(self) -> str: """ @@ -151,11 +191,13 @@ def original(self, value: str) -> None: Raises: N/A """ + str_val = str(value) + # Check for empty paths - if not str(value).strip(): - value = "" + if not str_val.strip(): + str_val = "" - self._original = value + self._original = str_val self._seperator = PathSeperators.AUTO self._unescaped = deque() self._escaped = deque() @@ -243,7 +285,7 @@ def _parse_path(self, strip_escapes: bool = True ) -> Deque[PathSegment]: r""" - Parse the YAML Path into its component segments. + Parse the YAML Path into its component PathSegment tuples. Breaks apart a stringified YAML Path into component segments, each identified by its type. See README.md for sample YAML Paths. @@ -253,8 +295,8 @@ def _parse_path(self, only the "escaped" symbol. False = Leave all leading \ symbols intact. - Returns: (deque) an empty queue or a queue of tuples, each identifying - (PathSegmentTypes, segment_attributes). + Returns: (Deque[PathSegment]) an empty queue or a queue of + PathSegments. 
Raises: - `YAMLPathException` when the YAML Path is invalid @@ -268,12 +310,14 @@ def _parse_path(self, search_inverted: bool = False search_method: Optional[PathSearchMethods] = None search_attr: str = "" + search_keyword: Optional[PathSearchKeywords] = None seeking_regex_delim: bool = False capturing_regex: bool = False pathsep: str = str(self.seperator) collector_level: int = 0 collector_operator: CollectorOperators = CollectorOperators.NONE seeking_collector_operator: bool = False + next_char_must_be: Optional[str] = None # Empty paths yield empty queues if not yaml_path: @@ -289,26 +333,25 @@ def _parse_path(self, # pylint: disable=locally-disabled,too-many-nested-blocks for char in yaml_path: demarc_count = len(demarc_stack) + if next_char_must_be and char == next_char_must_be: + next_char_must_be = None if escape_next: # Pass-through; capture this escaped character escape_next = False elif capturing_regex: - if char == demarc_stack[-1]: - # Stop the RegEx capture - capturing_regex = False - demarc_stack.pop() - continue - # Pass-through; capture everything that isn't the present # RegEx delimiter. This deliberately means users cannot # escape the RegEx delimiter itself should it occur within # the RegEx; thus, users must select a delimiter that won't # appear within the RegEx (which is exactly why the user # gets to choose the delimiter). - # pylint: disable=unnecessary-pass - pass # pragma: no cover + if char == demarc_stack[-1]: + # Stop the RegEx capture + capturing_regex = False + demarc_stack.pop() + continue # The escape test MUST come AFTER the RegEx capture test so users # won't be forced into "The Backslash Plague". 
@@ -349,6 +392,11 @@ def _parse_path(self, collector_operator = CollectorOperators.SUBTRACTION continue + elif next_char_must_be and char != next_char_must_be: + raise YAMLPathException( + "Invalid YAML Path at {}, which must be {} in YAML Path" + .format(char, next_char_must_be), yaml_path) + elif char in ['"', "'"]: # Found a string demarcation mark if demarc_count > 0: @@ -384,6 +432,25 @@ def _parse_path(self, continue elif char == "(": + if demarc_count > 0 and demarc_stack[-1] == "[" and segment_id: + if PathSearchKeywords.is_keyword(segment_id): + demarc_stack.append(char) + demarc_count += 1 + segment_type = PathSegmentTypes.KEYWORD_SEARCH + search_keyword = PathSearchKeywords[segment_id.upper()] + segment_id = "" + continue + + raise YAMLPathException( + ("Unknown search keyword, {}; allowed: {}." + " Encountered in YAML Path") + .format( + segment_id, + ', '.join(PathSearchKeywords.get_keywords()) + ) + , yaml_path + ) + seeking_collector_operator = False collector_level += 1 demarc_stack.append(char) @@ -394,12 +461,12 @@ def _parse_path(self, if collector_level == 1: continue - elif collector_level > 0: - if ( - demarc_count > 0 - and char == ")" - and demarc_stack[-1] == "(" - ): + elif ( + demarc_count > 0 + and char == ")" + and demarc_stack[-1] == "(" + ): + if collector_level > 0: collector_level -= 1 demarc_count -= 1 demarc_stack.pop() @@ -413,6 +480,12 @@ def _parse_path(self, seeking_collector_operator = True continue + if segment_type is PathSegmentTypes.KEYWORD_SEARCH: + demarc_count -= 1 + demarc_stack.pop() + next_char_must_be = "]" + continue + elif demarc_count == 0 and char == "[": # Array INDEX/SLICE or SEARCH if segment_id: @@ -557,7 +630,7 @@ def _parse_path(self, and char == "]" and demarc_stack[-1] == "[" ): - # Store the INDEX, SLICE, or SEARCH parameters + # Store the INDEX, SLICE, SEARCH, or KEYWORD_SEARCH parameters if ( segment_type is PathSegmentTypes.INDEX and ':' not in segment_id @@ -586,6 +659,15 @@ def _parse_path(self, 
SearchTerms(search_inverted, search_method, search_attr, segment_id) )) + elif ( + segment_type is PathSegmentTypes.KEYWORD_SEARCH + and search_keyword + ): + path_segments.append(( + segment_type, + SearchKeywordTerms(search_inverted, search_keyword, + segment_id) + )) else: path_segments.append((segment_type, segment_id)) @@ -594,6 +676,8 @@ demarc_stack.pop() demarc_count -= 1 search_method = None + search_inverted = False + search_keyword = None continue elif demarc_count < 1 and char == pathsep: @@ -648,9 +732,9 @@ @staticmethod def _expand_splats( - yaml_path: str, segment_id: str, - segment_type: Optional[PathSegmentTypes] = None - ) -> tuple: + yaml_path: str, segment_id: PathAttributes, + segment_type: PathSegmentTypes + ) -> PathSegment: """ Replace segment IDs with search operators when * is present. @@ -660,12 +744,12 @@ 3. segment_type (Optional[PathSegmentTypes]) Pending predetermined type of the segment under evaluation. - Returns: (tuple) Coallesced YAML Path segment. + Returns: (PathSegment) Coalesced YAML Path segment. """ - coal_type = segment_type - coal_value: Union[str, SearchTerms, None] = segment_id + coal_type: PathSegmentTypes = segment_type + coal_value: PathAttributes = segment_id - if '*' in segment_id: + if isinstance(segment_id, str) and '*' in segment_id: splat_count = segment_id.count("*") splat_pos = segment_id.index("*") segment_len = len(segment_id) @@ -755,8 +839,21 @@ ppath += "[&{}]".format(segment_attrs) else: ppath += "&{}".format(segment_attrs) - elif segment_type == PathSegmentTypes.SEARCH: + elif segment_type == PathSegmentTypes.KEYWORD_SEARCH: ppath += str(segment_attrs) + elif (segment_type == PathSegmentTypes.SEARCH + and isinstance(segment_attrs, SearchTerms)): + terms: SearchTerms = segment_attrs + if (terms.method == PathSearchMethods.REGEX + and terms.attribute == "." 
+ and terms.term == ".*" + and not terms.inverted + ): + if add_sep: + ppath += pathsep + ppath += "*" + else: + ppath += str(segment_attrs) elif segment_type == PathSegmentTypes.COLLECTOR: ppath += str(segment_attrs) elif segment_type == PathSegmentTypes.TRAVERSE: @@ -786,7 +883,6 @@ def strip_path_prefix(path: "YAMLPath", prefix: "YAMLPath") -> "YAMLPath": if str(prefix) == "/": return path - prefix.seperator = PathSeperators.FSLASH path.seperator = PathSeperators.FSLASH prefix_str = str(prefix) path_str = str(path)