Skip to content

Commit

Permalink
Merge pull request #261 from 23andMe/feature/add-exclude
Browse files Browse the repository at this point in the history
Add --exclude argument to CLI
  • Loading branch information
cblakkan authored Jan 2, 2025
2 parents b44ed27 + a90c7e8 commit c989fab
Show file tree
Hide file tree
Showing 5 changed files with 115 additions and 38 deletions.
34 changes: 21 additions & 13 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
Yamale (ya·ma·lē)
=================
[![Build Status](https://github.com/23andMe/Yamale/actions/workflows/run-tests.yml/badge.svg)](https://github.com/23andMe/Yamale/actions/workflows/run-tests.yml)
[![PyPI](https://img.shields.io/pypi/v/yamale.svg)](https://pypi.python.org/pypi/yamale)
[![downloads](https://static.pepy.tech/badge/yamale/month)](https://pepy.tech/project/yamale)
[![versions](https://img.shields.io/pypi/pyversions/yamale.svg)](https://github.com/yamale/yamale)
[![license](https://img.shields.io/github/license/23andMe/yamale.svg)](https://github.com/23andMe/Yamale/blob/master/LICENSE)

| :warning: Ensure that your schema definitions come from internal or trusted sources. Yamale does not protect against intentionally malicious schemas. |
|:------------|
Expand All @@ -11,8 +16,6 @@ A schema and validator for YAML.
What's YAML? See the current spec [here](http://www.yaml.org/spec/1.2/spec.html) and an introduction
to the syntax [here](https://github.com/Animosity/CraftIRC/wiki/Complete-idiot's-introduction-to-yaml).

[![Build Status](https://github.com/23andMe/Yamale/actions/workflows/run-tests.yml/badge.svg)](https://github.com/23andMe/Yamale/actions/workflows/run-tests.yml)
[![PyPI](https://img.shields.io/pypi/v/yamale.svg)](https://pypi.python.org/pypi/yamale)

Requirements
------------
Expand All @@ -23,8 +26,10 @@ Requirements
Install
-------
### pip
```bash
```
$ pip install yamale
# or to include ruamel.yaml as a dependency
$ pip install yamale[ruamel]
```

NOTE: Some platforms, e.g., Mac OS, may ship with only Python 2 and may not have pip installed.
Expand All @@ -49,25 +54,28 @@ looking up the directory tree until it finds one. If Yamale can not find a schem

Usage:

```bash
usage: yamale [-h] [-s SCHEMA] [-n CPU_NUM] [-p PARSER] [--no-strict] [PATH]
```
usage: yamale [-h] [-s SCHEMA] [-e PATTERN] [-p PARSER] [-n CPU_NUM] [-x] [-v] [-V] [PATH ...]
Validate yaml files.
positional arguments:
PATH folder to validate. Default is current directory.
PATH Paths to validate, either directories or files. Default is the current directory.
optional arguments:
options:
-h, --help show this help message and exit
-s SCHEMA, --schema SCHEMA
filename of schema. Default is schema.yaml.
-n CPU_NUM, --cpu-num CPU_NUM
number of CPUs to use. Default is 4.
-e PATTERN, --exclude PATTERN
Python regex used to exclude files from validation. Any substring match of a file's absolute path will be excluded. Uses
default Python3 regex. Option can be supplied multiple times.
-p PARSER, --parser PARSER
YAML library to load files. Choices are "ruamel" or
"pyyaml" (default).
--no-strict Disable strict mode, unexpected elements in the data
will be accepted.
YAML library to load files. Choices are "ruamel" or "pyyaml" (default).
-n CPU_NUM, --cpu-num CPU_NUM
Number of child processes to spawn for validation. Default is 4. 'auto' to use CPU count.
-x, --no-strict Disable strict mode, unexpected elements in the data will be accepted.
-v, --verbose show verbose information
-V, --version show program's version number and exit
```

### API
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
packages=find_packages(),
include_package_data=True,
install_requires=["pyyaml"],
extras_requires={"ruamel": ["ruamel.yaml"]},
extras_require={"ruamel": ["ruamel.yaml"]},
python_requires=">=3.8",
entry_points={
"console_scripts": ["yamale=yamale.command_line:main"],
Expand Down
2 changes: 1 addition & 1 deletion yamale/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
5.3.0
6.0.0
91 changes: 68 additions & 23 deletions yamale/command_line.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@
import argparse
import glob
import os
from multiprocessing import Pool
import re
import multiprocessing
from .yamale_error import YamaleError
from .schema.validationresults import Result
from .version import __version__
Expand Down Expand Up @@ -64,31 +65,32 @@ def _find_schema(data_path, schema_name):
return _find_data_path_schema(data_path, schema_name)


def _validate_single(yaml_path, schema_name, parser, strict):
print("Validating %s..." % yaml_path)
def _validate_file(yaml_path, schema_name, parser, strict, should_exclude):
if should_exclude(yaml_path):
return
s = _find_schema(yaml_path, schema_name)
if not s:
raise ValueError("Invalid schema name for '{}' or schema not found.".format(schema_name))
_validate(s, yaml_path, parser, strict, True)


def _validate_dir(root, schema_name, cpus, parser, strict):
pool = Pool(processes=cpus)
def _validate_dir(root, schema_name, cpus, parser, strict, should_exclude):
pool = multiprocessing.Pool(processes=cpus)
res = []
error_messages = []
print("Finding yaml files...")
for root, dirs, files in os.walk(root):
for root, _, files in os.walk(root):
for f in files:
if (f.endswith(".yaml") or f.endswith(".yml")) and f != schema_name:
d = os.path.join(root, f)
s = _find_schema(d, schema_name)
if s:
res.append(pool.apply_async(_validate, (s, d, parser, strict, False)))
yaml_path = os.path.join(root, f)
if should_exclude(yaml_path):
continue
schema_path = _find_schema(yaml_path, schema_name)
if schema_path:
res.append(pool.apply_async(_validate, (schema_path, yaml_path, parser, strict, False)))
else:
print("No schema found for: %s" % d)
print(f"No schema found for: {yaml_path}")

print("Found %s yaml files." % len(res))
print("Validating...")
print(f"Found {len(res)} yaml files to validate...")
for r in res:
sub_results = r.get(timeout=300)
error_messages.extend([str(sub_result) for sub_result in sub_results if not sub_result.isValid()])
Expand All @@ -98,16 +100,34 @@ def _validate_dir(root, schema_name, cpus, parser, strict):
raise ValueError("\n----\n".join(set(error_messages)))


def _router(paths, schema_name, cpus, parser, strict=True):
def _router(paths, schema_name, cpus, parser, excludes=None, strict=True, verbose=False):
EXCLUDE_REGEXES = tuple(re.compile(e) for e in excludes) if excludes else tuple()

def should_exclude(yaml_path):
has_match = any(pattern.search(yaml_path) for pattern in EXCLUDE_REGEXES)
if has_match and verbose:
print("Skipping validation for %s due to exclude pattern" % yaml_path)
return has_match

for path in paths:
path = os.path.abspath(path)
if os.path.isdir(path):
_validate_dir(path, schema_name, cpus, parser, strict)
abs_path = os.path.abspath(path)
if os.path.exists(abs_path):
print(f"Validating {path}...")
else:
raise ValueError(f"Path does not exist: {path}")

if os.path.isdir(abs_path):
_validate_dir(abs_path, schema_name, cpus, parser, strict, should_exclude)
else:
_validate_single(path, schema_name, parser, strict)
_validate_file(abs_path, schema_name, parser, strict, should_exclude)


def main():
def int_or_auto(num_cpu):
    """Convert the ``--cpu-num`` value: ``"auto"`` means the machine's CPU count.

    Any other value is passed to ``int()``, which raises ``ValueError`` for
    text that is not an integer.
    """
    return multiprocessing.cpu_count() if num_cpu == "auto" else int(num_cpu)

parser = argparse.ArgumentParser(description="Validate yaml files.")
parser.add_argument(
"paths",
Expand All @@ -116,21 +136,46 @@ def main():
nargs="*",
help="Paths to validate, either directories or files. Default is the current directory.",
)
parser.add_argument("-V", "--version", action="version", version=__version__)
parser.add_argument("-s", "--schema", default="schema.yaml", help="filename of schema. Default is schema.yaml.")
parser.add_argument("-n", "--cpu-num", default=4, type=int, help="number of CPUs to use. Default is 4.")
parser.add_argument(
"-e",
"--exclude",
metavar="PATTERN",
action="append",
help="Python regex used to exclude files from validation. Any substring match of a file's absolute path will be excluded. Uses default Python3 regex. Option can be supplied multiple times.",
)
parser.add_argument(
"-p",
"--parser",
default="pyyaml",
help='YAML library to load files. Choices are "ruamel" or "pyyaml" (default).',
)
parser.add_argument(
"--no-strict", action="store_true", help="Disable strict mode, unexpected elements in the data will be accepted."
"-n",
"--cpu-num",
default=4,
type=int_or_auto,
help="Number of child processes to spawn for validation. Default is 4. 'auto' to use CPU count.",
)
parser.add_argument(
"-x",
"--no-strict",
action="store_true",
help="Disable strict mode, unexpected elements in the data will be accepted.",
)
parser.add_argument("-v", "--verbose", action="store_true", help="show verbose information")
parser.add_argument("-V", "--version", action="version", version=__version__)
args = parser.parse_args()
try:
_router(args.paths, args.schema, args.cpu_num, args.parser, not args.no_strict)
_router(
paths=args.paths,
schema_name=args.schema,
cpus=args.cpu_num,
parser=args.parser,
excludes=args.exclude,
strict=not args.no_strict,
verbose=args.verbose,
)
except (SyntaxError, NameError, TypeError, ValueError) as e:
print("Validation failed!\n%s" % str(e))
exit(1)
Expand Down
24 changes: 24 additions & 0 deletions yamale/tests/test_command_line.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,19 @@ def test_multiple_paths_bad_yaml():
assert "map.bad: '12.5' is not a int." in e.value.message


def test_excludes():
    """An excluded file is skipped, so only the good fixture is validated."""
    command_line._router(
        paths=[
            "yamale/tests/command_line_fixtures/yamls/good.yaml",
            "yamale/tests/command_line_fixtures/yamls/bad.yaml",
        ],
        schema_name="schema.yaml",
        # Patterns are passed as a list, matching what argparse's
        # action="append" produces for repeated --exclude options.
        excludes=["bad.yaml"],
        cpus=1,
        parser="PyYAML",
    )


@pytest.mark.parametrize("parser", parsers)
def test_good_relative_yaml(parser):
command_line._router(
Expand Down Expand Up @@ -126,6 +139,17 @@ def test_bad_dir():
command_line._router("yamale/tests/command_line_fixtures/yamls", "schema.yaml", 4, "PyYAML")


def test_bad_path_raises():
    """A path that does not exist is reported with a ValueError."""
    with pytest.raises(ValueError) as e:
        command_line._router(
            paths=["yamale/tests/command_line_fixtures/yamls/a path that does not exist.yaml"],
            schema_name="schema.yaml",
            cpus=1,
            parser="PyYAML",
        )
    # Check the raised exception's own message, not the string form of the
    # pytest ExceptionInfo wrapper. The assert must sit outside the `with`
    # block, otherwise it would never run once the call raises.
    assert "Path does not exist" in str(e.value)


def test_bad_strict():
with pytest.raises(ValueError) as e:
command_line._router(
Expand Down

0 comments on commit c989fab

Please sign in to comment.