From 78b6536f649a305915e0d0fe84b12029cac5919b Mon Sep 17 00:00:00 2001
From: Christophe Benz
Date: Fri, 28 May 2021 21:24:54 +0200
Subject: [PATCH] Add script and methods to save YAML parameter tree on disk

---
Review notes. This free-form area lies between the three-dash separator and
the first file diff, so `git am` does not copy it into the commit message.

Changed in this revision of the patch:

* helpers.save_parameters_to_dir: write_text() now passes encoding="utf-8".
  yaml.dump() is called with allow_unicode=True, so the dumped text may
  contain non-ASCII characters, and Path.write_text() otherwise falls back to
  the locale's default encoding.
* normalize_yaml_parameters.main: --verbose was parsed but never read. It now
  selects the logging level (DEBUG when set, WARNING otherwise).
* normalize_yaml_parameters.main: target_dir is created with parents = True,
  so a nested target path whose parent does not exist yet no longer fails.
* Every change keeps the number of added lines of its hunk, so the @@ ranges
  and the diffstat below are unchanged. The post-image blob ids on the
  "index" lines still refer to the original commit.

Left as is, to be confirmed by the author:

* Parameter and ParameterNode are imported by the new script but not used.
* save_parameters_to_dir applies the "/" operator to dir_path, so it
  presumably expects a pathlib.Path and not a str.
* The date-unquoting regex runs over the whole dumped text, not only over
  mapping keys; a quoted date followed by a colon inside a value would be
  rewritten as well.
* Parameter.values_as_yaml() emits only {"value": ...} for each instant.
* The to_yaml() methods call helpers._without_none_values; parameter.py,
  parameter_node.py and parameter_scale.py are assumed to import helpers
  already (their import lines are not part of these hunks).

 openfisca_core/parameters/__init__.py         |  2 +-
 openfisca_core/parameters/config.py           | 15 ++++++
 openfisca_core/parameters/helpers.py          | 30 +++++++++++
 openfisca_core/parameters/parameter.py        | 16 ++++++
 openfisca_core/parameters/parameter_node.py   |  8 +++
 openfisca_core/parameters/parameter_scale.py  |  8 +++
 .../parameters/parameter_scale_bracket.py     | 13 ++++-
 .../scripts/normalize_yaml_parameters.py      | 52 +++++++++++++++++++
 8 files changed, 142 insertions(+), 2 deletions(-)
 create mode 100644 openfisca_core/scripts/normalize_yaml_parameters.py

diff --git a/openfisca_core/parameters/__init__.py b/openfisca_core/parameters/__init__.py
index 040ae47056..57b32a2893 100644
--- a/openfisca_core/parameters/__init__.py
+++ b/openfisca_core/parameters/__init__.py
@@ -33,7 +33,7 @@
     )
 
 from .at_instant_like import AtInstantLike  # noqa: F401
-from .helpers import contains_nan, load_parameter_file  # noqa: F401
+from .helpers import contains_nan, load_parameter_file, save_parameters_to_dir  # noqa: F401
 from .parameter_at_instant import ParameterAtInstant  # noqa: F401
 from .parameter_node_at_instant import ParameterNodeAtInstant  # noqa: F401
 from .vectorial_parameter_node_at_instant import VectorialParameterNodeAtInstant  # noqa: F401
diff --git a/openfisca_core/parameters/config.py b/openfisca_core/parameters/config.py
index e9a3041ae8..c15540f476 100644
--- a/openfisca_core/parameters/config.py
+++ b/openfisca_core/parameters/config.py
@@ -2,6 +2,7 @@
 import os
 import yaml
 import typing
+from collections import OrderedDict
 
 from openfisca_core.warnings import LibYAMLWarning
 
@@ -42,3 +43,17 @@ def dict_no_duplicate_constructor(loader, node, deep = False):
 
 
 yaml.add_constructor(yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG, dict_no_duplicate_constructor, Loader = Loader)
+
+
+def represent_ordereddict(dumper, data):
+    value = []
+
+    for item_key, item_value in data.items():
+        node_key = dumper.represent_data(item_key)
+        node_value = dumper.represent_data(item_value)
+
+        value.append((node_key, node_value))
+
+    return yaml.nodes.MappingNode(u'tag:yaml.org,2002:map', value)
+
+yaml.add_representer(OrderedDict, represent_ordereddict)
diff --git a/openfisca_core/parameters/helpers.py b/openfisca_core/parameters/helpers.py
index 0d0698ca36..f9b43e328a 100644
--- a/openfisca_core/parameters/helpers.py
+++ b/openfisca_core/parameters/helpers.py
@@ -1,7 +1,9 @@
 import os
+import re
 import traceback
 
 import numpy
+import yaml
 
 from openfisca_core import parameters, periods
 from openfisca_core.errors import ParameterParsingError
@@ -29,6 +31,31 @@ def load_parameter_file(file_path, name = ''):
     return _parse_child(name, data, file_path)
 
 
+def save_parameters_to_dir(node, dir_path):
+    def dump_node(file_basename: str):
+        file_path = dir_path / "{}.yaml".format(file_basename)
+        node_yaml = node.to_yaml()
+        if node_yaml:
+            node_text = yaml.dump(node_yaml, allow_unicode=True, default_flow_style=False, sort_keys=False)
+            # Hack: strip the quotes around date-like dict keys ('YYYY-MM-DD':) in the dumped text.
+            # Cf https://github.com/fpagnoux/baremes-ipp-parser/blob/master/bareme_ipp_parsers/commons.py
+            node_text = re.sub(r"'(\d{4}-\d{2}-\d{2})':", r"\1:", node_text)
+            file_path.write_text(node_text, encoding="utf-8")
+
+    if isinstance(node, (parameters.Parameter, parameters.ParameterScale)):
+        file_basename = node.name.split(".")[-1]
+        dump_node(file_basename=file_basename)
+    else:
+        dump_node(file_basename="index")
+        for name, sub_node in node.children.items():
+            if isinstance(sub_node, (parameters.Parameter, parameters.ParameterScale)):
+                save_parameters_to_dir(node=sub_node, dir_path=dir_path)
+            else:
+                sub_dir = dir_path / name
+                sub_dir.mkdir(exist_ok=True)
+                save_parameters_to_dir(node=sub_node, dir_path=sub_dir)
+
+
 def _compose_name(path, child_name = None, item_name = None):
     if not path:
         return child_name
@@ -97,3 +124,6 @@ def _validate_parameter(parameter, data, data_type = None, allowed_keys = None):
                     .format(key, parameter.name, list(allowed_keys)),
                     parameter.file_path
                     )
+
+def _without_none_values(d):
+    return {k: v for k, v in d.items() if v is not None}
diff --git a/openfisca_core/parameters/parameter.py b/openfisca_core/parameters/parameter.py
index 85d367ae67..aac7760bca 100644
--- a/openfisca_core/parameters/parameter.py
+++ b/openfisca_core/parameters/parameter.py
@@ -1,6 +1,7 @@
 import copy
 import os
 import typing
+from collections import OrderedDict
 
 from openfisca_core import commons, periods
 from openfisca_core.errors import ParameterParsingError
@@ -169,6 +170,21 @@ def update(self, period = None, start = None, stop = None, value = None):
     def get_descendants(self):
         return iter(())
 
+    def to_yaml(self):
+        """Return a representation of the Parameter ready to be serialized to YAML."""
+        return helpers._without_none_values({
+            "description": self.description,
+            "documentation": self.documentation,
+            "metadata": self.metadata or None,
+            "values": self.values_as_yaml(),
+        })
+
+    def values_as_yaml(self):
+        return OrderedDict([
+            (value.instant_str, {"value": value.value})
+            for value in self.values_list
+        ])
+
     def _get_at_instant(self, instant):
         for value_at_instant in self.values_list:
             if value_at_instant.instant_str <= instant:
diff --git a/openfisca_core/parameters/parameter_node.py b/openfisca_core/parameters/parameter_node.py
index e811dd6f84..ae13d6eaac 100644
--- a/openfisca_core/parameters/parameter_node.py
+++ b/openfisca_core/parameters/parameter_node.py
@@ -151,5 +151,13 @@ def clone(self):
 
         return clone
 
+    def to_yaml(self):
+        """Return a representation of the ParameterNode ready to be serialized to YAML."""
+        return helpers._without_none_values({
+            "description": self.description,
+            "documentation": self.documentation,
+            "metadata": self.metadata or None,
+        })
+
     def _get_at_instant(self, instant):
         return ParameterNodeAtInstant(self.name, self, instant)
diff --git a/openfisca_core/parameters/parameter_scale.py b/openfisca_core/parameters/parameter_scale.py
index d1cfc26379..a4826ab238 100644
--- a/openfisca_core/parameters/parameter_scale.py
+++ b/openfisca_core/parameters/parameter_scale.py
@@ -73,6 +73,14 @@ def clone(self):
 
         return clone
 
+    def to_yaml(self):
+        """Return a representation of the Scale ready to be serialized to YAML."""
+        return helpers._without_none_values({
+            "brackets": [bracket.to_yaml() for bracket in self.brackets],
+            "description": self.description,
+            "metadata": self.metadata or None,
+        })
+
     def _get_at_instant(self, instant):
         brackets = [bracket.get_at_instant(instant) for bracket in self.brackets]
 
diff --git a/openfisca_core/parameters/parameter_scale_bracket.py b/openfisca_core/parameters/parameter_scale_bracket.py
index 6d361d09fa..2bca4a102b 100644
--- a/openfisca_core/parameters/parameter_scale_bracket.py
+++ b/openfisca_core/parameters/parameter_scale_bracket.py
@@ -1,4 +1,6 @@
-from openfisca_core.parameters import ParameterNode
+from collections import OrderedDict
+
+from openfisca_core.parameters import helpers, ParameterNode
 
 
 class ParameterScaleBracket(ParameterNode):
@@ -7,3 +9,12 @@ class ParameterScaleBracket(ParameterNode):
     """
 
     _allowed_keys = set(['amount', 'threshold', 'rate', 'average_rate', 'base'])
+
+    def to_yaml(self):
+        """Return a representation of the Bracket ready to be serialized to YAML."""
+        yaml_dict = {}
+        for key in self._allowed_keys:
+            value = getattr(self, key, None)
+            if value is not None:
+                yaml_dict[key] = value.values_as_yaml()
+        return OrderedDict(sorted(helpers._without_none_values(yaml_dict).items()))
diff --git a/openfisca_core/scripts/normalize_yaml_parameters.py b/openfisca_core/scripts/normalize_yaml_parameters.py
new file mode 100644
index 0000000000..82af0a8ad1
--- /dev/null
+++ b/openfisca_core/scripts/normalize_yaml_parameters.py
@@ -0,0 +1,52 @@
+#! /usr/bin/env python
+
+
+"""Normalize a YAML parameter tree, loading it from a directory and re-writing it to another one.
+
+This allows in particular to ensure that each YAML file contains exactly one parameter.
+"""
+
+
+import argparse
+import logging
+from pathlib import Path
+import sys
+
+from openfisca_core.parameters import load_parameter_file, Parameter, ParameterNode, save_parameters_to_dir
+
+
+logger = logging.getLogger(__name__)
+
+
+def check_path_length(base_dir, max_path_length):
+    for path in base_dir.rglob("*.yaml"):
+        relative_path = path.relative_to(base_dir)
+        relative_path_len = len(str(relative_path))
+        if relative_path_len > max_path_length:
+            logger.error("%r length is %d but max length is %d", str(relative_path), relative_path_len, max_path_length)
+
+
+def main():
+    parser = argparse.ArgumentParser(description = __doc__)
+    parser.add_argument('--max-path-length', type = int, default = None,
+        help = "log error if path is longer than specified value")
+    parser.add_argument('-v', '--verbose', action = 'store_true', default = False, help = "increase output verbosity")
+    parser.add_argument('source_dir', type = Path, help = "directory with parameters to read")
+    parser.add_argument('target_dir', type = Path, help = "directory where parameters are written")
+    args = parser.parse_args()
+
+    if not args.source_dir.is_dir():
+        parser.error("Invalid source_dir")
+    if not args.target_dir.is_dir():
+        args.target_dir.mkdir(parents = True)
+
+    logging.basicConfig(level = logging.DEBUG if args.verbose else logging.WARNING)
+
+    parameters = load_parameter_file(args.source_dir)
+    save_parameters_to_dir(parameters, args.target_dir)
+
+    if args.max_path_length is not None:
+        check_path_length(base_dir = args.target_dir, max_path_length = args.max_path_length)
+
+if __name__ == "__main__":
+    sys.exit(main())