From c0f98ab6e2055a02fb4ed302efbeb13abcee5ea8 Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: Tue, 29 Nov 2022 16:01:04 +0100 Subject: [PATCH 1/5] Add doc/type checks to holders --- openfisca_tasks/install.mk | 7 +++---- setup.cfg | 4 ++-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/openfisca_tasks/install.mk b/openfisca_tasks/install.mk index 79f3938e44..192e38d417 100644 --- a/openfisca_tasks/install.mk +++ b/openfisca_tasks/install.mk @@ -10,8 +10,7 @@ uninstall: @pip freeze | grep -v "^-e" | sed "s/@.*//" | xargs pip uninstall -y ## Delete builds and compiled python files. -clean: \ - $(shell ls -d * | grep "build\|dist") \ - $(shell find . -name "*.pyc") +clean: @$(call print_help,$@:) - @rm -rf $? + @ls -d * | grep "build\|dist" | xargs rm -rf + @find . -name "*.pyc" | xargs rm -rf diff --git a/setup.cfg b/setup.cfg index 724c6529e1..638615b8b3 100644 --- a/setup.cfg +++ b/setup.cfg @@ -14,7 +14,7 @@ extend-ignore = D hang-closing = true ignore = E128,E251,F403,F405,E501,RST301,W503,W504 in-place = true -include-in-doctest = openfisca_core/commons openfisca_core/types +include-in-doctest = openfisca_core/commons openfisca_core/holders openfisca_core/types rst-directives = attribute, deprecated, seealso, versionadded, versionchanged rst-roles = any, attr, class, exc, func, meth, obj strictness = short @@ -41,7 +41,7 @@ skip_empty = true addopts = --doctest-modules --disable-pytest-warnings --showlocals doctest_optionflags = ELLIPSIS IGNORE_EXCEPTION_DETAIL NUMBER NORMALIZE_WHITESPACE python_files = **/*.py -testpaths = openfisca_core/commons openfisca_core/types tests +testpaths = openfisca_core/commons openfisca_core/holders openfisca_core/types tests [mypy] ignore_missing_imports = True From 54fc10c33ff78c1a327373cb090556598fa5a0aa Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: Tue, 29 Nov 2022 16:18:59 +0100 Subject: [PATCH 2/5] Fix Holder.set_input test --- openfisca_core/holders/holder.py | 53 ++++++++++++++++++++++++++------ 
setup.cfg | 2 +- setup.py | 1 + 3 files changed, 45 insertions(+), 11 deletions(-) diff --git a/openfisca_core/holders/holder.py b/openfisca_core/holders/holder.py index 196b0612e1..9ffe4738cb 100644 --- a/openfisca_core/holders/holder.py +++ b/openfisca_core/holders/holder.py @@ -1,5 +1,6 @@ from __future__ import annotations +from typing import Any, Sequence, Union from typing_extensions import TypedDict import os @@ -12,6 +13,7 @@ from openfisca_core.errors import PeriodMismatchError from openfisca_core.data_storage import InMemoryStorage, OnDiskStorage from openfisca_core.indexed_enums import Enum +from openfisca_core.types import Period class Holder: @@ -134,21 +136,52 @@ def get_known_periods(self): return list(self._memory_storage.get_known_periods()) + list(( self._disk_storage.get_known_periods() if self._disk_storage else [])) - def set_input(self, period, array): - """ - Set a variable's value (``array``) for a given period (``period``) + def set_input( + self, + period: Period, + array: Union[numpy.ndarray, Sequence[Any]], + ) -> numpy.ndarray: + """Set a Variable's array of values of a given Period. + + Args: + period: The period at which the value is set. + array: The input value for the variable. + + Returns: + The set input array. + + Note: + If a ``set_input`` property has been set for the variable, this + method may accept inputs for periods not matching the + ``definition_period`` of the Variable. To read + more about this, check the `documentation`_. + + Examples: + >>> from openfisca_core import entities, populations, variables + >>> entity = entities.Entity("", "", "", "") + + >>> class MyVariable(variables.Variable): + ... definition_period = "year" + ... entity = entity + ... 
value_type = int + + >>> variable = MyVariable() - :param array: the input value for the variable - :param period: the period at which the value is setted + >>> population = populations.Population(entity) + >>> population.count = 2 - Example : + >>> holder = Holder(variable, population) + >>> holder.set_input("2018", numpy.array([12.5, 14])) + >>> holder.get_array("2018") + array([12, 14], dtype=int32) - >>> holder.set_input([12, 14], '2018-04') - >>> holder.get_array('2018-04') - >>> [12, 14] + >>> holder.set_input("2018", [12.5, 14]) + >>> holder.get_array("2018") + array([12, 14], dtype=int32) + .. _documentation: + https://openfisca.org/doc/coding-the-legislation/35_periods.html#set-input-automatically-process-variable-inputs-defined-for-periods-not-matching-the-definition-period - If a ``set_input`` property has been set for the variable, this method may accept inputs for periods not matching the ``definition_period`` of the variable. To read more about this, check the `documentation <https://openfisca.org/doc/coding-the-legislation/35_periods.html#set-input-automatically-process-variable-inputs-defined-for-periods-not-matching-the-definition-period>`_. 
""" period = periods.period(period) diff --git a/setup.cfg b/setup.cfg index 638615b8b3..3b29890570 100644 --- a/setup.cfg +++ b/setup.cfg @@ -38,7 +38,7 @@ skip_covered = true skip_empty = true [tool:pytest] -addopts = --doctest-modules --disable-pytest-warnings --showlocals +addopts = --doctest-modules --disable-pytest-warnings --showlocals --xdoctest doctest_optionflags = ELLIPSIS IGNORE_EXCEPTION_DETAIL NUMBER NORMALIZE_WHITESPACE python_files = **/*.py testpaths = openfisca_core/commons openfisca_core/holders openfisca_core/types tests diff --git a/setup.py b/setup.py index ad2c48f5c7..a4eae4efae 100644 --- a/setup.py +++ b/setup.py @@ -44,6 +44,7 @@ 'openfisca-extension-template >= 1.2.0rc0, < 2.0.0', 'pycodestyle >= 2.8.0, < 2.9.0', 'pylint == 2.10.2', + 'xdoctest >= 1.0.0, < 2.0.0', ] + api_requirements setup( From 8aa1b39f762e16bb3def0fd5050ab4379e135c13 Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: Tue, 29 Nov 2022 16:48:04 +0100 Subject: [PATCH 3/5] Fix Holder.get_memory_usage test --- openfisca_core/holders/holder.py | 59 ++++++++++++++++++++++---------- openfisca_tasks/test_code.mk | 4 +++ setup.cfg | 4 +-- 3 files changed, 46 insertions(+), 21 deletions(-) diff --git a/openfisca_core/holders/holder.py b/openfisca_core/holders/holder.py index 9ffe4738cb..67db4056ae 100644 --- a/openfisca_core/holders/holder.py +++ b/openfisca_core/holders/holder.py @@ -91,25 +91,46 @@ def get_array(self, period): if self._disk_storage: return self._disk_storage.get(period) - def get_memory_usage(self): - """ - Get data about the virtual memory usage of the holder. - - :returns: Memory usage data - :rtype: dict - - Example: - - >>> holder.get_memory_usage() - >>> { - >>> 'nb_arrays': 12, # The holder contains the variable values for 12 different periods - >>> 'nb_cells_by_array': 100, # There are 100 entities (e.g. 
persons) in our simulation - >>> 'cell_size': 8, # Each value takes 8B of memory - >>> 'dtype': dtype('float64') # Each value is a float 64 - >>> 'total_nb_bytes': 10400 # The holder uses 10.4kB of virtual memory - >>> 'nb_requests': 24 # The variable has been computed 24 times - >>> 'nb_requests_by_array': 2 # Each array stored has been on average requested twice - >>> } + def get_memory_usage(self) -> MemoryUsage: + """Get data about the virtual memory usage of the Holder. + + Returns: + Memory usage data. + + Examples: + >>> from pprint import pprint + + >>> from openfisca_core import ( + ... entities, + ... populations, + ... simulations, + ... taxbenefitsystems, + ... variables, + ... ) + + >>> entity = entities.Entity("", "", "", "") + + >>> class MyVariable(variables.Variable): + ... definition_period = "year" + ... entity = entity + ... value_type = int + + >>> population = populations.Population(entity) + >>> variable = MyVariable() + >>> holder = Holder(variable, population) + + >>> tbs = taxbenefitsystems.TaxBenefitSystem([entity]) + >>> entities = {entity.key: population} + >>> simulation = simulations.Simulation(tbs, entities) + >>> holder.simulation = simulation + + >>> pprint(holder.get_memory_usage(), indent = 3) + { 'cell_size': nan, + 'dtype': <class 'numpy.int32'>, + 'nb_arrays': 0, + 'nb_cells_by_array': 0, + 'total_nb_bytes': 0... + """ usage = MemoryUsage({ diff --git a/openfisca_tasks/test_code.mk b/openfisca_tasks/test_code.mk index 1615c8dba4..500cdcb073 100644 --- a/openfisca_tasks/test_code.mk +++ b/openfisca_tasks/test_code.mk @@ -22,6 +22,10 @@ test-code: test-core test-country test-extension ## Run openfisca-core tests. 
test-core: $(shell pytest --quiet --quiet --collect-only 2> /dev/null | cut -f 1 -d ":") @$(call print_help,$@:) + @pytest --quiet --capture=no --xdoctest --xdoctest-verbose=0 \ + openfisca_core/commons \ + openfisca_core/holders \ + openfisca_core/types @PYTEST_ADDOPTS="$${PYTEST_ADDOPTS} ${pytest_args}" \ coverage run -m \ ${openfisca} test $? \ diff --git a/setup.cfg b/setup.cfg index 3b29890570..4e793a0249 100644 --- a/setup.cfg +++ b/setup.cfg @@ -38,10 +38,10 @@ skip_covered = true skip_empty = true [tool:pytest] -addopts = --doctest-modules --disable-pytest-warnings --showlocals --xdoctest +addopts = --doctest-modules --disable-pytest-warnings --showlocals doctest_optionflags = ELLIPSIS IGNORE_EXCEPTION_DETAIL NUMBER NORMALIZE_WHITESPACE python_files = **/*.py -testpaths = openfisca_core/commons openfisca_core/holders openfisca_core/types tests +testpaths = tests [mypy] ignore_missing_imports = True From a338ee9e77e4ac04b55d3332a65080e194b43d60 Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: Tue, 29 Nov 2022 16:57:05 +0100 Subject: [PATCH 4/5] Move MemoryStorage to file --- openfisca_core/holders/holder.py | 47 ++++++++++++-------------- openfisca_core/holders/memory_usage.py | 13 +++++++ 2 files changed, 34 insertions(+), 26 deletions(-) create mode 100644 openfisca_core/holders/memory_usage.py diff --git a/openfisca_core/holders/holder.py b/openfisca_core/holders/holder.py index 67db4056ae..bee8e5c495 100644 --- a/openfisca_core/holders/holder.py +++ b/openfisca_core/holders/holder.py @@ -1,7 +1,6 @@ from __future__ import annotations from typing import Any, Sequence, Union -from typing_extensions import TypedDict import os import warnings @@ -9,11 +8,17 @@ import numpy import psutil -from openfisca_core import commons, periods, tools -from openfisca_core.errors import PeriodMismatchError -from openfisca_core.data_storage import InMemoryStorage, OnDiskStorage -from openfisca_core.indexed_enums import Enum -from openfisca_core.types import Period +from 
openfisca_core import ( + errors, + commons, + data_storage as storage, + indexed_enums as enums, + periods, + tools, + types, + ) + +from .memory_usage import MemoryUsage class Holder: @@ -25,7 +30,7 @@ def __init__(self, variable, population): self.population = population self.variable = variable self.simulation = population.simulation - self._memory_storage = InMemoryStorage(is_eternal = (self.variable.definition_period == periods.ETERNITY)) + self._memory_storage = storage.InMemoryStorage(is_eternal = (self.variable.definition_period == periods.ETERNITY)) # By default, do not activate on-disk storage, or variable dropping self._disk_storage = None @@ -60,7 +65,7 @@ def create_disk_storage(self, directory = None, preserve = False): storage_dir = os.path.join(directory, self.variable.name) if not os.path.isdir(storage_dir): os.mkdir(storage_dir) - return OnDiskStorage( + return storage.OnDiskStorage( storage_dir, is_eternal = (self.variable.definition_period == periods.ETERNITY), preserve_storage_dir = preserve @@ -133,10 +138,10 @@ def get_memory_usage(self) -> MemoryUsage: """ - usage = MemoryUsage({ - "nb_cells_by_array": self.population.count, - "dtype": self.variable.dtype, - }) + usage = MemoryUsage( + nb_cells_by_array = self.population.count, + dtype = self.variable.dtype, + ) usage.update(self._memory_storage.get_memory_usage()) @@ -159,7 +164,7 @@ def get_known_periods(self): def set_input( self, - period: Period, + period: types.Period, array: Union[numpy.ndarray, Sequence[Any]], ) -> numpy.ndarray: """Set a Variable's array of values of a given Period. @@ -214,7 +219,7 @@ def set_input( self.variable.name, self.variable.definition_period ) - raise PeriodMismatchError( + raise errors.PeriodMismatchError( self.variable.name, period, self.variable.definition_period, @@ -242,7 +247,7 @@ def _to_array(self, value): raise ValueError( 'Unable to set value "{}" for variable "{}", as its length is {} while there are {} {} in the simulation.' 
.format(value, self.variable.name, len(value), self.population.count, self.population.entity.plural)) - if self.variable.value_type == Enum: + if self.variable.value_type == enums.Enum: value = self.variable.possible_values.encode(value) if value.dtype != self.variable.dtype: try: @@ -267,7 +272,7 @@ def _set(self, period, value): f'If you are the maintainer of "{name}", you can consider adding it a set_input attribute to enable automatic period casting.' ]) - raise PeriodMismatchError( + raise errors.PeriodMismatchError( self.variable.name, period, self.variable.definition_period, @@ -302,13 +307,3 @@ def default_array(self): """ return self.variable.default_array(self.population.count) - - -class MemoryUsage(TypedDict, total = False): - cell_size: int - dtype: numpy.dtype - nb_arrays: int - nb_cells_by_array: int - nb_requests: int - nb_requests_by_array: int - total_nb_bytes: int diff --git a/openfisca_core/holders/memory_usage.py b/openfisca_core/holders/memory_usage.py new file mode 100644 index 0000000000..0cac4e24c1 --- /dev/null +++ b/openfisca_core/holders/memory_usage.py @@ -0,0 +1,13 @@ +from typing_extensions import TypedDict + +import numpy + + +class MemoryUsage(TypedDict, total = False): + cell_size: int + dtype: numpy.dtype + nb_arrays: int + nb_cells_by_array: int + nb_requests: int + nb_requests_by_array: int + total_nb_bytes: int From 19d26df229a132bc53813fe6c0068c80261a34b6 Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: Tue, 29 Nov 2022 17:05:52 +0100 Subject: [PATCH 5/5] Fix types --- openfisca_core/holders/__init__.py | 1 + openfisca_core/holders/holder.py | 4 ++-- openfisca_core/populations/population.py | 10 +--------- openfisca_core/types/_domain.py | 9 +++++++++ 4 files changed, 13 insertions(+), 11 deletions(-) diff --git a/openfisca_core/holders/__init__.py b/openfisca_core/holders/__init__.py index a7d46e38a6..8bd7722ba3 100644 --- a/openfisca_core/holders/__init__.py +++ b/openfisca_core/holders/__init__.py @@ -23,3 +23,4 @@ from 
.helpers import set_input_dispatch_by_period, set_input_divide_by_period # noqa: F401 from .holder import Holder # noqa: F401 +from .memory_usage import MemoryUsage # noqa: F401 diff --git a/openfisca_core/holders/holder.py b/openfisca_core/holders/holder.py index bee8e5c495..ae7e3fbcec 100644 --- a/openfisca_core/holders/holder.py +++ b/openfisca_core/holders/holder.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import Any, Sequence, Union +from typing import Any, Optional, Sequence, Union import os import warnings @@ -166,7 +166,7 @@ def set_input( self, period: types.Period, array: Union[numpy.ndarray, Sequence[Any]], - ) -> numpy.ndarray: + ) -> Optional[numpy.ndarray]: """Set a Variable's array of values of a given Period. Args: diff --git a/openfisca_core/populations/population.py b/openfisca_core/populations/population.py index 68d85eb5c7..cb243aff70 100644 --- a/openfisca_core/populations/population.py +++ b/openfisca_core/populations/population.py @@ -8,7 +8,7 @@ import numpy from openfisca_core import periods, projectors -from openfisca_core.holders import Holder +from openfisca_core.holders import Holder, MemoryUsage from openfisca_core.projectors import Projector from openfisca_core.types import Array, Entity, Period, Role, Simulation @@ -272,11 +272,3 @@ class Calculate(NamedTuple): class MemoryUsageByVariable(TypedDict, total = False): by_variable: Dict[str, MemoryUsage] total_nb_bytes: int - - -class MemoryUsage(TypedDict, total = False): - cell_size: int - dtype: numpy.dtype - nb_arrays: int - nb_cells_by_array: int - total_nb_bytes: int diff --git a/openfisca_core/types/_domain.py b/openfisca_core/types/_domain.py index 3507b42b46..404c133a29 100644 --- a/openfisca_core/types/_domain.py +++ b/openfisca_core/types/_domain.py @@ -41,10 +41,19 @@ def get_memory_usage(self) -> Any: """Abstract method.""" +class Instant(Protocol): + """Instant protocol.""" + + @typing_extensions.runtime_checkable class Period(Protocol): """Period 
protocol.""" + @property + @abc.abstractmethod + def unit(self) -> Instant: + """Abstract method.""" + class Population(Protocol): """Population protocol."""