Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

test: improve holders module doctests #1051

Draft
wants to merge 5 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions openfisca_core/holders/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,4 @@

from .helpers import set_input_dispatch_by_period, set_input_divide_by_period # noqa: F401
from .holder import Holder # noqa: F401
from .memory_usage import MemoryUsage # noqa: F401
155 changes: 102 additions & 53 deletions openfisca_core/holders/holder.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,24 @@
from __future__ import annotations

from typing_extensions import TypedDict
from typing import Any, Optional, Sequence, Union

import os
import warnings

import numpy
import psutil

from openfisca_core import commons, periods, tools
from openfisca_core.errors import PeriodMismatchError
from openfisca_core.data_storage import InMemoryStorage, OnDiskStorage
from openfisca_core.indexed_enums import Enum
from openfisca_core import (
errors,
commons,
data_storage as storage,
indexed_enums as enums,
periods,
tools,
types,
)

from .memory_usage import MemoryUsage


class Holder:
Expand All @@ -23,7 +30,7 @@ def __init__(self, variable, population):
self.population = population
self.variable = variable
self.simulation = population.simulation
self._memory_storage = InMemoryStorage(is_eternal = (self.variable.definition_period == periods.ETERNITY))
self._memory_storage = storage.InMemoryStorage(is_eternal = (self.variable.definition_period == periods.ETERNITY))

# By default, do not activate on-disk storage, or variable dropping
self._disk_storage = None
Expand Down Expand Up @@ -58,7 +65,7 @@ def create_disk_storage(self, directory = None, preserve = False):
storage_dir = os.path.join(directory, self.variable.name)
if not os.path.isdir(storage_dir):
os.mkdir(storage_dir)
return OnDiskStorage(
return storage.OnDiskStorage(
storage_dir,
is_eternal = (self.variable.definition_period == periods.ETERNITY),
preserve_storage_dir = preserve
Expand Down Expand Up @@ -89,31 +96,52 @@ def get_array(self, period):
if self._disk_storage:
return self._disk_storage.get(period)

def get_memory_usage(self):
"""
Get data about the virtual memory usage of the holder.

:returns: Memory usage data
:rtype: dict

Example:

>>> holder.get_memory_usage()
>>> {
>>> 'nb_arrays': 12, # The holder contains the variable values for 12 different periods
>>> 'nb_cells_by_array': 100, # There are 100 entities (e.g. persons) in our simulation
>>> 'cell_size': 8, # Each value takes 8B of memory
>>> 'dtype': dtype('float64') # Each value is a float 64
>>> 'total_nb_bytes': 10400 # The holder uses 10.4kB of virtual memory
>>> 'nb_requests': 24 # The variable has been computed 24 times
>>> 'nb_requests_by_array': 2 # Each array stored has been on average requested twice
>>> }
def get_memory_usage(self) -> MemoryUsage:
"""Get data about the virtual memory usage of the Holder.

Returns:
Memory usage data.

Examples:
>>> from pprint import pprint

>>> from openfisca_core import (
... entities,
... populations,
... simulations,
... taxbenefitsystems,
... variables,
... )

>>> entity = entities.Entity("", "", "", "")

>>> class MyVariable(variables.Variable):
... definition_period = "year"
... entity = entity
... value_type = int

>>> population = populations.Population(entity)
>>> variable = MyVariable()
>>> holder = Holder(variable, population)

>>> tbs = taxbenefitsystems.TaxBenefitSystem([entity])
>>> entities = {entity.key: population}
>>> simulation = simulations.Simulation(tbs, entities)
>>> holder.simulation = simulation

>>> pprint(holder.get_memory_usage(), indent = 3)
{ 'cell_size': nan,
'dtype': <class 'numpy.int32'>,
'nb_arrays': 0,
'nb_cells_by_array': 0,
'total_nb_bytes': 0...

"""

usage = MemoryUsage({
"nb_cells_by_array": self.population.count,
"dtype": self.variable.dtype,
})
usage = MemoryUsage(
nb_cells_by_array = self.population.count,
dtype = self.variable.dtype,
)

usage.update(self._memory_storage.get_memory_usage())

Expand All @@ -134,21 +162,52 @@ def get_known_periods(self):
return list(self._memory_storage.get_known_periods()) + list((
self._disk_storage.get_known_periods() if self._disk_storage else []))

def set_input(self, period, array):
"""
Set a variable's value (``array``) for a given period (``period``)
def set_input(
self,
period: types.Period,
array: Union[numpy.ndarray, Sequence[Any]],
) -> Optional[numpy.ndarray]:
"""Set a Variable's array of values for a given Period.

Args:
period: The period at which the value is set.
array: The input value for the variable.

Returns:
The set input array.

Note:
If a ``set_input`` property has been set for the variable, this
method may accept inputs for periods not matching the
``definition_period`` of the Variable. To read
more about this, check the `documentation`_.

:param array: the input value for the variable
:param period: the period at which the value is setted
Examples:
>>> from openfisca_core import entities, populations, variables
>>> entity = entities.Entity("", "", "", "")

Example :
>>> class MyVariable(variables.Variable):
... definition_period = "year"
... entity = entity
... value_type = int

>>> holder.set_input([12, 14], '2018-04')
>>> holder.get_array('2018-04')
>>> [12, 14]
>>> variable = MyVariable()

>>> population = populations.Population(entity)
>>> population.count = 2

>>> holder = Holder(variable, population)
>>> holder.set_input("2018", numpy.array([12.5, 14]))
>>> holder.get_array("2018")
array([12, 14], dtype=int32)

>>> holder.set_input("2018", [12.5, 14])
>>> holder.get_array("2018")
array([12, 14], dtype=int32)

.. _documentation:
https://openfisca.org/doc/coding-the-legislation/35_periods.html#set-input-automatically-process-variable-inputs-defined-for-periods-not-matching-the-definition-period

If a ``set_input`` property has been set for the variable, this method may accept inputs for periods not matching the ``definition_period`` of the variable. To read more about this, check the `documentation <https://openfisca.org/doc/coding-the-legislation/35_periods.html#set-input-automatically-process-variable-inputs-defined-for-periods-not-matching-the-definition-period>`_.
"""

period = periods.period(period)
Expand All @@ -160,7 +219,7 @@ def set_input(self, period, array):
self.variable.name,
self.variable.definition_period
)
raise PeriodMismatchError(
raise errors.PeriodMismatchError(
self.variable.name,
period,
self.variable.definition_period,
Expand Down Expand Up @@ -188,7 +247,7 @@ def _to_array(self, value):
raise ValueError(
'Unable to set value "{}" for variable "{}", as its length is {} while there are {} {} in the simulation.'
.format(value, self.variable.name, len(value), self.population.count, self.population.entity.plural))
if self.variable.value_type == Enum:
if self.variable.value_type == enums.Enum:
value = self.variable.possible_values.encode(value)
if value.dtype != self.variable.dtype:
try:
Expand All @@ -213,7 +272,7 @@ def _set(self, period, value):
f'If you are the maintainer of "{name}", you can consider adding it a set_input attribute to enable automatic period casting.'
])

raise PeriodMismatchError(
raise errors.PeriodMismatchError(
self.variable.name,
period,
self.variable.definition_period,
Expand Down Expand Up @@ -248,13 +307,3 @@ def default_array(self):
"""

return self.variable.default_array(self.population.count)


class MemoryUsage(TypedDict, total = False):
cell_size: int
dtype: numpy.dtype
nb_arrays: int
nb_cells_by_array: int
nb_requests: int
nb_requests_by_array: int
total_nb_bytes: int
13 changes: 13 additions & 0 deletions openfisca_core/holders/memory_usage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from typing_extensions import TypedDict

import numpy


class MemoryUsage(TypedDict, total = False):
    """Virtual memory usage data reported for a holder.

    Every key is optional (``total = False``), so a partial report can be
    built first and completed afterwards with ``dict.update``.

    """

    # Size in bytes of a single stored value. Typed as ``float`` because the
    # report of a holder without any stored array shows ``nan`` here.
    cell_size: float

    # Data type of the stored values.
    dtype: numpy.dtype

    # Number of arrays currently stored (one per known period).
    nb_arrays: int

    # Number of cells (entities) in each stored array.
    nb_cells_by_array: int

    # Number of times the variable has been requested.
    nb_requests: int

    # Number of requests per stored array.
    # NOTE(review): presumably an average, hence possibly fractional — confirm
    # against the code computing it before widening this annotation as well.
    nb_requests_by_array: int

    # Total number of bytes of virtual memory used.
    total_nb_bytes: int
10 changes: 1 addition & 9 deletions openfisca_core/populations/population.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import numpy

from openfisca_core import periods, projectors
from openfisca_core.holders import Holder
from openfisca_core.holders import Holder, MemoryUsage
from openfisca_core.projectors import Projector
from openfisca_core.types import Array, Entity, Period, Role, Simulation

Expand Down Expand Up @@ -272,11 +272,3 @@ class Calculate(NamedTuple):
class MemoryUsageByVariable(TypedDict, total = False):
by_variable: Dict[str, MemoryUsage]
total_nb_bytes: int


class MemoryUsage(TypedDict, total = False):
cell_size: int
dtype: numpy.dtype
nb_arrays: int
nb_cells_by_array: int
total_nb_bytes: int
9 changes: 9 additions & 0 deletions openfisca_core/types/_domain.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,19 @@ def get_memory_usage(self) -> Any:
"""Abstract method."""


class Instant(Protocol):
    """Instant protocol.

    Declares no members of its own; it serves as the return annotation of
    ``Period.unit``.

    """


@typing_extensions.runtime_checkable
class Period(Protocol):
    """Period protocol.

    Decorated as runtime-checkable, so it can be used with ``isinstance``.

    """

    # NOTE(review): ``unit`` is annotated as returning ``Instant`` — confirm
    # this is the intended type for the unit of a period.
    @property
    @abc.abstractmethod
    def unit(self) -> Instant:
        """Abstract method."""


class Population(Protocol):
"""Population protocol."""
Expand Down
7 changes: 3 additions & 4 deletions openfisca_tasks/install.mk
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,7 @@ uninstall:
@pip freeze | grep -v "^-e" | sed "s/@.*//" | xargs pip uninstall -y

## Delete builds and compiled python files.
clean: \
$(shell ls -d * | grep "build\|dist") \
$(shell find . -name "*.pyc")
clean:
@$(call print_help,$@:)
@rm -rf $?
@ls -d * | grep "build\|dist" | xargs rm -rf
@find . -name "*.pyc" | xargs rm -rf
4 changes: 4 additions & 0 deletions openfisca_tasks/test_code.mk
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@ test-code: test-core test-country test-extension
## Run openfisca-core tests.
test-core: $(shell pytest --quiet --quiet --collect-only 2> /dev/null | cut -f 1 -d ":")
@$(call print_help,$@:)
@pytest --quiet --capture=no --xdoctest --xdoctest-verbose=0 \
openfisca_core/commons \
openfisca_core/holders \
openfisca_core/types
@PYTEST_ADDOPTS="$${PYTEST_ADDOPTS} ${pytest_args}" \
coverage run -m \
${openfisca} test $? \
Expand Down
4 changes: 2 additions & 2 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ extend-ignore = D
hang-closing = true
ignore = E128,E251,F403,F405,E501,RST301,W503,W504
in-place = true
include-in-doctest = openfisca_core/commons openfisca_core/types
include-in-doctest = openfisca_core/commons openfisca_core/holders openfisca_core/types
rst-directives = attribute, deprecated, seealso, versionadded, versionchanged
rst-roles = any, attr, class, exc, func, meth, obj
strictness = short
Expand All @@ -41,7 +41,7 @@ skip_empty = true
addopts = --doctest-modules --disable-pytest-warnings --showlocals
doctest_optionflags = ELLIPSIS IGNORE_EXCEPTION_DETAIL NUMBER NORMALIZE_WHITESPACE
python_files = **/*.py
testpaths = openfisca_core/commons openfisca_core/types tests
testpaths = tests

[mypy]
ignore_missing_imports = True
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
'openfisca-extension-template >= 1.2.0rc0, < 2.0.0',
'pycodestyle >= 2.8.0, < 2.9.0',
'pylint == 2.10.2',
'xdoctest >= 1.0.0, < 2.0.0',
] + api_requirements

setup(
Expand Down
Loading