From e314ee85f7efe0f500f8e18001f7e7e110304617 Mon Sep 17 00:00:00 2001 From: opintosant Date: Mon, 28 Nov 2022 11:17:44 +0100 Subject: [PATCH 001/144] Initial 1D implementation --- skfda/representation/sparse.py | 578 +++++++++++++++++++++++++++++++++ 1 file changed, 578 insertions(+) create mode 100644 skfda/representation/sparse.py diff --git a/skfda/representation/sparse.py b/skfda/representation/sparse.py new file mode 100644 index 000000000..cda6be708 --- /dev/null +++ b/skfda/representation/sparse.py @@ -0,0 +1,578 @@ +"""Discretised functional data module. + +This module defines a class for representing discretized sparse data, +in which the observations may be made in different grid points in each +data function, and the overall density of the observations may be low + +""" +from __future__ import annotations + +import copy +import numbers +import warnings +from typing import ( + TYPE_CHECKING, + Any, + Optional, + Sequence, + Type, + TypeVar, + Union, + List, + Tuple, + cast, +) + +import findiff +import numpy as np +import pandas.api.extensions +import scipy.integrate +import scipy.stats.mstats +from matplotlib.figure import Figure + +from .._utils import _check_array_key, _int_to_real, _to_grid_points, constants +from ..typing._base import ( + DomainRange, + DomainRangeLike, + GridPoints, + GridPointsLike, + LabelTupleLike, +) +from ..typing._numpy import ArrayLike, NDArrayBool, NDArrayFloat, NDArrayInt +from ._functional_data import FData +from .grid import FDataGrid +from .evaluator import Evaluator +from .extrapolation import ExtrapolationLike +from .interpolation import SplineInterpolation + +if TYPE_CHECKING: + from .basis import Basis, FDataBasis + +T = TypeVar("T", bound='FDataSparse') + + + +class FDataSparse(FData): # noqa: WPS214 + #TODO Docstring + + def __init__( # noqa: WPS211 + self, + *, + sparse_data_grids: Optional[List[FDataGrid]] = None, + dataset_name: Optional[str] = None, + argument_names: Optional[LabelTupleLike] = None, + 
coordinate_names: Optional[LabelTupleLike] = None, + sample_names: Optional[LabelTupleLike] = None, + extrapolation: Optional[ExtrapolationLike] = None, + ): + """Construct a FDataSparse object.""" + #Create data structure of function pointers and coordinates + + self.lookup_table = np.empty((0,)) + self.coordinates_table = np.empty((0,)) + self.values_table = np.empty((0,)) + self.num_values = 0 + + #If data is given, populate the structure + if sparse_data_grids is not None: + for data_grid in sparse_data_grids: + self.add_function(data_grid) + + super().__init__( + extrapolation=extrapolation, + dataset_name=dataset_name, + argument_names=argument_names, + coordinate_names=coordinate_names, + sample_names=sample_names, + ) + + def add_function( + self, + data_grid: FDataGrid, + )-> None: + #TODO Implement for higher dimensions and maybe multiple functions in data grid + + #Extract the tuples of grid points from data_grid + grid_points = data_grid.grid_points[0] + + #Extract the tuple of values for each grid point + values = [x[0] for x in data_grid.data_matrix[0]] + + #Reshape coordinate and values array and add new values + self.coordinates_table = np.concatenate((self.coordinates_table, + grid_points), + axis=0) + self.values_table = np.concatenate((self.values_table, + values), + axis=0) + + #Add one entry to lookup_table + self.lookup_table = np.append(self.lookup_table, + self.num_values) + self.num_values += len(grid_points) + + def add_point( + self, + function_index: int, + coordinates: Tuple, + values: Tuple + )-> None: + # Find the interval of indexes where the function is stored + not_last_function = (function_index + 1) < len(self.lookup_table) + function_start = self.lookup_table[function_index] + function_end = self.lookup_table[function_index + + 1] if not_last_function else self.num_values + + #Find where in the interval lies the coordinate + function_coordinates = self.coordinates_table[function_start:function_end] + compare_coordinates = 
[coordinates < coord + for coord in function_coordinates] + + insertion_index = compare_coordinates.index(True) + + #Concatenate the new point sandwiched between the others + self.coordinates_table = np.concatenate((self.coordinates_table[:insertion_index], + [coordinates], + self.coordinates_table[insertion_index:]), + axis=0) + + self.values = np.concatenate((self.values_table[:insertion_index], + [values], + self.values_table[insertion_index:]), + axis=0) + + #Update the lookup table and number of values + if not_last_function: + self.lookup_table[function_index + 1] += 1 + self.num_values += 1 + + + + def round( # noqa: WPS125 + self, + decimals: int = 0, + out: Optional[FDataGrid] = None, + ) -> FDataGrid: + #TODO Implement when attributes are done. Round the values probably + pass + + @property + def sample_points(self) -> GridPoints: + warnings.warn( + "Parameter sample_points is deprecated. Use the " + "parameter grid_points instead.", + DeprecationWarning, + ) + return self.grid_points + + @property + def dim_domain(self) -> int: + if self.num_values == 0: + return 1 #TODO What to do here + #TODO Check float + #return len(self.coordinates_table[0]) #Length of any coordinate tuple + return 1 + + @property + def dim_codomain(self) -> int: + if self.num_values == 0: + return 1 #TODO What to do here + #TODO Check float + #return len(self.values_table[0]) #Length of any coordinate tuple + return 1 + + @property + def coordinates(self: T) -> _CoordinateIterator[T]: + #TODO Does it even make sense to do this? Maybe it requires an specific _SparseCoordinateIterator over the structure + pass + + @property + def n_samples(self) -> int: + return self.num_values + + @property + def sample_range(self) -> DomainRange: + """ + Return the sample range of the function. + + This contains the minimum and maximum values of the grid points in + each dimension. + + It does not have to be equal to the `domain_range`. 
+ """ + return self._sample_range + + @property + def domain_range(self) -> DomainRange: + """ + Return the :term:`domain range` of the function. + + It does not have to be equal to the `sample_range`. + + """ + return self._domain_range + + @property + def interpolation(self) -> Evaluator: + """Define the type of interpolation applied in `evaluate`.""" + return self._interpolation + + @interpolation.setter + def interpolation(self, new_interpolation: Optional[Evaluator]) -> None: + + if new_interpolation is None: + new_interpolation = SplineInterpolation() + + self._interpolation = new_interpolation + + def _evaluate( + self, + eval_points: NDArrayFloat, + *, + aligned: bool = True, + ) -> NDArrayFloat: + + #TODO Implement when attributes are done + pass + + def derivative( + self: T, + *, + order: int = 1, + method: Optional[Basis] = None, + ) -> T: + #TODO Implement when attributes are done + pass + + def integrate( + self: T, + *, + domain: Optional[DomainRange] = None, + ) -> NDArrayFloat: + #TODO Implement when attributes are done + pass + + def _check_same_dimensions(self: T, other: T) -> None: + #TODO Implement when attributes are done + pass + + def sum( # noqa: WPS125 + self: T, + *, + axis: Optional[int] = None, + out: None = None, + keepdims: bool = False, + skipna: bool = False, + min_count: int = 0, + ) -> T: + #TODO Implement when attributes are done + pass + + def var(self: T) -> T: + #TODO Implement when attributes are done + pass + + def cov(self: T) -> T: + #TODO Implement when attributes are done + pass + + def gmean(self: T) -> T: + #TODO Implement when attributes are done + pass + + def equals(self, other: object) -> bool: + """Comparison of FDataGrid objects.""" + #TODO Implement when attributes are done + pass + + def _eq_elemenwise(self: T, other: T) -> NDArrayBool: + """Elementwise equality of FDataGrid.""" + #TODO Implement when attributes are done + pass + + def _get_op_matrix( + self, + other: Union[T, NDArrayFloat, NDArrayInt, float], 
+ ) -> Union[None, float, NDArrayFloat, NDArrayInt]: + + #TODO Implement when attributes are done + pass + + def __add__( + self: T, + other: Union[T, NDArrayFloat, NDArrayInt, float], + ) -> T: + + #TODO Implement when attributes are done + pass + + def __radd__( + self: T, + other: Union[T, NDArrayFloat, NDArrayInt, float], + ) -> T: + + #TODO Implement when attributes are done + pass + + def __sub__( + self: T, + other: Union[T, NDArrayFloat, NDArrayInt, float], + ) -> T: + + #TODO Implement when attributes are done + pass + + def __rsub__( + self: T, + other: Union[T, NDArrayFloat, NDArrayInt, float], + ) -> T: + + #TODO Implement when attributes are done + pass + + def __mul__( + self: T, + other: Union[T, NDArrayFloat, NDArrayInt, float], + ) -> T: + + #TODO Implement when attributes are done + pass + + def __rmul__( + self: T, + other: Union[T, NDArrayFloat, NDArrayInt, float], + ) -> T: + + #TODO Implement when attributes are done + pass + + def __truediv__( + self: T, + other: Union[T, NDArrayFloat, NDArrayInt, float], + ) -> T: + + #TODO Implement when attributes are done + pass + + def __rtruediv__( + self: T, + other: Union[T, NDArrayFloat, NDArrayInt, float], + ) -> T: + + #TODO Implement when attributes are done + pass + + def __neg__(self: T) -> T: + """Negation of FData object.""" + #TODO Should be easy to implement, just negating the values + pass + + def concatenate(self: T, *others: T, as_coordinates: bool = False) -> T: + #TODO This should be easy to implement, using the add_function methods + pass + + def scatter(self, *args: Any, **kwargs: Any) -> Figure: + #TODO Maybe transform in full blown sparse FDataGrid and then scatter + pass + + def to_basis(self, basis: Basis, **kwargs: Any) -> FDataBasis: + #TODO Use BasisSmoother to return basis? 
+ pass + + def to_grid( # noqa: D102 + self: T, + grid_points: Optional[GridPointsLike] = None, + *, + sample_points: Optional[GridPointsLike] = None, + ) -> T: + + #TODO Return list of data grids + pass + + def copy( # noqa: WPS211 + self: T, + *, + deep: bool = False, # For Pandas compatibility + data_matrix: Optional[ArrayLike] = None, + grid_points: Optional[GridPointsLike] = None, + sample_points: Optional[GridPointsLike] = None, + domain_range: Optional[DomainRangeLike] = None, + dataset_name: Optional[str] = None, + argument_names: Optional[LabelTupleLike] = None, + coordinate_names: Optional[LabelTupleLike] = None, + sample_names: Optional[LabelTupleLike] = None, + extrapolation: Optional[ExtrapolationLike] = None, + interpolation: Optional[Evaluator] = None, + ) -> T: + + #TODO Define copy after all attributes are locked + pass + + def restrict( + self: T, + domain_range: DomainRangeLike, + ) -> T: + + #TODO Is this possible with this structure + pass + + def shift( + self, + shifts: Union[ArrayLike, float], + *, + restrict_domain: bool = False, + extrapolation: Optional[ExtrapolationLike] = None, + grid_points: Optional[GridPointsLike] = None, + ) -> FDataGrid: + #TODO Is this possible with this structure? + pass + + def compose( + self: T, + fd: T, + *, + eval_points: Optional[GridPointsLike] = None, + ) -> T: + + #TODO Is this possible with this structure? 
+ pass + + def __str__(self) -> str: + """Return str(self).""" + #TODO Define str method after all attributes are locked + pass + + def __repr__(self) -> str: + """Return repr(self).""" + return ( + f"FDataSparse(" # noqa: WPS221 + f"\nlookup_table={self.lookup_table!r}," + f"\ncoordinates_table={self.coordinates_table!r}," + f"\nvalues_table={self.values_table!r}," + #f"\ndomain_range={self.domain_range!r}," + f"\ndataset_name={self.dataset_name!r}," + f"\nargument_names={self.argument_names!r}," + f"\ncoordinate_names={self.coordinate_names!r}," + f"\nextrapolation={self.extrapolation!r}," + #f"\ninterpolation={self.interpolation!r})" + ).replace( + '\n', + '\n ', + ) + + def __getitem__( + self: T, + key: Union[int, slice, NDArrayInt, NDArrayBool], + ) -> T: + """Return self[key].""" + #TODO Maybe return from the view? Or transform using view functions directly from data structure? + + ##################################################################### + # Numpy methods + ##################################################################### + + def __array_ufunc__( + self, + ufunc: Any, + method: str, + *inputs: Any, + **kwargs: Any, + ) -> Any: + + for i in inputs: + if ( + isinstance(i, FDataGrid) + and not np.array_equal(i.grid_points, self.grid_points) + ): + return NotImplemented + + new_inputs = [ + i.data_matrix if isinstance(i, FDataGrid) + else self._get_op_matrix(i) for i in inputs + ] + + outputs = kwargs.pop('out', None) + if outputs: + new_outputs = [ + o.data_matrix if isinstance(o, FDataGrid) + else o for o in outputs + ] + kwargs['out'] = tuple(new_outputs) + else: + new_outputs = (None,) * ufunc.nout + + results = getattr(ufunc, method)(*new_inputs, **kwargs) + if results is NotImplemented: + return NotImplemented + + if ufunc.nout == 1: + results = (results,) + + results = tuple( + (result if output is None else output) + for result, output in zip(results, new_outputs) + ) + + results = [self.copy(data_matrix=r) for r in results] + + return 
results[0] if len(results) == 1 else results + + ##################################################################### + # Pandas ExtensionArray methods + ##################################################################### + + def _take_allow_fill( + self: T, + indices: NDArrayInt, + fill_value: T, + ) -> T: + result = self.copy() + result.data_matrix = np.full( + (len(indices),) + self.data_matrix.shape[1:], + np.nan, + ) + + positive_mask = indices >= 0 + result.data_matrix[positive_mask] = self.data_matrix[ + indices[positive_mask] + ] + + if fill_value is not self.dtype.na_value: + result.data_matrix[~positive_mask] = fill_value.data_matrix[0] + + return result + + @property + def dtype(self) -> FDataGridDType: + """The dtype for this extension array, FDataGridDType""" + return FDataGridDType( + grid_points=self.grid_points, + domain_range=self.domain_range, + dim_codomain=self.dim_codomain, + ) + + @property + def nbytes(self) -> int: + """ + The number of bytes needed to store this object in memory. + """ + return self.data_matrix.nbytes + sum( + p.nbytes for p in self.grid_points + ) + + def isna(self) -> NDArrayBool: + """ + Return a 1-D array indicating if each value is missing. + + Returns: + na_values: Positions of NA. + """ + return np.all( # type: ignore[no-any-return] + np.isnan(self.data_matrix), + axis=tuple(range(1, self.data_matrix.ndim)), + ) + + +#TODO Do i need a FDataSparseDType? + +#TODO Do I need a _CoordinateIterator? 
From f5042230d496ee91a483ba0d8480f63624c9892d Mon Sep 17 00:00:00 2001 From: opintosant Date: Mon, 16 Jan 2023 05:04:43 +0100 Subject: [PATCH 002/144] Base irregular structure and representation reading from dataframe --- .../visualization/representation.py | 223 ++++++++++++++++++ .../{sparse.py => irregular.py} | 200 +++++++++------- 2 files changed, 333 insertions(+), 90 deletions(-) rename skfda/representation/{sparse.py => irregular.py} (72%) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index a9e1addee..86fb873f2 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -22,6 +22,7 @@ from ..._utils import _to_grid_points, constants from ...misc.validation import validate_domain_range from ...representation._functional_data import FData +from ...representation.irregular import FDataIrregular from ...typing._base import DomainRangeLike, GridPointsLike from ._baseplot import BasePlot from ._utils import ColorLike, _set_labels @@ -547,6 +548,228 @@ def _plot( _set_labels(self.fdata, fig, axes, self.patches) +class PlotIrregular(BasePlot): + """ + Class used to plot a FDataIrregular object. + + Args: + fdata: FDataIrregular object set that we want to plot. + chart: figure over + with the graphs are plotted or axis over where the graphs are + plotted. If None and ax is also None, the figure is + initialized. + fig: figure over with the graphs are + plotted in case ax is not specified. If None and ax is also + None, the figure is initialized. + axes: axis over where the graphs + are plotted. If None, see param fig. + n_rows: designates the number of rows of the figure + to plot the different dimensions of the image. Only specified + if fig and ax are None. + n_cols: designates the number of columns of the + figure to plot the different dimensions of the image. Only + specified if fig and ax are None. 
+ domain_range: Range where the + function will be plotted. In objects with unidimensional domain + the domain range should be a tuple with the bounds of the + interval; in the case of surfaces a list with 2 tuples with + the ranges for each dimension. Default uses the domain range + of the functional object. + group: contains integers from [0 to number of + labels) indicating to which group each sample belongs to. Then, + the samples with the same label are plotted in the same color. + If None, the default value, each sample is plotted in the color + assigned by matplotlib.pyplot.rcParams['axes.prop_cycle']. + group_colors: colors in which groups are + represented, there must be one for each group. If None, each + group is shown with distict colors in the "Greys" colormap. + group_names: name of each of the groups which appear + in a legend, there must be one for each one. Defaults to None + and the legend is not shown. Implies `legend=True`. + legend: if `True`, show a legend with the groups. If + `group_names` is passed, it will be used for finding the names + to display in the legend. Otherwise, the values passed to + `group` will be used. + kwargs: if dim_domain is 1, keyword arguments to be passed to + the matplotlib.pyplot.plot function; if dim_domain is 2, + keyword arguments to be passed to the + matplotlib.pyplot.plot_surface function. 
+ """ + + def __init__( + self, + fdata: FDataIrregular, + chart: Figure | Axes | None = None, + *, + fig: Figure | None = None, + axes: Axes | None = None, + n_rows: int | None = None, + n_cols: int | None = None, + domain_range: Tuple[int, int] | DomainRangeLike | None = None, + group: Sequence[K] | None = None, + group_colors: Indexable[K, ColorLike] | None = None, + group_names: Indexable[K, str] | None = None, + legend: bool = False, + **kwargs: Any, + ) -> None: + super().__init__( + chart, + fig=fig, + axes=axes, + n_rows=n_rows, + n_cols=n_cols, + ) + self.fdata = fdata + + # There may be different points for each function + self.grid_points = [] + self.evaluated_points = [] + for index_start, index_end in zip(list(self.fdata.function_indices), + list(self.fdata.function_indices[1:])): + self.grid_points.append( + self.fdata.function_arguments[index_start:index_end]) + self.evaluated_points.append( + self.fdata.function_values[index_start:index_end]) + # Dont forget to add the last one + self.grid_points.append(self.fdata.function_arguments[index_end:]) + self.evaluated_points.append(self.fdata.function_values[index_end:]) + + self.domain_range = domain_range + self.group = group + self.group_colors = group_colors + self.group_names = group_names + self.legend = legend + + if self.domain_range is None: + self.domain_range = self.fdata.domain_range + else: + self.domain_range = validate_domain_range(self.domain_range) + + sample_colors, patches = _get_color_info( + self.fdata, + self.group, + self.group_names, + self.group_colors, + self.legend, + kwargs, + ) + self.sample_colors = sample_colors + self.patches = patches + + @property + def dim(self) -> int: + return self.fdata.dim_domain + 1 + + @property + def n_subplots(self) -> int: + return self.fdata.dim_codomain + + @property + def n_samples(self) -> int: + return self.fdata.n_samples + + def _plot( + self, + fig: Figure, + axes: Sequence[Axes], + ) -> None: + # Implement in subclasses + pass + + 
+class LinearPlotIrregular(PlotIrregular): + """ + Class used to plot the individual curves of a FDataIrregular object + using linear interpolation between the points. + """ + + def _plot( + self, + fig: Figure, + axes: Sequence[Axes], + ) -> None: + """ + Plot the individual curves of a FDataIrregular object. + + Returns: + fig: figure object in which the graphs are plotted. + """ + self.artists = np.zeros( + (self.n_samples, self.fdata.dim_codomain), + dtype=Artist, + ) + + color_dict: Dict[str, ColorLike | None] = {} + + if self.fdata.dim_domain == 1: + for j in range(self.fdata.n_samples): + + set_color_dict(self.sample_colors, j, color_dict) + + self.artists[j, 0] = axes[0].plot( + np.matrix.flatten(self.grid_points[j]), + np.matrix.flatten(self.evaluated_points[j]), + **color_dict, + picker=True, + pickradius=2, + ) + + else: + + # TODO Implementar para multidimension. Como hacer mesh? + import warnings + warnings.warn("Not implemented") + + _set_labels(self.fdata, fig, axes, self.patches) + + +class ScatterPlotIrregular(PlotIrregular): + """ + Class used to scatter a FDataIrregular object. + + """ + + def _plot( + self, + fig: Figure, + axes: Sequence[Axes], + ) -> None: + """ + Scatter FDataIrregular object. + + Returns: + fig: figure object in which the graphs are plotted. 
+ """ + self.artists = np.zeros( + (self.n_samples, self.fdata.dim_codomain), + dtype=Artist, + ) + + color_dict: Dict[str, ColorLike | None] = {} + + if self.fdata.dim_domain == 1: + + for j in range(self.fdata.n_samples): + + set_color_dict(self.sample_colors, j, color_dict) + + self.artists[j, 0] = axes[0].scatter( + self.grid_points[j], + self.evaluated_points[j], + **color_dict, + picker=True, + pickradius=2, + ) + + else: + + # TODO Implement for multidimensional + import warnings + warnings.warn("Not implemented") + + _set_labels(self.fdata, fig, axes, self.patches) + + def set_color_dict( sample_colors: Any, ind: int, diff --git a/skfda/representation/sparse.py b/skfda/representation/irregular.py similarity index 72% rename from skfda/representation/sparse.py rename to skfda/representation/irregular.py index cda6be708..6304a1a83 100644 --- a/skfda/representation/sparse.py +++ b/skfda/representation/irregular.py @@ -1,6 +1,6 @@ """Discretised functional data module. -This module defines a class for representing discretized sparse data, +This module defines a class for representing discretized irregular data, in which the observations may be made in different grid points in each data function, and the overall density of the observations may be low @@ -48,35 +48,68 @@ if TYPE_CHECKING: from .basis import Basis, FDataBasis -T = TypeVar("T", bound='FDataSparse') +T = TypeVar("T", bound='FDataIrregular') - -class FDataSparse(FData): # noqa: WPS214 - #TODO Docstring +class FDataIrregular(FData): # noqa: WPS214 + # TODO Docstring + # TODO 1. Scatter + # TODO 2. Fix the input to fix domain, codomain and all dimensions (hard lock array size) + # TODO 3. 
Plot and PlotAndScatter def __init__( # noqa: WPS211 self, + input_dataframe: pandas.DataFrame, + id_name: str, + argument_names: LabelTupleLike, + coordinate_names: LabelTupleLike, *, - sparse_data_grids: Optional[List[FDataGrid]] = None, + dim_domain: Optional[int] = 1, + dim_codomain: Optional[int] = 1, + domain_range: Optional[DomainRangeLike] = None, dataset_name: Optional[str] = None, - argument_names: Optional[LabelTupleLike] = None, - coordinate_names: Optional[LabelTupleLike] = None, sample_names: Optional[LabelTupleLike] = None, extrapolation: Optional[ExtrapolationLike] = None, ): - """Construct a FDataSparse object.""" - #Create data structure of function pointers and coordinates + """Construct a FDataIrregular object.""" + # Set dimensions + # TODO Check dimensions against num of arguments and coordinates? + self._dim_domain = dim_domain + self._dim_codomain = dim_codomain + + # Accept stringsd but ensure the column names are tuples + _is_str = isinstance(argument_names, str) + argument_names = [argument_names] if _is_str else argument_names + + _is_str = isinstance(coordinate_names, str) + coordinate_names = [coordinate_names] if _is_str else coordinate_names - self.lookup_table = np.empty((0,)) - self.coordinates_table = np.empty((0,)) - self.values_table = np.empty((0,)) - self.num_values = 0 + #TODO Add long/wide differentiation, matrix loading - #If data is given, populate the structure - if sparse_data_grids is not None: - for data_grid in sparse_data_grids: - self.add_function(data_grid) + self._load_long_dataframe( + input_dataframe, + id_name, + argument_names, + coordinate_names, + ) + + #TODO Fix for higher dimensions + i=0 + self._sample_range = list() + for f in self.function_indices[1:]: + self._sample_range.append((self.function_arguments[i][0], + self.function_arguments[f-1][0])) + i = f + self._sample_range.append((self.function_arguments[i][0], + self.function_arguments[-1][0])) + + from ..misc.validation import validate_domain_range 
+ if domain_range is None: + domain_range = self.sample_range + # Default value for domain_range is a list of tuples with + # the first and last element of each list of the grid_points. + + self._domain_range = validate_domain_range(domain_range) super().__init__( extrapolation=extrapolation, @@ -86,66 +119,49 @@ def __init__( # noqa: WPS211 sample_names=sample_names, ) - def add_function( + #TODO Fix for higher dimensions + #TODO Check typing + def _load_long_dataframe( self, - data_grid: FDataGrid, - )-> None: - #TODO Implement for higher dimensions and maybe multiple functions in data grid + dataframe: pandas.DataFrame, + id_column: str, + argument_columns: LabelTupleLike, + coordinate_columns: LabelTupleLike, + ) -> None: - #Extract the tuples of grid points from data_grid - grid_points = data_grid.grid_points[0] + # Obtain num functions and num observations from data + num_observations = dataframe.shape[0] + num_functions = dataframe[id_column].nunique() - #Extract the tuple of values for each grid point - values = [x[0] for x in data_grid.data_matrix[0]] + # Create data structure of function pointers and coordinates + self.num_functions = num_functions + self.num_observations = num_observations - #Reshape coordinate and values array and add new values - self.coordinates_table = np.concatenate((self.coordinates_table, - grid_points), - axis=0) - self.values_table = np.concatenate((self.values_table, - values), - axis=0) + self.function_indices = np.zeros((self.num_functions, ), + dtype=np.uint32) + self.function_arguments = np.zeros((self.num_observations, + self.dim_domain)) + self.function_values = np.zeros((self.num_observations, + self.dim_codomain)) - #Add one entry to lookup_table - self.lookup_table = np.append(self.lookup_table, - self.num_values) - self.num_values += len(grid_points) - - def add_point( - self, - function_index: int, - coordinates: Tuple, - values: Tuple - )-> None: - # Find the interval of indexes where the function is stored - 
not_last_function = (function_index + 1) < len(self.lookup_table) - function_start = self.lookup_table[function_index] - function_end = self.lookup_table[function_index - + 1] if not_last_function else self.num_values + head = 0 + index = 0 + for _, f_values in dataframe.groupby(id_column): + self.function_indices[index] = head + num_values = f_values.shape[0] - #Find where in the interval lies the coordinate - function_coordinates = self.coordinates_table[function_start:function_end] - compare_coordinates = [coordinates < coord - for coord in function_coordinates] - - insertion_index = compare_coordinates.index(True) + # Insert in order + f_values = f_values.sort_values(argument_columns) - #Concatenate the new point sandwiched between the others - self.coordinates_table = np.concatenate((self.coordinates_table[:insertion_index], - [coordinates], - self.coordinates_table[insertion_index:]), - axis=0) - - self.values = np.concatenate((self.values_table[:insertion_index], - [values], - self.values_table[insertion_index:]), - axis=0) - - #Update the lookup table and number of values - if not_last_function: - self.lookup_table[function_index + 1] += 1 - self.num_values += 1 - + new_args = f_values[argument_columns].values + self.function_arguments[head:head+num_values, :] = new_args + + new_coords = f_values[coordinate_columns].values + self.function_values[head:head+num_values, :] = new_coords + + # Update head and index + head += num_values + index += 1 def round( # noqa: WPS125 @@ -159,7 +175,7 @@ def round( # noqa: WPS125 @property def sample_points(self) -> GridPoints: warnings.warn( - "Parameter sample_points is deprecated. Use the " + "Parameter sample_points is deprecated. 
Use the " \ "parameter grid_points instead.", DeprecationWarning, ) @@ -167,28 +183,21 @@ def sample_points(self) -> GridPoints: @property def dim_domain(self) -> int: - if self.num_values == 0: - return 1 #TODO What to do here - #TODO Check float - #return len(self.coordinates_table[0]) #Length of any coordinate tuple - return 1 + return self._dim_domain @property def dim_codomain(self) -> int: - if self.num_values == 0: - return 1 #TODO What to do here - #TODO Check float - #return len(self.values_table[0]) #Length of any coordinate tuple - return 1 + return self._dim_codomain + #TODO Remove CoordinateIterator in an appropiate way @property def coordinates(self: T) -> _CoordinateIterator[T]: - #TODO Does it even make sense to do this? Maybe it requires an specific _SparseCoordinateIterator over the structure + #TODO Does it even make sense to do this? Maybe it requires an specific _IrregularCoordinateIterator over the structure pass @property def n_samples(self) -> int: - return self.num_values + return self.num_functions @property def sample_range(self) -> DomainRange: @@ -370,9 +379,20 @@ def __neg__(self: T) -> T: def concatenate(self: T, *others: T, as_coordinates: bool = False) -> T: #TODO This should be easy to implement, using the add_function methods pass + + def plot(self, *args: Any, **kwargs: Any) -> Figure: + from ..exploratory.visualization.representation import LinearPlotIrregular + + return LinearPlotIrregular(self, *args, **kwargs).plot() def scatter(self, *args: Any, **kwargs: Any) -> Figure: - #TODO Maybe transform in full blown sparse FDataGrid and then scatter + from ..exploratory.visualization.representation import ScatterPlotIrregular + + return ScatterPlotIrregular(self, *args, **kwargs).plot() + pass + + def plot_and_scatter(self, *args: Any, **kwargs: Any) -> Figure: + #TODO Concatenate all of the points and plot with scatter pass def to_basis(self, basis: Basis, **kwargs: Any) -> FDataBasis: @@ -445,10 +465,10 @@ def __str__(self) -> 
str: def __repr__(self) -> str: """Return repr(self).""" return ( - f"FDataSparse(" # noqa: WPS221 - f"\nlookup_table={self.lookup_table!r}," - f"\ncoordinates_table={self.coordinates_table!r}," - f"\nvalues_table={self.values_table!r}," + f"FDataIrregular(" # noqa: WPS221 + f"\nfunction_indices={self.function_indices!r}," + f"\nfunction_arguments={self.function_arguments!r}," + f"\nfunction_values={self.function_values!r}," #f"\ndomain_range={self.domain_range!r}," f"\ndataset_name={self.dataset_name!r}," f"\nargument_names={self.argument_names!r}," @@ -573,6 +593,6 @@ def isna(self) -> NDArrayBool: ) -#TODO Do i need a FDataSparseDType? +#TODO Do i need a FDataIrregularDType? #TODO Do I need a _CoordinateIterator? From de32dfeaf44b5ab0228c4e51ae716ce3f5f7ba3c Mon Sep 17 00:00:00 2001 From: opintosant Date: Mon, 16 Jan 2023 05:30:46 +0100 Subject: [PATCH 003/144] Add scatterplot functionality --- skfda/representation/irregular.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 6304a1a83..353608481 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -53,9 +53,6 @@ class FDataIrregular(FData): # noqa: WPS214 # TODO Docstring - # TODO 1. Scatter - # TODO 2. Fix the input to fix domain, codomain and all dimensions (hard lock array size) - # TODO 3. Plot and PlotAndScatter def __init__( # noqa: WPS211 self, @@ -389,11 +386,10 @@ def scatter(self, *args: Any, **kwargs: Any) -> Figure: from ..exploratory.visualization.representation import ScatterPlotIrregular return ScatterPlotIrregular(self, *args, **kwargs).plot() - pass def plot_and_scatter(self, *args: Any, **kwargs: Any) -> Figure: - #TODO Concatenate all of the points and plot with scatter - pass + fig = self.scatter(*args, **kwargs) + self.plot(fig=fig, *args, **kwargs) def to_basis(self, basis: Basis, **kwargs: Any) -> FDataBasis: #TODO Use BasisSmoother to return basis? 
@@ -469,7 +465,7 @@ def __repr__(self) -> str: f"\nfunction_indices={self.function_indices!r}," f"\nfunction_arguments={self.function_arguments!r}," f"\nfunction_values={self.function_values!r}," - #f"\ndomain_range={self.domain_range!r}," + f"\ndomain_range={self.domain_range!r}," f"\ndataset_name={self.dataset_name!r}," f"\nargument_names={self.argument_names!r}," f"\ncoordinate_names={self.coordinate_names!r}," From a7ab76571201329cd15b58069ad39f87b019bc52 Mon Sep 17 00:00:00 2001 From: opintosant Date: Sun, 29 Jan 2023 21:35:46 +0100 Subject: [PATCH 004/144] Code cleanup and implement mean --- skfda/representation/irregular.py | 111 +++++++++++++++++++++++------- 1 file changed, 86 insertions(+), 25 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 353608481..3b1ba20e7 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -164,10 +164,22 @@ def _load_long_dataframe( def round( # noqa: WPS125 self, decimals: int = 0, - out: Optional[FDataGrid] = None, - ) -> FDataGrid: - #TODO Implement when attributes are done. 
Round the values probably - pass + out: Optional[FDataIrregular] = None, + ) -> FDataIrregular: + rounded_arguments = self.function_arguments.round(decimals=decimals) + rounded_values = self.function_values.round(decimals=decimals) + + if out is not None and isinstance(out, FDataIrregular): + out.function_indices = self.function_indices + out.function_arguments = rounded_arguments + out.function_values = rounded_values + + return out + + return self.copy( + function_arguments=rounded_arguments, + function_values=rounded_values + ) @property def sample_points(self) -> GridPoints: @@ -238,7 +250,7 @@ def _evaluate( aligned: bool = True, ) -> NDArrayFloat: - #TODO Implement when attributes are done + #TODO pass def derivative( @@ -247,7 +259,7 @@ def derivative( order: int = 1, method: Optional[Basis] = None, ) -> T: - #TODO Implement when attributes are done + #TODO pass def integrate( @@ -255,12 +267,14 @@ def integrate( *, domain: Optional[DomainRange] = None, ) -> NDArrayFloat: - #TODO Implement when attributes are done + #TODO pass def _check_same_dimensions(self: T, other: T) -> None: - #TODO Implement when attributes are done - pass + if self.dim_codomain != other.dim_codomain: + raise ValueError("Dimension mismatch in coordinates") + if self.dim_domain != other.dim_domain: + raise ValueError("Dimension mismatch in arguments") def sum( # noqa: WPS125 self: T, @@ -274,12 +288,62 @@ def sum( # noqa: WPS125 #TODO Implement when attributes are done pass + def mean(self: T) -> T: + """Compute the mean pointwise for a sparse dataset. + + Note that, for irregular data, points may be represented in few + or even an only curve. + + Returns: + A FDataIrregular object with just one sample representing the + mean of all curves the across each value. 
+ + """ + + # Find all distinct arguments (ordered) and corresponding values + distinct_args = np.unique(np.matrix.flatten(self.function_arguments)) + values = [np.matrix.flatten(self.function_values[np.where(self.function_arguments == arg)[0]]) + for arg in distinct_args] + + # Obtain mean of all available values for each argument point + vars = np.array([np.mean(vals) for vals in values]) + + # Create a FDataGrid object with only 1 curve, the mean curve + return FDataGrid( + grid_points=distinct_args, + data_matrix=np.array([vars]), + sample_names=("mean",), + ) + def var(self: T) -> T: - #TODO Implement when attributes are done - pass + """Compute the variance pointwise for a sparse dataset. + + Note that, for irregular data, points may be represented in few + or even an only curve. + + Returns: + A FDataIrregular object with just one sample representing the + variance of all curves the across each value. + + """ + + # Find all distinct arguments (ordered) and corresponding values + distinct_args = np.unique(np.matrix.flatten(self.function_arguments)) + values = [np.matrix.flatten(self.function_values[np.where(self.function_arguments == arg)[0]]) + for arg in distinct_args] + + # Obtain variance of all available values for each argument point + vars = np.array([np.var(vals) for vals in values]) + + # Create a FDataGrid object with only 1 curve, the variance curve + return FDataGrid( + grid_points=distinct_args, + data_matrix=np.array([vars]), + sample_names=("variance",), + ) def cov(self: T) -> T: - #TODO Implement when attributes are done + #TODO Implementation to be decided pass def gmean(self: T) -> T: @@ -287,12 +351,12 @@ def gmean(self: T) -> T: pass def equals(self, other: object) -> bool: - """Comparison of FDataGrid objects.""" + """Comparison of FDataSparse objects.""" #TODO Implement when attributes are done pass def _eq_elemenwise(self: T, other: T) -> NDArrayBool: - """Elementwise equality of FDataGrid.""" + """Elementwise equality of 
FDataSparse.""" #TODO Implement when attributes are done pass @@ -374,7 +438,7 @@ def __neg__(self: T) -> T: pass def concatenate(self: T, *others: T, as_coordinates: bool = False) -> T: - #TODO This should be easy to implement, using the add_function methods + #TODO Implement allocing memory only once pass def plot(self, *args: Any, **kwargs: Any) -> Figure: @@ -402,26 +466,23 @@ def to_grid( # noqa: D102 sample_points: Optional[GridPointsLike] = None, ) -> T: - #TODO Return list of data grids + #TODO Return list of data grids? Data grid with holes? pass def copy( # noqa: WPS211 self: T, - *, deep: bool = False, # For Pandas compatibility - data_matrix: Optional[ArrayLike] = None, - grid_points: Optional[GridPointsLike] = None, - sample_points: Optional[GridPointsLike] = None, - domain_range: Optional[DomainRangeLike] = None, - dataset_name: Optional[str] = None, argument_names: Optional[LabelTupleLike] = None, coordinate_names: Optional[LabelTupleLike] = None, + dim_domain: Optional[int] = 1, + dim_codomain: Optional[int] = 1, + domain_range: Optional[DomainRangeLike] = None, + dataset_name: Optional[str] = None, sample_names: Optional[LabelTupleLike] = None, extrapolation: Optional[ExtrapolationLike] = None, - interpolation: Optional[Evaluator] = None, ) -> T: - #TODO Define copy after all attributes are locked + #TODO Should allow to copy directly from FDataIrregular, not from dataframe pass def restrict( @@ -465,7 +526,7 @@ def __repr__(self) -> str: f"\nfunction_indices={self.function_indices!r}," f"\nfunction_arguments={self.function_arguments!r}," f"\nfunction_values={self.function_values!r}," - f"\ndomain_range={self.domain_range!r}," + #f"\ndomain_range={self.domain_range!r}," f"\ndataset_name={self.dataset_name!r}," f"\nargument_names={self.argument_names!r}," f"\ncoordinate_names={self.coordinate_names!r}," From 3904443dd54aa236fa81a19ff9ccca92e1aea784 Mon Sep 17 00:00:00 2001 From: opintosant Date: Sun, 29 Jan 2023 23:59:51 +0100 Subject: [PATCH 
005/144] Update default constructor FDataIrregular to take arrays as input. Loading from dataframe can now be done as a class method --- skfda/representation/irregular.py | 100 +++++++++++++++++------------- 1 file changed, 57 insertions(+), 43 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 3b1ba20e7..9658a3622 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -23,10 +23,8 @@ cast, ) -import findiff import numpy as np import pandas.api.extensions -import scipy.integrate import scipy.stats.mstats from matplotlib.figure import Figure @@ -53,13 +51,12 @@ class FDataIrregular(FData): # noqa: WPS214 # TODO Docstring - - def __init__( # noqa: WPS211 + + def __init__( self, - input_dataframe: pandas.DataFrame, - id_name: str, - argument_names: LabelTupleLike, - coordinate_names: LabelTupleLike, + function_indices: ArrayLike, + function_arguments: ArrayLike, + function_values: ArrayLike, *, dim_domain: Optional[int] = 1, dim_codomain: Optional[int] = 1, @@ -67,28 +64,25 @@ def __init__( # noqa: WPS211 dataset_name: Optional[str] = None, sample_names: Optional[LabelTupleLike] = None, extrapolation: Optional[ExtrapolationLike] = None, - ): + argument_names: Optional[LabelTupleLike] = None, + coordinate_names: Optional[LabelTupleLike] = None + ): """Construct a FDataIrregular object.""" + # Set dimensions # TODO Check dimensions against num of arguments and coordinates? 
self._dim_domain = dim_domain self._dim_codomain = dim_codomain - # Accept stringsd but ensure the column names are tuples - _is_str = isinstance(argument_names, str) - argument_names = [argument_names] if _is_str else argument_names + # Set structure to given data + self.num_functions = function_indices.shape[0] - _is_str = isinstance(coordinate_names, str) - coordinate_names = [coordinate_names] if _is_str else coordinate_names + assert function_arguments.shape[0] == function_values.shape[0] + self.num_observations = function_arguments.shape[0] - #TODO Add long/wide differentiation, matrix loading - - self._load_long_dataframe( - input_dataframe, - id_name, - argument_names, - coordinate_names, - ) + self.set_function_indices(function_indices) + self.set_function_arguments(function_arguments) + self.set_function_values(function_values) #TODO Fix for higher dimensions i=0 @@ -115,53 +109,73 @@ def __init__( # noqa: WPS211 coordinate_names=coordinate_names, sample_names=sample_names, ) - - #TODO Fix for higher dimensions - #TODO Check typing - def _load_long_dataframe( - self, + + @classmethod + def from_dataframe( + cls, + *, dataframe: pandas.DataFrame, id_column: str, argument_columns: LabelTupleLike, coordinate_columns: LabelTupleLike, - ) -> None: + **kwargs + ) -> FDataIrregular: + + # Accept strings but ensure the column names are tuples + _is_str = isinstance(argument_columns, str) + argument_columns = [argument_columns] if _is_str else argument_columns + + _is_str = isinstance(coordinate_columns, str) + coordinate_columns = [coordinate_columns] if _is_str else coordinate_columns # Obtain num functions and num observations from data num_observations = dataframe.shape[0] num_functions = dataframe[id_column].nunique() # Create data structure of function pointers and coordinates - self.num_functions = num_functions - self.num_observations = num_observations - - self.function_indices = np.zeros((self.num_functions, ), - dtype=np.uint32) - 
self.function_arguments = np.zeros((self.num_observations, - self.dim_domain)) - self.function_values = np.zeros((self.num_observations, - self.dim_codomain)) + function_indices = np.zeros((num_functions, ), + dtype=np.uint32) + function_arguments = np.zeros((num_observations, + len(argument_columns))) + function_values = np.zeros((num_observations, + len(coordinate_columns))) head = 0 index = 0 for _, f_values in dataframe.groupby(id_column): - self.function_indices[index] = head + function_indices[index] = head num_values = f_values.shape[0] # Insert in order f_values = f_values.sort_values(argument_columns) new_args = f_values[argument_columns].values - self.function_arguments[head:head+num_values, :] = new_args + function_arguments[head:head+num_values, :] = new_args new_coords = f_values[coordinate_columns].values - self.function_values[head:head+num_values, :] = new_coords + function_values[head:head+num_values, :] = new_coords # Update head and index head += num_values index += 1 - - - def round( # noqa: WPS125 + + return cls( + function_indices, + function_arguments, + function_values, + **kwargs + ) + + def set_function_indices(self, function_indices): + self.function_indices = function_indices.copy() + + def set_function_arguments(self, function_arguments): + self.function_arguments = function_arguments.copy() + + def set_function_values(self, function_values): + self.function_values = function_values.copy() + + def round( self, decimals: int = 0, out: Optional[FDataIrregular] = None, From 583a2be31fcd99ad4ae349675f6ee6181e3568ad Mon Sep 17 00:00:00 2001 From: opintosant Date: Mon, 30 Jan 2023 00:37:29 +0100 Subject: [PATCH 006/144] Add functionality to construct an FDataIrregular object from a FDataGrid with NaNs in the unasigned values --- skfda/representation/irregular.py | 52 +++++++++++++++++++++++++++++-- 1 file changed, 49 insertions(+), 3 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 
9658a3622..5e92652c2 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -113,11 +113,10 @@ def __init__( @classmethod def from_dataframe( cls, - *, dataframe: pandas.DataFrame, id_column: str, argument_columns: LabelTupleLike, - coordinate_columns: LabelTupleLike, + coordinate_columns: LabelTupleLike, **kwargs ) -> FDataIrregular: @@ -165,7 +164,54 @@ def from_dataframe( function_values, **kwargs ) - + + @classmethod + def from_FDataGrid( + cls, + f_data: FDataGrid, + **kwargs + ) -> FDataIrregular: + + # Obtain num functions and num observations from data + num_observations = np.sum(~np.isnan(f_data.data_matrix)) + num_functions = f_data.data_matrix.shape[0] + + # Create data structure of function pointers and coordinates + function_indices = np.zeros((num_functions, ), + dtype=np.uint32) + function_arguments = np.zeros((num_observations, + f_data.dim_domain)) + function_values = np.zeros((num_observations, + f_data.dim_codomain)) + + head = 0 + for i in range(num_functions): + function_indices[i] = head + num_values = 0 + + for j in range(f_data.data_matrix.shape[1]): + if np.isnan(f_data.data_matrix[i][j]): + continue + + arg = [f_data.grid_points[dim][j] for dim + in range(f_data.dim_domain)] + function_arguments[head+num_values, :] = arg + + value = [f_data.data_matrix[i,j,dim] for dim + in range(f_data.dim_codomain)] + function_values[head+num_values, :] = value + + num_values += 1 + + head += num_values + + return cls( + function_indices, + function_arguments, + function_values, + **kwargs + ) + def set_function_indices(self, function_indices): self.function_indices = function_indices.copy() From ab15784bbfdff48f0f6f113afa9905aa073a2d49 Mon Sep 17 00:00:00 2001 From: opintosant Date: Mon, 30 Jan 2023 01:01:43 +0100 Subject: [PATCH 007/144] Allowing selecting the keyword argument 'marker' for plot andscatter. 
Function plot_and_scatter removed as it can be replicated by using the marker argument --- skfda/exploratory/visualization/representation.py | 6 +++++- skfda/representation/irregular.py | 4 ---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index 86fb873f2..cdbe4fe78 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -8,7 +8,7 @@ """ from __future__ import annotations -from typing import Any, Dict, Sequence, Sized, Tuple, TypeVar +from typing import Any, Dict, Sequence, Sized, Tuple, TypeVar, Optional import matplotlib.cm import matplotlib.patches @@ -610,6 +610,7 @@ def __init__( group_colors: Indexable[K, ColorLike] | None = None, group_names: Indexable[K, str] | None = None, legend: bool = False, + marker: Optional[str] = None, **kwargs: Any, ) -> None: super().__init__( @@ -639,6 +640,7 @@ def __init__( self.group_colors = group_colors self.group_names = group_names self.legend = legend + self.marker = marker if self.domain_range is None: self.domain_range = self.fdata.domain_range @@ -712,6 +714,7 @@ def _plot( **color_dict, picker=True, pickradius=2, + marker=self.marker ) else: @@ -759,6 +762,7 @@ def _plot( **color_dict, picker=True, pickradius=2, + marker=self.marker, ) else: diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 5e92652c2..68c800f0d 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -510,10 +510,6 @@ def scatter(self, *args: Any, **kwargs: Any) -> Figure: from ..exploratory.visualization.representation import ScatterPlotIrregular return ScatterPlotIrregular(self, *args, **kwargs).plot() - - def plot_and_scatter(self, *args: Any, **kwargs: Any) -> Figure: - fig = self.scatter(*args, **kwargs) - self.plot(fig=fig, *args, **kwargs) def to_basis(self, basis: Basis, **kwargs: Any) -> 
FDataBasis: #TODO Use BasisSmoother to return basis? From 265cb7e1adcfd97e4f738f307a4d3224a75daef2 Mon Sep 17 00:00:00 2001 From: opintosant Date: Mon, 30 Jan 2023 05:49:24 +0100 Subject: [PATCH 008/144] Implemented basic operations sum, __neg__, gmean, equals, copy --- skfda/representation/irregular.py | 124 +++++++++++++++++++++++++----- 1 file changed, 105 insertions(+), 19 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 68c800f0d..182564701 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -345,8 +345,17 @@ def sum( # noqa: WPS125 skipna: bool = False, min_count: int = 0, ) -> T: - #TODO Implement when attributes are done - pass + super().sum(axis=axis, out=out, keepdims=keepdims, skipna=skipna) + + data = ( + np.nansum(self.function_values, axis=0, keepdims=True) if skipna + else np.sum(self.function_values, axis=0, keepdims=True) + ) + + return FDataGrid( + data_matrix=data, + sample_names=(None,), + ) def mean(self: T) -> T: """Compute the mean pointwise for a sparse dataset. 
@@ -407,18 +416,38 @@ def cov(self: T) -> T: pass def gmean(self: T) -> T: - #TODO Implement when attributes are done - pass + return FDataGrid( + data_matrix=[ + scipy.stats.mstats.gmean(self.function_values, 0), + ], + sample_names=("geometric mean",), + ) def equals(self, other: object) -> bool: - """Comparison of FDataSparse objects.""" - #TODO Implement when attributes are done - pass + """Comparison of FDataIrregular objects.""" + if not super().equals(other): + return False + + other = cast(FDataIrregular, other) + + if not self._eq_elemenwise(other): + return False + + # Comparison of the domain + if not np.array_equal(self.domain_range, other.domain_range): + return False + + #TODO interpolation/extrapolation when implemented + + return True def _eq_elemenwise(self: T, other: T) -> NDArrayBool: - """Elementwise equality of FDataSparse.""" - #TODO Implement when attributes are done - pass + """Elementwise equality of FDataIrregular.""" + return np.all( + [(self.function_indices == other.function_indices).all(), + (self.function_arguments == other.function_arguments).all(), + (self.function_values == other.function_values).all()] + ) def _get_op_matrix( self, @@ -493,9 +522,9 @@ def __rtruediv__( pass def __neg__(self: T) -> T: - """Negation of FData object.""" - #TODO Should be easy to implement, just negating the values - pass + """Negation of FDataIrregular object.""" + + return self.copy(function_values=-self.function_values) def concatenate(self: T, *others: T, as_coordinates: bool = False) -> T: #TODO Implement allocing memory only once @@ -528,18 +557,75 @@ def to_grid( # noqa: D102 def copy( # noqa: WPS211 self: T, deep: bool = False, # For Pandas compatibility - argument_names: Optional[LabelTupleLike] = None, - coordinate_names: Optional[LabelTupleLike] = None, - dim_domain: Optional[int] = 1, - dim_codomain: Optional[int] = 1, + function_indices: Optional[ArrayLike] = None, + function_arguments: Optional[ArrayLike] = None, + function_values: 
Optional[ArrayLike] = None, + dim_domain: Optional[int] = None, + dim_codomain: Optional[int] = None, domain_range: Optional[DomainRangeLike] = None, dataset_name: Optional[str] = None, sample_names: Optional[LabelTupleLike] = None, extrapolation: Optional[ExtrapolationLike] = None, + argument_names: Optional[LabelTupleLike] = None, + coordinate_names: Optional[LabelTupleLike] = None, ) -> T: - #TODO Should allow to copy directly from FDataIrregular, not from dataframe - pass + """ + Return a copy of the FDataIrregular. + + If an argument is provided the corresponding attribute in the new copy + is updated. + + """ + if function_indices is None: + function_indices = self.function_indices + + if function_arguments is None: + function_arguments = self.function_arguments + + if function_values is None: + function_values = self.function_values + + if dim_domain is None: + dim_domain = self.dim_domain + + if dim_codomain is None: + dim_codomain = self.dim_codomain + + if domain_range is None: + domain_range = copy.deepcopy(self.domain_range) + + if dataset_name is None: + dataset_name = self.dataset_name + + if argument_names is None: + # Tuple, immutable + argument_names = self.argument_names + + if coordinate_names is None: + # Tuple, immutable + coordinate_names = self.coordinate_names + + if sample_names is None: + # Tuple, immutable + sample_names = self.sample_names + + if extrapolation is None: + extrapolation = self.extrapolation + + return FDataIrregular( + function_indices, + function_arguments, + function_values, + dim_domain=dim_domain, + dim_codomain=dim_codomain, + domain_range=domain_range, + dataset_name=dataset_name, + argument_names=argument_names, + coordinate_names=coordinate_names, + sample_names=sample_names, + extrapolation=extrapolation, + ) def restrict( self: T, From 5a994d173c1f29f445761c161614330401f6a356 Mon Sep 17 00:00:00 2001 From: opintosant Date: Mon, 30 Jan 2023 10:58:04 +0100 Subject: [PATCH 009/144] Added basic loader for bone 
density irregular dataframe from CRAN --- skfda/datasets/_real_datasets.py | 53 ++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/skfda/datasets/_real_datasets.py b/skfda/datasets/_real_datasets.py index df5f8c34e..134b76ef5 100644 --- a/skfda/datasets/_real_datasets.py +++ b/skfda/datasets/_real_datasets.py @@ -12,6 +12,7 @@ import rdata from ..representation import FDataGrid +from ..representation.irregular import FDataIrregular from ..typing._numpy import NDArrayFloat, NDArrayInt @@ -1541,3 +1542,55 @@ def fetch_mco( cite=":footcite:`ruiz++_2003_cariporide`", bibliography=".. footbibliography::", ) + _param_descr + + +def _fetch_loon_data(name: str) -> Any: + return _fetch_cran_no_encoding_warning( + name, + "loon.data", + version="0.1.3", + ) + +_bone_density_descr = """ + The Bone Density dataset is a study of bone density in boys and girls aged 8-17. + It contains data from 423 individuals, measured irregularly in different times, + with an average of ~3 points per individual. + + References: +""" + +def fetch_bone_density( +) -> Bunch | Tuple[FDataGrid, NDArrayInt] | Tuple[DataFrame, Series]: + """ + Load the Bone Density dataset. This is an irregular dataset. + + The data is obtained from the R package 'loon.data', which compiles several + irregular datasets. Sources to be determined. 
+ + """ + descr = _bone_density_descr + + raw_dataset = _fetch_loon_data("bone_ext") + + data = raw_dataset["bone_ext"] + + curve_name = "idnum" + argument_name = "age" + target_name = "spnbmd" + + curves = FDataIrregular.from_dataframe( + data, + id_column=curve_name, + argument_columns=argument_name, + coordinate_columns=target_name, + argument_names=[argument_name], + coordinate_names=[target_name], + dataset_name="bone_ext" + ) + + return Bunch( + data=curves, + feature_names=[argument_name], + target_names=[target_name], + DESCR=descr, + ) \ No newline at end of file From a9434c76f7c740498228f329e008d333fb72201b Mon Sep 17 00:00:00 2001 From: opintosant Date: Mon, 30 Jan 2023 12:09:53 +0100 Subject: [PATCH 010/144] Implemented function concatenate --- skfda/representation/irregular.py | 53 +++++++++++++++++++++++++++++-- 1 file changed, 50 insertions(+), 3 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 182564701..976d3d278 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -527,8 +527,55 @@ def __neg__(self: T) -> T: return self.copy(function_values=-self.function_values) def concatenate(self: T, *others: T, as_coordinates: bool = False) -> T: - #TODO Implement allocing memory only once - pass + if as_coordinates: + raise NotImplementedError( + "Not implemented for as_coordinates = True", + ) + + # Verify that dimensions are compatible + + assert len(others) > 0 + self._check_same_dimensions(others[0]) + + if len(others) > 1: + for x, y in zip(others, others[1:]): + x._check_same_dimensions(y) + + # Allocate all required memory + + total_functions = self.num_functions + sum([o.num_functions for o in others]) + total_values = self.num_observations + sum([o.num_observations for o in others]) + total_sample_names = [] + + function_indices = np.zeros((total_functions, ), + dtype=np.uint32) + function_arguments = np.zeros((total_values, + self.dim_domain)) + 
function_values = np.zeros((total_values, + self.dim_codomain)) + + index = 0 + head = 0 + + # Add samples sequentially + for f_data in [self] + list(others): + function_indices[index:index+f_data.num_functions] = f_data.function_indices + function_arguments[head:head+f_data.num_observations] = f_data.function_arguments + function_values[head:head+f_data.num_observations] = f_data.function_values + + # Adjust pointers to the concatenated array + function_indices[index:index+f_data.num_functions] += head + + index += f_data.num_functions + head += f_data.num_observations + total_sample_names = total_sample_names + list(f_data.sample_names) + + return self.copy( + function_indices, + function_arguments, + function_values, + sample_names = total_sample_names + ) def plot(self, *args: Any, **kwargs: Any) -> Figure: from ..exploratory.visualization.representation import LinearPlotIrregular @@ -556,10 +603,10 @@ def to_grid( # noqa: D102 def copy( # noqa: WPS211 self: T, - deep: bool = False, # For Pandas compatibility function_indices: Optional[ArrayLike] = None, function_arguments: Optional[ArrayLike] = None, function_values: Optional[ArrayLike] = None, + deep: bool = False, # For Pandas compatibility dim_domain: Optional[int] = None, dim_codomain: Optional[int] = None, domain_range: Optional[DomainRangeLike] = None, From d0c295dc158079ea9dffaeff43ee8199f51a96a7 Mon Sep 17 00:00:00 2001 From: opintosant Date: Wed, 1 Mar 2023 15:24:50 +0100 Subject: [PATCH 011/144] Added conversion to matrix and grid --- skfda/representation/irregular.py | 74 +++++++++++++++++++++++++------ 1 file changed, 61 insertions(+), 13 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 976d3d278..f4d908662 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -38,7 +38,7 @@ ) from ..typing._numpy import ArrayLike, NDArrayBool, NDArrayFloat, NDArrayInt from ._functional_data import FData -from .grid import 
FDataGrid +from .grid import FDataGrid, FDataGridDType from .evaluator import Evaluator from .extrapolation import ExtrapolationLike from .interpolation import SplineInterpolation @@ -166,7 +166,7 @@ def from_dataframe( ) @classmethod - def from_FDataGrid( + def from_datagrid( cls, f_data: FDataGrid, **kwargs @@ -590,16 +590,67 @@ def scatter(self, *args: Any, **kwargs: Any) -> Figure: def to_basis(self, basis: Basis, **kwargs: Any) -> FDataBasis: #TODO Use BasisSmoother to return basis? pass + + def to_matrix(self, **kwargs: Any) -> ArrayLike: + #Convert FDataIrregular to matrix of all points + #with NaN in undefined values + + if self.dim_domain > 1: + warnings.warn(f"Not implemented for domain dimension > 1, \ + currently {self.dim_domain}") + + # Find the grid points and values for each function + grid_points = [] + evaluated_points = [] + for index_start, index_end in zip(list(self.function_indices), + list(self.function_indices[1:])): + grid_points.append( + [x[0] for x in self.function_arguments[index_start:index_end]]) + evaluated_points.append( + self.function_values[index_start:index_end]) + + # Dont forget to add the last one + grid_points.append([x[0] for x in self.function_arguments[index_end:]]) + evaluated_points.append(self.function_values[index_end:]) + + # Aggregate into a complete data matrix + from functools import reduce + unified_grid_points = reduce( + lambda x,y: set(list(y)).union(list(x)), + grid_points, + ) + + unified_grid_points = sorted(unified_grid_points) + + # Fill matrix with known values, leave unknown as NA + num_curves = len(grid_points) + num_points = len(unified_grid_points) + + unified_matrix = np.empty((num_curves, num_points, self.dim_codomain)) + unified_matrix.fill(np.nan) + + for curve in range(num_curves): + for point in range(len(grid_points[curve])): + for dimension in range(self.dim_codomain): + point_index = unified_grid_points.index(grid_points[curve][point]) + unified_matrix[curve, point_index, dimension] = 
evaluated_points[curve][point][dimension] + return unified_matrix, unified_grid_points + def to_grid( # noqa: D102 self: T, - grid_points: Optional[GridPointsLike] = None, - *, - sample_points: Optional[GridPointsLike] = None, ) -> T: - - #TODO Return list of data grids? Data grid with holes? - pass + + data_matrix, grid_points = self.to_matrix() + + return FDataGrid( + data_matrix=data_matrix, + grid_points=grid_points, + dataset_name=self.dataset_name, + argument_names=self.argument_names, + coordinate_names=self.coordinate_names, + extrapolation=self.extrapolation, + ) def copy( # noqa: WPS211 self: T, @@ -810,12 +861,9 @@ def _take_allow_fill( @property def dtype(self) -> FDataGridDType: + #TODO Do this natively? """The dtype for this extension array, FDataGridDType""" - return FDataGridDType( - grid_points=self.grid_points, - domain_range=self.domain_range, - dim_codomain=self.dim_codomain, - ) + return self.to_grid().dtype @property def nbytes(self) -> int: From c28b451bb6d03292d829e0096714b5d04fb93ebc Mon Sep 17 00:00:00 2001 From: opintosant Date: Wed, 1 Mar 2023 15:25:54 +0100 Subject: [PATCH 012/144] Added corrections to target in fetch_bone_density. Recovered functionality of as_frame and return_X_y arguments --- skfda/datasets/_real_datasets.py | 35 +++++++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/skfda/datasets/_real_datasets.py b/skfda/datasets/_real_datasets.py index 134b76ef5..590a1d1ea 100644 --- a/skfda/datasets/_real_datasets.py +++ b/skfda/datasets/_real_datasets.py @@ -1558,8 +1558,10 @@ def _fetch_loon_data(name: str) -> Any: References: """ - + def fetch_bone_density( + return_X_y: Literal[True], + as_frame: bool = False, ) -> Bunch | Tuple[FDataGrid, NDArrayInt] | Tuple[DataFrame, Series]: """ Load the Bone Density dataset. This is an irregular dataset. 
@@ -1576,21 +1578,44 @@ def fetch_bone_density( curve_name = "idnum" argument_name = "age" - target_name = "spnbmd" + target_name = "sex" + coordinate_name = "spnbmd" curves = FDataIrregular.from_dataframe( data, id_column=curve_name, argument_columns=argument_name, - coordinate_columns=target_name, + coordinate_columns=coordinate_name, argument_names=[argument_name], - coordinate_names=[target_name], + coordinate_names=[coordinate_name], dataset_name="bone_ext" ) + + target = pd.Series( + data.drop_duplicates(subset=["idnum"])['sex'], + name="group", + ) + + feature_name = curves.dataset_name.lower() + target_names = target.values.tolist() + + if as_frame: + #TODO Arreglar problemas que tiene esto con dtype + #curves = pd.DataFrame({feature_name: curves}) + curves = pd.DataFrame({feature_name: curves.to_grid()}) + frame = pd.concat([curves, target], axis=1) + else: + target = target.values.codes + + if return_X_y: + return curves, target return Bunch( data=curves, + target=target, + frame=frame, + categories={}, feature_names=[argument_name], - target_names=[target_name], + target_names=target_names, DESCR=descr, ) \ No newline at end of file From 12432919bc2eab6403f96dce7be1a73f836e05eb Mon Sep 17 00:00:00 2001 From: opintosant Date: Wed, 1 Mar 2023 16:31:19 +0100 Subject: [PATCH 013/144] Fix round function to only round coordinate values. 
Added custom IrregularCoordinateIterator for the FDataIrregular class --- skfda/representation/irregular.py | 32 +++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index f4d908662..973e77ea8 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -226,18 +226,17 @@ def round( decimals: int = 0, out: Optional[FDataIrregular] = None, ) -> FDataIrregular: - rounded_arguments = self.function_arguments.round(decimals=decimals) + # Arguments are not rounded due to possibility of + # coalescing various arguments to the same rounded value rounded_values = self.function_values.round(decimals=decimals) if out is not None and isinstance(out, FDataIrregular): out.function_indices = self.function_indices - out.function_arguments = rounded_arguments out.function_values = rounded_values return out return self.copy( - function_arguments=rounded_arguments, function_values=rounded_values ) @@ -889,4 +888,29 @@ def isna(self) -> NDArrayBool: #TODO Do i need a FDataIrregularDType? -#TODO Do I need a _CoordinateIterator? 
+class _IrregularCoordinateIterator(Sequence[T]): + """Internal class to iterate through the image coordinates.""" + + def __init__(self, fdatairregular: T) -> None: + """Create an iterator through the image coordinates.""" + self._fdatairregular = fdatairregular + + def __getitem__( + self, + key: Union[int, slice, NDArrayInt, NDArrayBool], + ) -> T: + """Get a specific coordinate.""" + s_key = key + if isinstance(s_key, int): + s_key = slice(s_key, s_key + 1) + + coordinate_names = np.array(self._fdatairregular.coordinate_names)[s_key] + + return self._fdatairregular.copy( + function_coordinates=self._fdatairregular.function_coordinates[..., key], + coordinate_names=tuple(coordinate_names), + ) + + def __len__(self) -> int: + """Return the number of coordinates.""" + return self._fdatairregular.dim_codomain From 87267b7fc156c3a73a678ae209ee166cd1fb5b10 Mon Sep 17 00:00:00 2001 From: opintosant Date: Wed, 1 Mar 2023 18:31:02 +0100 Subject: [PATCH 014/144] Fix wrong argument type for return_X_y --- skfda/datasets/_real_datasets.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/skfda/datasets/_real_datasets.py b/skfda/datasets/_real_datasets.py index 590a1d1ea..acdb59566 100644 --- a/skfda/datasets/_real_datasets.py +++ b/skfda/datasets/_real_datasets.py @@ -1560,7 +1560,7 @@ def _fetch_loon_data(name: str) -> Any: """ def fetch_bone_density( - return_X_y: Literal[True], + return_X_y: bool = False, as_frame: bool = False, ) -> Bunch | Tuple[FDataGrid, NDArrayInt] | Tuple[DataFrame, Series]: """ @@ -1571,6 +1571,7 @@ def fetch_bone_density( """ descr = _bone_density_descr + frame = None raw_dataset = _fetch_loon_data("bone_ext") From 24bea750b3f40f11e8f93271b6f98d7bc2ab53ab Mon Sep 17 00:00:00 2001 From: opintosant Date: Wed, 1 Mar 2023 18:32:07 +0100 Subject: [PATCH 015/144] Change return type of operations from FDataGrid back to FDataIrregular --- skfda/representation/irregular.py | 47 ++++++++++++++++--------------- 1 file changed, 25 
insertions(+), 22 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 973e77ea8..5e5af80ef 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -259,9 +259,8 @@ def dim_codomain(self) -> int: #TODO Remove CoordinateIterator in an appropiate way @property - def coordinates(self: T) -> _CoordinateIterator[T]: - #TODO Does it even make sense to do this? Maybe it requires an specific _IrregularCoordinateIterator over the structure - pass + def coordinates(self: T) -> _IrregularCoordinateIterator[T]: + return _IrregularCoordinateIterator(self) @property def n_samples(self) -> int: @@ -351,9 +350,11 @@ def sum( # noqa: WPS125 else np.sum(self.function_values, axis=0, keepdims=True) ) - return FDataGrid( - data_matrix=data, - sample_names=(None,), + return FDataIrregular( + function_indices=np.array([0]), + function_arguments=np.array(np.zeros((1, self.dim_domain))), + function_values=data, + sample_names=("sum",), ) def mean(self: T) -> T: @@ -374,12 +375,13 @@ def mean(self: T) -> T: for arg in distinct_args] # Obtain mean of all available values for each argument point - vars = np.array([np.mean(vals) for vals in values]) + means = np.array([np.mean(vals) for vals in values]) - # Create a FDataGrid object with only 1 curve, the mean curve - return FDataGrid( - grid_points=distinct_args, - data_matrix=np.array([vars]), + # Create a FDataIrregular object with only 1 curve, the mean curve + return FDataIrregular( + function_indices=np.array([0]), + function_arguments=distinct_args.reshape(-1,1), + function_values=means.reshape(-1,1), sample_names=("mean",), ) @@ -403,11 +405,12 @@ def var(self: T) -> T: # Obtain variance of all available values for each argument point vars = np.array([np.var(vals) for vals in values]) - # Create a FDataGrid object with only 1 curve, the variance curve - return FDataGrid( - grid_points=distinct_args, - data_matrix=np.array([vars]), - 
sample_names=("variance",), + # Create a FDataIrregular object with only 1 curve, the variance curve + return FDataIrregular( + function_indices=np.array([0]), + function_arguments=distinct_args.reshape(-1,1), + function_values=vars.reshape(-1,1), + sample_names=("var",), ) def cov(self: T) -> T: @@ -415,10 +418,10 @@ def cov(self: T) -> T: pass def gmean(self: T) -> T: - return FDataGrid( - data_matrix=[ - scipy.stats.mstats.gmean(self.function_values, 0), - ], + return FDataIrregular( + function_indices=np.array([0]), + function_arguments=np.array(np.zeros((1, self.dim_domain))), + function_values=scipy.stats.mstats.gmean(self.function_values, 0), sample_names=("geometric mean",), ) @@ -739,7 +742,7 @@ def shift( restrict_domain: bool = False, extrapolation: Optional[ExtrapolationLike] = None, grid_points: Optional[GridPointsLike] = None, - ) -> FDataGrid: + ) -> FDataIrregular: #TODO Is this possible with this structure? pass @@ -907,7 +910,7 @@ def __getitem__( coordinate_names = np.array(self._fdatairregular.coordinate_names)[s_key] return self._fdatairregular.copy( - function_coordinates=self._fdatairregular.function_coordinates[..., key], + function_values=self._fdatairregular.function_values[..., key], coordinate_names=tuple(coordinate_names), ) From a5a0c2be8951e579daa88cb9f3a0992998107da5 Mon Sep 17 00:00:00 2001 From: opintosant Date: Wed, 1 Mar 2023 18:48:45 +0100 Subject: [PATCH 016/144] Bring back interpolation to irregular class --- skfda/representation/irregular.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 5e5af80ef..9e1c4aedb 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -64,6 +64,7 @@ def __init__( dataset_name: Optional[str] = None, sample_names: Optional[LabelTupleLike] = None, extrapolation: Optional[ExtrapolationLike] = None, + interpolation: Optional[Evaluator] = None, 
argument_names: Optional[LabelTupleLike] = None, coordinate_names: Optional[LabelTupleLike] = None ): @@ -101,6 +102,8 @@ def __init__( # the first and last element of each list of the grid_points. self._domain_range = validate_domain_range(domain_range) + + self.interpolation = interpolation super().__init__( extrapolation=extrapolation, @@ -308,8 +311,11 @@ def _evaluate( aligned: bool = True, ) -> NDArrayFloat: - #TODO - pass + return self.interpolation( + self.to_grid(), #TODO Create native interpolation to irregular type + eval_points, + aligned=aligned, + ) def derivative( self: T, @@ -441,6 +447,9 @@ def equals(self, other: object) -> bool: #TODO interpolation/extrapolation when implemented + if self.interpolation != other.interpolation: + return False + return True def _eq_elemenwise(self: T, other: T) -> NDArrayBool: @@ -666,6 +675,7 @@ def copy( # noqa: WPS211 dataset_name: Optional[str] = None, sample_names: Optional[LabelTupleLike] = None, extrapolation: Optional[ExtrapolationLike] = None, + interpolation: Optional[Evaluator] = None, argument_names: Optional[LabelTupleLike] = None, coordinate_names: Optional[LabelTupleLike] = None, ) -> T: @@ -712,6 +722,9 @@ def copy( # noqa: WPS211 if extrapolation is None: extrapolation = self.extrapolation + + if interpolation is None: + interpolation = self.interpolation return FDataIrregular( function_indices, @@ -725,6 +738,7 @@ def copy( # noqa: WPS211 coordinate_names=coordinate_names, sample_names=sample_names, extrapolation=extrapolation, + interpolation=interpolation, ) def restrict( @@ -768,12 +782,12 @@ def __repr__(self) -> str: f"\nfunction_indices={self.function_indices!r}," f"\nfunction_arguments={self.function_arguments!r}," f"\nfunction_values={self.function_values!r}," - #f"\ndomain_range={self.domain_range!r}," + f"\ndomain_range={self.domain_range!r}," f"\ndataset_name={self.dataset_name!r}," f"\nargument_names={self.argument_names!r}," f"\ncoordinate_names={self.coordinate_names!r}," 
f"\nextrapolation={self.extrapolation!r}," - #f"\ninterpolation={self.interpolation!r})" + f"\ninterpolation={self.interpolation!r})" ).replace( '\n', '\n ', From 21e099cd2be513a3d772a88710e5aebaa3bd73f6 Mon Sep 17 00:00:00 2001 From: opintosant Date: Wed, 1 Mar 2023 19:12:43 +0100 Subject: [PATCH 017/144] Corrections in format --- skfda/datasets/_real_datasets.py | 12 +++++++----- skfda/exploratory/visualization/representation.py | 9 +++------ skfda/representation/irregular.py | 11 +++++------ 3 files changed, 15 insertions(+), 17 deletions(-) diff --git a/skfda/datasets/_real_datasets.py b/skfda/datasets/_real_datasets.py index acdb59566..9ce488f64 100644 --- a/skfda/datasets/_real_datasets.py +++ b/skfda/datasets/_real_datasets.py @@ -1536,7 +1536,6 @@ def fetch_mco( DESCR=descr, ) - if fetch_mco.__doc__ is not None: # docstrings can be stripped off fetch_mco.__doc__ += _mco_descr_template.format( cite=":footcite:`ruiz++_2003_cariporide`", @@ -1550,15 +1549,18 @@ def _fetch_loon_data(name: str) -> Any: "loon.data", version="0.1.3", ) - + + _bone_density_descr = """ - The Bone Density dataset is a study of bone density in boys and girls aged 8-17. - It contains data from 423 individuals, measured irregularly in different times, + The Bone Density dataset is a study of bone density + in boys and girls aged 8-17. It contains data from 423 + individuals, measured irregularly in different times, with an average of ~3 points per individual. 
References: """ + def fetch_bone_density( return_X_y: bool = False, as_frame: bool = False, @@ -1593,7 +1595,7 @@ def fetch_bone_density( ) target = pd.Series( - data.drop_duplicates(subset=["idnum"])['sex'], + data.drop_duplicates(subset=["idnum"])[target_name], name="group", ) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index cdbe4fe78..573c4c435 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -714,22 +714,19 @@ def _plot( **color_dict, picker=True, pickradius=2, - marker=self.marker + marker=self.marker, ) - else: - # TODO Implementar para multidimension. Como hacer mesh? import warnings warnings.warn("Not implemented") _set_labels(self.fdata, fig, axes, self.patches) - - + + class ScatterPlotIrregular(PlotIrregular): """ Class used to scatter a FDataIrregular object. - """ def _plot( diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 9e1c4aedb..4efb388ac 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -115,7 +115,7 @@ def __init__( @classmethod def from_dataframe( - cls, + cls: Type[T], dataframe: pandas.DataFrame, id_column: str, argument_columns: LabelTupleLike, @@ -170,7 +170,7 @@ def from_dataframe( @classmethod def from_datagrid( - cls, + cls: Type[T], f_data: FDataGrid, **kwargs ) -> FDataIrregular: @@ -215,13 +215,13 @@ def from_datagrid( **kwargs ) - def set_function_indices(self, function_indices): + def set_function_indices(self, function_indices) -> ArrayLike: self.function_indices = function_indices.copy() - def set_function_arguments(self, function_arguments): + def set_function_arguments(self, function_arguments) -> ArrayLike: self.function_arguments = function_arguments.copy() - def set_function_values(self, function_values): + def set_function_values(self, function_values) -> ArrayLike: self.function_values = 
function_values.copy() def round( @@ -472,7 +472,6 @@ def __add__( self: T, other: Union[T, NDArrayFloat, NDArrayInt, float], ) -> T: - #TODO Implement when attributes are done pass From 7e2aa653faa28a967b2d8c6c9e577ae396e28b20 Mon Sep 17 00:00:00 2001 From: opintosant Date: Wed, 1 Mar 2023 21:16:55 +0100 Subject: [PATCH 018/144] Move all changes to operations to separate PR. Fix getitem to work with composite slices --- skfda/datasets/_real_datasets.py | 1 + skfda/representation/irregular.py | 221 +++++++----------------------- 2 files changed, 51 insertions(+), 171 deletions(-) diff --git a/skfda/datasets/_real_datasets.py b/skfda/datasets/_real_datasets.py index 9ce488f64..226de3681 100644 --- a/skfda/datasets/_real_datasets.py +++ b/skfda/datasets/_real_datasets.py @@ -1536,6 +1536,7 @@ def fetch_mco( DESCR=descr, ) + if fetch_mco.__doc__ is not None: # docstrings can be stripped off fetch_mco.__doc__ += _mco_descr_template.format( cite=":footcite:`ruiz++_2003_cariporide`", diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 4efb388ac..6ea587c8b 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -28,7 +28,7 @@ import scipy.stats.mstats from matplotlib.figure import Figure -from .._utils import _check_array_key, _int_to_real, _to_grid_points, constants +from .._utils import _check_array_key from ..typing._base import ( DomainRange, DomainRangeLike, @@ -50,7 +50,7 @@ class FDataIrregular(FData): # noqa: WPS214 - # TODO Docstring + # TODO Docstring def __init__( self, @@ -67,11 +67,10 @@ def __init__( interpolation: Optional[Evaluator] = None, argument_names: Optional[LabelTupleLike] = None, coordinate_names: Optional[LabelTupleLike] = None - ): + ): """Construct a FDataIrregular object.""" # Set dimensions - # TODO Check dimensions against num of arguments and coordinates? 
self._dim_domain = dim_domain self._dim_codomain = dim_codomain @@ -85,15 +84,15 @@ def __init__( self.set_function_arguments(function_arguments) self.set_function_values(function_values) - #TODO Fix for higher dimensions - i=0 + # TODO Fix for higher dimensions + i = 0 self._sample_range = list() for f in self.function_indices[1:]: self._sample_range.append((self.function_arguments[i][0], self.function_arguments[f-1][0])) i = f self._sample_range.append((self.function_arguments[i][0], - self.function_arguments[-1][0])) + self.function_arguments[-1][0])) from ..misc.validation import validate_domain_range if domain_range is None: @@ -125,10 +124,12 @@ def from_dataframe( # Accept strings but ensure the column names are tuples _is_str = isinstance(argument_columns, str) - argument_columns = [argument_columns] if _is_str else argument_columns + argument_columns = [argument_columns] if _is_str else \ + argument_columns _is_str = isinstance(coordinate_columns, str) - coordinate_columns = [coordinate_columns] if _is_str else coordinate_columns + coordinate_columns = [coordinate_columns] if _is_str else \ + coordinate_columns # Obtain num functions and num observations from data num_observations = dataframe.shape[0] @@ -136,7 +137,7 @@ def from_dataframe( # Create data structure of function pointers and coordinates function_indices = np.zeros((num_functions, ), - dtype=np.uint32) + dtype=np.uint32) function_arguments = np.zeros((num_observations, len(argument_columns))) function_values = np.zeros((num_observations, @@ -181,7 +182,7 @@ def from_datagrid( # Create data structure of function pointers and coordinates function_indices = np.zeros((num_functions, ), - dtype=np.uint32) + dtype=np.uint32) function_arguments = np.zeros((num_observations, f_data.dim_domain)) function_values = np.zeros((num_observations, @@ -195,19 +196,19 @@ def from_datagrid( for j in range(f_data.data_matrix.shape[1]): if np.isnan(f_data.data_matrix[i][j]): continue - + arg = 
[f_data.grid_points[dim][j] for dim in range(f_data.dim_domain)] function_arguments[head+num_values, :] = arg - + value = [f_data.data_matrix[i,j,dim] for dim in range(f_data.dim_codomain)] function_values[head+num_values, :] = value num_values += 1 - + head += num_values - + return cls( function_indices, function_arguments, @@ -312,7 +313,7 @@ def _evaluate( ) -> NDArrayFloat: return self.interpolation( - self.to_grid(), #TODO Create native interpolation to irregular type + self.to_grid(), # TODO Create native interpolation for irregular eval_points, aligned=aligned, ) @@ -323,7 +324,6 @@ def derivative( order: int = 1, method: Optional[Basis] = None, ) -> T: - #TODO pass def integrate( @@ -331,7 +331,6 @@ def integrate( *, domain: Optional[DomainRange] = None, ) -> NDArrayFloat: - #TODO pass def _check_same_dimensions(self: T, other: T) -> None: @@ -349,78 +348,15 @@ def sum( # noqa: WPS125 skipna: bool = False, min_count: int = 0, ) -> T: - super().sum(axis=axis, out=out, keepdims=keepdims, skipna=skipna) - - data = ( - np.nansum(self.function_values, axis=0, keepdims=True) if skipna - else np.sum(self.function_values, axis=0, keepdims=True) - ) - - return FDataIrregular( - function_indices=np.array([0]), - function_arguments=np.array(np.zeros((1, self.dim_domain))), - function_values=data, - sample_names=("sum",), - ) + pass def mean(self: T) -> T: - """Compute the mean pointwise for a sparse dataset. - - Note that, for irregular data, points may be represented in few - or even an only curve. - - Returns: - A FDataIrregular object with just one sample representing the - mean of all curves the across each value. 
- - """ - - # Find all distinct arguments (ordered) and corresponding values - distinct_args = np.unique(np.matrix.flatten(self.function_arguments)) - values = [np.matrix.flatten(self.function_values[np.where(self.function_arguments == arg)[0]]) - for arg in distinct_args] - - # Obtain mean of all available values for each argument point - means = np.array([np.mean(vals) for vals in values]) - - # Create a FDataIrregular object with only 1 curve, the mean curve - return FDataIrregular( - function_indices=np.array([0]), - function_arguments=distinct_args.reshape(-1,1), - function_values=means.reshape(-1,1), - sample_names=("mean",), - ) + pass def var(self: T) -> T: - """Compute the variance pointwise for a sparse dataset. - - Note that, for irregular data, points may be represented in few - or even an only curve. - - Returns: - A FDataIrregular object with just one sample representing the - variance of all curves the across each value. - - """ - - # Find all distinct arguments (ordered) and corresponding values - distinct_args = np.unique(np.matrix.flatten(self.function_arguments)) - values = [np.matrix.flatten(self.function_values[np.where(self.function_arguments == arg)[0]]) - for arg in distinct_args] - - # Obtain variance of all available values for each argument point - vars = np.array([np.var(vals) for vals in values]) - - # Create a FDataIrregular object with only 1 curve, the variance curve - return FDataIrregular( - function_indices=np.array([0]), - function_arguments=distinct_args.reshape(-1,1), - function_values=vars.reshape(-1,1), - sample_names=("var",), - ) + pass def cov(self: T) -> T: - #TODO Implementation to be decided pass def gmean(self: T) -> T: @@ -445,7 +381,7 @@ def equals(self, other: object) -> bool: if not np.array_equal(self.domain_range, other.domain_range): return False - #TODO interpolation/extrapolation when implemented + # TODO extrapolation when implemented if self.interpolation != other.interpolation: return False @@ -464,128 
+400,61 @@ def _get_op_matrix( self, other: Union[T, NDArrayFloat, NDArrayInt, float], ) -> Union[None, float, NDArrayFloat, NDArrayInt]: - - #TODO Implement when attributes are done pass def __add__( self: T, other: Union[T, NDArrayFloat, NDArrayInt, float], ) -> T: - #TODO Implement when attributes are done pass def __radd__( self: T, other: Union[T, NDArrayFloat, NDArrayInt, float], ) -> T: - - #TODO Implement when attributes are done pass def __sub__( self: T, other: Union[T, NDArrayFloat, NDArrayInt, float], ) -> T: - - #TODO Implement when attributes are done pass def __rsub__( self: T, other: Union[T, NDArrayFloat, NDArrayInt, float], ) -> T: - - #TODO Implement when attributes are done pass def __mul__( self: T, other: Union[T, NDArrayFloat, NDArrayInt, float], ) -> T: - - #TODO Implement when attributes are done pass def __rmul__( self: T, other: Union[T, NDArrayFloat, NDArrayInt, float], ) -> T: - - #TODO Implement when attributes are done pass def __truediv__( self: T, other: Union[T, NDArrayFloat, NDArrayInt, float], ) -> T: - - #TODO Implement when attributes are done pass def __rtruediv__( self: T, other: Union[T, NDArrayFloat, NDArrayInt, float], ) -> T: - - #TODO Implement when attributes are done pass def __neg__(self: T) -> T: - """Negation of FDataIrregular object.""" - - return self.copy(function_values=-self.function_values) + pass def concatenate(self: T, *others: T, as_coordinates: bool = False) -> T: - if as_coordinates: - raise NotImplementedError( - "Not implemented for as_coordinates = True", - ) - - # Verify that dimensions are compatible - - assert len(others) > 0 - self._check_same_dimensions(others[0]) - - if len(others) > 1: - for x, y in zip(others, others[1:]): - x._check_same_dimensions(y) - - # Allocate all required memory - - total_functions = self.num_functions + sum([o.num_functions for o in others]) - total_values = self.num_observations + sum([o.num_observations for o in others]) - total_sample_names = [] - - 
function_indices = np.zeros((total_functions, ), - dtype=np.uint32) - function_arguments = np.zeros((total_values, - self.dim_domain)) - function_values = np.zeros((total_values, - self.dim_codomain)) - - index = 0 - head = 0 - - # Add samples sequentially - for f_data in [self] + list(others): - function_indices[index:index+f_data.num_functions] = f_data.function_indices - function_arguments[head:head+f_data.num_observations] = f_data.function_arguments - function_values[head:head+f_data.num_observations] = f_data.function_values - - # Adjust pointers to the concatenated array - function_indices[index:index+f_data.num_functions] += head - - index += f_data.num_functions - head += f_data.num_observations - total_sample_names = total_sample_names + list(f_data.sample_names) - - return self.copy( - function_indices, - function_arguments, - function_values, - sample_names = total_sample_names - ) + pass def plot(self, *args: Any, **kwargs: Any) -> Figure: from ..exploratory.visualization.representation import LinearPlotIrregular @@ -598,12 +467,11 @@ def scatter(self, *args: Any, **kwargs: Any) -> Figure: return ScatterPlotIrregular(self, *args, **kwargs).plot() def to_basis(self, basis: Basis, **kwargs: Any) -> FDataBasis: - #TODO Use BasisSmoother to return basis? 
pass def to_matrix(self, **kwargs: Any) -> ArrayLike: - #Convert FDataIrregular to matrix of all points - #with NaN in undefined values + # Convert FDataIrregular to matrix of all points + # with NaN in undefined values if self.dim_domain > 1: warnings.warn(f"Not implemented for domain dimension > 1, \ @@ -626,7 +494,7 @@ def to_matrix(self, **kwargs: Any) -> ArrayLike: # Aggregate into a complete data matrix from functools import reduce unified_grid_points = reduce( - lambda x,y: set(list(y)).union(list(x)), + lambda x, y: set(list(y)).union(list(x)), grid_points, ) @@ -744,8 +612,6 @@ def restrict( self: T, domain_range: DomainRangeLike, ) -> T: - - #TODO Is this possible with this structure pass def shift( @@ -754,9 +620,7 @@ def shift( *, restrict_domain: bool = False, extrapolation: Optional[ExtrapolationLike] = None, - grid_points: Optional[GridPointsLike] = None, ) -> FDataIrregular: - #TODO Is this possible with this structure? pass def compose( @@ -765,14 +629,16 @@ def compose( *, eval_points: Optional[GridPointsLike] = None, ) -> T: - - #TODO Is this possible with this structure? pass def __str__(self) -> str: """Return str(self).""" - #TODO Define str method after all attributes are locked - pass + return ( + f"function_indices: {self.function_indices}\n" + f"function_arguments: {self.function_arguments}\n" + f"function_values: {self.function_values}\n" + f"time range: {self.domain_range}" + ) def __repr__(self) -> str: """Return repr(self).""" @@ -796,9 +662,22 @@ def __getitem__( self: T, key: Union[int, slice, NDArrayInt, NDArrayBool], ) -> T: - """Return self[key].""" - #TODO Maybe return from the view? Or transform using view functions directly from data structure? 
- + required_items = [] + key = _check_array_key(self.function_indices, key) + indices = range(self.num_functions) + required_indices = indices[key] + for i in required_indices: + next_index = self.function_indices[i + 1] if i + 1 < \ + self.num_functions else -1 + s = slice(self.function_indices[i], next_index) + + required_items.append(self.copy( + function_indices=np.array([0]), + function_arguments=self.function_arguments[s], + function_values=self.function_values[s], + sample_names=list(np.array(self.sample_names)[key]), + )) + return required_items ##################################################################### # Numpy methods ##################################################################### @@ -876,7 +755,7 @@ def _take_allow_fill( @property def dtype(self) -> FDataGridDType: - #TODO Do this natively? + # TODO Do this natively? """The dtype for this extension array, FDataGridDType""" return self.to_grid().dtype @@ -902,7 +781,7 @@ def isna(self) -> NDArrayBool: ) -#TODO Do i need a FDataIrregularDType? +# TODO FDataIrregularDType? 
class _IrregularCoordinateIterator(Sequence[T]): """Internal class to iterate through the image coordinates.""" From db112b932d9f3c3a59daa51e490ffb84cbb14ff9 Mon Sep 17 00:00:00 2001 From: opintosant Date: Wed, 1 Mar 2023 23:12:37 +0100 Subject: [PATCH 019/144] Return a single object in getitem for slices --- skfda/representation/irregular.py | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 6ea587c8b..197704b8e 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -88,8 +88,8 @@ def __init__( i = 0 self._sample_range = list() for f in self.function_indices[1:]: - self._sample_range.append((self.function_arguments[i][0], - self.function_arguments[f-1][0])) + self._sample_range.append(tuple((self.function_arguments[i][0], + self.function_arguments[f-1][0]))) i = f self._sample_range.append((self.function_arguments[i][0], self.function_arguments[-1][0])) @@ -98,7 +98,7 @@ def __init__( if domain_range is None: domain_range = self.sample_range # Default value for domain_range is a list of tuples with - # the first and last element of each list of the grid_points. 
+ # the first and last arguments of each curve self._domain_range = validate_domain_range(domain_range) @@ -662,7 +662,7 @@ def __getitem__( self: T, key: Union[int, slice, NDArrayInt, NDArrayBool], ) -> T: - required_items = [] + required_slices = [] key = _check_array_key(self.function_indices, key) indices = range(self.num_functions) required_indices = indices[key] @@ -670,14 +670,23 @@ def __getitem__( next_index = self.function_indices[i + 1] if i + 1 < \ self.num_functions else -1 s = slice(self.function_indices[i], next_index) + required_slices.append(s) - required_items.append(self.copy( - function_indices=np.array([0]), - function_arguments=self.function_arguments[s], - function_values=self.function_values[s], - sample_names=list(np.array(self.sample_names)[key]), - )) - return required_items + arguments = np.concatenate([self.function_arguments[s] + for s in required_slices]) + values = np.concatenate([self.function_values[s] + for s in required_slices]) + + chunk_sizes = np.array([s.stop-s.start for s in required_slices]) + indices = np.cumsum(chunk_sizes) - chunk_sizes[0] + + return self.copy( + function_indices=indices.astype(int), + function_arguments=arguments, + function_values=values, + sample_names=self.sample_names[key], + domain_range=self.sample_names[key], + ) ##################################################################### # Numpy methods ##################################################################### From 8ba5ef64ec5d875254e935a3eb8e7bc46b3c50e3 Mon Sep 17 00:00:00 2001 From: opintosant Date: Tue, 7 Mar 2023 14:06:34 +0100 Subject: [PATCH 020/144] Modified domain range to be separate from sample range and allow multiple dimensions --- skfda/representation/irregular.py | 36 ++++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 197704b8e..c76c8ffee 100644 --- a/skfda/representation/irregular.py +++ 
b/skfda/representation/irregular.py @@ -86,19 +86,39 @@ def __init__( # TODO Fix for higher dimensions i = 0 + dim_ranges = list() + for dim in range(self.dim_domain): + dim_sample_ranges = list() + for f in self.function_indices[1:]: + dim_sample_ranges.append(tuple((self.function_arguments[i][dim], + self.function_arguments[f-1][dim]))) + i = f + dim_sample_ranges.append((self.function_arguments[i][dim], + self.function_arguments[-1][dim])) + dim_ranges.append(dim_sample_ranges) + self._sample_range = list() - for f in self.function_indices[1:]: - self._sample_range.append(tuple((self.function_arguments[i][0], - self.function_arguments[f-1][0]))) - i = f - self._sample_range.append((self.function_arguments[i][0], - self.function_arguments[-1][0])) + for sample in range(len(dim_sample_ranges)): + self._sample_range.append( + tuple([dim_ranges[dim][sample] for dim in range(self.dim_domain)]) + ) + + # Default value for sample_range is a list of tuples with + # the first and last arguments of each curve for each dimension from ..misc.validation import validate_domain_range if domain_range is None: - domain_range = self.sample_range + ranges = list() + for dim in range(self.dim_domain): + min_argument = min([x[dim][0] for x in self._sample_range]) + max_argument = max([x[dim][1] for x in self._sample_range]) + ranges.append((min_argument, max_argument)) + + domain_range = tuple(ranges) + # Default value for domain_range is a list of tuples with - # the first and last arguments of each curve + # the minimum and maximum value of the arguments for each + # dimension self._domain_range = validate_domain_range(domain_range) From e562097610e81e2d5e6d4c31a26ff54b8a97feb2 Mon Sep 17 00:00:00 2001 From: opintosant Date: Wed, 8 Mar 2023 16:58:48 +0100 Subject: [PATCH 021/144] General fix of formatting --- skfda/representation/irregular.py | 137 +++++++++++++++--------------- 1 file changed, 70 insertions(+), 67 deletions(-) diff --git a/skfda/representation/irregular.py 
b/skfda/representation/irregular.py index c76c8ffee..fda8a09b5 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -47,11 +47,11 @@ from .basis import Basis, FDataBasis T = TypeVar("T", bound='FDataIrregular') - + class FDataIrregular(FData): # noqa: WPS214 # TODO Docstring - + def __init__( self, function_indices: ArrayLike, @@ -69,21 +69,21 @@ def __init__( coordinate_names: Optional[LabelTupleLike] = None ): """Construct a FDataIrregular object.""" - + # Set dimensions self._dim_domain = dim_domain self._dim_codomain = dim_codomain - + # Set structure to given data self.num_functions = function_indices.shape[0] - + assert function_arguments.shape[0] == function_values.shape[0] self.num_observations = function_arguments.shape[0] - + self.set_function_indices(function_indices) self.set_function_arguments(function_arguments) self.set_function_values(function_values) - + # TODO Fix for higher dimensions i = 0 dim_ranges = list() @@ -96,16 +96,16 @@ def __init__( dim_sample_ranges.append((self.function_arguments[i][dim], self.function_arguments[-1][dim])) dim_ranges.append(dim_sample_ranges) - + self._sample_range = list() for sample in range(len(dim_sample_ranges)): self._sample_range.append( tuple([dim_ranges[dim][sample] for dim in range(self.dim_domain)]) ) - + # Default value for sample_range is a list of tuples with # the first and last arguments of each curve for each dimension - + from ..misc.validation import validate_domain_range if domain_range is None: ranges = list() @@ -113,15 +113,16 @@ def __init__( min_argument = min([x[dim][0] for x in self._sample_range]) max_argument = max([x[dim][1] for x in self._sample_range]) ranges.append((min_argument, max_argument)) - + domain_range = tuple(ranges) - + # Default value for domain_range is a list of tuples with + # the minimum and maximum value of the arguments for each # dimension self._domain_range = validate_domain_range(domain_range) - + self.interpolation = 
interpolation super().__init__( @@ -131,7 +132,7 @@ def __init__( coordinate_names=coordinate_names, sample_names=sample_names, ) - + @classmethod def from_dataframe( cls: Type[T], @@ -141,20 +142,20 @@ def from_dataframe( coordinate_columns: LabelTupleLike, **kwargs ) -> FDataIrregular: - + # Accept strings but ensure the column names are tuples _is_str = isinstance(argument_columns, str) argument_columns = [argument_columns] if _is_str else \ argument_columns - + _is_str = isinstance(coordinate_columns, str) coordinate_columns = [coordinate_columns] if _is_str else \ coordinate_columns - + # Obtain num functions and num observations from data num_observations = dataframe.shape[0] num_functions = dataframe[id_column].nunique() - + # Create data structure of function pointers and coordinates function_indices = np.zeros((num_functions, ), dtype=np.uint32) @@ -162,40 +163,40 @@ def from_dataframe( len(argument_columns))) function_values = np.zeros((num_observations, len(coordinate_columns))) - + head = 0 index = 0 for _, f_values in dataframe.groupby(id_column): function_indices[index] = head num_values = f_values.shape[0] - + # Insert in order f_values = f_values.sort_values(argument_columns) - + new_args = f_values[argument_columns].values function_arguments[head:head+num_values, :] = new_args - + new_coords = f_values[coordinate_columns].values function_values[head:head+num_values, :] = new_coords - + # Update head and index head += num_values index += 1 - + return cls( function_indices, function_arguments, function_values, **kwargs ) - + @classmethod def from_datagrid( cls: Type[T], f_data: FDataGrid, **kwargs - ) -> FDataIrregular: - + ) -> FDataIrregular: + # Obtain num functions and num observations from data num_observations = np.sum(~np.isnan(f_data.data_matrix)) num_functions = f_data.data_matrix.shape[0] @@ -216,35 +217,35 @@ def from_datagrid( for j in range(f_data.data_matrix.shape[1]): if np.isnan(f_data.data_matrix[i][j]): continue - + arg = 
[f_data.grid_points[dim][j] for dim in range(f_data.dim_domain)] function_arguments[head+num_values, :] = arg - + value = [f_data.data_matrix[i,j,dim] for dim in range(f_data.dim_codomain)] function_values[head+num_values, :] = value num_values += 1 - + head += num_values - + return cls( function_indices, function_arguments, function_values, **kwargs ) - + def set_function_indices(self, function_indices) -> ArrayLike: self.function_indices = function_indices.copy() - + def set_function_arguments(self, function_arguments) -> ArrayLike: self.function_arguments = function_arguments.copy() - + def set_function_values(self, function_values) -> ArrayLike: self.function_values = function_values.copy() - + def round( self, decimals: int = 0, @@ -253,13 +254,13 @@ def round( # Arguments are not rounded due to possibility of # coalescing various arguments to the same rounded value rounded_values = self.function_values.round(decimals=decimals) - + if out is not None and isinstance(out, FDataIrregular): out.function_indices = self.function_indices out.function_values = rounded_values - + return out - + return self.copy( function_values=rounded_values ) @@ -281,7 +282,7 @@ def dim_domain(self) -> int: def dim_codomain(self) -> int: return self._dim_codomain - #TODO Remove CoordinateIterator in an appropiate way + # TODO Remove CoordinateIterator in an appropiate way @property def coordinates(self: T) -> _IrregularCoordinateIterator[T]: return _IrregularCoordinateIterator(self) @@ -369,10 +370,10 @@ def sum( # noqa: WPS125 min_count: int = 0, ) -> T: pass - + def mean(self: T) -> T: pass - + def var(self: T) -> T: pass @@ -402,10 +403,10 @@ def equals(self, other: object) -> bool: return False # TODO extrapolation when implemented - + if self.interpolation != other.interpolation: return False - + return True def _eq_elemenwise(self: T, other: T) -> NDArrayBool: @@ -475,28 +476,30 @@ def __neg__(self: T) -> T: def concatenate(self: T, *others: T, as_coordinates: bool = False) -> 
T: pass - + def plot(self, *args: Any, **kwargs: Any) -> Figure: - from ..exploratory.visualization.representation import LinearPlotIrregular + from ..exploratory.visualization.representation \ + import LinearPlotIrregular return LinearPlotIrregular(self, *args, **kwargs).plot() def scatter(self, *args: Any, **kwargs: Any) -> Figure: - from ..exploratory.visualization.representation import ScatterPlotIrregular + from ..exploratory.visualization.representation \ + import ScatterPlotIrregular return ScatterPlotIrregular(self, *args, **kwargs).plot() def to_basis(self, basis: Basis, **kwargs: Any) -> FDataBasis: pass - + def to_matrix(self, **kwargs: Any) -> ArrayLike: # Convert FDataIrregular to matrix of all points # with NaN in undefined values - + if self.dim_domain > 1: warnings.warn(f"Not implemented for domain dimension > 1, \ currently {self.dim_domain}") - + # Find the grid points and values for each function grid_points = [] evaluated_points = [] @@ -506,27 +509,27 @@ def to_matrix(self, **kwargs: Any) -> ArrayLike: [x[0] for x in self.function_arguments[index_start:index_end]]) evaluated_points.append( self.function_values[index_start:index_end]) - + # Dont forget to add the last one grid_points.append([x[0] for x in self.function_arguments[index_end:]]) evaluated_points.append(self.function_values[index_end:]) - + # Aggregate into a complete data matrix from functools import reduce unified_grid_points = reduce( lambda x, y: set(list(y)).union(list(x)), grid_points, ) - + unified_grid_points = sorted(unified_grid_points) - + # Fill matrix with known values, leave unknown as NA num_curves = len(grid_points) num_points = len(unified_grid_points) - + unified_matrix = np.empty((num_curves, num_points, self.dim_codomain)) unified_matrix.fill(np.nan) - + for curve in range(num_curves): for point in range(len(grid_points[curve])): for dimension in range(self.dim_codomain): @@ -534,13 +537,13 @@ def to_matrix(self, **kwargs: Any) -> ArrayLike: unified_matrix[curve, 
point_index, dimension] = evaluated_points[curve][point][dimension] return unified_matrix, unified_grid_points - + def to_grid( # noqa: D102 self: T, ) -> T: - + data_matrix, grid_points = self.to_matrix() - + return FDataGrid( data_matrix=data_matrix, grid_points=grid_points, @@ -566,7 +569,7 @@ def copy( # noqa: WPS211 argument_names: Optional[LabelTupleLike] = None, coordinate_names: Optional[LabelTupleLike] = None, ) -> T: - + """ Return a copy of the FDataIrregular. @@ -576,16 +579,16 @@ def copy( # noqa: WPS211 """ if function_indices is None: function_indices = self.function_indices - + if function_arguments is None: function_arguments = self.function_arguments - + if function_values is None: function_values = self.function_values - + if dim_domain is None: dim_domain = self.dim_domain - + if dim_codomain is None: dim_codomain = self.dim_codomain @@ -609,7 +612,7 @@ def copy( # noqa: WPS211 if extrapolation is None: extrapolation = self.extrapolation - + if interpolation is None: interpolation = self.interpolation @@ -691,15 +694,15 @@ def __getitem__( self.num_functions else -1 s = slice(self.function_indices[i], next_index) required_slices.append(s) - + arguments = np.concatenate([self.function_arguments[s] for s in required_slices]) values = np.concatenate([self.function_values[s] for s in required_slices]) - + chunk_sizes = np.array([s.stop-s.start for s in required_slices]) indices = np.cumsum(chunk_sizes) - chunk_sizes[0] - + return self.copy( function_indices=indices.astype(int), function_arguments=arguments, From 37cb3a7c19f421f2a3ae0f86f83fb62b119859f0 Mon Sep 17 00:00:00 2001 From: opintosant Date: Wed, 8 Mar 2023 21:10:51 +0100 Subject: [PATCH 022/144] Allow irregular class and dataset to be imported --- skfda/datasets/__init__.py | 1 + skfda/datasets/_real_datasets.py | 2 +- skfda/representation/__init__.py | 2 ++ 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/skfda/datasets/__init__.py b/skfda/datasets/__init__.py index 
666495cef..8abd0dfda 100644 --- a/skfda/datasets/__init__.py +++ b/skfda/datasets/__init__.py @@ -20,6 +20,7 @@ "fetch_tecator", "fetch_ucr", "fetch_weather", + "fetch_bone_density", ], "_samples_generators": [ "make_gaussian", diff --git a/skfda/datasets/_real_datasets.py b/skfda/datasets/_real_datasets.py index 226de3681..8aeec39fa 100644 --- a/skfda/datasets/_real_datasets.py +++ b/skfda/datasets/_real_datasets.py @@ -1604,7 +1604,7 @@ def fetch_bone_density( target_names = target.values.tolist() if as_frame: - #TODO Arreglar problemas que tiene esto con dtype + #TODO Fix dtype problems #curves = pd.DataFrame({feature_name: curves}) curves = pd.DataFrame({feature_name: curves.to_grid()}) frame = pd.concat([curves, target], axis=1) diff --git a/skfda/representation/__init__.py b/skfda/representation/__init__.py index fd5afe3c5..879e4b5a2 100644 --- a/skfda/representation/__init__.py +++ b/skfda/representation/__init__.py @@ -10,11 +10,13 @@ "extrapolation", "grid", "interpolation", + "irregular", ], submod_attrs={ '_functional_data': ["FData", "concatenate"], 'basis': ["FDataBasis"], 'grid': ["FDataGrid"], + 'irregular': ["FDataIrregular"], }, ) From 4d4b79796a70fd4fc1957a6f49f86e5e8560c020 Mon Sep 17 00:00:00 2001 From: opintosant Date: Wed, 8 Mar 2023 21:11:20 +0100 Subject: [PATCH 023/144] Added test of basic generation of FDataIrregular --- skfda/tests/test_irregular.py | 121 ++++++++++++++++++++++++++++++++++ 1 file changed, 121 insertions(+) create mode 100644 skfda/tests/test_irregular.py diff --git a/skfda/tests/test_irregular.py b/skfda/tests/test_irregular.py new file mode 100644 index 000000000..2b76e5cc9 --- /dev/null +++ b/skfda/tests/test_irregular.py @@ -0,0 +1,121 @@ +"""Test the basic methods of the FDataIrregular structure""" +from typing import Any, Callable, Tuple +from ..typing._numpy import ArrayLike, NDArrayBool, NDArrayFloat, NDArrayInt +import numpy as np +import pytest + +from skfda.datasets import fetch_bone_density +from 
skfda.misc.covariances import CovarianceLike, Gaussian +from skfda.representation import FDataIrregular, FDataGrid + +############ +# FIXTURES +############ + +NUM_CURVES = 10 +MAX_VALUES_PER_CURVE = 99 +DIMENSIONS = 2 + + +@pytest.fixture() +def input_arrays( +) -> ArrayLike: + """Generate three unidimensional arrays describing a FDataIrregular structure""" + # TODO Make editable with pytest + num_curves = NUM_CURVES + num_values_per_curve = np.random.randint(1, + MAX_VALUES_PER_CURVE, + size=(num_curves, ) + ) + + values_per_curve = [np.random.rand(num_values, 1) + for num_values in num_values_per_curve] + args_per_curve = [np.random.rand(num_values, 1) + for num_values in num_values_per_curve] + + indices = np.cumsum(num_values_per_curve) - num_values_per_curve + values = np.concatenate(values_per_curve) + arguments = np.concatenate(args_per_curve) + + return indices, values, arguments + + +@pytest.fixture() +def input_arrays_multidimensional( +) -> ArrayLike: + """Generate three multidimensional arrays describing a FDataIrregular structure""" + # TODO Make editable with pytest + num_curves = NUM_CURVES + num_values_per_curve = np.random.randint(1, + MAX_VALUES_PER_CURVE, + size=(num_curves, ) + ) + + values_per_curve = [np.random.rand(num_values, DIMENSIONS) + for num_values in num_values_per_curve] + args_per_curve = [np.random.rand(num_values, DIMENSIONS) + for num_values in num_values_per_curve] + + indices = np.cumsum(num_values_per_curve) - num_values_per_curve + values = np.concatenate(values_per_curve) + arguments = np.concatenate(args_per_curve) + + return indices, values, arguments + +############ +# TESTS +############ + + +def test_fdatairregular_from_arrays( + input_arrays: ArrayLike, +) -> None: + """Tests creating a correct FDataIrregular object from escriptive arrays + + Args: + input_arrays (ArrayLike): tuple of three arrays required for + FDataIrregular + indices: Array of pointers to the beginning of the arguments and + values of each curve + 
arguments: Array of each of the points of the domain + values: Array of each of the coordinates of the codomain + """ + indices, arguments, values = input_arrays + + f_data_irreg = FDataIrregular( + indices, + arguments, + values, + ) + + assert f_data_irreg is not None + assert len(f_data_irreg) == len(indices) + assert len(f_data_irreg.function_arguments) == len(arguments) + + +def test_fdatairregular_from_multidimensional_arrays( + input_arrays_multidimensional: ArrayLike, +) -> None: + """Tests creating a correct FDataIrregular object from escriptive arrays + + Args: + input_arrays (ArrayLike): tuple of three arrays required for + FDataIrregular + indices: Array of pointers to the beginning of the arguments and + values of each curve + arguments: Array of each of the points of the domain + values: Array of each of the coordinates of the codomain + """ + indices, arguments, values = input_arrays_multidimensional + + f_data_irreg = FDataIrregular( + indices, + arguments, + values, + ) + + assert f_data_irreg is not None + assert len(f_data_irreg) == len(indices) + assert len(f_data_irreg.function_arguments) == len(arguments) + + From 6f8ca6187acb1b3944cf7779988eaff0481df44d Mon Sep 17 00:00:00 2001 From: opintosant Date: Thu, 9 Mar 2023 13:27:41 +0100 Subject: [PATCH 024/144] First iteration of FDataIrregular data structure test --- skfda/representation/irregular.py | 31 +--- skfda/tests/test_irregular.py | 292 ++++++++++++++++++++++++++++-- 2 files changed, 290 insertions(+), 33 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index fda8a09b5..07340af41 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -58,8 +58,6 @@ def __init__( function_arguments: ArrayLike, function_values: ArrayLike, *, - dim_domain: Optional[int] = 1, - dim_codomain: Optional[int] = 1, domain_range: Optional[DomainRangeLike] = None, dataset_name: Optional[str] = None, sample_names: 
Optional[LabelTupleLike] = None, @@ -71,8 +69,8 @@ def __init__( """Construct a FDataIrregular object.""" # Set dimensions - self._dim_domain = dim_domain - self._dim_codomain = dim_codomain + self._dim_domain = function_arguments.shape[1] + self._dim_codomain = function_values.shape[1] # Set structure to given data self.num_functions = function_indices.shape[0] @@ -202,11 +200,11 @@ def from_datagrid( num_functions = f_data.data_matrix.shape[0] # Create data structure of function pointers and coordinates - function_indices = np.zeros((num_functions, ), + function_indices = np.zeros((num_functions, ), dtype=np.uint32) - function_arguments = np.zeros((num_observations, + function_arguments = np.zeros((num_observations, f_data.dim_domain)) - function_values = np.zeros((num_observations, + function_values = np.zeros((num_observations, f_data.dim_codomain)) head = 0 @@ -215,14 +213,14 @@ def from_datagrid( num_values = 0 for j in range(f_data.data_matrix.shape[1]): - if np.isnan(f_data.data_matrix[i][j]): + if np.all(np.isnan(f_data.data_matrix[i, j])): continue arg = [f_data.grid_points[dim][j] for dim in range(f_data.dim_domain)] function_arguments[head+num_values, :] = arg - value = [f_data.data_matrix[i,j,dim] for dim + value = [f_data.data_matrix[i, j, dim] for dim in range(f_data.dim_codomain)] function_values[head+num_values, :] = value @@ -559,8 +557,6 @@ def copy( # noqa: WPS211 function_arguments: Optional[ArrayLike] = None, function_values: Optional[ArrayLike] = None, deep: bool = False, # For Pandas compatibility - dim_domain: Optional[int] = None, - dim_codomain: Optional[int] = None, domain_range: Optional[DomainRangeLike] = None, dataset_name: Optional[str] = None, sample_names: Optional[LabelTupleLike] = None, @@ -586,12 +582,6 @@ def copy( # noqa: WPS211 if function_values is None: function_values = self.function_values - if dim_domain is None: - dim_domain = self.dim_domain - - if dim_codomain is None: - dim_codomain = self.dim_codomain - if 
domain_range is None: domain_range = copy.deepcopy(self.domain_range) @@ -620,8 +610,6 @@ def copy( # noqa: WPS211 function_indices, function_arguments, function_values, - dim_domain=dim_domain, - dim_codomain=dim_codomain, domain_range=domain_range, dataset_name=dataset_name, argument_names=argument_names, @@ -708,7 +696,6 @@ def __getitem__( function_arguments=arguments, function_values=values, sample_names=self.sample_names[key], - domain_range=self.sample_names[key], ) ##################################################################### # Numpy methods @@ -831,7 +818,9 @@ def __getitem__( if isinstance(s_key, int): s_key = slice(s_key, s_key + 1) - coordinate_names = np.array(self._fdatairregular.coordinate_names)[s_key] + coordinate_names = np.array( + self._fdatairregular.coordinate_names + )[s_key] return self._fdatairregular.copy( function_values=self._fdatairregular.function_values[..., key], diff --git a/skfda/tests/test_irregular.py b/skfda/tests/test_irregular.py index 2b76e5cc9..30bb3f7a6 100644 --- a/skfda/tests/test_irregular.py +++ b/skfda/tests/test_irregular.py @@ -1,12 +1,11 @@ """Test the basic methods of the FDataIrregular structure""" -from typing import Any, Callable, Tuple -from ..typing._numpy import ArrayLike, NDArrayBool, NDArrayFloat, NDArrayInt +from ..typing._numpy import ArrayLike import numpy as np import pytest -from skfda.datasets import fetch_bone_density -from skfda.misc.covariances import CovarianceLike, Gaussian +from skfda.datasets._real_datasets import _fetch_loon_data from skfda.representation import FDataIrregular, FDataGrid +from skfda.representation.interpolation import SplineInterpolation ############ # FIXTURES @@ -15,17 +14,29 @@ NUM_CURVES = 10 MAX_VALUES_PER_CURVE = 99 DIMENSIONS = 2 +TEST_DECIMALS = range(10) +COPY_KWARGS = [ + {"domain_range": ((0, 10))}, + {"dataset_name": "test"}, + {"sample_names": ["test"]*NUM_CURVES}, + # TODO Extrapolation + {"interpolation": SplineInterpolation(3)}, + {"argument_names": 
("test",)}, + {"coordinate_names": ("test",)}, +] @pytest.fixture() def input_arrays( ) -> ArrayLike: - """Generate three unidimensional arrays describing a FDataIrregular structure""" + """ + Generate three unidimensional arrays describing a + FDataIrregular structure + """ # TODO Make editable with pytest - num_curves = NUM_CURVES num_values_per_curve = np.random.randint(1, MAX_VALUES_PER_CURVE, - size=(num_curves, ) + size=(NUM_CURVES, ) ) values_per_curve = [np.random.rand(num_values, 1) @@ -43,12 +54,14 @@ def input_arrays( @pytest.fixture() def input_arrays_multidimensional( ) -> ArrayLike: - """Generate three multidimensional arrays describing a FDataIrregular structure""" + """ + Generate three multidimensional arrays + describing a FDataIrregular structure + """ # TODO Make editable with pytest - num_curves = NUM_CURVES num_values_per_curve = np.random.randint(1, MAX_VALUES_PER_CURVE, - size=(num_curves, ) + size=(NUM_CURVES, ) ) values_per_curve = [np.random.rand(num_values, DIMENSIONS) @@ -62,6 +75,52 @@ def input_arrays_multidimensional( return indices, values, arguments + +@pytest.fixture() +def fdatagrid( +) -> ArrayLike: + """Generate FDataGrid""" + # TODO Make editable with pytest + num_values_per_curve = np.random.randint(1, + MAX_VALUES_PER_CURVE, + ) + + data_matrix = np.random.rand(NUM_CURVES, num_values_per_curve, 1) + # Grid points must be sorted + grid_points = np.sort(np.random.rand(num_values_per_curve)) + + return FDataGrid(data_matrix=data_matrix, + grid_points=grid_points, + ) + + +@pytest.fixture() +def fdatagrid_multidimensional( +) -> ArrayLike: + """Generate multidimensional FDataGrid""" + # TODO Make editable with pytest + num_values_per_curve = np.random.randint(1, + MAX_VALUES_PER_CURVE, + ) + + data_matrix = np.random.rand(NUM_CURVES, num_values_per_curve, DIMENSIONS) + # Grid points must be sorted + grid_points = np.sort(np.random.rand(num_values_per_curve)) + + return FDataGrid(data_matrix=data_matrix, + 
grid_points=grid_points, + ) + + +@pytest.fixture() +def dataframe( +) -> ArrayLike: + """Generate long dataframe for testing""" + raw_dataset = _fetch_loon_data("bone_ext") + data = raw_dataset["bone_ext"] + + return data + ############ # TESTS ############ @@ -117,5 +176,214 @@ def test_fdatairregular_from_multidimensional_arrays( assert f_data_irreg is not None assert len(f_data_irreg) == len(indices) assert len(f_data_irreg.function_arguments) == len(arguments) - - + + +def test_fdatairregular_copy( + input_arrays: ArrayLike, +) -> None: + """Test the copy function for FDataIrregular for an exact copy + + Args: + input_arrays (ArrayLike): tuple of three arrays required for + FDataIrregular + indices: Array of pointers to the beginning of the arguments and + values of each curve + arguments: Array of each of the points of the domain + values: Array of each of the coordinates of the codomain + """ + indices, arguments, values = input_arrays + + f_data_irreg = FDataIrregular( + indices, + arguments, + values, + ) + + assert f_data_irreg == f_data_irreg.copy() + + +@pytest.mark.parametrize("kwargs", COPY_KWARGS) +def test_fdatairregular_copy_kwargs( + input_arrays: ArrayLike, + kwargs: dict, +) -> None: + """Test the copy function for FDataIrregular with additional arguments + which replace certain parameters of the object + + Args: + input_arrays (ArrayLike): tuple of three arrays required for + FDataIrregular + indices: Array of pointers to the beginning of the arguments and + values of each curve + arguments: Array of each of the points of the domain + values: Array of each of the coordinates of the codomain + kwargs: Dict with the parameters for each iteration of the test + """ + indices, arguments, values = input_arrays + + f_data_irreg = FDataIrregular( + indices, + arguments, + values, + ) + + f_data_copy = f_data_irreg.copy(**kwargs) + + # Check everything equal except specified kwarg + assert len(f_data_copy) == len(f_data_irreg) + assert 
len(f_data_copy.function_arguments) == \ + len(f_data_irreg.function_arguments) + assert f_data_copy.dim_domain == f_data_irreg.dim_domain + assert f_data_copy.dim_domain == f_data_irreg.dim_codomain + changed_attribute = next(iter(kwargs)) + assert getattr(f_data_copy, changed_attribute) != \ + getattr(f_data_irreg, changed_attribute) + + +def test_fdatairregular_from_fdatagrid( + fdatagrid: FDataGrid, +) -> None: + """Tests creating a correct FDataIrregular object from FDataGrid + + Args: + fdatagrid (FDataGrid): FDataGrid object. Can be dense or sparse + (contain NaNs) + """ + f_data_irreg = FDataIrregular.from_datagrid(fdatagrid) + + assert f_data_irreg is not None + assert len(f_data_irreg) == len(fdatagrid) + + +def test_fdatairregular_from_fdatagrid_multidimensional( + fdatagrid_multidimensional: FDataGrid, +) -> None: + """Tests creating a correct FDataIrregular object from + a multidimensional FDataGrid + + Args: + fdatagrid (FDataGrid): FDataGrid object. Can be dense or sparse + (contain NaNs) + """ + f_data_irreg = FDataIrregular.from_datagrid(fdatagrid_multidimensional) + + assert f_data_irreg is not None + assert len(f_data_irreg) == len(fdatagrid_multidimensional) + + +def test_fdatairregular_from_dataframe( + dataframe: FDataGrid, +) -> None: + """Tests creating a correct FDataIrregular object from + a multidimensional FDataGrid + + Args: + fdatagrid (FDataGrid): FDataGrid object. 
Can be dense or sparse + (contain NaNs) + """ + + curve_name = "idnum" + argument_name = "age" + coordinate_name = "spnbmd" + + f_irreg = FDataIrregular.from_dataframe( + dataframe, + id_column=curve_name, + argument_columns=argument_name, + coordinate_columns=coordinate_name, + argument_names=[argument_name], + coordinate_names=[coordinate_name], + dataset_name="bone_ext" + ) + + assert len(f_irreg) == 423 + assert len(f_irreg.function_values) == 1003 + + +def test_fdatairregular_getitem( + input_arrays: ArrayLike, +) -> None: + """Tests using slices to get subsamples of a given FDataIrregular, + using the method __getitem__ of the class + + Args: + input_arrays (ArrayLike): tuple of three arrays required for + FDataIrregular + indices: Array of pointers to the beginning of the arguments and + values of each curve + arguments: Array of each of the points of the domain + values: Array of each of the coordinates of the codomain + """ + indices, arguments, values = input_arrays + + f_data_irreg = FDataIrregular( + indices, + arguments, + values, + ) + + assert len(f_data_irreg[0]) == 1 + assert len(f_data_irreg[-1]) == 1 + assert len(f_data_irreg[0:10]) == 10 + assert len(f_data_irreg[0:]) == len(f_data_irreg) + assert len(f_data_irreg[:10]) == 10 + assert len(f_data_irreg[0:10:2]) == 5 + assert len(f_data_irreg[0:10:2]) == 5 + + +def test_fdatairregular_coordinates( + input_arrays_multidimensional: ArrayLike, +) -> None: + """Test obtaining the different coordinates for a multidimensional + FDataGrid object by using the custom _IrregularCoordinateIterator + + Args: + input_arrays (ArrayLike): tuple of three arrays required for + FDataIrregular + indices: Array of pointers to the beginning of the arguments and + values of each curve + arguments: Array of each of the points of the domain + values: Array of each of the coordinates of the codomain + """ + indices, arguments, values = input_arrays_multidimensional + + f_data_irreg = FDataIrregular( + indices, + 
arguments, + values, + ) + + for dim, f_data_coordinate in enumerate(f_data_irreg.coordinates): + assert len(f_data_coordinate) == len(f_data_irreg) + assert f_data_coordinate.dim_codomain == 1 + assert f_data_coordinate.function_values[:, 0] == \ + f_data_irreg.function_values[:, dim] + + +@pytest.mark.parametrize("decimals", TEST_DECIMALS) +def test_fdatairregular_round( + input_arrays: ArrayLike, + decimals: int, +) -> None: + """Test the round function for FDataIrregular + + Args: + input_arrays (ArrayLike): tuple of three arrays required for + FDataIrregular + indices: Array of pointers to the beginning of the arguments and + values of each curve + arguments: Array of each of the points of the domain + values: Array of each of the coordinates of the codomain + """ + indices, arguments, values = input_arrays + + f_data_irreg = FDataIrregular( + indices, + arguments, + values, + ) + + assert np.all( + f_data_irreg.round(decimals).function_values == + np.round(f_data_irreg.function_values, decimals) + ) From de4d4703483a0015ea8b38946f08eedad23d2f3c Mon Sep 17 00:00:00 2001 From: opintosant Date: Thu, 2 Mar 2023 12:54:35 +0100 Subject: [PATCH 025/144] Added mean,var,gmean, concatenate. 
Added structure for other operations --- skfda/representation/irregular.py | 167 ++++++++++++++++++++++++++---- 1 file changed, 145 insertions(+), 22 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 07340af41..cff63bc8a 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -139,15 +139,15 @@ def from_dataframe( argument_columns: LabelTupleLike, coordinate_columns: LabelTupleLike, **kwargs - ) -> FDataIrregular: - + ) -> FDataIrregular: + # Accept strings but ensure the column names are tuples _is_str = isinstance(argument_columns, str) argument_columns = [argument_columns] if _is_str else \ argument_columns _is_str = isinstance(coordinate_columns, str) - coordinate_columns = [coordinate_columns] if _is_str else \ + coordinate_columns = [coordinate_columns] if _is_str else \ coordinate_columns # Obtain num functions and num observations from data @@ -194,7 +194,7 @@ def from_datagrid( f_data: FDataGrid, **kwargs ) -> FDataIrregular: - + # Obtain num functions and num observations from data num_observations = np.sum(~np.isnan(f_data.data_matrix)) num_functions = f_data.data_matrix.shape[0] @@ -219,7 +219,7 @@ def from_datagrid( arg = [f_data.grid_points[dim][j] for dim in range(f_data.dim_domain)] function_arguments[head+num_values, :] = arg - + value = [f_data.data_matrix[i, j, dim] for dim in range(f_data.dim_codomain)] function_values[head+num_values, :] = value @@ -266,7 +266,7 @@ def round( @property def sample_points(self) -> GridPoints: warnings.warn( - "Parameter sample_points is deprecated. Use the " \ + "Parameter sample_points is deprecated. 
Use the " "parameter grid_points instead.", DeprecationWarning, ) @@ -280,7 +280,6 @@ def dim_domain(self) -> int: def dim_codomain(self) -> int: return self._dim_codomain - # TODO Remove CoordinateIterator in an appropiate way @property def coordinates(self: T) -> _IrregularCoordinateIterator[T]: return _IrregularCoordinateIterator(self) @@ -367,15 +366,82 @@ def sum( # noqa: WPS125 skipna: bool = False, min_count: int = 0, ) -> T: - pass + super().sum(axis=axis, out=out, keepdims=keepdims, skipna=skipna) + + data = ( + np.nansum(self.function_values, axis=0, keepdims=True) if skipna + else np.sum(self.function_values, axis=0, keepdims=True) + ) + return FDataIrregular( + function_indices=np.array([0]), + function_arguments=np.array(np.zeros((1, self.dim_domain))), + function_values=data, + sample_names=("sum",), + ) + def mean(self: T) -> T: - pass + """Compute the mean pointwise for a sparse dataset. + + Note that, for irregular data, points may be represented in few + or even an only curve. + Returns: + A FDataIrregular object with just one sample representing the + mean of all curves the across each value. + + """ + + # Find all distinct arguments (ordered) and corresponding values + distinct_args = np.unique(np.matrix.flatten(self.function_arguments)) + values = [np.matrix.flatten(self.function_values[ + np.where(self.function_arguments == arg)[0] + ]) + for arg in distinct_args] + + # Obtain mean of all available values for each argument point + means = np.array([np.mean(vals) for vals in values]) + + # Create a FDataIrregular object with only 1 curve, the mean curve + return FDataIrregular( + function_indices=np.array([0]), + function_arguments=distinct_args.reshape(-1, 1), + function_values=means.reshape(-1, 1), + sample_names=("mean",), + ) + def var(self: T) -> T: - pass + """Compute the variance pointwise for a sparse dataset. + + Note that, for irregular data, points may be represented in few + or even an only curve. 
+ + Returns: + A FDataIrregular object with just one sample representing the + variance of all curves the across each value. + + """ + + # Find all distinct arguments (ordered) and corresponding values + distinct_args = np.unique(np.matrix.flatten(self.function_arguments)) + values = [np.matrix.flatten(self.function_values[ + np.where(self.function_arguments == arg)[0] + ]) + for arg in distinct_args] + + # Obtain variance of all available values for each argument point + vars = np.array([np.var(vals) for vals in values]) + + # Create a FDataIrregular object with only 1 curve, the variance curve + return FDataIrregular( + function_indices=np.array([0]), + function_arguments=distinct_args.reshape(-1, 1), + function_values=vars.reshape(-1, 1), + sample_names=("var",), + ) def cov(self: T) -> T: + # TODO Implementation to be decided pass def gmean(self: T) -> T: @@ -470,24 +536,76 @@ def __rtruediv__( pass def __neg__(self: T) -> T: - pass + """Negation of FDataIrregular object.""" + + return self.copy(function_values=-self.function_values) def concatenate(self: T, *others: T, as_coordinates: bool = False) -> T: - pass - + if as_coordinates: + raise NotImplementedError( + "Not implemented for as_coordinates = True", + ) + # Verify that dimensions are compatible + assert len(others) > 0 + self._check_same_dimensions(others[0]) + if len(others) > 1: + for x, y in zip(others, others[1:]): + x._check_same_dimensions(y) + + # Allocate all required memory + total_functions = self.num_functions + sum([o.num_functions + for o in others]) + total_values = self.num_observations + sum([o.num_observations + for o in others]) + total_sample_names = [] + function_indices = np.zeros((total_functions, ), + dtype=np.uint32) + function_args = np.zeros((total_values, + self.dim_domain)) + function_values = np.zeros((total_values, + self.dim_codomain)) + index = 0 + head = 0 + + # Add samples sequentially + for f_data in [self] + list(others): + function_indices[index:index + + 
f_data.num_functions] = f_data.function_indices + function_args[head:head + + f_data.num_observations] = f_data.function_args + function_values[head:head + + f_data.num_observations] = f_data.function_values + # Adjust pointers to the concatenated array + function_indices[index:index+f_data.num_functions] += head + index += f_data.num_functions + head += f_data.num_observations + total_sample_names = total_sample_names + list(f_data.sample_names) + + return self.copy( + function_indices, + function_args, + function_values, + sample_names=total_sample_names, + ) + def plot(self, *args: Any, **kwargs: Any) -> Figure: from ..exploratory.visualization.representation \ import LinearPlotIrregular + from ..exploratory.visualization.representation \ + import LinearPlotIrregular return LinearPlotIrregular(self, *args, **kwargs).plot() def scatter(self, *args: Any, **kwargs: Any) -> Figure: from ..exploratory.visualization.representation \ import ScatterPlotIrregular + from ..exploratory.visualization.representation \ + import ScatterPlotIrregular return ScatterPlotIrregular(self, *args, **kwargs).plot() def to_basis(self, basis: Basis, **kwargs: Any) -> FDataBasis: + # TODO Use BasisSmoother to return basis? 
pass def to_matrix(self, **kwargs: Any) -> ArrayLike: @@ -497,7 +615,7 @@ def to_matrix(self, **kwargs: Any) -> ArrayLike: if self.dim_domain > 1: warnings.warn(f"Not implemented for domain dimension > 1, \ currently {self.dim_domain}") - + # Find the grid points and values for each function grid_points = [] evaluated_points = [] @@ -531,8 +649,11 @@ def to_matrix(self, **kwargs: Any) -> ArrayLike: for curve in range(num_curves): for point in range(len(grid_points[curve])): for dimension in range(self.dim_codomain): - point_index = unified_grid_points.index(grid_points[curve][point]) - unified_matrix[curve, point_index, dimension] = evaluated_points[curve][point][dimension] + point_index = unified_grid_points.index( + grid_points[curve][point] + ) + unified_matrix[curve, point_index, dimension] = \ + evaluated_points[curve][point][dimension] return unified_matrix, unified_grid_points @@ -623,6 +744,8 @@ def restrict( self: T, domain_range: DomainRangeLike, ) -> T: + + #TODO Is this possible with this structure pass def shift( @@ -631,7 +754,9 @@ def shift( *, restrict_domain: bool = False, extrapolation: Optional[ExtrapolationLike] = None, + grid_points: Optional[GridPointsLike] = None, ) -> FDataIrregular: + #TODO Is this possible with this structure? pass def compose( @@ -640,16 +765,14 @@ def compose( *, eval_points: Optional[GridPointsLike] = None, ) -> T: + + #TODO Is this possible with this structure? 
pass def __str__(self) -> str: """Return str(self).""" - return ( - f"function_indices: {self.function_indices}\n" - f"function_arguments: {self.function_arguments}\n" - f"function_values: {self.function_values}\n" - f"time range: {self.domain_range}" - ) + #TODO Define str method after all attributes are locked + pass def __repr__(self) -> str: """Return repr(self).""" From 13c476f62b66d49157ffae24ba25ec0e345b450d Mon Sep 17 00:00:00 2001 From: opintosant Date: Thu, 2 Mar 2023 13:44:30 +0100 Subject: [PATCH 026/144] Restrict domain --- skfda/representation/irregular.py | 79 +++++++++++++++++++++++++++++-- 1 file changed, 75 insertions(+), 4 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index cff63bc8a..cf964925e 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -537,7 +537,6 @@ def __rtruediv__( def __neg__(self: T) -> T: """Negation of FDataIrregular object.""" - return self.copy(function_values=-self.function_values) def concatenate(self: T, *others: T, as_coordinates: bool = False) -> T: @@ -572,7 +571,7 @@ def concatenate(self: T, *others: T, as_coordinates: bool = False) -> T: function_indices[index:index + f_data.num_functions] = f_data.function_indices function_args[head:head + - f_data.num_observations] = f_data.function_args + f_data.num_observations] = f_data.function_arguments function_values[head:head + f_data.num_observations] = f_data.function_values # Adjust pointers to the concatenated array @@ -744,9 +743,81 @@ def restrict( self: T, domain_range: DomainRangeLike, ) -> T: + from ..misc.validation import validate_domain_range + + domain_range = validate_domain_range(domain_range) + assert all( + c <= a < b <= d # noqa: WPS228 + for ((a, b), (c, d)) in zip(domain_range, self.domain_range) + ) + + head = 0 + indices = [] + arguments = [] + values = [] + sample_names = [] + + # Eliminate points outside the new range. 
+ # Must also modify function indices to point to new array + i=-1 + for i, index in enumerate(self.function_indices[1:]): + prev_index = self.function_indices[i] + s = slice(prev_index, index) + masks = set() + for dr in domain_range: + dr_start, dr_end = dr + select_mask = np.where( + (dr_start <= self.function_arguments[s]) & + (self.function_arguments[s] <= dr_end) + ) + + # Must be union, it is valid if it is in any interval + masks = masks.union(set(select_mask[0])) + + # TODO Keep functions with no values? + masks = list(masks) + if len(masks) > 1: + indices.append(head) + arguments.append(self.function_arguments[s][masks, :]) + values.append(self.function_values[s][masks, :]) + sample_names.append(self.sample_names[i]) + head += len(masks) - #TODO Is this possible with this structure - pass + # Last index + i += 1 + prev_index = self.function_indices[i] + s = slice(prev_index, None) + masks = set() + for dr in domain_range: + dr_start, dr_end = dr + select_mask = np.where( + (dr_start <= self.function_arguments[s]) & + (self.function_arguments[s] <= dr_end) + ) + + # Must be union, it is valid if it is in any interval + masks = masks.union(set(select_mask[0])) + + # TODO Keep functions with no values? + masks = list(masks) + if len(masks) > 0: + indices.append(head) + arguments.append(self.function_arguments[s][masks, :]) + values.append(self.function_values[s][masks, :]) + sample_names.append(self.sample_names[i]) + head += len(masks) + + function_indices = np.array(indices) + function_arguments = np.concatenate(arguments) + function_values = np.concatenate(values) + + return self.copy( + function_indices=function_indices, + function_arguments=function_arguments, + function_values=function_values, + sample_names=sample_names, + domain_range=domain_range, + ) def shift( self, From 0f4275b215c6e977a27020f447c75ed6b31fbdee Mon Sep 17 00:00:00 2001 From: opintosant Date: Thu, 2 Mar 2023 13:50:48 +0100 Subject: [PATCH 027/144] Added docstring to shift. 
Implemented __str__ --- skfda/representation/irregular.py | 41 ++++++++++++++++++++++++++++--- 1 file changed, 38 insertions(+), 3 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index cf964925e..2e04a59c6 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -827,7 +827,37 @@ def shift( extrapolation: Optional[ExtrapolationLike] = None, grid_points: Optional[GridPointsLike] = None, ) -> FDataIrregular: - #TODO Is this possible with this structure? + r""" + Perform a shift of the curves. + + The i-th shifted function :math:`y_i` has the form + + .. math:: + y_i(t) = x_i(t + \delta_i) + + where :math:`x_i` is the i-th original function and :math:`delta_i` is + the shift performed for that function, that must be a vector in the + domain space. + + Note that a positive shift moves the graph of the function in the + negative direction and vice versa. + + Args: + shifts: List with the shifts + corresponding for each sample or numeric with the shift to + apply to all samples. + restrict_domain: If True restricts the domain to avoid the + evaluation of points outside the domain using extrapolation. + Defaults uses extrapolation. + extrapolation: Controls the + extrapolation mode for elements outside the domain range. + By default uses the method defined in fd. See extrapolation to + more information. + + Returns: + Shifted functions. 
+ """ + #TODO build based in above pass def compose( @@ -842,8 +872,13 @@ def compose( def __str__(self) -> str: """Return str(self).""" - #TODO Define str method after all attributes are locked - pass + return ( + f"Data set: {self.data_matrix}\n" + f"function indices: {self.function_indices}\n" + f"function arguments: {self.function_arguments}\n" + f"function values: {self.function_values}\n" + f"time range: {self.domain_range}" + ) def __repr__(self) -> str: """Return repr(self).""" From f52fd98eded3ec0b19ff62aa65823021a4d8d131 Mon Sep 17 00:00:00 2001 From: opintosant Date: Thu, 2 Mar 2023 14:16:15 +0100 Subject: [PATCH 028/144] Implemented and tested operations with scalarss --- skfda/representation/irregular.py | 95 +++++++++++++++++++++++++++---- 1 file changed, 85 insertions(+), 10 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 2e04a59c6..037f27f5c 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -485,55 +485,130 @@ def _get_op_matrix( self, other: Union[T, NDArrayFloat, NDArrayInt, float], ) -> Union[None, float, NDArrayFloat, NDArrayInt]: - pass + if isinstance(other, numbers.Real): + return float(other) + elif isinstance(other, np.ndarray): + if other.shape in {(), (1,)}: + return other + elif other.shape == (self.n_samples,): + other_index = ( + (slice(None),) + (np.newaxis,) + * (self.function_values.ndim - 1) + ) + + return other[other_index] + elif other.shape == ( + self.n_samples, + self.dim_codomain, + ): + other_index = ( + (slice(None),) + (np.newaxis,) + * (self.function_values.ndim - 2) + + (slice(None),) + ) + + return other[other_index] + + raise ValueError( + f"Invalid dimensions in operator between FDataGrid and Numpy " + f"array: {other.shape}" + ) + + elif isinstance(other, FDataIrregular): + #TODO What to do with different arguments? 
+ return None + + return None def __add__( self: T, other: Union[T, NDArrayFloat, NDArrayInt, float], ) -> T: - pass + function_values = self._get_op_matrix(other) + if function_values is None: + return NotImplemented + + return self._copy_op(other, + function_values=self.function_values + + function_values + ) def __radd__( self: T, other: Union[T, NDArrayFloat, NDArrayInt, float], ) -> T: - pass + return self.__add__(other) def __sub__( self: T, other: Union[T, NDArrayFloat, NDArrayInt, float], ) -> T: - pass + function_values = self._get_op_matrix(other) + if function_values is None: + return NotImplemented + + return self._copy_op(other, + function_values=self.function_values - + function_values + ) def __rsub__( self: T, other: Union[T, NDArrayFloat, NDArrayInt, float], ) -> T: - pass + function_values = self._get_op_matrix(other) + if function_values is None: + return NotImplemented + + return self._copy_op(other, + function_values=function_values - + self.function_values + ) def __mul__( self: T, other: Union[T, NDArrayFloat, NDArrayInt, float], ) -> T: - pass + function_values = self._get_op_matrix(other) + if function_values is None: + return NotImplemented + + return self._copy_op(other, + function_values=self.function_values * + function_values + ) def __rmul__( self: T, other: Union[T, NDArrayFloat, NDArrayInt, float], ) -> T: - pass + return self.__mul__(other) def __truediv__( self: T, other: Union[T, NDArrayFloat, NDArrayInt, float], ) -> T: - pass + function_values = self._get_op_matrix(other) + if function_values is None: + return NotImplemented + + return self._copy_op(other, + function_values=self.function_values / + function_values + ) def __rtruediv__( self: T, other: Union[T, NDArrayFloat, NDArrayInt, float], ) -> T: - pass + function_values = self._get_op_matrix(other) + if function_values is None: + return NotImplemented + + return self._copy_op(other, + function_values=function_values / + self.function_values + ) def __neg__(self: T) -> T: 
"""Negation of FDataIrregular object.""" @@ -759,7 +834,7 @@ def restrict( # Eliminate points outside the new range. # Must also modify function indices to point to new array - i=-1 + i = -1 for i, index in enumerate(self.function_indices[1:]): prev_index = self.function_indices[i] s = slice(prev_index, index) From d7a0097c897ea7ccf84cceaee3bb1f1d31647ebd Mon Sep 17 00:00:00 2001 From: opintosant Date: Thu, 2 Mar 2023 15:36:59 +0100 Subject: [PATCH 029/144] Implemented operations with vectores and arrays. Unsolved problem with right operations is WIP --- skfda/representation/irregular.py | 50 ++++++++++++++++++++++++++----- 1 file changed, 42 insertions(+), 8 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 037f27f5c..34ad4639e 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -495,8 +495,23 @@ def _get_op_matrix( (slice(None),) + (np.newaxis,) * (self.function_values.ndim - 1) ) + + other_vector = other[other_index] + + # Must expand for the number of values in each curve + values_after = np.concatenate((self.function_indices, + np.array([self.num_observations])) + ) + + values_before = np.concatenate((np.array([0]), + self.function_indices) + ) + + values_curve = (values_after-values_before)[1:] - return other[other_index] + # Repeat the other value for each curve as many times + # as values inside the curve + return np.repeat(other_vector, values_curve) elif other.shape == ( self.n_samples, self.dim_codomain, @@ -507,7 +522,22 @@ def _get_op_matrix( + (slice(None),) ) - return other[other_index] + other_vector = other[other_index] + + # Must expand for the number of values in each curve + values_after = np.concatenate((self.function_indices, + np.array([self.num_observations])) + ) + + values_before = np.concatenate((np.array([0]), + self.function_indices) + ) + + values_curve = (values_after-values_before)[1:] + + # Repeat the other value for each curve as many times + 
# as values inside the curve + return np.repeat(other_vector, values_curve, axis=0) raise ValueError( f"Invalid dimensions in operator between FDataGrid and Numpy " @@ -1015,26 +1045,30 @@ def __array_ufunc__( for i in inputs: if ( - isinstance(i, FDataGrid) - and not np.array_equal(i.grid_points, self.grid_points) + isinstance(i, FDataIrregular) + and not np.array_equal(i.function_arguments, + self.function_arguments + ) ): return NotImplemented new_inputs = [ - i.data_matrix if isinstance(i, FDataGrid) - else self._get_op_matrix(i) for i in inputs + self._get_op_matrix(i) for i in inputs ] outputs = kwargs.pop('out', None) if outputs: new_outputs = [ - o.data_matrix if isinstance(o, FDataGrid) + o.function_values if isinstance(o, FDataIrregular) else o for o in outputs ] kwargs['out'] = tuple(new_outputs) else: new_outputs = (None,) * ufunc.nout + print(kwargs) + print() + print(new_inputs) results = getattr(ufunc, method)(*new_inputs, **kwargs) if results is NotImplemented: return NotImplemented @@ -1047,7 +1081,7 @@ def __array_ufunc__( for result, output in zip(results, new_outputs) ) - results = [self.copy(data_matrix=r) for r in results] + results = [self.copy(function_values=r) for r in results] return results[0] if len(results) == 1 else results From 4260d687a156ad37857616fab74c19ce1cc4c2ba Mon Sep 17 00:00:00 2001 From: opintosant Date: Tue, 7 Mar 2023 13:09:30 +0100 Subject: [PATCH 030/144] Add the case of operations between FDataIrregular --- skfda/representation/irregular.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 34ad4639e..80e05795a 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -545,8 +545,8 @@ def _get_op_matrix( ) elif isinstance(other, FDataIrregular): - #TODO What to do with different arguments? - return None + # TODO What to do with different arguments? 
+ return other.function_values return None @@ -1066,9 +1066,6 @@ def __array_ufunc__( else: new_outputs = (None,) * ufunc.nout - print(kwargs) - print() - print(new_inputs) results = getattr(ufunc, method)(*new_inputs, **kwargs) if results is NotImplemented: return NotImplemented From ac37a257ece6fee59f3ca0de5fac6455df0b1d27 Mon Sep 17 00:00:00 2001 From: opintosant Date: Thu, 9 Mar 2023 13:57:10 +0100 Subject: [PATCH 031/144] Fix formatting issues after rebase --- skfda/representation/irregular.py | 167 +++++++++++++++--------------- 1 file changed, 84 insertions(+), 83 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 80e05795a..ae3bd61e2 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -18,8 +18,6 @@ Type, TypeVar, Union, - List, - Tuple, cast, ) @@ -89,16 +87,17 @@ def __init__( dim_sample_ranges = list() for f in self.function_indices[1:]: dim_sample_ranges.append(tuple((self.function_arguments[i][dim], - self.function_arguments[f-1][dim]))) + self.function_arguments[f-1][dim]))) i = f - dim_sample_ranges.append((self.function_arguments[i][dim], - self.function_arguments[-1][dim])) + dim_sample_ranges.append((self.function_arguments[i][dim], + self.function_arguments[-1][dim])) dim_ranges.append(dim_sample_ranges) self._sample_range = list() for sample in range(len(dim_sample_ranges)): self._sample_range.append( - tuple([dim_ranges[dim][sample] for dim in range(self.dim_domain)]) + tuple([dim_ranges[dim][sample] + for dim in range(self.dim_domain)]) ) # Default value for sample_range is a list of tuples with @@ -137,10 +136,10 @@ def from_dataframe( dataframe: pandas.DataFrame, id_column: str, argument_columns: LabelTupleLike, - coordinate_columns: LabelTupleLike, + coordinate_columns: LabelTupleLike, **kwargs ) -> FDataIrregular: - + # Accept strings but ensure the column names are tuples _is_str = isinstance(argument_columns, str) argument_columns = 
[argument_columns] if _is_str else \ @@ -155,9 +154,9 @@ def from_dataframe( num_functions = dataframe[id_column].nunique() # Create data structure of function pointers and coordinates - function_indices = np.zeros((num_functions, ), + function_indices = np.zeros((num_functions, ), dtype=np.uint32) - function_arguments = np.zeros((num_observations, + function_arguments = np.zeros((num_observations, len(argument_columns))) function_values = np.zeros((num_observations, len(coordinate_columns))) @@ -182,9 +181,9 @@ def from_dataframe( index += 1 return cls( - function_indices, - function_arguments, - function_values, + function_indices, + function_arguments, + function_values, **kwargs ) @@ -194,7 +193,7 @@ def from_datagrid( f_data: FDataGrid, **kwargs ) -> FDataIrregular: - + # Obtain num functions and num observations from data num_observations = np.sum(~np.isnan(f_data.data_matrix)) num_functions = f_data.data_matrix.shape[0] @@ -216,11 +215,11 @@ def from_datagrid( if np.all(np.isnan(f_data.data_matrix[i, j])): continue - arg = [f_data.grid_points[dim][j] for dim + arg = [f_data.grid_points[dim][j] for dim in range(f_data.dim_domain)] function_arguments[head+num_values, :] = arg - - value = [f_data.data_matrix[i, j, dim] for dim + + value = [f_data.data_matrix[i, j, dim] for dim in range(f_data.dim_codomain)] function_values[head+num_values, :] = value @@ -229,9 +228,9 @@ def from_datagrid( head += num_values return cls( - function_indices, - function_arguments, - function_values, + function_indices, + function_arguments, + function_values, **kwargs ) @@ -367,7 +366,7 @@ def sum( # noqa: WPS125 min_count: int = 0, ) -> T: super().sum(axis=axis, out=out, keepdims=keepdims, skipna=skipna) - + data = ( np.nansum(self.function_values, axis=0, keepdims=True) if skipna else np.sum(self.function_values, axis=0, keepdims=True) @@ -379,10 +378,10 @@ def sum( # noqa: WPS125 function_values=data, sample_names=("sum",), ) - + def mean(self: T) -> T: """Compute the mean 
pointwise for a sparse dataset. - + Note that, for irregular data, points may be represented in few or even an only curve. @@ -390,18 +389,18 @@ def mean(self: T) -> T: A FDataIrregular object with just one sample representing the mean of all curves the across each value. - """ - +""" + # Find all distinct arguments (ordered) and corresponding values distinct_args = np.unique(np.matrix.flatten(self.function_arguments)) values = [np.matrix.flatten(self.function_values[ np.where(self.function_arguments == arg)[0] ]) - for arg in distinct_args] - + for arg in distinct_args] + # Obtain mean of all available values for each argument point means = np.array([np.mean(vals) for vals in values]) - + # Create a FDataIrregular object with only 1 curve, the mean curve return FDataIrregular( function_indices=np.array([0]), @@ -409,10 +408,10 @@ def mean(self: T) -> T: function_values=means.reshape(-1, 1), sample_names=("mean",), ) - + def var(self: T) -> T: """Compute the variance pointwise for a sparse dataset. - + Note that, for irregular data, points may be represented in few or even an only curve. @@ -421,17 +420,17 @@ def var(self: T) -> T: variance of all curves the across each value. 
""" - + # Find all distinct arguments (ordered) and corresponding values distinct_args = np.unique(np.matrix.flatten(self.function_arguments)) values = [np.matrix.flatten(self.function_values[ np.where(self.function_arguments == arg)[0] ]) for arg in distinct_args] - + # Obtain variance of all available values for each argument point vars = np.array([np.var(vals) for vals in values]) - + # Create a FDataIrregular object with only 1 curve, the variance curve return FDataIrregular( function_indices=np.array([0]), @@ -442,7 +441,7 @@ def var(self: T) -> T: def cov(self: T) -> T: # TODO Implementation to be decided - pass + return None def gmean(self: T) -> T: return FDataIrregular( @@ -495,21 +494,24 @@ def _get_op_matrix( (slice(None),) + (np.newaxis,) * (self.function_values.ndim - 1) ) - + other_vector = other[other_index] - + # Must expand for the number of values in each curve - values_after = np.concatenate((self.function_indices, - np.array([self.num_observations])) + values_after = np.concatenate((self.function_indices, + np.array( + [self.num_observations] + ) + ) ) - - values_before = np.concatenate((np.array([0]), + + values_before = np.concatenate((np.array([0]), self.function_indices) ) - + values_curve = (values_after-values_before)[1:] - # Repeat the other value for each curve as many times + # Repeat the other value for each curve as many times # as values inside the curve return np.repeat(other_vector, values_curve) elif other.shape == ( @@ -523,19 +525,22 @@ def _get_op_matrix( ) other_vector = other[other_index] - + # Must expand for the number of values in each curve - values_after = np.concatenate((self.function_indices, - np.array([self.num_observations])) + values_after = np.concatenate((self.function_indices, + np.array( + [self.num_observations] + ) + ) ) - - values_before = np.concatenate((np.array([0]), + + values_before = np.concatenate((np.array([0]), self.function_indices) ) - + values_curve = (values_after-values_before)[1:] - # Repeat 
the other value for each curve as many times + # Repeat the other value for each curve as many times # as values inside the curve return np.repeat(other_vector, values_curve, axis=0) @@ -655,22 +660,22 @@ def concatenate(self: T, *others: T, as_coordinates: bool = False) -> T: if len(others) > 1: for x, y in zip(others, others[1:]): x._check_same_dimensions(y) - + # Allocate all required memory - total_functions = self.num_functions + sum([o.num_functions + total_functions = self.num_functions + sum([o.num_functions for o in others]) - total_values = self.num_observations + sum([o.num_observations + total_values = self.num_observations + sum([o.num_observations for o in others]) - total_sample_names = [] - function_indices = np.zeros((total_functions, ), + total_sample_names = list() + function_indices = np.zeros((total_functions, ), dtype=np.uint32) - function_args = np.zeros((total_values, + function_args = np.zeros((total_values, self.dim_domain)) function_values = np.zeros((total_values, self.dim_codomain)) index = 0 head = 0 - + # Add samples sequentially for f_data in [self] + list(others): function_indices[index:index + @@ -684,33 +689,29 @@ def concatenate(self: T, *others: T, as_coordinates: bool = False) -> T: index += f_data.num_functions head += f_data.num_observations total_sample_names = total_sample_names + list(f_data.sample_names) - + return self.copy( - function_indices, - function_args, - function_values, + function_indices, + function_args, + function_values, sample_names=total_sample_names, ) - + def plot(self, *args: Any, **kwargs: Any) -> Figure: from ..exploratory.visualization.representation \ import LinearPlotIrregular - from ..exploratory.visualization.representation \ - import LinearPlotIrregular return LinearPlotIrregular(self, *args, **kwargs).plot() def scatter(self, *args: Any, **kwargs: Any) -> Figure: from ..exploratory.visualization.representation \ import ScatterPlotIrregular - from ..exploratory.visualization.representation \ - 
import ScatterPlotIrregular return ScatterPlotIrregular(self, *args, **kwargs).plot() def to_basis(self, basis: Basis, **kwargs: Any) -> FDataBasis: # TODO Use BasisSmoother to return basis? - pass + return None def to_matrix(self, **kwargs: Any) -> ArrayLike: # Convert FDataIrregular to matrix of all points @@ -719,11 +720,11 @@ def to_matrix(self, **kwargs: Any) -> ArrayLike: if self.dim_domain > 1: warnings.warn(f"Not implemented for domain dimension > 1, \ currently {self.dim_domain}") - + # Find the grid points and values for each function grid_points = [] evaluated_points = [] - for index_start, index_end in zip(list(self.function_indices), + for index_start, index_end in zip(list(self.function_indices), list(self.function_indices[1:])): grid_points.append( [x[0] for x in self.function_arguments[index_start:index_end]]) @@ -763,7 +764,7 @@ def to_matrix(self, **kwargs: Any) -> ArrayLike: def to_grid( # noqa: D102 self: T, - ) -> T: + ) -> FDataGrid: data_matrix, grid_points = self.to_matrix() @@ -875,10 +876,10 @@ def restrict( (dr_start <= self.function_arguments[s]) & (self.function_arguments[s] <= dr_end) ) - + # Must be union, it is valid if it is in any interval masks = masks.union(set(select_mask[0])) - + # TODO Keep functions with no values? masks = list(masks) if len(masks) > 1: @@ -887,7 +888,7 @@ def restrict( values.append(self.function_values[s][masks, :]) sample_names.append(self.sample_names[i]) head += len(masks) - + # Last index i += 1 prev_index = self.function_indices[i] @@ -899,10 +900,10 @@ def restrict( (dr_start <= self.function_arguments[s]) & (self.function_arguments[s] <= dr_end) ) - + # Must be union, it is valid if it is in any interval masks = masks.union(set(select_mask[0])) - + # TODO Keep functions with no values? 
masks = list(masks) if len(masks) > 0: @@ -911,11 +912,11 @@ def restrict( values.append(self.function_values[s][masks, :]) sample_names.append(self.sample_names[i]) head += len(masks) - + function_indices = np.array(indices) function_arguments = np.concatenate(arguments) function_values = np.concatenate(values) - + return self.copy( function_indices=function_indices, function_arguments=function_arguments, @@ -962,8 +963,8 @@ def shift( Returns: Shifted functions. """ - #TODO build based in above - pass + # TODO build based in above + return None def compose( self: T, @@ -971,9 +972,9 @@ def compose( *, eval_points: Optional[GridPointsLike] = None, ) -> T: - + #TODO Is this possible with this structure? - pass + return None def __str__(self) -> str: """Return str(self).""" @@ -1017,9 +1018,9 @@ def __getitem__( s = slice(self.function_indices[i], next_index) required_slices.append(s) - arguments = np.concatenate([self.function_arguments[s] + arguments = np.concatenate([self.function_arguments[s] for s in required_slices]) - values = np.concatenate([self.function_values[s] + values = np.concatenate([self.function_values[s] for s in required_slices]) chunk_sizes = np.array([s.stop-s.start for s in required_slices]) @@ -1046,7 +1047,7 @@ def __array_ufunc__( for i in inputs: if ( isinstance(i, FDataIrregular) - and not np.array_equal(i.function_arguments, + and not np.array_equal(i.function_arguments, self.function_arguments ) ): From c6f5859c3a000c070e81abc643f87866b38480ef Mon Sep 17 00:00:00 2001 From: opintosant Date: Thu, 9 Mar 2023 14:00:18 +0100 Subject: [PATCH 032/144] Fix wrong shape in gmean function values --- skfda/representation/irregular.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index ae3bd61e2..aae75fa17 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -444,10 +444,11 @@ def cov(self: T) -> T: return None def 
gmean(self: T) -> T: + _gmean = scipy.stats.mstats.gmean(self.function_values, axis=0) return FDataIrregular( function_indices=np.array([0]), function_arguments=np.array(np.zeros((1, self.dim_domain))), - function_values=scipy.stats.mstats.gmean(self.function_values, 0), + function_values=_gmean.reshape(-1, 1), sample_names=("geometric mean",), ) From 4f8985a07484a1bc9af58ba7dcd93c42d41f9200 Mon Sep 17 00:00:00 2001 From: opintosant Date: Thu, 9 Mar 2023 15:29:54 +0100 Subject: [PATCH 033/144] Remove outdated reference to data_matrix in __str__ method --- skfda/representation/irregular.py | 1 - 1 file changed, 1 deletion(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index aae75fa17..92ce5198a 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -980,7 +980,6 @@ def compose( def __str__(self) -> str: """Return str(self).""" return ( - f"Data set: {self.data_matrix}\n" f"function indices: {self.function_indices}\n" f"function arguments: {self.function_arguments}\n" f"function values: {self.function_values}\n" From de17d265fc25ebc4f324f13b893b6add8f68aa34 Mon Sep 17 00:00:00 2001 From: opintosant Date: Thu, 9 Mar 2023 15:33:37 +0100 Subject: [PATCH 034/144] Correct return types for fixtures --- skfda/tests/test_irregular.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/skfda/tests/test_irregular.py b/skfda/tests/test_irregular.py index 30bb3f7a6..2cc5c302b 100644 --- a/skfda/tests/test_irregular.py +++ b/skfda/tests/test_irregular.py @@ -1,6 +1,8 @@ """Test the basic methods of the FDataIrregular structure""" +from typing import Tuple from ..typing._numpy import ArrayLike import numpy as np +import pandas import pytest from skfda.datasets._real_datasets import _fetch_loon_data @@ -28,7 +30,7 @@ @pytest.fixture() def input_arrays( -) -> ArrayLike: +) -> Tuple[ArrayLike, ArrayLike, ArrayLike]: """ Generate three unidimensional arrays describing a FDataIrregular 
structure @@ -53,7 +55,7 @@ def input_arrays( @pytest.fixture() def input_arrays_multidimensional( -) -> ArrayLike: +) -> Tuple[ArrayLike, ArrayLike, ArrayLike]: """ Generate three multidimensional arrays describing a FDataIrregular structure @@ -78,7 +80,7 @@ def input_arrays_multidimensional( @pytest.fixture() def fdatagrid( -) -> ArrayLike: +) -> FDataGrid: """Generate FDataGrid""" # TODO Make editable with pytest num_values_per_curve = np.random.randint(1, @@ -96,7 +98,7 @@ def fdatagrid( @pytest.fixture() def fdatagrid_multidimensional( -) -> ArrayLike: +) -> FDataGrid: """Generate multidimensional FDataGrid""" # TODO Make editable with pytest num_values_per_curve = np.random.randint(1, @@ -114,7 +116,7 @@ def fdatagrid_multidimensional( @pytest.fixture() def dataframe( -) -> ArrayLike: +) -> pandas.DataFrame: """Generate long dataframe for testing""" raw_dataset = _fetch_loon_data("bone_ext") data = raw_dataset["bone_ext"] From 2b6e078ff29ce9533a94fccd5098d623fba16af5 Mon Sep 17 00:00:00 2001 From: opintosant Date: Thu, 9 Mar 2023 17:16:37 +0100 Subject: [PATCH 035/144] Test for arithmetic operations for scalar irregular data --- skfda/representation/irregular.py | 2 +- skfda/tests/test_irregular_operations.py | 314 +++++++++++++++++++++++ 2 files changed, 315 insertions(+), 1 deletion(-) create mode 100644 skfda/tests/test_irregular_operations.py diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 92ce5198a..9ff3a26b4 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -514,7 +514,7 @@ def _get_op_matrix( # Repeat the other value for each curve as many times # as values inside the curve - return np.repeat(other_vector, values_curve) + return np.repeat(other_vector, values_curve).reshape(-1,1) elif other.shape == ( self.n_samples, self.dim_codomain, diff --git a/skfda/tests/test_irregular_operations.py b/skfda/tests/test_irregular_operations.py new file mode 100644 index 
000000000..affb65194 --- /dev/null +++ b/skfda/tests/test_irregular_operations.py @@ -0,0 +1,314 @@ +"""Test the basic methods of the FDataIrregular structure""" +from ..typing._numpy import ArrayLike, Any +from typing import Tuple, Optional +import numpy as np +import pytest + +from skfda.datasets._real_datasets import _fetch_loon_data +from skfda.representation import FDataIrregular, FDataGrid +from skfda.representation.interpolation import SplineInterpolation + +############ +# FIXTURES +############ + +NUM_CURVES = 10 +MAX_VALUES_PER_CURVE = 99 +DIMENSIONS = 2 + + +@pytest.fixture() +def input_arrays( + num_curves: Optional[int] = NUM_CURVES, + max_values_per_curve: Optional[int] = MAX_VALUES_PER_CURVE, + dimensions: Optional[int] = 1 +) -> Tuple[ArrayLike, ArrayLike, ArrayLike]: + """ + Generate three unidimensional arrays describing a + FDataIrregular structure with fixed sizes given by + the parameters + """ + num_values_per_curve = max_values_per_curve*np.ones(num_curves).astype(int) + values_per_curve = [np.random.rand(num_values, dimensions) + for num_values in num_values_per_curve] + args_per_curve = [np.random.rand(num_values, dimensions) + for num_values in num_values_per_curve] + + indices = np.cumsum(num_values_per_curve) - num_values_per_curve + values = np.concatenate(values_per_curve) + arguments = np.concatenate(args_per_curve) + + return indices, values, arguments + + +@pytest.fixture() +def input_arrays_multidimensional( +) -> Tuple[ArrayLike, ArrayLike, ArrayLike]: + """ + Generate three multidimensional arrays + describing a FDataIrregular structure + """ + # TODO Make editable with pytest + num_values_per_curve = np.random.randint(1, + MAX_VALUES_PER_CURVE, + size=(NUM_CURVES, ) + ) + + values_per_curve = [np.random.rand(num_values, DIMENSIONS) + for num_values in num_values_per_curve] + args_per_curve = [np.random.rand(num_values, DIMENSIONS) + for num_values in num_values_per_curve] + + indices = np.cumsum(num_values_per_curve) - 
num_values_per_curve + values = np.concatenate(values_per_curve) + arguments = np.concatenate(args_per_curve) + + return indices, values, arguments + + +@pytest.fixture() +def fdatairregular( + input_arrays: Tuple[ArrayLike, ArrayLike, ArrayLike], +) -> FDataIrregular: + """ + Generate three multidimensional arrays + describing a FDataIrregular structure + """ + return FDataIrregular(*input_arrays) + +############ +# TESTS +############ + + +@pytest.mark.parametrize( + ("other"), + [ + (2), + (2*np.ones(NUM_CURVES)), + (2*np.ones((NUM_CURVES, 1))), + ("fdatairregular") + ], +) +class TestArithmeticOperations: + """Class which encapsulates the testing of basic arithmetic operations""" + + def _take_first( + self, + other, + ) -> float: + if isinstance(other, np.ndarray): + return other[0] + elif isinstance(other, FDataIrregular): + return other.function_values + return other + + def test_fdatairregular_arithmetic_sum( + self, + fdatairregular: FDataIrregular, + other: Any, + request, + ) -> None: + """Tests the basic arithmetic operation fdatairregular + other + + Args: + fdatairregular (FDataIrregular): FDataIrregular object to test + other (Any): Scalar, vector, matrix or FDataIrregular + """ + if isinstance(other, str): + other = request.getfixturevalue(other) + + f_data_sum = fdatairregular + other + + assert np.all( + f_data_sum.function_values == + fdatairregular.function_values + self._take_first(other) + ) + + def test_fdatairregular_arithmetic_rsum( + self, + fdatairregular: FDataIrregular, + other: Any, + request, + ) -> None: + """Tests the basic arithmetic operation other + fdatairregular + + Args: + fdatairregular (FDataIrregular): FDataIrregular object to test + other (Any): Scalar, vector, matrix or FDataIrregular + """ + if isinstance(other, str): + other = request.getfixturevalue(other) + + f_data_sum = other + fdatairregular + + assert np.all( + f_data_sum.function_values == + self._take_first(other) + fdatairregular.function_values + ) + + def 
test_fdatairregular_arithmetic_sum_commutative( + self, + fdatairregular: FDataIrregular, + other: Any, + request, + ) -> None: + """Tests the basic arithmetic operation other + fdatairregular + + Args: + fdatairregular (FDataIrregular): FDataIrregular object to test + other (Any): Scalar, vector, matrix or FDataIrregular + """ + if isinstance(other, str): + other = request.getfixturevalue(other) + + assert fdatairregular + other == other + fdatairregular + + def test_fdatairregular_arithmetic_sub( + self, + fdatairregular: FDataIrregular, + other: Any, + request, + ) -> None: + """Tests the basic arithmetic operation fdatairregular - other + + Args: + fdatairregular (FDataIrregular): FDataIrregular object to test + other (Any): Scalar, vector, matrix or FDataIrregular + """ + if isinstance(other, str): + other = request.getfixturevalue(other) + + f_data_sum = fdatairregular - other + + assert np.all( + f_data_sum.function_values == + fdatairregular.function_values - self._take_first(other) + ) + + def test_fdatairregular_arithmetic_rsub( + self, + fdatairregular: FDataIrregular, + other: Any, + request, + ) -> None: + """Tests the basic arithmetic operation other - fdatairregular + + Args: + fdatairregular (FDataIrregular): FDataIrregular object to test + other (Any): Scalar, vector, matrix or FDataIrregular + """ + if isinstance(other, str): + other = request.getfixturevalue(other) + + f_data_sum = other - fdatairregular + + assert np.all( + f_data_sum.function_values == + self._take_first(other) - fdatairregular.function_values + ) + + def test_fdatairregular_arithmetic_mul( + self, + fdatairregular: FDataIrregular, + other: Any, + request, + ) -> None: + """Tests the basic arithmetic operation fdatairregular * other + + Args: + fdatairregular (FDataIrregular): FDataIrregular object to test + other (Any): Scalar, vector, matrix or FDataIrregular + """ + if isinstance(other, str): + other = request.getfixturevalue(other) + + f_data_sum = fdatairregular * other + 
+ assert np.all( + f_data_sum.function_values == + fdatairregular.function_values * self._take_first(other) + ) + + def test_fdatairregular_arithmetic_rmul( + self, + fdatairregular: FDataIrregular, + other: Any, + request, + ) -> None: + """Tests the basic arithmetic operation other * fdatairregular + + Args: + fdatairregular (FDataIrregular): FDataIrregular object to test + other (Any): Scalar, vector, matrix or FDataIrregular + """ + if isinstance(other, str): + other = request.getfixturevalue(other) + + f_data_sum = other * fdatairregular + + assert np.all( + f_data_sum.function_values == + self._take_first(other) * fdatairregular.function_values + ) + + def test_fdatairregular_arithmetic_mul_commutative( + self, + fdatairregular: FDataIrregular, + other: Any, + request, + ) -> None: + """Tests the basic arithmetic operation other * fdatairregular + + Args: + fdatairregular (FDataIrregular): FDataIrregular object to test + other (Any): Scalar, vector, matrix or FDataIrregular + """ + if isinstance(other, str): + other = request.getfixturevalue(other) + + assert fdatairregular * other == other * fdatairregular + + def test_fdatairregular_arithmetic_div( + self, + fdatairregular: FDataIrregular, + other: Any, + request, + ) -> None: + """Tests the basic arithmetic operation fdatairregular / other + + Args: + fdatairregular (FDataIrregular): FDataIrregular object to test + other (Any): Scalar, vector, matrix or FDataIrregular + """ + if isinstance(other, str): + other = request.getfixturevalue(other) + + f_data_sum = fdatairregular / other + + assert np.all( + f_data_sum.function_values == + fdatairregular.function_values / self._take_first(other) + ) + + def test_fdatairregular_arithmetic_rdiv( + self, + fdatairregular: FDataIrregular, + other: Any, + request, + ) -> None: + """Tests the basic arithmetic operation other / fdatairregular + + Args: + fdatairregular (FDataIrregular): FDataIrregular object to test + other (Any): Scalar, vector, matrix or 
FDataIrregular + """ + if isinstance(other, str): + other = request.getfixturevalue(other) + + f_data_sum = other / fdatairregular + + assert np.all( + f_data_sum.function_values == + self._take_first(other) / fdatairregular.function_values + ) From c3dba8b07d9de0cafdebad695160df2750029405 Mon Sep 17 00:00:00 2001 From: opintosant Date: Wed, 22 Mar 2023 19:35:10 +0100 Subject: [PATCH 036/144] Fix incorrect selection of sample_ranges in FDataIrregular --- skfda/representation/irregular.py | 53 ++++++++++++++++++------------- 1 file changed, 31 insertions(+), 22 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 9ff3a26b4..628f800b3 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -81,16 +81,24 @@ def __init__( self.set_function_values(function_values) # TODO Fix for higher dimensions - i = 0 dim_ranges = list() for dim in range(self.dim_domain): + i = 0 dim_sample_ranges = list() for f in self.function_indices[1:]: - dim_sample_ranges.append(tuple((self.function_arguments[i][dim], - self.function_arguments[f-1][dim]))) + min_argument = min([self.function_arguments[j][dim] for j in range(i, f)]) + max_argument = max([self.function_arguments[j][dim] for j in range(i, f)]) + dim_sample_ranges.append(tuple((min_argument, + max_argument)) + ) i = f - dim_sample_ranges.append((self.function_arguments[i][dim], - self.function_arguments[-1][dim])) + + min_argument = min([self.function_arguments[f + j][dim] + for j in range(self.function_arguments.shape[0] - f)]) + max_argument = max([self.function_arguments[f + j][dim] + for j in range(self.function_arguments.shape[0] - f)]) + dim_sample_ranges.append((min_argument, + max_argument)) dim_ranges.append(dim_sample_ranges) self._sample_range = list() @@ -651,6 +659,7 @@ def __neg__(self: T) -> T: return self.copy(function_values=-self.function_values) def concatenate(self: T, *others: T, as_coordinates: bool = False) -> T: + #TODO As 
coordinates if as_coordinates: raise NotImplementedError( "Not implemented for as_coordinates = True", @@ -718,35 +727,35 @@ def to_matrix(self, **kwargs: Any) -> ArrayLike: # Convert FDataIrregular to matrix of all points # with NaN in undefined values - if self.dim_domain > 1: - warnings.warn(f"Not implemented for domain dimension > 1, \ - currently {self.dim_domain}") - # Find the grid points and values for each function - grid_points = [] + grid_points = [[]]*self.dim_domain evaluated_points = [] for index_start, index_end in zip(list(self.function_indices), list(self.function_indices[1:])): - grid_points.append( - [x[0] for x in self.function_arguments[index_start:index_end]]) + for dim in range(self.dim_domain): + grid_points[dim].append( + [x[dim] for x in self.function_arguments[index_start:index_end]]) + evaluated_points.append( self.function_values[index_start:index_end]) # Dont forget to add the last one - grid_points.append([x[0] for x in self.function_arguments[index_end:]]) + for dim in range(self.dim_domain): + grid_points[dim].append([x for x in self.function_arguments[index_end:]]) evaluated_points.append(self.function_values[index_end:]) # Aggregate into a complete data matrix + unified_grid_points = []*self.dim_domain from functools import reduce - unified_grid_points = reduce( - lambda x, y: set(list(y)).union(list(x)), - grid_points, - ) - - unified_grid_points = sorted(unified_grid_points) + for dim in range(self.dim_domain): + _unified_points = reduce( + lambda x, y: set(list(y)).union(list(x)), + grid_points[dim], + ) + unified_grid_points[dim] = sorted(_unified_points) # Fill matrix with known values, leave unknown as NA - num_curves = len(grid_points) + num_curves = len(grid_points[0]) num_points = len(unified_grid_points) unified_matrix = np.empty((num_curves, num_points, self.dim_codomain)) @@ -755,8 +764,8 @@ def to_matrix(self, **kwargs: Any) -> ArrayLike: for curve in range(num_curves): for point in range(len(grid_points[curve])): for 
dimension in range(self.dim_codomain): - point_index = unified_grid_points.index( - grid_points[curve][point] + point_index = unified_grid_points[0].index( + grid_points[curve][point][0] ) unified_matrix[curve, point_index, dimension] = \ evaluated_points[curve][point][dimension] From 00ad9de14d12667a9170ba7bb6b209df7f5fa82c Mon Sep 17 00:00:00 2001 From: opintosant Date: Thu, 23 Mar 2023 13:03:40 +0100 Subject: [PATCH 037/144] Fully functioning structure for multidimensional data, passes full test --- skfda/representation/irregular.py | 35 ++++--- skfda/tests/test_irregular.py | 28 ++---- skfda/tests/test_irregular_operations.py | 123 ++++++++++++++--------- 3 files changed, 101 insertions(+), 85 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 628f800b3..53c0e3ccb 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -93,10 +93,10 @@ def __init__( ) i = f - min_argument = min([self.function_arguments[f + j][dim] - for j in range(self.function_arguments.shape[0] - f)]) - max_argument = max([self.function_arguments[f + j][dim] - for j in range(self.function_arguments.shape[0] - f)]) + min_argument = min([self.function_arguments[i + j][dim] + for j in range(self.function_arguments.shape[0] - i)]) + max_argument = max([self.function_arguments[i + j][dim] + for j in range(self.function_arguments.shape[0] - i)]) dim_sample_ranges.append((min_argument, max_argument)) dim_ranges.append(dim_sample_ranges) @@ -728,7 +728,7 @@ def to_matrix(self, **kwargs: Any) -> ArrayLike: # with NaN in undefined values # Find the grid points and values for each function - grid_points = [[]]*self.dim_domain + grid_points = [list() for i in range(self.dim_domain)] evaluated_points = [] for index_start, index_end in zip(list(self.function_indices), list(self.function_indices[1:])): @@ -741,11 +741,11 @@ def to_matrix(self, **kwargs: Any) -> ArrayLike: # Dont forget to add the last one for dim in 
range(self.dim_domain): - grid_points[dim].append([x for x in self.function_arguments[index_end:]]) + grid_points[dim].append([x[dim] for x in self.function_arguments[index_end:]]) evaluated_points.append(self.function_values[index_end:]) # Aggregate into a complete data matrix - unified_grid_points = []*self.dim_domain + unified_grid_points = [list() for i in range(self.dim_domain)] from functools import reduce for dim in range(self.dim_domain): _unified_points = reduce( @@ -756,18 +756,18 @@ def to_matrix(self, **kwargs: Any) -> ArrayLike: # Fill matrix with known values, leave unknown as NA num_curves = len(grid_points[0]) - num_points = len(unified_grid_points) + num_points = len(unified_grid_points[0]) - unified_matrix = np.empty((num_curves, num_points, self.dim_codomain)) + unified_matrix = np.empty((num_curves, *(num_points,)*self.dim_domain , self.dim_codomain)) unified_matrix.fill(np.nan) for curve in range(num_curves): - for point in range(len(grid_points[curve])): + #TODO Ensure that there is always at least one dimension + for point in range(len(grid_points[0][curve])): for dimension in range(self.dim_codomain): - point_index = unified_grid_points[0].index( - grid_points[curve][point][0] - ) - unified_matrix[curve, point_index, dimension] = \ + point_index = [unified_grid_points[i].index(grid_points[i][curve][point]) + for i in range(self.dim_domain)] + unified_matrix[(curve, *point_index, dimension)] = \ evaluated_points[curve][point][dimension] return unified_matrix, unified_grid_points @@ -1023,7 +1023,7 @@ def __getitem__( required_indices = indices[key] for i in required_indices: next_index = self.function_indices[i + 1] if i + 1 < \ - self.num_functions else -1 + self.num_functions else None s = slice(self.function_indices[i], next_index) required_slices.append(s) @@ -1032,7 +1032,10 @@ def __getitem__( values = np.concatenate([self.function_values[s] for s in required_slices]) - chunk_sizes = np.array([s.stop-s.start for s in required_slices]) + 
chunk_sizes = np.array([s.stop-s.start if s.stop is not None + else self.num_observations - s.start + for s in required_slices]) + indices = np.cumsum(chunk_sizes) - chunk_sizes[0] return self.copy( diff --git a/skfda/tests/test_irregular.py b/skfda/tests/test_irregular.py index 2cc5c302b..e853d8040 100644 --- a/skfda/tests/test_irregular.py +++ b/skfda/tests/test_irregular.py @@ -14,7 +14,6 @@ ############ NUM_CURVES = 10 -MAX_VALUES_PER_CURVE = 99 DIMENSIONS = 2 TEST_DECIMALS = range(10) COPY_KWARGS = [ @@ -36,16 +35,12 @@ def input_arrays( FDataIrregular structure """ # TODO Make editable with pytest - num_values_per_curve = np.random.randint(1, - MAX_VALUES_PER_CURVE, - size=(NUM_CURVES, ) - ) + num_values_per_curve = np.array(range(NUM_CURVES)) + 1 values_per_curve = [np.random.rand(num_values, 1) for num_values in num_values_per_curve] args_per_curve = [np.random.rand(num_values, 1) for num_values in num_values_per_curve] - indices = np.cumsum(num_values_per_curve) - num_values_per_curve values = np.concatenate(values_per_curve) arguments = np.concatenate(args_per_curve) @@ -61,10 +56,7 @@ def input_arrays_multidimensional( describing a FDataIrregular structure """ # TODO Make editable with pytest - num_values_per_curve = np.random.randint(1, - MAX_VALUES_PER_CURVE, - size=(NUM_CURVES, ) - ) + num_values_per_curve = np.array(range(NUM_CURVES)) + 1 values_per_curve = [np.random.rand(num_values, DIMENSIONS) for num_values in num_values_per_curve] @@ -83,9 +75,7 @@ def fdatagrid( ) -> FDataGrid: """Generate FDataGrid""" # TODO Make editable with pytest - num_values_per_curve = np.random.randint(1, - MAX_VALUES_PER_CURVE, - ) + num_values_per_curve = NUM_CURVES data_matrix = np.random.rand(NUM_CURVES, num_values_per_curve, 1) # Grid points must be sorted @@ -101,9 +91,7 @@ def fdatagrid_multidimensional( ) -> FDataGrid: """Generate multidimensional FDataGrid""" # TODO Make editable with pytest - num_values_per_curve = np.random.randint(1, - MAX_VALUES_PER_CURVE, 
- ) + num_values_per_curve = NUM_CURVES data_matrix = np.random.rand(NUM_CURVES, num_values_per_curve, DIMENSIONS) # Grid points must be sorted @@ -326,11 +314,11 @@ def test_fdatairregular_getitem( assert len(f_data_irreg[0]) == 1 assert len(f_data_irreg[-1]) == 1 - assert len(f_data_irreg[0:10]) == 10 + assert len(f_data_irreg[0:NUM_CURVES]) == NUM_CURVES assert len(f_data_irreg[0:]) == len(f_data_irreg) - assert len(f_data_irreg[:10]) == 10 - assert len(f_data_irreg[0:10:2]) == 5 - assert len(f_data_irreg[0:10:2]) == 5 + assert len(f_data_irreg[:NUM_CURVES]) == NUM_CURVES + assert len(f_data_irreg[0:NUM_CURVES:2]) == NUM_CURVES/2 + assert len(f_data_irreg[0:NUM_CURVES:2]) == NUM_CURVES/2 def test_fdatairregular_coordinates( diff --git a/skfda/tests/test_irregular_operations.py b/skfda/tests/test_irregular_operations.py index affb65194..55d627858 100644 --- a/skfda/tests/test_irregular_operations.py +++ b/skfda/tests/test_irregular_operations.py @@ -42,21 +42,20 @@ def input_arrays( @pytest.fixture() -def input_arrays_multidimensional( +def input_arrays_2D( + num_curves: Optional[int] = NUM_CURVES, + max_values_per_curve: Optional[int] = MAX_VALUES_PER_CURVE, + dimensions: Optional[int] = DIMENSIONS ) -> Tuple[ArrayLike, ArrayLike, ArrayLike]: """ - Generate three multidimensional arrays - describing a FDataIrregular structure + Generate three unidimensional arrays describing a + FDataIrregular structure with fixed sizes given by + the parameters """ - # TODO Make editable with pytest - num_values_per_curve = np.random.randint(1, - MAX_VALUES_PER_CURVE, - size=(NUM_CURVES, ) - ) - - values_per_curve = [np.random.rand(num_values, DIMENSIONS) + num_values_per_curve = max_values_per_curve*np.ones(num_curves).astype(int) + values_per_curve = [np.random.rand(num_values, dimensions) for num_values in num_values_per_curve] - args_per_curve = [np.random.rand(num_values, DIMENSIONS) + args_per_curve = [np.random.rand(num_values, dimensions) for num_values in 
num_values_per_curve] indices = np.cumsum(num_values_per_curve) - num_values_per_curve @@ -67,7 +66,7 @@ def input_arrays_multidimensional( @pytest.fixture() -def fdatairregular( +def fdatairregular1D( input_arrays: Tuple[ArrayLike, ArrayLike, ArrayLike], ) -> FDataIrregular: """ @@ -76,20 +75,36 @@ def fdatairregular( """ return FDataIrregular(*input_arrays) + +@pytest.fixture() +def fdatairregular2D( + input_arrays_2D: Tuple[ArrayLike, ArrayLike, ArrayLike], +) -> FDataIrregular: + """ + Generate three multidimensional arrays + describing a FDataIrregular structure + """ + return FDataIrregular(*input_arrays_2D) + ############ # TESTS ############ @pytest.mark.parametrize( - ("other"), + ("fdatairregular", "other"), [ - (2), - (2*np.ones(NUM_CURVES)), - (2*np.ones((NUM_CURVES, 1))), - ("fdatairregular") + ("fdatairregular1D", 2), + ("fdatairregular1D", 2*np.ones(NUM_CURVES)), + ("fdatairregular1D", 2*np.ones((NUM_CURVES, 1))), + ("fdatairregular1D", "fdatairregular1D"), + ("fdatairregular2D", 2), + ("fdatairregular2D", 2*np.ones(NUM_CURVES)), + ("fdatairregular2D", 2*np.ones((NUM_CURVES, 2))), + ("fdatairregular2D", "fdatairregular2D") ], ) + class TestArithmeticOperations: """Class which encapsulates the testing of basic arithmetic operations""" @@ -105,7 +120,7 @@ def _take_first( def test_fdatairregular_arithmetic_sum( self, - fdatairregular: FDataIrregular, + fdatairregular: str, other: Any, request, ) -> None: @@ -115,19 +130,20 @@ def test_fdatairregular_arithmetic_sum( fdatairregular (FDataIrregular): FDataIrregular object to test other (Any): Scalar, vector, matrix or FDataIrregular """ + f_data_irreg = request.getfixturevalue(fdatairregular) if isinstance(other, str): other = request.getfixturevalue(other) - f_data_sum = fdatairregular + other + f_data_sum = f_data_irreg + other assert np.all( f_data_sum.function_values == - fdatairregular.function_values + self._take_first(other) + f_data_irreg.function_values + self._take_first(other) ) def 
test_fdatairregular_arithmetic_rsum( self, - fdatairregular: FDataIrregular, + fdatairregular: str, other: Any, request, ) -> None: @@ -137,19 +153,20 @@ def test_fdatairregular_arithmetic_rsum( fdatairregular (FDataIrregular): FDataIrregular object to test other (Any): Scalar, vector, matrix or FDataIrregular """ + f_data_irreg = request.getfixturevalue(fdatairregular) if isinstance(other, str): other = request.getfixturevalue(other) - f_data_sum = other + fdatairregular + f_data_sum = other + f_data_irreg assert np.all( f_data_sum.function_values == - self._take_first(other) + fdatairregular.function_values + self._take_first(other) + f_data_irreg.function_values ) def test_fdatairregular_arithmetic_sum_commutative( self, - fdatairregular: FDataIrregular, + fdatairregular: str, other: Any, request, ) -> None: @@ -159,14 +176,15 @@ def test_fdatairregular_arithmetic_sum_commutative( fdatairregular (FDataIrregular): FDataIrregular object to test other (Any): Scalar, vector, matrix or FDataIrregular """ + f_data_irreg = request.getfixturevalue(fdatairregular) if isinstance(other, str): other = request.getfixturevalue(other) - assert fdatairregular + other == other + fdatairregular + assert f_data_irreg + other == other + f_data_irreg def test_fdatairregular_arithmetic_sub( self, - fdatairregular: FDataIrregular, + fdatairregular: str, other: Any, request, ) -> None: @@ -176,19 +194,20 @@ def test_fdatairregular_arithmetic_sub( fdatairregular (FDataIrregular): FDataIrregular object to test other (Any): Scalar, vector, matrix or FDataIrregular """ + f_data_irreg = request.getfixturevalue(fdatairregular) if isinstance(other, str): other = request.getfixturevalue(other) - f_data_sum = fdatairregular - other + f_data_sum = f_data_irreg - other assert np.all( f_data_sum.function_values == - fdatairregular.function_values - self._take_first(other) + f_data_irreg.function_values - self._take_first(other) ) def test_fdatairregular_arithmetic_rsub( self, - fdatairregular: 
FDataIrregular, + fdatairregular: str, other: Any, request, ) -> None: @@ -198,19 +217,20 @@ def test_fdatairregular_arithmetic_rsub( fdatairregular (FDataIrregular): FDataIrregular object to test other (Any): Scalar, vector, matrix or FDataIrregular """ + f_data_irreg = request.getfixturevalue(fdatairregular) if isinstance(other, str): other = request.getfixturevalue(other) - f_data_sum = other - fdatairregular + f_data_sum = other - f_data_irreg assert np.all( f_data_sum.function_values == - self._take_first(other) - fdatairregular.function_values + self._take_first(other) - f_data_irreg.function_values ) def test_fdatairregular_arithmetic_mul( self, - fdatairregular: FDataIrregular, + fdatairregular: str, other: Any, request, ) -> None: @@ -220,19 +240,20 @@ def test_fdatairregular_arithmetic_mul( fdatairregular (FDataIrregular): FDataIrregular object to test other (Any): Scalar, vector, matrix or FDataIrregular """ + f_data_irreg = request.getfixturevalue(fdatairregular) if isinstance(other, str): other = request.getfixturevalue(other) - f_data_sum = fdatairregular * other + f_data_mul = f_data_irreg * other assert np.all( - f_data_sum.function_values == - fdatairregular.function_values * self._take_first(other) + f_data_mul.function_values == + f_data_irreg.function_values * self._take_first(other) ) def test_fdatairregular_arithmetic_rmul( self, - fdatairregular: FDataIrregular, + fdatairregular: str, other: Any, request, ) -> None: @@ -242,19 +263,20 @@ def test_fdatairregular_arithmetic_rmul( fdatairregular (FDataIrregular): FDataIrregular object to test other (Any): Scalar, vector, matrix or FDataIrregular """ + f_data_irreg = request.getfixturevalue(fdatairregular) if isinstance(other, str): other = request.getfixturevalue(other) - f_data_sum = other * fdatairregular + f_data_mul = other * f_data_irreg assert np.all( - f_data_sum.function_values == - self._take_first(other) * fdatairregular.function_values + f_data_mul.function_values == + 
self._take_first(other) * f_data_irreg.function_values ) def test_fdatairregular_arithmetic_mul_commutative( self, - fdatairregular: FDataIrregular, + fdatairregular: str, other: Any, request, ) -> None: @@ -264,14 +286,15 @@ def test_fdatairregular_arithmetic_mul_commutative( fdatairregular (FDataIrregular): FDataIrregular object to test other (Any): Scalar, vector, matrix or FDataIrregular """ + f_data_irreg = request.getfixturevalue(fdatairregular) if isinstance(other, str): other = request.getfixturevalue(other) - assert fdatairregular * other == other * fdatairregular + assert f_data_irreg * other == other * f_data_irreg def test_fdatairregular_arithmetic_div( self, - fdatairregular: FDataIrregular, + fdatairregular: str, other: Any, request, ) -> None: @@ -281,19 +304,20 @@ def test_fdatairregular_arithmetic_div( fdatairregular (FDataIrregular): FDataIrregular object to test other (Any): Scalar, vector, matrix or FDataIrregular """ + f_data_irreg = request.getfixturevalue(fdatairregular) if isinstance(other, str): other = request.getfixturevalue(other) - f_data_sum = fdatairregular / other + f_data_div = f_data_irreg / other assert np.all( - f_data_sum.function_values == - fdatairregular.function_values / self._take_first(other) + f_data_div.function_values == + f_data_irreg.function_values / self._take_first(other) ) def test_fdatairregular_arithmetic_rdiv( self, - fdatairregular: FDataIrregular, + fdatairregular: str, other: Any, request, ) -> None: @@ -303,12 +327,13 @@ def test_fdatairregular_arithmetic_rdiv( fdatairregular (FDataIrregular): FDataIrregular object to test other (Any): Scalar, vector, matrix or FDataIrregular """ + f_data_irreg = request.getfixturevalue(fdatairregular) if isinstance(other, str): other = request.getfixturevalue(other) - f_data_sum = other / fdatairregular + f_data_div = other / f_data_irreg assert np.all( - f_data_sum.function_values == - self._take_first(other) / fdatairregular.function_values + f_data_div.function_values 
== + self._take_first(other) / f_data_irreg.function_values ) From f27420e2306ea34de99de8527a9afd1e38aa99be Mon Sep 17 00:00:00 2001 From: opintosant Date: Thu, 23 Mar 2023 13:29:35 +0100 Subject: [PATCH 038/144] Add fixed seed to random generators in tests --- skfda/tests/test_irregular.py | 17 +++++++++-------- skfda/tests/test_irregular_operations.py | 10 ++++++---- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/skfda/tests/test_irregular.py b/skfda/tests/test_irregular.py index e853d8040..21e5ead68 100644 --- a/skfda/tests/test_irregular.py +++ b/skfda/tests/test_irregular.py @@ -26,6 +26,7 @@ {"coordinate_names": ("test",)}, ] +random_state = np.random.RandomState(seed=14) @pytest.fixture() def input_arrays( @@ -37,9 +38,9 @@ def input_arrays( # TODO Make editable with pytest num_values_per_curve = np.array(range(NUM_CURVES)) + 1 - values_per_curve = [np.random.rand(num_values, 1) + values_per_curve = [random_state.rand(num_values, 1) for num_values in num_values_per_curve] - args_per_curve = [np.random.rand(num_values, 1) + args_per_curve = [random_state.rand(num_values, 1) for num_values in num_values_per_curve] indices = np.cumsum(num_values_per_curve) - num_values_per_curve values = np.concatenate(values_per_curve) @@ -58,9 +59,9 @@ def input_arrays_multidimensional( # TODO Make editable with pytest num_values_per_curve = np.array(range(NUM_CURVES)) + 1 - values_per_curve = [np.random.rand(num_values, DIMENSIONS) + values_per_curve = [random_state.rand(num_values, DIMENSIONS) for num_values in num_values_per_curve] - args_per_curve = [np.random.rand(num_values, DIMENSIONS) + args_per_curve = [random_state.rand(num_values, DIMENSIONS) for num_values in num_values_per_curve] indices = np.cumsum(num_values_per_curve) - num_values_per_curve @@ -77,9 +78,9 @@ def fdatagrid( # TODO Make editable with pytest num_values_per_curve = NUM_CURVES - data_matrix = np.random.rand(NUM_CURVES, num_values_per_curve, 1) + data_matrix = 
random_state.rand(NUM_CURVES, num_values_per_curve, 1) # Grid points must be sorted - grid_points = np.sort(np.random.rand(num_values_per_curve)) + grid_points = np.sort(random_state.rand(num_values_per_curve)) return FDataGrid(data_matrix=data_matrix, grid_points=grid_points, @@ -93,9 +94,9 @@ def fdatagrid_multidimensional( # TODO Make editable with pytest num_values_per_curve = NUM_CURVES - data_matrix = np.random.rand(NUM_CURVES, num_values_per_curve, DIMENSIONS) + data_matrix = random_state.rand(NUM_CURVES, num_values_per_curve, DIMENSIONS) # Grid points must be sorted - grid_points = np.sort(np.random.rand(num_values_per_curve)) + grid_points = np.sort(random_state.rand(num_values_per_curve)) return FDataGrid(data_matrix=data_matrix, grid_points=grid_points, diff --git a/skfda/tests/test_irregular_operations.py b/skfda/tests/test_irregular_operations.py index 55d627858..211963047 100644 --- a/skfda/tests/test_irregular_operations.py +++ b/skfda/tests/test_irregular_operations.py @@ -16,6 +16,8 @@ MAX_VALUES_PER_CURVE = 99 DIMENSIONS = 2 +random_state = np.random.RandomState(seed=14) + @pytest.fixture() def input_arrays( @@ -29,9 +31,9 @@ def input_arrays( the parameters """ num_values_per_curve = max_values_per_curve*np.ones(num_curves).astype(int) - values_per_curve = [np.random.rand(num_values, dimensions) + values_per_curve = [random_state.rand(num_values, dimensions) for num_values in num_values_per_curve] - args_per_curve = [np.random.rand(num_values, dimensions) + args_per_curve = [random_state.rand(num_values, dimensions) for num_values in num_values_per_curve] indices = np.cumsum(num_values_per_curve) - num_values_per_curve @@ -53,9 +55,9 @@ def input_arrays_2D( the parameters """ num_values_per_curve = max_values_per_curve*np.ones(num_curves).astype(int) - values_per_curve = [np.random.rand(num_values, dimensions) + values_per_curve = [random_state.rand(num_values, dimensions) for num_values in num_values_per_curve] - args_per_curve = 
[np.random.rand(num_values, dimensions) + args_per_curve = [random_state.rand(num_values, dimensions) for num_values in num_values_per_curve] indices = np.cumsum(num_values_per_curve) - num_values_per_curve From 62401cff1a37e7df2128ec03a9a73c35e24f9143 Mon Sep 17 00:00:00 2001 From: opintosant Date: Thu, 23 Mar 2023 15:07:50 +0100 Subject: [PATCH 039/144] Set uninitialized variable to 0 --- skfda/representation/irregular.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 53c0e3ccb..2ccb6f54e 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -80,7 +80,6 @@ def __init__( self.set_function_arguments(function_arguments) self.set_function_values(function_values) - # TODO Fix for higher dimensions dim_ranges = list() for dim in range(self.dim_domain): i = 0 @@ -559,7 +558,7 @@ def _get_op_matrix( ) elif isinstance(other, FDataIrregular): - # TODO What to do with different arguments? + # TODO What to do with different argument and value sizes? 
return other.function_values return None @@ -728,6 +727,7 @@ def to_matrix(self, **kwargs: Any) -> ArrayLike: # with NaN in undefined values # Find the grid points and values for each function + index_end = 0 grid_points = [list() for i in range(self.dim_domain)] evaluated_points = [] for index_start, index_end in zip(list(self.function_indices), @@ -762,7 +762,8 @@ def to_matrix(self, **kwargs: Any) -> ArrayLike: unified_matrix.fill(np.nan) for curve in range(num_curves): - #TODO Ensure that there is always at least one dimension + # There must always be one dimension, + # and same size across all domain dimensions for point in range(len(grid_points[0][curve])): for dimension in range(self.dim_codomain): point_index = [unified_grid_points[i].index(grid_points[i][curve][point]) @@ -1122,7 +1123,7 @@ def _take_allow_fill( @property def dtype(self) -> FDataGridDType: - # TODO Do this natively? + # TODO Do this natively? FDataIrregularDType? """The dtype for this extension array, FDataGridDType""" return self.to_grid().dtype @@ -1147,9 +1148,6 @@ def isna(self) -> NDArrayBool: axis=tuple(range(1, self.data_matrix.ndim)), ) - -# TODO FDataIrregularDType? 
- class _IrregularCoordinateIterator(Sequence[T]): """Internal class to iterate through the image coordinates.""" From 42596f8954a1724f33dafb4829dd9553ebff7a0c Mon Sep 17 00:00:00 2001 From: opintosant Date: Thu, 23 Mar 2023 15:09:09 +0100 Subject: [PATCH 040/144] Update test_irregular_operations.py to use a better test structure inspired in the pandas tests --- skfda/tests/test_irregular_operations.py | 502 ++++++++++++++++++----- 1 file changed, 392 insertions(+), 110 deletions(-) diff --git a/skfda/tests/test_irregular_operations.py b/skfda/tests/test_irregular_operations.py index 211963047..a4ec0b44e 100644 --- a/skfda/tests/test_irregular_operations.py +++ b/skfda/tests/test_irregular_operations.py @@ -9,15 +9,18 @@ from skfda.representation.interpolation import SplineInterpolation ############ -# FIXTURES +# MACROS ############ NUM_CURVES = 10 -MAX_VALUES_PER_CURVE = 99 +MAX_VALUES_PER_CURVE = 100 DIMENSIONS = 2 random_state = np.random.RandomState(seed=14) +############ +# FIXTURES +############ @pytest.fixture() def input_arrays( @@ -66,49 +69,328 @@ def input_arrays_2D( return indices, values, arguments - -@pytest.fixture() +@pytest.fixture(params=["single_curve", "multiple_curves"]) def fdatairregular1D( + request: Any, input_arrays: Tuple[ArrayLike, ArrayLike, ArrayLike], ) -> FDataIrregular: - """ - Generate three multidimensional arrays - describing a FDataIrregular structure - """ - return FDataIrregular(*input_arrays) - - -@pytest.fixture() + """Return FDataIrregular with only 1 curve or NUM_CURVES as requested.""" + indices, arguments, values = input_arrays + f_data_irreg = FDataIrregular( + function_indices=indices, + function_arguments=arguments, + function_values=values, + ) + + if request.param == "single_curve": + return f_data_irreg[0] + elif request.param == "multiple_curves": + return f_data_irreg + +@pytest.fixture(params=["single_curve", "multiple_curves"]) def fdatairregular2D( + request: Any, input_arrays_2D: Tuple[ArrayLike, ArrayLike, 
ArrayLike], ) -> FDataIrregular: - """ - Generate three multidimensional arrays - describing a FDataIrregular structure - """ - return FDataIrregular(*input_arrays_2D) + """Return FDataIrregular with only 1 curve or NUM_CURVES as requested.""" + indices, arguments, values = input_arrays_2D + f_data_irreg = FDataIrregular( + function_indices=indices, + function_arguments=arguments, + function_values=values, + ) + + if request.param == "single_curve": + return f_data_irreg[0] + elif request.param == "multiple_curves": + return f_data_irreg + +@pytest.fixture(params=["fdatairregular1D", "fdatairregular2D"]) +def fdatairregular( + request: Any, + fdatairregular1D: FDataIrregular, + fdatairregular2D: FDataIrregular, +) -> FDataIrregular: + """Return 'fdatairregular1D' or 'fdatairregular2D'.""" + if request.param == "fdatairregular1D": + return fdatairregular1D + elif request.param == "fdatairregular2D": + return fdatairregular2D + +@pytest.fixture(params=["scalar", "vector", "matrix", "fdatairregular"]) +def other_1D( + request: Any, + fdatairregular1D: FDataIrregular, +) -> FDataIrregular: + """Return an operator for testing FDataIrregular operations.""" + if request.param == "scalar": + return 2 + elif request.param == "vector": + return 2*np.ones(NUM_CURVES) + elif request.param == "matrix": + return 2*np.ones((NUM_CURVES, 1)) + elif request.param == "fdatairregular": + return fdatairregular1D + +@pytest.fixture(params=["scalar", "vector", "matrix", "fdatairregular"]) +def other_2D( + request: Any, + fdatairregular2D: FDataIrregular, +) -> FDataIrregular: + """Return an operator for testing FDataIrregular operations.""" + if request.param == "scalar": + return 2 + elif request.param == "vector": + return 2*np.ones(NUM_CURVES) + elif request.param == "matrix": + return 2*np.ones((NUM_CURVES, DIMENSIONS)) + elif request.param == "fdatairregular": + return fdatairregular2D ############ # TESTS ############ +class TestArithmeticOperations1D: + """ + Class which 
encapsulates the testing of basic arithmetic operations + for unidimensional FDataIrregular + """ + + def _take_first( + self, + other, + ) -> float: + if isinstance(other, np.ndarray): + return other[0] + elif isinstance(other, FDataIrregular): + return other.function_values + return other + + def test_fdatairregular_arithmetic_sum( + self, + fdatairregular1D: FDataIrregular, + other_1D: Any, + ) -> None: + """Tests the basic arithmetic operation fdatairregular + other + + Args: + fdatairregular (FDataIrregular): FDataIrregular object to test + other (Any): Scalar, vector, matrix or FDataIrregular + """ + # Account for single curve test + if isinstance(other_1D, np.ndarray) or isinstance(other_1D, FDataIrregular): + if len(fdatairregular1D) == 1: + other_1D = other_1D[0] + + f_data_sum = fdatairregular1D + other_1D + + assert np.all( + f_data_sum.function_values == + fdatairregular1D.function_values + self._take_first(other_1D) + ) + + def test_fdatairregular_arithmetic_rsum( + self, + fdatairregular1D: FDataIrregular, + other_1D: Any, + ) -> None: + """Tests the basic arithmetic operation other + fdatairregular + + Args: + fdatairregular (FDataIrregular): FDataIrregular object to test + other (Any): Scalar, vector, matrix or FDataIrregular + """ + # Account for single curve test + if isinstance(other_1D, np.ndarray) or isinstance(other_1D, FDataIrregular): + if len(fdatairregular1D) == 1: + other_1D = other_1D[0] + + f_data_sum = other_1D + fdatairregular1D + + assert np.all( + f_data_sum.function_values == + self._take_first(other_1D) + fdatairregular1D.function_values + ) + + def test_fdatairregular_arithmetic_sum_commutative( + self, + fdatairregular1D: FDataIrregular, + other_1D: Any, + ) -> None: + """Tests the basic arithmetic operation other + fdatairregular + + Args: + fdatairregular (FDataIrregular): FDataIrregular object to test + other (Any): Scalar, vector, matrix or FDataIrregular + """ + # Account for single curve test + if isinstance(other_1D, 
np.ndarray) or isinstance(other_1D, FDataIrregular): + if len(fdatairregular1D) == 1: + other_1D = other_1D[0] + + assert fdatairregular1D + other_1D == other_1D + fdatairregular1D + + def test_fdatairregular_arithmetic_sub( + self, + fdatairregular1D: FDataIrregular, + other_1D: Any, + ) -> None: + """Tests the basic arithmetic operation fdatairregular - other + + Args: + fdatairregular (FDataIrregular): FDataIrregular object to test + other (Any): Scalar, vector, matrix or FDataIrregular + """ + # Account for single curve test + if isinstance(other_1D, np.ndarray) or isinstance(other_1D, FDataIrregular): + if len(fdatairregular1D) == 1: + other_1D = other_1D[0] + + f_data_sum = fdatairregular1D - other_1D + + assert np.all( + f_data_sum.function_values == + fdatairregular1D.function_values - self._take_first(other_1D) + ) + + def test_fdatairregular_arithmetic_rsub( + self, + fdatairregular1D: FDataIrregular, + other_1D: Any, + ) -> None: + """Tests the basic arithmetic operation other - fdatairregular + + Args: + fdatairregular (FDataIrregular): FDataIrregular object to test + other (Any): Scalar, vector, matrix or FDataIrregular + """ + # Account for single curve test + if isinstance(other_1D, np.ndarray) or isinstance(other_1D, FDataIrregular): + if len(fdatairregular1D) == 1: + other_1D = other_1D[0] + + f_data_sum = other_1D - fdatairregular1D + + assert np.all( + f_data_sum.function_values == + self._take_first(other_1D) - fdatairregular1D.function_values + ) + def test_fdatairregular_arithmetic_mul( + self, + fdatairregular1D: FDataIrregular, + other_1D: Any, + ) -> None: + """Tests the basic arithmetic operation fdatairregular * other -@pytest.mark.parametrize( - ("fdatairregular", "other"), - [ - ("fdatairregular1D", 2), - ("fdatairregular1D", 2*np.ones(NUM_CURVES)), - ("fdatairregular1D", 2*np.ones((NUM_CURVES, 1))), - ("fdatairregular1D", "fdatairregular1D"), - ("fdatairregular2D", 2), - ("fdatairregular2D", 2*np.ones(NUM_CURVES)), - 
("fdatairregular2D", 2*np.ones((NUM_CURVES, 2))), - ("fdatairregular2D", "fdatairregular2D") - ], -) + Args: + fdatairregular (FDataIrregular): FDataIrregular object to test + other (Any): Scalar, vector, matrix or FDataIrregular + """ + # Account for single curve test + if isinstance(other_1D, np.ndarray) or isinstance(other_1D, FDataIrregular): + if len(fdatairregular1D) == 1: + other_1D = other_1D[0] -class TestArithmeticOperations: - """Class which encapsulates the testing of basic arithmetic operations""" + f_data_mul = fdatairregular1D * other_1D + + assert np.all( + f_data_mul.function_values == + fdatairregular1D.function_values * self._take_first(other_1D) + ) + + def test_fdatairregular_arithmetic_rmul( + self, + fdatairregular1D: FDataIrregular, + other_1D: Any, + ) -> None: + """Tests the basic arithmetic operation other * fdatairregular + + Args: + fdatairregular (FDataIrregular): FDataIrregular object to test + other (Any): Scalar, vector, matrix or FDataIrregular + """ + # Account for single curve test + if isinstance(other_1D, np.ndarray) or isinstance(other_1D, FDataIrregular): + if len(fdatairregular1D) == 1: + other_1D = other_1D[0] + + f_data_mul = other_1D * fdatairregular1D + + assert np.all( + f_data_mul.function_values == + self._take_first(other_1D) * fdatairregular1D.function_values + ) + + def test_fdatairregular_arithmetic_mul_commutative( + self, + fdatairregular1D: FDataIrregular, + other_1D: Any, + ) -> None: + """Tests the basic arithmetic operation other * fdatairregular + + Args: + fdatairregular (FDataIrregular): FDataIrregular object to test + other (Any): Scalar, vector, matrix or FDataIrregular + """ + # Account for single curve test + if isinstance(other_1D, np.ndarray) or isinstance(other_1D, FDataIrregular): + if len(fdatairregular1D) == 1: + other_1D = other_1D[0] + + assert fdatairregular1D * other_1D == other_1D * fdatairregular1D + + def test_fdatairregular_arithmetic_div( + self, + fdatairregular1D: FDataIrregular, + 
other_1D: Any, + ) -> None: + """Tests the basic arithmetic operation fdatairregular / other + + Args: + fdatairregular (FDataIrregular): FDataIrregular object to test + other (Any): Scalar, vector, matrix or FDataIrregular + """ + # Account for single curve test + if isinstance(other_1D, np.ndarray) or isinstance(other_1D, FDataIrregular): + if len(fdatairregular1D) == 1: + other_1D = other_1D[0] + + f_data_div = fdatairregular1D / other_1D + + assert np.all( + f_data_div.function_values == + fdatairregular1D.function_values / self._take_first(other_1D) + ) + + def test_fdatairregular_arithmetic_rdiv( + self, + fdatairregular1D: FDataIrregular, + other_1D: Any, + ) -> None: + """Tests the basic arithmetic operation other / fdatairregular + + Args: + fdatairregular (FDataIrregular): FDataIrregular object to test + other (Any): Scalar, vector, matrix or FDataIrregular + """ + # Account for single curve test + if isinstance(other_1D, np.ndarray) or isinstance(other_1D, FDataIrregular): + if len(fdatairregular1D) == 1: + other_1D = other_1D[0] + + f_data_div = other_1D / fdatairregular1D + + assert np.all( + f_data_div.function_values == + self._take_first(other_1D) / fdatairregular1D.function_values + ) + +class TestArithmeticOperations2D: + """ + Class which encapsulates the testing of basic arithmetic operations + for multidimensional FDataIrregular + """ def _take_first( self, @@ -122,9 +404,8 @@ def _take_first( def test_fdatairregular_arithmetic_sum( self, - fdatairregular: str, - other: Any, - request, + fdatairregular2D: FDataIrregular, + other_2D: Any, ) -> None: """Tests the basic arithmetic operation fdatairregular + other @@ -132,22 +413,22 @@ def test_fdatairregular_arithmetic_sum( fdatairregular (FDataIrregular): FDataIrregular object to test other (Any): Scalar, vector, matrix or FDataIrregular """ - f_data_irreg = request.getfixturevalue(fdatairregular) - if isinstance(other, str): - other = request.getfixturevalue(other) + # Account for single curve 
test + if isinstance(other_2D, np.ndarray) or isinstance(other_2D, FDataIrregular): + if len(fdatairregular2D) == 1: + other_2D = other_2D[:1] - f_data_sum = f_data_irreg + other + f_data_sum = fdatairregular2D + other_2D assert np.all( f_data_sum.function_values == - f_data_irreg.function_values + self._take_first(other) + fdatairregular2D.function_values + self._take_first(other_2D) ) def test_fdatairregular_arithmetic_rsum( self, - fdatairregular: str, - other: Any, - request, + fdatairregular2D: FDataIrregular, + other_2D: Any, ) -> None: """Tests the basic arithmetic operation other + fdatairregular @@ -155,22 +436,22 @@ def test_fdatairregular_arithmetic_rsum( fdatairregular (FDataIrregular): FDataIrregular object to test other (Any): Scalar, vector, matrix or FDataIrregular """ - f_data_irreg = request.getfixturevalue(fdatairregular) - if isinstance(other, str): - other = request.getfixturevalue(other) + # Account for single curve test + if isinstance(other_2D, np.ndarray) or isinstance(other_2D, FDataIrregular): + if len(fdatairregular2D) == 1: + other_2D = other_2D[:1] - f_data_sum = other + f_data_irreg + f_data_sum = other_2D + fdatairregular2D assert np.all( f_data_sum.function_values == - self._take_first(other) + f_data_irreg.function_values + self._take_first(other_2D) + fdatairregular2D.function_values ) def test_fdatairregular_arithmetic_sum_commutative( self, - fdatairregular: str, - other: Any, - request, + fdatairregular2D: FDataIrregular, + other_2D: Any, ) -> None: """Tests the basic arithmetic operation other + fdatairregular @@ -178,17 +459,17 @@ def test_fdatairregular_arithmetic_sum_commutative( fdatairregular (FDataIrregular): FDataIrregular object to test other (Any): Scalar, vector, matrix or FDataIrregular """ - f_data_irreg = request.getfixturevalue(fdatairregular) - if isinstance(other, str): - other = request.getfixturevalue(other) + # Account for single curve test + if isinstance(other_2D, np.ndarray) or isinstance(other_2D, 
FDataIrregular): + if len(fdatairregular2D) == 1: + other_2D = other_2D[:1] - assert f_data_irreg + other == other + f_data_irreg + assert fdatairregular2D + other_2D == other_2D + fdatairregular2D def test_fdatairregular_arithmetic_sub( self, - fdatairregular: str, - other: Any, - request, + fdatairregular2D: FDataIrregular, + other_2D: Any, ) -> None: """Tests the basic arithmetic operation fdatairregular - other @@ -196,22 +477,22 @@ def test_fdatairregular_arithmetic_sub( fdatairregular (FDataIrregular): FDataIrregular object to test other (Any): Scalar, vector, matrix or FDataIrregular """ - f_data_irreg = request.getfixturevalue(fdatairregular) - if isinstance(other, str): - other = request.getfixturevalue(other) + # Account for single curve test + if isinstance(other_2D, np.ndarray) or isinstance(other_2D, FDataIrregular): + if len(fdatairregular2D) == 1: + other_2D = other_2D[:1] - f_data_sum = f_data_irreg - other + f_data_sum = fdatairregular2D - other_2D assert np.all( f_data_sum.function_values == - f_data_irreg.function_values - self._take_first(other) + fdatairregular2D.function_values - self._take_first(other_2D) ) def test_fdatairregular_arithmetic_rsub( self, - fdatairregular: str, - other: Any, - request, + fdatairregular2D: FDataIrregular, + other_2D: Any, ) -> None: """Tests the basic arithmetic operation other - fdatairregular @@ -219,22 +500,22 @@ def test_fdatairregular_arithmetic_rsub( fdatairregular (FDataIrregular): FDataIrregular object to test other (Any): Scalar, vector, matrix or FDataIrregular """ - f_data_irreg = request.getfixturevalue(fdatairregular) - if isinstance(other, str): - other = request.getfixturevalue(other) + # Account for single curve test + if isinstance(other_2D, np.ndarray) or isinstance(other_2D, FDataIrregular): + if len(fdatairregular2D) == 1: + other_2D = other_2D[:1] - f_data_sum = other - f_data_irreg + f_data_sum = other_2D - fdatairregular2D assert np.all( f_data_sum.function_values == - 
self._take_first(other) - f_data_irreg.function_values + self._take_first(other_2D) - fdatairregular2D.function_values ) def test_fdatairregular_arithmetic_mul( self, - fdatairregular: str, - other: Any, - request, + fdatairregular2D: FDataIrregular, + other_2D: Any, ) -> None: """Tests the basic arithmetic operation fdatairregular * other @@ -242,22 +523,22 @@ def test_fdatairregular_arithmetic_mul( fdatairregular (FDataIrregular): FDataIrregular object to test other (Any): Scalar, vector, matrix or FDataIrregular """ - f_data_irreg = request.getfixturevalue(fdatairregular) - if isinstance(other, str): - other = request.getfixturevalue(other) + # Account for single curve test + if isinstance(other_2D, np.ndarray) or isinstance(other_2D, FDataIrregular): + if len(fdatairregular2D) == 1: + other_2D = other_2D[:1] - f_data_mul = f_data_irreg * other + f_data_mul = fdatairregular2D * other_2D assert np.all( f_data_mul.function_values == - f_data_irreg.function_values * self._take_first(other) + fdatairregular2D.function_values * self._take_first(other_2D) ) def test_fdatairregular_arithmetic_rmul( self, - fdatairregular: str, - other: Any, - request, + fdatairregular2D: FDataIrregular, + other_2D: Any, ) -> None: """Tests the basic arithmetic operation other * fdatairregular @@ -265,22 +546,22 @@ def test_fdatairregular_arithmetic_rmul( fdatairregular (FDataIrregular): FDataIrregular object to test other (Any): Scalar, vector, matrix or FDataIrregular """ - f_data_irreg = request.getfixturevalue(fdatairregular) - if isinstance(other, str): - other = request.getfixturevalue(other) + # Account for single curve test + if isinstance(other_2D, np.ndarray) or isinstance(other_2D, FDataIrregular): + if len(fdatairregular2D) == 1: + other_2D = other_2D[:1] - f_data_mul = other * f_data_irreg + f_data_mul = other_2D * fdatairregular2D assert np.all( f_data_mul.function_values == - self._take_first(other) * f_data_irreg.function_values + self._take_first(other_2D) * 
fdatairregular2D.function_values ) def test_fdatairregular_arithmetic_mul_commutative( self, - fdatairregular: str, - other: Any, - request, + fdatairregular2D: FDataIrregular, + other_2D: Any, ) -> None: """Tests the basic arithmetic operation other * fdatairregular @@ -288,17 +569,17 @@ def test_fdatairregular_arithmetic_mul_commutative( fdatairregular (FDataIrregular): FDataIrregular object to test other (Any): Scalar, vector, matrix or FDataIrregular """ - f_data_irreg = request.getfixturevalue(fdatairregular) - if isinstance(other, str): - other = request.getfixturevalue(other) + # Account for single curve test + if isinstance(other_2D, np.ndarray) or isinstance(other_2D, FDataIrregular): + if len(fdatairregular2D) == 1: + other_2D = other_2D[:1] - assert f_data_irreg * other == other * f_data_irreg + assert fdatairregular2D * other_2D == other_2D * fdatairregular2D def test_fdatairregular_arithmetic_div( self, - fdatairregular: str, - other: Any, - request, + fdatairregular2D: FDataIrregular, + other_2D: Any, ) -> None: """Tests the basic arithmetic operation fdatairregular / other @@ -306,22 +587,22 @@ def test_fdatairregular_arithmetic_div( fdatairregular (FDataIrregular): FDataIrregular object to test other (Any): Scalar, vector, matrix or FDataIrregular """ - f_data_irreg = request.getfixturevalue(fdatairregular) - if isinstance(other, str): - other = request.getfixturevalue(other) + # Account for single curve test + if isinstance(other_2D, np.ndarray) or isinstance(other_2D, FDataIrregular): + if len(fdatairregular2D) == 1: + other_2D = other_2D[:1] - f_data_div = f_data_irreg / other + f_data_div = fdatairregular2D / other_2D assert np.all( f_data_div.function_values == - f_data_irreg.function_values / self._take_first(other) + fdatairregular2D.function_values / self._take_first(other_2D) ) def test_fdatairregular_arithmetic_rdiv( self, - fdatairregular: str, - other: Any, - request, + fdatairregular2D: FDataIrregular, + other_2D: Any, ) -> None: 
"""Tests the basic arithmetic operation other / fdatairregular @@ -329,13 +610,14 @@ def test_fdatairregular_arithmetic_rdiv( fdatairregular (FDataIrregular): FDataIrregular object to test other (Any): Scalar, vector, matrix or FDataIrregular """ - f_data_irreg = request.getfixturevalue(fdatairregular) - if isinstance(other, str): - other = request.getfixturevalue(other) + # Account for single curve test + if isinstance(other_2D, np.ndarray) or isinstance(other_2D, FDataIrregular): + if len(fdatairregular2D) == 1: + other_2D = other_2D[:1] - f_data_div = other / f_data_irreg + f_data_div = other_2D / fdatairregular2D assert np.all( f_data_div.function_values == - self._take_first(other) / f_data_irreg.function_values + self._take_first(other_2D) / fdatairregular2D.function_values ) From 197fa286266dd9a998a3821095da9c102763db3d Mon Sep 17 00:00:00 2001 From: opintosant Date: Thu, 23 Mar 2023 15:41:39 +0100 Subject: [PATCH 041/144] Preliminary test of reduction operations for FDataIrregular --- skfda/representation/irregular.py | 6 ++-- skfda/tests/test_irregular_operations.py | 40 ++++++++++++++++++++++-- 2 files changed, 40 insertions(+), 6 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 2ccb6f54e..429f2d0be 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -381,7 +381,7 @@ def sum( # noqa: WPS125 return FDataIrregular( function_indices=np.array([0]), - function_arguments=np.array(np.zeros((1, self.dim_domain))), + function_arguments=np.zeros((1, self.dim_domain)), function_values=data, sample_names=("sum",), ) @@ -454,8 +454,8 @@ def gmean(self: T) -> T: _gmean = scipy.stats.mstats.gmean(self.function_values, axis=0) return FDataIrregular( function_indices=np.array([0]), - function_arguments=np.array(np.zeros((1, self.dim_domain))), - function_values=_gmean.reshape(-1, 1), + function_arguments=np.zeros((1, self.dim_domain)), + function_values=np.array([_gmean]), 
sample_names=("geometric mean",), ) diff --git a/skfda/tests/test_irregular_operations.py b/skfda/tests/test_irregular_operations.py index a4ec0b44e..ce72cdf0d 100644 --- a/skfda/tests/test_irregular_operations.py +++ b/skfda/tests/test_irregular_operations.py @@ -146,10 +146,25 @@ def other_2D( return 2*np.ones((NUM_CURVES, DIMENSIONS)) elif request.param == "fdatairregular": return fdatairregular2D + +_all_numeric_reductions = [ + "sum", + "var", + "mean", + "gmean", + #"cov", +] + +@pytest.fixture(params=_all_numeric_reductions) +def all_numeric_reductions(request: Any) -> Any: + """ + Fixture for numeric reduction names. + """ + return request.param -############ -# TESTS -############ +################## +# TEST OPERATIONS +################## class TestArithmeticOperations1D: """ Class which encapsulates the testing of basic arithmetic operations @@ -621,3 +636,22 @@ def test_fdatairregular_arithmetic_rdiv( f_data_div.function_values == self._take_first(other_2D) / fdatairregular2D.function_values ) + + +########################## +# TEST NUMERIC REDUCTIONS +########################## + +class TestNumericReductions: + """ + Class which encapsulates the testing of numeric reductions + (such as mean, std) for FDataIrregular objects + """ + def test_fdatairregular_numeric_reduction( + self, + fdatairregular: FDataIrregular, + all_numeric_reductions: str, + ) -> None: + + reduction = getattr(fdatairregular, all_numeric_reductions)() + assert isinstance(reduction, FDataIrregular) \ No newline at end of file From bd0f9d89a13fb418d49c4edd5bbcb5e9957f4c17 Mon Sep 17 00:00:00 2001 From: opintosant Date: Thu, 23 Mar 2023 16:55:48 +0100 Subject: [PATCH 042/144] Finalize testing of numeric reductions and comparison operators --- skfda/tests/test_irregular.py | 38 ++++++++++++++++++++++++ skfda/tests/test_irregular_operations.py | 2 +- 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/skfda/tests/test_irregular.py b/skfda/tests/test_irregular.py index 
21e5ead68..2cd1d6deb 100644 --- a/skfda/tests/test_irregular.py +++ b/skfda/tests/test_irregular.py @@ -378,3 +378,41 @@ def test_fdatairregular_round( f_data_irreg.round(decimals).function_values == np.round(f_data_irreg.function_values, decimals) ) + +def test_fdatairregular_equals( + input_arrays: ArrayLike, + input_arrays_multidimensional: ArrayLike, +) -> None: + """Test for equals method, which in turn uses _eq_elementwise + to verify equality in every index, argument and value + + Args: + input_arrays (ArrayLike): tuple of three arrays required for + FDataIrregular + input_arrays_multidimensional (ArrayLike): tuple of three arrays required for + FDataIrregular, with multiple dimensions + indices: Array of pointers to the beginning of the arguments and + values of each curve + arguments: Array of each of the points of the domain + values: Array of each of the coordinates of the codomain + """ + indices, arguments, values = input_arrays_multidimensional + + f_data_irreg_multidimensional = FDataIrregular( + indices, + arguments, + values, + ) + + indices, arguments, values = input_arrays + + f_data_irreg = FDataIrregular( + indices, + arguments, + values, + ) + + assert f_data_irreg.equals(f_data_irreg) + assert f_data_irreg_multidimensional.equals(f_data_irreg_multidimensional) + assert not f_data_irreg.equals(f_data_irreg_multidimensional) + assert f_data_irreg.equals(f_data_irreg.copy()) \ No newline at end of file diff --git a/skfda/tests/test_irregular_operations.py b/skfda/tests/test_irregular_operations.py index ce72cdf0d..30e46bb0a 100644 --- a/skfda/tests/test_irregular_operations.py +++ b/skfda/tests/test_irregular_operations.py @@ -654,4 +654,4 @@ def test_fdatairregular_numeric_reduction( ) -> None: reduction = getattr(fdatairregular, all_numeric_reductions)() - assert isinstance(reduction, FDataIrregular) \ No newline at end of file + assert isinstance(reduction, FDataIrregular) From 73b79c1da85cfc377f642a063f41e39526649c07 Mon Sep 17 00:00:00 2001 
From: opintosant Date: Thu, 13 Apr 2023 14:19:50 +0200 Subject: [PATCH 043/144] Added custom numpy Dtype for Irregular data --- skfda/representation/irregular.py | 220 +++++++++++++++++------ skfda/tests/test_irregular.py | 1 + skfda/tests/test_irregular_operations.py | 4 +- 3 files changed, 164 insertions(+), 61 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 429f2d0be..f6dedd9ca 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -36,7 +36,7 @@ ) from ..typing._numpy import ArrayLike, NDArrayBool, NDArrayFloat, NDArrayInt from ._functional_data import FData -from .grid import FDataGrid, FDataGridDType +from .grid import FDataGrid from .evaluator import Evaluator from .extrapolation import ExtrapolationLike from .interpolation import SplineInterpolation @@ -44,8 +44,8 @@ if TYPE_CHECKING: from .basis import Basis, FDataBasis -T = TypeVar("T", bound='FDataIrregular') - +T = TypeVar("T", bound='FDataIrregular' + ) class FDataIrregular(FData): # noqa: WPS214 # TODO Docstring @@ -80,51 +80,26 @@ def __init__( self.set_function_arguments(function_arguments) self.set_function_values(function_values) - dim_ranges = list() - for dim in range(self.dim_domain): - i = 0 - dim_sample_ranges = list() - for f in self.function_indices[1:]: - min_argument = min([self.function_arguments[j][dim] for j in range(i, f)]) - max_argument = max([self.function_arguments[j][dim] for j in range(i, f)]) - dim_sample_ranges.append(tuple((min_argument, - max_argument)) - ) - i = f - - min_argument = min([self.function_arguments[i + j][dim] - for j in range(self.function_arguments.shape[0] - i)]) - max_argument = max([self.function_arguments[i + j][dim] - for j in range(self.function_arguments.shape[0] - i)]) - dim_sample_ranges.append((min_argument, - max_argument)) - dim_ranges.append(dim_sample_ranges) - - self._sample_range = list() - for sample in range(len(dim_sample_ranges)): - 
self._sample_range.append( - tuple([dim_ranges[dim][sample] - for dim in range(self.dim_domain)]) - ) + self._sample_range = FDataIrregular._get_sample_range_from_data( + self.function_indices, + self.function_arguments, + self.dim_domain + ) # Default value for sample_range is a list of tuples with # the first and last arguments of each curve for each dimension - - from ..misc.validation import validate_domain_range + if domain_range is None: - ranges = list() - for dim in range(self.dim_domain): - min_argument = min([x[dim][0] for x in self._sample_range]) - max_argument = max([x[dim][1] for x in self._sample_range]) - ranges.append((min_argument, max_argument)) - - domain_range = tuple(ranges) - - # Default value for domain_range is a list of tuples with - - # the minimum and maximum value of the arguments for each - # dimension + domain_range = FDataIrregular._get_domain_range_from_sample_range( + self._sample_range, + self.dim_domain + ) + # Default value for domain_range is a list of tuples with + # the minimum and maximum value of the arguments for each + # dimension + + from ..misc.validation import validate_domain_range self._domain_range = validate_domain_range(domain_range) self.interpolation = interpolation @@ -240,7 +215,7 @@ def from_datagrid( function_values, **kwargs ) - + def set_function_indices(self, function_indices) -> ArrayLike: self.function_indices = function_indices.copy() @@ -249,6 +224,47 @@ def set_function_arguments(self, function_arguments) -> ArrayLike: def set_function_values(self, function_values) -> ArrayLike: self.function_values = function_values.copy() + + def _get_sample_range_from_data(function_indices, function_arguments, dim_domain): + dim_ranges = list() + for dim in range(dim_domain): + i = 0 + dim_sample_ranges = list() + for f in function_indices[1:]: + min_argument = min([function_arguments[j][dim] for j in range(i, f)]) + max_argument = max([function_arguments[j][dim] for j in range(i, f)]) + 
dim_sample_ranges.append(tuple((min_argument, + max_argument)) + ) + i = f + + min_argument = min([function_arguments[i + j][dim] + for j in range(function_arguments.shape[0] - i)]) + max_argument = max([function_arguments[i + j][dim] + for j in range(function_arguments.shape[0] - i)]) + dim_sample_ranges.append((min_argument, + max_argument)) + dim_ranges.append(dim_sample_ranges) + + sample_range = list() + for sample in range(len(dim_sample_ranges)): + sample_range.append( + tuple([dim_ranges[dim][sample] + for dim in range(dim_domain)]) + ) + + return sample_range + + def _get_domain_range_from_sample_range(sample_range, dim_domain): + ranges = list() + for dim in range(dim_domain): + min_argument = min([x[dim][0] for x in sample_range]) + max_argument = max([x[dim][1] for x in sample_range]) + ranges.append((min_argument, max_argument)) + + domain_range = tuple(ranges) + + return domain_range def round( self, @@ -461,9 +477,6 @@ def gmean(self: T) -> T: def equals(self, other: object) -> bool: """Comparison of FDataIrregular objects.""" - if not super().equals(other): - return False - other = cast(FDataIrregular, other) if not self._eq_elemenwise(other): @@ -478,7 +491,7 @@ def equals(self, other: object) -> bool: if self.interpolation != other.interpolation: return False - return True + return super().equals(other) def _eq_elemenwise(self: T, other: T) -> NDArrayBool: """Elementwise equality of FDataIrregular.""" @@ -487,6 +500,9 @@ def _eq_elemenwise(self: T, other: T) -> NDArrayBool: (self.function_arguments == other.function_arguments).all(), (self.function_values == other.function_values).all()] ) + + def __eq__(self, other: object) -> NDArrayBool: + return self.equals(other) def _get_op_matrix( self, @@ -553,7 +569,7 @@ def _get_op_matrix( return np.repeat(other_vector, values_curve, axis=0) raise ValueError( - f"Invalid dimensions in operator between FDataGrid and Numpy " + f"Invalid dimensions in operator between FDataIrregular and Numpy " f"array: 
{other.shape}" ) @@ -820,7 +836,7 @@ def copy( # noqa: WPS211 function_values = self.function_values if domain_range is None: - domain_range = copy.deepcopy(self.domain_range) + domain_range = self.domain_range if dataset_name is None: dataset_name = self.dataset_name @@ -1122,19 +1138,21 @@ def _take_allow_fill( return result @property - def dtype(self) -> FDataGridDType: - # TODO Do this natively? FDataIrregularDType? - """The dtype for this extension array, FDataGridDType""" - return self.to_grid().dtype + def dtype(self) -> FDataIrregularDType: + """The dtype for this extension array, FDataIrregularDType""" + return FDataIrregularDType( + function_indices=self.function_indices, + function_arguments=self.function_arguments, + dim_codomain=self.dim_codomain, + domain_range=self.domain_range + ) @property def nbytes(self) -> int: """ The number of bytes needed to store this object in memory. """ - return self.data_matrix.nbytes + sum( - p.nbytes for p in self.grid_points - ) + return self.function_indices.nbytes + self.function_arguments.nbytes + self.function_values def isna(self) -> NDArrayBool: """ @@ -1144,10 +1162,94 @@ def isna(self) -> NDArrayBool: na_values: Positions of NA. 
""" return np.all( # type: ignore[no-any-return] - np.isnan(self.data_matrix), - axis=tuple(range(1, self.data_matrix.ndim)), + np.isnan(self.function_values), + axis=tuple(range(1, self.function_values.ndim)), + ) + +class FDataIrregularDType( + pandas.api.extensions.ExtensionDtype, # type: ignore[misc] +): + """DType corresponding to FDataIrregular in Pandas.""" + + name = 'FDataIrregular' + kind = 'O' + type = FDataIrregular # noqa: WPS125 + na_value = pandas.NA + + def __init__( + self, + function_indices: ArrayLike, + function_arguments: ArrayLike, + dim_codomain: int, + domain_range: Optional[DomainRangeLike] = None, + ) -> None: + from ..misc.validation import validate_domain_range + self.function_indices = function_indices + self.function_arguments = function_arguments + self.dim_domain = function_arguments.shape[1] + self.num_observations = len(function_arguments) + + if domain_range is None: + sample_range = FDataIrregular._get_sample_range_from_data( + self.function_indices, + self.function_arguments, + self.dim_domain + ) + domain_range = FDataIrregular._get_domain_range_from_sample_range( + sample_range, + self.dim_domain + ) + + self.domain_range = validate_domain_range(domain_range) + self.dim_codomain = dim_codomain + + @classmethod + def construct_array_type(cls) -> Type[FDataIrregular]: # noqa: D102 + return FDataIrregular + + def _na_repr(self) -> FDataIrregular: + + shape = ( + (self.num_observations,) + + (self.dim_codomain,) ) + function_values = np.full(shape=shape, fill_value=self.na_value) + + return FDataIrregular( + function_indices=self.function_indices, + function_arguments=self.function_arguments, + function_values=function_values, + domain_range=self.domain_range, + ) + + def __eq__(self, other: Any) -> bool: + """ + Compare dtype equality. + + Rules for equality (similar to categorical): + 1) Any FData is equal to the string 'category' + 2) Any FData is equal to itself + 3) Otherwise, they are equal if the arguments are equal. 
+ 6) Any other comparison returns False + """ + if isinstance(other, str): + return other == self.name + elif other is self: + return True + + return ( + isinstance(other, FDataIrregularDType) + and self.function_indices == other.function_indices + and self.function_arguments == other.function_arguments + and self.domain_range == other.domain_range + and self.dim_codomain == other.dim_codomain + ) + + def __hash__(self) -> int: + return hash((self.function_indices, self.function_arguments, + self.domain_range, self.dim_codomain)) + class _IrregularCoordinateIterator(Sequence[T]): """Internal class to iterate through the image coordinates.""" diff --git a/skfda/tests/test_irregular.py b/skfda/tests/test_irregular.py index 2cd1d6deb..3574ce9d7 100644 --- a/skfda/tests/test_irregular.py +++ b/skfda/tests/test_irregular.py @@ -28,6 +28,7 @@ random_state = np.random.RandomState(seed=14) + @pytest.fixture() def input_arrays( ) -> Tuple[ArrayLike, ArrayLike, ArrayLike]: diff --git a/skfda/tests/test_irregular_operations.py b/skfda/tests/test_irregular_operations.py index 30e46bb0a..f8a8eeab5 100644 --- a/skfda/tests/test_irregular_operations.py +++ b/skfda/tests/test_irregular_operations.py @@ -12,8 +12,8 @@ # MACROS ############ -NUM_CURVES = 10 -MAX_VALUES_PER_CURVE = 100 +NUM_CURVES = 100 +MAX_VALUES_PER_CURVE = 10 DIMENSIONS = 2 random_state = np.random.RandomState(seed=14) From 46f187e49010e76869ace42562786112381ea3c2 Mon Sep 17 00:00:00 2001 From: opintosant Date: Thu, 13 Apr 2023 15:45:21 +0200 Subject: [PATCH 044/144] Added to_basis method to FDataIrregular --- skfda/representation/irregular.py | 44 +++++++++++++++++++--- skfda/tests/test_irregular_operations.py | 47 ++++++++++++++++++++++++ 2 files changed, 86 insertions(+), 5 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index f6dedd9ca..5190df9d6 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -40,9 +40,7 @@ from 
.evaluator import Evaluator from .extrapolation import ExtrapolationLike from .interpolation import SplineInterpolation - -if TYPE_CHECKING: - from .basis import Basis, FDataBasis +from .basis import Basis, FDataBasis T = TypeVar("T", bound='FDataIrregular' ) @@ -735,8 +733,44 @@ def scatter(self, *args: Any, **kwargs: Any) -> Figure: return ScatterPlotIrregular(self, *args, **kwargs).plot() def to_basis(self, basis: Basis, **kwargs: Any) -> FDataBasis: - # TODO Use BasisSmoother to return basis? - return None + from ..preprocessing.smoothing import BasisSmoother + + if self.dim_domain != basis.dim_domain: + raise ValueError( + f"The domain of the function has " + f"dimension {self.dim_domain} " + f"but the domain of the basis has " + f"dimension {basis.dim_domain}", + ) + elif self.dim_codomain != basis.dim_codomain: + raise ValueError( + f"The codomain of the function has " + f"dimension {self.dim_codomain} " + f"but the codomain of the basis has " + f"dimension {basis.dim_codomain}", + ) + + # Readjust the domain range if there was not an explicit one + if not basis.is_domain_range_fixed(): + basis = basis.copy(domain_range=self.domain_range) + + smoother = BasisSmoother( + basis=basis, + **kwargs, + return_basis=True, + ) + + # Only uses the available values for each curve + basis_coefficients = [smoother.fit_transform(curve.to_grid()).coefficients[0] for curve in self] + + return FDataBasis( + basis, + basis_coefficients, + dataset_name=self.dataset_name, + argument_names=self.argument_names, + coordinate_names=self.coordinate_names, + sample_names=self.sample_names, + extrapolation=self.extrapolation) def to_matrix(self, **kwargs: Any) -> ArrayLike: # Convert FDataIrregular to matrix of all points diff --git a/skfda/tests/test_irregular_operations.py b/skfda/tests/test_irregular_operations.py index f8a8eeab5..8f9350c55 100644 --- a/skfda/tests/test_irregular_operations.py +++ b/skfda/tests/test_irregular_operations.py @@ -7,6 +7,7 @@ from 
skfda.datasets._real_datasets import _fetch_loon_data from skfda.representation import FDataIrregular, FDataGrid from skfda.representation.interpolation import SplineInterpolation +from skfda.representation.basis import Basis, FDataBasis, FourierBasis, BSplineBasis ############ # MACROS @@ -15,6 +16,7 @@ NUM_CURVES = 100 MAX_VALUES_PER_CURVE = 10 DIMENSIONS = 2 +N_BASIS = 5 random_state = np.random.RandomState(seed=14) @@ -162,6 +164,29 @@ def all_numeric_reductions(request: Any) -> Any: """ return request.param +_all_basis_operations = [ + "to_basis", +] + +@pytest.fixture(params=_all_basis_operations) +def all_basis_operations(request: Any) -> Any: + """ + Fixture for basis operation names. + """ + return request.param + +_all_basis = [ + FourierBasis, + BSplineBasis, +] + +@pytest.fixture(params=_all_basis) +def all_basis(request: Any) -> Any: + """ + Fixture for basis names. + """ + return request.param + ################## # TEST OPERATIONS ################## @@ -655,3 +680,25 @@ def test_fdatairregular_numeric_reduction( reduction = getattr(fdatairregular, all_numeric_reductions)() assert isinstance(reduction, FDataIrregular) + +######################## +# TEST BASIS OPERATIONS +######################## + +class TestBasisOperations: + """ + Class which encapsulates the testing of numeric reductions + (such as mean, std) for FDataIrregular objects + """ + def test_fdatairregular_numeric_reduction( + self, + fdatairregular1D: FDataIrregular, + all_basis: Basis, + all_basis_operations: str, + ) -> None: + basis = all_basis( + domain_range=fdatairregular1D.domain_range, + n_basis=N_BASIS + ) + basis_operation = getattr(fdatairregular1D, all_basis_operations)(basis) + assert isinstance(basis_operation, FDataBasis) \ No newline at end of file From 94644feeb57c36acba5827af744fd5a92a16ab8b Mon Sep 17 00:00:00 2001 From: opintosant Date: Thu, 13 Apr 2023 18:53:52 +0200 Subject: [PATCH 045/144] General cleanup and wemake and flake 8 formatting --- 
skfda/representation/irregular.py | 593 +++++++++++++++++------------- 1 file changed, 340 insertions(+), 253 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 5190df9d6..a4488eacb 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -7,19 +7,9 @@ """ from __future__ import annotations -import copy import numbers import warnings -from typing import ( - TYPE_CHECKING, - Any, - Optional, - Sequence, - Type, - TypeVar, - Union, - cast, -) +from typing import Any, Optional, Sequence, Type, TypeVar, Union, cast import numpy as np import pandas.api.extensions @@ -36,14 +26,76 @@ ) from ..typing._numpy import ArrayLike, NDArrayBool, NDArrayFloat, NDArrayInt from ._functional_data import FData -from .grid import FDataGrid +from .basis import Basis, FDataBasis from .evaluator import Evaluator from .extrapolation import ExtrapolationLike +from .grid import FDataGrid from .interpolation import SplineInterpolation -from .basis import Basis, FDataBasis -T = TypeVar("T", bound='FDataIrregular' +T = TypeVar("T", bound='FDataIrregular') + +###################### +# Auxiliary functions# +###################### + +def _get_sample_range_from_data(function_indices, function_arguments, dim_domain): + dim_ranges = [] + for dim in range(dim_domain): + i = 0 + dim_sample_ranges = [] + for f in function_indices[1:]: + min_argument = min( + [function_arguments[j][dim] for j in range(i, f)], ) + max_argument = max( + [function_arguments[j][dim] for j in range(i, f)], + ) + dim_sample_ranges.append( + ((min_argument, max_argument)), + ) + i = f + + min_argument = min( + [ + function_arguments[i + j][dim] + for j in range(function_arguments.shape[0] - i) + ], + ) + + max_argument = max( + [ + function_arguments[i + j][dim] + for j in range(function_arguments.shape[0] - i) + ], + ) + + dim_sample_ranges.append( + (min_argument, max_argument), + ) + dim_ranges.append(dim_sample_ranges) + + sample_range = [] 
+ for sample, _ in enumerate(dim_sample_ranges): + sample_range.append( + tuple( + [dim_ranges[dim][sample] for dim in range(dim_domain)], + ), + ) + + return sample_range + +def _get_domain_range_from_sample_range(sample_range, dim_domain): + ranges = [] + for dim in range(dim_domain): + min_argument = min([x[dim][0] for x in sample_range]) + max_argument = max([x[dim][1] for x in sample_range]) + ranges.append((min_argument, max_argument)) + + return tuple(ranges) # domain_range + +###################### +# FDataIrregular# +###################### class FDataIrregular(FData): # noqa: WPS214 # TODO Docstring @@ -60,10 +112,9 @@ def __init__( extrapolation: Optional[ExtrapolationLike] = None, interpolation: Optional[Evaluator] = None, argument_names: Optional[LabelTupleLike] = None, - coordinate_names: Optional[LabelTupleLike] = None + coordinate_names: Optional[LabelTupleLike] = None, ): """Construct a FDataIrregular object.""" - # Set dimensions self._dim_domain = function_arguments.shape[1] self._dim_codomain = function_values.shape[1] @@ -78,25 +129,25 @@ def __init__( self.set_function_arguments(function_arguments) self.set_function_values(function_values) - self._sample_range = FDataIrregular._get_sample_range_from_data( - self.function_indices, - self.function_arguments, - self.dim_domain - ) + self._sample_range = _get_sample_range_from_data( + self.function_indices, + self.function_arguments, + self.dim_domain, + ) # Default value for sample_range is a list of tuples with # the first and last arguments of each curve for each dimension - + if domain_range is None: - domain_range = FDataIrregular._get_domain_range_from_sample_range( - self._sample_range, - self.dim_domain - ) + domain_range = _get_domain_range_from_sample_range( + self._sample_range, + self.dim_domain, + ) # Default value for domain_range is a list of tuples with # the minimum and maximum value of the arguments for each # dimension - + from ..misc.validation import validate_domain_range 
self._domain_range = validate_domain_range(domain_range) @@ -117,16 +168,16 @@ def from_dataframe( id_column: str, argument_columns: LabelTupleLike, coordinate_columns: LabelTupleLike, - **kwargs + **kwargs, ) -> FDataIrregular: # Accept strings but ensure the column names are tuples - _is_str = isinstance(argument_columns, str) - argument_columns = [argument_columns] if _is_str else \ + is_str = isinstance(argument_columns, str) + argument_columns = [argument_columns] if is_str else \ argument_columns - _is_str = isinstance(coordinate_columns, str) - coordinate_columns = [coordinate_columns] if _is_str else \ + is_str = isinstance(coordinate_columns, str) + coordinate_columns = [coordinate_columns] if is_str else \ coordinate_columns # Obtain num functions and num observations from data @@ -134,12 +185,13 @@ def from_dataframe( num_functions = dataframe[id_column].nunique() # Create data structure of function pointers and coordinates - function_indices = np.zeros((num_functions, ), - dtype=np.uint32) - function_arguments = np.zeros((num_observations, - len(argument_columns))) - function_values = np.zeros((num_observations, - len(coordinate_columns))) + function_indices = np.zeros((num_functions, ), dtype=np.uint32) + function_arguments = np.zeros( + (num_observations, len(argument_columns)), + ) + function_values = np.zeros( + (num_observations, len(coordinate_columns)), + ) head = 0 index = 0 @@ -151,10 +203,10 @@ def from_dataframe( f_values = f_values.sort_values(argument_columns) new_args = f_values[argument_columns].values - function_arguments[head:head+num_values, :] = new_args + function_arguments[head:head + num_values, :] = new_args new_coords = f_values[coordinate_columns].values - function_values[head:head+num_values, :] = new_coords + function_values[head:head + num_values, :] = new_coords # Update head and index head += num_values @@ -164,14 +216,14 @@ def from_dataframe( function_indices, function_arguments, function_values, - **kwargs - ) + 
**kwargs, + ) @classmethod def from_datagrid( cls: Type[T], f_data: FDataGrid, - **kwargs + **kwargs, ) -> FDataIrregular: # Obtain num functions and num observations from data @@ -179,12 +231,13 @@ def from_datagrid( num_functions = f_data.data_matrix.shape[0] # Create data structure of function pointers and coordinates - function_indices = np.zeros((num_functions, ), - dtype=np.uint32) - function_arguments = np.zeros((num_observations, - f_data.dim_domain)) - function_values = np.zeros((num_observations, - f_data.dim_codomain)) + function_indices = np.zeros((num_functions, ), dtype=np.uint32) + function_arguments = np.zeros( + (num_observations, f_data.dim_domain), + ) + function_values = np.zeros( + (num_observations, f_data.dim_codomain), + ) head = 0 for i in range(num_functions): @@ -195,13 +248,17 @@ def from_datagrid( if np.all(np.isnan(f_data.data_matrix[i, j])): continue - arg = [f_data.grid_points[dim][j] for dim - in range(f_data.dim_domain)] - function_arguments[head+num_values, :] = arg + arg = [ + f_data.grid_points[dim][j] + for dim in range(f_data.dim_domain) + ] + function_arguments[head + num_values, :] = arg - value = [f_data.data_matrix[i, j, dim] for dim - in range(f_data.dim_codomain)] - function_values[head+num_values, :] = value + value = [ + f_data.data_matrix[i, j, dim] + for dim in range(f_data.dim_codomain) + ] + function_values[head + num_values, :] = value num_values += 1 @@ -211,9 +268,9 @@ def from_datagrid( function_indices, function_arguments, function_values, - **kwargs - ) - + **kwargs, + ) + def set_function_indices(self, function_indices) -> ArrayLike: self.function_indices = function_indices.copy() @@ -222,47 +279,6 @@ def set_function_arguments(self, function_arguments) -> ArrayLike: def set_function_values(self, function_values) -> ArrayLike: self.function_values = function_values.copy() - - def _get_sample_range_from_data(function_indices, function_arguments, dim_domain): - dim_ranges = list() - for dim in 
range(dim_domain): - i = 0 - dim_sample_ranges = list() - for f in function_indices[1:]: - min_argument = min([function_arguments[j][dim] for j in range(i, f)]) - max_argument = max([function_arguments[j][dim] for j in range(i, f)]) - dim_sample_ranges.append(tuple((min_argument, - max_argument)) - ) - i = f - - min_argument = min([function_arguments[i + j][dim] - for j in range(function_arguments.shape[0] - i)]) - max_argument = max([function_arguments[i + j][dim] - for j in range(function_arguments.shape[0] - i)]) - dim_sample_ranges.append((min_argument, - max_argument)) - dim_ranges.append(dim_sample_ranges) - - sample_range = list() - for sample in range(len(dim_sample_ranges)): - sample_range.append( - tuple([dim_ranges[dim][sample] - for dim in range(dim_domain)]) - ) - - return sample_range - - def _get_domain_range_from_sample_range(sample_range, dim_domain): - ranges = list() - for dim in range(dim_domain): - min_argument = min([x[dim][0] for x in sample_range]) - max_argument = max([x[dim][1] for x in sample_range]) - ranges.append((min_argument, max_argument)) - - domain_range = tuple(ranges) - - return domain_range def round( self, @@ -280,8 +296,8 @@ def round( return out return self.copy( - function_values=rounded_values - ) + function_values=rounded_values, + ) @property def sample_points(self) -> GridPoints: @@ -409,18 +425,18 @@ def mean(self: T) -> T: Returns: A FDataIrregular object with just one sample representing the mean of all curves the across each value. 
- -""" - + """ # Find all distinct arguments (ordered) and corresponding values distinct_args = np.unique(np.matrix.flatten(self.function_arguments)) - values = [np.matrix.flatten(self.function_values[ - np.where(self.function_arguments == arg)[0] + values = [ + np.matrix.flatten(self.function_values[ + np.where(self.function_arguments == arg)[0] ]) - for arg in distinct_args] + for arg in distinct_args + ] # Obtain mean of all available values for each argument point - means = np.array([np.mean(vals) for vals in values]) + means = np.array([np.mean(value) for value in values]) # Create a FDataIrregular object with only 1 curve, the mean curve return FDataIrregular( @@ -441,35 +457,36 @@ def var(self: T) -> T: variance of all curves the across each value. """ - # Find all distinct arguments (ordered) and corresponding values distinct_args = np.unique(np.matrix.flatten(self.function_arguments)) - values = [np.matrix.flatten(self.function_values[ - np.where(self.function_arguments == arg)[0] + values = [ + np.matrix.flatten(self.function_values[ + np.where(self.function_arguments == arg)[0] ]) - for arg in distinct_args] + for arg in distinct_args + ] # Obtain variance of all available values for each argument point - vars = np.array([np.var(vals) for vals in values]) + variances = np.array([np.var(value) for value in values]) # Create a FDataIrregular object with only 1 curve, the variance curve return FDataIrregular( function_indices=np.array([0]), function_arguments=distinct_args.reshape(-1, 1), - function_values=vars.reshape(-1, 1), + function_values=variances.reshape(-1, 1), sample_names=("var",), ) def cov(self: T) -> T: # TODO Implementation to be decided - return None + pass def gmean(self: T) -> T: - _gmean = scipy.stats.mstats.gmean(self.function_values, axis=0) + gmean = scipy.stats.mstats.gmean(self.function_values, axis=0) return FDataIrregular( function_indices=np.array([0]), function_arguments=np.zeros((1, self.dim_domain)), - 
function_values=np.array([_gmean]), + function_values=np.array([gmean]), sample_names=("geometric mean",), ) @@ -494,11 +511,13 @@ def equals(self, other: object) -> bool: def _eq_elemenwise(self: T, other: T) -> NDArrayBool: """Elementwise equality of FDataIrregular.""" return np.all( - [(self.function_indices == other.function_indices).all(), - (self.function_arguments == other.function_arguments).all(), - (self.function_values == other.function_values).all()] + [ + (self.function_indices == other.function_indices).all(), + (self.function_arguments == other.function_arguments).all(), + (self.function_values == other.function_values).all(), + ], ) - + def __eq__(self, other: object) -> NDArrayBool: return self.equals(other) @@ -520,22 +539,25 @@ def _get_op_matrix( other_vector = other[other_index] # Must expand for the number of values in each curve - values_after = np.concatenate((self.function_indices, - np.array( - [self.num_observations] - ) - ) - ) + values_after = np.concatenate( + ( + self.function_indices, + np.array([self.num_observations]), + ), + ) - values_before = np.concatenate((np.array([0]), - self.function_indices) - ) + values_before = np.concatenate( + ( + np.array([0]), + self.function_indices, + ), + ) - values_curve = (values_after-values_before)[1:] + values_curve = (values_after - values_before)[1:] # Repeat the other value for each curve as many times # as values inside the curve - return np.repeat(other_vector, values_curve).reshape(-1,1) + return np.repeat(other_vector, values_curve).reshape(-1, 1) elif other.shape == ( self.n_samples, self.dim_codomain, @@ -549,26 +571,29 @@ def _get_op_matrix( other_vector = other[other_index] # Must expand for the number of values in each curve - values_after = np.concatenate((self.function_indices, - np.array( - [self.num_observations] - ) - ) - ) + values_after = np.concatenate( + ( + self.function_indices, + np.array([self.num_observations]), + ), + ) - values_before = 
np.concatenate((np.array([0]), - self.function_indices) - ) + values_before = np.concatenate( + ( + np.array([0]), + self.function_indices, + ), + ) - values_curve = (values_after-values_before)[1:] + values_curve = (values_after - values_before)[1:] # Repeat the other value for each curve as many times # as values inside the curve return np.repeat(other_vector, values_curve, axis=0) raise ValueError( - f"Invalid dimensions in operator between FDataIrregular and Numpy " - f"array: {other.shape}" + f"Invalid dimensions in operator between FDataIrregular " + f"and Numpy array: {other.shape}", ) elif isinstance(other, FDataIrregular): @@ -585,10 +610,10 @@ def __add__( if function_values is None: return NotImplemented - return self._copy_op(other, - function_values=self.function_values + - function_values - ) + return self._copy_op( + other, + function_values=self.function_values + function_values, + ) def __radd__( self: T, @@ -604,10 +629,10 @@ def __sub__( if function_values is None: return NotImplemented - return self._copy_op(other, - function_values=self.function_values - - function_values - ) + return self._copy_op( + other, + function_values=self.function_values - function_values, + ) def __rsub__( self: T, @@ -617,10 +642,10 @@ def __rsub__( if function_values is None: return NotImplemented - return self._copy_op(other, - function_values=function_values - - self.function_values - ) + return self._copy_op( + other, + function_values=function_values - self.function_values, + ) def __mul__( self: T, @@ -630,10 +655,10 @@ def __mul__( if function_values is None: return NotImplemented - return self._copy_op(other, - function_values=self.function_values * - function_values - ) + return self._copy_op( + other, + function_values=self.function_values * function_values, + ) def __rmul__( self: T, @@ -649,10 +674,10 @@ def __truediv__( if function_values is None: return NotImplemented - return self._copy_op(other, - function_values=self.function_values / - 
function_values - ) + return self._copy_op( + other, + function_values=self.function_values / function_values, + ) def __rtruediv__( self: T, @@ -662,17 +687,17 @@ def __rtruediv__( if function_values is None: return NotImplemented - return self._copy_op(other, - function_values=function_values / - self.function_values - ) + return self._copy_op( + other, + function_values=function_values / self.function_values, + ) def __neg__(self: T) -> T: """Negation of FDataIrregular object.""" return self.copy(function_values=-self.function_values) def concatenate(self: T, *others: T, as_coordinates: bool = False) -> T: - #TODO As coordinates + # TODO As coordinates if as_coordinates: raise NotImplementedError( "Not implemented for as_coordinates = True", @@ -685,30 +710,42 @@ def concatenate(self: T, *others: T, as_coordinates: bool = False) -> T: x._check_same_dimensions(y) # Allocate all required memory - total_functions = self.num_functions + sum([o.num_functions - for o in others]) - total_values = self.num_observations + sum([o.num_observations - for o in others]) - total_sample_names = list() - function_indices = np.zeros((total_functions, ), - dtype=np.uint32) - function_args = np.zeros((total_values, - self.dim_domain)) - function_values = np.zeros((total_values, - self.dim_codomain)) + total_functions = self.num_functions + sum( + [ + o.num_functions + for o in others + ], + ) + total_values = self.num_observations + sum( + [ + o.num_observations + for o in others + ], + ) + total_sample_names = [] + function_indices = np.zeros((total_functions, ), dtype=np.uint32) + function_args = np.zeros( + (total_values, self.dim_domain), + ) + function_values = np.zeros( + (total_values, self.dim_codomain), + ) index = 0 head = 0 # Add samples sequentially for f_data in [self] + list(others): - function_indices[index:index + - f_data.num_functions] = f_data.function_indices - function_args[head:head + - f_data.num_observations] = f_data.function_arguments - 
function_values[head:head + - f_data.num_observations] = f_data.function_values + function_indices[ + index:index + f_data.num_functions + ] = f_data.function_indices + function_args[ + head:head + f_data.num_observations + ] = f_data.function_arguments + function_values[ + head:head + f_data.num_observations + ] = f_data.function_values # Adjust pointers to the concatenated array - function_indices[index:index+f_data.num_functions] += head + function_indices[index:index + f_data.num_functions] += head index += f_data.num_functions head += f_data.num_observations total_sample_names = total_sample_names + list(f_data.sample_names) @@ -718,17 +755,19 @@ def concatenate(self: T, *others: T, as_coordinates: bool = False) -> T: function_args, function_values, sample_names=total_sample_names, - ) + ) def plot(self, *args: Any, **kwargs: Any) -> Figure: - from ..exploratory.visualization.representation \ - import LinearPlotIrregular + from ..exploratory.visualization.representation import ( + LinearPlotIrregular, + ) return LinearPlotIrregular(self, *args, **kwargs).plot() def scatter(self, *args: Any, **kwargs: Any) -> Figure: - from ..exploratory.visualization.representation \ - import ScatterPlotIrregular + from ..exploratory.visualization.representation import ( + ScatterPlotIrregular, + ) return ScatterPlotIrregular(self, *args, **kwargs).plot() @@ -759,18 +798,22 @@ def to_basis(self, basis: Basis, **kwargs: Any) -> FDataBasis: **kwargs, return_basis=True, ) - + # Only uses the available values for each curve - basis_coefficients = [smoother.fit_transform(curve.to_grid()).coefficients[0] for curve in self] + basis_coefficients = [ + smoother.fit_transform(curve.to_grid()).coefficients[0] + for curve in self + ] return FDataBasis( - basis, - basis_coefficients, - dataset_name=self.dataset_name, - argument_names=self.argument_names, - coordinate_names=self.coordinate_names, + basis, + basis_coefficients, + dataset_name=self.dataset_name, + 
argument_names=self.argument_names, + coordinate_names=self.coordinate_names, sample_names=self.sample_names, - extrapolation=self.extrapolation) + extrapolation=self.extrapolation, + ) def to_matrix(self, **kwargs: Any) -> ArrayLike: # Convert FDataIrregular to matrix of all points @@ -778,46 +821,65 @@ def to_matrix(self, **kwargs: Any) -> ArrayLike: # Find the grid points and values for each function index_end = 0 - grid_points = [list() for i in range(self.dim_domain)] + grid_points = [[] for i in range(self.dim_domain)] evaluated_points = [] - for index_start, index_end in zip(list(self.function_indices), - list(self.function_indices[1:])): + for index_start, index_end in zip( + list(self.function_indices), + list(self.function_indices[1:]), + ): for dim in range(self.dim_domain): grid_points[dim].append( - [x[dim] for x in self.function_arguments[index_start:index_end]]) - + [ + x[dim] + for x in self.function_arguments[index_start:index_end] + ], + ) + evaluated_points.append( - self.function_values[index_start:index_end]) + self.function_values[index_start:index_end], + ) # Dont forget to add the last one for dim in range(self.dim_domain): - grid_points[dim].append([x[dim] for x in self.function_arguments[index_end:]]) + grid_points[dim].append( + [x[dim] for x in self.function_arguments[index_end:]], + ) evaluated_points.append(self.function_values[index_end:]) # Aggregate into a complete data matrix - unified_grid_points = [list() for i in range(self.dim_domain)] + unified_grid_points = [[] for i in range(self.dim_domain)] from functools import reduce for dim in range(self.dim_domain): - _unified_points = reduce( - lambda x, y: set(list(y)).union(list(x)), + unified_points = reduce( + lambda x, y: set(y).union(list(x)), grid_points[dim], - ) - unified_grid_points[dim] = sorted(_unified_points) + ) + unified_grid_points[dim] = sorted(unified_points) # Fill matrix with known values, leave unknown as NA num_curves = len(grid_points[0]) num_points = 
len(unified_grid_points[0]) - unified_matrix = np.empty((num_curves, *(num_points,)*self.dim_domain , self.dim_codomain)) + unified_matrix = np.empty( + ( + num_curves, + *(num_points,) * self.dim_domain, + self.dim_codomain, + ), + ) unified_matrix.fill(np.nan) for curve in range(num_curves): - # There must always be one dimension, + # There must always be one dimension, # and same size across all domain dimensions - for point in range(len(grid_points[0][curve])): + for point, _ in enumerate(grid_points[0][curve]): for dimension in range(self.dim_codomain): - point_index = [unified_grid_points[i].index(grid_points[i][curve][point]) - for i in range(self.dim_domain)] + point_index = [ + unified_grid_points[i].index( + grid_points[i][curve][point], + ) + for i in range(self.dim_domain) + ] unified_matrix[(curve, *point_index, dimension)] = \ evaluated_points[curve][point][dimension] @@ -852,7 +914,6 @@ def copy( # noqa: WPS211 argument_names: Optional[LabelTupleLike] = None, coordinate_names: Optional[LabelTupleLike] = None, ) -> T: - """ Return a copy of the FDataIrregular. @@ -934,8 +995,10 @@ def restrict( for dr in domain_range: dr_start, dr_end = dr select_mask = np.where( - (dr_start <= self.function_arguments[s]) & - (self.function_arguments[s] <= dr_end) + ( + (dr_start <= self.function_arguments[s]) + & (self.function_arguments[s] <= dr_end) + ), ) # Must be union, it is valid if it is in any interval @@ -958,8 +1021,10 @@ def restrict( for dr in domain_range: dr_start, dr_end = dr select_mask = np.where( - (dr_start <= self.function_arguments[s]) & - (self.function_arguments[s] <= dr_end) + ( + (dr_start <= self.function_arguments[s]) + & (self.function_arguments[s] <= dr_end) + ), ) # Must be union, it is valid if it is in any interval @@ -992,7 +1057,6 @@ def shift( *, restrict_domain: bool = False, extrapolation: Optional[ExtrapolationLike] = None, - grid_points: Optional[GridPointsLike] = None, ) -> FDataIrregular: r""" Perform a shift of the curves. 
@@ -1025,7 +1089,7 @@ def shift( Shifted functions. """ # TODO build based in above - return None + pass def compose( self: T, @@ -1034,8 +1098,8 @@ def compose( eval_points: Optional[GridPointsLike] = None, ) -> T: - #TODO Is this possible with this structure? - return None + # TODO Is this possible with this structure? + pass def __str__(self) -> str: """Return str(self).""" @@ -1078,15 +1142,27 @@ def __getitem__( s = slice(self.function_indices[i], next_index) required_slices.append(s) - arguments = np.concatenate([self.function_arguments[s] - for s in required_slices]) - values = np.concatenate([self.function_values[s] - for s in required_slices]) + arguments = np.concatenate( + [ + self.function_arguments[s] + for s in required_slices + ], + ) + values = np.concatenate( + [ + self.function_values[s] + for s in required_slices + ], + ) + + chunk_sizes = np.array( + [ + s.stop - s.start if s.stop is not None + else self.num_observations - s.start + for s in required_slices + ], + ) - chunk_sizes = np.array([s.stop-s.start if s.stop is not None - else self.num_observations - s.start - for s in required_slices]) - indices = np.cumsum(chunk_sizes) - chunk_sizes[0] return self.copy( @@ -1110,9 +1186,10 @@ def __array_ufunc__( for i in inputs: if ( isinstance(i, FDataIrregular) - and not np.array_equal(i.function_arguments, - self.function_arguments - ) + and not np.array_equal( + i.function_arguments, + self.function_arguments, + ) ): return NotImplemented @@ -1178,7 +1255,7 @@ def dtype(self) -> FDataIrregularDType: function_indices=self.function_indices, function_arguments=self.function_arguments, dim_codomain=self.dim_codomain, - domain_range=self.domain_range + domain_range=self.domain_range, ) @property @@ -1186,7 +1263,8 @@ def nbytes(self) -> int: """ The number of bytes needed to store this object in memory. 
""" - return self.function_indices.nbytes + self.function_arguments.nbytes + self.function_values + return self.function_indices.nbytes + \ + self.function_arguments.nbytes + self.function_values def isna(self) -> NDArrayBool: """ @@ -1199,7 +1277,8 @@ def isna(self) -> NDArrayBool: np.isnan(self.function_values), axis=tuple(range(1, self.function_values.ndim)), ) - + + class FDataIrregularDType( pandas.api.extensions.ExtensionDtype, # type: ignore[misc] ): @@ -1224,14 +1303,14 @@ def __init__( self.num_observations = len(function_arguments) if domain_range is None: - sample_range = FDataIrregular._get_sample_range_from_data( - self.function_indices, - self.function_arguments, - self.dim_domain + sample_range = _get_sample_range_from_data( + self.function_indices, + self.function_arguments, + self.dim_domain, ) - domain_range = FDataIrregular._get_domain_range_from_sample_range( - sample_range, - self.dim_domain + domain_range = _get_domain_range_from_sample_range( + sample_range, + self.dim_domain, ) self.domain_range = validate_domain_range(domain_range) @@ -1271,18 +1350,26 @@ def __eq__(self, other: Any) -> bool: return other == self.name elif other is self: return True + elif not isinstance(other, FDataIrregularDType): + return False return ( - isinstance(other, FDataIrregularDType) - and self.function_indices == other.function_indices + self.function_indices == other.function_indices and self.function_arguments == other.function_arguments and self.domain_range == other.domain_range and self.dim_codomain == other.dim_codomain ) def __hash__(self) -> int: - return hash((self.function_indices, self.function_arguments, - self.domain_range, self.dim_codomain)) + return hash( + ( + self.function_indices, + self.function_arguments, + self.domain_range, + self.dim_codomain, + ), + ) + class _IrregularCoordinateIterator(Sequence[T]): """Internal class to iterate through the image coordinates.""" @@ -1301,8 +1388,8 @@ def __getitem__( s_key = slice(s_key, s_key + 1) 
coordinate_names = np.array( - self._fdatairregular.coordinate_names - )[s_key] + self._fdatairregular.coordinate_names, + )[s_key] return self._fdatairregular.copy( function_values=self._fdatairregular.function_values[..., key], From 5ce6a86fc210ca9b870f9d9f371def189dcadf6d Mon Sep 17 00:00:00 2001 From: opintosant Date: Fri, 21 Apr 2023 17:37:18 +0200 Subject: [PATCH 046/144] Wemake format and FDataIrregular docstring --- skfda/representation/irregular.py | 143 ++++++++++++++++++++++++++---- 1 file changed, 124 insertions(+), 19 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index a4488eacb..3f22cc3d0 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -38,7 +38,12 @@ # Auxiliary functions# ###################### -def _get_sample_range_from_data(function_indices, function_arguments, dim_domain): + +def _get_sample_range_from_data( + function_indices, + function_arguments, + dim_domain, +): dim_ranges = [] for dim in range(dim_domain): i = 0 @@ -84,7 +89,11 @@ def _get_sample_range_from_data(function_indices, function_arguments, dim_domain return sample_range -def _get_domain_range_from_sample_range(sample_range, dim_domain): + +def _get_domain_range_from_sample_range( + sample_range, + dim_domain, +): ranges = [] for dim in range(dim_domain): min_argument = min([x[dim][0] for x in sample_range]) @@ -97,8 +106,106 @@ def _get_domain_range_from_sample_range(sample_range, dim_domain): # FDataIrregular# ###################### + class FDataIrregular(FData): # noqa: WPS214 - # TODO Docstring + r"""Represent discretised functional data of an irregular or sparse nature. + + Class for representing irregular functional data in a compact manner, + allowing basic operations, representation and conversion to basis format. 
+ + Attributes: + functional_indices: a unidimensional array which stores the index of + the functional_values and functional_values arrays where the data + of each individual curve of the sample begins. + functional_arguments: an array of every argument of the domain for + every curve in the sample. Each row contains an observation. + functional_values: an array of every value of the codomain for + every curve in the sample. Each row contains an observation. + domain_range: 2 dimension matrix where each row + contains the bounds of the interval in which the functional data + is considered to exist for each one of the axies. + dataset_name: name of the dataset. + argument_names: tuple containing the names of the different + arguments. + coordinate_names: tuple containing the names of the different + coordinate functions. + extrapolation: defines the default type of + extrapolation. By default None, which does not apply any type of + extrapolation. See `Extrapolation` for detailled information of the + types of extrapolation. + interpolation: Defines the type of interpolation + applied in `evaluate`. + + Examples: + Representation of an irregular functional data object with 2 samples + representing a function :math:`f : \mathbb{R}\longmapsto\mathbb{R}`, + with 2 and 3 discretization points respectively. + + >>> indices = [0, 2] + >>> arguments = [[1], [2], [3], [4], [5]] + >>> values = [[1], [2], [3], [4], [5]] + >>> FDataIrregular(indices, arguments, values) + FDataIrregular( + function_indices=array([0,2]), + function_arguments=array([[1], + [2], + [3], + [4], + [5]]), + function_values=array([[1], + [2], + [3], + [4], + [5]]), + domain_range=((1.0, 5.0),), + ...) + + The number of arguments and values must be the same. + + >>> indices = [0,2] + >>> arguments = np.arange(5).reshape(-1, 1) + >>> values = np.arange(6).reshape(-1, 1) + >>> FDataIrregular(indices, arguments, values) + Traceback (most recent call last): + .... 
+ ValueError: Dimension mismatch between function_arguments + and function_values... + + The indices in function_indices must point to correct rows + in function_arguments and function_values. + + >>> indices = [0,7] + >>> arguments = np.arange(5).reshape(-1, 1) + >>> values = np.arange(5).reshape(-1, 1) + >>> FDataIrregular(indices, arguments, values) + Traceback (most recent call last): + .... + ValueError: Index in function_indices out of bounds... + + FDataIrregular supports higher dimensional data both in the domain + and in the codomain (image). + + Representation of a functional data object with 2 samples + representing a function :math:`f : \mathbb{R}\longmapsto\mathbb{R}^2`. + + >>> indices = [0, 2] + >>> arguments = [[1], [2], [3], [4], [5]] + >>> values = [[1, 1], [2, 2], [3, 3], [4, 4], [5, 5]] + >>> fd = FDataIrregular(indices, arguments, values) + >>> fd.dim_domain, fd.dim_codomain + (1, 2) + + Representation of a functional data object with 2 samples + representing a function :math:`f : \mathbb{R}^2\longmapsto\mathbb{R}`. 
+ + >>> indices = [0, 2] + >>> arguments = [[1, 1], [2, 2], [3, 3], [4, 4], [5, 5]] + >>> values = [[1], [2], [3], [4], [5]] + >>> fd = FDataIrregular(indices, arguments, values) + >>> fd.dim_domain, fd.dim_codomain + (2, 1) + + """ def __init__( self, @@ -115,19 +222,25 @@ def __init__( coordinate_names: Optional[LabelTupleLike] = None, ): """Construct a FDataIrregular object.""" + self.function_indices = np.array(function_indices) + self.function_arguments = np.array(function_arguments) + self.function_values = np.array(function_values) + # Set dimensions - self._dim_domain = function_arguments.shape[1] - self._dim_codomain = function_values.shape[1] + self._dim_domain = self.function_arguments.shape[1] + self._dim_codomain = self.function_values.shape[1] # Set structure to given data - self.num_functions = function_indices.shape[0] + self.num_functions = self.function_indices.shape[0] + + if self.function_arguments.shape[0] != self.function_values.shape[0]: + raise ValueError("Dimension mismatch between function_arguments \ + and function_values") - assert function_arguments.shape[0] == function_values.shape[0] - self.num_observations = function_arguments.shape[0] + self.num_observations = self.function_arguments.shape[0] - self.set_function_indices(function_indices) - self.set_function_arguments(function_arguments) - self.set_function_values(function_values) + if max(self.function_indices) >= self.num_observations: + raise ValueError("Index in function_indices out of bounds") self._sample_range = _get_sample_range_from_data( self.function_indices, @@ -271,14 +384,6 @@ def from_datagrid( **kwargs, ) - def set_function_indices(self, function_indices) -> ArrayLike: - self.function_indices = function_indices.copy() - - def set_function_arguments(self, function_arguments) -> ArrayLike: - self.function_arguments = function_arguments.copy() - - def set_function_values(self, function_values) -> ArrayLike: - self.function_values = function_values.copy() def round( self, 
From 5a4ace7c3192130d5fbe056d60c2324617c05f1e Mon Sep 17 00:00:00 2001 From: opintosant Date: Sat, 22 Apr 2023 18:42:59 +0200 Subject: [PATCH 047/144] Docstrings and examples. Fixed incorrect domain range in concatenate. --- skfda/representation/irregular.py | 232 +++++++++++++++++++++++++++++- 1 file changed, 225 insertions(+), 7 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 3f22cc3d0..08a3f2bd9 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -283,7 +283,28 @@ def from_dataframe( coordinate_columns: LabelTupleLike, **kwargs, ) -> FDataIrregular: + """Create a FDataIrregular object from a pandas dataframe. + The pandas dataframe should be in 'long' format: each row + containing the arguments and values of a given point of the + dataset, and an identifier which specifies which curve they + belong to. + + Args: + dataframe (pandas.DataFrame): Pandas dataframe containing the + irregular functional dataset. + id_column (str): Name of the column which contains the information + about which curve does each each row belong to. + argument_columns (LabelTupleLike): list of columns where + the arguments for each dimension of the domain can be found. + coordinate_columns (LabelTupleLike): list of columns where + the values for each dimension of the image can be found. + kwargs: Arguments for the FDataIrregular constructor. + + Returns: + FDataIrregular: Returns a FDataIrregular object which contains + the irregular functional data of the dataset. + """ # Accept strings but ensure the column names are tuples is_str = isinstance(argument_columns, str) argument_columns = [argument_columns] if is_str else \ @@ -338,7 +359,16 @@ def from_datagrid( f_data: FDataGrid, **kwargs, ) -> FDataIrregular: + """Create a FDataIrregular object from a source FDataGrid. + Args: + f_data (FDataGrid): FDataGrid object used as source. + kwargs: Arguments for the FDataIrregular constructor. 
+ + Returns: + FDataIrregular: FDataIrregular containing the same data + as the source but with an irregular structure. + """ # Obtain num functions and num observations from data num_observations = np.sum(~np.isnan(f_data.data_matrix)) num_functions = f_data.data_matrix.shape[0] @@ -384,12 +414,30 @@ def from_datagrid( **kwargs, ) - def round( self, decimals: int = 0, out: Optional[FDataIrregular] = None, ) -> FDataIrregular: + """Evenly round function_values to the given number of decimals. + + Arguments are not rounded due to possibility of coalescing + various arguments to the same rounded value. + + .. deprecated:: 0.6 + Use :func:`numpy.round` function instead. + + Args: + decimals: Number of decimal places to round to. + If decimals is negative, it specifies the number of + positions to the left of the decimal point. Defaults to 0. + out: FDataIrregular where to place the result, if any. + + Returns: + Returns a FDataIrregular object where all elements + in its function_values are rounded. + + """ # Arguments are not rounded due to possibility of # coalescing various arguments to the same rounded value rounded_values = self.function_values.round(decimals=decimals) @@ -479,17 +527,32 @@ def _evaluate( def derivative( self: T, - *, order: int = 1, method: Optional[Basis] = None, ) -> T: + """Differentiate the FDataIrregular object. + + Args: + order: Order of the derivative. Defaults to one. + method (Optional[Basis]): + + Returns: + FDataIrregular with the derivative of the dataset. + """ pass def integrate( self: T, - *, domain: Optional[DomainRange] = None, ) -> NDArrayFloat: + """Integrate the FDataIrregular object. + + Args: + domain (Optional[DomainRange]): + + Returns: + FDataIrregular with the integral. + """ pass def _check_same_dimensions(self: T, other: T) -> None: @@ -507,6 +570,24 @@ def sum( # noqa: WPS125 skipna: bool = False, min_count: int = 0, ) -> T: + """Compute the sum of all the samples. 
+ + Args: + axis (Optional[int]): Used for compatibility with numpy. + Must be None or 0. + out (None): Used for compatibility with numpy. + Must be None. + keepdims (bool): Used for compatibility with numpy. + Must be False. + skipna (bool): Wether the NaNs are ignored or not. + min_count: Number of valid (non NaN) data to have in order + for the a variable to not be NaN when `skipna` is + `True`. + + Returns: + T: FDataIrregular object with only one curve and one value + representing the sum of all the samples in the original object. + """ super().sum(axis=axis, out=out, keepdims=keepdims, skipna=skipna) data = ( @@ -583,9 +664,15 @@ def var(self: T) -> T: ) def cov(self: T) -> T: + """Compute the covariance for a FDataIrregular object. + + Returns: + FDataIrregular with the covariance function. + """ # TODO Implementation to be decided pass + # TODO remove and remove scipy def gmean(self: T) -> T: gmean = scipy.stats.mstats.gmean(self.function_values, axis=0) return FDataIrregular( @@ -802,13 +889,62 @@ def __neg__(self: T) -> T: return self.copy(function_values=-self.function_values) def concatenate(self: T, *others: T, as_coordinates: bool = False) -> T: + """Join samples from a similar FDataIrregular object. + + Joins samples from another FDataIrregular object if it has the same + dimensions. + + Args: + others: Objects to be concatenated. + as_coordinates (bool): If False concatenates as + new samples, else, concatenates the other functions as + new components of the image. Defaults to false. + + Raises: + NotImplementedError: Not implemented for as_coordinates = True + + Returns: + T: FDataIrregular object with the samples from the source objects. 
+ + Examples: + >>> indices = [0,2] + >>> arguments = values = np.arange(5).reshape(-1, 1) + >>> fd = FDataIrregular(indices, arguments, values) + >>> arguments_2 = values_2 = np.arange(5, 10).reshape(-1, 1) + >>> fd_2 = FDataIrregular(indices, arguments_2, values_2) + >>> fd.concatenate(fd_2) + FDataIrregular( + function_indices=array([0, 2, 5, 7], dtype=uint32), + function_arguments=array([[0.], + [1.], + [2.], + [3.], + [4.], + [5.], + [6.], + [7.], + [8.], + [9.]]), + function_values=array([[0.], + [1.], + [2.], + [3.], + [4.], + [5.], + [6.], + [7.], + [8.], + [9.]]), + domain_range=((0.0, 9.0),), + ...) + """ # TODO As coordinates if as_coordinates: raise NotImplementedError( "Not implemented for as_coordinates = True", ) # Verify that dimensions are compatible - assert len(others) > 0 + assert len(others) > 0, "No objects to concatenate" self._check_same_dimensions(others[0]) if len(others) > 1: for x, y in zip(others, others[1:]): @@ -855,14 +991,37 @@ def concatenate(self: T, *others: T, as_coordinates: bool = False) -> T: head += f_data.num_observations total_sample_names = total_sample_names + list(f_data.sample_names) + # Check domain range + domain_range = [list(r) for r in self.domain_range] + for dim in range(self.dim_domain): + dim_max = np.max(function_args, axis=(1, dim)) + dim_min = np.min(function_args, axis=(1, dim)) + + if dim_max > self.domain_range[dim][1]: + domain_range[dim][1] = dim_max + if dim_min < self.domain_range[dim][0]: + domain_range[dim][0] = dim_min + return self.copy( function_indices, function_args, function_values, + domain_range=domain_range, sample_names=total_sample_names, ) def plot(self, *args: Any, **kwargs: Any) -> Figure: + """Plot the functional data of FDataIrregular with a lines plot. + + Args: + args: Positional arguments to be passed to the class + :class:`~skfda.exploratory.visualization.representation.LinearPlotIrregular`. 
+ kwargs: Keyword arguments to be passed to the class + :class:`~skfda.exploratory.visualization.representation.LinearPlotIrregular`. + + Returns: + Figure object in which the graphs are plotted. + """ from ..exploratory.visualization.representation import ( LinearPlotIrregular, ) @@ -870,6 +1029,17 @@ def plot(self, *args: Any, **kwargs: Any) -> Figure: return LinearPlotIrregular(self, *args, **kwargs).plot() def scatter(self, *args: Any, **kwargs: Any) -> Figure: + """Plot the functional data of FDataIrregular with a scatter plot. + + Args: + args: Positional arguments to be passed to the class + :class:`~skfda.exploratory.visualization.representation.ScatterPlotIrregular`. + kwargs: Keyword arguments to be passed to the class + :class:`~skfda.exploratory.visualization.representation.ScatterPlotIrregular`. + + Returns: + Figure object in which the graphs are plotted. + """ from ..exploratory.visualization.representation import ( ScatterPlotIrregular, ) @@ -877,6 +1047,22 @@ def scatter(self, *args: Any, **kwargs: Any) -> Figure: return ScatterPlotIrregular(self, *args, **kwargs).plot() def to_basis(self, basis: Basis, **kwargs: Any) -> FDataBasis: + """Return the basis representation of the object. + + Args: + basis (Basis): basis object in which the functional data are + going to be represented. + kwargs: keyword arguments to be passed to + FDataBasis.from_data(). + + Raises: + ValueError: Incorrect domain dimension + ValueError: Incorrect codomain dimension + + Returns: + FDataBasis: Basis representation of the funtional data + object. 
+ """ from ..preprocessing.smoothing import BasisSmoother if self.dim_domain != basis.dim_domain: @@ -920,10 +1106,14 @@ def to_basis(self, basis: Basis, **kwargs: Any) -> FDataBasis: extrapolation=self.extrapolation, ) - def to_matrix(self, **kwargs: Any) -> ArrayLike: - # Convert FDataIrregular to matrix of all points - # with NaN in undefined values + def to_matrix(self) -> ArrayLike: + """Convert FDataIrregular values to numpy matrix. + + Undefined values in the grid will be represented with np.nan. + Returns: + ArrayLike: numpy array with the resulting matrix. + """ # Find the grid points and values for each function index_end = 0 grid_points = [[] for i in range(self.dim_domain)] @@ -993,7 +1183,13 @@ def to_matrix(self, **kwargs: Any) -> ArrayLike: def to_grid( # noqa: D102 self: T, ) -> FDataGrid: + """Convert FDataIrregular to FDataGrid. + + Undefined values in the grid will be represented with np.nan. + Returns: + FDataGrid: FDataGrid with the irregular functional data. + """ data_matrix, grid_points = self.to_matrix() return FDataGrid( @@ -1076,6 +1272,16 @@ def restrict( self: T, domain_range: DomainRangeLike, ) -> T: + """ + Restrict the functions to a new domain range. + + Args: + domain_range: New domain range. + + Returns: + T: Restricted function. + + """ from ..misc.validation import validate_domain_range domain_range = validate_domain_range(domain_range) @@ -1202,7 +1408,19 @@ def compose( *, eval_points: Optional[GridPointsLike] = None, ) -> T: + """Composition of functions. + + Performs the composition of functions. + Args: + fd: FData object to make the composition. Should + have the same number of samples and image dimension equal to 1. + eval_points: Points to perform the evaluation. + + Returns: + Function representing the composition. + + """ # TODO Is this possible with this structure? 
pass From 2a845c49dd6abac1c4431653dbf4be2e98bb8e25 Mon Sep 17 00:00:00 2001 From: opintosant Date: Sat, 22 Apr 2023 18:43:43 +0200 Subject: [PATCH 048/144] Delete legacy function gmean. --- skfda/representation/irregular.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 08a3f2bd9..88fdf0ff4 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -672,16 +672,6 @@ def cov(self: T) -> T: # TODO Implementation to be decided pass - # TODO remove and remove scipy - def gmean(self: T) -> T: - gmean = scipy.stats.mstats.gmean(self.function_values, axis=0) - return FDataIrregular( - function_indices=np.array([0]), - function_arguments=np.zeros((1, self.dim_domain)), - function_values=np.array([gmean]), - sample_names=("geometric mean",), - ) - def equals(self, other: object) -> bool: """Comparison of FDataIrregular objects.""" other = cast(FDataIrregular, other) From 8a81bd7227db65d6a3b31649c9e505bc8ce229ba Mon Sep 17 00:00:00 2001 From: opintosant Date: Sun, 23 Apr 2023 13:25:23 +0200 Subject: [PATCH 049/144] Remove gmean from testing --- skfda/tests/test_irregular_operations.py | 1 - 1 file changed, 1 deletion(-) diff --git a/skfda/tests/test_irregular_operations.py b/skfda/tests/test_irregular_operations.py index 8f9350c55..044b59506 100644 --- a/skfda/tests/test_irregular_operations.py +++ b/skfda/tests/test_irregular_operations.py @@ -153,7 +153,6 @@ def other_2D( "sum", "var", "mean", - "gmean", #"cov", ] From 194cab5a9ea56dc990c3e1b30472da4e23604141 Mon Sep 17 00:00:00 2001 From: opintosant Date: Sun, 23 Apr 2023 13:26:06 +0200 Subject: [PATCH 050/144] Add initial documentation for FDataIrregular --- docs/modules/representation.rst | 22 +++++++++++ skfda/representation/irregular.py | 66 +++++++++++++++---------------- 2 files changed, 55 insertions(+), 33 deletions(-) diff --git a/docs/modules/representation.rst 
b/docs/modules/representation.rst index 099109b7a..4bb8a5146 100644 --- a/docs/modules/representation.rst +++ b/docs/modules/representation.rst @@ -84,6 +84,28 @@ methods. skfda.representation.basis.Basis + +Irregular representation +------------------------ + +In practice, most functional datasets do not contain functions evaluated +uniformly over a fixed grid. In other words, it is paramount to be able +to represent irregular functional data. + +While the FDataGrid class could support these kind of datasets, it is +inefficient to store a complete grid with low data density. Furthermore, +there are specific methods that can be applied to irregular data in order +to obtain, among other things, a better convesion to basis representation. + +The FDataIrregular class provides the functionality which suits these purposes. + + +.. autosummary:: + :toctree: autosummary + + skfda.representation.irregular.FDataIrregular + + Generic representation ---------------------- diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 88fdf0ff4..8bf6c354b 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -115,12 +115,12 @@ class FDataIrregular(FData): # noqa: WPS214 Attributes: functional_indices: a unidimensional array which stores the index of - the functional_values and functional_values arrays where the data - of each individual curve of the sample begins. + the functional_values and functional_values arrays where the data + of each individual curve of the sample begins. functional_arguments: an array of every argument of the domain for - every curve in the sample. Each row contains an observation. + every curve in the sample. Each row contains an observation. functional_values: an array of every value of the codomain for - every curve in the sample. Each row contains an observation. + every curve in the sample. Each row contains an observation. 
domain_range: 2 dimension matrix where each row contains the bounds of the interval in which the functional data is considered to exist for each one of the axies. @@ -146,18 +146,18 @@ class FDataIrregular(FData): # noqa: WPS214 >>> values = [[1], [2], [3], [4], [5]] >>> FDataIrregular(indices, arguments, values) FDataIrregular( - function_indices=array([0,2]), + function_indices=array([0, 2]), function_arguments=array([[1], - [2], - [3], - [4], - [5]]), + [2], + [3], + [4], + [5]]), function_values=array([[1], - [2], - [3], - [4], - [5]]), - domain_range=((1.0, 5.0),), + [2], + [3], + [4], + [5]]), + domain_range=((1.0, 5.0),), ...) The number of arguments and values must be the same. @@ -897,7 +897,7 @@ def concatenate(self: T, *others: T, as_coordinates: bool = False) -> T: T: FDataIrregular object with the samples from the source objects. Examples: - >>> indices = [0,2] + >>> indices = [0, 2] >>> arguments = values = np.arange(5).reshape(-1, 1) >>> fd = FDataIrregular(indices, arguments, values) >>> arguments_2 = values_2 = np.arange(5, 10).reshape(-1, 1) @@ -906,25 +906,25 @@ def concatenate(self: T, *others: T, as_coordinates: bool = False) -> T: FDataIrregular( function_indices=array([0, 2, 5, 7], dtype=uint32), function_arguments=array([[0.], - [1.], - [2.], - [3.], - [4.], - [5.], - [6.], - [7.], - [8.], - [9.]]), + [1.], + [2.], + [3.], + [4.], + [5.], + [6.], + [7.], + [8.], + [9.]]), function_values=array([[0.], - [1.], - [2.], - [3.], - [4.], - [5.], - [6.], - [7.], - [8.], - [9.]]), + [1.], + [2.], + [3.], + [4.], + [5.], + [6.], + [7.], + [8.], + [9.]]), domain_range=((0.0, 9.0),), ...) """ From f82e7e3d72610d00950b40fa2e72085f59a47fec Mon Sep 17 00:00:00 2001 From: opintosant Date: Mon, 24 Apr 2023 14:03:23 +0200 Subject: [PATCH 051/144] IrregularBasisSmoother. 
Fixed error in coordinates function --- skfda/preprocessing/smoothing/__init__.py | 2 +- skfda/preprocessing/smoothing/_basis.py | 131 +++++++++++++++++++++- skfda/representation/irregular.py | 37 +++--- 3 files changed, 152 insertions(+), 18 deletions(-) diff --git a/skfda/preprocessing/smoothing/__init__.py b/skfda/preprocessing/smoothing/__init__.py index 4d3840d1e..bf7f78ae1 100644 --- a/skfda/preprocessing/smoothing/__init__.py +++ b/skfda/preprocessing/smoothing/__init__.py @@ -10,7 +10,7 @@ "validation", ], submod_attrs={ - "_basis": ["BasisSmoother"], + "_basis": ["BasisSmoother", "IrregularBasisSmoother"], "_kernel_smoothers": ["KernelSmoother"], }, ) diff --git a/skfda/preprocessing/smoothing/_basis.py b/skfda/preprocessing/smoothing/_basis.py index 687ab16b1..446140a04 100644 --- a/skfda/preprocessing/smoothing/_basis.py +++ b/skfda/preprocessing/smoothing/_basis.py @@ -14,7 +14,7 @@ from ..._utils import _cartesian_product, _to_grid_points from ...misc.lstsq import LstsqMethod, solve_regularized_weighted_lstsq from ...misc.regularization import L2Regularization -from ...representation import FData, FDataBasis, FDataGrid +from ...representation import FData, FDataBasis, FDataGrid, FDataIrregular from ...representation.basis import Basis from ...typing._base import GridPointsLike from ...typing._numpy import NDArrayFloat @@ -334,3 +334,132 @@ def transform( ) return super().transform(X, y) + +class IrregularBasisSmoother(_LinearSmoother): + _required_parameters = ["basis"] + + def __init__( + self, + basis: Basis, + *, + smoothing_parameter: float = 1.0, + weights: Optional[NDArrayFloat] = None, + regularization: Optional[L2Regularization[FDataGrid]] = None, + output_points: Optional[GridPointsLike] = None, + method: LstsqMethod = 'svd', + return_basis: bool = False, + ) -> None: + self.basis = basis + self.smoothing_parameter = smoothing_parameter + self.weights = weights + self.regularization = regularization + self.output_points = output_points + 
self.method = method + self.return_basis: Final = return_basis + + def _coef_matrix( + self, + input_points: NDArrayFloat, + *, + function_values: Optional[NDArrayFloat] = None, + ) -> NDArrayFloat: + """Get the matrix that gives the coefficients.""" + from ...misc.regularization import compute_penalty_matrix + + basis_values_input = self.basis( + input_points, + ).reshape((self.basis.n_basis, -1)).T + + penalty_matrix = compute_penalty_matrix( + basis_iterable=(self.basis,), + regularization_parameter=self.smoothing_parameter, + regularization=self.regularization, + ) + + # Get the matrix for computing the coefficients if no + # data_matrix is passed + if function_values is None: + function_values = np.eye(basis_values_input.shape[0]) + + return solve_regularized_weighted_lstsq( + coefs=basis_values_input, + result=function_values, + weights=self.weights, + penalty_matrix=penalty_matrix, + lstsq_method=self.method, + ) + + def _hat_matrix( + self, + input_points: GridPointsLike, + output_points: GridPointsLike, + ) -> NDArrayFloat: + raise NotImplementedError( + "Not implemented for as_coordinates = True", + ) + + def fit( + self, + X: FDataIrregular, + y: object = None, + ) -> IrregularBasisSmoother: + """Compute the hat matrix for the desired output points. + + Args: + X: The data whose points are used to compute the matrix. + y: Ignored. + + Returns: + self + + """ + self.input_points_ = X.function_arguments + self.output_points_ = (self.output_points + if self.output_points is not None + else self.input_points_ + ) + + if not self.return_basis: + super().fit(X, y) + + return self + + def transform( + self, + X: FDataIrregular, + y: object = None, + ) -> FData: + """ + Smooth the data. + + Args: + X: The data to smooth. + y: Ignored + + Returns: + Smoothed data. 
+ + """ + assert all( + np.array_equal(i, s) for i, s in zip( + self.input_points_, + X.function_arguments, + ) + ) + + if self.return_basis: + coefficients = self._coef_matrix( + input_points=X.function_arguments, + function_values=X.function_values, + ).T + + return FDataBasis( + basis=self.basis, + coefficients=coefficients, + dataset_name=X.dataset_name, + argument_names=X.argument_names, + coordinate_names=X.coordinate_names, + sample_names=X.sample_names, + ) + + return super().transform(X, y) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 8bf6c354b..f0bd77d16 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -370,7 +370,7 @@ def from_datagrid( as the source but with an irregular structure. """ # Obtain num functions and num observations from data - num_observations = np.sum(~np.isnan(f_data.data_matrix)) + num_observations = np.sum(~(np.isnan(f_data.data_matrix).all(axis=-1))) num_functions = f_data.data_matrix.shape[0] # Create data structure of function pointers and coordinates @@ -382,25 +382,28 @@ def from_datagrid( (num_observations, f_data.dim_codomain), ) + # Find all the combinations of grid points and indices + from itertools import product + grid_point_indexes = [ + np.indices(np.array(gp).shape)[0] + for gp in f_data.grid_points + ] + combinations = list(product(*f_data.grid_points)) + index_combinations = list(product(*grid_point_indexes)) + head = 0 for i in range(num_functions): function_indices[i] = head num_values = 0 - for j in range(f_data.data_matrix.shape[1]): - if np.all(np.isnan(f_data.data_matrix[i, j])): + for g_index, g in enumerate(index_combinations): + if np.all(np.isnan(f_data.data_matrix[(i,) + g])): continue - arg = [ - f_data.grid_points[dim][j] - for dim in range(f_data.dim_domain) - ] - function_arguments[head + num_values, :] = arg + arg = combinations[g_index] + value = f_data.data_matrix[(i, ) + g] - value = [ - f_data.data_matrix[i, j, dim] 
- for dim in range(f_data.dim_codomain) - ] + function_arguments[head + num_values, :] = arg function_values[head + num_values, :] = value num_values += 1 @@ -1053,7 +1056,7 @@ def to_basis(self, basis: Basis, **kwargs: Any) -> FDataBasis: FDataBasis: Basis representation of the funtional data object. """ - from ..preprocessing.smoothing import BasisSmoother + from ..preprocessing.smoothing import IrregularBasisSmoother if self.dim_domain != basis.dim_domain: raise ValueError( @@ -1074,7 +1077,7 @@ def to_basis(self, basis: Basis, **kwargs: Any) -> FDataBasis: if not basis.is_domain_range_fixed(): basis = basis.copy(domain_range=self.domain_range) - smoother = BasisSmoother( + smoother = IrregularBasisSmoother( basis=basis, **kwargs, return_basis=True, @@ -1082,7 +1085,7 @@ def to_basis(self, basis: Basis, **kwargs: Any) -> FDataBasis: # Only uses the available values for each curve basis_coefficients = [ - smoother.fit_transform(curve.to_grid()).coefficients[0] + smoother.fit_transform(curve).coefficients[0] for curve in self ] @@ -1704,8 +1707,10 @@ def __getitem__( self._fdatairregular.coordinate_names, )[s_key] + coordinate_values = self._fdatairregular.function_values[..., key] + return self._fdatairregular.copy( - function_values=self._fdatairregular.function_values[..., key], + function_values=coordinate_values.reshape(-1, 1), coordinate_names=tuple(coordinate_names), ) From 56aa3212cc5fc6fe87df44ccfad91c5fd801e4bf Mon Sep 17 00:00:00 2001 From: opintosant Date: Tue, 25 Apr 2023 03:50:48 +0200 Subject: [PATCH 052/144] Cleanup to_grid function and adapt it to multidimensional datasets. 
Add forced sorting of arguments in FDataIrregular __init__ for consistency --- skfda/representation/irregular.py | 108 ++++++++++++++---------------- skfda/tests/test_irregular.py | 67 +++++++++++++++++- 2 files changed, 115 insertions(+), 60 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index f0bd77d16..02834a277 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -224,7 +224,11 @@ def __init__( """Construct a FDataIrregular object.""" self.function_indices = np.array(function_indices) self.function_arguments = np.array(function_arguments) + if len(self.function_arguments.shape) == 1: + self.function_arguments = self.function_arguments.reshape(-1, 1) self.function_values = np.array(function_values) + if len(self.function_values.shape) == 1: + self.function_values = self.function_values.reshape(-1, 1) # Set dimensions self._dim_domain = self.function_arguments.shape[1] @@ -241,6 +245,9 @@ def __init__( if max(self.function_indices) >= self.num_observations: raise ValueError("Index in function_indices out of bounds") + + # Ensure arguments are in order within each function + self.function_arguments, self.function_values = self._sort_by_arguments() self._sample_range = _get_sample_range_from_data( self.function_indices, @@ -417,6 +424,26 @@ def from_datagrid( **kwargs, ) + def _sort_by_arguments(self) -> Tuple[ArrayLike, ArrayLike]: + """Sort the arguments lexicographically functionwise. + + Additionally, sort the values accordingly. 
+ + Returns: + Tuple[ArrayLike, ArrayLike]: sorted pair (arguments, values) + """ + indices_start_end = np.append(self.function_indices, self.num_observations) + slices = list(zip(indices_start_end, indices_start_end[1:])) + slice_args = [self.function_arguments[slice(*s)] for s in slices] + slice_values = [self.function_values[slice(*s)] for s in slices] + + # Sort lexicographically, first to last dimension + sorting_masks = [np.lexsort(np.flip(f_args, axis=1).T) for f_args in slice_args] + sorted_args = [slice_args[i][mask] for i, mask in enumerate(sorting_masks)] + sorted_values = [slice_values[i][mask] for i, mask in enumerate(sorting_masks)] + + return np.concatenate(sorted_args), np.concatenate(sorted_values) + def round( self, decimals: int = 0, @@ -1107,71 +1134,38 @@ def to_matrix(self) -> ArrayLike: Returns: ArrayLike: numpy array with the resulting matrix. """ - # Find the grid points and values for each function - index_end = 0 - grid_points = [[] for i in range(self.dim_domain)] - evaluated_points = [] - for index_start, index_end in zip( - list(self.function_indices), - list(self.function_indices[1:]), - ): - for dim in range(self.dim_domain): - grid_points[dim].append( - [ - x[dim] - for x in self.function_arguments[index_start:index_end] - ], - ) - - evaluated_points.append( - self.function_values[index_start:index_end], - ) - - # Dont forget to add the last one - for dim in range(self.dim_domain): - grid_points[dim].append( - [x[dim] for x in self.function_arguments[index_end:]], - ) - evaluated_points.append(self.function_values[index_end:]) - - # Aggregate into a complete data matrix - unified_grid_points = [[] for i in range(self.dim_domain)] - from functools import reduce - for dim in range(self.dim_domain): - unified_points = reduce( - lambda x, y: set(y).union(list(x)), - grid_points[dim], - ) - unified_grid_points[dim] = sorted(unified_points) - - # Fill matrix with known values, leave unknown as NA - num_curves = len(grid_points[0]) -
num_points = len(unified_grid_points[0]) + # Find the common grid points + grid_points = [ + np.unique(self.function_arguments[:, dim]) + for dim in range(self.dim_domain) + ] unified_matrix = np.empty( ( - num_curves, - *(num_points,) * self.dim_domain, + self.n_samples, + *[len(gp) for gp in grid_points], self.dim_codomain, ), ) unified_matrix.fill(np.nan) - for curve in range(num_curves): - # There must always be one dimension, - # and same size across all domain dimensions - for point, _ in enumerate(grid_points[0][curve]): - for dimension in range(self.dim_codomain): - point_index = [ - unified_grid_points[i].index( - grid_points[i][curve][point], - ) - for i in range(self.dim_domain) - ] - unified_matrix[(curve, *point_index, dimension)] = \ - evaluated_points[curve][point][dimension] - - return unified_matrix, unified_grid_points + #Fill with each function + next_indices = np.append( + self.function_indices, + self.num_observations, + ) + + for i, index in enumerate(self.function_indices): + for j in range(index, next_indices[i + 1]): + arg = self.function_arguments[j] + val = self.function_values[j] + pos = [ + np.where(gp==arg[dim])[0][0] + for dim, gp in enumerate(grid_points) + ] + unified_matrix[(i,) + tuple(pos)] = val + + return unified_matrix, grid_points def to_grid( # noqa: D102 self: T, diff --git a/skfda/tests/test_irregular.py b/skfda/tests/test_irregular.py index 3574ce9d7..913f47ca4 100644 --- a/skfda/tests/test_irregular.py +++ b/skfda/tests/test_irregular.py @@ -348,8 +348,10 @@ def test_fdatairregular_coordinates( for dim, f_data_coordinate in enumerate(f_data_irreg.coordinates): assert len(f_data_coordinate) == len(f_data_irreg) assert f_data_coordinate.dim_codomain == 1 - assert f_data_coordinate.function_values[:, 0] == \ - f_data_irreg.function_values[:, dim] + assert np.all( + f_data_coordinate.function_values[:, 0] + == f_data_irreg.function_values[:, dim] + ) @pytest.mark.parametrize("decimals", TEST_DECIMALS) @@ -416,4 +418,63 @@ 
def test_fdatairregular_equals( assert f_data_irreg.equals(f_data_irreg) assert f_data_irreg_multidimensional.equals(f_data_irreg_multidimensional) assert not f_data_irreg.equals(f_data_irreg_multidimensional) - assert f_data_irreg.equals(f_data_irreg.copy()) \ No newline at end of file + assert f_data_irreg.equals(f_data_irreg.copy()) + + +def test_fdatairregular_to_grid( + input_arrays: ArrayLike, + fdatagrid: FDataGrid, +) -> None: + """Test conversion of FDataIrregular to and from FDataGrid. + + Args: + input_arrays (ArrayLike): tuple of three arrays required for + FDataIrregular + indices: Array of pointers to the beginning of the arguments and + values of each curve + arguments: Array of each of the points of the domain + values: Array of each of the coordinates of the codomain + """ + indices, arguments, values = input_arrays + + f_data_irreg = FDataIrregular( + indices, + arguments, + values, + ) + + f_data_grid = f_data_irreg.to_grid() + + # FDataGrid -> FDataIrregular -> FDataGrid + assert fdatagrid.equals(FDataIrregular.from_datagrid(fdatagrid).to_grid()) + # FDataIrregular -> FDataGrid -> FDataIrregular + assert f_data_irreg.equals(FDataIrregular.from_datagrid(f_data_grid)) + +def test_fdatairregular_to_grid_multidimensional( + input_arrays_multidimensional: ArrayLike, + fdatagrid_multidimensional: FDataGrid, +) -> None: + """Test conversion of FDataIrregular to and from FDataGrid. 
+ + Args: + input_arrays_multidimensional (ArrayLike): tuple of three arrays required for + FDataIrregular + indices: Array of pointers to the beginning of the arguments and + values of each curve + arguments: Array of each of the points of the domain + values: Array of each of the coordinates of the codomain + """ + indices, arguments, values = input_arrays_multidimensional + + f_data_irreg = FDataIrregular( + indices, + arguments, + values, + ) + + f_data_grid = f_data_irreg.to_grid() + + # FDataGrid -> FDataIrregular -> FDataGrid + assert fdatagrid_multidimensional.equals(FDataIrregular.from_datagrid(fdatagrid_multidimensional).to_grid()) + # FDataIrregular -> FDataGrid -> FDataIrregular + assert f_data_irreg.equals(FDataIrregular.from_datagrid(f_data_grid)) \ No newline at end of file From 0b103331b4dfa1101dde8613655692cf780346ae Mon Sep 17 00:00:00 2001 From: opintosant Date: Tue, 25 Apr 2023 04:22:33 +0200 Subject: [PATCH 053/144] Add fixed seed to irregular tests --- skfda/tests/test_irregular.py | 4 +++- skfda/tests/test_irregular_operations.py | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/skfda/tests/test_irregular.py b/skfda/tests/test_irregular.py index 913f47ca4..d675280bc 100644 --- a/skfda/tests/test_irregular.py +++ b/skfda/tests/test_irregular.py @@ -13,6 +13,8 @@ # FIXTURES ############ +SEED = 2906198114 + NUM_CURVES = 10 DIMENSIONS = 2 TEST_DECIMALS = range(10) @@ -26,7 +28,7 @@ {"coordinate_names": ("test",)}, ] -random_state = np.random.RandomState(seed=14) +random_state = np.random.RandomState(seed=SEED) @pytest.fixture() diff --git a/skfda/tests/test_irregular_operations.py b/skfda/tests/test_irregular_operations.py index 044b59506..25c68e6bb 100644 --- a/skfda/tests/test_irregular_operations.py +++ b/skfda/tests/test_irregular_operations.py @@ -12,13 +12,14 @@ ############ # MACROS ############ +SEED = 2906198114 NUM_CURVES = 100 MAX_VALUES_PER_CURVE = 10 DIMENSIONS = 2 N_BASIS = 5 -random_state = 
np.random.RandomState(seed=14) +random_state = np.random.RandomState(seed=SEED) ############ # FIXTURES From 9cba9e89c5b184660879a1b0b70ad60858530072 Mon Sep 17 00:00:00 2001 From: opintosant Date: Tue, 25 Apr 2023 20:54:07 +0200 Subject: [PATCH 054/144] Reformat test_irregular.py to be more efficient and clean. Added testing for multidimensional FDataGrid conversion --- skfda/tests/test_irregular.py | 305 ++++++++++++---------------------- 1 file changed, 105 insertions(+), 200 deletions(-) diff --git a/skfda/tests/test_irregular.py b/skfda/tests/test_irregular.py index d675280bc..204ead234 100644 --- a/skfda/tests/test_irregular.py +++ b/skfda/tests/test_irregular.py @@ -1,5 +1,5 @@ """Test the basic methods of the FDataIrregular structure""" -from typing import Tuple +from typing import Any, Tuple from ..typing._numpy import ArrayLike import numpy as np import pandas @@ -22,7 +22,6 @@ {"domain_range": ((0, 10))}, {"dataset_name": "test"}, {"sample_names": ["test"]*NUM_CURVES}, - # TODO Extrapolation {"interpolation": SplineInterpolation(3)}, {"argument_names": ("test",)}, {"coordinate_names": ("test",)}, @@ -38,7 +37,6 @@ def input_arrays( Generate three unidimensional arrays describing a FDataIrregular structure """ - # TODO Make editable with pytest num_values_per_curve = np.array(range(NUM_CURVES)) + 1 values_per_curve = [random_state.rand(num_values, 1) @@ -59,7 +57,6 @@ def input_arrays_multidimensional( Generate three multidimensional arrays describing a FDataIrregular structure """ - # TODO Make editable with pytest num_values_per_curve = np.array(range(NUM_CURVES)) + 1 values_per_curve = [random_state.rand(num_values, DIMENSIONS) @@ -74,36 +71,72 @@ def input_arrays_multidimensional( return indices, values, arguments +@pytest.fixture( + params=[ + "input_arrays", + "input_arrays_multidimensional", + ], +) +def fdatairregular( + request: Any, + input_arrays: FDataIrregular, + input_arrays_multidimensional: FDataIrregular, +) -> FDataIrregular: + """Return 
'input_arrays' or 'input_arrays_multidimensional'.""" + if request.param == "input_arrays": + return FDataIrregular(*input_arrays) + elif request.param == "input_arrays_multidimensional": + return FDataIrregular(*input_arrays_multidimensional) + + @pytest.fixture() -def fdatagrid( +def fdatagrid_unidimensional( ) -> FDataGrid: """Generate FDataGrid""" - # TODO Make editable with pytest num_values_per_curve = NUM_CURVES data_matrix = random_state.rand(NUM_CURVES, num_values_per_curve, 1) # Grid points must be sorted grid_points = np.sort(random_state.rand(num_values_per_curve)) - return FDataGrid(data_matrix=data_matrix, - grid_points=grid_points, - ) + return FDataGrid( + data_matrix=data_matrix, + grid_points=grid_points, + ) @pytest.fixture() def fdatagrid_multidimensional( ) -> FDataGrid: """Generate multidimensional FDataGrid""" - # TODO Make editable with pytest num_values_per_curve = NUM_CURVES data_matrix = random_state.rand(NUM_CURVES, num_values_per_curve, DIMENSIONS) # Grid points must be sorted grid_points = np.sort(random_state.rand(num_values_per_curve)) - return FDataGrid(data_matrix=data_matrix, - grid_points=grid_points, - ) + return FDataGrid( + data_matrix=data_matrix, + grid_points=grid_points, + ) + + +@pytest.fixture( + params=[ + "fdatagrid_unidimensional", + "fdatagrid_multidimensional", + ], +) +def fdatagrid( + request: Any, + fdatagrid_unidimensional: FDataGrid, + fdatagrid_multidimensional: FDataGrid, +) -> FDataIrregular: + """Return 'fdatagrid_unidimensional' or 'fdatagrid_multidimensional'.""" + if request.param == "fdatagrid_unidimensional": + return fdatagrid_unidimensional + elif request.param == "fdatagrid_multidimensional": + return fdatagrid_multidimensional @pytest.fixture() @@ -173,65 +206,52 @@ def test_fdatairregular_from_multidimensional_arrays( def test_fdatairregular_copy( - input_arrays: ArrayLike, + fdatairregular: FDataIrregular, ) -> None: """Test the copy function for FDataIrregular for an exact copy Args: - 
input_arrays (ArrayLike): tuple of three arrays required for - FDataIrregular - indices: Array of pointers to the beginning of the arguments and - values of each curve - arguments: Array of each of the points of the domain - values: Array of each of the coordinates of the codomain + fdatairregular (FDataIrregular): FDataIrregular object + which can be unidimensional or multidimensional. """ - indices, arguments, values = input_arrays - - f_data_irreg = FDataIrregular( - indices, - arguments, - values, - ) - - assert f_data_irreg == f_data_irreg.copy() + assert fdatairregular == fdatairregular.copy() @pytest.mark.parametrize("kwargs", COPY_KWARGS) def test_fdatairregular_copy_kwargs( - input_arrays: ArrayLike, + fdatairregular: FDataIrregular, kwargs: dict, ) -> None: """Test the copy function for FDataIrregular with additional arguments which replace certain parameters of the object Args: - input_arrays (ArrayLike): tuple of three arrays required for - FDataIrregular - indices: Array of pointers to the beginning of the arguments and - values of each curve - arguments: Array of each of the points of the domain - values: Array of each of the coordinates of the codomain + fdatairregular (FDataIrregular): FDataIrregular object + which can be unidimensional or multidimensional. 
kwargs: Dict with the parameters for each iteration of the test """ - indices, arguments, values = input_arrays - - f_data_irreg = FDataIrregular( - indices, - arguments, - values, - ) - - f_data_copy = f_data_irreg.copy(**kwargs) + changed_attribute = next(iter(kwargs)) + local_kwargs = kwargs.copy() + + if changed_attribute == "argument_names": + # Set correct dimensionality + dim = fdatairregular.dim_domain + local_kwargs[changed_attribute] = kwargs[changed_attribute]*dim + if changed_attribute == "coordinate_names": + # Set correct dimensionality + dim = fdatairregular.dim_codomain + local_kwargs[changed_attribute] = kwargs[changed_attribute]*dim + + f_data_copy = fdatairregular.copy(**local_kwargs) # Check everything equal except specified kwarg - assert len(f_data_copy) == len(f_data_irreg) + assert len(f_data_copy) == len(fdatairregular) assert len(f_data_copy.function_arguments) == \ - len(f_data_irreg.function_arguments) - assert f_data_copy.dim_domain == f_data_irreg.dim_domain - assert f_data_copy.dim_domain == f_data_irreg.dim_codomain - changed_attribute = next(iter(kwargs)) + len(fdatairregular.function_arguments) + assert f_data_copy.dim_domain == fdatairregular.dim_domain + assert f_data_copy.dim_domain == fdatairregular.dim_codomain assert getattr(f_data_copy, changed_attribute) != \ - getattr(f_data_irreg, changed_attribute) + getattr(fdatairregular, changed_attribute) def test_fdatairregular_from_fdatagrid( @@ -249,22 +269,6 @@ def test_fdatairregular_from_fdatagrid( assert len(f_data_irreg) == len(fdatagrid) -def test_fdatairregular_from_fdatagrid_multidimensional( - fdatagrid_multidimensional: FDataGrid, -) -> None: - """Tests creating a correct FDataIrregular object from - a multidimensional FDataGrid - - Args: - fdatagrid (FDataGrid): FDataGrid object. 
Can be dense or sparse - (contain NaNs) - """ - f_data_irreg = FDataIrregular.from_datagrid(fdatagrid_multidimensional) - - assert f_data_irreg is not None - assert len(f_data_irreg) == len(fdatagrid_multidimensional) - - def test_fdatairregular_from_dataframe( dataframe: FDataGrid, ) -> None: @@ -295,188 +299,89 @@ def test_fdatairregular_from_dataframe( def test_fdatairregular_getitem( - input_arrays: ArrayLike, + fdatairregular: FDataIrregular, ) -> None: """Tests using slices to get subsamples of a given FDataIrregular, using the method __getitem__ of the class Args: - input_arrays (ArrayLike): tuple of three arrays required for - FDataIrregular - indices: Array of pointers to the beginning of the arguments and - values of each curve - arguments: Array of each of the points of the domain - values: Array of each of the coordinates of the codomain + fdatairregular (FDataIrregular): FDataIrregular object + which can be unidimensional or multidimensional. """ - indices, arguments, values = input_arrays - - f_data_irreg = FDataIrregular( - indices, - arguments, - values, - ) - - assert len(f_data_irreg[0]) == 1 - assert len(f_data_irreg[-1]) == 1 - assert len(f_data_irreg[0:NUM_CURVES]) == NUM_CURVES - assert len(f_data_irreg[0:]) == len(f_data_irreg) - assert len(f_data_irreg[:NUM_CURVES]) == NUM_CURVES - assert len(f_data_irreg[0:NUM_CURVES:2]) == NUM_CURVES/2 - assert len(f_data_irreg[0:NUM_CURVES:2]) == NUM_CURVES/2 + assert len(fdatairregular[0]) == 1 + assert len(fdatairregular[-1]) == 1 + assert len(fdatairregular[0:NUM_CURVES]) == NUM_CURVES + assert len(fdatairregular[0:]) == len(fdatairregular) + assert len(fdatairregular[:NUM_CURVES]) == NUM_CURVES + assert len(fdatairregular[0:NUM_CURVES:2]) == NUM_CURVES/2 + assert len(fdatairregular[0:NUM_CURVES:2]) == NUM_CURVES/2 def test_fdatairregular_coordinates( - input_arrays_multidimensional: ArrayLike, + fdatairregular: FDataIrregular, ) -> None: """Test obtaining the different coordinates for a 
multidimensional FDataGrid object by using the custom _IrregularCoordinateIterator Args: - input_arrays (ArrayLike): tuple of three arrays required for - FDataIrregular - indices: Array of pointers to the beginning of the arguments and - values of each curve - arguments: Array of each of the points of the domain - values: Array of each of the coordinates of the codomain + fdatairregular (FDataIrregular): FDataIrregular object + which can be unidimensional or multidimensional. """ - indices, arguments, values = input_arrays_multidimensional - - f_data_irreg = FDataIrregular( - indices, - arguments, - values, - ) - - for dim, f_data_coordinate in enumerate(f_data_irreg.coordinates): - assert len(f_data_coordinate) == len(f_data_irreg) + for dim, f_data_coordinate in enumerate(fdatairregular.coordinates): + assert len(f_data_coordinate) == len(fdatairregular) assert f_data_coordinate.dim_codomain == 1 assert np.all( f_data_coordinate.function_values[:, 0] - == f_data_irreg.function_values[:, dim] + == fdatairregular.function_values[:, dim] ) @pytest.mark.parametrize("decimals", TEST_DECIMALS) def test_fdatairregular_round( - input_arrays: ArrayLike, + fdatairregular: FDataIrregular, decimals: int, ) -> None: """Test the round function for FDataIrregular Args: - input_arrays (ArrayLike): tuple of three arrays required for - FDataIrregular - indices: Array of pointers to the beginning of the arguments and - values of each curve - arguments: Array of each of the points of the domain - values: Array of each of the coordinates of the codomain + fdatairregular (FDataIrregular): FDataIrregular object + which can be unidimensional or multidimensional. + decimals (int): Number of decimal places to round. 
""" - indices, arguments, values = input_arrays - - f_data_irreg = FDataIrregular( - indices, - arguments, - values, - ) - assert np.all( - f_data_irreg.round(decimals).function_values == - np.round(f_data_irreg.function_values, decimals) + fdatairregular.round(decimals).function_values == + np.round(fdatairregular.function_values, decimals) ) def test_fdatairregular_equals( - input_arrays: ArrayLike, - input_arrays_multidimensional: ArrayLike, + fdatairregular: FDataIrregular, ) -> None: """Test for equals method, which in turn uses _eq_elementwise to verify equality in every index, argument and value Args: - input_arrays (ArrayLike): tuple of three arrays required for - FDataIrregular - input_arrays_multidimensional (ArrayLike): tuple of three arrays required for - FDataIrregular, with multiple dimensions - indices: Array of pointers to the beginning of the arguments and - values of each curve - arguments: Array of each of the points of the domain - values: Array of each of the coordinates of the codomain + fdatairregular (FDataIrregular): FDataIrregular object + which can be unidimensional or multidimensional. """ - indices, arguments, values = input_arrays_multidimensional + assert fdatairregular.equals(fdatairregular) + assert fdatairregular.equals(fdatairregular.copy()) - f_data_irreg_multidimensional = FDataIrregular( - indices, - arguments, - values, - ) - - indices, arguments, values = input_arrays - - f_data_irreg = FDataIrregular( - indices, - arguments, - values, - ) - - assert f_data_irreg.equals(f_data_irreg) - assert f_data_irreg_multidimensional.equals(f_data_irreg_multidimensional) - assert not f_data_irreg.equals(f_data_irreg_multidimensional) - assert f_data_irreg.equals(f_data_irreg.copy()) - def test_fdatairregular_to_grid( - input_arrays: ArrayLike, + fdatairregular: FDataIrregular, fdatagrid: FDataGrid, ) -> None: """Test conversion of FDataIrregular to and from FDataGrid. 
Args: - input_arrays (ArrayLike): tuple of three arrays required for - FDataIrregular - indices: Array of pointers to the beginning of the arguments and - values of each curve - arguments: Array of each of the points of the domain - values: Array of each of the coordinates of the codomain + Args: + fdatairregular (FDataIrregular): FDataIrregular object + which can be unidimensional or multidimensional. + fdatagrid (FDataGrid): FDataGrid object. """ - indices, arguments, values = input_arrays - - f_data_irreg = FDataIrregular( - indices, - arguments, - values, - ) - - f_data_grid = f_data_irreg.to_grid() + f_data_grid = fdatairregular.to_grid() # FDataGrid -> FDataIrregular -> FDataGrid assert fdatagrid.equals(FDataIrregular.from_datagrid(fdatagrid).to_grid()) # FDataIrregular -> FDataGrid -> FDataIrregular - assert f_data_irreg.equals(FDataIrregular.from_datagrid(f_data_grid)) - -def test_fdatairregular_to_grid_multidimensional( - input_arrays_multidimensional: ArrayLike, - fdatagrid_multidimensional: FDataGrid, -) -> None: - """Test conversion of FDataIrregular to and from FDataGrid. 
- - Args: - input_arrays_multidimensional (ArrayLike): tuple of three arrays required for - FDataIrregular - indices: Array of pointers to the beginning of the arguments and - values of each curve - arguments: Array of each of the points of the domain - values: Array of each of the coordinates of the codomain - """ - indices, arguments, values = input_arrays_multidimensional - - f_data_irreg = FDataIrregular( - indices, - arguments, - values, - ) - - f_data_grid = f_data_irreg.to_grid() - - # FDataGrid -> FDataIrregular -> FDataGrid - assert fdatagrid_multidimensional.equals(FDataIrregular.from_datagrid(fdatagrid_multidimensional).to_grid()) - # FDataIrregular -> FDataGrid -> FDataIrregular - assert f_data_irreg.equals(FDataIrregular.from_datagrid(f_data_grid)) \ No newline at end of file + assert fdatairregular.equals(FDataIrregular.from_datagrid(f_data_grid)) \ No newline at end of file From 786f186a4b47afe52a9014ba70c63ed6bd7323a7 Mon Sep 17 00:00:00 2001 From: opintosant Date: Tue, 25 Apr 2023 21:31:16 +0200 Subject: [PATCH 055/144] Fix error in concatenate for multiple domain dimensions. 
Add test for concatenate --- skfda/representation/irregular.py | 4 ++-- skfda/tests/test_irregular.py | 23 +++++++++++++++++++++++ 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 02834a277..448bd5df0 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -1014,8 +1014,8 @@ def concatenate(self: T, *others: T, as_coordinates: bool = False) -> T: # Check domain range domain_range = [list(r) for r in self.domain_range] for dim in range(self.dim_domain): - dim_max = np.max(function_args, axis=(1, dim)) - dim_min = np.min(function_args, axis=(1, dim)) + dim_max = np.max(function_args[:, dim]) + dim_min = np.min(function_args[:, dim]) if dim_max > self.domain_range[dim][1]: domain_range[dim][1] = dim_max diff --git a/skfda/tests/test_irregular.py b/skfda/tests/test_irregular.py index 204ead234..a0f44ff3f 100644 --- a/skfda/tests/test_irregular.py +++ b/skfda/tests/test_irregular.py @@ -353,6 +353,29 @@ def test_fdatairregular_round( np.round(fdatairregular.function_values, decimals) ) + +def test_fdatairregular_concatenate( + fdatairregular: FDataIrregular, +) -> None: + """Test concatenate FDataIrregular objects. 
+ + Args: + input_arrays (ArrayLike): tuple of three arrays required for + FDataIrregular + indices: Array of pointers to the beginning of the arguments and + values of each curve + arguments: Array of each of the points of the domain + values: Array of each of the coordinates of the codomain + """ + fd_concat = fdatairregular.concatenate(fdatairregular) + assert len(fd_concat) == 2*len(fdatairregular) + assert np.all(np.split(fd_concat.function_indices, 2)[0] == fdatairregular.function_indices) + assert np.all(np.split(fd_concat.function_indices, 2)[1] == fdatairregular.function_indices + fdatairregular.num_observations) + assert fd_concat.num_observations == 2*fdatairregular.num_observations + assert np.all(np.split(fd_concat.function_arguments, 2)[0] == fdatairregular.function_arguments) + assert np.all(np.split(fd_concat.function_values, 2)[0] == fdatairregular.function_values) + + def test_fdatairregular_equals( fdatairregular: FDataIrregular, ) -> None: From e2ebf2e51a7080e9f546e68156da8d41073d1922 Mon Sep 17 00:00:00 2001 From: opintosant Date: Wed, 26 Apr 2023 03:24:55 +0200 Subject: [PATCH 056/144] Extend testing of to_basis to include multidimensional datasets (using TensorBasis) --- skfda/tests/test_irregular_operations.py | 38 +++++++++++++++++------- 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/skfda/tests/test_irregular_operations.py b/skfda/tests/test_irregular_operations.py index 25c68e6bb..4e6760354 100644 --- a/skfda/tests/test_irregular_operations.py +++ b/skfda/tests/test_irregular_operations.py @@ -7,7 +7,7 @@ from skfda.datasets._real_datasets import _fetch_loon_data from skfda.representation import FDataIrregular, FDataGrid from skfda.representation.interpolation import SplineInterpolation -from skfda.representation.basis import Basis, FDataBasis, FourierBasis, BSplineBasis +from skfda.representation.basis import Basis, FDataBasis, FourierBasis, BSplineBasis, TensorBasis ############ # MACROS @@ -687,18 +687,34 @@ def 
test_fdatairregular_numeric_reduction( class TestBasisOperations: """ - Class which encapsulates the testing of numeric reductions - (such as mean, std) for FDataIrregular objects + Class which encapsulates the testing of basis operations + (such as to_basis) for FDataIrregular objects """ - def test_fdatairregular_numeric_reduction( + def test_fdatairregular_basis_operation( self, - fdatairregular1D: FDataIrregular, + fdatairregular: FDataIrregular, all_basis: Basis, all_basis_operations: str, ) -> None: - basis = all_basis( - domain_range=fdatairregular1D.domain_range, - n_basis=N_BASIS - ) - basis_operation = getattr(fdatairregular1D, all_basis_operations)(basis) - assert isinstance(basis_operation, FDataBasis) \ No newline at end of file + # Create Tensor basis for higher dimensions + if fdatairregular.dim_domain == 1: + basis = all_basis( + domain_range=fdatairregular.domain_range, + n_basis=N_BASIS, + ) + else: + basis_by_dim = [ + all_basis( + domain_range=fdatairregular.domain_range[dim: dim + 1], + n_basis=N_BASIS, + ) + for dim in range(fdatairregular.dim_domain) + ] + basis = TensorBasis(basis_by_dim) + + fd_basis_coords = [ + getattr(coordinate, all_basis_operations)(basis) + for coordinate in fdatairregular.coordinates + ] + + assert all([isinstance(fd_basis, FDataBasis) for fd_basis in fd_basis_coords]) \ No newline at end of file From 55bbcb10e0092e0930468745ddd0d4d1b4074240 Mon Sep 17 00:00:00 2001 From: opintosant Date: Thu, 27 Apr 2023 11:52:10 +0200 Subject: [PATCH 057/144] Comply with PEP8 and wemake --- skfda/tests/test_irregular.py | 246 ++++++++++++++++------------------ 1 file changed, 116 insertions(+), 130 deletions(-) diff --git a/skfda/tests/test_irregular.py b/skfda/tests/test_irregular.py index a0f44ff3f..4a3091d6a 100644 --- a/skfda/tests/test_irregular.py +++ b/skfda/tests/test_irregular.py @@ -1,14 +1,16 @@ -"""Test the basic methods of the FDataIrregular structure""" +"""Test the basic methods of the FDataIrregular structure.""" 
from typing import Any, Tuple -from ..typing._numpy import ArrayLike + import numpy as np import pandas import pytest from skfda.datasets._real_datasets import _fetch_loon_data -from skfda.representation import FDataIrregular, FDataGrid +from skfda.representation import FDataGrid, FDataIrregular from skfda.representation.interpolation import SplineInterpolation +from ..typing._numpy import ArrayLike + ############ # FIXTURES ############ @@ -18,10 +20,10 @@ NUM_CURVES = 10 DIMENSIONS = 2 TEST_DECIMALS = range(10) -COPY_KWARGS = [ +COPY_KWARGS = [ # noqa: WPS407 {"domain_range": ((0, 10))}, {"dataset_name": "test"}, - {"sample_names": ["test"]*NUM_CURVES}, + {"sample_names": ["test"] * NUM_CURVES}, {"interpolation": SplineInterpolation(3)}, {"argument_names": ("test",)}, {"coordinate_names": ("test",)}, @@ -33,16 +35,19 @@ @pytest.fixture() def input_arrays( ) -> Tuple[ArrayLike, ArrayLike, ArrayLike]: - """ - Generate three unidimensional arrays describing a - FDataIrregular structure - """ + """Create unidimensional arrays describing a FDataIrregular structure.""" num_values_per_curve = np.array(range(NUM_CURVES)) + 1 - values_per_curve = [random_state.rand(num_values, 1) - for num_values in num_values_per_curve] - args_per_curve = [random_state.rand(num_values, 1) - for num_values in num_values_per_curve] + values_per_curve = [ + random_state.rand(num_values, 1) + for num_values in num_values_per_curve + ] + + args_per_curve = [ + random_state.rand(num_values, 1) + for num_values in num_values_per_curve + ] + indices = np.cumsum(num_values_per_curve) - num_values_per_curve values = np.concatenate(values_per_curve) arguments = np.concatenate(args_per_curve) @@ -53,16 +58,18 @@ def input_arrays( @pytest.fixture() def input_arrays_multidimensional( ) -> Tuple[ArrayLike, ArrayLike, ArrayLike]: - """ - Generate three multidimensional arrays - describing a FDataIrregular structure - """ + """Create multidimensional arrays describing a FDataIrregular structure.""" 
num_values_per_curve = np.array(range(NUM_CURVES)) + 1 - values_per_curve = [random_state.rand(num_values, DIMENSIONS) - for num_values in num_values_per_curve] - args_per_curve = [random_state.rand(num_values, DIMENSIONS) - for num_values in num_values_per_curve] + values_per_curve = [ + random_state.rand(num_values, DIMENSIONS) + for num_values in num_values_per_curve + ] + + args_per_curve = [ + random_state.rand(num_values, DIMENSIONS) + for num_values in num_values_per_curve + ] indices = np.cumsum(num_values_per_curve) - num_values_per_curve values = np.concatenate(values_per_curve) @@ -92,7 +99,7 @@ def fdatairregular( @pytest.fixture() def fdatagrid_unidimensional( ) -> FDataGrid: - """Generate FDataGrid""" + """Generate FDataGrid.""" num_values_per_curve = NUM_CURVES data_matrix = random_state.rand(NUM_CURVES, num_values_per_curve, 1) @@ -108,10 +115,15 @@ def fdatagrid_unidimensional( @pytest.fixture() def fdatagrid_multidimensional( ) -> FDataGrid: - """Generate multidimensional FDataGrid""" + """Generate multidimensional FDataGrid.""" num_values_per_curve = NUM_CURVES - data_matrix = random_state.rand(NUM_CURVES, num_values_per_curve, DIMENSIONS) + data_matrix = random_state.rand( + NUM_CURVES, + num_values_per_curve, + DIMENSIONS, + ) + # Grid points must be sorted grid_points = np.sort(random_state.rand(num_values_per_curve)) @@ -142,77 +154,40 @@ def fdatagrid( @pytest.fixture() def dataframe( ) -> pandas.DataFrame: - """Generate long dataframe for testing""" + """Generate long dataframe for testing.""" raw_dataset = _fetch_loon_data("bone_ext") - data = raw_dataset["bone_ext"] - return data + return raw_dataset["bone_ext"] ############ # TESTS ############ -def test_fdatairregular_from_arrays( - input_arrays: ArrayLike, +def test_fdatairregular_init( + fdatairregular: FDataIrregular, ) -> None: - """Tests creating a correct FDataIrregular object from escriptive arrays - - Args: - input_arrays (ArrayLike): tuple of three arrays required for - 
FDataIrregular - indices: Array of pointers to the beginning of the arguments and - values of each curve - arguments: Array of each of the points of the domain - values: Array of each of the coordinates of the codomain - """ - indices, arguments, values = input_arrays - - f_data_irreg = FDataIrregular( - indices, - arguments, - values, - ) - - assert f_data_irreg is not None - assert len(f_data_irreg) == len(indices) - assert len(f_data_irreg.function_arguments) == len(arguments) - + """Tests creating a correct FDataIrregular object from arrays. -def test_fdatairregular_from_multidimensional_arrays( - input_arrays_multidimensional: ArrayLike, -) -> None: - """Tests creating a correct FDataIrregular object from escriptive arrays + Test both unidimensional and multidimensional. Args: - input_arrays (ArrayLike): tuple of three arrays required for - FDataIrregular - indices: Array of pointers to the beginning of the arguments and - values of each curve - arguments: Array of each of the points of the domain - values: Array of each of the coordinates of the codomain + fdatairregular (FDataIrregular): FDataIrregular object + which can be unidimensional or multidimensional. """ - indices, arguments, values = input_arrays_multidimensional - - f_data_irreg = FDataIrregular( - indices, - arguments, - values, - ) - - assert f_data_irreg is not None - assert len(f_data_irreg) == len(indices) - assert len(f_data_irreg.function_arguments) == len(arguments) + assert fdatairregular is not None + assert len(fdatairregular) == len(fdatairregular) + assert len(fdatairregular.function_arguments) == len(fdatairregular) def test_fdatairregular_copy( fdatairregular: FDataIrregular, ) -> None: - """Test the copy function for FDataIrregular for an exact copy + """Test the copy function for FDataIrregular for an exact copy. Args: fdatairregular (FDataIrregular): FDataIrregular object - which can be unidimensional or multidimensional. + which can be unidimensional or multidimensional. 
""" assert fdatairregular == fdatairregular.copy() @@ -222,46 +197,49 @@ def test_fdatairregular_copy_kwargs( fdatairregular: FDataIrregular, kwargs: dict, ) -> None: - """Test the copy function for FDataIrregular with additional arguments - which replace certain parameters of the object + """Test the copy function for FDataIrregular. + + Test with additional keyword arguments which replace + certain parameters of the object. Args: fdatairregular (FDataIrregular): FDataIrregular object - which can be unidimensional or multidimensional. + which can be unidimensional or multidimensional. kwargs: Dict with the parameters for each iteration of the test """ changed_attribute = next(iter(kwargs)) local_kwargs = kwargs.copy() - + if changed_attribute == "argument_names": # Set correct dimensionality dim = fdatairregular.dim_domain - local_kwargs[changed_attribute] = kwargs[changed_attribute]*dim + local_kwargs[changed_attribute] = kwargs[changed_attribute] * dim if changed_attribute == "coordinate_names": # Set correct dimensionality dim = fdatairregular.dim_codomain - local_kwargs[changed_attribute] = kwargs[changed_attribute]*dim - + local_kwargs[changed_attribute] = kwargs[changed_attribute] * dim + f_data_copy = fdatairregular.copy(**local_kwargs) + og_attribute = getattr(fdatairregular, changed_attribute) + copy_attribute = getattr(f_data_copy, changed_attribute) + # Check everything equal except specified kwarg assert len(f_data_copy) == len(fdatairregular) - assert len(f_data_copy.function_arguments) == \ - len(fdatairregular.function_arguments) + assert f_data_copy.num_observations == fdatairregular.num_observations assert f_data_copy.dim_domain == fdatairregular.dim_domain assert f_data_copy.dim_domain == fdatairregular.dim_codomain - assert getattr(f_data_copy, changed_attribute) != \ - getattr(fdatairregular, changed_attribute) + assert og_attribute != copy_attribute def test_fdatairregular_from_fdatagrid( fdatagrid: FDataGrid, ) -> None: - """Tests creating a 
correct FDataIrregular object from FDataGrid + """Tests creating a correct FDataIrregular object from FDataGrid. Args: fdatagrid (FDataGrid): FDataGrid object. Can be dense or sparse - (contain NaNs) + (contain NaNs) """ f_data_irreg = FDataIrregular.from_datagrid(fdatagrid) @@ -270,16 +248,14 @@ def test_fdatairregular_from_fdatagrid( def test_fdatairregular_from_dataframe( - dataframe: FDataGrid, + dataframe: pandas.DataFrame, ) -> None: - """Tests creating a correct FDataIrregular object from - a multidimensional FDataGrid + """Test creating FDataIrregular from pandas DataFrame. Args: - fdatagrid (FDataGrid): FDataGrid object. Can be dense or sparse - (contain NaNs) + dataframe (pandas.DataFrame): DataFrame object. + It should be in 'long' format. """ - curve_name = "idnum" argument_name = "age" coordinate_name = "spnbmd" @@ -291,7 +267,7 @@ def test_fdatairregular_from_dataframe( coordinate_columns=coordinate_name, argument_names=[argument_name], coordinate_names=[coordinate_name], - dataset_name="bone_ext" + dataset_name="bone_ext", ) assert len(f_irreg) == 423 @@ -301,38 +277,44 @@ def test_fdatairregular_from_dataframe( def test_fdatairregular_getitem( fdatairregular: FDataIrregular, ) -> None: - """Tests using slices to get subsamples of a given FDataIrregular, - using the method __getitem__ of the class + """Tests the getitem method of FDataIrregular. + + Use slices to get subsamples of a given FDataIrregular, + using the method __getitem__ of the class, and then + verify the length of the result is correct. Args: fdatairregular (FDataIrregular): FDataIrregular object - which can be unidimensional or multidimensional. + which can be unidimensional or multidimensional.
""" - assert len(fdatairregular[0]) == 1 - assert len(fdatairregular[-1]) == 1 - assert len(fdatairregular[0:NUM_CURVES]) == NUM_CURVES - assert len(fdatairregular[0:]) == len(fdatairregular) + assert len(fdatairregular[0]) == len(fdatairregular[-1]) == 1 + assert len(fdatairregular[:]) == len(fdatairregular) assert len(fdatairregular[:NUM_CURVES]) == NUM_CURVES - assert len(fdatairregular[0:NUM_CURVES:2]) == NUM_CURVES/2 - assert len(fdatairregular[0:NUM_CURVES:2]) == NUM_CURVES/2 + assert len(fdatairregular[:NUM_CURVES:2]) == NUM_CURVES / 2 + assert len(fdatairregular[:NUM_CURVES:2]) == NUM_CURVES / 2 def test_fdatairregular_coordinates( fdatairregular: FDataIrregular, ) -> None: - """Test obtaining the different coordinates for a multidimensional - FDataGrid object by using the custom _IrregularCoordinateIterator + """Test the coordinates function. + + First obtain the different coordinates for a multidimensional + FDataGrid object by using the custom _IrregularCoordinateIterator. + + Then check that the coordinates are equal elementwise to the + original. Args: fdatairregular (FDataIrregular): FDataIrregular object - which can be unidimensional or multidimensional. + which can be unidimensional or multidimensional. """ for dim, f_data_coordinate in enumerate(fdatairregular.coordinates): assert len(f_data_coordinate) == len(fdatairregular) assert f_data_coordinate.dim_codomain == 1 assert np.all( f_data_coordinate.function_values[:, 0] - == fdatairregular.function_values[:, dim] + == fdatairregular.function_values[:, dim], ) @@ -341,17 +323,17 @@ def test_fdatairregular_round( fdatairregular: FDataIrregular, decimals: int, ) -> None: - """Test the round function for FDataIrregular + """Test the round function for FDataIrregular. Args: fdatairregular (FDataIrregular): FDataIrregular object - which can be unidimensional or multidimensional. + which can be unidimensional or multidimensional. decimals (int): Number of decimal places to round. 
""" assert np.all( - fdatairregular.round(decimals).function_values == - np.round(fdatairregular.function_values, decimals) - ) + fdatairregular.round(decimals).function_values + == np.round(fdatairregular.function_values, decimals), + ) def test_fdatairregular_concatenate( @@ -360,31 +342,36 @@ def test_fdatairregular_concatenate( """Test concatenate FDataIrregular objects. Args: - input_arrays (ArrayLike): tuple of three arrays required for - FDataIrregular - indices: Array of pointers to the beginning of the arguments and - values of each curve - arguments: Array of each of the points of the domain - values: Array of each of the coordinates of the codomain + fdatairregular (FDataIrregular): FDataIrregular object + which can be unidimensional or multidimensional. """ fd_concat = fdatairregular.concatenate(fdatairregular) - assert len(fd_concat) == 2*len(fdatairregular) - assert np.all(np.split(fd_concat.function_indices, 2)[0] == fdatairregular.function_indices) - assert np.all(np.split(fd_concat.function_indices, 2)[1] == fdatairregular.function_indices + fdatairregular.num_observations) - assert fd_concat.num_observations == 2*fdatairregular.num_observations - assert np.all(np.split(fd_concat.function_arguments, 2)[0] == fdatairregular.function_arguments) - assert np.all(np.split(fd_concat.function_values, 2)[0] == fdatairregular.function_values) + + function_indices_halves = np.split(fd_concat.function_indices, 2) + indices = fdatairregular.function_indices + second_half_indices = indices + fdatairregular.num_observations + + function_args_halves = np.split(fd_concat.function_arguments, 2) + function_values_halves = np.split(fd_concat.function_values, 2) + + assert len(fd_concat) == 2 * len(fdatairregular) + assert np.all(function_indices_halves[1] == second_half_indices) + assert fd_concat.num_observations == 2 * fdatairregular.num_observations + assert np.all(function_args_halves[1] == fdatairregular.function_arguments) + assert 
np.all(function_values_halves[1] == fdatairregular.function_values) def test_fdatairregular_equals( fdatairregular: FDataIrregular, ) -> None: - """Test for equals method, which in turn uses _eq_elementwise - to verify equality in every index, argument and value + """Test for equals method. + + It uses _eq_elementwise to verify equality in every + index, argument and value. Args: fdatairregular (FDataIrregular): FDataIrregular object - which can be unidimensional or multidimensional. + which can be unidimensional or multidimensional. """ assert fdatairregular.equals(fdatairregular) assert fdatairregular.equals(fdatairregular.copy()) @@ -397,9 +384,8 @@ def test_fdatairregular_to_grid( """Test conversion of FDataIrregular to and from FDataGrid. Args: - Args: fdatairregular (FDataIrregular): FDataIrregular object - which can be unidimensional or multidimensional. + which can be unidimensional or multidimensional. fdatagrid (FDataGrid): FDataGrid object. """ f_data_grid = fdatairregular.to_grid() @@ -407,4 +393,4 @@ def test_fdatairregular_to_grid( # FDataGrid -> FDataIrregular -> FDataGrid assert fdatagrid.equals(FDataIrregular.from_datagrid(fdatagrid).to_grid()) # FDataIrregular -> FDataGrid -> FDataIrregular - assert fdatairregular.equals(FDataIrregular.from_datagrid(f_data_grid)) \ No newline at end of file + assert fdatairregular.equals(FDataIrregular.from_datagrid(f_data_grid)) From 2c3d6c746e6bc4d279f34d484c7da5bf868da7d1 Mon Sep 17 00:00:00 2001 From: opintosant Date: Thu, 27 Apr 2023 12:11:13 +0200 Subject: [PATCH 058/144] Fix incorrect assertions in init test. 
--- skfda/tests/test_irregular.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/skfda/tests/test_irregular.py b/skfda/tests/test_irregular.py index 4a3091d6a..267a95fa0 100644 --- a/skfda/tests/test_irregular.py +++ b/skfda/tests/test_irregular.py @@ -175,9 +175,10 @@ def test_fdatairregular_init( fdatairregular (FDataIrregular): FDataIrregular object which can be unidimensional or multidimensional. """ + arguments = fdatairregular.function_arguments assert fdatairregular is not None - assert len(fdatairregular) == len(fdatairregular) - assert len(fdatairregular.function_arguments) == len(fdatairregular) + assert len(fdatairregular) == len(fdatairregular.function_indices) + assert len(arguments) == fdatairregular.num_observations def test_fdatairregular_copy( From 3f5890879f62584447bf9e67deda21afadf8b68a Mon Sep 17 00:00:00 2001 From: opintosant Date: Thu, 27 Apr 2023 12:52:06 +0200 Subject: [PATCH 059/144] Fix incorrect implementation of fetch_bone_density with argument as_frame. PEP8 compliant. --- skfda/datasets/_real_datasets.py | 30 +++++++++++++++++++----------- skfda/representation/irregular.py | 12 ++++++------ 2 files changed, 25 insertions(+), 17 deletions(-) diff --git a/skfda/datasets/_real_datasets.py b/skfda/datasets/_real_datasets.py index e71647159..ddeaa859f 100644 --- a/skfda/datasets/_real_datasets.py +++ b/skfda/datasets/_real_datasets.py @@ -1555,12 +1555,22 @@ def _fetch_loon_data(name: str) -> Any: _bone_density_descr = """ - The Bone Density dataset is a study of bone density + The Bone Density dataset is a study of bone density in boys and girls aged 8-17. It contains data from 423 individuals, measured irregularly in different times, with an average of ~3 points per individual. References: + https://cran.r-project.org/package=loon.data + Laura K. 
Bachrach, Trevor Hastie, May-Choo Wang, + Balasubramanian Narasimhan, and Robert Marcus (1999) + "Bone Mineral Acquisition in Healthy Asian, Hispanic, Black + and Caucasian Youth. A Longitudinal Study", + J Clin Endocrinol Metab, 84, 4702-12. + Trevor Hastie, Robert Tibshirani, and Jerome Friedman (2009) + "The Elements of Statistical Learning", + 2nd Edition, Springer New York + """ @@ -1573,7 +1583,6 @@ def fetch_bone_density( The data is obtained from the R package 'loon.data', which compiles several irregular datasets. Sources to be determined. - """ descr = _bone_density_descr frame = None @@ -1594,22 +1603,21 @@ def fetch_bone_density( coordinate_columns=coordinate_name, argument_names=[argument_name], coordinate_names=[coordinate_name], - dataset_name="bone_ext" + dataset_name="bone_ext", ) - + target = pd.Series( data.drop_duplicates(subset=["idnum"])[target_name], name="group", ) - + feature_name = curves.dataset_name.lower() target_names = target.values.tolist() - + if as_frame: - #TODO Fix dtype problems - #curves = pd.DataFrame({feature_name: curves}) - curves = pd.DataFrame({feature_name: curves.to_grid()}) - frame = pd.concat([curves, target], axis=1) + curves = pd.DataFrame({feature_name: curves}) + target_as_frame = target.reset_index(drop=True).to_frame() + frame = pd.concat([curves, target_as_frame], axis=1) else: target = target.values.codes @@ -1624,4 +1632,4 @@ def fetch_bone_density( feature_names=[argument_name], target_names=target_names, DESCR=descr, - ) \ No newline at end of file + ) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 448bd5df0..edd31f2e4 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -1543,18 +1543,18 @@ def _take_allow_fill( fill_value: T, ) -> T: result = self.copy() - result.data_matrix = np.full( - (len(indices),) + self.data_matrix.shape[1:], + result.function_values = np.full( + (len(indices),) + self.function_values.shape[1:], np.nan, 
) positive_mask = indices >= 0 - result.data_matrix[positive_mask] = self.data_matrix[ + result.function_values[positive_mask] = self.function_values[ indices[positive_mask] ] if fill_value is not self.dtype.na_value: - result.data_matrix[~positive_mask] = fill_value.data_matrix[0] + result.function_values[~positive_mask] = fill_value.function_values[0] return result @@ -1673,8 +1673,8 @@ def __eq__(self, other: Any) -> bool: def __hash__(self) -> int: return hash( ( - self.function_indices, - self.function_arguments, + str(self.function_indices), + str(self.function_arguments), self.domain_range, self.dim_codomain, ), From 0ca3f49abb45c819edf920be155549306d6cad9d Mon Sep 17 00:00:00 2001 From: opintosant Date: Thu, 27 Apr 2023 13:16:50 +0200 Subject: [PATCH 060/144] Make IrregularScatterPlot and IrregularPlot, as well as class PlotIrregular, compliant with PEP 8 and wemake --- .../visualization/representation.py | 49 +++++++++---------- 1 file changed, 23 insertions(+), 26 deletions(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index 2af74d21a..e59efb284 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -550,7 +550,7 @@ def _plot( _set_labels(self.fdata, fig, axes, self.patches) -class PlotIrregular(BasePlot): +class PlotIrregular(BasePlot): # noqa: WPS230 """ Class used to plot a FDataIrregular object. @@ -598,7 +598,7 @@ class PlotIrregular(BasePlot): matplotlib.pyplot.plot_surface function. 
""" - def __init__( + def __init__( # noqa: WPS211 self, fdata: FDataIrregular, chart: Figure | Axes | None = None, @@ -627,15 +627,17 @@ def __init__( # There may be different points for each function self.grid_points = [] self.evaluated_points = [] - for index_start, index_end in zip(list(self.fdata.function_indices), - list(self.fdata.function_indices[1:])): + indices = np.append(self.fdata.function_indices, self.fdata.n_samples) + for index_start, index_end in zip( + indices, + indices[1:], + ): self.grid_points.append( - self.fdata.function_arguments[index_start:index_end]) + self.fdata.function_arguments[index_start:index_end], + ) self.evaluated_points.append( - self.fdata.function_values[index_start:index_end]) - # Dont forget to add the last one - self.grid_points.append(self.fdata.function_arguments[index_end:]) - self.evaluated_points.append(self.fdata.function_values[index_end:]) + self.fdata.function_values[index_start:index_end], + ) self.domain_range = domain_range self.group = group @@ -678,13 +680,14 @@ def _plot( axes: Sequence[Axes], ) -> None: # Implement in subclasses - pass - - + pass + + class LinearPlotIrregular(PlotIrregular): """ - Class used to plot the individual curves of a FDataIrregular object - using linear interpolation between the points. + Class used to plot the individual curves of a FDataIrregular object. + + It uses linear interpolation between the points of each curve. """ def _plot( @@ -698,10 +701,8 @@ def _plot( Returns: fig: figure object in which the graphs are plotted. """ - self.artists = np.zeros( - (self.n_samples, self.fdata.dim_codomain), - dtype=Artist, - ) + artists_shape = (self.n_samples, self.fdata.dim_codomain) + self.artists = np.zeros(artists_shape, dtype=Artist) color_dict: Dict[str, ColorLike | None] = {} @@ -725,11 +726,9 @@ def _plot( _set_labels(self.fdata, fig, axes, self.patches) - + class ScatterPlotIrregular(PlotIrregular): - """ - Class used to scatter a FDataIrregular object. 
- """ + """Class used to scatter plot a FDataIrregular object.""" def _plot( self, @@ -742,10 +741,8 @@ def _plot( Returns: fig: figure object in which the graphs are plotted. """ - self.artists = np.zeros( - (self.n_samples, self.fdata.dim_codomain), - dtype=Artist, - ) + artists_shape = (self.n_samples, self.fdata.dim_codomain) + self.artists = np.zeros(artists_shape, dtype=Artist) color_dict: Dict[str, ColorLike | None] = {} From 29da53e6df18c856d69fa923e088a278f091d96f Mon Sep 17 00:00:00 2001 From: opintosant Date: Thu, 27 Apr 2023 13:50:53 +0200 Subject: [PATCH 061/144] Test implementation of to_basis --- skfda/tests/test_irregular_operations.py | 112 ++++++++++++++++++++++- 1 file changed, 111 insertions(+), 1 deletion(-) diff --git a/skfda/tests/test_irregular_operations.py b/skfda/tests/test_irregular_operations.py index 4e6760354..17b47a92e 100644 --- a/skfda/tests/test_irregular_operations.py +++ b/skfda/tests/test_irregular_operations.py @@ -18,6 +18,7 @@ MAX_VALUES_PER_CURVE = 10 DIMENSIONS = 2 N_BASIS = 5 +DECIMALS = 4 random_state = np.random.RandomState(seed=SEED) @@ -72,6 +73,58 @@ def input_arrays_2D( return indices, values, arguments +@pytest.fixture() +def fdatagrid1D( +) -> FDataGrid: + """Generate FDataGrid""" + num_values_per_curve = NUM_CURVES + + data_matrix = random_state.rand(NUM_CURVES, num_values_per_curve, 1) + # Grid points must be sorted + grid_points = np.sort(random_state.rand(num_values_per_curve)) + + return FDataGrid( + data_matrix=data_matrix, + grid_points=grid_points, + ) + +@pytest.fixture() +def fdatagrid2D( +) -> FDataGrid: + """Generate multidimensional FDataGrid.""" + num_values_per_curve = NUM_CURVES + + data_matrix = random_state.rand( + NUM_CURVES, + num_values_per_curve, + DIMENSIONS, + ) + + # Grid points must be sorted + grid_points = np.sort(random_state.rand(num_values_per_curve)) + + return FDataGrid( + data_matrix=data_matrix, + grid_points=grid_points, + ) + +@pytest.fixture( + params=[ + "fdatagrid1D", + 
"fdatagrid2D", + ], +) +def fdatagrid( + request: Any, + fdatagrid1D: FDataGrid, + fdatagrid2D: FDataGrid, +) -> FDataIrregular: + """Return 'fdatagrid1D' or 'fdatagrid2D'.""" + if request.param == "fdatagrid1D": + return fdatagrid1D + elif request.param == "fdatagrid2D": + return fdatagrid2D + @pytest.fixture(params=["single_curve", "multiple_curves"]) def fdatairregular1D( request: Any, @@ -717,4 +770,61 @@ def test_fdatairregular_basis_operation( for coordinate in fdatairregular.coordinates ] - assert all([isinstance(fd_basis, FDataBasis) for fd_basis in fd_basis_coords]) \ No newline at end of file + assert all([isinstance(fd_basis, FDataBasis) for fd_basis in fd_basis_coords]) + + +def test_fdatairregular_to_basis_consistency( + fdatagrid: FDataIrregular, + all_basis : Basis, +) -> None: + """Test that irregular to_basis is consistent with + analogous FDataGrid to_basis in 1D. + + Args: + fdatairregular1D (FDataIrregular): FDataIrregular + object with dimensions (1,1). + all_basis (Basis): FDataBasis object. 
+ """ + fd_irregular = FDataIrregular.from_datagrid(fdatagrid) + + if fd_irregular.dim_domain == 1: + basis = all_basis( + domain_range=fd_irregular.domain_range, + n_basis=N_BASIS, + ) + else: + basis_by_dim = [ + all_basis( + domain_range=fd_irregular.domain_range[dim: dim + 1], + n_basis=N_BASIS, + ) + for dim in range(fd_irregular.dim_domain) + ] + basis = TensorBasis(basis_by_dim) + + irregular_basis = [ + coord.to_basis(basis) + for coord in fd_irregular.coordinates + ] + + grid_basis = [ + coord.to_basis(basis) + for coord in fdatagrid.coordinates + ] + + irregular_coefs = [ + b.coefficients.round(DECIMALS) + for b in irregular_basis + ] + + grid_coefs = [ + b.coefficients.round(DECIMALS) + for b in grid_basis + ] + + assert all( + [ + np.all(irregular_coefs[i] == g_coef) + for i, g_coef in enumerate(grid_coefs) + ], + ) \ No newline at end of file From 95d86bcf58fd07b13206240513065eb8ce40a8c1 Mon Sep 17 00:00:00 2001 From: opintosant Date: Thu, 27 Apr 2023 14:45:16 +0200 Subject: [PATCH 062/144] Make test_irregular_operations cleaner and comply with PEP8 and wemake --- skfda/tests/test_irregular_operations.py | 721 ++++++++++++----------- 1 file changed, 369 insertions(+), 352 deletions(-) diff --git a/skfda/tests/test_irregular_operations.py b/skfda/tests/test_irregular_operations.py index 17b47a92e..cf1dc66b9 100644 --- a/skfda/tests/test_irregular_operations.py +++ b/skfda/tests/test_irregular_operations.py @@ -1,13 +1,19 @@ -"""Test the basic methods of the FDataIrregular structure""" -from ..typing._numpy import ArrayLike, Any -from typing import Tuple, Optional +"""Test the operations of the FDataIrregular structure.""" +from typing import Optional, Tuple + import numpy as np import pytest -from skfda.datasets._real_datasets import _fetch_loon_data -from skfda.representation import FDataIrregular, FDataGrid -from skfda.representation.interpolation import SplineInterpolation -from skfda.representation.basis import Basis, FDataBasis, FourierBasis, 
BSplineBasis, TensorBasis +from skfda.representation import FDataGrid, FDataIrregular +from skfda.representation.basis import ( + Basis, + BSplineBasis, + FDataBasis, + FourierBasis, + TensorBasis, +) + +from ..typing._numpy import Any, ArrayLike ############ # MACROS @@ -26,22 +32,30 @@ # FIXTURES ############ + @pytest.fixture() def input_arrays( num_curves: Optional[int] = NUM_CURVES, max_values_per_curve: Optional[int] = MAX_VALUES_PER_CURVE, - dimensions: Optional[int] = 1 + dimensions: Optional[int] = 1, ) -> Tuple[ArrayLike, ArrayLike, ArrayLike]: - """ + """Create undiimensional arrays for FDataIrregular. + Generate three unidimensional arrays describing a FDataIrregular structure with fixed sizes given by the parameters """ - num_values_per_curve = max_values_per_curve*np.ones(num_curves).astype(int) - values_per_curve = [random_state.rand(num_values, dimensions) - for num_values in num_values_per_curve] - args_per_curve = [random_state.rand(num_values, dimensions) - for num_values in num_values_per_curve] + num_values_per_curve = max_values_per_curve * np.ones(num_curves) + num_values_per_curve = num_values_per_curve.astype(int) + + values_per_curve = [ + random_state.rand(num_values, dimensions) + for num_values in num_values_per_curve + ] + args_per_curve = [ + random_state.rand(num_values, dimensions) + for num_values in num_values_per_curve + ] indices = np.cumsum(num_values_per_curve) - num_values_per_curve values = np.concatenate(values_per_curve) @@ -51,21 +65,28 @@ def input_arrays( @pytest.fixture() -def input_arrays_2D( +def input_arrays_2d( num_curves: Optional[int] = NUM_CURVES, max_values_per_curve: Optional[int] = MAX_VALUES_PER_CURVE, - dimensions: Optional[int] = DIMENSIONS + dimensions: Optional[int] = DIMENSIONS, ) -> Tuple[ArrayLike, ArrayLike, ArrayLike]: - """ + """Create multidimensional arrays for FDataIrregular. 
+ Generate three unidimensional arrays describing a FDataIrregular structure with fixed sizes given by the parameters """ - num_values_per_curve = max_values_per_curve*np.ones(num_curves).astype(int) - values_per_curve = [random_state.rand(num_values, dimensions) - for num_values in num_values_per_curve] - args_per_curve = [random_state.rand(num_values, dimensions) - for num_values in num_values_per_curve] + num_values_per_curve = max_values_per_curve * np.ones(num_curves) + num_values_per_curve = num_values_per_curve.astype(int) + + values_per_curve = [ + random_state.rand(num_values, dimensions) + for num_values in num_values_per_curve + ] + args_per_curve = [ + random_state.rand(num_values, dimensions) + for num_values in num_values_per_curve + ] indices = np.cumsum(num_values_per_curve) - num_values_per_curve values = np.concatenate(values_per_curve) @@ -73,10 +94,11 @@ def input_arrays_2D( return indices, values, arguments + @pytest.fixture() -def fdatagrid1D( +def fdatagrid_1d( ) -> FDataGrid: - """Generate FDataGrid""" + """Generate FDataGrid.""" num_values_per_curve = NUM_CURVES data_matrix = random_state.rand(NUM_CURVES, num_values_per_curve, 1) @@ -88,8 +110,9 @@ def fdatagrid1D( grid_points=grid_points, ) + @pytest.fixture() -def fdatagrid2D( +def fdatagrid_2d( ) -> FDataGrid: """Generate multidimensional FDataGrid.""" num_values_per_curve = NUM_CURVES @@ -108,25 +131,27 @@ def fdatagrid2D( grid_points=grid_points, ) + @pytest.fixture( params=[ - "fdatagrid1D", - "fdatagrid2D", + "fdatagrid_1d", + "fdatagrid_2d", ], ) def fdatagrid( request: Any, - fdatagrid1D: FDataGrid, - fdatagrid2D: FDataGrid, + fdatagrid_1d: FDataGrid, + fdatagrid_2d: FDataGrid, ) -> FDataIrregular: - """Return 'fdatagrid1D' or 'fdatagrid2D'.""" - if request.param == "fdatagrid1D": - return fdatagrid1D - elif request.param == "fdatagrid2D": - return fdatagrid2D + """Return 'fdatagrid_1d' or 'fdatagrid_2d'.""" + if request.param == "fdatagrid_1d": + return fdatagrid_1d + elif 
request.param == "fdatagrid_2d": + return fdatagrid_2d + @pytest.fixture(params=["single_curve", "multiple_curves"]) -def fdatairregular1D( +def fdatairregular_1d( request: Any, input_arrays: Tuple[ArrayLike, ArrayLike, ArrayLike], ) -> FDataIrregular: @@ -137,117 +162,120 @@ def fdatairregular1D( function_arguments=arguments, function_values=values, ) - + if request.param == "single_curve": return f_data_irreg[0] elif request.param == "multiple_curves": return f_data_irreg - + + @pytest.fixture(params=["single_curve", "multiple_curves"]) -def fdatairregular2D( +def fdatairregular_2d( request: Any, - input_arrays_2D: Tuple[ArrayLike, ArrayLike, ArrayLike], + input_arrays_2d: Tuple[ArrayLike, ArrayLike, ArrayLike], ) -> FDataIrregular: """Return FDataIrregular with only 1 curve or NUM_CURVES as requested.""" - indices, arguments, values = input_arrays_2D + indices, arguments, values = input_arrays_2d f_data_irreg = FDataIrregular( function_indices=indices, function_arguments=arguments, function_values=values, ) - + if request.param == "single_curve": return f_data_irreg[0] elif request.param == "multiple_curves": return f_data_irreg -@pytest.fixture(params=["fdatairregular1D", "fdatairregular2D"]) + +@pytest.fixture(params=["fdatairregular_1d", "fdatairregular_2d"]) def fdatairregular( request: Any, - fdatairregular1D: FDataIrregular, - fdatairregular2D: FDataIrregular, + fdatairregular_1d: FDataIrregular, + fdatairregular_2d: FDataIrregular, ) -> FDataIrregular: - """Return 'fdatairregular1D' or 'fdatairregular2D'.""" - if request.param == "fdatairregular1D": - return fdatairregular1D - elif request.param == "fdatairregular2D": - return fdatairregular2D + """Return 'fdatairregular_1d' or 'fdatairregular_2d'.""" + if request.param == "fdatairregular_1d": + return fdatairregular_1d + elif request.param == "fdatairregular_2d": + return fdatairregular_2d + @pytest.fixture(params=["scalar", "vector", "matrix", "fdatairregular"]) -def other_1D( +def other_1d( request: 
Any, - fdatairregular1D: FDataIrregular, + fdatairregular_1d: FDataIrregular, ) -> FDataIrregular: """Return an operator for testing FDataIrregular operations.""" if request.param == "scalar": return 2 elif request.param == "vector": - return 2*np.ones(NUM_CURVES) + return 2 * np.ones(NUM_CURVES) elif request.param == "matrix": - return 2*np.ones((NUM_CURVES, 1)) + return 2 * np.ones((NUM_CURVES, 1)) elif request.param == "fdatairregular": - return fdatairregular1D - + return fdatairregular_1d + + @pytest.fixture(params=["scalar", "vector", "matrix", "fdatairregular"]) -def other_2D( +def other_2d( request: Any, - fdatairregular2D: FDataIrregular, + fdatairregular_2d: FDataIrregular, ) -> FDataIrregular: """Return an operator for testing FDataIrregular operations.""" if request.param == "scalar": return 2 elif request.param == "vector": - return 2*np.ones(NUM_CURVES) + return 2 * np.ones(NUM_CURVES) elif request.param == "matrix": - return 2*np.ones((NUM_CURVES, DIMENSIONS)) + return 2 * np.ones((NUM_CURVES, DIMENSIONS)) elif request.param == "fdatairregular": - return fdatairregular2D - + return fdatairregular_2d + + _all_numeric_reductions = [ "sum", "var", "mean", - #"cov", + # "cov", ] + @pytest.fixture(params=_all_numeric_reductions) def all_numeric_reductions(request: Any) -> Any: - """ - Fixture for numeric reduction names. - """ + """Fixture for numeric reduction names.""" return request.param + _all_basis_operations = [ "to_basis", ] + @pytest.fixture(params=_all_basis_operations) def all_basis_operations(request: Any) -> Any: - """ - Fixture for basis operation names. - """ + """Fixture for basis operation names.""" return request.param + _all_basis = [ FourierBasis, BSplineBasis, ] + @pytest.fixture(params=_all_basis) def all_basis(request: Any) -> Any: - """ - Fixture for basis names. 
- """ + """Fixture for basis names.""" return request.param ################## # TEST OPERATIONS ################## + + class TestArithmeticOperations1D: - """ - Class which encapsulates the testing of basic arithmetic operations - for unidimensional FDataIrregular - """ + """Class for testing basic operations for unidimensional FDataIrregular.""" def _take_first( self, @@ -259,231 +287,211 @@ def _take_first( return other.function_values return other + def _single_curve( + self, + fdatairregular_1d, + other_1d, + ) -> np.ndarray: + if isinstance(other_1d, (np.ndarray, FDataIrregular)): + if len(fdatairregular_1d) == 1: + return other_1d[:1] + return other_1d + def test_fdatairregular_arithmetic_sum( self, - fdatairregular1D: FDataIrregular, - other_1D: Any, + fdatairregular_1d: FDataIrregular, + other_1d: Any, ) -> None: - """Tests the basic arithmetic operation fdatairregular + other + """Tests the basic arithmetic operation fdatairregular + other. Args: - fdatairregular (FDataIrregular): FDataIrregular object to test - other (Any): Scalar, vector, matrix or FDataIrregular + fdatairregular_1d (FDataIrregular): FDataIrregular object to test. + other_1d (Any): Scalar, vector, matrix or FDataIrregular. 
""" # Account for single curve test - if isinstance(other_1D, np.ndarray) or isinstance(other_1D, FDataIrregular): - if len(fdatairregular1D) == 1: - other_1D = other_1D[0] + other_1d = self._single_curve(fdatairregular_1d, other_1d) - f_data_sum = fdatairregular1D + other_1D + f_data_sum = fdatairregular_1d + other_1d - assert np.all( - f_data_sum.function_values == - fdatairregular1D.function_values + self._take_first(other_1D) - ) + result = fdatairregular_1d.function_values + self._take_first(other_1d) + + assert np.all(f_data_sum.function_values == result) def test_fdatairregular_arithmetic_rsum( self, - fdatairregular1D: FDataIrregular, - other_1D: Any, + fdatairregular_1d: FDataIrregular, + other_1d: Any, ) -> None: - """Tests the basic arithmetic operation other + fdatairregular + """Tests the basic arithmetic operation other + fdatairregular. Args: - fdatairregular (FDataIrregular): FDataIrregular object to test - other (Any): Scalar, vector, matrix or FDataIrregular + fdatairregular_1d (FDataIrregular): FDataIrregular object to test. + other_1d (Any): Scalar, vector, matrix or FDataIrregular. 
""" # Account for single curve test - if isinstance(other_1D, np.ndarray) or isinstance(other_1D, FDataIrregular): - if len(fdatairregular1D) == 1: - other_1D = other_1D[0] + other_1d = self._single_curve(fdatairregular_1d, other_1d) - f_data_sum = other_1D + fdatairregular1D + f_data_sum = other_1d + fdatairregular_1d - assert np.all( - f_data_sum.function_values == - self._take_first(other_1D) + fdatairregular1D.function_values - ) + result = self._take_first(other_1d) + fdatairregular_1d.function_values + + assert np.all(f_data_sum.function_values == result) - def test_fdatairregular_arithmetic_sum_commutative( + def test_fdatairregular_arithmetic_sum_commutative( # noqa: WPS118 self, - fdatairregular1D: FDataIrregular, - other_1D: Any, + fdatairregular_1d: FDataIrregular, + other_1d: Any, ) -> None: - """Tests the basic arithmetic operation other + fdatairregular + """Tests the basic arithmetic operation other + fdatairregular. Args: - fdatairregular (FDataIrregular): FDataIrregular object to test - other (Any): Scalar, vector, matrix or FDataIrregular + fdatairregular_1d (FDataIrregular): FDataIrregular object to test. + other_1d (Any): Scalar, vector, matrix or FDataIrregular. """ # Account for single curve test - if isinstance(other_1D, np.ndarray) or isinstance(other_1D, FDataIrregular): - if len(fdatairregular1D) == 1: - other_1D = other_1D[0] + other_1d = self._single_curve(fdatairregular_1d, other_1d) - assert fdatairregular1D + other_1D == other_1D + fdatairregular1D + assert fdatairregular_1d + other_1d == other_1d + fdatairregular_1d def test_fdatairregular_arithmetic_sub( self, - fdatairregular1D: FDataIrregular, - other_1D: Any, + fdatairregular_1d: FDataIrregular, + other_1d: Any, ) -> None: - """Tests the basic arithmetic operation fdatairregular - other + """Tests the basic arithmetic operation fdatairregular - other. 
Args: - fdatairregular (FDataIrregular): FDataIrregular object to test - other (Any): Scalar, vector, matrix or FDataIrregular + fdatairregular_1d (FDataIrregular): FDataIrregular object to test. + other_1d (Any): Scalar, vector, matrix or FDataIrregular. """ # Account for single curve test - if isinstance(other_1D, np.ndarray) or isinstance(other_1D, FDataIrregular): - if len(fdatairregular1D) == 1: - other_1D = other_1D[0] + other_1d = self._single_curve(fdatairregular_1d, other_1d) - f_data_sum = fdatairregular1D - other_1D + f_data_sub = fdatairregular_1d - other_1d - assert np.all( - f_data_sum.function_values == - fdatairregular1D.function_values - self._take_first(other_1D) - ) + result = fdatairregular_1d.function_values - self._take_first(other_1d) + + assert np.all(f_data_sub.function_values == result) def test_fdatairregular_arithmetic_rsub( self, - fdatairregular1D: FDataIrregular, - other_1D: Any, + fdatairregular_1d: FDataIrregular, + other_1d: Any, ) -> None: - """Tests the basic arithmetic operation other - fdatairregular + """Tests the basic arithmetic operation other - fdatairregular. Args: - fdatairregular (FDataIrregular): FDataIrregular object to test - other (Any): Scalar, vector, matrix or FDataIrregular + fdatairregular_1d (FDataIrregular): FDataIrregular object to test. + other_1d (Any): Scalar, vector, matrix or FDataIrregular. 
""" # Account for single curve test - if isinstance(other_1D, np.ndarray) or isinstance(other_1D, FDataIrregular): - if len(fdatairregular1D) == 1: - other_1D = other_1D[0] + other_1d = self._single_curve(fdatairregular_1d, other_1d) - f_data_sum = other_1D - fdatairregular1D + f_data_sub = other_1d - fdatairregular_1d - assert np.all( - f_data_sum.function_values == - self._take_first(other_1D) - fdatairregular1D.function_values - ) + result = self._take_first(other_1d) - fdatairregular_1d.function_values + + assert np.all(f_data_sub.function_values == result) def test_fdatairregular_arithmetic_mul( self, - fdatairregular1D: FDataIrregular, - other_1D: Any, + fdatairregular_1d: FDataIrregular, + other_1d: Any, ) -> None: - """Tests the basic arithmetic operation fdatairregular * other + """Tests the basic arithmetic operation fdatairregular * other. Args: - fdatairregular (FDataIrregular): FDataIrregular object to test - other (Any): Scalar, vector, matrix or FDataIrregular + fdatairregular_1d (FDataIrregular): FDataIrregular object to test. + other_1d (Any): Scalar, vector, matrix or FDataIrregular. """ # Account for single curve test - if isinstance(other_1D, np.ndarray) or isinstance(other_1D, FDataIrregular): - if len(fdatairregular1D) == 1: - other_1D = other_1D[0] + other_1d = self._single_curve(fdatairregular_1d, other_1d) - f_data_mul = fdatairregular1D * other_1D + f_data_mul = fdatairregular_1d * other_1d - assert np.all( - f_data_mul.function_values == - fdatairregular1D.function_values * self._take_first(other_1D) - ) + result = fdatairregular_1d.function_values * self._take_first(other_1d) + + assert np.all(f_data_mul.function_values == result) def test_fdatairregular_arithmetic_rmul( self, - fdatairregular1D: FDataIrregular, - other_1D: Any, + fdatairregular_1d: FDataIrregular, + other_1d: Any, ) -> None: - """Tests the basic arithmetic operation other * fdatairregular + """Tests the basic arithmetic operation other * fdatairregular. 
Args: - fdatairregular (FDataIrregular): FDataIrregular object to test - other (Any): Scalar, vector, matrix or FDataIrregular + fdatairregular_1d (FDataIrregular): FDataIrregular object to test. + other_1d (Any): Scalar, vector, matrix or FDataIrregular. """ # Account for single curve test - if isinstance(other_1D, np.ndarray) or isinstance(other_1D, FDataIrregular): - if len(fdatairregular1D) == 1: - other_1D = other_1D[0] + other_1d = self._single_curve(fdatairregular_1d, other_1d) - f_data_mul = other_1D * fdatairregular1D + f_data_mul = other_1d * fdatairregular_1d - assert np.all( - f_data_mul.function_values == - self._take_first(other_1D) * fdatairregular1D.function_values - ) + result = self._take_first(other_1d) * fdatairregular_1d.function_values - def test_fdatairregular_arithmetic_mul_commutative( + assert np.all(f_data_mul.function_values == result) + + def test_fdatairregular_arithmetic_mul_commutative( # noqa: WPS118 self, - fdatairregular1D: FDataIrregular, - other_1D: Any, + fdatairregular_1d: FDataIrregular, + other_1d: Any, ) -> None: - """Tests the basic arithmetic operation other * fdatairregular + """Tests the basic arithmetic operation other * fdatairregular. Args: - fdatairregular (FDataIrregular): FDataIrregular object to test - other (Any): Scalar, vector, matrix or FDataIrregular + fdatairregular_1d (FDataIrregular): FDataIrregular object to test. + other_1d (Any): Scalar, vector, matrix or FDataIrregular. 
""" # Account for single curve test - if isinstance(other_1D, np.ndarray) or isinstance(other_1D, FDataIrregular): - if len(fdatairregular1D) == 1: - other_1D = other_1D[0] + other_1d = self._single_curve(fdatairregular_1d, other_1d) - assert fdatairregular1D * other_1D == other_1D * fdatairregular1D + assert fdatairregular_1d * other_1d == other_1d * fdatairregular_1d def test_fdatairregular_arithmetic_div( self, - fdatairregular1D: FDataIrregular, - other_1D: Any, + fdatairregular_1d: FDataIrregular, + other_1d: Any, ) -> None: - """Tests the basic arithmetic operation fdatairregular / other + """Tests the basic arithmetic operation fdatairregular / other. Args: - fdatairregular (FDataIrregular): FDataIrregular object to test - other (Any): Scalar, vector, matrix or FDataIrregular + fdatairregular_1d (FDataIrregular): FDataIrregular object to test. + other_1d (Any): Scalar, vector, matrix or FDataIrregular. """ # Account for single curve test - if isinstance(other_1D, np.ndarray) or isinstance(other_1D, FDataIrregular): - if len(fdatairregular1D) == 1: - other_1D = other_1D[0] + other_1d = self._single_curve(fdatairregular_1d, other_1d) - f_data_div = fdatairregular1D / other_1D + f_data_div = fdatairregular_1d / other_1d - assert np.all( - f_data_div.function_values == - fdatairregular1D.function_values / self._take_first(other_1D) - ) + result = fdatairregular_1d.function_values / self._take_first(other_1d) + + assert np.all(f_data_div.function_values == result) def test_fdatairregular_arithmetic_rdiv( self, - fdatairregular1D: FDataIrregular, - other_1D: Any, + fdatairregular_1d: FDataIrregular, + other_1d: Any, ) -> None: - """Tests the basic arithmetic operation other / fdatairregular + """Tests the basic arithmetic operation other / fdatairregular. Args: - fdatairregular (FDataIrregular): FDataIrregular object to test - other (Any): Scalar, vector, matrix or FDataIrregular + fdatairregular_1d (FDataIrregular): FDataIrregular object to test. 
+ other_1d (Any): Scalar, vector, matrix or FDataIrregular. """ # Account for single curve test - if isinstance(other_1D, np.ndarray) or isinstance(other_1D, FDataIrregular): - if len(fdatairregular1D) == 1: - other_1D = other_1D[0] + other_1d = self._single_curve(fdatairregular_1d, other_1d) - f_data_div = other_1D / fdatairregular1D + f_data_div = other_1d / fdatairregular_1d + + result = self._take_first(other_1d) / fdatairregular_1d.function_values + + assert np.all(f_data_div.function_values == result) - assert np.all( - f_data_div.function_values == - self._take_first(other_1D) / fdatairregular1D.function_values - ) class TestArithmeticOperations2D: - """ - Class which encapsulates the testing of basic arithmetic operations - for multidimensional FDataIrregular - """ + """Test basic operations for multidimensional FDataIrregular.""" def _take_first( self, @@ -495,225 +503,207 @@ def _take_first( return other.function_values return other + def _single_curve( + self, + fdatairregular_2d, + other_2d, + ) -> np.ndarray: + if isinstance(other_2d, (np.ndarray, FDataIrregular)): + if len(fdatairregular_2d) == 1: + return other_2d[:1] + return other_2d + def test_fdatairregular_arithmetic_sum( self, - fdatairregular2D: FDataIrregular, - other_2D: Any, + fdatairregular_2d: FDataIrregular, + other_2d: Any, ) -> None: - """Tests the basic arithmetic operation fdatairregular + other + """Tests the basic arithmetic operation fdatairregular + other. Args: - fdatairregular (FDataIrregular): FDataIrregular object to test - other (Any): Scalar, vector, matrix or FDataIrregular + fdatairregular_2d (FDataIrregular): FDataIrregular object to test. + other_2d (Any): Scalar, vector, matrix or FDataIrregular. 
""" # Account for single curve test - if isinstance(other_2D, np.ndarray) or isinstance(other_2D, FDataIrregular): - if len(fdatairregular2D) == 1: - other_2D = other_2D[:1] + other_2d = self._single_curve(fdatairregular_2d, other_2d) - f_data_sum = fdatairregular2D + other_2D + f_data_sum = fdatairregular_2d + other_2d - assert np.all( - f_data_sum.function_values == - fdatairregular2D.function_values + self._take_first(other_2D) - ) + result = fdatairregular_2d.function_values + self._take_first(other_2d) + + assert np.all(f_data_sum.function_values == result) def test_fdatairregular_arithmetic_rsum( self, - fdatairregular2D: FDataIrregular, - other_2D: Any, + fdatairregular_2d: FDataIrregular, + other_2d: Any, ) -> None: - """Tests the basic arithmetic operation other + fdatairregular + """Tests the basic arithmetic operation other + fdatairregular. Args: - fdatairregular (FDataIrregular): FDataIrregular object to test - other (Any): Scalar, vector, matrix or FDataIrregular + fdatairregular_2d (FDataIrregular): FDataIrregular object to test. + other_2d (Any): Scalar, vector, matrix or FDataIrregular. 
""" # Account for single curve test - if isinstance(other_2D, np.ndarray) or isinstance(other_2D, FDataIrregular): - if len(fdatairregular2D) == 1: - other_2D = other_2D[:1] + other_2d = self._single_curve(fdatairregular_2d, other_2d) - f_data_sum = other_2D + fdatairregular2D + f_data_sum = other_2d + fdatairregular_2d - assert np.all( - f_data_sum.function_values == - self._take_first(other_2D) + fdatairregular2D.function_values - ) + result = self._take_first(other_2d) + fdatairregular_2d.function_values + + assert np.all(f_data_sum.function_values == result) - def test_fdatairregular_arithmetic_sum_commutative( + def test_fdatairregular_arithmetic_sum_commutative( # noqa: WPS118 self, - fdatairregular2D: FDataIrregular, - other_2D: Any, + fdatairregular_2d: FDataIrregular, + other_2d: Any, ) -> None: - """Tests the basic arithmetic operation other + fdatairregular + """Tests the basic arithmetic operation other + fdatairregular. Args: - fdatairregular (FDataIrregular): FDataIrregular object to test - other (Any): Scalar, vector, matrix or FDataIrregular + fdatairregular_2d (FDataIrregular): FDataIrregular object to test. + other_2d (Any): Scalar, vector, matrix or FDataIrregular. """ # Account for single curve test - if isinstance(other_2D, np.ndarray) or isinstance(other_2D, FDataIrregular): - if len(fdatairregular2D) == 1: - other_2D = other_2D[:1] + other_2d = self._single_curve(fdatairregular_2d, other_2d) - assert fdatairregular2D + other_2D == other_2D + fdatairregular2D + assert fdatairregular_2d + other_2d == other_2d + fdatairregular_2d def test_fdatairregular_arithmetic_sub( self, - fdatairregular2D: FDataIrregular, - other_2D: Any, + fdatairregular_2d: FDataIrregular, + other_2d: Any, ) -> None: - """Tests the basic arithmetic operation fdatairregular - other + """Tests the basic arithmetic operation fdatairregular - other. 
Args: - fdatairregular (FDataIrregular): FDataIrregular object to test - other (Any): Scalar, vector, matrix or FDataIrregular + fdatairregular_2d (FDataIrregular): FDataIrregular object to test. + other_2d (Any): Scalar, vector, matrix or FDataIrregular. """ # Account for single curve test - if isinstance(other_2D, np.ndarray) or isinstance(other_2D, FDataIrregular): - if len(fdatairregular2D) == 1: - other_2D = other_2D[:1] + other_2d = self._single_curve(fdatairregular_2d, other_2d) - f_data_sum = fdatairregular2D - other_2D + f_data_sub = fdatairregular_2d - other_2d - assert np.all( - f_data_sum.function_values == - fdatairregular2D.function_values - self._take_first(other_2D) - ) + result = fdatairregular_2d.function_values - self._take_first(other_2d) + + assert np.all(f_data_sub.function_values == result) def test_fdatairregular_arithmetic_rsub( self, - fdatairregular2D: FDataIrregular, - other_2D: Any, + fdatairregular_2d: FDataIrregular, + other_2d: Any, ) -> None: - """Tests the basic arithmetic operation other - fdatairregular + """Tests the basic arithmetic operation other - fdatairregular. Args: - fdatairregular (FDataIrregular): FDataIrregular object to test - other (Any): Scalar, vector, matrix or FDataIrregular + fdatairregular_2d (FDataIrregular): FDataIrregular object to test. + other_2d (Any): Scalar, vector, matrix or FDataIrregular. 
""" # Account for single curve test - if isinstance(other_2D, np.ndarray) or isinstance(other_2D, FDataIrregular): - if len(fdatairregular2D) == 1: - other_2D = other_2D[:1] + other_2d = self._single_curve(fdatairregular_2d, other_2d) - f_data_sum = other_2D - fdatairregular2D + f_data_sub = other_2d - fdatairregular_2d - assert np.all( - f_data_sum.function_values == - self._take_first(other_2D) - fdatairregular2D.function_values - ) + result = self._take_first(other_2d) - fdatairregular_2d.function_values + + assert np.all(f_data_sub.function_values == result) def test_fdatairregular_arithmetic_mul( self, - fdatairregular2D: FDataIrregular, - other_2D: Any, + fdatairregular_2d: FDataIrregular, + other_2d: Any, ) -> None: - """Tests the basic arithmetic operation fdatairregular * other + """Tests the basic arithmetic operation fdatairregular * other. Args: - fdatairregular (FDataIrregular): FDataIrregular object to test - other (Any): Scalar, vector, matrix or FDataIrregular + fdatairregular_2d (FDataIrregular): FDataIrregular object to test. + other_2d (Any): Scalar, vector, matrix or FDataIrregular. """ # Account for single curve test - if isinstance(other_2D, np.ndarray) or isinstance(other_2D, FDataIrregular): - if len(fdatairregular2D) == 1: - other_2D = other_2D[:1] + other_2d = self._single_curve(fdatairregular_2d, other_2d) - f_data_mul = fdatairregular2D * other_2D + f_data_mul = fdatairregular_2d * other_2d - assert np.all( - f_data_mul.function_values == - fdatairregular2D.function_values * self._take_first(other_2D) - ) + result = fdatairregular_2d.function_values * self._take_first(other_2d) + + assert np.all(f_data_mul.function_values == result) def test_fdatairregular_arithmetic_rmul( self, - fdatairregular2D: FDataIrregular, - other_2D: Any, + fdatairregular_2d: FDataIrregular, + other_2d: Any, ) -> None: - """Tests the basic arithmetic operation other * fdatairregular + """Tests the basic arithmetic operation other * fdatairregular. 
Args: - fdatairregular (FDataIrregular): FDataIrregular object to test - other (Any): Scalar, vector, matrix or FDataIrregular + fdatairregular_2d (FDataIrregular): FDataIrregular object to test. + other_2d (Any): Scalar, vector, matrix or FDataIrregular. """ # Account for single curve test - if isinstance(other_2D, np.ndarray) or isinstance(other_2D, FDataIrregular): - if len(fdatairregular2D) == 1: - other_2D = other_2D[:1] + other_2d = self._single_curve(fdatairregular_2d, other_2d) - f_data_mul = other_2D * fdatairregular2D + f_data_mul = other_2d * fdatairregular_2d - assert np.all( - f_data_mul.function_values == - self._take_first(other_2D) * fdatairregular2D.function_values - ) + result = self._take_first(other_2d) * fdatairregular_2d.function_values + + assert np.all(f_data_mul.function_values == result) - def test_fdatairregular_arithmetic_mul_commutative( + def test_fdatairregular_arithmetic_mul_commutative( # noqa: WPS118 self, - fdatairregular2D: FDataIrregular, - other_2D: Any, + fdatairregular_2d: FDataIrregular, + other_2d: Any, ) -> None: - """Tests the basic arithmetic operation other * fdatairregular + """Tests the basic arithmetic operation other * fdatairregular. Args: - fdatairregular (FDataIrregular): FDataIrregular object to test - other (Any): Scalar, vector, matrix or FDataIrregular + fdatairregular_2d (FDataIrregular): FDataIrregular object to test. + other_2d (Any): Scalar, vector, matrix or FDataIrregular. 
""" # Account for single curve test - if isinstance(other_2D, np.ndarray) or isinstance(other_2D, FDataIrregular): - if len(fdatairregular2D) == 1: - other_2D = other_2D[:1] + other_2d = self._single_curve(fdatairregular_2d, other_2d) - assert fdatairregular2D * other_2D == other_2D * fdatairregular2D + assert fdatairregular_2d * other_2d == other_2d * fdatairregular_2d def test_fdatairregular_arithmetic_div( self, - fdatairregular2D: FDataIrregular, - other_2D: Any, + fdatairregular_2d: FDataIrregular, + other_2d: Any, ) -> None: - """Tests the basic arithmetic operation fdatairregular / other + """Tests the basic arithmetic operation fdatairregular / other. Args: - fdatairregular (FDataIrregular): FDataIrregular object to test - other (Any): Scalar, vector, matrix or FDataIrregular + fdatairregular_2d (FDataIrregular): FDataIrregular object to test. + other_2d (Any): Scalar, vector, matrix or FDataIrregular. """ # Account for single curve test - if isinstance(other_2D, np.ndarray) or isinstance(other_2D, FDataIrregular): - if len(fdatairregular2D) == 1: - other_2D = other_2D[:1] + other_2d = self._single_curve(fdatairregular_2d, other_2d) - f_data_div = fdatairregular2D / other_2D + f_data_div = fdatairregular_2d / other_2d - assert np.all( - f_data_div.function_values == - fdatairregular2D.function_values / self._take_first(other_2D) - ) + result = fdatairregular_2d.function_values / self._take_first(other_2d) + + assert np.all(f_data_div.function_values == result) def test_fdatairregular_arithmetic_rdiv( self, - fdatairregular2D: FDataIrregular, - other_2D: Any, + fdatairregular_2d: FDataIrregular, + other_2d: Any, ) -> None: - """Tests the basic arithmetic operation other / fdatairregular + """Tests the basic arithmetic operation other / fdatairregular. Args: - fdatairregular (FDataIrregular): FDataIrregular object to test - other (Any): Scalar, vector, matrix or FDataIrregular + fdatairregular_2d (FDataIrregular): FDataIrregular object to test. 
+ other_2d (Any): Scalar, vector, matrix or FDataIrregular. """ # Account for single curve test - if isinstance(other_2D, np.ndarray) or isinstance(other_2D, FDataIrregular): - if len(fdatairregular2D) == 1: - other_2D = other_2D[:1] + other_2d = self._single_curve(fdatairregular_2d, other_2d) - f_data_div = other_2D / fdatairregular2D + f_data_div = other_2d / fdatairregular_2d - assert np.all( - f_data_div.function_values == - self._take_first(other_2D) / fdatairregular2D.function_values - ) + result = self._take_first(other_2d) / fdatairregular_2d.function_values + + assert np.all(f_data_div.function_values == result) ########################## @@ -721,16 +711,24 @@ def test_fdatairregular_arithmetic_rdiv( ########################## class TestNumericReductions: - """ - Class which encapsulates the testing of numeric reductions - (such as mean, std) for FDataIrregular objects - """ + """Class for testing numeric reductions (mean, std) for FDataIrregular.""" + def test_fdatairregular_numeric_reduction( self, fdatairregular: FDataIrregular, all_numeric_reductions: str, ) -> None: - + """Test FDataIrregular numeric statistichal operations. + + All conversion methods will be tested with multiple + dimensions of codomain and domain. + + Args: + fdatairregular (FDataIrregular): FDataIrregular + object. + all_numeric_reductions (str): Method of the class + FDataIrregular to be tested. 
+ """ reduction = getattr(fdatairregular, all_numeric_reductions)() assert isinstance(reduction, FDataIrregular) @@ -738,64 +736,83 @@ def test_fdatairregular_numeric_reduction( # TEST BASIS OPERATIONS ######################## + class TestBasisOperations: - """ - Class which encapsulates the testing of basis operations - (such as to_basis) for FDataIrregular objects - """ + """Class for testing the basis operations or FDataIrregular objects.""" + def test_fdatairregular_basis_operation( self, fdatairregular: FDataIrregular, all_basis: Basis, all_basis_operations: str, ) -> None: + """Test FDataIrregular conversion to FDataBasis. + + All conversion methods will be tested with multiple + dimensions of codomain and domain, as well as with + different types of Basis. + + Args: + fdatairregular (FDataIrregular): FDataIrregular + object to be transformed to basis. + all_basis (Basis): Basis to use (Spline, Fourier, ..). + all_basis_operations (str): Method of the class + FDataIrregular to be tested. 
+ """ # Create Tensor basis for higher dimensions if fdatairregular.dim_domain == 1: basis = all_basis( - domain_range=fdatairregular.domain_range, + domain_range=fdatairregular.domain_range, n_basis=N_BASIS, - ) + ) else: basis_by_dim = [ all_basis( - domain_range=fdatairregular.domain_range[dim: dim + 1], + domain_range=fdatairregular.domain_range[dim: dim + 1], n_basis=N_BASIS, ) for dim in range(fdatairregular.dim_domain) ] basis = TensorBasis(basis_by_dim) - + fd_basis_coords = [ getattr(coordinate, all_basis_operations)(basis) for coordinate in fdatairregular.coordinates ] - - assert all([isinstance(fd_basis, FDataBasis) for fd_basis in fd_basis_coords]) + + assert all( + [ + isinstance(fd_basis, FDataBasis) + for fd_basis in fd_basis_coords + ], + ) def test_fdatairregular_to_basis_consistency( - fdatagrid: FDataIrregular, - all_basis : Basis, + fdatagrid: FDataGrid, + all_basis: Basis, ) -> None: - """Test that irregular to_basis is consistent with - analogous FDataGrid to_basis in 1D. + """Test that irregular to_basis is consistent with FDataGrid. + + FDataGrid is used as source because FDataIrregular can support + regular data, but the reverse is not necessarily true. The + to_basis method specifically does not allow NaN values. Args: - fdatairregular1D (FDataIrregular): FDataIrregular - object with dimensions (1,1). + fdatagrid (FDataGrid): FDataGrid object all_basis (Basis): FDataBasis object. 
""" fd_irregular = FDataIrregular.from_datagrid(fdatagrid) - + if fd_irregular.dim_domain == 1: basis = all_basis( - domain_range=fd_irregular.domain_range, + domain_range=fd_irregular.domain_range, n_basis=N_BASIS, ) else: basis_by_dim = [ all_basis( - domain_range=fd_irregular.domain_range[dim: dim + 1], + domain_range=fd_irregular.domain_range[dim: dim + 1], n_basis=N_BASIS, ) for dim in range(fd_irregular.dim_domain) @@ -827,4 +844,4 @@ def test_fdatairregular_to_basis_consistency( np.all(irregular_coefs[i] == g_coef) for i, g_coef in enumerate(grid_coefs) ], - ) \ No newline at end of file + ) From bbe6e06fd14b1d4cc60ca589e95375c421dc1ce7 Mon Sep 17 00:00:00 2001 From: opintosant Date: Thu, 27 Apr 2023 15:20:33 +0200 Subject: [PATCH 063/144] Make representation/irregular.py PEP8 and wemake compliant. --- skfda/representation/irregular.py | 106 ++++++++++++++++++++---------- 1 file changed, 70 insertions(+), 36 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index edd31f2e4..2a1a3f1ad 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -9,11 +9,10 @@ import numbers import warnings -from typing import Any, Optional, Sequence, Type, TypeVar, Union, cast +from typing import Any, Optional, Sequence, Tuple, Type, TypeVar, Union, cast import numpy as np import pandas.api.extensions -import scipy.stats.mstats from matplotlib.figure import Figure from .._utils import _check_array_key @@ -83,7 +82,7 @@ def _get_sample_range_from_data( for sample, _ in enumerate(dim_sample_ranges): sample_range.append( tuple( - [dim_ranges[dim][sample] for dim in range(dim_domain)], + [dim_ranges[d][sample] for d in range(dim_domain)], ), ) @@ -207,7 +206,7 @@ class FDataIrregular(FData): # noqa: WPS214 """ - def __init__( + def __init__( # noqa: WPS211 self, function_indices: ArrayLike, function_arguments: ArrayLike, @@ -238,16 +237,19 @@ def __init__( self.num_functions = 
self.function_indices.shape[0] if self.function_arguments.shape[0] != self.function_values.shape[0]: - raise ValueError("Dimension mismatch between function_arguments \ - and function_values") + raise ValueError( + "Dimension mismatch in function_arguments and function_values", + ) self.num_observations = self.function_arguments.shape[0] if max(self.function_indices) >= self.num_observations: raise ValueError("Index in function_indices out of bounds") - + # Ensure arguments are in order within each function - self.function_arguments, self.function_values = self._sort_by_arguments() + sorted_arguments, sorted_values = self._sort_by_arguments() + self.function_arguments = sorted_arguments + self.function_values = sorted_values self._sample_range = _get_sample_range_from_data( self.function_indices, @@ -313,13 +315,11 @@ def from_dataframe( the irregular functional data of the dataset. """ # Accept strings but ensure the column names are tuples - is_str = isinstance(argument_columns, str) - argument_columns = [argument_columns] if is_str else \ - argument_columns + if isinstance(argument_columns, str): + argument_columns = [argument_columns] - is_str = isinstance(coordinate_columns, str) - coordinate_columns = [coordinate_columns] if is_str else \ - coordinate_columns + if isinstance(coordinate_columns, str): + coordinate_columns = [coordinate_columns] # Obtain num functions and num observations from data num_observations = dataframe.shape[0] @@ -426,22 +426,37 @@ def from_datagrid( def _sort_by_arguments(self) -> Tuple[ArrayLike, ArrayLike]: """Sort the arguments lexicographically functionwise. - + Additionally, sort the values accordingly. 
Returns: Tuple[ArrayLike, Arraylike]: sorted pair (arguments, values) """ - indices_start_end = np.append(self.function_indices, self.num_observations) + indices_start_end = np.append( + self.function_indices, + self.num_observations, + ) + slices = list(zip(indices_start_end, indices_start_end[1:])) slice_args = [self.function_arguments[slice(*s)] for s in slices] slice_values = [self.function_values[slice(*s)] for s in slices] - + # Sort lexicographically, first to last dimension - sorting_masks = [np.lexsort(np.flip(f_args, axis=1).T) for f_args in slice_args] - sorted_args = [slice_args[i][mask] for i, mask in enumerate(sorting_masks)] - sorted_values = [slice_values[i][mask] for i, mask in enumerate(sorting_masks)] - + sorting_masks = [ + np.lexsort(np.flip(f_args, axis=1).T) + for f_args in slice_args + ] + + sorted_args = [ + slice_args[i][mask] + for i, mask in enumerate(sorting_masks) + ] + + sorted_values = [ + slice_values[i][mask] + for i, mask in enumerate(sorting_masks) + ] + return np.concatenate(sorted_args), np.concatenate(sorted_values) def round( @@ -564,7 +579,8 @@ def derivative( Args: order: Order of the derivative. Defaults to one. - method (Optional[Basis]): + method (Optional[Basis]): Method used to generate + the derivatives. Returns: FDataIrregular with the derivative of the dataset. @@ -578,14 +594,26 @@ def integrate( """Integrate the FDataIrregular object. Args: - domain (Optional[DomainRange]): + domain (Optional[DomainRange]): tuple with + the domain ranges for each dimension + of the domain Returns: FDataIrregular with the integral. """ pass - def _check_same_dimensions(self: T, other: T) -> None: + def check_same_dimensions(self: T, other: T) -> None: + """Ensure that other FDataIrregular object ahs compatible dimensions. + + Args: + other (T): FDataIrregular object to compare dimensions + with. + + Raises: + ValueError: Dimension mismatch in coordinates. + ValueError: Dimension mismatch in arguments. 
+ """ if self.dim_codomain != other.dim_codomain: raise ValueError("Dimension mismatch in coordinates") if self.dim_domain != other.dim_domain: @@ -733,7 +761,7 @@ def _eq_elemenwise(self: T, other: T) -> NDArrayBool: def __eq__(self, other: object) -> NDArrayBool: return self.equals(other) - def _get_op_matrix( + def _get_op_matrix( # noqa: WPS212 self, other: Union[T, NDArrayFloat, NDArrayInt, float], ) -> Union[None, float, NDArrayFloat, NDArrayInt]: @@ -965,10 +993,10 @@ def concatenate(self: T, *others: T, as_coordinates: bool = False) -> T: ) # Verify that dimensions are compatible assert len(others) > 0, "No objects to concatenate" - self._check_same_dimensions(others[0]) + self.check_same_dimensions(others[0]) if len(others) > 1: for x, y in zip(others, others[1:]): - x._check_same_dimensions(y) + x.check_same_dimensions(y) # Allocate all required memory total_functions = self.num_functions + sum( @@ -1149,7 +1177,7 @@ def to_matrix(self) -> ArrayLike: ) unified_matrix.fill(np.nan) - #Fill with each function + # Fill with each function next_indices = np.append( self.function_indices, self.num_observations, @@ -1160,7 +1188,7 @@ def to_matrix(self) -> ArrayLike: arg = self.function_arguments[j] val = self.function_values[j] pos = [ - np.where(gp==arg[dim])[0][0] + np.where(gp == arg[dim])[0][0] for dim, gp in enumerate(grid_points) ] unified_matrix[(i,) + tuple(pos)] = val @@ -1447,8 +1475,9 @@ def __getitem__( indices = range(self.num_functions) required_indices = indices[key] for i in required_indices: - next_index = self.function_indices[i + 1] if i + 1 < \ - self.num_functions else None + next_index = None + if i + 1 < self.num_functions: + next_index = self.function_indices[i + 1] s = slice(self.function_indices[i], next_index) required_slices.append(s) @@ -1504,7 +1533,7 @@ def __array_ufunc__( return NotImplemented new_inputs = [ - self._get_op_matrix(i) for i in inputs + self._get_op_matrix(input_) for input_ in inputs ] outputs = kwargs.pop('out', 
None) @@ -1554,7 +1583,8 @@ def _take_allow_fill( ] if fill_value is not self.dtype.na_value: - result.function_values[~positive_mask] = fill_value.function_values[0] + fill_value_ = fill_value.function_values[0] + result.function_values[~positive_mask] = fill_value_ return result @@ -1573,15 +1603,19 @@ def nbytes(self) -> int: """ The number of bytes needed to store this object in memory. """ - return self.function_indices.nbytes + \ - self.function_arguments.nbytes + self.function_values + array_nbytes = [ + self.function_indices.nbytes, + self.function_arguments.nbytes, + self.function_values, + ] + return sum(array_nbytes) def isna(self) -> NDArrayBool: """ Return a 1-D array indicating if each value is missing. Returns: - na_values: Positions of NA. + na_values (NDArrayBool): Positions of NA. """ return np.all( # type: ignore[no-any-return] np.isnan(self.function_values), From c657967fc4bd5596d61c1c42a661d85069b335c7 Mon Sep 17 00:00:00 2001 From: opintosant Date: Thu, 27 Apr 2023 16:33:23 +0200 Subject: [PATCH 064/144] Clean up implementation of restrict function. 
Add test for restritct method in test_irregular.py --- skfda/representation/irregular.py | 69 ++++++++++--------------------- skfda/tests/test_irregular.py | 41 ++++++++++++++++++ 2 files changed, 62 insertions(+), 48 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 2a1a3f1ad..9d68ea7a8 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -1283,7 +1283,7 @@ def copy( # noqa: WPS211 interpolation=interpolation, ) - def restrict( + def restrict( # noqa: WPS210 self: T, domain_range: DomainRangeLike, ) -> T: @@ -1300,10 +1300,7 @@ def restrict( from ..misc.validation import validate_domain_range domain_range = validate_domain_range(domain_range) - assert all( - c <= a < b <= d # noqa: WPS228 - for ((a, b), (c, d)) in zip(domain_range, self.domain_range) - ) + head = 0 indices = [] @@ -1313,66 +1310,42 @@ def restrict( # Eliminate points outside the new range. # Must also modify function indices to point to new array - i = -1 - for i, index in enumerate(self.function_indices[1:]): - prev_index = self.function_indices[i] + iterable_indices = np.append( + self.function_indices, + self.num_observations, + ) + + for i, index_tuple in enumerate(zip( + iterable_indices, + iterable_indices[1:], + )): + prev_index, index = index_tuple s = slice(prev_index, index) - masks = set() - for dr in domain_range: + masks = set(range(self.function_arguments[s].shape[0])) + for dim, dr in enumerate(domain_range): dr_start, dr_end = dr select_mask = np.where( ( - (dr_start <= self.function_arguments[s]) - & (self.function_arguments[s] <= dr_end) + (dr_start <= self.function_arguments[s][:, dim]) + & (self.function_arguments[s][:, dim] <= dr_end) ), ) - # Must be union, it is valid if it is in any interval - masks = masks.union(set(select_mask[0])) + masks = masks.intersection(set(select_mask[0])) # TODO Keep functions with no values? 
masks = list(masks) - if len(masks) > 1: + if len(masks) > 0: indices.append(head) arguments.append(self.function_arguments[s][masks, :]) values.append(self.function_values[s][masks, :]) sample_names.append(self.sample_names[i]) head += len(masks) - # Last index - i += 1 - prev_index = self.function_indices[i] - s = slice(prev_index, None) - masks = set() - for dr in domain_range: - dr_start, dr_end = dr - select_mask = np.where( - ( - (dr_start <= self.function_arguments[s]) - & (self.function_arguments[s] <= dr_end) - ), - ) - - # Must be union, it is valid if it is in any interval - masks = masks.union(set(select_mask[0])) - - # TODO Keep functions with no values? - masks = list(masks) - if len(masks) > 0: - indices.append(head) - arguments.append(self.function_arguments[s][masks, :]) - values.append(self.function_values[s][masks, :]) - sample_names.append(self.sample_names[i]) - head += len(masks) - - function_indices = np.array(indices) - function_arguments = np.concatenate(arguments) - function_values = np.concatenate(values) - return self.copy( - function_indices=function_indices, - function_arguments=function_arguments, - function_values=function_values, + function_indices=np.array(indices), + function_arguments=np.concatenate(arguments), + function_values=np.concatenate(values), sample_names=sample_names, domain_range=domain_range, ) diff --git a/skfda/tests/test_irregular.py b/skfda/tests/test_irregular.py index 267a95fa0..e0d232ad6 100644 --- a/skfda/tests/test_irregular.py +++ b/skfda/tests/test_irregular.py @@ -378,6 +378,47 @@ def test_fdatairregular_equals( assert fdatairregular.equals(fdatairregular.copy()) +def test_fdatairregular_restrict( + fdatairregular: FDataIrregular, +) -> None: + """Test the restrict function for FDataIrregular. + + Args: + fdatairregular (FDataIrregular): FDataIrregular object + which can be unidimensional or multidimensional. 
+ """ + restricted_domain = [ + (dr[0] + (dr[0] + dr[1]) / 4, dr[1] - (dr[0] + dr[1]) / 4) + for dr in fdatairregular.domain_range + ] + + restricted_fdata = fdatairregular.restrict(restricted_domain) + + samples_by_dim = [ + restricted_fdata.function_arguments[:, dim] + for dim in range(fdatairregular.dim_domain) + ] + + sample_ranges = [(np.min(args), np.max(args)) for args in samples_by_dim] + + # The min arg is larger than the domain min constraint + assert len(restricted_fdata) > 0 + assert all( + [ + sr[0] > restricted_domain[i][0] + for i, sr in enumerate(sample_ranges) + ], + ) + + # The max arg is lesser than the domain max constraint + assert all( + [ + sr[1] < restricted_domain[i][1] + for i, sr in enumerate(sample_ranges) + ], + ) + + def test_fdatairregular_to_grid( fdatairregular: FDataIrregular, fdatagrid: FDataGrid, From 7fd0f44403cc48494328770c9b938016aee44e06 Mon Sep 17 00:00:00 2001 From: opintosant Date: Thu, 27 Apr 2023 16:35:52 +0200 Subject: [PATCH 065/144] Remove done TODOs --- skfda/representation/irregular.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 9d68ea7a8..43f88d3ce 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -741,8 +741,6 @@ def equals(self, other: object) -> bool: if not np.array_equal(self.domain_range, other.domain_range): return False - # TODO extrapolation when implemented - if self.interpolation != other.interpolation: return False @@ -1301,7 +1299,6 @@ def restrict( # noqa: WPS210 domain_range = validate_domain_range(domain_range) - head = 0 indices = [] arguments = [] @@ -1333,7 +1330,7 @@ def restrict( # noqa: WPS210 masks = masks.intersection(set(select_mask[0])) - # TODO Keep functions with no values? + # Do not keep functions with no values. masks = list(masks) if len(masks) > 0: indices.append(head) @@ -1387,7 +1384,6 @@ def shift( Returns: Shifted functions. 
""" - # TODO build based in above pass def compose( @@ -1409,7 +1405,6 @@ def compose( Function representing the composition. """ - # TODO Is this possible with this structure? pass def __str__(self) -> str: From 00a348efed27cb0795b4e9addc0f9964e4e0cde7 Mon Sep 17 00:00:00 2001 From: opintosant Date: Mon, 1 May 2023 13:19:22 +0200 Subject: [PATCH 066/144] Fix errors in Doctest --- skfda/representation/irregular.py | 43 +++++++++++++++---------------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 43f88d3ce..0818f9774 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -167,8 +167,7 @@ class FDataIrregular(FData): # noqa: WPS214 >>> FDataIrregular(indices, arguments, values) Traceback (most recent call last): .... - ValueError: Dimension mismatch between function_arguments - and function_values... + ValueError: Dimension mismatch ... The indices in function_indices must point to correct rows in function_arguments and function_values. @@ -961,26 +960,26 @@ def concatenate(self: T, *others: T, as_coordinates: bool = False) -> T: >>> fd.concatenate(fd_2) FDataIrregular( function_indices=array([0, 2, 5, 7], dtype=uint32), - function_arguments=array([[0.], - [1.], - [2.], - [3.], - [4.], - [5.], - [6.], - [7.], - [8.], - [9.]]), - function_values=array([[0.], - [1.], - [2.], - [3.], - [4.], - [5.], - [6.], - [7.], - [8.], - [9.]]), + function_arguments=array([[ 0.], + [ 1.], + [ 2.], + [ 3.], + [ 4.], + [ 5.], + [ 6.], + [ 7.], + [ 8.], + [ 9.]]), + function_values=array([[ 0.], + [ 1.], + [ 2.], + [ 3.], + [ 4.], + [ 5.], + [ 6.], + [ 7.], + [ 8.], + [ 9.]]), domain_range=((0.0, 9.0),), ...) """ From 22beb0ef188f0a25ba760f44bf0fe3649f426c0c Mon Sep 17 00:00:00 2001 From: opintosant Date: Mon, 1 May 2023 13:21:29 +0200 Subject: [PATCH 067/144] Revert "Remove done TODOs" This reverts commit 7fd0f44403cc48494328770c9b938016aee44e06. 
--- skfda/representation/irregular.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 0818f9774..93244500d 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -740,6 +740,8 @@ def equals(self, other: object) -> bool: if not np.array_equal(self.domain_range, other.domain_range): return False + # TODO extrapolation when implemented + if self.interpolation != other.interpolation: return False @@ -1298,6 +1300,7 @@ def restrict( # noqa: WPS210 domain_range = validate_domain_range(domain_range) + head = 0 indices = [] arguments = [] @@ -1329,7 +1332,7 @@ def restrict( # noqa: WPS210 masks = masks.intersection(set(select_mask[0])) - # Do not keep functions with no values. + # TODO Keep functions with no values? masks = list(masks) if len(masks) > 0: indices.append(head) @@ -1383,6 +1386,7 @@ def shift( Returns: Shifted functions. """ + # TODO build based in above pass def compose( @@ -1404,6 +1408,7 @@ def compose( Function representing the composition. """ + # TODO Is this possible with this structure? pass def __str__(self) -> str: From f2244f32fc61da64cfa41678b40c930626a8dc3e Mon Sep 17 00:00:00 2001 From: opintosant Date: Mon, 1 May 2023 13:23:47 +0200 Subject: [PATCH 068/144] Revert "Revert "Remove done TODOs"" This reverts commit 22beb0ef188f0a25ba760f44bf0fe3649f426c0c. 
--- skfda/representation/irregular.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 93244500d..0818f9774 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -740,8 +740,6 @@ def equals(self, other: object) -> bool: if not np.array_equal(self.domain_range, other.domain_range): return False - # TODO extrapolation when implemented - if self.interpolation != other.interpolation: return False @@ -1300,7 +1298,6 @@ def restrict( # noqa: WPS210 domain_range = validate_domain_range(domain_range) - head = 0 indices = [] arguments = [] @@ -1332,7 +1329,7 @@ def restrict( # noqa: WPS210 masks = masks.intersection(set(select_mask[0])) - # TODO Keep functions with no values? + # Do not keep functions with no values. masks = list(masks) if len(masks) > 0: indices.append(head) @@ -1386,7 +1383,6 @@ def shift( Returns: Shifted functions. """ - # TODO build based in above pass def compose( @@ -1408,7 +1404,6 @@ def compose( Function representing the composition. """ - # TODO Is this possible with this structure? 
pass def __str__(self) -> str: From c438deb7bea7c192b4971a0af26e0ca20031b193 Mon Sep 17 00:00:00 2001 From: opintosant Date: Mon, 1 May 2023 14:26:51 +0200 Subject: [PATCH 069/144] Fix incorrect isort format for improts in _real_datasets --- skfda/datasets/_real_datasets.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/skfda/datasets/_real_datasets.py b/skfda/datasets/_real_datasets.py index 50221f747..434754477 100644 --- a/skfda/datasets/_real_datasets.py +++ b/skfda/datasets/_real_datasets.py @@ -5,12 +5,11 @@ import numpy as np import pandas as pd +import rdata from pandas import DataFrame, Series from sklearn.utils import Bunch from typing_extensions import Literal -import rdata - from ..representation import FDataGrid from ..representation.irregular import FDataIrregular from ..typing._numpy import NDArrayFloat, NDArrayInt From aff4baa588952335a1d27e5024f573a7aed59891 Mon Sep 17 00:00:00 2001 From: opintosant Date: Mon, 1 May 2023 14:48:14 +0200 Subject: [PATCH 070/144] Fix incorrect style in _real_datasets and smoothing/_basis --- skfda/datasets/_real_datasets.py | 4 ++-- skfda/preprocessing/smoothing/_basis.py | 12 ++++++++---- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/skfda/datasets/_real_datasets.py b/skfda/datasets/_real_datasets.py index 434754477..df2de16fa 100644 --- a/skfda/datasets/_real_datasets.py +++ b/skfda/datasets/_real_datasets.py @@ -174,7 +174,7 @@ def fetch_ucr( return_X_y: bool = False, **kwargs: Any, ) -> Bunch | Tuple[FDataGrid, NDArrayInt]: - """ + r""" Fetch a dataset from the UCR/UEA repository. The UCR/UEA Time Series Classification repository, hosted at @@ -261,7 +261,7 @@ def _fetch_fda_usc(name: str) -> Any: Acoustic-Phonetic Continuous Speech Corpus, NTIS, US Dept of Commerce) which is a widely used resource for research in speech recognition. A dataset was formed by selecting five phonemes for - classification based on digitized speech from this database. 
+ classification based on digitized speech from this database. phonemes are transcribed as follows: "sh" as in "she", "dcl" as in "dark", "iy" as the vowel in "she", "aa" as the vowel in "dark", and "ao" as the first vowel in "water". From continuous speech of 50 male diff --git a/skfda/preprocessing/smoothing/_basis.py b/skfda/preprocessing/smoothing/_basis.py index 446140a04..3efd5a668 100644 --- a/skfda/preprocessing/smoothing/_basis.py +++ b/skfda/preprocessing/smoothing/_basis.py @@ -334,8 +334,11 @@ def transform( ) return super().transform(X, y) - + + class IrregularBasisSmoother(_LinearSmoother): + """Transform irregular data to a smooth basis functional form.""" + _required_parameters = ["basis"] def __init__( @@ -395,8 +398,8 @@ def _hat_matrix( output_points: GridPointsLike, ) -> NDArrayFloat: raise NotImplementedError( - "Not implemented for as_coordinates = True", - ) + "Not implemented for as_coordinates = True", + ) def fit( self, @@ -414,7 +417,8 @@ def fit( """ self.input_points_ = X.function_arguments - self.output_points_ = (self.output_points + self.output_points_ = ( + self.output_points if self.output_points is not None else self.input_points_ ) From 87d754f7471c46fede992d67a883657f8e7773bc Mon Sep 17 00:00:00 2001 From: opintosant Date: Mon, 1 May 2023 15:04:36 +0200 Subject: [PATCH 071/144] Fix incorrect sorting of import in exploratory/visualization/representation and missing parameter in docstring in datasets/_real_datasets --- skfda/datasets/_real_datasets.py | 1 + skfda/exploratory/visualization/representation.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/skfda/datasets/_real_datasets.py b/skfda/datasets/_real_datasets.py index df2de16fa..6ccd5bf7a 100644 --- a/skfda/datasets/_real_datasets.py +++ b/skfda/datasets/_real_datasets.py @@ -185,6 +185,7 @@ def fetch_ucr( Args: name: Dataset name. 
+ return_X_y: Return tuple (data, target) kwargs: Additional parameters for the function :func:`skdatasets.repositories.ucr.fetch`. diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index e59efb284..a5b7579e4 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -8,7 +8,7 @@ """ from __future__ import annotations -from typing import Any, Dict, Sequence, Sized, Tuple, TypeVar, Optional +from typing import Any, Dict, Optional, Sequence, Sized, Tuple, TypeVar import matplotlib.cm import matplotlib.patches From b7a7489e1c20f6d98761ffb2bc7645f1b05fd69e Mon Sep 17 00:00:00 2001 From: pcuestas Date: Wed, 20 Sep 2023 19:45:08 +0200 Subject: [PATCH 072/144] Fix mistake in PlotIrregular (number of measurements in functions indices extraction) --- skfda/exploratory/visualization/representation.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index a5b7579e4..c50b34bc8 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -627,7 +627,10 @@ def __init__( # noqa: WPS211 # There may be different points for each function self.grid_points = [] self.evaluated_points = [] - indices = np.append(self.fdata.function_indices, self.fdata.n_samples) + indices = np.append( + self.fdata.function_indices, + self.fdata.num_observations, + ) for index_start, index_end in zip( indices, indices[1:], From 22388f3e78d27b8409639fd2ed6dad9cd179b2ab Mon Sep 17 00:00:00 2001 From: pcuestas Date: Thu, 28 Sep 2023 09:21:42 +0200 Subject: [PATCH 073/144] Correct rst suggestions --- docs/modules/representation.rst | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/docs/modules/representation.rst b/docs/modules/representation.rst index c26ba333a..4f00b3fde 
100644 --- a/docs/modules/representation.rst +++ b/docs/modules/representation.rst @@ -89,14 +89,15 @@ methods. Irregular representation ------------------------ -In practice, most functional datasets do not contain functions evaluated +In practice, many functional datasets do not contain functions evaluated uniformly over a fixed grid. In other words, it is paramount to be able to represent irregular functional data. -While the FDataGrid class could support these kind of datasets, it is -inefficient to store a complete grid with low data density. Furthermore, -there are specific methods that can be applied to irregular data in order -to obtain, among other things, a better convesion to basis representation. +While the FDataGrid class could support these kinds of datasets by filling a +common grid with possibly empty (or nan) values, it is inefficient to store a +complete grid with low data density. Furthermore, there are specific methods +that can be applied to irregular data in order to obtain, among other things, +a better conversion to basis representation. The FDataIrregular class provides the functionality which suits these purposes. From 494fa6773d8ab912ed603151e95bc404e86aa0b2 Mon Sep 17 00:00:00 2001 From: pcuestas Date: Thu, 28 Sep 2023 09:26:59 +0200 Subject: [PATCH 074/144] NotImplementedError instead of warning. Fix FDataIrregular.isna() and test its shape --- skfda/exploratory/visualization/representation.py | 6 ++---- skfda/representation/irregular.py | 11 +++++------ skfda/tests/test_irregular.py | 12 ++++++++++++ 3 files changed, 19 insertions(+), 10 deletions(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index c50b34bc8..c0ac810f2 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -724,8 +724,7 @@ def _plot( ) else: # TODO Implementar para multidimension. Como hacer mesh? 
- import warnings - warnings.warn("Not implemented") + raise NotImplementedError() _set_labels(self.fdata, fig, axes, self.patches) @@ -767,8 +766,7 @@ def _plot( else: # TODO Implement for multidimensional - import warnings - warnings.warn("Not implemented") + raise NotImplementedError() _set_labels(self.fdata, fig, axes, self.patches) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 0818f9774..30c87ef2a 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -1383,7 +1383,7 @@ def shift( Returns: Shifted functions. """ - pass + raise NotImplementedError() def compose( self: T, @@ -1404,7 +1404,7 @@ def compose( Function representing the composition. """ - pass + raise NotImplementedError() def __str__(self) -> str: """Return str(self).""" @@ -1584,10 +1584,9 @@ def isna(self) -> NDArrayBool: Returns: na_values (NDArrayBool): Positions of NA. """ - return np.all( # type: ignore[no-any-return] - np.isnan(self.function_values), - axis=tuple(range(1, self.function_values.ndim)), - ) + return np.array([ + np.all(np.isnan(v.function_values)) for v in self + ]) class FDataIrregularDType( diff --git a/skfda/tests/test_irregular.py b/skfda/tests/test_irregular.py index e0d232ad6..ac5813ce0 100644 --- a/skfda/tests/test_irregular.py +++ b/skfda/tests/test_irregular.py @@ -436,3 +436,15 @@ def test_fdatairregular_to_grid( assert fdatagrid.equals(FDataIrregular.from_datagrid(fdatagrid).to_grid()) # FDataIrregular -> FDataGrid -> FDataIrregular assert fdatairregular.equals(FDataIrregular.from_datagrid(f_data_grid)) + + +def test_fdatairregular_isna( + fdatairregular: FDataIrregular, +) -> None: + """Test the shape of isna function output for FDataIrregular. + + Args: + fdatairregular (FDataIrregular): FDataIrregular object + which can be unidimensional or multidimensional. 
+ """ + assert fdatairregular.isna().shape == (len(fdatairregular),) From b750e749a145e046e9e9c206d962c743566811ff Mon Sep 17 00:00:00 2001 From: pcuestas Date: Thu, 28 Sep 2023 09:29:05 +0200 Subject: [PATCH 075/144] Use default_rng instead of RandomState --- skfda/tests/test_irregular_operations.py | 31 ++++++++++-------------- 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/skfda/tests/test_irregular_operations.py b/skfda/tests/test_irregular_operations.py index cf1dc66b9..a1e3dd705 100644 --- a/skfda/tests/test_irregular_operations.py +++ b/skfda/tests/test_irregular_operations.py @@ -26,7 +26,7 @@ N_BASIS = 5 DECIMALS = 4 -random_state = np.random.RandomState(seed=SEED) +random_state = np.random.default_rng(seed=SEED) ############ # FIXTURES @@ -49,11 +49,11 @@ def input_arrays( num_values_per_curve = num_values_per_curve.astype(int) values_per_curve = [ - random_state.rand(num_values, dimensions) + random_state.random((num_values, dimensions)) for num_values in num_values_per_curve ] args_per_curve = [ - random_state.rand(num_values, dimensions) + random_state.random((num_values, dimensions)) for num_values in num_values_per_curve ] @@ -80,11 +80,11 @@ def input_arrays_2d( num_values_per_curve = num_values_per_curve.astype(int) values_per_curve = [ - random_state.rand(num_values, dimensions) + random_state.random((num_values, dimensions)) for num_values in num_values_per_curve ] args_per_curve = [ - random_state.rand(num_values, dimensions) + random_state.random((num_values, dimensions)) for num_values in num_values_per_curve ] @@ -101,9 +101,9 @@ def fdatagrid_1d( """Generate FDataGrid.""" num_values_per_curve = NUM_CURVES - data_matrix = random_state.rand(NUM_CURVES, num_values_per_curve, 1) + data_matrix = random_state.random((NUM_CURVES, num_values_per_curve, 1)) # Grid points must be sorted - grid_points = np.sort(random_state.rand(num_values_per_curve)) + grid_points = np.sort(random_state.random((num_values_per_curve,))) return 
FDataGrid( data_matrix=data_matrix, @@ -117,14 +117,14 @@ def fdatagrid_2d( """Generate multidimensional FDataGrid.""" num_values_per_curve = NUM_CURVES - data_matrix = random_state.rand( + data_matrix = random_state.random(( NUM_CURVES, num_values_per_curve, DIMENSIONS, - ) + )) # Grid points must be sorted - grid_points = np.sort(random_state.rand(num_values_per_curve)) + grid_points = np.sort(random_state.random((num_values_per_curve,))) return FDataGrid( data_matrix=data_matrix, @@ -781,10 +781,7 @@ def test_fdatairregular_basis_operation( ] assert all( - [ - isinstance(fd_basis, FDataBasis) - for fd_basis in fd_basis_coords - ], + isinstance(fd_basis, FDataBasis) for fd_basis in fd_basis_coords ) @@ -840,8 +837,6 @@ def test_fdatairregular_to_basis_consistency( ] assert all( - [ - np.all(irregular_coefs[i] == g_coef) - for i, g_coef in enumerate(grid_coefs) - ], + np.all(irregular_coefs[i] == g_coef) + for i, g_coef in enumerate(grid_coefs) ) From 59d10d4dfd36e1b3566859d961036ab5312ed7d1 Mon Sep 17 00:00:00 2001 From: pcuestas Date: Thu, 28 Sep 2023 09:32:33 +0200 Subject: [PATCH 076/144] Correct "axies" to "axes" --- skfda/representation/basis/_fdatabasis.py | 2 +- skfda/representation/grid.py | 2 +- skfda/representation/irregular.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/skfda/representation/basis/_fdatabasis.py b/skfda/representation/basis/_fdatabasis.py index 8289471d4..d4298707a 100644 --- a/skfda/representation/basis/_fdatabasis.py +++ b/skfda/representation/basis/_fdatabasis.py @@ -55,7 +55,7 @@ class FDataBasis(FData): # noqa: WPS214 functional datum. domain_range: 2 dimension matrix where each row contains the bounds of the interval in which the functional data - is considered to exist for each one of the axies. + is considered to exist for each one of the axes. dataset_name: name of the dataset. argument_names: tuple containing the names of the different arguments. 
diff --git a/skfda/representation/grid.py b/skfda/representation/grid.py index 5801d3a01..71898cf69 100644 --- a/skfda/representation/grid.py +++ b/skfda/representation/grid.py @@ -64,7 +64,7 @@ class FDataGrid(FData): # noqa: WPS214 contains the points of dicretisation for each axis of data_matrix. domain_range: 2 dimension matrix where each row contains the bounds of the interval in which the functional data - is considered to exist for each one of the axies. + is considered to exist for each one of the axes. dataset_name: name of the dataset. argument_names: tuple containing the names of the different arguments. diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 30c87ef2a..f6efb3160 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -122,7 +122,7 @@ class FDataIrregular(FData): # noqa: WPS214 every curve in the sample. Each row contains an observation. domain_range: 2 dimension matrix where each row contains the bounds of the interval in which the functional data - is considered to exist for each one of the axies. + is considered to exist for each one of the axes. dataset_name: name of the dataset. argument_names: tuple containing the names of the different arguments. 
From 1b7a25e30bce216d3f38b5ceecc9d917ada47d9c Mon Sep 17 00:00:00 2001 From: pcuestas Date: Thu, 28 Sep 2023 13:28:15 +0200 Subject: [PATCH 077/144] Upper case attributes description and function arguments typing --- skfda/representation/irregular.py | 34 ++++++++++++++++--------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index f6efb3160..2d6af8921 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -39,13 +39,15 @@ def _get_sample_range_from_data( - function_indices, - function_arguments, - dim_domain, -): - dim_ranges = [] + function_indices: NDArrayInt, + function_arguments: NDArrayFloat, + dim_domain: int, +) -> DomainRange: + dim_ranges = [] # sample range for each dimension for dim in range(dim_domain): i = 0 + + # Sample range for each function in current dimension: dim_sample_ranges = [] for f in function_indices[1:]: min_argument = min( @@ -86,13 +88,13 @@ def _get_sample_range_from_data( ), ) - return sample_range + return tuple(sample_range) def _get_domain_range_from_sample_range( - sample_range, - dim_domain, -): + sample_range: DomainRange, + dim_domain: int, +) -> DomainRange: ranges = [] for dim in range(dim_domain): min_argument = min([x[dim][0] for x in sample_range]) @@ -113,22 +115,22 @@ class FDataIrregular(FData): # noqa: WPS214 allowing basic operations, representation and conversion to basis format. Attributes: - functional_indices: a unidimensional array which stores the index of + functional_indices: A unidimensional array which stores the index of the functional_values and functional_values arrays where the data of each individual curve of the sample begins. - functional_arguments: an array of every argument of the domain for + functional_arguments: An array of every argument of the domain for every curve in the sample. Each row contains an observation. 
- functional_values: an array of every value of the codomain for + functional_values: An array of every value of the codomain for every curve in the sample. Each row contains an observation. domain_range: 2 dimension matrix where each row contains the bounds of the interval in which the functional data is considered to exist for each one of the axes. - dataset_name: name of the dataset. - argument_names: tuple containing the names of the different + dataset_name: Name of the dataset. + argument_names: Tuple containing the names of the different arguments. - coordinate_names: tuple containing the names of the different + coordinate_names: Tuple containing the names of the different coordinate functions. - extrapolation: defines the default type of + extrapolation: Defines the default type of extrapolation. By default None, which does not apply any type of extrapolation. See `Extrapolation` for detailled information of the types of extrapolation. From cbc03a9ef61fe736b6b2b4ec9e2d28dcfcee162b Mon Sep 17 00:00:00 2001 From: pcuestas Date: Thu, 28 Sep 2023 13:30:13 +0200 Subject: [PATCH 078/144] Use asarray --- skfda/representation/irregular.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 2d6af8921..1fccd4fcb 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -222,11 +222,11 @@ def __init__( # noqa: WPS211 coordinate_names: Optional[LabelTupleLike] = None, ): """Construct a FDataIrregular object.""" - self.function_indices = np.array(function_indices) - self.function_arguments = np.array(function_arguments) + self.function_indices = np.asarray(function_indices) + self.function_arguments = np.asarray(function_arguments) if len(self.function_arguments.shape) == 1: self.function_arguments = self.function_arguments.reshape(-1, 1) - self.function_values = np.array(function_values) + self.function_values = np.asarray(function_values) if 
len(self.function_values.shape) == 1: self.function_values = self.function_values.reshape(-1, 1) From 810b5b0347f322a169232fbdeabc777dbc4886c9 Mon Sep 17 00:00:00 2001 From: pcuestas Date: Thu, 28 Sep 2023 13:43:12 +0200 Subject: [PATCH 079/144] Remove unnecessary attributes: * `num_functions` was the same as `n_samples` * remove `_dim_(co)_domain`, they can be computed in property --- skfda/representation/irregular.py | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 1fccd4fcb..e186578b5 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -230,13 +230,6 @@ def __init__( # noqa: WPS211 if len(self.function_values.shape) == 1: self.function_values = self.function_values.reshape(-1, 1) - # Set dimensions - self._dim_domain = self.function_arguments.shape[1] - self._dim_codomain = self.function_values.shape[1] - - # Set structure to given data - self.num_functions = self.function_indices.shape[0] - if self.function_arguments.shape[0] != self.function_values.shape[0]: raise ValueError( "Dimension mismatch in function_arguments and function_values", @@ -509,11 +502,11 @@ def sample_points(self) -> GridPoints: @property def dim_domain(self) -> int: - return self._dim_domain + return self.function_arguments.shape[1] @property def dim_codomain(self) -> int: - return self._dim_codomain + return self.function_values.shape[1] @property def coordinates(self: T) -> _IrregularCoordinateIterator[T]: @@ -521,7 +514,7 @@ def coordinates(self: T) -> _IrregularCoordinateIterator[T]: @property def n_samples(self) -> int: - return self.num_functions + return self.function_indices.shape[0] @property def sample_range(self) -> DomainRange: @@ -998,9 +991,9 @@ def concatenate(self: T, *others: T, as_coordinates: bool = False) -> T: x.check_same_dimensions(y) # Allocate all required memory - total_functions = self.num_functions + 
sum( + total_functions = self.n_samples + sum( [ - o.num_functions + o.n_samples for o in others ], ) @@ -1024,7 +1017,7 @@ def concatenate(self: T, *others: T, as_coordinates: bool = False) -> T: # Add samples sequentially for f_data in [self] + list(others): function_indices[ - index:index + f_data.num_functions + index:index + f_data.n_samples ] = f_data.function_indices function_args[ head:head + f_data.num_observations @@ -1033,8 +1026,8 @@ def concatenate(self: T, *others: T, as_coordinates: bool = False) -> T: head:head + f_data.num_observations ] = f_data.function_values # Adjust pointers to the concatenated array - function_indices[index:index + f_data.num_functions] += head - index += f_data.num_functions + function_indices[index:index + f_data.n_samples] += head + index += f_data.n_samples head += f_data.num_observations total_sample_names = total_sample_names + list(f_data.sample_names) @@ -1441,11 +1434,11 @@ def __getitem__( ) -> T: required_slices = [] key = _check_array_key(self.function_indices, key) - indices = range(self.num_functions) + indices = range(self.n_samples) required_indices = indices[key] for i in required_indices: next_index = None - if i + 1 < self.num_functions: + if i + 1 < self.n_samples: next_index = self.function_indices[i + 1] s = slice(self.function_indices[i], next_index) required_slices.append(s) From 598d5acdd864b79ec7f6b3c044a660e3451c5eb1 Mon Sep 17 00:00:00 2001 From: pcuestas Date: Thu, 28 Sep 2023 13:45:37 +0200 Subject: [PATCH 080/144] Not implemented error --- skfda/representation/irregular.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index e186578b5..743002895 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -722,7 +722,7 @@ def cov(self: T) -> T: FDataIrregular with the covariance function. 
""" # TODO Implementation to be decided - pass + raise NotImplementedError() def equals(self, other: object) -> bool: """Comparison of FDataIrregular objects.""" From 3d9329ecfad5c7d3bc0bc148af09825bde1be4af Mon Sep 17 00:00:00 2001 From: pcuestas Date: Thu, 28 Sep 2023 13:56:57 +0200 Subject: [PATCH 081/144] Remove num_observations: * Converted to property. * Renamed as n_measurements --- .../visualization/representation.py | 2 +- skfda/representation/irregular.py | 56 ++++++++++--------- skfda/tests/test_irregular.py | 8 +-- 3 files changed, 36 insertions(+), 30 deletions(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index c0ac810f2..f02e90bcd 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -629,7 +629,7 @@ def __init__( # noqa: WPS211 self.evaluated_points = [] indices = np.append( self.fdata.function_indices, - self.fdata.num_observations, + self.fdata.n_measurements, ) for index_start, index_end in zip( indices, diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 743002895..f6cd36405 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -115,12 +115,12 @@ class FDataIrregular(FData): # noqa: WPS214 allowing basic operations, representation and conversion to basis format. Attributes: - functional_indices: A unidimensional array which stores the index of + function_indices: A unidimensional array which stores the index of the functional_values and functional_values arrays where the data of each individual curve of the sample begins. - functional_arguments: An array of every argument of the domain for + function_arguments: An array of every argument of the domain for every curve in the sample. Each row contains an observation. 
- functional_values: An array of every value of the codomain for + function_values: An array of every value of the codomain for every curve in the sample. Each row contains an observation. domain_range: 2 dimension matrix where each row contains the bounds of the interval in which the functional data @@ -235,9 +235,7 @@ def __init__( # noqa: WPS211 "Dimension mismatch in function_arguments and function_values", ) - self.num_observations = self.function_arguments.shape[0] - - if max(self.function_indices) >= self.num_observations: + if max(self.function_indices) >= self.n_measurements: raise ValueError("Index in function_indices out of bounds") # Ensure arguments are in order within each function @@ -316,16 +314,16 @@ def from_dataframe( coordinate_columns = [coordinate_columns] # Obtain num functions and num observations from data - num_observations = dataframe.shape[0] + n_measurements = dataframe.shape[0] num_functions = dataframe[id_column].nunique() # Create data structure of function pointers and coordinates function_indices = np.zeros((num_functions, ), dtype=np.uint32) function_arguments = np.zeros( - (num_observations, len(argument_columns)), + (n_measurements, len(argument_columns)), ) function_values = np.zeros( - (num_observations, len(coordinate_columns)), + (n_measurements, len(coordinate_columns)), ) head = 0 @@ -371,16 +369,16 @@ def from_datagrid( as the source but with an irregular structure. 
""" # Obtain num functions and num observations from data - num_observations = np.sum(~(np.isnan(f_data.data_matrix).all(axis=-1))) + n_measurements = np.sum(~(np.isnan(f_data.data_matrix).all(axis=-1))) num_functions = f_data.data_matrix.shape[0] # Create data structure of function pointers and coordinates function_indices = np.zeros((num_functions, ), dtype=np.uint32) function_arguments = np.zeros( - (num_observations, f_data.dim_domain), + (n_measurements, f_data.dim_domain), ) function_values = np.zeros( - (num_observations, f_data.dim_codomain), + (n_measurements, f_data.dim_codomain), ) # Find all the combinations of grid points and indices @@ -428,7 +426,7 @@ def _sort_by_arguments(self) -> Tuple[ArrayLike, ArrayLike]: """ indices_start_end = np.append( self.function_indices, - self.num_observations, + self.n_measurements, ) slices = list(zip(indices_start_end, indices_start_end[1:])) @@ -516,6 +514,10 @@ def coordinates(self: T) -> _IrregularCoordinateIterator[T]: def n_samples(self) -> int: return self.function_indices.shape[0] + @property + def n_measurements(self) -> int: + return self.function_arguments.shape[0] + @property def sample_range(self) -> DomainRange: """ @@ -774,7 +776,7 @@ def _get_op_matrix( # noqa: WPS212 values_after = np.concatenate( ( self.function_indices, - np.array([self.num_observations]), + np.array([self.n_measurements]), ), ) @@ -806,7 +808,7 @@ def _get_op_matrix( # noqa: WPS212 values_after = np.concatenate( ( self.function_indices, - np.array([self.num_observations]), + np.array([self.n_measurements]), ), ) @@ -997,9 +999,9 @@ def concatenate(self: T, *others: T, as_coordinates: bool = False) -> T: for o in others ], ) - total_values = self.num_observations + sum( + total_values = self.n_measurements + sum( [ - o.num_observations + o.n_measurements for o in others ], ) @@ -1020,15 +1022,15 @@ def concatenate(self: T, *others: T, as_coordinates: bool = False) -> T: index:index + f_data.n_samples ] = f_data.function_indices 
function_args[ - head:head + f_data.num_observations + head:head + f_data.n_measurements ] = f_data.function_arguments function_values[ - head:head + f_data.num_observations + head:head + f_data.n_measurements ] = f_data.function_values # Adjust pointers to the concatenated array function_indices[index:index + f_data.n_samples] += head index += f_data.n_samples - head += f_data.num_observations + head += f_data.n_measurements total_sample_names = total_sample_names + list(f_data.sample_names) # Check domain range @@ -1172,7 +1174,7 @@ def to_matrix(self) -> ArrayLike: # Fill with each function next_indices = np.append( self.function_indices, - self.num_observations, + self.n_measurements, ) for i, index in enumerate(self.function_indices): @@ -1303,7 +1305,7 @@ def restrict( # noqa: WPS210 # Must also modify function indices to point to new array iterable_indices = np.append( self.function_indices, - self.num_observations, + self.n_measurements, ) for i, index_tuple in enumerate(zip( @@ -1459,7 +1461,7 @@ def __getitem__( chunk_sizes = np.array( [ s.stop - s.start if s.stop is not None - else self.num_observations - s.start + else self.n_measurements - s.start for s in required_slices ], ) @@ -1605,7 +1607,6 @@ def __init__( self.function_indices = function_indices self.function_arguments = function_arguments self.dim_domain = function_arguments.shape[1] - self.num_observations = len(function_arguments) if domain_range is None: sample_range = _get_sample_range_from_data( @@ -1621,6 +1622,11 @@ def __init__( self.domain_range = validate_domain_range(domain_range) self.dim_codomain = dim_codomain + @property + def n_measurements(self) -> int: + """Number of measurements.""" + return self.function_arguments.shape[0] + @classmethod def construct_array_type(cls) -> Type[FDataIrregular]: # noqa: D102 return FDataIrregular @@ -1628,7 +1634,7 @@ def construct_array_type(cls) -> Type[FDataIrregular]: # noqa: D102 def _na_repr(self) -> FDataIrregular: shape = ( - 
(self.num_observations,) + (self.n_measurements,) + (self.dim_codomain,) ) diff --git a/skfda/tests/test_irregular.py b/skfda/tests/test_irregular.py index ac5813ce0..0f4a4397f 100644 --- a/skfda/tests/test_irregular.py +++ b/skfda/tests/test_irregular.py @@ -178,7 +178,7 @@ def test_fdatairregular_init( arguments = fdatairregular.function_arguments assert fdatairregular is not None assert len(fdatairregular) == len(fdatairregular.function_indices) - assert len(arguments) == fdatairregular.num_observations + assert len(arguments) == fdatairregular.n_measurements def test_fdatairregular_copy( @@ -227,7 +227,7 @@ def test_fdatairregular_copy_kwargs( # Check everything equal except specified kwarg assert len(f_data_copy) == len(fdatairregular) - assert f_data_copy.num_observations == fdatairregular.num_observations + assert f_data_copy.n_measurements == fdatairregular.n_measurements assert f_data_copy.dim_domain == fdatairregular.dim_domain assert f_data_copy.dim_domain == fdatairregular.dim_codomain assert og_attribute != copy_attribute @@ -350,14 +350,14 @@ def test_fdatairregular_concatenate( function_indices_halves = np.split(fd_concat.function_indices, 2) indices = fdatairregular.function_indices - second_half_indices = indices + fdatairregular.num_observations + second_half_indices = indices + fdatairregular.n_measurements function_args_halves = np.split(fd_concat.function_arguments, 2) function_values_halves = np.split(fd_concat.function_values, 2) assert len(fd_concat) == 2 * len(fdatairregular) assert np.all(function_indices_halves[1] == second_half_indices) - assert fd_concat.num_observations == 2 * fdatairregular.num_observations + assert fd_concat.n_measurements == 2 * fdatairregular.n_measurements assert np.all(function_args_halves[1] == fdatairregular.function_arguments) assert np.all(function_values_halves[1] == fdatairregular.function_values) From a78673a83cad664a39f1aaeea1878377d4e46d92 Mon Sep 17 00:00:00 2001 From: pcuestas Date: Thu, 28 Sep 2023 
13:59:57 +0200 Subject: [PATCH 082/144] Not implemented errors --- skfda/representation/irregular.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index f6cd36405..1e6305ba4 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -581,7 +581,7 @@ def derivative( Returns: FDataIrregular with the derivative of the dataset. """ - pass + raise NotImplementedError() def integrate( self: T, @@ -597,10 +597,10 @@ def integrate( Returns: FDataIrregular with the integral. """ - pass + raise NotImplementedError() def check_same_dimensions(self: T, other: T) -> None: - """Ensure that other FDataIrregular object ahs compatible dimensions. + """Ensure that other FDataIrregular object has compatible dimensions. Args: other (T): FDataIrregular object to compare dimensions From 2d988309bfc6e38e9774cf83b83de99e93e80bc1 Mon Sep 17 00:00:00 2001 From: pcuestas Date: Thu, 28 Sep 2023 14:05:05 +0200 Subject: [PATCH 083/144] from fdatagrid --- skfda/representation/irregular.py | 2 +- skfda/tests/test_irregular.py | 6 +++--- skfda/tests/test_irregular_operations.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 1e6305ba4..7b7a61716 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -353,7 +353,7 @@ def from_dataframe( ) @classmethod - def from_datagrid( + def from_fdatagrid( cls: Type[T], f_data: FDataGrid, **kwargs, diff --git a/skfda/tests/test_irregular.py b/skfda/tests/test_irregular.py index 0f4a4397f..375fde0ed 100644 --- a/skfda/tests/test_irregular.py +++ b/skfda/tests/test_irregular.py @@ -242,7 +242,7 @@ def test_fdatairregular_from_fdatagrid( fdatagrid (FDataGrid): FDataGrid object. 
Can be dense or sparse (contain NaNs) """ - f_data_irreg = FDataIrregular.from_datagrid(fdatagrid) + f_data_irreg = FDataIrregular.from_fdatagrid(fdatagrid) assert f_data_irreg is not None assert len(f_data_irreg) == len(fdatagrid) @@ -433,9 +433,9 @@ def test_fdatairregular_to_grid( f_data_grid = fdatairregular.to_grid() # FDataGrid -> FDataIrregular -> FDataGrid - assert fdatagrid.equals(FDataIrregular.from_datagrid(fdatagrid).to_grid()) + assert fdatagrid.equals(FDataIrregular.from_fdatagrid(fdatagrid).to_grid()) # FDataIrregular -> FDataGrid -> FDataIrregular - assert fdatairregular.equals(FDataIrregular.from_datagrid(f_data_grid)) + assert fdatairregular.equals(FDataIrregular.from_fdatagrid(f_data_grid)) def test_fdatairregular_isna( diff --git a/skfda/tests/test_irregular_operations.py b/skfda/tests/test_irregular_operations.py index a1e3dd705..c30fbb750 100644 --- a/skfda/tests/test_irregular_operations.py +++ b/skfda/tests/test_irregular_operations.py @@ -799,7 +799,7 @@ def test_fdatairregular_to_basis_consistency( fdatagrid (FDataGrid): FDataGrid object all_basis (Basis): FDataBasis object. 
""" - fd_irregular = FDataIrregular.from_datagrid(fdatagrid) + fd_irregular = FDataIrregular.from_fdatagrid(fdatagrid) if fd_irregular.dim_domain == 1: basis = all_basis( From 8091e78226dbd9587cdb4d2efa2f5dc81ba871d5 Mon Sep 17 00:00:00 2001 From: pcuestas Date: Thu, 28 Sep 2023 14:08:39 +0200 Subject: [PATCH 084/144] cls not typed --- skfda/representation/irregular.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 7b7a61716..b6cbc7150 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -277,7 +277,7 @@ def __init__( # noqa: WPS211 @classmethod def from_dataframe( - cls: Type[T], + cls, dataframe: pandas.DataFrame, id_column: str, argument_columns: LabelTupleLike, @@ -507,7 +507,7 @@ def dim_codomain(self) -> int: return self.function_values.shape[1] @property - def coordinates(self: T) -> _IrregularCoordinateIterator[T]: + def coordinates(self) -> _IrregularCoordinateIterator[T]: return _IrregularCoordinateIterator(self) @property From 15454a67e8cab0f3ebf608cd7e8b417ce8e6af7f Mon Sep 17 00:00:00 2001 From: pcuestas Date: Thu, 28 Sep 2023 14:11:24 +0200 Subject: [PATCH 085/144] Typing --- skfda/representation/irregular.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index b6cbc7150..549712def 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -280,8 +280,8 @@ def from_dataframe( cls, dataframe: pandas.DataFrame, id_column: str, - argument_columns: LabelTupleLike, - coordinate_columns: LabelTupleLike, + argument_columns: Sequence[str | None], + coordinate_columns: Sequence[str | None], **kwargs, ) -> FDataIrregular: """Create a FDataIrregular object from a pandas dataframe. @@ -296,9 +296,9 @@ def from_dataframe( irregular functional dataset. 
id_column (str): Name of the column which contains the information about which curve does each each row belong to. - argument_columns (LabelTupleLike): list of columns where + argument_columns (Sequence[str | None]): list of columns where the arguments for each dimension of the domain can be found. - coordinate_columns (LabelTupleLike): list of columns where + coordinate_columns (Sequence[str | None]): list of columns where the values for each dimension of the image can be found. kwargs: Arguments for the FDataIrregular constructor. From 10479ef2e5633f9cb895e77f17abb47540ba94d3 Mon Sep 17 00:00:00 2001 From: pcuestas Date: Thu, 28 Sep 2023 14:12:07 +0200 Subject: [PATCH 086/144] Typing --- skfda/representation/irregular.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 549712def..bdc985fcc 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -282,7 +282,7 @@ def from_dataframe( id_column: str, argument_columns: Sequence[str | None], coordinate_columns: Sequence[str | None], - **kwargs, + **kwargs: Any, ) -> FDataIrregular: """Create a FDataIrregular object from a pandas dataframe. From bdc7858bf524f8a001cc6cc1d44aee181615b7e4 Mon Sep 17 00:00:00 2001 From: pcuestas Date: Thu, 28 Sep 2023 14:13:35 +0200 Subject: [PATCH 087/144] Remove sample_points (old) --- skfda/representation/irregular.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index bdc985fcc..eb6e59e08 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -489,15 +489,6 @@ def round( function_values=rounded_values, ) - @property - def sample_points(self) -> GridPoints: - warnings.warn( - "Parameter sample_points is deprecated. 
Use the " - "parameter grid_points instead.", - DeprecationWarning, - ) - return self.grid_points - @property def dim_domain(self) -> int: return self.function_arguments.shape[1] From 6d15f066113552b9a89766a39bfff786613e6729 Mon Sep 17 00:00:00 2001 From: pcuestas Date: Thu, 28 Sep 2023 14:23:44 +0200 Subject: [PATCH 088/144] Fix __eq__ --- skfda/representation/irregular.py | 4 +++- skfda/tests/test_irregular.py | 2 +- skfda/tests/test_irregular_operations.py | 16 ++++++++++++---- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index eb6e59e08..f6a65e64a 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -744,7 +744,9 @@ def _eq_elemenwise(self: T, other: T) -> NDArrayBool: ) def __eq__(self, other: object) -> NDArrayBool: - return self.equals(other) + return np.array([ + f.equals(o) for f, o in zip(self, other) + ]) def _get_op_matrix( # noqa: WPS212 self, diff --git a/skfda/tests/test_irregular.py b/skfda/tests/test_irregular.py index 375fde0ed..4313acaaa 100644 --- a/skfda/tests/test_irregular.py +++ b/skfda/tests/test_irregular.py @@ -190,7 +190,7 @@ def test_fdatairregular_copy( fdatairregular (FDataIrregular): FDataIrregular object which can be unidimensional or multidimensional. 
""" - assert fdatairregular == fdatairregular.copy() + assert np.all(fdatairregular == fdatairregular.copy()) @pytest.mark.parametrize("kwargs", COPY_KWARGS) diff --git a/skfda/tests/test_irregular_operations.py b/skfda/tests/test_irregular_operations.py index c30fbb750..9d63176ab 100644 --- a/skfda/tests/test_irregular_operations.py +++ b/skfda/tests/test_irregular_operations.py @@ -351,7 +351,9 @@ def test_fdatairregular_arithmetic_sum_commutative( # noqa: WPS118 # Account for single curve test other_1d = self._single_curve(fdatairregular_1d, other_1d) - assert fdatairregular_1d + other_1d == other_1d + fdatairregular_1d + assert np.all( + (fdatairregular_1d + other_1d) == (other_1d + fdatairregular_1d), + ) def test_fdatairregular_arithmetic_sub( self, @@ -447,7 +449,9 @@ def test_fdatairregular_arithmetic_mul_commutative( # noqa: WPS118 # Account for single curve test other_1d = self._single_curve(fdatairregular_1d, other_1d) - assert fdatairregular_1d * other_1d == other_1d * fdatairregular_1d + assert np.all( + (fdatairregular_1d * other_1d) == (other_1d * fdatairregular_1d), + ) def test_fdatairregular_arithmetic_div( self, @@ -567,7 +571,9 @@ def test_fdatairregular_arithmetic_sum_commutative( # noqa: WPS118 # Account for single curve test other_2d = self._single_curve(fdatairregular_2d, other_2d) - assert fdatairregular_2d + other_2d == other_2d + fdatairregular_2d + assert np.all( + (fdatairregular_2d + other_2d) == (other_2d + fdatairregular_2d), + ) def test_fdatairregular_arithmetic_sub( self, @@ -663,7 +669,9 @@ def test_fdatairregular_arithmetic_mul_commutative( # noqa: WPS118 # Account for single curve test other_2d = self._single_curve(fdatairregular_2d, other_2d) - assert fdatairregular_2d * other_2d == other_2d * fdatairregular_2d + assert np.all( + (fdatairregular_2d * other_2d) == (other_2d * fdatairregular_2d), + ) def test_fdatairregular_arithmetic_div( self, From 8f1ea022adebe114a25905e79ac8d52010ed8616 Mon Sep 17 00:00:00 2001 From: 
pcuestas Date: Thu, 28 Sep 2023 14:36:27 +0200 Subject: [PATCH 089/144] indices_start_end function --- .../visualization/representation.py | 9 +-------- skfda/representation/irregular.py | 20 +++++++++++-------- 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index f02e90bcd..d8e7130c1 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -627,14 +627,7 @@ def __init__( # noqa: WPS211 # There may be different points for each function self.grid_points = [] self.evaluated_points = [] - indices = np.append( - self.fdata.function_indices, - self.fdata.n_measurements, - ) - for index_start, index_end in zip( - indices, - indices[1:], - ): + for index_start, index_end in self.fdata.indices_start_end(): self.grid_points.append( self.fdata.function_arguments[index_start:index_end], ) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index f6a65e64a..90a620fda 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -1296,15 +1296,8 @@ def restrict( # noqa: WPS210 # Eliminate points outside the new range. # Must also modify function indices to point to new array - iterable_indices = np.append( - self.function_indices, - self.n_measurements, - ) - for i, index_tuple in enumerate(zip( - iterable_indices, - iterable_indices[1:], - )): + for i, index_tuple in enumerate(self.indices_start_end()): prev_index, index = index_tuple s = slice(prev_index, index) masks = set(range(self.function_arguments[s].shape[0])) @@ -1423,6 +1416,17 @@ def __repr__(self) -> str: '\n ', ) + def indices_start_end(self) -> Sequence[Tuple[int, int]]: + """Return the indices of the start and end of each function. + + Returns: + Sequence[Tuple[int, int]]: Sequence of tuples with the indices of + the start and end of each function. 
+ + """ + indices = np.append(self.function_indices, self.n_measurements) + return list(zip(indices, indices[1:])) + def __getitem__( self: T, key: Union[int, slice, NDArrayInt, NDArrayBool], From 75085818a1e0b78ed858a0188a6d7e479e6b9045 Mon Sep 17 00:00:00 2001 From: pcuestas Date: Fri, 6 Oct 2023 08:58:46 +0200 Subject: [PATCH 090/144] Comments in _get_sample_range_from_data --- skfda/representation/irregular.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 90a620fda..bf8bbe833 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -43,13 +43,15 @@ def _get_sample_range_from_data( function_arguments: NDArrayFloat, dim_domain: int, ) -> DomainRange: - dim_ranges = [] # sample range for each dimension + dim_ranges = [] # Sample range for each dimension for dim in range(dim_domain): i = 0 # Sample range for each function in current dimension: dim_sample_ranges = [] for f in function_indices[1:]: + # i, f: first and last+1 index of the current function. + # Get min and max argument for the current function and dimension. min_argument = min( [function_arguments[j][dim] for j in range(i, f)], ) @@ -61,23 +63,24 @@ def _get_sample_range_from_data( ) i = f + # Get min and max argument for the last function and dimension. 
min_argument = min( [ function_arguments[i + j][dim] for j in range(function_arguments.shape[0] - i) ], ) - max_argument = max( [ function_arguments[i + j][dim] for j in range(function_arguments.shape[0] - i) ], ) - dim_sample_ranges.append( (min_argument, max_argument), ) + + # Append sample ranges for current dimension dim_ranges.append(dim_sample_ranges) sample_range = [] From 71d08a6d165406a370ae5ab63ba3164c6bcc6d1d Mon Sep 17 00:00:00 2001 From: pcuestas Date: Fri, 6 Oct 2023 09:12:03 +0200 Subject: [PATCH 091/144] Remove property FDataIrregular.n_measurements --- skfda/representation/irregular.py | 42 +++++++++++-------------------- skfda/tests/test_irregular.py | 14 ++++++++--- 2 files changed, 25 insertions(+), 31 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index bf8bbe833..62597c983 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -238,7 +238,7 @@ def __init__( # noqa: WPS211 "Dimension mismatch in function_arguments and function_values", ) - if max(self.function_indices) >= self.n_measurements: + if max(self.function_indices) >= len(self.function_arguments): raise ValueError("Index in function_indices out of bounds") # Ensure arguments are in order within each function @@ -427,12 +427,7 @@ def _sort_by_arguments(self) -> Tuple[ArrayLike, ArrayLike]: Returns: Tuple[ArrayLike, Arraylike]: sorted pair (arguments, values) """ - indices_start_end = np.append( - self.function_indices, - self.n_measurements, - ) - - slices = list(zip(indices_start_end, indices_start_end[1:])) + slices = self.indices_start_end() slice_args = [self.function_arguments[slice(*s)] for s in slices] slice_values = [self.function_values[slice(*s)] for s in slices] @@ -508,10 +503,6 @@ def coordinates(self) -> _IrregularCoordinateIterator[T]: def n_samples(self) -> int: return self.function_indices.shape[0] - @property - def n_measurements(self) -> int: - return self.function_arguments.shape[0] - 
@property def sample_range(self) -> DomainRange: """ @@ -772,7 +763,7 @@ def _get_op_matrix( # noqa: WPS212 values_after = np.concatenate( ( self.function_indices, - np.array([self.n_measurements]), + np.array([len(self.function_arguments)]), ), ) @@ -804,7 +795,7 @@ def _get_op_matrix( # noqa: WPS212 values_after = np.concatenate( ( self.function_indices, - np.array([self.n_measurements]), + np.array([len(self.function_arguments)]), ), ) @@ -995,9 +986,9 @@ def concatenate(self: T, *others: T, as_coordinates: bool = False) -> T: for o in others ], ) - total_values = self.n_measurements + sum( + total_values = len(self.function_arguments) + sum( [ - o.n_measurements + len(o.function_arguments) for o in others ], ) @@ -1018,15 +1009,15 @@ def concatenate(self: T, *others: T, as_coordinates: bool = False) -> T: index:index + f_data.n_samples ] = f_data.function_indices function_args[ - head:head + f_data.n_measurements + head:head + len(f_data.function_arguments) ] = f_data.function_arguments function_values[ - head:head + f_data.n_measurements + head:head + len(f_data.function_arguments) ] = f_data.function_values # Adjust pointers to the concatenated array function_indices[index:index + f_data.n_samples] += head index += f_data.n_samples - head += f_data.n_measurements + head += len(f_data.function_arguments) total_sample_names = total_sample_names + list(f_data.sample_names) # Check domain range @@ -1170,7 +1161,7 @@ def to_matrix(self) -> ArrayLike: # Fill with each function next_indices = np.append( self.function_indices, - self.n_measurements, + len(self.function_arguments), ) for i, index in enumerate(self.function_indices): @@ -1427,7 +1418,9 @@ def indices_start_end(self) -> Sequence[Tuple[int, int]]: the start and end of each function. 
""" - indices = np.append(self.function_indices, self.n_measurements) + indices = np.append( + self.function_indices, len(self.function_arguments) + ) return list(zip(indices, indices[1:])) def __getitem__( @@ -1461,7 +1454,7 @@ def __getitem__( chunk_sizes = np.array( [ s.stop - s.start if s.stop is not None - else self.n_measurements - s.start + else len(self.function_arguments) - s.start for s in required_slices ], ) @@ -1622,11 +1615,6 @@ def __init__( self.domain_range = validate_domain_range(domain_range) self.dim_codomain = dim_codomain - @property - def n_measurements(self) -> int: - """Number of measurements.""" - return self.function_arguments.shape[0] - @classmethod def construct_array_type(cls) -> Type[FDataIrregular]: # noqa: D102 return FDataIrregular @@ -1634,7 +1622,7 @@ def construct_array_type(cls) -> Type[FDataIrregular]: # noqa: D102 def _na_repr(self) -> FDataIrregular: shape = ( - (self.n_measurements,) + (len(self.function_arguments),) + (self.dim_codomain,) ) diff --git a/skfda/tests/test_irregular.py b/skfda/tests/test_irregular.py index 4313acaaa..c5c5ddef8 100644 --- a/skfda/tests/test_irregular.py +++ b/skfda/tests/test_irregular.py @@ -178,7 +178,7 @@ def test_fdatairregular_init( arguments = fdatairregular.function_arguments assert fdatairregular is not None assert len(fdatairregular) == len(fdatairregular.function_indices) - assert len(arguments) == fdatairregular.n_measurements + assert len(arguments) == len(fdatairregular.function_values) def test_fdatairregular_copy( @@ -227,7 +227,10 @@ def test_fdatairregular_copy_kwargs( # Check everything equal except specified kwarg assert len(f_data_copy) == len(fdatairregular) - assert f_data_copy.n_measurements == fdatairregular.n_measurements + assert ( + len(f_data_copy.function_arguments) + == len(fdatairregular.function_arguments) + ) assert f_data_copy.dim_domain == fdatairregular.dim_domain assert f_data_copy.dim_domain == fdatairregular.dim_codomain assert og_attribute != 
copy_attribute @@ -350,14 +353,17 @@ def test_fdatairregular_concatenate( function_indices_halves = np.split(fd_concat.function_indices, 2) indices = fdatairregular.function_indices - second_half_indices = indices + fdatairregular.n_measurements + second_half_indices = indices + len(fdatairregular.function_arguments) function_args_halves = np.split(fd_concat.function_arguments, 2) function_values_halves = np.split(fd_concat.function_values, 2) assert len(fd_concat) == 2 * len(fdatairregular) assert np.all(function_indices_halves[1] == second_half_indices) - assert fd_concat.n_measurements == 2 * fdatairregular.n_measurements + assert ( + len(fd_concat.function_arguments) + == 2 * len(fdatairregular.function_arguments) + ) assert np.all(function_args_halves[1] == fdatairregular.function_arguments) assert np.all(function_values_halves[1] == fdatairregular.function_values) From 742ecc14162a25dcbc80b06e00813f7144397bc4 Mon Sep 17 00:00:00 2001 From: pcuestas Date: Fri, 6 Oct 2023 09:23:18 +0200 Subject: [PATCH 092/144] Rename FDataIrregular attributes: `function_`: * `function_indices` to `start_indices` * `function_arguments` to `points` * `function_values` to `values` --- .../visualization/representation.py | 4 +- skfda/preprocessing/smoothing/_basis.py | 8 +- skfda/representation/irregular.py | 414 +++++++++--------- skfda/tests/test_irregular.py | 42 +- skfda/tests/test_irregular_operations.py | 80 ++-- 5 files changed, 274 insertions(+), 274 deletions(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index d8e7130c1..fc1c7e1fb 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -629,10 +629,10 @@ def __init__( # noqa: WPS211 self.evaluated_points = [] for index_start, index_end in self.fdata.indices_start_end(): self.grid_points.append( - self.fdata.function_arguments[index_start:index_end], + 
self.fdata.points[index_start:index_end], ) self.evaluated_points.append( - self.fdata.function_values[index_start:index_end], + self.fdata.values[index_start:index_end], ) self.domain_range = domain_range diff --git a/skfda/preprocessing/smoothing/_basis.py b/skfda/preprocessing/smoothing/_basis.py index 3efd5a668..740dbece1 100644 --- a/skfda/preprocessing/smoothing/_basis.py +++ b/skfda/preprocessing/smoothing/_basis.py @@ -416,7 +416,7 @@ def fit( self """ - self.input_points_ = X.function_arguments + self.input_points_ = X.points self.output_points_ = ( self.output_points if self.output_points is not None @@ -447,14 +447,14 @@ def transform( assert all( np.array_equal(i, s) for i, s in zip( self.input_points_, - X.function_arguments, + X.points, ) ) if self.return_basis: coefficients = self._coef_matrix( - input_points=X.function_arguments, - function_values=X.function_values, + input_points=X.points, + function_values=X.values, ).T return FDataBasis( diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 62597c983..fb460e516 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -39,8 +39,8 @@ def _get_sample_range_from_data( - function_indices: NDArrayInt, - function_arguments: NDArrayFloat, + start_indices: NDArrayInt, + points: NDArrayFloat, dim_domain: int, ) -> DomainRange: dim_ranges = [] # Sample range for each dimension @@ -49,14 +49,14 @@ def _get_sample_range_from_data( # Sample range for each function in current dimension: dim_sample_ranges = [] - for f in function_indices[1:]: + for f in start_indices[1:]: # i, f: first and last+1 index of the current function. # Get min and max argument for the current function and dimension. 
min_argument = min( - [function_arguments[j][dim] for j in range(i, f)], + [points[j][dim] for j in range(i, f)], ) max_argument = max( - [function_arguments[j][dim] for j in range(i, f)], + [points[j][dim] for j in range(i, f)], ) dim_sample_ranges.append( ((min_argument, max_argument)), @@ -66,14 +66,14 @@ def _get_sample_range_from_data( # Get min and max argument for the last function and dimension. min_argument = min( [ - function_arguments[i + j][dim] - for j in range(function_arguments.shape[0] - i) + points[i + j][dim] + for j in range(points.shape[0] - i) ], ) max_argument = max( [ - function_arguments[i + j][dim] - for j in range(function_arguments.shape[0] - i) + points[i + j][dim] + for j in range(points.shape[0] - i) ], ) dim_sample_ranges.append( @@ -118,12 +118,12 @@ class FDataIrregular(FData): # noqa: WPS214 allowing basic operations, representation and conversion to basis format. Attributes: - function_indices: A unidimensional array which stores the index of + start_indices: A unidimensional array which stores the index of the functional_values and functional_values arrays where the data of each individual curve of the sample begins. - function_arguments: An array of every argument of the domain for + points: An array of every argument of the domain for every curve in the sample. Each row contains an observation. - function_values: An array of every value of the codomain for + values: An array of every value of the codomain for every curve in the sample. Each row contains an observation. 
domain_range: 2 dimension matrix where each row contains the bounds of the interval in which the functional data @@ -150,13 +150,13 @@ class FDataIrregular(FData): # noqa: WPS214 >>> values = [[1], [2], [3], [4], [5]] >>> FDataIrregular(indices, arguments, values) FDataIrregular( - function_indices=array([0, 2]), - function_arguments=array([[1], + start_indices=array([0, 2]), + points=array([[1], [2], [3], [4], [5]]), - function_values=array([[1], + values=array([[1], [2], [3], [4], @@ -174,8 +174,8 @@ class FDataIrregular(FData): # noqa: WPS214 .... ValueError: Dimension mismatch ... - The indices in function_indices must point to correct rows - in function_arguments and function_values. + The indices in start_indices must point to correct rows + in points and values. >>> indices = [0,7] >>> arguments = np.arange(5).reshape(-1, 1) @@ -183,7 +183,7 @@ class FDataIrregular(FData): # noqa: WPS214 >>> FDataIrregular(indices, arguments, values) Traceback (most recent call last): .... - ValueError: Index in function_indices out of bounds... + ValueError: Index in start_indices out of bounds... FDataIrregular supports higher dimensional data both in the domain and in the codomain (image). 
@@ -212,9 +212,9 @@ class FDataIrregular(FData): # noqa: WPS214 def __init__( # noqa: WPS211 self, - function_indices: ArrayLike, - function_arguments: ArrayLike, - function_values: ArrayLike, + start_indices: ArrayLike, + points: ArrayLike, + values: ArrayLike, *, domain_range: Optional[DomainRangeLike] = None, dataset_name: Optional[str] = None, @@ -225,30 +225,30 @@ def __init__( # noqa: WPS211 coordinate_names: Optional[LabelTupleLike] = None, ): """Construct a FDataIrregular object.""" - self.function_indices = np.asarray(function_indices) - self.function_arguments = np.asarray(function_arguments) - if len(self.function_arguments.shape) == 1: - self.function_arguments = self.function_arguments.reshape(-1, 1) - self.function_values = np.asarray(function_values) - if len(self.function_values.shape) == 1: - self.function_values = self.function_values.reshape(-1, 1) - - if self.function_arguments.shape[0] != self.function_values.shape[0]: + self.start_indices = np.asarray(start_indices) + self.points = np.asarray(points) + if len(self.points.shape) == 1: + self.points = self.points.reshape(-1, 1) + self.values = np.asarray(values) + if len(self.values.shape) == 1: + self.values = self.values.reshape(-1, 1) + + if self.points.shape[0] != self.values.shape[0]: raise ValueError( - "Dimension mismatch in function_arguments and function_values", + "Dimension mismatch in points and values", ) - if max(self.function_indices) >= len(self.function_arguments): - raise ValueError("Index in function_indices out of bounds") + if max(self.start_indices) >= len(self.points): + raise ValueError("Index in start_indices out of bounds") # Ensure arguments are in order within each function sorted_arguments, sorted_values = self._sort_by_arguments() - self.function_arguments = sorted_arguments - self.function_values = sorted_values + self.points = sorted_arguments + self.values = sorted_values self._sample_range = _get_sample_range_from_data( - self.function_indices, - 
self.function_arguments, + self.start_indices, + self.points, self.dim_domain, ) @@ -321,37 +321,37 @@ def from_dataframe( num_functions = dataframe[id_column].nunique() # Create data structure of function pointers and coordinates - function_indices = np.zeros((num_functions, ), dtype=np.uint32) - function_arguments = np.zeros( + start_indices = np.zeros((num_functions, ), dtype=np.uint32) + points = np.zeros( (n_measurements, len(argument_columns)), ) - function_values = np.zeros( + values = np.zeros( (n_measurements, len(coordinate_columns)), ) head = 0 index = 0 for _, f_values in dataframe.groupby(id_column): - function_indices[index] = head + start_indices[index] = head num_values = f_values.shape[0] # Insert in order f_values = f_values.sort_values(argument_columns) new_args = f_values[argument_columns].values - function_arguments[head:head + num_values, :] = new_args + points[head:head + num_values, :] = new_args new_coords = f_values[coordinate_columns].values - function_values[head:head + num_values, :] = new_coords + values[head:head + num_values, :] = new_coords # Update head and index head += num_values index += 1 return cls( - function_indices, - function_arguments, - function_values, + start_indices, + points, + values, **kwargs, ) @@ -376,11 +376,11 @@ def from_fdatagrid( num_functions = f_data.data_matrix.shape[0] # Create data structure of function pointers and coordinates - function_indices = np.zeros((num_functions, ), dtype=np.uint32) - function_arguments = np.zeros( + start_indices = np.zeros((num_functions, ), dtype=np.uint32) + points = np.zeros( (n_measurements, f_data.dim_domain), ) - function_values = np.zeros( + values = np.zeros( (n_measurements, f_data.dim_codomain), ) @@ -395,7 +395,7 @@ def from_fdatagrid( head = 0 for i in range(num_functions): - function_indices[i] = head + start_indices[i] = head num_values = 0 for g_index, g in enumerate(index_combinations): @@ -405,17 +405,17 @@ def from_fdatagrid( arg = combinations[g_index] 
value = f_data.data_matrix[(i, ) + g] - function_arguments[head + num_values, :] = arg - function_values[head + num_values, :] = value + points[head + num_values, :] = arg + values[head + num_values, :] = value num_values += 1 head += num_values return cls( - function_indices, - function_arguments, - function_values, + start_indices, + points, + values, **kwargs, ) @@ -428,8 +428,8 @@ def _sort_by_arguments(self) -> Tuple[ArrayLike, ArrayLike]: Tuple[ArrayLike, Arraylike]: sorted pair (arguments, values) """ slices = self.indices_start_end() - slice_args = [self.function_arguments[slice(*s)] for s in slices] - slice_values = [self.function_values[slice(*s)] for s in slices] + slice_args = [self.points[slice(*s)] for s in slices] + slice_values = [self.values[slice(*s)] for s in slices] # Sort lexicographically, first to last dimension sorting_masks = [ @@ -454,7 +454,7 @@ def round( decimals: int = 0, out: Optional[FDataIrregular] = None, ) -> FDataIrregular: - """Evenly round function_values to the given number of decimals. + """Evenly round values to the given number of decimals. Arguments are not rounded due to possibility of coalescing various arguments to the same rounded value. @@ -470,30 +470,30 @@ def round( Returns: Returns a FDataIrregular object where all elements - in its function_values are rounded. + in its values are rounded. 
""" # Arguments are not rounded due to possibility of # coalescing various arguments to the same rounded value - rounded_values = self.function_values.round(decimals=decimals) + rounded_values = self.values.round(decimals=decimals) if out is not None and isinstance(out, FDataIrregular): - out.function_indices = self.function_indices - out.function_values = rounded_values + out.start_indices = self.start_indices + out.values = rounded_values return out return self.copy( - function_values=rounded_values, + values=rounded_values, ) @property def dim_domain(self) -> int: - return self.function_arguments.shape[1] + return self.points.shape[1] @property def dim_codomain(self) -> int: - return self.function_values.shape[1] + return self.values.shape[1] @property def coordinates(self) -> _IrregularCoordinateIterator[T]: @@ -501,7 +501,7 @@ def coordinates(self) -> _IrregularCoordinateIterator[T]: @property def n_samples(self) -> int: - return self.function_indices.shape[0] + return self.start_indices.shape[0] @property def sample_range(self) -> DomainRange: @@ -630,14 +630,14 @@ def sum( # noqa: WPS125 super().sum(axis=axis, out=out, keepdims=keepdims, skipna=skipna) data = ( - np.nansum(self.function_values, axis=0, keepdims=True) if skipna - else np.sum(self.function_values, axis=0, keepdims=True) + np.nansum(self.values, axis=0, keepdims=True) if skipna + else np.sum(self.values, axis=0, keepdims=True) ) return FDataIrregular( - function_indices=np.array([0]), - function_arguments=np.zeros((1, self.dim_domain)), - function_values=data, + start_indices=np.array([0]), + points=np.zeros((1, self.dim_domain)), + values=data, sample_names=("sum",), ) @@ -652,10 +652,10 @@ def mean(self: T) -> T: mean of all curves the across each value. 
""" # Find all distinct arguments (ordered) and corresponding values - distinct_args = np.unique(np.matrix.flatten(self.function_arguments)) + distinct_args = np.unique(np.matrix.flatten(self.points)) values = [ - np.matrix.flatten(self.function_values[ - np.where(self.function_arguments == arg)[0] + np.matrix.flatten(self.values[ + np.where(self.points == arg)[0] ]) for arg in distinct_args ] @@ -665,9 +665,9 @@ def mean(self: T) -> T: # Create a FDataIrregular object with only 1 curve, the mean curve return FDataIrregular( - function_indices=np.array([0]), - function_arguments=distinct_args.reshape(-1, 1), - function_values=means.reshape(-1, 1), + start_indices=np.array([0]), + points=distinct_args.reshape(-1, 1), + values=means.reshape(-1, 1), sample_names=("mean",), ) @@ -683,10 +683,10 @@ def var(self: T) -> T: """ # Find all distinct arguments (ordered) and corresponding values - distinct_args = np.unique(np.matrix.flatten(self.function_arguments)) + distinct_args = np.unique(np.matrix.flatten(self.points)) values = [ - np.matrix.flatten(self.function_values[ - np.where(self.function_arguments == arg)[0] + np.matrix.flatten(self.values[ + np.where(self.points == arg)[0] ]) for arg in distinct_args ] @@ -696,9 +696,9 @@ def var(self: T) -> T: # Create a FDataIrregular object with only 1 curve, the variance curve return FDataIrregular( - function_indices=np.array([0]), - function_arguments=distinct_args.reshape(-1, 1), - function_values=variances.reshape(-1, 1), + start_indices=np.array([0]), + points=distinct_args.reshape(-1, 1), + values=variances.reshape(-1, 1), sample_names=("var",), ) @@ -731,9 +731,9 @@ def _eq_elemenwise(self: T, other: T) -> NDArrayBool: """Elementwise equality of FDataIrregular.""" return np.all( [ - (self.function_indices == other.function_indices).all(), - (self.function_arguments == other.function_arguments).all(), - (self.function_values == other.function_values).all(), + (self.start_indices == other.start_indices).all(), + 
(self.points == other.points).all(), + (self.values == other.values).all(), ], ) @@ -754,7 +754,7 @@ def _get_op_matrix( # noqa: WPS212 elif other.shape == (self.n_samples,): other_index = ( (slice(None),) + (np.newaxis,) - * (self.function_values.ndim - 1) + * (self.values.ndim - 1) ) other_vector = other[other_index] @@ -762,15 +762,15 @@ def _get_op_matrix( # noqa: WPS212 # Must expand for the number of values in each curve values_after = np.concatenate( ( - self.function_indices, - np.array([len(self.function_arguments)]), + self.start_indices, + np.array([len(self.points)]), ), ) values_before = np.concatenate( ( np.array([0]), - self.function_indices, + self.start_indices, ), ) @@ -785,7 +785,7 @@ def _get_op_matrix( # noqa: WPS212 ): other_index = ( (slice(None),) + (np.newaxis,) - * (self.function_values.ndim - 2) + * (self.values.ndim - 2) + (slice(None),) ) @@ -794,15 +794,15 @@ def _get_op_matrix( # noqa: WPS212 # Must expand for the number of values in each curve values_after = np.concatenate( ( - self.function_indices, - np.array([len(self.function_arguments)]), + self.start_indices, + np.array([len(self.points)]), ), ) values_before = np.concatenate( ( np.array([0]), - self.function_indices, + self.start_indices, ), ) @@ -819,7 +819,7 @@ def _get_op_matrix( # noqa: WPS212 elif isinstance(other, FDataIrregular): # TODO What to do with different argument and value sizes? 
- return other.function_values + return other.values return None @@ -827,13 +827,13 @@ def __add__( self: T, other: Union[T, NDArrayFloat, NDArrayInt, float], ) -> T: - function_values = self._get_op_matrix(other) - if function_values is None: + values = self._get_op_matrix(other) + if values is None: return NotImplemented return self._copy_op( other, - function_values=self.function_values + function_values, + values=self.values + values, ) def __radd__( @@ -846,39 +846,39 @@ def __sub__( self: T, other: Union[T, NDArrayFloat, NDArrayInt, float], ) -> T: - function_values = self._get_op_matrix(other) - if function_values is None: + values = self._get_op_matrix(other) + if values is None: return NotImplemented return self._copy_op( other, - function_values=self.function_values - function_values, + values=self.values - values, ) def __rsub__( self: T, other: Union[T, NDArrayFloat, NDArrayInt, float], ) -> T: - function_values = self._get_op_matrix(other) - if function_values is None: + values = self._get_op_matrix(other) + if values is None: return NotImplemented return self._copy_op( other, - function_values=function_values - self.function_values, + values=values - self.values, ) def __mul__( self: T, other: Union[T, NDArrayFloat, NDArrayInt, float], ) -> T: - function_values = self._get_op_matrix(other) - if function_values is None: + values = self._get_op_matrix(other) + if values is None: return NotImplemented return self._copy_op( other, - function_values=self.function_values * function_values, + values=self.values * values, ) def __rmul__( @@ -891,31 +891,31 @@ def __truediv__( self: T, other: Union[T, NDArrayFloat, NDArrayInt, float], ) -> T: - function_values = self._get_op_matrix(other) - if function_values is None: + values = self._get_op_matrix(other) + if values is None: return NotImplemented return self._copy_op( other, - function_values=self.function_values / function_values, + values=self.values / values, ) def __rtruediv__( self: T, other: Union[T, 
NDArrayFloat, NDArrayInt, float], ) -> T: - function_values = self._get_op_matrix(other) - if function_values is None: + values = self._get_op_matrix(other) + if values is None: return NotImplemented return self._copy_op( other, - function_values=function_values / self.function_values, + values=values / self.values, ) def __neg__(self: T) -> T: """Negation of FDataIrregular object.""" - return self.copy(function_values=-self.function_values) + return self.copy(values=-self.values) def concatenate(self: T, *others: T, as_coordinates: bool = False) -> T: """Join samples from a similar FDataIrregular object. @@ -943,8 +943,8 @@ def concatenate(self: T, *others: T, as_coordinates: bool = False) -> T: >>> fd_2 = FDataIrregular(indices, arguments_2, values_2) >>> fd.concatenate(fd_2) FDataIrregular( - function_indices=array([0, 2, 5, 7], dtype=uint32), - function_arguments=array([[ 0.], + start_indices=array([0, 2, 5, 7], dtype=uint32), + points=array([[ 0.], [ 1.], [ 2.], [ 3.], @@ -954,7 +954,7 @@ def concatenate(self: T, *others: T, as_coordinates: bool = False) -> T: [ 7.], [ 8.], [ 9.]]), - function_values=array([[ 0.], + values=array([[ 0.], [ 1.], [ 2.], [ 3.], @@ -986,18 +986,18 @@ def concatenate(self: T, *others: T, as_coordinates: bool = False) -> T: for o in others ], ) - total_values = len(self.function_arguments) + sum( + total_values = len(self.points) + sum( [ - len(o.function_arguments) + len(o.points) for o in others ], ) total_sample_names = [] - function_indices = np.zeros((total_functions, ), dtype=np.uint32) + start_indices = np.zeros((total_functions, ), dtype=np.uint32) function_args = np.zeros( (total_values, self.dim_domain), ) - function_values = np.zeros( + values = np.zeros( (total_values, self.dim_codomain), ) index = 0 @@ -1005,19 +1005,19 @@ def concatenate(self: T, *others: T, as_coordinates: bool = False) -> T: # Add samples sequentially for f_data in [self] + list(others): - function_indices[ + start_indices[ index:index + 
f_data.n_samples - ] = f_data.function_indices + ] = f_data.start_indices function_args[ - head:head + len(f_data.function_arguments) - ] = f_data.function_arguments - function_values[ - head:head + len(f_data.function_arguments) - ] = f_data.function_values + head:head + len(f_data.points) + ] = f_data.points + values[ + head:head + len(f_data.points) + ] = f_data.values # Adjust pointers to the concatenated array - function_indices[index:index + f_data.n_samples] += head + start_indices[index:index + f_data.n_samples] += head index += f_data.n_samples - head += len(f_data.function_arguments) + head += len(f_data.points) total_sample_names = total_sample_names + list(f_data.sample_names) # Check domain range @@ -1032,9 +1032,9 @@ def concatenate(self: T, *others: T, as_coordinates: bool = False) -> T: domain_range[dim][0] = dim_min return self.copy( - function_indices, + start_indices, function_args, - function_values, + values, domain_range=domain_range, sample_names=total_sample_names, ) @@ -1145,7 +1145,7 @@ def to_matrix(self) -> ArrayLike: """ # Find the common grid points grid_points = [ - np.unique(self.function_arguments[:, dim]) + np.unique(self.points[:, dim]) for dim in range(self.dim_domain) ] @@ -1160,14 +1160,14 @@ def to_matrix(self) -> ArrayLike: # Fill with each function next_indices = np.append( - self.function_indices, - len(self.function_arguments), + self.start_indices, + len(self.points), ) - for i, index in enumerate(self.function_indices): + for i, index in enumerate(self.start_indices): for j in range(index, next_indices[i + 1]): - arg = self.function_arguments[j] - val = self.function_values[j] + arg = self.points[j] + val = self.values[j] pos = [ np.where(gp == arg[dim])[0][0] for dim, gp in enumerate(grid_points) @@ -1199,9 +1199,9 @@ def to_grid( # noqa: D102 def copy( # noqa: WPS211 self: T, - function_indices: Optional[ArrayLike] = None, - function_arguments: Optional[ArrayLike] = None, - function_values: Optional[ArrayLike] = None, 
+ start_indices: Optional[ArrayLike] = None, + points: Optional[ArrayLike] = None, + values: Optional[ArrayLike] = None, deep: bool = False, # For Pandas compatibility domain_range: Optional[DomainRangeLike] = None, dataset_name: Optional[str] = None, @@ -1218,14 +1218,14 @@ def copy( # noqa: WPS211 is updated. """ - if function_indices is None: - function_indices = self.function_indices + if start_indices is None: + start_indices = self.start_indices - if function_arguments is None: - function_arguments = self.function_arguments + if points is None: + points = self.points - if function_values is None: - function_values = self.function_values + if values is None: + values = self.values if domain_range is None: domain_range = self.domain_range @@ -1252,9 +1252,9 @@ def copy( # noqa: WPS211 interpolation = self.interpolation return FDataIrregular( - function_indices, - function_arguments, - function_values, + start_indices, + points, + values, domain_range=domain_range, dataset_name=dataset_name, argument_names=argument_names, @@ -1294,13 +1294,13 @@ def restrict( # noqa: WPS210 for i, index_tuple in enumerate(self.indices_start_end()): prev_index, index = index_tuple s = slice(prev_index, index) - masks = set(range(self.function_arguments[s].shape[0])) + masks = set(range(self.points[s].shape[0])) for dim, dr in enumerate(domain_range): dr_start, dr_end = dr select_mask = np.where( ( - (dr_start <= self.function_arguments[s][:, dim]) - & (self.function_arguments[s][:, dim] <= dr_end) + (dr_start <= self.points[s][:, dim]) + & (self.points[s][:, dim] <= dr_end) ), ) @@ -1310,15 +1310,15 @@ def restrict( # noqa: WPS210 masks = list(masks) if len(masks) > 0: indices.append(head) - arguments.append(self.function_arguments[s][masks, :]) - values.append(self.function_values[s][masks, :]) + arguments.append(self.points[s][masks, :]) + values.append(self.values[s][masks, :]) sample_names.append(self.sample_names[i]) head += len(masks) return self.copy( - 
function_indices=np.array(indices), - function_arguments=np.concatenate(arguments), - function_values=np.concatenate(values), + start_indices=np.array(indices), + points=np.concatenate(arguments), + values=np.concatenate(values), sample_names=sample_names, domain_range=domain_range, ) @@ -1386,9 +1386,9 @@ def compose( def __str__(self) -> str: """Return str(self).""" return ( - f"function indices: {self.function_indices}\n" - f"function arguments: {self.function_arguments}\n" - f"function values: {self.function_values}\n" + f"function indices: {self.start_indices}\n" + f"function arguments: {self.points}\n" + f"function values: {self.values}\n" f"time range: {self.domain_range}" ) @@ -1396,9 +1396,9 @@ def __repr__(self) -> str: """Return repr(self).""" return ( f"FDataIrregular(" # noqa: WPS221 - f"\nfunction_indices={self.function_indices!r}," - f"\nfunction_arguments={self.function_arguments!r}," - f"\nfunction_values={self.function_values!r}," + f"\nstart_indices={self.start_indices!r}," + f"\npoints={self.points!r}," + f"\nvalues={self.values!r}," f"\ndomain_range={self.domain_range!r}," f"\ndataset_name={self.dataset_name!r}," f"\nargument_names={self.argument_names!r}," @@ -1419,7 +1419,7 @@ def indices_start_end(self) -> Sequence[Tuple[int, int]]: """ indices = np.append( - self.function_indices, len(self.function_arguments) + self.start_indices, len(self.points) ) return list(zip(indices, indices[1:])) @@ -1428,25 +1428,25 @@ def __getitem__( key: Union[int, slice, NDArrayInt, NDArrayBool], ) -> T: required_slices = [] - key = _check_array_key(self.function_indices, key) + key = _check_array_key(self.start_indices, key) indices = range(self.n_samples) required_indices = indices[key] for i in required_indices: next_index = None if i + 1 < self.n_samples: - next_index = self.function_indices[i + 1] - s = slice(self.function_indices[i], next_index) + next_index = self.start_indices[i + 1] + s = slice(self.start_indices[i], next_index) 
required_slices.append(s) arguments = np.concatenate( [ - self.function_arguments[s] + self.points[s] for s in required_slices ], ) values = np.concatenate( [ - self.function_values[s] + self.values[s] for s in required_slices ], ) @@ -1454,7 +1454,7 @@ def __getitem__( chunk_sizes = np.array( [ s.stop - s.start if s.stop is not None - else len(self.function_arguments) - s.start + else len(self.points) - s.start for s in required_slices ], ) @@ -1462,9 +1462,9 @@ def __getitem__( indices = np.cumsum(chunk_sizes) - chunk_sizes[0] return self.copy( - function_indices=indices.astype(int), - function_arguments=arguments, - function_values=values, + start_indices=indices.astype(int), + points=arguments, + values=values, sample_names=self.sample_names[key], ) ##################################################################### @@ -1483,8 +1483,8 @@ def __array_ufunc__( if ( isinstance(i, FDataIrregular) and not np.array_equal( - i.function_arguments, - self.function_arguments, + i.points, + self.points, ) ): return NotImplemented @@ -1496,7 +1496,7 @@ def __array_ufunc__( outputs = kwargs.pop('out', None) if outputs: new_outputs = [ - o.function_values if isinstance(o, FDataIrregular) + o.values if isinstance(o, FDataIrregular) else o for o in outputs ] kwargs['out'] = tuple(new_outputs) @@ -1515,7 +1515,7 @@ def __array_ufunc__( for result, output in zip(results, new_outputs) ) - results = [self.copy(function_values=r) for r in results] + results = [self.copy(values=r) for r in results] return results[0] if len(results) == 1 else results @@ -1529,19 +1529,19 @@ def _take_allow_fill( fill_value: T, ) -> T: result = self.copy() - result.function_values = np.full( - (len(indices),) + self.function_values.shape[1:], + result.values = np.full( + (len(indices),) + self.values.shape[1:], np.nan, ) positive_mask = indices >= 0 - result.function_values[positive_mask] = self.function_values[ + result.values[positive_mask] = self.values[ indices[positive_mask] ] if fill_value is 
not self.dtype.na_value: - fill_value_ = fill_value.function_values[0] - result.function_values[~positive_mask] = fill_value_ + fill_value_ = fill_value.values[0] + result.values[~positive_mask] = fill_value_ return result @@ -1549,8 +1549,8 @@ def _take_allow_fill( def dtype(self) -> FDataIrregularDType: """The dtype for this extension array, FDataIrregularDType""" return FDataIrregularDType( - function_indices=self.function_indices, - function_arguments=self.function_arguments, + start_indices=self.start_indices, + points=self.points, dim_codomain=self.dim_codomain, domain_range=self.domain_range, ) @@ -1561,9 +1561,9 @@ def nbytes(self) -> int: The number of bytes needed to store this object in memory. """ array_nbytes = [ - self.function_indices.nbytes, - self.function_arguments.nbytes, - self.function_values, + self.start_indices.nbytes, + self.points.nbytes, + self.values, ] return sum(array_nbytes) @@ -1575,7 +1575,7 @@ def isna(self) -> NDArrayBool: na_values (NDArrayBool): Positions of NA. 
""" return np.array([ - np.all(np.isnan(v.function_values)) for v in self + np.all(np.isnan(v.values)) for v in self ]) @@ -1591,20 +1591,20 @@ class FDataIrregularDType( def __init__( self, - function_indices: ArrayLike, - function_arguments: ArrayLike, + start_indices: ArrayLike, + points: ArrayLike, dim_codomain: int, domain_range: Optional[DomainRangeLike] = None, ) -> None: from ..misc.validation import validate_domain_range - self.function_indices = function_indices - self.function_arguments = function_arguments - self.dim_domain = function_arguments.shape[1] + self.start_indices = start_indices + self.points = points + self.dim_domain = points.shape[1] if domain_range is None: sample_range = _get_sample_range_from_data( - self.function_indices, - self.function_arguments, + self.start_indices, + self.points, self.dim_domain, ) domain_range = _get_domain_range_from_sample_range( @@ -1622,16 +1622,16 @@ def construct_array_type(cls) -> Type[FDataIrregular]: # noqa: D102 def _na_repr(self) -> FDataIrregular: shape = ( - (len(self.function_arguments),) + (len(self.points),) + (self.dim_codomain,) ) - function_values = np.full(shape=shape, fill_value=self.na_value) + values = np.full(shape=shape, fill_value=self.na_value) return FDataIrregular( - function_indices=self.function_indices, - function_arguments=self.function_arguments, - function_values=function_values, + start_indices=self.start_indices, + points=self.points, + values=values, domain_range=self.domain_range, ) @@ -1653,8 +1653,8 @@ def __eq__(self, other: Any) -> bool: return False return ( - self.function_indices == other.function_indices - and self.function_arguments == other.function_arguments + self.start_indices == other.start_indices + and self.points == other.points and self.domain_range == other.domain_range and self.dim_codomain == other.dim_codomain ) @@ -1662,8 +1662,8 @@ def __eq__(self, other: Any) -> bool: def __hash__(self) -> int: return hash( ( - str(self.function_indices), - 
str(self.function_arguments), + str(self.start_indices), + str(self.points), self.domain_range, self.dim_codomain, ), @@ -1690,10 +1690,10 @@ def __getitem__( self._fdatairregular.coordinate_names, )[s_key] - coordinate_values = self._fdatairregular.function_values[..., key] + coordinate_values = self._fdatairregular.values[..., key] return self._fdatairregular.copy( - function_values=coordinate_values.reshape(-1, 1), + values=coordinate_values.reshape(-1, 1), coordinate_names=tuple(coordinate_names), ) diff --git a/skfda/tests/test_irregular.py b/skfda/tests/test_irregular.py index c5c5ddef8..b1a340f2e 100644 --- a/skfda/tests/test_irregular.py +++ b/skfda/tests/test_irregular.py @@ -175,10 +175,10 @@ def test_fdatairregular_init( fdatairregular (FDataIrregular): FDataIrregular object which can be unidimensional or multidimensional. """ - arguments = fdatairregular.function_arguments + arguments = fdatairregular.points assert fdatairregular is not None - assert len(fdatairregular) == len(fdatairregular.function_indices) - assert len(arguments) == len(fdatairregular.function_values) + assert len(fdatairregular) == len(fdatairregular.start_indices) + assert len(arguments) == len(fdatairregular.values) def test_fdatairregular_copy( @@ -228,8 +228,8 @@ def test_fdatairregular_copy_kwargs( # Check everything equal except specified kwarg assert len(f_data_copy) == len(fdatairregular) assert ( - len(f_data_copy.function_arguments) - == len(fdatairregular.function_arguments) + len(f_data_copy.points) + == len(fdatairregular.points) ) assert f_data_copy.dim_domain == fdatairregular.dim_domain assert f_data_copy.dim_domain == fdatairregular.dim_codomain @@ -275,7 +275,7 @@ def test_fdatairregular_from_dataframe( ) assert len(f_irreg) == 423 - assert len(f_irreg.function_values) == 1003 + assert len(f_irreg.values) == 1003 def test_fdatairregular_getitem( @@ -317,8 +317,8 @@ def test_fdatairregular_coordinates( assert len(f_data_coordinate) == len(fdatairregular) assert 
f_data_coordinate.dim_codomain == 1 assert np.all( - f_data_coordinate.function_values[:, 0] - == fdatairregular.function_values[:, dim], + f_data_coordinate.values[:, 0] + == fdatairregular.values[:, dim], ) @@ -335,8 +335,8 @@ def test_fdatairregular_round( decimals (int): Number of decimal places to round. """ assert np.all( - fdatairregular.round(decimals).function_values - == np.round(fdatairregular.function_values, decimals), + fdatairregular.round(decimals).values + == np.round(fdatairregular.values, decimals), ) @@ -351,21 +351,21 @@ def test_fdatairregular_concatenate( """ fd_concat = fdatairregular.concatenate(fdatairregular) - function_indices_halves = np.split(fd_concat.function_indices, 2) - indices = fdatairregular.function_indices - second_half_indices = indices + len(fdatairregular.function_arguments) + start_indices_halves = np.split(fd_concat.start_indices, 2) + indices = fdatairregular.start_indices + second_half_indices = indices + len(fdatairregular.points) - function_args_halves = np.split(fd_concat.function_arguments, 2) - function_values_halves = np.split(fd_concat.function_values, 2) + function_args_halves = np.split(fd_concat.points, 2) + values_halves = np.split(fd_concat.values, 2) assert len(fd_concat) == 2 * len(fdatairregular) - assert np.all(function_indices_halves[1] == second_half_indices) + assert np.all(start_indices_halves[1] == second_half_indices) assert ( - len(fd_concat.function_arguments) - == 2 * len(fdatairregular.function_arguments) + len(fd_concat.points) + == 2 * len(fdatairregular.points) ) - assert np.all(function_args_halves[1] == fdatairregular.function_arguments) - assert np.all(function_values_halves[1] == fdatairregular.function_values) + assert np.all(function_args_halves[1] == fdatairregular.points) + assert np.all(values_halves[1] == fdatairregular.values) def test_fdatairregular_equals( @@ -401,7 +401,7 @@ def test_fdatairregular_restrict( restricted_fdata = fdatairregular.restrict(restricted_domain) 
samples_by_dim = [ - restricted_fdata.function_arguments[:, dim] + restricted_fdata.points[:, dim] for dim in range(fdatairregular.dim_domain) ] diff --git a/skfda/tests/test_irregular_operations.py b/skfda/tests/test_irregular_operations.py index 9d63176ab..08dda67b8 100644 --- a/skfda/tests/test_irregular_operations.py +++ b/skfda/tests/test_irregular_operations.py @@ -158,9 +158,9 @@ def fdatairregular_1d( """Return FDataIrregular with only 1 curve or NUM_CURVES as requested.""" indices, arguments, values = input_arrays f_data_irreg = FDataIrregular( - function_indices=indices, - function_arguments=arguments, - function_values=values, + start_indices=indices, + points=arguments, + values=values, ) if request.param == "single_curve": @@ -177,9 +177,9 @@ def fdatairregular_2d( """Return FDataIrregular with only 1 curve or NUM_CURVES as requested.""" indices, arguments, values = input_arrays_2d f_data_irreg = FDataIrregular( - function_indices=indices, - function_arguments=arguments, - function_values=values, + start_indices=indices, + points=arguments, + values=values, ) if request.param == "single_curve": @@ -284,7 +284,7 @@ def _take_first( if isinstance(other, np.ndarray): return other[0] elif isinstance(other, FDataIrregular): - return other.function_values + return other.values return other def _single_curve( @@ -313,9 +313,9 @@ def test_fdatairregular_arithmetic_sum( f_data_sum = fdatairregular_1d + other_1d - result = fdatairregular_1d.function_values + self._take_first(other_1d) + result = fdatairregular_1d.values + self._take_first(other_1d) - assert np.all(f_data_sum.function_values == result) + assert np.all(f_data_sum.values == result) def test_fdatairregular_arithmetic_rsum( self, @@ -333,9 +333,9 @@ def test_fdatairregular_arithmetic_rsum( f_data_sum = other_1d + fdatairregular_1d - result = self._take_first(other_1d) + fdatairregular_1d.function_values + result = self._take_first(other_1d) + fdatairregular_1d.values - assert 
np.all(f_data_sum.function_values == result) + assert np.all(f_data_sum.values == result) def test_fdatairregular_arithmetic_sum_commutative( # noqa: WPS118 self, @@ -371,9 +371,9 @@ def test_fdatairregular_arithmetic_sub( f_data_sub = fdatairregular_1d - other_1d - result = fdatairregular_1d.function_values - self._take_first(other_1d) + result = fdatairregular_1d.values - self._take_first(other_1d) - assert np.all(f_data_sub.function_values == result) + assert np.all(f_data_sub.values == result) def test_fdatairregular_arithmetic_rsub( self, @@ -391,9 +391,9 @@ def test_fdatairregular_arithmetic_rsub( f_data_sub = other_1d - fdatairregular_1d - result = self._take_first(other_1d) - fdatairregular_1d.function_values + result = self._take_first(other_1d) - fdatairregular_1d.values - assert np.all(f_data_sub.function_values == result) + assert np.all(f_data_sub.values == result) def test_fdatairregular_arithmetic_mul( self, @@ -411,9 +411,9 @@ def test_fdatairregular_arithmetic_mul( f_data_mul = fdatairregular_1d * other_1d - result = fdatairregular_1d.function_values * self._take_first(other_1d) + result = fdatairregular_1d.values * self._take_first(other_1d) - assert np.all(f_data_mul.function_values == result) + assert np.all(f_data_mul.values == result) def test_fdatairregular_arithmetic_rmul( self, @@ -431,9 +431,9 @@ def test_fdatairregular_arithmetic_rmul( f_data_mul = other_1d * fdatairregular_1d - result = self._take_first(other_1d) * fdatairregular_1d.function_values + result = self._take_first(other_1d) * fdatairregular_1d.values - assert np.all(f_data_mul.function_values == result) + assert np.all(f_data_mul.values == result) def test_fdatairregular_arithmetic_mul_commutative( # noqa: WPS118 self, @@ -469,9 +469,9 @@ def test_fdatairregular_arithmetic_div( f_data_div = fdatairregular_1d / other_1d - result = fdatairregular_1d.function_values / self._take_first(other_1d) + result = fdatairregular_1d.values / self._take_first(other_1d) - assert 
np.all(f_data_div.function_values == result) + assert np.all(f_data_div.values == result) def test_fdatairregular_arithmetic_rdiv( self, @@ -489,9 +489,9 @@ def test_fdatairregular_arithmetic_rdiv( f_data_div = other_1d / fdatairregular_1d - result = self._take_first(other_1d) / fdatairregular_1d.function_values + result = self._take_first(other_1d) / fdatairregular_1d.values - assert np.all(f_data_div.function_values == result) + assert np.all(f_data_div.values == result) class TestArithmeticOperations2D: @@ -504,7 +504,7 @@ def _take_first( if isinstance(other, np.ndarray): return other[0] elif isinstance(other, FDataIrregular): - return other.function_values + return other.values return other def _single_curve( @@ -533,9 +533,9 @@ def test_fdatairregular_arithmetic_sum( f_data_sum = fdatairregular_2d + other_2d - result = fdatairregular_2d.function_values + self._take_first(other_2d) + result = fdatairregular_2d.values + self._take_first(other_2d) - assert np.all(f_data_sum.function_values == result) + assert np.all(f_data_sum.values == result) def test_fdatairregular_arithmetic_rsum( self, @@ -553,9 +553,9 @@ def test_fdatairregular_arithmetic_rsum( f_data_sum = other_2d + fdatairregular_2d - result = self._take_first(other_2d) + fdatairregular_2d.function_values + result = self._take_first(other_2d) + fdatairregular_2d.values - assert np.all(f_data_sum.function_values == result) + assert np.all(f_data_sum.values == result) def test_fdatairregular_arithmetic_sum_commutative( # noqa: WPS118 self, @@ -591,9 +591,9 @@ def test_fdatairregular_arithmetic_sub( f_data_sub = fdatairregular_2d - other_2d - result = fdatairregular_2d.function_values - self._take_first(other_2d) + result = fdatairregular_2d.values - self._take_first(other_2d) - assert np.all(f_data_sub.function_values == result) + assert np.all(f_data_sub.values == result) def test_fdatairregular_arithmetic_rsub( self, @@ -611,9 +611,9 @@ def test_fdatairregular_arithmetic_rsub( f_data_sub = other_2d - 
fdatairregular_2d - result = self._take_first(other_2d) - fdatairregular_2d.function_values + result = self._take_first(other_2d) - fdatairregular_2d.values - assert np.all(f_data_sub.function_values == result) + assert np.all(f_data_sub.values == result) def test_fdatairregular_arithmetic_mul( self, @@ -631,9 +631,9 @@ def test_fdatairregular_arithmetic_mul( f_data_mul = fdatairregular_2d * other_2d - result = fdatairregular_2d.function_values * self._take_first(other_2d) + result = fdatairregular_2d.values * self._take_first(other_2d) - assert np.all(f_data_mul.function_values == result) + assert np.all(f_data_mul.values == result) def test_fdatairregular_arithmetic_rmul( self, @@ -651,9 +651,9 @@ def test_fdatairregular_arithmetic_rmul( f_data_mul = other_2d * fdatairregular_2d - result = self._take_first(other_2d) * fdatairregular_2d.function_values + result = self._take_first(other_2d) * fdatairregular_2d.values - assert np.all(f_data_mul.function_values == result) + assert np.all(f_data_mul.values == result) def test_fdatairregular_arithmetic_mul_commutative( # noqa: WPS118 self, @@ -689,9 +689,9 @@ def test_fdatairregular_arithmetic_div( f_data_div = fdatairregular_2d / other_2d - result = fdatairregular_2d.function_values / self._take_first(other_2d) + result = fdatairregular_2d.values / self._take_first(other_2d) - assert np.all(f_data_div.function_values == result) + assert np.all(f_data_div.values == result) def test_fdatairregular_arithmetic_rdiv( self, @@ -709,9 +709,9 @@ def test_fdatairregular_arithmetic_rdiv( f_data_div = other_2d / fdatairregular_2d - result = self._take_first(other_2d) / fdatairregular_2d.function_values + result = self._take_first(other_2d) / fdatairregular_2d.values - assert np.all(f_data_div.function_values == result) + assert np.all(f_data_div.values == result) ########################## From 272d0f6ed83c73f2cf497b7ccc1c9b0832bebabc Mon Sep 17 00:00:00 2001 From: pcuestas Date: Fri, 6 Oct 2023 09:24:46 +0200 Subject: [PATCH 
093/144] Style --- skfda/tests/test_irregular.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/skfda/tests/test_irregular.py b/skfda/tests/test_irregular.py index b1a340f2e..23c1faa43 100644 --- a/skfda/tests/test_irregular.py +++ b/skfda/tests/test_irregular.py @@ -227,10 +227,7 @@ def test_fdatairregular_copy_kwargs( # Check everything equal except specified kwarg assert len(f_data_copy) == len(fdatairregular) - assert ( - len(f_data_copy.points) - == len(fdatairregular.points) - ) + assert len(f_data_copy.points) == len(fdatairregular.points) assert f_data_copy.dim_domain == fdatairregular.dim_domain assert f_data_copy.dim_domain == fdatairregular.dim_codomain assert og_attribute != copy_attribute @@ -317,8 +314,7 @@ def test_fdatairregular_coordinates( assert len(f_data_coordinate) == len(fdatairregular) assert f_data_coordinate.dim_codomain == 1 assert np.all( - f_data_coordinate.values[:, 0] - == fdatairregular.values[:, dim], + f_data_coordinate.values[:, 0] == fdatairregular.values[:, dim], ) @@ -360,10 +356,7 @@ def test_fdatairregular_concatenate( assert len(fd_concat) == 2 * len(fdatairregular) assert np.all(start_indices_halves[1] == second_half_indices) - assert ( - len(fd_concat.points) - == 2 * len(fdatairregular.points) - ) + assert len(fd_concat.points) == 2 * len(fdatairregular.points) assert np.all(function_args_halves[1] == fdatairregular.points) assert np.all(values_halves[1] == fdatairregular.values) From 8420f1e4452a2b73e729de9dd8da1a38703b4f00 Mon Sep 17 00:00:00 2001 From: pcuestas Date: Sat, 7 Oct 2023 08:37:42 +0200 Subject: [PATCH 094/144] Refactor _get_domain_range_from_sample_range --- skfda/representation/irregular.py | 28 +++++++++++----------------- skfda/tests/test_irregular.py | 12 ++++-------- 2 files changed, 15 insertions(+), 25 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index fb460e516..ab670fee6 100644 --- 
a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -85,26 +85,24 @@ def _get_sample_range_from_data( sample_range = [] for sample, _ in enumerate(dim_sample_ranges): - sample_range.append( - tuple( - [dim_ranges[d][sample] for d in range(dim_domain)], - ), - ) + # For each function, get the sample range for each dimension + sample_range.append(tuple( + (dim_ranges[d][sample] for d in range(dim_domain)), + )) + # sample_range[f][d] => (min_point, max_point) + # is the sample range for the function f and dimension d return tuple(sample_range) def _get_domain_range_from_sample_range( sample_range: DomainRange, - dim_domain: int, ) -> DomainRange: - ranges = [] - for dim in range(dim_domain): - min_argument = min([x[dim][0] for x in sample_range]) - max_argument = max([x[dim][1] for x in sample_range]) - ranges.append((min_argument, max_argument)) + sample_range_array = np.asarray(sample_range) + min_arguments = sample_range_array[..., 0].min(axis=0) + max_arguments = sample_range_array[..., 1].max(axis=0) + return tuple(zip(min_arguments, max_arguments)) - return tuple(ranges) # domain_range ###################### # FDataIrregular# @@ -258,7 +256,6 @@ def __init__( # noqa: WPS211 if domain_range is None: domain_range = _get_domain_range_from_sample_range( self._sample_range, - self.dim_domain, ) # Default value for domain_range is a list of tuples with @@ -1607,10 +1604,7 @@ def __init__( self.points, self.dim_domain, ) - domain_range = _get_domain_range_from_sample_range( - sample_range, - self.dim_domain, - ) + domain_range = _get_domain_range_from_sample_range(sample_range) self.domain_range = validate_domain_range(domain_range) self.dim_codomain = dim_codomain diff --git a/skfda/tests/test_irregular.py b/skfda/tests/test_irregular.py index 23c1faa43..85c780480 100644 --- a/skfda/tests/test_irregular.py +++ b/skfda/tests/test_irregular.py @@ -403,18 +403,14 @@ def test_fdatairregular_restrict( # The min arg is larger than the domain min 
constraint assert len(restricted_fdata) > 0 assert all( - [ - sr[0] > restricted_domain[i][0] - for i, sr in enumerate(sample_ranges) - ], + sr[0] > restricted_domain[i][0] + for i, sr in enumerate(sample_ranges) ) # The max arg is lesser than the domain max constraint assert all( - [ - sr[1] < restricted_domain[i][1] - for i, sr in enumerate(sample_ranges) - ], + sr[1] < restricted_domain[i][1] + for i, sr in enumerate(sample_ranges) ) From 2e933a46988643939243fe58a4ddbf52fcdcfd93 Mon Sep 17 00:00:00 2001 From: pcuestas Date: Sat, 7 Oct 2023 09:28:12 +0200 Subject: [PATCH 095/144] Refactor _get_sample_range_from_data --- skfda/representation/irregular.py | 76 +++++++++---------------------- 1 file changed, 22 insertions(+), 54 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index ab670fee6..2e77ff2c4 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -9,7 +9,9 @@ import numbers import warnings -from typing import Any, Optional, Sequence, Tuple, Type, TypeVar, Union, cast +from typing import ( + Any, List, Optional, Sequence, Tuple, Type, TypeVar, Union, cast, +) import numpy as np import pandas.api.extensions @@ -19,7 +21,6 @@ from ..typing._base import ( DomainRange, DomainRangeLike, - GridPoints, GridPointsLike, LabelTupleLike, ) @@ -38,61 +39,30 @@ ###################### +def _get_array_slices_by_function( + start_indices: NDArrayInt, + array: NDArrayFloat, +) -> List[NDArrayFloat]: + return np.split(array, start_indices[1:]) + + def _get_sample_range_from_data( start_indices: NDArrayInt, points: NDArrayFloat, - dim_domain: int, ) -> DomainRange: - dim_ranges = [] # Sample range for each dimension - for dim in range(dim_domain): - i = 0 - - # Sample range for each function in current dimension: - dim_sample_ranges = [] - for f in start_indices[1:]: - # i, f: first and last+1 index of the current function. - # Get min and max argument for the current function and dimension. 
- min_argument = min( - [points[j][dim] for j in range(i, f)], - ) - max_argument = max( - [points[j][dim] for j in range(i, f)], - ) - dim_sample_ranges.append( - ((min_argument, max_argument)), - ) - i = f - - # Get min and max argument for the last function and dimension. - min_argument = min( - [ - points[i + j][dim] - for j in range(points.shape[0] - i) - ], - ) - max_argument = max( - [ - points[i + j][dim] - for j in range(points.shape[0] - i) - ], - ) - dim_sample_ranges.append( - (min_argument, max_argument), + """Computes the domain ranges of each sample. + + Returns: + sample_range: A tuple of tuples. Where + sample_range[f][d] = (min_point, max_point) is the domain range for + the function f in dimension d. + """ + return tuple( + tuple( + zip(np.min(f_points, axis=0), np.max(f_points, axis=0)), ) - - # Append sample ranges for current dimension - dim_ranges.append(dim_sample_ranges) - - sample_range = [] - for sample, _ in enumerate(dim_sample_ranges): - # For each function, get the sample range for each dimension - sample_range.append(tuple( - (dim_ranges[d][sample] for d in range(dim_domain)), - )) - - # sample_range[f][d] => (min_point, max_point) - # is the sample range for the function f and dimension d - return tuple(sample_range) + for f_points in _get_array_slices_by_function(start_indices, points) + ) def _get_domain_range_from_sample_range( @@ -247,7 +217,6 @@ def __init__( # noqa: WPS211 self._sample_range = _get_sample_range_from_data( self.start_indices, self.points, - self.dim_domain, ) # Default value for sample_range is a list of tuples with @@ -1602,7 +1571,6 @@ def __init__( sample_range = _get_sample_range_from_data( self.start_indices, self.points, - self.dim_domain, ) domain_range = _get_domain_range_from_sample_range(sample_range) From d92d0055a4f0a8261537d924d5ce0623272a0f1e Mon Sep 17 00:00:00 2001 From: pcuestas Date: Sat, 7 Oct 2023 10:09:28 +0200 Subject: [PATCH 096/144] Remove use of np.matrix --- 
skfda/representation/irregular.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 2e77ff2c4..a4af011fe 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -618,11 +618,9 @@ def mean(self: T) -> T: mean of all curves the across each value. """ # Find all distinct arguments (ordered) and corresponding values - distinct_args = np.unique(np.matrix.flatten(self.points)) + distinct_args = np.unique(self.points.flatten()) values = [ - np.matrix.flatten(self.values[ - np.where(self.points == arg)[0] - ]) + self.values[np.where(self.points == arg)[0]].flatten() for arg in distinct_args ] @@ -649,11 +647,9 @@ def var(self: T) -> T: """ # Find all distinct arguments (ordered) and corresponding values - distinct_args = np.unique(np.matrix.flatten(self.points)) + distinct_args = np.unique(self.points.flatten()) values = [ - np.matrix.flatten(self.values[ - np.where(self.points == arg)[0] - ]) + self.values[np.where(self.points == arg)[0]].flatten() for arg in distinct_args ] From 3c130350558a3e2d37bd97ab286ce6f932bd12a5 Mon Sep 17 00:00:00 2001 From: pcuestas Date: Sat, 7 Oct 2023 10:18:32 +0200 Subject: [PATCH 097/144] Fix mean and var for more than one dimension --- skfda/representation/irregular.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index a4af011fe..c57e642ad 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -618,20 +618,20 @@ def mean(self: T) -> T: mean of all curves the across each value. 
""" # Find all distinct arguments (ordered) and corresponding values - distinct_args = np.unique(self.points.flatten()) + distinct_args = np.unique(self.points, axis=0) values = [ - self.values[np.where(self.points == arg)[0]].flatten() + self.values[np.where(self.points == arg)[0]] for arg in distinct_args ] # Obtain mean of all available values for each argument point - means = np.array([np.mean(value) for value in values]) + means = np.array([np.mean(value, axis=0) for value in values]) # Create a FDataIrregular object with only 1 curve, the mean curve return FDataIrregular( start_indices=np.array([0]), - points=distinct_args.reshape(-1, 1), - values=means.reshape(-1, 1), + points=distinct_args, + values=means, sample_names=("mean",), ) @@ -647,20 +647,20 @@ def var(self: T) -> T: """ # Find all distinct arguments (ordered) and corresponding values - distinct_args = np.unique(self.points.flatten()) + distinct_args = np.unique(self.points, axis=0) values = [ - self.values[np.where(self.points == arg)[0]].flatten() + self.values[np.where(self.points == arg)[0]] for arg in distinct_args ] # Obtain variance of all available values for each argument point - variances = np.array([np.var(value) for value in values]) + variances = np.array([np.var(value, axis=0) for value in values]) # Create a FDataIrregular object with only 1 curve, the variance curve return FDataIrregular( start_indices=np.array([0]), - points=distinct_args.reshape(-1, 1), - values=variances.reshape(-1, 1), + points=distinct_args, + values=variances, sample_names=("var",), ) From 91b74b67be22f3c83264f5765bb930d047cae8f7 Mon Sep 17 00:00:00 2001 From: pcuestas Date: Sat, 7 Oct 2023 10:20:37 +0200 Subject: [PATCH 098/144] Check if is instance in equals() --- skfda/representation/irregular.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index c57e642ad..b2a34560b 100644 --- a/skfda/representation/irregular.py 
+++ b/skfda/representation/irregular.py @@ -675,7 +675,8 @@ def cov(self: T) -> T: def equals(self, other: object) -> bool: """Comparison of FDataIrregular objects.""" - other = cast(FDataIrregular, other) + if not isinstance(other, FDataIrregular): + return False if not self._eq_elemenwise(other): return False From 80b814ac694b0e208fae09262c76d2d570799b4b Mon Sep 17 00:00:00 2001 From: pcuestas Date: Sat, 7 Oct 2023 10:22:46 +0200 Subject: [PATCH 099/144] Change order of checks in equals functions --- skfda/representation/irregular.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index b2a34560b..2b73bf66d 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -677,6 +677,9 @@ def equals(self, other: object) -> bool: """Comparison of FDataIrregular objects.""" if not isinstance(other, FDataIrregular): return False + + if not super().equals(other): + return False if not self._eq_elemenwise(other): return False @@ -688,7 +691,7 @@ def equals(self, other: object) -> bool: if self.interpolation != other.interpolation: return False - return super().equals(other) + return True def _eq_elemenwise(self: T, other: T) -> NDArrayBool: """Elementwise equality of FDataIrregular.""" From 5183987d31d183305331bb5589a423f41220077e Mon Sep 17 00:00:00 2001 From: pcuestas Date: Sat, 7 Oct 2023 12:13:01 +0200 Subject: [PATCH 100/144] Integrate IrregularBasisSmoother into BasisSmoother --- skfda/preprocessing/smoothing/__init__.py | 2 +- skfda/preprocessing/smoothing/_basis.py | 200 ++++++---------------- skfda/representation/__init__.py | 1 + skfda/representation/irregular.py | 4 +- 4 files changed, 56 insertions(+), 151 deletions(-) diff --git a/skfda/preprocessing/smoothing/__init__.py b/skfda/preprocessing/smoothing/__init__.py index bf7f78ae1..4d3840d1e 100644 --- a/skfda/preprocessing/smoothing/__init__.py +++ 
b/skfda/preprocessing/smoothing/__init__.py @@ -10,7 +10,7 @@ "validation", ], submod_attrs={ - "_basis": ["BasisSmoother", "IrregularBasisSmoother"], + "_basis": ["BasisSmoother"], "_kernel_smoothers": ["KernelSmoother"], }, ) diff --git a/skfda/preprocessing/smoothing/_basis.py b/skfda/preprocessing/smoothing/_basis.py index 740dbece1..582e553a0 100644 --- a/skfda/preprocessing/smoothing/_basis.py +++ b/skfda/preprocessing/smoothing/_basis.py @@ -16,10 +16,45 @@ from ...misc.regularization import L2Regularization from ...representation import FData, FDataBasis, FDataGrid, FDataIrregular from ...representation.basis import Basis -from ...typing._base import GridPointsLike +from ...typing._base import GridPointsLike, GridPoints from ...typing._numpy import NDArrayFloat from ._linear import _LinearSmoother +############################# +# Auxiliary functions to treat with FDataGrid and FDataIrregular +############################# + +def _eval_points(fd: FData) -> NDArrayFloat: + """Get the eval points of a FDataGrid or FDataIrregular.""" + if isinstance(fd, FDataGrid): + return _cartesian_product(_to_grid_points(fd.grid_points)) + if isinstance(fd, FDataIrregular): + return fd.points + raise ValueError("fd must be a FDataGrid or FDataIrregular") + + +def _input_points(fd: FData) -> GridPoints: + """Get the input points of a FDataGrid or FDataIrregular.""" + if isinstance(fd, FDataGrid): + return fd.grid_points + if isinstance(fd, FDataIrregular): + # There exists no equivalent in FDataIrregular to grid_points + return fd.points # type: ignore[return-value] + raise ValueError("fd must be a FDataGrid or FDataIrregular") + + +def _function_values(fd: FData) -> NDArrayFloat: + """Get the function values of a FDataGrid or FDataIrregular.""" + if isinstance(fd, FDataGrid): + return fd.data_matrix.reshape((fd.n_samples, -1)).T + if isinstance(fd, FDataIrregular): + return fd.values + raise ValueError("fd must be a FDataGrid or FDataIrregular") + + 
+############################# +# BasisSmoother +############################# class BasisSmoother(_LinearSmoother): r""" @@ -225,15 +260,15 @@ def __init__( def _coef_matrix( self, - input_points: GridPointsLike, + eval_points: NDArrayFloat, *, - data_matrix: Optional[NDArrayFloat] = None, + function_values: Optional[NDArrayFloat] = None, ) -> NDArrayFloat: """Get the matrix that gives the coefficients.""" from ...misc.regularization import compute_penalty_matrix basis_values_input = self.basis( - _cartesian_product(_to_grid_points(input_points)), + eval_points, ).reshape((self.basis.n_basis, -1)).T penalty_matrix = compute_penalty_matrix( @@ -243,13 +278,13 @@ def _coef_matrix( ) # Get the matrix for computing the coefficients if no - # data_matrix is passed - if data_matrix is None: - data_matrix = np.eye(basis_values_input.shape[0]) + # function_values is passed + if function_values is None: + function_values = np.eye(basis_values_input.shape[0]) return solve_regularized_weighted_lstsq( coefs=basis_values_input, - result=data_matrix, + result=function_values, weights=self.weights, penalty_matrix=penalty_matrix, lstsq_method=self.method, @@ -266,11 +301,13 @@ def _hat_matrix( ), ).reshape((self.basis.n_basis, -1)).T - return basis_values_output @ self._coef_matrix(input_points) + return basis_values_output @ self._coef_matrix( + _cartesian_product(_to_grid_points(input_points)), + ) def fit( self, - X: FDataGrid, + X: FDataGrid | FDataIrregular, y: object = None, ) -> BasisSmoother: """Compute the hat matrix for the desired output points. @@ -283,7 +320,7 @@ def fit( self """ - self.input_points_ = X.grid_points + self.input_points_ = _input_points(X) self.output_points_ = ( _to_grid_points(self.output_points) if self.output_points is not None @@ -297,140 +334,7 @@ def fit( def transform( self, - X: FDataGrid, - y: object = None, - ) -> FData: - """ - Smooth the data. - - Args: - X: The data to smooth. - y: Ignored - - Returns: - Smoothed data. 
- - """ - assert all( - np.array_equal(i, s) for i, s in zip( - self.input_points_, - X.grid_points, - ) - ) - - if self.return_basis: - coefficients = self._coef_matrix( - input_points=X.grid_points, - data_matrix=X.data_matrix.reshape((X.n_samples, -1)).T, - ).T - - return FDataBasis( - basis=self.basis, - coefficients=coefficients, - dataset_name=X.dataset_name, - argument_names=X.argument_names, - coordinate_names=X.coordinate_names, - sample_names=X.sample_names, - ) - - return super().transform(X, y) - - -class IrregularBasisSmoother(_LinearSmoother): - """Transform irregular data to a smooth basis functional form.""" - - _required_parameters = ["basis"] - - def __init__( - self, - basis: Basis, - *, - smoothing_parameter: float = 1.0, - weights: Optional[NDArrayFloat] = None, - regularization: Optional[L2Regularization[FDataGrid]] = None, - output_points: Optional[GridPointsLike] = None, - method: LstsqMethod = 'svd', - return_basis: bool = False, - ) -> None: - self.basis = basis - self.smoothing_parameter = smoothing_parameter - self.weights = weights - self.regularization = regularization - self.output_points = output_points - self.method = method - self.return_basis: Final = return_basis - - def _coef_matrix( - self, - input_points: NDArrayFloat, - *, - function_values: Optional[NDArrayFloat] = None, - ) -> NDArrayFloat: - """Get the matrix that gives the coefficients.""" - from ...misc.regularization import compute_penalty_matrix - - basis_values_input = self.basis( - input_points, - ).reshape((self.basis.n_basis, -1)).T - - penalty_matrix = compute_penalty_matrix( - basis_iterable=(self.basis,), - regularization_parameter=self.smoothing_parameter, - regularization=self.regularization, - ) - - # Get the matrix for computing the coefficients if no - # data_matrix is passed - if function_values is None: - function_values = np.eye(basis_values_input.shape[0]) - - return solve_regularized_weighted_lstsq( - coefs=basis_values_input, - result=function_values, 
- weights=self.weights, - penalty_matrix=penalty_matrix, - lstsq_method=self.method, - ) - - def _hat_matrix( - self, - input_points: GridPointsLike, - output_points: GridPointsLike, - ) -> NDArrayFloat: - raise NotImplementedError( - "Not implemented for as_coordinates = True", - ) - - def fit( - self, - X: FDataIrregular, - y: object = None, - ) -> IrregularBasisSmoother: - """Compute the hat matrix for the desired output points. - - Args: - X: The data whose points are used to compute the matrix. - y: Ignored. - - Returns: - self - - """ - self.input_points_ = X.points - self.output_points_ = ( - self.output_points - if self.output_points is not None - else self.input_points_ - ) - - if not self.return_basis: - super().fit(X, y) - - return self - - def transform( - self, - X: FDataIrregular, + X: FDataGrid | FDataIrregular, y: object = None, ) -> FData: """ @@ -447,14 +351,14 @@ def transform( assert all( np.array_equal(i, s) for i, s in zip( self.input_points_, - X.points, + _input_points(X), ) ) if self.return_basis: coefficients = self._coef_matrix( - input_points=X.points, - function_values=X.values, + eval_points=_eval_points(X), + function_values=_function_values(X), ).T return FDataBasis( diff --git a/skfda/representation/__init__.py b/skfda/representation/__init__.py index 879e4b5a2..9467213a3 100644 --- a/skfda/representation/__init__.py +++ b/skfda/representation/__init__.py @@ -24,3 +24,4 @@ from ._functional_data import FData as FData, concatenate as concatenate from .basis import FDataBasis as FDataBasis from .grid import FDataGrid as FDataGrid + from .irregular import FDataIrregular as FDataIrregular diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 2b73bf66d..b5ec6616a 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -1058,7 +1058,7 @@ def to_basis(self, basis: Basis, **kwargs: Any) -> FDataBasis: FDataBasis: Basis representation of the funtional data object. 
""" - from ..preprocessing.smoothing import IrregularBasisSmoother + from ..preprocessing.smoothing import BasisSmoother if self.dim_domain != basis.dim_domain: raise ValueError( @@ -1079,7 +1079,7 @@ def to_basis(self, basis: Basis, **kwargs: Any) -> FDataBasis: if not basis.is_domain_range_fixed(): basis = basis.copy(domain_range=self.domain_range) - smoother = IrregularBasisSmoother( + smoother = BasisSmoother( basis=basis, **kwargs, return_basis=True, From 1a15e9cdbec979d141a46bf46ff873ba4010c061 Mon Sep 17 00:00:00 2001 From: pcuestas Date: Sat, 7 Oct 2023 12:20:43 +0200 Subject: [PATCH 101/144] nbytes typo --- skfda/representation/irregular.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index b5ec6616a..ab98dbe68 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -1529,7 +1529,7 @@ def nbytes(self) -> int: array_nbytes = [ self.start_indices.nbytes, self.points.nbytes, - self.values, + self.values.nbytes, ] return sum(array_nbytes) From 8a6dd5deedb1de3b1154202cbcb54603580c5b99 Mon Sep 17 00:00:00 2001 From: pcuestas Date: Thu, 12 Oct 2023 10:28:29 +0200 Subject: [PATCH 102/144] Comments --- skfda/representation/irregular.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index ab98dbe68..a57484117 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -51,11 +51,15 @@ def _get_sample_range_from_data( points: NDArrayFloat, ) -> DomainRange: """Computes the domain ranges of each sample. - + + Args: + start_indices: start_indices of the FDataIrregular object. + points: points of the FDataIrregular object. + Returns: - sample_range: A tuple of tuples. 
Where + DomainRange: (sample_range) a tuple of tuples of 2-tuples where sample_range[f][d] = (min_point, max_point) is the domain range for - the function f in dimension d. + the function f in dimension d. """ return tuple( tuple( @@ -68,6 +72,16 @@ def _get_sample_range_from_data( def _get_domain_range_from_sample_range( sample_range: DomainRange, ) -> DomainRange: + """Computes the domain range of the whole dataset. + + Args: + sample_range: sample_range of the FDataIrregular object. + + Returns: + DomainRange: (domain_range) a tuple of 2-tuples where + domain_range[d] = (min_point, max_point) is the domain range for + the dimension d. + """ sample_range_array = np.asarray(sample_range) min_arguments = sample_range_array[..., 0].min(axis=0) max_arguments = sample_range_array[..., 1].max(axis=0) From 07ffeffaf5c9fd4e6e053b2f325b9d6f790ebf16 Mon Sep 17 00:00:00 2001 From: Pablo Cuesta Sierra <71875712+pcuestas@users.noreply.github.com> Date: Mon, 16 Oct 2023 10:41:55 +0200 Subject: [PATCH 103/144] Remove Optional MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Carlos Ramos Carreño --- skfda/exploratory/visualization/representation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index fc1c7e1fb..604efd021 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -612,7 +612,7 @@ def __init__( # noqa: WPS211 group_colors: Indexable[K, ColorLike] | None = None, group_names: Indexable[K, str] | None = None, legend: bool = False, - marker: Optional[str] = None, + marker: str | None = None, **kwargs: Any, ) -> None: super().__init__( From 47d1ba1201f984518d27ec4eed8474b2aa7298a9 Mon Sep 17 00:00:00 2001 From: Pablo Cuesta Sierra <71875712+pcuestas@users.noreply.github.com> Date: Mon, 16 Oct 2023 10:42:46 +0200 Subject: [PATCH 
104/144] Remove Optional MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Carlos Ramos Carreño --- skfda/preprocessing/smoothing/_basis.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skfda/preprocessing/smoothing/_basis.py b/skfda/preprocessing/smoothing/_basis.py index 582e553a0..469746322 100644 --- a/skfda/preprocessing/smoothing/_basis.py +++ b/skfda/preprocessing/smoothing/_basis.py @@ -262,7 +262,7 @@ def _coef_matrix( self, eval_points: NDArrayFloat, *, - function_values: Optional[NDArrayFloat] = None, + function_values: NDArrayFloat | None = None, ) -> NDArrayFloat: """Get the matrix that gives the coefficients.""" from ...misc.regularization import compute_penalty_matrix From e225c3e15a57777eab07292d80474f58e9460e0b Mon Sep 17 00:00:00 2001 From: pcuestas Date: Mon, 16 Oct 2023 10:56:21 +0200 Subject: [PATCH 105/144] Rename to_matrix -> _to_data_matrix, remove np.matrix usage --- skfda/exploratory/visualization/representation.py | 4 ++-- skfda/representation/irregular.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index 604efd021..bd96afb55 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -708,8 +708,8 @@ def _plot( set_color_dict(self.sample_colors, j, color_dict) self.artists[j, 0] = axes[0].plot( - np.matrix.flatten(self.grid_points[j]), - np.matrix.flatten(self.evaluated_points[j]), + self.grid_points[j].flatten(), + self.evaluated_points[j].flatten(), **color_dict, picker=True, pickradius=2, diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index a57484117..7642821d0 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -1115,7 +1115,7 @@ def to_basis(self, basis: Basis, **kwargs: Any) -> FDataBasis: 
extrapolation=self.extrapolation, ) - def to_matrix(self) -> ArrayLike: + def _to_data_matrix(self) -> ArrayLike: """Convert FDataIrregular values to numpy matrix. Undefined values in the grid will be represented with np.nan. @@ -1166,7 +1166,7 @@ def to_grid( # noqa: D102 Returns: FDataGrid: FDataGrid with the irregular functional data. """ - data_matrix, grid_points = self.to_matrix() + data_matrix, grid_points = self._to_data_matrix() return FDataGrid( data_matrix=data_matrix, From c4cd858066bf5380cce65d5fc659b865bcc146a3 Mon Sep 17 00:00:00 2001 From: pcuestas Date: Mon, 16 Oct 2023 11:01:18 +0200 Subject: [PATCH 106/144] Remove types from docstring, remove single line function --- skfda/representation/irregular.py | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 7642821d0..45e86687a 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -39,18 +39,11 @@ ###################### -def _get_array_slices_by_function( - start_indices: NDArrayInt, - array: NDArrayFloat, -) -> List[NDArrayFloat]: - return np.split(array, start_indices[1:]) - - def _get_sample_range_from_data( start_indices: NDArrayInt, points: NDArrayFloat, ) -> DomainRange: - """Computes the domain ranges of each sample. + """Compute the domain ranges of each sample. Args: start_indices: start_indices of the FDataIrregular object. @@ -65,14 +58,14 @@ def _get_sample_range_from_data( tuple( zip(np.min(f_points, axis=0), np.max(f_points, axis=0)), ) - for f_points in _get_array_slices_by_function(start_indices, points) + for f_points in np.split(points, start_indices[1:]) ) def _get_domain_range_from_sample_range( sample_range: DomainRange, ) -> DomainRange: - """Computes the domain range of the whole dataset. + """Compute the domain range of the whole dataset. Args: sample_range: sample_range of the FDataIrregular object. 
@@ -275,13 +268,13 @@ def from_dataframe( belong to. Args: - dataframe (pandas.DataFrame): Pandas dataframe containing the + dataframe: Pandas dataframe containing the irregular functional dataset. - id_column (str): Name of the column which contains the information + id_column: Name of the column which contains the information about which curve does each each row belong to. - argument_columns (Sequence[str | None]): list of columns where + argument_columns: list of columns where the arguments for each dimension of the domain can be found. - coordinate_columns (Sequence[str | None]): list of columns where + coordinate_columns: list of columns where the values for each dimension of the image can be found. kwargs: Arguments for the FDataIrregular constructor. From dec2db3c7e06a838d082b5cdffc8ffc7143bbdd7 Mon Sep 17 00:00:00 2001 From: pcuestas Date: Mon, 16 Oct 2023 11:08:29 +0200 Subject: [PATCH 107/144] Remove use of indices_start_end (in representation) --- skfda/exploratory/visualization/representation.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index bd96afb55..f54ca010d 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -625,15 +625,12 @@ def __init__( # noqa: WPS211 self.fdata = fdata # There may be different points for each function - self.grid_points = [] - self.evaluated_points = [] - for index_start, index_end in self.fdata.indices_start_end(): - self.grid_points.append( - self.fdata.points[index_start:index_end], - ) - self.evaluated_points.append( - self.fdata.values[index_start:index_end], - ) + self.grid_points = np.split( + self.fdata.points, self.fdata.start_indices[1:], + ) + self.evaluated_points = np.split( + self.fdata.values, self.fdata.start_indices[1:], + ) self.domain_range = domain_range self.group = group From 
bd289409ade7f2afb92fb0906d367bc1f91f867d Mon Sep 17 00:00:00 2001 From: pcuestas Date: Mon, 16 Oct 2023 11:16:58 +0200 Subject: [PATCH 108/144] Remove use of indices_start_end (all remaining) --- skfda/representation/irregular.py | 36 +++++++++++-------------------- 1 file changed, 12 insertions(+), 24 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 45e86687a..3545c1be5 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -400,9 +400,8 @@ def _sort_by_arguments(self) -> Tuple[ArrayLike, ArrayLike]: Returns: Tuple[ArrayLike, Arraylike]: sorted pair (arguments, values) """ - slices = self.indices_start_end() - slice_args = [self.points[slice(*s)] for s in slices] - slice_values = [self.values[slice(*s)] for s in slices] + slice_args = np.split(self.points, self.start_indices[1:]) + slice_values = np.split(self.values, self.start_indices[1:]) # Sort lexicographically, first to last dimension sorting_masks = [ @@ -1264,16 +1263,18 @@ def restrict( # noqa: WPS210 # Eliminate points outside the new range. 
# Must also modify function indices to point to new array - for i, index_tuple in enumerate(self.indices_start_end()): - prev_index, index = index_tuple - s = slice(prev_index, index) - masks = set(range(self.points[s].shape[0])) + slice_points = np.split(self.points, self.start_indices[1:]) + slice_values = np.split(self.values, self.start_indices[1:]) + + for i, points_values in enumerate(zip(slice_points, slice_values)): + sample_points, sample_values = points_values + masks = set(range(sample_points.shape[0])) for dim, dr in enumerate(domain_range): dr_start, dr_end = dr select_mask = np.where( ( - (dr_start <= self.points[s][:, dim]) - & (self.points[s][:, dim] <= dr_end) + (dr_start <= sample_points[:, dim]) + & (sample_points[:, dim] <= dr_end) ), ) @@ -1283,8 +1284,8 @@ def restrict( # noqa: WPS210 masks = list(masks) if len(masks) > 0: indices.append(head) - arguments.append(self.points[s][masks, :]) - values.append(self.values[s][masks, :]) + arguments.append(sample_points[masks, :]) + values.append(sample_values[masks, :]) sample_names.append(self.sample_names[i]) head += len(masks) @@ -1383,19 +1384,6 @@ def __repr__(self) -> str: '\n ', ) - def indices_start_end(self) -> Sequence[Tuple[int, int]]: - """Return the indices of the start and end of each function. - - Returns: - Sequence[Tuple[int, int]]: Sequence of tuples with the indices of - the start and end of each function. 
- - """ - indices = np.append( - self.start_indices, len(self.points) - ) - return list(zip(indices, indices[1:])) - def __getitem__( self: T, key: Union[int, slice, NDArrayInt, NDArrayBool], From fff823061ff43a58f4ef3c50a5ff7b562c31cd47 Mon Sep 17 00:00:00 2001 From: pcuestas Date: Mon, 16 Oct 2023 13:37:19 +0200 Subject: [PATCH 109/144] spacing --- skfda/preprocessing/smoothing/_basis.py | 1 + 1 file changed, 1 insertion(+) diff --git a/skfda/preprocessing/smoothing/_basis.py b/skfda/preprocessing/smoothing/_basis.py index 469746322..f052a33e9 100644 --- a/skfda/preprocessing/smoothing/_basis.py +++ b/skfda/preprocessing/smoothing/_basis.py @@ -24,6 +24,7 @@ # Auxiliary functions to treat with FDataGrid and FDataIrregular ############################# + def _eval_points(fd: FData) -> NDArrayFloat: """Get the eval points of a FDataGrid or FDataIrregular.""" if isinstance(fd, FDataGrid): From 20fb03d0247f699655b4d21cbd74f67b4f2890e9 Mon Sep 17 00:00:00 2001 From: pcuestas Date: Tue, 17 Oct 2023 19:54:29 +0200 Subject: [PATCH 110/144] Private functions in grid and irregular for BasisSmoother --- skfda/preprocessing/smoothing/_basis.py | 42 ++++--------------------- skfda/representation/grid.py | 18 ++++++++++- skfda/representation/irregular.py | 9 +++++- 3 files changed, 31 insertions(+), 38 deletions(-) diff --git a/skfda/preprocessing/smoothing/_basis.py b/skfda/preprocessing/smoothing/_basis.py index f052a33e9..a7dd93e1d 100644 --- a/skfda/preprocessing/smoothing/_basis.py +++ b/skfda/preprocessing/smoothing/_basis.py @@ -20,38 +20,6 @@ from ...typing._numpy import NDArrayFloat from ._linear import _LinearSmoother -############################# -# Auxiliary functions to treat with FDataGrid and FDataIrregular -############################# - - -def _eval_points(fd: FData) -> NDArrayFloat: - """Get the eval points of a FDataGrid or FDataIrregular.""" - if isinstance(fd, FDataGrid): - return _cartesian_product(_to_grid_points(fd.grid_points)) - if isinstance(fd, 
FDataIrregular): - return fd.points - raise ValueError("fd must be a FDataGrid or FDataIrregular") - - -def _input_points(fd: FData) -> GridPoints: - """Get the input points of a FDataGrid or FDataIrregular.""" - if isinstance(fd, FDataGrid): - return fd.grid_points - if isinstance(fd, FDataIrregular): - # There exists no equivalent in FDataIrregular to grid_points - return fd.points # type: ignore[return-value] - raise ValueError("fd must be a FDataGrid or FDataIrregular") - - -def _function_values(fd: FData) -> NDArrayFloat: - """Get the function values of a FDataGrid or FDataIrregular.""" - if isinstance(fd, FDataGrid): - return fd.data_matrix.reshape((fd.n_samples, -1)).T - if isinstance(fd, FDataIrregular): - return fd.values - raise ValueError("fd must be a FDataGrid or FDataIrregular") - ############################# # BasisSmoother @@ -321,7 +289,7 @@ def fit( self """ - self.input_points_ = _input_points(X) + self.input_points_ = X._get_input_points() self.output_points_ = ( _to_grid_points(self.output_points) if self.output_points is not None @@ -352,14 +320,16 @@ def transform( assert all( np.array_equal(i, s) for i, s in zip( self.input_points_, - _input_points(X), + X._get_input_points(), ) ) + eval_points, function_values = X._get_points_and_values() + if self.return_basis: coefficients = self._coef_matrix( - eval_points=_eval_points(X), - function_values=_function_values(X), + eval_points=eval_points, + function_values=function_values, ).T return FDataBasis( diff --git a/skfda/representation/grid.py b/skfda/representation/grid.py index 71898cf69..88f135f51 100644 --- a/skfda/representation/grid.py +++ b/skfda/representation/grid.py @@ -16,6 +16,7 @@ Callable, Optional, Sequence, + Tuple, Type, TypeVar, Union, @@ -30,7 +31,13 @@ import scipy.stats.mstats from matplotlib.figure import Figure -from .._utils import _check_array_key, _int_to_real, _to_grid_points, constants +from .._utils import ( + _cartesian_product, + _check_array_key, + _int_to_real, 
+ _to_grid_points, + constants, +) from ..typing._base import ( DomainRange, DomainRangeLike, @@ -529,6 +536,15 @@ def _check_same_dimensions(self: T, other: T) -> None: if not np.array_equal(self.grid_points, other.grid_points): raise ValueError("Grid points for both objects must be equal") + def _get_points_and_values(self: T) -> Tuple[NDArrayFloat, NDArrayFloat]: + return ( + _cartesian_product(_to_grid_points(self.grid_points)), + self.data_matrix.reshape((self.n_samples, -1)).T, + ) + + def _get_input_points(self: T) -> GridPoints: + return self.grid_points + def sum( # noqa: WPS125 self: T, *, diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 3545c1be5..3b764e2c0 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -21,6 +21,7 @@ from ..typing._base import ( DomainRange, DomainRangeLike, + GridPoints, GridPointsLike, LabelTupleLike, ) @@ -572,6 +573,12 @@ def check_same_dimensions(self: T, other: T) -> None: if self.dim_domain != other.dim_domain: raise ValueError("Dimension mismatch in arguments") + def _get_points_and_values(self: T) -> Tuple[NDArrayFloat, NDArrayFloat]: + return (self.points, self.values) + + def _get_input_points(self: T) -> GridPoints: + return self.points # type: ignore[return-value] + def sum( # noqa: WPS125 self: T, *, @@ -683,7 +690,7 @@ def equals(self, other: object) -> bool: """Comparison of FDataIrregular objects.""" if not isinstance(other, FDataIrregular): return False - + if not super().equals(other): return False From 8081d68a63f2105449f78dbf94ae792d8672b8ff Mon Sep 17 00:00:00 2001 From: pcuestas Date: Tue, 17 Oct 2023 21:52:26 +0200 Subject: [PATCH 111/144] Rewrite from_fdatagrid to remove all python loops (at the cost of more memory use) --- skfda/representation/irregular.py | 67 +++++++++++++------------------ 1 file changed, 29 insertions(+), 38 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 
3b764e2c0..9e78b9fe3 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -17,7 +17,11 @@ import pandas.api.extensions from matplotlib.figure import Figure -from .._utils import _check_array_key +from .._utils import ( + _cartesian_product, + _check_array_key, + _to_grid_points, +) from ..typing._base import ( DomainRange, DomainRangeLike, @@ -345,46 +349,33 @@ def from_fdatagrid( FDataIrregular: FDataIrregular containing the same data as the source but with an irregular structure. """ - # Obtain num functions and num observations from data - n_measurements = np.sum(~(np.isnan(f_data.data_matrix).all(axis=-1))) - num_functions = f_data.data_matrix.shape[0] - - # Create data structure of function pointers and coordinates - start_indices = np.zeros((num_functions, ), dtype=np.uint32) - points = np.zeros( - (n_measurements, f_data.dim_domain), + all_points_single_function = _cartesian_product( + _to_grid_points(f_data.grid_points), ) - values = np.zeros( - (n_measurements, f_data.dim_codomain), + # Repeat points for each function + flat_points = np.tile( + all_points_single_function, (f_data.n_samples, 1), ) - # Find all the combinations of grid points and indices - from itertools import product - grid_point_indexes = [ - np.indices(np.array(gp).shape)[0] - for gp in f_data.grid_points - ] - combinations = list(product(*f_data.grid_points)) - index_combinations = list(product(*grid_point_indexes)) - - head = 0 - for i in range(num_functions): - start_indices[i] = head - num_values = 0 - - for g_index, g in enumerate(index_combinations): - if np.all(np.isnan(f_data.data_matrix[(i,) + g])): - continue - - arg = combinations[g_index] - value = f_data.data_matrix[(i, ) + g] - - points[head + num_values, :] = arg - values[head + num_values, :] = value - - num_values += 1 - - head += num_values + # Array with values of each function + all_values = f_data.data_matrix.reshape( + (f_data.n_samples, -1, f_data.dim_codomain), + ) + # 
Concatenated values of all functions + flat_values = all_values.reshape((-1, f_data.dim_codomain)) + # Which values are not nan with shape: all_values.shape + nonnan_all_values = ~np.all(np.isnan(all_values), axis=-1) + # Which values are not nan with shape: flat_values.shape + nonnan_flat_values = nonnan_all_values.reshape((-1,)) + + values = flat_values[nonnan_flat_values] + points = flat_points[nonnan_flat_values] + + # Count non-nan values per function to obtain start_indices + n_points_per_function = np.sum(nonnan_all_values, axis=-1) + start_indices = np.concatenate(( + np.zeros(1, np.int32), np.cumsum(n_points_per_function[:-1]), + )) return cls( start_indices, From 7bbe8bc02cbd36a857cacd770991f988c19d1a26 Mon Sep 17 00:00:00 2001 From: pcuestas Date: Tue, 17 Oct 2023 22:00:54 +0200 Subject: [PATCH 112/144] Remove unused imports --- skfda/preprocessing/smoothing/_basis.py | 2 +- skfda/representation/irregular.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/skfda/preprocessing/smoothing/_basis.py b/skfda/preprocessing/smoothing/_basis.py index a7dd93e1d..9dcb7ca52 100644 --- a/skfda/preprocessing/smoothing/_basis.py +++ b/skfda/preprocessing/smoothing/_basis.py @@ -16,7 +16,7 @@ from ...misc.regularization import L2Regularization from ...representation import FData, FDataBasis, FDataGrid, FDataIrregular from ...representation.basis import Basis -from ...typing._base import GridPointsLike, GridPoints +from ...typing._base import GridPointsLike from ...typing._numpy import NDArrayFloat from ._linear import _LinearSmoother diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 9e78b9fe3..7b3b80e19 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -8,9 +8,8 @@ from __future__ import annotations import numbers -import warnings from typing import ( - Any, List, Optional, Sequence, Tuple, Type, TypeVar, Union, cast, + Any, Optional, Sequence, Tuple, Type, TypeVar, 
Union, ) import numpy as np From 0b9b5bbef1da392828b5de27f3673e26cd5e393c Mon Sep 17 00:00:00 2001 From: pcuestas Date: Tue, 24 Oct 2023 18:36:16 +0200 Subject: [PATCH 113/144] from_fdatagrid without innecessary comments --- skfda/representation/irregular.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 7b3b80e19..c55940fa0 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -351,26 +351,20 @@ def from_fdatagrid( all_points_single_function = _cartesian_product( _to_grid_points(f_data.grid_points), ) - # Repeat points for each function flat_points = np.tile( all_points_single_function, (f_data.n_samples, 1), ) - # Array with values of each function all_values = f_data.data_matrix.reshape( (f_data.n_samples, -1, f_data.dim_codomain), ) - # Concatenated values of all functions flat_values = all_values.reshape((-1, f_data.dim_codomain)) - # Which values are not nan with shape: all_values.shape nonnan_all_values = ~np.all(np.isnan(all_values), axis=-1) - # Which values are not nan with shape: flat_values.shape nonnan_flat_values = nonnan_all_values.reshape((-1,)) values = flat_values[nonnan_flat_values] points = flat_points[nonnan_flat_values] - # Count non-nan values per function to obtain start_indices n_points_per_function = np.sum(nonnan_all_values, axis=-1) start_indices = np.concatenate(( np.zeros(1, np.int32), np.cumsum(n_points_per_function[:-1]), From e1ddca407df2b1582658d8b3359ce0848c6ad230 Mon Sep 17 00:00:00 2001 From: pcuestas Date: Tue, 24 Oct 2023 18:40:38 +0200 Subject: [PATCH 114/144] Make `from_dataframe` private --- skfda/datasets/_real_datasets.py | 2 +- skfda/representation/irregular.py | 2 +- skfda/tests/test_irregular.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/skfda/datasets/_real_datasets.py b/skfda/datasets/_real_datasets.py index 6ccd5bf7a..94765e920 100644 --- a/skfda/datasets/_real_datasets.py 
+++ b/skfda/datasets/_real_datasets.py @@ -1616,7 +1616,7 @@ def fetch_bone_density( target_name = "sex" coordinate_name = "spnbmd" - curves = FDataIrregular.from_dataframe( + curves = FDataIrregular._from_dataframe( data, id_column=curve_name, argument_columns=argument_name, diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index c55940fa0..900273015 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -256,7 +256,7 @@ def __init__( # noqa: WPS211 ) @classmethod - def from_dataframe( + def _from_dataframe( cls, dataframe: pandas.DataFrame, id_column: str, diff --git a/skfda/tests/test_irregular.py b/skfda/tests/test_irregular.py index 85c780480..a056efc6b 100644 --- a/skfda/tests/test_irregular.py +++ b/skfda/tests/test_irregular.py @@ -261,7 +261,7 @@ def test_fdatairregular_from_dataframe( argument_name = "age" coordinate_name = "spnbmd" - f_irreg = FDataIrregular.from_dataframe( + f_irreg = FDataIrregular._from_dataframe( dataframe, id_column=curve_name, argument_columns=argument_name, From e1728155acac8786f10c84e8c75d468a4ef99057 Mon Sep 17 00:00:00 2001 From: pcuestas Date: Tue, 24 Oct 2023 20:09:43 +0200 Subject: [PATCH 115/144] sum of fdatairregular objects --- skfda/representation/irregular.py | 35 +++++++++--- skfda/tests/test_irregular.py | 93 +++++++++++++++++++++++++++++++ 2 files changed, 121 insertions(+), 7 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 900273015..5aeb7d0a2 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -588,20 +588,41 @@ def sum( # noqa: WPS125 Returns: T: FDataIrregular object with only one curve and one value - representing the sum of all the samples in the original object. + representing the sum of all the samples in the original object. + The points of the new object are the points common to all the + samples in the original object. 
Only values present in those + common points are considered for the sum. """ super().sum(axis=axis, out=out, keepdims=keepdims, skipna=skipna) - data = ( - np.nansum(self.values, axis=0, keepdims=True) if skipna - else np.sum(self.values, axis=0, keepdims=True) + unique_points, counts = ( + np.unique(self.points, axis=0, return_counts=True) ) + common_points = unique_points[counts == self.n_samples] + + if len(common_points) == 0: + raise ValueError("No common points in FDataIrregular object") + + sum_points = common_points[ + np.lexsort(np.flip(common_points, axis=1).T), + ] + + # Find which points are common to all curves by subtracting each point + # to each of the common points + subtraction = self.points[:, np.newaxis, :] - sum_points + is_common_point = np.any(~np.any(subtraction, axis=-1), axis=-1) + common_points_values = self.values[is_common_point].reshape( + (self.n_samples, len(sum_points), self.dim_codomain), + ) + + sum_function = np.nansum if skipna else np.sum + sum_values = sum_function(common_points_values, axis=0) return FDataIrregular( start_indices=np.array([0]), - points=np.zeros((1, self.dim_domain)), - values=data, - sample_names=("sum",), + points=sum_points, + values=sum_values, + sample_names=(None,), ) def mean(self: T) -> T: diff --git a/skfda/tests/test_irregular.py b/skfda/tests/test_irregular.py index a056efc6b..0c871a5e4 100644 --- a/skfda/tests/test_irregular.py +++ b/skfda/tests/test_irregular.py @@ -159,6 +159,71 @@ def dataframe( return raw_dataset["bone_ext"] + +@pytest.fixture( + params=[ + "unidimensional", + "multidimensional", + ], +) +def fdatairregular_and_sum(request: Any) -> FDataIrregular: + if request.param == "unidimensional": + return ( + FDataIrregular( + start_indices=[0, 3, 7], + points=[ + -9, -3, 3, -3, 3, 9, 15, -15, -9, -3, 3, 9, 17, 22, 29, + ], + values=[ + 548, 893, 657, 752, 459, 181, 434, 846, 1102, 801, 824, + 866, 704, 757, 726, + ], + ), + FDataIrregular( + start_indices=[0], + points=[-3, 3], + 
values=[2446, 1940], + ), + ) + if request.param == "multidimensional": + return ( + FDataIrregular( + start_indices=[0, 3, 5], + points=[ + [0, 0], [1, 2], [1, 1], + [0, 0], [1, 1], + [0, 0], [6, 2], [1, 1], + ], + values=[ + [0, 0, -1], [657, 752, 5], [10, 20, 30], + [-1, 0, 0], [1102, 801, 2], + [0, 1, 0], [704, 0, 757], [-11, -21, 31], + ], + ), + FDataIrregular( + start_indices=[0], + points=[[0, 0], [1, 1]], + values=[[-1, 1, -1], [1101, 800, 63]], + ), + ) + + +@pytest.fixture() +def fdatairregular_no_common_points() -> FDataIrregular: + return FDataIrregular( + start_indices=[0, 3, 5], + points=[ + [0, 1], [1, 2], [1, 1], + [0, -1], [1, 10], + [0, -2], [6, 2], [10, 1], + ], + values=[ + [0, 0, -1], [657, 752, 5], [10, 20, 30], + [-1, 0, 0], [1102, 801, 2], + [0, 1, 0], [704, 0, 757], [-11, -21, 31], + ], + ) + ############ # TESTS ############ @@ -443,3 +508,31 @@ def test_fdatairregular_isna( which can be unidimensional or multidimensional. """ assert fdatairregular.isna().shape == (len(fdatairregular),) + + +def test_fdatairregular_sum( + fdatairregular_and_sum: Tuple[FDataIrregular, FDataIrregular], +) -> None: + """Test the sum function for FDataIrregular. + + Test both unidimensional and multidimensional. + + Args: + fdatairregular_and_sum: FDataIrregular object and expected sum. + """ + fdatairregular, expected_sum = fdatairregular_and_sum + actual_sum = fdatairregular.sum() + assert actual_sum.equals(expected_sum), actual_sum + + +def test_fdatairregular_sum_invalid( + fdatairregular_no_common_points: FDataIrregular, +) -> None: + """Test the sum function for FDataIrregular, case with no common points. + + Args: + fdatairregular_no_common_points: FDataIrregular object with no common + points. 
+ """ + with pytest.raises(ValueError): + fdatairregular_no_common_points.sum() From eee790945363b250ec4c5c1c9fe2b3b6cbe3ae72 Mon Sep 17 00:00:00 2001 From: pcuestas Date: Tue, 24 Oct 2023 20:11:45 +0200 Subject: [PATCH 116/144] FDataIrregular mean (inherited from FData method) --- skfda/representation/irregular.py | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 5aeb7d0a2..35d14e0d0 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -625,34 +625,6 @@ def sum( # noqa: WPS125 sample_names=(None,), ) - def mean(self: T) -> T: - """Compute the mean pointwise for a sparse dataset. - - Note that, for irregular data, points may be represented in few - or even an only curve. - - Returns: - A FDataIrregular object with just one sample representing the - mean of all curves the across each value. - """ - # Find all distinct arguments (ordered) and corresponding values - distinct_args = np.unique(self.points, axis=0) - values = [ - self.values[np.where(self.points == arg)[0]] - for arg in distinct_args - ] - - # Obtain mean of all available values for each argument point - means = np.array([np.mean(value, axis=0) for value in values]) - - # Create a FDataIrregular object with only 1 curve, the mean curve - return FDataIrregular( - start_indices=np.array([0]), - points=distinct_args, - values=means, - sample_names=("mean",), - ) - def var(self: T) -> T: """Compute the variance pointwise for a sparse dataset. 
From c7a623524d167fd49c72b5b56ca8cafedb1fc9cf Mon Sep 17 00:00:00 2001 From: pcuestas Date: Tue, 24 Oct 2023 20:13:09 +0200 Subject: [PATCH 117/144] Test mean function --- skfda/tests/test_irregular.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/skfda/tests/test_irregular.py b/skfda/tests/test_irregular.py index 0c871a5e4..f54f64434 100644 --- a/skfda/tests/test_irregular.py +++ b/skfda/tests/test_irregular.py @@ -522,7 +522,22 @@ def test_fdatairregular_sum( """ fdatairregular, expected_sum = fdatairregular_and_sum actual_sum = fdatairregular.sum() - assert actual_sum.equals(expected_sum), actual_sum + assert actual_sum.equals(expected_sum) + + +def test_fdatairregular_mean( + fdatairregular_and_sum: Tuple[FDataIrregular, FDataIrregular], +) -> None: + """Test the mean function for FDataIrregular. + + Test both unidimensional and multidimensional. + + Args: + fdatairregular_and_sum: FDataIrregular object and expected sum. + """ + fdatairregular, expected_sum = fdatairregular_and_sum + actual_mean = fdatairregular.mean() + assert actual_mean.equals(expected_sum / fdatairregular.n_samples) def test_fdatairregular_sum_invalid( From d82693f652a30ff273cae2f31b6e52882c7e7eda Mon Sep 17 00:00:00 2001 From: pcuestas Date: Tue, 24 Oct 2023 20:30:27 +0200 Subject: [PATCH 118/144] std of FDataIrregular --- skfda/exploratory/stats/_stats.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/skfda/exploratory/stats/_stats.py b/skfda/exploratory/stats/_stats.py index 5bf81b3b9..2f16cc79e 100644 --- a/skfda/exploratory/stats/_stats.py +++ b/skfda/exploratory/stats/_stats.py @@ -6,13 +6,12 @@ from typing import Callable, TypeVar, Union import numpy as np -from scipy import integrate from scipy.stats import rankdata from skfda._utils.ndfunction import average_function_value from ...misc.metrics._lp_distances import l2_distance -from ...representation import FData, FDataBasis, FDataGrid +from 
...representation import FData, FDataBasis, FDataGrid, FDataIrregular from ...typing._metric import Metric from ...typing._numpy import NDArrayFloat from ..depth import Depth, ModifiedBandDepth @@ -137,6 +136,23 @@ def std_fdatagrid(X: FDataGrid, correction: int = 1) -> FDataGrid: ) +@std.register +def std_fdatairregular( + X: FDataIrregular, correction: int = 1, +) -> FDataIrregular: + """Compute the standard deviation of a FDataIrregular.""" + common_points, common_values = X._get_common_points_and_values() + std_values = np.std( + common_values, axis=0, ddof=correction, + ) + + return FDataIrregular( + start_indices=np.array([0]), + points=common_points, + values=std_values, + sample_names=(None,), + ) + @std.register def std_fdatabasis(X: FDataBasis, correction: int = 1) -> FDataBasis: """Compute the standard deviation of a FDataBasis.""" From e202aa7c3e19f37068a0ec26ee96f6d1c80e8c2d Mon Sep 17 00:00:00 2001 From: pcuestas Date: Tue, 24 Oct 2023 20:31:04 +0200 Subject: [PATCH 119/144] Private method in FDataIrregular to get common points and corresponding values --- skfda/representation/irregular.py | 38 ++++++++++++++++--------------- 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 35d14e0d0..f0a553e43 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -563,6 +563,23 @@ def _get_points_and_values(self: T) -> Tuple[NDArrayFloat, NDArrayFloat]: def _get_input_points(self: T) -> GridPoints: return self.points # type: ignore[return-value] + def _get_common_points_and_values( + self: T, + ) -> Tuple[NDArrayFloat, NDArrayFloat]: + unique_points, counts = ( + np.unique(self.points, axis=0, return_counts=True) + ) + common_points = unique_points[counts == self.n_samples] + + # Find which points are common to all curves by subtracting each point + # to each of the common points + subtraction = self.points[:, np.newaxis, :] - common_points + 
is_common_point = np.any(~np.any(subtraction, axis=-1), axis=-1) + common_points_values = self.values[is_common_point].reshape( + (self.n_samples, len(common_points), self.dim_codomain), + ) + return common_points, common_points_values + def sum( # noqa: WPS125 self: T, *, @@ -595,32 +612,17 @@ def sum( # noqa: WPS125 """ super().sum(axis=axis, out=out, keepdims=keepdims, skipna=skipna) - unique_points, counts = ( - np.unique(self.points, axis=0, return_counts=True) - ) - common_points = unique_points[counts == self.n_samples] + common_points, common_values = self._get_common_points_and_values() if len(common_points) == 0: raise ValueError("No common points in FDataIrregular object") - sum_points = common_points[ - np.lexsort(np.flip(common_points, axis=1).T), - ] - - # Find which points are common to all curves by subtracting each point - # to each of the common points - subtraction = self.points[:, np.newaxis, :] - sum_points - is_common_point = np.any(~np.any(subtraction, axis=-1), axis=-1) - common_points_values = self.values[is_common_point].reshape( - (self.n_samples, len(sum_points), self.dim_codomain), - ) - sum_function = np.nansum if skipna else np.sum - sum_values = sum_function(common_points_values, axis=0) + sum_values = sum_function(common_values, axis=0) return FDataIrregular( start_indices=np.array([0]), - points=sum_points, + points=common_points, values=sum_values, sample_names=(None,), ) From c2afecc64f7d8baeae057adbaea2ed20ffd17acf Mon Sep 17 00:00:00 2001 From: pcuestas Date: Tue, 24 Oct 2023 21:00:59 +0200 Subject: [PATCH 120/144] test_stats_std for fdatairregular, add FDataGrid to skfda __init__, fix test reductions in test_irregular_operations.py, fix correction default value in stats.std and its tests --- skfda/__init__.py | 4 +- skfda/exploratory/stats/_stats.py | 9 +- skfda/representation/irregular.py | 38 +++--- skfda/tests/test_irregular.py | 108 ---------------- skfda/tests/test_irregular_operations.py | 153 ++++++++++++++++++++++- 
skfda/tests/test_stats_std.py | 28 ++++- 6 files changed, 197 insertions(+), 143 deletions(-) diff --git a/skfda/__init__.py b/skfda/__init__.py index ac12bc7f5..d62b2d60e 100644 --- a/skfda/__init__.py +++ b/skfda/__init__.py @@ -17,7 +17,9 @@ "representation", ], submod_attrs={ - 'representation': ["FData", "FDataBasis", "FDataGrid"], + 'representation': [ + "FData", "FDataBasis", "FDataGrid", "FDataIrregular", + ], 'representation._functional_data': ['concatenate'], }, ) diff --git a/skfda/exploratory/stats/_stats.py b/skfda/exploratory/stats/_stats.py index 2f16cc79e..b16f0c01e 100644 --- a/skfda/exploratory/stats/_stats.py +++ b/skfda/exploratory/stats/_stats.py @@ -102,7 +102,7 @@ def cov( @functools.singledispatch -def std(X: F, correction: int = 1) -> F: +def std(X: F, correction: int = 0) -> F: r""" Compute the standard deviation of all the samples in a FData object. @@ -126,7 +126,7 @@ def std(X: F, correction: int = 1) -> F: @std.register -def std_fdatagrid(X: FDataGrid, correction: int = 1) -> FDataGrid: +def std_fdatagrid(X: FDataGrid, correction: int = 0) -> FDataGrid: """Compute the standard deviation of a FDataGrid.""" return X.copy( data_matrix=np.std( @@ -138,7 +138,7 @@ def std_fdatagrid(X: FDataGrid, correction: int = 1) -> FDataGrid: @std.register def std_fdatairregular( - X: FDataIrregular, correction: int = 1, + X: FDataIrregular, correction: int = 0, ) -> FDataIrregular: """Compute the standard deviation of a FDataIrregular.""" common_points, common_values = X._get_common_points_and_values() @@ -153,8 +153,9 @@ def std_fdatairregular( sample_names=(None,), ) + @std.register -def std_fdatabasis(X: FDataBasis, correction: int = 1) -> FDataBasis: +def std_fdatabasis(X: FDataBasis, correction: int = 0) -> FDataBasis: """Compute the standard deviation of a FDataBasis.""" from ..._utils import function_to_fdatabasis diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index f0a553e43..0de90f22c 100644 --- 
a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -604,7 +604,7 @@ def sum( # noqa: WPS125 `True`. Returns: - T: FDataIrregular object with only one curve and one value + FDataIrregular object with only one curve and one value representing the sum of all the samples in the original object. The points of the new object are the points common to all the samples in the original object. Only values present in those @@ -627,33 +627,31 @@ def sum( # noqa: WPS125 sample_names=(None,), ) - def var(self: T) -> T: - """Compute the variance pointwise for a sparse dataset. + def var(self: T, correction: int = 0) -> T: + """Compute the variance of all the samples. - Note that, for irregular data, points may be represented in few - or even an only curve. + Args: + correction: degrees of freedom adjustment. The divisor used in the + calculation is `N - correction`, where `N` represents the + number of elements. Default: `0`. Returns: - A FDataIrregular object with just one sample representing the - variance of all curves the across each value. - + FDataIrregular object with only one curve and one value + representing the pointwise variance of all the samples in the + original object. The points of the new object are the points + common to all the samples in the original object. 
""" # Find all distinct arguments (ordered) and corresponding values - distinct_args = np.unique(self.points, axis=0) - values = [ - self.values[np.where(self.points == arg)[0]] - for arg in distinct_args - ] - - # Obtain variance of all available values for each argument point - variances = np.array([np.var(value, axis=0) for value in values]) + common_points, common_values = self._get_common_points_and_values() + var_values = np.var( + common_values, axis=0, ddof=correction, + ) - # Create a FDataIrregular object with only 1 curve, the variance curve return FDataIrregular( start_indices=np.array([0]), - points=distinct_args, - values=variances, - sample_names=("var",), + points=common_points, + values=var_values, + sample_names=(None,), ) def cov(self: T) -> T: diff --git a/skfda/tests/test_irregular.py b/skfda/tests/test_irregular.py index f54f64434..a056efc6b 100644 --- a/skfda/tests/test_irregular.py +++ b/skfda/tests/test_irregular.py @@ -159,71 +159,6 @@ def dataframe( return raw_dataset["bone_ext"] - -@pytest.fixture( - params=[ - "unidimensional", - "multidimensional", - ], -) -def fdatairregular_and_sum(request: Any) -> FDataIrregular: - if request.param == "unidimensional": - return ( - FDataIrregular( - start_indices=[0, 3, 7], - points=[ - -9, -3, 3, -3, 3, 9, 15, -15, -9, -3, 3, 9, 17, 22, 29, - ], - values=[ - 548, 893, 657, 752, 459, 181, 434, 846, 1102, 801, 824, - 866, 704, 757, 726, - ], - ), - FDataIrregular( - start_indices=[0], - points=[-3, 3], - values=[2446, 1940], - ), - ) - if request.param == "multidimensional": - return ( - FDataIrregular( - start_indices=[0, 3, 5], - points=[ - [0, 0], [1, 2], [1, 1], - [0, 0], [1, 1], - [0, 0], [6, 2], [1, 1], - ], - values=[ - [0, 0, -1], [657, 752, 5], [10, 20, 30], - [-1, 0, 0], [1102, 801, 2], - [0, 1, 0], [704, 0, 757], [-11, -21, 31], - ], - ), - FDataIrregular( - start_indices=[0], - points=[[0, 0], [1, 1]], - values=[[-1, 1, -1], [1101, 800, 63]], - ), - ) - - -@pytest.fixture() -def 
fdatairregular_no_common_points() -> FDataIrregular: - return FDataIrregular( - start_indices=[0, 3, 5], - points=[ - [0, 1], [1, 2], [1, 1], - [0, -1], [1, 10], - [0, -2], [6, 2], [10, 1], - ], - values=[ - [0, 0, -1], [657, 752, 5], [10, 20, 30], - [-1, 0, 0], [1102, 801, 2], - [0, 1, 0], [704, 0, 757], [-11, -21, 31], - ], - ) - ############ # TESTS ############ @@ -508,46 +443,3 @@ def test_fdatairregular_isna( which can be unidimensional or multidimensional. """ assert fdatairregular.isna().shape == (len(fdatairregular),) - - -def test_fdatairregular_sum( - fdatairregular_and_sum: Tuple[FDataIrregular, FDataIrregular], -) -> None: - """Test the sum function for FDataIrregular. - - Test both unidimensional and multidimensional. - - Args: - fdatairregular_and_sum: FDataIrregular object and expected sum. - """ - fdatairregular, expected_sum = fdatairregular_and_sum - actual_sum = fdatairregular.sum() - assert actual_sum.equals(expected_sum) - - -def test_fdatairregular_mean( - fdatairregular_and_sum: Tuple[FDataIrregular, FDataIrregular], -) -> None: - """Test the mean function for FDataIrregular. - - Test both unidimensional and multidimensional. - - Args: - fdatairregular_and_sum: FDataIrregular object and expected sum. - """ - fdatairregular, expected_sum = fdatairregular_and_sum - actual_mean = fdatairregular.mean() - assert actual_mean.equals(expected_sum / fdatairregular.n_samples) - - -def test_fdatairregular_sum_invalid( - fdatairregular_no_common_points: FDataIrregular, -) -> None: - """Test the sum function for FDataIrregular, case with no common points. - - Args: - fdatairregular_no_common_points: FDataIrregular object with no common - points. 
- """ - with pytest.raises(ValueError): - fdatairregular_no_common_points.sum() diff --git a/skfda/tests/test_irregular_operations.py b/skfda/tests/test_irregular_operations.py index 08dda67b8..3c6b36ea3 100644 --- a/skfda/tests/test_irregular_operations.py +++ b/skfda/tests/test_irregular_operations.py @@ -201,6 +201,105 @@ def fdatairregular( return fdatairregular_2d +@pytest.fixture( + params=[ + "unidimensional", + "multidimensional", + ], +) +def fdatairregular_and_sum(request: Any) -> FDataIrregular: + if request.param == "unidimensional": + return ( + FDataIrregular( + start_indices=[0, 3, 7], + points=[ + -9, -3, 3, -3, 3, 9, 15, -15, -9, -3, 3, 9, 17, 22, 29, + ], + values=[ + 548, 893, 657, 752, 459, 181, 434, 846, 1102, 801, 824, + 866, 704, 757, 726, + ], + ), + FDataIrregular( + start_indices=[0], + points=[-3, 3], + values=[2446, 1940], + ), + ) + if request.param == "multidimensional": + return ( + FDataIrregular( + start_indices=[0, 3, 5], + points=[ + [0, 0], [1, 2], [1, 1], + [0, 0], [1, 1], + [0, 0], [6, 2], [1, 1], + ], + values=[ + [0, 0, -1], [657, 752, 5], [10, 20, 30], + [-1, 0, 0], [1102, 801, 2], + [0, 1, 0], [704, 0, 757], [-11, -21, 31], + ], + ), + FDataIrregular( + start_indices=[0], + points=[[0, 0], [1, 1]], + values=[[-1, 1, -1], [1101, 800, 63]], + ), + ) + + +@pytest.fixture( + params=[ + "unidimensional", + "multidimensional", + ], +) +def fdatairregular_common_points(request: Any) -> FDataIrregular: + if request.param == "unidimensional": + return FDataIrregular( + start_indices=[0, 3, 7], + points=[ + -9, -3, 3, -3, 3, 9, 15, -15, -9, -3, 3, 9, 17, 22, 29, + ], + values=[ + 548, 893, 657, 752, 459, 181, 434, 846, 1102, 801, 824, + 866, 704, 757, 726, + ], + ) + if request.param == "multidimensional": + return FDataIrregular( + start_indices=[0, 3, 5], + points=[ + [0, 0], [1, 2], [1, 1], + [0, 0], [1, 1], + [0, 0], [6, 2], [1, 1], + ], + values=[ + [0, 0, -1], [657, 752, 5], [10, 20, 30], + [-1, 0, 0], [1102, 801, 2], + [0, 1, 
0], [704, 0, 757], [-11, -21, 31], + ], + ) + + +@pytest.fixture() +def fdatairregular_no_common_points() -> FDataIrregular: + return FDataIrregular( + start_indices=[0, 3, 5], + points=[ + [0, 1], [1, 2], [1, 1], + [0, -1], [1, 10], + [0, -2], [6, 2], [10, 1], + ], + values=[ + [0, 0, -1], [657, 752, 5], [10, 20, 30], + [-1, 0, 0], [1102, 801, 2], + [0, 1, 0], [704, 0, 757], [-11, -21, 31], + ], + ) + + @pytest.fixture(params=["scalar", "vector", "matrix", "fdatairregular"]) def other_1d( request: Any, @@ -723,7 +822,7 @@ class TestNumericReductions: def test_fdatairregular_numeric_reduction( self, - fdatairregular: FDataIrregular, + fdatairregular_common_points: FDataIrregular, all_numeric_reductions: str, ) -> None: """Test FDataIrregular numeric statistichal operations. @@ -732,14 +831,60 @@ def test_fdatairregular_numeric_reduction( dimensions of codomain and domain. Args: - fdatairregular (FDataIrregular): FDataIrregular - object. + fdatairregular_common_points (FDataIrregular): FDataIrregular + object with points common to all samples. all_numeric_reductions (str): Method of the class FDataIrregular to be tested. """ - reduction = getattr(fdatairregular, all_numeric_reductions)() + reduction = getattr( + fdatairregular_common_points, all_numeric_reductions, + )() assert isinstance(reduction, FDataIrregular) + def test_fdatairregular_sum( + self, + fdatairregular_and_sum: Tuple[FDataIrregular, FDataIrregular], + ) -> None: + """Test the sum function for FDataIrregular. + + Test both unidimensional and multidimensional. + + Args: + fdatairregular_and_sum: FDataIrregular object and expected sum. + """ + fdatairregular, expected_sum = fdatairregular_and_sum + actual_sum = fdatairregular.sum() + assert actual_sum.equals(expected_sum) + + def test_fdatairregular_mean( + self, + fdatairregular_and_sum: Tuple[FDataIrregular, FDataIrregular], + ) -> None: + """Test the mean function for FDataIrregular. + + Test both unidimensional and multidimensional. 
+ + Args: + fdatairregular_and_sum: FDataIrregular object and expected sum. + """ + fdatairregular, expected_sum = fdatairregular_and_sum + actual_mean = fdatairregular.mean() + assert actual_mean.equals(expected_sum / fdatairregular.n_samples) + + def test_fdatairregular_sum_invalid( + self, + fdatairregular_no_common_points: FDataIrregular, + ) -> None: + """Test the sum function for FDataIrregular. + + Args: + fdatairregular_no_common_points: FDataIrregular object with no + common points. + """ + with pytest.raises(ValueError): + fdatairregular_no_common_points.sum() + + ######################## # TEST BASIS OPERATIONS ######################## diff --git a/skfda/tests/test_stats_std.py b/skfda/tests/test_stats_std.py index 24f6687af..8bbc64d5b 100644 --- a/skfda/tests/test_stats_std.py +++ b/skfda/tests/test_stats_std.py @@ -7,7 +7,7 @@ import numpy as np import pytest -from skfda import FDataBasis, FDataGrid +from skfda import FDataBasis, FDataGrid, FDataIrregular from skfda.exploratory.stats import std from skfda.representation.basis import ( Basis, @@ -66,6 +66,22 @@ def t_basis2(request: Any, t_n_basis2: int = 5) -> Basis: # Tests +def test_std_fdatairregular_1d_to_1d() -> None: + """Test std_fdatairregular with R to R functions.""" + fd = FDataIrregular( + start_indices=[0, 3, 7], + points=[0, 1, 10, 0, 1, 2, 10, 0, 1, 4, 10], + values=[0, 0, 10, 1, 1, 6, 10, 2, 2, 9, 10], + ) + expected_std = FDataIrregular( + start_indices=[0], + points=[0, 1, 10], + values=[np.sqrt(2 / 3), np.sqrt(2 / 3), 0], + ) + actual_std = std(fd) + assert actual_std.equals(expected_std), actual_std + + def test_std_fdatagrid_1d_to_2d() -> None: """Test std_fdatagrid with R to R^2 functions.""" fd = FDataGrid( @@ -78,7 +94,7 @@ def test_std_fdatagrid_1d_to_2d() -> None: [0, 1, 2, 3, 4, 5], ], ) - expected_std_data_matrix = np.full((1, 2, 6, 1), np.sqrt(2)) + expected_std_data_matrix = np.full((1, 2, 6, 1), 1) np.testing.assert_allclose( std(fd).data_matrix, expected_std_data_matrix, 
@@ -103,7 +119,7 @@ def test_std_fdatagrid_2d_to_2d() -> None: [0, 1, 2], ], ) - expected_std_data_matrix = np.full((1, 2, 3, 2), np.sqrt(1 / 2)) + expected_std_data_matrix = np.full((1, 2, 3, 2), np.sqrt(1 / 4)) np.testing.assert_allclose( std(fd).data_matrix, expected_std_data_matrix, @@ -129,7 +145,7 @@ def test_std_fdatabasis_vector_valued_basis( ) np.testing.assert_allclose( - std(fd).coefficients, + std(fd, correction=1).coefficients, np.array([np.sqrt(1 / 2) * one_coefficients]), rtol=1e-7, atol=1e-7, @@ -152,7 +168,7 @@ def test_std_fdatabasis_tensor_basis( ) np.testing.assert_allclose( - std(fd).coefficients, + std(fd, correction=1).coefficients, np.array([np.sqrt(1 / 2) * one_coefficients]), rtol=1e-7, atol=1e-7, @@ -181,7 +197,7 @@ def test_std_fdatabasis_2d_to_2d() -> None: expected_coefficients = np.array([[np.sqrt(1 / 2), 0, 0, 0] * 2]) np.testing.assert_allclose( - std(fd).coefficients, + std(fd, correction=1).coefficients, expected_coefficients, rtol=1e-7, atol=1e-7, From 0f90db2ccfafaea0bd51b5a323a36c8384f2afc0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89lie=20Goudout?= Date: Tue, 14 Nov 2023 16:06:09 +0100 Subject: [PATCH 121/144] use ary.ndim instead of len(ary.shape) --- skfda/misc/covariances.py | 2 +- skfda/ml/regression/_linear_regression.py | 2 +- .../variable_selection/recursive_maxima_hunting.py | 2 +- skfda/representation/basis/_fdatabasis.py | 4 ++-- skfda/representation/irregular.py | 10 ++++------ 5 files changed, 9 insertions(+), 11 deletions(-) diff --git a/skfda/misc/covariances.py b/skfda/misc/covariances.py index 43635eb02..298fcf64d 100644 --- a/skfda/misc/covariances.py +++ b/skfda/misc/covariances.py @@ -31,7 +31,7 @@ def _transform_to_2d(t: ArrayLike) -> NDArrayFloat: """Transform 1d arrays in column vectors.""" t = np.asfarray(t) - dim = len(t.shape) + dim = t.ndim assert dim <= 2 if dim < 2: diff --git a/skfda/ml/regression/_linear_regression.py b/skfda/ml/regression/_linear_regression.py index f672a2923..4cc45524a 100644 
--- a/skfda/ml/regression/_linear_regression.py +++ b/skfda/ml/regression/_linear_regression.py @@ -607,7 +607,7 @@ def _check_and_convert( np.ndarray: numpy 2D array. """ new_X = np.asarray(X) - if len(new_X.shape) == 1: + if new_X.ndim == 1: new_X = new_X[:, np.newaxis] return new_X diff --git a/skfda/preprocessing/dim_reduction/variable_selection/recursive_maxima_hunting.py b/skfda/preprocessing/dim_reduction/variable_selection/recursive_maxima_hunting.py index 432b65bb8..2a66ffa19 100644 --- a/skfda/preprocessing/dim_reduction/variable_selection/recursive_maxima_hunting.py +++ b/skfda/preprocessing/dim_reduction/variable_selection/recursive_maxima_hunting.py @@ -46,7 +46,7 @@ def _transform_to_2d(t: ArrayLike) -> NDArrayFloat: t = np.asfarray(t) - dim = len(t.shape) + dim = t.ndim assert dim <= 2 if dim < 2: diff --git a/skfda/representation/basis/_fdatabasis.py b/skfda/representation/basis/_fdatabasis.py index 1a4e0830d..b943f7932 100644 --- a/skfda/representation/basis/_fdatabasis.py +++ b/skfda/representation/basis/_fdatabasis.py @@ -687,8 +687,8 @@ def _array_to_R( # noqa: N802 coefficients: NDArrayFloat, transpose: bool = False, ) -> str: - if len(coefficients.shape) == 1: - coefficients = coefficients.reshape((1, coefficients.shape[0])) + if coefficients.ndim == 1: + coefficients = coefficients[None] if transpose is True: coefficients = np.transpose(coefficients) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 0de90f22c..fc13632b5 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -206,16 +206,14 @@ def __init__( # noqa: WPS211 """Construct a FDataIrregular object.""" self.start_indices = np.asarray(start_indices) self.points = np.asarray(points) - if len(self.points.shape) == 1: + if self.points.ndim == 1: self.points = self.points.reshape(-1, 1) self.values = np.asarray(values) - if len(self.values.shape) == 1: + if self.values.ndim == 1: self.values = 
self.values.reshape(-1, 1) - if self.points.shape[0] != self.values.shape[0]: - raise ValueError( - "Dimension mismatch in points and values", - ) + if len(self.points) != len(self.values): + raise ValueError("Dimension mismatch in points and values") if max(self.start_indices) >= len(self.points): raise ValueError("Index in start_indices out of bounds") From 26b5f0c4386681db1b6e225dbd7ec20a1efcacfd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89lie=20Goudout?= Date: Tue, 14 Nov 2023 17:15:37 +0100 Subject: [PATCH 122/144] points_split and values_split as properties --- skfda/representation/irregular.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index fc13632b5..8765e928e 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -44,8 +44,7 @@ def _get_sample_range_from_data( - start_indices: NDArrayInt, - points: NDArrayFloat, + points_split: list[NDArrayFloat], ) -> DomainRange: """Compute the domain ranges of each sample. 
@@ -62,7 +61,7 @@ def _get_sample_range_from_data( tuple( zip(np.min(f_points, axis=0), np.max(f_points, axis=0)), ) - for f_points in np.split(points, start_indices[1:]) + for f_points in points_split ) @@ -223,10 +222,7 @@ def __init__( # noqa: WPS211 self.points = sorted_arguments self.values = sorted_values - self._sample_range = _get_sample_range_from_data( - self.start_indices, - self.points, - ) + self._sample_range = _get_sample_range_from_data(self.points_split) # Default value for sample_range is a list of tuples with # the first and last arguments of each curve for each dimension @@ -383,8 +379,8 @@ def _sort_by_arguments(self) -> Tuple[ArrayLike, ArrayLike]: Returns: Tuple[ArrayLike, Arraylike]: sorted pair (arguments, values) """ - slice_args = np.split(self.points, self.start_indices[1:]) - slice_values = np.split(self.values, self.start_indices[1:]) + slice_args = self.points_split + slice_values = self.values_split # Sort lexicographically, first to last dimension sorting_masks = [ @@ -458,6 +454,14 @@ def coordinates(self) -> _IrregularCoordinateIterator[T]: def n_samples(self) -> int: return self.start_indices.shape[0] + @property + def points_split(self) -> NDArrayFloat: + return np.split(self.points, self.start_indices[1:]) + + @property + def values_split(self) -> NDArrayFloat: + return np.split(self.values, self.start_indices[1:]) + @property def sample_range(self) -> DomainRange: """ From fb69594d5d08ea71fc1722cdf1d78a48c265f8cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89lie=20Goudout?= Date: Tue, 14 Nov 2023 17:16:11 +0100 Subject: [PATCH 123/144] FDataIrregular.cleaned restrict method --- skfda/representation/irregular.py | 60 +++++++++++++++---------------- 1 file changed, 29 insertions(+), 31 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 8765e928e..f02919f57 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -1225,59 +1225,60 @@ def copy( # 
noqa: WPS211 def restrict( # noqa: WPS210 self: T, domain_range: DomainRangeLike, + *, + with_bounds: bool = False, ) -> T: """ Restrict the functions to a new domain range. Args: domain_range: New domain range. + with_bounds: Whether or not to ensure domain boundaries + appear in `grid_points`. Returns: T: Restricted function. """ + if with_bounds: # To do + raise NotImplementedError('Not yet implemented for FDataIrregular') + from ..misc.validation import validate_domain_range - domain_range = validate_domain_range(domain_range) + npdr = np.asarray(validate_domain_range(domain_range)) # (dim, 2) head = 0 - indices = [] - arguments = [] + start_indices = [] + points = [] values = [] sample_names = [] # Eliminate points outside the new range. # Must also modify function indices to point to new array - slice_points = np.split(self.points, self.start_indices[1:]) - slice_values = np.split(self.values, self.start_indices[1:]) - - for i, points_values in enumerate(zip(slice_points, slice_values)): - sample_points, sample_values = points_values - masks = set(range(sample_points.shape[0])) - for dim, dr in enumerate(domain_range): - dr_start, dr_end = dr - select_mask = np.where( - ( - (dr_start <= sample_points[:, dim]) - & (sample_points[:, dim] <= dr_end) - ), - ) + for sample_points, sample_values, sample_name in zip( + self.points_split, # (num_points, dim) + self.values_split, + self.sample_names, + ): - masks = masks.intersection(set(select_mask[0])) + mask = np.all( + (npdr[:, 0] <= sample_points) & (sample_points <= npdr[:, 1]), + axis=1, + ) # Do not keep functions with no values. 
- masks = list(masks) - if len(masks) > 0: - indices.append(head) - arguments.append(sample_points[masks, :]) - values.append(sample_values[masks, :]) - sample_names.append(self.sample_names[i]) - head += len(masks) + num_valid_points = mask.sum() + if num_valid_points: + start_indices.append(head) + points.append(sample_points[mask]) + values.append(sample_values[mask]) + sample_names.append(sample_name) + head += num_valid_points return self.copy( - start_indices=np.array(indices), - points=np.concatenate(arguments), + start_indices=np.array(start_indices), + points=np.concatenate(points), values=np.concatenate(values), sample_names=sample_names, domain_range=domain_range, @@ -1549,10 +1550,7 @@ def __init__( self.dim_domain = points.shape[1] if domain_range is None: - sample_range = _get_sample_range_from_data( - self.start_indices, - self.points, - ) + sample_range = _get_sample_range_from_data(self.points_split) domain_range = _get_domain_range_from_sample_range(sample_range) self.domain_range = validate_domain_range(domain_range) From ad713397ffe6bb6118faa713f4ac5712c7ccb0d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89lie=20Goudout?= Date: Wed, 15 Nov 2023 21:34:35 +0100 Subject: [PATCH 124/144] cleaner concatenate --- skfda/representation/irregular.py | 86 ++++++++++--------------------- 1 file changed, 28 insertions(+), 58 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index f02919f57..4a3117b43 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -7,6 +7,7 @@ """ from __future__ import annotations +import itertools import numbers from typing import ( Any, Optional, Sequence, Tuple, Type, TypeVar, Union, @@ -931,67 +932,36 @@ def concatenate(self: T, *others: T, as_coordinates: bool = False) -> T: "Not implemented for as_coordinates = True", ) # Verify that dimensions are compatible - assert len(others) > 0, "No objects to concatenate" - 
self.check_same_dimensions(others[0]) - if len(others) > 1: - for x, y in zip(others, others[1:]): - x.check_same_dimensions(y) - - # Allocate all required memory - total_functions = self.n_samples + sum( - [ - o.n_samples - for o in others - ], - ) - total_values = len(self.points) + sum( - [ - len(o.points) - for o in others - ], - ) - total_sample_names = [] - start_indices = np.zeros((total_functions, ), dtype=np.uint32) - function_args = np.zeros( - (total_values, self.dim_domain), - ) - values = np.zeros( - (total_values, self.dim_codomain), - ) - index = 0 - head = 0 - - # Add samples sequentially - for f_data in [self] + list(others): - start_indices[ - index:index + f_data.n_samples - ] = f_data.start_indices - function_args[ - head:head + len(f_data.points) - ] = f_data.points - values[ - head:head + len(f_data.points) - ] = f_data.values - # Adjust pointers to the concatenated array - start_indices[index:index + f_data.n_samples] += head - index += f_data.n_samples - head += len(f_data.points) - total_sample_names = total_sample_names + list(f_data.sample_names) - - # Check domain range - domain_range = [list(r) for r in self.domain_range] - for dim in range(self.dim_domain): - dim_max = np.max(function_args[:, dim]) - dim_min = np.min(function_args[:, dim]) - - if dim_max > self.domain_range[dim][1]: - domain_range[dim][1] = dim_max - if dim_min < self.domain_range[dim][0]: - domain_range[dim][0] = dim_min + assert others, "No objects to concatenate" + all_ = (self,) + others + start_indices_split = [] + total_points = 0 + points_split = [] + values_split = [] + total_sample_names_split = [] + domain_range_split = [] + for x, y in itertools.pairwise(all_ + (self,)): + x.check_same_dimensions(y) + start_indices_split.append(x.start_indices + total_points) + total_points += len(x.points) + points_split.append(x.points) + values_split.append(x.values) + total_sample_names_split.append(x.sample_names) + domain_range_split.append(x.domain_range) + + 
start_indices = np.concatenate(start_indices_split) + points = np.concatenate(points_split) + values = np.concatenate(values_split) + total_sample_names = list(itertools.chain(*total_sample_names_split)) + domain_range_stacked = np.stack(domain_range_split, axis=-1) + domain_range = np.c_[ + domain_range_stacked[:, 0].min(axis=-1), + domain_range_stacked[:, 1].max(axis=-1), + ] return self.copy( start_indices, - function_args, + points, values, domain_range=domain_range, sample_names=total_sample_names, From f6a87ad84cb4ae1e0cd76f35a2fac2ac6dc8d955 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89lie=20Goudout?= Date: Thu, 23 Nov 2023 19:20:22 +0100 Subject: [PATCH 125/144] FDataIrregular.__init__: validate start_indices --- skfda/representation/irregular.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 4a3117b43..b351ed54a 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -119,6 +119,13 @@ class FDataIrregular(FData): # noqa: WPS214 interpolation: Defines the type of interpolation applied in `evaluate`. + Raises: + ValueError: + - if `points` and `values` lengths don't match + - if `start_indices` does'nt start with `0`, or is decreasing + somewhere, or ends with a value greater than or equal to + `len(points)`. 
+ Examples: Representation of an irregular functional data object with 2 samples representing a function :math:`f : \mathbb{R}\longmapsto\mathbb{R}`, @@ -215,7 +222,13 @@ def __init__( # noqa: WPS211 if len(self.points) != len(self.values): raise ValueError("Dimension mismatch in points and values") - if max(self.start_indices) >= len(self.points): + if self.start_indices[0] != 0: + raise ValueError("Array start_indices must start with 0") + + if np.any(np.diff(self.start_indices) < 0): + raise ValueError("Array start_indices must be non-decreasing") + + if self.start_indices[-1] >= len(self.points): raise ValueError("Index in start_indices out of bounds") # Ensure arguments are in order within each function From eee51ad320efa5d50105abd089e9857927ae4cbf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89lie=20Goudout?= Date: Thu, 23 Nov 2023 20:48:35 +0100 Subject: [PATCH 126/144] FDataIrregular.round clean (why start_indices special treatment?) --- skfda/representation/irregular.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index b351ed54a..186ea3c93 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -442,15 +442,11 @@ def round( # coalescing various arguments to the same rounded value rounded_values = self.values.round(decimals=decimals) - if out is not None and isinstance(out, FDataIrregular): - out.start_indices = self.start_indices + if isinstance(out, FDataIrregular): out.values = rounded_values - return out - return self.copy( - values=rounded_values, - ) + return self.copy(values=rounded_values) @property def dim_domain(self) -> int: From 3bd25facd74600fa1c39f12afe7bd0f284207311 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89lie=20Goudout?= Date: Thu, 23 Nov 2023 20:50:05 +0100 Subject: [PATCH 127/144] minor clean --- skfda/representation/irregular.py | 38 ++++++------------------------- 1 file changed, 7 insertions(+), 31 
deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 186ea3c93..a4b8a0055 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -462,7 +462,7 @@ def coordinates(self) -> _IrregularCoordinateIterator[T]: @property def n_samples(self) -> int: - return self.start_indices.shape[0] + return len(self.start_indices) @property def points_split(self) -> NDArrayFloat: @@ -727,23 +727,11 @@ def _get_op_matrix( # noqa: WPS212 other_vector = other[other_index] - # Must expand for the number of values in each curve - values_after = np.concatenate( - ( - self.start_indices, - np.array([len(self.points)]), - ), + # Number of values in each curve + values_curve = np.diff( + np.r_[self.start_indices, [len(self.points)]] ) - values_before = np.concatenate( - ( - np.array([0]), - self.start_indices, - ), - ) - - values_curve = (values_after - values_before)[1:] - # Repeat the other value for each curve as many times # as values inside the curve return np.repeat(other_vector, values_curve).reshape(-1, 1) @@ -759,23 +747,11 @@ def _get_op_matrix( # noqa: WPS212 other_vector = other[other_index] - # Must expand for the number of values in each curve - values_after = np.concatenate( - ( - self.start_indices, - np.array([len(self.points)]), - ), + # Number of values in each curve + values_curve = np.diff( + np.r_[self.start_indices, [len(self.points)]] ) - values_before = np.concatenate( - ( - np.array([0]), - self.start_indices, - ), - ) - - values_curve = (values_after - values_before)[1:] - # Repeat the other value for each curve as many times # as values inside the curve return np.repeat(other_vector, values_curve, axis=0) From ea7e6dd9fc72988acbdf0e16ecfd0f4acb15f4fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89lie=20Goudout?= Date: Thu, 23 Nov 2023 20:53:41 +0100 Subject: [PATCH 128/144] FDataIrregular._to_data_matrix clean remove loops --- skfda/representation/irregular.py | 37 
+++++++++++-------------------- 1 file changed, 13 insertions(+), 24 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index a4b8a0055..852787fe3 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -1057,35 +1057,24 @@ def _to_data_matrix(self) -> ArrayLike: ArrayLike: numpy array with the resulting matrix. """ # Find the common grid points - grid_points = [ - np.unique(self.points[:, dim]) - for dim in range(self.dim_domain) - ] + grid_points = list(map(np.unique, self.points.T)) - unified_matrix = np.empty( - ( - self.n_samples, - *[len(gp) for gp in grid_points], - self.dim_codomain, - ), + unified_matrix = np.full( + (self.n_samples, *map(len, grid_points), self.dim_codomain), np.nan + ) + + points_pos = tuple( + np.searchsorted(*arg) for arg in zip(grid_points, self.points.T) ) - unified_matrix.fill(np.nan) - # Fill with each function - next_indices = np.append( - self.start_indices, - len(self.points), + sample_idx = ( + np.searchsorted( + self.start_indices, np.arange(len(self.points)), "right" + ) + - 1 ) - for i, index in enumerate(self.start_indices): - for j in range(index, next_indices[i + 1]): - arg = self.points[j] - val = self.values[j] - pos = [ - np.where(gp == arg[dim])[0][0] - for dim, gp in enumerate(grid_points) - ] - unified_matrix[(i,) + tuple(pos)] = val + unified_matrix[(sample_idx,) + points_pos] = self.values return unified_matrix, grid_points From 6e32109b29fd1a0bb5de866c90151ca015c24d7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89lie=20Goudout?= Date: Thu, 23 Nov 2023 21:42:48 +0100 Subject: [PATCH 129/144] revert: remove *_split properties --- skfda/representation/irregular.py | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 852787fe3..6ceb55e60 100644 --- a/skfda/representation/irregular.py +++ 
b/skfda/representation/irregular.py @@ -45,7 +45,8 @@ def _get_sample_range_from_data( - points_split: list[NDArrayFloat], + start_indices: NDArrayInt, + points: NDArrayFloat, ) -> DomainRange: """Compute the domain ranges of each sample. @@ -62,7 +63,7 @@ def _get_sample_range_from_data( tuple( zip(np.min(f_points, axis=0), np.max(f_points, axis=0)), ) - for f_points in points_split + for f_points in np.split(points, start_indices[1:]) ) @@ -236,7 +237,9 @@ def __init__( # noqa: WPS211 self.points = sorted_arguments self.values = sorted_values - self._sample_range = _get_sample_range_from_data(self.points_split) + self._sample_range = _get_sample_range_from_data( + self.start_indices, self.points + ) # Default value for sample_range is a list of tuples with # the first and last arguments of each curve for each dimension @@ -393,8 +396,8 @@ def _sort_by_arguments(self) -> Tuple[ArrayLike, ArrayLike]: Returns: Tuple[ArrayLike, Arraylike]: sorted pair (arguments, values) """ - slice_args = self.points_split - slice_values = self.values_split + slice_args = np.split(self.points, self.start_indices)[1:] + slice_values = np.split(self.values, self.start_indices)[1:] # Sort lexicographically, first to last dimension sorting_masks = [ @@ -464,14 +467,6 @@ def coordinates(self) -> _IrregularCoordinateIterator[T]: def n_samples(self) -> int: return len(self.start_indices) - @property - def points_split(self) -> NDArrayFloat: - return np.split(self.points, self.start_indices[1:]) - - @property - def values_split(self) -> NDArrayFloat: - return np.split(self.values, self.start_indices[1:]) - @property def sample_range(self) -> DomainRange: """ @@ -1048,13 +1043,14 @@ def to_basis(self, basis: Basis, **kwargs: Any) -> FDataBasis: extrapolation=self.extrapolation, ) - def _to_data_matrix(self) -> ArrayLike: + def _to_data_matrix(self) -> tuple[ArrayLike, list[ArrayLike]]: """Convert FDataIrregular values to numpy matrix. 
Undefined values in the grid will be represented with np.nan. Returns: ArrayLike: numpy array with the resulting matrix. + list: numpy arrays representing grid_points. """ # Find the common grid points grid_points = list(map(np.unique, self.points.T)) @@ -1494,7 +1490,9 @@ def __init__( self.dim_domain = points.shape[1] if domain_range is None: - sample_range = _get_sample_range_from_data(self.points_split) + sample_range = _get_sample_range_from_data( + self.start_indices, self.points + ) domain_range = _get_domain_range_from_sample_range(sample_range) self.domain_range = validate_domain_range(domain_range) From 5b0ba7185875d2240563340db1d1fe6163b57522 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89lie=20Goudout?= Date: Thu, 23 Nov 2023 21:43:31 +0100 Subject: [PATCH 130/144] restrict keep empty samples --- skfda/representation/irregular.py | 37 +++++++++---------------------- 1 file changed, 10 insertions(+), 27 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 6ceb55e60..6b196b427 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -1187,40 +1187,23 @@ def restrict( # noqa: WPS210 npdr = np.asarray(validate_domain_range(domain_range)) # (dim, 2) - head = 0 - start_indices = [] - points = [] - values = [] - sample_names = [] - - # Eliminate points outside the new range. - # Must also modify function indices to point to new array - - for sample_points, sample_values, sample_name in zip( - self.points_split, # (num_points, dim) - self.values_split, - self.sample_names, - ): + mask = np.all( + (npdr[:, 0] <= sample_points) & (sample_points <= npdr[:, 1]), + axis=1, + ) - mask = np.all( - (npdr[:, 0] <= sample_points) & (sample_points <= npdr[:, 1]), - axis=1, - ) + num_samples = np.add.reduceat(mask, self.start_indices)[:-1] * ( + np.diff(self.start_indices) > 0 + ) - # Do not keep functions with no values. 
- num_valid_points = mask.sum() - if num_valid_points: - start_indices.append(head) - points.append(sample_points[mask]) - values.append(sample_values[mask]) - sample_names.append(sample_name) - head += num_valid_points + start_indices = np.r_[[0], num_samples.cumsum()] + points = self.points[mask] + values = self.values[mask] return self.copy( start_indices=np.array(start_indices), points=np.concatenate(points), values=np.concatenate(values), - sample_names=sample_names, domain_range=domain_range, ) From b1075259c7b907d58bfc187b41b1cdc9ec1883b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89lie=20Goudout?= Date: Fri, 24 Nov 2023 18:11:28 +0100 Subject: [PATCH 131/144] _reduceat v0 --- skfda/representation/irregular.py | 42 +++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 6b196b427..37ed07824 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -44,6 +44,48 @@ ###################### +def _reduceat(### FINISH DOC + TYPE HINTS + array: ArrayLike, + indices: ArrayLike, + axis: int = 0, + dtype=None, + out=None, + *, + ufunc, + value_empty +) -> NDArray: + """Wrapped `np.ufunc.reduceat` to manage edge cases. + + The edge cases are the one described in the doc of + `np.ufunc.reduceat`. Different behaviours are the following: + - No exception is raised when `indices[i] < 0` or + `indices[i] >=len(array)`. Instead, the corresponding value + is `value_empty`. + - When not in the previous case, the result is `value_empty` if + `indices[i] >= indices[i+1]` and otherwise, the same as + `ufunc.reduce(array[indices[i]:indices[i+1]])`. 
+ """ + array, indices = map(np.asarray, [array, indices]) + axis %= array.ndim + ax_idx = (slice(None),) * axis + n = array.shape[axis] + + pad_width = np.full((array.ndim, 2), 0) + pad_width[axis, 1] = 1 + extended_array = np.pad(array, pad_width, mode="empty") + extended_indices = np.append(indices, n) + + bad = (indices < 0) | (indices > n) + empty = (np.diff(extended_indices) <= 0) | bad + extended_indices[:-1][bad] = n + + out = ufunc.reduceat( + extended_array, extended_indices, axis=axis, dtype=dtype, out=out + )[ax_idx + (slice(-1),)] + out[ax_idx + (empty,)] = value_empty + + return out + def _get_sample_range_from_data( start_indices: NDArrayInt, points: NDArrayFloat, From 0e9949f2ea6c13fa96c1c3ba1bd9edd46741d778 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89lie=20Goudout?= Date: Fri, 24 Nov 2023 18:11:49 +0100 Subject: [PATCH 132/144] _get_sample_range_from_data update --- skfda/representation/irregular.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 37ed07824..48cb73e35 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -89,7 +89,7 @@ def _reduceat(### FINISH DOC + TYPE HINTS def _get_sample_range_from_data( start_indices: NDArrayInt, points: NDArrayFloat, -) -> DomainRange: +) -> DomainRangeLike: """Compute the domain ranges of each sample. Args: @@ -101,14 +101,20 @@ def _get_sample_range_from_data( sample_range[f][d] = (min_point, max_point) is the domain range for the function f in dimension d. 
""" - return tuple( - tuple( - zip(np.min(f_points, axis=0), np.max(f_points, axis=0)), - ) - for f_points in np.split(points, start_indices[1:]) + return np.stack( + [ + _reduceat( + points, + start_indices, + ufunc=ufunc, + value_empty=np.nan, + dtype=float, + ) + for ufunc in (np.fmin, np.fmax) + ], + axis=-1, ) - def _get_domain_range_from_sample_range( sample_range: DomainRange, ) -> DomainRange: From 6a8a90d87702522aee009ef97d06ddb6ba67c8cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89lie=20Goudout?= Date: Wed, 29 Nov 2023 08:55:39 +0100 Subject: [PATCH 133/144] Two-modes _reduceat for later decision --- skfda/representation/irregular.py | 124 ++++++++++++++++++++++-------- skfda/typing/_numpy.py | 8 +- 2 files changed, 96 insertions(+), 36 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 48cb73e35..f42a4370a 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -10,7 +10,14 @@ import itertools import numbers from typing import ( - Any, Optional, Sequence, Tuple, Type, TypeVar, Union, + Any, + Callable, + Optional, + Sequence, + Tuple, + Type, + TypeVar, + Union, ) import numpy as np @@ -29,7 +36,13 @@ GridPointsLike, LabelTupleLike, ) -from ..typing._numpy import ArrayLike, NDArrayBool, NDArrayFloat, NDArrayInt +from ..typing._numpy import ( + ArrayLike, + DTypeLIke, + NDArrayBool, + NDArrayFloat, + NDArrayInt, +) from ._functional_data import FData from .basis import Basis, FDataBasis from .evaluator import Evaluator @@ -43,48 +56,91 @@ # Auxiliary functions# ###################### - -def _reduceat(### FINISH DOC + TYPE HINTS +def _reduceat(#CHOOSE MODE 1 OR 2 array: ArrayLike, indices: ArrayLike, axis: int = 0, - dtype=None, - out=None, + dtype: Union[DTypeLike, None] = None, + out: Union[NDArray, None] = None, *, - ufunc, - value_empty + ufunc: Callable,# TO PRECISE(?) + value_empty: Any, ) -> NDArray: """Wrapped `np.ufunc.reduceat` to manage edge cases. 
The edge cases are the one described in the doc of `np.ufunc.reduceat`. Different behaviours are the following: - - No exception is raised when `indices[i] < 0` or - `indices[i] >=len(array)`. Instead, the corresponding value - is `value_empty`. - - When not in the previous case, the result is `value_empty` if - `indices[i] >= indices[i+1]` and otherwise, the same as - `ufunc.reduce(array[indices[i]:indices[i+1]])`. + - No exception is raised when `indices[i] < 0` or + `indices[i] >= len(array)`. Instead, the corresponding value is + `value_empty`. + - When not in the previous case, the result is `value_empty` if + `indices[i] >= indices[i+1]` and otherwise, the same as + `ufunc.reduce(array[indices[i]:indices[i+1]])`. + + Note that when necessary, `value_empty` is casted to `dtype` if not + `None`, or to the type of `array`'s elements. If not possible, an + exception will be raised. """ - array, indices = map(np.asarray, [array, indices]) - axis %= array.ndim - ax_idx = (slice(None),) * axis - n = array.shape[axis] - - pad_width = np.full((array.ndim, 2), 0) - pad_width[axis, 1] = 1 - extended_array = np.pad(array, pad_width, mode="empty") - extended_indices = np.append(indices, n) - - bad = (indices < 0) | (indices > n) - empty = (np.diff(extended_indices) <= 0) | bad - extended_indices[:-1][bad] = n - - out = ufunc.reduceat( - extended_array, extended_indices, axis=axis, dtype=dtype, out=out - )[ax_idx + (slice(-1),)] - out[ax_idx + (empty,)] = value_empty - - return out + # MODE 1 OR 2 TO CHOOSE (more extensive benchmarks to do) + # Not obvious depending on the use case, can go from x0.01 to x100. + # Maybe both mode can be kept and activated when more appropriate? 
+ MODE = 1 + + # MODE 1: Fix start_indices + one `np.ufun.reducaet` call + post-fix + if MODE == 1: + + array, indices = map(np.asarray, [array, indices]) + axis %= array.ndim + ax_idx = (slice(None),) * axis + n = array.shape[axis] + + pad_width = np.full((array.ndim, 2), 0) + pad_width[axis, 1] = 1 + extended_array = np.pad(array, pad_width, mode="empty") + extended_indices = np.append(indices, n) + + bad = (indices < 0) | (indices >= n) + empty = (np.diff(extended_indices) <= 0) | bad + extended_indices[:-1][bad] = n + + out = ufunc.reduceat( + extended_array, extended_indices, axis=axis, dtype=dtype, out=out + )[ax_idx + (slice(-1),)] + if empty.any(): + out[ax_idx + (empty,)] = value_empty + + return out + + # MODE 2: Iterative calls of `np.ufunc.reduce` + if MODE == 2: + + array, indices = map(np.asarray, [array, indices]) + ndim = array.ndim + axis = axis if axis >= 0 else ndim - axis + pre, (n, *post) = array.shape[:axis], array.shape[axis:] + shape = pre + (len(indices),) + tuple(post) + + if dtype is None: + dtype = array.dtype + + if out is None: + out = np.empty(shape, dtype=dtype) + else: + assert out.shape == shape + out = out.astype(dtype) + + ii = [slice(None)] * ndim + for i, (a, b) in enumerate(itertools.pairwise(np.append(indices, n))): + ii[axis] = i + ii_out = tuple(ii) + if a < 0 or a >= min(b, n): # Nothing to reduce + out[ii_out] = value_empty + else: + ii[axis] = slice(a, b) + ii_array = tuple(ii) + out[ii_out] = ufunc.reduce(array[ii_array], axis=axis) + + return out def _get_sample_range_from_data( start_indices: NDArrayInt, diff --git a/skfda/typing/_numpy.py b/skfda/typing/_numpy.py index 774511cc4..d49ee9e93 100644 --- a/skfda/typing/_numpy.py +++ b/skfda/typing/_numpy.py @@ -4,10 +4,14 @@ import numpy as np -try: - from numpy.typing import ArrayLike as ArrayLike # noqa: WPS113 +try: # noqa: WPS113 + from numpy.typing import ( + ArrayLike as ArrayLike, + DTypeLike as DTypeLike, + ) except ImportError: ArrayLike = np.ndarray # 
type:ignore[misc] # noqa: WPS440 + DTypeLIke = np.dtype # type:ignore[misc] try: # noqa: WPS229 from numpy.typing import NDArray From 22aa8c739562e9facb9a204ea9765b2a6a7f6014 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89lie=20Goudout?= Date: Wed, 29 Nov 2023 10:40:32 +0100 Subject: [PATCH 134/144] handle nan for domain range compute + enforce float type + allow len(points) as start_index for empty sample --- skfda/representation/irregular.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index f42a4370a..31e79be6c 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -172,7 +172,7 @@ def _get_sample_range_from_data( ) def _get_domain_range_from_sample_range( - sample_range: DomainRange, + sample_range: DomainRangeLike, ) -> DomainRange: """Compute the domain range of the whole dataset. @@ -185,8 +185,8 @@ def _get_domain_range_from_sample_range( the dimension d. 
""" sample_range_array = np.asarray(sample_range) - min_arguments = sample_range_array[..., 0].min(axis=0) - max_arguments = sample_range_array[..., 1].max(axis=0) + min_arguments = np.nanmin(sample_range_array[..., 0], axis=0) + max_arguments = np.nanmin(sample_range_array[..., 1], axis=0) return tuple(zip(min_arguments, max_arguments)) @@ -317,10 +317,10 @@ def __init__( # noqa: WPS211 ): """Construct a FDataIrregular object.""" self.start_indices = np.asarray(start_indices) - self.points = np.asarray(points) + self.points = np.asarray(points, dtype=float) if self.points.ndim == 1: self.points = self.points.reshape(-1, 1) - self.values = np.asarray(values) + self.values = np.asarray(values, dtype=float) if self.values.ndim == 1: self.values = self.values.reshape(-1, 1) @@ -333,7 +333,7 @@ def __init__( # noqa: WPS211 if np.any(np.diff(self.start_indices) < 0): raise ValueError("Array start_indices must be non-decreasing") - if self.start_indices[-1] >= len(self.points): + if self.start_indices[-1] > len(self.points): raise ValueError("Index in start_indices out of bounds") # Ensure arguments are in order within each function From 11508c1f7c03c7d061d9c6e6fd7536e5e1d8e1e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89lie=20Goudout?= Date: Wed, 29 Nov 2023 13:58:41 +0100 Subject: [PATCH 135/144] clean _sort_by_arguments --- skfda/representation/irregular.py | 34 +++++++++++-------------------- 1 file changed, 12 insertions(+), 22 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 31e79be6c..e708fe39c 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -38,7 +38,7 @@ ) from ..typing._numpy import ( ArrayLike, - DTypeLIke, + DTypeLike, NDArrayBool, NDArrayFloat, NDArrayInt, @@ -494,32 +494,22 @@ def from_fdatagrid( def _sort_by_arguments(self) -> Tuple[ArrayLike, ArrayLike]: """Sort the arguments lexicographically functionwise. - + Additionally, sort the values accordingly. 
- + Returns: Tuple[ArrayLike, Arraylike]: sorted pair (arguments, values) """ - slice_args = np.split(self.points, self.start_indices)[1:] - slice_values = np.split(self.values, self.start_indices)[1:] - - # Sort lexicographically, first to last dimension - sorting_masks = [ - np.lexsort(np.flip(f_args, axis=1).T) - for f_args in slice_args - ] - - sorted_args = [ - slice_args[i][mask] - for i, mask in enumerate(sorting_masks) - ] - - sorted_values = [ - slice_values[i][mask] - for i, mask in enumerate(sorting_masks) - ] + points_split = np.split(self.points, self.start_indices)[1:] + shifts = itertools.accumulate(map(len, [[]] + points_split[:-1])) + sorter = np.concatenate( + [ + np.lexsort(np.rot90(points)) + shift + for points, shift in zip(points_split, shifts) + ] + ) - return np.concatenate(sorted_args), np.concatenate(sorted_values) + return self.points[sorter], self.values[sorter] def round( self, From 5a841edbe82f5e108b6e244246a535a6090cc25e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89lie=20Goudout?= Date: Fri, 2 Feb 2024 15:15:34 +0100 Subject: [PATCH 136/144] _reduceat wrapper + minor mods --- skfda/representation/irregular.py | 145 ++++++++++++------------------ 1 file changed, 56 insertions(+), 89 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index e708fe39c..097a14dd0 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -56,91 +56,62 @@ # Auxiliary functions# ###################### -def _reduceat(#CHOOSE MODE 1 OR 2 +def _reduceat( + ufunc, array: ArrayLike, indices: ArrayLike, axis: int = 0, - dtype: Union[DTypeLike, None] = None, - out: Union[NDArray, None] = None, + dtype=None, + out=None, *, - ufunc: Callable,# TO PRECISE(?) - value_empty: Any, -) -> NDArray: - """Wrapped `np.ufunc.reduceat` to manage edge cases. + value_empty +): + """ + Wrapped `np.ufunc.reduceat` to manage some edge cases. 
The edge cases are the one described in the doc of `np.ufunc.reduceat`. Different behaviours are the following: - - No exception is raised when `indices[i] < 0` or - `indices[i] >= len(array)`. Instead, the corresponding value is - `value_empty`. - - When not in the previous case, the result is `value_empty` if - `indices[i] >= indices[i+1]` and otherwise, the same as - `ufunc.reduce(array[indices[i]:indices[i+1]])`. - - Note that when necessary, `value_empty` is casted to `dtype` if not - `None`, or to the type of `array`'s elements. If not possible, an - exception will be raised. + - No exception is raised when `indices[i] < 0` or + `indices[i] >=len(array)`. Instead, the corresponding value + is `value_empty`. + - When not in the previous case, the result is `value_empty` if + `indices[i] == indices[i+1]` and otherwise, the same as + `ufunc.reduce(array[indices[i]:indices[i+1]])`. This means + that an exception is still be raised if `indices[i] > + indices[i+1]`. + + Note: The `value_empty` must be convertible to the `dtype` (either + provided or inferred from the `ufunc` operations). """ - # MODE 1 OR 2 TO CHOOSE (more extensive benchmarks to do) - # Not obvious depending on the use case, can go from x0.01 to x100. - # Maybe both mode can be kept and activated when more appropriate? 
- MODE = 1 - - # MODE 1: Fix start_indices + one `np.ufun.reducaet` call + post-fix - if MODE == 1: - - array, indices = map(np.asarray, [array, indices]) - axis %= array.ndim - ax_idx = (slice(None),) * axis - n = array.shape[axis] - - pad_width = np.full((array.ndim, 2), 0) - pad_width[axis, 1] = 1 - extended_array = np.pad(array, pad_width, mode="empty") - extended_indices = np.append(indices, n) - - bad = (indices < 0) | (indices >= n) - empty = (np.diff(extended_indices) <= 0) | bad - extended_indices[:-1][bad] = n - - out = ufunc.reduceat( - extended_array, extended_indices, axis=axis, dtype=dtype, out=out - )[ax_idx + (slice(-1),)] - if empty.any(): - out[ax_idx + (empty,)] = value_empty - - return out + array = np.asarray(array) + indices = np.asarray(indices) - # MODE 2: Iterative calls of `np.ufunc.reduce` - if MODE == 2: + n = array.shape[axis] + good_axis_idx = ( + (indices >= 0) & (indices < n) & (np.diff(indices, append=n) > 0) + ) - array, indices = map(np.asarray, [array, indices]) - ndim = array.ndim - axis = axis if axis >= 0 else ndim - axis - pre, (n, *post) = array.shape[:axis], array.shape[axis:] - shape = pre + (len(indices),) + tuple(post) - - if dtype is None: - dtype = array.dtype - - if out is None: - out = np.empty(shape, dtype=dtype) - else: - assert out.shape == shape - out = out.astype(dtype) - - ii = [slice(None)] * ndim - for i, (a, b) in enumerate(itertools.pairwise(np.append(indices, n))): - ii[axis] = i - ii_out = tuple(ii) - if a < 0 or a >= min(b, n): # Nothing to reduce - out[ii_out] = value_empty - else: - ii[axis] = slice(a, b) - ii_array = tuple(ii) - out[ii_out] = ufunc.reduce(array[ii_array], axis=axis) - - return out + good_idx = [slice(None)] * array.ndim + good_idx[axis] = good_axis_idx + good_idx = tuple(good_idx) + + reduceat_out = ufunc.reduceat( + array, indices[good_axis_idx], axis=axis, dtype=dtype + ) + + out_shape = list(array.shape) + out_shape[axis] = len(indices) + out_dtype = dtype or reduceat_out.dtype + + 
if out is None: + out = np.full(out_shape, value_empty, dtype=out_dtype) + else: + out.astype(out_dtype, copy=False) + out.fill(value_empty) + + out[good_idx] = reduceat_out + + return out def _get_sample_range_from_data( start_indices: NDArrayInt, @@ -160,9 +131,9 @@ def _get_sample_range_from_data( return np.stack( [ _reduceat( + ufunc, points, start_indices, - ufunc=ufunc, value_empty=np.nan, dtype=float, ) @@ -810,16 +781,14 @@ def _get_op_matrix( # noqa: WPS212 return other elif other.shape == (self.n_samples,): other_index = ( - (slice(None),) + (np.newaxis,) - * (self.values.ndim - 1) + (slice(None),) + + (np.newaxis,) * (self.values.ndim - 1) ) other_vector = other[other_index] # Number of values in each curve - values_curve = np.diff( - np.r_[self.start_indices, [len(self.points)]] - ) + values_curve = np.diff(self.start_indices, append=len(self.points)) # Repeat the other value for each curve as many times # as values inside the curve @@ -829,25 +798,23 @@ def _get_op_matrix( # noqa: WPS212 self.dim_codomain, ): other_index = ( - (slice(None),) + (np.newaxis,) - * (self.values.ndim - 2) + (slice(None),) + + (np.newaxis,) * (self.values.ndim - 2) + (slice(None),) ) other_vector = other[other_index] # Number of values in each curve - values_curve = np.diff( - np.r_[self.start_indices, [len(self.points)]] - ) + values_curve = np.diff(self.start_indices, append=len(self.points)) # Repeat the other value for each curve as many times # as values inside the curve return np.repeat(other_vector, values_curve, axis=0) raise ValueError( - f"Invalid dimensions in operator between FDataIrregular " - f"and Numpy array: {other.shape}", + f"Invalid dimensions in operator between FDataIrregular and " + f"Numpy array: {other.shape}", ) elif isinstance(other, FDataIrregular): @@ -1395,7 +1362,7 @@ def __getitem__( required_slices = [] key = _check_array_key(self.start_indices, key) indices = range(self.n_samples) - required_indices = indices[key] + required_indices = 
np.array(indices)[key] for i in required_indices: next_index = None if i + 1 < self.n_samples: From 1a1835e56a761b6fe6c0b3bd177bb411edd0dd6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89lie=20Goudout?= Date: Wed, 14 Feb 2024 06:03:30 +0100 Subject: [PATCH 137/144] removed useless op (???) --- skfda/representation/irregular.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 097a14dd0..07f132ba7 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -472,11 +472,10 @@ def _sort_by_arguments(self) -> Tuple[ArrayLike, ArrayLike]: Tuple[ArrayLike, Arraylike]: sorted pair (arguments, values) """ points_split = np.split(self.points, self.start_indices)[1:] - shifts = itertools.accumulate(map(len, [[]] + points_split[:-1])) sorter = np.concatenate( [ np.lexsort(np.rot90(points)) + shift - for points, shift in zip(points_split, shifts) + for points, shift in zip(points_split, self.start_indices) ] ) From f0fe0d7f81d44ef1bd09be047fc129ee1a3f0644 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89lie=20Goudout?= Date: Wed, 14 Feb 2024 07:06:29 +0100 Subject: [PATCH 138/144] cleaner _sort_by_arguments from vnmabus --- skfda/representation/irregular.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 07f132ba7..421ac7842 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -471,13 +471,11 @@ def _sort_by_arguments(self) -> Tuple[ArrayLike, ArrayLike]: Returns: Tuple[ArrayLike, Arraylike]: sorted pair (arguments, values) """ - points_split = np.split(self.points, self.start_indices)[1:] - sorter = np.concatenate( - [ - np.lexsort(np.rot90(points)) + shift - for points, shift in zip(points_split, self.start_indices) - ] + ind = np.repeat( + range(len(self.start_indices)), + np.diff(self.start_indices, 
append=len(self.points)), ) + sorter = np.lexsort(np.rot90(np.c_[ind, self.points])) return self.points[sorter], self.values[sorter] From 4a2fc88fbbc00fc5b1aac8021b15c840935471bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89lie=20Goudout?= Date: Wed, 14 Feb 2024 07:09:18 +0100 Subject: [PATCH 139/144] resolve reviews --- skfda/representation/irregular.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 421ac7842..64299ef9d 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -971,14 +971,14 @@ def concatenate(self: T, *others: T, as_coordinates: bool = False) -> T: ) # Verify that dimensions are compatible assert others, "No objects to concatenate" - all_ = (self,) + others + all_objects = (self,) + others start_indices_split = [] total_points = 0 points_split = [] values_split = [] total_sample_names_split = [] domain_range_split = [] - for x, y in itertools.pairwise(all_ + (self,)): + for x, y in itertools.pairwise(all_objects + (self,)): x.check_same_dimensions(y) start_indices_split.append(x.start_indices + total_points) total_points += len(x.points) @@ -1243,7 +1243,7 @@ def restrict( # noqa: WPS210 from ..misc.validation import validate_domain_range - npdr = np.asarray(validate_domain_range(domain_range)) # (dim, 2) + npdr = np.asarray(validate_domain_range(domain_range)) # shape(dim, 2) mask = np.all( (npdr[:, 0] <= sample_points) & (sample_points <= npdr[:, 1]), From 01fe0ba96ea73e5c55a96d9851397277153ab876 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89lie=20Goudout?= Date: Thu, 15 Feb 2024 23:18:22 +0100 Subject: [PATCH 140/144] better lexsort comment --- skfda/representation/irregular.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 64299ef9d..14a946f62 100644 --- a/skfda/representation/irregular.py +++ 
b/skfda/representation/irregular.py @@ -475,7 +475,11 @@ def _sort_by_arguments(self) -> Tuple[ArrayLike, ArrayLike]: range(len(self.start_indices)), np.diff(self.start_indices, append=len(self.points)), ) - sorter = np.lexsort(np.rot90(np.c_[ind, self.points])) + # In order to use lexsort the following manipulations are required: + # - Transpose the axis, so that the first axis contains the keys. + # - Flip that axis so that the primary key is last, and they are thus + # in last-to-first order. + sorter = np.lexsort(np.c_[ind, self.points].T[::-1]) return self.points[sorter], self.values[sorter] From ee89c2cf7dabd91d492ffdecaf95615693d604d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89lie=20Goudout?= Date: Mon, 26 Feb 2024 15:06:02 +0100 Subject: [PATCH 141/144] fixed typo domain_range max --- skfda/representation/irregular.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 14a946f62..c9a7335cd 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -157,7 +157,7 @@ def _get_domain_range_from_sample_range( """ sample_range_array = np.asarray(sample_range) min_arguments = np.nanmin(sample_range_array[..., 0], axis=0) - max_arguments = np.nanmin(sample_range_array[..., 1], axis=0) + max_arguments = np.nanmax(sample_range_array[..., 1], axis=0) return tuple(zip(min_arguments, max_arguments)) From 6bf925c334087227c4461d6316e85f88f677a6b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89lie=20Goudout?= Date: Mon, 26 Feb 2024 15:07:24 +0100 Subject: [PATCH 142/144] fixed restrict + allow domain_range broadcast --- skfda/representation/irregular.py | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index c9a7335cd..f4c7c4a7f 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -1247,26 +1247,24 @@ 
def restrict( # noqa: WPS210 from ..misc.validation import validate_domain_range - npdr = np.asarray(validate_domain_range(domain_range)) # shape(dim, 2) + npdr = np.broadcast_to( + validate_domain_range(domain_range), + (self.dim_domain, 2), + ) mask = np.all( - (npdr[:, 0] <= sample_points) & (sample_points <= npdr[:, 1]), + (npdr[:, 0] <= self.points) & (self.points <= npdr[:, 1]), axis=1, ) - num_samples = np.add.reduceat(mask, self.start_indices)[:-1] * ( - np.diff(self.start_indices) > 0 - ) - - start_indices = np.r_[[0], num_samples.cumsum()] - points = self.points[mask] - values = self.values[mask] + num_points = _reduceat(np.add, mask, self.start_indices, value_empty=0) + start_indices = np.r_[[0], num_points[:-1].cumsum()] return self.copy( - start_indices=np.array(start_indices), - points=np.concatenate(points), - values=np.concatenate(values), - domain_range=domain_range, + start_indices=start_indices, + points=self.points[mask], + values=self.values[mask], + domain_range=npdr, ) def shift( From fb6502f6d23fe6ab8cd0bfe599dedd418bf8bf44 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Thu, 7 Mar 2024 17:18:25 +0100 Subject: [PATCH 143/144] Fix doctests. Automatic casting to float dtype is removed, as it prevents using different float sizes (or even integers in the future). 
--- skfda/representation/irregular.py | 38 ++++++++++++++++--------------- 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index f4c7c4a7f..1524a02fa 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -24,11 +24,7 @@ import pandas.api.extensions from matplotlib.figure import Figure -from .._utils import ( - _cartesian_product, - _check_array_key, - _to_grid_points, -) +from .._utils import _cartesian_product, _check_array_key, _to_grid_points from ..typing._base import ( DomainRange, DomainRangeLike, @@ -56,6 +52,7 @@ # Auxiliary functions# ###################### + def _reduceat( ufunc, array: ArrayLike, @@ -113,6 +110,7 @@ def _reduceat( return out + def _get_sample_range_from_data( start_indices: NDArrayInt, points: NDArrayFloat, @@ -142,6 +140,7 @@ def _get_sample_range_from_data( axis=-1, ) + def _get_domain_range_from_sample_range( sample_range: DomainRangeLike, ) -> DomainRange: @@ -254,8 +253,8 @@ class FDataIrregular(FData): # noqa: WPS214 representing a function :math:`f : \mathbb{R}\longmapsto\mathbb{R}^2`. >>> indices = [0, 2] - >>> arguments = [[1], [2], [3], [4], [5]] - >>> values = [[1, 1], [2, 2], [3, 3], [4, 4], [5, 5]] + >>> arguments = [[1.], [2.], [3.], [4.], [5.]] + >>> values = [[1., 1.], [2., 2.], [3., 3.], [4., 4.], [5., 5.]] >>> fd = FDataIrregular(indices, arguments, values) >>> fd.dim_domain, fd.dim_codomain (1, 2) @@ -264,8 +263,8 @@ class FDataIrregular(FData): # noqa: WPS214 representing a function :math:`f : \mathbb{R}^2\longmapsto\mathbb{R}`. 
>>> indices = [0, 2] - >>> arguments = [[1, 1], [2, 2], [3, 3], [4, 4], [5, 5]] - >>> values = [[1], [2], [3], [4], [5]] + >>> arguments = [[1., 1.], [2., 2.], [3., 3.], [4., 4.], [5., 5.]] + >>> values = [[1.], [2.], [3.], [4.], [5.]] >>> fd = FDataIrregular(indices, arguments, values) >>> fd.dim_domain, fd.dim_codomain (2, 1) @@ -288,10 +287,10 @@ def __init__( # noqa: WPS211 ): """Construct a FDataIrregular object.""" self.start_indices = np.asarray(start_indices) - self.points = np.asarray(points, dtype=float) + self.points = np.asarray(points) if self.points.ndim == 1: self.points = self.points.reshape(-1, 1) - self.values = np.asarray(values, dtype=float) + self.values = np.asarray(values) if self.values.ndim == 1: self.values = self.values.reshape(-1, 1) @@ -313,7 +312,8 @@ def __init__( # noqa: WPS211 self.values = sorted_values self._sample_range = _get_sample_range_from_data( - self.start_indices, self.points + self.start_indices, + self.points, ) # Default value for sample_range is a list of tuples with @@ -465,9 +465,9 @@ def from_fdatagrid( def _sort_by_arguments(self) -> Tuple[ArrayLike, ArrayLike]: """Sort the arguments lexicographically functionwise. - + Additionally, sort the values accordingly. 
- + Returns: Tuple[ArrayLike, Arraylike]: sorted pair (arguments, values) """ @@ -789,7 +789,8 @@ def _get_op_matrix( # noqa: WPS212 other_vector = other[other_index] # Number of values in each curve - values_curve = np.diff(self.start_indices, append=len(self.points)) + values_curve = np.diff( + self.start_indices, append=len(self.points)) # Repeat the other value for each curve as many times # as values inside the curve @@ -807,7 +808,8 @@ def _get_op_matrix( # noqa: WPS212 other_vector = other[other_index] # Number of values in each curve - values_curve = np.diff(self.start_indices, append=len(self.points)) + values_curve = np.diff( + self.start_indices, append=len(self.points)) # Repeat the other value for each curve as many times # as values inside the curve @@ -938,13 +940,13 @@ def concatenate(self: T, *others: T, as_coordinates: bool = False) -> T: Examples: >>> indices = [0, 2] - >>> arguments = values = np.arange(5).reshape(-1, 1) + >>> arguments = values = np.arange(5.).reshape(-1, 1) >>> fd = FDataIrregular(indices, arguments, values) >>> arguments_2 = values_2 = np.arange(5, 10).reshape(-1, 1) >>> fd_2 = FDataIrregular(indices, arguments_2, values_2) >>> fd.concatenate(fd_2) FDataIrregular( - start_indices=array([0, 2, 5, 7], dtype=uint32), + start_indices=array([0, 2, 5, 7]), points=array([[ 0.], [ 1.], [ 2.], From cd7e73e73cdc6cc0b056c93bcc1bf7f004dfce10 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Mon, 11 Mar 2024 12:08:20 +0100 Subject: [PATCH 144/144] Fix typo. 
--- skfda/typing/_numpy.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/skfda/typing/_numpy.py b/skfda/typing/_numpy.py index d49ee9e93..b870c2bc4 100644 --- a/skfda/typing/_numpy.py +++ b/skfda/typing/_numpy.py @@ -5,13 +5,10 @@ import numpy as np try: # noqa: WPS113 - from numpy.typing import ( - ArrayLike as ArrayLike, - DTypeLike as DTypeLike, - ) + from numpy.typing import ArrayLike as ArrayLike, DTypeLike as DTypeLike except ImportError: ArrayLike = np.ndarray # type:ignore[misc] # noqa: WPS440 - DTypeLIke = np.dtype # type:ignore[misc] + DTypeLike = np.dtype # type:ignore[misc] try: # noqa: WPS229 from numpy.typing import NDArray