From 0f90db2ccfafaea0bd51b5a323a36c8384f2afc0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89lie=20Goudout?= Date: Tue, 14 Nov 2023 16:06:09 +0100 Subject: [PATCH 01/24] use ary.ndim instead of len(ary.shape) --- skfda/misc/covariances.py | 2 +- skfda/ml/regression/_linear_regression.py | 2 +- .../variable_selection/recursive_maxima_hunting.py | 2 +- skfda/representation/basis/_fdatabasis.py | 4 ++-- skfda/representation/irregular.py | 10 ++++------ 5 files changed, 9 insertions(+), 11 deletions(-) diff --git a/skfda/misc/covariances.py b/skfda/misc/covariances.py index 43635eb02..298fcf64d 100644 --- a/skfda/misc/covariances.py +++ b/skfda/misc/covariances.py @@ -31,7 +31,7 @@ def _transform_to_2d(t: ArrayLike) -> NDArrayFloat: """Transform 1d arrays in column vectors.""" t = np.asfarray(t) - dim = len(t.shape) + dim = t.ndim assert dim <= 2 if dim < 2: diff --git a/skfda/ml/regression/_linear_regression.py b/skfda/ml/regression/_linear_regression.py index f672a2923..4cc45524a 100644 --- a/skfda/ml/regression/_linear_regression.py +++ b/skfda/ml/regression/_linear_regression.py @@ -607,7 +607,7 @@ def _check_and_convert( np.ndarray: numpy 2D array. 
""" new_X = np.asarray(X) - if len(new_X.shape) == 1: + if new_X.ndim == 1: new_X = new_X[:, np.newaxis] return new_X diff --git a/skfda/preprocessing/dim_reduction/variable_selection/recursive_maxima_hunting.py b/skfda/preprocessing/dim_reduction/variable_selection/recursive_maxima_hunting.py index 432b65bb8..2a66ffa19 100644 --- a/skfda/preprocessing/dim_reduction/variable_selection/recursive_maxima_hunting.py +++ b/skfda/preprocessing/dim_reduction/variable_selection/recursive_maxima_hunting.py @@ -46,7 +46,7 @@ def _transform_to_2d(t: ArrayLike) -> NDArrayFloat: t = np.asfarray(t) - dim = len(t.shape) + dim = t.ndim assert dim <= 2 if dim < 2: diff --git a/skfda/representation/basis/_fdatabasis.py b/skfda/representation/basis/_fdatabasis.py index 1a4e0830d..b943f7932 100644 --- a/skfda/representation/basis/_fdatabasis.py +++ b/skfda/representation/basis/_fdatabasis.py @@ -687,8 +687,8 @@ def _array_to_R( # noqa: N802 coefficients: NDArrayFloat, transpose: bool = False, ) -> str: - if len(coefficients.shape) == 1: - coefficients = coefficients.reshape((1, coefficients.shape[0])) + if coefficients.ndim == 1: + coefficients = coefficients[None] if transpose is True: coefficients = np.transpose(coefficients) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 0de90f22c..fc13632b5 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -206,16 +206,14 @@ def __init__( # noqa: WPS211 """Construct a FDataIrregular object.""" self.start_indices = np.asarray(start_indices) self.points = np.asarray(points) - if len(self.points.shape) == 1: + if self.points.ndim == 1: self.points = self.points.reshape(-1, 1) self.values = np.asarray(values) - if len(self.values.shape) == 1: + if self.values.ndim == 1: self.values = self.values.reshape(-1, 1) - if self.points.shape[0] != self.values.shape[0]: - raise ValueError( - "Dimension mismatch in points and values", - ) + if len(self.points) != len(self.values): 
+ raise ValueError("Dimension mismatch in points and values") if max(self.start_indices) >= len(self.points): raise ValueError("Index in start_indices out of bounds") From 26b5f0c4386681db1b6e225dbd7ec20a1efcacfd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89lie=20Goudout?= Date: Tue, 14 Nov 2023 17:15:37 +0100 Subject: [PATCH 02/24] points_split and values_split as properties --- skfda/representation/irregular.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index fc13632b5..8765e928e 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -44,8 +44,7 @@ def _get_sample_range_from_data( - start_indices: NDArrayInt, - points: NDArrayFloat, + points_split: list[NDArrayFloat], ) -> DomainRange: """Compute the domain ranges of each sample. @@ -62,7 +61,7 @@ def _get_sample_range_from_data( tuple( zip(np.min(f_points, axis=0), np.max(f_points, axis=0)), ) - for f_points in np.split(points, start_indices[1:]) + for f_points in points_split ) @@ -223,10 +222,7 @@ def __init__( # noqa: WPS211 self.points = sorted_arguments self.values = sorted_values - self._sample_range = _get_sample_range_from_data( - self.start_indices, - self.points, - ) + self._sample_range = _get_sample_range_from_data(self.points_split) # Default value for sample_range is a list of tuples with # the first and last arguments of each curve for each dimension @@ -383,8 +379,8 @@ def _sort_by_arguments(self) -> Tuple[ArrayLike, ArrayLike]: Returns: Tuple[ArrayLike, Arraylike]: sorted pair (arguments, values) """ - slice_args = np.split(self.points, self.start_indices[1:]) - slice_values = np.split(self.values, self.start_indices[1:]) + slice_args = self.points_split + slice_values = self.values_split # Sort lexicographically, first to last dimension sorting_masks = [ @@ -458,6 +454,14 @@ def coordinates(self) -> _IrregularCoordinateIterator[T]: def 
n_samples(self) -> int: return self.start_indices.shape[0] + @property + def points_split(self) -> NDArrayFloat: + return np.split(self.points, self.start_indices[1:]) + + @property + def values_split(self) -> NDArrayFloat: + return np.split(self.values, self.start_indices[1:]) + @property def sample_range(self) -> DomainRange: """ From fb69594d5d08ea71fc1722cdf1d78a48c265f8cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89lie=20Goudout?= Date: Tue, 14 Nov 2023 17:16:11 +0100 Subject: [PATCH 03/24] FDataIrregular.cleaned restrict method --- skfda/representation/irregular.py | 60 +++++++++++++++---------------- 1 file changed, 29 insertions(+), 31 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 8765e928e..f02919f57 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -1225,59 +1225,60 @@ def copy( # noqa: WPS211 def restrict( # noqa: WPS210 self: T, domain_range: DomainRangeLike, + *, + with_bounds: bool = False, ) -> T: """ Restrict the functions to a new domain range. Args: domain_range: New domain range. + with_bounds: Whether or not to ensure domain boundaries + appear in `grid_points`. Returns: T: Restricted function. """ + if with_bounds: # To do + raise NotImplementedError('Not yet implemented for FDataIrregular') + from ..misc.validation import validate_domain_range - domain_range = validate_domain_range(domain_range) + npdr = np.asarray(validate_domain_range(domain_range)) # (dim, 2) head = 0 - indices = [] - arguments = [] + start_indices = [] + points = [] values = [] sample_names = [] # Eliminate points outside the new range. 
# Must also modify function indices to point to new array - slice_points = np.split(self.points, self.start_indices[1:]) - slice_values = np.split(self.values, self.start_indices[1:]) - - for i, points_values in enumerate(zip(slice_points, slice_values)): - sample_points, sample_values = points_values - masks = set(range(sample_points.shape[0])) - for dim, dr in enumerate(domain_range): - dr_start, dr_end = dr - select_mask = np.where( - ( - (dr_start <= sample_points[:, dim]) - & (sample_points[:, dim] <= dr_end) - ), - ) + for sample_points, sample_values, sample_name in zip( + self.points_split, # (num_points, dim) + self.values_split, + self.sample_names, + ): - masks = masks.intersection(set(select_mask[0])) + mask = np.all( + (npdr[:, 0] <= sample_points) & (sample_points <= npdr[:, 1]), + axis=1, + ) # Do not keep functions with no values. - masks = list(masks) - if len(masks) > 0: - indices.append(head) - arguments.append(sample_points[masks, :]) - values.append(sample_values[masks, :]) - sample_names.append(self.sample_names[i]) - head += len(masks) + num_valid_points = mask.sum() + if num_valid_points: + start_indices.append(head) + points.append(sample_points[mask]) + values.append(sample_values[mask]) + sample_names.append(sample_name) + head += num_valid_points return self.copy( - start_indices=np.array(indices), - points=np.concatenate(arguments), + start_indices=np.array(start_indices), + points=np.concatenate(points), values=np.concatenate(values), sample_names=sample_names, domain_range=domain_range, @@ -1549,10 +1550,7 @@ def __init__( self.dim_domain = points.shape[1] if domain_range is None: - sample_range = _get_sample_range_from_data( - self.start_indices, - self.points, - ) + sample_range = _get_sample_range_from_data(self.points_split) domain_range = _get_domain_range_from_sample_range(sample_range) self.domain_range = validate_domain_range(domain_range) From ad713397ffe6bb6118faa713f4ac5712c7ccb0d6 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?=C3=89lie=20Goudout?= Date: Wed, 15 Nov 2023 21:34:35 +0100 Subject: [PATCH 04/24] cleaner concatenate --- skfda/representation/irregular.py | 86 ++++++++++--------------------- 1 file changed, 28 insertions(+), 58 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index f02919f57..4a3117b43 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -7,6 +7,7 @@ """ from __future__ import annotations +import itertools import numbers from typing import ( Any, Optional, Sequence, Tuple, Type, TypeVar, Union, @@ -931,67 +932,36 @@ def concatenate(self: T, *others: T, as_coordinates: bool = False) -> T: "Not implemented for as_coordinates = True", ) # Verify that dimensions are compatible - assert len(others) > 0, "No objects to concatenate" - self.check_same_dimensions(others[0]) - if len(others) > 1: - for x, y in zip(others, others[1:]): - x.check_same_dimensions(y) - - # Allocate all required memory - total_functions = self.n_samples + sum( - [ - o.n_samples - for o in others - ], - ) - total_values = len(self.points) + sum( - [ - len(o.points) - for o in others - ], - ) - total_sample_names = [] - start_indices = np.zeros((total_functions, ), dtype=np.uint32) - function_args = np.zeros( - (total_values, self.dim_domain), - ) - values = np.zeros( - (total_values, self.dim_codomain), - ) - index = 0 - head = 0 - - # Add samples sequentially - for f_data in [self] + list(others): - start_indices[ - index:index + f_data.n_samples - ] = f_data.start_indices - function_args[ - head:head + len(f_data.points) - ] = f_data.points - values[ - head:head + len(f_data.points) - ] = f_data.values - # Adjust pointers to the concatenated array - start_indices[index:index + f_data.n_samples] += head - index += f_data.n_samples - head += len(f_data.points) - total_sample_names = total_sample_names + list(f_data.sample_names) - - # Check domain range - domain_range = [list(r) for r in 
self.domain_range] - for dim in range(self.dim_domain): - dim_max = np.max(function_args[:, dim]) - dim_min = np.min(function_args[:, dim]) - - if dim_max > self.domain_range[dim][1]: - domain_range[dim][1] = dim_max - if dim_min < self.domain_range[dim][0]: - domain_range[dim][0] = dim_min + assert others, "No objects to concatenate" + all_ = (self,) + others + start_indices_split = [] + total_points = 0 + points_split = [] + values_split = [] + total_sample_names_split = [] + domain_range_split = [] + for x, y in itertools.pairwise(all_ + (self,)): + x.check_same_dimensions(y) + start_indices_split.append(x.start_indices + total_points) + total_points += len(x.points) + points_split.append(x.points) + values_split.append(x.values) + total_sample_names_split.append(x.sample_names) + domain_range_split.append(x.domain_range) + + start_indices = np.concatenate(start_indices_split) + points = np.concatenate(points_split) + values = np.concatenate(values_split) + total_sample_names = list(itertools.chain(*total_sample_names_split)) + domain_range_stacked = np.stack(domain_range_split, axis=-1) + domain_range = np.c_[ + domain_range_stacked[:, 0].min(axis=-1), + domain_range_stacked[:, 1].max(axis=-1), + ] return self.copy( start_indices, - function_args, + points, values, domain_range=domain_range, sample_names=total_sample_names, From f6a87ad84cb4ae1e0cd76f35a2fac2ac6dc8d955 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89lie=20Goudout?= Date: Thu, 23 Nov 2023 19:20:22 +0100 Subject: [PATCH 05/24] FDataIrregular.__init__: validate start_indices --- skfda/representation/irregular.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 4a3117b43..b351ed54a 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -119,6 +119,13 @@ class FDataIrregular(FData): # noqa: WPS214 interpolation: Defines the type of interpolation applied in 
`evaluate`. + Raises: + ValueError: + - if `points` and `values` lengths don't match + - if `start_indices` doesn't start with `0`, or is decreasing + somewhere, or ends with a value greater than or equal to + `len(points)`. + Examples: Representation of an irregular functional data object with 2 samples representing a function :math:`f : \mathbb{R}\longmapsto\mathbb{R}`, @@ -215,7 +222,13 @@ def __init__( # noqa: WPS211 if len(self.points) != len(self.values): raise ValueError("Dimension mismatch in points and values") - if max(self.start_indices) >= len(self.points): + if self.start_indices[0] != 0: + raise ValueError("Array start_indices must start with 0") + + if np.any(np.diff(self.start_indices) < 0): + raise ValueError("Array start_indices must be non-decreasing") + + if self.start_indices[-1] >= len(self.points): raise ValueError("Index in start_indices out of bounds") # Ensure arguments are in order within each function From eee51ad320efa5d50105abd089e9857927ae4cbf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89lie=20Goudout?= Date: Thu, 23 Nov 2023 20:48:35 +0100 Subject: [PATCH 06/24] FDataIrregular.round clean (why start_indices special treatment?) 
--- skfda/representation/irregular.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index b351ed54a..186ea3c93 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -442,15 +442,11 @@ def round( # coalescing various arguments to the same rounded value rounded_values = self.values.round(decimals=decimals) - if out is not None and isinstance(out, FDataIrregular): - out.start_indices = self.start_indices + if isinstance(out, FDataIrregular): out.values = rounded_values - return out - return self.copy( - values=rounded_values, - ) + return self.copy(values=rounded_values) @property def dim_domain(self) -> int: From 3bd25facd74600fa1c39f12afe7bd0f284207311 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89lie=20Goudout?= Date: Thu, 23 Nov 2023 20:50:05 +0100 Subject: [PATCH 07/24] minor clean --- skfda/representation/irregular.py | 38 ++++++------------------------- 1 file changed, 7 insertions(+), 31 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 186ea3c93..a4b8a0055 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -462,7 +462,7 @@ def coordinates(self) -> _IrregularCoordinateIterator[T]: @property def n_samples(self) -> int: - return self.start_indices.shape[0] + return len(self.start_indices) @property def points_split(self) -> NDArrayFloat: @@ -727,23 +727,11 @@ def _get_op_matrix( # noqa: WPS212 other_vector = other[other_index] - # Must expand for the number of values in each curve - values_after = np.concatenate( - ( - self.start_indices, - np.array([len(self.points)]), - ), + # Number of values in each curve + values_curve = np.diff( + np.r_[self.start_indices, [len(self.points)]] ) - values_before = np.concatenate( - ( - np.array([0]), - self.start_indices, - ), - ) - - values_curve = (values_after - values_before)[1:] - # Repeat the 
other value for each curve as many times # as values inside the curve return np.repeat(other_vector, values_curve).reshape(-1, 1) @@ -759,23 +747,11 @@ def _get_op_matrix( # noqa: WPS212 other_vector = other[other_index] - # Must expand for the number of values in each curve - values_after = np.concatenate( - ( - self.start_indices, - np.array([len(self.points)]), - ), + # Number of values in each curve + values_curve = np.diff( + np.r_[self.start_indices, [len(self.points)]] ) - values_before = np.concatenate( - ( - np.array([0]), - self.start_indices, - ), - ) - - values_curve = (values_after - values_before)[1:] - # Repeat the other value for each curve as many times # as values inside the curve return np.repeat(other_vector, values_curve, axis=0) From ea7e6dd9fc72988acbdf0e16ecfd0f4acb15f4fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89lie=20Goudout?= Date: Thu, 23 Nov 2023 20:53:41 +0100 Subject: [PATCH 08/24] FDataIrregular._to_data_matrix clean remove loops --- skfda/representation/irregular.py | 37 +++++++++++-------------------- 1 file changed, 13 insertions(+), 24 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index a4b8a0055..852787fe3 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -1057,35 +1057,24 @@ def _to_data_matrix(self) -> ArrayLike: ArrayLike: numpy array with the resulting matrix. 
""" # Find the common grid points - grid_points = [ - np.unique(self.points[:, dim]) - for dim in range(self.dim_domain) - ] + grid_points = list(map(np.unique, self.points.T)) - unified_matrix = np.empty( - ( - self.n_samples, - *[len(gp) for gp in grid_points], - self.dim_codomain, - ), + unified_matrix = np.full( + (self.n_samples, *map(len, grid_points), self.dim_codomain), np.nan + ) + + points_pos = tuple( + np.searchsorted(*arg) for arg in zip(grid_points, self.points.T) ) - unified_matrix.fill(np.nan) - # Fill with each function - next_indices = np.append( - self.start_indices, - len(self.points), + sample_idx = ( + np.searchsorted( + self.start_indices, np.arange(len(self.points)), "right" + ) + - 1 ) - for i, index in enumerate(self.start_indices): - for j in range(index, next_indices[i + 1]): - arg = self.points[j] - val = self.values[j] - pos = [ - np.where(gp == arg[dim])[0][0] - for dim, gp in enumerate(grid_points) - ] - unified_matrix[(i,) + tuple(pos)] = val + unified_matrix[(sample_idx,) + points_pos] = self.values return unified_matrix, grid_points From 6e32109b29fd1a0bb5de866c90151ca015c24d7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89lie=20Goudout?= Date: Thu, 23 Nov 2023 21:42:48 +0100 Subject: [PATCH 09/24] revert: remove *_split properties --- skfda/representation/irregular.py | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 852787fe3..6ceb55e60 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -45,7 +45,8 @@ def _get_sample_range_from_data( - points_split: list[NDArrayFloat], + start_indices: NDArrayInt, + points: NDArrayFloat, ) -> DomainRange: """Compute the domain ranges of each sample. 
@@ -62,7 +63,7 @@ def _get_sample_range_from_data( tuple( zip(np.min(f_points, axis=0), np.max(f_points, axis=0)), ) - for f_points in points_split + for f_points in np.split(points, start_indices[1:]) ) @@ -236,7 +237,9 @@ def __init__( # noqa: WPS211 self.points = sorted_arguments self.values = sorted_values - self._sample_range = _get_sample_range_from_data(self.points_split) + self._sample_range = _get_sample_range_from_data( + self.start_indices, self.points + ) # Default value for sample_range is a list of tuples with # the first and last arguments of each curve for each dimension @@ -393,8 +396,8 @@ def _sort_by_arguments(self) -> Tuple[ArrayLike, ArrayLike]: Returns: Tuple[ArrayLike, Arraylike]: sorted pair (arguments, values) """ - slice_args = self.points_split - slice_values = self.values_split + slice_args = np.split(self.points, self.start_indices)[1:] + slice_values = np.split(self.values, self.start_indices)[1:] # Sort lexicographically, first to last dimension sorting_masks = [ @@ -464,14 +467,6 @@ def coordinates(self) -> _IrregularCoordinateIterator[T]: def n_samples(self) -> int: return len(self.start_indices) - @property - def points_split(self) -> NDArrayFloat: - return np.split(self.points, self.start_indices[1:]) - - @property - def values_split(self) -> NDArrayFloat: - return np.split(self.values, self.start_indices[1:]) - @property def sample_range(self) -> DomainRange: """ @@ -1048,13 +1043,14 @@ def to_basis(self, basis: Basis, **kwargs: Any) -> FDataBasis: extrapolation=self.extrapolation, ) - def _to_data_matrix(self) -> ArrayLike: + def _to_data_matrix(self) -> tuple[ArrayLike, list[ArrayLike]]: """Convert FDataIrregular values to numpy matrix. Undefined values in the grid will be represented with np.nan. Returns: ArrayLike: numpy array with the resulting matrix. + list: numpy arrays representing grid_points. 
""" # Find the common grid points grid_points = list(map(np.unique, self.points.T)) @@ -1494,7 +1490,9 @@ def __init__( self.dim_domain = points.shape[1] if domain_range is None: - sample_range = _get_sample_range_from_data(self.points_split) + sample_range = _get_sample_range_from_data( + self.start_indices, self.points + ) domain_range = _get_domain_range_from_sample_range(sample_range) self.domain_range = validate_domain_range(domain_range) From 5b0ba7185875d2240563340db1d1fe6163b57522 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89lie=20Goudout?= Date: Thu, 23 Nov 2023 21:43:31 +0100 Subject: [PATCH 10/24] restrict keep empty samples --- skfda/representation/irregular.py | 37 +++++++++---------------------- 1 file changed, 10 insertions(+), 27 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 6ceb55e60..6b196b427 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -1187,40 +1187,23 @@ def restrict( # noqa: WPS210 npdr = np.asarray(validate_domain_range(domain_range)) # (dim, 2) - head = 0 - start_indices = [] - points = [] - values = [] - sample_names = [] - - # Eliminate points outside the new range. - # Must also modify function indices to point to new array - - for sample_points, sample_values, sample_name in zip( - self.points_split, # (num_points, dim) - self.values_split, - self.sample_names, - ): + mask = np.all( + (npdr[:, 0] <= sample_points) & (sample_points <= npdr[:, 1]), + axis=1, + ) - mask = np.all( - (npdr[:, 0] <= sample_points) & (sample_points <= npdr[:, 1]), - axis=1, - ) + num_samples = np.add.reduceat(mask, self.start_indices)[:-1] * ( + np.diff(self.start_indices) > 0 + ) - # Do not keep functions with no values. 
- num_valid_points = mask.sum() - if num_valid_points: - start_indices.append(head) - points.append(sample_points[mask]) - values.append(sample_values[mask]) - sample_names.append(sample_name) - head += num_valid_points + start_indices = np.r_[[0], num_samples.cumsum()] + points = self.points[mask] + values = self.values[mask] return self.copy( start_indices=np.array(start_indices), points=np.concatenate(points), values=np.concatenate(values), - sample_names=sample_names, domain_range=domain_range, ) From b1075259c7b907d58bfc187b41b1cdc9ec1883b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89lie=20Goudout?= Date: Fri, 24 Nov 2023 18:11:28 +0100 Subject: [PATCH 11/24] _reduceat v0 --- skfda/representation/irregular.py | 42 +++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 6b196b427..37ed07824 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -44,6 +44,48 @@ ###################### +def _reduceat(### FINISH DOC + TYPE HINTS + array: ArrayLike, + indices: ArrayLike, + axis: int = 0, + dtype=None, + out=None, + *, + ufunc, + value_empty +) -> NDArray: + """Wrapped `np.ufunc.reduceat` to manage edge cases. + + The edge cases are the one described in the doc of + `np.ufunc.reduceat`. Different behaviours are the following: + - No exception is raised when `indices[i] < 0` or + `indices[i] >=len(array)`. Instead, the corresponding value + is `value_empty`. + - When not in the previous case, the result is `value_empty` if + `indices[i] >= indices[i+1]` and otherwise, the same as + `ufunc.reduce(array[indices[i]:indices[i+1]])`. 
+ """ + array, indices = map(np.asarray, [array, indices]) + axis %= array.ndim + ax_idx = (slice(None),) * axis + n = array.shape[axis] + + pad_width = np.full((array.ndim, 2), 0) + pad_width[axis, 1] = 1 + extended_array = np.pad(array, pad_width, mode="empty") + extended_indices = np.append(indices, n) + + bad = (indices < 0) | (indices > n) + empty = (np.diff(extended_indices) <= 0) | bad + extended_indices[:-1][bad] = n + + out = ufunc.reduceat( + extended_array, extended_indices, axis=axis, dtype=dtype, out=out + )[ax_idx + (slice(-1),)] + out[ax_idx + (empty,)] = value_empty + + return out + def _get_sample_range_from_data( start_indices: NDArrayInt, points: NDArrayFloat, From 0e9949f2ea6c13fa96c1c3ba1bd9edd46741d778 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89lie=20Goudout?= Date: Fri, 24 Nov 2023 18:11:49 +0100 Subject: [PATCH 12/24] _get_sample_range_from_data update --- skfda/representation/irregular.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 37ed07824..48cb73e35 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -89,7 +89,7 @@ def _reduceat(### FINISH DOC + TYPE HINTS def _get_sample_range_from_data( start_indices: NDArrayInt, points: NDArrayFloat, -) -> DomainRange: +) -> DomainRangeLike: """Compute the domain ranges of each sample. Args: @@ -101,14 +101,20 @@ def _get_sample_range_from_data( sample_range[f][d] = (min_point, max_point) is the domain range for the function f in dimension d. 
""" - return tuple( - tuple( - zip(np.min(f_points, axis=0), np.max(f_points, axis=0)), - ) - for f_points in np.split(points, start_indices[1:]) + return np.stack( + [ + _reduceat( + points, + start_indices, + ufunc=ufunc, + value_empty=np.nan, + dtype=float, + ) + for ufunc in (np.fmin, np.fmax) + ], + axis=-1, ) - def _get_domain_range_from_sample_range( sample_range: DomainRange, ) -> DomainRange: From 6a8a90d87702522aee009ef97d06ddb6ba67c8cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89lie=20Goudout?= Date: Wed, 29 Nov 2023 08:55:39 +0100 Subject: [PATCH 13/24] Two-modes _reduceat for later decision --- skfda/representation/irregular.py | 124 ++++++++++++++++++++++-------- skfda/typing/_numpy.py | 8 +- 2 files changed, 96 insertions(+), 36 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 48cb73e35..f42a4370a 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -10,7 +10,14 @@ import itertools import numbers from typing import ( - Any, Optional, Sequence, Tuple, Type, TypeVar, Union, + Any, + Callable, + Optional, + Sequence, + Tuple, + Type, + TypeVar, + Union, ) import numpy as np @@ -29,7 +36,13 @@ GridPointsLike, LabelTupleLike, ) -from ..typing._numpy import ArrayLike, NDArrayBool, NDArrayFloat, NDArrayInt +from ..typing._numpy import ( + ArrayLike, + DTypeLIke, + NDArrayBool, + NDArrayFloat, + NDArrayInt, +) from ._functional_data import FData from .basis import Basis, FDataBasis from .evaluator import Evaluator @@ -43,48 +56,91 @@ # Auxiliary functions# ###################### - -def _reduceat(### FINISH DOC + TYPE HINTS +def _reduceat(#CHOOSE MODE 1 OR 2 array: ArrayLike, indices: ArrayLike, axis: int = 0, - dtype=None, - out=None, + dtype: Union[DTypeLike, None] = None, + out: Union[NDArray, None] = None, *, - ufunc, - value_empty + ufunc: Callable,# TO PRECISE(?) + value_empty: Any, ) -> NDArray: """Wrapped `np.ufunc.reduceat` to manage edge cases. 
The edge cases are the one described in the doc of `np.ufunc.reduceat`. Different behaviours are the following: - - No exception is raised when `indices[i] < 0` or - `indices[i] >=len(array)`. Instead, the corresponding value - is `value_empty`. - - When not in the previous case, the result is `value_empty` if - `indices[i] >= indices[i+1]` and otherwise, the same as - `ufunc.reduce(array[indices[i]:indices[i+1]])`. + - No exception is raised when `indices[i] < 0` or + `indices[i] >= len(array)`. Instead, the corresponding value is + `value_empty`. + - When not in the previous case, the result is `value_empty` if + `indices[i] >= indices[i+1]` and otherwise, the same as + `ufunc.reduce(array[indices[i]:indices[i+1]])`. + + Note that when necessary, `value_empty` is casted to `dtype` if not + `None`, or to the type of `array`'s elements. If not possible, an + exception will be raised. """ - array, indices = map(np.asarray, [array, indices]) - axis %= array.ndim - ax_idx = (slice(None),) * axis - n = array.shape[axis] - - pad_width = np.full((array.ndim, 2), 0) - pad_width[axis, 1] = 1 - extended_array = np.pad(array, pad_width, mode="empty") - extended_indices = np.append(indices, n) - - bad = (indices < 0) | (indices > n) - empty = (np.diff(extended_indices) <= 0) | bad - extended_indices[:-1][bad] = n - - out = ufunc.reduceat( - extended_array, extended_indices, axis=axis, dtype=dtype, out=out - )[ax_idx + (slice(-1),)] - out[ax_idx + (empty,)] = value_empty - - return out + # MODE 1 OR 2 TO CHOOSE (more extensive benchmarks to do) + # Not obvious depending on the use case, can go from x0.01 to x100. + # Maybe both mode can be kept and activated when more appropriate? 
+ MODE = 1 + + # MODE 1: Fix start_indices + one `np.ufun.reducaet` call + post-fix + if MODE == 1: + + array, indices = map(np.asarray, [array, indices]) + axis %= array.ndim + ax_idx = (slice(None),) * axis + n = array.shape[axis] + + pad_width = np.full((array.ndim, 2), 0) + pad_width[axis, 1] = 1 + extended_array = np.pad(array, pad_width, mode="empty") + extended_indices = np.append(indices, n) + + bad = (indices < 0) | (indices >= n) + empty = (np.diff(extended_indices) <= 0) | bad + extended_indices[:-1][bad] = n + + out = ufunc.reduceat( + extended_array, extended_indices, axis=axis, dtype=dtype, out=out + )[ax_idx + (slice(-1),)] + if empty.any(): + out[ax_idx + (empty,)] = value_empty + + return out + + # MODE 2: Iterative calls of `np.ufunc.reduce` + if MODE == 2: + + array, indices = map(np.asarray, [array, indices]) + ndim = array.ndim + axis = axis if axis >= 0 else ndim - axis + pre, (n, *post) = array.shape[:axis], array.shape[axis:] + shape = pre + (len(indices),) + tuple(post) + + if dtype is None: + dtype = array.dtype + + if out is None: + out = np.empty(shape, dtype=dtype) + else: + assert out.shape == shape + out = out.astype(dtype) + + ii = [slice(None)] * ndim + for i, (a, b) in enumerate(itertools.pairwise(np.append(indices, n))): + ii[axis] = i + ii_out = tuple(ii) + if a < 0 or a >= min(b, n): # Nothing to reduce + out[ii_out] = value_empty + else: + ii[axis] = slice(a, b) + ii_array = tuple(ii) + out[ii_out] = ufunc.reduce(array[ii_array], axis=axis) + + return out def _get_sample_range_from_data( start_indices: NDArrayInt, diff --git a/skfda/typing/_numpy.py b/skfda/typing/_numpy.py index 774511cc4..d49ee9e93 100644 --- a/skfda/typing/_numpy.py +++ b/skfda/typing/_numpy.py @@ -4,10 +4,14 @@ import numpy as np -try: - from numpy.typing import ArrayLike as ArrayLike # noqa: WPS113 +try: # noqa: WPS113 + from numpy.typing import ( + ArrayLike as ArrayLike, + DTypeLike as DTypeLike, + ) except ImportError: ArrayLike = np.ndarray # 
type:ignore[misc] # noqa: WPS440 + DTypeLIke = np.dtype # type:ignore[misc] try: # noqa: WPS229 from numpy.typing import NDArray From 22aa8c739562e9facb9a204ea9765b2a6a7f6014 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89lie=20Goudout?= Date: Wed, 29 Nov 2023 10:40:32 +0100 Subject: [PATCH 14/24] handle nan for domain range compute + enforce float type + allow len(points) as start_index for empty sample --- skfda/representation/irregular.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index f42a4370a..31e79be6c 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -172,7 +172,7 @@ def _get_sample_range_from_data( ) def _get_domain_range_from_sample_range( - sample_range: DomainRange, + sample_range: DomainRangeLike, ) -> DomainRange: """Compute the domain range of the whole dataset. @@ -185,8 +185,8 @@ def _get_domain_range_from_sample_range( the dimension d. 
""" sample_range_array = np.asarray(sample_range) - min_arguments = sample_range_array[..., 0].min(axis=0) - max_arguments = sample_range_array[..., 1].max(axis=0) + min_arguments = np.nanmin(sample_range_array[..., 0], axis=0) + max_arguments = np.nanmin(sample_range_array[..., 1], axis=0) return tuple(zip(min_arguments, max_arguments)) @@ -317,10 +317,10 @@ def __init__( # noqa: WPS211 ): """Construct a FDataIrregular object.""" self.start_indices = np.asarray(start_indices) - self.points = np.asarray(points) + self.points = np.asarray(points, dtype=float) if self.points.ndim == 1: self.points = self.points.reshape(-1, 1) - self.values = np.asarray(values) + self.values = np.asarray(values, dtype=float) if self.values.ndim == 1: self.values = self.values.reshape(-1, 1) @@ -333,7 +333,7 @@ def __init__( # noqa: WPS211 if np.any(np.diff(self.start_indices) < 0): raise ValueError("Array start_indices must be non-decreasing") - if self.start_indices[-1] >= len(self.points): + if self.start_indices[-1] > len(self.points): raise ValueError("Index in start_indices out of bounds") # Ensure arguments are in order within each function From 11508c1f7c03c7d061d9c6e6fd7536e5e1d8e1e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89lie=20Goudout?= Date: Wed, 29 Nov 2023 13:58:41 +0100 Subject: [PATCH 15/24] clean _sort_by_arguments --- skfda/representation/irregular.py | 34 +++++++++++-------------------- 1 file changed, 12 insertions(+), 22 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 31e79be6c..e708fe39c 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -38,7 +38,7 @@ ) from ..typing._numpy import ( ArrayLike, - DTypeLIke, + DTypeLike, NDArrayBool, NDArrayFloat, NDArrayInt, @@ -494,32 +494,22 @@ def from_fdatagrid( def _sort_by_arguments(self) -> Tuple[ArrayLike, ArrayLike]: """Sort the arguments lexicographically functionwise. - + Additionally, sort the values accordingly. 
- + Returns: Tuple[ArrayLike, Arraylike]: sorted pair (arguments, values) """ - slice_args = np.split(self.points, self.start_indices)[1:] - slice_values = np.split(self.values, self.start_indices)[1:] - - # Sort lexicographically, first to last dimension - sorting_masks = [ - np.lexsort(np.flip(f_args, axis=1).T) - for f_args in slice_args - ] - - sorted_args = [ - slice_args[i][mask] - for i, mask in enumerate(sorting_masks) - ] - - sorted_values = [ - slice_values[i][mask] - for i, mask in enumerate(sorting_masks) - ] + points_split = np.split(self.points, self.start_indices)[1:] + shifts = itertools.accumulate(map(len, [[]] + points_split[:-1])) + sorter = np.concatenate( + [ + np.lexsort(np.rot90(points)) + shift + for points, shift in zip(points_split, shifts) + ] + ) - return np.concatenate(sorted_args), np.concatenate(sorted_values) + return self.points[sorter], self.values[sorter] def round( self, From 5a841edbe82f5e108b6e244246a535a6090cc25e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89lie=20Goudout?= Date: Fri, 2 Feb 2024 15:15:34 +0100 Subject: [PATCH 16/24] _reduceat wrapper + minor mods --- skfda/representation/irregular.py | 145 ++++++++++++------------------ 1 file changed, 56 insertions(+), 89 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index e708fe39c..097a14dd0 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -56,91 +56,62 @@ # Auxiliary functions# ###################### -def _reduceat(#CHOOSE MODE 1 OR 2 +def _reduceat( + ufunc, array: ArrayLike, indices: ArrayLike, axis: int = 0, - dtype: Union[DTypeLike, None] = None, - out: Union[NDArray, None] = None, + dtype=None, + out=None, *, - ufunc: Callable,# TO PRECISE(?) - value_empty: Any, -) -> NDArray: - """Wrapped `np.ufunc.reduceat` to manage edge cases. + value_empty +): + """ + Wrapped `np.ufunc.reduceat` to manage some edge cases. 
The edge cases are the one described in the doc of `np.ufunc.reduceat`. Different behaviours are the following: - - No exception is raised when `indices[i] < 0` or - `indices[i] >= len(array)`. Instead, the corresponding value is - `value_empty`. - - When not in the previous case, the result is `value_empty` if - `indices[i] >= indices[i+1]` and otherwise, the same as - `ufunc.reduce(array[indices[i]:indices[i+1]])`. - - Note that when necessary, `value_empty` is casted to `dtype` if not - `None`, or to the type of `array`'s elements. If not possible, an - exception will be raised. + - No exception is raised when `indices[i] < 0` or + `indices[i] >= len(array)`. Instead, the corresponding value + is `value_empty`. + - When not in the previous case, the result is `value_empty` if + `indices[i] == indices[i+1]` and otherwise, the same as + `ufunc.reduce(array[indices[i]:indices[i+1]])`. This means + that an exception is still raised if `indices[i] > + indices[i+1]`. + + Note: The `value_empty` must be convertible to the `dtype` (either + provided or inferred from the `ufunc` operations). """ - # MODE 1 OR 2 TO CHOOSE (more extensive benchmarks to do) - # Not obvious depending on the use case, can go from x0.01 to x100. - # Maybe both mode can be kept and activated when more appropriate? 
- MODE = 1 - - # MODE 1: Fix start_indices + one `np.ufun.reducaet` call + post-fix - if MODE == 1: - - array, indices = map(np.asarray, [array, indices]) - axis %= array.ndim - ax_idx = (slice(None),) * axis - n = array.shape[axis] - - pad_width = np.full((array.ndim, 2), 0) - pad_width[axis, 1] = 1 - extended_array = np.pad(array, pad_width, mode="empty") - extended_indices = np.append(indices, n) - - bad = (indices < 0) | (indices >= n) - empty = (np.diff(extended_indices) <= 0) | bad - extended_indices[:-1][bad] = n - - out = ufunc.reduceat( - extended_array, extended_indices, axis=axis, dtype=dtype, out=out - )[ax_idx + (slice(-1),)] - if empty.any(): - out[ax_idx + (empty,)] = value_empty - - return out + array = np.asarray(array) + indices = np.asarray(indices) - # MODE 2: Iterative calls of `np.ufunc.reduce` - if MODE == 2: + n = array.shape[axis] + good_axis_idx = ( + (indices >= 0) & (indices < n) & (np.diff(indices, append=n) > 0) + ) - array, indices = map(np.asarray, [array, indices]) - ndim = array.ndim - axis = axis if axis >= 0 else ndim - axis - pre, (n, *post) = array.shape[:axis], array.shape[axis:] - shape = pre + (len(indices),) + tuple(post) - - if dtype is None: - dtype = array.dtype - - if out is None: - out = np.empty(shape, dtype=dtype) - else: - assert out.shape == shape - out = out.astype(dtype) - - ii = [slice(None)] * ndim - for i, (a, b) in enumerate(itertools.pairwise(np.append(indices, n))): - ii[axis] = i - ii_out = tuple(ii) - if a < 0 or a >= min(b, n): # Nothing to reduce - out[ii_out] = value_empty - else: - ii[axis] = slice(a, b) - ii_array = tuple(ii) - out[ii_out] = ufunc.reduce(array[ii_array], axis=axis) - - return out + good_idx = [slice(None)] * array.ndim + good_idx[axis] = good_axis_idx + good_idx = tuple(good_idx) + + reduceat_out = ufunc.reduceat( + array, indices[good_axis_idx], axis=axis, dtype=dtype + ) + + out_shape = list(array.shape) + out_shape[axis] = len(indices) + out_dtype = dtype or reduceat_out.dtype + + 
if out is None: + out = np.full(out_shape, value_empty, dtype=out_dtype) + else: + out.astype(out_dtype, copy=False) + out.fill(value_empty) + + out[good_idx] = reduceat_out + + return out def _get_sample_range_from_data( start_indices: NDArrayInt, @@ -160,9 +131,9 @@ def _get_sample_range_from_data( return np.stack( [ _reduceat( + ufunc, points, start_indices, - ufunc=ufunc, value_empty=np.nan, dtype=float, ) @@ -810,16 +781,14 @@ def _get_op_matrix( # noqa: WPS212 return other elif other.shape == (self.n_samples,): other_index = ( - (slice(None),) + (np.newaxis,) - * (self.values.ndim - 1) + (slice(None),) + + (np.newaxis,) * (self.values.ndim - 1) ) other_vector = other[other_index] # Number of values in each curve - values_curve = np.diff( - np.r_[self.start_indices, [len(self.points)]] - ) + values_curve = np.diff(self.start_indices, append=len(self.points)) # Repeat the other value for each curve as many times # as values inside the curve @@ -829,25 +798,23 @@ def _get_op_matrix( # noqa: WPS212 self.dim_codomain, ): other_index = ( - (slice(None),) + (np.newaxis,) - * (self.values.ndim - 2) + (slice(None),) + + (np.newaxis,) * (self.values.ndim - 2) + (slice(None),) ) other_vector = other[other_index] # Number of values in each curve - values_curve = np.diff( - np.r_[self.start_indices, [len(self.points)]] - ) + values_curve = np.diff(self.start_indices, append=len(self.points)) # Repeat the other value for each curve as many times # as values inside the curve return np.repeat(other_vector, values_curve, axis=0) raise ValueError( - f"Invalid dimensions in operator between FDataIrregular " - f"and Numpy array: {other.shape}", + f"Invalid dimensions in operator between FDataIrregular and " + f"Numpy array: {other.shape}", ) elif isinstance(other, FDataIrregular): @@ -1395,7 +1362,7 @@ def __getitem__( required_slices = [] key = _check_array_key(self.start_indices, key) indices = range(self.n_samples) - required_indices = indices[key] + required_indices = 
np.array(indices)[key] for i in required_indices: next_index = None if i + 1 < self.n_samples: From 1a1835e56a761b6fe6c0b3bd177bb411edd0dd6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89lie=20Goudout?= Date: Wed, 14 Feb 2024 06:03:30 +0100 Subject: [PATCH 17/24] removed useless op (???) --- skfda/representation/irregular.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 097a14dd0..07f132ba7 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -472,11 +472,10 @@ def _sort_by_arguments(self) -> Tuple[ArrayLike, ArrayLike]: Tuple[ArrayLike, Arraylike]: sorted pair (arguments, values) """ points_split = np.split(self.points, self.start_indices)[1:] - shifts = itertools.accumulate(map(len, [[]] + points_split[:-1])) sorter = np.concatenate( [ np.lexsort(np.rot90(points)) + shift - for points, shift in zip(points_split, shifts) + for points, shift in zip(points_split, self.start_indices) ] ) From f0fe0d7f81d44ef1bd09be047fc129ee1a3f0644 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89lie=20Goudout?= Date: Wed, 14 Feb 2024 07:06:29 +0100 Subject: [PATCH 18/24] cleaner _sort_by_arguments from vnmabus --- skfda/representation/irregular.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 07f132ba7..421ac7842 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -471,13 +471,11 @@ def _sort_by_arguments(self) -> Tuple[ArrayLike, ArrayLike]: Returns: Tuple[ArrayLike, Arraylike]: sorted pair (arguments, values) """ - points_split = np.split(self.points, self.start_indices)[1:] - sorter = np.concatenate( - [ - np.lexsort(np.rot90(points)) + shift - for points, shift in zip(points_split, self.start_indices) - ] + ind = np.repeat( + range(len(self.start_indices)), + np.diff(self.start_indices, 
append=len(self.points)), ) + sorter = np.lexsort(np.rot90(np.c_[ind, self.points])) return self.points[sorter], self.values[sorter] From 4a2fc88fbbc00fc5b1aac8021b15c840935471bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89lie=20Goudout?= Date: Wed, 14 Feb 2024 07:09:18 +0100 Subject: [PATCH 19/24] resolve reviews --- skfda/representation/irregular.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 421ac7842..64299ef9d 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -971,14 +971,14 @@ def concatenate(self: T, *others: T, as_coordinates: bool = False) -> T: ) # Verify that dimensions are compatible assert others, "No objects to concatenate" - all_ = (self,) + others + all_objects = (self,) + others start_indices_split = [] total_points = 0 points_split = [] values_split = [] total_sample_names_split = [] domain_range_split = [] - for x, y in itertools.pairwise(all_ + (self,)): + for x, y in itertools.pairwise(all_objects + (self,)): x.check_same_dimensions(y) start_indices_split.append(x.start_indices + total_points) total_points += len(x.points) @@ -1243,7 +1243,7 @@ def restrict( # noqa: WPS210 from ..misc.validation import validate_domain_range - npdr = np.asarray(validate_domain_range(domain_range)) # (dim, 2) + npdr = np.asarray(validate_domain_range(domain_range)) # shape(dim, 2) mask = np.all( (npdr[:, 0] <= sample_points) & (sample_points <= npdr[:, 1]), From 01fe0ba96ea73e5c55a96d9851397277153ab876 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89lie=20Goudout?= Date: Thu, 15 Feb 2024 23:18:22 +0100 Subject: [PATCH 20/24] better lexsort comment --- skfda/representation/irregular.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 64299ef9d..14a946f62 100644 --- a/skfda/representation/irregular.py +++ 
b/skfda/representation/irregular.py @@ -475,7 +475,11 @@ def _sort_by_arguments(self) -> Tuple[ArrayLike, ArrayLike]: range(len(self.start_indices)), np.diff(self.start_indices, append=len(self.points)), ) - sorter = np.lexsort(np.rot90(np.c_[ind, self.points])) + # In order to use lexsort the following manipulations are required: + # - Transpose the axis, so that the first axis contains the keys. + # - Flip that axis so that the primary key is last, and they are thus + # in last-to-first order. + sorter = np.lexsort(np.c_[ind, self.points].T[::-1]) return self.points[sorter], self.values[sorter] From ee89c2cf7dabd91d492ffdecaf95615693d604d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89lie=20Goudout?= Date: Mon, 26 Feb 2024 15:06:02 +0100 Subject: [PATCH 21/24] fixed typo domain_range max --- skfda/representation/irregular.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index 14a946f62..c9a7335cd 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -157,7 +157,7 @@ def _get_domain_range_from_sample_range( """ sample_range_array = np.asarray(sample_range) min_arguments = np.nanmin(sample_range_array[..., 0], axis=0) - max_arguments = np.nanmin(sample_range_array[..., 1], axis=0) + max_arguments = np.nanmax(sample_range_array[..., 1], axis=0) return tuple(zip(min_arguments, max_arguments)) From 6bf925c334087227c4461d6316e85f88f677a6b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89lie=20Goudout?= Date: Mon, 26 Feb 2024 15:07:24 +0100 Subject: [PATCH 22/24] fixed restrict + allow domain_range broadcast --- skfda/representation/irregular.py | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index c9a7335cd..f4c7c4a7f 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -1247,26 +1247,24 @@ def 
restrict( # noqa: WPS210 from ..misc.validation import validate_domain_range - npdr = np.asarray(validate_domain_range(domain_range)) # shape(dim, 2) + npdr = np.broadcast_to( + validate_domain_range(domain_range), + (self.dim_domain, 2), + ) mask = np.all( - (npdr[:, 0] <= sample_points) & (sample_points <= npdr[:, 1]), + (npdr[:, 0] <= self.points) & (self.points <= npdr[:, 1]), axis=1, ) - num_samples = np.add.reduceat(mask, self.start_indices)[:-1] * ( - np.diff(self.start_indices) > 0 - ) - - start_indices = np.r_[[0], num_samples.cumsum()] - points = self.points[mask] - values = self.values[mask] + num_points = _reduceat(np.add, mask, self.start_indices, value_empty=0) + start_indices = np.r_[[0], num_points[:-1].cumsum()] return self.copy( - start_indices=np.array(start_indices), - points=np.concatenate(points), - values=np.concatenate(values), - domain_range=domain_range, + start_indices=start_indices, + points=self.points[mask], + values=self.values[mask], + domain_range=npdr, ) def shift( From fb6502f6d23fe6ab8cd0bfe599dedd418bf8bf44 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Thu, 7 Mar 2024 17:18:25 +0100 Subject: [PATCH 23/24] Fix doctests. Automatic casting to float dtype is removed, as it prevents using different float sizes (or even integers in the future). 
--- skfda/representation/irregular.py | 38 ++++++++++++++++--------------- 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py index f4c7c4a7f..1524a02fa 100644 --- a/skfda/representation/irregular.py +++ b/skfda/representation/irregular.py @@ -24,11 +24,7 @@ import pandas.api.extensions from matplotlib.figure import Figure -from .._utils import ( - _cartesian_product, - _check_array_key, - _to_grid_points, -) +from .._utils import _cartesian_product, _check_array_key, _to_grid_points from ..typing._base import ( DomainRange, DomainRangeLike, @@ -56,6 +52,7 @@ # Auxiliary functions# ###################### + def _reduceat( ufunc, array: ArrayLike, @@ -113,6 +110,7 @@ def _reduceat( return out + def _get_sample_range_from_data( start_indices: NDArrayInt, points: NDArrayFloat, @@ -142,6 +140,7 @@ def _get_sample_range_from_data( axis=-1, ) + def _get_domain_range_from_sample_range( sample_range: DomainRangeLike, ) -> DomainRange: @@ -254,8 +253,8 @@ class FDataIrregular(FData): # noqa: WPS214 representing a function :math:`f : \mathbb{R}\longmapsto\mathbb{R}^2`. >>> indices = [0, 2] - >>> arguments = [[1], [2], [3], [4], [5]] - >>> values = [[1, 1], [2, 2], [3, 3], [4, 4], [5, 5]] + >>> arguments = [[1.], [2.], [3.], [4.], [5.]] + >>> values = [[1., 1.], [2., 2.], [3., 3.], [4., 4.], [5., 5.]] >>> fd = FDataIrregular(indices, arguments, values) >>> fd.dim_domain, fd.dim_codomain (1, 2) @@ -264,8 +263,8 @@ class FDataIrregular(FData): # noqa: WPS214 representing a function :math:`f : \mathbb{R}^2\longmapsto\mathbb{R}`. 
>>> indices = [0, 2] - >>> arguments = [[1, 1], [2, 2], [3, 3], [4, 4], [5, 5]] - >>> values = [[1], [2], [3], [4], [5]] + >>> arguments = [[1., 1.], [2., 2.], [3., 3.], [4., 4.], [5., 5.]] + >>> values = [[1.], [2.], [3.], [4.], [5.]] >>> fd = FDataIrregular(indices, arguments, values) >>> fd.dim_domain, fd.dim_codomain (2, 1) @@ -288,10 +287,10 @@ def __init__( # noqa: WPS211 ): """Construct a FDataIrregular object.""" self.start_indices = np.asarray(start_indices) - self.points = np.asarray(points, dtype=float) + self.points = np.asarray(points) if self.points.ndim == 1: self.points = self.points.reshape(-1, 1) - self.values = np.asarray(values, dtype=float) + self.values = np.asarray(values) if self.values.ndim == 1: self.values = self.values.reshape(-1, 1) @@ -313,7 +312,8 @@ def __init__( # noqa: WPS211 self.values = sorted_values self._sample_range = _get_sample_range_from_data( - self.start_indices, self.points + self.start_indices, + self.points, ) # Default value for sample_range is a list of tuples with @@ -465,9 +465,9 @@ def from_fdatagrid( def _sort_by_arguments(self) -> Tuple[ArrayLike, ArrayLike]: """Sort the arguments lexicographically functionwise. - + Additionally, sort the values accordingly. 
- + Returns: Tuple[ArrayLike, Arraylike]: sorted pair (arguments, values) """ @@ -789,7 +789,8 @@ def _get_op_matrix( # noqa: WPS212 other_vector = other[other_index] # Number of values in each curve - values_curve = np.diff(self.start_indices, append=len(self.points)) + values_curve = np.diff( + self.start_indices, append=len(self.points)) # Repeat the other value for each curve as many times # as values inside the curve @@ -807,7 +808,8 @@ def _get_op_matrix( # noqa: WPS212 other_vector = other[other_index] # Number of values in each curve - values_curve = np.diff(self.start_indices, append=len(self.points)) + values_curve = np.diff( + self.start_indices, append=len(self.points)) # Repeat the other value for each curve as many times # as values inside the curve @@ -938,13 +940,13 @@ def concatenate(self: T, *others: T, as_coordinates: bool = False) -> T: Examples: >>> indices = [0, 2] - >>> arguments = values = np.arange(5).reshape(-1, 1) + >>> arguments = values = np.arange(5.).reshape(-1, 1) >>> fd = FDataIrregular(indices, arguments, values) >>> arguments_2 = values_2 = np.arange(5, 10).reshape(-1, 1) >>> fd_2 = FDataIrregular(indices, arguments_2, values_2) >>> fd.concatenate(fd_2) FDataIrregular( - start_indices=array([0, 2, 5, 7], dtype=uint32), + start_indices=array([0, 2, 5, 7]), points=array([[ 0.], [ 1.], [ 2.], From cd7e73e73cdc6cc0b056c93bcc1bf7f004dfce10 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Mon, 11 Mar 2024 12:08:20 +0100 Subject: [PATCH 24/24] Fix typo. 
--- skfda/typing/_numpy.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/skfda/typing/_numpy.py b/skfda/typing/_numpy.py index d49ee9e93..b870c2bc4 100644 --- a/skfda/typing/_numpy.py +++ b/skfda/typing/_numpy.py @@ -5,13 +5,10 @@ import numpy as np try: # noqa: WPS113 - from numpy.typing import ( - ArrayLike as ArrayLike, - DTypeLike as DTypeLike, - ) + from numpy.typing import ArrayLike as ArrayLike, DTypeLike as DTypeLike except ImportError: ArrayLike = np.ndarray # type:ignore[misc] # noqa: WPS440 - DTypeLIke = np.dtype # type:ignore[misc] + DTypeLike = np.dtype # type:ignore[misc] try: # noqa: WPS229 from numpy.typing import NDArray