From 13decda267e0e02ca5864907296d10ae1de72e6c Mon Sep 17 00:00:00 2001
From: Adam Narozniak <51029327+adam-narozniak@users.noreply.github.com>
Date: Mon, 16 Sep 2024 19:12:36 +0200
Subject: [PATCH] break(datasets) Drop support for Python 3.8 (#4213)

---
 .github/workflows/datasets-e2e.yml            |  2 +-
 .github/workflows/datasets.yml                |  2 +-
 datasets/flwr_datasets/common/telemetry.py    | 10 ++---
 datasets/flwr_datasets/common/typing.py       |  4 +-
 datasets/flwr_datasets/common/version.py      |  5 +--
 .../flwr_datasets/federated_dataset_test.py   |  8 ++--
 datasets/flwr_datasets/metrics/utils.py       |  6 +--
 datasets/flwr_datasets/mock_utils_test.py     | 38 +++++++++----------
 .../partitioner/dirichlet_partitioner.py      | 14 +++----
 .../partitioner/dirichlet_partitioner_test.py |  4 +-
 .../partitioner/distribution_partitioner.py   |  4 +-
 .../distribution_partitioner_test.py          | 10 ++---
 .../grouped_natural_id_partitioner.py         | 12 +++---
 .../grouped_natural_id_partitioner_test.py    | 10 ++---
 .../partitioner/id_to_size_fnc_partitioner.py | 10 ++---
 .../partitioner/iid_partitioner_test.py       |  3 +-
 .../inner_dirichlet_partitioner.py            | 18 ++++-----
 .../inner_dirichlet_partitioner_test.py       |  8 ++--
 .../partitioner/natural_id_partitioner.py     | 12 +++---
 .../natural_id_partitioner_test.py            |  3 +-
 .../partitioner/pathological_partitioner.py   | 10 ++---
 .../pathological_partitioner_test.py          |  3 +-
 .../partitioner/shard_partitioner.py          |  6 +--
 .../partitioner/shard_partitioner_test.py     |  4 +-
 .../preprocessor/divider_test.py              | 12 +++---
 datasets/flwr_datasets/preprocessor/merger.py |  7 ++--
 .../flwr_datasets/preprocessor/merger_test.py | 21 +++++-----
 datasets/flwr_datasets/utils.py               | 36 +++++++++---------
 datasets/flwr_datasets/utils_test.py          |  6 +--
 .../flwr_datasets/visualization/bar_plot.py   | 12 +++---
 .../comparison_label_distribution.py          | 24 ++++++------
 .../visualization/heatmap_plot.py             | 10 ++---
 .../visualization/label_distribution.py       | 10 ++---
 datasets/pyproject.toml                       |  5 +--
 34 files changed, 170 insertions(+), 179 deletions(-)

diff --git a/.github/workflows/datasets-e2e.yml b/.github/workflows/datasets-e2e.yml
index 2a73a8538b1..dbd90635c74 100644
--- a/.github/workflows/datasets-e2e.yml
+++ b/.github/workflows/datasets-e2e.yml
@@ -45,7 +45,7 @@ jobs:
       - name: Bootstrap
         uses: ./.github/actions/bootstrap
         with:
-          python-version: 3.8
+          python-version: 3.9
       - name: Install dependencies
         run: python -m poetry install
      - name: Run tests
diff --git a/.github/workflows/datasets.yml b/.github/workflows/datasets.yml
index ca5aa29248c..860d944696f 100644
--- a/.github/workflows/datasets.yml
+++ b/.github/workflows/datasets.yml
@@ -37,7 +37,7 @@ jobs:
         # In case of a mismatch, the job has to download Python to install it.
        # Note: Due to a bug in actions/setup-python, we have to put "3.10" in
        # quotes as it will otherwise assume "3.1"
-        python: [3.8, 3.9, '3.10', '3.11']
+        python: ['3.9', '3.10', '3.11']
     name: Python ${{ matrix.python }}
diff --git a/datasets/flwr_datasets/common/telemetry.py b/datasets/flwr_datasets/common/telemetry.py
index ca484fdda73..4bf80b93467 100644
--- a/datasets/flwr_datasets/common/telemetry.py
+++ b/datasets/flwr_datasets/common/telemetry.py
@@ -25,7 +25,7 @@
 from concurrent.futures import Future, ThreadPoolExecutor
 from enum import Enum, auto
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Union, cast
+from typing import Any, Optional, Union, cast
 
 from flwr_datasets.common.version import package_name, package_version
 
@@ -114,7 +114,7 @@ class EventType(str, Enum):
     # The type signature is not compatible with mypy, pylint and flake8
     # so each of those needs to be disabled for this line.
     # pylint: disable-next=no-self-argument,arguments-differ,line-too-long
-    def _generate_next_value_(name: str, start: int, count: int, last_values: List[Any]) -> Any:  # type: ignore # noqa: E501
+    def _generate_next_value_(name: str, start: int, count: int, last_values: list[Any]) -> Any:  # type: ignore # noqa: E501
         return name
 
     PING = auto()
@@ -127,7 +127,7 @@ def _generate_next_value_(name: str, start: int, count: int, last_values: List[A
 
 # Use the ThreadPoolExecutor with max_workers=1 to have a queue
 # and also ensure that telemetry calls are not blocking.
-state: Dict[str, Union[Optional[str], Optional[ThreadPoolExecutor]]] = {
+state: dict[str, Union[Optional[str], Optional[ThreadPoolExecutor]]] = {
     # Will be assigned ThreadPoolExecutor(max_workers=1)
     # in event() the first time it's required
     "executor": None,
@@ -143,7 +143,7 @@ def _generate_next_value_(name: str, start: int, count: int, last_values: List[A
 # pylint: disable-next=unsubscriptable-object
 def event(
     event_type: EventType,
-    event_details: Optional[Dict[str, Any]] = None,
+    event_details: Optional[dict[str, Any]] = None,
 ) -> Future:  # type: ignore
     """Submit create_event to ThreadPoolExecutor to avoid blocking."""
     if state["executor"] is None:
@@ -155,7 +155,7 @@ def event(
     return result
 
 
-def create_event(event_type: EventType, event_details: Optional[Dict[str, Any]]) -> str:
+def create_event(event_type: EventType, event_details: Optional[dict[str, Any]]) -> str:
     """Create telemetry event."""
     if state["source"] is None:
         state["source"] = _get_source_id()
diff --git a/datasets/flwr_datasets/common/typing.py b/datasets/flwr_datasets/common/typing.py
index ffaefaeec31..d6d37b46849 100644
--- a/datasets/flwr_datasets/common/typing.py
+++ b/datasets/flwr_datasets/common/typing.py
@@ -15,7 +15,7 @@
 """Flower Datasets type definitions."""
 
 
-from typing import Any, List
+from typing import Any
 
 import numpy as np
 import numpy.typing as npt
@@ -23,4 +23,4 @@
 NDArray = npt.NDArray[Any]
 NDArrayInt = npt.NDArray[np.int_]
 NDArrayFloat = npt.NDArray[np.float_]
-NDArrays = List[NDArray]
+NDArrays = list[NDArray]
diff --git a/datasets/flwr_datasets/common/version.py b/datasets/flwr_datasets/common/version.py
index 48c3fc5aaa9..3e4c9a31fd6 100644
--- a/datasets/flwr_datasets/common/version.py
+++ b/datasets/flwr_datasets/common/version.py
@@ -19,15 +19,14 @@
 
 
 import importlib.metadata as importlib_metadata
-from typing import Tuple
 
 
-def _check_package(name: str) -> Tuple[str, str]:
+def _check_package(name: str) -> tuple[str, str]:
     version: str = importlib_metadata.version(name)
     return name, version
 
 
-def _version() -> Tuple[str, str]:
+def _version() -> tuple[str, str]:
     """Read and return Flower Dataset package name and version.
 
     Returns
diff --git a/datasets/flwr_datasets/federated_dataset_test.py b/datasets/flwr_datasets/federated_dataset_test.py
index 6ccf06ccf07..bbdfa42292c 100644
--- a/datasets/flwr_datasets/federated_dataset_test.py
+++ b/datasets/flwr_datasets/federated_dataset_test.py
@@ -17,7 +17,7 @@
 
 
 import unittest
-from typing import Dict, Union
+from typing import Union
 from unittest.mock import Mock, patch
 
 import numpy as np
@@ -385,7 +385,7 @@ def test_dict_of_partitioners_passes_partitioners(self) -> None:
         """Test if partitioners are passed directly (no recreation)."""
         num_train_partitions = 100
         num_test_partitions = 100
-        partitioners: Dict[str, Union[Partitioner, int]] = {
+        partitioners: dict[str, Union[Partitioner, int]] = {
             "train": IidPartitioner(num_partitions=num_train_partitions),
             "test": IidPartitioner(num_partitions=num_test_partitions),
         }
@@ -419,7 +419,7 @@ def test_mixed_type_partitioners_passes_instantiated_partitioners(self) -> None:
         """Test if an instantiated partitioner is passed directly."""
         num_train_partitions = 100
         num_test_partitions = 100
-        partitioners: Dict[str, Union[Partitioner, int]] = {
+        partitioners: dict[str, Union[Partitioner, int]] = {
             "train": IidPartitioner(num_partitions=num_train_partitions),
             "test": num_test_partitions,
         }
@@ -433,7 +433,7 @@ def test_mixed_type_partitioners_creates_from_int(self) -> None:
         """Test if an IidPartitioner partitioner is created."""
         num_train_partitions = 100
         num_test_partitions = 100
-        partitioners: Dict[str, Union[Partitioner, int]] = {
+        partitioners: dict[str, Union[Partitioner, int]] = {
             "train": IidPartitioner(num_partitions=num_train_partitions),
             "test": num_test_partitions,
         }
diff --git a/datasets/flwr_datasets/metrics/utils.py b/datasets/flwr_datasets/metrics/utils.py
index 8f78b2fd4c3..14e1f8d6811 100644
--- a/datasets/flwr_datasets/metrics/utils.py
+++ b/datasets/flwr_datasets/metrics/utils.py
@@ -16,7 +16,7 @@
 
 
 import warnings
-from typing import List, Optional, Union
+from typing import Optional, Union
 
 import pandas as pd
 
@@ -206,7 +206,7 @@ def compute_frequencies(
 
 
 def _compute_counts(
-    labels: Union[List[int], List[str]], unique_labels: Union[List[int], List[str]]
+    labels: Union[list[int], list[str]], unique_labels: Union[list[int], list[str]]
 ) -> pd.Series:
     """Compute the count of labels when taking into account all possible labels.
@@ -237,7 +237,7 @@ def _compute_counts(
 
 
 def _compute_frequencies(
-    labels: Union[List[int], List[str]], unique_labels: Union[List[int], List[str]]
+    labels: Union[list[int], list[str]], unique_labels: Union[list[int], list[str]]
 ) -> pd.Series:
     """Compute the distribution of labels when taking into account all possible labels.
diff --git a/datasets/flwr_datasets/mock_utils_test.py b/datasets/flwr_datasets/mock_utils_test.py
index bc1254eccbd..0976166648e 100644
--- a/datasets/flwr_datasets/mock_utils_test.py
+++ b/datasets/flwr_datasets/mock_utils_test.py
@@ -19,7 +19,7 @@
 import random
 import string
 from datetime import datetime, timedelta
-from typing import Any, Dict, List, Optional, Set, Tuple, Union
+from typing import Any, Optional, Union
 
 import numpy as np
 from PIL import Image
@@ -30,7 +30,7 @@
 
 def _generate_artificial_strings(
     num_rows: int, num_unique: int, string_length: int, seed: int = 42
-) -> List[str]:
+) -> list[str]:
     """Create list of strings for categories or labels mocking.
 
     Note to keep the seed the same if you reuse this function in the creation of the
@@ -53,7 +53,7 @@ def _generate_artificial_strings(
         List of generated strings.
     """
     random.seed(seed)
-    unique_strings: Set[str] = set()
+    unique_strings: set[str] = set()
     while len(unique_strings) < num_unique:
         random_str = "".join(
             random.choices(string.ascii_letters + string.digits, k=string_length)
@@ -68,7 +68,7 @@ def _generate_artificial_strings(
     return artificial_column
 
 
-def _generate_artificial_categories(num_rows: int, choices: List[Any]) -> List[str]:
+def _generate_artificial_categories(num_rows: int, choices: list[Any]) -> list[str]:
     """Create list of strings from given `choices` list."""
     artificial_column = choices.copy()
     remaining_to_allocate = num_rows - len(choices)
@@ -82,7 +82,7 @@ def _generate_random_word(length: int) -> str:
     return "".join(random.choices(string.ascii_letters, k=length))
 
 
-def _generate_random_text_column(num_rows: int, length: int) -> List[str]:
+def _generate_random_text_column(num_rows: int, length: int) -> list[str]:
     """Generate a list of random text of specified length."""
     text_col = []
     for _ in range(num_rows):
@@ -98,7 +98,7 @@ def _generate_random_sentence(
 ) -> str:
     """Generate a random sentence with words of random lengths."""
     sentence_length = random.randint(min_sentence_length, max_sentence_length)
-    sentence: List[str] = []
+    sentence: list[str] = []
     while len(" ".join(sentence)) < sentence_length:
         word_length = random.randint(min_word_length, max_word_length)
         word = _generate_random_word(word_length)
@@ -112,7 +112,7 @@ def _generate_random_sentences(
     max_word_length: int,
     min_sentence_length: int,
     max_sentence_length: int,
-) -> List[str]:
+) -> list[str]:
     """Generate a list of random sentences."""
     text_col = [
         _generate_random_sentence(
@@ -123,7 +123,7 @@ def _generate_random_sentences(
     return text_col
 
 
-def _make_num_rows_none(column: List[Any], num_none: int) -> List[Any]:
+def _make_num_rows_none(column: list[Any], num_none: int) -> list[Any]:
     """Assign none num_none times to the given list."""
     column_copy = column.copy()
     none_positions = random.sample(range(len(column_copy)), num_none)
@@ -154,7 +154,7 @@ def _generate_random_date_column(
     end_date: datetime,
     date_format: str = "%a %b %d %H:%M:%S %Y",
     as_string: bool = True,
-) -> List[Union[str, datetime]]:
+) -> list[Union[str, datetime]]:
     """Generate a list of random dates."""
     return [
         _generate_random_date(start_date, end_date, date_format, as_string)
@@ -162,21 +162,21 @@ def _generate_random_date_column(
     ]
 
 
-def _generate_random_int_column(num_rows: int, min_int: int, max_int: int) -> List[int]:
+def _generate_random_int_column(num_rows: int, min_int: int, max_int: int) -> list[int]:
     """Generate a list of ints."""
     return [random.randint(min_int, max_int) for _ in range(num_rows)]
 
 
-def _generate_random_bool_column(num_rows: int) -> List[bool]:
+def _generate_random_bool_column(num_rows: int) -> list[bool]:
     """Generate a list of bools."""
     return [random.choice([True, False]) for _ in range(num_rows)]
 
 
 def _generate_random_image_column(
     num_rows: int,
-    image_size: Union[Tuple[int, int], Tuple[int, int, int]],
+    image_size: Union[tuple[int, int], tuple[int, int, int]],
     simulate_type: str,
-) -> List[Any]:
+) -> list[Any]:
     """Simulate the images with the format that is found in HF Hub.
 
     Directly using `Image.fromarray` does not work because it creates `PIL.Image.Image`.
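
All of the hunks above follow one pattern: the typing-module aliases (List, Dict, Tuple, Set) are replaced with the builtin container types, which accept subscripting natively from Python 3.9 onward (PEP 585). A minimal sketch of the idiom this patch adopts, runnable on 3.9+ (the helper name below is illustrative, not part of flwr_datasets):

    # On Python 3.8 these annotations would raise TypeError at import time,
    # unless deferred with `from __future__ import annotations`.
    from typing import Optional  # Optional/Union still come from typing until 3.10's X | Y

    def make_column(num_rows: int, choices: list[str]) -> dict[str, list[str]]:
        """Repeat `choices` cyclically until `num_rows` values exist."""
        return {"label": [choices[i % len(choices)] for i in range(num_rows)]}

    image_size: tuple[int, int] = (32, 32)
    maybe_names: Optional[list[str]] = None
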
@@ -207,7 +207,7 @@ def generate_random_audio_column(
     num_rows: int,
     sampling_rate: int,
     length_in_samples: int,
-) -> List[Dict[str, Any]]:
+) -> list[dict[str, Any]]:
     """Simulate the audio column.
 
     Audio column in the dataset is comprised of an array of floats, sample_rate and a
@@ -365,8 +365,8 @@ def _mock_speach_commands(num_rows: int) -> Dataset:
 
 def _load_mocked_dataset(
     dataset_name: str,
-    num_rows: List[int],
-    split_names: List[str],
+    num_rows: list[int],
+    split_names: list[str],
     subset: str = "",
 ) -> DatasetDict:
     dataset_dict = {}
@@ -380,7 +380,7 @@ def _load_mocked_dataset(
 def _load_mocked_dataset_by_partial_download(
     dataset_name: str,
     split_name: str,
-    skip_take_list: List[Tuple[int, int]],
+    skip_take_list: list[tuple[int, int]],
     subset_name: Optional[str] = None,
 ) -> Dataset:
     """Download a partial dataset.
@@ -423,8 +423,8 @@ def _load_mocked_dataset_by_partial_download(
 
 def _load_mocked_dataset_dict_by_partial_download(
     dataset_name: str,
-    split_names: List[str],
-    skip_take_lists: List[List[Tuple[int, int]]],
+    split_names: list[str],
+    skip_take_lists: list[list[tuple[int, int]]],
     subset_name: Optional[str] = None,
 ) -> DatasetDict:
     """Like _load_mocked_dataset_by_partial_download but for many splits."""
diff --git a/datasets/flwr_datasets/partitioner/dirichlet_partitioner.py b/datasets/flwr_datasets/partitioner/dirichlet_partitioner.py
index dce20841918..55c190087f7 100644
--- a/datasets/flwr_datasets/partitioner/dirichlet_partitioner.py
+++ b/datasets/flwr_datasets/partitioner/dirichlet_partitioner.py
@@ -16,7 +16,7 @@
 
 import warnings
-from typing import Dict, List, Optional, Union
+from typing import Optional, Union
 
 import numpy as np
 
@@ -89,7 +89,7 @@ def __init__(  # pylint: disable=R0913
         self,
         num_partitions: int,
         partition_by: str,
-        alpha: Union[int, float, List[float], NDArrayFloat],
+        alpha: Union[int, float, list[float], NDArrayFloat],
         min_partition_size: int = 10,
         self_balancing: bool = False,
         shuffle: bool = True,
@@ -110,8 +110,8 @@ def __init__(  # pylint: disable=R0913
         # Utility attributes
         # The attributes below are determined during the first call to load_partition
         self._avg_num_of_samples_per_partition: Optional[float] = None
-        self._unique_classes: Optional[Union[List[int], List[str]]] = None
-        self._partition_id_to_indices: Dict[int, List[int]] = {}
+        self._unique_classes: Optional[Union[list[int], list[str]]] = None
+        self._partition_id_to_indices: dict[int, list[int]] = {}
         self._partition_id_to_indices_determined = False
 
     def load_partition(self, partition_id: int) -> datasets.Dataset:
@@ -142,7 +142,7 @@ def num_partitions(self) -> int:
         return self._num_partitions
 
     def _initialize_alpha(
-        self, alpha: Union[int, float, List[float], NDArrayFloat]
+        self, alpha: Union[int, float, list[float], NDArrayFloat]
     ) -> NDArrayFloat:
         """Convert alpha to the format used in the code, an NDArrayFloat.
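
The next hunk also switches the runtime check from `isinstance(alpha, List)` to `isinstance(alpha, list)`; checking against the deprecated typing alias still works, but the builtin is the future-proof spelling on 3.9+. A simplified sketch of the normalization the partitioner performs (function name and messages are illustrative, not the exact library code):

    import numpy as np

    def normalize_alpha(alpha, num_partitions: int) -> np.ndarray:
        # Scalars broadcast to one concentration value per partition.
        if isinstance(alpha, (int, float)):
            return np.full(num_partitions, float(alpha))
        if isinstance(alpha, list):  # builtin `list`, not typing.List
            if len(alpha) != num_partitions:
                raise ValueError("alpha list must have one entry per partition")
            return np.asarray(alpha, dtype=float)
        if isinstance(alpha, np.ndarray):
            return alpha.astype(float).reshape(-1)
        raise TypeError(f"Unsupported alpha type: {type(alpha)}")
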
@@ -164,7 +164,7 @@ def _initialize_alpha(
             alpha = np.array([float(alpha)], dtype=float).repeat(self._num_partitions)
         elif isinstance(alpha, float):
             alpha = np.array([alpha], dtype=float).repeat(self._num_partitions)
-        elif isinstance(alpha, List):
+        elif isinstance(alpha, list):
             if len(alpha) != self._num_partitions:
                 raise ValueError(
                     "If passing alpha as a List, it needs to be of length equal to "
@@ -217,7 +217,7 @@ def _determine_partition_id_to_indices_if_needed(
         sampling_try = 0
         while True:
             # Prepare data structure to store indices assigned to partition ids
-            partition_id_to_indices: Dict[int, List[int]] = {}
+            partition_id_to_indices: dict[int, list[int]] = {}
             for nid in range(self._num_partitions):
                 partition_id_to_indices[nid] = []
 
diff --git a/datasets/flwr_datasets/partitioner/dirichlet_partitioner_test.py b/datasets/flwr_datasets/partitioner/dirichlet_partitioner_test.py
index b2407b5d582..ed38e8ee2a4 100644
--- a/datasets/flwr_datasets/partitioner/dirichlet_partitioner_test.py
+++ b/datasets/flwr_datasets/partitioner/dirichlet_partitioner_test.py
@@ -17,7 +17,7 @@
 # pylint: disable=W0212
 import unittest
-from typing import Tuple, Union
+from typing import Union
 
 import numpy as np
 from numpy.typing import NDArray
@@ -33,7 +33,7 @@ def _dummy_setup(
     num_rows: int,
     partition_by: str,
     self_balancing: bool = True,
-) -> Tuple[Dataset, DirichletPartitioner]:
+) -> tuple[Dataset, DirichletPartitioner]:
     """Create a dummy dataset and partitioner for testing."""
     data = {
         partition_by: [i % 3 for i in range(num_rows)],
diff --git a/datasets/flwr_datasets/partitioner/distribution_partitioner.py b/datasets/flwr_datasets/partitioner/distribution_partitioner.py
index e9acc41c707..86be62b3607 100644
--- a/datasets/flwr_datasets/partitioner/distribution_partitioner.py
+++ b/datasets/flwr_datasets/partitioner/distribution_partitioner.py
@@ -16,7 +16,7 @@
 
 from collections import Counter
-from typing import Dict, List, Optional, Union
+from typing import Optional, Union
 
 import numpy as np
 
@@ -182,7 +182,7 @@ def __init__(  # pylint: disable=R0913
         self._num_unique_labels: int = 0
         self._num_columns: int = 0
         self._partition_id_to_indices_determined = False
-        self._partition_id_to_indices: Dict[int, List[int]] = {}
+        self._partition_id_to_indices: dict[int, list[int]] = {}
 
     def load_partition(self, partition_id: int) -> datasets.Dataset:
         """Load a partition based on the partition index.
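
For intuition on what these Dirichlet-based partitioners compute: each class's samples are split across partitions according to proportions drawn from a Dirichlet distribution. A toy sketch follows (illustrative only; the real implementation adds min-partition-size retries, optional self-balancing, and shuffling):

    import numpy as np

    rng = np.random.default_rng(42)
    labels = np.array([i % 3 for i in range(30)])  # 30 samples, 3 classes
    num_partitions, alpha = 4, 0.5
    partition_id_to_indices: dict[int, list[int]] = {pid: [] for pid in range(num_partitions)}

    for cls in np.unique(labels):
        cls_indices = np.flatnonzero(labels == cls)
        rng.shuffle(cls_indices)
        # One Dirichlet draw per class gives the share each partition receives.
        proportions = rng.dirichlet(np.full(num_partitions, alpha))
        cut_points = (np.cumsum(proportions)[:-1] * len(cls_indices)).astype(int)
        for pid, chunk in enumerate(np.split(cls_indices, cut_points)):
            partition_id_to_indices[pid].extend(chunk.tolist())
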
diff --git a/datasets/flwr_datasets/partitioner/distribution_partitioner_test.py b/datasets/flwr_datasets/partitioner/distribution_partitioner_test.py
index bfeafd355be..306e208a706 100644
--- a/datasets/flwr_datasets/partitioner/distribution_partitioner_test.py
+++ b/datasets/flwr_datasets/partitioner/distribution_partitioner_test.py
@@ -17,7 +17,7 @@
 
 import unittest
 from collections import Counter
-from typing import Any, Dict, List, Tuple, Union
+from typing import Any, Union
 
 import numpy as np
 from parameterized import parameterized_class
@@ -62,7 +62,7 @@ def _get_partitioner(
     num_unique_labels: int,
     preassigned_num_samples_per_label: int,
     rescale_mode: bool = True,
-) -> Tuple[DistributionPartitioner, Dict[int, Dataset]]:
+) -> tuple[DistributionPartitioner, dict[int, Dataset]]:
     """Create DistributionPartitioner instance."""
     dataset = _dummy_dataset_setup(
         num_samples,
@@ -83,7 +83,7 @@ def _get_partitioner(
         rescale=rescale_mode,
     )
     partitioner.dataset = dataset
-    partitions: Dict[int, Dataset] = {
+    partitions: dict[int, Dataset] = {
         pid: partitioner.load_partition(pid) for pid in range(num_partitions)
     }
 
@@ -135,7 +135,7 @@ def test_correct_num_times_classes_sampled_across_partitions(self) -> None:
             preassigned_num_samples_per_label=self.preassigned_num_samples_per_label,
         )
 
-        partitioned_distribution: Dict[Any, List[Any]] = {
+        partitioned_distribution: dict[Any, list[Any]] = {
             label: [] for label in partitioner.dataset.unique("labels")
         }
 
@@ -162,7 +162,7 @@ def test_exact_distribution_assignment(self) -> None:
             preassigned_num_samples_per_label=self.preassigned_num_samples_per_label,
             rescale_mode=False,
         )
-        partitioned_distribution: Dict[Any, List[Any]] = {
+        partitioned_distribution: dict[Any, list[Any]] = {
             label: [] for label in partitioner.dataset.unique("labels")
         }
 
diff --git a/datasets/flwr_datasets/partitioner/grouped_natural_id_partitioner.py b/datasets/flwr_datasets/partitioner/grouped_natural_id_partitioner.py
index f10d80b3aaa..4ce4f371719 100644
--- a/datasets/flwr_datasets/partitioner/grouped_natural_id_partitioner.py
+++ b/datasets/flwr_datasets/partitioner/grouped_natural_id_partitioner.py
@@ -15,7 +15,7 @@
 """Grouped natural id partitioner class that works with Hugging Face Datasets."""
 
 
-from typing import Any, Dict, List, Literal
+from typing import Any, Literal
 
 import numpy as np
 
@@ -72,9 +72,9 @@ def __init__(
         sort_unique_ids: bool = False,
     ) -> None:
         super().__init__()
-        self._partition_id_to_natural_ids: Dict[int, List[Any]] = {}
-        self._natural_id_to_partition_id: Dict[Any, int] = {}
-        self._partition_id_to_indices: Dict[int, NDArrayInt] = {}
+        self._partition_id_to_natural_ids: dict[int, list[Any]] = {}
+        self._natural_id_to_partition_id: dict[Any, int] = {}
+        self._partition_id_to_indices: dict[int, NDArrayInt] = {}
         self._partition_by = partition_by
         self._mode = mode
         self._sort_unique_ids = sort_unique_ids
@@ -211,7 +211,7 @@ def num_partitions(self) -> int:
         return len(self._partition_id_to_natural_ids)
 
     @property
-    def partition_id_to_natural_ids(self) -> Dict[int, List[Any]]:
+    def partition_id_to_natural_ids(self) -> dict[int, list[Any]]:
         """Partition id to the corresponding group of natural ids present.
 
         Natural ids are the unique values in `partition_by` column in dataset.
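
The property above now advertises the builtin `dict[int, list[Any]]`. The grouping itself can be pictured as chunking the unique natural ids into fixed-size groups; a hedged toy version follows (the real class also handles uneven remainders via the modes exercised in the tests below, such as "allow-smaller" and "allow-bigger"):

    from typing import Any

    def group_natural_ids(unique_ids: list[Any], group_size: int) -> dict[int, list[Any]]:
        chunks = [unique_ids[i : i + group_size] for i in range(0, len(unique_ids), group_size)]
        return dict(enumerate(chunks))

    # Six writers grouped in pairs -> {0: ['a', 'b'], 1: ['c', 'd'], 2: ['e', 'f']}
    print(group_natural_ids(["a", "b", "c", "d", "e", "f"], group_size=2))
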
@@ -219,6 +219,6 @@ def partition_id_to_natural_ids(self) -> Dict[int, List[Any]]:
         return self._partition_id_to_natural_ids
 
     @property
-    def natural_id_to_partition_id(self) -> Dict[Any, int]:
+    def natural_id_to_partition_id(self) -> dict[Any, int]:
         """Natural id to the corresponding partition id."""
         return self._natural_id_to_partition_id
diff --git a/datasets/flwr_datasets/partitioner/grouped_natural_id_partitioner_test.py b/datasets/flwr_datasets/partitioner/grouped_natural_id_partitioner_test.py
index 635d3850624..014d18c1dc1 100644
--- a/datasets/flwr_datasets/partitioner/grouped_natural_id_partitioner_test.py
+++ b/datasets/flwr_datasets/partitioner/grouped_natural_id_partitioner_test.py
@@ -16,7 +16,7 @@
 
 import unittest
-from typing import List, Literal, Set
+from typing import Literal
 
 from parameterized import parameterized, parameterized_class
 
@@ -95,7 +95,7 @@ def test_allow_smaller_mode_num_partitions_and_partition_sizes(
         num_rows: int,
         num_unique_natural_id: int,
         group_size: int,
-        expected_num_unique_natural_ids: List[int],
+        expected_num_unique_natural_ids: list[int],
     ) -> None:
         """Test allow-smaller mode handles the remainder correctly."""
         dataset = _create_dataset(num_rows, num_unique_natural_id)
@@ -132,7 +132,7 @@ def test_allow_bigger_mode_num_partitions_and_partition_sizes(
         num_rows: int,
         num_unique_natural_id: int,
         group_size: int,
-        expected_num_unique_natural_ids: List[int],
+        expected_num_unique_natural_ids: list[int],
     ) -> None:
         """Test allow-bigger mode handles the remainder correctly."""
         dataset = _create_dataset(num_rows, num_unique_natural_id)
@@ -169,7 +169,7 @@ def test_drop_reminder_mode_num_partitions_and_partition_sizes(
         num_rows: int,
         num_unique_natural_id: int,
         group_size: int,
-        expected_num_unique_natural_ids: List[int],
+        expected_num_unique_natural_ids: list[int],
     ) -> None:
         """Test drop reminder mode."""
         dataset = _create_dataset(num_rows, num_unique_natural_id)
@@ -226,7 +226,7 @@ def test_no_overlapping_natural_ids(
         ]
 
         # Check for overlaps between partitions
-        seen_natural_ids: Set[str] = set()
+        seen_natural_ids: set[str] = set()
         for partition in partitions:
             natural_ids_in_partition = set(partition.unique("natural_id"))
 
diff --git a/datasets/flwr_datasets/partitioner/id_to_size_fnc_partitioner.py b/datasets/flwr_datasets/partitioner/id_to_size_fnc_partitioner.py
index d7b3b21037d..bd6336eb080 100644
--- a/datasets/flwr_datasets/partitioner/id_to_size_fnc_partitioner.py
+++ b/datasets/flwr_datasets/partitioner/id_to_size_fnc_partitioner.py
@@ -15,7 +15,7 @@
 """IdToSizeFncPartitioner class."""
 
 
-from typing import Callable, Dict, List, Union
+from typing import Callable, Union
 
 import numpy as np
 
@@ -59,8 +59,8 @@ def __init__(
         self._num_partitions = num_partitions
         self._partition_id_to_size_fn = partition_id_to_size_fn
 
-        self._partition_id_to_size: Dict[int, int] = {}
-        self._partition_id_to_indices: Dict[int, List[int]] = {}
+        self._partition_id_to_size: dict[int, int] = {}
+        self._partition_id_to_indices: dict[int, list[int]] = {}
         # A flag to perform only a single compute to determine the indices
         self._partition_id_to_indices_determined = False
 
@@ -91,12 +91,12 @@ def num_partitions(self) -> int:
         return self._num_partitions
 
     @property
-    def partition_id_to_size(self) -> Dict[int, int]:
+    def partition_id_to_size(self) -> dict[int, int]:
         """Node id to the number of samples."""
         return self._partition_id_to_size
 
     @property
-    def partition_id_to_indices(self) -> Dict[int, List[int]]:
+    def partition_id_to_indices(self) -> dict[int, list[int]]:
         """Node id to the list of indices."""
         return self._partition_id_to_indices
 
diff --git a/datasets/flwr_datasets/partitioner/iid_partitioner_test.py b/datasets/flwr_datasets/partitioner/iid_partitioner_test.py
index 64c37c4e712..cbdc67be7fa 100644
--- a/datasets/flwr_datasets/partitioner/iid_partitioner_test.py
+++ b/datasets/flwr_datasets/partitioner/iid_partitioner_test.py
@@ -16,7 +16,6 @@
 
 
 import unittest
-from typing import Tuple
 
 from parameterized import parameterized
 
@@ -24,7 +23,7 @@
 from flwr_datasets.partitioner.iid_partitioner import IidPartitioner
 
 
-def _dummy_setup(num_partitions: int, num_rows: int) -> Tuple[Dataset, IidPartitioner]:
+def _dummy_setup(num_partitions: int, num_rows: int) -> tuple[Dataset, IidPartitioner]:
     """Create a dummy dataset and partitioner based on given arguments.
 
     The partitioner has automatically the dataset assigned to it.
diff --git a/datasets/flwr_datasets/partitioner/inner_dirichlet_partitioner.py b/datasets/flwr_datasets/partitioner/inner_dirichlet_partitioner.py
index e3e46813dfc..e62b8fdbb21 100644
--- a/datasets/flwr_datasets/partitioner/inner_dirichlet_partitioner.py
+++ b/datasets/flwr_datasets/partitioner/inner_dirichlet_partitioner.py
@@ -14,7 +14,7 @@
 # ==============================================================================
 """InnerDirichlet partitioner."""
 import warnings
-from typing import Dict, List, Optional, Union
+from typing import Optional, Union
 
 import numpy as np
 
@@ -68,9 +68,9 @@ class InnerDirichletPartitioner(Partitioner):  # pylint: disable=R0902
 
     def __init__(  # pylint: disable=R0913
         self,
-        partition_sizes: Union[List[int], NDArrayInt],
+        partition_sizes: Union[list[int], NDArrayInt],
         partition_by: str,
-        alpha: Union[int, float, List[float], NDArrayFloat],
+        alpha: Union[int, float, list[float], NDArrayFloat],
         shuffle: bool = True,
         seed: Optional[int] = 42,
     ) -> None:
@@ -87,11 +87,11 @@ def __init__(  # pylint: disable=R0913
         self._initialized_alpha = False
         self._rng = np.random.default_rng(seed=self._seed)  # NumPy random generator
         # The attributes below are determined during the first call to load_partition
-        self._unique_classes: Optional[Union[List[int], List[str]]] = None
+        self._unique_classes: Optional[Union[list[int], list[str]]] = None
         self._num_unique_classes: Optional[int] = None
         self._num_partitions = len(self._partition_sizes)
 
-        self._partition_id_to_indices: Dict[int, List[int]] = {}
+        self._partition_id_to_indices: dict[int, list[int]] = {}
         self._partition_id_to_indices_determined = False
 
     def load_partition(self, partition_id: int) -> datasets.Dataset:
@@ -130,7 +130,7 @@ def num_partitions(self) -> int:
         return self._num_partitions
 
     def _initialize_alpha_if_needed(
-        self, alpha: Union[int, float, List[float], NDArrayFloat]
+        self, alpha: Union[int, float, list[float], NDArrayFloat]
     ) -> NDArrayFloat:
         """Convert alpha to the format used in the code, an NDArrayFloat.
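
A recurring pattern in the test files changed above is a `_dummy_setup` helper that now returns a builtin-generic tuple. A sketch of that fixture, assuming Hugging Face `datasets` and flwr_datasets are installed (field names are illustrative):

    from datasets import Dataset
    from flwr_datasets.partitioner import IidPartitioner

    def dummy_setup(num_partitions: int, num_rows: int) -> tuple[Dataset, IidPartitioner]:
        dataset = Dataset.from_dict(
            {"features": list(range(num_rows)), "labels": [i % 3 for i in range(num_rows)]}
        )
        partitioner = IidPartitioner(num_partitions=num_partitions)
        partitioner.dataset = dataset  # partitioners read samples from this property
        return dataset, partitioner
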
@@ -159,7 +159,7 @@ def _initialize_alpha_if_needed(
         elif isinstance(alpha, float):
             assert self._num_unique_classes is not None
             alpha = np.array([alpha], dtype=float).repeat(self._num_unique_classes)
-        elif isinstance(alpha, List):
+        elif isinstance(alpha, list):
             if len(alpha) != self._num_unique_classes:
                 raise ValueError(
                     "When passing alpha as a List, its length needs to be "
@@ -304,10 +304,10 @@ def _check_the_sum_of_partition_sizes(self) -> None:
 
 
 def _instantiate_partition_sizes(
-    partition_sizes: Union[List[int], NDArrayInt]
+    partition_sizes: Union[list[int], NDArrayInt]
 ) -> NDArrayInt:
     """Transform list to the ndarray of ints if needed."""
-    if isinstance(partition_sizes, List):
+    if isinstance(partition_sizes, list):
         partition_sizes = np.asarray(partition_sizes)
     elif isinstance(partition_sizes, np.ndarray):
         pass
diff --git a/datasets/flwr_datasets/partitioner/inner_dirichlet_partitioner_test.py b/datasets/flwr_datasets/partitioner/inner_dirichlet_partitioner_test.py
index 86dc8a5df53..8df09d01f91 100644
--- a/datasets/flwr_datasets/partitioner/inner_dirichlet_partitioner_test.py
+++ b/datasets/flwr_datasets/partitioner/inner_dirichlet_partitioner_test.py
@@ -15,7 +15,7 @@
 """Test DirichletPartitioner."""
 # pylint: disable=W0212
 import unittest
-from typing import List, Tuple, Union
+from typing import Union
 
 from datasets import Dataset
 from flwr_datasets.common.typing import NDArrayFloat, NDArrayInt
@@ -27,9 +27,9 @@
 def _dummy_setup(
     num_rows: int,
     partition_by: str,
-    partition_sizes: Union[List[int], NDArrayInt],
-    alpha: Union[float, List[float], NDArrayFloat],
-) -> Tuple[Dataset, InnerDirichletPartitioner]:
+    partition_sizes: Union[list[int], NDArrayInt],
+    alpha: Union[float, list[float], NDArrayFloat],
+) -> tuple[Dataset, InnerDirichletPartitioner]:
     """Create a dummy dataset and partitioner for testing."""
     data = {
         partition_by: [i % 3 for i in range(num_rows)],
diff --git a/datasets/flwr_datasets/partitioner/natural_id_partitioner.py b/datasets/flwr_datasets/partitioner/natural_id_partitioner.py
index 5a9af3271cb..64b51855e1f 100644
--- a/datasets/flwr_datasets/partitioner/natural_id_partitioner.py
+++ b/datasets/flwr_datasets/partitioner/natural_id_partitioner.py
@@ -15,8 +15,6 @@
 """Natural id partitioner class that works with Hugging Face Datasets."""
 
 
-from typing import Dict
-
 import numpy as np
 from tqdm import tqdm
 
@@ -62,9 +60,9 @@ def __init__(
         partition_by: str,
     ):
         super().__init__()
-        self._partition_id_to_natural_id: Dict[int, str] = {}
-        self._natural_id_to_partition_id: Dict[str, int] = {}
-        self._partition_id_to_indices: Dict[int, NDArrayInt] = {}
+        self._partition_id_to_natural_id: dict[int, str] = {}
+        self._natural_id_to_partition_id: dict[str, int] = {}
+        self._partition_id_to_indices: dict[int, NDArrayInt] = {}
         self._partition_by = partition_by
 
     def _create_int_partition_id_to_natural_id(self) -> None:
@@ -138,7 +136,7 @@ def num_partitions(self) -> int:
         return len(self._partition_id_to_natural_id)
 
     @property
-    def partition_id_to_natural_id(self) -> Dict[int, str]:
+    def partition_id_to_natural_id(self) -> dict[int, str]:
         """Node id to corresponding natural id present.
 
         Natural ids are the unique values in `partition_by` column in dataset.
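
The natural-id mapping is exposed read-only: the getter returns the dict while the setter (in the next hunk) always raises. A minimal self-contained sketch of that pattern with the new builtin generics:

    class Example:
        def __init__(self) -> None:
            self._partition_id_to_natural_id: dict[int, str] = {}

        @property
        def partition_id_to_natural_id(self) -> dict[int, str]:
            return self._partition_id_to_natural_id

        @partition_id_to_natural_id.setter
        def partition_id_to_natural_id(self, value: dict[int, str]) -> None:
            raise AttributeError("Setting this mapping is not allowed.")
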
@@ -146,7 +144,7 @@ def partition_id_to_natural_id(self) -> Dict[int, str]:
         return self._partition_id_to_natural_id
 
     @partition_id_to_natural_id.setter
-    def partition_id_to_natural_id(self, value: Dict[int, str]) -> None:
+    def partition_id_to_natural_id(self, value: dict[int, str]) -> None:
         raise AttributeError(
             "Setting the partition_id_to_natural_id dictionary is not allowed."
         )
diff --git a/datasets/flwr_datasets/partitioner/natural_id_partitioner_test.py b/datasets/flwr_datasets/partitioner/natural_id_partitioner_test.py
index b74a044967e..d3147985dca 100644
--- a/datasets/flwr_datasets/partitioner/natural_id_partitioner_test.py
+++ b/datasets/flwr_datasets/partitioner/natural_id_partitioner_test.py
@@ -18,7 +18,6 @@
 import itertools
 import math
 import unittest
-from typing import Tuple
 
 from parameterized import parameterized
 
@@ -28,7 +27,7 @@
 
 def _dummy_setup(
     num_rows: int, n_unique_natural_ids: int
-) -> Tuple[Dataset, NaturalIdPartitioner]:
+) -> tuple[Dataset, NaturalIdPartitioner]:
     """Create a dummy dataset and partitioner based on given arguments.
 
     The partitioner has automatically the dataset assigned to it.
diff --git a/datasets/flwr_datasets/partitioner/pathological_partitioner.py b/datasets/flwr_datasets/partitioner/pathological_partitioner.py
index 1ee60d28304..350383f344e 100644
--- a/datasets/flwr_datasets/partitioner/pathological_partitioner.py
+++ b/datasets/flwr_datasets/partitioner/pathological_partitioner.py
@@ -16,7 +16,7 @@
 
 import warnings
-from typing import Any, Dict, List, Literal, Optional
+from typing import Any, Literal, Optional
 
 import numpy as np
 
@@ -114,13 +114,13 @@ def __init__(
         self._rng = np.random.default_rng(seed=self._seed)
 
         # Utility attributes
-        self._partition_id_to_indices: Dict[int, List[int]] = {}
-        self._partition_id_to_unique_labels: Dict[int, List[Any]] = {
+        self._partition_id_to_indices: dict[int, list[int]] = {}
+        self._partition_id_to_unique_labels: dict[int, list[Any]] = {
             pid: [] for pid in range(self._num_partitions)
         }
-        self._unique_labels: List[Any] = []
+        self._unique_labels: list[Any] = []
         # Count in how many partitions the label is used
-        self._unique_label_to_times_used_counter: Dict[Any, int] = {}
+        self._unique_label_to_times_used_counter: dict[Any, int] = {}
         self._partition_id_to_indices_determined = False
 
     def load_partition(self, partition_id: int) -> datasets.Dataset:
diff --git a/datasets/flwr_datasets/partitioner/pathological_partitioner_test.py b/datasets/flwr_datasets/partitioner/pathological_partitioner_test.py
index 151b7e14659..18707a56bd9 100644
--- a/datasets/flwr_datasets/partitioner/pathological_partitioner_test.py
+++ b/datasets/flwr_datasets/partitioner/pathological_partitioner_test.py
@@ -16,7 +16,6 @@
 
 
 import unittest
-from typing import Dict
 
 import numpy as np
 from parameterized import parameterized
 
@@ -79,7 +78,7 @@ def test_correct_num_classes_when_partitioned(
             num_classes_per_partition=num_classes_per_partition,
         )
         partitioner.dataset = dataset
-        partitions: Dict[int, Dataset] = {
+        partitions: dict[int, Dataset] = {
             pid: partitioner.load_partition(pid) for pid in range(num_partitions)
         }
         unique_classes_per_partition = {
diff --git a/datasets/flwr_datasets/partitioner/shard_partitioner.py b/datasets/flwr_datasets/partitioner/shard_partitioner.py
index 11cffa515da..3001df6dcb6 100644
--- a/datasets/flwr_datasets/partitioner/shard_partitioner.py
+++ b/datasets/flwr_datasets/partitioner/shard_partitioner.py
@@ -17,7 +17,7 @@
 
 # pylint: disable=R0912, R0914
 import math
-from typing import Dict, List, Optional
+from typing import Optional
 
 import numpy as np
 
@@ -165,7 +165,7 @@ def __init__(  # pylint: disable=R0913
 
         # Utility attributes
         self._rng = np.random.default_rng(seed=self._seed)  # NumPy random generator
-        self._partition_id_to_indices: Dict[int, List[int]] = {}
+        self._partition_id_to_indices: dict[int, list[int]] = {}
         self._partition_id_to_indices_determined = False
 
     def load_partition(self, partition_id: int) -> datasets.Dataset:
@@ -299,7 +299,7 @@ def _determine_partition_id_to_indices_if_needed(
             nid_to_shard_indices = np.split(
                 shard_indices_array, indices_on_which_to_split_shards
             )[:-1]
-        partition_id_to_indices: Dict[int, List[int]] = {
+        partition_id_to_indices: dict[int, list[int]] = {
             cid: [] for cid in range(self._num_partitions)
         }
         # Compute partition_id to sample indices based on the shard indices
diff --git a/datasets/flwr_datasets/partitioner/shard_partitioner_test.py b/datasets/flwr_datasets/partitioner/shard_partitioner_test.py
index d6fa8b52959..be8edf9d276 100644
--- a/datasets/flwr_datasets/partitioner/shard_partitioner_test.py
+++ b/datasets/flwr_datasets/partitioner/shard_partitioner_test.py
@@ -17,7 +17,7 @@
 
 # pylint: disable=W0212, R0913
 import unittest
-from typing import Optional, Tuple
+from typing import Optional
 
 from datasets import Dataset
 from flwr_datasets.partitioner.shard_partitioner import ShardPartitioner
@@ -30,7 +30,7 @@ def _dummy_setup(
     num_shards_per_partition: Optional[int],
     shard_size: Optional[int],
     keep_incomplete_shard: bool = False,
-) -> Tuple[Dataset, ShardPartitioner]:
+) -> tuple[Dataset, ShardPartitioner]:
     """Create a dummy dataset for testing."""
     data = {
         partition_by: [i % 3 for i in range(num_rows)],
diff --git a/datasets/flwr_datasets/preprocessor/divider_test.py b/datasets/flwr_datasets/preprocessor/divider_test.py
index ed282fbc18b..bb92d72c1c4 100644
--- a/datasets/flwr_datasets/preprocessor/divider_test.py
+++ b/datasets/flwr_datasets/preprocessor/divider_test.py
@@ -15,7 +15,7 @@
 """Divider tests."""
 import unittest
-from typing import Dict, Union
+from typing import Union
 
 from parameterized import parameterized_class
 
@@ -84,14 +84,14 @@ class TestDivider(unittest.TestCase):
     """Divider tests."""
 
     divide_config: Union[
-        Dict[str, float],
-        Dict[str, int],
-        Dict[str, Dict[str, float]],
-        Dict[str, Dict[str, int]],
+        dict[str, float],
+        dict[str, int],
+        dict[str, dict[str, float]],
+        dict[str, dict[str, int]],
     ]
     divide_split: str
     drop_remaining_splits: bool
-    split_name_to_size: Dict[str, int]
+    split_name_to_size: dict[str, int]
 
     def setUp(self) -> None:
         """Set up the dataset with 3 splits for tests."""
diff --git a/datasets/flwr_datasets/preprocessor/merger.py b/datasets/flwr_datasets/preprocessor/merger.py
index 2b76dbbafe4..e47993dd686 100644
--- a/datasets/flwr_datasets/preprocessor/merger.py
+++ b/datasets/flwr_datasets/preprocessor/merger.py
@@ -18,7 +18,6 @@
 import collections
 import warnings
 from functools import reduce
-from typing import Dict, List, Tuple
 
 import datasets
 from datasets import Dataset, DatasetDict
@@ -56,9 +55,9 @@ class Merger:
 
     def __init__(
         self,
-        merge_config: Dict[str, Tuple[str, ...]],
+        merge_config: dict[str, tuple[str, ...]],
     ) -> None:
-        self._merge_config: Dict[str, Tuple[str, ...]] = merge_config
+        self._merge_config: dict[str, tuple[str, ...]] = merge_config
         self._check_duplicate_merge_splits()
 
     def __call__(self, dataset: DatasetDict) -> DatasetDict:
@@ -70,7 +69,7 @@ def resplit(self, dataset: DatasetDict) -> DatasetDict:
         """Resplit the dataset according to the `merge_config`."""
         resplit_dataset = {}
         for divide_to, divided_from__list in self._merge_config.items():
-            datasets_from_list: List[Dataset] = []
+            datasets_from_list: list[Dataset] = []
             for divide_from in divided_from__list:
                 datasets_from_list.append(dataset[divide_from])
             if len(datasets_from_list) > 1:
diff --git a/datasets/flwr_datasets/preprocessor/merger_test.py b/datasets/flwr_datasets/preprocessor/merger_test.py
index 137b0dd1a66..0dd534229eb 100644
--- a/datasets/flwr_datasets/preprocessor/merger_test.py
+++ b/datasets/flwr_datasets/preprocessor/merger_test.py
@@ -16,7 +16,6 @@
 
 
 import unittest
-from typing import Dict, Tuple
 
 import pytest
 
@@ -39,28 +38,28 @@ def setUp(self) -> None:
 
     def test_resplitting_train_size(self) -> None:
         """Test if resplitting for just renaming keeps the lengths correct."""
-        strategy: Dict[str, Tuple[str, ...]] = {"new_train": ("train",)}
+        strategy: dict[str, tuple[str, ...]] = {"new_train": ("train",)}
         merger = Merger(strategy)
         new_dataset = merger(self.dataset_dict)
         self.assertEqual(len(new_dataset["new_train"]), 3)
 
     def test_resplitting_valid_size(self) -> None:
         """Test if resplitting for just renaming keeps the lengths correct."""
-        strategy: Dict[str, Tuple[str, ...]] = {"new_valid": ("valid",)}
+        strategy: dict[str, tuple[str, ...]] = {"new_valid": ("valid",)}
         merger = Merger(strategy)
         new_dataset = merger(self.dataset_dict)
         self.assertEqual(len(new_dataset["new_valid"]), 2)
 
     def test_resplitting_test_size(self) -> None:
         """Test if resplitting for just renaming keeps the lengths correct."""
-        strategy: Dict[str, Tuple[str, ...]] = {"new_test": ("test",)}
+        strategy: dict[str, tuple[str, ...]] = {"new_test": ("test",)}
         merger = Merger(strategy)
         new_dataset = merger(self.dataset_dict)
         self.assertEqual(len(new_dataset["new_test"]), 1)
 
     def test_resplitting_train_the_same(self) -> None:
         """Test if resplitting for just renaming keeps the dataset the same."""
-        strategy: Dict[str, Tuple[str, ...]] = {"new_train": ("train",)}
+        strategy: dict[str, tuple[str, ...]] = {"new_train": ("train",)}
         merger = Merger(strategy)
         new_dataset = merger(self.dataset_dict)
         self.assertTrue(
@@ -69,7 +68,7 @@ def test_resplitting_train_the_same(self) -> None:
 
     def test_combined_train_valid_size(self) -> None:
         """Test if the resplitting that combines the datasets has correct size."""
-        strategy: Dict[str, Tuple[str, ...]] = {
+        strategy: dict[str, tuple[str, ...]] = {
             "train_valid_combined": ("train", "valid")
         }
         merger = Merger(strategy)
@@ -78,7 +77,7 @@ def test_combined_train_valid_size(self) -> None:
 
     def test_resplitting_test_with_combined_strategy_size(self) -> None:
         """Test if the resplitting that combines the datasets has correct size."""
-        strategy: Dict[str, Tuple[str, ...]] = {
+        strategy: dict[str, tuple[str, ...]] = {
             "train_valid_combined": ("train", "valid"),
             "test": ("test",),
         }
@@ -88,7 +87,7 @@ def test_resplitting_test_with_combined_strategy_size(self) -> None:
 
     def test_invalid_resplit_strategy_exception_message(self) -> None:
         """Test if the resplitting raises error when non-existing split is given."""
-        strategy: Dict[str, Tuple[str, ...]] = {
+        strategy: dict[str, tuple[str, ...]] = {
             "new_train": ("invalid_split",),
             "new_test": ("test",),
         }
@@ -100,7 +99,7 @@ def test_invalid_resplit_strategy_exception_message(self) -> None:
 
     def test_nonexistent_split_in_strategy(self) -> None:
         """Test if the exception is raised when the nonexistent split name is given."""
-        strategy: Dict[str, Tuple[str, ...]] = {"new_split": ("nonexistent_split",)}
+        strategy: dict[str, tuple[str, ...]] = {"new_split": ("nonexistent_split",)}
         merger = Merger(strategy)
         with self.assertRaisesRegex(
             ValueError, "The given dataset key 'nonexistent_split' is not present"
@@ -109,7 +108,7 @@ def test_nonexistent_split_in_strategy(self) -> None:
 
     def test_duplicate_merge_split_name(self) -> None:
         """Test that the new split names are not the same."""
-        strategy: Dict[str, Tuple[str, ...]] = {
+        strategy: dict[str, tuple[str, ...]] = {
             "new_train": ("train", "valid"),
             "test": ("train",),
         }
@@ -119,7 +118,7 @@ def test_duplicate_merge_split_name(self) -> None:
     def test_empty_dataset_dict(self) -> None:
         """Test that the error is raised when the empty DatasetDict is given."""
         empty_dataset = DatasetDict({})
-        strategy: Dict[str, Tuple[str, ...]] = {"new_train": ("train",)}
+        strategy: dict[str, tuple[str, ...]] = {"new_train": ("train",)}
         merger = Merger(strategy)
         with self.assertRaisesRegex(
             ValueError, "The given dataset key 'train' is not present"
diff --git a/datasets/flwr_datasets/utils.py b/datasets/flwr_datasets/utils.py
index 388865a26cf..1657c2a0ebd 100644
--- a/datasets/flwr_datasets/utils.py
+++ b/datasets/flwr_datasets/utils.py
@@ -16,7 +16,7 @@
 
 
 import warnings
-from typing import Dict, List, Optional, Tuple, Union, cast
+from typing import Optional, Union, cast
 
 from datasets import Dataset, DatasetDict, concatenate_datasets
 from flwr_datasets.partitioner import IidPartitioner, Partitioner
@@ -57,8 +57,8 @@ def _instantiate_partitioners(
-    partitioners: Dict[str, Union[Partitioner, int]]
-) -> Dict[str, Partitioner]:
+    partitioners: dict[str, Union[Partitioner, int]]
+) -> dict[str, Partitioner]:
     """Transform the partitioners from the initial format to instantiated objects.
 
     Parameters
     ----------
     partitioners : Dict[str, Partitioner]
         Partitioners specified as split to Partitioner object.
     """
-    instantiated_partitioners: Dict[str, Partitioner] = {}
-    if isinstance(partitioners, Dict):
+    instantiated_partitioners: dict[str, Partitioner] = {}
+    if isinstance(partitioners, dict):
         for split, partitioner in partitioners.items():
             if isinstance(partitioner, Partitioner):
                 instantiated_partitioners[split] = partitioner
@@ -95,10 +95,10 @@ def _instantiate_merger_if_needed(
-    merger: Optional[Union[Preprocessor, Dict[str, Tuple[str, ...]]]]
+    merger: Optional[Union[Preprocessor, dict[str, tuple[str, ...]]]]
 ) -> Optional[Preprocessor]:
     """Instantiate `Merger` if preprocessor is merge_config."""
-    if merger and isinstance(merger, Dict):
+    if merger and isinstance(merger, dict):
         merger = Merger(merge_config=merger)
     return cast(Optional[Preprocessor], merger)
 
@@ -113,8 +113,8 @@ def _check_if_dataset_tested(dataset: str) -> None:
 
 def divide_dataset(
-    dataset: Dataset, division: Union[List[float], Tuple[float, ...], Dict[str, float]]
-) -> Union[List[Dataset], DatasetDict]:
+    dataset: Dataset, division: Union[list[float], tuple[float, ...], dict[str, float]]
+) -> Union[list[Dataset], DatasetDict]:
     """Divide the dataset according to the `division`.
 
     The division supports a varying number of splits, which you can name. The splits are
@@ -162,12 +162,12 @@ def divide_dataset(
     dataset_length = len(dataset)
     ranges = _create_division_indices_ranges(dataset_length, division)
     if isinstance(division, (list, tuple)):
-        split_partition: List[Dataset] = []
+        split_partition: list[Dataset] = []
         for single_range in ranges:
             split_partition.append(dataset.select(single_range))
         return split_partition
     if isinstance(division, dict):
-        split_partition_dict: Dict[str, Dataset] = {}
+        split_partition_dict: dict[str, Dataset] = {}
         for split_name, single_range in zip(division.keys(), ranges):
             split_partition_dict[split_name] = dataset.select(single_range)
         return DatasetDict(split_partition_dict)
@@ -179,8 +179,8 @@ def divide_dataset(
 
 def _create_division_indices_ranges(
     dataset_length: int,
-    division: Union[List[float], Tuple[float, ...], Dict[str, float]],
-) -> List[range]:
+    division: Union[list[float], tuple[float, ...], dict[str, float]],
+) -> list[range]:
     ranges = []
     if isinstance(division, (list, tuple)):
         start_idx = 0
@@ -206,7 +206,7 @@ def _create_division_indices_ranges(
 
 
 def _check_division_config_types_correctness(
-    division: Union[List[float], Tuple[float, ...], Dict[str, float]]
+    division: Union[list[float], tuple[float, ...], dict[str, float]]
 ) -> None:
     if isinstance(division, (list, tuple)):
         if not all(isinstance(x, float) for x in division):
@@ -225,7 +225,7 @@ def _check_division_config_types_correctness(
 
 
 def _check_division_config_values_correctness(
-    division: Union[List[float], Tuple[float, ...], Dict[str, float]]
+    division: Union[list[float], tuple[float, ...], dict[str, float]]
 ) -> None:
     if isinstance(division, (list, tuple)):
         if not all(0 < x <= 1 for x in division):
@@ -263,7 +263,7 @@ def _check_division_config_values_correctness(
 
 
 def _check_division_config_correctness(
-    division: Union[List[float], Tuple[float, ...], Dict[str, float]]
+    division: Union[list[float], tuple[float, ...], dict[str, float]]
 ) -> None:
     _check_division_config_types_correctness(division)
     _check_division_config_values_correctness(division)
@@ -271,7 +271,7 @@ def _check_division_config_correctness(
 
 def concatenate_divisions(
     partitioner: Partitioner,
-    partition_division: Union[List[float], Tuple[float, ...], Dict[str, float]],
+    partition_division: Union[list[float], tuple[float, ...], dict[str, float]],
     division_id: Union[int, str],
 ) -> Dataset:
     """Create a dataset by concatenation of divisions from all partitions.
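
The division helpers above turn a list of fractions into contiguous index ranges. A hedged sketch of the core arithmetic (simplified; the real `_create_division_indices_ranges` also handles dict divisions and validation):

    def division_ranges(dataset_length: int, fractions: list[float]) -> list[range]:
        ranges, start = [], 0
        for fraction in fractions:
            end = start + int(fraction * dataset_length)
            ranges.append(range(start, end))
            start = end
        return ranges

    # An 80/20 split of 10 rows -> [range(0, 8), range(8, 10)]
    print(division_ranges(10, [0.8, 0.2]))
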
@@ -342,7 +342,7 @@ def concatenate_divisions(
             )
             partition = divide_dataset(partition, partition_division)
             division = partition[division_id]
-        elif isinstance(partition_division, Dict):
+        elif isinstance(partition_division, dict):
             partition = divide_dataset(partition, partition_division)
             division = partition[division_id]
         else:
diff --git a/datasets/flwr_datasets/utils_test.py b/datasets/flwr_datasets/utils_test.py
index 4add9f88eeb..3c94570471a 100644
--- a/datasets/flwr_datasets/utils_test.py
+++ b/datasets/flwr_datasets/utils_test.py
@@ -14,7 +14,7 @@
 # ==============================================================================
 """Utils tests."""
 import unittest
-from typing import Dict, List, Tuple, Union
+from typing import Union
 
 from parameterized import parameterized_class
 
@@ -62,8 +62,8 @@ class UtilsTests(unittest.TestCase):
     """Utils for tests."""
 
-    partition_division: Union[List[float], Tuple[float, ...], Dict[str, float]]
-    sizes: Tuple[int]
+    partition_division: Union[list[float], tuple[float, ...], dict[str, float]]
+    sizes: tuple[int]
     division_id: Union[int, str]
     expected_concatenation_size: int
 
diff --git a/datasets/flwr_datasets/visualization/bar_plot.py b/datasets/flwr_datasets/visualization/bar_plot.py
index 352c99a572f..2b09fb189c7 100644
--- a/datasets/flwr_datasets/visualization/bar_plot.py
+++ b/datasets/flwr_datasets/visualization/bar_plot.py
@@ -15,7 +15,7 @@
 """Label distribution bar plotting."""
 
 
-from typing import Any, Dict, Optional, Tuple, Union
+from typing import Any, Optional, Union
 
 import numpy as np
 import pandas as pd
@@ -28,15 +28,15 @@
 def _plot_bar(
     dataframe: pd.DataFrame,
     axis: Optional[Axes],
-    figsize: Optional[Tuple[float, float]],
+    figsize: Optional[tuple[float, float]],
     title: str,
     colormap: Optional[Union[str, mcolors.Colormap]],
     partition_id_axis: str,
     size_unit: str,
     legend: bool,
     legend_title: Optional[str],
-    plot_kwargs: Optional[Dict[str, Any]],
-    legend_kwargs: Optional[Dict[str, Any]],
+    plot_kwargs: Optional[dict[str, Any]],
+    legend_kwargs: Optional[dict[str, Any]],
 ) -> Axes:
     if axis is None:
         if figsize is None:
@@ -123,7 +123,7 @@ def _plot_bar(
 def _initialize_figsize(
     partition_id_axis: str,
     num_partitions: int,
-) -> Tuple[float, float]:
+) -> tuple[float, float]:
     figsize = (0.0, 0.0)
     if partition_id_axis == "x":
         figsize = (6.4, 4.8)
@@ -132,7 +132,7 @@ def _initialize_figsize(
     return figsize
 
 
-def _initialize_xy_labels(size_unit: str, partition_id_axis: str) -> Tuple[str, str]:
+def _initialize_xy_labels(size_unit: str, partition_id_axis: str) -> tuple[str, str]:
     xlabel = "Partition ID"
     ylabel = "Count" if size_unit == "absolute" else "Percent %"
 
diff --git a/datasets/flwr_datasets/visualization/comparison_label_distribution.py b/datasets/flwr_datasets/visualization/comparison_label_distribution.py
index 554f6d78d59..8a15452fb86 100644
--- a/datasets/flwr_datasets/visualization/comparison_label_distribution.py
+++ b/datasets/flwr_datasets/visualization/comparison_label_distribution.py
@@ -15,7 +15,7 @@
 """Comparison of label distribution plotting."""
 
 
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import Any, Optional, Union
 
 import matplotlib.colors as mcolors
 import matplotlib.pyplot as plt
@@ -31,22 +31,22 @@
 
 # pylint: disable=too-many-arguments,too-many-locals
 def plot_comparison_label_distribution(
-    partitioner_list: List[Partitioner],
-    label_name: Union[str, List[str]],
+    partitioner_list: list[Partitioner],
+    label_name: Union[str, list[str]],
     plot_type: str = "bar",
     size_unit: str = "percent",
     max_num_partitions: Optional[Union[int]] = 30,
     partition_id_axis: str = "y",
-    figsize: Optional[Tuple[float, float]] = None,
+    figsize: Optional[tuple[float, float]] = None,
     subtitle: str = "Comparison of Per Partition Label Distribution",
-    titles: Optional[List[str]] = None,
+    titles: Optional[list[str]] = None,
     cmap: Optional[Union[str, mcolors.Colormap]] = None,
     legend: bool = False,
     legend_title: Optional[str] = None,
     verbose_labels: bool = True,
-    plot_kwargs_list: Optional[List[Optional[Dict[str, Any]]]] = None,
-    legend_kwargs: Optional[Dict[str, Any]] = None,
-) -> Tuple[Figure, List[Axes], List[pd.DataFrame]]:
+    plot_kwargs_list: Optional[list[Optional[dict[str, Any]]]] = None,
+    legend_kwargs: Optional[dict[str, Any]] = None,
+) -> tuple[Figure, list[Axes], list[pd.DataFrame]]:
     """Compare the label distribution across multiple partitioners.
 
     Parameters
@@ -143,7 +143,7 @@ def plot_comparison_label_distribution(
     num_partitioners = len(partitioner_list)
     if isinstance(label_name, str):
         label_name = [label_name] * num_partitioners
-    elif isinstance(label_name, List):
+    elif isinstance(label_name, list):
         pass
     else:
         raise TypeError(
@@ -215,8 +215,8 @@ def plot_comparison_label_distribution(
 
 def _initialize_comparison_figsize(
-    figsize: Optional[Tuple[float, float]], num_partitioners: int
-) -> Tuple[float, float]:
+    figsize: Optional[tuple[float, float]], num_partitioners: int
+) -> tuple[float, float]:
     if figsize is not None:
         return figsize
     x_value = 4 + (num_partitioners - 1) * 2
@@ -227,7 +227,7 @@ def _initialize_comparison_figsize(
 
 def _initialize_comparison_xy_labels(
     plot_type: str, partition_id_axis: str
-) -> Tuple[str, str]:
+) -> tuple[str, str]:
     if plot_type == "bar":
         xlabel = "Partition ID"
         ylabel = "Class distribution"
diff --git a/datasets/flwr_datasets/visualization/heatmap_plot.py b/datasets/flwr_datasets/visualization/heatmap_plot.py
index 3c87de7693a..b5a0e640eb1 100644
--- a/datasets/flwr_datasets/visualization/heatmap_plot.py
+++ b/datasets/flwr_datasets/visualization/heatmap_plot.py
@@ -15,7 +15,7 @@
 """Label distribution heatmap plotting."""
 
 
-from typing import Any, Dict, Optional, Tuple, Union
+from typing import Any, Optional, Union
 
 import numpy as np
 import pandas as pd
@@ -29,15 +29,15 @@
 def _plot_heatmap(
     dataframe: pd.DataFrame,
     axis: Optional[Axes],
-    figsize: Optional[Tuple[float, float]],
+    figsize: Optional[tuple[float, float]],
     title: str,
     colormap: Optional[Union[str, mcolors.Colormap]],
     partition_id_axis: str,
     size_unit: str,
     legend: bool,
     legend_title: Optional[str],
-    plot_kwargs: Optional[Dict[str, Any]],
-    legend_kwargs: Optional[Dict[str, Any]],
+    plot_kwargs: Optional[dict[str, Any]],
+    legend_kwargs: Optional[dict[str, Any]],
 ) -> Axes:
     if axis is None:
         if figsize is None:
@@ -90,7 +90,7 @@ def _initialize_figsize(
     partition_id_axis: str,
     num_partitions: int,
     num_labels: int,
-) -> Tuple[float, float]:
+) -> tuple[float, float]:
     figsize = (0.0, 0.0)
     if partition_id_axis == "x":
         figsize = (3 * np.sqrt(num_partitions), np.sqrt(num_labels))
diff --git a/datasets/flwr_datasets/visualization/label_distribution.py b/datasets/flwr_datasets/visualization/label_distribution.py
index 0c47bd204a1..b1183c225b8 100644
--- a/datasets/flwr_datasets/visualization/label_distribution.py
+++ b/datasets/flwr_datasets/visualization/label_distribution.py
@@ -15,7 +15,7 @@
 """Label distribution plotting."""
 
 
-from typing import Any, Dict, Optional, Tuple, Union
+from typing import Any, Optional, Union
 
 import matplotlib.colors as mcolors
 import pandas as pd
@@ -40,15 +40,15 @@ def plot_label_distributions(
     max_num_partitions: Optional[int] = None,
     partition_id_axis: str = "x",
     axis: Optional[Axes] = None,
-    figsize: Optional[Tuple[float, float]] = None,
+    figsize: Optional[tuple[float, float]] = None,
     title: str = "Per Partition Label Distribution",
     cmap: Optional[Union[str, mcolors.Colormap]] = None,
     legend: bool = False,
     legend_title: Optional[str] = None,
     verbose_labels: bool = True,
-    plot_kwargs: Optional[Dict[str, Any]] = None,
-    legend_kwargs: Optional[Dict[str, Any]] = None,
-) -> Tuple[Figure, Axes, pd.DataFrame]:
+    plot_kwargs: Optional[dict[str, Any]] = None,
+    legend_kwargs: Optional[dict[str, Any]] = None,
+) -> tuple[Figure, Axes, pd.DataFrame]:
     """Plot the label distribution of the partitions.
 
     Parameters
diff --git a/datasets/pyproject.toml b/datasets/pyproject.toml
index 46ecb56233d..73523af2039 100644
--- a/datasets/pyproject.toml
+++ b/datasets/pyproject.toml
@@ -31,7 +31,6 @@ classifiers = [
     "Programming Language :: Python",
     "Programming Language :: Python :: 3",
     "Programming Language :: Python :: 3 :: Only",
-    "Programming Language :: Python :: 3.8",
     "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
@@ -92,7 +91,7 @@ known_first_party = ["flwr_datasets"]
 
 [tool.black]
 line-length = 88
-target-version = ["py38", "py39", "py310", "py311"]
+target-version = ["py39", "py310", "py311"]
 
 [tool.pylint."MESSAGES CONTROL"]
 disable = "duplicate-code,too-few-public-methods,useless-import-alias"
@@ -130,7 +129,7 @@ wrap-summaries = 88
 wrap-descriptions = 88
 
 [tool.ruff]
-target-version = "py38"
+target-version = "py39"
 line-length = 88
 select = ["D", "E", "F", "W", "B", "ISC", "C4", "UP"]
 fixable = ["D", "E", "F", "W", "B", "ISC", "C4", "UP"]
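
With black and ruff now targeting py39, the builtin generics used throughout this patch are safe in every evaluated position, and the ruff `UP` (pyupgrade) rules already selected in this config can rewrite any leftover typing aliases automatically. A quick runtime illustration of why the version floor matters (a sketch, assuming it is run under the interpreter being tested):

    import sys

    # PEP 585: subscripting builtin containers requires Python 3.9+.
    # On 3.8 the `dict[str, list[int]]` line raises TypeError unless
    # annotations are deferred with `from __future__ import annotations`.
    assert sys.version_info >= (3, 9), "flwr-datasets now requires Python 3.9+"
    Mapping = dict[str, list[int]]
    print(Mapping)
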