break(datasets) Drop support for Python 3.8 (#4213)
adam-narozniak authored Sep 16, 2024
1 parent d94b22d commit 13decda
Showing 34 changed files with 170 additions and 179 deletions.
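
The recurring change in the diffs below is the PEP 585 migration that dropping Python 3.8 enables: the deprecated `typing.List`, `typing.Dict`, `typing.Tuple`, and `typing.Set` aliases are replaced with the built-in `list`, `dict`, `tuple`, and `set`, which accept subscripts directly from Python 3.9 onward. A minimal sketch of the pattern (hypothetical function names, not from this commit):

    # Python 3.8 style: container generics must come from typing
    # from typing import Dict, List, Optional
    # def count_labels(labels: List[str]) -> Dict[str, int]: ...

    from typing import Optional  # Optional/Union still come from typing

    def count_labels(labels: list[str]) -> dict[str, int]:
        """Count occurrences of each label using built-in generics (PEP 585)."""
        counts: dict[str, int] = {}
        for label in labels:
            counts[label] = counts.get(label, 0) + 1
        return counts

    def first_label(labels: list[str]) -> Optional[str]:
        # PEP 604's "str | None" syntax would require Python 3.10
        # (or future-imported annotations), so Optional stays for now.
        return labels[0] if labels else None
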
2 changes: 1 addition & 1 deletion .github/workflows/datasets-e2e.yml
@@ -45,7 +45,7 @@ jobs:
       - name: Bootstrap
         uses: ./.github/actions/bootstrap
         with:
-          python-version: 3.8
+          python-version: 3.9
       - name: Install dependencies
         run: python -m poetry install
       - name: Run tests
2 changes: 1 addition & 1 deletion .github/workflows/datasets.yml
@@ -37,7 +37,7 @@ jobs:
         # In case of a mismatch, the job has to download Python to install it.
         # Note: Due to a bug in actions/setup-python, we have to put "3.10" in
         # quotes as it will otherwise assume "3.1"
-        python: [3.8, 3.9, '3.10', '3.11']
+        python: ['3.9', '3.10', '3.11']
 
     name: Python ${{ matrix.python }}
 
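
The comment in this hunk is about YAML scalar parsing: an unquoted 3.10 is read as the float 3.1, so only quoting preserves the intended version string. A quick way to see the effect (assumes the third-party PyYAML package, which is not part of this diff):

    import yaml  # pip install pyyaml

    # Quoted entries survive as strings.
    print(yaml.safe_load("python: [3.8, 3.9, '3.10', '3.11']"))
    # {'python': [3.8, 3.9, '3.10', '3.11']}

    # Unquoted 3.10 is parsed as a float and silently truncated to 3.1.
    print(yaml.safe_load("python: [3.10, 3.11]"))
    # {'python': [3.1, 3.11]}
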
10 changes: 5 additions & 5 deletions datasets/flwr_datasets/common/telemetry.py
@@ -25,7 +25,7 @@
 from concurrent.futures import Future, ThreadPoolExecutor
 from enum import Enum, auto
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Union, cast
+from typing import Any, Optional, Union, cast
 
 from flwr_datasets.common.version import package_name, package_version
 
@@ -114,7 +114,7 @@ class EventType(str, Enum):
     # The type signature is not compatible with mypy, pylint and flake8
     # so each of those needs to be disabled for this line.
     # pylint: disable-next=no-self-argument,arguments-differ,line-too-long
-    def _generate_next_value_(name: str, start: int, count: int, last_values: List[Any]) -> Any:  # type: ignore # noqa: E501
+    def _generate_next_value_(name: str, start: int, count: int, last_values: list[Any]) -> Any:  # type: ignore # noqa: E501
         return name
 
     PING = auto()
@@ -127,7 +127,7 @@ def _generate_next_value_(name: str, start: int, count: int, last_values: List[Any]) -> Any:
 
 # Use the ThreadPoolExecutor with max_workers=1 to have a queue
 # and also ensure that telemetry calls are not blocking.
-state: Dict[str, Union[Optional[str], Optional[ThreadPoolExecutor]]] = {
+state: dict[str, Union[Optional[str], Optional[ThreadPoolExecutor]]] = {
     # Will be assigned ThreadPoolExecutor(max_workers=1)
     # in event() the first time it's required
     "executor": None,
@@ -143,7 +143,7 @@ def _generate_next_value_(name: str, start: int, count: int, last_values: List[Any]) -> Any:
 # pylint: disable-next=unsubscriptable-object
 def event(
     event_type: EventType,
-    event_details: Optional[Dict[str, Any]] = None,
+    event_details: Optional[dict[str, Any]] = None,
 ) -> Future:  # type: ignore
     """Submit create_event to ThreadPoolExecutor to avoid blocking."""
     if state["executor"] is None:
@@ -155,7 +155,7 @@ def event(
     return result
 
 
-def create_event(event_type: EventType, event_details: Optional[Dict[str, Any]]) -> str:
+def create_event(event_type: EventType, event_details: Optional[dict[str, Any]]) -> str:
     """Create telemetry event."""
     if state["source"] is None:
         state["source"] = _get_source_id()
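
The state dict and event() above implement a fire-and-forget telemetry queue: a single-worker ThreadPoolExecutor serializes events while keeping callers non-blocking. A stripped-down sketch of the same idea (simplified; create_event here is a stand-in, not Flower's implementation):

    from concurrent.futures import Future, ThreadPoolExecutor
    from typing import Any, Optional

    _executor: Optional[ThreadPoolExecutor] = None

    def create_event(name: str, details: Optional[dict[str, Any]]) -> str:
        # Stand-in for the real payload construction and network send.
        return f"{name}: {details or {}}"

    def event(name: str, details: Optional[dict[str, Any]] = None) -> Future:
        global _executor
        if _executor is None:
            # max_workers=1 gives an implicit FIFO queue of pending events.
            _executor = ThreadPoolExecutor(max_workers=1)
        # submit() returns immediately; the caller is never blocked.
        return _executor.submit(create_event, name, details)

    future = event("PING")
    print(future.result())  # blocks only if you explicitly ask for the result
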
4 changes: 2 additions & 2 deletions datasets/flwr_datasets/common/typing.py
@@ -15,12 +15,12 @@
 """Flower Datasets type definitions."""
 
 
-from typing import Any, List
+from typing import Any
 
 import numpy as np
 import numpy.typing as npt
 
 NDArray = npt.NDArray[Any]
 NDArrayInt = npt.NDArray[np.int_]
 NDArrayFloat = npt.NDArray[np.float_]
-NDArrays = List[NDArray]
+NDArrays = list[NDArray]
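
With the typing import gone, the NDArrays alias subscripts the built-in list at runtime, which is exactly what Python 3.9 makes legal. Usage of such aliases might look like this (a hedged sketch, not code from the repository):

    from typing import Any

    import numpy as np
    import numpy.typing as npt

    NDArray = npt.NDArray[Any]
    NDArrays = list[NDArray]  # valid at runtime only on Python 3.9+

    def split_in_half(array: NDArray) -> NDArrays:
        """Return the two halves of an array as a list of arrays."""
        mid = array.shape[0] // 2
        return [array[:mid], array[mid:]]

    print(split_in_half(np.arange(6)))  # [array([0, 1, 2]), array([3, 4, 5])]
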
5 changes: 2 additions & 3 deletions datasets/flwr_datasets/common/version.py
@@ -19,15 +19,14 @@
 
 
 import importlib.metadata as importlib_metadata
-from typing import Tuple
 
 
-def _check_package(name: str) -> Tuple[str, str]:
+def _check_package(name: str) -> tuple[str, str]:
     version: str = importlib_metadata.version(name)
     return name, version
 
 
-def _version() -> Tuple[str, str]:
+def _version() -> tuple[str, str]:
     """Read and return Flower Dataset package name and version.
 
     Returns
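
_check_package returns a (name, version) pair straight from the installed package metadata; only its annotation changes here. The mechanism can be tried directly (the package name below is an arbitrary example):

    import importlib.metadata as importlib_metadata

    def check_package(name: str) -> tuple[str, str]:
        # importlib.metadata.version raises PackageNotFoundError
        # if the distribution is not installed.
        version: str = importlib_metadata.version(name)
        return name, version

    print(check_package("numpy"))  # e.g. ('numpy', '1.26.4')
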
8 changes: 4 additions & 4 deletions datasets/flwr_datasets/federated_dataset_test.py
@@ -17,7 +17,7 @@
 
 
 import unittest
-from typing import Dict, Union
+from typing import Union
 from unittest.mock import Mock, patch
 
 import numpy as np
@@ -385,7 +385,7 @@ def test_dict_of_partitioners_passes_partitioners(self) -> None:
         """Test if partitioners are passed directly (no recreation)."""
         num_train_partitions = 100
         num_test_partitions = 100
-        partitioners: Dict[str, Union[Partitioner, int]] = {
+        partitioners: dict[str, Union[Partitioner, int]] = {
             "train": IidPartitioner(num_partitions=num_train_partitions),
             "test": IidPartitioner(num_partitions=num_test_partitions),
         }
@@ -419,7 +419,7 @@ def test_mixed_type_partitioners_passes_instantiated_partitioners(self) -> None:
         """Test if an instantiated partitioner is passed directly."""
         num_train_partitions = 100
         num_test_partitions = 100
-        partitioners: Dict[str, Union[Partitioner, int]] = {
+        partitioners: dict[str, Union[Partitioner, int]] = {
             "train": IidPartitioner(num_partitions=num_train_partitions),
             "test": num_test_partitions,
         }
@@ -433,7 +433,7 @@ def test_mixed_type_partitioners_creates_from_int(self) -> None:
         """Test if an IidPartitioner partitioner is created."""
         num_train_partitions = 100
         num_test_partitions = 100
-        partitioners: Dict[str, Union[Partitioner, int]] = {
+        partitioners: dict[str, Union[Partitioner, int]] = {
             "train": IidPartitioner(num_partitions=num_train_partitions),
             "test": num_test_partitions,
         }
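
These tests exercise a convenience of the partitioners argument: each split maps either to an already-instantiated Partitioner or to a bare int, and ints are turned into IidPartitioner instances internally. A sketch of that normalization under the same convention (illustrative only, not the FederatedDataset source):

    from typing import Union

    from flwr_datasets.partitioner import IidPartitioner, Partitioner

    def instantiate_partitioners(
        partitioners: dict[str, Union[Partitioner, int]]
    ) -> dict[str, Partitioner]:
        """Replace bare ints with IidPartitioner(num_partitions=int)."""
        instantiated: dict[str, Partitioner] = {}
        for split, partitioner in partitioners.items():
            if isinstance(partitioner, int):
                partitioner = IidPartitioner(num_partitions=partitioner)
            instantiated[split] = partitioner
        return instantiated

    partitioners = instantiate_partitioners(
        {"train": 100, "test": IidPartitioner(num_partitions=10)}
    )
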
6 changes: 3 additions & 3 deletions datasets/flwr_datasets/metrics/utils.py
@@ -16,7 +16,7 @@
 
 
 import warnings
-from typing import List, Optional, Union
+from typing import Optional, Union
 
 import pandas as pd
 
@@ -206,7 +206,7 @@ def compute_frequencies(
 
 
 def _compute_counts(
-    labels: Union[List[int], List[str]], unique_labels: Union[List[int], List[str]]
+    labels: Union[list[int], list[str]], unique_labels: Union[list[int], list[str]]
 ) -> pd.Series:
     """Compute the count of labels when taking into account all possible labels.
@@ -237,7 +237,7 @@ def _compute_counts(
 
 
 def _compute_frequencies(
-    labels: Union[List[int], List[str]], unique_labels: Union[List[int], List[str]]
+    labels: Union[list[int], list[str]], unique_labels: Union[list[int], list[str]]
 ) -> pd.Series:
     """Compute the distribution of labels when taking into account all possible labels.
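
Per its docstring, _compute_counts pads the label counts with zeros for labels that never occur, so every partition reports over the same index of possible labels. One way to get that behavior with pandas (an approximation of the described semantics, not the repository's exact code):

    from typing import Union

    import pandas as pd

    def compute_counts(
        labels: Union[list[int], list[str]],
        unique_labels: Union[list[int], list[str]],
    ) -> pd.Series:
        """Count labels, including zero counts for absent unique labels."""
        counts = pd.Series(labels).value_counts()
        # reindex inserts missing labels with NaN; fill_value keeps them at 0.
        return counts.reindex(unique_labels, fill_value=0)

    print(compute_counts(["cat", "cat", "dog"], ["cat", "dog", "bird"]))
    # cat     2
    # dog     1
    # bird    0
    # dtype: int64
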
38 changes: 19 additions & 19 deletions datasets/flwr_datasets/mock_utils_test.py
@@ -19,7 +19,7 @@
 import random
 import string
 from datetime import datetime, timedelta
-from typing import Any, Dict, List, Optional, Set, Tuple, Union
+from typing import Any, Optional, Union
 
 import numpy as np
 from PIL import Image
@@ -30,7 +30,7 @@
 
 def _generate_artificial_strings(
     num_rows: int, num_unique: int, string_length: int, seed: int = 42
-) -> List[str]:
+) -> list[str]:
     """Create list of strings for categories or labels mocking.
 
     Note to keep the seed the same if you reuse this function for in creation of the
@@ -53,7 +53,7 @@ def _generate_artificial_strings(
         List of generated strings.
     """
     random.seed(seed)
-    unique_strings: Set[str] = set()
+    unique_strings: set[str] = set()
     while len(unique_strings) < num_unique:
         random_str = "".join(
             random.choices(string.ascii_letters + string.digits, k=string_length)
@@ -68,7 +68,7 @@ def _generate_artificial_strings(
     return artificial_column
 
 
-def _generate_artificial_categories(num_rows: int, choices: List[Any]) -> List[str]:
+def _generate_artificial_categories(num_rows: int, choices: list[Any]) -> list[str]:
     """Create list of strings from given `choices` list."""
     artificial_column = choices.copy()
     remaining_to_allocate = num_rows - len(choices)
@@ -82,7 +82,7 @@ def _generate_random_word(length: int) -> str:
     return "".join(random.choices(string.ascii_letters, k=length))
 
 
-def _generate_random_text_column(num_rows: int, length: int) -> List[str]:
+def _generate_random_text_column(num_rows: int, length: int) -> list[str]:
     """Generate a list of random text of specified length."""
     text_col = []
     for _ in range(num_rows):
@@ -98,7 +98,7 @@ def _generate_random_sentence(
 ) -> str:
     """Generate a random sentence with words of random lengths."""
     sentence_length = random.randint(min_sentence_length, max_sentence_length)
-    sentence: List[str] = []
+    sentence: list[str] = []
     while len(" ".join(sentence)) < sentence_length:
         word_length = random.randint(min_word_length, max_word_length)
         word = _generate_random_word(word_length)
@@ -112,7 +112,7 @@ def _generate_random_sentences(
     max_word_length: int,
     min_sentence_length: int,
     max_sentence_length: int,
-) -> List[str]:
+) -> list[str]:
     """Generate a list of random sentences."""
     text_col = [
         _generate_random_sentence(
@@ -123,7 +123,7 @@ def _generate_random_sentences(
     return text_col
 
 
-def _make_num_rows_none(column: List[Any], num_none: int) -> List[Any]:
+def _make_num_rows_none(column: list[Any], num_none: int) -> list[Any]:
     """Assign none num_none times to the given list."""
     column_copy = column.copy()
     none_positions = random.sample(range(len(column_copy)), num_none)
@@ -154,29 +154,29 @@ def _generate_random_date_column(
     end_date: datetime,
     date_format: str = "%a %b %d %H:%M:%S %Y",
     as_string: bool = True,
-) -> List[Union[str, datetime]]:
+) -> list[Union[str, datetime]]:
     """Generate a list of random dates."""
     return [
         _generate_random_date(start_date, end_date, date_format, as_string)
         for _ in range(num_rows)
     ]
 
 
-def _generate_random_int_column(num_rows: int, min_int: int, max_int: int) -> List[int]:
+def _generate_random_int_column(num_rows: int, min_int: int, max_int: int) -> list[int]:
     """Generate a list of ints."""
     return [random.randint(min_int, max_int) for _ in range(num_rows)]
 
 
-def _generate_random_bool_column(num_rows: int) -> List[bool]:
+def _generate_random_bool_column(num_rows: int) -> list[bool]:
     """Generate a list of bools."""
     return [random.choice([True, False]) for _ in range(num_rows)]
 
 
 def _generate_random_image_column(
     num_rows: int,
-    image_size: Union[Tuple[int, int], Tuple[int, int, int]],
+    image_size: Union[tuple[int, int], tuple[int, int, int]],
     simulate_type: str,
-) -> List[Any]:
+) -> list[Any]:
     """Simulate the images with the format that is found in HF Hub.
 
     Directly using `Image.fromarray` does not work because it creates `PIL.Image.Image`.
@@ -207,7 +207,7 @@ def generate_random_audio_column(
     num_rows: int,
     sampling_rate: int,
     length_in_samples: int,
-) -> List[Dict[str, Any]]:
+) -> list[dict[str, Any]]:
     """Simulate the audio column.
 
     Audio column in the datset is comprised from an array or floats, sample_rate and a
@@ -365,8 +365,8 @@ def _mock_speach_commands(num_rows: int) -> Dataset:
 
 def _load_mocked_dataset(
     dataset_name: str,
-    num_rows: List[int],
-    split_names: List[str],
+    num_rows: list[int],
+    split_names: list[str],
     subset: str = "",
 ) -> DatasetDict:
     dataset_dict = {}
@@ -380,7 +380,7 @@ def _load_mocked_dataset_by_partial_download(
 def _load_mocked_dataset_by_partial_download(
     dataset_name: str,
     split_name: str,
-    skip_take_list: List[Tuple[int, int]],
+    skip_take_list: list[tuple[int, int]],
     subset_name: Optional[str] = None,
 ) -> Dataset:
     """Download a partial dataset.
@@ -423,8 +423,8 @@ def _load_mocked_dataset_by_partial_download(
 
 def _load_mocked_dataset_dict_by_partial_download(
     dataset_name: str,
-    split_names: List[str],
-    skip_take_lists: List[List[Tuple[int, int]]],
+    split_names: list[str],
+    skip_take_lists: list[list[tuple[int, int]]],
     subset_name: Optional[str] = None,
 ) -> DatasetDict:
     """Like _load_mocked_dataset_by_partial_download but for many splits."""
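
The image-mocking docstring above makes a subtle point: datasets decoded from the HF Hub yield concrete subclasses such as PIL.PngImagePlugin.PngImageFile, not the plain PIL.Image.Image that Image.fromarray produces, so a faithful mock has to round-trip through an encoded file. A sketch of that trick (an assumption about the intended mechanism, inferred from the docstring, not the file's actual code):

    import io

    import numpy as np
    from PIL import Image

    def mock_hub_image(image_size: tuple[int, int, int] = (32, 32, 3)) -> Image.Image:
        """Create a random image whose type matches a Hub-decoded PNG."""
        array = np.random.randint(0, 256, size=image_size, dtype=np.uint8)
        buffer = io.BytesIO()
        # Encode to PNG in memory, then reopen so PIL returns the
        # format-specific subclass instead of a plain Image.Image.
        Image.fromarray(array).save(buffer, format="PNG")
        buffer.seek(0)
        return Image.open(buffer)

    print(type(mock_hub_image()))  # <class 'PIL.PngImagePlugin.PngImageFile'>
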
14 changes: 7 additions & 7 deletions datasets/flwr_datasets/partitioner/dirichlet_partitioner.py
@@ -16,7 +16,7 @@
 
 
 import warnings
-from typing import Dict, List, Optional, Union
+from typing import Optional, Union
 
 import numpy as np
 
@@ -89,7 +89,7 @@ def __init__(  # pylint: disable=R0913
         self,
         num_partitions: int,
         partition_by: str,
-        alpha: Union[int, float, List[float], NDArrayFloat],
+        alpha: Union[int, float, list[float], NDArrayFloat],
         min_partition_size: int = 10,
         self_balancing: bool = False,
         shuffle: bool = True,
@@ -110,8 +110,8 @@ def __init__(  # pylint: disable=R0913
         # Utility attributes
         # The attributes below are determined during the first call to load_partition
         self._avg_num_of_samples_per_partition: Optional[float] = None
-        self._unique_classes: Optional[Union[List[int], List[str]]] = None
-        self._partition_id_to_indices: Dict[int, List[int]] = {}
+        self._unique_classes: Optional[Union[list[int], list[str]]] = None
+        self._partition_id_to_indices: dict[int, list[int]] = {}
         self._partition_id_to_indices_determined = False
 
     def load_partition(self, partition_id: int) -> datasets.Dataset:
@@ -142,7 +142,7 @@ def num_partitions(self) -> int:
         return self._num_partitions
 
     def _initialize_alpha(
-        self, alpha: Union[int, float, List[float], NDArrayFloat]
+        self, alpha: Union[int, float, list[float], NDArrayFloat]
     ) -> NDArrayFloat:
         """Convert alpha to the used format in the code a NDArrayFloat.
@@ -164,7 +164,7 @@ def _initialize_alpha(
             alpha = np.array([float(alpha)], dtype=float).repeat(self._num_partitions)
         elif isinstance(alpha, float):
             alpha = np.array([alpha], dtype=float).repeat(self._num_partitions)
-        elif isinstance(alpha, List):
+        elif isinstance(alpha, list):
            if len(alpha) != self._num_partitions:
                raise ValueError(
                    "If passing alpha as a List, it needs to be of length of equal to "
@@ -217,7 +217,7 @@ def _determine_partition_id_to_indices_if_needed(
         sampling_try = 0
         while True:
             # Prepare data structure to store indices assigned to partition ids
-            partition_id_to_indices: Dict[int, List[int]] = {}
+            partition_id_to_indices: dict[int, list[int]] = {}
             for nid in range(self._num_partitions):
                 partition_id_to_indices[nid] = []
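
One change in this file goes beyond annotations: the isinstance check against typing.List becomes a check against the built-in list. The built-in is the idiomatic form, and subscripted aliases such as list[float] are rejected by isinstance altogether. The surrounding _initialize_alpha logic normalizes every accepted alpha form to one concentration value per partition; a condensed sketch of that behavior (simplified from the hunks above, not the class itself):

    from typing import Union

    import numpy as np
    import numpy.typing as npt

    NDArrayFloat = npt.NDArray[np.float64]

    def initialize_alpha(
        alpha: Union[int, float, list[float], NDArrayFloat], num_partitions: int
    ) -> NDArrayFloat:
        """Normalize alpha to an array with one value per partition."""
        if isinstance(alpha, (int, float)):
            return np.array([float(alpha)], dtype=float).repeat(num_partitions)
        if isinstance(alpha, list):  # built-ins work with isinstance; list[float] would not
            if len(alpha) != num_partitions:
                raise ValueError("alpha list must have one entry per partition")
            return np.asarray(alpha, dtype=float)
        return np.asarray(alpha, dtype=float)  # already an ndarray

    print(initialize_alpha(0.5, 4))  # [0.5 0.5 0.5 0.5]
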
datasets/flwr_datasets/partitioner/dirichlet_partitioner_test.py
@@ -17,7 +17,7 @@
 
 # pylint: disable=W0212
 import unittest
-from typing import Tuple, Union
+from typing import Union
 
 import numpy as np
 from numpy.typing import NDArray
@@ -33,7 +33,7 @@ def _dummy_setup(
     num_rows: int,
     partition_by: str,
     self_balancing: bool = True,
-) -> Tuple[Dataset, DirichletPartitioner]:
+) -> tuple[Dataset, DirichletPartitioner]:
     """Create a dummy dataset and partitioner for testing."""
     data = {
         partition_by: [i % 3 for i in range(num_rows)],
@@ -16,7 +16,7 @@
 
 
 from collections import Counter
-from typing import Dict, List, Optional, Union
+from typing import Optional, Union
 
 import numpy as np
 
@@ -182,7 +182,7 @@ def __init__(  # pylint: disable=R0913
         self._num_unique_labels: int = 0
         self._num_columns: int = 0
         self._partition_id_to_indices_determined = False
-        self._partition_id_to_indices: Dict[int, List[int]] = {}
+        self._partition_id_to_indices: dict[int, list[int]] = {}
 
     def load_partition(self, partition_id: int) -> datasets.Dataset:
         """Load a partition based on the partition index.