diff --git a/.github/workflows/kedro-datasets.yml b/.github/workflows/kedro-datasets.yml
index 5dfeed6b6..04927cd6f 100644
--- a/.github/workflows/kedro-datasets.yml
+++ b/.github/workflows/kedro-datasets.yml
@@ -22,7 +22,7 @@ jobs:
     strategy:
       matrix:
         os: [ ubuntu-latest, windows-latest ]
-        python-version: [ "3.8", "3.9", "3.10", "3.11" ]
+        python-version: [ "3.9", "3.10", "3.11" ]
     uses: ./.github/workflows/unit-tests.yml
     with:
       plugin: kedro-datasets
@@ -41,15 +41,15 @@ jobs:
     steps:
       - name: Checkout code
        uses: actions/checkout@v3
-      - name: Set up Python 3.8
+      - name: Set up Python 3.9
        uses: actions/setup-python@v3
        with:
-          python-version: "3.8"
+          python-version: "3.9"
      - name: Cache python packages
        uses: actions/cache@v3
        with:
          path: ~/.cache/pip
-          key: kedro-datasets-ubuntu-latest-python-"3.8"
+          key: kedro-datasets-ubuntu-latest-python-"3.9"
          restore-keys: kedro-datasets
      - name: Install dependencies
        run: |
diff --git a/kedro-datasets/.readthedocs.yaml b/kedro-datasets/.readthedocs.yaml
index 0575bcf1c..ad0ac3665 100644
--- a/kedro-datasets/.readthedocs.yaml
+++ b/kedro-datasets/.readthedocs.yaml
@@ -8,7 +8,7 @@ version: 2
 build:
   os: ubuntu-22.04
   tools:
-    python: "3.8"
+    python: "3.9"
   jobs:
     pre_install:
       # pip==23.2 breaks pip-tools<7.0, and pip-tools>=7.0 does not support Python 3.7
diff --git a/kedro-datasets/README.md b/kedro-datasets/README.md
index 66f324487..8309dbcce 100644
--- a/kedro-datasets/README.md
+++ b/kedro-datasets/README.md
@@ -1,7 +1,7 @@
 # Kedro-Datasets
 
 [![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
-[![Python Version](https://img.shields.io/badge/python-3.8%20%7C%203.9%20%7C%203.10%20%7C%203.11-blue.svg)](https://pypi.org/project/kedro-datasets/)
+[![Python Version](https://img.shields.io/badge/python-3.9%20%7C%203.10%20%7C%203.11-blue.svg)](https://pypi.org/project/kedro-datasets/)
 [![PyPI Version](https://badge.fury.io/py/kedro-datasets.svg)](https://pypi.org/project/kedro-datasets/)
 [![Code Style: Black](https://img.shields.io/badge/code%20style-black-black.svg)](https://github.com/ambv/black)
diff --git a/kedro-datasets/RELEASE.md b/kedro-datasets/RELEASE.md
index 9149a8a08..0f73530ea 100755
--- a/kedro-datasets/RELEASE.md
+++ b/kedro-datasets/RELEASE.md
@@ -1,7 +1,7 @@
 # Upcoming Release
 
 ## Major features and improvements
-* Removed support for Python 3.7
+* Removed support for Python 3.7 and 3.8
 * Spark and Databricks based datasets now support [databricks-connect>=13.0](https://docs.databricks.com/en/dev-tools/databricks-connect-ref.html)
 
 ## Bug fixes and other changes
diff --git a/kedro-datasets/docs/source/conf.py b/kedro-datasets/docs/source/conf.py
index a3d21b29d..87d7475ef 100644
--- a/kedro-datasets/docs/source/conf.py
+++ b/kedro-datasets/docs/source/conf.py
@@ -99,6 +99,7 @@
     "py:class": (
         "kedro.io.core.AbstractDataset",
         "kedro.io.AbstractDataset",
+        "AbstractDataset",
         "kedro.io.core.Version",
         "requests.auth.AuthBase",
         "google.oauth2.credentials.Credentials",
diff --git a/kedro-datasets/kedro_datasets/api/api_dataset.py b/kedro-datasets/kedro_datasets/api/api_dataset.py
index b4c979304..dca0e898d 100644
--- a/kedro-datasets/kedro_datasets/api/api_dataset.py
+++ b/kedro-datasets/kedro_datasets/api/api_dataset.py
@@ -3,7 +3,7 @@
 """
 import json as json_  # make pylint happy
 from copy import deepcopy
-from typing import Any, Dict, List, Tuple, Union
+from typing import Any, Union
 
 import requests
 from requests import Session, sessions
@@ -93,10 +93,10 @@ def __init__(  # noqa: PLR0913
         *,
         url: str,
str, method: str = "GET", - load_args: Dict[str, Any] = None, - save_args: Dict[str, Any] = None, - credentials: Union[Tuple[str, str], List[str], AuthBase] = None, - metadata: Dict[str, Any] = None, + load_args: dict[str, Any] = None, + save_args: dict[str, Any] = None, + credentials: Union[tuple[str, str], list[str], AuthBase] = None, + metadata: dict[str, Any] = None, ) -> None: """Creates a new instance of ``APIDataset`` to fetch data from an API endpoint. @@ -147,7 +147,7 @@ def __init__( # noqa: PLR0913 if "timeout" in self._params: self._params["timeout"] = self._convert_type(self._params["timeout"]) - self._request_args: Dict[str, Any] = { + self._request_args: dict[str, Any] = { "url": url, "method": method, "auth": self._convert_type(self._auth), @@ -163,11 +163,11 @@ def _convert_type(value: Any): However, for some parameters in the Python requests library, only Tuples are allowed. """ - if isinstance(value, List): + if isinstance(value, list): return tuple(value) return value - def _describe(self) -> Dict[str, Any]: + def _describe(self) -> dict[str, Any]: # prevent auth from logging request_args_cp = self._request_args.copy() request_args_cp.pop("auth", None) @@ -193,7 +193,7 @@ def _load(self) -> requests.Response: def _execute_save_with_chunks( self, - json_data: List[Dict[str, Any]], + json_data: list[dict[str, Any]], ) -> requests.Response: chunk_size = self._chunk_size n_chunks = len(json_data) // chunk_size + 1 diff --git a/kedro-datasets/kedro_datasets/biosequence/biosequence_dataset.py b/kedro-datasets/kedro_datasets/biosequence/biosequence_dataset.py index 61a03047c..6152be401 100644 --- a/kedro-datasets/kedro_datasets/biosequence/biosequence_dataset.py +++ b/kedro-datasets/kedro_datasets/biosequence/biosequence_dataset.py @@ -3,7 +3,7 @@ """ from copy import deepcopy from pathlib import PurePosixPath -from typing import Any, Dict, List +from typing import Any import fsspec from Bio import SeqIO @@ -12,7 +12,7 @@ from kedro_datasets._io import AbstractDataset -class BioSequenceDataset(AbstractDataset[List, List]): +class BioSequenceDataset(AbstractDataset[list, list]): r"""``BioSequenceDataset`` loads and saves data to a sequence file. 
     Example:
@@ -42,18 +42,18 @@ class BioSequenceDataset(AbstractDataset[List, List]):
 
     """
 
-    DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
-    DEFAULT_SAVE_ARGS: Dict[str, Any] = {}
+    DEFAULT_LOAD_ARGS: dict[str, Any] = {}
+    DEFAULT_SAVE_ARGS: dict[str, Any] = {}
 
     def __init__(  # noqa: PLR0913
         self,
         *,
         filepath: str,
-        load_args: Dict[str, Any] = None,
-        save_args: Dict[str, Any] = None,
-        credentials: Dict[str, Any] = None,
-        fs_args: Dict[str, Any] = None,
-        metadata: Dict[str, Any] = None,
+        load_args: dict[str, Any] = None,
+        save_args: dict[str, Any] = None,
+        credentials: dict[str, Any] = None,
+        fs_args: dict[str, Any] = None,
+        metadata: dict[str, Any] = None,
     ) -> None:
         """
         Creates a new instance of ``BioSequenceDataset`` pointing
@@ -111,7 +111,7 @@ def __init__(  # noqa: PLR0913
 
         self.metadata = metadata
 
-    def _describe(self) -> Dict[str, Any]:
+    def _describe(self) -> dict[str, Any]:
         return {
             "filepath": self._filepath,
             "protocol": self._protocol,
@@ -119,12 +119,12 @@
             "save_args": self._save_args,
         }
 
-    def _load(self) -> List:
+    def _load(self) -> list:
         load_path = get_filepath_str(self._filepath, self._protocol)
         with self._fs.open(load_path, **self._fs_open_args_load) as fs_file:
             return list(SeqIO.parse(handle=fs_file, **self._load_args))
 
-    def _save(self, data: List) -> None:
+    def _save(self, data: list) -> None:
         save_path = get_filepath_str(self._filepath, self._protocol)
 
         with self._fs.open(save_path, **self._fs_open_args_save) as fs_file:
diff --git a/kedro-datasets/kedro_datasets/dask/parquet_dataset.py b/kedro-datasets/kedro_datasets/dask/parquet_dataset.py
index d0127513e..8c6819a6e 100644
--- a/kedro-datasets/kedro_datasets/dask/parquet_dataset.py
+++ b/kedro-datasets/kedro_datasets/dask/parquet_dataset.py
@@ -1,7 +1,7 @@
 """``ParquetDataset`` is a data set used to load and save data to parquet files
 using Dask dataframe"""
 from copy import deepcopy
-from typing import Any, Dict
+from typing import Any
 
 import dask.dataframe as dd
 import fsspec
@@ -78,18 +78,18 @@ class ParquetDataset(AbstractDataset[dd.DataFrame, dd.DataFrame]):
         col3: [[int32]]
     """
 
-    DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
-    DEFAULT_SAVE_ARGS: Dict[str, Any] = {"write_index": False}
+    DEFAULT_LOAD_ARGS: dict[str, Any] = {}
+    DEFAULT_SAVE_ARGS: dict[str, Any] = {"write_index": False}
 
     def __init__(  # noqa: PLR0913
         self,
         *,
         filepath: str,
-        load_args: Dict[str, Any] = None,
-        save_args: Dict[str, Any] = None,
-        credentials: Dict[str, Any] = None,
-        fs_args: Dict[str, Any] = None,
-        metadata: Dict[str, Any] = None,
+        load_args: dict[str, Any] = None,
+        save_args: dict[str, Any] = None,
+        credentials: dict[str, Any] = None,
+        fs_args: dict[str, Any] = None,
+        metadata: dict[str, Any] = None,
     ) -> None:
         """Creates a new instance of ``ParquetDataset`` pointing to concrete
         parquet files.
@@ -123,7 +123,7 @@ def __init__(  # noqa: PLR0913
         self._save_args.update(save_args)
 
     @property
-    def fs_args(self) -> Dict[str, Any]:
+    def fs_args(self) -> dict[str, Any]:
        """Property of optional file system parameters.
        Returns:
@@ -133,7 +133,7 @@ def fs_args(self) -> Dict[str, Any]:
         fs_args.update(self._credentials)
         return fs_args
 
-    def _describe(self) -> Dict[str, Any]:
+    def _describe(self) -> dict[str, Any]:
         return {
             "filepath": self._filepath,
             "load_args": self._load_args,
diff --git a/kedro-datasets/kedro_datasets/databricks/managed_table_dataset.py b/kedro-datasets/kedro_datasets/databricks/managed_table_dataset.py
index 3dd019a1a..31cc72002 100644
--- a/kedro-datasets/kedro_datasets/databricks/managed_table_dataset.py
+++ b/kedro-datasets/kedro_datasets/databricks/managed_table_dataset.py
@@ -4,7 +4,7 @@
 import logging
 import re
 from dataclasses import dataclass
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Optional, Union
 
 import pandas as pd
 from kedro.io.core import Version, VersionNotFoundError
@@ -33,7 +33,7 @@ class ManagedTable:
     dataframe_type: str
     primary_key: Optional[str]
     owner_group: str
-    partition_columns: Union[str, List[str]]
+    partition_columns: Union[str, list[str]]
     json_schema: StructType
 
     def __post_init__(self):
@@ -203,12 +203,12 @@ def __init__(  # noqa: PLR0913
         database: str = "default",
         write_mode: Union[str, None] = None,
         dataframe_type: str = "spark",
-        primary_key: Optional[Union[str, List[str]]] = None,
+        primary_key: Optional[Union[str, list[str]]] = None,
         version: Version = None,
         # the following parameters are used by project hooks
         # to create or update table properties
-        schema: Dict[str, Any] = None,
-        partition_columns: List[str] = None,
+        schema: dict[str, Any] = None,
+        partition_columns: list[str] = None,
         owner_group: str = None,
     ) -> None:
         """Creates a new instance of ``ManagedTableDataset``.
@@ -387,7 +387,7 @@ def _save(self, data: Union[DataFrame, pd.DataFrame]) -> None:
         elif self._table.write_mode == "append":
             self._save_append(data)
 
-    def _describe(self) -> Dict[str, str]:
+    def _describe(self) -> dict[str, str]:
         """Returns a description of the instance of ManagedTableDataset
 
         Returns:
diff --git a/kedro-datasets/kedro_datasets/email/message_dataset.py b/kedro-datasets/kedro_datasets/email/message_dataset.py
index fd4a5e727..d2eb03d70 100644
--- a/kedro-datasets/kedro_datasets/email/message_dataset.py
+++ b/kedro-datasets/kedro_datasets/email/message_dataset.py
@@ -8,7 +8,7 @@
 from email.parser import Parser
 from email.policy import default
 from pathlib import PurePosixPath
-from typing import Any, Dict
+from typing import Any
 
 import fsspec
 from kedro.io.core import Version, get_filepath_str, get_protocol_and_path
@@ -47,19 +47,19 @@ class EmailMessageDataset(AbstractVersionedDataset[Message, Message]):
 
     """
 
-    DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
-    DEFAULT_SAVE_ARGS: Dict[str, Any] = {}
+    DEFAULT_LOAD_ARGS: dict[str, Any] = {}
+    DEFAULT_SAVE_ARGS: dict[str, Any] = {}
 
     def __init__(  # noqa: PLR0913
         self,
         *,
         filepath: str,
-        load_args: Dict[str, Any] = None,
-        save_args: Dict[str, Any] = None,
+        load_args: dict[str, Any] = None,
+        save_args: dict[str, Any] = None,
         version: Version = None,
-        credentials: Dict[str, Any] = None,
-        fs_args: Dict[str, Any] = None,
-        metadata: Dict[str, Any] = None,
+        credentials: dict[str, Any] = None,
+        fs_args: dict[str, Any] = None,
+        metadata: dict[str, Any] = None,
     ) -> None:
         """Creates a new instance of ``EmailMessageDataset`` pointing to a concrete
         text file on a specific filesystem.
@@ -140,7 +140,7 @@ def __init__(  # noqa: PLR0913
         self._fs_open_args_load = _fs_open_args_load
         self._fs_open_args_save = _fs_open_args_save
 
-    def _describe(self) -> Dict[str, Any]:
+    def _describe(self) -> dict[str, Any]:
         return {
             "filepath": self._filepath,
             "protocol": self._protocol,
diff --git a/kedro-datasets/kedro_datasets/geopandas/geojson_dataset.py b/kedro-datasets/kedro_datasets/geopandas/geojson_dataset.py
index a284b46bc..fdad31e02 100644
--- a/kedro-datasets/kedro_datasets/geopandas/geojson_dataset.py
+++ b/kedro-datasets/kedro_datasets/geopandas/geojson_dataset.py
@@ -4,7 +4,7 @@
 """
 import copy
 from pathlib import PurePosixPath
-from typing import Any, Dict, Union
+from typing import Any, Union
 
 import fsspec
 import geopandas as gpd
@@ -15,7 +15,7 @@
 
 class GeoJSONDataset(
     AbstractVersionedDataset[
-        gpd.GeoDataFrame, Union[gpd.GeoDataFrame, Dict[str, gpd.GeoDataFrame]]
+        gpd.GeoDataFrame, Union[gpd.GeoDataFrame, dict[str, gpd.GeoDataFrame]]
     ]
 ):
     """``GeoJSONDataset`` loads/saves data to a GeoJSON file using an underlying filesystem
@@ -43,19 +43,19 @@ class GeoJSONDataset(
 
     """
 
-    DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
+    DEFAULT_LOAD_ARGS: dict[str, Any] = {}
     DEFAULT_SAVE_ARGS = {"driver": "GeoJSON"}
 
     def __init__(  # noqa: PLR0913
         self,
         *,
         filepath: str,
-        load_args: Dict[str, Any] = None,
-        save_args: Dict[str, Any] = None,
+        load_args: dict[str, Any] = None,
+        save_args: dict[str, Any] = None,
         version: Version = None,
-        credentials: Dict[str, Any] = None,
-        fs_args: Dict[str, Any] = None,
-        metadata: Dict[str, Any] = None,
+        credentials: dict[str, Any] = None,
+        fs_args: dict[str, Any] = None,
+        metadata: dict[str, Any] = None,
     ) -> None:
         """Creates a new instance of ``GeoJSONDataset`` pointing to a concrete GeoJSON file
         on a specific filesystem fsspec.
@@ -120,7 +120,7 @@ def __init__(  # noqa: PLR0913
         self._fs_open_args_load = _fs_open_args_load
         self._fs_open_args_save = _fs_open_args_save
 
-    def _load(self) -> Union[gpd.GeoDataFrame, Dict[str, gpd.GeoDataFrame]]:
+    def _load(self) -> Union[gpd.GeoDataFrame, dict[str, gpd.GeoDataFrame]]:
         load_path = get_filepath_str(self._get_load_path(), self._protocol)
         with self._fs.open(load_path, **self._fs_open_args_load) as fs_file:
             return gpd.read_file(fs_file, **self._load_args)
@@ -138,7 +138,7 @@ def _exists(self) -> bool:
             return False
         return self._fs.exists(load_path)
 
-    def _describe(self) -> Dict[str, Any]:
+    def _describe(self) -> dict[str, Any]:
         return {
             "filepath": self._filepath,
             "protocol": self._protocol,
diff --git a/kedro-datasets/kedro_datasets/holoviews/holoviews_writer.py b/kedro-datasets/kedro_datasets/holoviews/holoviews_writer.py
index 8bae8f6ea..5911d8ae8 100644
--- a/kedro-datasets/kedro_datasets/holoviews/holoviews_writer.py
+++ b/kedro-datasets/kedro_datasets/holoviews/holoviews_writer.py
@@ -4,7 +4,7 @@
 import io
 from copy import deepcopy
 from pathlib import PurePosixPath
-from typing import Any, Dict, NoReturn, TypeVar
+from typing import Any, NoReturn, TypeVar
 
 import fsspec
 import holoviews as hv
@@ -34,17 +34,17 @@ class HoloviewsWriter(AbstractVersionedDataset[HoloViews, NoReturn]):
 
     """
 
-    DEFAULT_SAVE_ARGS: Dict[str, Any] = {"fmt": "png"}
+    DEFAULT_SAVE_ARGS: dict[str, Any] = {"fmt": "png"}
 
     def __init__(  # noqa: PLR0913
         self,
         *,
         filepath: str,
-        fs_args: Dict[str, Any] = None,
-        credentials: Dict[str, Any] = None,
-        save_args: Dict[str, Any] = None,
+        fs_args: dict[str, Any] = None,
+        credentials: dict[str, Any] = None,
+        save_args: dict[str, Any] = None,
         version: Version = None,
-        metadata: Dict[str, Any] = None,
+        metadata: dict[str, Any] = None,
     ) -> None:
         """Creates a new instance of ``HoloviewsWriter``.
 
@@ -99,7 +99,7 @@ def __init__(  # noqa: PLR0913
         if save_args is not None:
             self._save_args.update(save_args)
 
-    def _describe(self) -> Dict[str, Any]:
+    def _describe(self) -> dict[str, Any]:
         return {
             "filepath": self._filepath,
             "protocol": self._protocol,
diff --git a/kedro-datasets/kedro_datasets/json/json_dataset.py b/kedro-datasets/kedro_datasets/json/json_dataset.py
index 2579f1afd..c6f616b38 100644
--- a/kedro-datasets/kedro_datasets/json/json_dataset.py
+++ b/kedro-datasets/kedro_datasets/json/json_dataset.py
@@ -4,7 +4,7 @@
 import json
 from copy import deepcopy
 from pathlib import PurePosixPath
-from typing import Any, Dict
+from typing import Any
 
 import fsspec
 from kedro.io.core import Version, get_filepath_str, get_protocol_and_path
@@ -46,17 +46,17 @@ class JSONDataset(AbstractVersionedDataset[Any, Any]):
 
     """
 
-    DEFAULT_SAVE_ARGS: Dict[str, Any] = {"indent": 2}
+    DEFAULT_SAVE_ARGS: dict[str, Any] = {"indent": 2}
 
     def __init__(  # noqa: PLR0913
         self,
         *,
         filepath: str,
-        save_args: Dict[str, Any] = None,
+        save_args: dict[str, Any] = None,
         version: Version = None,
-        credentials: Dict[str, Any] = None,
-        fs_args: Dict[str, Any] = None,
-        metadata: Dict[str, Any] = None,
+        credentials: dict[str, Any] = None,
+        fs_args: dict[str, Any] = None,
+        metadata: dict[str, Any] = None,
     ) -> None:
         """Creates a new instance of ``JSONDataset`` pointing to a concrete JSON file
         on a specific filesystem.
@@ -117,7 +117,7 @@ def __init__(  # noqa: PLR0913
         self._fs_open_args_load = _fs_open_args_load
         self._fs_open_args_save = _fs_open_args_save
 
-    def _describe(self) -> Dict[str, Any]:
+    def _describe(self) -> dict[str, Any]:
         return {
             "filepath": self._filepath,
             "protocol": self._protocol,
diff --git a/kedro-datasets/kedro_datasets/matplotlib/matplotlib_writer.py b/kedro-datasets/kedro_datasets/matplotlib/matplotlib_writer.py
index 5060ff8a8..e84b728b5 100644
--- a/kedro-datasets/kedro_datasets/matplotlib/matplotlib_writer.py
+++ b/kedro-datasets/kedro_datasets/matplotlib/matplotlib_writer.py
@@ -4,7 +4,7 @@
 import io
 from copy import deepcopy
 from pathlib import PurePosixPath
-from typing import Any, Dict, List, NoReturn, Union
+from typing import Any, NoReturn, Union
 from warnings import warn
 
 import fsspec
@@ -16,7 +16,7 @@
 
 class MatplotlibWriter(
     AbstractVersionedDataset[
-        Union[plt.figure, List[plt.figure], Dict[str, plt.figure]], NoReturn
+        Union[plt.figure, list[plt.figure], dict[str, plt.figure]], NoReturn
     ]
 ):
     """``MatplotlibWriter`` saves one or more Matplotlib objects as
@@ -99,18 +99,18 @@ class MatplotlibWriter(
 
     """
 
-    DEFAULT_SAVE_ARGS: Dict[str, Any] = {}
+    DEFAULT_SAVE_ARGS: dict[str, Any] = {}
 
     def __init__(  # noqa: PLR0913
         self,
         *,
         filepath: str,
-        fs_args: Dict[str, Any] = None,
-        credentials: Dict[str, Any] = None,
-        save_args: Dict[str, Any] = None,
+        fs_args: dict[str, Any] = None,
+        credentials: dict[str, Any] = None,
+        save_args: dict[str, Any] = None,
         version: Version = None,
         overwrite: bool = False,
-        metadata: Dict[str, Any] = None,
+        metadata: dict[str, Any] = None,
     ) -> None:
         """Creates a new instance of ``MatplotlibWriter``.
 
@@ -176,7 +176,7 @@ def __init__(  # noqa: PLR0913
             overwrite = False
         self._overwrite = overwrite
 
-    def _describe(self) -> Dict[str, Any]:
+    def _describe(self) -> dict[str, Any]:
         return {
             "filepath": self._filepath,
             "protocol": self._protocol,
@@ -188,7 +188,7 @@ def _load(self) -> NoReturn:
         raise DatasetError(f"Loading not supported for '{self.__class__.__name__}'")
 
     def _save(
-        self, data: Union[plt.figure, List[plt.figure], Dict[str, plt.figure]]
+        self, data: Union[plt.figure, list[plt.figure], dict[str, plt.figure]]
     ) -> None:
         save_path = self._get_save_path()
diff --git a/kedro-datasets/kedro_datasets/networkx/gml_dataset.py b/kedro-datasets/kedro_datasets/networkx/gml_dataset.py
index 37d03e4b4..fe1272cc4 100644
--- a/kedro-datasets/kedro_datasets/networkx/gml_dataset.py
+++ b/kedro-datasets/kedro_datasets/networkx/gml_dataset.py
@@ -4,7 +4,7 @@
 """
 from copy import deepcopy
 from pathlib import PurePosixPath
-from typing import Any, Dict
+from typing import Any
 
 import fsspec
 import networkx
@@ -33,19 +33,19 @@ class GMLDataset(AbstractVersionedDataset[networkx.Graph, networkx.Graph]):
 
     """
 
-    DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
-    DEFAULT_SAVE_ARGS: Dict[str, Any] = {}
+    DEFAULT_LOAD_ARGS: dict[str, Any] = {}
+    DEFAULT_SAVE_ARGS: dict[str, Any] = {}
 
     def __init__(  # noqa: PLR0913
         self,
         *,
         filepath: str,
-        load_args: Dict[str, Any] = None,
-        save_args: Dict[str, Any] = None,
+        load_args: dict[str, Any] = None,
+        save_args: dict[str, Any] = None,
         version: Version = None,
-        credentials: Dict[str, Any] = None,
-        fs_args: Dict[str, Any] = None,
-        metadata: Dict[str, Any] = None,
+        credentials: dict[str, Any] = None,
+        fs_args: dict[str, Any] = None,
+        metadata: dict[str, Any] = None,
     ) -> None:
         """Creates a new instance of ``GMLDataset``.
@@ -123,7 +123,7 @@ def _exists(self) -> bool:
         load_path = get_filepath_str(self._get_load_path(), self._protocol)
         return self._fs.exists(load_path)
 
-    def _describe(self) -> Dict[str, Any]:
+    def _describe(self) -> dict[str, Any]:
         return {
             "filepath": self._filepath,
             "protocol": self._protocol,
diff --git a/kedro-datasets/kedro_datasets/networkx/graphml_dataset.py b/kedro-datasets/kedro_datasets/networkx/graphml_dataset.py
index 63351d062..fa3ac94dc 100644
--- a/kedro-datasets/kedro_datasets/networkx/graphml_dataset.py
+++ b/kedro-datasets/kedro_datasets/networkx/graphml_dataset.py
@@ -3,7 +3,7 @@
 """
 from copy import deepcopy
 from pathlib import PurePosixPath
-from typing import Any, Dict
+from typing import Any
 
 import fsspec
 import networkx
@@ -32,19 +32,19 @@ class GraphMLDataset(AbstractVersionedDataset[networkx.Graph, networkx.Graph]):
 
     """
 
-    DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
-    DEFAULT_SAVE_ARGS: Dict[str, Any] = {}
+    DEFAULT_LOAD_ARGS: dict[str, Any] = {}
+    DEFAULT_SAVE_ARGS: dict[str, Any] = {}
 
     def __init__(  # noqa: PLR0913
         self,
         *,
         filepath: str,
-        load_args: Dict[str, Any] = None,
-        save_args: Dict[str, Any] = None,
+        load_args: dict[str, Any] = None,
+        save_args: dict[str, Any] = None,
         version: Version = None,
-        credentials: Dict[str, Any] = None,
-        fs_args: Dict[str, Any] = None,
-        metadata: Dict[str, Any] = None,
+        credentials: dict[str, Any] = None,
+        fs_args: dict[str, Any] = None,
+        metadata: dict[str, Any] = None,
     ) -> None:
         """Creates a new instance of ``GraphMLDataset``.
 
@@ -121,7 +121,7 @@ def _exists(self) -> bool:
         load_path = get_filepath_str(self._get_load_path(), self._protocol)
         return self._fs.exists(load_path)
 
-    def _describe(self) -> Dict[str, Any]:
+    def _describe(self) -> dict[str, Any]:
         return {
             "filepath": self._filepath,
             "protocol": self._protocol,
diff --git a/kedro-datasets/kedro_datasets/networkx/json_dataset.py b/kedro-datasets/kedro_datasets/networkx/json_dataset.py
index 27a2f0fa7..fcdfb83e3 100644
--- a/kedro-datasets/kedro_datasets/networkx/json_dataset.py
+++ b/kedro-datasets/kedro_datasets/networkx/json_dataset.py
@@ -4,7 +4,7 @@
 import json
 from copy import deepcopy
 from pathlib import PurePosixPath
-from typing import Any, Dict
+from typing import Any
 
 import fsspec
 import networkx
@@ -33,19 +33,19 @@ class JSONDataset(AbstractVersionedDataset[networkx.Graph, networkx.Graph]):
 
     """
 
-    DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
-    DEFAULT_SAVE_ARGS: Dict[str, Any] = {}
+    DEFAULT_LOAD_ARGS: dict[str, Any] = {}
+    DEFAULT_SAVE_ARGS: dict[str, Any] = {}
 
     def __init__(  # noqa: PLR0913
         self,
         *,
         filepath: str,
-        load_args: Dict[str, Any] = None,
-        save_args: Dict[str, Any] = None,
+        load_args: dict[str, Any] = None,
+        save_args: dict[str, Any] = None,
         version: Version = None,
-        credentials: Dict[str, Any] = None,
-        fs_args: Dict[str, Any] = None,
-        metadata: Dict[str, Any] = None,
+        credentials: dict[str, Any] = None,
+        fs_args: dict[str, Any] = None,
+        metadata: dict[str, Any] = None,
     ) -> None:
         """Creates a new instance of ``JSONDataset``.
@@ -128,7 +128,7 @@ def _exists(self) -> bool:
 
         return self._fs.exists(load_path)
 
-    def _describe(self) -> Dict[str, Any]:
+    def _describe(self) -> dict[str, Any]:
         return {
             "filepath": self._filepath,
             "protocol": self._protocol,
diff --git a/kedro-datasets/kedro_datasets/pandas/csv_dataset.py b/kedro-datasets/kedro_datasets/pandas/csv_dataset.py
index f16d7ac1b..670010a79 100644
--- a/kedro-datasets/kedro_datasets/pandas/csv_dataset.py
+++ b/kedro-datasets/kedro_datasets/pandas/csv_dataset.py
@@ -5,7 +5,7 @@
 from copy import deepcopy
 from io import BytesIO
 from pathlib import PurePosixPath
-from typing import Any, Dict
+from typing import Any
 
 import fsspec
 import pandas as pd
@@ -65,19 +65,19 @@ class CSVDataset(AbstractVersionedDataset[pd.DataFrame, pd.DataFrame]):
 
     """
 
-    DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
-    DEFAULT_SAVE_ARGS: Dict[str, Any] = {"index": False}
+    DEFAULT_LOAD_ARGS: dict[str, Any] = {}
+    DEFAULT_SAVE_ARGS: dict[str, Any] = {"index": False}
 
     def __init__(  # noqa: PLR0913
         self,
         *,
         filepath: str,
-        load_args: Dict[str, Any] = None,
-        save_args: Dict[str, Any] = None,
+        load_args: dict[str, Any] = None,
+        save_args: dict[str, Any] = None,
         version: Version = None,
-        credentials: Dict[str, Any] = None,
-        fs_args: Dict[str, Any] = None,
-        metadata: Dict[str, Any] = None,
+        credentials: dict[str, Any] = None,
+        fs_args: dict[str, Any] = None,
+        metadata: dict[str, Any] = None,
     ) -> None:
         """Creates a new instance of ``CSVDataset`` pointing to a concrete CSV file
         on a specific filesystem.
@@ -143,7 +143,7 @@ def __init__(  # noqa: PLR0913
         self._save_args.pop("storage_options", None)
         self._load_args.pop("storage_options", None)
 
-    def _describe(self) -> Dict[str, Any]:
+    def _describe(self) -> dict[str, Any]:
         return {
             "filepath": self._filepath,
             "protocol": self._protocol,
@@ -194,7 +194,7 @@ def _invalidate_cache(self) -> None:
         filepath = get_filepath_str(self._filepath, self._protocol)
         self._fs.invalidate_cache(filepath)
 
-    def _preview(self, nrows: int = 40) -> Dict:
+    def _preview(self, nrows: int = 40) -> dict:
         # Create a copy so it doesn't contaminate the original dataset
         dataset_copy = self._copy()
         dataset_copy._load_args["nrows"] = nrows
diff --git a/kedro-datasets/kedro_datasets/pandas/deltatable_dataset.py b/kedro-datasets/kedro_datasets/pandas/deltatable_dataset.py
index e492f0375..fd6b6ffa4 100644
--- a/kedro-datasets/kedro_datasets/pandas/deltatable_dataset.py
+++ b/kedro-datasets/kedro_datasets/pandas/deltatable_dataset.py
@@ -3,7 +3,7 @@
 load and save using a pandas dataframe.
""" from copy import deepcopy -from typing import Any, Dict, List, Optional +from typing import Any, Optional import pandas as pd from deltalake import DataCatalog, DeltaTable, Metadata @@ -81,8 +81,8 @@ class DeltaTableDataset(AbstractDataset): DEFAULT_WRITE_MODE = "overwrite" ACCEPTED_WRITE_MODES = ("overwrite", "append") - DEFAULT_LOAD_ARGS: Dict[str, Any] = {} - DEFAULT_SAVE_ARGS: Dict[str, Any] = {"mode": DEFAULT_WRITE_MODE} + DEFAULT_LOAD_ARGS: dict[str, Any] = {} + DEFAULT_SAVE_ARGS: dict[str, Any] = {"mode": DEFAULT_WRITE_MODE} def __init__( # noqa: PLR0913 self, @@ -92,10 +92,10 @@ def __init__( # noqa: PLR0913 catalog_name: Optional[str] = None, database: Optional[str] = None, table: Optional[str] = None, - load_args: Optional[Dict[str, Any]] = None, - save_args: Optional[Dict[str, Any]] = None, - credentials: Optional[Dict[str, Any]] = None, - fs_args: Optional[Dict[str, Any]] = None, + load_args: Optional[dict[str, Any]] = None, + save_args: Optional[dict[str, Any]] = None, + credentials: Optional[dict[str, Any]] = None, + fs_args: Optional[dict[str, Any]] = None, ) -> None: """Creates a new instance of ``DeltaTableDataset`` @@ -186,14 +186,14 @@ def __init__( # noqa: PLR0913 ) @property - def fs_args(self) -> Dict[str, Any]: + def fs_args(self) -> dict[str, Any]: """Appends and returns filesystem credentials to fs_args.""" fs_args = deepcopy(self._fs_args) fs_args.update(self._credentials) return fs_args @property - def schema(self) -> Dict[str, Any]: + def schema(self) -> dict[str, Any]: """Returns the schema of the DeltaTableDataset as a dictionary.""" return self._delta_table.schema().json() @@ -214,7 +214,7 @@ def metadata(self) -> Metadata: return self._delta_table.metadata() @property - def history(self) -> List[Dict[str, Any]]: + def history(self) -> list[dict[str, Any]]: """Returns the history of actions on DeltaTableDataset as a list of dictionaries.""" return self._delta_table.history() @@ -248,7 +248,7 @@ def _save(self, data: pd.DataFrame) -> None: **self._save_args, ) - def _describe(self) -> Dict[str, Any]: + def _describe(self) -> dict[str, Any]: return { "filepath": self._filepath, "catalog_type": self._catalog_type, diff --git a/kedro-datasets/kedro_datasets/pandas/excel_dataset.py b/kedro-datasets/kedro_datasets/pandas/excel_dataset.py index 67b4a6565..893d4eea7 100644 --- a/kedro-datasets/kedro_datasets/pandas/excel_dataset.py +++ b/kedro-datasets/kedro_datasets/pandas/excel_dataset.py @@ -5,7 +5,7 @@ from copy import deepcopy from io import BytesIO from pathlib import PurePosixPath -from typing import Any, Dict, Union +from typing import Any, Union import fsspec import pandas as pd @@ -23,8 +23,8 @@ class ExcelDataset( AbstractVersionedDataset[ - Union[pd.DataFrame, Dict[str, pd.DataFrame]], - Union[pd.DataFrame, Dict[str, pd.DataFrame]], + Union[pd.DataFrame, dict[str, pd.DataFrame]], + Union[pd.DataFrame, dict[str, pd.DataFrame]], ] ): """``ExcelDataset`` loads/saves data from/to a Excel file using an underlying @@ -113,12 +113,12 @@ def __init__( # noqa: PLR0913 *, filepath: str, engine: str = "openpyxl", - load_args: Dict[str, Any] = None, - save_args: Dict[str, Any] = None, + load_args: dict[str, Any] = None, + save_args: dict[str, Any] = None, version: Version = None, - credentials: Dict[str, Any] = None, - fs_args: Dict[str, Any] = None, - metadata: Dict[str, Any] = None, + credentials: dict[str, Any] = None, + fs_args: dict[str, Any] = None, + metadata: dict[str, Any] = None, ) -> None: """Creates a new instance of ``ExcelDataset`` pointing to a 
         concrete Excel file on a specific filesystem.
@@ -203,7 +203,7 @@ def __init__(  # noqa: PLR0913
         self._save_args.pop("storage_options", None)
         self._load_args.pop("storage_options", None)
 
-    def _describe(self) -> Dict[str, Any]:
+    def _describe(self) -> dict[str, Any]:
         return {
             "filepath": self._filepath,
             "protocol": self._protocol,
@@ -213,7 +213,7 @@
             "version": self._version,
         }
 
-    def _load(self) -> Union[pd.DataFrame, Dict[str, pd.DataFrame]]:
+    def _load(self) -> Union[pd.DataFrame, dict[str, pd.DataFrame]]:
         load_path = str(self._get_load_path())
         if self._protocol == "file":
             # file:// protocol seems to misbehave on Windows
@@ -227,7 +227,7 @@ def _load(self) -> Union[pd.DataFrame, Dict[str, pd.DataFrame]]:
             load_path, storage_options=self._storage_options, **self._load_args
         )
 
-    def _save(self, data: Union[pd.DataFrame, Dict[str, pd.DataFrame]]) -> None:
+    def _save(self, data: Union[pd.DataFrame, dict[str, pd.DataFrame]]) -> None:
         output = BytesIO()
         save_path = get_filepath_str(self._get_save_path(), self._protocol)
@@ -262,7 +262,7 @@ def _invalidate_cache(self) -> None:
         filepath = get_filepath_str(self._filepath, self._protocol)
         self._fs.invalidate_cache(filepath)
 
-    def _preview(self, nrows: int = 40) -> Dict:
+    def _preview(self, nrows: int = 40) -> dict:
         # Create a copy so it doesn't contaminate the original dataset
         dataset_copy = self._copy()
         dataset_copy._load_args["nrows"] = nrows
diff --git a/kedro-datasets/kedro_datasets/pandas/feather_dataset.py b/kedro-datasets/kedro_datasets/pandas/feather_dataset.py
index 3282ab907..c8c0773df 100644
--- a/kedro-datasets/kedro_datasets/pandas/feather_dataset.py
+++ b/kedro-datasets/kedro_datasets/pandas/feather_dataset.py
@@ -6,7 +6,7 @@
 from copy import deepcopy
 from io import BytesIO
 from pathlib import PurePosixPath
-from typing import Any, Dict
+from typing import Any
 
 import fsspec
 import pandas as pd
@@ -66,19 +66,19 @@ class FeatherDataset(AbstractVersionedDataset[pd.DataFrame, pd.DataFrame]):
 
     """
 
-    DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
-    DEFAULT_SAVE_ARGS: Dict[str, Any] = {}
+    DEFAULT_LOAD_ARGS: dict[str, Any] = {}
+    DEFAULT_SAVE_ARGS: dict[str, Any] = {}
 
     def __init__(  # noqa: PLR0913
         self,
         *,
         filepath: str,
-        load_args: Dict[str, Any] = None,
-        save_args: Dict[str, Any] = None,
+        load_args: dict[str, Any] = None,
+        save_args: dict[str, Any] = None,
         version: Version = None,
-        credentials: Dict[str, Any] = None,
-        fs_args: Dict[str, Any] = None,
-        metadata: Dict[str, Any] = None,
+        credentials: dict[str, Any] = None,
+        fs_args: dict[str, Any] = None,
+        metadata: dict[str, Any] = None,
     ) -> None:
         """Creates a new instance of ``FeatherDataset`` pointing to a concrete
         filepath.
@@ -144,7 +144,7 @@ def __init__(  # noqa: PLR0913
         self._save_args.pop("storage_options", None)
         self._load_args.pop("storage_options", None)
 
-    def _describe(self) -> Dict[str, Any]:
+    def _describe(self) -> dict[str, Any]:
         return {
             "filepath": self._filepath,
             "protocol": self._protocol,
diff --git a/kedro-datasets/kedro_datasets/pandas/gbq_dataset.py b/kedro-datasets/kedro_datasets/pandas/gbq_dataset.py
index fedd05442..5757ac402 100644
--- a/kedro-datasets/kedro_datasets/pandas/gbq_dataset.py
+++ b/kedro-datasets/kedro_datasets/pandas/gbq_dataset.py
@@ -3,7 +3,7 @@
 """
 import copy
 from pathlib import PurePosixPath
-from typing import Any, Dict, NoReturn, Union
+from typing import Any, NoReturn, Union
 
 import fsspec
 import pandas as pd
@@ -59,8 +59,8 @@ class GBQTableDataset(AbstractDataset[None, pd.DataFrame]):
 
     """
 
-    DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
-    DEFAULT_SAVE_ARGS: Dict[str, Any] = {"progress_bar": False}
+    DEFAULT_LOAD_ARGS: dict[str, Any] = {}
+    DEFAULT_SAVE_ARGS: dict[str, Any] = {"progress_bar": False}
 
     def __init__(  # noqa: PLR0913
         self,
         *,
         dataset: str,
         table_name: str,
         project: str = None,
-        credentials: Union[Dict[str, Any], Credentials] = None,
-        load_args: Dict[str, Any] = None,
-        save_args: Dict[str, Any] = None,
-        metadata: Dict[str, Any] = None,
+        credentials: Union[dict[str, Any], Credentials] = None,
+        load_args: dict[str, Any] = None,
+        save_args: dict[str, Any] = None,
+        metadata: dict[str, Any] = None,
     ) -> None:
         """Creates a new instance of ``GBQTableDataset``.
 
@@ -127,7 +127,7 @@ def __init__(  # noqa: PLR0913
 
         self.metadata = metadata
 
-    def _describe(self) -> Dict[str, Any]:
+    def _describe(self) -> dict[str, Any]:
         return {
             "dataset": self._dataset,
             "table_name": self._table_name,
@@ -205,17 +205,17 @@ class GBQQueryDataset(AbstractDataset[None, pd.DataFrame]):
         >>> sql_data = dataset.load()
     """
 
-    DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
+    DEFAULT_LOAD_ARGS: dict[str, Any] = {}
 
     def __init__(  # noqa: PLR0913
         self,
         sql: str = None,
         project: str = None,
-        credentials: Union[Dict[str, Any], Credentials] = None,
-        load_args: Dict[str, Any] = None,
-        fs_args: Dict[str, Any] = None,
+        credentials: Union[dict[str, Any], Credentials] = None,
+        load_args: dict[str, Any] = None,
+        fs_args: dict[str, Any] = None,
         filepath: str = None,
-        metadata: Dict[str, Any] = None,
+        metadata: dict[str, Any] = None,
     ) -> None:
         """Creates a new instance of ``GBQQueryDataset``.
@@ -289,7 +289,7 @@ def __init__(  # noqa: PLR0913
 
         self.metadata = metadata
 
-    def _describe(self) -> Dict[str, Any]:
+    def _describe(self) -> dict[str, Any]:
         load_args = copy.deepcopy(self._load_args)
         desc = {}
         desc["sql"] = str(load_args.pop("query", None))
diff --git a/kedro-datasets/kedro_datasets/pandas/generic_dataset.py b/kedro-datasets/kedro_datasets/pandas/generic_dataset.py
index 613a91383..9c01447b2 100644
--- a/kedro-datasets/kedro_datasets/pandas/generic_dataset.py
+++ b/kedro-datasets/kedro_datasets/pandas/generic_dataset.py
@@ -4,7 +4,7 @@
 """
 from copy import deepcopy
 from pathlib import PurePosixPath
-from typing import Any, Dict
+from typing import Any
 
 import fsspec
 import pandas as pd
@@ -79,20 +79,20 @@ class GenericDataset(AbstractVersionedDataset[pd.DataFrame, pd.DataFrame]):
 
     """
 
-    DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
-    DEFAULT_SAVE_ARGS: Dict[str, Any] = {}
+    DEFAULT_LOAD_ARGS: dict[str, Any] = {}
+    DEFAULT_SAVE_ARGS: dict[str, Any] = {}
 
     def __init__(  # noqa: PLR0913
         self,
         *,
         filepath: str,
         file_format: str,
-        load_args: Dict[str, Any] = None,
-        save_args: Dict[str, Any] = None,
+        load_args: dict[str, Any] = None,
+        save_args: dict[str, Any] = None,
         version: Version = None,
-        credentials: Dict[str, Any] = None,
-        fs_args: Dict[str, Any] = None,
-        metadata: Dict[str, Any] = None,
+        credentials: dict[str, Any] = None,
+        fs_args: dict[str, Any] = None,
+        metadata: dict[str, Any] = None,
     ):
         """Creates a new instance of ``GenericDataset`` pointing to a concrete data file
         on a specific filesystem. The appropriate pandas load/save methods are
@@ -223,7 +223,7 @@ def _exists(self) -> bool:
 
         return self._fs.exists(load_path)
 
-    def _describe(self) -> Dict[str, Any]:
+    def _describe(self) -> dict[str, Any]:
         return {
             "file_format": self._file_format,
             "filepath": self._filepath,
diff --git a/kedro-datasets/kedro_datasets/pandas/hdf_dataset.py b/kedro-datasets/kedro_datasets/pandas/hdf_dataset.py
index 4865e034e..ec3ec9b41 100644
--- a/kedro-datasets/kedro_datasets/pandas/hdf_dataset.py
+++ b/kedro-datasets/kedro_datasets/pandas/hdf_dataset.py
@@ -4,7 +4,7 @@
 from copy import deepcopy
 from pathlib import PurePosixPath
 from threading import Lock
-from typing import Any, Dict
+from typing import Any
 
 import fsspec
 import pandas as pd
@@ -52,20 +52,20 @@ class HDFDataset(AbstractVersionedDataset[pd.DataFrame, pd.DataFrame]):
     # _lock is a class attribute that will be shared across all the instances.
     # It is used to make dataset safe for threads.
     _lock = Lock()
-    DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
-    DEFAULT_SAVE_ARGS: Dict[str, Any] = {}
+    DEFAULT_LOAD_ARGS: dict[str, Any] = {}
+    DEFAULT_SAVE_ARGS: dict[str, Any] = {}
 
     def __init__(  # noqa: PLR0913
         self,
         *,
         filepath: str,
         key: str,
-        load_args: Dict[str, Any] = None,
-        save_args: Dict[str, Any] = None,
+        load_args: dict[str, Any] = None,
+        save_args: dict[str, Any] = None,
         version: Version = None,
-        credentials: Dict[str, Any] = None,
-        fs_args: Dict[str, Any] = None,
-        metadata: Dict[str, Any] = None,
+        credentials: dict[str, Any] = None,
+        fs_args: dict[str, Any] = None,
+        metadata: dict[str, Any] = None,
     ) -> None:
         """Creates a new instance of ``HDFDataset`` pointing to a concrete hdf file
         on a specific filesystem.
@@ -135,7 +135,7 @@ def __init__(  # noqa: PLR0913
         self._fs_open_args_load = _fs_open_args_load
         self._fs_open_args_save = _fs_open_args_save
 
-    def _describe(self) -> Dict[str, Any]:
+    def _describe(self) -> dict[str, Any]:
         return {
             "filepath": self._filepath,
             "key": self._key,
diff --git a/kedro-datasets/kedro_datasets/pandas/json_dataset.py b/kedro-datasets/kedro_datasets/pandas/json_dataset.py
index 5c075855c..71f199139 100644
--- a/kedro-datasets/kedro_datasets/pandas/json_dataset.py
+++ b/kedro-datasets/kedro_datasets/pandas/json_dataset.py
@@ -5,7 +5,7 @@
 from copy import deepcopy
 from io import BytesIO
 from pathlib import PurePosixPath
-from typing import Any, Dict
+from typing import Any
 
 import fsspec
 import pandas as pd
@@ -60,19 +60,19 @@ class JSONDataset(AbstractVersionedDataset[pd.DataFrame, pd.DataFrame]):
 
     """
 
-    DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
-    DEFAULT_SAVE_ARGS: Dict[str, Any] = {}
+    DEFAULT_LOAD_ARGS: dict[str, Any] = {}
+    DEFAULT_SAVE_ARGS: dict[str, Any] = {}
 
     def __init__(  # noqa: PLR0913
         self,
         *,
         filepath: str,
-        load_args: Dict[str, Any] = None,
-        save_args: Dict[str, Any] = None,
+        load_args: dict[str, Any] = None,
+        save_args: dict[str, Any] = None,
         version: Version = None,
-        credentials: Dict[str, Any] = None,
-        fs_args: Dict[str, Any] = None,
-        metadata: Dict[str, Any] = None,
+        credentials: dict[str, Any] = None,
+        fs_args: dict[str, Any] = None,
+        metadata: dict[str, Any] = None,
     ) -> None:
         """Creates a new instance of ``JSONDataset`` pointing to a concrete JSON file
         on a specific filesystem.
@@ -137,7 +137,7 @@ def __init__(  # noqa: PLR0913
         self._save_args.pop("storage_options", None)
         self._load_args.pop("storage_options", None)
 
-    def _describe(self) -> Dict[str, Any]:
+    def _describe(self) -> dict[str, Any]:
         return {
             "filepath": self._filepath,
             "protocol": self._protocol,
diff --git a/kedro-datasets/kedro_datasets/pandas/parquet_dataset.py b/kedro-datasets/kedro_datasets/pandas/parquet_dataset.py
index bb925a8d4..243a52169 100644
--- a/kedro-datasets/kedro_datasets/pandas/parquet_dataset.py
+++ b/kedro-datasets/kedro_datasets/pandas/parquet_dataset.py
@@ -5,7 +5,7 @@
 from copy import deepcopy
 from io import BytesIO
 from pathlib import Path, PurePosixPath
-from typing import Any, Dict
+from typing import Any
 
 import fsspec
 import pandas as pd
@@ -71,19 +71,19 @@ class ParquetDataset(AbstractVersionedDataset[pd.DataFrame, pd.DataFrame]):
 
     """
 
-    DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
-    DEFAULT_SAVE_ARGS: Dict[str, Any] = {}
+    DEFAULT_LOAD_ARGS: dict[str, Any] = {}
+    DEFAULT_SAVE_ARGS: dict[str, Any] = {}
 
     def __init__(  # noqa: PLR0913
         self,
         *,
         filepath: str,
-        load_args: Dict[str, Any] = None,
-        save_args: Dict[str, Any] = None,
+        load_args: dict[str, Any] = None,
+        save_args: dict[str, Any] = None,
         version: Version = None,
-        credentials: Dict[str, Any] = None,
-        fs_args: Dict[str, Any] = None,
-        metadata: Dict[str, Any] = None,
+        credentials: dict[str, Any] = None,
+        fs_args: dict[str, Any] = None,
+        metadata: dict[str, Any] = None,
     ) -> None:
         """Creates a new instance of ``ParquetDataset`` pointing to a concrete
         Parquet file on a specific filesystem.
@@ -152,7 +152,7 @@ def __init__(  # noqa: PLR0913
         self._save_args.pop("storage_options", None)
         self._load_args.pop("storage_options", None)
 
-    def _describe(self) -> Dict[str, Any]:
+    def _describe(self) -> dict[str, Any]:
         return {
             "filepath": self._filepath,
             "protocol": self._protocol,
diff --git a/kedro-datasets/kedro_datasets/pandas/xml_dataset.py b/kedro-datasets/kedro_datasets/pandas/xml_dataset.py
index 731c77970..220fb9318 100644
--- a/kedro-datasets/kedro_datasets/pandas/xml_dataset.py
+++ b/kedro-datasets/kedro_datasets/pandas/xml_dataset.py
@@ -5,7 +5,7 @@
 from copy import deepcopy
 from io import BytesIO
 from pathlib import PurePosixPath
-from typing import Any, Dict
+from typing import Any
 
 import fsspec
 import pandas as pd
@@ -43,19 +43,19 @@ class XMLDataset(AbstractVersionedDataset[pd.DataFrame, pd.DataFrame]):
 
     """
 
-    DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
-    DEFAULT_SAVE_ARGS: Dict[str, Any] = {"index": False}
+    DEFAULT_LOAD_ARGS: dict[str, Any] = {}
+    DEFAULT_SAVE_ARGS: dict[str, Any] = {"index": False}
 
     def __init__(  # noqa: PLR0913
         self,
         *,
         filepath: str,
-        load_args: Dict[str, Any] = None,
-        save_args: Dict[str, Any] = None,
+        load_args: dict[str, Any] = None,
+        save_args: dict[str, Any] = None,
         version: Version = None,
-        credentials: Dict[str, Any] = None,
-        fs_args: Dict[str, Any] = None,
-        metadata: Dict[str, Any] = None,
+        credentials: dict[str, Any] = None,
+        fs_args: dict[str, Any] = None,
+        metadata: dict[str, Any] = None,
     ) -> None:
         """Creates a new instance of ``XMLDataset`` pointing to a concrete XML file
         on a specific filesystem.
@@ -121,7 +121,7 @@ def __init__(  # noqa: PLR0913
         self._save_args.pop("storage_options", None)
         self._load_args.pop("storage_options", None)
 
-    def _describe(self) -> Dict[str, Any]:
+    def _describe(self) -> dict[str, Any]:
         return {
             "filepath": self._filepath,
             "protocol": self._protocol,
diff --git a/kedro-datasets/kedro_datasets/partitions/partitioned_dataset.py b/kedro-datasets/kedro_datasets/partitions/partitioned_dataset.py
index d991ddb76..491a0b9cc 100644
--- a/kedro-datasets/kedro_datasets/partitions/partitioned_dataset.py
+++ b/kedro-datasets/kedro_datasets/partitions/partitioned_dataset.py
@@ -5,7 +5,7 @@
 
 import operator
 from copy import deepcopy
-from typing import Any, Callable, Dict
+from typing import Any, Callable
 from urllib.parse import urlparse
 from warnings import warn
 
@@ -28,7 +28,7 @@
 S3_PROTOCOLS = ("s3", "s3a", "s3n")
 
 
-class PartitionedDataset(AbstractDataset[Dict[str, Any], Dict[str, Callable[[], Any]]]):
+class PartitionedDataset(AbstractDataset[dict[str, Any], dict[str, Callable[[], Any]]]):
     """``PartitionedDataset`` loads and saves partitioned file-like data using the
     underlying dataset definition. For filesystem level operations it uses `fsspec`:
     https://github.com/intake/filesystem_spec.
diff --git a/kedro-datasets/kedro_datasets/pickle/pickle_dataset.py b/kedro-datasets/kedro_datasets/pickle/pickle_dataset.py
index c1a24524a..f96b59a4b 100644
--- a/kedro-datasets/kedro_datasets/pickle/pickle_dataset.py
+++ b/kedro-datasets/kedro_datasets/pickle/pickle_dataset.py
@@ -6,7 +6,7 @@
 import importlib
 from copy import deepcopy
 from pathlib import PurePosixPath
-from typing import Any, Dict
+from typing import Any
 
 import fsspec
 from kedro.io.core import Version, get_filepath_str, get_protocol_and_path
@@ -66,20 +66,20 @@ class PickleDataset(AbstractVersionedDataset[Any, Any]):
         >>> assert data.equals(reloaded)
     """
 
-    DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
-    DEFAULT_SAVE_ARGS: Dict[str, Any] = {}
+    DEFAULT_LOAD_ARGS: dict[str, Any] = {}
+    DEFAULT_SAVE_ARGS: dict[str, Any] = {}
 
     def __init__(  # noqa: PLR0913
         self,
         *,
         filepath: str,
         backend: str = "pickle",
-        load_args: Dict[str, Any] = None,
-        save_args: Dict[str, Any] = None,
+        load_args: dict[str, Any] = None,
+        save_args: dict[str, Any] = None,
         version: Version = None,
-        credentials: Dict[str, Any] = None,
-        fs_args: Dict[str, Any] = None,
-        metadata: Dict[str, Any] = None,
+        credentials: dict[str, Any] = None,
+        fs_args: dict[str, Any] = None,
+        metadata: dict[str, Any] = None,
     ) -> None:
         """Creates a new instance of ``PickleDataset`` pointing to a concrete Pickle
         file on a specific filesystem. ``PickleDataset`` supports custom backends to
@@ -198,7 +198,7 @@ def __init__(  # noqa: PLR0913
         self._fs_open_args_load = _fs_open_args_load
         self._fs_open_args_save = _fs_open_args_save
 
-    def _describe(self) -> Dict[str, Any]:
+    def _describe(self) -> dict[str, Any]:
         return {
             "filepath": self._filepath,
             "backend": self._backend,
diff --git a/kedro-datasets/kedro_datasets/pillow/image_dataset.py b/kedro-datasets/kedro_datasets/pillow/image_dataset.py
index 67855875f..dbda24371 100644
--- a/kedro-datasets/kedro_datasets/pillow/image_dataset.py
+++ b/kedro-datasets/kedro_datasets/pillow/image_dataset.py
@@ -3,7 +3,7 @@
 """
 from copy import deepcopy
 from pathlib import PurePosixPath
-from typing import Any, Dict
+from typing import Any
 
 import fsspec
 from kedro.io.core import Version, get_filepath_str, get_protocol_and_path
@@ -30,17 +30,17 @@ class ImageDataset(AbstractVersionedDataset[Image.Image, Image.Image]):
 
     """
 
-    DEFAULT_SAVE_ARGS: Dict[str, Any] = {}
+    DEFAULT_SAVE_ARGS: dict[str, Any] = {}
 
     def __init__(  # noqa: PLR0913
         self,
         *,
         filepath: str,
-        save_args: Dict[str, Any] = None,
+        save_args: dict[str, Any] = None,
         version: Version = None,
-        credentials: Dict[str, Any] = None,
-        fs_args: Dict[str, Any] = None,
-        metadata: Dict[str, Any] = None,
+        credentials: dict[str, Any] = None,
+        fs_args: dict[str, Any] = None,
+        metadata: dict[str, Any] = None,
     ) -> None:
         """Creates a new instance of ``ImageDataset`` pointing to a concrete image file
         on a specific filesystem.
@@ -101,7 +101,7 @@ def __init__(  # noqa: PLR0913
         self._fs_open_args_load = _fs_open_args_load
         self._fs_open_args_save = _fs_open_args_save
 
-    def _describe(self) -> Dict[str, Any]:
+    def _describe(self) -> dict[str, Any]:
         return {
             "filepath": self._filepath,
             "protocol": self._protocol,
diff --git a/kedro-datasets/kedro_datasets/plotly/json_dataset.py b/kedro-datasets/kedro_datasets/plotly/json_dataset.py
index ea51b3e2c..42fc40ff9 100644
--- a/kedro-datasets/kedro_datasets/plotly/json_dataset.py
+++ b/kedro-datasets/kedro_datasets/plotly/json_dataset.py
@@ -3,7 +3,7 @@
 """
 from copy import deepcopy
 from pathlib import PurePosixPath
-from typing import Any, Dict, Union
+from typing import Any, Union
 
 import fsspec
 import plotly.io as pio
@@ -47,19 +47,19 @@ class JSONDataset(
         >>> assert fig == reloaded
     """
 
-    DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
-    DEFAULT_SAVE_ARGS: Dict[str, Any] = {}
+    DEFAULT_LOAD_ARGS: dict[str, Any] = {}
+    DEFAULT_SAVE_ARGS: dict[str, Any] = {}
 
     def __init__(  # noqa: PLR0913
         self,
         *,
         filepath: str,
-        load_args: Dict[str, Any] = None,
-        save_args: Dict[str, Any] = None,
+        load_args: dict[str, Any] = None,
+        save_args: dict[str, Any] = None,
         version: Version = None,
-        credentials: Dict[str, Any] = None,
-        fs_args: Dict[str, Any] = None,
-        metadata: Dict[str, Any] = None,
+        credentials: dict[str, Any] = None,
+        fs_args: dict[str, Any] = None,
+        metadata: dict[str, Any] = None,
     ) -> None:
         """Creates a new instance of ``JSONDataset`` pointing to a concrete JSON file
         on a specific filesystem.
@@ -127,7 +127,7 @@ def __init__(  # noqa: PLR0913
         self._fs_open_args_load = _fs_open_args_load
         self._fs_open_args_save = _fs_open_args_save
 
-    def _describe(self) -> Dict[str, Any]:
+    def _describe(self) -> dict[str, Any]:
         return {
             "filepath": self._filepath,
             "protocol": self._protocol,
diff --git a/kedro-datasets/kedro_datasets/plotly/plotly_dataset.py b/kedro-datasets/kedro_datasets/plotly/plotly_dataset.py
index 303fb3612..9159b0312 100644
--- a/kedro-datasets/kedro_datasets/plotly/plotly_dataset.py
+++ b/kedro-datasets/kedro_datasets/plotly/plotly_dataset.py
@@ -3,7 +3,7 @@
 plotly figure.
 """
 from copy import deepcopy
-from typing import Any, Dict
+from typing import Any
 
 import pandas as pd
 import plotly.express as px
@@ -70,13 +70,13 @@ def __init__(  # noqa: PLR0913
         self,
         *,
         filepath: str,
-        plotly_args: Dict[str, Any],
-        load_args: Dict[str, Any] = None,
-        save_args: Dict[str, Any] = None,
+        plotly_args: dict[str, Any],
+        load_args: dict[str, Any] = None,
+        save_args: dict[str, Any] = None,
         version: Version = None,
-        credentials: Dict[str, Any] = None,
-        fs_args: Dict[str, Any] = None,
-        metadata: Dict[str, Any] = None,
+        credentials: dict[str, Any] = None,
+        fs_args: dict[str, Any] = None,
+        metadata: dict[str, Any] = None,
     ) -> None:
         """Creates a new instance of ``PlotlyDataset`` pointing to a concrete JSON file
         on a specific filesystem.
@@ -134,7 +134,7 @@ def __init__(  # noqa: PLR0913
 
         self.metadata = metadata
 
-    def _describe(self) -> Dict[str, Any]:
+    def _describe(self) -> dict[str, Any]:
         return {**super()._describe(), "plotly_args": self._plotly_args}
 
     def _save(self, data: pd.DataFrame) -> None:
diff --git a/kedro-datasets/kedro_datasets/polars/csv_dataset.py b/kedro-datasets/kedro_datasets/polars/csv_dataset.py
index 8ee0a49f1..3ee0bbb48 100644
--- a/kedro-datasets/kedro_datasets/polars/csv_dataset.py
+++ b/kedro-datasets/kedro_datasets/polars/csv_dataset.py
@@ -5,7 +5,7 @@
 from copy import deepcopy
 from io import BytesIO
 from pathlib import PurePosixPath
-from typing import Any, Dict
+from typing import Any
 
 import fsspec
 import polars as pl
@@ -63,19 +63,19 @@ class CSVDataset(AbstractVersionedDataset[pl.DataFrame, pl.DataFrame]):
 
     """
 
-    DEFAULT_LOAD_ARGS: Dict[str, Any] = {"rechunk": True}
-    DEFAULT_SAVE_ARGS: Dict[str, Any] = {}
+    DEFAULT_LOAD_ARGS: dict[str, Any] = {"rechunk": True}
+    DEFAULT_SAVE_ARGS: dict[str, Any] = {}
 
     def __init__(  # noqa: PLR0913
         self,
         *,
         filepath: str,
-        load_args: Dict[str, Any] = None,
-        save_args: Dict[str, Any] = None,
+        load_args: dict[str, Any] = None,
+        save_args: dict[str, Any] = None,
         version: Version = None,
-        credentials: Dict[str, Any] = None,
-        fs_args: Dict[str, Any] = None,
-        metadata: Dict[str, Any] = None,
+        credentials: dict[str, Any] = None,
+        fs_args: dict[str, Any] = None,
+        metadata: dict[str, Any] = None,
     ) -> None:
         """Creates a new instance of ``CSVDataset`` pointing to a concrete CSV file
         on a specific filesystem.
@@ -144,7 +144,7 @@ def __init__(  # noqa: PLR0913
         self._save_args.pop("storage_options", None)
         self._load_args.pop("storage_options", None)
 
-    def _describe(self) -> Dict[str, Any]:
+    def _describe(self) -> dict[str, Any]:
         return {
             "filepath": self._filepath,
             "protocol": self._protocol,
diff --git a/kedro-datasets/kedro_datasets/polars/eager_polars_dataset.py b/kedro-datasets/kedro_datasets/polars/eager_polars_dataset.py
index b72642899..af4f2d8d9 100644
--- a/kedro-datasets/kedro_datasets/polars/eager_polars_dataset.py
+++ b/kedro-datasets/kedro_datasets/polars/eager_polars_dataset.py
@@ -5,7 +5,7 @@
 from copy import deepcopy
 from io import BytesIO
 from pathlib import PurePosixPath
-from typing import Any, Dict
+from typing import Any
 
 import fsspec
 import polars as pl
@@ -57,11 +57,11 @@ def __init__(  # noqa: PLR0913
         *,
         filepath: str,
         file_format: str,
-        load_args: Dict[str, Any] = None,
-        save_args: Dict[str, Any] = None,
+        load_args: dict[str, Any] = None,
+        save_args: dict[str, Any] = None,
         version: Version = None,
-        credentials: Dict[str, Any] = None,
-        fs_args: Dict[str, Any] = None,
+        credentials: dict[str, Any] = None,
+        fs_args: dict[str, Any] = None,
     ):
         """Creates a new instance of ``EagerPolarsDataset`` pointing to a concrete
         data file on a specific filesystem.
         The appropriate polars load/save methods are dynamically
@@ -178,7 +178,7 @@ def _exists(self) -> bool:
 
         return self._fs.exists(load_path)
 
-    def _describe(self) -> Dict[str, Any]:
+    def _describe(self) -> dict[str, Any]:
         return {
             "file_format": self._file_format,
             "filepath": self._filepath,
diff --git a/kedro-datasets/kedro_datasets/polars/lazy_polars_dataset.py b/kedro-datasets/kedro_datasets/polars/lazy_polars_dataset.py
index 6a57f20bd..24aae9963 100644
--- a/kedro-datasets/kedro_datasets/polars/lazy_polars_dataset.py
+++ b/kedro-datasets/kedro_datasets/polars/lazy_polars_dataset.py
@@ -6,7 +6,7 @@
 from copy import deepcopy
 from io import BytesIO
 from pathlib import PurePosixPath
-from typing import Any, ClassVar, Dict, Optional, Union
+from typing import Any, ClassVar, Optional, Union
 
 import fsspec
 import polars as pl
@@ -70,20 +70,20 @@ class LazyPolarsDataset(AbstractVersionedDataset[pl.LazyFrame, PolarsFrame]):
 
     """
 
-    DEFAULT_LOAD_ARGS: ClassVar[Dict[str, Any]] = {}
-    DEFAULT_SAVE_ARGS: ClassVar[Dict[str, Any]] = {}
+    DEFAULT_LOAD_ARGS: ClassVar[dict[str, Any]] = {}
+    DEFAULT_SAVE_ARGS: ClassVar[dict[str, Any]] = {}
 
     def __init__(  # noqa: PLR0913
         self,
         *,
         filepath: str,
         file_format: str,
-        load_args: Optional[Dict[str, Any]] = None,
-        save_args: Optional[Dict[str, Any]] = None,
+        load_args: Optional[dict[str, Any]] = None,
+        save_args: Optional[dict[str, Any]] = None,
         version: Version = None,
-        credentials: Optional[Dict[str, Any]] = None,
-        fs_args: Optional[Dict[str, Any]] = None,
-        metadata: Optional[Dict[str, Any]] = None,
+        credentials: Optional[dict[str, Any]] = None,
+        fs_args: Optional[dict[str, Any]] = None,
+        metadata: Optional[dict[str, Any]] = None,
     ) -> None:
         """Creates a new instance of ``LazyPolarsDataset`` pointing to a concrete
         data file on a specific filesystem.
@@ -179,7 +179,7 @@ def __init__(  # noqa: PLR0913
         self._save_args.pop("storage_options", None)
         self._load_args.pop("storage_options", None)
 
-    def _describe(self) -> Dict[str, Any]:
+    def _describe(self) -> dict[str, Any]:
         return {
             "filepath": self._filepath,
             "protocol": self._protocol,
diff --git a/kedro-datasets/kedro_datasets/redis/redis_dataset.py b/kedro-datasets/kedro_datasets/redis/redis_dataset.py
index 8cccc1423..d767c5bc2 100644
--- a/kedro-datasets/kedro_datasets/redis/redis_dataset.py
+++ b/kedro-datasets/kedro_datasets/redis/redis_dataset.py
@@ -4,7 +4,7 @@
 import importlib
 import os
 from copy import deepcopy
-from typing import Any, Dict
+from typing import Any
 
 import redis
 
@@ -56,19 +56,19 @@ class PickleDataset(AbstractDataset[Any, Any]):
     """
 
     DEFAULT_REDIS_URL = os.getenv("REDIS_URL", "redis://127.0.0.1:6379")
-    DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
-    DEFAULT_SAVE_ARGS: Dict[str, Any] = {}
+    DEFAULT_LOAD_ARGS: dict[str, Any] = {}
+    DEFAULT_SAVE_ARGS: dict[str, Any] = {}
 
     def __init__(  # noqa: PLR0913
         self,
         *,
         key: str,
         backend: str = "pickle",
-        load_args: Dict[str, Any] = None,
-        save_args: Dict[str, Any] = None,
-        credentials: Dict[str, Any] = None,
-        redis_args: Dict[str, Any] = None,
-        metadata: Dict[str, Any] = None,
+        load_args: dict[str, Any] = None,
+        save_args: dict[str, Any] = None,
+        credentials: dict[str, Any] = None,
+        redis_args: dict[str, Any] = None,
+        metadata: dict[str, Any] = None,
     ) -> None:
         """Creates a new instance of ``PickleDataset``. This loads/saves data from/to
         a Redis database while deserialising/serialising.
         Supports custom backends to
@@ -161,7 +161,7 @@ def __init__(  # noqa: PLR0913
             **self._redis_from_url_args, **_credentials
         )
 
-    def _describe(self) -> Dict[str, Any]:
+    def _describe(self) -> dict[str, Any]:
         return {"key": self._key, **self._redis_from_url_args}
 
     # `redis_db` mypy does not work since it is optional and optional is not
diff --git a/kedro-datasets/kedro_datasets/snowflake/snowpark_dataset.py b/kedro-datasets/kedro_datasets/snowflake/snowpark_dataset.py
index 7463236a9..4c7ab8cff 100644
--- a/kedro-datasets/kedro_datasets/snowflake/snowpark_dataset.py
+++ b/kedro-datasets/kedro_datasets/snowflake/snowpark_dataset.py
@@ -2,7 +2,7 @@
 """
 import logging
 from copy import deepcopy
-from typing import Any, Dict
+from typing import Any
 
 import snowflake.snowpark as sp
 
@@ -98,8 +98,8 @@ class SnowparkTableDataset(AbstractDataset):
     # for parallelism within a pipeline please consider
     # ``ThreadRunner`` instead
     _SINGLE_PROCESS = True
-    DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
-    DEFAULT_SAVE_ARGS: Dict[str, Any] = {}
+    DEFAULT_LOAD_ARGS: dict[str, Any] = {}
+    DEFAULT_SAVE_ARGS: dict[str, Any] = {}
 
     def __init__(  # noqa: PLR0913
         self,
         *,
         table_name: str,
         schema: str = None,
         database: str = None,
-        load_args: Dict[str, Any] = None,
-        save_args: Dict[str, Any] = None,
-        credentials: Dict[str, Any] = None,
-        metadata: Dict[str, Any] = None,
+        load_args: dict[str, Any] = None,
+        save_args: dict[str, Any] = None,
+        credentials: dict[str, Any] = None,
+        metadata: dict[str, Any] = None,
     ) -> None:
         """Creates a new instance of ``SnowparkTableDataset``.
 
@@ -174,7 +174,7 @@ def __init__(  # noqa: PLR0913
 
         self.metadata = metadata
 
-    def _describe(self) -> Dict[str, Any]:
+    def _describe(self) -> dict[str, Any]:
         return {
             "table_name": self._table_name,
             "database": self._database,
diff --git a/kedro-datasets/kedro_datasets/spark/deltatable_dataset.py b/kedro-datasets/kedro_datasets/spark/deltatable_dataset.py
index 313cbb821..a967d5416 100644
--- a/kedro-datasets/kedro_datasets/spark/deltatable_dataset.py
+++ b/kedro-datasets/kedro_datasets/spark/deltatable_dataset.py
@@ -2,7 +2,7 @@
 ``delta-spark``.
 """
 from pathlib import PurePosixPath
-from typing import Any, Dict, NoReturn
+from typing import Any, NoReturn
 
 from delta.tables import DeltaTable
 from pyspark.sql.utils import AnalysisException
@@ -65,7 +65,7 @@ class DeltaTableDataset(AbstractDataset[None, DeltaTable]):
     # using ``ThreadRunner`` instead
     _SINGLE_PROCESS = True
 
-    def __init__(self, *, filepath: str, metadata: Dict[str, Any] = None) -> None:
+    def __init__(self, *, filepath: str, metadata: dict[str, Any] = None) -> None:
         """Creates a new instance of ``DeltaTableDataset``.
diff --git a/kedro-datasets/kedro_datasets/spark/spark_dataset.py b/kedro-datasets/kedro_datasets/spark/spark_dataset.py
index c73b8dcf6..5551f9ce8 100644
--- a/kedro-datasets/kedro_datasets/spark/spark_dataset.py
+++ b/kedro-datasets/kedro_datasets/spark/spark_dataset.py
@@ -8,7 +8,7 @@
 from fnmatch import fnmatch
 from functools import partial
 from pathlib import PurePosixPath
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Any, Optional
 from warnings import warn

 import fsspec
@@ -62,7 +62,7 @@ def _parse_glob_pattern(pattern: str) -> str:
     return "/".join(clean)


-def _split_filepath(filepath: str) -> Tuple[str, str]:
+def _split_filepath(filepath: str) -> tuple[str, str]:
     split_ = filepath.split("://", 1)
     if len(split_) == 2:  # noqa: PLR2004
         return split_[0] + "://", split_[1]
@@ -73,7 +73,7 @@ def _strip_dbfs_prefix(path: str, prefix: str = "/dbfs") -> str:
     return path[len(prefix) :] if path.startswith(prefix) else path


-def _dbfs_glob(pattern: str, dbutils: Any) -> List[str]:
+def _dbfs_glob(pattern: str, dbutils: Any) -> list[str]:
     """Perform a custom glob search in DBFS using the provided pattern.
     It is assumed that version paths are managed by Kedro only.

@@ -162,7 +162,7 @@ def hdfs_exists(self, hdfs_path: str) -> bool:
         """
         return bool(self.status(hdfs_path, strict=False))

-    def hdfs_glob(self, pattern: str) -> List[str]:
+    def hdfs_glob(self, pattern: str) -> list[str]:
         """Perform a glob search in HDFS using the provided pattern.

         Args:
@@ -257,19 +257,19 @@ class SparkDataset(AbstractVersionedDataset[DataFrame, DataFrame]):
     # for parallelism within a Spark pipeline please consider
     # ``ThreadRunner`` instead
     _SINGLE_PROCESS = True
-    DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
-    DEFAULT_SAVE_ARGS: Dict[str, Any] = {}
+    DEFAULT_LOAD_ARGS: dict[str, Any] = {}
+    DEFAULT_SAVE_ARGS: dict[str, Any] = {}

     def __init__(  # noqa: PLR0913
         self,
         *,
         filepath: str,
         file_format: str = "parquet",
-        load_args: Dict[str, Any] = None,
-        save_args: Dict[str, Any] = None,
+        load_args: dict[str, Any] = None,
+        save_args: dict[str, Any] = None,
         version: Version = None,
-        credentials: Dict[str, Any] = None,
-        metadata: Dict[str, Any] = None,
+        credentials: dict[str, Any] = None,
+        metadata: dict[str, Any] = None,
     ) -> None:
         """Creates a new instance of ``SparkDataset``.

@@ -381,7 +381,7 @@ def __init__(  # noqa: PLR0913
         self._handle_delta_format()

     @staticmethod
-    def _load_schema_from_file(schema: Dict[str, Any]) -> StructType:
+    def _load_schema_from_file(schema: dict[str, Any]) -> StructType:
         filepath = schema.get("filepath")
         if not filepath:
             raise DatasetError(
@@ -405,7 +405,7 @@ def _load_schema_from_file(schema: Dict[str, Any]) -> StructType:
                 f"provide a valid JSON-serialised 'pyspark.sql.types.StructType'."
             ) from exc

-    def _describe(self) -> Dict[str, Any]:
+    def _describe(self) -> dict[str, Any]:
         return {
             "filepath": self._fs_prefix + str(self._filepath),
             "file_format": self._file_format,
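Since ``_split_filepath`` appears in full above, its contract can be shown directly. It is a private helper, imported here purely for demonstration:

from kedro_datasets.spark.spark_dataset import _split_filepath

# A "protocol://rest" filepath splits into its prefix and remainder...
print(_split_filepath("s3a://bucket/data.parquet"))  # ('s3a://', 'bucket/data.parquet')
# ...while a plain local path gets an empty prefix.
print(_split_filepath("data/local.parquet"))  # ('', 'data/local.parquet')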
diff --git a/kedro-datasets/kedro_datasets/spark/spark_hive_dataset.py b/kedro-datasets/kedro_datasets/spark/spark_hive_dataset.py
index aa0bf7ea7..c28879d23 100644
--- a/kedro-datasets/kedro_datasets/spark/spark_hive_dataset.py
+++ b/kedro-datasets/kedro_datasets/spark/spark_hive_dataset.py
@@ -3,7 +3,7 @@
 """
 import pickle
 from copy import deepcopy
-from typing import Any, Dict, List
+from typing import Any

 from pyspark.sql import DataFrame, Window
 from pyspark.sql.functions import col, lit, row_number
@@ -66,7 +66,7 @@ class SparkHiveDataset(AbstractDataset[DataFrame, DataFrame]):
         >>> reloaded.take(4)
     """

-    DEFAULT_SAVE_ARGS: Dict[str, Any] = {}
+    DEFAULT_SAVE_ARGS: dict[str, Any] = {}

     def __init__(  # noqa: PLR0913
         self,
@@ -74,9 +74,9 @@ def __init__(  # noqa: PLR0913
         database: str,
         table: str,
         write_mode: str = "errorifexists",
-        table_pk: List[str] = None,
-        save_args: Dict[str, Any] = None,
-        metadata: Dict[str, Any] = None,
+        table_pk: list[str] = None,
+        save_args: dict[str, Any] = None,
+        metadata: dict[str, Any] = None,
     ) -> None:
         """Creates a new instance of ``SparkHiveDataset``.

@@ -127,7 +127,7 @@ def __init__(  # noqa: PLR0913

         self.metadata = metadata

-    def _describe(self) -> Dict[str, Any]:
+    def _describe(self) -> dict[str, Any]:
         return {
             "database": self._database,
             "table": self._table,
diff --git a/kedro-datasets/kedro_datasets/spark/spark_jdbc_dataset.py b/kedro-datasets/kedro_datasets/spark/spark_jdbc_dataset.py
index a04277e82..77013c54e 100644
--- a/kedro-datasets/kedro_datasets/spark/spark_jdbc_dataset.py
+++ b/kedro-datasets/kedro_datasets/spark/spark_jdbc_dataset.py
@@ -1,6 +1,6 @@
 """SparkJDBCDataset to load and save a PySpark DataFrame via JDBC."""
 from copy import deepcopy
-from typing import Any, Dict
+from typing import Any

 from pyspark.sql import DataFrame

@@ -65,18 +65,18 @@ class SparkJDBCDataset(AbstractDataset[DataFrame, DataFrame]):

     """

-    DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
-    DEFAULT_SAVE_ARGS: Dict[str, Any] = {}
+    DEFAULT_LOAD_ARGS: dict[str, Any] = {}
+    DEFAULT_SAVE_ARGS: dict[str, Any] = {}

     def __init__(  # noqa: PLR0913
         self,
         *,
         url: str,
         table: str,
-        credentials: Dict[str, Any] = None,
-        load_args: Dict[str, Any] = None,
-        save_args: Dict[str, Any] = None,
-        metadata: Dict[str, Any] = None,
+        credentials: dict[str, Any] = None,
+        load_args: dict[str, Any] = None,
+        save_args: dict[str, Any] = None,
+        metadata: dict[str, Any] = None,
     ) -> None:
         """Creates a new ``SparkJDBCDataset``.

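A hedged usage sketch for the ``SparkJDBCDataset`` signature above; the URL, table and credentials are placeholders, and an active Spark session with the relevant JDBC driver on its classpath is assumed:

from kedro_datasets.spark import SparkJDBCDataset

dataset = SparkJDBCDataset(
    url="jdbc:postgresql://localhost:5432/warehouse",  # placeholder connection string
    table="trips",
    credentials={"user": "analyst", "password": "***"},  # merged into the JDBC properties
    load_args={"properties": {"driver": "org.postgresql.Driver"}},
)
df = dataset.load()  # returns a pyspark.sql.DataFrame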
@@ -146,7 +146,7 @@ def __init__(  # noqa: PLR0913
         self._load_args["properties"] = {**load_properties, **credentials}
         self._save_args["properties"] = {**save_properties, **credentials}

-    def _describe(self) -> Dict[str, Any]:
+    def _describe(self) -> dict[str, Any]:
         load_args = self._load_args
         save_args = self._save_args

diff --git a/kedro-datasets/kedro_datasets/spark/spark_streaming_dataset.py b/kedro-datasets/kedro_datasets/spark/spark_streaming_dataset.py
index 5bd996d3c..8baf7cd9d 100644
--- a/kedro-datasets/kedro_datasets/spark/spark_streaming_dataset.py
+++ b/kedro-datasets/kedro_datasets/spark/spark_streaming_dataset.py
@@ -1,7 +1,7 @@
 """SparkStreamingDataset to load and save a PySpark Streaming DataFrame."""
 from copy import deepcopy
 from pathlib import PurePosixPath
-from typing import Any, Dict
+from typing import Any

 from pyspark.sql import DataFrame
 from pyspark.sql.utils import AnalysisException
@@ -45,8 +45,8 @@ def __init__(
         *,
         filepath: str = "",
         file_format: str = "",
-        save_args: Dict[str, Any] = None,
-        load_args: Dict[str, Any] = None,
+        save_args: dict[str, Any] = None,
+        load_args: dict[str, Any] = None,
     ) -> None:
         """Creates a new instance of SparkStreamingDataset.

@@ -95,7 +95,7 @@ def __init__(
         if isinstance(self._schema, dict):
             self._schema = SparkDataset._load_schema_from_file(self._schema)

-    def _describe(self) -> Dict[str, Any]:
+    def _describe(self) -> dict[str, Any]:
         """Returns a dict that describes attributes of the dataset."""
         return {
             "filepath": self._fs_prefix + str(self._filepath),
diff --git a/kedro-datasets/kedro_datasets/svmlight/svmlight_dataset.py b/kedro-datasets/kedro_datasets/svmlight/svmlight_dataset.py
index 293f12810..5ca7b9173 100644
--- a/kedro-datasets/kedro_datasets/svmlight/svmlight_dataset.py
+++ b/kedro-datasets/kedro_datasets/svmlight/svmlight_dataset.py
@@ -4,7 +4,7 @@
 """
 from copy import deepcopy
 from pathlib import PurePosixPath
-from typing import Any, Dict, Optional, Tuple, Union
+from typing import Any, Optional, Union

 import fsspec
 from kedro.io.core import Version, get_filepath_str, get_protocol_and_path
@@ -19,9 +19,9 @@
 # in kedro-plugins (https://github.com/kedro-org/kedro-plugins)

 # Type of data input
-_DI = Tuple[Union[ndarray, csr_matrix], ndarray]
+_DI = tuple[Union[ndarray, csr_matrix], ndarray]
 # Type of data output
-_DO = Tuple[csr_matrix, ndarray]
+_DO = tuple[csr_matrix, ndarray]


 class SVMLightDataset(AbstractVersionedDataset[_DI, _DO]):
@@ -83,19 +83,19 @@ class SVMLightDataset(AbstractVersionedDataset[_DI, _DO]):

     """

-    DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
-    DEFAULT_SAVE_ARGS: Dict[str, Any] = {}
+    DEFAULT_LOAD_ARGS: dict[str, Any] = {}
+    DEFAULT_SAVE_ARGS: dict[str, Any] = {}

     def __init__(  # noqa: PLR0913
         self,
         *,
         filepath: str,
-        load_args: Dict[str, Any] = None,
-        save_args: Dict[str, Any] = None,
+        load_args: dict[str, Any] = None,
+        save_args: dict[str, Any] = None,
         version: Optional[Version] = None,
-        credentials: Dict[str, Any] = None,
-        fs_args: Dict[str, Any] = None,
-        metadata: Dict[str, Any] = None,
+        credentials: dict[str, Any] = None,
+        fs_args: dict[str, Any] = None,
+        metadata: dict[str, Any] = None,
     ) -> None:
         """Creates a new instance of SVMLightDataset to load/save data from a
         svmlight/libsvm file.
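The ``_DI``/``_DO`` aliases above encode the dataset's contract: save accepts a dense ndarray or a sparse csr_matrix plus a label vector, and load always returns a ``(csr_matrix, ndarray)`` pair. An illustrative round trip; the filepath is invented, and scikit-learn and scipy are assumed installed:

import numpy as np
from kedro_datasets.svmlight import SVMLightDataset

dataset = SVMLightDataset(filepath="data/features.svm")
features = np.array([[1.0, 0.0], [0.0, 2.0]])
labels = np.array([0, 1])
dataset.save((features, labels))   # _DI: tuple[Union[ndarray, csr_matrix], ndarray]
matrix, reloaded = dataset.load()  # _DO: tuple[csr_matrix, ndarray]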
diff --git a/kedro-datasets/kedro_datasets/tensorflow/tensorflow_model_dataset.py b/kedro-datasets/kedro_datasets/tensorflow/tensorflow_model_dataset.py
index e9acfedae..dfa89190e 100644
--- a/kedro-datasets/kedro_datasets/tensorflow/tensorflow_model_dataset.py
+++ b/kedro-datasets/kedro_datasets/tensorflow/tensorflow_model_dataset.py
@@ -4,7 +4,7 @@
 import copy
 import tempfile
 from pathlib import PurePath, PurePosixPath
-from typing import Any, Dict
+from typing import Any

 import fsspec
 import tensorflow as tf
@@ -57,19 +57,19 @@ class TensorFlowModelDataset(AbstractVersionedDataset[tf.keras.Model, tf.keras.M

     """

-    DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
-    DEFAULT_SAVE_ARGS: Dict[str, Any] = {"save_format": "tf"}
+    DEFAULT_LOAD_ARGS: dict[str, Any] = {}
+    DEFAULT_SAVE_ARGS: dict[str, Any] = {"save_format": "tf"}

     def __init__(  # noqa: PLR0913
         self,
         *,
         filepath: str,
-        load_args: Dict[str, Any] = None,
-        save_args: Dict[str, Any] = None,
+        load_args: dict[str, Any] = None,
+        save_args: dict[str, Any] = None,
         version: Version = None,
-        credentials: Dict[str, Any] = None,
-        fs_args: Dict[str, Any] = None,
-        metadata: Dict[str, Any] = None,
+        credentials: dict[str, Any] = None,
+        fs_args: dict[str, Any] = None,
+        metadata: dict[str, Any] = None,
     ) -> None:
         """Creates a new instance of ``TensorFlowModelDataset``.

@@ -171,7 +171,7 @@ def _exists(self) -> bool:
             return False
         return self._fs.exists(load_path)

-    def _describe(self) -> Dict[str, Any]:
+    def _describe(self) -> dict[str, Any]:
         return {
             "filepath": self._filepath,
             "protocol": self._protocol,
diff --git a/kedro-datasets/kedro_datasets/text/text_dataset.py b/kedro-datasets/kedro_datasets/text/text_dataset.py
index b734bb429..d96f1729a 100644
--- a/kedro-datasets/kedro_datasets/text/text_dataset.py
+++ b/kedro-datasets/kedro_datasets/text/text_dataset.py
@@ -3,7 +3,7 @@
 """
 from copy import deepcopy
 from pathlib import PurePosixPath
-from typing import Any, Dict
+from typing import Any

 import fsspec
 from kedro.io.core import Version, get_filepath_str, get_protocol_and_path
@@ -47,9 +47,9 @@ def __init__(  # noqa: PLR0913
         *,
         filepath: str,
         version: Version = None,
-        credentials: Dict[str, Any] = None,
-        fs_args: Dict[str, Any] = None,
-        metadata: Dict[str, Any] = None,
+        credentials: dict[str, Any] = None,
+        fs_args: dict[str, Any] = None,
+        metadata: dict[str, Any] = None,
     ) -> None:
         """Creates a new instance of ``TextDataset`` pointing to a concrete text file
         on a specific filesystem.
@@ -102,7 +102,7 @@ def __init__(  # noqa: PLR0913
         self._fs_open_args_load = _fs_open_args_load
         self._fs_open_args_save = _fs_open_args_save

-    def _describe(self) -> Dict[str, Any]:
+    def _describe(self) -> dict[str, Any]:
         return {
             "filepath": self._filepath,
             "protocol": self._protocol,
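The ``DEFAULT_LOAD_ARGS``/``DEFAULT_SAVE_ARGS`` attributes retyped throughout this patch all feed the same convention: copy the class defaults, then overlay whatever the user passed. A standalone sketch of that convention, an assumption about the shared pattern rather than code lifted from the patch:

from copy import deepcopy
from typing import Any

DEFAULT_SAVE_ARGS: dict[str, Any] = {"save_format": "tf"}


def merge_save_args(save_args: dict[str, Any] = None) -> dict[str, Any]:
    # Defaults are deep-copied so per-instance updates never mutate the class attribute.
    merged = deepcopy(DEFAULT_SAVE_ARGS)
    if save_args is not None:
        merged.update(save_args)
    return merged


print(merge_save_args())                       # {'save_format': 'tf'}
print(merge_save_args({"save_format": "h5"}))  # {'save_format': 'h5'}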
""" import json -from typing import Dict, NoReturn +from typing import NoReturn from kedro.io.core import DatasetError, get_filepath_str @@ -47,7 +47,7 @@ class MetricsDataset(json_dataset.JSONDataset): def _load(self) -> NoReturn: raise DatasetError(f"Loading not supported for '{self.__class__.__name__}'") - def _save(self, data: Dict[str, float]) -> None: + def _save(self, data: dict[str, float]) -> None: """Converts all values in the data from a ``MetricsDataset`` to float to make sure they are numeric values which can be displayed in Kedro Viz and then saves the dataset. """ diff --git a/kedro-datasets/kedro_datasets/video/video_dataset.py b/kedro-datasets/kedro_datasets/video/video_dataset.py index 0f10b7681..59af4fb48 100644 --- a/kedro-datasets/kedro_datasets/video/video_dataset.py +++ b/kedro-datasets/kedro_datasets/video/video_dataset.py @@ -5,9 +5,10 @@ import itertools import tempfile from collections import abc +from collections.abc import Generator, Sequence from copy import deepcopy from pathlib import Path, PurePosixPath -from typing import Any, Dict, Generator, Optional, Sequence, Tuple, Union +from typing import Any, Optional, Union import cv2 import fsspec @@ -54,7 +55,7 @@ def fps(self) -> float: raise NotImplementedError() @property - def size(self) -> Tuple[int, int]: + def size(self) -> tuple[int, int]: """Get the resolution of the video""" raise NotImplementedError() @@ -84,7 +85,7 @@ def fps(self) -> float: return self._cap.get(cv2.CAP_PROP_FPS) @property - def size(self) -> Tuple[int, int]: + def size(self) -> tuple[int, int]: width = int(self._cap.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(self._cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) return width, height @@ -148,7 +149,7 @@ def fps(self) -> float: return self._fps @property - def size(self) -> Tuple[int, int]: + def size(self) -> tuple[int, int]: return self._size def __getitem__(self, index: Union[int, slice]): @@ -183,7 +184,7 @@ def fps(self) -> float: return self._fps @property - def size(self) -> Tuple[int, int]: + def size(self) -> tuple[int, int]: return self._size def __getitem__(self, index: Union[int, slice]): @@ -271,9 +272,9 @@ def __init__( # noqa: PLR0913 *, filepath: str, fourcc: Optional[str] = "mp4v", - credentials: Dict[str, Any] = None, - fs_args: Dict[str, Any] = None, - metadata: Dict[str, Any] = None, + credentials: dict[str, Any] = None, + fs_args: dict[str, Any] = None, + metadata: dict[str, Any] = None, ) -> None: """Creates a new instance of VideoDataset to load / save video data for given filepath. 
@@ -361,7 +362,7 @@ def _write_to_filepath(self, video: AbstractVideo, filepath: str) -> None:
         finally:
             writer.release()

-    def _describe(self) -> Dict[str, Any]:
+    def _describe(self) -> dict[str, Any]:
         return {"filepath": self._filepath, "protocol": self._protocol}

     def _exists(self) -> bool:
diff --git a/kedro-datasets/kedro_datasets/yaml/yaml_dataset.py b/kedro-datasets/kedro_datasets/yaml/yaml_dataset.py
index f3b0ac7e9..647ebb8e1 100644
--- a/kedro-datasets/kedro_datasets/yaml/yaml_dataset.py
+++ b/kedro-datasets/kedro_datasets/yaml/yaml_dataset.py
@@ -3,7 +3,7 @@
 """
 from copy import deepcopy
 from pathlib import PurePosixPath
-from typing import Any, Dict
+from typing import Any

 import fsspec
 import yaml
@@ -12,7 +12,7 @@
 from kedro_datasets._io import AbstractVersionedDataset, DatasetError


-class YAMLDataset(AbstractVersionedDataset[Dict, Dict]):
+class YAMLDataset(AbstractVersionedDataset[dict, dict]):
     """``YAMLDataset`` loads/saves data from/to a YAML file using an underlying
     filesystem (e.g.: local, S3, GCS). It uses PyYAML to handle the YAML file.

@@ -43,17 +43,17 @@ class YAMLDataset(AbstractVersionedDataset[Dict, Dict]):

     """

-    DEFAULT_SAVE_ARGS: Dict[str, Any] = {"default_flow_style": False}
+    DEFAULT_SAVE_ARGS: dict[str, Any] = {"default_flow_style": False}

     def __init__(  # noqa: PLR0913
         self,
         *,
         filepath: str,
-        save_args: Dict[str, Any] = None,
+        save_args: dict[str, Any] = None,
         version: Version = None,
-        credentials: Dict[str, Any] = None,
-        fs_args: Dict[str, Any] = None,
-        metadata: Dict[str, Any] = None,
+        credentials: dict[str, Any] = None,
+        fs_args: dict[str, Any] = None,
+        metadata: dict[str, Any] = None,
     ) -> None:
         """Creates a new instance of ``YAMLDataset`` pointing to a concrete YAML file
         on a specific filesystem.
@@ -114,7 +114,7 @@ def __init__(  # noqa: PLR0913
         self._fs_open_args_load = _fs_open_args_load
         self._fs_open_args_save = _fs_open_args_save

-    def _describe(self) -> Dict[str, Any]:
+    def _describe(self) -> dict[str, Any]:
         return {
             "filepath": self._filepath,
             "protocol": self._protocol,
@@ -122,13 +122,13 @@ def _describe(self) -> Dict[str, Any]:
             "version": self._version,
         }

-    def _load(self) -> Dict:
+    def _load(self) -> dict:
         load_path = get_filepath_str(self._get_load_path(), self._protocol)
         with self._fs.open(load_path, **self._fs_open_args_load) as fs_file:
             return yaml.safe_load(fs_file)

-    def _save(self, data: Dict) -> None:
+    def _save(self, data: dict) -> None:
         save_path = get_filepath_str(self._get_save_path(), self._protocol)
         with self._fs.open(save_path, **self._fs_open_args_save) as fs_file:
             yaml.dump(data, fs_file, **self._save_args)
diff --git a/kedro-datasets/pyproject.toml b/kedro-datasets/pyproject.toml
index f8b837feb..660813eca 100644
--- a/kedro-datasets/pyproject.toml
+++ b/kedro-datasets/pyproject.toml
@@ -8,7 +8,7 @@ authors = [
     {name = "Kedro"}
 ]
 description = "Kedro-Datasets is where you can find all of Kedro's data connectors."
-requires-python = ">=3.8"
+requires-python = ">=3.9"
 license = {text = "Apache Software License (Apache 2.0)"}
 dependencies = [
     "kedro>=0.16",
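The ``requires-python`` bump is the enforcement point for every annotation change above: pip will refuse to install this distribution on 3.8. A quick illustrative check with the ``packaging`` library (which also appears in the test requirements below):

from packaging.specifiers import SpecifierSet

spec = SpecifierSet(">=3.9")
print("3.8.18" in spec)  # False: 3.8 interpreters are now rejected at install time
print("3.11.4" in spec)  # True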
diff --git a/kedro-datasets/setup.py b/kedro-datasets/setup.py
index b8c4f4909..754f32fb1 100644
--- a/kedro-datasets/setup.py
+++ b/kedro-datasets/setup.py
@@ -41,8 +41,7 @@ def _collect_requirements(requires):
     "pandas.HDFDataset": [
         PANDAS,
         "tables~=3.6.0; platform_system == 'Windows'",
-        "tables~=3.6, <3.9; platform_system != 'Windows' and python_version<'3.9'",
-        "tables~=3.6; platform_system != 'Windows' and python_version>='3.9'",
+        "tables~=3.6; platform_system != 'Windows'",
     ],
     "pandas.JSONDataset": [PANDAS],
     "pandas.ParquetDataset": [PANDAS, "pyarrow>=6.0"],
@@ -72,7 +71,7 @@ def _collect_requirements(requires):
 redis_require = {"redis.PickleDataset": ["redis~=4.1"]}
 snowflake_require = {
     "snowflake.SnowparkTableDataset": [
-        "snowflake-snowpark-python~=1.0.0",
+        "snowflake-snowpark-python~=1.0",
         "pyarrow~=8.0",
     ]
 }
@@ -212,14 +211,13 @@ def _collect_requirements(requires):
         "requests~=2.20",
         "ruff~=0.0.290",
         "s3fs>=0.3.0, <0.5",  # Needs to be at least 0.3.0 to make use of `cachable` attribute on S3FileSystem.
-        "snowflake-snowpark-python~=1.0.0; python_version == '3.8'",
+        "snowflake-snowpark-python~=1.0; python_version == '3.9'",
         "scikit-learn>=1.0.2,<2",
         "scipy>=1.7.3",
         "packaging",
         "SQLAlchemy~=1.2",
         "tables~=3.8.0; platform_system == 'Windows'",  # Import issues with python 3.8 with pytables pinning to 3.8.0 fixes this https://github.com/PyTables/PyTables/issues/933#issuecomment-1555917593
-        "tables~=3.6, <3.9; platform_system != 'Windows' and python_version<'3.9'",
-        "tables~=3.6; platform_system != 'Windows' and python_version>='3.9'",
+        "tables~=3.6; platform_system != 'Windows'",
         "tensorflow-macos~=2.0; platform_system == 'Darwin' and platform_machine == 'arm64'",
         "tensorflow~=2.0; platform_system != 'Darwin' or platform_machine != 'arm64'",
         "triad>=0.6.7, <1.0",
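Finally, the ``python_version`` environment markers edited above can be sanity-checked the same way; an illustrative snippet using ``packaging``, not part of the patch:

from packaging.markers import Marker

marker = Marker("python_version == '3.9'")
print(marker.evaluate({"python_version": "3.9"}))   # True: the pinned dependency applies
print(marker.evaluate({"python_version": "3.11"}))  # False: the dependency is skipped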