diff --git a/.circleci/continue_config.yml b/.circleci/continue_config.yml
index 5446be9a41..0a2a2fcf98 100644
--- a/.circleci/continue_config.yml
+++ b/.circleci/continue_config.yml
@@ -52,7 +52,7 @@ commands:
name: Install venv for some pre-commit hooks
command: conda install -y virtualenv
- run:
- # pytables does not work properly with python 3.9 to handle our HDFDataSet
+ # pytables does not work properly with python 3.9 to handle our HDFDataset
# if pip-installed, so we install this dependency via conda
name: Install pytables
command: conda install -c conda-forge pytables -y
diff --git a/docs/source/kedro.io.rst b/docs/source/kedro.io.rst
index 56c6a7d6d5..e86af85047 100644
--- a/docs/source/kedro.io.rst
+++ b/docs/source/kedro.io.rst
@@ -13,16 +13,11 @@ kedro.io
kedro.io.AbstractDataset
kedro.io.AbstractVersionedDataset
- kedro.io.CachedDataSet
kedro.io.CachedDataset
kedro.io.DataCatalog
- kedro.io.IncrementalDataSet
kedro.io.IncrementalDataset
- kedro.io.LambdaDataSet
kedro.io.LambdaDataset
- kedro.io.MemoryDataSet
kedro.io.MemoryDataset
- kedro.io.PartitionedDataSet
kedro.io.PartitionedDataset
kedro.io.Version
@@ -32,9 +27,6 @@ kedro.io
:toctree:
:template: autosummary/class.rst
- kedro.io.DataSetAlreadyExistsError
- kedro.io.DataSetError
- kedro.io.DataSetNotFoundError
kedro.io.DatasetAlreadyExistsError
kedro.io.DatasetError
kedro.io.DatasetNotFoundError
diff --git a/docs/source/kedro_datasets.rst b/docs/source/kedro_datasets.rst
index d8db36ee0f..6d3077c338 100644
--- a/docs/source/kedro_datasets.rst
+++ b/docs/source/kedro_datasets.rst
@@ -11,91 +11,48 @@ kedro_datasets
:toctree:
:template: autosummary/class.rst
- kedro_datasets.api.APIDataSet
kedro_datasets.api.APIDataset
- kedro_datasets.biosequence.BioSequenceDataSet
kedro_datasets.biosequence.BioSequenceDataset
- kedro_datasets.dask.ParquetDataSet
kedro_datasets.dask.ParquetDataset
- kedro_datasets.databricks.ManagedTableDataSet
kedro_datasets.databricks.ManagedTableDataset
- kedro_datasets.email.EmailMessageDataSet
kedro_datasets.email.EmailMessageDataset
- kedro_datasets.geopandas.GeoJSONDataSet
kedro_datasets.geopandas.GeoJSONDataset
kedro_datasets.holoviews.HoloviewsWriter
- kedro_datasets.json.JSONDataSet
kedro_datasets.json.JSONDataset
kedro_datasets.matplotlib.MatplotlibWriter
- kedro_datasets.networkx.GMLDataSet
kedro_datasets.networkx.GMLDataset
- kedro_datasets.networkx.GraphMLDataSet
kedro_datasets.networkx.GraphMLDataset
- kedro_datasets.networkx.JSONDataSet
kedro_datasets.networkx.JSONDataset
- kedro_datasets.pandas.CSVDataSet
kedro_datasets.pandas.CSVDataset
- kedro_datasets.pandas.DeltaTableDataSet
kedro_datasets.pandas.DeltaTableDataset
- kedro_datasets.pandas.ExcelDataSet
kedro_datasets.pandas.ExcelDataset
- kedro_datasets.pandas.FeatherDataSet
kedro_datasets.pandas.FeatherDataset
- kedro_datasets.pandas.GBQQueryDataSet
kedro_datasets.pandas.GBQQueryDataset
- kedro_datasets.pandas.GBQTableDataSet
kedro_datasets.pandas.GBQTableDataset
- kedro_datasets.pandas.GenericDataSet
kedro_datasets.pandas.GenericDataset
- kedro_datasets.pandas.HDFDataSet
kedro_datasets.pandas.HDFDataset
- kedro_datasets.pandas.JSONDataSet
kedro_datasets.pandas.JSONDataset
- kedro_datasets.pandas.ParquetDataSet
kedro_datasets.pandas.ParquetDataset
- kedro_datasets.pandas.SQLQueryDataSet
kedro_datasets.pandas.SQLQueryDataset
- kedro_datasets.pandas.SQLTableDataSet
kedro_datasets.pandas.SQLTableDataset
- kedro_datasets.pandas.XMLDataSet
kedro_datasets.pandas.XMLDataset
- kedro_datasets.pickle.PickleDataSet
kedro_datasets.pickle.PickleDataset
- kedro_datasets.pillow.ImageDataSet
kedro_datasets.pillow.ImageDataset
- kedro_datasets.plotly.JSONDataSet
kedro_datasets.plotly.JSONDataset
- kedro_datasets.plotly.PlotlyDataSet
kedro_datasets.plotly.PlotlyDataset
- kedro_datasets.polars.CSVDataSet
kedro_datasets.polars.CSVDataset
- kedro_datasets.polars.GenericDataSet
kedro_datasets.polars.GenericDataset
- kedro_datasets.redis.PickleDataSet
kedro_datasets.redis.PickleDataset
- kedro_datasets.snowflake.SnowparkTableDataSet
kedro_datasets.snowflake.SnowparkTableDataset
- kedro_datasets.spark.DeltaTableDataSet
kedro_datasets.spark.DeltaTableDataset
- kedro_datasets.spark.SparkDataSet
kedro_datasets.spark.SparkDataset
- kedro_datasets.spark.SparkHiveDataSet
kedro_datasets.spark.SparkHiveDataset
- kedro_datasets.spark.SparkJDBCDataSet
kedro_datasets.spark.SparkJDBCDataset
- kedro_datasets.spark.SparkStreamingDataSet
kedro_datasets.spark.SparkStreamingDataset
- kedro_datasets.svmlight.SVMLightDataSet
kedro_datasets.svmlight.SVMLightDataset
- kedro_datasets.tensorflow.TensorFlowModelDataSet
kedro_datasets.tensorflow.TensorFlowModelDataset
- kedro_datasets.text.TextDataSet
kedro_datasets.text.TextDataset
- kedro_datasets.tracking.JSONDataSet
kedro_datasets.tracking.JSONDataset
- kedro_datasets.tracking.MetricsDataSet
kedro_datasets.tracking.MetricsDataset
- kedro_datasets.video.VideoDataSet
kedro_datasets.video.VideoDataset
- kedro_datasets.yaml.YAMLDataSet
kedro_datasets.yaml.YAMLDataset
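With the `DataSet`-suffixed entries dropped from the API docs, the `Dataset` spellings are the documented import path. A minimal sketch of the renamed import, assuming a `kedro-datasets` release that ships the new class names:

```python
# Illustrative only: uses the new class name documented above.
from kedro_datasets.pandas import CSVDataset

cars = CSVDataset(filepath="cars.csv", save_args={"index": False})
```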
diff --git a/docs/source/tutorial/set_up_data.md b/docs/source/tutorial/set_up_data.md
index 2315f04068..dfd1c5089b 100644
--- a/docs/source/tutorial/set_up_data.md
+++ b/docs/source/tutorial/set_up_data.md
@@ -28,11 +28,11 @@ Open `conf/base/catalog.yml` for the spaceflights project to inspect the content
```yaml
companies:
- type: pandas.CSVDataSet
+ type: pandas.CSVDataset
filepath: data/01_raw/companies.csv
reviews:
- type: pandas.CSVDataSet
+ type: pandas.CSVDataset
filepath: data/01_raw/reviews.csv
```
@@ -44,7 +44,7 @@ Likewise for the `xlsx` dataset:
```yaml
shuttles:
- type: pandas.ExcelDataSet
+ type: pandas.ExcelDataset
filepath: data/01_raw/shuttles.xlsx
load_args:
engine: openpyxl # Use modern Excel engine (the default since Kedro 0.18.0)
@@ -75,7 +75,7 @@ companies.head()
Click to expand
```
-INFO Loading data from 'companies' (CSVDataSet)
+INFO Loading data from 'companies' (CSVDataset)
Out[1]:
id company_rating company_location total_fleet_count iata_approved
0 35029 100% Niue 4.0 f
@@ -100,7 +100,7 @@ You should see output such as the following:
Click to expand
```
-INFO Loading data from 'shuttles' (ExcelDataSet)
+INFO Loading data from 'shuttles' (ExcelDataset)
Out[1]:
id shuttle_location shuttle_type engine_type ... d_check_complete moon_clearance_complete price company_id
0 63561 Niue Type V5 Quantum ... f f $1,325.0 35029
diff --git a/features/environment.py b/features/environment.py
index 96316cf07f..adaa8ddc4a 100644
--- a/features/environment.py
+++ b/features/environment.py
@@ -118,6 +118,6 @@ def _install_project_requirements(context):
.splitlines()
)
install_reqs = [req for req in install_reqs if "{" not in req and "#" not in req]
- install_reqs.append("kedro-datasets[pandas.CSVDataSet]")
+ install_reqs.append("kedro-datasets[pandas.CSVDataset]")
call([context.pip, "install", *install_reqs], env=context.env)
return context
diff --git a/features/steps/e2e_test_catalog.yml b/features/steps/e2e_test_catalog.yml
index 49cfe5450e..146714c761 100644
--- a/features/steps/e2e_test_catalog.yml
+++ b/features/steps/e2e_test_catalog.yml
@@ -1,20 +1,20 @@
A:
- type: pandas.CSVDataSet
+ type: pandas.CSVDataset
filepath: data/01_raw/input_1.csv
save_args:
index: False
C:
- type: pandas.CSVDataSet
+ type: pandas.CSVDataset
filepath: data/01_raw/input_2.csv
save_args:
index: False
E:
- type: pandas.CSVDataSet
+ type: pandas.CSVDataset
filepath: data/02_intermediate/output_1.csv
save_args:
index: False
F:
- type: pandas.CSVDataSet
+ type: pandas.CSVDataset
filepath: data/02_intermediate/output_2.csv
save_args:
index: False
diff --git a/features/steps/test_starter/{{ cookiecutter.repo_name }}/conf/base/catalog.yml b/features/steps/test_starter/{{ cookiecutter.repo_name }}/conf/base/catalog.yml
index c0c61a3a2c..4d6170963e 100644
--- a/features/steps/test_starter/{{ cookiecutter.repo_name }}/conf/base/catalog.yml
+++ b/features/steps/test_starter/{{ cookiecutter.repo_name }}/conf/base/catalog.yml
@@ -8,11 +8,11 @@
# An example data set definition can look as follows:
#
#bikes:
-# type: pandas.CSVDataSet
+# type: pandas.CSVDataset
# filepath: "data/01_raw/bikes.csv"
#
#weather:
-# type: spark.SparkDataSet
+# type: spark.SparkDataset
# filepath: s3a://your_bucket/data/01_raw/weather*
# file_format: csv
# credentials: dev_s3
@@ -24,7 +24,7 @@
# header: True
#
#scooters:
-# type: pandas.SQLTableDataSet
+# type: pandas.SQLTableDataset
# credentials: scooters_credentials
# table_name: scooters
# load_args:
@@ -35,7 +35,7 @@
# # if_exists: 'fail'
# # if_exists: 'append'
#
-# The Data Catalog supports being able to reference the same file using two different DataSet implementations
+# The Data Catalog supports referencing the same file using two different dataset implementations
# (transcoding), templating and a way to reuse arguments that are frequently repeated. See more here:
# https://kedro.readthedocs.io/en/stable/data/data_catalog.html
#
@@ -43,5 +43,5 @@
# template. Please feel free to remove it once you remove the example pipeline.
example_iris_data:
- type: pandas.CSVDataSet
+ type: pandas.CSVDataset
filepath: data/01_raw/iris.csv
diff --git a/features/steps/test_starter/{{ cookiecutter.repo_name }}/requirements.txt b/features/steps/test_starter/{{ cookiecutter.repo_name }}/requirements.txt
index 39d93ecd53..4d3bf56da4 100644
--- a/features/steps/test_starter/{{ cookiecutter.repo_name }}/requirements.txt
+++ b/features/steps/test_starter/{{ cookiecutter.repo_name }}/requirements.txt
@@ -5,7 +5,7 @@ jupyter~=1.0
jupyterlab_server>=2.11.1, <2.16.0
jupyterlab~=3.0, <3.6.0
kedro~={{ cookiecutter.kedro_version}}
-kedro-datasets[pandas.CSVDataSet]
+kedro-datasets[pandas.CSVDataset]
kedro-telemetry~=0.2.0
pytest-cov~=3.0
pytest-mock>=1.7.1, <2.0
diff --git a/kedro/config/templated_config.py b/kedro/config/templated_config.py
index 615b75fdda..8a4e3835a4 100644
--- a/kedro/config/templated_config.py
+++ b/kedro/config/templated_config.py
@@ -63,8 +63,8 @@ class TemplatedConfigLoader(AbstractConfigLoader):
environment: "dev"
datasets:
- csv: "pandas.CSVDataSet"
- spark: "spark.SparkDataSet"
+ csv: "pandas.CSVDataset"
+ spark: "spark.SparkDataset"
folders:
raw: "01_raw"
diff --git a/kedro/io/__init__.py b/kedro/io/__init__.py
index 26d4c3619c..ad1fc1f99f 100644
--- a/kedro/io/__init__.py
+++ b/kedro/io/__init__.py
@@ -3,7 +3,7 @@
"""
from __future__ import annotations
-from .cached_dataset import CachedDataSet, CachedDataset
+from .cached_dataset import CachedDataset
from .core import (
AbstractDataset,
AbstractVersionedDataset,
@@ -13,52 +13,24 @@
Version,
)
from .data_catalog import DataCatalog
-from .lambda_dataset import LambdaDataSet, LambdaDataset
-from .memory_dataset import MemoryDataSet, MemoryDataset
+from .lambda_dataset import LambdaDataset
+from .memory_dataset import MemoryDataset
from .partitioned_dataset import (
- IncrementalDataSet,
IncrementalDataset,
- PartitionedDataSet,
PartitionedDataset,
)
-# https://github.com/pylint-dev/pylint/issues/4300#issuecomment-1043601901
-DataSetError: type[DatasetError]
-DataSetNotFoundError: type[DatasetNotFoundError]
-DataSetAlreadyExistsError: type[DatasetAlreadyExistsError]
-AbstractDataSet: type[AbstractDataset]
-AbstractVersionedDataSet: type[AbstractVersionedDataset]
-
-
-def __getattr__(name):
- import kedro.io.core # noqa: import-outside-toplevel
-
- if name in (kedro.io.core._DEPRECATED_CLASSES): # noqa: protected-access
- return getattr(kedro.io.core, name)
- raise AttributeError(f"module {repr(__name__)} has no attribute {repr(name)}")
-
-
__all__ = [
- "AbstractDataSet",
"AbstractDataset",
- "AbstractVersionedDataSet",
"AbstractVersionedDataset",
- "CachedDataSet",
"CachedDataset",
"DataCatalog",
- "DataSetAlreadyExistsError",
"DatasetAlreadyExistsError",
- "DataSetError",
"DatasetError",
- "DataSetNotFoundError",
"DatasetNotFoundError",
- "IncrementalDataSet",
"IncrementalDataset",
- "LambdaDataSet",
"LambdaDataset",
- "MemoryDataSet",
"MemoryDataset",
- "PartitionedDataSet",
"PartitionedDataset",
"Version",
]
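With the aliases and the module-level `__getattr__` shim gone, only the new spellings resolve from `kedro.io`. A minimal sketch of downstream usage under that assumption (not part of the diff):

```python
from kedro.io import DataCatalog, MemoryDataset

# Only the new names are importable; `from kedro.io import MemoryDataSet`
# now fails with ImportError instead of emitting a DeprecationWarning.
catalog = DataCatalog(datasets={"cars": MemoryDataset([1, 2, 3])})
print(catalog.load("cars"))  # [1, 2, 3]
```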
diff --git a/kedro/io/cached_dataset.py b/kedro/io/cached_dataset.py
index 6ec2a59fb7..e0935c8100 100644
--- a/kedro/io/cached_dataset.py
+++ b/kedro/io/cached_dataset.py
@@ -5,15 +5,11 @@
from __future__ import annotations
import logging
-import warnings
from typing import Any
from kedro.io.core import VERSIONED_FLAG_KEY, AbstractDataset, Version
from kedro.io.memory_dataset import MemoryDataset
-# https://github.com/pylint-dev/pylint/issues/4300#issuecomment-1043601901
-CachedDataSet: type[CachedDataset]
-
class CachedDataset(AbstractDataset):
"""``CachedDataset`` is a dataset wrapper which caches in memory the data saved,
@@ -121,16 +117,3 @@ def __getstate__(self):
logging.getLogger(__name__).warning("%s: clearing cache to pickle.", str(self))
self._cache.release()
return self.__dict__
-
-
-def __getattr__(name):
- if name == "CachedDataSet":
- alias = CachedDataset
- warnings.warn(
- f"{repr(name)} has been renamed to {repr(alias.__name__)}, "
- f"and the alias will be removed in Kedro 0.19.0",
- DeprecationWarning,
- stacklevel=2,
- )
- return alias
- raise AttributeError(f"module {repr(__name__)} has no attribute {repr(name)}")
diff --git a/kedro/io/core.py b/kedro/io/core.py
index 66dba46495..38307f58e4 100644
--- a/kedro/io/core.py
+++ b/kedro/io/core.py
@@ -29,13 +29,6 @@
PROTOCOL_DELIMITER = "://"
CLOUD_PROTOCOLS = ("s3", "s3n", "s3a", "gcs", "gs", "adl", "abfs", "abfss", "gdrive")
-# https://github.com/pylint-dev/pylint/issues/4300#issuecomment-1043601901
-DataSetError: type[DatasetError]
-DataSetNotFoundError: type[DatasetNotFoundError]
-DataSetAlreadyExistsError: type[DatasetAlreadyExistsError]
-AbstractDataSet: type[AbstractDataset]
-AbstractVersionedDataSet: type[AbstractVersionedDataset]
-
class DatasetError(Exception):
"""``DatasetError`` raised by ``AbstractDataset`` implementations
@@ -757,25 +750,3 @@ def validate_on_forbidden_chars(**kwargs):
raise DatasetError(
f"Neither white-space nor semicolon are allowed in '{key}'."
)
-
-
-_DEPRECATED_CLASSES = {
- "DataSetError": DatasetError,
- "DataSetNotFoundError": DatasetNotFoundError,
- "DataSetAlreadyExistsError": DatasetAlreadyExistsError,
- "AbstractDataSet": AbstractDataset,
- "AbstractVersionedDataSet": AbstractVersionedDataset,
-}
-
-
-def __getattr__(name):
- if name in _DEPRECATED_CLASSES:
- alias = _DEPRECATED_CLASSES[name]
- warnings.warn(
- f"{repr(name)} has been renamed to {repr(alias.__name__)}, "
- f"and the alias will be removed in Kedro 0.19.0",
- DeprecationWarning,
- stacklevel=2,
- )
- return alias
- raise AttributeError(f"module {repr(__name__)} has no attribute {repr(name)}")
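Because the deprecated exception aliases (`DataSetError` and friends) are removed along with the `__getattr__` fallback, error handling must reference the new names. A small illustrative sketch, not taken from the diff:

```python
from kedro.io import DataCatalog, DatasetNotFoundError

catalog = DataCatalog()
try:
    catalog.load("missing")
except DatasetNotFoundError:  # formerly importable as DataSetNotFoundError
    print("'missing' is not registered in the catalog")
```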
diff --git a/kedro/io/data_catalog.py b/kedro/io/data_catalog.py
index 58aebfe73d..443d28b7cb 100644
--- a/kedro/io/data_catalog.py
+++ b/kedro/io/data_catalog.py
@@ -176,9 +176,9 @@ def __init__( # noqa: too-many-arguments
Example:
::
- >>> from kedro_datasets.pandas import CSVDataSet
+ >>> from kedro_datasets.pandas import CSVDataset
>>>
- >>> cars = CSVDataSet(filepath="cars.csv",
+ >>> cars = CSVDataset(filepath="cars.csv",
>>> load_args=None,
>>> save_args={"index": False})
>>> io = DataCatalog(datasets={'cars': cars})
@@ -246,14 +246,14 @@ class to be loaded is specified with the key ``type`` and their
>>> config = {
>>> "cars": {
- >>> "type": "pandas.CSVDataSet",
+ >>> "type": "pandas.CSVDataset",
>>> "filepath": "cars.csv",
>>> "save_args": {
>>> "index": False
>>> }
>>> },
>>> "boats": {
- >>> "type": "pandas.CSVDataSet",
+ >>> "type": "pandas.CSVDataset",
>>> "filepath": "s3://aws-bucket-name/boats.csv",
>>> "credentials": "boats_credentials",
>>> "save_args": {
@@ -484,9 +484,9 @@ def load(self, name: str, version: str = None) -> Any:
::
>>> from kedro.io import DataCatalog
- >>> from kedro_datasets.pandas import CSVDataSet
+ >>> from kedro_datasets.pandas import CSVDataset
>>>
- >>> cars = CSVDataSet(filepath="cars.csv",
+ >>> cars = CSVDataset(filepath="cars.csv",
>>> load_args=None,
>>> save_args={"index": False})
>>> io = DataCatalog(datasets={'cars': cars})
@@ -524,9 +524,9 @@ def save(self, name: str, data: Any) -> None:
>>> import pandas as pd
>>>
- >>> from kedro_datasets.pandas import CSVDataSet
+ >>> from kedro_datasets.pandas import CSVDataset
>>>
- >>> cars = CSVDataSet(filepath="cars.csv",
+ >>> cars = CSVDataset(filepath="cars.csv",
>>> load_args=None,
>>> save_args={"index": False})
>>> io = DataCatalog(datasets={'cars': cars})
@@ -598,13 +598,13 @@ def add(
Example:
::
- >>> from kedro_datasets.pandas import CSVDataSet
+ >>> from kedro_datasets.pandas import CSVDataset
>>>
>>> io = DataCatalog(datasets={
- >>> 'cars': CSVDataSet(filepath="cars.csv")
+ >>> 'cars': CSVDataset(filepath="cars.csv")
>>> })
>>>
- >>> io.add("boats", CSVDataSet(filepath="boats.csv"))
+ >>> io.add("boats", CSVDataset(filepath="boats.csv"))
"""
if dataset_name in self._datasets:
if replace:
@@ -634,14 +634,14 @@ def add_all(
Example:
::
- >>> from kedro_datasets.pandas import CSVDataSet, ParquetDataSet
+ >>> from kedro_datasets.pandas import CSVDataset, ParquetDataset
>>>
>>> io = DataCatalog(datasets={
- >>> "cars": CSVDataSet(filepath="cars.csv")
+ >>> "cars": CSVDataset(filepath="cars.csv")
>>> })
>>> additional = {
- >>> "planes": ParquetDataSet("planes.parq"),
- >>> "boats": CSVDataSet(filepath="boats.csv")
+ >>> "planes": ParquetDataset("planes.parq"),
+ >>> "boats": CSVDataset(filepath="boats.csv")
>>> }
>>>
>>> io.add_all(additional)
diff --git a/kedro/io/lambda_dataset.py b/kedro/io/lambda_dataset.py
index b5ec9f6232..bef5146ee7 100644
--- a/kedro/io/lambda_dataset.py
+++ b/kedro/io/lambda_dataset.py
@@ -4,14 +4,10 @@
"""
from __future__ import annotations
-import warnings
from typing import Any, Callable
from kedro.io.core import AbstractDataset, DatasetError
-# https://github.com/pylint-dev/pylint/issues/4300#issuecomment-1043601901
-LambdaDataSet: type[LambdaDataset]
-
class LambdaDataset(AbstractDataset):
"""``LambdaDataset`` loads and saves data to a data set.
@@ -121,16 +117,3 @@ def __init__( # noqa: too-many-arguments
self.__exists = exists
self.__release = release
self.metadata = metadata
-
-
-def __getattr__(name):
- if name == "LambdaDataSet":
- alias = LambdaDataset
- warnings.warn(
- f"{repr(name)} has been renamed to {repr(alias.__name__)}, "
- f"and the alias will be removed in Kedro 0.19.0",
- DeprecationWarning,
- stacklevel=2,
- )
- return alias
- raise AttributeError(f"module {repr(__name__)} has no attribute {repr(name)}")
diff --git a/kedro/io/memory_dataset.py b/kedro/io/memory_dataset.py
index 5e52e6e1bd..5b1075fdb0 100644
--- a/kedro/io/memory_dataset.py
+++ b/kedro/io/memory_dataset.py
@@ -3,16 +3,12 @@
from __future__ import annotations
import copy
-import warnings
from typing import Any
from kedro.io.core import AbstractDataset, DatasetError
_EMPTY = object()
-# https://github.com/pylint-dev/pylint/issues/4300#issuecomment-1043601901
-MemoryDataSet: type[MemoryDataset]
-
class MemoryDataset(AbstractDataset):
"""``MemoryDataset`` loads and saves data from/to an in-memory
@@ -140,16 +136,3 @@ def _copy_with_mode(data: Any, copy_mode: str) -> Any:
)
return copied_data
-
-
-def __getattr__(name):
- if name == "MemoryDataSet":
- alias = MemoryDataset
- warnings.warn(
- f"{repr(name)} has been renamed to {repr(alias.__name__)}, "
- f"and the alias will be removed in Kedro 0.19.0",
- DeprecationWarning,
- stacklevel=2,
- )
- return alias
- raise AttributeError(f"module {repr(__name__)} has no attribute {repr(name)}")
diff --git a/kedro/io/partitioned_dataset.py b/kedro/io/partitioned_dataset.py
index 4b3e9eccb3..08a0a8569b 100644
--- a/kedro/io/partitioned_dataset.py
+++ b/kedro/io/partitioned_dataset.py
@@ -31,10 +31,6 @@
S3_PROTOCOLS = ("s3", "s3a", "s3n")
-# https://github.com/pylint-dev/pylint/issues/4300#issuecomment-1043601901
-PartitionedDataSet: type[PartitionedDataset]
-IncrementalDataSet: type[IncrementalDataset]
-
class PartitionedDataset(AbstractDataset):
# noqa: too-many-instance-attributes,protected-access
@@ -379,7 +375,7 @@ class IncrementalDataset(PartitionedDataset):
>>> dataset.load()
"""
- DEFAULT_CHECKPOINT_TYPE = "kedro_datasets.text.TextDataSet" # TODO: PartitionedDataset should move to kedro-datasets
+ DEFAULT_CHECKPOINT_TYPE = "kedro_datasets.text.TextDataset"
DEFAULT_CHECKPOINT_FILENAME = "CHECKPOINT"
def __init__( # noqa: too-many-arguments
@@ -554,22 +550,3 @@ def confirm(self) -> None:
partition_ids = [self._path_to_partition(p) for p in self._list_partitions()]
if partition_ids:
self._checkpoint.save(partition_ids[-1]) # checkpoint to last partition
-
-
-_DEPRECATED_CLASSES = {
- "PartitionedDataSet": PartitionedDataset,
- "IncrementalDataSet": IncrementalDataset,
-}
-
-
-def __getattr__(name):
- if name in _DEPRECATED_CLASSES:
- alias = _DEPRECATED_CLASSES[name]
- warnings.warn(
- f"{repr(name)} has been renamed to {repr(alias.__name__)}, "
- f"and the alias will be removed in Kedro 0.19.0",
- DeprecationWarning,
- stacklevel=2,
- )
- return alias
- raise AttributeError(f"module {repr(__name__)} has no attribute {repr(name)}")
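`IncrementalDataset` now defaults its checkpoint to `kedro_datasets.text.TextDataset`, and the `PartitionedDataSet`/`IncrementalDataSet` aliases are gone. A hedged configuration sketch mirroring the test fixture further down (the entry name and path are hypothetical, and `kedro-datasets` with the pandas extras is assumed to be installed):

```python
from kedro.io import DataCatalog

catalog_config = {
    "ds_to_confirm": {
        "type": "IncrementalDataset",      # new spelling only
        "dataset": "pandas.CSVDataset",    # renamed partition dataset class
        "path": "data/01_raw/partitions",  # hypothetical local folder
    }
}
# The checkpoint dataset defaults to kedro_datasets.text.TextDataset.
catalog = DataCatalog.from_config(catalog_config)
```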
diff --git a/kedro/runner/parallel_runner.py b/kedro/runner/parallel_runner.py
index 07c8824274..5f306f2e3e 100644
--- a/kedro/runner/parallel_runner.py
+++ b/kedro/runner/parallel_runner.py
@@ -7,7 +7,6 @@
import os
import pickle
import sys
-import warnings
from collections import Counter
from concurrent.futures import FIRST_COMPLETED, ProcessPoolExecutor, wait
from itertools import chain
@@ -32,9 +31,6 @@
# see https://github.com/python/cpython/blob/master/Lib/concurrent/futures/process.py#L114
_MAX_WINDOWS_WORKERS = 61
-# https://github.com/pylint-dev/pylint/issues/4300#issuecomment-1043601901
-_SharedMemoryDataSet: type[_SharedMemoryDataset]
-
class _SharedMemoryDataset:
"""``_SharedMemoryDataset`` is a wrapper class for a shared MemoryDataset in SyncManager.
@@ -73,19 +69,6 @@ def save(self, data: Any):
raise exc
-def __getattr__(name):
- if name == "_SharedMemoryDataSet":
- alias = _SharedMemoryDataset
- warnings.warn(
- f"{repr(name)} has been renamed to {repr(alias.__name__)}, "
- f"and the alias will be removed in Kedro 0.19.0",
- DeprecationWarning,
- stacklevel=2,
- )
- return alias
- raise AttributeError(f"module {repr(__name__)} has no attribute {repr(name)}")
-
-
class ParallelRunnerManager(SyncManager):
"""``ParallelRunnerManager`` is used to create shared ``MemoryDataset``
objects as default data sets in a pipeline.
diff --git a/tests/config/test_config.py b/tests/config/test_config.py
index fd34f8edf8..934eab0639 100644
--- a/tests/config/test_config.py
+++ b/tests/config/test_config.py
@@ -40,9 +40,9 @@ def _write_dummy_ini(filepath: Path):
def base_config(tmp_path):
filepath = str(tmp_path / "cars.csv")
return {
- "trains": {"type": "MemoryDataSet"},
+ "trains": {"type": "MemoryDataset"},
"cars": {
- "type": "pandas.CSVDataSet",
+ "type": "pandas.CSVDataset",
"filepath": filepath,
"save_args": {"index": True},
},
@@ -54,11 +54,11 @@ def local_config(tmp_path):
filepath = str(tmp_path / "cars.csv")
return {
"cars": {
- "type": "pandas.CSVDataSet",
+ "type": "pandas.CSVDataset",
"filepath": filepath,
"save_args": {"index": False},
},
- "boats": {"type": "MemoryDataSet"},
+ "boats": {"type": "MemoryDataset"},
}
@@ -85,7 +85,7 @@ def proj_catalog(tmp_path, base_config):
@pytest.fixture
def proj_catalog_nested(tmp_path):
path = tmp_path / _BASE_ENV / "catalog" / "dir" / "nested.yml"
- _write_yaml(path, {"nested": {"type": "MemoryDataSet"}})
+ _write_yaml(path, {"nested": {"type": "MemoryDataset"}})
use_config_dir = pytest.mark.usefixtures("create_config_dir")
@@ -101,9 +101,9 @@ def test_load_core_config_dict_get(self, tmp_path):
catalog = conf["catalog"]
assert params["param1"] == 1
- assert catalog["trains"]["type"] == "MemoryDataSet"
- assert catalog["cars"]["type"] == "pandas.CSVDataSet"
- assert catalog["boats"]["type"] == "MemoryDataSet"
+ assert catalog["trains"]["type"] == "MemoryDataset"
+ assert catalog["cars"]["type"] == "pandas.CSVDataset"
+ assert catalog["boats"]["type"] == "MemoryDataset"
assert not catalog["cars"]["save_args"]["index"]
@use_config_dir
@@ -118,9 +118,9 @@ def test_load_local_config(self, tmp_path):
assert params["param1"] == 1
assert db_conf["prod"]["url"] == "postgresql://user:pass@url_prod/db"
- assert catalog["trains"]["type"] == "MemoryDataSet"
- assert catalog["cars"]["type"] == "pandas.CSVDataSet"
- assert catalog["boats"]["type"] == "MemoryDataSet"
+ assert catalog["trains"]["type"] == "MemoryDataset"
+ assert catalog["cars"]["type"] == "pandas.CSVDataset"
+ assert catalog["boats"]["type"] == "MemoryDataset"
assert not catalog["cars"]["save_args"]["index"]
@use_proj_catalog
@@ -159,9 +159,9 @@ def test_nested(self, tmp_path):
config_loader.default_run_env = ""
catalog = config_loader.get("catalog*", "catalog*/**")
assert catalog.keys() == {"cars", "trains", "nested"}
- assert catalog["cars"]["type"] == "pandas.CSVDataSet"
+ assert catalog["cars"]["type"] == "pandas.CSVDataset"
assert catalog["cars"]["save_args"]["index"] is True
- assert catalog["nested"]["type"] == "MemoryDataSet"
+ assert catalog["nested"]["type"] == "MemoryDataset"
@use_config_dir
def test_nested_subdirs_duplicate(self, tmp_path, base_config):
@@ -322,7 +322,7 @@ def test_yaml_parser_error(self, tmp_path):
example_catalog = """
example_iris_data:
- type: pandas.CSVDataSet
+ type: pandas.CSVDataset
filepath: data/01_raw/iris.csv
"""
@@ -359,7 +359,7 @@ def test_adding_extra_keys_to_confloader(self, tmp_path):
catalog = conf["catalog"]
conf["spark"] = {"spark_config": "emr.blabla"}
- assert catalog["trains"]["type"] == "MemoryDataSet"
+ assert catalog["trains"]["type"] == "MemoryDataset"
assert conf["spark"] == {"spark_config": "emr.blabla"}
@use_config_dir
diff --git a/tests/config/test_omegaconf_config.py b/tests/config/test_omegaconf_config.py
index 4a99458f19..162cdecb0b 100644
--- a/tests/config/test_omegaconf_config.py
+++ b/tests/config/test_omegaconf_config.py
@@ -46,9 +46,9 @@ def _write_dummy_ini(filepath: Path):
def base_config(tmp_path):
filepath = str(tmp_path / "cars.csv")
return {
- "trains": {"type": "MemoryDataSet"},
+ "trains": {"type": "MemoryDataset"},
"cars": {
- "type": "pandas.CSVDataSet",
+ "type": "pandas.CSVDataset",
"filepath": filepath,
"save_args": {"index": True},
},
@@ -60,11 +60,11 @@ def local_config(tmp_path):
filepath = str(tmp_path / "cars.csv")
return {
"cars": {
- "type": "pandas.CSVDataSet",
+ "type": "pandas.CSVDataset",
"filepath": filepath,
"save_args": {"index": False},
},
- "boats": {"type": "MemoryDataSet"},
+ "boats": {"type": "MemoryDataset"},
}
@@ -104,7 +104,7 @@ def proj_catalog(tmp_path, base_config):
@pytest.fixture
def proj_catalog_nested(tmp_path):
path = tmp_path / _BASE_ENV / "catalog" / "dir" / "nested.yml"
- _write_yaml(path, {"nested": {"type": "MemoryDataSet"}})
+ _write_yaml(path, {"nested": {"type": "MemoryDataset"}})
@pytest.fixture
@@ -138,7 +138,7 @@ def test_load_core_config_dict_syntax(self, tmp_path):
catalog = conf["catalog"]
assert params["param1"] == 1
- assert catalog["trains"]["type"] == "MemoryDataSet"
+ assert catalog["trains"]["type"] == "MemoryDataset"
@use_config_dir
def test_load_core_config_get_syntax(self, tmp_path):
@@ -148,7 +148,7 @@ def test_load_core_config_get_syntax(self, tmp_path):
catalog = conf.get("catalog")
assert params["param1"] == 1
- assert catalog["trains"]["type"] == "MemoryDataSet"
+ assert catalog["trains"]["type"] == "MemoryDataset"
@use_config_dir
def test_load_local_config_overrides_base(self, tmp_path):
@@ -159,9 +159,9 @@ def test_load_local_config_overrides_base(self, tmp_path):
catalog = conf["catalog"]
assert params["param1"] == 1
- assert catalog["trains"]["type"] == "MemoryDataSet"
- assert catalog["cars"]["type"] == "pandas.CSVDataSet"
- assert catalog["boats"]["type"] == "MemoryDataSet"
+ assert catalog["trains"]["type"] == "MemoryDataset"
+ assert catalog["cars"]["type"] == "pandas.CSVDataset"
+ assert catalog["boats"]["type"] == "MemoryDataset"
assert not catalog["cars"]["save_args"]["index"]
@use_proj_catalog
@@ -204,9 +204,9 @@ def test_nested(self, tmp_path):
catalog = config_loader["catalog"]
assert catalog.keys() == {"cars", "trains", "nested"}
- assert catalog["cars"]["type"] == "pandas.CSVDataSet"
+ assert catalog["cars"]["type"] == "pandas.CSVDataset"
assert catalog["cars"]["save_args"]["index"] is True
- assert catalog["nested"]["type"] == "MemoryDataSet"
+ assert catalog["nested"]["type"] == "MemoryDataset"
@use_config_dir
def test_nested_subdirs_duplicate(self, tmp_path, base_config):
@@ -384,7 +384,7 @@ def test_yaml_parser_error(self, tmp_path):
example_catalog = """
example_iris_data:
- type: pandas.CSVDataSet
+ type: pandas.CSVDataset
filepath: data/01_raw/iris.csv
"""
@@ -460,7 +460,7 @@ def test_adding_extra_keys_to_confloader(self, tmp_path):
catalog = conf["catalog"]
conf["spark"] = {"spark_config": "emr.blabla"}
- assert catalog["trains"]["type"] == "MemoryDataSet"
+ assert catalog["trains"]["type"] == "MemoryDataset"
assert conf["spark"] == {"spark_config": "emr.blabla"}
@use_config_dir
@@ -530,7 +530,7 @@ def test_load_config_from_tar_file(self, tmp_path):
conf = OmegaConfigLoader(conf_source=f"{tmp_path}/tar_conf.tar.gz")
catalog = conf["catalog"]
- assert catalog["trains"]["type"] == "MemoryDataSet"
+ assert catalog["trains"]["type"] == "MemoryDataset"
@use_config_dir
def test_load_config_from_zip_file(self, tmp_path):
@@ -554,7 +554,7 @@ def zipdir(path, ziph):
conf = OmegaConfigLoader(conf_source=f"{tmp_path}/Python.zip")
catalog = conf["catalog"]
- assert catalog["trains"]["type"] == "MemoryDataSet"
+ assert catalog["trains"]["type"] == "MemoryDataset"
@use_config_dir
def test_variable_interpolation_with_correct_env(self, tmp_path):
@@ -621,13 +621,13 @@ def test_variable_interpolation_in_catalog_with_templates(self, tmp_path):
"type": "${_pandas.type}",
"filepath": "data/01_raw/companies.csv",
},
- "_pandas": {"type": "pandas.CSVDataSet"},
+ "_pandas": {"type": "pandas.CSVDataset"},
}
_write_yaml(base_catalog, catalog_config)
conf = OmegaConfigLoader(str(tmp_path))
conf.default_run_env = ""
- assert conf["catalog"]["companies"]["type"] == "pandas.CSVDataSet"
+ assert conf["catalog"]["companies"]["type"] == "pandas.CSVDataset"
def test_variable_interpolation_in_catalog_with_separate_templates_file(
self, tmp_path
@@ -640,13 +640,13 @@ def test_variable_interpolation_in_catalog_with_separate_templates_file(
}
}
tmp_catalog = tmp_path / _BASE_ENV / "catalog_temp.yml"
- template = {"_pandas": {"type": "pandas.CSVDataSet"}}
+ template = {"_pandas": {"type": "pandas.CSVDataset"}}
_write_yaml(base_catalog, catalog_config)
_write_yaml(tmp_catalog, template)
conf = OmegaConfigLoader(str(tmp_path))
conf.default_run_env = ""
- assert conf["catalog"]["companies"]["type"] == "pandas.CSVDataSet"
+ assert conf["catalog"]["companies"]["type"] == "pandas.CSVDataset"
def test_custom_resolvers(self, tmp_path):
base_params = tmp_path / _BASE_ENV / "parameters.yml"
@@ -696,7 +696,7 @@ def test_globals_resolution(self, tmp_path):
"filepath": "data/01_raw/companies.csv",
},
}
- globals_config = {"x": 34, "dataset_type": "pandas.CSVDataSet"}
+ globals_config = {"x": 34, "dataset_type": "pandas.CSVDataset"}
_write_yaml(base_params, param_config)
_write_yaml(globals_params, globals_config)
_write_yaml(base_catalog, catalog_config)
@@ -883,7 +883,7 @@ def test_runtime_params_resolution(self, tmp_path):
runtime_params = {
"x": 45,
"dataset": {
- "type": "pandas.CSVDataSet",
+ "type": "pandas.CSVDataset",
},
}
param_config = {
@@ -965,12 +965,12 @@ def test_runtime_params_default_global(self, tmp_path):
}
globals_config = {
"dataset": {
- "type": "pandas.CSVDataSet",
+ "type": "pandas.CSVDataset",
}
}
catalog_config = {
"companies": {
- "type": "${runtime_params:type, ${globals:dataset.type, 'MemoryDataSet'}}",
+ "type": "${runtime_params:type, ${globals:dataset.type, 'MemoryDataset'}}",
"filepath": "data/01_raw/companies.csv",
},
}
diff --git a/tests/config/test_templated_config.py b/tests/config/test_templated_config.py
index 9a8edbd0d4..dec4b48ddf 100644
--- a/tests/config/test_templated_config.py
+++ b/tests/config/test_templated_config.py
@@ -41,7 +41,7 @@ def template_config():
"s3_bucket": "s3a://boat-and-car-bucket",
"raw_data_folder": "01_raw",
"boat_file_name": "boats.csv",
- "boat_data_type": "SparkDataSet",
+ "boat_data_type": "SparkDataset",
"string_type": "VARCHAR",
"float_type": "FLOAT",
"write_only_user": "ron",
@@ -55,10 +55,10 @@ def catalog_with_jinja2_syntax(tmp_path):
catalog = """
{% for speed in ['fast', 'slow'] %}
{{ speed }}-trains:
- type: MemoryDataSet
+ type: MemoryDataset
{{ speed }}-cars:
- type: pandas.CSVDataSet
+ type: pandas.CSVDataset
filepath: ${s3_bucket}/{{ speed }}-cars.csv
save_args:
index: true
@@ -86,7 +86,7 @@ def proj_catalog_globals(tmp_path, template_config):
def normal_config_advanced():
return {
"planes": {
- "type": "SparkJDBCDataSet",
+ "type": "SparkJDBCDataset",
"postgres_credentials": {"user": "Fakeuser", "password": "F@keP@55word"},
"batch_size": 10000,
"need_permission": True,
@@ -117,7 +117,7 @@ def param_config_advanced():
@pytest.fixture
def template_config_advanced():
return {
- "plane_data_type": "SparkJDBCDataSet",
+ "plane_data_type": "SparkJDBCDataset",
"credentials": {"user": "Fakeuser", "password": "F@keP@55word"},
"batch_size": 10000,
"permission_param": True,
@@ -215,7 +215,7 @@ def test_get_catalog_config_with_dict_get(self, tmp_path, template_config):
)
config_loader.default_run_env = ""
catalog = config_loader["catalog"]
- assert catalog["boats"]["type"] == "SparkDataSet"
+ assert catalog["boats"]["type"] == "SparkDataset"
@pytest.mark.usefixtures("proj_catalog_param")
def test_catalog_parameterized_w_dict(self, tmp_path, template_config):
@@ -225,7 +225,7 @@ def test_catalog_parameterized_w_dict(self, tmp_path, template_config):
)
config_loader.default_run_env = ""
catalog = config_loader.get("catalog*.yml")
- assert catalog["boats"]["type"] == "SparkDataSet"
+ assert catalog["boats"]["type"] == "SparkDataset"
assert (
catalog["boats"]["filepath"] == "s3a://boat-and-car-bucket/01_raw/boats.csv"
)
@@ -243,7 +243,7 @@ def test_catalog_parameterized_w_globals(self, tmp_path):
str(tmp_path), globals_pattern="*globals.yml"
).get("catalog*.yml")
- assert catalog["boats"]["type"] == "SparkDataSet"
+ assert catalog["boats"]["type"] == "SparkDataset"
assert (
catalog["boats"]["filepath"] == "s3a://boat-and-car-bucket/01_raw/boats.csv"
)
@@ -279,7 +279,7 @@ def test_catalog_advanced(self, tmp_path, normal_config_advanced):
config_loader.default_run_env = ""
catalog = config_loader.get("catalog*.yml")
- assert catalog["planes"]["type"] == "SparkJDBCDataSet"
+ assert catalog["planes"]["type"] == "SparkJDBCDataset"
assert catalog["planes"]["postgres_credentials"]["user"] == "Fakeuser"
assert catalog["planes"]["postgres_credentials"]["password"] == "F@keP@55word"
assert catalog["planes"]["batch_size"] == 10000
@@ -295,7 +295,7 @@ def test_catalog_parameterized_advanced(self, tmp_path, template_config_advanced
config_loader.default_run_env = ""
catalog = config_loader.get("catalog*.yml")
- assert catalog["planes"]["type"] == "SparkJDBCDataSet"
+ assert catalog["planes"]["type"] == "SparkJDBCDataset"
assert catalog["planes"]["postgres_credentials"]["user"] == "Fakeuser"
assert catalog["planes"]["postgres_credentials"]["password"] == "F@keP@55word"
assert catalog["planes"]["batch_size"] == 10000
@@ -312,7 +312,7 @@ def test_catalog_parameterized_w_dict_mixed(self, tmp_path, get_environ):
str(tmp_path), globals_pattern="*globals.yml", globals_dict=get_environ
).get("catalog*.yml")
- assert catalog["boats"]["type"] == "SparkDataSet"
+ assert catalog["boats"]["type"] == "SparkDataset"
assert (
catalog["boats"]["filepath"] == "s3a://boat-and-car-bucket/01_raw/boats.csv"
)
@@ -332,7 +332,7 @@ def test_catalog_parameterized_w_dict_namespaced(
config_loader.default_run_env = ""
catalog = config_loader.get("catalog*.yml")
- assert catalog["boats"]["type"] == "SparkDataSet"
+ assert catalog["boats"]["type"] == "SparkDataset"
assert (
catalog["boats"]["filepath"] == "s3a://boat-and-car-bucket/01_raw/boats.csv"
)
@@ -362,15 +362,15 @@ def test_catalog_with_jinja2_syntax(self, tmp_path, template_config):
config_loader.default_run_env = ""
catalog = config_loader.get("catalog*.yml")
expected_catalog = {
- "fast-trains": {"type": "MemoryDataSet"},
+ "fast-trains": {"type": "MemoryDataset"},
"fast-cars": {
- "type": "pandas.CSVDataSet",
+ "type": "pandas.CSVDataset",
"filepath": "s3a://boat-and-car-bucket/fast-cars.csv",
"save_args": {"index": True},
},
- "slow-trains": {"type": "MemoryDataSet"},
+ "slow-trains": {"type": "MemoryDataset"},
"slow-cars": {
- "type": "pandas.CSVDataSet",
+ "type": "pandas.CSVDataset",
"filepath": "s3a://boat-and-car-bucket/slow-cars.csv",
"save_args": {"index": True},
},
@@ -389,15 +389,15 @@ def test_catalog_with_jinja2_syntax_and_globals_file(self, tmp_path):
config_loader.default_run_env = ""
catalog = config_loader.get("catalog*.yml")
expected_catalog = {
- "fast-trains": {"type": "MemoryDataSet"},
+ "fast-trains": {"type": "MemoryDataset"},
"fast-cars": {
- "type": "pandas.CSVDataSet",
+ "type": "pandas.CSVDataset",
"filepath": "s3a://boat-and-car-bucket/fast-cars.csv",
"save_args": {"index": True},
},
- "slow-trains": {"type": "MemoryDataSet"},
+ "slow-trains": {"type": "MemoryDataset"},
"slow-cars": {
- "type": "pandas.CSVDataSet",
+ "type": "pandas.CSVDataset",
"filepath": "s3a://boat-and-car-bucket/slow-cars.csv",
"save_args": {"index": True},
},
@@ -492,7 +492,7 @@ def test_adding_extra_keys_to_confloader(self, tmp_path, template_config):
catalog = config_loader["catalog"]
config_loader["spark"] = {"spark_config": "emr.blabla"}
- assert catalog["boats"]["type"] == "SparkDataSet"
+ assert catalog["boats"]["type"] == "SparkDataset"
assert config_loader["spark"] == {"spark_config": "emr.blabla"}
@pytest.mark.usefixtures("proj_catalog_param")
diff --git a/tests/framework/cli/pipeline/test_pipeline.py b/tests/framework/cli/pipeline/test_pipeline.py
index f216d73917..0587dedd7d 100644
--- a/tests/framework/cli/pipeline/test_pipeline.py
+++ b/tests/framework/cli/pipeline/test_pipeline.py
@@ -4,7 +4,7 @@
import pytest
import yaml
from click.testing import CliRunner
-from kedro_datasets.pandas import CSVDataSet
+from kedro_datasets.pandas import CSVDataset
from pandas import DataFrame
from kedro.framework.cli.pipeline import _sync_dirs
@@ -187,7 +187,7 @@ def test_catalog_and_params(
conf_dir = fake_repo_path / settings.CONF_SOURCE / "base"
catalog_dict = {
"ds_from_pipeline": {
- "type": "pandas.CSVDataSet",
+ "type": "pandas.CSVDataset",
"filepath": "data/01_raw/iris.csv",
}
}
@@ -204,7 +204,7 @@ def test_catalog_and_params(
with KedroSession.create(PACKAGE_NAME) as session:
ctx = session.load_context()
- assert isinstance(ctx.catalog._datasets["ds_from_pipeline"], CSVDataSet)
+ assert isinstance(ctx.catalog._datasets["ds_from_pipeline"], CSVDataset)
assert isinstance(ctx.catalog.load("ds_from_pipeline"), DataFrame)
assert ctx.params["params_from_pipeline"] == params_dict["params_from_pipeline"]
diff --git a/tests/framework/cli/test_catalog.py b/tests/framework/cli/test_catalog.py
index f1394c4531..d6342be9bb 100644
--- a/tests/framework/cli/test_catalog.py
+++ b/tests/framework/cli/test_catalog.py
@@ -1,7 +1,7 @@
import pytest
import yaml
from click.testing import CliRunner
-from kedro_datasets.pandas import CSVDataSet
+from kedro_datasets.pandas import CSVDataset
from kedro.io import DataCatalog, MemoryDataset
from kedro.pipeline import node
@@ -32,10 +32,10 @@ def mock_pipelines(mocker):
def fake_catalog_config():
config = {
"parquet_{factory_pattern}": {
- "type": "pandas.ParquetDataSet",
+ "type": "pandas.ParquetDataset",
"filepath": "test.pq",
},
- "csv_{factory_pattern}": {"type": "pandas.CSVDataSet", "filepath": "test.csv"},
+ "csv_{factory_pattern}": {"type": "pandas.CSVDataset", "filepath": "test.csv"},
}
return config
@@ -44,7 +44,7 @@ def fake_catalog_config():
def fake_catalog_with_overlapping_factories():
config = {
"an_example_dataset": {
- "type": "pandas.CSVDataSet",
+ "type": "pandas.CSVDataset",
"filepath": "dummy_filepath",
},
"an_example_{placeholder}": {
@@ -71,13 +71,13 @@ def fake_catalog_with_overlapping_factories():
def fake_catalog_config_with_resolvable_dataset():
config = {
"parquet_{factory_pattern}": {
- "type": "pandas.ParquetDataSet",
+ "type": "pandas.ParquetDataset",
"filepath": "test.pq",
},
- "csv_{factory_pattern}": {"type": "pandas.CSVDataSet", "filepath": "test.csv"},
- "explicit_ds": {"type": "pandas.CSVDataSet", "filepath": "test.csv"},
+ "csv_{factory_pattern}": {"type": "pandas.CSVDataset", "filepath": "test.csv"},
+ "explicit_ds": {"type": "pandas.CSVDataset", "filepath": "test.csv"},
"{factory_pattern}_ds": {
- "type": "pandas.ParquetDataSet",
+ "type": "pandas.ParquetDataset",
"filepath": "test.pq",
},
}
@@ -134,11 +134,11 @@ def test_no_param_datasets_in_respose(
yaml_dump_mock = mocker.patch("yaml.dump", return_value="Result YAML")
mocked_context = fake_load_context.return_value
catalog_datasets = {
- "iris_data": CSVDataSet("test.csv"),
+ "iris_data": CSVDataset("test.csv"),
"intermediate": MemoryDataset(),
"parameters": MemoryDataset(),
"params:data_ratio": MemoryDataset(),
- "not_used": CSVDataSet("test2.csv"),
+ "not_used": CSVDataset("test2.csv"),
}
mocked_context.catalog = DataCatalog(datasets=catalog_datasets)
@@ -177,7 +177,7 @@ def test_default_dataset(
"""
yaml_dump_mock = mocker.patch("yaml.dump", return_value="Result YAML")
mocked_context = fake_load_context.return_value
- catalog_datasets = {"some_dataset": CSVDataSet("test.csv")}
+ catalog_datasets = {"some_dataset": CSVDataset("test.csv")}
mocked_context.catalog = DataCatalog(datasets=catalog_datasets)
mocker.patch.object(
mock_pipelines[PIPELINE_NAME],
@@ -236,8 +236,8 @@ def test_list_factory_generated_datasets(
expected_dict = {
f"Datasets in '{PIPELINE_NAME}' pipeline": {
"Datasets generated from factories": {
- "pandas.CSVDataSet": ["csv_example"],
- "pandas.ParquetDataSet": ["parquet_example"],
+ "pandas.CSVDataset": ["csv_example"],
+ "pandas.ParquetDataset": ["parquet_example"],
}
}
}
@@ -341,8 +341,8 @@ def test_no_missing_datasets(
mocked_context = fake_load_context.return_value
catalog_datasets = {
- "input_data": CSVDataSet("test.csv"),
- "output_data": CSVDataSet("test2.csv"),
+ "input_data": CSVDataset("test.csv"),
+ "output_data": CSVDataset("test2.csv"),
}
mocked_context.catalog = DataCatalog(datasets=catalog_datasets)
mocked_context.project_path = fake_repo_path
@@ -370,7 +370,7 @@ def test_missing_datasets_appended(
data_catalog_file = catalog_path / f"catalog_{self.PIPELINE_NAME}.yml"
catalog_config = {
- "example_test_x": {"type": "pandas.CSVDataSet", "filepath": "test.csv"}
+ "example_test_x": {"type": "pandas.CSVDataset", "filepath": "test.csv"}
}
with data_catalog_file.open(mode="w") as catalog_file:
yaml.safe_dump(catalog_config, catalog_file, default_flow_style=False)
@@ -445,9 +445,9 @@ def test_rank_catalog_factories_with_no_factories(
mocked_context = fake_load_context.return_value
catalog_datasets = {
- "iris_data": CSVDataSet("test.csv"),
+ "iris_data": CSVDataset("test.csv"),
"intermediate": MemoryDataset(),
- "not_used": CSVDataSet("test2.csv"),
+ "not_used": CSVDataset("test2.csv"),
}
mocked_context.catalog = DataCatalog(datasets=catalog_datasets)
@@ -555,14 +555,14 @@ def test_no_param_datasets_in_resolve(
catalog_config = {
"iris_data": {
- "type": "pandas.CSVDataSet",
+ "type": "pandas.CSVDataset",
"filepath": "test.csv",
},
- "intermediate": {"type": "MemoryDataSet"},
+ "intermediate": {"type": "MemoryDataset"},
}
catalog_datasets = {
- "iris_data": CSVDataSet("test.csv"),
+ "iris_data": CSVDataset("test.csv"),
"intermediate": MemoryDataset(),
"parameters": MemoryDataset(),
"params:data_ratio": MemoryDataset(),
diff --git a/tests/framework/context/test_context.py b/tests/framework/context/test_context.py
index 794cdb1fa7..7032b1ecef 100644
--- a/tests/framework/context/test_context.py
+++ b/tests/framework/context/test_context.py
@@ -73,9 +73,9 @@ def base_config(tmp_path):
trains_filepath = (tmp_path / "trains.csv").as_posix()
return {
- "trains": {"type": "pandas.CSVDataSet", "filepath": trains_filepath},
+ "trains": {"type": "pandas.CSVDataset", "filepath": trains_filepath},
"cars": {
- "type": "pandas.CSVDataSet",
+ "type": "pandas.CSVDataset",
"filepath": cars_filepath,
"save_args": {"index": True},
},
@@ -90,19 +90,19 @@ def local_config(tmp_path):
horses_filepath = "horses.csv"
return {
"cars": {
- "type": "pandas.CSVDataSet",
+ "type": "pandas.CSVDataset",
"filepath": cars_filepath,
"save_args": {"index": False},
"versioned": True,
},
"boats": {
- "type": "pandas.CSVDataSet",
+ "type": "pandas.CSVDataset",
"filepath": boats_filepath,
"versioned": True,
"layer": "raw",
},
"horses": {
- "type": "pandas.CSVDataSet",
+ "type": "pandas.CSVDataset",
"filepath": horses_filepath,
"versioned": True,
},
diff --git a/tests/framework/session/conftest.py b/tests/framework/session/conftest.py
index c38a363666..c7758d2d89 100644
--- a/tests/framework/session/conftest.py
+++ b/tests/framework/session/conftest.py
@@ -66,13 +66,13 @@ def local_config(tmp_path):
boats_filepath = str(tmp_path / "boats.csv")
return {
"cars": {
- "type": "pandas.CSVDataSet",
+ "type": "pandas.CSVDataset",
"filepath": cars_filepath,
"save_args": {"index": False},
"versioned": True,
},
"boats": {
- "type": "pandas.CSVDataSet",
+ "type": "pandas.CSVDataset",
"filepath": boats_filepath,
"versioned": True,
},
diff --git a/tests/framework/session/test_session_extension_hooks.py b/tests/framework/session/test_session_extension_hooks.py
index 3f407852b9..4e43c80880 100644
--- a/tests/framework/session/test_session_extension_hooks.py
+++ b/tests/framework/session/test_session_extension_hooks.py
@@ -18,7 +18,7 @@
settings,
)
from kedro.framework.session import KedroSession
-from kedro.io import DataCatalog, MemoryDataSet
+from kedro.io import DataCatalog, MemoryDataset
from kedro.pipeline import node, pipeline
from kedro.pipeline.node import Node
from kedro.runner import ParallelRunner
@@ -286,7 +286,7 @@ def test_before_and_after_node_run_hooks_parallel_runner(
assert set(record.outputs.keys()) <= {"planes", "ships"}
-class TestDataSetHooks:
+class TestDatasetHooks:
@pytest.mark.usefixtures("mock_pipelines")
def test_before_and_after_dataset_loaded_hooks_sequential_runner(
self, mock_session, caplog, dummy_dataframe
@@ -554,10 +554,10 @@ def load(self, name: str, version: str = None) -> Any:
@pytest.fixture
def memory_catalog():
- ds1 = MemoryDataSet({"data": 42})
- ds2 = MemoryDataSet({"data": 42})
- ds3 = MemoryDataSet({"data": 42})
- ds4 = MemoryDataSet({"data": 42})
+ ds1 = MemoryDataset({"data": 42})
+ ds2 = MemoryDataset({"data": 42})
+ ds3 = MemoryDataset({"data": 42})
+ ds4 = MemoryDataset({"data": 42})
return LogCatalog({"ds1": ds1, "ds2": ds2, "ds3": ds3, "ds4": ds4})
diff --git a/tests/io/test_cached_dataset.py b/tests/io/test_cached_dataset.py
index 92499de81c..fa93dfaa2f 100644
--- a/tests/io/test_cached_dataset.py
+++ b/tests/io/test_cached_dataset.py
@@ -3,7 +3,7 @@
import pytest
import yaml
-from kedro_datasets.pandas import CSVDataSet
+from kedro_datasets.pandas import CSVDataset
from kedro.io import CachedDataset, DataCatalog, DatasetError, MemoryDataset
@@ -11,7 +11,7 @@
test_ds:
type: CachedDataset
dataset:
- type: kedro_datasets.pandas.CSVDataSet
+ type: kedro_datasets.pandas.CSVDataset
filepath: example.csv
"""
@@ -20,7 +20,7 @@
type: CachedDataset
versioned: true
dataset:
- type: kedro_datasets.pandas.CSVDataSet
+ type: kedro_datasets.pandas.CSVDataset
filepath: example.csv
"""
@@ -28,7 +28,7 @@
test_ds:
type: CachedDataset
dataset:
- type: kedro_datasets.pandas.CSVDataSet
+ type: kedro_datasets.pandas.CSVDataset
filepath: example.csv
versioned: true
"""
@@ -81,7 +81,7 @@ def test_from_yaml(self, mocker):
catalog = DataCatalog.from_config(config)
assert catalog.list() == ["test_ds"]
mock = mocker.Mock()
- assert isinstance(catalog._datasets["test_ds"]._dataset, CSVDataSet)
+ assert isinstance(catalog._datasets["test_ds"]._dataset, CSVDataset)
catalog._datasets["test_ds"]._dataset = mock
catalog.save("test_ds", 20)
diff --git a/tests/io/test_core.py b/tests/io/test_core.py
index 7274a0cd32..77774e7f50 100644
--- a/tests/io/test_core.py
+++ b/tests/io/test_core.py
@@ -1,6 +1,5 @@
from __future__ import annotations
-import importlib
from decimal import Decimal
from fractions import Fraction
from pathlib import PurePosixPath
@@ -9,7 +8,6 @@
import pytest
from kedro.io.core import (
- _DEPRECATED_CLASSES,
AbstractDataset,
_parse_filepath,
get_filepath_str,
@@ -33,14 +31,7 @@
]
-@pytest.mark.parametrize("module_name", ["kedro.io", "kedro.io.core"])
-@pytest.mark.parametrize("class_name", _DEPRECATED_CLASSES)
-def test_deprecation(module_name, class_name):
- with pytest.warns(DeprecationWarning, match=f"{repr(class_name)} has been renamed"):
- getattr(importlib.import_module(module_name), class_name)
-
-
-class MyDataSet(AbstractDataset):
+class MyDataset(AbstractDataset):
def __init__(self, var=None):
self.var = var
@@ -57,10 +48,10 @@ def _save(self, data):
class TestCoreFunctions:
@pytest.mark.parametrize("var", [1, True] + FALSE_BUILTINS)
def test_str_representation(self, var):
- assert str(MyDataSet(var)) == f"MyDataSet(var={var})"
+ assert str(MyDataset(var)) == f"MyDataset(var={var})"
def test_str_representation_none(self):
- assert str(MyDataSet()) == "MyDataSet()"
+ assert str(MyDataset()) == "MyDataset()"
def test_get_filepath_str(self):
path = get_filepath_str(PurePosixPath("example.com/test.csv"), "http")
diff --git a/tests/io/test_data_catalog.py b/tests/io/test_data_catalog.py
index b3d8dc7ef7..afd1707bea 100644
--- a/tests/io/test_data_catalog.py
+++ b/tests/io/test_data_catalog.py
@@ -7,7 +7,7 @@
import pandas as pd
import pytest
-from kedro_datasets.pandas import CSVDataSet, ParquetDataSet
+from kedro_datasets.pandas import CSVDataset, ParquetDataset
from pandas.testing import assert_frame_equal
from kedro.io import (
@@ -42,9 +42,9 @@ def dummy_dataframe():
def sane_config(filepath):
return {
"catalog": {
- "boats": {"type": "pandas.CSVDataSet", "filepath": filepath},
+ "boats": {"type": "pandas.CSVDataset", "filepath": filepath},
"cars": {
- "type": "pandas.CSVDataSet",
+ "type": "pandas.CSVDataset",
"filepath": "s3://test_bucket/test_file.csv",
"credentials": "s3_credentials",
"layer": "raw",
@@ -78,11 +78,11 @@ def sane_config_with_tracking_ds(tmp_path):
return {
"catalog": {
"boats": {
- "type": "pandas.CSVDataSet",
+ "type": "pandas.CSVDataset",
"filepath": boat_path,
"versioned": True,
},
- "planes": {"type": "tracking.MetricsDataSet", "filepath": plane_path},
+ "planes": {"type": "tracking.MetricsDataset", "filepath": plane_path},
},
}
@@ -92,15 +92,15 @@ def config_with_dataset_factories():
return {
"catalog": {
"{brand}_cars": {
- "type": "pandas.CSVDataSet",
+ "type": "pandas.CSVDataset",
"filepath": "data/01_raw/{brand}_cars.csv",
},
"audi_cars": {
- "type": "pandas.ParquetDataSet",
+ "type": "pandas.ParquetDataset",
"filepath": "data/01_raw/audi_cars.pq",
},
"{type}_boats": {
- "type": "pandas.CSVDataSet",
+ "type": "pandas.CSVDataset",
"filepath": "data/01_raw/{type}_boats.csv",
},
},
@@ -114,7 +114,7 @@ def config_with_dataset_factories_nested():
"{brand}_cars": {
"type": "PartitionedDataset",
"path": "data/01_raw",
- "dataset": "pandas.CSVDataSet",
+ "dataset": "pandas.CSVDataset",
"metadata": {
"my-plugin": {
"brand": "{brand}",
@@ -133,7 +133,7 @@ def config_with_dataset_factories_nested():
@pytest.fixture
def config_with_dataset_factories_with_default(config_with_dataset_factories):
config_with_dataset_factories["catalog"]["{default_dataset}"] = {
- "type": "pandas.CSVDataSet",
+ "type": "pandas.CSVDataset",
"filepath": "data/01_raw/{default_dataset}.csv",
}
return config_with_dataset_factories
@@ -142,7 +142,7 @@ def config_with_dataset_factories_with_default(config_with_dataset_factories):
@pytest.fixture
def config_with_dataset_factories_bad_pattern(config_with_dataset_factories):
config_with_dataset_factories["catalog"]["{type}@planes"] = {
- "type": "pandas.ParquetDataSet",
+ "type": "pandas.ParquetDataset",
"filepath": "data/01_raw/{brand}_plane.pq",
}
return config_with_dataset_factories
@@ -153,19 +153,19 @@ def config_with_dataset_factories_only_patterns():
return {
"catalog": {
"{default}": {
- "type": "pandas.CSVDataSet",
+ "type": "pandas.CSVDataset",
"filepath": "data/01_raw/{default}.csv",
},
"{namespace}_{dataset}": {
- "type": "pandas.CSVDataSet",
+ "type": "pandas.CSVDataset",
"filepath": "data/01_raw/{namespace}_{dataset}.pq",
},
"{country}_companies": {
- "type": "pandas.CSVDataSet",
+ "type": "pandas.CSVDataset",
"filepath": "data/01_raw/{country}_companies.csv",
},
"{dataset}s": {
- "type": "pandas.CSVDataSet",
+ "type": "pandas.CSVDataset",
"filepath": "data/01_raw/{dataset}s.csv",
},
},
@@ -174,13 +174,13 @@ def config_with_dataset_factories_only_patterns():
@pytest.fixture
def dataset(filepath):
- return CSVDataSet(filepath=filepath, save_args={"index": False})
+ return CSVDataset(filepath=filepath, save_args={"index": False})
@pytest.fixture
def multi_catalog():
- csv = CSVDataSet(filepath="abc.csv")
- parq = ParquetDataSet(filepath="xyz.parq")
+ csv = CSVDataset(filepath="abc.csv")
+ parq = ParquetDataset(filepath="xyz.parq")
layers = {"raw": {"abc.csv"}, "model": {"xyz.parq"}}
return DataCatalog({"abc": csv, "xyz": parq}, layers=layers)
@@ -364,14 +364,14 @@ def test_eq(self, multi_catalog, data_catalog):
def test_datasets_on_init(self, data_catalog_from_config):
"""Check datasets are loaded correctly on construction"""
- assert isinstance(data_catalog_from_config.datasets.boats, CSVDataSet)
- assert isinstance(data_catalog_from_config.datasets.cars, CSVDataSet)
+ assert isinstance(data_catalog_from_config.datasets.boats, CSVDataset)
+ assert isinstance(data_catalog_from_config.datasets.cars, CSVDataset)
def test_datasets_on_add(self, data_catalog_from_config):
"""Check datasets are updated correctly after adding"""
- data_catalog_from_config.add("new_dataset", CSVDataSet("some_path"))
- assert isinstance(data_catalog_from_config.datasets.new_dataset, CSVDataSet)
- assert isinstance(data_catalog_from_config.datasets.boats, CSVDataSet)
+ data_catalog_from_config.add("new_dataset", CSVDataset("some_path"))
+ assert isinstance(data_catalog_from_config.datasets.new_dataset, CSVDataset)
+ assert isinstance(data_catalog_from_config.datasets.boats, CSVDataset)
def test_adding_datasets_not_allowed(self, data_catalog_from_config):
"""Check error if user tries to update the datasets attribute"""
@@ -454,15 +454,15 @@ def test_config_invalid_module(self, sane_config):
"""Check the error if the type points to nonexistent module"""
sane_config["catalog"]["boats"][
"type"
- ] = "kedro.invalid_module_name.io.CSVDataSet"
+ ] = "kedro.invalid_module_name.io.CSVDataset"
- error_msg = "Class 'kedro.invalid_module_name.io.CSVDataSet' not found"
+ error_msg = "Class 'kedro.invalid_module_name.io.CSVDataset' not found"
with pytest.raises(DatasetError, match=re.escape(error_msg)):
DataCatalog.from_config(**sane_config)
def test_config_relative_import(self, sane_config):
"""Check the error if the type points to a relative import"""
- sane_config["catalog"]["boats"]["type"] = ".CSVDataSetInvalid"
+ sane_config["catalog"]["boats"]["type"] = ".CSVDatasetInvalid"
pattern = "'type' class path does not support relative paths"
with pytest.raises(DatasetError, match=re.escape(pattern)):
@@ -480,20 +480,20 @@ def test_config_import_kedro_datasets(self, sane_config, mocker):
# In Python 3.7 call_args.args is not available thus we access the call
# arguments with less meaningful index.
# The 1st index returns a tuple, the 2nd index return the name of module.
- assert call_args[0][0] == f"{prefix}pandas.CSVDataSet"
+ assert call_args[0][0] == f"{prefix}pandas.CSVDataset"
def test_config_import_extras(self, sane_config):
"""Test kedro_datasets default path to the dataset class"""
- sane_config["catalog"]["boats"]["type"] = "pandas.CSVDataSet"
+ sane_config["catalog"]["boats"]["type"] = "pandas.CSVDataset"
assert DataCatalog.from_config(**sane_config)
def test_config_missing_class(self, sane_config):
"""Check the error if the type points to nonexistent class"""
- sane_config["catalog"]["boats"]["type"] = "kedro.io.CSVDataSetInvalid"
+ sane_config["catalog"]["boats"]["type"] = "kedro.io.CSVDatasetInvalid"
pattern = (
"An exception occurred when parsing config for dataset 'boats':\n"
- "Class 'kedro.io.CSVDataSetInvalid' not found"
+ "Class 'kedro.io.CSVDatasetInvalid' not found"
)
with pytest.raises(DatasetError, match=re.escape(pattern)):
DataCatalog.from_config(**sane_config)
@@ -570,10 +570,10 @@ def test_missing_dependency(self, sane_config, mocker):
pattern = "dependency issue"
def dummy_load(obj_path, *args, **kwargs):
- if obj_path == "kedro_datasets.pandas.CSVDataSet":
+ if obj_path == "kedro_datasets.pandas.CSVDataset":
raise AttributeError(pattern)
if obj_path == "kedro_datasets.pandas.__all__":
- return ["CSVDataSet"]
+ return ["CSVDataset"]
mocker.patch("kedro.io.core.load_obj", side_effect=dummy_load)
with pytest.raises(DatasetError, match=pattern):
@@ -598,7 +598,7 @@ def test_confirm(self, tmp_path, caplog, mocker):
catalog = {
"ds_to_confirm": {
"type": "IncrementalDataset",
- "dataset": "pandas.CSVDataSet",
+ "dataset": "pandas.CSVDataset",
"path": str(tmp_path),
}
}
@@ -748,7 +748,7 @@ def test_load_version_on_unversioned_dataset(
def test_replacing_nonword_characters(self):
"""Test replacing non-word characters in dataset names"""
- csv = CSVDataSet(filepath="abc.csv")
+ csv = CSVDataset(filepath="abc.csv")
datasets = {"ds1@spark": csv, "ds2_spark": csv, "ds3.csv": csv, "jalapeƱo": csv}
catalog = DataCatalog(datasets=datasets)
@@ -767,7 +767,7 @@ def test_no_versions_with_cloud_protocol(self, monkeypatch):
monkeypatch.setenv("AWS_ACCESS_KEY_ID", "dummmy")
monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "dummmy")
version = Version(load=None, save=None)
- versioned_dataset = CSVDataSet("s3://bucket/file.csv", version=version)
+ versioned_dataset = CSVDataset("s3://bucket/file.csv", version=version)
pattern = re.escape(
f"Did not find any versions for {versioned_dataset}. "
f"This could be due to insufficient permission."
@@ -785,7 +785,7 @@ def test_match_added_to_datasets_on_get(self, config_with_dataset_factories):
assert "{brand}_cars" in catalog._dataset_patterns
tesla_cars = catalog._get_dataset("tesla_cars")
- assert isinstance(tesla_cars, CSVDataSet)
+ assert isinstance(tesla_cars, CSVDataset)
assert "tesla_cars" in catalog._datasets
@pytest.mark.parametrize(
@@ -818,7 +818,7 @@ def test_explicit_entry_not_overwritten(self, config_with_dataset_factories):
"""Check that the existing catalog entry is not overwritten by config in pattern"""
catalog = DataCatalog.from_config(**config_with_dataset_factories)
audi_cars = catalog._get_dataset("audi_cars")
- assert isinstance(audi_cars, ParquetDataSet)
+ assert isinstance(audi_cars, ParquetDataset)
@pytest.mark.parametrize(
"dataset_name,pattern",
@@ -859,7 +859,7 @@ def test_default_dataset(self, config_with_dataset_factories_with_default, caplo
"in the catalog will be used to override the default "
"MemoryDataset creation for the dataset 'jet@planes'" in log_record.message
)
- assert isinstance(jet_dataset, CSVDataSet)
+ assert isinstance(jet_dataset, CSVDataset)
def test_unmatched_key_error_when_parsing_config(
self, config_with_dataset_factories_bad_pattern
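For orientation, a minimal sketch (not part of this diff) of how a catalog config now references the renamed classes, including a dataset factory pattern like the `{brand}_cars` entry exercised above; the filepaths are hypothetical and `_get_dataset` is used only because the tests above do the same.

from kedro.io import DataCatalog
from kedro_datasets.pandas import CSVDataset

config = {
    "catalog": {
        # explicit entry using the renamed type string
        "boats": {"type": "pandas.CSVDataset", "filepath": "data/boats.csv"},
        # dataset factory pattern, resolved when a matching name is requested
        "{brand}_cars": {"type": "pandas.CSVDataset", "filepath": "data/{brand}_cars.csv"},
    }
}
catalog = DataCatalog.from_config(**config)
assert isinstance(catalog._get_dataset("tesla_cars"), CSVDataset)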
diff --git a/tests/io/test_incremental_dataset.py b/tests/io/test_incremental_dataset.py
index db9421e886..c36c6b62f9 100644
--- a/tests/io/test_incremental_dataset.py
+++ b/tests/io/test_incremental_dataset.py
@@ -8,15 +8,15 @@
import boto3
import pandas as pd
import pytest
-from kedro_datasets.pickle import PickleDataSet
-from kedro_datasets.text import TextDataSet
+from kedro_datasets.pickle import PickleDataset
+from kedro_datasets.text import TextDataset
from moto import mock_s3
from pandas.testing import assert_frame_equal
from kedro.io import AbstractDataset, DatasetError, IncrementalDataset
from kedro.io.data_catalog import CREDENTIALS_KEY
-DATASET = "kedro_datasets.pandas.CSVDataSet"
+DATASET = "kedro_datasets.pandas.CSVDataset"
@pytest.fixture
@@ -226,8 +226,8 @@ def test_checkpoint_path(self, local_csvs, partitioned_data_pandas):
@pytest.mark.parametrize(
"checkpoint_config,expected_checkpoint_class",
[
- (None, TextDataSet),
- ({"type": "kedro_datasets.pickle.PickleDataSet"}, PickleDataSet),
+ (None, TextDataset),
+ ({"type": "kedro_datasets.pickle.PickleDataset"}, PickleDataset),
({"type": "tests.io.test_incremental_dataset.DummyDataset"}, DummyDataset),
],
)
@@ -372,7 +372,7 @@ def mocked_csvs_in_s3(mocked_s3_bucket, partitioned_data_pandas):
return f"s3://{BUCKET_NAME}/{prefix}"
-class TestPartitionedDataSetS3:
+class TestPartitionedDatasetS3:
os.environ["AWS_ACCESS_KEY_ID"] = "FAKE_ACCESS_KEY"
os.environ["AWS_SECRET_ACCESS_KEY"] = "FAKE_SECRET_KEY"
@@ -477,7 +477,7 @@ def test_force_checkpoint_checkpoint_file_exists(
checkpoint_path = (
f"{mocked_csvs_in_s3}/{IncrementalDataset.DEFAULT_CHECKPOINT_FILENAME}"
)
- checkpoint_value = TextDataSet(checkpoint_path).load()
+ checkpoint_value = TextDataset(checkpoint_path).load()
assert checkpoint_value == "p04/data.csv"
pds = IncrementalDataset(
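Between the two test modules, a hedged sketch of the IncrementalDataset construction these tests now spell with the new class names; the local path is hypothetical and the checkpoint entry is optional.

from kedro.io import IncrementalDataset

pds = IncrementalDataset(
    path="data/01_raw/partitions",               # hypothetical folder of CSV partitions
    dataset="kedro_datasets.pandas.CSVDataset",  # renamed from kedro_datasets.pandas.CSVDataSet
    checkpoint={"type": "kedro_datasets.pickle.PickleDataset"},
)
loaded = pds.load()   # {partition_id: DataFrame} for partitions newer than the checkpoint
pds.confirm()         # persist the checkpoint so these partitions are not re-processed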
diff --git a/tests/io/test_partitioned_dataset.py b/tests/io/test_partitioned_dataset.py
index 02903cca7c..0acece1eab 100644
--- a/tests/io/test_partitioned_dataset.py
+++ b/tests/io/test_partitioned_dataset.py
@@ -7,7 +7,7 @@
import pandas as pd
import pytest
import s3fs
-from kedro_datasets.pandas import CSVDataSet, ParquetDataSet
+from kedro_datasets.pandas import CSVDataset, ParquetDataset
from moto import mock_s3
from pandas.testing import assert_frame_equal
@@ -38,11 +38,11 @@ def local_csvs(tmp_path, partitioned_data_pandas):
LOCAL_DATASET_DEFINITION = [
- "pandas.CSVDataSet",
- "kedro_datasets.pandas.CSVDataSet",
- CSVDataSet,
- {"type": "pandas.CSVDataSet", "save_args": {"index": False}},
- {"type": CSVDataSet},
+ "pandas.CSVDataset",
+ "kedro_datasets.pandas.CSVDataset",
+ CSVDataset,
+ {"type": "pandas.CSVDataset", "save_args": {"index": False}},
+ {"type": CSVDataset},
]
@@ -101,7 +101,7 @@ def original_data():
def test_save_invalidates_cache(self, local_csvs, mocker):
"""Test that save calls invalidate partition cache"""
- pds = PartitionedDataset(str(local_csvs), "pandas.CSVDataSet")
+ pds = PartitionedDataset(str(local_csvs), "pandas.CSVDataset")
mocked_fs_invalidate = mocker.patch.object(pds._filesystem, "invalidate_cache")
first_load = pds.load()
assert pds._partition_cache.currsize == 1
@@ -125,7 +125,7 @@ def test_save_invalidates_cache(self, local_csvs, mocker):
@pytest.mark.parametrize("overwrite,expected_num_parts", [(False, 6), (True, 1)])
def test_overwrite(self, local_csvs, overwrite, expected_num_parts):
pds = PartitionedDataset(
- str(local_csvs), "pandas.CSVDataSet", overwrite=overwrite
+ str(local_csvs), "pandas.CSVDataset", overwrite=overwrite
)
original_data = pd.DataFrame({"foo": 42, "bar": ["a", "b", None]})
part_id = "new/data"
@@ -137,9 +137,9 @@ def test_overwrite(self, local_csvs, overwrite, expected_num_parts):
def test_release_instance_cache(self, local_csvs):
"""Test that cache invalidation does not affect other instances"""
- ds_a = PartitionedDataset(str(local_csvs), "pandas.CSVDataSet")
+ ds_a = PartitionedDataset(str(local_csvs), "pandas.CSVDataset")
ds_a.load()
- ds_b = PartitionedDataset(str(local_csvs), "pandas.CSVDataSet")
+ ds_b = PartitionedDataset(str(local_csvs), "pandas.CSVDataset")
ds_b.load()
assert ds_a._partition_cache.currsize == 1
@@ -151,7 +151,7 @@ def test_release_instance_cache(self, local_csvs):
# cache of the dataset B is unaffected
assert ds_b._partition_cache.currsize == 1
- @pytest.mark.parametrize("dataset", ["pandas.CSVDataSet", "pandas.ParquetDataSet"])
+ @pytest.mark.parametrize("dataset", ["pandas.CSVDataset", "pandas.ParquetDataset"])
def test_exists(self, local_csvs, dataset):
assert PartitionedDataset(str(local_csvs), dataset).exists()
@@ -192,7 +192,7 @@ def test_load_args(self, mocker):
path = str(Path.cwd())
load_args = {"maxdepth": 42, "withdirs": True}
- pds = PartitionedDataset(path, "pandas.CSVDataSet", load_args=load_args)
+ pds = PartitionedDataset(path, "pandas.CSVDataset", load_args=load_args)
mocker.patch.object(pds, "_path_to_partition", return_value=fake_partition_name)
assert pds.load().keys() == {fake_partition_name}
@@ -207,7 +207,7 @@ def test_credentials(
):
mocked_filesystem = mocker.patch("fsspec.filesystem")
path = str(Path.cwd())
- pds = PartitionedDataset(path, "pandas.CSVDataSet", credentials=credentials)
+ pds = PartitionedDataset(path, "pandas.CSVDataset", credentials=credentials)
assert mocked_filesystem.call_count == 2
mocked_filesystem.assert_called_with("file", **expected_pds_creds)
@@ -233,13 +233,13 @@ def test_fs_args(self, mocker):
mocked_filesystem = mocker.patch("fsspec.filesystem")
path = str(Path.cwd())
- pds = PartitionedDataset(path, "pandas.CSVDataSet", fs_args=fs_args)
+ pds = PartitionedDataset(path, "pandas.CSVDataset", fs_args=fs_args)
assert mocked_filesystem.call_count == 2
mocked_filesystem.assert_called_with("file", **fs_args)
assert pds._dataset_config["fs_args"] == fs_args
- @pytest.mark.parametrize("dataset", ["pandas.ParquetDataSet", ParquetDataSet])
+ @pytest.mark.parametrize("dataset", ["pandas.ParquetDataset", ParquetDataset])
def test_invalid_dataset(self, dataset, local_csvs):
pds = PartitionedDataset(str(local_csvs), dataset)
loaded_partitions = pds.load()
@@ -278,8 +278,8 @@ def test_invalid_dataset_config(self, dataset_config, error_pattern):
@pytest.mark.parametrize(
"dataset_config",
[
- {"type": CSVDataSet, "versioned": True},
- {"type": "pandas.CSVDataSet", "versioned": True},
+ {"type": CSVDataset, "versioned": True},
+ {"type": "pandas.CSVDataset", "versioned": True},
],
)
def test_versioned_dataset_not_allowed(self, dataset_config):
@@ -291,7 +291,7 @@ def test_versioned_dataset_not_allowed(self, dataset_config):
PartitionedDataset(str(Path.cwd()), dataset_config)
def test_no_partitions(self, tmpdir):
- pds = PartitionedDataset(str(tmpdir), "pandas.CSVDataSet")
+ pds = PartitionedDataset(str(tmpdir), "pandas.CSVDataset")
pattern = re.escape(f"No partitions found in '{tmpdir}'")
with pytest.raises(DatasetError, match=pattern):
@@ -303,14 +303,14 @@ def test_no_partitions(self, tmpdir):
(
{
"path": str(Path.cwd()),
- "dataset": {"type": CSVDataSet, "filepath": "fake_path"},
+ "dataset": {"type": CSVDataset, "filepath": "fake_path"},
},
"filepath",
),
(
{
"path": str(Path.cwd()),
- "dataset": {"type": CSVDataSet, "other_arg": "fake_path"},
+ "dataset": {"type": CSVDataset, "other_arg": "fake_path"},
"filepath_arg": "other_arg",
},
"other_arg",
@@ -330,7 +330,7 @@ def test_credentials_log_warning(self, caplog):
the top-level ones"""
pds = PartitionedDataset(
path=str(Path.cwd()),
- dataset={"type": CSVDataSet, "credentials": {"secret": "dataset"}},
+ dataset={"type": CSVDataset, "credentials": {"secret": "dataset"}},
credentials={"secret": "global"},
)
log_message = KEY_PROPAGATION_WARNING % {
@@ -345,7 +345,7 @@ def test_fs_args_log_warning(self, caplog):
arguments will overwrite the top-level ones"""
pds = PartitionedDataset(
path=str(Path.cwd()),
- dataset={"type": CSVDataSet, "fs_args": {"args": "dataset"}},
+ dataset={"type": CSVDataset, "fs_args": {"args": "dataset"}},
fs_args={"args": "dataset"},
)
log_message = KEY_PROPAGATION_WARNING % {
@@ -359,14 +359,14 @@ def test_fs_args_log_warning(self, caplog):
"pds_config,expected_ds_creds,global_creds",
[
(
- {"dataset": "pandas.CSVDataSet", "credentials": {"secret": "global"}},
+ {"dataset": "pandas.CSVDataset", "credentials": {"secret": "global"}},
{"secret": "global"},
{"secret": "global"},
),
(
{
"dataset": {
- "type": CSVDataSet,
+ "type": CSVDataset,
"credentials": {"secret": "expected"},
},
},
@@ -375,7 +375,7 @@ def test_fs_args_log_warning(self, caplog):
),
(
{
- "dataset": {"type": CSVDataSet, "credentials": None},
+ "dataset": {"type": CSVDataset, "credentials": None},
"credentials": {"secret": "global"},
},
None,
@@ -384,7 +384,7 @@ def test_fs_args_log_warning(self, caplog):
(
{
"dataset": {
- "type": CSVDataSet,
+ "type": CSVDataset,
"credentials": {"secret": "expected"},
},
"credentials": {"secret": "global"},
@@ -403,11 +403,11 @@ def test_dataset_creds(self, pds_config, expected_ds_creds, global_creds):
BUCKET_NAME = "fake_bucket_name"
S3_DATASET_DEFINITION = [
- "pandas.CSVDataSet",
- "kedro_datasets.pandas.CSVDataSet",
- CSVDataSet,
- {"type": "pandas.CSVDataSet", "save_args": {"index": False}},
- {"type": CSVDataSet},
+ "pandas.CSVDataset",
+ "kedro_datasets.pandas.CSVDataset",
+ CSVDataset,
+ {"type": "pandas.CSVDataset", "save_args": {"index": False}},
+ {"type": CSVDataset},
]
@@ -456,7 +456,7 @@ def test_load_s3a(self, mocked_csvs_in_s3, partitioned_data_pandas, mocker):
s3a_path = f"s3a://{path}"
# any type is fine as long as it passes isinstance check
# since _dataset_type is mocked later anyways
- pds = PartitionedDataset(s3a_path, "pandas.CSVDataSet")
+ pds = PartitionedDataset(s3a_path, "pandas.CSVDataset")
assert pds._protocol == "s3a"
mocked_ds = mocker.patch.object(pds, "_dataset_type")
@@ -481,7 +481,7 @@ def test_join_protocol_with_bucket_name_startswith_protocol(
bucket name starts with the protocol name, i.e. `s3://s3_bucket/dummy_.txt`
"""
- pds = PartitionedDataset(mocked_csvs_in_s3, "pandas.CSVDataSet")
+ pds = PartitionedDataset(mocked_csvs_in_s3, "pandas.CSVDataset")
assert pds._join_protocol(partition_path) == f"s3://{partition_path}"
@pytest.mark.parametrize("dataset", S3_DATASET_DEFINITION)
@@ -505,7 +505,7 @@ def test_save_s3a(self, mocked_csvs_in_s3, mocker):
s3a_path = f"s3a://{path}"
# any type is fine as long as it passes isinstance check
# since _dataset_type is mocked later anyways
- pds = PartitionedDataset(s3a_path, "pandas.CSVDataSet", filename_suffix=".csv")
+ pds = PartitionedDataset(s3a_path, "pandas.CSVDataset", filename_suffix=".csv")
assert pds._protocol == "s3a"
mocked_ds = mocker.patch.object(pds, "_dataset_type")
@@ -517,7 +517,7 @@ def test_save_s3a(self, mocked_csvs_in_s3, mocker):
mocked_ds.assert_called_once_with(filepath=f"{s3a_path}/{new_partition}.csv")
mocked_ds.return_value.save.assert_called_once_with(data)
- @pytest.mark.parametrize("dataset", ["pandas.CSVDataSet", "pandas.HDFDataSet"])
+ @pytest.mark.parametrize("dataset", ["pandas.CSVDataset", "pandas.HDFDataset"])
def test_exists(self, dataset, mocked_csvs_in_s3):
assert PartitionedDataset(mocked_csvs_in_s3, dataset).exists()
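Before moving on to the runner fixtures, a minimal sketch (not from the diff) of PartitionedDataset usage with the renamed underlying class; the folder and column names are hypothetical.

import pandas as pd
from kedro.io import PartitionedDataset

pds = PartitionedDataset(
    path="data/02_intermediate/parts",  # hypothetical local folder
    dataset={"type": "pandas.CSVDataset", "save_args": {"index": False}},
    filename_suffix=".csv",
)
pds.save({"part_1": pd.DataFrame({"foo": [42]})})  # writes parts/part_1.csv
partitions = pds.load()                            # {partition_id: load callable}
df = partitions["part_1"]()                        # materialise one partition lazily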
diff --git a/tests/runner/conftest.py b/tests/runner/conftest.py
index 4c720a7a4a..0ce581e624 100644
--- a/tests/runner/conftest.py
+++ b/tests/runner/conftest.py
@@ -3,7 +3,7 @@
import pandas as pd
import pytest
-from kedro.io import DataCatalog, LambdaDataSet, MemoryDataSet
+from kedro.io import DataCatalog, LambdaDataset, MemoryDataset
from kedro.pipeline import node, pipeline
@@ -42,7 +42,7 @@ def multi_input_list_output(arg1, arg2):
@pytest.fixture
def conflicting_feed_dict(pandas_df_feed_dict):
- ds1 = MemoryDataSet({"data": 0})
+ ds1 = MemoryDataset({"data": 0})
ds3 = pandas_df_feed_dict["ds3"]
return {"ds1": ds1, "ds3": ds3}
@@ -60,8 +60,8 @@ def catalog():
@pytest.fixture
def memory_catalog():
- ds1 = MemoryDataSet({"data": 42})
- ds2 = MemoryDataSet([1, 2, 3, 4, 5])
+ ds1 = MemoryDataset({"data": 42})
+ ds2 = MemoryDataset([1, 2, 3, 4, 5])
return DataCatalog({"ds1": ds1, "ds2": ds2})
@@ -73,7 +73,7 @@ def _load():
def _save(arg):
pass
- persistent_dataset = LambdaDataSet(load=_load, save=_save)
+ persistent_dataset = LambdaDataset(load=_load, save=_save)
return DataCatalog(
{
"ds0_A": persistent_dataset,
diff --git a/tests/runner/test_parallel_runner.py b/tests/runner/test_parallel_runner.py
index 27b91d6896..1c3269ff46 100644
--- a/tests/runner/test_parallel_runner.py
+++ b/tests/runner/test_parallel_runner.py
@@ -1,6 +1,5 @@
from __future__ import annotations
-import importlib
import sys
from concurrent.futures.process import ProcessPoolExecutor
from typing import Any
@@ -34,12 +33,6 @@
)
-def test_deprecation():
- class_name = "_SharedMemoryDataSet"
- with pytest.warns(DeprecationWarning, match=f"{repr(class_name)} has been renamed"):
- getattr(importlib.import_module("kedro.runner.parallel_runner"), class_name)
-
-
@pytest.mark.skipif(
sys.platform.startswith("win"), reason="Due to bug in parallel runner"
)
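With the deprecation test removed, a hedged check (assuming the renamed private class still lives in kedro.runner.parallel_runner) that only the new spelling remains importable:

import importlib

mod = importlib.import_module("kedro.runner.parallel_runner")
assert hasattr(mod, "_SharedMemoryDataset")      # renamed class (assumed location)
assert not hasattr(mod, "_SharedMemoryDataSet")  # old alias no longer exposed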