Skip to content

Commit

Permalink
chore(datasets): Fix more doctest issues (#451)
Browse files Browse the repository at this point in the history
Signed-off-by: Merel Theisen <[email protected]>
Co-authored-by: Deepyaman Datta <[email protected]>
  • Loading branch information
merelcht and deepyaman authored Nov 28, 2023
1 parent e91fdbc commit a866efa
Show file tree
Hide file tree
Showing 6 changed files with 12 additions and 13 deletions.
5 changes: 1 addition & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -33,16 +33,13 @@ dataset-doctests:
--ignore kedro_datasets/pandas/sql_dataset.py \
--ignore kedro_datasets/partitions/incremental_dataset.py \
--ignore kedro_datasets/partitions/partitioned_dataset.py \
--ignore kedro_datasets/pillow/image_dataset.py \
--ignore kedro_datasets/polars/lazy_polars_dataset.py \
--ignore kedro_datasets/redis/redis_dataset.py \
--ignore kedro_datasets/snowflake/snowpark_dataset.py \
--ignore kedro_datasets/spark/deltatable_dataset.py \
--ignore kedro_datasets/spark/spark_dataset.py \
--ignore kedro_datasets/spark/spark_hive_dataset.py \
--ignore kedro_datasets/spark/spark_jdbc_dataset.py \
--ignore kedro_datasets/tensorflow/tensorflow_model_dataset.py \
--ignore kedro_datasets/video/video_dataset.py
--ignore kedro_datasets/tensorflow/tensorflow_model_dataset.py

test-sequential:
cd $(plugin) && pytest tests --cov-config pyproject.toml
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ class IncrementalDataset(PartitionedDataset):
>>>
>>> dataset.release() # clears load cache
>>> # returns an empty dictionary as no new partitions were added
>>> dataset.load()
>>> assert dataset.load() == {}
"""

DEFAULT_CHECKPOINT_TYPE = "kedro_datasets.text.TextDataset"
Expand Down
2 changes: 1 addition & 1 deletion kedro-datasets/kedro_datasets/pillow/image_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class ImageDataset(AbstractVersionedDataset[Image.Image, Image.Image]):
>>> from kedro_datasets.pillow import ImageDataset
>>>
>>> dataset = ImageDataset(filepath="test.png")
>>> dataset = ImageDataset(filepath="https://storage.googleapis.com/gtv-videos-bucket/sample/images/ForBiggerBlazes.jpg")
>>> image = dataset.load()
>>> image.show()
Expand Down
4 changes: 2 additions & 2 deletions kedro-datasets/kedro_datasets/spark/spark_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ class SparkDataset(AbstractVersionedDataset[DataFrame, DataFrame]):
.. code-block:: pycon
>>> from pyspark.sql import SparkSession
>>> from pyspark.sql.types import StructField, StringType, IntegerType, StructType
>>> from pyspark.sql.types import IntegerType, Row, StringType, StructField, StructType
>>>
>>> from kedro_datasets.spark import SparkDataset
>>>
Expand All @@ -249,7 +249,7 @@ class SparkDataset(AbstractVersionedDataset[DataFrame, DataFrame]):
>>> dataset.save(spark_df)
>>> reloaded = dataset.load()
>>>
>>> reloaded.take(4)
>>> assert Row(name='Bob', age=12) in reloaded.take(4)
"""

# this dataset cannot be used with ``ParallelRunner``,
Expand Down
2 changes: 1 addition & 1 deletion kedro-datasets/kedro_datasets/spark/spark_jdbc_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ class SparkJDBCDataset(AbstractDataset[DataFrame, DataFrame]):
.. code-block:: pycon
>>> import pandas as pd
>>> from kedro_datasets import SparkJBDCDataset
>>> from kedro_datasets.spark import SparkJDBCDataset
>>> from pyspark.sql import SparkSession
>>>
>>> spark = SparkSession.builder.getOrCreate()
Expand Down
10 changes: 6 additions & 4 deletions kedro-datasets/kedro_datasets/video/video_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,9 +225,11 @@ class VideoDataset(AbstractDataset[AbstractVideo, AbstractVideo]):
>>> from kedro_datasets.video import VideoDataset
>>> import numpy as np
>>>
>>> video = VideoDataset(filepath="/video/file/path.mp4").load()
>>> video = VideoDataset(
... filepath="https://storage.googleapis.com/gtv-videos-bucket/sample/ForBiggerBlazes.mp4"
... ).load()
>>> frame = video[0]
>>> np.sum(np.asarray(frame))
>>> assert isinstance(np.sum(np.asarray(frame)), np.uint64)
Example creating a video from numpy frames using Python API:
Expand All @@ -244,7 +246,7 @@ class VideoDataset(AbstractDataset[AbstractVideo, AbstractVideo]):
... imgs.append(Image.fromarray(frame))
... frame -= 1
...
>>> video = VideoDataset("my_video.mp4")
>>> video = VideoDataset(filepath="my_video.mp4")
>>> video.save(SequenceVideo(imgs, fps=25))
Expand All @@ -262,7 +264,7 @@ class VideoDataset(AbstractDataset[AbstractVideo, AbstractVideo]):
... yield Image.fromarray(frame)
... frame -= 1
...
>>> video = VideoDataset("my_video.mp4")
>>> video = VideoDataset(filepath="my_video.mp4")
>>> video.save(GeneratorVideo(gen(), fps=25, length=None))
"""
Expand Down

0 comments on commit a866efa

Please sign in to comment.