From 32c498237ed5751961ef4fa9495985527fa26704 Mon Sep 17 00:00:00 2001 From: SuryanarayanaY <116063290+SuryanarayanaY@users.noreply.github.com> Date: Fri, 21 Apr 2023 20:48:09 +0530 Subject: [PATCH 1/7] Updating split range from [0, 1] to (0, 1) in tf.keras.utils.split_dataset to make it more intuitive The split_dataset API is used to split a dataset into left and right parts, but its documentation states that it splits the dataset into a left half and a right half, even though the parts need not be equal. The split can be anything in the range (0, 1). Hence I have replaced the word "half" with "part". Also, the argument section and the exception section give the range for the split as [0, 1], which is not the correct notation: 0 and 1 are actually excluded from the range, and an exception is raised if either 0 or 1 is used for left_size or right_size when splitting the dataset. Hence I have replaced [0, 1] with (0, 1) to make it more intuitive. Thanks. --- keras/utils/dataset_utils.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/keras/utils/dataset_utils.py b/keras/utils/dataset_utils.py index 35d234d6255..91c930fe11e 100644 --- a/keras/utils/dataset_utils.py +++ b/keras/utils/dataset_utils.py @@ -33,16 +33,16 @@ def split_dataset( dataset, left_size=None, right_size=None, shuffle=False, seed=None ): - """Split a dataset into a left half and a right half (e.g. train / test). + """Split a dataset into a left part and a right part (e.g. train / test). Args: dataset: A `tf.data.Dataset` object, or a list/tuple of arrays with the same length. - left_size: If float (in the range `[0, 1]`), it signifies + left_size: If float (in the range `(0, 1)`), it signifies the fraction of the data to pack in the left dataset. If integer, it signifies the number of samples to pack in the left dataset. If `None`, it uses the complement to `right_size`. Defaults to `None`. 
- right_size: If float (in the range `[0, 1]`), it signifies + right_size: If float (in the range `(0, 1)`), it signifies the fraction of the data to pack in the right dataset. If integer, it signifies the number of samples to pack in the right dataset. If `None`, it uses the complement to `left_size`. Defaults to `None`. @@ -398,7 +398,7 @@ def _rescale_dataset_split_sizes(left_size, right_size, total_length): raise ValueError( "`left_size` should be either a positive integer " f"smaller than {total_length}, or a float " - "within the range `[0, 1]`. Received: left_size=" + "within the range `(0, 1)`. Received: left_size=" f"{left_size}" ) @@ -413,7 +413,7 @@ def _rescale_dataset_split_sizes(left_size, right_size, total_length): raise ValueError( "`right_size` should be either a positive integer " f"and smaller than {total_length} or a float " - "within the range `[0, 1]`. Received: right_size=" + "within the range `(0, 1)`. Received: right_size=" f"{right_size}" ) From 9bc86d6895b9090c662af45227dc90bf3d42208e Mon Sep 17 00:00:00 2001 From: SuryanarayanaY <116063290+SuryanarayanaY@users.noreply.github.com> Date: Tue, 27 Jun 2023 12:07:13 +0530 Subject: [PATCH 2/7] Update dataset_utils.py Updated the range notation as requested --- keras/utils/dataset_utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/keras/utils/dataset_utils.py b/keras/utils/dataset_utils.py index 91c930fe11e..1fef78b4179 100644 --- a/keras/utils/dataset_utils.py +++ b/keras/utils/dataset_utils.py @@ -38,7 +38,7 @@ def split_dataset( Args: dataset: A `tf.data.Dataset` object, or a list/tuple of arrays with the same length. - left_size: If float (in the range `(0, 1)`), it signifies + left_size: If float (in the range "[0, 1] (excluding the boundary values 0 and 1)"), it signifies the fraction of the data to pack in the left dataset. If integer, it signifies the number of samples to pack in the left dataset. If `None`, it uses the complement to `right_size`. 
Defaults to `None`. @@ -398,7 +398,7 @@ def _rescale_dataset_split_sizes(left_size, right_size, total_length): raise ValueError( "`left_size` should be either a positive integer " f"smaller than {total_length}, or a float " - "within the range `(0, 1)`. Received: left_size=" + "within the range '[0, 1] (excluding the boundary values 0 and 1)'. Received: left_size=" f"{left_size}" ) @@ -413,7 +413,7 @@ def _rescale_dataset_split_sizes(left_size, right_size, total_length): raise ValueError( "`right_size` should be either a positive integer " f"and smaller than {total_length} or a float " - "within the range `(0, 1)`. Received: right_size=" + "within the range '[0, 1] (excluding the boundary values 0 and 1)'. Received: right_size=" f"{right_size}" ) From 0a51eeb2b1a68386aa8b2b39a6881e5c1d4c14fb Mon Sep 17 00:00:00 2001 From: SuryanarayanaY <116063290+SuryanarayanaY@users.noreply.github.com> Date: Wed, 19 Jul 2023 17:51:45 +0530 Subject: [PATCH 3/7] Update dataset_utils.py Done the formatting changes as requested --- keras/utils/dataset_utils.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/keras/utils/dataset_utils.py b/keras/utils/dataset_utils.py index 1fef78b4179..5ccab14243b 100644 --- a/keras/utils/dataset_utils.py +++ b/keras/utils/dataset_utils.py @@ -38,10 +38,11 @@ def split_dataset( Args: dataset: A `tf.data.Dataset` object, or a list/tuple of arrays with the same length. - left_size: If float (in the range "[0, 1] (excluding the boundary values 0 and 1)"), it signifies - the fraction of the data to pack in the left dataset. If integer, it - signifies the number of samples to pack in the left dataset. If - `None`, it uses the complement to `right_size`. Defaults to `None`. 
+ left_size: If float (in the range `[0, 1]` (excluding the + boundary values 0 and 1), it signifies the fraction of the data to + pack in the left dataset.If integer, it signifies the number of + samples to pack in the left dataset.If `None`, it uses the complement + to `right_size`. Defaults to `None`. right_size: If float (in the range `(0, 1)`), it signifies the fraction of the data to pack in the right dataset. If integer, it signifies the number of samples to pack in the right dataset. If From ed636c992a3ce9a68aabb0d717b5516d8e9709af Mon Sep 17 00:00:00 2001 From: SuryanarayanaY <116063290+SuryanarayanaY@users.noreply.github.com> Date: Wed, 19 Jul 2023 17:59:22 +0530 Subject: [PATCH 4/7] Update dataset_utils.py --- keras/utils/dataset_utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/keras/utils/dataset_utils.py b/keras/utils/dataset_utils.py index 5ccab14243b..5c8ebd6fac8 100644 --- a/keras/utils/dataset_utils.py +++ b/keras/utils/dataset_utils.py @@ -38,10 +38,10 @@ def split_dataset( Args: dataset: A `tf.data.Dataset` object, or a list/tuple of arrays with the same length. - left_size: If float (in the range `[0, 1]` (excluding the - boundary values 0 and 1), it signifies the fraction of the data to - pack in the left dataset.If integer, it signifies the number of - samples to pack in the left dataset.If `None`, it uses the complement + left_size: If float (in the range `[0, 1]` (excluding the + boundary values 0 and 1)), it signifies the fraction of the data to + pack in the left dataset.If integer, it signifies the number of + samples to pack in the left dataset.If `None`, it uses the complement to `right_size`. Defaults to `None`. right_size: If float (in the range `(0, 1)`), it signifies the fraction of the data to pack in the right dataset. 
If integer, it From 38aab6fb280c9f03eb6e315cc0bed3067b7be253 Mon Sep 17 00:00:00 2001 From: SuryanarayanaY <116063290+SuryanarayanaY@users.noreply.github.com> Date: Tue, 1 Aug 2023 19:12:20 +0530 Subject: [PATCH 5/7] Update dataset_utils.py Reduced the line lengths to <80 chars as required for black format --- keras/utils/dataset_utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/keras/utils/dataset_utils.py b/keras/utils/dataset_utils.py index 5c8ebd6fac8..8031312b121 100644 --- a/keras/utils/dataset_utils.py +++ b/keras/utils/dataset_utils.py @@ -399,8 +399,8 @@ def _rescale_dataset_split_sizes(left_size, right_size, total_length): raise ValueError( "`left_size` should be either a positive integer " f"smaller than {total_length}, or a float " - "within the range '[0, 1] (excluding the boundary values 0 and 1)'. Received: left_size=" - f"{left_size}" + "within the range '[0, 1] (excluding the boundary values 0 and 1)'." + f"Received: left_size = {left_size}" ) # check right_size is non-negative and less than 1 and less than @@ -414,8 +414,8 @@ def _rescale_dataset_split_sizes(left_size, right_size, total_length): raise ValueError( "`right_size` should be either a positive integer " f"and smaller than {total_length} or a float " - "within the range '[0, 1] (excluding the boundary values 0 and 1)'. Received: right_size=" - f"{right_size}" + "within the range '[0, 1] (excluding the boundary values 0 and 1)'." + f"Received: right_size = {right_size}" ) # check sum of left_size and right_size is less than or equal to From a0b1378685548c2172d2b939ff94af85452769fd Mon Sep 17 00:00:00 2001 From: Surya <116063290+SuryanarayanaY@users.noreply.github.com> Date: Thu, 24 Aug 2023 10:09:53 +0530 Subject: [PATCH 6/7] Update dataset_utils.py Done the formatting as suggested. 
Thanks --- keras/utils/dataset_utils.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/keras/utils/dataset_utils.py b/keras/utils/dataset_utils.py index 8031312b121..9b281b64462 100644 --- a/keras/utils/dataset_utils.py +++ b/keras/utils/dataset_utils.py @@ -40,11 +40,12 @@ def split_dataset( same length. left_size: If float (in the range `[0, 1]` (excluding the boundary values 0 and 1)), it signifies the fraction of the data to - pack in the left dataset.If integer, it signifies the number of - samples to pack in the left dataset.If `None`, it uses the complement - to `right_size`. Defaults to `None`. - right_size: If float (in the range `(0, 1)`), it signifies - the fraction of the data to pack in the right dataset. If integer, it + pack in the left dataset. If integer, it signifies the number of + samples to pack in the left dataset. If `None`, it uses the + complement to `right_size`. Defaults to `None`. + right_size: If float (in the range `[0, 1]` (excluding the + boundary values 0 and 1)), it signifiesthe fraction of + the data to pack in the right dataset. If integer, it signifies the number of samples to pack in the right dataset. If `None`, it uses the complement to `left_size`. Defaults to `None`. shuffle: Boolean, whether to shuffle the data before splitting it. @@ -399,7 +400,7 @@ def _rescale_dataset_split_sizes(left_size, right_size, total_length): raise ValueError( "`left_size` should be either a positive integer " f"smaller than {total_length}, or a float " - "within the range '[0, 1] (excluding the boundary values 0 and 1)'." + "within the range `[0, 1]` (excluding the boundary values 0 and 1)." f"Received: left_size = {left_size}" ) @@ -414,7 +415,7 @@ def _rescale_dataset_split_sizes(left_size, right_size, total_length): raise ValueError( "`right_size` should be either a positive integer " f"and smaller than {total_length} or a float " - "within the range '[0, 1] (excluding the boundary values 0 and 1)'." 
+ "within the range `[0, 1]` (excluding the boundary values 0 and 1)." f"Received: right_size = {right_size}" ) From 9c5d415935311900b8b329dd5141dbb4188ab4a6 Mon Sep 17 00:00:00 2001 From: Surya <116063290+SuryanarayanaY@users.noreply.github.com> Date: Thu, 24 Aug 2023 15:14:31 +0530 Subject: [PATCH 7/7] Update dataset_utils.py Removed trailing whitespaces as suggested by lint. --- keras/utils/dataset_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/keras/utils/dataset_utils.py b/keras/utils/dataset_utils.py index 9b281b64462..156cb3a2e48 100644 --- a/keras/utils/dataset_utils.py +++ b/keras/utils/dataset_utils.py @@ -41,10 +41,10 @@ def split_dataset( left_size: If float (in the range `[0, 1]` (excluding the boundary values 0 and 1)), it signifies the fraction of the data to pack in the left dataset. If integer, it signifies the number of - samples to pack in the left dataset. If `None`, it uses the + samples to pack in the left dataset. If `None`, it uses the complement to `right_size`. Defaults to `None`. right_size: If float (in the range `[0, 1]` (excluding the - boundary values 0 and 1)), it signifiesthe fraction of + boundary values 0 and 1)), it signifiesthe fraction of the data to pack in the right dataset. If integer, it signifies the number of samples to pack in the right dataset. If `None`, it uses the complement to `left_size`. Defaults to `None`.