diff --git a/keras/utils/dataset_utils.py b/keras/utils/dataset_utils.py index 35d234d6255..156cb3a2e48 100644 --- a/keras/utils/dataset_utils.py +++ b/keras/utils/dataset_utils.py @@ -33,17 +33,19 @@ def split_dataset( dataset, left_size=None, right_size=None, shuffle=False, seed=None ): - """Split a dataset into a left half and a right half (e.g. train / test). + """Split a dataset into a left part and a right part (e.g. train / test). Args: dataset: A `tf.data.Dataset` object, or a list/tuple of arrays with the same length. - left_size: If float (in the range `[0, 1]`), it signifies - the fraction of the data to pack in the left dataset. If integer, it - signifies the number of samples to pack in the left dataset. If - `None`, it uses the complement to `right_size`. Defaults to `None`. - right_size: If float (in the range `[0, 1]`), it signifies - the fraction of the data to pack in the right dataset. If integer, it + left_size: If float (in the range `[0, 1]` (excluding the + boundary values 0 and 1)), it signifies the fraction of the data to + pack in the left dataset. If integer, it signifies the number of + samples to pack in the left dataset. If `None`, it uses the + complement to `right_size`. Defaults to `None`. + right_size: If float (in the range `[0, 1]` (excluding the + boundary values 0 and 1)), it signifies the fraction of + the data to pack in the right dataset. If integer, it signifies the number of samples to pack in the right dataset. If `None`, it uses the complement to `left_size`. Defaults to `None`. shuffle: Boolean, whether to shuffle the data before splitting it. @@ -398,8 +400,8 @@ def _rescale_dataset_split_sizes(left_size, right_size, total_length): raise ValueError( "`left_size` should be either a positive integer " f"smaller than {total_length}, or a float " - "within the range `[0, 1]`. Received: left_size=" - f"{left_size}" + "within the range `[0, 1]` (excluding the boundary values 0 and 1)." 
+ f" Received: left_size = {left_size}" ) # check right_size is non-negative and less than 1 and less than @@ -413,8 +415,8 @@ def _rescale_dataset_split_sizes(left_size, right_size, total_length): raise ValueError( "`right_size` should be either a positive integer " f"and smaller than {total_length} or a float " - "within the range `[0, 1]`. Received: right_size=" - f"{right_size}" + "within the range `[0, 1]` (excluding the boundary values 0 and 1)." + f" Received: right_size = {right_size}" ) # check sum of left_size and right_size is less than or equal to