From ca724af02152720b7c69ea1f471b15921f7a1d22 Mon Sep 17 00:00:00 2001 From: Anasuya G Nair Date: Thu, 24 Oct 2024 14:17:16 +0530 Subject: [PATCH] #8343: Update documentation for ttnn.glu, reglu, geglu, swiglu (#13932) * #8343: Update documentation and move sweep test * #8343: Remove timeout parameter --- .github/workflows/ttnn-run-sweeps.yaml | 2 + .../sweeps/eltwise/unary/geglu/geglu.py | 76 +++++++++++++++++++ .../sweeps/eltwise/unary/glu/glu.py | 49 ++++++------ .../sweeps/eltwise/unary/swiglu/swiglu.py | 76 +++++++++++++++++++ .../operations/eltwise/unary/unary_pybind.hpp | 56 ++++++++++++-- 5 files changed, 232 insertions(+), 27 deletions(-) create mode 100644 tests/sweep_framework/sweeps/eltwise/unary/geglu/geglu.py create mode 100644 tests/sweep_framework/sweeps/eltwise/unary/swiglu/swiglu.py diff --git a/.github/workflows/ttnn-run-sweeps.yaml b/.github/workflows/ttnn-run-sweeps.yaml index a4cce1ebe2d..c320b810cb3 100644 --- a/.github/workflows/ttnn-run-sweeps.yaml +++ b/.github/workflows/ttnn-run-sweeps.yaml @@ -80,6 +80,8 @@ on: - eltwise.unary.silu.silu - eltwise.unary.silu.silu_pytorch2 - eltwise.unary.glu.glu + - eltwise.unary.geglu.geglu + - eltwise.unary.swiglu.swiglu - eltwise.unary.sigmoid.sigmoid - eltwise.unary.sigmoid.sigmoid_pytorch2 - eltwise.unary.sigmoid_accurate.sigmoid_accurate diff --git a/tests/sweep_framework/sweeps/eltwise/unary/geglu/geglu.py b/tests/sweep_framework/sweeps/eltwise/unary/geglu/geglu.py new file mode 100644 index 00000000000..5eebfe309a2 --- /dev/null +++ b/tests/sweep_framework/sweeps/eltwise/unary/geglu/geglu.py @@ -0,0 +1,76 @@ +# SPDX-FileCopyrightText: © 2024 Tenstorrent Inc. 
+ +# SPDX-License-Identifier: Apache-2.0 + +from typing import Optional, Tuple +from functools import partial + +import torch +import ttnn +from tests.sweep_framework.sweep_utils.utils import gen_shapes +from tests.tt_eager.python_api_testing.sweep_tests.generation_funcs import gen_func_with_cast_tt + +from tests.ttnn.utils_for_testing import check_with_pcc, start_measuring_time, stop_measuring_time +from models.utility_functions import torch_random + + +# Parameters provided to the test vector generator are defined here. +# They are defined as dict-type suites that contain the arguments to the run function as keys, and lists of possible inputs as values. +# Each suite has a key name (in this case "nightly") which will associate the test vectors to this specific suite of inputs. +# Developers can create their own generator functions and pass them to the parameters as inputs. +parameters = { + "nightly": { + "input_shape": gen_shapes([1, 1, 32, 64], [6, 12, 256, 256], [1, 1, 32, 64], 16), + "input_dtype": [ttnn.bfloat16, ttnn.bfloat8_b], + "input_layout": [ttnn.TILE_LAYOUT, ttnn.ROW_MAJOR_LAYOUT], + "input_memory_config": [ttnn.DRAM_MEMORY_CONFIG, ttnn.L1_MEMORY_CONFIG], + "output_memory_config": [ttnn.DRAM_MEMORY_CONFIG, ttnn.L1_MEMORY_CONFIG], + }, +} + + +# Invalidate vector is called during the generation phase where each vector will be passed in. +# If invalidated, the vector will still be stored but will be skipped. +# Returns False, None if the vector is valid, and True, str with a reason for invalidation if it is invalid. +def invalidate_vector(test_vector) -> Tuple[bool, Optional[str]]: + if test_vector["input_layout"] == ttnn.ROW_MAJOR_LAYOUT: + return True, "Row Major layout is not supported" + return False, None + + +# This is the run instructions for the test, defined by the developer. +# The run function must take the above-defined parameters as inputs. 
+# The runner will call this run function with each test vector, and the returned results from this function will be stored. +# If you defined a device_mesh_fixture above, the object you yielded will be passed into this function as 'device'. Otherwise, it will be the default ttnn device opened by the infra. +def run( + input_shape, + input_dtype, + input_layout, + input_memory_config, + output_memory_config, + *, + device, +) -> list: + torch.manual_seed(0) + + torch_input_tensor = gen_func_with_cast_tt( + partial(torch_random, low=-100, high=100, dtype=torch.float16), input_dtype + )(input_shape) + + golden_function = ttnn.get_golden_function(ttnn.geglu) + torch_output_tensor = golden_function(torch_input_tensor, dim=-1) + + input_tensor = ttnn.from_torch( + torch_input_tensor, + dtype=input_dtype, + layout=input_layout, + device=device, + memory_config=input_memory_config, + ) + + start_time = start_measuring_time() + result = ttnn.geglu(input_tensor, dim=-1, memory_config=output_memory_config) + output_tensor = ttnn.to_torch(result) + e2e_perf = stop_measuring_time(start_time) + + return [check_with_pcc(torch_output_tensor, output_tensor, 0.999), e2e_perf] diff --git a/tests/sweep_framework/sweeps/eltwise/unary/glu/glu.py b/tests/sweep_framework/sweeps/eltwise/unary/glu/glu.py index f90ea4ac0dc..97626190e1c 100644 --- a/tests/sweep_framework/sweeps/eltwise/unary/glu/glu.py +++ b/tests/sweep_framework/sweeps/eltwise/unary/glu/glu.py @@ -6,7 +6,6 @@ from functools import partial import torch -import random import ttnn from tests.sweep_framework.sweep_utils.utils import gen_shapes from tests.tt_eager.python_api_testing.sweep_tests.generation_funcs import gen_func_with_cast_tt @@ -14,10 +13,6 @@ from tests.ttnn.utils_for_testing import check_with_pcc, start_measuring_time, stop_measuring_time from models.utility_functions import torch_random -# Override the default timeout in seconds for hang detection. 
-TIMEOUT = 30 - -random.seed(0) # Parameters provided to the test vector generator are defined here. # They are defined as dict-type suites that contain the arguments to the run function as keys, and lists of possible inputs as values. @@ -26,45 +21,55 @@ parameters = { "nightly": { "input_shape": gen_shapes([1, 1, 32, 64], [6, 12, 256, 256], [1, 1, 32, 64], 16), - "input_a_dtype": [ttnn.bfloat16, ttnn.bfloat8_b], - "input_a_layout": [ttnn.TILE_LAYOUT], - "input_a_memory_config": [ttnn.DRAM_MEMORY_CONFIG, ttnn.L1_MEMORY_CONFIG], + "input_dtype": [ttnn.bfloat16, ttnn.bfloat8_b], + "input_layout": [ttnn.TILE_LAYOUT, ttnn.ROW_MAJOR_LAYOUT], + "input_memory_config": [ttnn.DRAM_MEMORY_CONFIG, ttnn.L1_MEMORY_CONFIG], "output_memory_config": [ttnn.DRAM_MEMORY_CONFIG, ttnn.L1_MEMORY_CONFIG], }, } +# Invalidate vector is called during the generation phase where each vector will be passed in. +# If invalidated, the vector will still be stored but will be skipped. +# Returns False, None if the vector is valid, and True, str with a reason for invalidation if it is invalid. +def invalidate_vector(test_vector) -> Tuple[bool, Optional[str]]: + if test_vector["input_layout"] == ttnn.ROW_MAJOR_LAYOUT: + return True, "Row Major layout is not supported" + return False, None + + # This is the run instructions for the test, defined by the developer. # The run function must take the above-defined parameters as inputs. # The runner will call this run function with each test vector, and the returned results from this function will be stored. # If you defined a device_mesh_fixture above, the object you yielded will be passed into this function as 'device'. Otherwise, it will be the default ttnn device opened by the infra. 
def run( input_shape, - input_a_dtype, - input_a_layout, - input_a_memory_config, + input_dtype, + input_layout, + input_memory_config, output_memory_config, *, device, ) -> list: - data_seed = random.randint(0, 20000000) - torch.manual_seed(data_seed) + torch.manual_seed(0) - torch_input_tensor_a = gen_func_with_cast_tt( - partial(torch_random, low=-100, high=100, dtype=torch.float16), input_a_dtype + torch_input_tensor = gen_func_with_cast_tt( + partial(torch_random, low=-100, high=100, dtype=torch.float16), input_dtype )(input_shape) - torch_output_tensor = torch.nn.functional.glu(torch_input_tensor_a, dim=-1) - input_tensor_a = ttnn.from_torch( - torch_input_tensor_a, - dtype=input_a_dtype, - layout=input_a_layout, + golden_function = ttnn.get_golden_function(ttnn.glu) + torch_output_tensor = golden_function(torch_input_tensor, dim=-1) + + input_tensor = ttnn.from_torch( + torch_input_tensor, + dtype=input_dtype, + layout=input_layout, device=device, - memory_config=input_a_memory_config, + memory_config=input_memory_config, ) start_time = start_measuring_time() - result = ttnn.glu(input_tensor_a, dim=-1, memory_config=output_memory_config) + result = ttnn.glu(input_tensor, dim=-1, memory_config=output_memory_config) output_tensor = ttnn.to_torch(result) e2e_perf = stop_measuring_time(start_time) diff --git a/tests/sweep_framework/sweeps/eltwise/unary/swiglu/swiglu.py b/tests/sweep_framework/sweeps/eltwise/unary/swiglu/swiglu.py new file mode 100644 index 00000000000..3bf2b88400b --- /dev/null +++ b/tests/sweep_framework/sweeps/eltwise/unary/swiglu/swiglu.py @@ -0,0 +1,76 @@ +# SPDX-FileCopyrightText: © 2024 Tenstorrent Inc. 
+ +# SPDX-License-Identifier: Apache-2.0 + +from typing import Optional, Tuple +from functools import partial + +import torch +import ttnn +from tests.sweep_framework.sweep_utils.utils import gen_shapes +from tests.tt_eager.python_api_testing.sweep_tests.generation_funcs import gen_func_with_cast_tt + +from tests.ttnn.utils_for_testing import check_with_pcc, start_measuring_time, stop_measuring_time +from models.utility_functions import torch_random + + +# Parameters provided to the test vector generator are defined here. +# They are defined as dict-type suites that contain the arguments to the run function as keys, and lists of possible inputs as values. +# Each suite has a key name (in this case "nightly") which will associate the test vectors to this specific suite of inputs. +# Developers can create their own generator functions and pass them to the parameters as inputs. +parameters = { + "nightly": { + "input_shape": gen_shapes([1, 1, 32, 64], [6, 12, 256, 256], [1, 1, 32, 64], 16), + "input_dtype": [ttnn.bfloat16, ttnn.bfloat8_b], + "input_layout": [ttnn.TILE_LAYOUT, ttnn.ROW_MAJOR_LAYOUT], + "input_memory_config": [ttnn.DRAM_MEMORY_CONFIG, ttnn.L1_MEMORY_CONFIG], + "output_memory_config": [ttnn.DRAM_MEMORY_CONFIG, ttnn.L1_MEMORY_CONFIG], + }, +} + + +# Invalidate vector is called during the generation phase where each vector will be passed in. +# If invalidated, the vector will still be stored but will be skipped. +# Returns False, None if the vector is valid, and True, str with a reason for invalidation if it is invalid. +def invalidate_vector(test_vector) -> Tuple[bool, Optional[str]]: + if test_vector["input_layout"] == ttnn.ROW_MAJOR_LAYOUT: + return True, "Row Major layout is not supported" + return False, None + + +# This is the run instructions for the test, defined by the developer. +# The run function must take the above-defined parameters as inputs. 
+# The runner will call this run function with each test vector, and the returned results from this function will be stored. +# If you defined a device_mesh_fixture above, the object you yielded will be passed into this function as 'device'. Otherwise, it will be the default ttnn device opened by the infra. +def run( + input_shape, + input_dtype, + input_layout, + input_memory_config, + output_memory_config, + *, + device, +) -> list: + torch.manual_seed(0) + + torch_input_tensor = gen_func_with_cast_tt( + partial(torch_random, low=-100, high=100, dtype=torch.float16), input_dtype + )(input_shape) + + golden_function = ttnn.get_golden_function(ttnn.swiglu) + torch_output_tensor = golden_function(torch_input_tensor, dim=-1) + + input_tensor = ttnn.from_torch( + torch_input_tensor, + dtype=input_dtype, + layout=input_layout, + device=device, + memory_config=input_memory_config, + ) + + start_time = start_measuring_time() + result = ttnn.swiglu(input_tensor, dim=-1, memory_config=output_memory_config) + output_tensor = ttnn.to_torch(result) + e2e_perf = stop_measuring_time(start_time) + + return [check_with_pcc(torch_output_tensor, output_tensor, 0.999), e2e_perf] diff --git a/ttnn/cpp/ttnn/operations/eltwise/unary/unary_pybind.hpp b/ttnn/cpp/ttnn/operations/eltwise/unary/unary_pybind.hpp index ce3a1a81957..025f3c1ba81 100644 --- a/ttnn/cpp/ttnn/operations/eltwise/unary/unary_pybind.hpp +++ b/ttnn/cpp/ttnn/operations/eltwise/unary/unary_pybind.hpp @@ -1646,19 +1646,65 @@ void py_module(py::module& module) { // Unary ops with dim parameter - detail::bind_unary_operation_with_dim_parameter(module, ttnn::glu, "dim", "Dimension to split input tensor. Supported dimension -1 or 3", "Split the tensor into two, apply glu function on second tensor followed by mul op with first tensor"); - detail::bind_unary_operation_with_dim_parameter(module, ttnn::reglu, "dim", "Dimension to split input tensor. 
Supported dimension -1 or 3", "Split the tensor into two, apply relu function on second tensor followed by mul op with first tensor", + detail::bind_unary_operation_with_dim_parameter(module, ttnn::glu, "dim", "Dimension to split input tensor. Supported only for last dimension (dim = -1 or 3)", "Split the tensor into two parts, apply the GLU function on the second tensor, and then perform multiplication with the first tensor.", R"doc(Supported dtypes, layouts, and ranks: +----------------------------+---------------------------------+-------------------+ | Dtypes | Layouts | Ranks | +----------------------------+---------------------------------+-------------------+ - | BFLOAT16, BFLOAT8_B | TILE | 2, 3, 4 | + | BFLOAT16, BFLOAT8_B | TILE | 4 | + +----------------------------+---------------------------------+-------------------+ + + System memory is not supported. + + Last dimension of input tensor should be divisible by 64. + + )doc"); + + detail::bind_unary_operation_with_dim_parameter(module, ttnn::reglu, "dim", "Dimension to split input tensor. Supported only for last dimension (dim = -1 or 3)", "Split the tensor into two parts, apply the ReLU function on the second tensor, and then perform multiplication with the first tensor.", + R"doc(Supported dtypes, layouts, and ranks: + + +----------------------------+---------------------------------+-------------------+ + | Dtypes | Layouts | Ranks | + +----------------------------+---------------------------------+-------------------+ + | BFLOAT16, BFLOAT8_B | TILE | 4 | + +----------------------------+---------------------------------+-------------------+ + + System memory is not supported. + + Last dimension of input tensor should be divisible by 64. + + )doc"); + + detail::bind_unary_operation_with_dim_parameter(module, ttnn::geglu, "dim", "Dimension to split input tensor. 
Supported only for last dimension (dim = -1 or 3)", "Split the tensor into two parts, apply the GELU function on the second tensor, and then perform multiplication with the first tensor.", + R"doc(Supported dtypes, layouts, and ranks: + + +----------------------------+---------------------------------+-------------------+ + | Dtypes | Layouts | Ranks | + +----------------------------+---------------------------------+-------------------+ + | BFLOAT16, BFLOAT8_B | TILE | 4 | +----------------------------+---------------------------------+-------------------+ + + System memory is not supported. + + Last dimension of input tensor should be divisible by 64. + )doc"); - detail::bind_unary_operation_with_dim_parameter(module, ttnn::geglu, "dim", "Dimension to split input tensor. Supported dimension -1 or 3", "Split the tensor into two, apply gelu function on second tensor followed by mul op with first tensor"); - detail::bind_unary_operation_with_dim_parameter(module, ttnn::swiglu, "dim", "Dimension to split input tensor. Supported dimension -1 or 3", "Split the tensor into two, apply silu function on second tensor followed by mul op with first tensor"); + detail::bind_unary_operation_with_dim_parameter(module, ttnn::swiglu, "dim", "Dimension to split input tensor. Supported only for last dimension (dim = -1 or 3)", "Split the tensor into two parts, apply the SiLU function on the second tensor, and then perform multiplication with the first tensor.", + R"doc(Supported dtypes, layouts, and ranks: + + +----------------------------+---------------------------------+-------------------+ + | Dtypes | Layouts | Ranks | + +----------------------------+---------------------------------+-------------------+ + | BFLOAT16, BFLOAT8_B | TILE | 4 | + +----------------------------+---------------------------------+-------------------+ + + System memory is not supported. + + Last dimension of input tensor should be divisible by 64. 
+ + )doc"); // Other unaries (unary chain operations) detail::bind_softplus(module, ttnn::softplus);