Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ttnn.bias_gelu_bw unary low PCC #13856

Open
Tracked by #13795
amalbasaTT opened this issue Oct 16, 2024 · 4 comments
Open
Tracked by #13795

ttnn.bias_gelu_bw unary low PCC #13856

amalbasaTT opened this issue Oct 16, 2024 · 4 comments
Assignees
Labels

Comments

@amalbasaTT
Copy link
Contributor

amalbasaTT commented Oct 16, 2024

Describe the bug
ttnn.bias_gelu_bw unary has low PCC when input_tensor_a has bfloat8_b dtype or in random cases when approx is "tanh".

To Reproduce
Steps to reproduce the behavior:
Sweep test for bias_gelu_bw is located in 'tests/sweep_framework/sweeps/eltwise/unary_backward/bias_gelu_bw/bias_gelu_bw.py'

  1. Go to 'tests/sweep_framework/sweeps/eltwise/unary_backward/bias_gelu_bw/bias_gelu_bw.py'
  2. Generate new parameter vectors and run the sweep test
python3 tests/sweep_framework/sweeps_parameter_generator.py --elastic cloud --module-name eltwise.unary_backward.bias_gelu_bw.bias_gelu_bw
python3 tests/sweep_framework/sweeps_runner.py --elastic cloud --module-name eltwise.unary_backward.bias_gelu_bw.bias_gelu_bw --suite-name xfail
  3. See the error. Results can be found on elastic cloud as explained here: https://github.com/tenstorrent/tt-metal/tree/main/tests/sweep_framework
@umadevimcw
Copy link
Contributor

@amalbasaTT Can you provide unit test for this issue as well?

@amalbasaTT
Copy link
Contributor Author

@amalbasaTT Can you provide unit test for this issue as well?
Here is the unit test which confirms low PCC when input_tensor_a has bfloat8_b. First two test cases (where input_tensor_a has bfloat8_b) fail, other two pass.

# SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.

# SPDX-License-Identifier: Apache-2.0

from loguru import logger
from functools import partial
import pytest
import torch
import ttnn
import traceback

from tests.ttnn.utils_for_testing import assert_with_pcc
from tests.tt_eager.python_api_testing.sweep_tests.generation_funcs import gen_func_with_cast_tt
from models.utility_functions import torch_random


def run_backward_div_tests(
    input_shape,
    dtype,
    dlayout,
    in_mem_cfg,
    out_mem_cfg,
    data_seed,
    device,
):
    """Compare ttnn.bias_gelu_bw against its registered golden (torch) reference.

    Args:
        input_shape: one-element list holding the shape used for both the grad
            and input tensors.
        dtype: pair of ttnn dtypes — index 0 for the grad tensor, 1 for input.
        dlayout: one-element list with the layout applied to both tensors.
        in_mem_cfg: per-input memory configs (index 0 -> grad, 1 -> input).
        out_mem_cfg: memory config for the op output.
        data_seed: torch manual seed so the random data is reproducible.
        device: TT device fixture supplied by the test harness.

    Raises:
        AssertionError: if shapes differ or the PCC falls below 0.999.
    """
    torch.manual_seed(data_seed)
    # grad tensor
    x = gen_func_with_cast_tt(
        partial(torch_random, low=-100, high=100, dtype=torch.float32), dtype[0]
    )(input_shape[0])
    # input tensor (requires grad so the golden backward can be evaluated)
    y = gen_func_with_cast_tt(
        partial(torch_random, low=-100, high=100, dtype=torch.float32), dtype[1]
    )(input_shape[0])

    y.requires_grad = True

    # Random bias scalar drawn at bfloat16 precision, matching what the op receives.
    scalar = torch.tensor(1, dtype=torch.bfloat16).uniform_(-100, 100).item()

    try:
        # Reference result from the golden function registered for this op.
        golden_function = ttnn.get_golden_function(ttnn.bias_gelu_bw)
        ref_value = golden_function(x, y, scalar)[0]

        tt_x = ttnn.from_torch(x, dtype=dtype[0], layout=dlayout[0], device=device, memory_config=in_mem_cfg[0])
        tt_y = ttnn.from_torch(y, dtype=dtype[1], layout=dlayout[0], device=device, memory_config=in_mem_cfg[1])

        tt_result = ttnn.bias_gelu_bw(tt_x, tt_y, scalar, memory_config=out_mem_cfg)[0]
        tt_result = ttnn.to_torch(tt_result)
    except Exception as e:
        # Log for sweep triage, then re-raise with the original traceback intact
        # (bare `raise` instead of `raise e`).
        logger.warning(f"Test execution crashed: {e}")
        print(traceback.format_exc())
        raise

    assert len(tt_result.shape) == len(ref_value.shape)
    assert tt_result.shape == ref_value.shape
    assert_with_pcc(ref_value, tt_result, 0.999)


# Sweep vectors for the bfloat8_b repro. Each entry is:
# (input_shape, [grad_dtype, input_dtype], [layout], in_mem_configs, out_mem_config, data_seed).
# Per the issue report, the first two cases (input tensor in bfloat8_b) fail with
# low PCC; the last two (input tensor in bfloat16) pass.
test_sweep_args = [
    (
        [(6, 5, 96, 128)],
        [ttnn.bfloat16, ttnn.bfloat8_b],
        [ttnn.TILE_LAYOUT],
        [ttnn.DRAM_MEMORY_CONFIG, ttnn.DRAM_MEMORY_CONFIG, ttnn.DRAM_MEMORY_CONFIG],
        ttnn.DRAM_MEMORY_CONFIG,
        14943539,
    ),
    (
        [(3, 2, 192, 32)],
        [ttnn.bfloat8_b, ttnn.bfloat8_b],
        [ttnn.TILE_LAYOUT],
        [ttnn.DRAM_MEMORY_CONFIG, ttnn.DRAM_MEMORY_CONFIG, ttnn.DRAM_MEMORY_CONFIG],
        ttnn.DRAM_MEMORY_CONFIG,
        14943539,
    ),
    (
        [(3, 2, 192, 32)],
        [ttnn.bfloat8_b, ttnn.bfloat16],
        [ttnn.TILE_LAYOUT],
        [ttnn.DRAM_MEMORY_CONFIG, ttnn.DRAM_MEMORY_CONFIG, ttnn.DRAM_MEMORY_CONFIG],
        ttnn.DRAM_MEMORY_CONFIG,
        14943539,
    ),
    (
        [(3, 2, 192, 32)],
        [ttnn.bfloat16, ttnn.bfloat16],
        [ttnn.TILE_LAYOUT],
        [ttnn.DRAM_MEMORY_CONFIG, ttnn.DRAM_MEMORY_CONFIG, ttnn.DRAM_MEMORY_CONFIG],
        ttnn.DRAM_MEMORY_CONFIG,
        14943539,
    ),
]


@pytest.mark.parametrize(
    "input_shape, dtype, dlayout, in_mem_config, out_mem_config, data_seed",
    test_sweep_args,
)
def test_backward_div(input_shape, dtype, dlayout, in_mem_config, out_mem_config, data_seed, device):
    """Pytest entry point: forward each sweep vector to the runner."""
    run_backward_div_tests(
        input_shape,
        dtype,
        dlayout,
        in_mem_config,
        out_mem_config,
        data_seed,
        device,
    )
    


@amalbasaTT
Copy link
Contributor Author

Here is a unit test for the low PCC in some of the cases where approx was "tanh" (most of those are cases where the grad tensor has bfloat8_b, but there was also an instance where the grad tensor had bfloat16):

# SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.

# SPDX-License-Identifier: Apache-2.0

from loguru import logger
from functools import partial
import pytest
import torch
import ttnn
import traceback

from tests.ttnn.utils_for_testing import assert_with_pcc
from tests.tt_eager.python_api_testing.sweep_tests.generation_funcs import gen_func_with_cast_tt
from models.utility_functions import torch_random


def run_backward_div_tests(
    input_shape,
    approx,
    dtype,
    dlayout,
    in_mem_cfg,
    out_mem_cfg,
    data_seed,
    device,
):
    """Compare ttnn.bias_gelu_bw (with an approximation mode) against its golden reference.

    Args:
        input_shape: one-element list holding the shape used for both the grad
            and input tensors.
        approx: gelu approximation mode passed to both golden and device ops
            (e.g. "tanh").
        dtype: pair of ttnn dtypes — index 0 for the grad tensor, 1 for input.
        dlayout: one-element list with the layout applied to both tensors.
        in_mem_cfg: per-input memory configs (index 0 -> grad, 1 -> input).
        out_mem_cfg: memory config for the op output.
        data_seed: torch manual seed so the random data is reproducible.
        device: TT device fixture supplied by the test harness.

    Raises:
        AssertionError: if shapes differ or the PCC falls below 0.999.
    """
    torch.manual_seed(data_seed)
    # grad tensor
    x = gen_func_with_cast_tt(
        partial(torch_random, low=-100, high=100, dtype=torch.float32), dtype[0]
    )(input_shape[0])
    # input tensor (requires grad so the golden backward can be evaluated)
    y = gen_func_with_cast_tt(
        partial(torch_random, low=-100, high=100, dtype=torch.float32), dtype[1]
    )(input_shape[0])

    y.requires_grad = True

    # Random bias scalar drawn at bfloat16 precision, matching what the op receives.
    scalar = torch.tensor(1, dtype=torch.bfloat16).uniform_(-100, 100).item()

    try:
        # Reference result from the golden function registered for this op.
        golden_function = ttnn.get_golden_function(ttnn.bias_gelu_bw)
        ref_value = golden_function(x, y, scalar, value=approx)[0]

        tt_x = ttnn.from_torch(x, dtype=dtype[0], layout=dlayout[0], device=device, memory_config=in_mem_cfg[0])
        tt_y = ttnn.from_torch(y, dtype=dtype[1], layout=dlayout[0], device=device, memory_config=in_mem_cfg[1])

        tt_result = ttnn.bias_gelu_bw(tt_x, tt_y, scalar, approximate=approx, memory_config=out_mem_cfg)[0]
        tt_result = ttnn.to_torch(tt_result)
    except Exception as e:
        # Log for sweep triage, then re-raise with the original traceback intact
        # (bare `raise` instead of `raise e`).
        logger.warning(f"Test execution crashed: {e}")
        print(traceback.format_exc())
        raise

    assert len(tt_result.shape) == len(ref_value.shape)
    assert tt_result.shape == ref_value.shape
    assert_with_pcc(ref_value, tt_result, 0.999)


# Sweep vectors reproducing low PCC with approx="tanh". Each entry is:
# (input_shape, approx, [grad_dtype, input_dtype], [layout], in_mem_configs, out_mem_config, data_seed).
# Per the issue report, most failing cases have the grad tensor in bfloat8_b,
# though one bfloat16-grad case is included as well.
test_sweep_args = [
    (
        [(6, 10, 128, 224)],
        "tanh",
        [ttnn.bfloat8_b, ttnn.bfloat16],
        [ttnn.TILE_LAYOUT],
        [ttnn.L1_MEMORY_CONFIG, ttnn.DRAM_MEMORY_CONFIG],
        ttnn.DRAM_MEMORY_CONFIG,
        14469376,
    ),
    (
        [(4, 2, 96, 192)],
        "tanh",
        [ttnn.bfloat16, ttnn.bfloat16],
        [ttnn.TILE_LAYOUT],
        [ttnn.L1_MEMORY_CONFIG, ttnn.DRAM_MEMORY_CONFIG],
        ttnn.L1_MEMORY_CONFIG,
        4378657,
    ),
    (
        [(5, 10, 224, 32)],
        "tanh",
        [ttnn.bfloat8_b, ttnn.bfloat16],
        [ttnn.TILE_LAYOUT],
        [ttnn.DRAM_MEMORY_CONFIG, ttnn.DRAM_MEMORY_CONFIG],
        ttnn.DRAM_MEMORY_CONFIG,
        678741,
    ),
    (
        [(97, 129)],
        "tanh",
        [ttnn.bfloat16, ttnn.bfloat16],
        [ttnn.TILE_LAYOUT],
        [ttnn.DRAM_MEMORY_CONFIG, ttnn.L1_MEMORY_CONFIG],
        ttnn.DRAM_MEMORY_CONFIG,
        7580522,
    ),
]


@pytest.mark.parametrize(
    "input_shape, approx, dtype, dlayout, in_mem_config, out_mem_config, data_seed",
    test_sweep_args,
)
def test_backward_div(input_shape, approx, dtype, dlayout, in_mem_config, out_mem_config, data_seed, device):
    """Pytest entry point: forward each sweep vector (incl. approx mode) to the runner."""
    run_backward_div_tests(
        input_shape,
        approx,
        dtype,
        dlayout,
        in_mem_config,
        out_mem_config,
        data_seed,
        device,
    )
    


@KalaivaniMCW
Copy link
Contributor

KalaivaniMCW commented Nov 14, 2024

bias_gelu_bw unary is directly dependent on gelu_bw which involves multiple eltwise ops.

On debugging step-by-step for approx = "tanh", the PCC drop occurs with ttnn.tanh, which has a PCC of 0.994 here; the result degrades further because our operations cannot produce -0.0.

Analysis sheet : link
Image

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
Projects
None yet
Development

No branches or pull requests

3 participants