Skip to content

Commit

Permalink
LLK Test Coverage - MathFid and DestAcc in reduce API (#12696)
Browse files Browse the repository at this point in the history
* #0: Add MathFid and DestAcc sweep to reduce API

1. Added AVG pool in reduce tests
2. Added Math Fidelity for all reduce tests
3. Added an FP32 DEST accumulation sweep, skipped for
scalar pool since it isn't supported there
4. The FP32 DEST accumulation (fp32_dst_acc) sweep is also skipped on GS
  • Loading branch information
ncvetkovicTT authored Sep 20, 2024
1 parent cee4c7e commit 5bb0b10
Show file tree
Hide file tree
Showing 6 changed files with 353 additions and 146 deletions.
6 changes: 6 additions & 0 deletions tests/tt_metal/tt_metal/test_kernels/compute/reduce_h.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,14 @@ void MAIN {
acquire_dst(tt::DstMode::Half);
for(uint32_t ht = 0; ht < Ht; ++ht) {
cb_wait_front(tt::CB::c_in0, onetile);
#if (MATH_ONLY == 1)
UNPACK(( llk_unpack_AB(tt::CB::c_in0, tt::CB::c_in2, 0, 0) ));
// REDUCE_OP is expected to come from add_define
reduce_tile_math(reduce_dst_idx);
#elif (MATH_ONLY == 0)
// REDUCE_OP is expected to come from add_define
reduce_tile(tt::CB::c_in0, tt::CB::c_in2, 0, 0, reduce_dst_idx);
#endif
cb_pop_front(tt::CB::c_in0, onetile);
}

Expand Down
8 changes: 7 additions & 1 deletion tests/tt_metal/tt_metal/test_kernels/compute/reduce_hw.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,14 @@ void MAIN {
// in this case we just sequentially add to accumulator all the W-tiles in a row
for(uint32_t wt = 0; wt < Wt; ++wt) {
cb_wait_front(tt::CB::c_in0, onetile);
// REDUCE_OP/DIM is expected to come from add_define
#if (MATH_ONLY == 1)
UNPACK(( llk_unpack_AB(tt::CB::c_in0, tt::CB::c_in2, 0, 0) ));
// REDUCE_OP is expected to come from add_define
reduce_tile_math(reduce_dst_idx);
#elif (MATH_ONLY == 0)
// REDUCE_OP is expected to come from add_define
reduce_tile(tt::CB::c_in0, tt::CB::c_in2, 0, 0, reduce_dst_idx);
#endif
cb_pop_front(tt::CB::c_in0, onetile);
}
}
Expand Down
35 changes: 18 additions & 17 deletions tests/tt_metal/tt_metal/unit_tests/compute/test_golden_impls.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ std::vector<uint16_t> gold_transpose_wh(const std::vector<uint16_t> &src_vec, co
// input shape.x is assumed to have the full number of elements in bfloat16
// src_vec is expected to be untilized
// result is also untilized
std::vector<uint16_t> gold_reduce_h(const std::vector<uint16_t> &src_vec, const std::vector<uint32_t> &shape, float scaler, bool red_max, bool zeropad) {
std::vector<uint16_t> gold_reduce_h(const std::vector<uint16_t> &src_vec, const std::vector<uint32_t> &shape, float scaler, uint8_t red_type, bool zeropad) {
vector<uint32_t> shape_dst{shape[0], shape[1], 1, shape[3]};
TT_FATAL(shape[2] > 0, "Error");
if (zeropad)
Expand All @@ -150,10 +150,11 @@ std::vector<uint16_t> gold_reduce_h(const std::vector<uint16_t> &src_vec, const
for (int n = 0; n < shape[0]; n++)
for (int c = 0; c < shape[1]; c++)
for (int w = 0; w < shape[3]; w++) {
float sum = red_max ? -std::numeric_limits<float>::max() : 0.0f;
// red_type : {SUM, AVG, MAX}; i.e. {0, 1, 2};
float sum = (red_type == 2) ? -std::numeric_limits<float>::max() : 0.0f;
for (int h = 0; h < shape[2]; h++) {
auto offs = addr.offs(n, c, h, w);
if (red_max)
if (red_type == 2)
sum = fmaxf(bfloat16(src_vec[offs]).to_float(), sum);
else
sum += bfloat16(src_vec[offs]).to_float();
Expand All @@ -165,7 +166,7 @@ std::vector<uint16_t> gold_reduce_h(const std::vector<uint16_t> &src_vec, const
return reduced;
};

std::vector<uint16_t> gold_reduce_w(const vector<uint16_t> &src_vec, const std::vector<uint32_t> &shape, float scaler, bool red_max, bool zeropad) {
std::vector<uint16_t> gold_reduce_w(const vector<uint16_t> &src_vec, const std::vector<uint32_t> &shape, float scaler, uint8_t red_type, bool zeropad) {
vector<uint32_t> shape_dst{shape[0], shape[1], shape[2], 1};
if (zeropad)
shape_dst[3] = 32;
Expand All @@ -177,22 +178,22 @@ std::vector<uint16_t> gold_reduce_w(const vector<uint16_t> &src_vec, const std::
for (int n = 0; n < shape[0]; n++)
for (int c = 0; c < shape[1]; c++)
for (int h = 0; h < shape[2]; h++) {
float sum = red_max ? -std::numeric_limits<float>::max() : 0.0f;
// red_type : {SUM, AVG, MAX}; i.e. {0, 1, 2};
float sum = (red_type == 2) ? -std::numeric_limits<float>::max() : 0.0f;
for (int w = 0; w < shape[3]; w++) {
auto offs = addr.offs(n, c, h, w);
if (red_max)
if (red_type == 2)
sum = fmaxf(bfloat16(src_vec[offs]).to_float(), sum);
else
sum += bfloat16(src_vec[offs]).to_float();
}
auto dest_offs = addr_dst.offs(n, c, h, 0);
reduced[dest_offs] = bfloat16(sum*scaler).to_uint16();
}

return reduced;
}

std::vector<uint16_t> gold_reduce_hw(const std::vector<uint16_t> &src_vec, const std::vector<uint32_t> &shape, float scaler, bool red_max, bool zeropad) {
std::vector<uint16_t> gold_reduce_hw(const std::vector<uint16_t> &src_vec, const std::vector<uint32_t> &shape, float scaler, uint8_t red_type, bool zeropad) {
vector<uint32_t> shape_dst{shape[0], shape[1], 1, 1};
if (zeropad) {
shape_dst[2] = 32;
Expand All @@ -205,17 +206,17 @@ std::vector<uint16_t> gold_reduce_hw(const std::vector<uint16_t> &src_vec, const
std::fill(reduced.begin(), reduced.end(), 0);
for (int n = 0; n < shape[0]; n++)
for (int c = 0; c < shape[1]; c++) {
float sum = red_max ? -std::numeric_limits<float>::max() : 0.0f;
// red_type : {SUM, AVG, MAX}; i.e. {0, 1, 2};
float sum = (red_type == 2) ? -std::numeric_limits<float>::max() : 0.0f;
for (int h = 0; h < shape[2]; h++) {
for (int w = 0; w < shape[3]; w++) {
auto offs = addr.offs(n, c, h, w);
if (red_max)
sum = fmaxf(bfloat16(src_vec[offs]).to_float(), sum);
else {
sum += bfloat16(src_vec[offs]).to_float();
//sum = bfloat16(sum).to_float();
for (int w = 0; w < shape[3]; w++) {
auto offs = addr.offs(n, c, h, w);
if (red_type == 2)
sum = fmaxf(bfloat16(src_vec[offs]).to_float(), sum);
else
sum += bfloat16(src_vec[offs]).to_float();
}
}}
}
auto dest_offs = addr_dst.offs(n, c, 0, 0);
reduced[dest_offs] = bfloat16(sum*scaler).to_uint16();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,17 +34,20 @@ std::vector<uint16_t> gold_transpose_wh(const std::vector<uint16_t> &src_vec, co
// input shape.x is assumed to have the full number of elements in bfloat16
// src_vec is expected to be untilized
// result is also untilized
std::vector<uint16_t> gold_reduce_h(const std::vector<uint16_t> &src_vec, const std::vector<uint32_t> &shape, float scaler, bool red_max = false, bool zeropad = true);
// red_type selects the reduction: 0 = SUM, 1 = AVG, 2 = MAX
std::vector<uint16_t> gold_reduce_h(const std::vector<uint16_t> &src_vec, const std::vector<uint32_t> &shape, float scaler, uint8_t red_type = 0, bool zeropad = true);

// input shape.x is assumed to have the full number of elements in bfloat16
// src_vec is expected to be untilized
// result is also untilized
std::vector<uint16_t> gold_reduce_w(const std::vector<uint16_t> &src_vec, const std::vector<uint32_t> &shape, float scaler, bool red_max = false, bool zeropad = true);
// red_type selects the reduction: 0 = SUM, 1 = AVG, 2 = MAX
std::vector<uint16_t> gold_reduce_w(const std::vector<uint16_t> &src_vec, const std::vector<uint32_t> &shape, float scaler, uint8_t red_type = 0, bool zeropad = true);

// input shape.x is assumed to have the full number of elements in bfloat16
// src_vec is expected to be untilized
// result is also untilized
std::vector<uint16_t> gold_reduce_hw(const std::vector<uint16_t> &src_vec, const std::vector<uint32_t> &shape, float scaler, bool red_max = false, bool zeropad = true);
// red_type selects the reduction: 0 = SUM, 1 = AVG, 2 = MAX
std::vector<uint16_t> gold_reduce_hw(const std::vector<uint16_t> &src_vec, const std::vector<uint32_t> &shape, float scaler, uint8_t red_type = 0, bool zeropad = true);

// Takes untilized src0_vec and tilized src1_vec
// returns tilized result of eltwise addition
Expand Down
Loading

0 comments on commit 5bb0b10

Please sign in to comment.