Skip to content

Commit

Permalink
SPU LLVM: Try to reduce float clamping by using round-to-zero
Browse files Browse the repository at this point in the history
  • Loading branch information
elad335 committed Aug 24, 2022
1 parent 1fc0191 commit 6cdfb92
Showing 1 changed file with 35 additions and 6 deletions.
41 changes: 35 additions & 6 deletions rpcs3/Emu/Cell/SPURecompiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8426,6 +8426,24 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
return true;
}


// Reports whether `v` matches one of the SPU float patterns fm, fma, fms or
// fnms. Patterns are tried in that order and the first hit returns true;
// false is returned when none of them match.
// Not checked here (disabled candidates): fa, fs, spu_re, spu_rsqrte.
bool is_input_float_result(value_t<f32[4]> v)
{
	// Operand placeholder (match<f32[4]>()) reused by every pattern below
	static const auto any_f32x4 = match<f32[4]>();

	// fm(a, b)
	if (std::get<0>(match_expr(v, fm(any_f32x4, any_f32x4))))
	{
		return true;
	}

	// fma(a, b, c)
	if (std::get<0>(match_expr(v, fma(any_f32x4, any_f32x4, any_f32x4))))
	{
		return true;
	}

	// fms(a, b, c)
	if (std::get<0>(match_expr(v, fms(any_f32x4, any_f32x4, any_f32x4))))
	{
		return true;
	}

	// fnms(a, b, c)
	if (std::get<0>(match_expr(v, fnms(any_f32x4, any_f32x4, any_f32x4))))
	{
		return true;
	}

	return false;
}

Expand All @@ -8447,6 +8465,14 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator

value_t<f32[4]> clamp_smax(value_t<f32[4]> v)
{
if (g_cfg.core.spu_approx_xfloat)
{
if (is_input_float_result(v))
{
return v;
}
}

if (m_use_avx512)
{
if (is_input_positive(v))
Expand Down Expand Up @@ -8775,7 +8801,10 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator

const auto ma = sext<s32[4]>(fcmp_uno(a != fsplat<f32[4]>(0.)));
const auto mb = sext<s32[4]>(fcmp_uno(b != fsplat<f32[4]>(0.)));
return eval(bitcast<f32[4]>(bitcast<s32[4]>(a * b) & ma & mb));
const auto mul = eval(bitcast<s32[4]>(a * b));
const auto after_a = is_input_float_result(a) ? mul : eval(ma & mul);
const auto after_b = is_input_float_result(b) ? after_a : eval(mb & after_a);
return eval(bitcast<f32[4]>(after_b));
}
else
{
Expand Down Expand Up @@ -9093,7 +9122,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator

if (g_cfg.core.spu_approx_xfloat || g_cfg.core.spu_relaxed_xfloat)
{
return fma32x4(eval(-clamp_smax(a)), clamp_smax(b), c);
return fma32x4(eval(-clamp_smax(a)), clamp_smax(b), clamp_smax(c));
}
else
{
Expand Down Expand Up @@ -9130,9 +9159,9 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
{
const auto ma = sext<s32[4]>(fcmp_uno(a != fsplat<f32[4]>(0.)));
const auto mb = sext<s32[4]>(fcmp_uno(b != fsplat<f32[4]>(0.)));
const auto ca = bitcast<f32[4]>(bitcast<s32[4]>(a) & mb);
const auto cb = bitcast<f32[4]>(bitcast<s32[4]>(b) & ma);
return fma32x4(eval(ca), eval(cb), c);
const auto ca = is_input_float_result(b) ? a : eval(bitcast<f32[4]>(bitcast<s32[4]>(a) & mb));
const auto cb = is_input_float_result(a) ? b : eval(bitcast<f32[4]>(bitcast<s32[4]>(b) & ma));
return fma32x4(eval(ca), eval(cb), clamp_smax(c));
}
else
{
Expand Down Expand Up @@ -9202,7 +9231,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator

if (g_cfg.core.spu_approx_xfloat)
{
return fma32x4(clamp_smax(a), clamp_smax(b), eval(-c));
return fma32x4(clamp_smax(a), clamp_smax(b), eval(-clamp_smax(c)));
}
else
{
Expand Down

0 comments on commit 6cdfb92

Please sign in to comment.