Skip to content

Commit

Permalink
[Test] SPU LLVM: Add more clamping, reorder them to be on the result
Browse files Browse the repository at this point in the history
  • Loading branch information
elad335 committed Jun 6, 2022
1 parent 248809c commit da0f108
Showing 1 changed file with 93 additions and 31 deletions.
124 changes: 93 additions & 31 deletions rpcs3/Emu/Cell/SPURecompiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7081,7 +7081,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
const auto [a, b] = get_vrs<u8[16]>(op.ra, op.rb);
const auto zeroes = splat<u8[16]>(0);

if (op.ra == op.rb && !m_interp_magn)
if ((op.ra == op.rb || a.value == b.value) && !m_interp_magn)
{
set_vr(op.rt, vdbpsadbw(a, zeroes, 0));
return;
Expand Down Expand Up @@ -7887,6 +7887,35 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
return true;
}

if (auto [ok, v0] = match_expr(a, fabs(match<f32[4]>())); ok)
{
return true;
}

if (auto [ok, v0] = match_expr(a, spu_rsqrte(match<f32[4]>())); ok)
{
return true;
}

return false;
}

bool is_input_float_result(value_t<f32[4]> v)
{
static const auto MT = match<f32[4]>();

if (std::get<0>(match_expr(v, fm(MT, MT))) ||
std::get<0>(match_expr(v, fma(MT, MT, MT))) ||
std::get<0>(match_expr(v, fms(MT, MT, MT))) ||
std::get<0>(match_expr(v, fnms(MT, MT, MT))) ||
//std::get<0>(match_expr(v, fa(MT, MT))) ||
//std::get<0>(match_expr(v, fs(MT, MT))) ||
std::get<0>(match_expr(v, spu_re(MT))) ||
std::get<0>(match_expr(v, spu_rsqrte(MT))))
{
return true;
}

return false;
}

Expand All @@ -7906,8 +7935,21 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
return eval(bitcast<f32[4]>(min(bitcast<u32[4]>(v),splat<u32[4]>(0xff7fffff))));
}

value_t<f32[4]> clamp_smax(value_t<f32[4]> v)
value_t<f32[4]> clamp_smax(value_t<f32[4]> v, bool force = false)
{
if (!g_cfg.core.spu_approx_xfloat && !force)
{
return v;
}

if (g_cfg.core.spu_approx_xfloat)
{
if (is_input_float_result(v))
{
return v;
}
}

return eval(clamp_positive_smax(clamp_negative_smax(v)));
}

Expand Down Expand Up @@ -7959,7 +8001,13 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
register_intrinsic("spu_frest", [&](llvm::CallInst* ci)
{
const auto a = value<f32[4]>(ci->getOperand(0));
return fre(a);

if (g_cfg.core.spu_approx_xfloat)
{
return eval(fre(a));
}

return eval(fre(a));
});

set_vr(op.rt, frest(get_vr<f32[4]>(op.ra)));
Expand All @@ -7983,7 +8031,13 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
register_intrinsic("spu_frsqest", [&](llvm::CallInst* ci)
{
const auto a = value<f32[4]>(ci->getOperand(0));
return frsqe(fabs(a));

if (g_cfg.core.spu_approx_xfloat)
{
return eval(frsqe(fabs(a)));
}

return eval(frsqe(fabs(a)));
});

set_vr(op.rt, frsqest(get_vr<f32[4]>(op.ra)));
Expand Down Expand Up @@ -8044,6 +8098,16 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
return eval(sext<s32[4]>(bitcast<s32[4]>(a) > bitcast<s32[4]>(b)));
}

if (is_input_positive(a) && is_input_positive(b))
{
return eval(sext<s32[4]>(bitcast<s32[4]>(a) > bitcast<s32[4]>(b)));
}

if (g_cfg.core.spu_approx_xfloat && is_input_float_result(a) && is_input_float_result(b))
{
return eval(sext<s32[4]>(fcmp_ord(a > b)));
}

if (g_cfg.core.spu_approx_xfloat || g_cfg.core.spu_relaxed_xfloat)
{
const auto ai = eval(bitcast<s32[4]>(a));
Expand Down Expand Up @@ -8121,7 +8185,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
const auto a = value<f32[4]>(ci->getOperand(0));
const auto b = value<f32[4]>(ci->getOperand(1));

return a + b;
return eval(a + b);
});

set_vr(op.rt, fa(get_vr<f32[4]>(op.ra), get_vr<f32[4]>(op.rb)));
Expand All @@ -8148,8 +8212,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator

if (g_cfg.core.spu_approx_xfloat)
{
const auto bc = clamp_smax(b); // for #4478
return eval(a - bc);
return eval(a - b);
}
else
{
Expand All @@ -8176,19 +8239,17 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator

register_intrinsic("spu_fm", [&](llvm::CallInst* ci)
{
const auto a = value<f32[4]>(ci->getOperand(0));
const auto b = value<f32[4]>(ci->getOperand(1));
const auto a = clamp_smax(value<f32[4]>(ci->getOperand(0)));
const auto b = clamp_smax(value<f32[4]>(ci->getOperand(1)));

if (g_cfg.core.spu_approx_xfloat)
{
if (op.ra == op.rb && !m_interp_magn)
if ((op.ra == op.rb || a.value == b.value) && !m_interp_magn)
{
return eval(a * b);
return clamp_positive_smax(eval(a * b));
}

const auto ma = sext<s32[4]>(fcmp_uno(a != fsplat<f32[4]>(0.)));
const auto mb = sext<s32[4]>(fcmp_uno(b != fsplat<f32[4]>(0.)));
return eval(bitcast<f32[4]>(bitcast<s32[4]>(a * b) & ma & mb));
return clamp_smax(eval(a * b));
}
else
{
Expand Down Expand Up @@ -8309,6 +8370,11 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
}
}
}

if (g_cfg.core.spu_approx_xfloat && is_input_float_result(ab[i]))
{
safe_float_compare.set(i);
}
}

if (safe_float_compare.any())
Expand Down Expand Up @@ -8500,13 +8566,13 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator

register_intrinsic("spu_fnms", [&](llvm::CallInst* ci)
{
const auto a = value<f32[4]>(ci->getOperand(0));
const auto b = value<f32[4]>(ci->getOperand(1));
const auto a = clamp_smax(value<f32[4]>(ci->getOperand(0)));
const auto b = clamp_smax(value<f32[4]>(ci->getOperand(1)));
const auto c = value<f32[4]>(ci->getOperand(2));

if (g_cfg.core.spu_approx_xfloat || g_cfg.core.spu_relaxed_xfloat)
{
return fma32x4(eval(-clamp_smax(a)), clamp_smax(b), c);
return clamp_smax(fma32x4(eval(-a), b, c), true);
}
else
{
Expand Down Expand Up @@ -8535,17 +8601,13 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator

register_intrinsic("spu_fma", [&](llvm::CallInst* ci)
{
const auto a = value<f32[4]>(ci->getOperand(0));
const auto b = value<f32[4]>(ci->getOperand(1));
const auto a = clamp_smax(value<f32[4]>(ci->getOperand(0)));
const auto b = clamp_smax(value<f32[4]>(ci->getOperand(1)));
const auto c = value<f32[4]>(ci->getOperand(2));

if (g_cfg.core.spu_approx_xfloat)
{
const auto ma = sext<s32[4]>(fcmp_uno(a != fsplat<f32[4]>(0.)));
const auto mb = sext<s32[4]>(fcmp_uno(b != fsplat<f32[4]>(0.)));
const auto ca = bitcast<f32[4]>(bitcast<s32[4]>(a) & mb);
const auto cb = bitcast<f32[4]>(bitcast<s32[4]>(b) & ma);
return fma32x4(eval(ca), eval(cb), c);
return clamp_smax(fma32x4(eval(a), eval(b), c));
}
else
{
Expand Down Expand Up @@ -8609,13 +8671,13 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator

register_intrinsic("spu_fms", [&](llvm::CallInst* ci)
{
const auto a = value<f32[4]>(ci->getOperand(0));
const auto b = value<f32[4]>(ci->getOperand(1));
const auto a = clamp_smax(value<f32[4]>(ci->getOperand(0)));
const auto b = clamp_smax(value<f32[4]>(ci->getOperand(1)));
const auto c = value<f32[4]>(ci->getOperand(2));

if (g_cfg.core.spu_approx_xfloat)
{
return fma32x4(clamp_smax(a), clamp_smax(b), eval(-c));
return clamp_smax(fma32x4(a, b, eval(-c)));
}
else
{
Expand Down Expand Up @@ -8678,28 +8740,28 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
register_intrinsic("spu_re", [&](llvm::CallInst* ci)
{
const auto a = value<f32[4]>(ci->getOperand(0));
return fre(a);
return clamp_smax(eval(fre(a)));
});

register_intrinsic("spu_rsqrte", [&](llvm::CallInst* ci)
{
const auto a = value<f32[4]>(ci->getOperand(0));
return frsqe(fabs(a));
return clamp_smax(eval(frsqe(fabs(a)));
});

const auto [a, b] = get_vrs<f32[4]>(op.ra, op.rb);

if (const auto [ok, mb] = match_expr(b, frest(match<f32[4]>())); ok && mb.eq(a))
{
erase_stores(b);
set_vr(op.rt, spu_re(a));
set_vr(op.rt, eval(spu_re(a)));
return;
}

if (const auto [ok, mb] = match_expr(b, frsqest(match<f32[4]>())); ok && mb.eq(a))
{
erase_stores(b);
set_vr(op.rt, spu_rsqrte(a));
set_vr(op.rt, eval(spu_rsqrte(a)));
return;
}

Expand Down

0 comments on commit da0f108

Please sign in to comment.