From 6cdfb923f312859a1ba5352f1b1c562ec671653e Mon Sep 17 00:00:00 2001 From: Eladash Date: Thu, 25 Aug 2022 00:32:00 +0300 Subject: [PATCH] SPU LLVM: Try to reduce float clamping by using round-to-zero --- rpcs3/Emu/Cell/SPURecompiler.cpp | 41 +++++++++++++++++++++++++++----- 1 file changed, 35 insertions(+), 6 deletions(-) diff --git a/rpcs3/Emu/Cell/SPURecompiler.cpp b/rpcs3/Emu/Cell/SPURecompiler.cpp index d84b48bc07b8..025cabad55f7 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.cpp +++ b/rpcs3/Emu/Cell/SPURecompiler.cpp @@ -8426,6 +8426,24 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator return true; } + + bool is_input_float_result(value_t v) + { + static const auto MT = match(); + + if (std::get<0>(match_expr(v, fm(MT, MT))) || + std::get<0>(match_expr(v, fma(MT, MT, MT))) || + std::get<0>(match_expr(v, fms(MT, MT, MT))) || + std::get<0>(match_expr(v, fnms(MT, MT, MT))) + //std::get<0>(match_expr(v, fa(MT, MT))) || + //std::get<0>(match_expr(v, fs(MT, MT))) || + //std::get<0>(match_expr(v, spu_re(MT))) || + //std::get<0>(match_expr(v, spu_rsqrte(MT))) + ) + { + return true; + } + return false; } @@ -8447,6 +8465,14 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator value_t clamp_smax(value_t v) { + if (g_cfg.core.spu_approx_xfloat) + { + if (is_input_float_result(v)) + { + return v; + } + } + if (m_use_avx512) { if (is_input_positive(v)) @@ -8775,7 +8801,10 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator const auto ma = sext(fcmp_uno(a != fsplat(0.))); const auto mb = sext(fcmp_uno(b != fsplat(0.))); - return eval(bitcast(bitcast(a * b) & ma & mb)); + const auto mul = eval(bitcast(a * b)); + const auto after_a = is_input_float_result(a) ? mul : eval(ma & mul); + const auto after_b = is_input_float_result(b) ? after_a : eval(mb & after_a); + return eval(bitcast(after_b)); } else { @@ -9093,7 +9122,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator if (g_cfg.core.spu_approx_xfloat || g_cfg.core.spu_relaxed_xfloat) { - return fma32x4(eval(-clamp_smax(a)), clamp_smax(b), c); + return fma32x4(eval(-clamp_smax(a)), clamp_smax(b), clamp_smax(c)); } else { @@ -9130,9 +9159,9 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator { const auto ma = sext(fcmp_uno(a != fsplat(0.))); const auto mb = sext(fcmp_uno(b != fsplat(0.))); - const auto ca = bitcast(bitcast(a) & mb); - const auto cb = bitcast(bitcast(b) & ma); - return fma32x4(eval(ca), eval(cb), c); + const auto ca = is_input_float_result(b) ? a : eval(bitcast(bitcast(a) & mb)); + const auto cb = is_input_float_result(a) ? b : eval(bitcast(bitcast(b) & ma)); + return fma32x4(eval(ca), eval(cb), clamp_smax(c)); } else { @@ -9202,7 +9231,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator if (g_cfg.core.spu_approx_xfloat) { - return fma32x4(clamp_smax(a), clamp_smax(b), eval(-c)); + return fma32x4(clamp_smax(a), clamp_smax(b), eval(-clamp_smax(c))); } else {