From 0de2f642b8d1c89eea5fc46e0a8e119758b566f6 Mon Sep 17 00:00:00 2001 From: AnthonyH26 <37834038+AnthonyH26@users.noreply.github.com> Date: Sat, 4 Jan 2025 12:49:38 +0000 Subject: [PATCH 1/2] Update translate.h Added declaration of V_CVT_PK_I16_I32 --- src/shader_recompiler/frontend/translate/translate.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 9da0844e4a6..2fdc73e2c7e 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -241,6 +241,7 @@ class Translator { void V_SAD(const GcnInst& inst); void V_SAD_U32(const GcnInst& inst); void V_CVT_PK_U16_U32(const GcnInst& inst); + void V_CVT_PK_I16_I32(const GcnInst& inst); void V_CVT_PK_U8_F32(const GcnInst& inst); void V_LSHL_B64(const GcnInst& inst); void V_MUL_F64(const GcnInst& inst); From cf9f3daa74f771b9e94e279f9e85099ab28ee170 Mon Sep 17 00:00:00 2001 From: AnthonyH26 <37834038+AnthonyH26@users.noreply.github.com> Date: Sat, 4 Jan 2025 12:52:03 +0000 Subject: [PATCH 2/2] Implemented V_CVT_PK_I16_I32 Implemented this opcode to help with https://github.com/shadps4-emu/shadps4-game-compatibility/issues/1864 --- .../frontend/translate/vector_alu.cpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index 2b32ca2ce3e..5c5bc5ae54e 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -375,6 +375,8 @@ void Translator::EmitVectorAlu(const GcnInst& inst) { return V_SAD_U32(inst); case Opcode::V_CVT_PK_U16_U32: return V_CVT_PK_U16_U32(inst); + case Opcode::V_CVT_PK_I16_I32: + return V_CVT_PK_I16_I32(inst); case Opcode::V_CVT_PK_U8_F32: return V_CVT_PK_U8_F32(inst); case Opcode::V_LSHL_B64: @@ -1182,6 +1184,17 @@ void
Translator::V_CVT_PK_U16_U32(const GcnInst& inst) {
     SetDst(inst.dst[0], ir.BitFieldInsert(lo, hi, ir.Imm32(16), ir.Imm32(16)));
 }
 
+// Clamps src0/src1 to the signed 16-bit range and packs them as {src1 -> [31:16], src0 -> [15:0]}.
+void Translator::V_CVT_PK_I16_I32(const GcnInst& inst) {
+    const IR::U32 src0{GetSrc(inst.src[0])};
+    const IR::U32 src1{GetSrc(inst.src[1])};
+    // Signed clamp to [-32768, 32767]: the lower bound must be the sign-extended
+    // 0xFFFF8000, because a plain 0x8000 is +32768 in a signed 32-bit comparison.
+    const IR::U32 lo = ir.IMin(ir.IMax(src0, ir.Imm32(0xFFFF8000), true), ir.Imm32(0x7FFF), true);
+    const IR::U32 hi = ir.IMin(ir.IMax(src1, ir.Imm32(0xFFFF8000), true), ir.Imm32(0x7FFF), true);
+    SetDst(inst.dst[0], ir.BitFieldInsert(lo, hi, ir.Imm32(16), ir.Imm32(16)));
+}
+
 void Translator::V_CVT_PK_U8_F32(const GcnInst& inst) {
     const IR::F32 src0{GetSrc(inst.src[0])};
     const IR::U32 src1{GetSrc(inst.src[1])};