From 166c4858a1155860c10cf903091df39a576c0e81 Mon Sep 17 00:00:00 2001 From: Yang Liu Date: Thu, 16 Jan 2025 17:20:31 +0800 Subject: [PATCH] [RV64_DYNAREC] Optimized 16bit constant ROL opcodes (#2267) --- src/dynarec/rv64/dynarec_rv64_66.c | 154 +++++++++++++-------- src/dynarec/rv64/dynarec_rv64_emit_shift.c | 29 ++++ src/dynarec/rv64/dynarec_rv64_helper.h | 2 + 3 files changed, 124 insertions(+), 61 deletions(-) diff --git a/src/dynarec/rv64/dynarec_rv64_66.c b/src/dynarec/rv64/dynarec_rv64_66.c index 45a3a3a9a7..5a35e1f214 100644 --- a/src/dynarec/rv64/dynarec_rv64_66.c +++ b/src/dynarec/rv64/dynarec_rv64_66.c @@ -1062,13 +1062,18 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni switch ((nextop >> 3) & 7) { case 0: INST_NAME("ROL Ew, Ib"); - MESSAGE(LOG_DUMP, "Need Optimization\n"); - SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION); - GETEW(x1, 1); - u8 = F8; - MOV32w(x2, u8); - CALL_(rol16, x1, x3, x1, x2); - EWBACK; + u8 = geted_ib(dyn, addr, ninst, nextop) & 15; + if (u8) { + // removed PENDING on purpose + SETFLAGS(X_OF | X_CF, SF_SUBSET, NAT_FLAGS_FUSION); + GETEW(x1, 1); + u8 = (F8) & 0x1f; + emit_rol16c(dyn, ninst, x1, u8, x4, x5); + EWBACK; + } else { + FAKEED; + F8; + } break; case 1: INST_NAME("ROR Ew, Ib"); @@ -1178,17 +1183,76 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } break; case 0xD1: + nextop = F8; + switch ((nextop >> 3) & 7) { + case 0: + INST_NAME("ROL Ew, 1"); + // removed PENDING on purpose + SETFLAGS(X_OF | X_CF, SF_SUBSET, NAT_FLAGS_FUSION); + GETEW(x1, 0); + emit_rol16c(dyn, ninst, x1, 1, x5, x4); + EWBACK; + break; + case 1: + INST_NAME("ROR Ew, 1"); + MOV32w(x2, 1); + MESSAGE(LOG_DUMP, "Need Optimization\n"); + SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION); + GETEW(x1, 1); + CALL_(ror16, x1, x3, x1, x2); + EWBACK; + break; + case 2: + INST_NAME("RCL Ew, 1"); + MOV32w(x2, 1); + MESSAGE("LOG_DUMP", "Need optimization\n"); + READFLAGS(X_CF); + 
SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION); + GETEW(x1, 1); + CALL_(rcl16, x1, x3, x1, x2); + EWBACK; + break; + case 3: + INST_NAME("RCR Ew, 1"); + MOV32w(x2, 1); + MESSAGE("LOG_DUMP", "Need optimization\n"); + READFLAGS(X_CF); + SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION); + GETEW(x1, 1); + CALL_(rcr16, x1, x3, x1, x2); + EWBACK; + break; + case 5: + INST_NAME("SHR Ew, 1"); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); // some flags are left undefined + GETEW(x1, 0); + emit_shr16c(dyn, ninst, x1, 1, x5, x4, x6); + EWBACK; + break; + case 4: + case 6: + INST_NAME("SHL Ew, 1"); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); // some flags are left undefined + GETEW(x1, 0); + emit_shl16c(dyn, ninst, x1, 1, x5, x4, x6); + EWBACK; + break; + case 7: + INST_NAME("SAR Ew, 1"); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); // some flags are left undefined + GETSEW(x1, 0); + emit_sar16c(dyn, ninst, x1, 1, x5, x4, x6); + EWBACK; + break; + } + break; + case 0xD3: nextop = F8; switch ((nextop >> 3) & 7) { case 0: - if (opcode == 0xD1) { - INST_NAME("ROL Ew, 1"); - MOV32w(x2, 1); - } else { - INST_NAME("ROL Ew, CL"); - ANDI(x2, xRCX, 0x1f); - } + INST_NAME("ROL Ew, CL"); + ANDI(x2, xRCX, 0x1f); MESSAGE(LOG_DUMP, "Need Optimization\n"); SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION); GETEW(x1, 1); @@ -1196,13 +1260,8 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni EWBACK; break; case 1: - if (opcode == 0xD1) { - INST_NAME("ROR Ew, 1"); - MOV32w(x2, 1); - } else { - INST_NAME("ROR Ew, CL"); - ANDI(x2, xRCX, 0x1f); - } + INST_NAME("ROR Ew, CL"); + ANDI(x2, xRCX, 0x1f); MESSAGE(LOG_DUMP, "Need Optimization\n"); SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION); GETEW(x1, 1); @@ -1210,13 +1269,8 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni EWBACK; break; case 2: - if (opcode == 0xD1) { - INST_NAME("RCL Ew, 1"); - MOV32w(x2, 1); - } else { - INST_NAME("RCL Ew, 
CL"); - ANDI(x2, xRCX, 0x1f); - } + INST_NAME("RCL Ew, CL"); + ANDI(x2, xRCX, 0x1f); MESSAGE("LOG_DUMP", "Need optimization\n"); READFLAGS(X_CF); SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION); @@ -1225,13 +1279,8 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni EWBACK; break; case 3: - if (opcode == 0xD1) { - INST_NAME("RCR Ew, 1"); - MOV32w(x2, 1); - } else { - INST_NAME("RCR Ew, CL"); - ANDI(x2, xRCX, 0x1f); - } + INST_NAME("RCR Ew, CL"); + ANDI(x2, xRCX, 0x1f); MESSAGE("LOG_DUMP", "Need optimization\n"); READFLAGS(X_CF); SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION); @@ -1240,14 +1289,9 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni EWBACK; break; case 5: - if (opcode == 0xD1) { - INST_NAME("SHR Ew, 1"); - MOV32w(x2, 1); - } else { - INST_NAME("SHR Ew, CL"); - ANDI(x2, xRCX, 0x1f); - BEQ_NEXT(x2, xZR); - } + INST_NAME("SHR Ew, CL"); + ANDI(x2, xRCX, 0x1f); + BEQ_NEXT(x2, xZR); SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); // some flags are left undefined if (box64_dynarec_safeflags > 1) MAYSETFLAGS(); @@ -1257,14 +1301,9 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 4: case 6: - if (opcode == 0xD1) { - INST_NAME("SHL Ew, 1"); - MOV32w(x2, 1); - } else { - INST_NAME("SHL Ew, CL"); - ANDI(x2, xRCX, 0x1f); - BEQ_NEXT(x2, xZR); - } + INST_NAME("SHL Ew, CL"); + ANDI(x2, xRCX, 0x1f); + BEQ_NEXT(x2, xZR); SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); // some flags are left undefined if (box64_dynarec_safeflags > 1) MAYSETFLAGS(); @@ -1273,14 +1312,9 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni EWBACK; break; case 7: - if (opcode == 0xD1) { - INST_NAME("SAR Ew, 1"); - MOV32w(x2, 1); - } else { - INST_NAME("SAR Ew, CL"); - ANDI(x2, xRCX, 0x1f); - BEQ_NEXT(x2, xZR); - } + INST_NAME("SAR Ew, CL"); + ANDI(x2, xRCX, 0x1f); + BEQ_NEXT(x2, xZR); SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); 
// some flags are left undefined
                     if (box64_dynarec_safeflags > 1)
                         MAYSETFLAGS();
@@ -1288,8 +1322,6 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     emit_sar16(dyn, ninst, x1, x2, x5, x4, x6);
                     EWBACK;
                     break;
-                default:
-                    DEFAULT;
             }
             break;
 
diff --git a/src/dynarec/rv64/dynarec_rv64_emit_shift.c b/src/dynarec/rv64/dynarec_rv64_emit_shift.c
index 267bf1cf0c..8daa62dc26 100644
--- a/src/dynarec/rv64/dynarec_rv64_emit_shift.c
+++ b/src/dynarec/rv64/dynarec_rv64_emit_shift.c
@@ -1061,6 +1061,35 @@ void emit_ror32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
     }
 }
 
+// emit ROL16 instruction, from s1, constant c, store result in s1 using s3 and s4 as scratch
+// c may be a 5-bit masked count (1..31): the rotation uses c & 15 so both shift amounts stay in range
+void emit_rol16c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4)
+{
+    if (!c) return;
+    const uint32_t r = c & 15; // a 16-bit rotation wraps every 16 steps; r == 0 leaves s1 unchanged
+    SET_DFNONE();
+    // s1 = low 16 bits of (s1 << r), OR'ed with (s1 >> (16 - r)); assumes s1 has no bits set above bit 15 (TODO confirm)
+    SLLI(s3, s1, 48 + r);
+    SRLI(s3, s3, 48);
+    SRLI(s1, s1, 16 - r);
+    OR(s1, s1, s3);
+    if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR);
+
+    IFX (X_CF | X_OF) {
+        ANDI(xFlags, xFlags, ~(1UL << F_CF | 1UL << F_OF2));
+        ANDI(s4, s1, 1 << F_CF); // s4 = result masked with the CF bit, reused below for OF
+        IFX (X_CF) OR(xFlags, xFlags, s4);
+    }
+    IFX (X_OF) {
+        if (c == 1) { // OF is only produced for a rotation by exactly 1
+            SRLI(s3, s1, 15);
+            XOR(s3, s3, s4); // bit 15 of the result XOR the value kept in s4
+            SLLI(s3, s3, F_OF2);
+            OR(xFlags, xFlags, s3);
+        }
+    }
+}
+
 // emit ROL32 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch
 void emit_rol32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4)
 {
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h
index 40146b775b..a389f7b1d3 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.h
+++ b/src/dynarec/rv64/dynarec_rv64_helper.h
@@ -1301,6 +1301,7 @@ void* rv64_next(void);
 #define emit_shr32 STEPNAME(emit_shr32)
 #define emit_shr32c STEPNAME(emit_shr32c)
 #define emit_sar32c STEPNAME(emit_sar32c)
+#define emit_rol16c STEPNAME(emit_rol16c)
 #define emit_rol32 STEPNAME(emit_rol32)
 #define emit_ror32 STEPNAME(emit_ror32)
 #define emit_rol32c
STEPNAME(emit_rol32c) @@ -1470,6 +1471,7 @@ void emit_shr32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, void emit_sar32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4); void emit_rol32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4); void emit_ror32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4); +void emit_rol16c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4); void emit_rol32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4); void emit_ror32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4); void emit_shrd32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4, int s5);