Skip to content

Commit

Permalink
[RV64_DYNAREC] Optimized 16bit constant ROL opcodes (#2267)
Browse files Browse the repository at this point in the history
  • Loading branch information
ksco authored Jan 16, 2025
1 parent 4b8fb02 commit 166c485
Show file tree
Hide file tree
Showing 3 changed files with 124 additions and 61 deletions.
154 changes: 93 additions & 61 deletions src/dynarec/rv64/dynarec_rv64_66.c
Original file line number Diff line number Diff line change
Expand Up @@ -1062,13 +1062,18 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
switch ((nextop >> 3) & 7) {
case 0:
INST_NAME("ROL Ew, Ib");
MESSAGE(LOG_DUMP, "Need Optimization\n");
SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION);
GETEW(x1, 1);
u8 = F8;
MOV32w(x2, u8);
CALL_(rol16, x1, x3, x1, x2);
EWBACK;
u8 = geted_ib(dyn, addr, ninst, nextop) & 15;
if (u8) {
// removed PENDING on purpose
SETFLAGS(X_OF | X_CF, SF_SUBSET, NAT_FLAGS_FUSION);
GETEW(x1, 1);
u8 = (F8) & 0x1f;
emit_rol16c(dyn, ninst, x1, u8, x4, x5);
EWBACK;
} else {
FAKEED;
F8;
}
break;
case 1:
INST_NAME("ROR Ew, Ib");
Expand Down Expand Up @@ -1178,45 +1183,94 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
}
break;
case 0xD1:
nextop = F8;
switch ((nextop >> 3) & 7) {
case 0:
INST_NAME("ROL Ew, 1");
// removed PENDING on purpose
SETFLAGS(X_OF | X_CF, SF_SUBSET, NAT_FLAGS_FUSION);
GETEW(x1, 0);
emit_rol16c(dyn, ninst, x1, 1, x5, x4);
EWBACK;
break;
case 1:
INST_NAME("ROR Ew, 1");
MOV32w(x2, 1);
MESSAGE(LOG_DUMP, "Need Optimization\n");
SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION);
GETEW(x1, 1);
CALL_(ror16, x1, x3, x1, x2);
EWBACK;
break;
case 2:
INST_NAME("RCL Ew, 1");
MOV32w(x2, 1);
MESSAGE("LOG_DUMP", "Need optimization\n");
READFLAGS(X_CF);
SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION);
GETEW(x1, 1);
CALL_(rcl16, x1, x3, x1, x2);
EWBACK;
break;
case 3:
INST_NAME("RCR Ew, 1");
MOV32w(x2, 1);
MESSAGE("LOG_DUMP", "Need optimization\n");
READFLAGS(X_CF);
SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION);
GETEW(x1, 1);
CALL_(rcr16, x1, x3, x1, x2);
EWBACK;
break;
case 5:
INST_NAME("SHR Ew, 1");
SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); // some flags are left undefined
GETEW(x1, 0);
emit_shr16c(dyn, ninst, x1, 1, x5, x4, x6);
EWBACK;
break;
case 4:
case 6:
INST_NAME("SHL Ew, 1");
SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); // some flags are left undefined
GETEW(x1, 0);
emit_shl16c(dyn, ninst, x1, 1, x5, x4, x6);
EWBACK;
break;
case 7:
INST_NAME("SAR Ew, 1");
SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); // some flags are left undefined
GETSEW(x1, 0);
emit_sar16c(dyn, ninst, x1, 1, x5, x4, x6);
EWBACK;
break;
}
break;

case 0xD3:
nextop = F8;
switch ((nextop >> 3) & 7) {
case 0:
if (opcode == 0xD1) {
INST_NAME("ROL Ew, 1");
MOV32w(x2, 1);
} else {
INST_NAME("ROL Ew, CL");
ANDI(x2, xRCX, 0x1f);
}
INST_NAME("ROL Ew, CL");
ANDI(x2, xRCX, 0x1f);
MESSAGE(LOG_DUMP, "Need Optimization\n");
SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION);
GETEW(x1, 1);
CALL_(rol16, x1, x3, x1, x2);
EWBACK;
break;
case 1:
if (opcode == 0xD1) {
INST_NAME("ROR Ew, 1");
MOV32w(x2, 1);
} else {
INST_NAME("ROR Ew, CL");
ANDI(x2, xRCX, 0x1f);
}
INST_NAME("ROR Ew, CL");
ANDI(x2, xRCX, 0x1f);
MESSAGE(LOG_DUMP, "Need Optimization\n");
SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION);
GETEW(x1, 1);
CALL_(ror16, x1, x3, x1, x2);
EWBACK;
break;
case 2:
if (opcode == 0xD1) {
INST_NAME("RCL Ew, 1");
MOV32w(x2, 1);
} else {
INST_NAME("RCL Ew, CL");
ANDI(x2, xRCX, 0x1f);
}
INST_NAME("RCL Ew, CL");
ANDI(x2, xRCX, 0x1f);
MESSAGE("LOG_DUMP", "Need optimization\n");
READFLAGS(X_CF);
SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION);
Expand All @@ -1225,13 +1279,8 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
EWBACK;
break;
case 3:
if (opcode == 0xD1) {
INST_NAME("RCR Ew, 1");
MOV32w(x2, 1);
} else {
INST_NAME("RCR Ew, CL");
ANDI(x2, xRCX, 0x1f);
}
INST_NAME("RCR Ew, CL");
ANDI(x2, xRCX, 0x1f);
MESSAGE("LOG_DUMP", "Need optimization\n");
READFLAGS(X_CF);
SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION);
Expand All @@ -1240,14 +1289,9 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
EWBACK;
break;
case 5:
if (opcode == 0xD1) {
INST_NAME("SHR Ew, 1");
MOV32w(x2, 1);
} else {
INST_NAME("SHR Ew, CL");
ANDI(x2, xRCX, 0x1f);
BEQ_NEXT(x2, xZR);
}
INST_NAME("SHR Ew, CL");
ANDI(x2, xRCX, 0x1f);
BEQ_NEXT(x2, xZR);
SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); // some flags are left undefined
if (box64_dynarec_safeflags > 1)
MAYSETFLAGS();
Expand All @@ -1257,14 +1301,9 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
break;
case 4:
case 6:
if (opcode == 0xD1) {
INST_NAME("SHL Ew, 1");
MOV32w(x2, 1);
} else {
INST_NAME("SHL Ew, CL");
ANDI(x2, xRCX, 0x1f);
BEQ_NEXT(x2, xZR);
}
INST_NAME("SHL Ew, CL");
ANDI(x2, xRCX, 0x1f);
BEQ_NEXT(x2, xZR);
SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); // some flags are left undefined
if (box64_dynarec_safeflags > 1)
MAYSETFLAGS();
Expand All @@ -1273,23 +1312,16 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
EWBACK;
break;
case 7:
if (opcode == 0xD1) {
INST_NAME("SAR Ew, 1");
MOV32w(x2, 1);
} else {
INST_NAME("SAR Ew, CL");
ANDI(x2, xRCX, 0x1f);
BEQ_NEXT(x2, xZR);
}
INST_NAME("SAR Ew, CL");
ANDI(x2, xRCX, 0x1f);
BEQ_NEXT(x2, xZR);
SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); // some flags are left undefined
if (box64_dynarec_safeflags > 1)
MAYSETFLAGS();
GETSEW(x1, 0);
emit_sar16(dyn, ninst, x1, x2, x5, x4, x6);
EWBACK;
break;
default:
DEFAULT;
}
break;

Expand Down
29 changes: 29 additions & 0 deletions src/dynarec/rv64/dynarec_rv64_emit_shift.c
Original file line number Diff line number Diff line change
Expand Up @@ -1061,6 +1061,35 @@ void emit_ror32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
}
}

// emit ROL16 instruction, from s1, constant c, store result in s1 using s3 and s4 as scratch
//
//  dyn, ninst : dynarec state and index of the instruction being translated
//  s1         : register holding the 16-bit operand on entry and the rotated value on exit
//               (assumes the operand is zero-extended: any bit above bit 15 would be
//               shifted into the result by the SRLI/OR below -- TODO confirm with callers)
//  c          : immediate rotate count as decoded by the caller; 0 emits nothing
//  s3, s4     : scratch registers, clobbered
//
// Only (c mod 16) matters for the rotated value of a 16-bit operand. The count is
// reduced here because callers may hand over a 5-bit masked count (up to 31), and
// using it directly would produce shift immediates above 63 or below 0.
void emit_rol16c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4)
{
    if (!c) return;

    // Effective rotate amount for a 16-bit operand.
    const uint32_t rot = c & 15;

    SET_DFNONE();

    if (rot) {
        // s3 = low (16 - rot) bits of s1 moved up by rot, truncated to 16 bits
        SLLI(s3, s1, 48 + rot);
        SRLI(s3, s3, 48);
        // s1 = top rot bits of the operand moved down to the bottom
        SRLI(s1, s1, 16 - rot);
        OR(s1, s1, s3);
    }
    // rot == 0 (count is a non-zero multiple of 16): the value is unchanged, flags are still updated below.

    if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR);

    IFX (X_CF | X_OF) {
        // Drop the previous CF/OF2 bits, then take bit F_CF of the rotated value as the new carry
        // (presumably F_CF is bit 0, i.e. the bit that just wrapped around).
        ANDI(xFlags, xFlags, ~(1UL << F_CF | 1UL << F_OF2));
        ANDI(s4, s1, 1 << F_CF);
        IFX (X_CF) OR(xFlags, xFlags, s4);
    }
    IFX (X_OF) {
        // OF is only produced for a count of exactly 1: bit 15 of the result XOR the new carry (kept in s4).
        if (c == 1) {
            SRLI(s3, s1, 15);
            XOR(s3, s3, s4);
            SLLI(s3, s3, F_OF2);
            OR(xFlags, xFlags, s3);
        }
    }
}

// emit ROL32 instruction, from s1, constant c, store result in s1 using s3 and s4 as scratch
void emit_rol32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4)
{
Expand Down
2 changes: 2 additions & 0 deletions src/dynarec/rv64/dynarec_rv64_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -1301,6 +1301,7 @@ void* rv64_next(void);
#define emit_shr32 STEPNAME(emit_shr32)
#define emit_shr32c STEPNAME(emit_shr32c)
#define emit_sar32c STEPNAME(emit_sar32c)
#define emit_rol16c STEPNAME(emit_rol16c)
#define emit_rol32 STEPNAME(emit_rol32)
#define emit_ror32 STEPNAME(emit_ror32)
#define emit_rol32c STEPNAME(emit_rol32c)
Expand Down Expand Up @@ -1470,6 +1471,7 @@ void emit_shr32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c,
void emit_sar32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4);
void emit_rol32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4);
void emit_ror32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4);
void emit_rol16c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4);
void emit_rol32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4);
void emit_ror32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4);
void emit_shrd32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4, int s5);
Expand Down

0 comments on commit 166c485

Please sign in to comment.