Skip to content

Commit

Permalink
[RV64_DYNAREC] Optimized 16bit constant RCL/RCR opcodes (#2270)
Browse files Browse the repository at this point in the history
  • Loading branch information
ksco authored Jan 16, 2025
1 parent 93bf308 commit cff4798
Show file tree
Hide file tree
Showing 3 changed files with 120 additions and 26 deletions.
58 changes: 32 additions & 26 deletions src/dynarec/rv64/dynarec_rv64_66.c
Original file line number Diff line number Diff line change
Expand Up @@ -1091,25 +1091,33 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
break;
case 2:
INST_NAME("RCL Ew, Ib");
MESSAGE(LOG_DUMP, "Need Optimization\n");
READFLAGS(X_CF);
SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION);
GETEW(x1, 1);
u8 = F8;
MOV32w(x2, u8);
CALL_(rcl16, x1, x3, x1, x2);
EWBACK;
if (geted_ib(dyn, addr, ninst, nextop) & 31) {
READFLAGS(X_CF);
// removed PENDING on purpose
SETFLAGS(X_OF | X_CF, SF_SUBSET, NAT_FLAGS_FUSION);
GETEW(x1, 1);
u8 = (F8) & 0x1f;
emit_rcl16c(dyn, ninst, ed, u8, x4, x5);
EWBACK;
} else {
FAKEED;
F8;
}
break;
case 3:
INST_NAME("RCR Ew, Ib");
MESSAGE(LOG_DUMP, "Need Optimization\n");
READFLAGS(X_CF);
SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION);
GETEW(x1, 1);
u8 = F8;
MOV32w(x2, u8);
CALL_(rcr16, x1, x3, x1, x2);
EWBACK;
if (geted_ib(dyn, addr, ninst, nextop) & 31) {
READFLAGS(X_CF);
// removed PENDING on purpose
SETFLAGS(X_OF | X_CF, SF_SUBSET, NAT_FLAGS_FUSION);
GETEW(x1, 1);
u8 = (F8) & 0x1f;
emit_rcr16c(dyn, ninst, ed, u8, x4, x5);
EWBACK;
} else {
FAKEED;
F8;
}
break;
case 4:
case 6:
Expand Down Expand Up @@ -1207,22 +1215,20 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
break;
case 2:
INST_NAME("RCL Ew, 1");
MOV32w(x2, 1);
MESSAGE("LOG_DUMP", "Need optimization\n");
READFLAGS(X_CF);
SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION);
GETEW(x1, 1);
CALL_(rcl16, x1, x3, x1, x2);
// removed PENDING on purpose
SETFLAGS(X_OF | X_CF, SF_SUBSET, NAT_FLAGS_FUSION);
GETEW(x1, 0);
emit_rcl16c(dyn, ninst, x1, 1, x5, x4);
EWBACK;
break;
case 3:
INST_NAME("RCR Ew, 1");
MOV32w(x2, 1);
MESSAGE("LOG_DUMP", "Need optimization\n");
READFLAGS(X_CF);
SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION);
GETEW(x1, 1);
CALL_(rcr16, x1, x3, x1, x2);
// removed PENDING on purpose
SETFLAGS(X_OF | X_CF, SF_SUBSET, NAT_FLAGS_FUSION);
GETEW(x1, 0);
emit_rcr16c(dyn, ninst, x1, 1, x5, x4);
EWBACK;
break;
case 5:
Expand Down
84 changes: 84 additions & 0 deletions src/dynarec/rv64/dynarec_rv64_emit_shift.c
Original file line number Diff line number Diff line change
Expand Up @@ -1635,3 +1635,87 @@ void emit_shld16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin
emit_pf(dyn, ninst, s1, s3, s4);
}
}

// emit RCL16 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch
//
// A 16-bit RCL is a left rotation of the 17-bit value CF:s1 (CF placed just
// above bit 15), so only c % 17 matters; when that is 0 nothing is emitted.
//   s1      register holding the operand on entry and the rotated result on exit
//           (NOTE(review): the sequence assumes bits above 15 are clear on entry -- confirm with callers)
//   c       constant rotate count, reduced modulo 17 here
//   s3, s4  scratch registers, overwritten
// The CF extraction/insertion below only works if F_CF is bit 0 of xFlags.
void emit_rcl16c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4)
{
if (!(c % 17)) return;

SET_DFNONE();

c %= 17;

// Build v = CF:operand as a 17-bit value in s1.
ANDI(s3, xFlags, 1 << F_CF);
SLLI(s3, s3, 16);
OR(s1, s1, s3); // insert CF to bit 16

// s4: v shifted so that v[16-c] (the bit rotated into CF) sits at bit 63.
SLLI(s4, s1, 47 + c);
// s3: drop that bit, then bring v[15-c:0] down to bits 15..c of the result.
SLLI(s3, s4, 1);
SRLI(s3, s3, 48);
// s1: v[16:17-c] wraps around into bits c-1..0 of the result.
SRLI(s1, s1, 17 - c);
OR(s1, s1, s3);

if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR);

IFX (X_CF | X_OF) {
// Clear CF and OF, then reduce s4 to the new carry (0 or 1); OF below reuses it.
ANDI(xFlags, xFlags, ~(1UL << F_CF | 1UL << F_OF2));
SRLI(s4, s4, 63);
IFX (X_CF) OR(xFlags, xFlags, s4);
}

IFX (X_OF) {
// OF is only produced for a rotate count of 1: result bit 15 XOR new CF.
if (c == 1) {
SRLI(s3, s1, 15);
XOR(s3, s3, s4);
SLLI(s3, s3, F_OF2);
OR(xFlags, xFlags, s3);
}
}
}

// emit RCR16 instruction, from s1, constant c, store result in s1 using s3 and s4 as scratch
//
// A 16-bit RCR is a right rotation of the 17-bit value v = CF:s1 (CF placed
// just above bit 15), so only c % 17 matters; when that is 0 nothing is
// emitted and the flags are left untouched.
//   s1      register holding the operand on entry and the rotated result on exit
//           (NOTE(review): the sequence assumes bits above 15 are clear on entry -- confirm with callers)
//   c       constant rotate count, reduced modulo 17 here
//   s3, s4  scratch registers, overwritten
// The CF extraction/insertion below only works if F_CF is bit 0 of xFlags.
void emit_rcr16c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4)
{
    MAYUSE(s1);
    MAYUSE(s3);
    MAYUSE(s4);

    if (!(c % 17)) return;

    SET_DFNONE();

    c %= 17; // 1..16 from here on

    // Build v = CF:operand as a 17-bit value in s1.
    ANDI(s3, xFlags, 1 << F_CF);
    SLLI(s3, s3, 16);
    OR(s1, s1, s3); // insert CF to bit 16

    // rotated = (v >> c) | (v << (17 - c)), truncated to 17 bits:
    //   result[16-c:0]  = v[16:c]
    //   new CF          = v[c-1]
    //   result[15:17-c] = v[c-2:0]
    SRLI(s3, s1, c);      // s3 = v[16:c], already in its final position
    SLLI(s4, s1, 64 - c); // v[c-1] (the new CF) now sits at bit 63 of s4
    SLLI(s1, s4, 1);      // shift the new CF out: v[c-2] is at bit 63 (constant 1, not a register id)
    SRLI(s1, s1, 48);     // bring v[c-2:0] down to bits 15..17-c
    OR(s1, s1, s3);

    if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR);

    IFX (X_CF | X_OF) {
        ANDI(xFlags, xFlags, ~(1UL << F_CF | 1UL << F_OF2));
    }

    IFX (X_CF) {
        // Reduce s4 to the new carry (0 or 1) and merge it into bit 0 of xFlags.
        SRLI(s4, s4, 63);
        OR(xFlags, xFlags, s4);
    }

    IFX (X_OF) {
        // OF is only produced for a rotate count of 1: XOR of the two top
        // bits (15 and 14) of the result. s4 is free to be reused here.
        if (c == 1) {
            SRLI(s3, s1, 14);
            SRLI(s4, s3, 1);
            XOR(s3, s3, s4);
            ANDI(s3, s3, 1);
            SLLI(s3, s3, F_OF2);
            OR(xFlags, xFlags, s3);
        }
    }
}
4 changes: 4 additions & 0 deletions src/dynarec/rv64/dynarec_rv64_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -1313,6 +1313,8 @@ void* rv64_next(void);
// Each emitter name below is routed through STEPNAME (defined outside this
// excerpt); the 16-bit constant-count RCL/RCR emitters follow the same pattern.
#define emit_shld32 STEPNAME(emit_shld32)
#define emit_shld16c STEPNAME(emit_shld16c)
#define emit_shrd16c STEPNAME(emit_shrd16c)
#define emit_rcl16c STEPNAME(emit_rcl16c)
#define emit_rcr16c STEPNAME(emit_rcr16c)

#define emit_pf STEPNAME(emit_pf)

Expand Down Expand Up @@ -1482,6 +1484,8 @@ void emit_shrd32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int
void emit_shld32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s5, int s3, int s4, int s6);
void emit_shrd16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4, int s5);
void emit_shld16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4, int s5);
// 16-bit RCL/RCR by a constant count c: s1 holds the operand and receives the
// result, s3 and s4 are scratch registers.
void emit_rcl16c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4);
void emit_rcr16c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4);

void emit_pf(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4);

Expand Down

0 comments on commit cff4798

Please sign in to comment.