diff --git a/src/dynarec/rv64/dynarec_rv64_helper.c b/src/dynarec/rv64/dynarec_rv64_helper.c index bddb329c1..975dff34f 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.c +++ b/src/dynarec/rv64/dynarec_rv64_helper.c @@ -2614,11 +2614,16 @@ int vector_vsetvli(dynarec_rv64_t* dyn, int ninst, int s1, int sew, int vlmul, f * mu tu sew lmul */ uint32_t vtypei = (0b0 << 7) | (0b0 << 6) | (sew << 3) | vlmul; uint32_t vl = (int)((float)(16 >> sew) * multiple); - if (vl <= 31) { - VSETIVLI(xZR, vl, vtypei); - } else { - ADDI(s1, xZR, vl); - VSETVLI(xZR, s1, vtypei); + + if (dyn->inst_sew == VECTOR_SEWNA || dyn->inst_vl == 0 || dyn->inst_sew != sew || dyn->inst_vl != vl) { + if (vl <= 31) { + VSETIVLI(xZR, vl, vtypei); + } else { + ADDI(s1, xZR, vl); + VSETVLI(xZR, s1, vtypei); + } } + dyn->inst_sew = sew; + dyn->inst_vl = vl; return sew; } diff --git a/src/dynarec/rv64/dynarec_rv64_pass0.h b/src/dynarec/rv64/dynarec_rv64_pass0.h index d0d81a6fa..4a1603d06 100644 --- a/src/dynarec/rv64/dynarec_rv64_pass0.h +++ b/src/dynarec/rv64/dynarec_rv64_pass0.h @@ -30,6 +30,8 @@ dyn->e.olds[i].v = 0; \ dyn->insts[ninst].f_entry = dyn->f; \ dyn->insts[ninst].vector_sew_entry = dyn->vector_sew; \ + dyn->inst_sew = VECTOR_SEWNA; \ + dyn->inst_vl = 0; \ if (ninst) \ dyn->insts[ninst - 1].x64.size = dyn->insts[ninst].x64.addr - dyn->insts[ninst - 1].x64.addr; diff --git a/src/dynarec/rv64/dynarec_rv64_pass1.h b/src/dynarec/rv64/dynarec_rv64_pass1.h index 5360f7670..0aa8e010d 100644 --- a/src/dynarec/rv64/dynarec_rv64_pass1.h +++ b/src/dynarec/rv64/dynarec_rv64_pass1.h @@ -8,6 +8,8 @@ for (int i = 0; i < 16; ++i) \ dyn->e.olds[i].v = 0; \ dyn->insts[ninst].vector_sew_entry = dyn->vector_sew; \ + dyn->inst_sew = VECTOR_SEWNA; \ + dyn->inst_vl = 0; \ dyn->e.swapped = 0; \ dyn->e.barrier = 0 diff --git a/src/dynarec/rv64/dynarec_rv64_pass2.h b/src/dynarec/rv64/dynarec_rv64_pass2.h index 4f41f628d..71b14b2e2 100644 --- a/src/dynarec/rv64/dynarec_rv64_pass2.h +++ b/src/dynarec/rv64/dynarec_rv64_pass2.h @@ -9,6 +9,8 @@ #define EMIT(A) do {dyn->insts[ninst].size+=4; dyn->native_size+=4;}while(0) #define NEW_INST \ dyn->vector_sew = dyn->insts[ninst].vector_sew_entry; \ + dyn->inst_sew = VECTOR_SEWNA; \ + dyn->inst_vl = 0; \ if (ninst) { \ dyn->insts[ninst].address = (dyn->insts[ninst - 1].address + dyn->insts[ninst - 1].size); \ dyn->insts_size += 1 + ((dyn->insts[ninst - 1].x64.size > (dyn->insts[ninst - 1].size / 4)) ? dyn->insts[ninst - 1].x64.size : (dyn->insts[ninst - 1].size / 4)) / 15; \ diff --git a/src/dynarec/rv64/dynarec_rv64_pass3.h b/src/dynarec/rv64/dynarec_rv64_pass3.h index 995b7f3a0..85337a617 100644 --- a/src/dynarec/rv64/dynarec_rv64_pass3.h +++ b/src/dynarec/rv64/dynarec_rv64_pass3.h @@ -14,6 +14,8 @@ #define MESSAGE(A, ...) if(box64_dynarec_dump) dynarec_log(LOG_NONE, __VA_ARGS__) #define NEW_INST \ dyn->vector_sew = dyn->insts[ninst].vector_sew_entry; \ + dyn->inst_sew = VECTOR_SEWNA; \ + dyn->inst_vl = 0; \ if (box64_dynarec_dump) print_newinst(dyn, ninst); \ if (ninst) { \ addInst(dyn->instsize, &dyn->insts_size, dyn->insts[ninst - 1].x64.size, dyn->insts[ninst - 1].size / 4); \ diff --git a/src/dynarec/rv64/dynarec_rv64_private.h b/src/dynarec/rv64/dynarec_rv64_private.h index f3fde5652..ca1d5e960 100644 --- a/src/dynarec/rv64/dynarec_rv64_private.h +++ b/src/dynarec/rv64/dynarec_rv64_private.h @@ -156,6 +156,8 @@ typedef struct dynarec_rv64_s { uint8_t abort; uint8_t vector_sew; // current sew status uint8_t vector_eew; // current effective sew status, should only be used after SET_ELEMENT_WIDTH + uint8_t inst_sew; // sew inside current instruction, for vsetvli elimination + uint8_t inst_vl; // vl inside current instruction, for vsetvli elimination } dynarec_rv64_t; // v0 is hardware wired to vector mask register, which should be always reserved