Skip to content

Commit

Permalink
[RV64_DYNAREC] Added more opcodes for vector (#1968)
Browse files Browse the repository at this point in the history
  • Loading branch information
ksco authored Oct 28, 2024
1 parent 2835a2f commit d3c1ea5
Show file tree
Hide file tree
Showing 3 changed files with 159 additions and 47 deletions.
112 changes: 112 additions & 0 deletions src/dynarec/rv64/dynarec_rv64_0f_vector.c
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,50 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
SMWRITE2();
}
break;
case 0x51:
// SQRTPS: emits a vector float square root of the Ex operand into Gx,
// operating on 32-bit elements.
INST_NAME("SQRTPS Gx, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
GETEX_vector(v0, 0, 0, VECTOR_SEW32);
// NOTE(review): the "empty" getter presumably skips loading the old Gx value,
// which is fine here because Gx is only written, never read -- confirm.
GETGX_empty_vector(v1);
VFSQRT_V(v1, v0, VECTOR_UNMASKED);
break;
case 0x53:
// RCPPS: emits Gx = 1.0f / Ex on 32-bit float elements. The reciprocal is
// produced with a real divide (reverse-divide by a scalar), not with an
// approximation instruction.
INST_NAME("RCPPS Gx, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
GETEX_vector(v0, 0, 0, VECTOR_SEW32);
GETGX_empty_vector(v1);
// LUI places the 20-bit immediate in bits 31:12, so x4 = 0x3f800000,
// which is the IEEE-754 single-precision bit pattern of 1.0f.
LUI(x4, 0x3f800);
d0 = fpu_get_scratch(dyn);
FMVWX(d0, x4); // 1.0f
// Reverse divide: scalar d0 is the dividend, each element of v0 the divisor.
VFRDIV_VF(v1, v0, d0, VECTOR_UNMASKED);
break;
case 0x54:
// ANDPS: emits a bitwise AND of Gx with Ex, result stored back in Gx.
// The operation is purely bitwise, so the 32-bit element width only
// selects how the register is partitioned, not the result.
INST_NAME("ANDPS Gx, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
GETEX_vector(q0, 0, 0, VECTOR_SEW32);
GETGX_vector(v0, 1, VECTOR_SEW32);
VAND_VV(v0, v0, q0, VECTOR_UNMASKED);
break;
case 0x55:
INST_NAME("ANDNPS Gx, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
GETEX_vector(q0, 0, 0, VECTOR_SEW32);
GETGX_vector(v0, 1, VECTOR_SEW32);
VXOR_VI(v0, v0, 0x1f, VECTOR_UNMASKED);
VAND_VV(v0, v0, q0, VECTOR_UNMASKED);
break;
case 0x56:
// ORPS: emits a bitwise OR of Gx with Ex, result stored back in Gx.
// Purely bitwise, so the chosen 32-bit element width does not affect the result.
INST_NAME("ORPS Gx, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
GETEX_vector(q0, 0, 0, VECTOR_SEW32);
GETGX_vector(v0, 1, VECTOR_SEW32);
VOR_VV(v0, v0, q0, VECTOR_UNMASKED);
break;
case 0x57:
INST_NAME("XORPS Gx, Ex");
nextop = F8;
Expand All @@ -266,6 +310,74 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
VXOR_VV(q0, q1, q0, VECTOR_UNMASKED);
}
break;
case 0x58:
// ADDPS: emits a vector float add on 32-bit elements, Gx = Gx + Ex.
INST_NAME("ADDPS Gx, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
GETEX_vector(q0, 0, 0, VECTOR_SEW32);
GETGX_vector(v0, 1, VECTOR_SEW32);
VFADD_VV(v0, v0, q0, VECTOR_UNMASKED);
break;
case 0x59:
// MULPS: emits a vector float multiply on 32-bit elements, Gx = Gx * Ex.
INST_NAME("MULPS Gx, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
GETEX_vector(q0, 0, 0, VECTOR_SEW32);
GETGX_vector(v0, 1, VECTOR_SEW32);
VFMUL_VV(v0, v0, q0, VECTOR_UNMASKED);
break;
case 0x5C:
// SUBPS: emits a vector float subtract on 32-bit elements, with Gx (v0) as
// the first source operand and Ex (q0) as the second; result goes to Gx.
// NOTE(review): operand order assumed to be Gx - Ex, matching the x86
// instruction -- confirm against the VFSUB_VV macro definition.
INST_NAME("SUBPS Gx, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
GETEX_vector(q0, 0, 0, VECTOR_SEW32);
GETGX_vector(v0, 1, VECTOR_SEW32);
VFSUB_VV(v0, v0, q0, VECTOR_UNMASKED);
break;
case 0x5D:
// MINPS: emits a vector float minimum on 32-bit elements, Gx = min(Gx, Ex).
// When box64_dynarec_fastnan is off, extra code is emitted so that any lane
// where either input is NaN ends up holding the Ex value.
INST_NAME("MINPS Gx, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
GETGX_vector(q0, 1, VECTOR_SEW32);
GETEX_vector(q1, 0, 0, VECTOR_SEW32);
if (!box64_dynarec_fastnan) {
v0 = fpu_get_scratch(dyn);
// A float compares equal to itself only when it is not NaN, so these two
// masks flag the non-NaN lanes of Gx and Ex; they must be captured before
// VFMIN overwrites Gx.
VMFEQ_VV(VMASK, q0, q0, VECTOR_UNMASKED);
VMFEQ_VV(v0, q1, q1, VECTOR_UNMASKED);
}
VFMIN_VV(q0, q0, q1, VECTOR_UNMASKED);
if (!box64_dynarec_fastnan) {
// VMASK = lanes where both inputs are non-NaN ...
VMAND_MM(VMASK, v0, VMASK);
// ... then inverted (XOR with the all-ones immediate) to select lanes
// where at least one input is NaN.
VXOR_VI(VMASK, VMASK, 0x1F, VECTOR_UNMASKED);
// Masked add of the zero register acts as a masked copy of Ex into Gx.
VADD_VX(q0, q1, xZR, VECTOR_MASKED);
}
break;
case 0x5E:
// DIVPS: emits a vector float divide on 32-bit elements, with Gx (q0) as
// the first source operand and Ex (q1) as the second; result goes to Gx.
// NOTE(review): operand order assumed to be Gx / Ex, matching the x86
// instruction -- confirm against the VFDIV_VV macro definition.
INST_NAME("DIVPS Gx, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
GETGX_vector(q0, 1, VECTOR_SEW32);
GETEX_vector(q1, 0, 0, VECTOR_SEW32);
VFDIV_VV(q0, q0, q1, VECTOR_UNMASKED);
break;
case 0x5F:
    // MAXPS: emits a vector float maximum on 32-bit elements, Gx = max(Gx, Ex).
    // When box64_dynarec_fastnan is off, extra code is emitted so that any lane
    // where either input is NaN ends up holding the Ex value.
    INST_NAME("MAXPS Gx, Ex");
    nextop = F8;
    SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
    GETGX_vector(q0, 1, VECTOR_SEW32);
    GETEX_vector(q1, 0, 0, VECTOR_SEW32);
    if (!box64_dynarec_fastnan) {
        // The scratch register is only needed for the NaN fix-up, so it is
        // reserved inside the guard, the same way the MINPS arm does it.
        v0 = fpu_get_scratch(dyn);
        // A float compares equal to itself only when it is not NaN; capture
        // the non-NaN lanes of both inputs before VFMAX overwrites Gx.
        VMFEQ_VV(VMASK, q0, q0, VECTOR_UNMASKED);
        VMFEQ_VV(v0, q1, q1, VECTOR_UNMASKED);
    }
    VFMAX_VV(q0, q0, q1, VECTOR_UNMASKED);
    if (!box64_dynarec_fastnan) {
        // VMASK = lanes where both inputs are non-NaN, then inverted (XOR with
        // the all-ones immediate) to select lanes with at least one NaN.
        VMAND_MM(VMASK, v0, VMASK);
        VXOR_VI(VMASK, VMASK, 0x1F, VECTOR_UNMASKED);
        // Masked add of the zero register acts as a masked copy of Ex into Gx.
        VADD_VX(q0, q1, xZR, VECTOR_MASKED);
    }
    break;
case 0xC6:
INST_NAME("SHUFPS Gx, Ex, Ib");
nextop = F8;
Expand Down
28 changes: 18 additions & 10 deletions src/dynarec/rv64/dynarec_rv64_660f_vector.c
Original file line number Diff line number Diff line change
Expand Up @@ -862,12 +862,16 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
GETGX_vector(q0, 1, VECTOR_SEW64);
GETEX_vector(q1, 0, 0, VECTOR_SEW64);
v0 = fpu_get_scratch(dyn);
VMFEQ_VV(VMASK, q0, q0, VECTOR_UNMASKED);
VMFEQ_VV(v0, q1, q1, VECTOR_UNMASKED);
if (!box64_dynarec_fastnan) {
VMFEQ_VV(VMASK, q0, q0, VECTOR_UNMASKED);
VMFEQ_VV(v0, q1, q1, VECTOR_UNMASKED);
}
VFMIN_VV(q0, q0, q1, VECTOR_UNMASKED);
VMAND_MM(VMASK, v0, VMASK);
VXOR_VI(VMASK, VMASK, 0x1F, VECTOR_UNMASKED);
VADD_VX(q0, q1, xZR, VECTOR_MASKED);
if (!box64_dynarec_fastnan) {
VMAND_MM(VMASK, v0, VMASK);
VXOR_VI(VMASK, VMASK, 0x1F, VECTOR_UNMASKED);
VADD_VX(q0, q1, xZR, VECTOR_MASKED);
}
break;
case 0x5E:
INST_NAME("DIVPD Gx, Ex");
Expand Down Expand Up @@ -896,12 +900,16 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
GETGX_vector(q0, 1, VECTOR_SEW64);
GETEX_vector(q1, 0, 0, VECTOR_SEW64);
v0 = fpu_get_scratch(dyn);
VMFEQ_VV(VMASK, q0, q0, VECTOR_UNMASKED);
VMFEQ_VV(v0, q1, q1, VECTOR_UNMASKED);
if (!box64_dynarec_fastnan) {
VMFEQ_VV(VMASK, q0, q0, VECTOR_UNMASKED);
VMFEQ_VV(v0, q1, q1, VECTOR_UNMASKED);
}
VFMAX_VV(q0, q0, q1, VECTOR_UNMASKED);
VMAND_MM(VMASK, v0, VMASK);
VXOR_VI(VMASK, VMASK, 0x1F, VECTOR_UNMASKED);
VADD_VX(q0, q1, xZR, VECTOR_MASKED);
if (!box64_dynarec_fastnan) {
VMAND_MM(VMASK, v0, VMASK);
VXOR_VI(VMASK, VMASK, 0x1F, VECTOR_UNMASKED);
VADD_VX(q0, q1, xZR, VECTOR_MASKED);
}
break;
case 0x60:
INST_NAME("PUNPCKLBW Gx, Ex");
Expand Down
66 changes: 29 additions & 37 deletions src/dynarec/rv64/dynarec_rv64_f30f_vector.c
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,8 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
INST_NAME("MOVSS Gx, Ex");
nextop = F8;
GETG;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
if (MODREG) {
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
ed = (nextop & 7) + (rex.b << 3);
v0 = sse_get_reg_vector(dyn, ninst, x1, gd, 1, VECTOR_SEW32);
v1 = sse_get_reg_vector(dyn, ninst, x1, ed, 0, VECTOR_SEW32);
Expand All @@ -66,14 +66,11 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
}
} else {
SMREAD();
SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
v0 = sse_get_reg_empty_vector(dyn, ninst, x1, gd);
d0 = fpu_get_scratch(dyn);
addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0);
VECTOR_LOAD_VMASK(0xF, x4, 1);
VLE8_V(d0, ed, VECTOR_MASKED, VECTOR_NFIELD1);
addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 0);
LWU(x4, ed, fixedaddress);
VXOR_VV(v0, v0, v0, VECTOR_UNMASKED);
VMERGE_VVM(v0, v0, d0); // implies VMASK
VMV_S_X(v0, x4);
}
break;
case 0x11:
Expand Down Expand Up @@ -130,18 +127,17 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
case 0x59:
INST_NAME("MULSS Gx, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
if (MODREG) {
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
GETGX_vector(v0, 1, VECTOR_SEW32);
v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW32);
} else {
SMREAD();
v1 = fpu_get_scratch(dyn);
SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0);
VECTOR_LOAD_VMASK(0xFF, x4, 1);
VLE8_V(v1, ed, VECTOR_MASKED, VECTOR_NFIELD1);
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 0);
LWU(x4, ed, fixedaddress);
VXOR_VV(v1, v1, v1, VECTOR_UNMASKED);
VMV_S_X(v1, x4);
GETGX_vector(v0, 1, VECTOR_SEW32);
}
if (box64_dynarec_fastnan) {
Expand Down Expand Up @@ -172,18 +168,17 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
case 0x5A:
INST_NAME("CVTSS2SD Gx, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
if (MODREG) {
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
GETGX_vector(v0, 1, VECTOR_SEW32);
v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW32);
} else {
SMREAD();
v1 = fpu_get_scratch(dyn);
SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0);
VECTOR_LOAD_VMASK(0xFF, x4, 1);
VLE8_V(v1, ed, VECTOR_MASKED, VECTOR_NFIELD1);
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 0);
LWU(x4, ed, fixedaddress);
VXOR_VV(v1, v1, v1, VECTOR_UNMASKED);
VMV_S_X(v1, x4);
GETGX_vector(v0, 1, VECTOR_SEW32);
}
d0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2);
Expand All @@ -200,18 +195,17 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
case 0x5D:
INST_NAME("MINSS Gx, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
if (MODREG) {
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
GETGX_vector(v0, 1, VECTOR_SEW32);
v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW32);
} else {
SMREAD();
v1 = fpu_get_scratch(dyn);
SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0);
VECTOR_LOAD_VMASK(0xFF, x4, 1);
VLE8_V(v1, ed, VECTOR_MASKED, VECTOR_NFIELD1);
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 0);
LWU(x4, ed, fixedaddress);
VXOR_VV(v1, v1, v1, VECTOR_UNMASKED);
VMV_S_X(v1, x4);
GETGX_vector(v0, 1, VECTOR_SEW32);
}
d0 = fpu_get_scratch(dyn);
Expand All @@ -232,18 +226,17 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
case 0x5F:
INST_NAME("MAXSS Gx, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
if (MODREG) {
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
GETGX_vector(v0, 1, VECTOR_SEW32);
v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW32);
} else {
SMREAD();
v1 = fpu_get_scratch(dyn);
SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0);
VECTOR_LOAD_VMASK(0xFF, x4, 1);
VLE8_V(v1, ed, VECTOR_MASKED, VECTOR_NFIELD1);
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 0);
LWU(x4, ed, fixedaddress);
VXOR_VV(v1, v1, v1, VECTOR_UNMASKED);
VMV_S_X(v1, x4);
GETGX_vector(v0, 1, VECTOR_SEW32);
}
d0 = fpu_get_scratch(dyn);
Expand All @@ -269,18 +262,17 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
case 0xC2:
INST_NAME("CMPSS Gx, Ex, Ib");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
if (MODREG) {
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
GETGX_vector(d0, 1, VECTOR_SEW32);
d1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW32);
} else {
SMREAD();
d1 = fpu_get_scratch(dyn);
SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 1);
VECTOR_LOAD_VMASK(0xFF, x4, 1);
VLE8_V(d1, ed, VECTOR_MASKED, VECTOR_NFIELD1);
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 1);
LWU(x4, ed, fixedaddress);
VXOR_VV(d1, d1, d1, VECTOR_UNMASKED);
VMV_S_X(d1, x4);
GETGX_vector(d0, 1, VECTOR_SEW32);
}
u8 = F8;
Expand Down

0 comments on commit d3c1ea5

Please sign in to comment.