From 4a0e8e9f3cf10412f3d893bef61d55f7ebb1ae17 Mon Sep 17 00:00:00 2001 From: xctan Date: Tue, 12 Nov 2024 23:13:02 +0800 Subject: [PATCH 1/8] [RV64_DYNAREC] Added 0F 68 PUNPCKHBW opcode --- src/dynarec/rv64/dynarec_rv64_0f_vector.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/dynarec/rv64/dynarec_rv64_0f_vector.c b/src/dynarec/rv64/dynarec_rv64_0f_vector.c index f5c3a4d83..d740352b9 100644 --- a/src/dynarec/rv64/dynarec_rv64_0f_vector.c +++ b/src/dynarec/rv64/dynarec_rv64_0f_vector.c @@ -515,6 +515,25 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); VNCLIPU_WI(q0, d0, 0, VECTOR_UNMASKED); break; + case 0x68: + INST_NAME("PUNPCKHBW Gm, Em"); + nextop = F8; + GETGM_vector(q0); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); + GETEM_vector(q1, 0); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); + ADDI(x2, xZR, 0x10); + v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); + v1 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL1); + d0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); + VSLIDEDOWN_VI(v0, q0, 4, VECTOR_UNMASKED); + VSLIDEDOWN_VI(v1, q1, 4, VECTOR_UNMASKED); + VWADDU_VX(d0, v0, xZR, VECTOR_UNMASKED); + VWMULU_VX(v0, v1, x2, VECTOR_UNMASKED); // shift left 4 bits + SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1); + VMACC_VX(d0, v0, x2, VECTOR_UNMASKED); // shift left 4 bits and merge + VMV_V_V(q0, d0); + break; case 0x6B: INST_NAME("PACKSSDW Gm, Em"); nextop = F8; From 9b2c4416ffcc4cf477025c5ccddafc00423586e4 Mon Sep 17 00:00:00 2001 From: xctan Date: Tue, 12 Nov 2024 23:30:22 +0800 Subject: [PATCH 2/8] [RV64_DYNAREC] Added 0F 69 PUNPCKHWD opcode --- src/dynarec/rv64/dynarec_rv64_0f_vector.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/dynarec/rv64/dynarec_rv64_0f_vector.c b/src/dynarec/rv64/dynarec_rv64_0f_vector.c index d740352b9..a16ea0a20 100644 --- a/src/dynarec/rv64/dynarec_rv64_0f_vector.c +++ b/src/dynarec/rv64/dynarec_rv64_0f_vector.c @@ -534,6 +534,25 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, VMACC_VX(d0, v0, x2, VECTOR_UNMASKED); // shift left 4 bits and merge VMV_V_V(q0, d0); break; + case 0x69: + INST_NAME("PUNPCKHWD Gm, Em"); + nextop = F8; + GETGM_vector(q0); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); + GETEM_vector(q1, 0); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1); + ADDI(x2, xZR, 0x100); + v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); + v1 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL1); + d0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); + VSLIDEDOWN_VI(v0, q0, 2, VECTOR_UNMASKED); + VSLIDEDOWN_VI(v1, q1, 2, VECTOR_UNMASKED); + VWADDU_VX(d0, v0, xZR, VECTOR_UNMASKED); + VWMULU_VX(v0, v1, x2, VECTOR_UNMASKED); // shift left 8 bits + SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1); + VMACC_VX(d0, v0, x2, VECTOR_UNMASKED); // shift left 8 bits and merge + VMV_V_V(q0, d0); + break; case 0x6B: INST_NAME("PACKSSDW Gm, Em"); nextop = F8; From a5674ffb0fe9116f74868d6e207e57d7e5b29566 Mon Sep 17 00:00:00 2001 From: xctan Date: Wed, 13 Nov 2024 00:09:28 +0800 Subject: [PATCH 3/8] [RV64_DYNAREC] Added 0F 6A PUNPCKHDQ opcode --- src/dynarec/rv64/dynarec_rv64_0f_vector.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/dynarec/rv64/dynarec_rv64_0f_vector.c b/src/dynarec/rv64/dynarec_rv64_0f_vector.c index a16ea0a20..e230a8cef 100644 --- a/src/dynarec/rv64/dynarec_rv64_0f_vector.c +++ b/src/dynarec/rv64/dynarec_rv64_0f_vector.c @@ -553,6 +553,26 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, VMACC_VX(d0, v0, x2, VECTOR_UNMASKED); // shift left 8 bits and merge VMV_V_V(q0, d0); break; + case 0x6A: + INST_NAME("PUNPCKHDQ Gm, Em"); + nextop = F8; + GETGM_vector(q0); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); + GETEM_vector(q1, 0); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1); + MOV32w(x2, 32); + v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); + v1 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL1); + d0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); + VSLIDEDOWN_VI(v0, q0, 1, VECTOR_UNMASKED); + VSLIDEDOWN_VI(v1, q1, 1, VECTOR_UNMASKED); + VWADDU_VX(d0, v0, xZR, VECTOR_UNMASKED); + VWADDU_VX(v0, v1, xZR, VECTOR_UNMASKED); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); + VSLL_VX(v0, v0, x2, VECTOR_UNMASKED); + VOR_VV(d0, d0, v0, VECTOR_UNMASKED); + VMV_V_V(q0, d0); + break; case 0x6B: INST_NAME("PACKSSDW Gm, Em"); nextop = F8; From a335df21cac6ab79f7175e82a52a3321b8e83012 Mon Sep 17 00:00:00 2001 From: xctan Date: Wed, 13 Nov 2024 00:12:41 +0800 Subject: [PATCH 4/8] [RV64_DYNAREC] Updated 0F 68-69 PUNPCKHBW/WD opcodes --- src/dynarec/rv64/dynarec_rv64_0f_vector.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/dynarec/rv64/dynarec_rv64_0f_vector.c b/src/dynarec/rv64/dynarec_rv64_0f_vector.c index e230a8cef..46f1b9d86 100644 --- a/src/dynarec/rv64/dynarec_rv64_0f_vector.c +++ b/src/dynarec/rv64/dynarec_rv64_0f_vector.c @@ -522,16 +522,16 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); GETEM_vector(q1, 0); SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); - ADDI(x2, xZR, 0x10); v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); v1 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL1); d0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); VSLIDEDOWN_VI(v0, q0, 4, VECTOR_UNMASKED); VSLIDEDOWN_VI(v1, q1, 4, VECTOR_UNMASKED); VWADDU_VX(d0, v0, xZR, VECTOR_UNMASKED); - VWMULU_VX(v0, v1, x2, VECTOR_UNMASKED); // shift left 4 bits + VWADDU_VX(v0, v1, xZR, VECTOR_UNMASKED); SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1); - VMACC_VX(d0, v0, x2, VECTOR_UNMASKED); // shift left 4 bits and merge + VSLL_VI(v0, v0, 8, VECTOR_UNMASKED); + VOR_VV(d0, d0, v0, VECTOR_UNMASKED); VMV_V_V(q0, d0); break; case 0x69: @@ -541,16 +541,16 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); GETEM_vector(q1, 0); SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1); - ADDI(x2, xZR, 0x100); v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); v1 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL1); d0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); VSLIDEDOWN_VI(v0, q0, 2, VECTOR_UNMASKED); VSLIDEDOWN_VI(v1, q1, 2, VECTOR_UNMASKED); VWADDU_VX(d0, v0, xZR, VECTOR_UNMASKED); - VWMULU_VX(v0, v1, x2, VECTOR_UNMASKED); // shift left 8 bits + VWADDU_VX(v0, v1, xZR, VECTOR_UNMASKED); SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1); - VMACC_VX(d0, v0, x2, VECTOR_UNMASKED); // shift left 8 bits and merge + VSLL_VI(v0, v0, 16, VECTOR_UNMASKED); + VOR_VV(d0, d0, v0, VECTOR_UNMASKED); VMV_V_V(q0, d0); break; case 0x6A: From 3492910b73da3139c972f4b3a09d018952c67229 Mon Sep 17 00:00:00 2001 From: xctan Date: Wed, 13 Nov 2024 00:21:01 +0800 Subject: [PATCH 5/8] [RV64_DYNAREC] Added 0F 60 PUNPCKLBW opcode --- src/dynarec/rv64/dynarec_rv64_0f_vector.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/dynarec/rv64/dynarec_rv64_0f_vector.c b/src/dynarec/rv64/dynarec_rv64_0f_vector.c index 46f1b9d86..049fb5d12 100644 --- a/src/dynarec/rv64/dynarec_rv64_0f_vector.c +++ b/src/dynarec/rv64/dynarec_rv64_0f_vector.c @@ -483,6 +483,22 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, VADD_VX(q0, q1, xZR, VECTOR_MASKED); } break; + case 0x60: + INST_NAME("PUNPCKLBW Gm, Em"); + nextop = F8; + GETGM_vector(q0); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); + GETEM_vector(q1, 0); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); + v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); + d0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); + VWADDU_VX(d0, q0, xZR, VECTOR_UNMASKED); + VWADDU_VX(v0, q1, xZR, VECTOR_UNMASKED); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1); + VSLL_VI(v0, v0, 8, VECTOR_UNMASKED); + VOR_VV(d0, d0, v0, VECTOR_UNMASKED); + VMV_V_V(q0, d0); + break; case 0x63: INST_NAME("PACKSSWB Gm, Em"); nextop = F8; From 833a9cb862749e3838f6db5f3f20d0c0e1cd6db8 Mon Sep 17 00:00:00 2001 From: xctan Date: Wed, 13 Nov 2024 00:23:18 +0800 Subject: [PATCH 6/8] [RV64_DYNAREC] Added 0F 61 PUNPCKLWD opcode --- src/dynarec/rv64/dynarec_rv64_0f_vector.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/dynarec/rv64/dynarec_rv64_0f_vector.c b/src/dynarec/rv64/dynarec_rv64_0f_vector.c index 049fb5d12..a5b899210 100644 --- a/src/dynarec/rv64/dynarec_rv64_0f_vector.c +++ b/src/dynarec/rv64/dynarec_rv64_0f_vector.c @@ -499,6 +499,22 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, VOR_VV(d0, d0, v0, VECTOR_UNMASKED); VMV_V_V(q0, d0); break; + case 0x61: + INST_NAME("PUNPCKLWD Gm, Em"); + nextop = F8; + GETGM_vector(q0); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); + GETEM_vector(q1, 0); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1); + v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); + d0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); + VWADDU_VX(d0, q0, xZR, VECTOR_UNMASKED); + VWADDU_VX(v0, q1, xZR, VECTOR_UNMASKED); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1); + VSLL_VI(v0, v0, 16, VECTOR_UNMASKED); + VOR_VV(d0, d0, v0, VECTOR_UNMASKED); + VMV_V_V(q0, d0); + break; case 0x63: INST_NAME("PACKSSWB Gm, Em"); nextop = F8; From e3697b1beb934f8c78de36bab47665a194b6d779 Mon Sep 17 00:00:00 2001 From: xctan Date: Wed, 13 Nov 2024 00:24:39 +0800 Subject: [PATCH 7/8] [RV64_DYNAREC] Added 0F 62 PUNPCKLDQ opcode --- src/dynarec/rv64/dynarec_rv64_0f_vector.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/dynarec/rv64/dynarec_rv64_0f_vector.c b/src/dynarec/rv64/dynarec_rv64_0f_vector.c index a5b899210..218e91154 100644 --- a/src/dynarec/rv64/dynarec_rv64_0f_vector.c +++ b/src/dynarec/rv64/dynarec_rv64_0f_vector.c @@ -515,6 +515,23 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, VOR_VV(d0, d0, v0, VECTOR_UNMASKED); VMV_V_V(q0, d0); break; + case 0x62: + INST_NAME("PUNPCKLDQ Gm, Em"); + nextop = F8; + GETGM_vector(q0); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); + GETEM_vector(q1, 0); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1); + v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); + d0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); + MOV32w(x2, 32); + VWADDU_VX(d0, q0, xZR, VECTOR_UNMASKED); + VWADDU_VX(v0, q1, xZR, VECTOR_UNMASKED); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); + VSLL_VX(v0, v0, x2, VECTOR_UNMASKED); + VOR_VV(d0, d0, v0, VECTOR_UNMASKED); + VMV_V_V(q0, d0); + break; case 0x63: INST_NAME("PACKSSWB Gm, Em"); nextop = F8; From 5b75a93dfb626b4c8a6a516693f46dfbaa07fc36 Mon Sep 17 00:00:00 2001 From: xctan Date: Thu, 14 Nov 2024 23:58:33 +0800 Subject: [PATCH 8/8] [RV64_DYNAREC] Simplified MMX PUNPCK{L,H}{BW,WD,DQ} --- src/dynarec/rv64/dynarec_rv64_0f_vector.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/src/dynarec/rv64/dynarec_rv64_0f_vector.c b/src/dynarec/rv64/dynarec_rv64_0f_vector.c index 218e91154..195d58f75 100644 --- a/src/dynarec/rv64/dynarec_rv64_0f_vector.c +++ b/src/dynarec/rv64/dynarec_rv64_0f_vector.c @@ -496,8 +496,7 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, VWADDU_VX(v0, q1, xZR, VECTOR_UNMASKED); SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1); VSLL_VI(v0, v0, 8, VECTOR_UNMASKED); - VOR_VV(d0, d0, v0, VECTOR_UNMASKED); - VMV_V_V(q0, d0); + VOR_VV(q0, d0, v0, VECTOR_UNMASKED); break; case 0x61: INST_NAME("PUNPCKLWD Gm, Em"); @@ -512,8 +511,7 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, VWADDU_VX(v0, q1, xZR, VECTOR_UNMASKED); SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1); VSLL_VI(v0, v0, 16, VECTOR_UNMASKED); - VOR_VV(d0, d0, v0, VECTOR_UNMASKED); - VMV_V_V(q0, d0); + VOR_VV(q0, d0, v0, VECTOR_UNMASKED); break; case 0x62: INST_NAME("PUNPCKLDQ Gm, Em"); @@ -529,8 +527,7 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, VWADDU_VX(v0, q1, xZR, VECTOR_UNMASKED); SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); VSLL_VX(v0, v0, x2, VECTOR_UNMASKED); - VOR_VV(d0, d0, v0, VECTOR_UNMASKED); - VMV_V_V(q0, d0); + VOR_VV(q0, d0, v0, VECTOR_UNMASKED); break; case 0x63: INST_NAME("PACKSSWB Gm, Em"); @@ -580,8 +577,7 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, VWADDU_VX(v0, v1, xZR, VECTOR_UNMASKED); SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1); VSLL_VI(v0, v0, 8, VECTOR_UNMASKED); - VOR_VV(d0, d0, v0, VECTOR_UNMASKED); - VMV_V_V(q0, d0); + VOR_VV(q0, d0, v0, VECTOR_UNMASKED); break; case 0x69: INST_NAME("PUNPCKHWD Gm, Em"); @@ -599,8 +595,7 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, VWADDU_VX(v0, v1, xZR, VECTOR_UNMASKED); SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1); VSLL_VI(v0, v0, 16, VECTOR_UNMASKED); - VOR_VV(d0, d0, v0, VECTOR_UNMASKED); - VMV_V_V(q0, d0); + VOR_VV(q0, d0, v0, VECTOR_UNMASKED); break; case 0x6A: INST_NAME("PUNPCKHDQ Gm, Em"); @@ -619,8 +614,7 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, VWADDU_VX(v0, v1, xZR, VECTOR_UNMASKED); SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); VSLL_VX(v0, v0, x2, VECTOR_UNMASKED); - VOR_VV(d0, d0, v0, VECTOR_UNMASKED); - VMV_V_V(q0, d0); + VOR_VV(q0, d0, v0, VECTOR_UNMASKED); break; case 0x6B: INST_NAME("PACKSSDW Gm, Em");