From 796b99e1a13eb3063ed95de52b53417d7bbc859e Mon Sep 17 00:00:00 2001
From: Finn Wilkinson
Date: Thu, 7 Nov 2024 19:58:23 +0000
Subject: [PATCH] Implemented ZIP (4 vectors) SVE2 instruction with tests.

---
 src/lib/arch/aarch64/Instruction_execute.cc | 23 +++++++++++++++++++++
 test/regression/aarch64/instructions/sve.cc | 15 +++++++++++++-
 2 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/src/lib/arch/aarch64/Instruction_execute.cc b/src/lib/arch/aarch64/Instruction_execute.cc
index 28a254679..558ebc052 100644
--- a/src/lib/arch/aarch64/Instruction_execute.cc
+++ b/src/lib/arch/aarch64/Instruction_execute.cc
@@ -7076,6 +7076,29 @@ void Instruction::execute() {
         results_[0] = vecZip(sourceValues_, true);
         break;
       }
+      case Opcode::AArch64_ZIP_VG4_4Z4Z_S: {  // zip {zd1.s - zd4.s}, {zn1.s -
+                                              // zn4.s}
+        const uint32_t* zn[4];
+        zn[0] = sourceValues_[0].getAsVector<uint32_t>();
+        zn[1] = sourceValues_[1].getAsVector<uint32_t>();
+        zn[2] = sourceValues_[2].getAsVector<uint32_t>();
+        zn[3] = sourceValues_[3].getAsVector<uint32_t>();
+
+        const uint16_t quads = VL_bits / (32 * 4);
+
+        uint32_t out[4][64] = {{0}, {0}, {0}, {0}};
+        for (int r = 0; r < 4; r++) {
+          const uint16_t base = r * quads;
+          for (int q = 0; q < quads; q++) {
+            out[r][4 * q] = zn[0][base + q];
+            out[r][4 * q + 1] = zn[1][base + q];
+            out[r][4 * q + 2] = zn[2][base + q];
+            out[r][4 * q + 3] = zn[3][base + q];
+          }
+          results_[r] = RegisterValue(out[r], 256);
+        }
+        break;
+      }
       case Opcode::AArch64_ZERO_M: {  // zero {mask}
         // SME
         // Not in right context mode. Raise exception
diff --git a/test/regression/aarch64/instructions/sve.cc b/test/regression/aarch64/instructions/sve.cc
index 16a966d00..f9699593f 100644
--- a/test/regression/aarch64/instructions/sve.cc
+++ b/test/regression/aarch64/instructions/sve.cc
@@ -9145,13 +9145,26 @@ TEST_P(InstSve, zip) {
     zip1 z16.b, z12.b, z13.b
     zip2 z17.b, z14.b, z15.b
   )");
-
   CHECK_NEON(4, double, fillNeon<double>({0.5, -0.5}, VL / 8));
   CHECK_NEON(5, double, fillNeon<double>({0.75, -0.75}, VL / 8));
   CHECK_NEON(10, float, fillNeon<float>({0.5, -0.75}, VL / 8));
   CHECK_NEON(11, float, fillNeon<float>({-0.5, 0.75}, VL / 8));
   CHECK_NEON(16, int8_t, fillNeon<int8_t>({1, -2}, VL / 8));
   CHECK_NEON(17, int8_t, fillNeon<int8_t>({-1, 2}, VL / 8));
+
+  // Multi-vector
+  RUN_AARCH64(R"(
+    #32-bit
+    dup z0.s, #5
+    dup z1.s, #6
+    dup z2.s, #7
+    dup z3.s, #8
+    zip {z4.s - z7.s}, {z0.s - z3.s}
+  )");
+  CHECK_NEON(4, uint32_t, fillNeon<uint32_t>({5, 6, 7, 8}, VL / 8));
+  CHECK_NEON(5, uint32_t, fillNeon<uint32_t>({5, 6, 7, 8}, VL / 8));
+  CHECK_NEON(6, uint32_t, fillNeon<uint32_t>({5, 6, 7, 8}, VL / 8));
+  CHECK_NEON(7, uint32_t, fillNeon<uint32_t>({5, 6, 7, 8}, VL / 8));
 }
 
 TEST_P(InstSve, psel) {