Skip to content

Commit

Permalink
Implemented ZIP (4 vectors) SVE2 instruction with tests.
Browse files Browse the repository at this point in the history
  • Loading branch information
FinnWilkinson committed Nov 7, 2024
1 parent 9d7ec76 commit f2b86fa
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 1 deletion.
23 changes: 23 additions & 0 deletions src/lib/arch/aarch64/Instruction_execute.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7076,6 +7076,29 @@ void Instruction::execute() {
results_[0] = vecZip<uint8_t, 8>(sourceValues_, true);
break;
}
case Opcode::AArch64_ZIP_VG4_4Z4Z_S: {  // zip {zd1.s - zd4.s}, {zn1.s -
                                        // zn4.s}
  // Four-vector ZIP on 32-bit elements: every destination register is
  // built from consecutive groups of four elements, where slot `i` of a
  // group is taken from source vector `i`.
  const uint32_t* src[4] = {sourceValues_[0].getAsVector<uint32_t>(),
                            sourceValues_[1].getAsVector<uint32_t>(),
                            sourceValues_[2].getAsVector<uint32_t>(),
                            sourceValues_[3].getAsVector<uint32_t>()};

  // Number of four-element (4 x 32-bit) groups held by one vector.
  const uint16_t groupsPerVec = VL_bits / (32 * 4);

  for (int dst = 0; dst < 4; dst++) {
    // Zero-filled staging buffer of 64 x 4 bytes = 256 bytes, matching the
    // byte count handed to RegisterValue below; elements not written by the
    // loops stay zero.
    uint32_t interleaved[64] = {0};

    // Destination `dst` consumes source elements
    // [dst * groupsPerVec, (dst + 1) * groupsPerVec) of every source vector.
    const uint16_t firstElem = dst * groupsPerVec;
    for (int grp = 0; grp < groupsPerVec; grp++) {
      for (int vec = 0; vec < 4; vec++) {
        interleaved[4 * grp + vec] = src[vec][firstElem + grp];
      }
    }
    results_[dst] = RegisterValue(interleaved, 256);
  }
  break;
}
case Opcode::AArch64_ZERO_M: { // zero {mask}
// SME
// Not in right context mode. Raise exception
Expand Down
15 changes: 14 additions & 1 deletion test/regression/aarch64/instructions/sve.cc
Original file line number Diff line number Diff line change
Expand Up @@ -9145,13 +9145,26 @@ TEST_P(InstSve, zip) {
zip1 z16.b, z12.b, z13.b
zip2 z17.b, z14.b, z15.b
)");

CHECK_NEON(4, double, fillNeon<double>({0.5, -0.5}, VL / 8));
CHECK_NEON(5, double, fillNeon<double>({0.75, -0.75}, VL / 8));
CHECK_NEON(10, float, fillNeon<float>({0.5, -0.75}, VL / 8));
CHECK_NEON(11, float, fillNeon<float>({-0.5, 0.75}, VL / 8));
CHECK_NEON(16, int8_t, fillNeon<int8_t>({1, -2}, VL / 8));
CHECK_NEON(17, int8_t, fillNeon<int8_t>({-1, 2}, VL / 8));

// Multi-vector
RUN_AARCH64(R"(
#32-bit
dup z0.s, #5
dup z1.s, #6
dup z2.s, #7
dup z3.s, #8
zip {z4.s - z7.s}, {z0.s - z3.s}
)");
CHECK_NEON(4, uint32_t, fillNeon<uint32_t>({5, 6, 7, 8}, VL / 8));
CHECK_NEON(5, uint32_t, fillNeon<uint32_t>({5, 6, 7, 8}, VL / 8));
CHECK_NEON(6, uint32_t, fillNeon<uint32_t>({5, 6, 7, 8}, VL / 8));
CHECK_NEON(7, uint32_t, fillNeon<uint32_t>({5, 6, 7, 8}, VL / 8));
}

TEST_P(InstSve, psel) {
Expand Down

0 comments on commit f2b86fa

Please sign in to comment.