diff --git a/src/include/simeng/arch/aarch64/helpers/neon.hh b/src/include/simeng/arch/aarch64/helpers/neon.hh index 46b39b99b..bcf11311a 100644 --- a/src/include/simeng/arch/aarch64/helpers/neon.hh +++ b/src/include/simeng/arch/aarch64/helpers/neon.hh @@ -558,11 +558,13 @@ RegisterValue vecLogicOp_3vecs(srcValContainer& sourceValues, return {out, 256}; } -/** Helper function for NEON instructions with the format `uaddlv zd, zn.T`. +/** Helper function for NEON instructions with the format `uaddlv rd, Vn.T`. * T represents the type of the destination register (e.g. for h0, T = * uint32_t). * U represents the type of the sourceValues[0] (e.g. for v0.8b, U = * uint8_t) + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). * Returns correctly formatted RegisterValue. */ template RegisterValue vecAddlv(srcValContainer& sourceValues) { @@ -574,8 +576,10 @@ RegisterValue vecAddlv(srcValContainer& sourceValues) { return {out, 256}; } -/** Helper function for NEON instructions with the format `umaxv Vd, Vn.T`. +/** Helper function for NEON instructions with the format `umaxv rd, Vn.T`. * T represents the type of sourceValues (e.g. for vn.s, T = uint32_t). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). * Returns correctly formatted RegisterValue. */ template RegisterValue vecUMaxV(srcValContainer& sourceValues) { diff --git a/src/include/simeng/arch/aarch64/helpers/sve.hh b/src/include/simeng/arch/aarch64/helpers/sve.hh index e36ba675f..492a82d08 100644 --- a/src/include/simeng/arch/aarch64/helpers/sve.hh +++ b/src/include/simeng/arch/aarch64/helpers/sve.hh @@ -877,8 +877,9 @@ RegisterValue sveFsqrtPredicated_2vecs(srcValContainer& sourceValues, /** Helper function for SVE instructions with the format `ftsmul zd, zn, zm`. * T represents the type of sourceValues (e.g. for zn.d, T = double). - * Returns correctly formatted RegisterValue. U represents the same precision as - * T, but as an integer type for the second source register. */ + * U represents the same precision as T, but as an integer type for the second + * source register. + * Returns correctly formatted RegisterValue. */ template RegisterValue sveFTrigSMul(srcValContainer& sourceValues, const uint16_t VL_bits) { @@ -903,8 +904,9 @@ RegisterValue sveFTrigSMul(srcValContainer& sourceValues, /** Helper function for SVE instructions with the format `ftssel zd, zn, zm`. * T represents the type of sourceValues (e.g. for zn.d, T = double). - * Returns correctly formatted RegisterValue. U represents the same precision as - * T, but as an integer type for the second source register. */ + * U represents the same precision as T, but as an integer type for the second + * source register. + * Returns correctly formatted RegisterValue. */ template RegisterValue sveFTrigSSel(srcValContainer& sourceValues, const uint16_t VL_bits) { @@ -1094,7 +1096,6 @@ RegisterValue sveLastBScalar(srcValContainer& sourceValues, const T* n = sourceValues[1].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out; // Get last active element int lastElem = 0; @@ -1107,20 +1108,18 @@ RegisterValue sveLastBScalar(srcValContainer& sourceValues, // If no active lane has been found, select highest element instead if (i == 0) lastElem = partition_num - 1; } - - out = n[lastElem]; - return {out, 256}; + return {n[lastElem], 256}; } -/** Helper function for SVE instructions with the format `clastb rd, pg, rd, +/** Helper function for SVE instructions with the format `clastb zd, pg, zd, * zn`. * T represents the vector register type (e.g. zd.d would be uint64_t). * Returns correctly formatted RegisterValue. */ template -RegisterValue sveCLastBScalar(srcValContainer& sourceValues, - const uint16_t VL_bits) { +RegisterValue sveCLastBSimdScalar(srcValContainer& sourceValues, + const uint16_t VL_bits) { const uint64_t* p = sourceValues[1].getAsVector(); - const uint64_t m = sourceValues[2].get(); + const T* m = sourceValues[2].getAsVector(); const T* n = sourceValues[3].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); @@ -1137,7 +1136,7 @@ RegisterValue sveCLastBScalar(srcValContainer& sourceValues, } if (lastElem < 0) { - out = m; + out = m[0]; } else { out = n[lastElem]; } diff --git a/src/lib/arch/aarch64/Instruction_execute.cc b/src/lib/arch/aarch64/Instruction_execute.cc index ff91d4929..cfd3be6f6 100644 --- a/src/lib/arch/aarch64/Instruction_execute.cc +++ b/src/lib/arch/aarch64/Instruction_execute.cc @@ -2665,19 +2665,19 @@ void Instruction::execute() { break; } case Opcode::AArch64_CLASTB_VPZ_D: { // clastb dd, pg, dn, zn.d - results_[0] = sveCLastBScalar(sourceValues_, VL_bits); + results_[0] = sveCLastBSimdScalar(sourceValues_, VL_bits); break; } case Opcode::AArch64_CLASTB_VPZ_S: { // clastb sd, pg, sn, zn.s - results_[0] = sveCLastBScalar(sourceValues_, VL_bits); + results_[0] = sveCLastBSimdScalar(sourceValues_, VL_bits); break; } case Opcode::AArch64_CLASTB_VPZ_H: { // clastb hd, pg, hn, zn.h - results_[0] = sveCLastBScalar(sourceValues_, VL_bits); + results_[0] = sveCLastBSimdScalar(sourceValues_, VL_bits); break; } case Opcode::AArch64_CLASTB_VPZ_B: { // clastb bd, pg, bn, zn.b - results_[0] = sveCLastBScalar(sourceValues_, VL_bits); + results_[0] = sveCLastBSimdScalar(sourceValues_, VL_bits); break; } case Opcode::AArch64_LD1_MXIPXX_H_D: { // ld1d {zath.d[ws, #imm]}, pg/z, diff --git a/src/lib/pipeline/ReorderBuffer.cc b/src/lib/pipeline/ReorderBuffer.cc index 33326944a..e53849ea8 100644 --- a/src/lib/pipeline/ReorderBuffer.cc +++ b/src/lib/pipeline/ReorderBuffer.cc @@ -87,10 +87,17 @@ unsigned int ReorderBuffer::commit(uint64_t maxCommitSize) { // sufficiently long, assume an error in SimEng has occured. robHeadRepeatCounter_++; if (robHeadRepeatCounter_ > 10000000) { - std::cerr << "[SimEng:ReorderBuffer] Infinite loop detected in rob " - "commit at instruction address " + std::cerr << "[SimEng:ReorderBuffer] Instruction stuck unable to " + "commit at the head of ROB for a very long time at " + "instruction address 0x" << std::hex << uop->getInstructionAddress() << std::dec - << " (" << uop->getMicroOpIndex() << ")." << std::endl; + << " (MicroOp Index: " << uop->getMicroOpIndex() + << "). This is unexpected behaviour for most valid core " + "configurations, though may arise in designs with very " + "high latencies or bottlenecks. If this is not the case, " + "please try re-running. Please raise an issue on GitHub " + "if the problem persists." + << std::endl; exit(1); } break;