Responded to more comments

UoB-HPC · Dec 18, 2024 · d1cb296 · d1cb296
1 parent 5738a42
commit d1cb296
Show file tree

Hide file tree

Showing 4 changed files with 32 additions and 22 deletions.
diff --git a/src/include/simeng/arch/aarch64/helpers/neon.hh b/src/include/simeng/arch/aarch64/helpers/neon.hh
@@ -558,11 +558,13 @@ RegisterValue vecLogicOp_3vecs(srcValContainer& sourceValues,
   return {out, 256};
 }
 
-/** Helper function for NEON instructions with the format `uaddlv zd, zn.T`.
+/** Helper function for NEON instructions with the format `uaddlv rd, Vn.T`.
  * T represents the type of the destination register (e.g. for h0, T =
  * uint32_t).
  * U represents the type of the sourceValues[0] (e.g. for v0.8b, U =
  * uint8_t)
+ * I represents the number of elements in the output array to be updated (e.g.
+ * for vd.8b I = 8).
  * Returns correctly formatted RegisterValue. */
 template <typename T, typename U, int I>
 RegisterValue vecAddlv(srcValContainer& sourceValues) {
@@ -574,8 +576,10 @@ RegisterValue vecAddlv(srcValContainer& sourceValues) {
   return {out, 256};
 }
 
-/** Helper function for NEON instructions with the format `umaxv Vd, Vn.T`.
+/** Helper function for NEON instructions with the format `umaxv rd, Vn.T`.
  * T represents the type of sourceValues (e.g. for vn.s, T = uint32_t).
+ * I represents the number of elements in the output array to be updated (e.g.
+ * for vd.8b I = 8).
  * Returns correctly formatted RegisterValue. */
 template <typename T, int I>
 RegisterValue vecUMaxV(srcValContainer& sourceValues) {

diff --git a/src/include/simeng/arch/aarch64/helpers/sve.hh b/src/include/simeng/arch/aarch64/helpers/sve.hh
@@ -877,8 +877,9 @@ RegisterValue sveFsqrtPredicated_2vecs(srcValContainer& sourceValues,
 
 /** Helper function for SVE instructions with the format `ftsmul zd, zn, zm`.
  * T represents the type of sourceValues (e.g. for zn.d, T = double).
- * Returns correctly formatted RegisterValue. U represents the same precision as
- * T, but as an integer type for the second source register. */
+ * U represents the same precision as T, but as an integer type for the second
+ * source register.
+ * Returns correctly formatted RegisterValue. */
 template <typename T, typename U>
 RegisterValue sveFTrigSMul(srcValContainer& sourceValues,
                            const uint16_t VL_bits) {
@@ -903,8 +904,9 @@ RegisterValue sveFTrigSMul(srcValContainer& sourceValues,
 
 /** Helper function for SVE instructions with the format `ftssel zd, zn, zm`.
  * T represents the type of sourceValues (e.g. for zn.d, T = double).
- * Returns correctly formatted RegisterValue. U represents the same precision as
- * T, but as an integer type for the second source register. */
+ * U represents the same precision as T, but as an integer type for the second
+ * source register.
+ * Returns correctly formatted RegisterValue. */
 template <typename T, typename U>
 RegisterValue sveFTrigSSel(srcValContainer& sourceValues,
                            const uint16_t VL_bits) {
@@ -1094,7 +1096,6 @@ RegisterValue sveLastBScalar(srcValContainer& sourceValues,
   const T* n = sourceValues[1].getAsVector<T>();
 
   const uint16_t partition_num = VL_bits / (sizeof(T) * 8);
-  T out;
 
   // Get last active element
   int lastElem = 0;
@@ -1107,20 +1108,18 @@ RegisterValue sveLastBScalar(srcValContainer& sourceValues,
     // If no active lane has been found, select highest element instead
     if (i == 0) lastElem = partition_num - 1;
   }
-
-  out = n[lastElem];
-  return {out, 256};
+  return {n[lastElem], 256};
 }
 
-/** Helper function for SVE instructions with the format `clastb rd, pg, rd,
+/** Helper function for SVE instructions with the format `clastb zd, pg, zd,
  * zn`.
  * T represents the vector register type (e.g. zd.d would be uint64_t).
  * Returns correctly formatted RegisterValue. */
 template <typename T>
-RegisterValue sveCLastBScalar(srcValContainer& sourceValues,
-                              const uint16_t VL_bits) {
+RegisterValue sveCLastBSimdScalar(srcValContainer& sourceValues,
+                                  const uint16_t VL_bits) {
   const uint64_t* p = sourceValues[1].getAsVector<uint64_t>();
-  const uint64_t m = sourceValues[2].get<T>();
+  const T* m = sourceValues[2].getAsVector<T>();
   const T* n = sourceValues[3].getAsVector<T>();
 
   const uint16_t partition_num = VL_bits / (sizeof(T) * 8);
@@ -1137,7 +1136,7 @@ RegisterValue sveCLastBScalar(srcValContainer& sourceValues,
   }
 
   if (lastElem < 0) {
-    out = m;
+    out = m[0];
   } else {
     out = n[lastElem];
   }

diff --git a/src/lib/arch/aarch64/Instruction_execute.cc b/src/lib/arch/aarch64/Instruction_execute.cc
@@ -2665,19 +2665,19 @@ void Instruction::execute() {
         break;
       }
       case Opcode::AArch64_CLASTB_VPZ_D: {  // clastb dd, pg, dn, zn.d
-        results_[0] = sveCLastBScalar<uint64_t>(sourceValues_, VL_bits);
+        results_[0] = sveCLastBSimdScalar<uint64_t>(sourceValues_, VL_bits);
         break;
       }
       case Opcode::AArch64_CLASTB_VPZ_S: {  // clastb sd, pg, sn, zn.s
-        results_[0] = sveCLastBScalar<uint32_t>(sourceValues_, VL_bits);
+        results_[0] = sveCLastBSimdScalar<uint32_t>(sourceValues_, VL_bits);
         break;
       }
       case Opcode::AArch64_CLASTB_VPZ_H: {  // clastb hd, pg, hn, zn.h
-        results_[0] = sveCLastBScalar<uint16_t>(sourceValues_, VL_bits);
+        results_[0] = sveCLastBSimdScalar<uint16_t>(sourceValues_, VL_bits);
         break;
       }
       case Opcode::AArch64_CLASTB_VPZ_B: {  // clastb bd, pg, bn, zn.b
-        results_[0] = sveCLastBScalar<uint8_t>(sourceValues_, VL_bits);
+        results_[0] = sveCLastBSimdScalar<uint8_t>(sourceValues_, VL_bits);
         break;
       }
       case Opcode::AArch64_LD1_MXIPXX_H_D: {  // ld1d {zath.d[ws, #imm]}, pg/z,

diff --git a/src/lib/pipeline/ReorderBuffer.cc b/src/lib/pipeline/ReorderBuffer.cc
@@ -87,10 +87,17 @@ unsigned int ReorderBuffer::commit(uint64_t maxCommitSize) {
       // sufficiently long, assume an error in SimEng has occurred.
       robHeadRepeatCounter_++;
       if (robHeadRepeatCounter_ > 10000000) {
-        std::cerr << "[SimEng:ReorderBuffer] Infinite loop detected in rob "
-                     "commit at instruction address "
+        std::cerr << "[SimEng:ReorderBuffer] Instruction stuck unable to "
+                     "commit at the head of ROB for a very long time at "
+                     "instruction address 0x"
                   << std::hex << uop->getInstructionAddress() << std::dec
-                  << " (" << uop->getMicroOpIndex() << ")." << std::endl;
+                  << " (MicroOp Index: " << uop->getMicroOpIndex()
+                  << "). This is unexpected behaviour for most valid core "
+                     "configurations, though may arise in designs with very "
+                     "high latencies or bottlenecks. If this is not the case, "
+                     "please try re-running. Please raise an issue on GitHub "
+                     "if the problem persists."
+                  << std::endl;
         exit(1);
       }
       break;