diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 39f65c94c..8e0ae2324 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -4,7 +4,6 @@ name: Short-Tests
 
 # Controls when the workflow will run
 on:
-  # Triggers the workflow on push or pull request events but only for the mydev branch
   push:
     branches-ignore:
       - "gh-readonly-queue**"
@@ -86,25 +85,21 @@ jobs:
       - name: Run Simulation
         run: /bin/bash $GITHUB_WORKSPACE/short-tests.sh
   format-code:
-    runs-on: ubuntu-latest
+    if: github.event_name == 'pull_request'
+    runs-on: tgrogers-raid
     needs: [build-TITANV, build-TITANV-LOCALXBAR, build-QV100, build-2060, build-3070]
-    permissions:
-      # Give the default GITHUB_TOKEN write permission to commit and push the
-      # added or changed files to the repository.
-      contents: write
     steps:
       - uses: actions/checkout@v4
-      # Other steps that change files in the repository go here
-      # …
+        with:
+          ref: ${{github.event.pull_request.head.ref}}
+          repository: ${{github.event.pull_request.head.repo.full_name}}
+          ssh-key: ''
+
       - name: Run clang-format
         run: |
-          sudo apt-get install -y clang-format
+          git config user.name "purdue-jenkins"
+          git config user.email "purdue-jenkins@users.noreply.github.com"
+          git remote set-url origin git@github.com:${{github.event.pull_request.head.repo.full_name}}
+          git remote -v
           /bin/bash ./format-code.sh
-      - uses: stefanzweifel/git-auto-commit-action@v5
-        with:
-          # Optional. Commit message for the created commit.
-          # Defaults to "Apply automatic changes"
-          commit_message: Automated clang-format
-          # Optional. Option used by `git-status` to determine if the repository is 
-          # dirty. See https://git-scm.com/docs/git-status#_options
-          status_options: '--untracked-files=no'
\ No newline at end of file
+          if git diff --quiet HEAD; then echo "No changes to commit."; else git commit -a -m "Automated Format"; git push; fi
diff --git a/src/cuda-sim/instructions.cc b/src/cuda-sim/instructions.cc
index 4792efc80..108de9759 100644
--- a/src/cuda-sim/instructions.cc
+++ b/src/cuda-sim/instructions.cc
@@ -5441,6 +5441,38 @@ void shfl_impl(const ptx_instruction *pI, core_t *core, warp_inst_t inst) {
   }
 }
 
+// shf.{l,r}.{wrap,clamp}.b32 d, a, b, c: funnel-shift the 64-bit value {b:a}.
+void shf_impl(const ptx_instruction *pI, ptx_thread_info *thread) {
+  ptx_reg_t a, b, c, d;
+  const operand_info &dst = pI->dst();
+  const operand_info &src1 = pI->src1();
+  const operand_info &src2 = pI->src2();
+  const operand_info &src3 = pI->src3();
+
+  // Only b32 is allowed
+  unsigned i_type = pI->get_type();
+  a = thread->get_operand_value(src1, dst, i_type, thread, 1);
+  b = thread->get_operand_value(src2, dst, i_type, thread, 1);
+  c = thread->get_operand_value(src3, dst, i_type, thread, 1);
+
+  if (i_type != B32_TYPE)
+    printf("Only the b32 data_type is allowed per the ISA\n");
+
+  // .clamp saturates the shift amount at 32; .wrap (default) uses c mod 32.
+  unsigned n = c.u32 & 0x1f;
+  if (pI->clamp_mode())
+    n = (c.u32 < 32) ? c.u32 : 32;
+
+  // Use a 64-bit shift: n can be 0 or 32, and 32-bit shifts by 32 are UB.
+  unsigned long long val = ((unsigned long long)b.u32 << 32) | a.u32;
+  if (pI->left_mode())
+    d.u32 = (unsigned)((val << n) >> 32);  // upper word of {b:a} << n
+  else
+    d.u32 = (unsigned)(val >> n);  // lower word of {b:a} >> n
+
+  thread->set_operand_value(dst, d, i_type, thread, pI);
+}
+
 void shl_impl(const ptx_instruction *pI, ptx_thread_info *thread) {
   ptx_reg_t a, b, d;
   const operand_info &dst = pI->dst();
diff --git a/src/cuda-sim/opcodes.def b/src/cuda-sim/opcodes.def
index f5bf156e2..83a23ea77 100644
--- a/src/cuda-sim/opcodes.def
+++ b/src/cuda-sim/opcodes.def
@@ -103,6 +103,7 @@ OP_DEF(SELP_OP,selp_impl,"selp",1,1)
 OP_DEF(SETP_OP,setp_impl,"setp",1,1)
 OP_DEF(SET_OP,set_impl,"set",1,1)
 OP_W_DEF(SHFL_OP,shfl_impl,"shfl",1,10)
+OP_DEF(SHF_OP,shf_impl,"shf",1,1)
 OP_DEF(SHL_OP,shl_impl,"shl",1,1)
 OP_DEF(SHR_OP,shr_impl,"shr",1,1)
 OP_DEF(SIN_OP,sin_impl,"sin",1,4)
diff --git a/src/cuda-sim/ptx.l b/src/cuda-sim/ptx.l
index 15b3cf77e..0810ef6e2 100644
--- a/src/cuda-sim/ptx.l
+++ b/src/cuda-sim/ptx.l
@@ -134,6 +134,7 @@ selp	TC; yylval->int_value = SELP_OP; return OPCODE;
 setp    TC; yylval->int_value = SETP_OP; return OPCODE;
 set	TC; yylval->int_value = SET_OP; return OPCODE;
 shfl	TC; yylval->int_value = SHFL_OP; return OPCODE;
+shf	TC; yylval->int_value = SHF_OP; return OPCODE;
 shl     TC; yylval->int_value = SHL_OP; return OPCODE;
 shr     TC; yylval->int_value = SHR_OP; return OPCODE;
 sin	TC; yylval->int_value = SIN_OP; return OPCODE;
@@ -317,6 +318,9 @@ breakaddr  TC; yylval->int_value = BREAKADDR_OP; return OPCODE;
 
 \.sat	TC; return SAT_OPTION;
 
+\.l		TC; return LEFT_OPTION;
+\.r		TC; return RIGHT_OPTION;
+
 \.eq    TC; return EQ_OPTION;
 \.ne    TC; return NE_OPTION;
 \.lt    TC; return LT_OPTION;
@@ -354,6 +358,8 @@ breakaddr  TC; yylval->int_value = BREAKADDR_OP; return OPCODE;
 \.arrive TC; return ARRIVE_OPTION;
 \.red TC; return RED_OPTION;
 
+\.clamp	TC; return CLAMP_OPTION;
+\.wrap	TC; return WRAP_OPTION;
 
 \.approx TC; return APPROX_OPTION;
 \.full  TC; return FULL_OPTION;
@@ -488,4 +494,4 @@ int ptx_error( yyscan_t yyscanner, ptx_recognizer* recognizer, const char *s )
 	fflush(stdout);
 	//exit(1);
 	return 0;
-}
\ No newline at end of file
+}
diff --git a/src/cuda-sim/ptx.y b/src/cuda-sim/ptx.y
index b38f78352..61183e88c 100644
--- a/src/cuda-sim/ptx.y
+++ b/src/cuda-sim/ptx.y
@@ -220,6 +220,10 @@ class ptx_recognizer;
 %token	PRMT_RC16_MODE;
 %token	PRMT_ECL_MODE;
 %token	PRMT_ECR_MODE;
+%token	WRAP_OPTION;
+%token	CLAMP_OPTION;
+%token	LEFT_OPTION;
+%token	RIGHT_OPTION;
 
 %type <int_value> function_decl_header
 %type <ptr_value> function_decl
@@ -507,6 +511,10 @@ option: type_spec
 	| DOWN_OPTION { recognizer->add_option(DOWN_OPTION); }
 	| BFLY_OPTION { recognizer->add_option(BFLY_OPTION); }
 	| IDX_OPTION { recognizer->add_option(IDX_OPTION); }
+	| WRAP_OPTION { recognizer->add_option(WRAP_OPTION); }
+	| CLAMP_OPTION { recognizer->add_option(CLAMP_OPTION); }
+	| LEFT_OPTION { recognizer->add_option(LEFT_OPTION); }
+	| RIGHT_OPTION { recognizer->add_option(RIGHT_OPTION); }
 	;
 
 atomic_operation_spec: ATOMIC_AND { recognizer->add_option(ATOMIC_AND); }
diff --git a/src/cuda-sim/ptx_ir.cc b/src/cuda-sim/ptx_ir.cc
index d3095428f..139920930 100644
--- a/src/cuda-sim/ptx_ir.cc
+++ b/src/cuda-sim/ptx_ir.cc
@@ -1227,6 +1227,8 @@ ptx_instruction::ptx_instruction(
   m_rounding_mode = RN_OPTION;
   m_compare_op = -1;
   m_saturation_mode = 0;
+  m_clamp_mode = 0;
+  m_left_mode = 0;
   m_geom_spec = 0;
   m_vector_spec = 0;
   m_atomic_spec = 0;
@@ -1293,6 +1295,18 @@ ptx_instruction::ptx_instruction(
       case SAT_OPTION:
         m_saturation_mode = 1;
         break;
+      case WRAP_OPTION:
+        m_clamp_mode = 0;
+        break;
+      case CLAMP_OPTION:
+        m_clamp_mode = 1;
+        break;
+      case LEFT_OPTION:
+        m_left_mode = 1;
+        break;
+      case RIGHT_OPTION:
+        m_left_mode = 0;
+        break;
       case RNI_OPTION:
       case RZI_OPTION:
       case RMI_OPTION:
diff --git a/src/cuda-sim/ptx_ir.h b/src/cuda-sim/ptx_ir.h
index 8b1f19c86..d253866db 100644
--- a/src/cuda-sim/ptx_ir.h
+++ b/src/cuda-sim/ptx_ir.h
@@ -1085,6 +1085,8 @@ class ptx_instruction : public warp_inst_t {
   unsigned cache_option() const { return m_cache_option; }
   unsigned rounding_mode() const { return m_rounding_mode; }
   unsigned saturation_mode() const { return m_saturation_mode; }
+  unsigned clamp_mode() const { return m_clamp_mode; }
+  unsigned left_mode() const { return m_left_mode; }
   unsigned dimension() const { return m_geom_spec; }
   unsigned barrier_op() const { return m_barrier_op; }
   unsigned shfl_op() const { return m_shfl_op; }
@@ -1159,6 +1161,8 @@ class ptx_instruction : public warp_inst_t {
   unsigned m_rounding_mode;
   unsigned m_compare_op;
   unsigned m_saturation_mode;
+  unsigned m_clamp_mode;
+  unsigned m_left_mode;
   unsigned m_barrier_op;
   unsigned m_shfl_op;
   unsigned m_prmt_op;
diff --git a/src/gpgpu-sim/gpu-sim.h b/src/gpgpu-sim/gpu-sim.h
index c8fa6eb03..8e81451b6 100644
--- a/src/gpgpu-sim/gpu-sim.h
+++ b/src/gpgpu-sim/gpu-sim.h
@@ -132,9 +132,9 @@ struct power_config {
 
     // NOTE: After changing the nonlinear model to only scaling idle core,
     // NOTE: The min_inc_per_active_sm is not used any more
-    if (g_use_nonlinear_model)
-      sscanf(gpu_nonlinear_model_config, "%lf:%lf", &gpu_idle_core_power,
-             &gpu_min_inc_per_active_sm);
+    // if (g_use_nonlinear_model)
+    //   sscanf(gpu_nonlinear_model_config, "%lf:%lf", &gpu_idle_core_power,
+    //          &gpu_min_inc_per_active_sm);
   }
   void reg_options(class OptionParser *opp);
 
diff --git a/src/gpgpu-sim/shader.h b/src/gpgpu-sim/shader.h
index 10fb773ce..e658a14c9 100644
--- a/src/gpgpu-sim/shader.h
+++ b/src/gpgpu-sim/shader.h
@@ -135,7 +135,7 @@ class shd_warp_t {
     m_waiting_ldgsts = false;
 
     // Ni: Clear m_ldgdepbar_buf
-    for (int i = 0; i < m_ldgdepbar_buf.size(); i++) {
+    for (unsigned i = 0; i < m_ldgdepbar_buf.size(); i++) {
       m_ldgdepbar_buf[i].clear();
     }
     m_ldgdepbar_buf.clear();
@@ -167,7 +167,7 @@ class shd_warp_t {
     m_waiting_ldgsts = false;
 
     // Ni: Clear m_ldgdepbar_buf
-    for (int i = 0; i < m_ldgdepbar_buf.size(); i++) {
+    for (unsigned i = 0; i < m_ldgdepbar_buf.size(); i++) {
       m_ldgdepbar_buf[i].clear();
     }
     m_ldgdepbar_buf.clear();