Patch note: line breaks were restored from a whitespace-collapsed copy of this
diff. Blank context lines and indentation were inferred from the hunk line
counts, so apply with `git apply --ignore-whitespace`. The shf_impl hunk was
revised (64-bit funnel shift), so the post-image blob id on the
src/cuda-sim/instructions.cc `index` line no longer matches the new content.

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 39f65c94c..8e0ae2324 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -4,7 +4,6 @@ name: Short-Tests
 
 # Controls when the workflow will run
 on:
-  # Triggers the workflow on push or pull request events but only for the mydev branch
   push:
     branches-ignore:
       - "gh-readonly-queue**"
@@ -86,25 +85,21 @@ jobs:
       - name: Run Simulation
         run: /bin/bash $GITHUB_WORKSPACE/short-tests.sh
   format-code:
-    runs-on: ubuntu-latest
+    if: github.event_name == 'pull_request'
+    runs-on: tgrogers-raid
     needs: [build-TITANV, build-TITANV-LOCALXBAR, build-QV100, build-2060, build-3070]
-    permissions:
-      # Give the default GITHUB_TOKEN write permission to commit and push the
-      # added or changed files to the repository.
-      contents: write
     steps:
       - uses: actions/checkout@v4
-      # Other steps that change files in the repository go here
-      # …
+        with:
+          ref: ${{github.event.pull_request.head.ref}}
+          repository: ${{github.event.pull_request.head.repo.full_name}}
+          ssh-key: ''
+
       - name: Run clang-format
         run: |
-          sudo apt-get install -y clang-format
+          git config user.name "purdue-jenkins"
+          git config user.email "purdue-jenkins@users.noreply.github.com"
+          git remote set-url origin git@github.com:${{github.event.pull_request.head.repo.full_name}}
+          git remote -v
           /bin/bash ./format-code.sh
-      - uses: stefanzweifel/git-auto-commit-action@v5
-        with:
-          # Optional. Commit message for the created commit.
-          # Defaults to "Apply automatic changes"
-          commit_message: Automated clang-format
-          # Optional. Option used by `git-status` to determine if the repository is
-          # dirty. See https://git-scm.com/docs/git-status#_options
-          status_options: '--untracked-files=no'
\ No newline at end of file
+          if git status --untracked-files=no | grep -q "nothing to commit"; then echo "No changes to commit."; else git commit -a -m "Automated Format"; git push; fi
diff --git a/src/cuda-sim/instructions.cc b/src/cuda-sim/instructions.cc
index 4792efc80..108de9759 100644
--- a/src/cuda-sim/instructions.cc
+++ b/src/cuda-sim/instructions.cc
@@ -5441,6 +5441,44 @@ void shfl_impl(const ptx_instruction *pI, core_t *core, warp_inst_t inst) {
   }
 }
 
+// Funnel shift (PTX shf.{l,r}.{wrap,clamp}.b32 d, a, b, c).
+// Treats {b:a} as one 64-bit value (b = upper half), shifts it by n and
+// writes 32 bits to d: the upper half for .l, the lower half for .r.
+void shf_impl(const ptx_instruction *pI, ptx_thread_info *thread) {
+  ptx_reg_t a, b, c, d;
+  const operand_info &dst = pI->dst();
+  const operand_info &src1 = pI->src1();
+  const operand_info &src2 = pI->src2();
+  const operand_info &src3 = pI->src3();
+
+  // Only b32 is allowed
+  unsigned i_type = pI->get_type();
+  a = thread->get_operand_value(src1, dst, i_type, thread, 1);
+  b = thread->get_operand_value(src2, dst, i_type, thread, 1);
+  c = thread->get_operand_value(src3, dst, i_type, thread, 1);
+
+  if (i_type != B32_TYPE)
+    printf("Only the b32 data_type is allowed per the ISA\n");
+
+  // .clamp saturates the shift amount at 32; .wrap keeps its low 5 bits.
+  unsigned n;
+  if (pI->clamp_mode())
+    n = (c.u32 < 32) ? c.u32 : 32;
+  else
+    n = c.u32 & 0x1f;
+
+  // Shift the 64-bit concatenation: the two-halves 32-bit form shifts by 32
+  // when n == 0 or n == 32, which is undefined behavior in C++.
+  const unsigned long long concat =
+      (static_cast<unsigned long long>(b.u32) << 32) | a.u32;
+  if (pI->left_mode())
+    d.u32 = static_cast<unsigned>((concat << n) >> 32);  // upper 32 bits
+  else
+    d.u32 = static_cast<unsigned>(concat >> n);  // lower 32 bits
+
+  thread->set_operand_value(dst, d, i_type, thread, pI);
+}
+
 void shl_impl(const ptx_instruction *pI, ptx_thread_info *thread) {
   ptx_reg_t a, b, d;
   const operand_info &dst = pI->dst();
diff --git a/src/cuda-sim/opcodes.def b/src/cuda-sim/opcodes.def
index f5bf156e2..83a23ea77 100644
--- a/src/cuda-sim/opcodes.def
+++ b/src/cuda-sim/opcodes.def
@@ -103,6 +103,7 @@ OP_DEF(SELP_OP,selp_impl,"selp",1,1)
 OP_DEF(SETP_OP,setp_impl,"setp",1,1)
 OP_DEF(SET_OP,set_impl,"set",1,1)
 OP_W_DEF(SHFL_OP,shfl_impl,"shfl",1,10)
+OP_DEF(SHF_OP,shf_impl,"shf",1,1)
 OP_DEF(SHL_OP,shl_impl,"shl",1,1)
 OP_DEF(SHR_OP,shr_impl,"shr",1,1)
 OP_DEF(SIN_OP,sin_impl,"sin",1,4)
diff --git a/src/cuda-sim/ptx.l b/src/cuda-sim/ptx.l
index 15b3cf77e..0810ef6e2 100644
--- a/src/cuda-sim/ptx.l
+++ b/src/cuda-sim/ptx.l
@@ -134,6 +134,7 @@ selp TC; yylval->int_value = SELP_OP; return OPCODE;
 setp TC; yylval->int_value = SETP_OP; return OPCODE;
 set TC; yylval->int_value = SET_OP; return OPCODE;
 shfl TC; yylval->int_value = SHFL_OP; return OPCODE;
+shf TC; yylval->int_value = SHF_OP; return OPCODE;
 shl TC; yylval->int_value = SHL_OP; return OPCODE;
 shr TC; yylval->int_value = SHR_OP; return OPCODE;
 sin TC; yylval->int_value = SIN_OP; return OPCODE;
@@ -317,6 +318,9 @@ breakaddr TC; yylval->int_value = BREAKADDR_OP; return OPCODE;
 
 \.sat TC; return SAT_OPTION;
 
+\.l TC; return LEFT_OPTION;
+\.r TC; return RIGHT_OPTION;
+
 \.eq TC; return EQ_OPTION;
 \.ne TC; return NE_OPTION;
 \.lt TC; return LT_OPTION;
@@ -354,6 +358,8 @@ breakaddr TC; yylval->int_value = BREAKADDR_OP; return OPCODE;
 \.arrive TC; return ARRIVE_OPTION;
 \.red TC; return RED_OPTION;
 
+\.clamp TC; return CLAMP_OPTION;
+\.wrap TC; return WRAP_OPTION;
 
 \.approx TC; return APPROX_OPTION;
 \.full TC; return FULL_OPTION;
@@ -488,4 +494,4 @@ int ptx_error( yyscan_t yyscanner, ptx_recognizer* recognizer, const char *s )
 	fflush(stdout);
 	//exit(1);
 	return 0;
-}
\ No newline at end of file
+}
diff --git a/src/cuda-sim/ptx.y b/src/cuda-sim/ptx.y
index b38f78352..61183e88c 100644
--- a/src/cuda-sim/ptx.y
+++ b/src/cuda-sim/ptx.y
@@ -220,6 +220,10 @@ class ptx_recognizer;
 %token PRMT_RC16_MODE;
 %token PRMT_ECL_MODE;
 %token PRMT_ECR_MODE;
+%token WRAP_OPTION;
+%token CLAMP_OPTION;
+%token LEFT_OPTION;
+%token RIGHT_OPTION;
 
 %type function_decl_header
 %type function_decl
@@ -507,6 +511,10 @@ option: type_spec
 	| DOWN_OPTION { recognizer->add_option(DOWN_OPTION); }
 	| BFLY_OPTION { recognizer->add_option(BFLY_OPTION); }
 	| IDX_OPTION { recognizer->add_option(IDX_OPTION); }
+	| WRAP_OPTION { recognizer->add_option(WRAP_OPTION); }
+	| CLAMP_OPTION { recognizer->add_option(CLAMP_OPTION); }
+	| LEFT_OPTION { recognizer->add_option(LEFT_OPTION); }
+	| RIGHT_OPTION { recognizer->add_option(RIGHT_OPTION); }
 	;
 
 atomic_operation_spec: ATOMIC_AND { recognizer->add_option(ATOMIC_AND); }
diff --git a/src/cuda-sim/ptx_ir.cc b/src/cuda-sim/ptx_ir.cc
index d3095428f..139920930 100644
--- a/src/cuda-sim/ptx_ir.cc
+++ b/src/cuda-sim/ptx_ir.cc
@@ -1227,6 +1227,8 @@ ptx_instruction::ptx_instruction(
   m_rounding_mode = RN_OPTION;
   m_compare_op = -1;
   m_saturation_mode = 0;
+  m_clamp_mode = 0;
+  m_left_mode = 0;
   m_geom_spec = 0;
   m_vector_spec = 0;
   m_atomic_spec = 0;
@@ -1293,6 +1295,18 @@ ptx_instruction::ptx_instruction(
       case SAT_OPTION:
         m_saturation_mode = 1;
         break;
+      case WRAP_OPTION:
+        m_clamp_mode = 0;
+        break;
+      case CLAMP_OPTION:
+        m_clamp_mode = 1;
+        break;
+      case LEFT_OPTION:
+        m_left_mode = 1;
+        break;
+      case RIGHT_OPTION:
+        m_left_mode = 0;
+        break;
       case RNI_OPTION:
       case RZI_OPTION:
       case RMI_OPTION:
diff --git a/src/cuda-sim/ptx_ir.h b/src/cuda-sim/ptx_ir.h
index 8b1f19c86..d253866db 100644
--- a/src/cuda-sim/ptx_ir.h
+++ b/src/cuda-sim/ptx_ir.h
@@ -1085,6 +1085,8 @@ class ptx_instruction : public warp_inst_t {
   unsigned cache_option() const { return m_cache_option; }
   unsigned rounding_mode() const { return m_rounding_mode; }
   unsigned saturation_mode() const { return m_saturation_mode; }
+  unsigned clamp_mode() const { return m_clamp_mode; }
+  unsigned left_mode() const { return m_left_mode; }
   unsigned dimension() const { return m_geom_spec; }
   unsigned barrier_op() const { return m_barrier_op; }
   unsigned shfl_op() const { return m_shfl_op; }
@@ -1159,6 +1161,8 @@ class ptx_instruction : public warp_inst_t {
   unsigned m_rounding_mode;
   unsigned m_compare_op;
   unsigned m_saturation_mode;
+  unsigned m_clamp_mode;
+  unsigned m_left_mode;
   unsigned m_barrier_op;
   unsigned m_shfl_op;
   unsigned m_prmt_op;
diff --git a/src/gpgpu-sim/gpu-sim.h b/src/gpgpu-sim/gpu-sim.h
index c8fa6eb03..8e81451b6 100644
--- a/src/gpgpu-sim/gpu-sim.h
+++ b/src/gpgpu-sim/gpu-sim.h
@@ -132,9 +132,9 @@ struct power_config {
 
     // NOTE: After changing the nonlinear model to only scaling idle core,
     // NOTE: The min_inc_per_active_sm is not used any more
-    if (g_use_nonlinear_model)
-      sscanf(gpu_nonlinear_model_config, "%lf:%lf", &gpu_idle_core_power,
-             &gpu_min_inc_per_active_sm);
+    // if (g_use_nonlinear_model)
+    //   sscanf(gpu_nonlinear_model_config, "%lf:%lf", &gpu_idle_core_power,
+    //          &gpu_min_inc_per_active_sm);
   }
 
   void reg_options(class OptionParser *opp);
diff --git a/src/gpgpu-sim/shader.h b/src/gpgpu-sim/shader.h
index 10fb773ce..e658a14c9 100644
--- a/src/gpgpu-sim/shader.h
+++ b/src/gpgpu-sim/shader.h
@@ -135,7 +135,7 @@ class shd_warp_t {
     m_waiting_ldgsts = false;
 
     // Ni: Clear m_ldgdepbar_buf
-    for (int i = 0; i < m_ldgdepbar_buf.size(); i++) {
+    for (unsigned i = 0; i < m_ldgdepbar_buf.size(); i++) {
       m_ldgdepbar_buf[i].clear();
     }
     m_ldgdepbar_buf.clear();
@@ -167,7 +167,7 @@ class shd_warp_t {
     m_waiting_ldgsts = false;
 
     // Ni: Clear m_ldgdepbar_buf
-    for (int i = 0; i < m_ldgdepbar_buf.size(); i++) {
+    for (unsigned i = 0; i < m_ldgdepbar_buf.size(); i++) {
       m_ldgdepbar_buf[i].clear();
     }
     m_ldgdepbar_buf.clear();