From 3b1edd93b41b69a430e876b3f4343ecda97b24b3 Mon Sep 17 00:00:00 2001 From: Maurus Item Date: Thu, 4 Jul 2024 09:27:59 +0200 Subject: [PATCH 01/14] Revert ":wrench: Add external reg enable to slices (#89)" This reverts commit f1846d6dbd160e5f0831d36acbe469e8dcf2d09d. --- src/fpnew_cast_multi.sv | 13 ++++----- src/fpnew_divsqrt_multi.sv | 11 +++----- src/fpnew_divsqrt_th_32.sv | 11 +++----- src/fpnew_fma.sv | 15 +++++------ src/fpnew_fma_multi.sv | 13 ++++----- src/fpnew_noncomp.sv | 13 ++++----- src/fpnew_opgroup_block.sv | 6 ++--- src/fpnew_opgroup_fmt_slice.sv | 21 +++++---------- src/fpnew_opgroup_multifmt_slice.sv | 42 ++++++++++++----------------- 9 files changed, 55 insertions(+), 90 deletions(-) diff --git a/src/fpnew_cast_multi.sv b/src/fpnew_cast_multi.sv index 04692cad..3ef7304a 100644 --- a/src/fpnew_cast_multi.sv +++ b/src/fpnew_cast_multi.sv @@ -26,8 +26,7 @@ module fpnew_cast_multi #( // Do not change localparam int unsigned WIDTH = fpnew_pkg::maximum(fpnew_pkg::max_fp_width(FpFmtConfig), fpnew_pkg::max_int_width(IntFmtConfig)), - localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS, - localparam int unsigned ExtRegEnaWidth = NumPipeRegs == 0 ? 
1 : NumPipeRegs + localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS ) ( input logic clk_i, input logic rst_ni, @@ -58,9 +57,7 @@ module fpnew_cast_multi #( output logic out_valid_o, input logic out_ready_i, // Indication of valid data in flight - output logic busy_o, - // External register enable override - input logic [ExtRegEnaWidth-1:0] reg_ena_i + output logic busy_o ); // ---------- @@ -153,7 +150,7 @@ module fpnew_cast_multi #( // Valid: enabled by ready signal, synchronous clear with the flush signal `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) // Enable register if pipleine ready and a valid data item is present - assign reg_ena = (inp_pipe_ready[i] & inp_pipe_valid_q[i]) | reg_ena_i[i]; + assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0) `FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0) @@ -378,7 +375,7 @@ module fpnew_cast_multi #( // Valid: enabled by ready signal, synchronous clear with the flush signal `FFLARNC(mid_pipe_valid_q[i+1], mid_pipe_valid_q[i], mid_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) // Enable register if pipleine ready and a valid data item is present - assign reg_ena = (mid_pipe_ready[i] & mid_pipe_valid_q[i]) | reg_ena_i[NUM_INP_REGS + i]; + assign reg_ena = mid_pipe_ready[i] & mid_pipe_valid_q[i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(mid_pipe_input_sign_q[i+1], mid_pipe_input_sign_q[i], reg_ena, '0) `FFL(mid_pipe_input_exp_q[i+1], mid_pipe_input_exp_q[i], reg_ena, '0) @@ -780,7 +777,7 @@ module fpnew_cast_multi #( // Valid: enabled by ready signal, synchronous clear with the flush signal `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) // Enable register if pipleine ready and a valid data item is 
present - assign reg_ena = (out_pipe_ready[i] & out_pipe_valid_q[i]) | reg_ena_i[NUM_INP_REGS + NUM_MID_REGS + i]; + assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) diff --git a/src/fpnew_divsqrt_multi.sv b/src/fpnew_divsqrt_multi.sv index d47f71b9..52503cd1 100644 --- a/src/fpnew_divsqrt_multi.sv +++ b/src/fpnew_divsqrt_multi.sv @@ -24,8 +24,7 @@ module fpnew_divsqrt_multi #( parameter type AuxType = logic, // Do not change localparam int unsigned WIDTH = fpnew_pkg::max_fp_width(FpFmtConfig), - localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS, - localparam int unsigned ExtRegEnaWidth = NumPipeRegs == 0 ? 1 : NumPipeRegs + localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS ) ( input logic clk_i, input logic rst_ni, @@ -58,9 +57,7 @@ module fpnew_divsqrt_multi #( output logic out_valid_o, input logic out_ready_i, // Indication of valid data in flight - output logic busy_o, - // External register enable override - input logic [ExtRegEnaWidth-1:0] reg_ena_i + output logic busy_o ); // ---------- @@ -124,7 +121,7 @@ module fpnew_divsqrt_multi #( // Valid: enabled by ready signal, synchronous clear with the flush signal `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) // Enable register if pipleine ready and a valid data item is present - assign reg_ena = (inp_pipe_ready[i] & inp_pipe_valid_q[i]) | reg_ena_i[i]; + assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0) `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) @@ -362,7 +359,7 @@ module fpnew_divsqrt_multi #( // Valid: enabled by ready signal, 
synchronous clear with the flush signal `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) // Enable register if pipleine ready and a valid data item is present - assign reg_ena = (out_pipe_ready[i] & out_pipe_valid_q[i]) | reg_ena_i[NUM_INP_REGS + i]; + assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) diff --git a/src/fpnew_divsqrt_th_32.sv b/src/fpnew_divsqrt_th_32.sv index 8ddb80e9..71d23068 100644 --- a/src/fpnew_divsqrt_th_32.sv +++ b/src/fpnew_divsqrt_th_32.sv @@ -27,8 +27,7 @@ module fpnew_divsqrt_th_32 #( parameter type AuxType = logic, // Do not change localparam int unsigned WIDTH = 32, - localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS, - localparam int unsigned ExtRegEnaWidth = NumPipeRegs == 0 ? 1 : NumPipeRegs + localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS ) ( input logic clk_i, input logic rst_ni, @@ -55,9 +54,7 @@ module fpnew_divsqrt_th_32 #( output logic out_valid_o, input logic out_ready_i, // Indication of valid data in flight - output logic busy_o, - // External register enable override - input logic [ExtRegEnaWidth-1:0] reg_ena_i + output logic busy_o ); // ---------- @@ -116,7 +113,7 @@ module fpnew_divsqrt_th_32 #( // Valid: enabled by ready signal, synchronous clear with the flush signal `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) // Enable register if pipleine ready and a valid data item is present - assign reg_ena = (inp_pipe_ready[i] & inp_pipe_valid_q[i]) | reg_ena_i[i]; + assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0) 
`FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) @@ -457,7 +454,7 @@ module fpnew_divsqrt_th_32 #( // Valid: enabled by ready signal, synchronous clear with the flush signal `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) // Enable register if pipleine ready and a valid data item is present - assign reg_ena = (out_pipe_ready[i] & out_pipe_valid_q[i]) | reg_ena_i[NUM_INP_REGS + i]; + assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) diff --git a/src/fpnew_fma.sv b/src/fpnew_fma.sv index d9966ba2..449ca434 100644 --- a/src/fpnew_fma.sv +++ b/src/fpnew_fma.sv @@ -21,9 +21,8 @@ module fpnew_fma #( parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, parameter type TagType = logic, parameter type AuxType = logic, - // Do not change - localparam int unsigned WIDTH = fpnew_pkg::fp_width(FpFormat), - localparam int unsigned ExtRegEnaWidth = NumPipeRegs == 0 ? 
1 : NumPipeRegs + + localparam int unsigned WIDTH = fpnew_pkg::fp_width(FpFormat) // do not change ) ( input logic clk_i, input logic rst_ni, @@ -51,9 +50,7 @@ module fpnew_fma #( output logic out_valid_o, input logic out_ready_i, // Indication of valid data in flight - output logic busy_o, - // External register enable override - input logic [ExtRegEnaWidth-1:0] reg_ena_i + output logic busy_o ); // ---------- @@ -138,7 +135,7 @@ module fpnew_fma #( // Valid: enabled by ready signal, synchronous clear with the flush signal `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) // Enable register if pipleine ready and a valid data item is present - assign reg_ena = (inp_pipe_ready[i] & inp_pipe_valid_q[i]) | reg_ena_i[i]; + assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0) `FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0) @@ -454,7 +451,7 @@ module fpnew_fma #( // Valid: enabled by ready signal, synchronous clear with the flush signal `FFLARNC(mid_pipe_valid_q[i+1], mid_pipe_valid_q[i], mid_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) // Enable register if pipleine ready and a valid data item is present - assign reg_ena = (mid_pipe_ready[i] & mid_pipe_valid_q[i]) | reg_ena_i[NUM_INP_REGS + i]; + assign reg_ena = mid_pipe_ready[i] & mid_pipe_valid_q[i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(mid_pipe_eff_sub_q[i+1], mid_pipe_eff_sub_q[i], reg_ena, '0) `FFL(mid_pipe_exp_prod_q[i+1], mid_pipe_exp_prod_q[i], reg_ena, '0) @@ -674,7 +671,7 @@ module fpnew_fma #( // Valid: enabled by ready signal, synchronous clear with the flush signal `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) // Enable register if pipleine ready and a valid data item is present - assign 
reg_ena = (out_pipe_ready[i] & out_pipe_valid_q[i]) | reg_ena_i[NUM_INP_REGS + NUM_MID_REGS + i]; + assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) diff --git a/src/fpnew_fma_multi.sv b/src/fpnew_fma_multi.sv index c5bb2bbe..c1dea524 100644 --- a/src/fpnew_fma_multi.sv +++ b/src/fpnew_fma_multi.sv @@ -23,8 +23,7 @@ module fpnew_fma_multi #( parameter type AuxType = logic, // Do not change localparam int unsigned WIDTH = fpnew_pkg::max_fp_width(FpFmtConfig), - localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS, - localparam int unsigned ExtRegEnaWidth = NumPipeRegs == 0 ? 1 : NumPipeRegs + localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS ) ( input logic clk_i, input logic rst_ni, @@ -55,9 +54,7 @@ module fpnew_fma_multi #( output logic out_valid_o, input logic out_ready_i, // Indication of valid data in flight - output logic busy_o, - // External register enable override - input logic [ExtRegEnaWidth-1:0] reg_ena_i + output logic busy_o ); // ---------- @@ -157,7 +154,7 @@ module fpnew_fma_multi #( // Valid: enabled by ready signal, synchronous clear with the flush signal `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) // Enable register if pipleine ready and a valid data item is present - assign reg_ena = (inp_pipe_ready[i] & inp_pipe_valid_q[i]) | reg_ena_i[i]; + assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0) `FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0) @@ -549,7 +546,7 @@ module fpnew_fma_multi #( // Valid: enabled by ready signal, synchronous clear with the flush signal 
`FFLARNC(mid_pipe_valid_q[i+1], mid_pipe_valid_q[i], mid_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) // Enable register if pipleine ready and a valid data item is present - assign reg_ena = (mid_pipe_ready[i] & mid_pipe_valid_q[i]) | reg_ena_i[NUM_INP_REGS + i]; + assign reg_ena = mid_pipe_ready[i] & mid_pipe_valid_q[i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(mid_pipe_eff_sub_q[i+1], mid_pipe_eff_sub_q[i], reg_ena, '0) `FFL(mid_pipe_exp_prod_q[i+1], mid_pipe_exp_prod_q[i], reg_ena, '0) @@ -832,7 +829,7 @@ module fpnew_fma_multi #( // Valid: enabled by ready signal, synchronous clear with the flush signal `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) // Enable register if pipleine ready and a valid data item is present - assign reg_ena = (out_pipe_ready[i] & out_pipe_valid_q[i]) | reg_ena_i[NUM_INP_REGS + NUM_MID_REGS + i]; + assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) diff --git a/src/fpnew_noncomp.sv b/src/fpnew_noncomp.sv index a8cf765d..370e80e9 100644 --- a/src/fpnew_noncomp.sv +++ b/src/fpnew_noncomp.sv @@ -21,9 +21,8 @@ module fpnew_noncomp #( parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, parameter type TagType = logic, parameter type AuxType = logic, - // Do not change - localparam int unsigned WIDTH = fpnew_pkg::fp_width(FpFormat), - localparam int unsigned ExtRegEnaWidth = NumPipeRegs == 0 ? 
1 : NumPipeRegs + + localparam int unsigned WIDTH = fpnew_pkg::fp_width(FpFormat) // do not change ) ( input logic clk_i, input logic rst_ni, @@ -53,9 +52,7 @@ module fpnew_noncomp #( output logic out_valid_o, input logic out_ready_i, // Indication of valid data in flight - output logic busy_o, - // External register enable override - input logic [ExtRegEnaWidth-1:0] reg_ena_i + output logic busy_o ); // ---------- @@ -123,7 +120,7 @@ module fpnew_noncomp #( // Valid: enabled by ready signal, synchronous clear with the flush signal `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) // Enable register if pipleine ready and a valid data item is present - assign reg_ena = (inp_pipe_ready[i] & inp_pipe_valid_q[i]) | reg_ena_i[i]; + assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0) `FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0) @@ -393,7 +390,7 @@ module fpnew_noncomp #( // Valid: enabled by ready signal, synchronous clear with the flush signal `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) // Enable register if pipleine ready and a valid data item is present - assign reg_ena = (out_pipe_ready[i] & out_pipe_valid_q[i]) | reg_ena_i[NUM_INP_REGS + i]; + assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) diff --git a/src/fpnew_opgroup_block.sv b/src/fpnew_opgroup_block.sv index e9e7f9f6..998449cb 100644 --- a/src/fpnew_opgroup_block.sv +++ b/src/fpnew_opgroup_block.sv @@ -132,8 +132,7 @@ module fpnew_opgroup_block #( .tag_o ( fmt_outputs[fmt].tag ), .out_valid_o ( fmt_out_valid[fmt] ), 
.out_ready_i ( fmt_out_ready[fmt] ), - .busy_o ( fmt_busy[fmt] ), - .reg_ena_i ( '0 ) + .busy_o ( fmt_busy[fmt] ) ); // If the format wants to use merged ops, tie off the dangling ones not used here end else if (FpFmtMask[fmt] && ANY_MERGED && !IS_FIRST_MERGED) begin : merged_unused @@ -208,8 +207,7 @@ module fpnew_opgroup_block #( .tag_o ( fmt_outputs[FMT].tag ), .out_valid_o ( fmt_out_valid[FMT] ), .out_ready_i ( fmt_out_ready[FMT] ), - .busy_o ( fmt_busy[FMT] ), - .reg_ena_i ( '0 ) + .busy_o ( fmt_busy[FMT] ) ); end diff --git a/src/fpnew_opgroup_fmt_slice.sv b/src/fpnew_opgroup_fmt_slice.sv index 9aeb469b..35fbe484 100644 --- a/src/fpnew_opgroup_fmt_slice.sv +++ b/src/fpnew_opgroup_fmt_slice.sv @@ -21,14 +21,12 @@ module fpnew_opgroup_fmt_slice #( parameter logic EnableVectors = 1'b1, parameter int unsigned NumPipeRegs = 0, parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, - parameter logic ExtRegEna = 1'b0, parameter type TagType = logic, parameter int unsigned TrueSIMDClass = 0, // Do not change localparam int unsigned NUM_OPERANDS = fpnew_pkg::num_operands(OpGroup), localparam int unsigned NUM_LANES = fpnew_pkg::num_lanes(Width, FpFormat, EnableVectors), - localparam type MaskType = logic [NUM_LANES-1:0], - localparam int unsigned ExtRegEnaWidth = NumPipeRegs == 0 ? 
1 : NumPipeRegs + localparam type MaskType = logic [NUM_LANES-1:0] ) ( input logic clk_i, input logic rst_ni, @@ -54,9 +52,7 @@ module fpnew_opgroup_fmt_slice #( output logic out_valid_o, input logic out_ready_i, // Indication of valid data in flight - output logic busy_o, - // External register enable override - input logic [ExtRegEnaWidth-1:0] reg_ena_i + output logic busy_o ); localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(FpFormat); @@ -137,8 +133,7 @@ module fpnew_opgroup_fmt_slice #( .aux_o ( lane_vectorial[lane] ), .out_valid_o ( out_valid ), .out_ready_i ( out_ready ), - .busy_o ( lane_busy[lane] ), - .reg_ena_i + .busy_o ( lane_busy[lane] ) ); assign lane_is_class[lane] = 1'b0; assign lane_class_mask[lane] = fpnew_pkg::NEGINF; @@ -169,8 +164,7 @@ module fpnew_opgroup_fmt_slice #( // .aux_o ( lane_vectorial[lane] ), // .out_valid_o ( out_valid ), // .out_ready_i ( out_ready ), - // .busy_o ( lane_busy[lane] ), - // .reg_ena_i + // .busy_o ( lane_busy[lane] ) // ); // assign lane_is_class[lane] = 1'b0; end else if (OpGroup == fpnew_pkg::NONCOMP) begin : lane_instance @@ -204,8 +198,7 @@ module fpnew_opgroup_fmt_slice #( .aux_o ( lane_vectorial[lane] ), .out_valid_o ( out_valid ), .out_ready_i ( out_ready ), - .busy_o ( lane_busy[lane] ), - .reg_ena_i + .busy_o ( lane_busy[lane] ) ); end // ADD OTHER OPTIONS HERE @@ -214,8 +207,8 @@ module fpnew_opgroup_fmt_slice #( assign lane_out_valid[lane] = out_valid & ((lane == 0) | result_is_vector); // Properly NaN-box or sign-extend the slice result if not in use - assign local_result = (lane_out_valid[lane] | ExtRegEna) ? op_result : '{default: lane_ext_bit[0]}; - assign lane_status[lane] = (lane_out_valid[lane] | ExtRegEna) ? op_status : '0; + assign local_result = lane_out_valid[lane] ? op_result : '{default: lane_ext_bit[0]}; + assign lane_status[lane] = lane_out_valid[lane] ? 
op_status : '0; // Otherwise generate constant sign-extension end else begin diff --git a/src/fpnew_opgroup_multifmt_slice.sv b/src/fpnew_opgroup_multifmt_slice.sv index 61145562..f1151a58 100644 --- a/src/fpnew_opgroup_multifmt_slice.sv +++ b/src/fpnew_opgroup_multifmt_slice.sv @@ -25,14 +25,12 @@ module fpnew_opgroup_multifmt_slice #( parameter fpnew_pkg::divsqrt_unit_t DivSqrtSel = fpnew_pkg::THMULTI, parameter int unsigned NumPipeRegs = 0, parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, - parameter logic ExtRegEna = 1'b0, parameter type TagType = logic, // Do not change localparam int unsigned NUM_OPERANDS = fpnew_pkg::num_operands(OpGroup), localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS, localparam int unsigned NUM_SIMD_LANES = fpnew_pkg::max_num_lanes(Width, FpFmtConfig, EnableVectors), - localparam type MaskType = logic [NUM_SIMD_LANES-1:0], - localparam int unsigned ExtRegEnaWidth = NumPipeRegs == 0 ? 1 : NumPipeRegs + localparam type MaskType = logic [NUM_SIMD_LANES-1:0] ) ( input logic clk_i, input logic rst_ni, @@ -61,9 +59,7 @@ module fpnew_opgroup_multifmt_slice #( output logic out_valid_o, input logic out_ready_i, // Indication of valid data in flight - output logic busy_o, - // External register enable override - input logic [ExtRegEnaWidth-1:0] reg_ena_i + output logic busy_o ); if ((OpGroup == fpnew_pkg::DIVSQRT)) begin @@ -247,16 +243,15 @@ FP8. 
Please use the PULP DivSqrt unit when in need of div/sqrt operations on FP8 .in_valid_i ( in_valid ), .in_ready_o ( lane_in_ready[lane] ), .flush_i, - .result_o ( op_result ), - .status_o ( op_status ), - .extension_bit_o ( lane_ext_bit[lane] ), - .tag_o ( lane_tags[lane] ), - .mask_o ( lane_masks[lane] ), - .aux_o ( lane_aux[lane] ), - .out_valid_o ( out_valid ), - .out_ready_i ( out_ready ), - .busy_o ( lane_busy[lane] ), - .reg_ena_i + .result_o ( op_result ), + .status_o ( op_status ), + .extension_bit_o ( lane_ext_bit[lane] ), + .tag_o ( lane_tags[lane] ), + .mask_o ( lane_masks[lane] ), + .aux_o ( lane_aux[lane] ), + .out_valid_o ( out_valid ), + .out_ready_i ( out_ready ), + .busy_o ( lane_busy[lane] ) ); end else if (OpGroup == fpnew_pkg::DIVSQRT) begin : lane_instance @@ -288,8 +283,7 @@ FP8. Please use the PULP DivSqrt unit when in need of div/sqrt operations on FP8 .aux_o ( lane_aux[lane] ), .out_valid_o ( out_valid ), .out_ready_i ( out_ready ), - .busy_o ( lane_busy[lane] ), - .reg_ena_i + .busy_o ( lane_busy[lane] ) ); end else if(DivSqrtSel == fpnew_pkg::THMULTI) begin : gen_thmulti_c910_divsqrt fpnew_divsqrt_th_64_multi #( @@ -362,8 +356,7 @@ FP8. Please use the PULP DivSqrt unit when in need of div/sqrt operations on FP8 .aux_o ( lane_aux[lane] ), .out_valid_o ( out_valid ), .out_ready_i ( out_ready ), - .busy_o ( lane_busy[lane] ), - .reg_ena_i + .busy_o ( lane_busy[lane] ) ); end end else if (OpGroup == fpnew_pkg::NONCOMP) begin : lane_instance @@ -401,8 +394,7 @@ FP8. Please use the PULP DivSqrt unit when in need of div/sqrt operations on FP8 .aux_o ( lane_aux[lane] ), .out_valid_o ( out_valid ), .out_ready_i ( out_ready ), - .busy_o ( lane_busy[lane] ), - .reg_ena_i + .busy_o ( lane_busy[lane] ) ); end // ADD OTHER OPTIONS HERE @@ -411,8 +403,8 @@ FP8. 
Please use the PULP DivSqrt unit when in need of div/sqrt operations on FP8 assign lane_out_valid[lane] = out_valid & ((lane == 0) | result_is_vector); // Properly NaN-box or sign-extend the slice result if not in use - assign local_result = (lane_out_valid[lane] | ExtRegEna) ? op_result : '{default: lane_ext_bit[0]}; - assign lane_status[lane] = (lane_out_valid[lane] | ExtRegEna) ? op_status : '0; + assign local_result = lane_out_valid[lane] ? op_result : '{default: lane_ext_bit[0]}; + assign lane_status[lane] = lane_out_valid[lane] ? op_status : '0; // Otherwise generate constant sign-extension end else begin : inactive_lane @@ -509,7 +501,7 @@ FP8. Please use the PULP DivSqrt unit when in need of div/sqrt operations on FP8 // Valid: enabled by ready signal, synchronous clear with the flush signal `FFLARNC(byp_pipe_valid_q[i+1], byp_pipe_valid_q[i], byp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) // Enable register if pipleine ready and a valid data item is present - assign reg_ena = (byp_pipe_ready[i] & byp_pipe_valid_q[i]) | reg_ena_i[i]; + assign reg_ena = byp_pipe_ready[i] & byp_pipe_valid_q[i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(byp_pipe_target_q[i+1], byp_pipe_target_q[i], reg_ena, '0) `FFL(byp_pipe_aux_q[i+1], byp_pipe_aux_q[i], reg_ena, '0) From f32d69a6bc9449a93b2ad837ca971574941eba7e Mon Sep 17 00:00:00 2001 From: Maurus Item Date: Thu, 4 Jul 2024 09:30:14 +0200 Subject: [PATCH 02/14] Revert "Merge pull request #102 from michael-platzer/ext_div_sqrt" This reverts commit 4aac6b3e87a30c8567dbe7401eba3274eea18afc, reversing changes made to d6e581628f3517a1fb1257507d3214e599f7859d. 
--- src/fpnew_divsqrt_multi.sv | 39 +++++++++++++---------------- src/fpnew_opgroup_multifmt_slice.sv | 2 +- 2 files changed, 18 insertions(+), 23 deletions(-) diff --git a/src/fpnew_divsqrt_multi.sv b/src/fpnew_divsqrt_multi.sv index 52503cd1..5f1f99e6 100644 --- a/src/fpnew_divsqrt_multi.sv +++ b/src/fpnew_divsqrt_multi.sv @@ -139,9 +139,6 @@ module fpnew_divsqrt_multi #( assign dst_fmt_q = inp_pipe_dst_fmt_q[NUM_INP_REGS]; assign in_valid_q = inp_pipe_valid_q[NUM_INP_REGS]; - logic ext_op_start_q; - `FF(ext_op_start_q, reg_ena_i[NUM_INP_REGS-1], 1'b0) - // ----------------- // Input processing // ----------------- @@ -183,8 +180,8 @@ module fpnew_divsqrt_multi #( fsm_state_e state_q, state_d; // Valids are gated by the FSM ready. Invalid input ops run a sqrt to not lose illegal instr. - assign div_valid = ((in_valid_q & in_ready & ~flush_i) | ext_op_start_q) & (op_q == fpnew_pkg::DIV); - assign sqrt_valid = ((in_valid_q & in_ready & ~flush_i) | ext_op_start_q) & (op_q != fpnew_pkg::DIV); + assign div_valid = in_valid_q & (op_q == fpnew_pkg::DIV) & in_ready & ~flush_i; + assign sqrt_valid = in_valid_q & (op_q != fpnew_pkg::DIV) & in_ready & ~flush_i; assign op_starting = div_valid | sqrt_valid; // Hold additional information while the operation is in progress @@ -207,9 +204,7 @@ module fpnew_divsqrt_multi #( // Valid synch with other lanes // When one divsqrt unit completes an operation, keep its done high, waiting for the other lanes // As soon as all the lanes are over, we can clear this FF and start with a new operation - logic unit_done_clear; - `FFLARNC(unit_done_q, unit_done, unit_done, unit_done_clear, 1'b0, clk_i, rst_ni) - assign unit_done_clear = simd_synch_done | reg_ena_i[NUM_INP_REGS-1]; + `FFLARNC(unit_done_q, unit_done, unit_done, simd_synch_done, 1'b0, clk_i, rst_ni) // Tell the other units that this unit has finished now or in the past assign divsqrt_done_o = (unit_done_q | unit_done) & result_vec_op_q; @@ -291,20 +286,20 @@ module 
fpnew_divsqrt_multi #( logic hold_en; div_sqrt_top_mvp i_divsqrt_lei ( - .Clk_CI ( clk_i ), - .Rst_RBI ( rst_ni ), - .Div_start_SI ( div_valid ), - .Sqrt_start_SI ( sqrt_valid ), - .Operand_a_DI ( divsqrt_operands[0] ), - .Operand_b_DI ( divsqrt_operands[1] ), - .RM_SI ( rnd_mode_q ), - .Precision_ctl_SI ( '0 ), - .Format_sel_SI ( divsqrt_fmt ), - .Kill_SI ( flush_i | reg_ena_i[NUM_INP_REGS-1] ), - .Result_DO ( unit_result ), - .Fflags_SO ( unit_status ), - .Ready_SO ( unit_ready ), - .Done_SO ( unit_done ) + .Clk_CI ( clk_i ), + .Rst_RBI ( rst_ni ), + .Div_start_SI ( div_valid ), + .Sqrt_start_SI ( sqrt_valid ), + .Operand_a_DI ( divsqrt_operands[0] ), + .Operand_b_DI ( divsqrt_operands[1] ), + .RM_SI ( rnd_mode_q ), + .Precision_ctl_SI ( '0 ), + .Format_sel_SI ( divsqrt_fmt ), + .Kill_SI ( flush_i ), + .Result_DO ( unit_result ), + .Fflags_SO ( unit_status ), + .Ready_SO ( unit_ready ), + .Done_SO ( unit_done ) ); // Adjust result width and fix FP8 diff --git a/src/fpnew_opgroup_multifmt_slice.sv b/src/fpnew_opgroup_multifmt_slice.sv index f1151a58..958ca81b 100644 --- a/src/fpnew_opgroup_multifmt_slice.sv +++ b/src/fpnew_opgroup_multifmt_slice.sv @@ -518,7 +518,7 @@ FP8. Please use the PULP DivSqrt unit when in need of div/sqrt operations on FP8 assign conv_target_q = '0; end - if ((DivSqrtSel != fpnew_pkg::TH32) && !ExtRegEna) begin + if (DivSqrtSel != fpnew_pkg::TH32) begin // Synch lanes if there is more than one assign simd_synch_rdy = EnableVectors ? &divsqrt_ready[NUM_DIVSQRT_LANES-1:0] : divsqrt_ready[0]; assign simd_synch_done = EnableVectors ? 
&divsqrt_done[NUM_DIVSQRT_LANES-1:0] : divsqrt_done[0]; From 61d0264a969fd4e6e4bc5f962894ab68d2b468aa Mon Sep 17 00:00:00 2001 From: Maurus Item Date: Thu, 4 Jul 2024 09:30:44 +0200 Subject: [PATCH 03/14] =?UTF-8?q?Revert=20"=E2=9C=A8=20Add=20ext=20reg=20e?= =?UTF-8?q?na=20to=20new=20TH=2064=20divsqrt=20unit=20(#129)"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 6b8c4e08ba3546070c4b5de7141f14244e260e2b. --- src/fpnew_divsqrt_th_64_multi.sv | 31 ++++++++--------------------- src/fpnew_opgroup_multifmt_slice.sv | 3 +-- 2 files changed, 9 insertions(+), 25 deletions(-) diff --git a/src/fpnew_divsqrt_th_64_multi.sv b/src/fpnew_divsqrt_th_64_multi.sv index 2e66399f..b5fd9eb9 100644 --- a/src/fpnew_divsqrt_th_64_multi.sv +++ b/src/fpnew_divsqrt_th_64_multi.sv @@ -26,8 +26,7 @@ module fpnew_divsqrt_th_64_multi #( parameter type AuxType = logic, // Do not change localparam int unsigned WIDTH = fpnew_pkg::max_fp_width(FpFmtConfig), - localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS, - localparam int unsigned ExtRegEnaWidth = NumPipeRegs == 0 ? 
1 : NumPipeRegs + localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS ) ( input logic clk_i, input logic rst_ni, @@ -60,9 +59,7 @@ module fpnew_divsqrt_th_64_multi #( output logic out_valid_o, input logic out_ready_i, // Indication of valid data in flight - output logic busy_o, - // External register enable override - input logic [ExtRegEnaWidth-1:0] reg_ena_i + output logic busy_o ); // ---------- @@ -126,7 +123,7 @@ module fpnew_divsqrt_th_64_multi #( // Valid: enabled by ready signal, synchronous clear with the flush signal `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) // Enable register if pipleine ready and a valid data item is present - assign reg_ena = (inp_pipe_ready[i] & inp_pipe_valid_q[i]) | reg_ena_i[i]; + assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0) `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) @@ -144,16 +141,6 @@ module fpnew_divsqrt_th_64_multi #( assign dst_fmt_q = inp_pipe_dst_fmt_q[NUM_INP_REGS]; assign in_valid_q = inp_pipe_valid_q[NUM_INP_REGS]; - logic last_inp_reg_ena; - if (NUM_INP_REGS >= 1) begin : gen_last_inp_reg_ena_valid - assign last_inp_reg_ena = reg_ena_i[NUM_INP_REGS-1]; - end else begin : gen_last_inp_reg_ena_zero - assign last_inp_reg_ena = 1'b0; - end - - logic ext_op_start_q; - `FF(ext_op_start_q, last_inp_reg_ena, 1'b0) - // ----------------- // Input processing // ----------------- @@ -207,8 +194,8 @@ module fpnew_divsqrt_th_64_multi #( fsm_state_e state_q, state_d; // Valids are gated by the FSM ready. Invalid input ops run a sqrt to not lose illegal instr. 
- assign div_valid = ((in_valid_q & in_ready & ~flush_i) | ext_op_start_q) & (op_q == fpnew_pkg::DIV); - assign sqrt_valid = ((in_valid_q & in_ready & ~flush_i) | ext_op_start_q) & (op_q != fpnew_pkg::DIV); + assign div_valid = in_valid_q & (op_q == fpnew_pkg::DIV) & in_ready & ~flush_i; + assign sqrt_valid = in_valid_q & (op_q != fpnew_pkg::DIV) & in_ready & ~flush_i; assign op_starting = div_valid | sqrt_valid; // Hold additional information while the operation is in progress @@ -230,9 +217,7 @@ module fpnew_divsqrt_th_64_multi #( // Valid synch with other lanes // When one divsqrt unit completes an operation, keep its done high, waiting for the other lanes // As soon as all the lanes are over, we can clear this FF and start with a new operation - logic unit_done_clear; - `FFLARNC(unit_done_q, unit_done, unit_done, unit_done_clear, 1'b0, clk_i, rst_ni); - assign unit_done_clear = simd_synch_done | last_inp_reg_ena; + `FFLARNC(unit_done_q, unit_done, unit_done, simd_synch_done, 1'b0, clk_i, rst_ni); // Tell the other units that this unit has finished now or in the past assign divsqrt_done_o = (unit_done_q | unit_done) & result_vec_op_q; @@ -411,7 +396,7 @@ module fpnew_divsqrt_th_64_multi #( .idu_vfpu_rf_pipex_func ( {3'b0, divsqrt_fmt_q, 11'b0 ,sqrt_op, div_op} ), // Defines format (bits 16,15) and operation (bits 1,0) .idu_vfpu_rf_pipex_gateclk_sel ( func_sel ), // 2. Select func .pad_yy_icg_scan_en ( 1'b0 ), // SE signal for the redundant clock gating module - .rtu_yy_xx_flush ( flush_i | last_inp_reg_ena), // Flush + .rtu_yy_xx_flush ( flush_i ), // Flush .vfpu_yy_xx_dqnan ( 1'b0 ), // Disable qNaN, set to 1 if sNaN is used .vfpu_yy_xx_rm ( rm_q ), // Round mode. 
redundant if imm0 set to the same .pipex_dp_vfdsu_ereg ( ), // Don't care, used by C910 @@ -477,7 +462,7 @@ module fpnew_divsqrt_th_64_multi #( // Valid: enabled by ready signal, synchronous clear with the flush signal `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) // Enable register if pipleine ready and a valid data item is present - assign reg_ena = (out_pipe_ready[i] & out_pipe_valid_q[i]) | reg_ena_i[NUM_INP_REGS + i]; + assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) diff --git a/src/fpnew_opgroup_multifmt_slice.sv b/src/fpnew_opgroup_multifmt_slice.sv index 958ca81b..fce88df0 100644 --- a/src/fpnew_opgroup_multifmt_slice.sv +++ b/src/fpnew_opgroup_multifmt_slice.sv @@ -319,8 +319,7 @@ FP8. Please use the PULP DivSqrt unit when in need of div/sqrt operations on FP8 .aux_o ( lane_aux[lane] ), .out_valid_o ( out_valid ), .out_ready_i ( out_ready ), - .busy_o ( lane_busy[lane] ), - .reg_ena_i + .busy_o ( lane_busy[lane] ) ); end else begin : gen_pulp_divsqrt fpnew_divsqrt_multi #( From 916e3c9f4864ccfd36dee44458d7c140d4f4f755 Mon Sep 17 00:00:00 2001 From: Maurus Item Date: Wed, 15 May 2024 11:03:28 +0200 Subject: [PATCH 04/14] Added independent pipeline control / aux module. 
--- Bender.yml | 1 + src/fpnew_aux.sv | 122 +++++++++++++++++++++++++++++++++++++++++++++++ src_files.yml | 1 + 3 files changed, 124 insertions(+) create mode 100644 src/fpnew_aux.sv diff --git a/Bender.yml b/Bender.yml index c9b18715..d29a4557 100644 --- a/Bender.yml +++ b/Bender.yml @@ -37,6 +37,7 @@ sources: - vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_srt_radix16_with_sqrt.v - vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_srt.v - vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_top.v + - src/fpnew_aux.sv - src/fpnew_divsqrt_th_32.sv - src/fpnew_divsqrt_th_64_multi.sv - src/fpnew_divsqrt_multi.sv diff --git a/src/fpnew_aux.sv b/src/fpnew_aux.sv new file mode 100644 index 00000000..28059db5 --- /dev/null +++ b/src/fpnew_aux.sv @@ -0,0 +1,122 @@ +// Copyright 2024 ETH Zurich and University of Bologna. +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// SPDX-License-Identifier: SHL-0.51 + +// Author: Maurus Item +// +// Description Aux chain for FPNew, handles transmitting of shared handshake and aux data +// And enables the correct lanes so they always stay in sync. 
+ +`include "common_cells/registers.svh" + +module fpnew_aux #( + parameter int unsigned NumPipeRegs = 0, + parameter type TagType = logic, + parameter type AuxType = logic, + parameter int unsigned NumLanes = 1 +) ( + input logic clk_i, + input logic rst_ni, + // Input signals + input TagType tag_i, + input AuxType aux_i, + input logic is_vector_i, + input logic [NumLanes-1:0] lane_active_i, + // Input Handshake + input logic in_valid_i, + output logic in_ready_o, + input logic flush_i, + // Output signals + output TagType tag_o, + output AuxType aux_o, + output logic is_vector_o, + output logic [NumLanes-1:0] lane_active_o, + // Output handshake + output logic out_valid_o, + input logic out_ready_i, + // Register Enable for Lanes + output logic [NumPipeRegs-1:0] reg_enable_o, + output logic [NumPipeRegs-1:0] vector_reg_enable_o, + output logic [NumLanes-1:0][NumPipeRegs-1:0] lane_reg_enable_o, + // Indication of valid data in flight + output logic busy_o +); + + + // --------------- + // Input pipeline + // --------------- + // Input pipeline signals, index i holds signal after i register stages + TagType [0:NumPipeRegs] tag; + AuxType [0:NumPipeRegs] aux; + logic [0:NumPipeRegs] is_vector; + logic [0:NumPipeRegs][NumLanes-1:0] lane_active; + logic [0:NumPipeRegs] valid; + + // Ready signal is combinatorial for all stages + logic [0:NumPipeRegs] ready; + + // First element of pipeline is taken from inputs + assign tag [0] = tag_i; + assign aux [0] = aux_i; + assign is_vector [0] = is_vector_i; + assign valid [0] = in_valid_i; + assign lane_active[0] = lane_active_i; + + // Propagate pipeline ready signal to upstream circuitry + assign in_ready_o = ready[0]; + + // Generate the register stages + for (genvar i = 0; i < NumPipeRegs; i++) begin : gen_input_pipeline + + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it + assign ready[i] = ready[i+1] | ~valid[i+1]; + + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(valid[i+1], valid[i], ready[i], flush_i, 1'b0, clk_i, rst_ni) + + // Enable register if pipeline ready and a valid data item is present + assign reg_ena = ready[i] & valid[i]; + + // Drive external registers with reg enable + assign reg_enable_o[i] = reg_ena; + + // Drive external vector registers with reg enable if operation is a vector + assign vector_reg_enable_o[i] = reg_ena & is_vector[i]; + for (genvar l = 0; l < NumLanes; l++) begin + assign lane_reg_enable_o[l][i] = reg_ena & lane_active[i][l]; + end + + // Generate the pipeline registers within the stages, use enable-registers + `FFL( tag[i+1], tag[i], reg_ena, TagType'('0)) + `FFL( aux[i+1], aux[i], reg_ena, AuxType'('0)) + `FFL( is_vector[i+1], is_vector[i], reg_ena, '0 ) + `FFL(lane_active[i+1], lane_active[i], reg_ena, '0 ) + end + + // Ready travels backwards from output side, driven by downstream circuitry + assign ready[NumPipeRegs] = out_ready_i; + + // Assign module outputs + assign tag_o = tag [NumPipeRegs]; + assign aux_o = aux [NumPipeRegs]; + assign is_vector_o = is_vector [NumPipeRegs]; + assign out_valid_o = valid [NumPipeRegs]; + assign lane_active_o = lane_active[NumPipeRegs]; + + // Assign output Flags: Busy if any element inside the pipe is valid + assign busy_o = |valid; +endmodule diff --git a/src_files.yml b/src_files.yml index 14634faf..d41c7bc4 100644 --- a/src_files.yml +++ b/src_files.yml @@ -33,6 +33,7 @@ fpnew: vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_srt_radix16_with_sqrt.v, vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_srt.v, vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_top.v, + src/fpnew_aux.sv, src/fpnew_divsqrt_th_32.sv, src/fpnew_divsqrt_th_64_multi.sv, src/fpnew_divsqrt_multi.sv, From 48522fa5f133fe02454b5095770ac9496afe2a43
Mon Sep 17 00:00:00 2001 From: Maurus Item Date: Wed, 10 Jul 2024 16:07:05 +0200 Subject: [PATCH 05/14] Moved handshake and aux chains outside of computational elements. --- src/fpnew_cast_multi.sv | 96 ++++++----------------------------------- src/fpnew_fma.sv | 93 ++++++--------------------------------- src/fpnew_fma_multi.sv | 92 +++++---------------------------------- src/fpnew_noncomp.sv | 70 ++++-------------------------- 4 files changed, 45 insertions(+), 306 deletions(-) diff --git a/src/fpnew_cast_multi.sv b/src/fpnew_cast_multi.sv index 3ef7304a..558664f4 100644 --- a/src/fpnew_cast_multi.sv +++ b/src/fpnew_cast_multi.sv @@ -21,8 +21,7 @@ module fpnew_cast_multi #( // FPU configuration parameter int unsigned NumPipeRegs = 0, parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, - parameter type TagType = logic, - parameter type AuxType = logic, + // Do not change localparam int unsigned WIDTH = fpnew_pkg::maximum(fpnew_pkg::max_fp_width(FpFmtConfig), fpnew_pkg::max_int_width(IntFmtConfig)), @@ -39,25 +38,14 @@ module fpnew_cast_multi #( input fpnew_pkg::fp_format_e src_fmt_i, input fpnew_pkg::fp_format_e dst_fmt_i, input fpnew_pkg::int_format_e int_fmt_i, - input TagType tag_i, input logic mask_i, - input AuxType aux_i, - // Input Handshake - input logic in_valid_i, - output logic in_ready_o, - input logic flush_i, // Output signals output logic [WIDTH-1:0] result_o, output fpnew_pkg::status_t status_o, output logic extension_bit_o, - output TagType tag_o, output logic mask_o, - output AuxType aux_o, - // Output handshake - output logic out_valid_o, - input logic out_ready_i, - // Indication of valid data in flight - output logic busy_o + // External Register Control + input logic[NumPipeRegs-1:0] reg_enable_i ); // ---------- @@ -117,12 +105,7 @@ module fpnew_cast_multi #( fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_src_fmt_q; fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_dst_fmt_q; fpnew_pkg::int_format_e [0:NUM_INP_REGS] 
inp_pipe_int_fmt_q; - TagType [0:NUM_INP_REGS] inp_pipe_tag_q; logic [0:NUM_INP_REGS] inp_pipe_mask_q; - AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; - logic [0:NUM_INP_REGS] inp_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_INP_REGS] inp_pipe_ready; // Input stage: First element of pipeline is taken from inputs assign inp_pipe_operands_q[0] = operands_i; @@ -133,24 +116,14 @@ module fpnew_cast_multi #( assign inp_pipe_src_fmt_q[0] = src_fmt_i; assign inp_pipe_dst_fmt_q[0] = dst_fmt_i; assign inp_pipe_int_fmt_q[0] = int_fmt_i; - assign inp_pipe_tag_q[0] = tag_i; assign inp_pipe_mask_q[0] = mask_i; - assign inp_pipe_aux_q[0] = aux_i; - assign inp_pipe_valid_q[0] = in_valid_i; - // Input stage: Propagate pipeline ready signal to updtream circuitry - assign in_ready_o = inp_pipe_ready[0]; + // Generate the register stages for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it - assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0) `FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0) @@ -160,9 +133,7 @@ module fpnew_cast_multi #( `FFL(inp_pipe_src_fmt_q[i+1], inp_pipe_src_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) `FFL(inp_pipe_dst_fmt_q[i+1], inp_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) `FFL(inp_pipe_int_fmt_q[i+1], inp_pipe_int_fmt_q[i], reg_ena, fpnew_pkg::int_format_e'(0)) - `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(inp_pipe_mask_q[i+1], inp_pipe_mask_q[i], reg_ena, '0) - `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0)) end // Output stage: assign selected pipe outputs to signals for later use assign operands_q = inp_pipe_operands_q[NUM_INP_REGS]; @@ -320,9 +291,8 @@ module fpnew_cast_multi #( fpnew_pkg::fp_format_e src_fmt_q2; fpnew_pkg::fp_format_e dst_fmt_q2; fpnew_pkg::int_format_e int_fmt_q2; - // Internal pipeline signals, index i holds signal after i register stages - + // Internal pipeline signals, index i holds signal after i register stages logic [0:NUM_MID_REGS] mid_pipe_input_sign_q; logic signed [0:NUM_MID_REGS][INT_EXP_WIDTH-1:0] mid_pipe_input_exp_q; logic [0:NUM_MID_REGS][INT_MAN_WIDTH-1:0] mid_pipe_input_mant_q; @@ -336,12 +306,7 @@ module fpnew_cast_multi #( fpnew_pkg::fp_format_e [0:NUM_MID_REGS] mid_pipe_src_fmt_q; 
fpnew_pkg::fp_format_e [0:NUM_MID_REGS] mid_pipe_dst_fmt_q; fpnew_pkg::int_format_e [0:NUM_MID_REGS] mid_pipe_int_fmt_q; - TagType [0:NUM_MID_REGS] mid_pipe_tag_q; logic [0:NUM_MID_REGS] mid_pipe_mask_q; - AuxType [0:NUM_MID_REGS] mid_pipe_aux_q; - logic [0:NUM_MID_REGS] mid_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_MID_REGS] mid_pipe_ready; // Input stage: First element of pipeline is taken from upstream logic assign mid_pipe_input_sign_q[0] = input_sign; @@ -357,25 +322,14 @@ module fpnew_cast_multi #( assign mid_pipe_src_fmt_q[0] = src_fmt_q; assign mid_pipe_dst_fmt_q[0] = dst_fmt_q; assign mid_pipe_int_fmt_q[0] = int_fmt_q; - assign mid_pipe_tag_q[0] = inp_pipe_tag_q[NUM_INP_REGS]; assign mid_pipe_mask_q[0] = inp_pipe_mask_q[NUM_INP_REGS]; - assign mid_pipe_aux_q[0] = inp_pipe_aux_q[NUM_INP_REGS]; - assign mid_pipe_valid_q[0] = inp_pipe_valid_q[NUM_INP_REGS]; - // Input stage: Propagate pipeline ready signal to input pipe - assign inp_pipe_ready[NUM_INP_REGS] = mid_pipe_ready[0]; // Generate the register stages for (genvar i = 0; i < NUM_MID_REGS; i++) begin : gen_inside_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it - assign mid_pipe_ready[i] = mid_pipe_ready[i+1] | ~mid_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(mid_pipe_valid_q[i+1], mid_pipe_valid_q[i], mid_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = mid_pipe_ready[i] & mid_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[NUM_INP_REGS + i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(mid_pipe_input_sign_q[i+1], mid_pipe_input_sign_q[i], reg_ena, '0) `FFL(mid_pipe_input_exp_q[i+1], mid_pipe_input_exp_q[i], reg_ena, '0) @@ -390,9 +344,7 @@ module fpnew_cast_multi #( `FFL(mid_pipe_src_fmt_q[i+1], mid_pipe_src_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) `FFL(mid_pipe_dst_fmt_q[i+1], mid_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) `FFL(mid_pipe_int_fmt_q[i+1], mid_pipe_int_fmt_q[i], reg_ena, fpnew_pkg::int_format_e'(0)) - `FFL(mid_pipe_tag_q[i+1], mid_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(mid_pipe_mask_q[i+1], mid_pipe_mask_q[i], reg_ena, '0) - `FFL(mid_pipe_aux_q[i+1], mid_pipe_aux_q[i], reg_ena, AuxType'('0)) end // Output stage: assign selected pipe outputs to signals for later use assign input_sign_q = mid_pipe_input_sign_q[NUM_MID_REGS]; @@ -749,52 +701,30 @@ module fpnew_cast_multi #( logic [0:NUM_OUT_REGS][WIDTH-1:0] out_pipe_result_q; fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q; logic [0:NUM_OUT_REGS] out_pipe_ext_bit_q; - TagType [0:NUM_OUT_REGS] out_pipe_tag_q; logic [0:NUM_OUT_REGS] out_pipe_mask_q; - AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; - logic [0:NUM_OUT_REGS] out_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_OUT_REGS] out_pipe_ready; // Input stage: First element of pipeline is taken from inputs assign out_pipe_result_q[0] = result_d; assign 
out_pipe_status_q[0] = status_d; assign out_pipe_ext_bit_q[0] = extension_bit; - assign out_pipe_tag_q[0] = mid_pipe_tag_q[NUM_MID_REGS]; assign out_pipe_mask_q[0] = mid_pipe_mask_q[NUM_MID_REGS]; - assign out_pipe_aux_q[0] = mid_pipe_aux_q[NUM_MID_REGS]; - assign out_pipe_valid_q[0] = mid_pipe_valid_q[NUM_MID_REGS]; - // Input stage: Propagate pipeline ready signal to inside pipe - assign mid_pipe_ready[NUM_MID_REGS] = out_pipe_ready[0]; + // Generate the register stages for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. if the next stage only holds a bubble (not valid) -> we can pop it - assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[NUM_INP_REGS + NUM_MID_REGS + i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) `FFL(out_pipe_ext_bit_q[i+1], out_pipe_ext_bit_q[i], reg_ena, '0) - `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(out_pipe_mask_q[i+1], out_pipe_mask_q[i], reg_ena, '0) - `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) end - // Output stage: Ready travels backwards from output side, driven by downstream circuitry - assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i; + // Output stage: assign module outputs assign result_o = 
out_pipe_result_q[NUM_OUT_REGS]; assign status_o = out_pipe_status_q[NUM_OUT_REGS]; assign extension_bit_o = out_pipe_ext_bit_q[NUM_OUT_REGS]; - assign tag_o = out_pipe_tag_q[NUM_OUT_REGS]; assign mask_o = out_pipe_mask_q[NUM_OUT_REGS]; - assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; - assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; - assign busy_o = (| {inp_pipe_valid_q, mid_pipe_valid_q, out_pipe_valid_q}); endmodule diff --git a/src/fpnew_fma.sv b/src/fpnew_fma.sv index 449ca434..7423b563 100644 --- a/src/fpnew_fma.sv +++ b/src/fpnew_fma.sv @@ -19,8 +19,6 @@ module fpnew_fma #( parameter fpnew_pkg::fp_format_e FpFormat = fpnew_pkg::fp_format_e'(0), parameter int unsigned NumPipeRegs = 0, parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, - parameter type TagType = logic, - parameter type AuxType = logic, localparam int unsigned WIDTH = fpnew_pkg::fp_width(FpFormat) // do not change ) ( @@ -32,25 +30,14 @@ module fpnew_fma #( input fpnew_pkg::roundmode_e rnd_mode_i, input fpnew_pkg::operation_e op_i, input logic op_mod_i, - input TagType tag_i, input logic mask_i, - input AuxType aux_i, - // Input Handshake - input logic in_valid_i, - output logic in_ready_o, - input logic flush_i, // Output signals output logic [WIDTH-1:0] result_o, output fpnew_pkg::status_t status_o, output logic extension_bit_o, - output TagType tag_o, output logic mask_o, - output AuxType aux_o, - // Output handshake - output logic out_valid_o, - input logic out_ready_i, - // Indication of valid data in flight - output logic busy_o + // External Register Control + input logic[NumPipeRegs-1:0] reg_enable_i ); // ---------- @@ -105,12 +92,7 @@ module fpnew_fma #( fpnew_pkg::roundmode_e [0:NUM_INP_REGS] inp_pipe_rnd_mode_q; fpnew_pkg::operation_e [0:NUM_INP_REGS] inp_pipe_op_q; logic [0:NUM_INP_REGS] inp_pipe_op_mod_q; - TagType [0:NUM_INP_REGS] inp_pipe_tag_q; logic [0:NUM_INP_REGS] inp_pipe_mask_q; - AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; - logic [0:NUM_INP_REGS] 
inp_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_INP_REGS] inp_pipe_ready; // Input stage: First element of pipeline is taken from inputs assign inp_pipe_operands_q[0] = operands_i; @@ -118,33 +100,21 @@ module fpnew_fma #( assign inp_pipe_rnd_mode_q[0] = rnd_mode_i; assign inp_pipe_op_q[0] = op_i; assign inp_pipe_op_mod_q[0] = op_mod_i; - assign inp_pipe_tag_q[0] = tag_i; assign inp_pipe_mask_q[0] = mask_i; - assign inp_pipe_aux_q[0] = aux_i; - assign inp_pipe_valid_q[0] = in_valid_i; - // Input stage: Propagate pipeline ready signal to updtream circuitry - assign in_ready_o = inp_pipe_ready[0]; + // Generate the register stages for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. if the next stage only holds a bubble (not valid) -> we can pop it - assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0) `FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0) `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) `FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::FMADD) `FFL(inp_pipe_op_mod_q[i+1], inp_pipe_op_mod_q[i], reg_ena, '0) - `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(inp_pipe_mask_q[i+1], 
inp_pipe_mask_q[i], reg_ena, '0) - `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0)) end // ----------------- @@ -413,12 +383,7 @@ module fpnew_fma #( logic [0:NUM_MID_REGS] mid_pipe_res_is_spec_q; fp_t [0:NUM_MID_REGS] mid_pipe_spec_res_q; fpnew_pkg::status_t [0:NUM_MID_REGS] mid_pipe_spec_stat_q; - TagType [0:NUM_MID_REGS] mid_pipe_tag_q; logic [0:NUM_MID_REGS] mid_pipe_mask_q; - AuxType [0:NUM_MID_REGS] mid_pipe_aux_q; - logic [0:NUM_MID_REGS] mid_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_MID_REGS] mid_pipe_ready; // Input stage: First element of pipeline is taken from upstream logic assign mid_pipe_eff_sub_q[0] = effective_subtraction; @@ -433,25 +398,14 @@ module fpnew_fma #( assign mid_pipe_res_is_spec_q[0] = result_is_special; assign mid_pipe_spec_res_q[0] = special_result; assign mid_pipe_spec_stat_q[0] = special_status; - assign mid_pipe_tag_q[0] = inp_pipe_tag_q[NUM_INP_REGS]; assign mid_pipe_mask_q[0] = inp_pipe_mask_q[NUM_INP_REGS]; - assign mid_pipe_aux_q[0] = inp_pipe_aux_q[NUM_INP_REGS]; - assign mid_pipe_valid_q[0] = inp_pipe_valid_q[NUM_INP_REGS]; - // Input stage: Propagate pipeline ready signal to input pipe - assign inp_pipe_ready[NUM_INP_REGS] = mid_pipe_ready[0]; // Generate the register stages for (genvar i = 0; i < NUM_MID_REGS; i++) begin : gen_inside_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it - assign mid_pipe_ready[i] = mid_pipe_ready[i+1] | ~mid_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(mid_pipe_valid_q[i+1], mid_pipe_valid_q[i], mid_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = mid_pipe_ready[i] & mid_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[NUM_INP_REGS + i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(mid_pipe_eff_sub_q[i+1], mid_pipe_eff_sub_q[i], reg_ena, '0) `FFL(mid_pipe_exp_prod_q[i+1], mid_pipe_exp_prod_q[i], reg_ena, '0) @@ -465,10 +419,9 @@ module fpnew_fma #( `FFL(mid_pipe_res_is_spec_q[i+1], mid_pipe_res_is_spec_q[i], reg_ena, '0) `FFL(mid_pipe_spec_res_q[i+1], mid_pipe_spec_res_q[i], reg_ena, '0) `FFL(mid_pipe_spec_stat_q[i+1], mid_pipe_spec_stat_q[i], reg_ena, '0) - `FFL(mid_pipe_tag_q[i+1], mid_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(mid_pipe_mask_q[i+1], mid_pipe_mask_q[i], reg_ena, '0) - `FFL(mid_pipe_aux_q[i+1], mid_pipe_aux_q[i], reg_ena, AuxType'('0)) end + // Output stage: assign selected pipe outputs to signals for later use assign effective_subtraction_q = mid_pipe_eff_sub_q[NUM_MID_REGS]; assign exponent_product_q = mid_pipe_exp_prod_q[NUM_MID_REGS]; @@ -644,50 +597,28 @@ module fpnew_fma #( // Output pipeline signals, index i holds signal after i register stages fp_t [0:NUM_OUT_REGS] out_pipe_result_q; fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q; - TagType [0:NUM_OUT_REGS] out_pipe_tag_q; logic [0:NUM_OUT_REGS] out_pipe_mask_q; - AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; - logic [0:NUM_OUT_REGS] out_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_OUT_REGS] out_pipe_ready; // Input stage: First element of pipeline is taken from inputs assign out_pipe_result_q[0] = result_d; assign 
out_pipe_status_q[0] = status_d; - assign out_pipe_tag_q[0] = mid_pipe_tag_q[NUM_MID_REGS]; assign out_pipe_mask_q[0] = mid_pipe_mask_q[NUM_MID_REGS]; - assign out_pipe_aux_q[0] = mid_pipe_aux_q[NUM_MID_REGS]; - assign out_pipe_valid_q[0] = mid_pipe_valid_q[NUM_MID_REGS]; - // Input stage: Propagate pipeline ready signal to inside pipe - assign mid_pipe_ready[NUM_MID_REGS] = out_pipe_ready[0]; + // Generate the register stages for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. if the next stage only holds a bubble (not valid) -> we can pop it - assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[NUM_INP_REGS + NUM_MID_REGS + i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) - `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(out_pipe_mask_q[i+1], out_pipe_mask_q[i], reg_ena, '0) - `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) end - // Output stage: Ready travels backwards from output side, driven by downstream circuitry - assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i; + // Output stage: assign module outputs assign result_o = out_pipe_result_q[NUM_OUT_REGS]; assign status_o = out_pipe_status_q[NUM_OUT_REGS]; assign extension_bit_o = 1'b1; // always NaN-Box 
result - assign tag_o = out_pipe_tag_q[NUM_OUT_REGS]; assign mask_o = out_pipe_mask_q[NUM_OUT_REGS]; - assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; - assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; - assign busy_o = (| {inp_pipe_valid_q, mid_pipe_valid_q, out_pipe_valid_q}); endmodule diff --git a/src/fpnew_fma_multi.sv b/src/fpnew_fma_multi.sv index c1dea524..5d23376f 100644 --- a/src/fpnew_fma_multi.sv +++ b/src/fpnew_fma_multi.sv @@ -19,8 +19,6 @@ module fpnew_fma_multi #( parameter fpnew_pkg::fmt_logic_t FpFmtConfig = '1, parameter int unsigned NumPipeRegs = 0, parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, - parameter type TagType = logic, - parameter type AuxType = logic, // Do not change localparam int unsigned WIDTH = fpnew_pkg::max_fp_width(FpFmtConfig), localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS @@ -36,25 +34,14 @@ module fpnew_fma_multi #( input fpnew_pkg::fp_format_e src_fmt_i, // format of the multiplicands input fpnew_pkg::fp_format_e src2_fmt_i, // format of the addend input fpnew_pkg::fp_format_e dst_fmt_i, // format of the result - input TagType tag_i, input logic mask_i, - input AuxType aux_i, - // Input Handshake - input logic in_valid_i, - output logic in_ready_o, - input logic flush_i, // Output signals output logic [WIDTH-1:0] result_o, output fpnew_pkg::status_t status_o, output logic extension_bit_o, - output TagType tag_o, output logic mask_o, - output AuxType aux_o, - // Output handshake - output logic out_valid_o, - input logic out_ready_i, - // Indication of valid data in flight - output logic busy_o + // External Register Control + input logic[NumPipeRegs-1:0] reg_enable_i ); // ---------- @@ -121,12 +108,7 @@ module fpnew_fma_multi #( fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_src_fmt_q; fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_src2_fmt_q; fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_dst_fmt_q; - TagType [0:NUM_INP_REGS] inp_pipe_tag_q; logic [0:NUM_INP_REGS] 
inp_pipe_mask_q; - AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; - logic [0:NUM_INP_REGS] inp_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_INP_REGS] inp_pipe_ready; // Input stage: First element of pipeline is taken from inputs assign inp_pipe_operands_q[0] = operands_i; @@ -137,24 +119,14 @@ module fpnew_fma_multi #( assign inp_pipe_src_fmt_q[0] = src_fmt_i; assign inp_pipe_src2_fmt_q[0] = src2_fmt_i; assign inp_pipe_dst_fmt_q[0] = dst_fmt_i; - assign inp_pipe_tag_q[0] = tag_i; assign inp_pipe_mask_q[0] = mask_i; - assign inp_pipe_aux_q[0] = aux_i; - assign inp_pipe_valid_q[0] = in_valid_i; - // Input stage: Propagate pipeline ready signal to updtream circuitry - assign in_ready_o = inp_pipe_ready[0]; + // Generate the register stages for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it - assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0) `FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0) @@ -164,9 +136,7 @@ module fpnew_fma_multi #( `FFL(inp_pipe_src_fmt_q[i+1], inp_pipe_src_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) `FFL(inp_pipe_src2_fmt_q[i+1], inp_pipe_src2_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) `FFL(inp_pipe_dst_fmt_q[i+1], inp_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) - `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(inp_pipe_mask_q[i+1], inp_pipe_mask_q[i], reg_ena, '0) - `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0)) end // Output stage: assign selected pipe outputs to signals for later use assign operands_q = inp_pipe_operands_q[NUM_INP_REGS]; @@ -507,12 +477,7 @@ module fpnew_fma_multi #( logic [0:NUM_MID_REGS] mid_pipe_res_is_spec_q; fp_t [0:NUM_MID_REGS] mid_pipe_spec_res_q; fpnew_pkg::status_t [0:NUM_MID_REGS] mid_pipe_spec_stat_q; - TagType [0:NUM_MID_REGS] mid_pipe_tag_q; logic [0:NUM_MID_REGS] mid_pipe_mask_q; - AuxType [0:NUM_MID_REGS] mid_pipe_aux_q; - logic [0:NUM_MID_REGS] mid_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_MID_REGS] mid_pipe_ready; // Input stage: First element of pipeline is taken from upstream logic assign mid_pipe_eff_sub_q[0] = effective_subtraction; @@ -528,25 +493,14 @@ module 
fpnew_fma_multi #( assign mid_pipe_res_is_spec_q[0] = result_is_special; assign mid_pipe_spec_res_q[0] = special_result; assign mid_pipe_spec_stat_q[0] = special_status; - assign mid_pipe_tag_q[0] = inp_pipe_tag_q[NUM_INP_REGS]; assign mid_pipe_mask_q[0] = inp_pipe_mask_q[NUM_INP_REGS]; - assign mid_pipe_aux_q[0] = inp_pipe_aux_q[NUM_INP_REGS]; - assign mid_pipe_valid_q[0] = inp_pipe_valid_q[NUM_INP_REGS]; - // Input stage: Propagate pipeline ready signal to input pipe - assign inp_pipe_ready[NUM_INP_REGS] = mid_pipe_ready[0]; // Generate the register stages for (genvar i = 0; i < NUM_MID_REGS; i++) begin : gen_inside_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. if the next stage only holds a bubble (not valid) -> we can pop it - assign mid_pipe_ready[i] = mid_pipe_ready[i+1] | ~mid_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(mid_pipe_valid_q[i+1], mid_pipe_valid_q[i], mid_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = mid_pipe_ready[i] & mid_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[NUM_INP_REGS + i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(mid_pipe_eff_sub_q[i+1], mid_pipe_eff_sub_q[i], reg_ena, '0) `FFL(mid_pipe_exp_prod_q[i+1], mid_pipe_exp_prod_q[i], reg_ena, '0) @@ -561,9 +515,7 @@ module fpnew_fma_multi #( `FFL(mid_pipe_res_is_spec_q[i+1], mid_pipe_res_is_spec_q[i], reg_ena, '0) `FFL(mid_pipe_spec_res_q[i+1], mid_pipe_spec_res_q[i], reg_ena, '0) `FFL(mid_pipe_spec_stat_q[i+1], mid_pipe_spec_stat_q[i], reg_ena, '0) - `FFL(mid_pipe_tag_q[i+1], mid_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(mid_pipe_mask_q[i+1], mid_pipe_mask_q[i], reg_ena, '0) - `FFL(mid_pipe_aux_q[i+1], 
mid_pipe_aux_q[i], reg_ena, AuxType'('0)) end // Output stage: assign selected pipe outputs to signals for later use assign effective_subtraction_q = mid_pipe_eff_sub_q[NUM_MID_REGS]; @@ -802,50 +754,28 @@ module fpnew_fma_multi #( // Output pipeline signals, index i holds signal after i register stages logic [0:NUM_OUT_REGS][WIDTH-1:0] out_pipe_result_q; fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q; - TagType [0:NUM_OUT_REGS] out_pipe_tag_q; logic [0:NUM_OUT_REGS] out_pipe_mask_q; - AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; - logic [0:NUM_OUT_REGS] out_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_OUT_REGS] out_pipe_ready; // Input stage: First element of pipeline is taken from inputs assign out_pipe_result_q[0] = result_d; assign out_pipe_status_q[0] = status_d; - assign out_pipe_tag_q[0] = mid_pipe_tag_q[NUM_MID_REGS]; assign out_pipe_mask_q[0] = mid_pipe_mask_q[NUM_MID_REGS]; - assign out_pipe_aux_q[0] = mid_pipe_aux_q[NUM_MID_REGS]; - assign out_pipe_valid_q[0] = mid_pipe_valid_q[NUM_MID_REGS]; - // Input stage: Propagate pipeline ready signal to inside pipe - assign mid_pipe_ready[NUM_MID_REGS] = out_pipe_ready[0]; + // Generate the register stages for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it - assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[NUM_INP_REGS + NUM_MID_REGS + i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) - `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(out_pipe_mask_q[i+1], out_pipe_mask_q[i], reg_ena, '0) - `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) end - // Output stage: Ready travels backwards from output side, driven by downstream circuitry - assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i; + // Output stage: assign module outputs assign result_o = out_pipe_result_q[NUM_OUT_REGS]; assign status_o = out_pipe_status_q[NUM_OUT_REGS]; assign extension_bit_o = 1'b1; // always NaN-Box result - assign tag_o = out_pipe_tag_q[NUM_OUT_REGS]; assign mask_o = out_pipe_mask_q[NUM_OUT_REGS]; - assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; - assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; - assign busy_o = (| {inp_pipe_valid_q, mid_pipe_valid_q, out_pipe_valid_q}); endmodule diff --git a/src/fpnew_noncomp.sv b/src/fpnew_noncomp.sv index 370e80e9..4b9ff8c4 100644 --- a/src/fpnew_noncomp.sv +++ b/src/fpnew_noncomp.sv @@ -19,8 +19,6 @@ module fpnew_noncomp #( parameter fpnew_pkg::fp_format_e FpFormat = fpnew_pkg::fp_format_e'(0), parameter int unsigned NumPipeRegs = 0, parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, - parameter type TagType = logic, - 
parameter type AuxType = logic, localparam int unsigned WIDTH = fpnew_pkg::fp_width(FpFormat) // do not change ) ( @@ -32,27 +30,16 @@ module fpnew_noncomp #( input fpnew_pkg::roundmode_e rnd_mode_i, input fpnew_pkg::operation_e op_i, input logic op_mod_i, - input TagType tag_i, input logic mask_i, - input AuxType aux_i, - // Input Handshake - input logic in_valid_i, - output logic in_ready_o, - input logic flush_i, // Output signals output logic [WIDTH-1:0] result_o, output fpnew_pkg::status_t status_o, output logic extension_bit_o, output fpnew_pkg::classmask_e class_mask_o, output logic is_class_o, - output TagType tag_o, output logic mask_o, - output AuxType aux_o, - // Output handshake - output logic out_valid_o, - input logic out_ready_i, - // Indication of valid data in flight - output logic busy_o + // External Register Control + input logic[NumPipeRegs-1:0] reg_enable_i ); // ---------- @@ -90,12 +77,7 @@ module fpnew_noncomp #( fpnew_pkg::roundmode_e [0:NUM_INP_REGS] inp_pipe_rnd_mode_q; fpnew_pkg::operation_e [0:NUM_INP_REGS] inp_pipe_op_q; logic [0:NUM_INP_REGS] inp_pipe_op_mod_q; - TagType [0:NUM_INP_REGS] inp_pipe_tag_q; logic [0:NUM_INP_REGS] inp_pipe_mask_q; - AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; - logic [0:NUM_INP_REGS] inp_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_INP_REGS] inp_pipe_ready; // Input stage: First element of pipeline is taken from inputs assign inp_pipe_operands_q[0] = operands_i; @@ -103,33 +85,21 @@ module fpnew_noncomp #( assign inp_pipe_rnd_mode_q[0] = rnd_mode_i; assign inp_pipe_op_q[0] = op_i; assign inp_pipe_op_mod_q[0] = op_mod_i; - assign inp_pipe_tag_q[0] = tag_i; assign inp_pipe_mask_q[0] = mask_i; - assign inp_pipe_aux_q[0] = aux_i; - assign inp_pipe_valid_q[0] = in_valid_i; - // Input stage: Propagate pipeline ready signal to updtream circuitry - assign in_ready_o = inp_pipe_ready[0]; + // Generate the register stages for (genvar i = 0; i < NUM_INP_REGS; i++) begin : 
gen_input_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. if the next stage only holds a bubble (not valid) -> we can pop it - assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0) `FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0) `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) `FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::FMADD) `FFL(inp_pipe_op_mod_q[i+1], inp_pipe_op_mod_q[i], reg_ena, '0) - `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(inp_pipe_mask_q[i+1], inp_pipe_mask_q[i], reg_ena, '0) - `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0)) end // --------------------- @@ -360,12 +330,7 @@ module fpnew_noncomp #( logic [0:NUM_OUT_REGS] out_pipe_extension_bit_q; fpnew_pkg::classmask_e [0:NUM_OUT_REGS] out_pipe_class_mask_q; logic [0:NUM_OUT_REGS] out_pipe_is_class_q; - TagType [0:NUM_OUT_REGS] out_pipe_tag_q; logic [0:NUM_OUT_REGS] out_pipe_mask_q; - AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; - logic [0:NUM_OUT_REGS] out_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_OUT_REGS] out_pipe_ready; // Input stage: First element of pipeline is taken from inputs assign out_pipe_result_q[0] = result_d; @@ -373,45 +338,28 @@ module 
fpnew_noncomp #( assign out_pipe_extension_bit_q[0] = extension_bit_d; assign out_pipe_class_mask_q[0] = class_mask_d; assign out_pipe_is_class_q[0] = is_class_d; - assign out_pipe_tag_q[0] = inp_pipe_tag_q[NUM_INP_REGS]; assign out_pipe_mask_q[0] = inp_pipe_mask_q[NUM_INP_REGS]; - assign out_pipe_aux_q[0] = inp_pipe_aux_q[NUM_INP_REGS]; - assign out_pipe_valid_q[0] = inp_pipe_valid_q[NUM_INP_REGS]; - // Input stage: Propagate pipeline ready signal to inside pipe - assign inp_pipe_ready[NUM_INP_REGS] = out_pipe_ready[0]; + // Generate the register stages for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. if the next stage only holds a bubble (not valid) -> we can pop it - assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[NUM_INP_REGS + i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) `FFL(out_pipe_extension_bit_q[i+1], out_pipe_extension_bit_q[i], reg_ena, '0) `FFL(out_pipe_class_mask_q[i+1], out_pipe_class_mask_q[i], reg_ena, fpnew_pkg::QNAN) `FFL(out_pipe_is_class_q[i+1], out_pipe_is_class_q[i], reg_ena, '0) - `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(out_pipe_mask_q[i+1], out_pipe_mask_q[i], reg_ena, '0) - `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, 
AuxType'('0)) end - // Output stage: Ready travels backwards from output side, driven by downstream circuitry - assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i; + // Output stage: assign module outputs assign result_o = out_pipe_result_q[NUM_OUT_REGS]; assign status_o = out_pipe_status_q[NUM_OUT_REGS]; assign extension_bit_o = out_pipe_extension_bit_q[NUM_OUT_REGS]; assign class_mask_o = out_pipe_class_mask_q[NUM_OUT_REGS]; assign is_class_o = out_pipe_is_class_q[NUM_OUT_REGS]; - assign tag_o = out_pipe_tag_q[NUM_OUT_REGS]; assign mask_o = out_pipe_mask_q[NUM_OUT_REGS]; - assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; - assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; - assign busy_o = (| {inp_pipe_valid_q, out_pipe_valid_q}); endmodule From ccbf929a7652b7b5f0fe0bba0f3621d9f3c45a3d Mon Sep 17 00:00:00 2001 From: Maurus Item Date: Wed, 12 Jun 2024 11:53:58 +0200 Subject: [PATCH 06/14] Added aux module that can deal with lane FSMs. --- Bender.yml | 1 + src/fpnew_aux_fsm.sv | 306 +++++++++++++++++++++++++++++++++++++++++++ src_files.yml | 1 + 3 files changed, 308 insertions(+) create mode 100644 src/fpnew_aux_fsm.sv diff --git a/Bender.yml b/Bender.yml index d29a4557..773b7f1e 100644 --- a/Bender.yml +++ b/Bender.yml @@ -38,6 +38,7 @@ sources: - vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_srt.v - vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_top.v - src/fpnew_aux.sv + - src/fpnew_aux_fsm.sv - src/fpnew_divsqrt_th_32.sv - src/fpnew_divsqrt_th_64_multi.sv - src/fpnew_divsqrt_multi.sv diff --git a/src/fpnew_aux_fsm.sv b/src/fpnew_aux_fsm.sv new file mode 100644 index 00000000..4b1c6013 --- /dev/null +++ b/src/fpnew_aux_fsm.sv @@ -0,0 +1,306 @@ +// Copyright 2024 ETH Zurich and University of Bologna. +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. 
You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// SPDX-License-Identifier: SHL-0.51 + +// Author: Maurus Item +// +// Description Aux chain for FPNew, handles transmitting of shared handshake and aux data +// And enables the correct lanes so they always stay in sync. +// This version can be used for lanes that have some form of FSM in them and only eventually are ready + +`include "common_cells/registers.svh" + +module fpnew_aux_fsm #( + parameter int unsigned NumPipeRegs = 0, + parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, + parameter type TagType = logic, + parameter type AuxType = logic, + parameter int unsigned NumLanes = 1 +) ( + input logic clk_i, + input logic rst_ni, + // Input signals + input TagType tag_i, + input AuxType aux_i, + input logic is_vector_i, + input logic [NumLanes-1:0] lane_active_i, + // Input Handshake + input logic in_valid_i, + output logic in_ready_o, + input logic flush_i, + // Output signals + output TagType tag_o, + output AuxType aux_o, + output logic is_vector_o, + output logic [NumLanes-1:0] lane_active_o, + // Output handshake + output logic out_valid_o, + input logic out_ready_i, + // Register Enable for Lanes + output logic [NumPipeRegs-1:0] reg_enable_o, + output logic [NumPipeRegs-1:0] vector_reg_enable_o, + output logic [NumLanes-1:0][NumPipeRegs-1:0] lane_reg_enable_o, + // Signals for the Lane FSMs + // Signal to start the FSM, will be asserted for one cycle + output logic [NumLanes-1:0] lane_fsm_start_o, + // Signal that the FSM finished its operation, should be asserted continuously + input logic [NumLanes-1:0]
lane_fsm_ready_i, + // Indication of valid data in flight + output logic busy_o +); + + // ---------- + // Pipeline Distribution + // ---------- + // This must match between this module and modules that use this module as reg enable input! + localparam NUM_INP_REGS = (PipeConfig == fpnew_pkg::BEFORE) + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? (NumPipeRegs / 2) // Last to get distributed regs + : 0); // Always have one reg to use for FSM Input + localparam NUM_OUT_REGS = (PipeConfig == fpnew_pkg::AFTER || PipeConfig == fpnew_pkg::INSIDE) + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? ((NumPipeRegs + 1) / 2) // First to get distributed regs + : 0); // no regs here otherwise + + // --------------- + // Input pipeline + // --------------- + // Input pipeline signals, index i holds signal after i register stages + TagType [0:NUM_INP_REGS] in_tag; + AuxType [0:NUM_INP_REGS] in_aux; + logic [0:NUM_INP_REGS] in_is_vector; + logic [0:NUM_INP_REGS][NumLanes-1:0] in_lane_active; + logic [0:NUM_INP_REGS] in_valid; + + // Ready signal is combinatorial for all stages + logic [0:NUM_INP_REGS] in_ready; + + // First element of pipeline is taken from inputs + assign in_tag [0] = tag_i; + assign in_aux [0] = aux_i; + assign in_is_vector [0] = is_vector_i; + assign in_valid [0] = in_valid_i; + assign in_lane_active[0] = lane_active_i; + + // Propagate pipeline ready signal to upstream circuitry + assign in_ready_o = in_ready[0]; + + // Generate the register stages + for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline + + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it + assign in_ready[i] = in_ready[i+1] | ~in_valid[i+1]; + + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(in_valid[i+1], in_valid[i], in_ready[i], flush_i, 1'b0, clk_i, rst_ni) + + // Enable register if pipeline ready and a valid data item is present + assign reg_ena = in_ready[i] & in_valid[i]; + + // Drive external registers with reg enable + assign reg_enable_o[i] = reg_ena; + + // Drive external vector registers with reg enable if operation is a vector + assign vector_reg_enable_o[i] = reg_ena & in_is_vector[i]; + for (genvar l = 0; l < NumLanes; l++) begin + assign lane_reg_enable_o[l][i] = reg_ena & in_lane_active[i][l]; + end + + // Generate the pipeline registers within the stages, use enable-registers + `FFL( in_tag[i+1], in_tag[i], reg_ena, TagType'('0)) + `FFL( in_aux[i+1], in_aux[i], reg_ena, AuxType'('0)) + `FFL( in_is_vector[i+1], in_is_vector[i], reg_ena, '0 ) + `FFL(in_lane_active[i+1], in_lane_active[i], reg_ena, '0 ) + end + + // ---------- + // Global FSM + // ---------- + + // FSM states + typedef enum logic [1:0] {IDLE, BUSY, HOLD} fsm_state_e; + fsm_state_e state_q, state_d; + + // Input & Output Handshake + logic fsm_in_valid, fsm_in_ready; + logic fsm_out_valid, fsm_out_ready; + + // Synchronisation signals + logic fsm_start, fsm_ready, fsm_busy; + + // Data holding signals + TagType held_tag; + AuxType held_aux; + logic held_is_vector; + logic [NumLanes-1:0] held_lane_active; + + // Upstream Handshake Connection + assign fsm_in_valid = in_valid[NUM_INP_REGS]; + assign in_ready[NUM_INP_REGS] = fsm_in_ready; + + // Done when all active lanes are done + assign fsm_ready = &(lane_fsm_ready_i | ~held_lane_active); + + // FSM to safely apply and receive data from DIVSQRT unit + always_comb begin : flag_fsm + // Default assignments + fsm_out_valid = 1'b0; + fsm_in_ready = 1'b0; + fsm_start = 1'b0; + fsm_busy = 1'b0; + state_d = state_q; + +
unique case (state_q) + IDLE: begin + fsm_in_ready = '1; + if (fsm_in_valid) begin + state_d = BUSY; + fsm_start = 1'b1; + end + end + BUSY: begin + fsm_busy = 1'b1; + // If all active lanes are done send data down chain + if (fsm_ready) begin + fsm_out_valid = 1'b1; + if (fsm_out_ready) begin + fsm_in_ready = 1'b1; + if (fsm_in_valid) begin + state_d = BUSY; + fsm_start = 1'b1; + end else begin + state_d = IDLE; + end + end else begin + state_d = HOLD; + end + end + end + HOLD: begin + // Exact same as BUSY, but outer condition is already given + fsm_out_valid = 1'b1; + if (fsm_out_ready) begin + fsm_in_ready = 1'b1; + if (fsm_in_valid) begin + state_d = BUSY; + fsm_start = 1'b1; + end else begin + state_d = IDLE; + end + end else begin + state_d = HOLD; + end + end + + // fall into idle state otherwise + default: state_d = IDLE; + endcase + + // Flushing overrides the other actions + if (flush_i) begin + fsm_out_valid = 1'b0; + state_d = IDLE; + end + end + + `FF(state_q, state_d, IDLE); + + // Start Lanes when FSM starts and lane is active + for (genvar l = 0; l < NumLanes; l++) begin + assign lane_fsm_start_o[l] = fsm_start && in_lane_active[NUM_INP_REGS][l]; + end + + // ---------------- + // Data Holding FFs + // ---------------- + + `FFL( held_tag, in_tag[NUM_INP_REGS], fsm_start, TagType'('0)); + `FFL( held_aux, in_aux[NUM_INP_REGS], fsm_start, AuxType'('0)); + `FFL( held_is_vector, in_is_vector[NUM_INP_REGS], fsm_start, '0); + `FFL(held_lane_active, in_lane_active[NUM_INP_REGS], fsm_start, '0); + + // --------------- + // Output pipeline + // --------------- + + // Output pipeline signals, index i holds signal after i register stages + TagType [0:NUM_OUT_REGS] out_tag; + AuxType [0:NUM_OUT_REGS] out_aux; + logic [0:NUM_OUT_REGS] out_is_vector; + logic [0:NUM_OUT_REGS][NumLanes-1:0] out_lane_active; + logic [0:NUM_OUT_REGS] out_valid; + + // Ready signal is combinatorial for all stages + logic [0:NUM_OUT_REGS] out_ready; + + // Connect to upstream Handshake 
+ assign out_valid[0] = fsm_out_valid; + assign fsm_out_ready = out_ready[0]; + + // Connect to Hold Register + assign out_tag [0] = held_tag; + assign out_aux [0] = held_aux; + assign out_is_vector [0] = held_is_vector; + assign out_lane_active[0] = held_lane_active; + + // Generate the register stages + for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline + + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. if the next stage only holds a bubble (not valid) -> we can pop it + assign out_ready[i] = out_ready[i+1] | ~out_valid[i+1]; + + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(out_valid[i+1], out_valid[i], out_ready[i], flush_i, 1'b0, clk_i, rst_ni) + + // Enable register if pipeline ready and a valid data item is present + assign reg_ena = out_ready[i] & out_valid[i]; + + // Drive external registers with reg enable + assign reg_enable_o[NUM_INP_REGS + i] = reg_ena; + + // Drive external vector registers with reg enable if operation is a vector + assign vector_reg_enable_o[NUM_INP_REGS + i] = reg_ena & out_is_vector[i]; + for (genvar l = 0; l < NumLanes; l++) begin + assign lane_reg_enable_o[l][NUM_INP_REGS + i] = reg_ena & out_lane_active[i][l]; + end + + // Generate the pipeline registers within the stages, use enable-registers + `FFL( out_tag[i+1], out_tag[i], reg_ena, TagType'('0)) + `FFL( out_aux[i+1], out_aux[i], reg_ena, AuxType'('0)) + `FFL( out_is_vector[i+1], out_is_vector[i], reg_ena, '0 ) + `FFL(out_lane_active[i+1], out_lane_active[i], reg_ena, '0 ) + end + + // Ready travels backwards from output side, driven by downstream circuitry + assign out_ready[NUM_OUT_REGS] = out_ready_i; + + // Assign module outputs + assign tag_o = out_tag [NUM_OUT_REGS]; + assign aux_o = out_aux [NUM_OUT_REGS]; + assign is_vector_o = out_is_vector [NUM_OUT_REGS]; + assign
out_valid_o = out_valid [NUM_OUT_REGS]; + assign lane_active_o = out_lane_active[NUM_OUT_REGS]; + + // Assign output Flags: Busy if any element inside the pipe is valid + assign busy_o = |in_valid | |out_valid | fsm_busy; + +endmodule diff --git a/src_files.yml b/src_files.yml index d41c7bc4..7c31c30f 100644 --- a/src_files.yml +++ b/src_files.yml @@ -34,6 +34,7 @@ fpnew: vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_srt.v, vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_top.v, src/fpnew_aux.sv, + src/fpnew_aux_fsm.sv, src/fpnew_divsqrt_th_32.sv, src/fpnew_divsqrt_th_64_multi.sv, src/fpnew_divsqrt_multi.sv, From 55ffbf82ae52d0c0313d7407e35d9e30ad5b1795 Mon Sep 17 00:00:00 2001 From: Maurus Item Date: Thu, 13 Jun 2024 14:23:30 +0200 Subject: [PATCH 07/14] Removed previous synchronisation of divsqrt module --- src/fpnew_divsqrt_multi.sv | 46 ++++------------------------ src/fpnew_divsqrt_th_64_multi.sv | 52 ++++---------------------------- 2 files changed, 12 insertions(+), 86 deletions(-) diff --git a/src/fpnew_divsqrt_multi.sv b/src/fpnew_divsqrt_multi.sv index 5f1f99e6..44a030a1 100644 --- a/src/fpnew_divsqrt_multi.sv +++ b/src/fpnew_divsqrt_multi.sv @@ -41,10 +41,6 @@ module fpnew_divsqrt_multi #( // Input Handshake input logic in_valid_i, output logic in_ready_o, - output logic divsqrt_done_o, - input logic simd_synch_done_i, - output logic divsqrt_ready_o, - input logic simd_synch_rdy_i, input logic flush_i, // Output signals output logic [WIDTH-1:0] result_o, @@ -170,11 +166,10 @@ module fpnew_divsqrt_multi #( logic in_ready; // input handshake with upstream logic div_valid, sqrt_valid; // input signalling with unit - logic unit_ready, unit_done, unit_done_q; // status signals from unit instance + logic unit_ready, unit_done; // status signals from unit instance logic op_starting; // high in the cycle a new operation starts logic out_valid, out_ready; // output handshake with downstream logic unit_busy; // valid data in flight - logic 
simd_synch_done; // FSM states typedef enum logic [1:0] {IDLE, BUSY, HOLD} fsm_state_e; fsm_state_e state_q, state_d; @@ -198,21 +193,8 @@ module fpnew_divsqrt_multi #( `FFL(result_aux_q, inp_pipe_aux_q[NUM_INP_REGS], op_starting, '0) `FFL(result_vec_op_q, inp_pipe_vec_op_q[NUM_INP_REGS], op_starting, '0) - // Wait for other lanes only if the operation is vectorial - assign simd_synch_done = simd_synch_done_i || ~result_vec_op_q; - - // Valid synch with other lanes - // When one divsqrt unit completes an operation, keep its done high, waiting for the other lanes - // As soon as all the lanes are over, we can clear this FF and start with a new operation - `FFLARNC(unit_done_q, unit_done, unit_done, simd_synch_done, 1'b0, clk_i, rst_ni) - // Tell the other units that this unit has finished now or in the past - assign divsqrt_done_o = (unit_done_q | unit_done) & result_vec_op_q; - - // Ready synch with other lanes - // Bring the FSM-generated ready outside the unit, to synchronize it with the other lanes - assign divsqrt_ready_o = in_ready; - // Upstream ready comes from sanitization FSM, and it is synched among all the lanes - assign inp_pipe_ready[NUM_INP_REGS] = result_vec_op_q ? simd_synch_rdy_i : in_ready; + // Upstream ready comes from FSM + assign inp_pipe_ready[NUM_INP_REGS] = in_ready; // FSM to safely apply and receive data from DIVSQRT unit always_comb begin : flag_fsm @@ -234,7 +216,7 @@ module fpnew_divsqrt_multi #( BUSY: begin unit_busy = 1'b1; // data in flight // If all the lanes are done with processing - if (simd_synch_done_i || (~result_vec_op_q && unit_done)) begin + if (unit_done) begin out_valid = 1'b1; // try to commit result downstream // If downstream accepts our result if (out_ready) begin @@ -305,22 +287,6 @@ module fpnew_divsqrt_multi #( // Adjust result width and fix FP8 assign adjusted_result = result_is_fp8_q ? 
unit_result >> 8 : unit_result; - // Hold the result when one lane has finished execution, except when all the lanes finish together, - // or the operation is not vectorial, and the result can be accepted downstream - assign hold_en = unit_done & (~simd_synch_done_i | ~out_ready) & ~(~result_vec_op_q & out_ready); - // The Hold register (load, no reset) - `FFLNR(held_result_q, adjusted_result, hold_en, clk_i) - `FFLNR(held_status_q, unit_status, hold_en, clk_i) - - // -------------- - // Output Select - // -------------- - logic [WIDTH-1:0] result_d; - fpnew_pkg::status_t status_d; - // Prioritize hold register data - assign result_d = unit_done_q ? held_result_q : adjusted_result; - assign status_d = unit_done_q ? held_status_q : unit_status; - // ---------------- // Output Pipeline // ---------------- @@ -335,8 +301,8 @@ module fpnew_divsqrt_multi #( logic [0:NUM_OUT_REGS] out_pipe_ready; // Input stage: First element of pipeline is taken from inputs - assign out_pipe_result_q[0] = result_d; - assign out_pipe_status_q[0] = status_d; + assign out_pipe_result_q[0] = adjusted_result; + assign out_pipe_status_q[0] = unit_status; assign out_pipe_tag_q[0] = result_tag_q; assign out_pipe_mask_q[0] = result_mask_q; assign out_pipe_aux_q[0] = result_aux_q; diff --git a/src/fpnew_divsqrt_th_64_multi.sv b/src/fpnew_divsqrt_th_64_multi.sv index b5fd9eb9..508d076c 100644 --- a/src/fpnew_divsqrt_th_64_multi.sv +++ b/src/fpnew_divsqrt_th_64_multi.sv @@ -39,14 +39,9 @@ module fpnew_divsqrt_th_64_multi #( input TagType tag_i, input logic mask_i, input AuxType aux_i, - input logic vectorial_op_i, // Input Handshake input logic in_valid_i, output logic in_ready_o, - output logic divsqrt_done_o, - input logic simd_synch_done_i, - output logic divsqrt_ready_o, - input logic simd_synch_rdy_i, input logic flush_i, // Output signals output logic [WIDTH-1:0] result_o, @@ -95,7 +90,6 @@ module fpnew_divsqrt_th_64_multi #( TagType [0:NUM_INP_REGS] inp_pipe_tag_q; logic [0:NUM_INP_REGS] 
inp_pipe_mask_q; AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; - logic [0:NUM_INP_REGS] inp_pipe_vec_op_q; logic [0:NUM_INP_REGS] inp_pipe_valid_q; // Ready signal is combinatorial for all stages logic [0:NUM_INP_REGS] inp_pipe_ready; @@ -108,7 +102,6 @@ module fpnew_divsqrt_th_64_multi #( assign inp_pipe_tag_q[0] = tag_i; assign inp_pipe_mask_q[0] = mask_i; assign inp_pipe_aux_q[0] = aux_i; - assign inp_pipe_vec_op_q[0] = vectorial_op_i; assign inp_pipe_valid_q[0] = in_valid_i; // Input stage: Propagate pipeline ready signal to upstream circuitry assign in_ready_o = inp_pipe_ready[0]; @@ -132,7 +125,6 @@ module fpnew_divsqrt_th_64_multi #( `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(inp_pipe_mask_q[i+1], inp_pipe_mask_q[i], reg_ena, '0) `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0)) - `FFL(inp_pipe_vec_op_q[i+1], inp_pipe_vec_op_q[i], reg_ena, AuxType'('0)) end // Output stage: assign selected pipe outputs to signals for later use assign operands_q = inp_pipe_operands_q[NUM_INP_REGS]; @@ -184,11 +176,11 @@ module fpnew_divsqrt_th_64_multi #( logic in_ready; // input handshake with upstream logic div_valid, sqrt_valid; // input signalling with unit - logic unit_ready, unit_done, unit_done_q; // status signals from unit instance + logic unit_ready, unit_done; // status signals from unit instance logic op_starting; // high in the cycle a new operation starts logic out_valid, out_ready; // output handshake with downstream logic unit_busy; // valid data in flight - logic simd_synch_done; + // FSM states typedef enum logic [1:0] {IDLE, BUSY, HOLD} fsm_state_e; fsm_state_e state_q, state_d; @@ -203,29 +195,13 @@ module fpnew_divsqrt_th_64_multi #( TagType result_tag_q; logic result_mask_q; AuxType result_aux_q; - logic result_vec_op_q; // Fill the registers everytime a valid operation arrives (load FF, active low asynch rst) `FFL(result_tag_q, inp_pipe_tag_q[NUM_INP_REGS], op_starting, '0) `FFL(result_mask_q, 
inp_pipe_mask_q[NUM_INP_REGS],op_starting, '0) `FFL(result_aux_q, inp_pipe_aux_q[NUM_INP_REGS], op_starting, '0) - `FFL(result_vec_op_q, inp_pipe_vec_op_q[NUM_INP_REGS], op_starting, '0) - - // Wait for other lanes only if the operation is vectorial - assign simd_synch_done = simd_synch_done_i || ~result_vec_op_q; - - // Valid synch with other lanes - // When one divsqrt unit completes an operation, keep its done high, waiting for the other lanes - // As soon as all the lanes are over, we can clear this FF and start with a new operation - `FFLARNC(unit_done_q, unit_done, unit_done, simd_synch_done, 1'b0, clk_i, rst_ni); - // Tell the other units that this unit has finished now or in the past - assign divsqrt_done_o = (unit_done_q | unit_done) & result_vec_op_q; - // Ready synch with other lanes - // Bring the FSM-generated ready outside the unit, to synchronize it with the other lanes - assign divsqrt_ready_o = in_ready; - // Upstream ready comes from sanitization FSM, and it is synched among all the lanes - assign inp_pipe_ready[NUM_INP_REGS] = result_vec_op_q ? 
simd_synch_rdy_i : in_ready; + assign inp_pipe_ready[NUM_INP_REGS] = in_ready; // FSM to safely apply and receive data from DIVSQRT unit always_comb begin : flag_fsm @@ -247,7 +223,7 @@ module fpnew_divsqrt_th_64_multi #( BUSY: begin unit_busy = 1'b1; // data in flight // If all the lanes are done with processing - if (simd_synch_done_i || (~result_vec_op_q && unit_done)) begin + if (unit_done) begin out_valid = 1'b1; // try to commit result downstream // If downstream accepts our result if (out_ready) begin @@ -413,22 +389,6 @@ module fpnew_divsqrt_th_64_multi #( assign unit_ready = !vfdsu_dp_fdiv_busy; - // Hold the result when one lane has finished execution, except when all the lanes finish together, - // or the operation is not vectorial, and the result can be accepted downstream - assign hold_en = unit_done & (~simd_synch_done_i | ~out_ready) & ~(~result_vec_op_q & out_ready); - // The Hold register (load, no reset) - `FFLNR(held_result_q, unit_result, hold_en, clk_i) - `FFLNR(held_status_q, unit_status, hold_en, clk_i) - - // -------------- - // Output Select - // -------------- - logic [WIDTH-1:0] result_d; - fpnew_pkg::status_t status_d; - // Prioritize hold register data - assign result_d[WIDTH-1:0] = unit_done_q ? held_result_q[WIDTH-1:0] : unit_result[WIDTH-1:0]; - assign status_d = unit_done_q ? 
held_status_q : unit_status; - // ---------------- // Output Pipeline // ---------------- @@ -443,8 +403,8 @@ module fpnew_divsqrt_th_64_multi #( logic [0:NUM_OUT_REGS] out_pipe_ready; // Input stage: First element of pipeline is taken from inputs - assign out_pipe_result_q[0] = result_d; - assign out_pipe_status_q[0] = status_d; + assign out_pipe_result_q[0] = unit_result; + assign out_pipe_status_q[0] = unit_status; assign out_pipe_tag_q[0] = result_tag_q; assign out_pipe_mask_q[0] = result_mask_q; assign out_pipe_aux_q[0] = result_aux_q; From 7332f2c337940253fcbf80fabd42f1aa01733cda Mon Sep 17 00:00:00 2001 From: Maurus Item Date: Thu, 13 Jun 2024 14:24:30 +0200 Subject: [PATCH 08/14] Converted division to use shared FSM. --- src/fpnew_divsqrt_multi.sv | 210 ++++++----------------------- src/fpnew_divsqrt_th_32.sv | 218 ++++++------------------------- src/fpnew_divsqrt_th_64_multi.sv | 212 +++++++----------------------- 3 files changed, 126 insertions(+), 514 deletions(-) diff --git a/src/fpnew_divsqrt_multi.sv b/src/fpnew_divsqrt_multi.sv index 44a030a1..71dfe5b7 100644 --- a/src/fpnew_divsqrt_multi.sv +++ b/src/fpnew_divsqrt_multi.sv @@ -20,8 +20,6 @@ module fpnew_divsqrt_multi #( // FPU configuration parameter int unsigned NumPipeRegs = 0, parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::AFTER, - parameter type TagType = logic, - parameter type AuxType = logic, // Do not change localparam int unsigned WIDTH = fpnew_pkg::max_fp_width(FpFmtConfig), localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS @@ -34,26 +32,17 @@ module fpnew_divsqrt_multi #( input fpnew_pkg::roundmode_e rnd_mode_i, input fpnew_pkg::operation_e op_i, input fpnew_pkg::fp_format_e dst_fmt_i, - input TagType tag_i, input logic mask_i, - input AuxType aux_i, - input logic vectorial_op_i, - // Input Handshake - input logic in_valid_i, - output logic in_ready_o, - input logic flush_i, // Output signals output logic [WIDTH-1:0] result_o, output fpnew_pkg::status_t status_o, 
output logic extension_bit_o, - output TagType tag_o, output logic mask_o, - output AuxType aux_o, - // Output handshake - output logic out_valid_o, - input logic out_ready_i, - // Indication of valid data in flight - output logic busy_o + // External Register Control + input logic flush_i, + input logic[NumPipeRegs-1:0] reg_enable_i, + input logic fsm_start_i, + output logic fsm_ready_o ); // ---------- @@ -79,61 +68,39 @@ module fpnew_divsqrt_multi #( fpnew_pkg::roundmode_e rnd_mode_q; fpnew_pkg::operation_e op_q; fpnew_pkg::fp_format_e dst_fmt_q; - logic in_valid_q; // Input pipeline signals, index i holds signal after i register stages logic [0:NUM_INP_REGS][1:0][WIDTH-1:0] inp_pipe_operands_q; fpnew_pkg::roundmode_e [0:NUM_INP_REGS] inp_pipe_rnd_mode_q; fpnew_pkg::operation_e [0:NUM_INP_REGS] inp_pipe_op_q; fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_dst_fmt_q; - TagType [0:NUM_INP_REGS] inp_pipe_tag_q; logic [0:NUM_INP_REGS] inp_pipe_mask_q; - AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; - logic [0:NUM_INP_REGS] inp_pipe_vec_op_q; - logic [0:NUM_INP_REGS] inp_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_INP_REGS] inp_pipe_ready; // Input stage: First element of pipeline is taken from inputs assign inp_pipe_operands_q[0] = operands_i; assign inp_pipe_rnd_mode_q[0] = rnd_mode_i; assign inp_pipe_op_q[0] = op_i; assign inp_pipe_dst_fmt_q[0] = dst_fmt_i; - assign inp_pipe_tag_q[0] = tag_i; assign inp_pipe_mask_q[0] = mask_i; - assign inp_pipe_aux_q[0] = aux_i; - assign inp_pipe_vec_op_q[0] = vectorial_op_i; - assign inp_pipe_valid_q[0] = in_valid_i; - // Input stage: Propagate pipeline ready signal to upstream circuitry - assign in_ready_o = inp_pipe_ready[0]; + // Generate the register stages for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. 
if the next stage is ready for our data - // 2. if the next stage only holds a bubble (not valid) -> we can pop it - assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0) `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) `FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::FMADD) `FFL(inp_pipe_dst_fmt_q[i+1], inp_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) - `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(inp_pipe_mask_q[i+1], inp_pipe_mask_q[i], reg_ena, '0) - `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0)) - `FFL(inp_pipe_vec_op_q[i+1], inp_pipe_vec_op_q[i], reg_ena, AuxType'('0)) end // Output stage: assign selected pipe outputs to signals for later use assign operands_q = inp_pipe_operands_q[NUM_INP_REGS]; assign rnd_mode_q = inp_pipe_rnd_mode_q[NUM_INP_REGS]; assign op_q = inp_pipe_op_q[NUM_INP_REGS]; assign dst_fmt_q = inp_pipe_dst_fmt_q[NUM_INP_REGS]; - assign in_valid_q = inp_pipe_valid_q[NUM_INP_REGS]; // ----------------- // Input processing @@ -160,112 +127,19 @@ module fpnew_divsqrt_multi #( divsqrt_operands[1] = input_is_fp8 ? 
operands_q[1] << 8 : operands_q[1]; end - // ------------ - // Control FSM - // ------------ - - logic in_ready; // input handshake with upstream logic div_valid, sqrt_valid; // input signalling with unit - logic unit_ready, unit_done; // status signals from unit instance - logic op_starting; // high in the cycle a new operation starts - logic out_valid, out_ready; // output handshake with downstream - logic unit_busy; // valid data in flight - // FSM states - typedef enum logic [1:0] {IDLE, BUSY, HOLD} fsm_state_e; - fsm_state_e state_q, state_d; // Valids are gated by the FSM ready. Invalid input ops run a sqrt to not lose illegal instr. - assign div_valid = in_valid_q & (op_q == fpnew_pkg::DIV) & in_ready & ~flush_i; - assign sqrt_valid = in_valid_q & (op_q != fpnew_pkg::DIV) & in_ready & ~flush_i; - assign op_starting = div_valid | sqrt_valid; - - // Hold additional information while the operation is in progress - logic result_is_fp8_q; - TagType result_tag_q; - logic result_mask_q; - AuxType result_aux_q; - logic result_vec_op_q; - - // Fill the registers everytime a valid operation arrives (load FF, active low asynch rst) - `FFL(result_is_fp8_q, input_is_fp8, op_starting, '0) - `FFL(result_tag_q, inp_pipe_tag_q[NUM_INP_REGS], op_starting, '0) - `FFL(result_mask_q, inp_pipe_mask_q[NUM_INP_REGS],op_starting, '0) - `FFL(result_aux_q, inp_pipe_aux_q[NUM_INP_REGS], op_starting, '0) - `FFL(result_vec_op_q, inp_pipe_vec_op_q[NUM_INP_REGS], op_starting, '0) - - // Upstream ready comes from FSM - assign inp_pipe_ready[NUM_INP_REGS] = in_ready; - - // FSM to safely apply and receive data from DIVSQRT unit - always_comb begin : flag_fsm - // Default assignments - in_ready = 1'b0; - out_valid = 1'b0; - unit_busy = 1'b0; - state_d = state_q; - - unique case (state_q) - // Waiting for work - IDLE: begin - in_ready = 1'b1; // we're ready - if (in_valid_q && unit_ready) begin // New work arrives - state_d = BUSY; // go into processing state - end - end - // Operation in 
progress - BUSY: begin - unit_busy = 1'b1; // data in flight - // If all the lanes are done with processing - if (unit_done) begin - out_valid = 1'b1; // try to commit result downstream - // If downstream accepts our result - if (out_ready) begin - state_d = IDLE; // we anticipate going back to idling.. - in_ready = 1'b1; // we acknowledge the instruction - if (in_valid_q && unit_ready) begin // ..unless new work comes in - state_d = BUSY; // and stay busy with it - end - // Otherwise if downstream is not ready for the result - end else begin - state_d = HOLD; // wait for the pipeline to take the data - end - end - end - // Waiting with valid result for downstream - HOLD: begin - unit_busy = 1'b1; // data in flight - out_valid = 1'b1; // try to commit result downstream - // If the result is accepted by downstream - if (out_ready) begin - state_d = IDLE; // go back to idle.. - if (in_valid_q && unit_ready) begin // ..unless new work comes in - in_ready = 1'b1; // acknowledge the new transaction - state_d = BUSY; // will be busy with the next instruction - end - end - end - // fall into idle state otherwise - default: state_d = IDLE; - endcase - - // Flushing overrides the other actions - if (flush_i) begin - unit_busy = 1'b0; // data is invalidated - out_valid = 1'b0; // cancel any valid data - state_d = IDLE; // go to default state - end - end - - // FSM status register (asynch active low reset) - `FF(state_q, state_d, IDLE) + assign div_valid = (op_q == fpnew_pkg::DIV) & fsm_start_i; + assign sqrt_valid = (op_q != fpnew_pkg::DIV) & fsm_start_i; // ----------------- // DIVSQRT instance // ----------------- - logic [63:0] unit_result; - logic [WIDTH-1:0] adjusted_result, held_result_q; - fpnew_pkg::status_t unit_status, held_status_q; - logic hold_en; + logic [63:0] raw_unit_result; + logic [WIDTH-1:0] unit_result; + logic unit_done; + fpnew_pkg::status_t unit_status; div_sqrt_top_mvp i_divsqrt_lei ( .Clk_CI ( clk_i ), @@ -278,14 +152,28 @@ module 
fpnew_divsqrt_multi #( .Precision_ctl_SI ( '0 ), .Format_sel_SI ( divsqrt_fmt ), .Kill_SI ( flush_i ), - .Result_DO ( unit_result ), + .Result_DO ( raw_unit_result ), .Fflags_SO ( unit_status ), - .Ready_SO ( unit_ready ), + .Ready_SO ( fsm_ready_o ), .Done_SO ( unit_done ) ); // Adjust result width and fix FP8 - assign adjusted_result = result_is_fp8_q ? unit_result >> 8 : unit_result; + assign unit_result = input_is_fp8 ? raw_unit_result >> 8 : raw_unit_result; + + // ---------------- + // Hold Result + // ---------------- + logic [WIDTH-1:0] held_result, out_result; + fpnew_pkg::status_t held_status, out_status; + logic out_mask; + + `FFL(held_result, unit_result, unit_done, '0); + `FFL(held_status, unit_status, unit_done, '0); + `FFL(out_mask, inp_pipe_mask_q[NUM_INP_REGS], fsm_start_i, '0); // Mask is stored on start -> Dont need a bypass mux + + assign out_result = unit_done ? unit_result : held_result; + assign out_status = unit_done ? unit_status : held_status; // ---------------- // Output Pipeline @@ -293,50 +181,28 @@ module fpnew_divsqrt_multi #( // Output pipeline signals, index i holds signal after i register stages logic [0:NUM_OUT_REGS][WIDTH-1:0] out_pipe_result_q; fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q; - TagType [0:NUM_OUT_REGS] out_pipe_tag_q; logic [0:NUM_OUT_REGS] out_pipe_mask_q; - AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; - logic [0:NUM_OUT_REGS] out_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_OUT_REGS] out_pipe_ready; // Input stage: First element of pipeline is taken from inputs - assign out_pipe_result_q[0] = adjusted_result; - assign out_pipe_status_q[0] = unit_status; - assign out_pipe_tag_q[0] = result_tag_q; - assign out_pipe_mask_q[0] = result_mask_q; - assign out_pipe_aux_q[0] = result_aux_q; - assign out_pipe_valid_q[0] = out_valid; - // Input stage: Propagate pipeline ready signal to inside pipe - assign out_ready = out_pipe_ready[0]; + assign out_pipe_result_q[0] = out_result; + 
assign out_pipe_status_q[0] = out_status; + assign out_pipe_mask_q[0] = out_mask; + // Generate the register stages for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. if the next stage only holds a bubble (not valid) -> we can pop it - assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[NUM_INP_REGS + i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) - `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(out_pipe_mask_q[i+1], out_pipe_mask_q[i], reg_ena, '0) - `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) end - // Output stage: Ready travels backwards from output side, driven by downstream circuitry - assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i; + // Output stage: assign module outputs assign result_o = out_pipe_result_q[NUM_OUT_REGS]; assign status_o = out_pipe_status_q[NUM_OUT_REGS]; assign extension_bit_o = 1'b1; // always NaN-Box result - assign tag_o = out_pipe_tag_q[NUM_OUT_REGS]; assign mask_o = out_pipe_mask_q[NUM_OUT_REGS]; - assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; - assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; - assign busy_o = (| {inp_pipe_valid_q, unit_busy, out_pipe_valid_q}); endmodule diff --git a/src/fpnew_divsqrt_th_32.sv 
b/src/fpnew_divsqrt_th_32.sv index 71d23068..f4f6bb44 100644 --- a/src/fpnew_divsqrt_th_32.sv +++ b/src/fpnew_divsqrt_th_32.sv @@ -23,8 +23,6 @@ module fpnew_divsqrt_th_32 #( // FPU configuration parameter int unsigned NumPipeRegs = 0, parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, - parameter type TagType = logic, - parameter type AuxType = logic, // Do not change localparam int unsigned WIDTH = 32, localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS @@ -36,25 +34,17 @@ module fpnew_divsqrt_th_32 #( input logic [NUM_FORMATS-1:0][1:0] is_boxed_i, // 2 operands input fpnew_pkg::roundmode_e rnd_mode_i, input fpnew_pkg::operation_e op_i, - input TagType tag_i, input logic mask_i, - input AuxType aux_i, - // Input Handshake - input logic in_valid_i, - output logic in_ready_o, - input logic flush_i, // Output signals output logic [WIDTH-1:0] result_o, output fpnew_pkg::status_t status_o, output logic extension_bit_o, - output TagType tag_o, output logic mask_o, - output AuxType aux_o, - // Output handshake - output logic out_valid_o, - input logic out_ready_i, - // Indication of valid data in flight - output logic busy_o + // External Register Control + input logic flush_i, + input logic[NumPipeRegs-1:0] reg_enable_i, + input logic fsm_start_i, + output logic fsm_ready_o ); // ---------- @@ -79,73 +69,45 @@ module fpnew_divsqrt_th_32 #( logic [1:0][WIDTH-1:0] operands_q; fpnew_pkg::roundmode_e rnd_mode_q; fpnew_pkg::operation_e op_q; - logic in_valid_q; // Input pipeline signals, index i holds signal after i register stages logic [0:NUM_INP_REGS][1:0][WIDTH-1:0] inp_pipe_operands_q; fpnew_pkg::roundmode_e [0:NUM_INP_REGS] inp_pipe_rnd_mode_q; fpnew_pkg::operation_e [0:NUM_INP_REGS] inp_pipe_op_q; - TagType [0:NUM_INP_REGS] inp_pipe_tag_q; logic [0:NUM_INP_REGS] inp_pipe_mask_q; - AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; - logic [0:NUM_INP_REGS] inp_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_INP_REGS] 
inp_pipe_ready; // Input stage: First element of pipeline is taken from inputs assign inp_pipe_operands_q[0] = operands_i; assign inp_pipe_rnd_mode_q[0] = rnd_mode_i; assign inp_pipe_op_q[0] = op_i; - assign inp_pipe_tag_q[0] = tag_i; assign inp_pipe_mask_q[0] = mask_i; - assign inp_pipe_aux_q[0] = aux_i; - assign inp_pipe_valid_q[0] = in_valid_i; - // Input stage: Propagate pipeline ready signal to updtream circuitry - assign in_ready_o = inp_pipe_ready[0]; + // Generate the register stages for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. if the next stage only holds a bubble (not valid) -> we can pop it - assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0) `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) `FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::FMADD) - `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(inp_pipe_mask_q[i+1], inp_pipe_mask_q[i], reg_ena, '0) - `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0)) end // Output stage: assign selected pipe outputs to signals for later use assign operands_q = inp_pipe_operands_q[NUM_INP_REGS]; assign rnd_mode_q = inp_pipe_rnd_mode_q[NUM_INP_REGS]; assign op_q = 
inp_pipe_op_q[NUM_INP_REGS]; - assign in_valid_q = inp_pipe_valid_q[NUM_INP_REGS]; - // ------------ - // Control FSM - // ------------ - logic in_ready; // input handshake with upstream + // ----------------- + // Input processing + // ----------------- logic div_op, sqrt_op; // input signalling with unit - logic unit_ready_q, unit_done; // status signals from unit instance logic op_starting; // high in the cycle a new operation starts - logic out_valid, out_ready; // output handshake with downstream - logic hold_result; // whether to put result into hold register - logic data_is_held; // data in hold register is valid - logic unit_busy; // valid data in flight - // FSM states - typedef enum logic [1:0] {IDLE, BUSY, HOLD} fsm_state_e; - fsm_state_e state_q, state_d; // Operations are gated by the FSM ready. Invalid input ops run a sqrt to not lose illegal instr. - assign div_op = in_valid_q & (op_q == fpnew_pkg::DIV) & in_ready & ~flush_i; //in_ready delete, valid independent of ready - assign sqrt_op = in_valid_q & (op_q == fpnew_pkg::SQRT) & in_ready & ~flush_i; + assign div_op = (op_q == fpnew_pkg::DIV) & fsm_start_i; //in_ready delete, valid independent of ready + assign sqrt_op = (op_q == fpnew_pkg::SQRT) & fsm_start_i; assign op_starting = div_op | sqrt_op; //start computing or handshake, modify tb handshake right logic fdsu_fpu_ex1_stall, fdsu_fpu_ex1_stall_q; @@ -159,92 +121,11 @@ module fpnew_divsqrt_th_32 #( `FFL(div_op_q, div_op_d, 1'b1, '0) `FFL(sqrt_op_q, sqrt_op_d, 1'b1, '0) - // FSM to safely apply and receive data from DIVSQRT unit - always_comb begin : flag_fsm - // Default assignments - in_ready = 1'b0; - out_valid = 1'b0; - hold_result = 1'b0; - data_is_held = 1'b0; - unit_busy = 1'b0; - state_d = state_q; - inp_pipe_ready[NUM_INP_REGS] = unit_ready_q; - - unique case (state_q) - // Waiting for work - IDLE: begin - // in_ready = 1'b1; // we're ready - in_ready = unit_ready_q; //*** - if (in_valid_q && unit_ready_q) begin // New work arrives - 
inp_pipe_ready[NUM_INP_REGS] = unit_ready_q && !fdsu_fpu_ex1_stall; - state_d = BUSY; // go into processing state - end - end - // Operation in progress - BUSY: begin - inp_pipe_ready[NUM_INP_REGS] = fdsu_fpu_ex1_stall_q; - unit_busy = 1'b1; // data in flight - // If the unit is done with processing - if (unit_done) begin - out_valid = 1'b1; // try to commit result downstream - // If downstream accepts our result - if (out_ready) begin - state_d = IDLE; // we anticipate going back to idling.. - if (in_valid_q && unit_ready_q) begin // ..unless new work comes in - in_ready = 1'b1; // we acknowledge the instruction - state_d = BUSY; // and stay busy with it - end - // Otherwise if downstream is not ready for the result - end else begin - hold_result = 1'b1; // activate the hold register - state_d = HOLD; // wait for the pipeline to take the data - end - end - end - // Waiting with valid result for downstream - HOLD: begin - unit_busy = 1'b1; // data in flight - data_is_held = 1'b1; // data in hold register is valid - out_valid = 1'b1; // try to commit result downstream - // If the result is accepted by downstream - if (out_ready) begin - state_d = IDLE; // go back to idle.. 
- if (in_valid_q && unit_ready_q) begin // ..unless new work comes in - in_ready = 1'b1; // acknowledge the new transaction - state_d = BUSY; // will be busy with the next instruction - end - end - end - // fall into idle state otherwise - default: state_d = IDLE; - endcase - - // Flushing overrides the other actions - if (flush_i) begin - unit_busy = 1'b0; // data is invalidated - out_valid = 1'b0; // cancel any valid data - state_d = IDLE; // go to default state - end - end - - // FSM status register (asynch active low reset) - `FF(state_q, state_d, IDLE) - - // Hold additional information while the operation is in progress - TagType result_tag_q; - AuxType result_aux_q; - logic result_mask_q; - - // Fill the registers everytime a valid operation arrives (load FF, active low asynch rst) - `FFL(result_tag_q, inp_pipe_tag_q[NUM_INP_REGS], op_starting, '0) - `FFL(result_mask_q, inp_pipe_mask_q[NUM_INP_REGS], op_starting, '0) - `FFL(result_aux_q, inp_pipe_aux_q[NUM_INP_REGS], op_starting, '0) - // ----------------- // DIVSQRT instance // ----------------- - logic [WIDTH-1:0] unit_result, held_result_q; - fpnew_pkg::status_t unit_status, held_status_q; + logic [WIDTH-1:0] unit_result; + fpnew_pkg::status_t unit_status; // thead define fdsu module's input and output logic ctrl_fdsu_ex1_sel; @@ -276,7 +157,8 @@ module fpnew_divsqrt_th_32 #( logic [4:0] fpu_idu_fwd_fflags; logic fpu_idu_fwd_vld; - logic unit_ready_d; + logic unit_done; // status signals from unit instance + logic unit_ready_d, unit_ready_q; // unit_ready_q related to state machine, different under special and normal cases. 
always_comb begin @@ -295,6 +177,8 @@ module fpnew_divsqrt_th_32 #( `FFL(unit_ready_q, unit_ready_d, 1'b1, 1'b1) + assign fsm_ready_o = unit_ready_q && !fdsu_fpu_ex1_stall; + // determine input of time to select operands always_comb begin ctrl_fdsu_ex1_sel = 1'b0; @@ -408,18 +292,23 @@ module fpnew_divsqrt_th_32 #( unit_done = fpu_idu_fwd_vld; end + // ---------------- + // Hold Result + // ---------------- + + // Hold additional information while the operation is in progress + logic [WIDTH-1:0] held_result, out_result; + fpnew_pkg::status_t held_status, out_status; + logic out_mask; + + // Fill the registers everytime a valid operation arrives (load FF, active low asynch rst) // The Hold register (load, no reset) - `FFLNR(held_result_q, unit_result, hold_result, clk_i) - `FFLNR(held_status_q, unit_status, hold_result, clk_i) - - // -------------- - // Output Select - // -------------- - logic [WIDTH-1:0] result_d; - fpnew_pkg::status_t status_d; - // Prioritize hold register data - assign result_d = data_is_held ? held_result_q : unit_result; - assign status_d = data_is_held ? held_status_q : unit_status; + `FFL(held_result, unit_result, unit_done, '0); + `FFL(held_status, unit_status, unit_done, '0); + `FFL(out_mask, inp_pipe_mask_q[NUM_INP_REGS], op_starting, '0) + + assign out_result = unit_done ? unit_result : held_result; + assign out_status = unit_done ? 
unit_status : held_status; // ---------------- // Output Pipeline @@ -427,50 +316,29 @@ module fpnew_divsqrt_th_32 #( // Output pipeline signals, index i holds signal after i register stages logic [0:NUM_OUT_REGS][WIDTH-1:0] out_pipe_result_q; fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q; - TagType [0:NUM_OUT_REGS] out_pipe_tag_q; - AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; logic [0:NUM_OUT_REGS] out_pipe_mask_q; - logic [0:NUM_OUT_REGS] out_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_OUT_REGS] out_pipe_ready; // Input stage: First element of pipeline is taken from inputs - assign out_pipe_result_q[0] = result_d; - assign out_pipe_status_q[0] = status_d; - assign out_pipe_tag_q[0] = result_tag_q; - assign out_pipe_mask_q[0] = result_mask_q; - assign out_pipe_aux_q[0] = result_aux_q; - assign out_pipe_valid_q[0] = out_valid; - // Input stage: Propagate pipeline ready signal to inside pipe - assign out_ready = out_pipe_ready[0]; + assign out_pipe_result_q[0] = out_result; + assign out_pipe_status_q[0] = out_status; + assign out_pipe_mask_q[0] = out_mask; + // Generate the register stages for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it - assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[NUM_INP_REGS + i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) - `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(out_pipe_mask_q[i+1], out_pipe_mask_q[i], reg_ena, '0) - `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) end - // Output stage: Ready travels backwards from output side, driven by downstream circuitry - assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i; + // Output stage: assign module outputs assign result_o = out_pipe_result_q[NUM_OUT_REGS]; assign status_o = out_pipe_status_q[NUM_OUT_REGS]; assign extension_bit_o = 1'b1; // always NaN-Box result - assign tag_o = out_pipe_tag_q[NUM_OUT_REGS]; assign mask_o = out_pipe_mask_q[NUM_OUT_REGS]; - assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; - assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; - assign busy_o = (| {inp_pipe_valid_q, unit_busy, out_pipe_valid_q}); + endmodule diff --git a/src/fpnew_divsqrt_th_64_multi.sv b/src/fpnew_divsqrt_th_64_multi.sv index 508d076c..8f1d38b0 100644 --- a/src/fpnew_divsqrt_th_64_multi.sv +++ b/src/fpnew_divsqrt_th_64_multi.sv @@ -13,7 +13,7 @@ // Authors: Stefan Mach // Roman Marquart - +// Maurus Item `include "common_cells/registers.svh" @@ -22,8 +22,6 @@ module fpnew_divsqrt_th_64_multi #( // FPU configuration parameter int unsigned NumPipeRegs = 0, 
parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::AFTER, - parameter type TagType = logic, - parameter type AuxType = logic, // Do not change localparam int unsigned WIDTH = fpnew_pkg::max_fp_width(FpFmtConfig), localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS @@ -36,25 +34,17 @@ module fpnew_divsqrt_th_64_multi #( input fpnew_pkg::roundmode_e rnd_mode_i, input fpnew_pkg::operation_e op_i, input fpnew_pkg::fp_format_e dst_fmt_i, - input TagType tag_i, input logic mask_i, - input AuxType aux_i, - // Input Handshake - input logic in_valid_i, - output logic in_ready_o, - input logic flush_i, // Output signals output logic [WIDTH-1:0] result_o, output fpnew_pkg::status_t status_o, output logic extension_bit_o, - output TagType tag_o, output logic mask_o, - output AuxType aux_o, - // Output handshake - output logic out_valid_o, - input logic out_ready_i, - // Indication of valid data in flight - output logic busy_o + // External Register Control + input logic flush_i, + input logic[NumPipeRegs-1:0] reg_enable_i, + input logic fsm_start_i, + output logic fsm_ready_o ); // ---------- @@ -80,58 +70,39 @@ module fpnew_divsqrt_th_64_multi #( fpnew_pkg::roundmode_e rnd_mode_q; fpnew_pkg::operation_e op_q; fpnew_pkg::fp_format_e dst_fmt_q; - logic in_valid_q; // Input pipeline signals, index i holds signal after i register stages logic [0:NUM_INP_REGS][1:0][WIDTH-1:0] inp_pipe_operands_q; fpnew_pkg::roundmode_e [0:NUM_INP_REGS] inp_pipe_rnd_mode_q /*verilator split_var */; fpnew_pkg::operation_e [0:NUM_INP_REGS] inp_pipe_op_q; fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_dst_fmt_q; - TagType [0:NUM_INP_REGS] inp_pipe_tag_q; logic [0:NUM_INP_REGS] inp_pipe_mask_q; - AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; - logic [0:NUM_INP_REGS] inp_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_INP_REGS] inp_pipe_ready; // Input stage: First element of pipeline is taken from inputs assign inp_pipe_operands_q[0] = operands_i; assign 
inp_pipe_rnd_mode_q[0] = rnd_mode_i; assign inp_pipe_op_q[0] = op_i; assign inp_pipe_dst_fmt_q[0] = dst_fmt_i; - assign inp_pipe_tag_q[0] = tag_i; assign inp_pipe_mask_q[0] = mask_i; - assign inp_pipe_aux_q[0] = aux_i; - assign inp_pipe_valid_q[0] = in_valid_i; - // Input stage: Propagate pipeline ready signal to upstream circuitry - assign in_ready_o = inp_pipe_ready[0]; + // Generate the register stages for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. if the next stage only holds a bubble (not valid) -> we can pop it - assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0) `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) `FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::FMADD) `FFL(inp_pipe_dst_fmt_q[i+1], inp_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) - `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(inp_pipe_mask_q[i+1], inp_pipe_mask_q[i], reg_ena, '0) - `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0)) end // Output stage: assign selected pipe outputs to signals for later use assign operands_q = inp_pipe_operands_q[NUM_INP_REGS]; assign rnd_mode_q = inp_pipe_rnd_mode_q[NUM_INP_REGS]; assign op_q = 
inp_pipe_op_q[NUM_INP_REGS]; assign dst_fmt_q = inp_pipe_dst_fmt_q[NUM_INP_REGS]; - assign in_valid_q = inp_pipe_valid_q[NUM_INP_REGS]; // ----------------- // Input processing @@ -170,108 +141,23 @@ module fpnew_divsqrt_th_64_multi #( $fatal(1, "DivSqrt THMULTI: Unsupported WIDTH (the supported width are 64, 32, 16)"); end - // ------------ - // Control FSM - // ------------ - - logic in_ready; // input handshake with upstream logic div_valid, sqrt_valid; // input signalling with unit - logic unit_ready, unit_done; // status signals from unit instance - logic op_starting; // high in the cycle a new operation starts - logic out_valid, out_ready; // output handshake with downstream - logic unit_busy; // valid data in flight // FSM states typedef enum logic [1:0] {IDLE, BUSY, HOLD} fsm_state_e; fsm_state_e state_q, state_d; // Valids are gated by the FSM ready. Invalid input ops run a sqrt to not lose illegal instr. - assign div_valid = in_valid_q & (op_q == fpnew_pkg::DIV) & in_ready & ~flush_i; - assign sqrt_valid = in_valid_q & (op_q != fpnew_pkg::DIV) & in_ready & ~flush_i; - assign op_starting = div_valid | sqrt_valid; - - // Hold additional information while the operation is in progress - - TagType result_tag_q; - logic result_mask_q; - AuxType result_aux_q; - - // Fill the registers everytime a valid operation arrives (load FF, active low asynch rst) - `FFL(result_tag_q, inp_pipe_tag_q[NUM_INP_REGS], op_starting, '0) - `FFL(result_mask_q, inp_pipe_mask_q[NUM_INP_REGS],op_starting, '0) - `FFL(result_aux_q, inp_pipe_aux_q[NUM_INP_REGS], op_starting, '0) - - assign inp_pipe_ready[NUM_INP_REGS] = in_ready; - - // FSM to safely apply and receive data from DIVSQRT unit - always_comb begin : flag_fsm - // Default assignments - in_ready = 1'b0; - out_valid = 1'b0; - unit_busy = 1'b0; - state_d = state_q; - - unique case (state_q) - // Waiting for work - IDLE: begin - in_ready = 1'b1; // we're ready - if (in_valid_q && unit_ready) begin // New work arrives - state_d = 
BUSY; // go into processing state - end - end - // Operation in progress - BUSY: begin - unit_busy = 1'b1; // data in flight - // If all the lanes are done with processing - if (unit_done) begin - out_valid = 1'b1; // try to commit result downstream - // If downstream accepts our result - if (out_ready) begin - state_d = IDLE; // we anticipate going back to idling.. - in_ready = 1'b1; // we acknowledge the instruction - if (in_valid_q && unit_ready) begin // ..unless new work comes in - state_d = BUSY; // and stay busy with it - end - // Otherwise if downstream is not ready for the result - end else begin - state_d = HOLD; // wait for the pipeline to take the data - end - end - end - // Waiting with valid result for downstream - HOLD: begin - unit_busy = 1'b1; // data in flight - out_valid = 1'b1; // try to commit result downstream - // If the result is accepted by downstream - if (out_ready) begin - state_d = IDLE; // go back to idle.. - if (in_valid_q && unit_ready) begin // ..unless new work comes in - in_ready = 1'b1; // acknowledge the new transaction - state_d = BUSY; // will be busy with the next instruction - end - end - end - // fall into idle state otherwise - default: state_d = IDLE; - endcase - - // Flushing overrides the other actions - if (flush_i) begin - unit_busy = 1'b0; // data is invalidated - out_valid = 1'b0; // cancel any valid data - state_d = IDLE; // go to default state - end - end - - // FSM status register (asynch active low reset) - `FF(state_q, state_d, IDLE) + assign div_valid = (op_q == fpnew_pkg::DIV) & fsm_start_i; + assign sqrt_valid = (op_q != fpnew_pkg::DIV) & fsm_start_i; // ----------------- // DIVSQRT instance // ----------------- - logic [63:0] unit_result, held_result_q; - fpnew_pkg::status_t unit_status, held_status_q; - logic hold_en; + logic unit_done; // Unit output is valid and should be saved + + logic [63:0] unit_result; + fpnew_pkg::status_t unit_status; logic vfdsu_dp_fdiv_busy; @@ -284,11 +170,11 @@ module 
fpnew_divsqrt_th_64_multi #( logic [63:0] srcf0, srcf1; // Save operands in regs, C910 saves all the following information in its regs in the next cycle. - `FFL(rm_q, rnd_mode_q, op_starting, fpnew_pkg::RNE) - `FFL(divsqrt_fmt_q, divsqrt_fmt, op_starting, '0) - `FFL(divsqrt_op_q, op_q, op_starting, fpnew_pkg::DIV) - `FFL(srcf0_q, operands_q[0], op_starting, '0) - `FFL(srcf1_q, operands_q[1], op_starting, '0) + `FFL(rm_q, rnd_mode_q, fsm_start_i, fpnew_pkg::RNE) + `FFL(divsqrt_fmt_q, divsqrt_fmt, fsm_start_i, '0) + `FFL(divsqrt_op_q, op_q, fsm_start_i, fpnew_pkg::DIV) + `FFL(srcf0_q, operands_q[0], fsm_start_i, '0) + `FFL(srcf1_q, operands_q[1], fsm_start_i, '0) // NaN-box inputs with max WIDTH if(WIDTH == 64) begin : gen_fmt_64_bits @@ -349,7 +235,7 @@ module fpnew_divsqrt_th_64_multi #( // Select func 1 cycle after div issue logic func_sel; - `FFLARNC(func_sel, 1'b1, op_starting, func_sel, 1'b0, clk_i, rst_ni) + `FFLARNC(func_sel, 1'b1, fsm_start_i, func_sel, 1'b0, clk_i, rst_ni) // Select operands 2 cycles after div issue logic op_sel; @@ -367,7 +253,7 @@ module fpnew_divsqrt_th_64_multi #( .dp_vfdsu_ex1_pipex_srcf0 ( srcf0 ), // Input for operand 0 .dp_vfdsu_ex1_pipex_srcf1 ( srcf1 ), // Input for operand 1 .dp_vfdsu_fdiv_gateclk_issue ( 1'b1 ), // Local clock enable (same as above) - .dp_vfdsu_idu_fdiv_issue ( op_starting ), // 1. Issue fdiv (FSM in ctrl) + .dp_vfdsu_idu_fdiv_issue ( fsm_start_i ), // 1. Issue fdiv (FSM in ctrl) .forever_cpuclk ( clk_i ), // Clock input .idu_vfpu_rf_pipex_func ( {3'b0, divsqrt_fmt_q, 11'b0 ,sqrt_op, div_op} ), // Defines format (bits 16,15) and operation (bits 1,0) .idu_vfpu_rf_pipex_gateclk_sel ( func_sel ), // 2. 
Select func @@ -387,7 +273,21 @@ module fpnew_divsqrt_th_64_multi #( .vfdsu_ifu_debug_pipe_busy ( ) // Debug output ); - assign unit_ready = !vfdsu_dp_fdiv_busy; + assign fsm_ready_o = !vfdsu_dp_fdiv_busy; + + // ---------------- + // Hold Result + // ---------------- + logic [63:0] held_result, out_result; + fpnew_pkg::status_t held_status, out_status; + logic out_mask; + + `FFL(held_result, unit_result, unit_done, '0); + `FFL(held_status, unit_status, unit_done, '0); + `FFL(out_mask, inp_pipe_mask_q[NUM_INP_REGS], fsm_start_i, '0); // Mask is stored on start -> Don't need a bypass mux + + assign out_result = unit_done ? unit_result : held_result; + assign out_status = unit_done ? unit_status : held_status; // ---------------- // Output Pipeline @@ -395,51 +295,29 @@ module fpnew_divsqrt_th_64_multi #( // Output pipeline signals, index i holds signal after i register stages logic [0:NUM_OUT_REGS][WIDTH-1:0] out_pipe_result_q; fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q; - TagType [0:NUM_OUT_REGS] out_pipe_tag_q; logic [0:NUM_OUT_REGS] out_pipe_mask_q; - AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; - logic [0:NUM_OUT_REGS] out_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_OUT_REGS] out_pipe_ready; // Input stage: First element of pipeline is taken from inputs - assign out_pipe_result_q[0] = unit_result; - assign out_pipe_status_q[0] = unit_status; - assign out_pipe_tag_q[0] = result_tag_q; - assign out_pipe_mask_q[0] = result_mask_q; - assign out_pipe_aux_q[0] = result_aux_q; - assign out_pipe_valid_q[0] = out_valid; - // Input stage: Propagate pipeline ready signal to inside pipe - assign out_ready = out_pipe_ready[0]; + assign out_pipe_result_q[0] = out_result; + assign out_pipe_status_q[0] = out_status; + assign out_pipe_mask_q[0] = out_mask; + // Generate the register stages for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the
ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. if the next stage only holds a bubble (not valid) -> we can pop it - assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[NUM_INP_REGS + i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) - `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(out_pipe_mask_q[i+1], out_pipe_mask_q[i], reg_ena, '0) - `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) end - // Output stage: Ready travels backwards from output side, driven by downstream circuitry - assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i; + // Output stage: assign module outputs assign result_o = out_pipe_result_q[NUM_OUT_REGS]; assign status_o = out_pipe_status_q[NUM_OUT_REGS]; assign extension_bit_o = 1'b1; // always NaN-Box result - assign tag_o = out_pipe_tag_q[NUM_OUT_REGS]; assign mask_o = out_pipe_mask_q[NUM_OUT_REGS]; - assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; - assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; - assign busy_o = (| {inp_pipe_valid_q, unit_busy, out_pipe_valid_q}); -endmodule +endmodule From f8de0df11e6fd65f11f012d0fa5beda3e40009c8 Mon Sep 17 00:00:00 2001 From: Maurus Item Date: Thu, 13 Jun 2024 15:26:46 +0200 Subject: [PATCH 09/14] Converted multiformat slice to use new aux chain --- src/fpnew_opgroup_multifmt_slice.sv | 295 +++++++++++++--------------- 1 file changed, 138 
insertions(+), 157 deletions(-) diff --git a/src/fpnew_opgroup_multifmt_slice.sv b/src/fpnew_opgroup_multifmt_slice.sv index fce88df0..9d33291d 100644 --- a/src/fpnew_opgroup_multifmt_slice.sv +++ b/src/fpnew_opgroup_multifmt_slice.sv @@ -80,12 +80,11 @@ FP8. Please use the PULP DivSqrt unit when in need of div/sqrt operations on FP8 // We will send the format information along with the data localparam int unsigned FMT_BITS = fpnew_pkg::maximum($clog2(NUM_FORMATS), $clog2(NUM_INT_FORMATS)); - localparam int unsigned AUX_BITS = FMT_BITS + 2; // also add vectorial and integer flags + localparam int unsigned AUX_BITS = FMT_BITS + 1; // add integer flags - logic [NUM_LANES-1:0] lane_in_ready, lane_out_valid, divsqrt_done, divsqrt_ready; // Handshake signals for the lanes logic vectorial_op; logic [FMT_BITS-1:0] dst_fmt; // destination format to pass along with operation - logic [AUX_BITS-1:0] aux_data; + logic [AUX_BITS-1:0] in_aux, out_aux; // aux signals to pass along with the operation // additional flags for CONV logic dst_fmt_is_int, dst_is_cpk; @@ -100,12 +99,8 @@ FP8. Please use the PULP DivSqrt unit when in need of div/sqrt operations on FP8 fpnew_pkg::status_t [NUM_LANES-1:0] lane_status; logic [NUM_LANES-1:0] lane_ext_bit; // only the first one is actually used - TagType [NUM_LANES-1:0] lane_tags; // only the first one is actually used logic [NUM_LANES-1:0] lane_masks; - logic [NUM_LANES-1:0][AUX_BITS-1:0] lane_aux; // only the first one is actually used - logic [NUM_LANES-1:0] lane_busy; // dito - logic result_is_vector; logic [FMT_BITS-1:0] result_fmt; logic result_fmt_is_int, result_is_cpk; logic [1:0] result_vec_op; // info for vectorial results (for packing) @@ -115,7 +110,6 @@ FP8. 
Please use the PULP DivSqrt unit when in need of div/sqrt operations on FP8 // ----------- // Input Side // ----------- - assign in_ready_o = lane_in_ready[0]; // Upstream ready is given by first lane assign vectorial_op = vectorial_op_i & EnableVectors; // only do vectorial stuff if enabled // Cast-and-Pack ops are encoded in operation and modifier @@ -131,7 +125,7 @@ FP8. Please use the PULP DivSqrt unit when in need of div/sqrt operations on FP8 assign dst_fmt = dst_fmt_is_int ? int_fmt_i : dst_fmt_i; // The data sent along consists of the vectorial flag and format bits - assign aux_data = {dst_fmt_is_int, vectorial_op, dst_fmt}; + assign in_aux = {dst_fmt_is_int, dst_fmt}; assign target_aux_d = {dst_vec_op, dst_is_cpk}; // CONV passes one operand for assembly after the unit: opC for cpk, opB for others @@ -152,6 +146,74 @@ FP8. Please use the PULP DivSqrt unit when in need of div/sqrt operations on FP8 end end + // --------------- + // Generate Aux Chain + // --------------- + // Signals to transmit reg enable to other modules + logic [NumPipeRegs-1:0] vector_reg_enable; + + logic [NUM_LANES-1:0] in_lane_active, out_lane_active, lane_fsm_ready, lane_fsm_start; + logic [NUM_LANES-1:0][NumPipeRegs-1:0] lane_reg_enabe; + + if (OpGroup == fpnew_pkg::DIVSQRT) begin: gen_fsm_aux + fpnew_aux_fsm #( + .NumPipeRegs( NumPipeRegs ), + .PipeConfig ( PipeConfig ), + .TagType ( TagType ), + .AuxType ( logic [AUX_BITS-1:0] ), + .NumLanes ( NUM_LANES ) + ) i_aux_fsm ( + .clk_i, + .rst_ni, + .tag_i, + .aux_i ( in_aux ), + .is_vector_i ( vectorial_op ), + .lane_active_i ( in_lane_active ), + .in_valid_i, + .in_ready_o, + .flush_i, + .tag_o, + .aux_o ( out_aux ), + .is_vector_o ( /* Unused */ ), + .lane_active_o ( out_lane_active ), + .out_valid_o, + .out_ready_i, + .busy_o, + .reg_enable_o ( /* Unused */ ), + .vector_reg_enable_o ( vector_reg_enable ), + .lane_reg_enable_o ( lane_reg_enabe ), + .lane_fsm_start_o ( lane_fsm_start ), + .lane_fsm_ready_i ( lane_fsm_ready ) + ); + 
end else begin: gen_direct_aux + fpnew_aux #( + .NumPipeRegs( NumPipeRegs ), + .TagType ( TagType ), + .AuxType ( logic [AUX_BITS-1:0] ), + .NumLanes ( NUM_LANES ) + ) i_aux ( + .clk_i, + .rst_ni, + .tag_i, + .aux_i ( in_aux ), + .is_vector_i ( vectorial_op ), + .lane_active_i ( in_lane_active ), + .in_valid_i, + .in_ready_o, + .flush_i, + .tag_o, + .aux_o ( out_aux ), + .is_vector_o ( /* Unused */ ), + .lane_active_o ( out_lane_active ), + .out_valid_o, + .out_ready_i, + .busy_o, + .reg_enable_o ( /* Unused */ ), + .vector_reg_enable_o ( vector_reg_enable ), + .lane_reg_enable_o ( lane_reg_enabe ) + ); + end + // --------------- // Generate Lanes // --------------- @@ -180,13 +242,17 @@ FP8. Please use the PULP DivSqrt unit when in need of div/sqrt operations on FP8 // Generate instances only if needed, lane 0 always generated if ((lane == 0) || (EnableVectors & (!(OpGroup == fpnew_pkg::DIVSQRT && (lane >= NUM_DIVSQRT_LANES))))) begin : active_lane - logic in_valid, out_valid, out_ready; // lane-local handshake logic [NUM_OPERANDS-1:0][LANE_WIDTH-1:0] local_operands; // lane-local oprands logic [LANE_WIDTH-1:0] op_result; // lane-local results fpnew_pkg::status_t op_status; - assign in_valid = in_valid_i & ((lane == 0) | vectorial_op); // upper lanes only for vectors + // Figure out if lane is active i.e. should be used + assign in_lane_active[lane] = ( + (LANE_FORMATS[src_fmt_i] & ~is_up_cast) | + (LANE_FORMATS[dst_fmt_i] & is_up_cast) | + (OpGroup == fpnew_pkg::DIVSQRT) + ) & ((lane == 0) | vectorial_op); // Slice out the operands for this lane, upper bits are ignored in the unit always_comb begin : prepare_input @@ -223,9 +289,7 @@ FP8.
Please use the PULP DivSqrt unit when in need of div/sqrt operations on FP8 fpnew_fma_multi #( .FpFmtConfig ( LANE_FORMATS ), .NumPipeRegs ( NumPipeRegs ), - .PipeConfig ( PipeConfig ), - .TagType ( TagType ), - .AuxType ( logic [AUX_BITS-1:0] ) + .PipeConfig ( PipeConfig ) ) i_fpnew_fma_multi ( .clk_i, .rst_ni, @@ -237,31 +301,21 @@ FP8. Please use the PULP DivSqrt unit when in need of div/sqrt operations on FP8 .src_fmt_i, .src2_fmt_i ( op_i == fpnew_pkg::ADDS ? src_fmt_i : dst_fmt_i ), .dst_fmt_i, - .tag_i, - .mask_i ( simd_mask_i[lane] ), - .aux_i ( aux_data ), - .in_valid_i ( in_valid ), - .in_ready_o ( lane_in_ready[lane] ), - .flush_i, - .result_o ( op_result ), - .status_o ( op_status ), - .extension_bit_o ( lane_ext_bit[lane] ), - .tag_o ( lane_tags[lane] ), - .mask_o ( lane_masks[lane] ), - .aux_o ( lane_aux[lane] ), - .out_valid_o ( out_valid ), - .out_ready_i ( out_ready ), - .busy_o ( lane_busy[lane] ) + .mask_i ( simd_mask_i[lane] ), + .result_o ( op_result ), + .status_o ( op_status ), + .extension_bit_o ( lane_ext_bit[lane] ), + .mask_o ( lane_masks[lane] ), + .reg_enable_i ( lane_reg_enabe[lane] ) ); + end else if (OpGroup == fpnew_pkg::NONCOMP) begin : lane_instance end else if (OpGroup == fpnew_pkg::DIVSQRT) begin : lane_instance - if (DivSqrtSel == fpnew_pkg::TH32 && LANE_FORMATS[0] && (LANE_FORMATS[1:fpnew_pkg::NUM_FP_FORMATS-1] == '0)) begin : gen_th32_e906_divsqrt + if (DivSqrtSel == fpnew_pkg::TH32 && LANE_FORMATS[0] && (LANE_FORMATS[1:fpnew_pkg::NUM_FP_FORMATS-1] == '0)) begin : gen_th32_e906_divsqrt // The T-head-based DivSqrt unit is supported only in FP32-only configurations fpnew_divsqrt_th_32 #( .NumPipeRegs ( NumPipeRegs ), - .PipeConfig ( PipeConfig ), - .TagType ( TagType ), - .AuxType ( logic [AUX_BITS-1:0] ) + .PipeConfig ( PipeConfig ) ) i_fpnew_divsqrt_multi_th ( .clk_i, .rst_ni, @@ -269,105 +323,69 @@ FP8. 
Please use the PULP DivSqrt unit when in need of div/sqrt operations on FP8 .is_boxed_i ( is_boxed_2op ), // 2 operands .rnd_mode_i, .op_i, - .tag_i, - .mask_i ( simd_mask_i[lane] ), - .aux_i ( aux_data ), - .in_valid_i ( in_valid ), - .in_ready_o ( lane_in_ready[lane] ), + .mask_i ( simd_mask_i[lane] ), .flush_i, - .result_o ( op_result ), - .status_o ( op_status ), - .extension_bit_o ( lane_ext_bit[lane] ), - .tag_o ( lane_tags[lane] ), - .mask_o ( lane_masks[lane] ), - .aux_o ( lane_aux[lane] ), - .out_valid_o ( out_valid ), - .out_ready_i ( out_ready ), - .busy_o ( lane_busy[lane] ) + .result_o ( op_result ), + .status_o ( op_status ), + .extension_bit_o ( lane_ext_bit[lane] ), + .mask_o ( lane_masks[lane] ), + .reg_enable_i ( lane_reg_enabe[lane] ), + .fsm_start_i ( lane_fsm_start[lane] ), + .fsm_ready_o ( lane_fsm_ready[lane] ) ); end else if(DivSqrtSel == fpnew_pkg::THMULTI) begin : gen_thmulti_c910_divsqrt fpnew_divsqrt_th_64_multi #( .FpFmtConfig ( LANE_FORMATS ), .NumPipeRegs ( NumPipeRegs ), - .PipeConfig ( PipeConfig ), - .TagType ( TagType ), - .AuxType ( logic [AUX_BITS-1:0] ) + .PipeConfig ( PipeConfig ) ) i_fpnew_divsqrt_th_64_c910 ( - .clk_i, + .clk_i, .rst_ni, .operands_i ( local_operands[1:0] ), // 2 operands .is_boxed_i ( is_boxed_2op ), // 2 operands .rnd_mode_i, .op_i, .dst_fmt_i, - .tag_i, - .mask_i ( simd_mask_i[lane] ), - .aux_i ( aux_data ), - .vectorial_op_i ( vectorial_op ), // synchronize only vectorial operations - .in_valid_i ( in_valid ), - .in_ready_o ( lane_in_ready[lane] ), - .divsqrt_done_o ( divsqrt_done[lane] ), - .simd_synch_done_i( simd_synch_done ), - .divsqrt_ready_o ( divsqrt_ready[lane] ), - .simd_synch_rdy_i ( simd_synch_rdy ), + .mask_i ( simd_mask_i[lane] ), .flush_i, - .result_o ( op_result ), - .status_o ( op_status ), - .extension_bit_o ( lane_ext_bit[lane] ), - .tag_o ( lane_tags[lane] ), - .mask_o ( lane_masks[lane] ), - .aux_o ( lane_aux[lane] ), - .out_valid_o ( out_valid ), - .out_ready_i ( out_ready ), - 
.busy_o ( lane_busy[lane] ) + .result_o ( op_result ), + .status_o ( op_status ), + .extension_bit_o ( lane_ext_bit[lane] ), + .mask_o ( lane_masks[lane] ), + .reg_enable_i ( lane_reg_enabe[lane] ), + .fsm_start_i ( lane_fsm_start[lane] ), + .fsm_ready_o ( lane_fsm_ready[lane] ) ); end else begin : gen_pulp_divsqrt fpnew_divsqrt_multi #( .FpFmtConfig ( LANE_FORMATS ), .NumPipeRegs ( NumPipeRegs ), - .PipeConfig ( PipeConfig ), - .TagType ( TagType ), - .AuxType ( logic [AUX_BITS-1:0] ) + .PipeConfig ( PipeConfig ) ) i_fpnew_divsqrt_multi ( .clk_i, .rst_ni, - .operands_i ( local_operands[1:0] ), // 2 operands - .is_boxed_i ( is_boxed_2op ), // 2 operands + .operands_i ( local_operands[1:0] ), // 2 operands + .is_boxed_i ( is_boxed_2op ), // 2 operands .rnd_mode_i, .op_i, .dst_fmt_i, - .tag_i, - .mask_i ( simd_mask_i[lane] ), - .aux_i ( aux_data ), - .vectorial_op_i ( vectorial_op ), - .in_valid_i ( in_valid ), - .in_ready_o ( lane_in_ready[lane] ), - .divsqrt_done_o ( divsqrt_done[lane] ), - .simd_synch_done_i( simd_synch_done ), - .divsqrt_ready_o ( divsqrt_ready[lane] ), - .simd_synch_rdy_i ( simd_synch_rdy ), + .mask_i ( simd_mask_i[lane] ), .flush_i, - .result_o ( op_result ), - .status_o ( op_status ), - .extension_bit_o ( lane_ext_bit[lane] ), - .tag_o ( lane_tags[lane] ), - .mask_o ( lane_masks[lane] ), - .aux_o ( lane_aux[lane] ), - .out_valid_o ( out_valid ), - .out_ready_i ( out_ready ), - .busy_o ( lane_busy[lane] ) + .result_o ( op_result ), + .status_o ( op_status ), + .extension_bit_o ( lane_ext_bit[lane] ), + .mask_o ( lane_masks[lane] ), + .reg_enable_i ( lane_reg_enabe[lane] ), + .fsm_start_i ( lane_fsm_start[lane] ), + .fsm_ready_o ( lane_fsm_ready[lane] ) ); end - end else if (OpGroup == fpnew_pkg::NONCOMP) begin : lane_instance - end else if (OpGroup == fpnew_pkg::CONV) begin : lane_instance fpnew_cast_multi #( .FpFmtConfig ( LANE_FORMATS ), .IntFmtConfig ( CONV_INT_FORMATS ), .NumPipeRegs ( NumPipeRegs ), - .PipeConfig ( PipeConfig ), - .TagType 
( TagType ), - .AuxType ( logic [AUX_BITS-1:0] ) + .PipeConfig ( PipeConfig ) ) i_fpnew_cast_multi ( .clk_i, .rst_ni, @@ -379,45 +397,32 @@ FP8. Please use the PULP DivSqrt unit when in need of div/sqrt operations on FP8 .src_fmt_i, .dst_fmt_i, .int_fmt_i, - .tag_i, - .mask_i ( simd_mask_i[lane] ), - .aux_i ( aux_data ), - .in_valid_i ( in_valid ), - .in_ready_o ( lane_in_ready[lane] ), - .flush_i, - .result_o ( op_result ), - .status_o ( op_status ), - .extension_bit_o ( lane_ext_bit[lane] ), - .tag_o ( lane_tags[lane] ), - .mask_o ( lane_masks[lane] ), - .aux_o ( lane_aux[lane] ), - .out_valid_o ( out_valid ), - .out_ready_i ( out_ready ), - .busy_o ( lane_busy[lane] ) + .mask_i ( simd_mask_i[lane] ), + .result_o ( op_result ), + .status_o ( op_status ), + .extension_bit_o ( lane_ext_bit[lane] ), + .mask_o ( lane_masks[lane] ), + .reg_enable_i ( lane_reg_enabe[lane] ) ); end // ADD OTHER OPTIONS HERE - // Handshakes are only done if the lane is actually used - assign out_ready = out_ready_i & ((lane == 0) | result_is_vector); - assign lane_out_valid[lane] = out_valid & ((lane == 0) | result_is_vector); + // Guard against accidentally using the wrong aux module + if (OpGroup != fpnew_pkg::DIVSQRT) begin : lane_fsm_guard + assign lane_fsm_ready[lane] = 1'b0; // Lane does not have a FSM, it can not be ready! + end // Properly NaN-box or sign-extend the slice result if not in use - assign local_result = lane_out_valid[lane] ? op_result : '{default: lane_ext_bit[0]}; - assign lane_status[lane] = lane_out_valid[lane] ? op_status : '0; + assign local_result = out_lane_active[lane] ? op_result: '{default: lane_ext_bit[0]}; + assign lane_status[lane] = out_lane_active[lane] ? 
op_status : '0; // Otherwise generate constant sign-extension end else begin : inactive_lane - assign lane_out_valid[lane] = 1'b0; // unused lane - assign lane_in_ready[lane] = 1'b0; // unused lane - assign lane_aux[lane] = 1'b0; // unused lane assign lane_masks[lane] = 1'b1; // unused lane - assign lane_tags[lane] = 1'b0; // unused lane - assign divsqrt_done[lane] = 1'b0; // unused lane - assign divsqrt_ready[lane] = 1'b0; // unused lane assign lane_ext_bit[lane] = 1'b1; // NaN-box unused lane assign local_result = {(LANE_WIDTH){lane_ext_bit[0]}}; // sign-extend/nan box assign lane_status[lane] = '0; - assign lane_busy[lane] = 1'b0; + assign in_lane_active[lane] = 1'b0; // Lane does not exist, it can never be active + assign lane_fsm_ready[lane] = 1'b0; // Lane does not exist, it can not be ready end // Generate result packing depending on float format @@ -481,32 +486,22 @@ FP8. Please use the PULP DivSqrt unit when in need of div/sqrt operations on FP8 // Bypass pipeline signals, index i holds signal after i register stages logic [0:NumPipeRegs][Width-1:0] byp_pipe_target_q; logic [0:NumPipeRegs][2:0] byp_pipe_aux_q; - logic [0:NumPipeRegs] byp_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NumPipeRegs] byp_pipe_ready; // Input stage: First element of pipeline is taken from inputs assign byp_pipe_target_q[0] = conv_target_d; assign byp_pipe_aux_q[0] = target_aux_d; - assign byp_pipe_valid_q[0] = in_valid_i & vectorial_op; + // Generate the register stages for (genvar i = 0; i < NumPipeRegs; i++) begin : gen_bypass_pipeline - // Internal register enable for this stage - logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it - assign byp_pipe_ready[i] = byp_pipe_ready[i+1] | ~byp_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(byp_pipe_valid_q[i+1], byp_pipe_valid_q[i], byp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = byp_pipe_ready[i] & byp_pipe_valid_q[i]; + // Internal register enable for this stage + logic reg_ena; + // Enable register is set externally + assign reg_ena = vector_reg_enable[i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(byp_pipe_target_q[i+1], byp_pipe_target_q[i], reg_ena, '0) `FFL(byp_pipe_aux_q[i+1], byp_pipe_aux_q[i], reg_ena, '0) end - // Output stage: Ready travels backwards from output side, driven by downstream circuitry - assign byp_pipe_ready[NumPipeRegs] = out_ready_i & result_is_vector; + // Output stage: assign module outputs assign conv_target_q = byp_pipe_target_q[NumPipeRegs]; @@ -517,30 +512,16 @@ FP8. Please use the PULP DivSqrt unit when in need of div/sqrt operations on FP8 assign conv_target_q = '0; end - if (DivSqrtSel != fpnew_pkg::TH32) begin - // Synch lanes if there is more than one - assign simd_synch_rdy = EnableVectors ? &divsqrt_ready[NUM_DIVSQRT_LANES-1:0] : divsqrt_ready[0]; - assign simd_synch_done = EnableVectors ? &divsqrt_done[NUM_DIVSQRT_LANES-1:0] : divsqrt_done[0]; - end else begin - // Unused (TH32 divider only supported for scalar FP32 divsqrt) - assign simd_synch_rdy = '0; - assign simd_synch_done = '0; - end - // ------------ // Output Side // ------------ - assign {result_fmt_is_int, result_is_vector, result_fmt} = lane_aux[0]; + assign {result_fmt_is_int, result_fmt} = out_aux; assign result_o = result_fmt_is_int ? 
ifmt_slice_result[result_fmt] : fmt_slice_result[result_fmt]; assign extension_bit_o = lane_ext_bit[0]; // don't care about upper ones - assign tag_o = lane_tags[0]; // don't care about upper ones - assign busy_o = (| lane_busy); - - assign out_valid_o = lane_out_valid[0]; // don't care about upper ones // Collapse the status always_comb begin : output_processing From 85e83c705e5a8bc200eef61826c7cc4f1a5544e2 Mon Sep 17 00:00:00 2001 From: Maurus Item Date: Thu, 13 Jun 2024 15:23:58 +0200 Subject: [PATCH 10/14] Converted fmt slice to new aux chain --- src/fpnew_opgroup_fmt_slice.sv | 159 +++++++++++++-------------------- 1 file changed, 63 insertions(+), 96 deletions(-) diff --git a/src/fpnew_opgroup_fmt_slice.sv b/src/fpnew_opgroup_fmt_slice.sv index 35fbe484..b7b5310c 100644 --- a/src/fpnew_opgroup_fmt_slice.sv +++ b/src/fpnew_opgroup_fmt_slice.sv @@ -58,9 +58,7 @@ module fpnew_opgroup_fmt_slice #( localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(FpFormat); localparam int unsigned SIMD_WIDTH = unsigned'(Width/NUM_LANES); - - logic [NUM_LANES-1:0] lane_in_ready, lane_out_valid; // Handshake signals for the lanes - logic vectorial_op; + logic vectorial_op, cmp_op; logic [NUM_LANES*FP_WIDTH-1:0] slice_result; logic [Width-1:0] slice_regular_result, slice_class_result, slice_vec_class_result; @@ -68,18 +66,50 @@ module fpnew_opgroup_fmt_slice #( fpnew_pkg::status_t [NUM_LANES-1:0] lane_status; logic [NUM_LANES-1:0] lane_ext_bit; // only the first one is actually used fpnew_pkg::classmask_e [NUM_LANES-1:0] lane_class_mask; - TagType [NUM_LANES-1:0] lane_tags; // only the first one is actually used logic [NUM_LANES-1:0] lane_masks; - logic [NUM_LANES-1:0] lane_vectorial, lane_busy, lane_is_class; // dito + logic [NUM_LANES-1:0] lane_is_class; // only the first one is actually used - logic result_is_vector, result_is_class; + logic result_is_vector, result_is_class, result_is_cmp; // ----------- // Input Side // ----------- - assign in_ready_o = lane_in_ready[0]; 
// Upstream ready is given by first lane assign vectorial_op = vectorial_op_i & EnableVectors; // only do vectorial stuff if enabled + // --------------- + // Generate Aux Chain + // --------------- + // Signals to transmit reg enable to other modules + logic [NUM_LANES-1:0] in_lane_active, out_lane_active; + logic [NUM_LANES-1:0][NumPipeRegs-1:0] lane_reg_enable; + + fpnew_aux #( + .NumPipeRegs( NumPipeRegs ), + .TagType ( TagType ), + .AuxType ( logic ), + .NumLanes ( NUM_LANES ) + ) i_aux ( + .clk_i, + .rst_ni, + .tag_i, + .aux_i ( cmp_op ), + .is_vector_i ( vectorial_op ), + .lane_active_i ( in_lane_active ), + .in_valid_i, + .in_ready_o, + .flush_i, + .tag_o, + .aux_o ( result_is_cmp ), + .is_vector_o ( result_is_vector ), + .lane_active_o ( out_lane_active ), + .out_valid_o, + .out_ready_i, + .busy_o, + .reg_enable_o ( /* Unused */ ), + .vector_reg_enable_o ( /* Unused */ ), + .lane_reg_enable_o ( lane_reg_enable ) + ); + // --------------- // Generate Lanes // --------------- @@ -89,13 +119,13 @@ module fpnew_opgroup_fmt_slice #( // Generate instances only if needed, lane 0 always generated if ((lane == 0) || EnableVectors) begin : active_lane - logic in_valid, out_valid, out_ready; // lane-local handshake logic [NUM_OPERANDS-1:0][FP_WIDTH-1:0] local_operands; // lane-local operands logic [FP_WIDTH-1:0] op_result; // lane-local results fpnew_pkg::status_t op_status; - assign in_valid = in_valid_i & ((lane == 0) | vectorial_op); // upper lanes only for vectors + assign in_lane_active[lane] = (lane == 0) | vectorial_op; // upper lanes only for vectors + // Slice out the operands for this lane always_comb begin : prepare_input for (int i = 0; i < int'(NUM_OPERANDS); i++) begin @@ -106,11 +136,9 @@ module fpnew_opgroup_fmt_slice #( // Instantiate the operation from the selected opgroup if (OpGroup == fpnew_pkg::ADDMUL) begin : lane_instance fpnew_fma #( - .FpFormat ( FpFormat ), - .NumPipeRegs ( NumPipeRegs ), - .PipeConfig ( PipeConfig ), - .TagType ( TagType 
), - .AuxType ( logic ) + .FpFormat ( FpFormat ), + .NumPipeRegs ( NumPipeRegs ), + .PipeConfig ( PipeConfig ) ) i_fma ( .clk_i, .rst_ni, @@ -119,61 +147,20 @@ module fpnew_opgroup_fmt_slice #( .rnd_mode_i, .op_i, .op_mod_i, - .tag_i, - .mask_i ( simd_mask_i[lane] ), - .aux_i ( vectorial_op ), // Remember whether operation was vectorial - .in_valid_i ( in_valid ), - .in_ready_o ( lane_in_ready[lane] ), - .flush_i, - .result_o ( op_result ), - .status_o ( op_status ), - .extension_bit_o ( lane_ext_bit[lane] ), - .tag_o ( lane_tags[lane] ), - .mask_o ( lane_masks[lane] ), - .aux_o ( lane_vectorial[lane] ), - .out_valid_o ( out_valid ), - .out_ready_i ( out_ready ), - .busy_o ( lane_busy[lane] ) + .mask_i ( simd_mask_i[lane] ), + .result_o ( op_result ), + .status_o ( op_status ), + .extension_bit_o ( lane_ext_bit[lane] ), + .mask_o ( lane_masks[lane] ), + .reg_enable_i ( lane_reg_enable[lane] ) ); assign lane_is_class[lane] = 1'b0; assign lane_class_mask[lane] = fpnew_pkg::NEGINF; - end else if (OpGroup == fpnew_pkg::DIVSQRT) begin : lane_instance - // fpnew_divsqrt #( - // .FpFormat (FpFormat), - // .NumPipeRegs(NumPipeRegs), - // .PipeConfig (PipeConfig), - // .TagType (TagType), - // .AuxType (logic) - // ) i_divsqrt ( - // .clk_i, - // .rst_ni, - // .operands_i ( local_operands ), - // .is_boxed_i ( is_boxed_i[NUM_OPERANDS-1:0] ), - // .rnd_mode_i, - // .op_i, - // .op_mod_i, - // .tag_i, - // .aux_i ( vectorial_op ), // Remember whether operation was vectorial - // .in_valid_i ( in_valid ), - // .in_ready_o ( lane_in_ready[lane] ), - // .flush_i, - // .result_o ( op_result ), - // .status_o ( op_status ), - // .extension_bit_o ( lane_ext_bit[lane] ), - // .tag_o ( lane_tags[lane] ), - // .aux_o ( lane_vectorial[lane] ), - // .out_valid_o ( out_valid ), - // .out_ready_i ( out_ready ), - // .busy_o ( lane_busy[lane] ) - // ); - // assign lane_is_class[lane] = 1'b0; end else if (OpGroup == fpnew_pkg::NONCOMP) begin : lane_instance fpnew_noncomp #( - .FpFormat 
(FpFormat), - .NumPipeRegs(NumPipeRegs), - .PipeConfig (PipeConfig), - .TagType (TagType), - .AuxType (logic) + .FpFormat ( FpFormat ), + .NumPipeRegs( NumPipeRegs ), + .PipeConfig ( PipeConfig ) ) i_noncomp ( .clk_i, .rst_ni, @@ -182,42 +169,27 @@ module fpnew_opgroup_fmt_slice #( .rnd_mode_i, .op_i, .op_mod_i, - .tag_i, - .mask_i ( simd_mask_i[lane] ), - .aux_i ( vectorial_op ), // Remember whether operation was vectorial - .in_valid_i ( in_valid ), - .in_ready_o ( lane_in_ready[lane] ), - .flush_i, - .result_o ( op_result ), - .status_o ( op_status ), - .extension_bit_o ( lane_ext_bit[lane] ), - .class_mask_o ( lane_class_mask[lane] ), - .is_class_o ( lane_is_class[lane] ), - .tag_o ( lane_tags[lane] ), - .mask_o ( lane_masks[lane] ), - .aux_o ( lane_vectorial[lane] ), - .out_valid_o ( out_valid ), - .out_ready_i ( out_ready ), - .busy_o ( lane_busy[lane] ) + .mask_i ( simd_mask_i[lane] ), + .result_o ( op_result ), + .status_o ( op_status ), + .extension_bit_o ( lane_ext_bit[lane] ), + .class_mask_o ( lane_class_mask[lane] ), + .is_class_o ( lane_is_class[lane] ), + .mask_o ( lane_masks[lane] ), + .reg_enable_i ( lane_reg_enable[lane] ) ); end // ADD OTHER OPTIONS HERE - // Handshakes are only done if the lane is actually used - assign out_ready = out_ready_i & ((lane == 0) | result_is_vector); - assign lane_out_valid[lane] = out_valid & ((lane == 0) | result_is_vector); - // Properly NaN-box or sign-extend the slice result if not in use - assign local_result = lane_out_valid[lane] ? op_result : '{default: lane_ext_bit[0]}; - assign lane_status[lane] = lane_out_valid[lane] ? op_status : '0; + assign local_result = out_lane_active[lane] ? op_result : '{default: lane_ext_bit[0]}; + assign lane_status[lane] = out_lane_active[lane] ? 
op_status : '0; // Otherwise generate constant sign-extension end else begin - assign lane_out_valid[lane] = 1'b0; // unused lane - assign lane_in_ready[lane] = 1'b0; // unused lane assign local_result = '{default: lane_ext_bit[0]}; // sign-extend/nan box assign lane_status[lane] = '0; - assign lane_busy[lane] = 1'b0; assign lane_is_class[lane] = 1'b0; + assign in_lane_active[lane] = 1'b0; // Lane does not exist, it can never be active end // Insert lane result into slice result @@ -253,7 +225,6 @@ module fpnew_opgroup_fmt_slice #( // ------------ // Output Side // ------------ - assign result_is_vector = lane_vectorial[0]; assign result_is_class = lane_is_class[0]; assign slice_regular_result = $signed({extension_bit_o, slice_result}); @@ -274,11 +245,7 @@ module fpnew_opgroup_fmt_slice #( // Select the proper result assign result_o = result_is_class ? slice_class_result : slice_regular_result; - assign extension_bit_o = lane_ext_bit[0]; // upper lanes unused - assign tag_o = lane_tags[0]; // upper lanes unused - assign busy_o = (| lane_busy); - assign out_valid_o = lane_out_valid[0]; // upper lanes unused - + assign extension_bit_o = lane_ext_bit[0]; // upper lanes unused // Collapse the lane status always_comb begin : output_processing From 2877f217d4ab8fc6b5a7992e92fa771d7decea0a Mon Sep 17 00:00:00 2001 From: Maurus Item Date: Fri, 21 Jun 2024 16:57:46 +0200 Subject: [PATCH 11/14] Improved synchronization in case of faults in division and prevented potential cases where a bitflip causes a stall due to activating a division lane that does not exist. 
--- src/fpnew_aux_fsm.sv | 2 +- src/fpnew_opgroup_multifmt_slice.sv | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/fpnew_aux_fsm.sv b/src/fpnew_aux_fsm.sv index 4b1c6013..7ab7763f 100644 --- a/src/fpnew_aux_fsm.sv +++ b/src/fpnew_aux_fsm.sv @@ -154,7 +154,7 @@ module fpnew_aux_fsm #( assign in_ready[NUM_INP_REGS] = fsm_in_ready; // Done when all active lanes are done - assign fsm_ready = &(lane_fsm_ready_i | ~held_lane_active); + assign fsm_ready = &lane_fsm_ready_i; // FSM to safely apply and receive data from DIVSQRT unit always_comb begin : flag_fsm diff --git a/src/fpnew_opgroup_multifmt_slice.sv b/src/fpnew_opgroup_multifmt_slice.sv index 9d33291d..549ce4d6 100644 --- a/src/fpnew_opgroup_multifmt_slice.sv +++ b/src/fpnew_opgroup_multifmt_slice.sv @@ -422,7 +422,7 @@ FP8. Please use the PULP DivSqrt unit when in need of div/sqrt operations on FP8 assign local_result = {(LANE_WIDTH){lane_ext_bit[0]}}; // sign-extend/nan box assign lane_status[lane] = '0; assign in_lane_active[lane] = 1'b0; // Lane does not exist, it can never be active - assign lane_fsm_ready[lane] = 1'b0; // Lane does not exist, it can not be ready + assign lane_fsm_ready[lane] = 1'b1; // Lane does not exist, it is always ready just in case erronous data gets to the FSM in this slot end // Generate result packing depending on float format From f5e0339337e49b2a786c74048bee19c0eb3cd21a Mon Sep 17 00:00:00 2001 From: Maurus Item Date: Thu, 4 Jul 2024 10:02:22 +0200 Subject: [PATCH 12/14] Re-implemented external reg-enable. 
--- src/fpnew_aux.sv | 4 ++- src/fpnew_aux_fsm.sv | 39 +++++++++++++++++++++++------ src/fpnew_divsqrt_multi.sv | 29 ++++++++++----------- src/fpnew_divsqrt_th_64_multi.sv | 3 ++- src/fpnew_opgroup_block.sv | 2 ++ src/fpnew_opgroup_fmt_slice.sv | 3 +++ src/fpnew_opgroup_multifmt_slice.sv | 9 ++++++- 7 files changed, 65 insertions(+), 24 deletions(-) diff --git a/src/fpnew_aux.sv b/src/fpnew_aux.sv index 28059db5..19c5a527 100644 --- a/src/fpnew_aux.sv +++ b/src/fpnew_aux.sv @@ -47,6 +47,8 @@ module fpnew_aux #( output logic [NumPipeRegs-1:0] reg_enable_o, output logic [NumPipeRegs-1:0] vector_reg_enable_o, output logic [NumLanes-1:0][NumPipeRegs-1:0] lane_reg_enable_o, + // External register enable override + input logic [NumPipeRegs-1:0] reg_ena_i, // Indication of valid data in flight output logic busy_o ); @@ -89,7 +91,7 @@ module fpnew_aux #( `FFLARNC(valid[i+1], valid[i], ready[i], flush_i, 1'b0, clk_i, rst_ni) // Enable register if pipleine ready and a valid data item is present - assign reg_ena = ready[i] & valid[i]; + assign reg_ena = (ready[i] & valid[i]) | reg_ena_i[i]; // Drive external registers with reg enable assign reg_enable_o[i] = reg_ena; diff --git a/src/fpnew_aux_fsm.sv b/src/fpnew_aux_fsm.sv index 7ab7763f..102af9d9 100644 --- a/src/fpnew_aux_fsm.sv +++ b/src/fpnew_aux_fsm.sv @@ -52,8 +52,12 @@ module fpnew_aux_fsm #( // Signals for the Lane FSMs // Signal to start the FSM, will be asserted for one cycle output logic [NumLanes-1:0] lane_fsm_start_o, + // Signal to abort the current operation for the FSMs, will be asserted for one cycle + output logic [NumLanes-1:0] lane_fsm_kill_o, // Signal that the FSM finished it's operation, should be asserted continuously input logic [NumLanes-1:0] lane_fsm_ready_i, + // External register enable override + input logic [NumPipeRegs-1:0] reg_ena_i, // Indication of valid data in flight output logic busy_o ); @@ -110,7 +114,7 @@ module fpnew_aux_fsm #( `FFLARNC(in_valid[i+1], in_valid[i], in_ready[i], 
flush_i, 1'b0, clk_i, rst_ni) // Enable register if pipleine ready and a valid data item is present - assign reg_ena = in_ready[i] & in_valid[i]; + assign reg_ena = (in_ready[i] & in_valid[i]) | reg_ena_i[i]; // Drive external registers with reg enable assign reg_enable_o[i] = reg_ena; @@ -220,19 +224,40 @@ module fpnew_aux_fsm #( `FF(state_q, state_d, IDLE); + // Mini FSM for external reg enable. If external reg enable is set: + // 1. Kill any ongoing operations + // 2. On the next cycle start new operations + logic ext_fsm_start_d, ext_fsm_start_q; + + if (NUM_INP_REGS > 0) begin + assign ext_fsm_start_d = reg_ena_i[NUM_INP_REGS - 1]; + end else begin + assign ext_fsm_start_d = 1'b0; + end + + `FF(ext_fsm_start_q, ext_fsm_start_d, 1'b0); + + // Kill Lanes where a new input is given + for (genvar l = 0; l < NumLanes; l++) begin + assign lane_fsm_kill_o[l] = ext_fsm_start_d && in_lane_active[NUM_INP_REGS][l]; + end + // Start Lanes when FSM starts and lane is active for (genvar l = 0; l < NumLanes; l++) begin - assign lane_fsm_start_o[l] = fsm_start && in_lane_active[NUM_INP_REGS][l]; + assign lane_fsm_start_o[l] = (fsm_start || ext_fsm_start_q) && in_lane_active[NUM_INP_REGS][l]; end // ---------------- // Data Holding FFs // ---------------- - `FFL( held_tag, in_tag[NUM_INP_REGS], fsm_start, TagType'('0)); - `FFL( held_aux, in_aux[NUM_INP_REGS], fsm_start, AuxType'('0)); - `FFL( held_is_vector, in_is_vector[NUM_INP_REGS], fsm_start, '0); - `FFL(held_lane_active, in_lane_active[NUM_INP_REGS], fsm_start, '0); + logic hold_reg_enable; + assign hold_reg_enable = fsm_start || ext_fsm_start_d; + + `FFL( held_tag, in_tag[NUM_INP_REGS], hold_reg_enable, TagType'('0)); + `FFL( held_aux, in_aux[NUM_INP_REGS], hold_reg_enable, AuxType'('0)); + `FFL( held_is_vector, in_is_vector[NUM_INP_REGS], hold_reg_enable, '0); + `FFL(held_lane_active, in_lane_active[NUM_INP_REGS], hold_reg_enable, '0); // --------------- // Output pipeline @@ -272,7 +297,7 @@ module fpnew_aux_fsm #( 
`FFLARNC(out_valid[i+1], out_valid[i], out_ready[i], flush_i, 1'b0, clk_i, rst_ni) // Enable register if pipleine ready and a valid data item is present - assign reg_ena = out_ready[i] & out_valid[i]; + assign reg_ena = (out_ready[i] & out_valid[i]) | reg_ena_i[NUM_INP_REGS + i]; // Drive external registers with reg enable assign reg_enable_o[NUM_INP_REGS + i] = reg_ena; diff --git a/src/fpnew_divsqrt_multi.sv b/src/fpnew_divsqrt_multi.sv index 71dfe5b7..fb966ff5 100644 --- a/src/fpnew_divsqrt_multi.sv +++ b/src/fpnew_divsqrt_multi.sv @@ -42,6 +42,7 @@ module fpnew_divsqrt_multi #( input logic flush_i, input logic[NumPipeRegs-1:0] reg_enable_i, input logic fsm_start_i, + input logic fsm_kill_i, output logic fsm_ready_o ); @@ -142,20 +143,20 @@ module fpnew_divsqrt_multi #( fpnew_pkg::status_t unit_status; div_sqrt_top_mvp i_divsqrt_lei ( - .Clk_CI ( clk_i ), - .Rst_RBI ( rst_ni ), - .Div_start_SI ( div_valid ), - .Sqrt_start_SI ( sqrt_valid ), - .Operand_a_DI ( divsqrt_operands[0] ), - .Operand_b_DI ( divsqrt_operands[1] ), - .RM_SI ( rnd_mode_q ), - .Precision_ctl_SI ( '0 ), - .Format_sel_SI ( divsqrt_fmt ), - .Kill_SI ( flush_i ), - .Result_DO ( raw_unit_result ), - .Fflags_SO ( unit_status ), - .Ready_SO ( fsm_ready_o ), - .Done_SO ( unit_done ) + .Clk_CI ( clk_i ), + .Rst_RBI ( rst_ni ), + .Div_start_SI ( div_valid ), + .Sqrt_start_SI ( sqrt_valid ), + .Operand_a_DI ( divsqrt_operands[0] ), + .Operand_b_DI ( divsqrt_operands[1] ), + .RM_SI ( rnd_mode_q ), + .Precision_ctl_SI ( '0 ), + .Format_sel_SI ( divsqrt_fmt ), + .Kill_SI ( flush_i | fsm_kill_i ), + .Result_DO ( raw_unit_result ), + .Fflags_SO ( unit_status ), + .Ready_SO ( fsm_ready_o ), + .Done_SO ( unit_done ) ); // Adjust result width and fix FP8 diff --git a/src/fpnew_divsqrt_th_64_multi.sv b/src/fpnew_divsqrt_th_64_multi.sv index 8f1d38b0..e08884c6 100644 --- a/src/fpnew_divsqrt_th_64_multi.sv +++ b/src/fpnew_divsqrt_th_64_multi.sv @@ -44,6 +44,7 @@ module fpnew_divsqrt_th_64_multi #( input logic
flush_i, input logic[NumPipeRegs-1:0] reg_enable_i, input logic fsm_start_i, + input logic fsm_kill_i, output logic fsm_ready_o ); @@ -258,7 +259,7 @@ module fpnew_divsqrt_th_64_multi #( .idu_vfpu_rf_pipex_func ( {3'b0, divsqrt_fmt_q, 11'b0 ,sqrt_op, div_op} ), // Defines format (bits 16,15) and operation (bits 1,0) .idu_vfpu_rf_pipex_gateclk_sel ( func_sel ), // 2. Select func .pad_yy_icg_scan_en ( 1'b0 ), // SE signal for the redundant clock gating module - .rtu_yy_xx_flush ( flush_i ), // Flush + .rtu_yy_xx_flush ( flush_i | fsm_kill_i ), // Flush .vfpu_yy_xx_dqnan ( 1'b0 ), // Disable qNaN, set to 1 if sNaN is used .vfpu_yy_xx_rm ( rm_q ), // Round mode. redundant if imm0 set to the same .pipex_dp_vfdsu_ereg ( ), // Don't care, used by C910 diff --git a/src/fpnew_opgroup_block.sv b/src/fpnew_opgroup_block.sv index 998449cb..e7949ef5 100644 --- a/src/fpnew_opgroup_block.sv +++ b/src/fpnew_opgroup_block.sv @@ -132,6 +132,7 @@ module fpnew_opgroup_block #( .tag_o ( fmt_outputs[fmt].tag ), .out_valid_o ( fmt_out_valid[fmt] ), .out_ready_i ( fmt_out_ready[fmt] ), + .reg_ena_i ( '0 ), .busy_o ( fmt_busy[fmt] ) ); // If the format wants to use merged ops, tie off the dangling ones not used here @@ -207,6 +208,7 @@ module fpnew_opgroup_block #( .tag_o ( fmt_outputs[FMT].tag ), .out_valid_o ( fmt_out_valid[FMT] ), .out_ready_i ( fmt_out_ready[FMT] ), + .reg_ena_i ( '0 ), .busy_o ( fmt_busy[FMT] ) ); diff --git a/src/fpnew_opgroup_fmt_slice.sv b/src/fpnew_opgroup_fmt_slice.sv index b7b5310c..ea2a15e0 100644 --- a/src/fpnew_opgroup_fmt_slice.sv +++ b/src/fpnew_opgroup_fmt_slice.sv @@ -51,6 +51,8 @@ module fpnew_opgroup_fmt_slice #( // Output handshake output logic out_valid_o, input logic out_ready_i, + // External register enable override + input logic [NumPipeRegs-1:0] reg_ena_i, // Indication of valid data in flight output logic busy_o ); @@ -104,6 +106,7 @@ module fpnew_opgroup_fmt_slice #( .lane_active_o ( out_lane_active ), .out_valid_o, .out_ready_i, + .reg_ena_i, 
.busy_o, .reg_enable_o ( /* Unused */ ), .vector_reg_enable_o ( /* Unused */ ), diff --git a/src/fpnew_opgroup_multifmt_slice.sv b/src/fpnew_opgroup_multifmt_slice.sv index 549ce4d6..3c85bf23 100644 --- a/src/fpnew_opgroup_multifmt_slice.sv +++ b/src/fpnew_opgroup_multifmt_slice.sv @@ -58,6 +58,8 @@ module fpnew_opgroup_multifmt_slice #( // Output handshake output logic out_valid_o, input logic out_ready_i, + // External register enable override + input logic [NumPipeRegs-1:0] reg_ena_i, // Indication of valid data in flight output logic busy_o ); @@ -152,7 +154,7 @@ FP8. Please use the PULP DivSqrt unit when in need of div/sqrt operations on FP8 // Signals to transmit reg enable to other modules logic [NumPipeRegs-1:0] vector_reg_enable; - logic [NUM_LANES-1:0] in_lane_active, out_lane_active, lane_fsm_ready, lane_fsm_start; + logic [NUM_LANES-1:0] in_lane_active, out_lane_active, lane_fsm_ready, lane_fsm_start, lane_fsm_kill; logic [NUM_LANES-1:0][NumPipeRegs-1:0] lane_reg_enabe; if (OpGroup == fpnew_pkg::DIVSQRT) begin: gen_fsm_aux @@ -178,11 +180,13 @@ FP8. Please use the PULP DivSqrt unit when in need of div/sqrt operations on FP8 .lane_active_o ( out_lane_active ), .out_valid_o, .out_ready_i, + .reg_ena_i, .busy_o, .reg_enable_o ( /* Unused */ ), .vector_reg_enable_o ( vector_reg_enable ), .lane_reg_enable_o ( lane_reg_enabe ), .lane_fsm_start_o ( lane_fsm_start ), + .lane_fsm_kill_o ( lane_fsm_kill ), .lane_fsm_ready_i ( lane_fsm_ready ) ); end else begin: gen_direct_aux @@ -207,6 +211,7 @@ FP8. Please use the PULP DivSqrt unit when in need of div/sqrt operations on FP8 .lane_active_o ( out_lane_active ), .out_valid_o, .out_ready_i, + .reg_ena_i, .busy_o, .reg_enable_o ( /* Unused */ ), .vector_reg_enable_o ( vector_reg_enable ), @@ -354,6 +359,7 @@ FP8. 
Please use the PULP DivSqrt unit when in need of div/sqrt operations on FP8 .mask_o ( lane_masks[lane] ), .reg_enable_i ( lane_reg_enabe[lane] ), .fsm_start_i ( lane_fsm_start[lane] ), + .fsm_kill_i ( lane_fsm_kill[lane] ), .fsm_ready_o ( lane_fsm_ready[lane] ) ); end else begin : gen_pulp_divsqrt @@ -377,6 +383,7 @@ FP8. Please use the PULP DivSqrt unit when in need of div/sqrt operations on FP8 .mask_o ( lane_masks[lane] ), .reg_enable_i ( lane_reg_enabe[lane] ), .fsm_start_i ( lane_fsm_start[lane] ), + .fsm_kill_i ( lane_fsm_kill[lane] ), .fsm_ready_o ( lane_fsm_ready[lane] ) ); end From fcd9a93876f3ff1881b807576f9756733602860d Mon Sep 17 00:00:00 2001 From: Maurus Item Date: Tue, 16 Jul 2024 14:52:07 +0200 Subject: [PATCH 13/14] Removed FSM Enum and signals that are no longer used. --- src/fpnew_divsqrt_th_64_multi.sv | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/fpnew_divsqrt_th_64_multi.sv b/src/fpnew_divsqrt_th_64_multi.sv index e08884c6..f28ee321 100644 --- a/src/fpnew_divsqrt_th_64_multi.sv +++ b/src/fpnew_divsqrt_th_64_multi.sv @@ -144,10 +144,6 @@ module fpnew_divsqrt_th_64_multi #( logic div_valid, sqrt_valid; // input signalling with unit - // FSM states - typedef enum logic [1:0] {IDLE, BUSY, HOLD} fsm_state_e; - fsm_state_e state_q, state_d; - // Valids are gated by the FSM ready. Invalid input ops run a sqrt to not lose illegal instr. assign div_valid = (op_q == fpnew_pkg::DIV) & fsm_start_i; assign sqrt_valid = (op_q != fpnew_pkg::DIV) & fsm_start_i; From e7e0882681b09a055e43fd6a85c8a5cc7e80ab5d Mon Sep 17 00:00:00 2001 From: Maurus Item Date: Wed, 17 Jul 2024 21:58:01 +0200 Subject: [PATCH 14/14] Made aux chain modules lane-agnostic. 
--- src/fpnew_aux.sv | 34 +------ src/fpnew_aux_fsm.sv | 79 +++------------ src/fpnew_opgroup_fmt_slice.sv | 51 +++++----- src/fpnew_opgroup_multifmt_slice.sv | 150 ++++++++++++++++++---------- 4 files changed, 140 insertions(+), 174 deletions(-) diff --git a/src/fpnew_aux.sv b/src/fpnew_aux.sv index 19c5a527..adf0ef9c 100644 --- a/src/fpnew_aux.sv +++ b/src/fpnew_aux.sv @@ -21,16 +21,13 @@ module fpnew_aux #( parameter int unsigned NumPipeRegs = 0, parameter type TagType = logic, - parameter type AuxType = logic, - parameter int unsigned NumLanes = 1 + parameter type AuxType = logic ) ( input logic clk_i, input logic rst_ni, // Input signals input TagType tag_i, input AuxType aux_i, - input logic is_vector_i, - input logic [NumLanes-1:0] lane_active_i, // Input Handshake input logic in_valid_i, output logic in_ready_o, @@ -38,15 +35,11 @@ module fpnew_aux #( // Output signals output TagType tag_o, output AuxType aux_o, - output logic is_vector_o, - output logic [NumLanes-1:0] lane_active_o, // Output handshake output logic out_valid_o, input logic out_ready_i, // Register Enable for Lanes output logic [NumPipeRegs-1:0] reg_enable_o, - output logic [NumPipeRegs-1:0] vector_reg_enable_o, - output logic [NumLanes-1:0][NumPipeRegs-1:0] lane_reg_enable_o, // External register enable override input logic [NumPipeRegs-1:0] reg_ena_i, // Indication of valid data in flight @@ -60,8 +53,6 @@ module fpnew_aux #( // Input pipeline signals, index i holds signal after i register stages TagType [0:NumPipeRegs] tag; AuxType [0:NumPipeRegs] aux; - logic [0:NumPipeRegs] is_vector; - logic [0:NumPipeRegs][NumLanes-1:0] lane_active; logic [0:NumPipeRegs] valid; // Ready signal is combinatorial for all stages @@ -70,9 +61,7 @@ module fpnew_aux #( // First element of pipeline is taken from inputs assign tag [0] = tag_i; assign aux [0] = aux_i; - assign is_vector [0] = is_vector_i; assign valid [0] = in_valid_i; - assign lane_active[0] = lane_active_i; // Propagate pipeline ready signal 
to upstream circuitry assign in_ready_o = ready[0]; @@ -90,23 +79,12 @@ module fpnew_aux #( // Valid: enabled by ready signal, synchronous clear with the flush signal `FFLARNC(valid[i+1], valid[i], ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = (ready[i] & valid[i]) | reg_ena_i[i]; - - // Drive external registers with reg enable - assign reg_enable_o[i] = reg_ena; - - // Drive external vector registers with reg enable if operation is a vector - assign vector_reg_enable_o[i] = reg_ena & is_vector[i]; - for (genvar l = 0; l < NumLanes; l++) begin - assign lane_reg_enable_o[l][i] = reg_ena & lane_active[i][l]; - end + // Enable register if pipeline ready and a valid data item is present + assign reg_enable_o[i] = ready[i] & valid[i] | reg_ena_i[i]; // Generate the pipeline registers within the stages, use enable-registers - `FFL( tag[i+1], tag[i], reg_ena, TagType'('0)) - `FFL( aux[i+1], aux[i], reg_ena, AuxType'('0)) - `FFL( is_vector[i+1], is_vector[i], reg_ena, '0 ) - `FFL(lane_active[i+1], lane_active[i], reg_ena, '0 ) + `FFL( tag[i+1], tag[i], reg_enable_o[i], TagType'('0)) + `FFL( aux[i+1], aux[i], reg_enable_o[i], AuxType'('0)) end // Ready travels backwards from output side, driven by downstream circuitry @@ -115,9 +93,7 @@ module fpnew_aux #( // Assign module outputs assign tag_o = tag [NumPipeRegs]; assign aux_o = aux [NumPipeRegs]; - assign is_vector_o = is_vector [NumPipeRegs]; assign out_valid_o = valid [NumPipeRegs]; - assign lane_active_o = lane_active[NumPipeRegs]; // Assign output Flags: Busy if any element inside the pipe is valid assign busy_o = |valid; diff --git a/src/fpnew_aux_fsm.sv b/src/fpnew_aux_fsm.sv index 102af9d9..3d2c2ee7 100644 --- a/src/fpnew_aux_fsm.sv +++ b/src/fpnew_aux_fsm.sv @@ -23,16 +23,13 @@ module fpnew_aux_fsm #( parameter int unsigned NumPipeRegs = 0, parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, parameter type TagType = 
logic, - parameter type AuxType = logic, - parameter int unsigned NumLanes = 1 + parameter type AuxType = logic ) ( input logic clk_i, input logic rst_ni, // Input signals input TagType tag_i, input AuxType aux_i, - input logic is_vector_i, - input logic [NumLanes-1:0] lane_active_i, // Input Handshake input logic in_valid_i, output logic in_ready_o, @@ -40,22 +37,16 @@ module fpnew_aux_fsm #( // Output signals output TagType tag_o, output AuxType aux_o, - output logic is_vector_o, - output logic [NumLanes-1:0] lane_active_o, // Output handshake output logic out_valid_o, input logic out_ready_i, // Register Enable for Lanes output logic [NumPipeRegs-1:0] reg_enable_o, - output logic [NumPipeRegs-1:0] vector_reg_enable_o, - output logic [NumLanes-1:0][NumPipeRegs-1:0] lane_reg_enable_o, // Signals for the Lane FSMs // Signal to start the FSM, will be asserted for one cycle - output logic [NumLanes-1:0] lane_fsm_start_o, - // Signal to abort the current operation for the FSMs, will be asserted for one cycle - output logic [NumLanes-1:0] lane_fsm_kill_o, - // Signal that the FSM finished it's operation, should be asserted continuously - input logic [NumLanes-1:0] lane_fsm_ready_i, + output logic fsm_start_o, + output logic fsm_kill_o, + input logic fsm_ready_i, // External register enable override input logic [NumPipeRegs-1:0] reg_ena_i, // Indication of valid data in flight @@ -83,8 +74,6 @@ module fpnew_aux_fsm #( // Input pipeline signals, index i holds signal after i register stages TagType [0:NUM_INP_REGS] in_tag; AuxType [0:NUM_INP_REGS] in_aux; - logic [0:NUM_INP_REGS] in_is_vector; - logic [0:NUM_INP_REGS][NumLanes-1:0] in_lane_active; logic [0:NUM_INP_REGS] in_valid; // Ready signal is combinatorial for all stages @@ -93,16 +82,14 @@ module fpnew_aux_fsm #( // First element of pipeline is taken from inputs assign in_tag [0] = tag_i; assign in_aux [0] = aux_i; - assign in_is_vector [0] = is_vector_i; assign in_valid [0] = in_valid_i; - assign in_lane_active[0] 
= lane_active_i; // Propagate pipeline ready signal to upstream circuitry assign in_ready_o = in_ready[0]; // Generate the register stages for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline - + // Internal register enable for this stage logic reg_ena; // Determine the ready signal of the current stage - advance the pipeline: @@ -114,22 +101,11 @@ module fpnew_aux_fsm #( `FFLARNC(in_valid[i+1], in_valid[i], in_ready[i], flush_i, 1'b0, clk_i, rst_ni) // Enable register if pipleine ready and a valid data item is present - assign reg_ena = (in_ready[i] & in_valid[i]) | reg_ena_i[i]; - - // Drive external registers with reg enable - assign reg_enable_o[i] = reg_ena; - - // Drive external vector registers with reg enable if operation is a vector - assign vector_reg_enable_o[i] = reg_ena & in_is_vector[i]; - for (genvar l = 0; l < NumLanes; l++) begin - assign lane_reg_enable_o[l][i] = reg_ena & in_lane_active[i][l]; - end + assign reg_enable_o[i] = in_ready[i] & in_valid[i] | reg_ena_i[i]; // Generate the pipeline registers within the stages, use enable-registers - `FFL( in_tag[i+1], in_tag[i], reg_ena, TagType'('0)) - `FFL( in_aux[i+1], in_aux[i], reg_ena, AuxType'('0)) - `FFL( in_is_vector[i+1], in_is_vector[i], reg_ena, '0 ) - `FFL(in_lane_active[i+1], in_lane_active[i], reg_ena, '0 ) + `FFL( in_tag[i+1], in_tag[i], reg_enable_o[i], TagType'('0)) + `FFL( in_aux[i+1], in_aux[i], reg_enable_o[i], AuxType'('0)) end // ---------- @@ -144,28 +120,22 @@ module fpnew_aux_fsm #( logic fsm_in_valid, fsm_in_ready; logic fsm_out_valid, fsm_out_ready; - // Synchronisazion signals - logic fsm_start, fsm_ready, fsm_busy; + logic fsm_start, fsm_busy; // Data holding signals TagType held_tag; AuxType held_aux; - logic held_is_vector; - logic [NumLanes-1:0] held_lane_active; // Upstream Handshake Connection assign fsm_in_valid = in_valid[NUM_INP_REGS]; assign in_ready[NUM_INP_REGS] = fsm_in_ready; - // Done when all active lanes are done - assign fsm_ready = 
&lane_fsm_ready_i; - // FSM to safely apply and receive data from DIVSQRT unit always_comb begin : flag_fsm // Default assignments fsm_out_valid = 1'b0; fsm_in_ready = 1'b0; - fsm_start = 1'b0; + fsm_start = 1'b0; fsm_busy = 1'b0; state_d = state_q; @@ -180,7 +150,7 @@ module fpnew_aux_fsm #( BUSY: begin fsm_busy = 1'b1; // If all active lanes are done send data down chain - if (fsm_ready) begin + if (fsm_ready_i) begin fsm_out_valid = 1'b1; if (fsm_out_ready) begin fsm_in_ready = 1'b1; @@ -237,15 +207,8 @@ module fpnew_aux_fsm #( `FF(ext_fsm_start_q, ext_fsm_start_d, 1'b0); - // Kill Lanes where a new input is given - for (genvar l = 0; l < NumLanes; l++) begin - assign lane_fsm_kill_o[l] = ext_fsm_start_d && in_lane_active[NUM_INP_REGS][l]; - end - - // Start Lanes when FSM starts and lane is active - for (genvar l = 0; l < NumLanes; l++) begin - assign lane_fsm_start_o[l] = (fsm_start || ext_fsm_start_q) && in_lane_active[NUM_INP_REGS][l]; - end + assign fsm_kill_o = ext_fsm_start_d; + assign fsm_start_o = (fsm_start || ext_fsm_start_q); // ---------------- // Data Holding FFs @@ -256,8 +219,6 @@ module fpnew_aux_fsm #( `FFL( held_tag, in_tag[NUM_INP_REGS], hold_reg_enable, TagType'('0)); `FFL( held_aux, in_aux[NUM_INP_REGS], hold_reg_enable, AuxType'('0)); - `FFL( held_is_vector, in_is_vector[NUM_INP_REGS], hold_reg_enable, '0); - `FFL(held_lane_active, in_lane_active[NUM_INP_REGS], hold_reg_enable, '0); // --------------- // Output pipeline @@ -266,8 +227,6 @@ module fpnew_aux_fsm #( // Output pipeline signals, index i holds signal after i register stages TagType [0:NUM_OUT_REGS] out_tag; AuxType [0:NUM_OUT_REGS] out_aux; - logic [0:NUM_OUT_REGS] out_is_vector; - logic [0:NUM_OUT_REGS][NumLanes-1:0] out_lane_active; logic [0:NUM_OUT_REGS] out_valid; // Ready signal is combinatorial for all stages @@ -280,8 +239,6 @@ module fpnew_aux_fsm #( // Connect to Hold Register assign out_tag [0] = held_tag; assign out_aux [0] = held_aux; - assign out_is_vector [0] = 
held_is_vector; - assign out_lane_active[0] = held_lane_active; // Generate the register stages for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline @@ -302,17 +259,9 @@ module fpnew_aux_fsm #( // Drive external registers with reg enable assign reg_enable_o[NUM_INP_REGS + i] = reg_ena; - // Drive external vector registers with reg enable if operation is a vector - assign vector_reg_enable_o[NUM_INP_REGS + i] = reg_ena & out_is_vector[i]; - for (genvar l = 0; l < NumLanes; l++) begin - assign lane_reg_enable_o[l][NUM_INP_REGS + i] = reg_ena & out_lane_active[i][l]; - end - // Generate the pipeline registers within the stages, use enable-registers `FFL( out_tag[i+1], out_tag[i], reg_ena, TagType'('0)) `FFL( out_aux[i+1], out_aux[i], reg_ena, AuxType'('0)) - `FFL( out_is_vector[i+1], out_is_vector[i], reg_ena, '0 ) - `FFL(out_lane_active[i+1], out_lane_active[i], reg_ena, '0 ) end // Ready travels backwards from output side, driven by downstream circuitry @@ -321,9 +270,7 @@ module fpnew_aux_fsm #( // Assign module outputs assign tag_o = out_tag [NUM_OUT_REGS]; assign aux_o = out_aux [NUM_OUT_REGS]; - assign is_vector_o = out_is_vector [NUM_OUT_REGS]; assign out_valid_o = out_valid [NUM_OUT_REGS]; - assign lane_active_o = out_lane_active[NUM_OUT_REGS]; // Assign output Flags: Busy if any element inside the pipe is valid assign busy_o = |in_valid | |out_valid | fsm_busy; diff --git a/src/fpnew_opgroup_fmt_slice.sv b/src/fpnew_opgroup_fmt_slice.sv index ea2a15e0..4ffe0ddf 100644 --- a/src/fpnew_opgroup_fmt_slice.sv +++ b/src/fpnew_opgroup_fmt_slice.sv @@ -13,6 +13,8 @@ // Author: Stefan Mach +`include "common_cells/registers.svh" + module fpnew_opgroup_fmt_slice #( parameter fpnew_pkg::opgroup_e OpGroup = fpnew_pkg::ADDMUL, parameter fpnew_pkg::fp_format_e FpFormat = fpnew_pkg::fp_format_e'(0), @@ -59,8 +61,9 @@ module fpnew_opgroup_fmt_slice #( localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(FpFormat); localparam int unsigned SIMD_WIDTH = 
unsigned'(Width/NUM_LANES); + localparam int unsigned AUX_BITS = 2; - logic vectorial_op, cmp_op; + logic [AUX_BITS-1:0] aux_in, aux_out; logic [NUM_LANES*FP_WIDTH-1:0] slice_result; logic [Width-1:0] slice_regular_result, slice_class_result, slice_vec_class_result; @@ -71,46 +74,38 @@ module fpnew_opgroup_fmt_slice #( logic [NUM_LANES-1:0] lane_masks; logic [NUM_LANES-1:0] lane_is_class; // only the first one is actually used - logic result_is_vector, result_is_class, result_is_cmp; + logic result_is_class; // ----------- // Input Side // ----------- - assign vectorial_op = vectorial_op_i & EnableVectors; // only do vectorial stuff if enabled + assign aux_in[0] = vectorial_op_i & EnableVectors; // only do vectorial stuff if enabled + assign aux_in[1] = (op_i == fpnew_pkg::CMP); // --------------- // Generate Aux Chain // --------------- - // Signals to transmit reg enable to other modules - logic [NUM_LANES-1:0] in_lane_active, out_lane_active; - logic [NUM_LANES-1:0][NumPipeRegs-1:0] lane_reg_enable; + logic [NumPipeRegs-1:0] reg_enable; fpnew_aux #( .NumPipeRegs( NumPipeRegs ), .TagType ( TagType ), - .AuxType ( logic ), - .NumLanes ( NUM_LANES ) + .AuxType ( logic [AUX_BITS-1:0] ) ) i_aux ( .clk_i, .rst_ni, .tag_i, - .aux_i ( cmp_op ), - .is_vector_i ( vectorial_op ), - .lane_active_i ( in_lane_active ), + .aux_i ( aux_in ), .in_valid_i, .in_ready_o, .flush_i, .tag_o, - .aux_o ( result_is_cmp ), - .is_vector_o ( result_is_vector ), - .lane_active_o ( out_lane_active ), + .aux_o ( aux_out ), .out_valid_o, .out_ready_i, .reg_ena_i, .busy_o, - .reg_enable_o ( /* Unused */ ), - .vector_reg_enable_o ( /* Unused */ ), - .lane_reg_enable_o ( lane_reg_enable ) + .reg_enable_o ( reg_enable ) ); // --------------- @@ -127,7 +122,16 @@ module fpnew_opgroup_fmt_slice #( logic [FP_WIDTH-1:0] op_result; // lane-local results fpnew_pkg::status_t op_status; - assign in_lane_active[lane] = (lane == 0) | vectorial_op; // upper lanes only for vectors + // Build reg_enable for 
lane + logic [NumPipeRegs-1:0] lane_reg_enable; + logic [0:NumPipeRegs] lane_active; + + assign lane_active[0] = (lane == 0) | aux_in[0]; // upper lanes only for vectors + + for (genvar i = 0; i < NumPipeRegs; i++) begin : gen_enable + `FFL(lane_active[i+1], lane_active[i], reg_enable[i], '0 ) + assign lane_reg_enable[i] = lane_active[i] & reg_enable[i]; + end // Slice out the operands for this lane always_comb begin : prepare_input @@ -155,7 +159,7 @@ module fpnew_opgroup_fmt_slice #( .status_o ( op_status ), .extension_bit_o ( lane_ext_bit[lane] ), .mask_o ( lane_masks[lane] ), - .reg_enable_i ( lane_reg_enable[lane] ) + .reg_enable_i ( lane_reg_enable ) ); assign lane_is_class[lane] = 1'b0; assign lane_class_mask[lane] = fpnew_pkg::NEGINF; @@ -179,20 +183,19 @@ module fpnew_opgroup_fmt_slice #( .class_mask_o ( lane_class_mask[lane] ), .is_class_o ( lane_is_class[lane] ), .mask_o ( lane_masks[lane] ), - .reg_enable_i ( lane_reg_enable[lane] ) + .reg_enable_i ( lane_reg_enable ) ); end // ADD OTHER OPTIONS HERE // Properly NaN-box or sign-extend the slice result if not in use - assign local_result = out_lane_active[lane] ? op_result : '{default: lane_ext_bit[0]}; - assign lane_status[lane] = out_lane_active[lane] ? op_status : '0; + assign local_result = lane_active[NumPipeRegs] ? op_result : '{default: lane_ext_bit[0]}; + assign lane_status[lane] = lane_active[NumPipeRegs] ? op_status : '0; // Otherwise generate constant sign-extension end else begin assign local_result = '{default: lane_ext_bit[0]}; // sign-extend/nan box assign lane_status[lane] = '0; assign lane_is_class[lane] = 1'b0; - assign in_lane_active[lane] = 1'b0; // Lane does not exist, it can never be active end // Insert lane result into slice result @@ -243,7 +246,7 @@ module fpnew_opgroup_fmt_slice #( // localparam logic [Width-1:0] CLASS_VEC_MASK = 2**CLASS_VEC_BITS - 1; - assign slice_class_result = result_is_vector ? 
slice_vec_class_result : lane_class_mask[0]; + assign slice_class_result = aux_out[0] ? slice_vec_class_result : lane_class_mask[0]; // Select the proper result assign result_o = result_is_class ? slice_class_result : slice_regular_result; diff --git a/src/fpnew_opgroup_multifmt_slice.sv b/src/fpnew_opgroup_multifmt_slice.sv index 3c85bf23..7e04db89 100644 --- a/src/fpnew_opgroup_multifmt_slice.sv +++ b/src/fpnew_opgroup_multifmt_slice.sv @@ -107,7 +107,6 @@ FP8. Please use the PULP DivSqrt unit when in need of div/sqrt operations on FP8 logic result_fmt_is_int, result_is_cpk; logic [1:0] result_vec_op; // info for vectorial results (for packing) - logic simd_synch_rdy, simd_synch_done; // ----------- // Input Side @@ -152,70 +151,57 @@ FP8. Please use the PULP DivSqrt unit when in need of div/sqrt operations on FP8 // Generate Aux Chain // --------------- // Signals to transmit reg enable to other modules - logic [NumPipeRegs-1:0] vector_reg_enable; + logic [NumPipeRegs-1:0] reg_enable; - logic [NUM_LANES-1:0] in_lane_active, out_lane_active, lane_fsm_ready, lane_fsm_start, lane_fsm_kill; - logic [NUM_LANES-1:0][NumPipeRegs-1:0] lane_reg_enabe; + logic fsm_start, fsm_ready, fsm_kill; + logic [NUM_LANES-1:0] lane_fsm_ready; + assign fsm_ready = &lane_fsm_ready; if (OpGroup == fpnew_pkg::DIVSQRT) begin: gen_fsm_aux fpnew_aux_fsm #( .NumPipeRegs( NumPipeRegs ), .PipeConfig ( PipeConfig ), .TagType ( TagType ), - .AuxType ( logic [AUX_BITS-1:0] ), - .NumLanes ( NUM_LANES ) + .AuxType ( logic [AUX_BITS-1:0] ) ) i_aux_fsm ( .clk_i, .rst_ni, .tag_i, - .aux_i ( in_aux ), - .is_vector_i ( vectorial_op ), - .lane_active_i ( in_lane_active ), + .aux_i ( in_aux ), .in_valid_i, .in_ready_o, .flush_i, .tag_o, - .aux_o ( out_aux ), - .is_vector_o ( /* Unused */ ), - .lane_active_o ( out_lane_active ), + .aux_o ( out_aux ), .out_valid_o, .out_ready_i, .reg_ena_i, .busy_o, - .reg_enable_o ( /* Unused */ ), - .vector_reg_enable_o ( vector_reg_enable ), - .lane_reg_enable_o ( 
lane_reg_enabe ), - .lane_fsm_start_o ( lane_fsm_start ), - .lane_fsm_kill_o ( lane_fsm_kill ), - .lane_fsm_ready_i ( lane_fsm_ready ) + .reg_enable_o ( reg_enable ), + .fsm_start_o ( fsm_start ), + .fsm_kill_o ( fsm_kill ), + .fsm_ready_i ( fsm_ready ) ); end else begin: gen_direct_aux fpnew_aux #( .NumPipeRegs( NumPipeRegs ), .TagType ( TagType ), - .AuxType ( logic [AUX_BITS-1:0] ), - .NumLanes ( NUM_LANES ) + .AuxType ( logic [AUX_BITS-1:0] ) ) i_aux ( .clk_i, .rst_ni, .tag_i, - .aux_i ( in_aux ), - .is_vector_i ( vectorial_op ), - .lane_active_i ( in_lane_active ), + .aux_i ( in_aux ), .in_valid_i, .in_ready_o, .flush_i, .tag_o, - .aux_o ( out_aux ), - .is_vector_o ( /* Unused */ ), - .lane_active_o ( out_lane_active ), + .aux_o ( out_aux ), .out_valid_o, .out_ready_i, .reg_ena_i, .busy_o, - .reg_enable_o ( /* Unused */ ), - .vector_reg_enable_o ( vector_reg_enable ), - .lane_reg_enable_o ( lane_reg_enabe ) + .reg_enable_o ( reg_enable ) ); end @@ -252,13 +238,68 @@ FP8. Please use the PULP DivSqrt unit when in need of div/sqrt operations on FP8 logic [LANE_WIDTH-1:0] op_result; // lane-local results fpnew_pkg::status_t op_status; + + + // Build reg_enable for lane + logic [NumPipeRegs-1:0] lane_reg_enable; + logic lane_fsm_start; + // Figure out if lane is active e.g. should be used - assign in_lane_active[lane] = ( + logic in_lane_active, out_lane_active; + + assign in_lane_active = ( (LANE_FORMATS[src_fmt_i] & ~is_up_cast) | (LANE_FORMATS[dst_fmt_i] & is_up_cast) | (OpGroup == fpnew_pkg::DIVSQRT) ) & ((lane == 0) | vectorial_op); + if (OpGroup == fpnew_pkg::DIVSQRT) begin: gen_fsm_reg_enable + // This must match between this module and modules that use this module as reg enable input! + localparam NUM_INP_REGS = (PipeConfig == fpnew_pkg::BEFORE) + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? 
(NumPipeRegs / 2) // Last to get distributed regs + : 0); // Always have one reg to use for FSM Input + localparam NUM_OUT_REGS = (PipeConfig == fpnew_pkg::AFTER || PipeConfig == fpnew_pkg::INSIDE) + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? ((NumPipeRegs + 1) / 2) // First to get distributed regs + : 0); // no regs here otherwise + + + logic [0:NUM_INP_REGS] inp_pipe_lane_active; + logic [0:NUM_OUT_REGS] out_pipe_lane_active; + + assign inp_pipe_lane_active[0] = in_lane_active; + + for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_in_pipe_enable + `FFL(inp_pipe_lane_active[i+1], inp_pipe_lane_active[i], reg_enable[i], '0 ) + assign lane_reg_enable[i] = inp_pipe_lane_active[i] & reg_enable[i]; + end + + assign lane_fsm_start = fsm_start & inp_pipe_lane_active[NUM_INP_REGS]; + `FFL(out_pipe_lane_active[0], inp_pipe_lane_active[NUM_INP_REGS], fsm_start, '0 ) + + for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_out_pipe_enable + `FFL(out_pipe_lane_active[i+1], out_pipe_lane_active[i], reg_enable[NUM_INP_REGS + i], '0 ) + assign lane_reg_enable[NUM_INP_REGS + i] = out_pipe_lane_active[i] & reg_enable[NUM_INP_REGS + i]; + end + + assign out_lane_active = out_pipe_lane_active[NUM_OUT_REGS]; + + end else begin: gen_direct_reg_enable + logic [0:NumPipeRegs] pipe_lane_active; + + assign pipe_lane_active[0] = in_lane_active; + + for (genvar i = 0; i < NumPipeRegs; i++) begin : gen_enable + `FFL(pipe_lane_active[i+1], pipe_lane_active[i], reg_enable[i], '0 ) + assign lane_reg_enable[i] = pipe_lane_active[i] & reg_enable[i]; + end + + assign out_lane_active = pipe_lane_active[NumPipeRegs]; + end + // Slice out the operands for this lane, upper bits are ignored in the unit always_comb begin : prepare_input for (int unsigned i = 0; i < NUM_OPERANDS; i++) begin @@ -298,7 +339,7 @@ FP8. 
Please use the PULP DivSqrt unit when in need of div/sqrt operations on FP8 ) i_fpnew_fma_multi ( .clk_i, .rst_ni, - .operands_i ( local_operands ), + .operands_i ( local_operands ), .is_boxed_i, .rnd_mode_i, .op_i, @@ -306,12 +347,12 @@ FP8. Please use the PULP DivSqrt unit when in need of div/sqrt operations on FP8 .src_fmt_i, .src2_fmt_i ( op_i == fpnew_pkg::ADDS ? src_fmt_i : dst_fmt_i ), .dst_fmt_i, - .mask_i ( simd_mask_i[lane] ), - .result_o ( op_result ), - .status_o ( op_status ), - .extension_bit_o ( lane_ext_bit[lane] ), - .mask_o ( lane_masks[lane] ), - .reg_enable_i ( lane_reg_enabe[lane] ) + .mask_i ( simd_mask_i[lane] ), + .result_o ( op_result ), + .status_o ( op_status ), + .extension_bit_o ( lane_ext_bit[lane] ), + .mask_o ( lane_masks[lane] ), + .reg_enable_i ( lane_reg_enable ) ); end else if (OpGroup == fpnew_pkg::NONCOMP) begin : lane_instance @@ -334,8 +375,8 @@ FP8. Please use the PULP DivSqrt unit when in need of div/sqrt operations on FP8 .status_o ( op_status ), .extension_bit_o ( lane_ext_bit[lane] ), .mask_o ( lane_masks[lane] ), - .reg_enable_i ( lane_reg_enabe[lane] ), - .fsm_start_i ( lane_fsm_start[lane] ), + .reg_enable_i ( lane_reg_enable ), + .fsm_start_i ( lane_fsm_start ), .fsm_ready_o ( lane_fsm_ready[lane] ) ); end else if(DivSqrtSel == fpnew_pkg::THMULTI) begin : gen_thmulti_c910_divsqrt @@ -357,9 +398,9 @@ FP8. Please use the PULP DivSqrt unit when in need of div/sqrt operations on FP8 .status_o ( op_status ), .extension_bit_o ( lane_ext_bit[lane] ), .mask_o ( lane_masks[lane] ), - .reg_enable_i ( lane_reg_enabe[lane] ), - .fsm_start_i ( lane_fsm_start[lane] ), - .fsm_kill_i ( lane_fsm_kill[lane] ), + .reg_enable_i ( lane_reg_enable ), + .fsm_start_i ( lane_fsm_start ), + .fsm_kill_i ( fsm_kill ), .fsm_ready_o ( lane_fsm_ready[lane] ) ); end else begin : gen_pulp_divsqrt @@ -381,9 +422,9 @@ FP8. 
Please use the PULP DivSqrt unit when in need of div/sqrt operations on FP8 .status_o ( op_status ), .extension_bit_o ( lane_ext_bit[lane] ), .mask_o ( lane_masks[lane] ), - .reg_enable_i ( lane_reg_enabe[lane] ), - .fsm_start_i ( lane_fsm_start[lane] ), - .fsm_kill_i ( lane_fsm_kill[lane] ), + .reg_enable_i ( lane_reg_enable ), + .fsm_start_i ( lane_fsm_start ), + .fsm_kill_i ( fsm_kill ), .fsm_ready_o ( lane_fsm_ready[lane] ) ); end @@ -404,12 +445,12 @@ FP8. Please use the PULP DivSqrt unit when in need of div/sqrt operations on FP8 .src_fmt_i, .dst_fmt_i, .int_fmt_i, - .mask_i ( simd_mask_i[lane] ), - .result_o ( op_result ), - .status_o ( op_status ), - .extension_bit_o ( lane_ext_bit[lane] ), - .mask_o ( lane_masks[lane] ), - .reg_enable_i ( lane_reg_enabe[lane] ) + .mask_i ( simd_mask_i[lane] ), + .result_o ( op_result ), + .status_o ( op_status ), + .extension_bit_o ( lane_ext_bit[lane] ), + .mask_o ( lane_masks[lane] ), + .reg_enable_i ( lane_reg_enable ) ); end // ADD OTHER OPTIONS HERE @@ -419,8 +460,8 @@ FP8. Please use the PULP DivSqrt unit when in need of div/sqrt operations on FP8 end // Properly NaN-box or sign-extend the slice result if not in use - assign local_result = out_lane_active[lane] ? op_result: '{default: lane_ext_bit[0]}; - assign lane_status[lane] = out_lane_active[lane] ? op_status : '0; + assign local_result = out_lane_active ? op_result: '{default: lane_ext_bit[0]}; + assign lane_status[lane] = out_lane_active ? op_status : '0; // Otherwise generate constant sign-extension end else begin : inactive_lane @@ -428,7 +469,6 @@ FP8. 
Please use the PULP DivSqrt unit when in need of div/sqrt operations on FP8 assign lane_ext_bit[lane] = 1'b1; // NaN-box unused lane assign local_result = {(LANE_WIDTH){lane_ext_bit[0]}}; // sign-extend/nan box assign lane_status[lane] = '0; - assign in_lane_active[lane] = 1'b0; // Lane does not exist, it can never be active assign lane_fsm_ready[lane] = 1'b1; // Lane does not exist, it is always ready just in case erronous data gets to the FSM in this slot end @@ -503,7 +543,7 @@ FP8. Please use the PULP DivSqrt unit when in need of div/sqrt operations on FP8 // Internal register enable for this stage logic reg_ena; // Enable register is set externally - assign reg_ena = vector_reg_enable[i]; + assign reg_ena = reg_enable[i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(byp_pipe_target_q[i+1], byp_pipe_target_q[i], reg_ena, '0) `FFL(byp_pipe_aux_q[i+1], byp_pipe_aux_q[i], reg_ena, '0)