From 26b1123aafa49d05fdb497f70fd7cb48846facf3 Mon Sep 17 00:00:00 2001 From: Masoom Panda Date: Sat, 4 Jul 2020 11:37:35 +0000 Subject: [PATCH 1/5] Replace ISA_Decls.bsv --- src_Core/ISA/ISA_Decls.bsv | 99 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 99 insertions(+) diff --git a/src_Core/ISA/ISA_Decls.bsv b/src_Core/ISA/ISA_Decls.bsv index 620ac9ff..89ca9385 100644 --- a/src_Core/ISA/ISA_Decls.bsv +++ b/src_Core/ISA/ISA_Decls.bsv @@ -29,6 +29,11 @@ import BuildVector :: *; import Posit_Numeric_Types :: *; `endif +`ifdef ACCEL +import Posit_Numeric_Types :: *; +//import Accel_Defines :: *; +`endif + // ================================================================ // BSV project imports @@ -143,6 +148,10 @@ typedef Bit #(PositWidth) WordPL; `endif +`ifdef ACCEL +typedef Bit #(32) WordAL; //Accelerator inputs ,for now its hard coded as Bit#(32) +`endif + // ================================================================ // Tokens are used for signalling/synchronization, and have no payload @@ -1015,6 +1024,96 @@ endfunction `endif `endif + +`ifdef ACCEL //When PositAccel is set +// ================================================================ +// ACCEL + +Opcode accel_opc = 7'b00_010_11; //opcode that its PositAccel(may change it later) + +//function bits definition +Bit #(7) f7_fma_p = 7'h0; +Bit #(7) f7_fda_p = 7'h1; +Bit #(7) f7_fms_p = 7'h2; +Bit #(7) f7_fds_p = 7'h3; +Bit #(7) f7_fcvt_p_s = 7'h4; +Bit #(7) f7_fcvt_s_p = 7'h5; +Bit #(7) f7_fcvt_p_r = 7'h6; +Bit #(7) f7_fcvt_r_p = 7'h7; + + +/*Placeholder for Legality checks + + +// RS2 encoding +//Changes maybe required for accelerator +Bit #(5) rs2_s = 5'h00; +Bit #(5) rs2_d = 5'h01; +Bit #(5) rs2_p = 5'h10; // Quills: rs2 for Posits +Bit #(5) rs2_r = 5'h11; // Quills: rs2 for Quire + + +// ToDO:accel_rounding_mode_check if required,here definition of fv_rmode_check is copied from ISA_F. 
+ +(need to be changed as per accelerator conditions) +// Returns the correct rounding mode considering the values in the +// FCSR and the instruction and checks legality + +function Tuple2# (Bit #(3), Bool) fv_rmode_check ( + Bit #(3) inst_frm, Bit #(3) fcsr_frm); + let rm = (inst_frm == 3'h7) ? fcsr_frm : inst_frm; + let rm_is_legal = (inst_frm == 3'h7) ? fv_fcsr_frm_valid (fcsr_frm) + : fv_inst_frm_valid (inst_frm); + return (tuple2 (rm, rm_is_legal)); +endfunction + + +//function to determine whether instruction for accelerator is legal or not +//needs changes + +/*function Bool accel_instr_legal ( + Bit #(7) f7, RegName rs2, Opcode accel_opc); + Bool is_legal = True; + if ( (accopcode == accel_opc )&& ((f7 == f7_fma_p) + || (f7 == f7_fms_p) + || (f7 == f7_fda_p) + || (f7 == f7_fds_p) + || ((f7==f7_fcvt_r_p) && (rs2 == rs2_p)) + || ((f7 == f7_fcvt_p_s) && (rs2 == rs2_s)) + || ((f7 == f7_fcvt_s_p) && (rs2 == rs2_p)) + || ((f7 == f7_fcvt_p_r) && (rs2 == rs2_r)))) + return True; + else return False; + endfunction*/ + + + +// Posit instructions which update the quire does not update +// PPR state. 
+function Bool accel_is_destn_in_quire (Opcode accopcode,Bit #(7) f7); + return ( (accopcode == accel_opc) && ((f7 == f7_fma_p) + || (f7 == f7_fms_p) + || (f7 == f7_fda_p) + || (f7 == f7_fds_p) + || (f7==f7_fcvt_r_p))); + +endfunction + +// Posit instructions which takes no operands from the GPR +// but only reads from the quire +function Bool accel_is_source_in_quire (Opcode accopcode, Bit #(7) f7); + return ( (accopcode == accel_opc) && (f7 == f7_fcvt_p_r)); +endfunction + +// Posit instructions whose rd is in the posit register file +function Bool accel_is_rd_in_PPR (Opcode accopcode, Bit #(7) f7); + return ( (accopcode == accel_opc) + && ( (f7 == f7_fcvt_p_r) + || (f7 == f7_fcvt_p_s))); + +endfunction +`endif + // ================================================================ // System Instructions Opcode op_SYSTEM = 7'b11_100_11; From c7d75df2e386ee3ab92b47dcdf95c7bfc09b5c44 Mon Sep 17 00:00:00 2001 From: Masoom Panda Date: Sat, 4 Jul 2020 11:40:10 +0000 Subject: [PATCH 2/5] Accelerator fields are added --- src_Core/CPU/CPU_Globals.bsv | 102 ++++++++++++++++++++++++++++++++++- 1 file changed, 100 insertions(+), 2 deletions(-) diff --git a/src_Core/CPU/CPU_Globals.bsv b/src_Core/CPU/CPU_Globals.bsv index 5aa4b2c6..a9c775fa 100644 --- a/src_Core/CPU/CPU_Globals.bsv +++ b/src_Core/CPU/CPU_Globals.bsv @@ -22,6 +22,7 @@ package CPU_Globals; import ISA_Decls :: *; import TV_Info :: *; +import Accel_Defines :: *; // ================================================================ // Output status of each stage @@ -180,6 +181,28 @@ endinstance `endif `endif +`ifdef ACCEL +typedef struct { + Bypass_State bypass_state; + RegName rd; + WordAL rd_val; + } AccelBypass +deriving (Bits); + +instance FShow #(AccelBypass); + function Fmt fshow (AccelBypass x); + let fmt0 = $format ("AccelBypass {"); + let fmt1 = ((x.bypass_state == BYPASS_RD_NONE) + ? $format ("PRd -") + : $format ("PRd %0d ", x.rd) + ((x.bypass_state == BYPASS_RD) + ? 
$format ("-") + : $format ("prd_val:%h", x.rd_val))); + let fmt2 = $format ("}"); + return fmt0 + fmt1 + fmt2; + endfunction +endinstance +`endif + // ---------------- // Baseline bypass info @@ -198,6 +221,12 @@ PBypass no_pbypass = PBypass {bypass_state: BYPASS_RD_NONE, rd_val: ? }; `endif `endif +`ifdef ACCEL +AccelBypass no_accelbypass = AccelBypass {bypass_state: BYPASS_RD_NONE, + rd: ?, + rd_val: ? }; + +`endif // ---------------- @@ -241,6 +270,20 @@ endfunction `endif `endif +`ifdef ACCEL +// AccelBypass functions for PositAccel +// Returns '(busy, val)' +// 'busy' means that the RegName is valid and matches, but the value is not available yet + +function Tuple2 #(Bool, WordAL) fn_accel_ppr_bypass (AccelBypass bypass, RegName rd, WordAL rd_val); + Bool busy = ((bypass.bypass_state == BYPASS_RD) && (bypass.rd == rd)); + WordAL val= ( ((bypass.bypass_state == BYPASS_RD_RDVAL) && (bypass.rd == rd)) + ? bypass.rd_val + : rd_val); + return tuple2 (busy, val); +endfunction +`endif + // ================================================================ // Trap information @@ -349,6 +392,7 @@ typedef struct { Instr_C instr_C; // Valid if no exception; original compressed instruction WordXL pred_pc; // Predicted next pc Decoded_Instr decoded_instr; + Bit#(1) rocc_value_bit; //value bit of RoCC } Data_StageD_to_Stage1 deriving (Bits); @@ -458,6 +502,10 @@ typedef enum { OP_Stage2_ALU // Pass-through (non mem, M, FD, AMO) `ifdef ISA_F , OP_Stage2_FD `endif +`ifdef ACCEL + , OP_Stage2_ACCEL +`endif + } Op_Stage2 deriving (Eq, Bits, FShow); @@ -467,7 +515,7 @@ typedef struct { Instr instr; // For debugging. Just funct3, funct7 are // enough for functionality. 
Op_Stage2 op_stage2; - RegName rd; + RegName rd; //Destination register Addr addr; // Branch, jump: newPC // Mem ops and AMOs: mem addr WordXL val1; // OP_Stage2_ALU: rd_val @@ -487,13 +535,27 @@ typedef struct { `ifdef POSIT Bool no_rd_upd; // No rd to be updated as result goes to quire Bool rs_frm_ppr; // The rs is from PPR (Posit stores) - Bool rd_in_ppr; // The rd should update into PPR + Bool rd_in_ppr; // The rd should update into PRF WordPL pval1; // OP_Stage2_P: arg1 WordPL pval2; // OP_Stage2_P: arg2 `endif Bit #(3) rounding_mode; // rounding mode from fcsr_frm or instr.rm `endif +`ifdef ACCEL + // PositAccel fields + Bool no_rd_upd; // No rd to be updated as result goes to quire + Bool rs_frm_ppr; // The rs is from PPR (Posit stores) + Bool rd_in_ppr; // The rd should update into PRF + WordAL accelval1; // OP_Stage2_P: arg1 + WordAL accelval2; // OP_Stage2_P: arg2 + Bit #(3) rounding_mode; //rounding mode + Bit #(3) funct3; ////Part of custom instruction to select whether register address is of flute or accelerator , in accelerator funct3 is referred as rg_sel in which rg_sel[2] is xd;rg_sel[1] is xs1;rg_sel[0] is xs2. 
+ + Bit #(7) funct7; //funct7 decides which accelerator operation to be performed by the accelerator,inside the accelerator this is referred as opcode + Bit #(1) rocc_value_bit; //Value bit of RoCC +`endif + `ifdef INCLUDE_TANDEM_VERIF Trace_Data trace_data; `endif @@ -519,6 +581,15 @@ instance FShow #(Data_Stage1_to_Stage2); x.pval1, x.pval2); `endif `endif + +`ifdef ACCEL + fmt = fmt + $format ("\n"); + fmt = fmt + $format (" no_rd_upd: ", fshow (x.no_rd_upd)); + fmt = fmt + $format (" rd_in_ppr: ", fshow (x.rd_in_ppr)); + fmt = fmt + $format (" accelval1:%h accelval2:%h }", + x.accelval1, x.accelval2); + fmt = fmt + $format (" funct3:%h rd:%h opcode:%h value:%h" ,x.funct3,x.rd,x.funct7,x.rocc_value_bit); + `endif return fmt; endfunction endinstance @@ -542,6 +613,11 @@ typedef struct { `endif +`ifdef ACCEL //change numericals + AccelBypass accelbypass; +`endif + + // feedforward data Data_Stage2_to_Stage3 data_to_stage3; } Output_Stage2 @@ -588,6 +664,14 @@ typedef struct { `endif `endif +`ifdef ACCEL //sending from stage 2 to stage 3,changes may be required here + + Bool no_rd_upd; + Bool rd_in_ppr; // The rd should update into PPR + WordAL prd_val; +`endif + + `ifdef INCLUDE_TANDEM_VERIF Trace_Data trace_data; `endif @@ -615,6 +699,17 @@ instance FShow #(Data_Stage2_to_Stage3); else `endif `endif + +`ifdef ACCEL + + if (x.rd_in_ppr) + if (x.no_rd_upd) + fmt = fmt + $format (" Output to Quire. 
No Rd update."); + else + fmt = fmt + $format (" prd:%0d rd_val:%h\n", x.rd, x.prd_val); + else +`endif + fmt = fmt + $format (" grd:%0d rd_val:%h\n", x.rd, x.rd_val); return fmt; endfunction @@ -632,6 +727,9 @@ typedef struct { PBypass pbypass; `endif `endif +`ifdef ACCEL + AccelBypass accelbypass; +`endif `ifdef INCLUDE_TANDEM_VERIF Trace_Data trace_data; From 3b011583914eec78ef1d159275f3e1e531bc6c8a Mon Sep 17 00:00:00 2001 From: Masoom Panda Date: Sat, 4 Jul 2020 11:42:19 +0000 Subject: [PATCH 3/5] Replace CPU_Stage2.bsv --- src_Core/CPU/CPU_Stage2.bsv | 185 ++++++++++++++++++++++++++++++++++-- 1 file changed, 179 insertions(+), 6 deletions(-) diff --git a/src_Core/CPU/CPU_Stage2.bsv b/src_Core/CPU/CPU_Stage2.bsv index 4e096339..1c0d0014 100644 --- a/src_Core/CPU/CPU_Stage2.bsv +++ b/src_Core/CPU/CPU_Stage2.bsv @@ -63,7 +63,9 @@ import RISCV_MBox :: *; import FBox_Top :: *; import FBox_Core :: *; // For fv_nanbox function `endif - +`ifdef ACCEL //Stage 2 dispatches to PositAccel +import PositAccel :: *; +`endif // ================================================================ // Interface @@ -121,7 +123,12 @@ module mkCPU_Stage2 #(Bit #(4) verbosity, `ifdef ISA_F FBox_Top_IFC fbox <- mkFBox_Top (0); `endif + // ---------------- + // PositAccel Wrapper +`ifdef ACCEL//instantiate the wrapper PositAccel + Wrapper_IFC wrap <- mkPositAccel ; +`endif // ---------------- let bypass_base = Bypass {bypass_state: BYPASS_RD_NONE, @@ -145,6 +152,14 @@ module mkCPU_Stage2 #(Bit #(4) verbosity, `endif `endif + +`ifdef ACCEL //Bypass Logic for ACCEL i.e PositAccel + let accelbypass_base = AccelBypass {bypass_state: BYPASS_RD_NONE, + rd: rg_stage2.rd, + rd_val: rg_stage2.accelval1 + }; +`endif + let data_to_stage3_base = Data_Stage2_to_Stage3 { priv: rg_stage2.priv , pc: rg_stage2.pc @@ -164,6 +179,12 @@ module mkCPU_Stage2 #(Bit #(4) verbosity, , rd: rg_stage2.rd , rd_val: rg_stage2.val1 +`ifdef ACCEL //changes maybe required + , no_rd_upd: False + , rd_in_ppr: False + , 
prd_val : rg_stage2.accelval1 + +`endif `ifdef INCLUDE_TANDEM_VERIF , trace_data: rg_stage2.trace_data `endif @@ -199,9 +220,16 @@ module mkCPU_Stage2 #(Bit #(4) verbosity, let res <- fbox.server_reset.response.get; `endif +`ifdef ACCEL + let accel_resp <-wrap.server_reset.response.get; //server response when PositAccel is set +`endif + f_reset_rsps.enq (?); endrule + + + // ---------------- // Combinational output function @@ -216,10 +244,17 @@ module mkCPU_Stage2 #(Bit #(4) verbosity, bypass : no_bypass `ifdef ISA_F , fbypass : no_fbypass + `ifdef POSIT , pbypass : no_pbypass + +`endif `endif + +`ifdef ACCEL + , accelbypass : no_accelbypass `endif + `ifdef INCLUDE_TANDEM_VERIF , trace_data : ? `endif @@ -240,10 +275,15 @@ module mkCPU_Stage2 #(Bit #(4) verbosity, bypass : bypass `ifdef ISA_F , fbypass : no_fbypass + `ifdef POSIT , pbypass : no_pbypass `endif `endif +`ifdef ACCEL + , accelbypass : no_accelbypass +`endif + }; end @@ -298,6 +338,17 @@ module mkCPU_Stage2 #(Bit #(4) verbosity, // GPR loads data_to_stage3.rd_val = result; +`ifdef ACCEL //need to change + // A PPR load + if (rg_stage2.rd_in_ppr) begin + // Only PLW is a legal instruction + //It needs to be checked + data_to_stage3.prd_val = truncate (dcache.word64); + end + data_to_stage3.rd_in_ppr = rg_stage2.rd_in_ppr; +`endif + + // Update the bypass channel, if not trapping (NONPIPE) let bypass = bypass_base; `ifdef ISA_F @@ -306,6 +357,11 @@ module mkCPU_Stage2 #(Bit #(4) verbosity, let pbypass = pbypass_base; `endif `endif +`ifdef ACCEL + let accelbypass = accelbypass_base; +`endif + + if (ostatus != OSTATUS_NONPIPE) begin `ifdef ISA_F @@ -339,6 +395,7 @@ module mkCPU_Stage2 #(Bit #(4) verbosity, `endif `endif + // Bypassing GPR values if (rg_stage2.rd != 0) begin // TODO: is this test necessary? // Choose one of the following two options @@ -354,6 +411,23 @@ module mkCPU_Stage2 #(Bit #(4) verbosity, end end +`ifdef ACCEL + // Bypassing PPR value. 
+ else if (rg_stage2.rd_in_ppr) begin + // Choose one of the following two options + + // Option 1: longer critical path, since the data is bypassed back into previous stage. + // We use data_to_stage3.rd_val since nanboxing has been done. + // pbypass.bypass_state = ((ostatus == OSTATUS_PIPE) ? BYPASS_RD_RDVAL : BYPASS_RD); + // pbypass.rd_val = data_to_stage3.prd_val; + + // Option 2: shorter critical path, since the data is not bypassed into previous stage, + // (the bypassing is effectively delayed until the next stage). + accelbypass.bypass_state = BYPASS_RD; + end +`endif + + `ifdef INCLUDE_TANDEM_VERIF let trace_data = rg_stage2.trace_data; `ifdef ISA_F @@ -380,6 +454,9 @@ module mkCPU_Stage2 #(Bit #(4) verbosity, , pbypass : no_pbypass `endif `endif +`ifdef ACCEL + , accelbypass : no_accelbypass +`endif `ifdef INCLUDE_TANDEM_VERIF , trace_data : trace_data `endif @@ -408,6 +485,9 @@ module mkCPU_Stage2 #(Bit #(4) verbosity, , pbypass : no_pbypass `endif `endif +`ifdef ACCEL + , accelbypass : no_accelbypass +`endif `ifdef INCLUDE_TANDEM_VERIF , trace_data : trace_data `endif @@ -445,6 +525,10 @@ module mkCPU_Stage2 #(Bit #(4) verbosity, , pbypass : no_pbypass `endif `endif +`ifdef ACCEL + , accelbypass : no_accelbypass +`endif + `ifdef INCLUDE_TANDEM_VERIF , trace_data : trace_data `endif @@ -483,6 +567,9 @@ module mkCPU_Stage2 #(Bit #(4) verbosity, , pbypass : no_pbypass `endif `endif +`ifdef ACCEL + , accelbypass : no_accelbypass +`endif `ifdef INCLUDE_TANDEM_VERIF , trace_data : trace_data `endif @@ -510,7 +597,7 @@ module mkCPU_Stage2 #(Bit #(4) verbosity, data_to_stage3.fpr_flags= fflags; `ifdef POSIT data_to_stage3.no_rd_upd= rg_stage2.no_rd_upd; - data_to_stage3.rd_in_ppr= rg_stage2.rd_in_ppr; + data_to_stage3.rd_in_ppr= rg_stage2.rd_in_ppr; data_to_stage3.prd_val = truncate (value); `endif `ifdef RV64 @@ -536,8 +623,8 @@ module mkCPU_Stage2 #(Bit #(4) verbosity, end `ifdef POSIT - else if ((rg_stage2.rd_in_ppr) || (rg_stage2.no_rd_upd)) begin - if ((rg_stage2.rd_in_ppr) && (!rg_stage2.no_rd_upd)) begin + else if ((rg_stage2.rd_in_ppr) || (rg_stage2.no_rd_upd)) begin + if 
((rg_stage2.rd_in_ppr) && (!rg_stage2.no_rd_upd)) begin pbypass.bypass_state = ((ostatus==OSTATUS_PIPE) ? BYPASS_RD_RDVAL : BYPASS_RD); pbypass.rd_val = truncate (value); @@ -569,6 +656,62 @@ module mkCPU_Stage2 #(Bit #(4) verbosity, data_to_stage3.trace_data = trace_data; `endif +//-------------------------------------------------------------------- +`ifdef ACCEL + // This stage is doing Posit accelerator op + if (rg_stage2.op_stage2 == OP_Stage2_ACCEL) begin + + + let ostatus = ((! wrap.valid) ? OSTATUS_BUSY : OSTATUS_PIPE); //set status as per posit accel +//instantiation of positaccel has been done using wrap + + + // Extract fields from PositAccel result which are necessary to be sent to writeback stage(Stage 3)(clarify this that what all to be sent) + +//if xd=0 do not wait for response from wrapper else wait for the response and then proceed + +//funct3 in Accelerator is termed as rg_sel-where rg_sel[2] is xd;rg_sel[1] is xs1;rg_sel[0] is xs2.(in terms of RoCC format) + + let data_to_stage3 = data_to_stage3_base; + let xd =rg_stage2.funct3[2]; //the msb bit of funct3,in RoCC terms its xd + + if(xd == 1'b1) begin + Wrapper_in_res accel_resp= ff_ROCCRsp.first; //extracting the response from PositAccel in accel_resp + data_to_stage3.prd_val = accel_resp.result; //accelerator result + end + data_to_stage3.no_rd_upd= rg_stage2.no_rd_upd; + data_to_stage3.rd_valid = (ostatus == OSTATUS_PIPE); + data_to_stage3.rd_in_ppr= rg_stage2.rd_in_ppr; + + + let bypass = bypass_base; + let accelbypass = accelbypass_base; + + // result is meant for a PPR + if (rg_stage2.rd_in_ppr) begin + accelbypass.bypass_state = ((ostatus==OSTATUS_PIPE) ? BYPASS_RD_RDVAL + : BYPASS_RD); + accelbypass.rd_val = value; + + end + + + // result is meant for a GPR (while using vector operations) + else begin + bypass.bypass_state = ((ostatus==OSTATUS_PIPE) ? 
BYPASS_RD_RDVAL + : BYPASS_RD); +`ifdef RV64 + bypass.rd_val = (value); +`else + bypass.rd_val = truncate (value); +`endif + end +`endif +end + // ----------------------------------------------------- + + +//stage 2 outputs output_stage2 = Output_Stage2 {ostatus : ostatus, trap_info : trap_info_fbox, data_to_stage3 : data_to_stage3, @@ -579,17 +722,22 @@ module mkCPU_Stage2 #(Bit #(4) verbosity, , pbypass : pbypass `endif `endif +`ifdef ACCEL + , accelbypass : accelbypass +`endif `ifdef INCLUDE_TANDEM_VERIF , trace_data : trace_data `endif }; - end +end `endif return output_stage2; + endfunction - // ---------------- + // ------------------------------------------------------------- + // Initiate DM, Shifter box, MBox or FBox op function Action fa_enq (Data_Stage1_to_Stage2 x); @@ -706,6 +854,31 @@ module mkCPU_Stage2 #(Bit #(4) verbosity, ); end `endif + +`ifdef ACCEL + // If PositAccel op, initiate it + else if (x.op_stage2 == OP_Stage2_ACCEL) begin + // Instr fields required for decode for opcodes + let opcode = instr_opcode (x.instr); + let funct7 = instr_funct7 (x.instr); + let funct3 = instr_funct3 (x.instr); + let rs2 = instr_rs2 (x.instr); + /*Bit #(32) val1 = x.val1_frm_gpr ? extend (x.val1) + : extend (x.fval1);*/ //when dealing with vectors,it maybe used + + wrap.req ( opcode //PositAccel is instantiated using wrap + , funct7 + , x.rounding_mode // rm + , funct3 + , x.rocc_value_bit //value bit (RoCC) + //,rs2 + ,x.rd + , x.accelval1 + , x.accelval2 + ); + end +`endif + endaction endfunction From fcd0da72dad3fdaee25366fbf4f44a4315013a55 Mon Sep 17 00:00:00 2001 From: Masoom Panda Date: Sat, 4 Jul 2020 11:43:14 +0000 Subject: [PATCH 4/5] Modified for accelerator From f3570aa6285e7919396e71478fe96682a7701b29 Mon Sep 17 00:00:00 2001 From: yagyamundra Date: Mon, 1 Nov 2021 08:38:08 -0700 Subject: [PATCH 5/5] commented wrappers instantiation in CPU_Globals and EX_ALU_functions ROCC has been added in stage 1 and stage 2 Code Compiled. 
PositAccel and AccelDefines needs to be created. --- src_Core/CPU/CPU_Globals.bsv | 2 +- src_Core/CPU/CPU_Stage1.bsv | 153 ++++++++++++++++++++++++-- src_Core/CPU/CPU_Stage2.bsv | 175 ++++++++++++++++++++++++++++-- src_Core/CPU/EX_ALU_functions.bsv | 161 ++++++++++++++++++++++----- 4 files changed, 447 insertions(+), 44 deletions(-) diff --git a/src_Core/CPU/CPU_Globals.bsv b/src_Core/CPU/CPU_Globals.bsv index a9c775fa..4d4885e0 100644 --- a/src_Core/CPU/CPU_Globals.bsv +++ b/src_Core/CPU/CPU_Globals.bsv @@ -22,7 +22,7 @@ package CPU_Globals; import ISA_Decls :: *; import TV_Info :: *; -import Accel_Defines :: *; +//import Accel_Defines :: *; // ================================================================ // Output status of each stage diff --git a/src_Core/CPU/CPU_Stage1.bsv b/src_Core/CPU/CPU_Stage1.bsv index a12a28c2..fd4f1a05 100644 --- a/src_Core/CPU/CPU_Stage1.bsv +++ b/src_Core/CPU/CPU_Stage1.bsv @@ -40,6 +40,15 @@ import FPR_RegFile :: *; import PPR_RegFile :: *; `endif `endif + +`ifdef ROCC //if accelerator is set +import PPR_RegFile :: *; +import GPR_RegFile :: *; +`endif + +`ifdef ACCEL //if accelerator is set;assuming both inputs as scalars and scalar output for now +import PPR_RegFile :: *; +`endif import CSR_RegFile :: *; import EX_ALU_functions :: *; @@ -86,6 +95,18 @@ module mkCPU_Stage1 #(Bit #(4) verbosity, PBypass pbypass_from_stage2, PBypass pbypass_from_stage3, `endif +`endif + +`ifdef ROCC //assuming that for now dealing with scalars,so just using PPR + PPR_RegFile_IFC ppr_regfile, + roccBypass roccbypass_from_stage2, + roccBypass roccbypass_from_stage3, +`endif + +`ifdef ACCEL //assuming that for now dealing with scalars,so just using PPR + PPR_RegFile_IFC ppr_regfile, + AccelBypass accelbypass_from_stage2, + AccelBypass accelbypass_from_stage3, `endif CSR_RegFile_IFC csr_regfile, Epoch cur_epoch, @@ -115,6 +136,7 @@ module mkCPU_Stage1 #(Bit #(4) verbosity, let decoded_instr = rg_stage_input.decoded_instr; let funct3 = 
decoded_instr.funct3; + let funct7 = decoded_instr.funct7; //funct7 of decoded instruction,for the case of accelerator funct7 tells which accelerator operation to be performed. // Register rs1 read and bypass let rs1 = decoded_instr.rs1; @@ -172,15 +194,49 @@ module mkCPU_Stage1 #(Bit #(4) verbosity, `endif `endif +`ifdef ROCC + // rs1 read and bypass + let roccrs1_val = ppr_regfile.read_rs1 (rs1); + match { .roccbusy1a, .roccrs1a } = rocc_fn_ppr_bypass (roccbypass_from_stage3, rs1, roccrs1_val); + match { .roccbusy1b, .roccrs1b } = rocc_fn_ppr_bypass (roccbypass_from_stage2, rs1, roccrs1a); + Bool roccrs1_busy = (roccbusy1a || roccbusy1b); + WordPL roccrs1_val_bypassed = roccrs1b; + + // rs2 read and bypass + let roccrs2_val = ppr_regfile.read_rs2 (rs2); + match { .roccbusy2a, .roccrs2a } = rocc_fn_ppr_bypass (roccbypass_from_stage3, rs2, roccrs2_val); + match { .roccbusy2b, .roccrs2b } = rocc_fn_ppr_bypass (roccbypass_from_stage2, rs2, roccrs2a); + Bool roccrs2_busy = (roccbusy2a || roccbusy2b); + WordPL roccrs2_val_bypassed = roccrs2b; +`endif + + +`ifdef ACCEL + // rs1 read and bypass + let accelrs1_val = ppr_regfile.read_rs1 (rs1); + match { .accelbusy1a, .accelrs1a } = fn_accel_ppr_bypass (accelbypass_from_stage3, rs1, accelrs1_val); + match { .accelbusy1b, .accelrs1b } = fn_accel_ppr_bypass (accelbypass_from_stage2, rs1, accelrs1a); + Bool accelrs1_busy = (accelbusy1a || accelbusy1b); + WordAL accelrs1_val_bypassed = accelrs1b; + + // rs2 read and bypass + let accelrs2_val = ppr_regfile.read_rs2 (rs2); + match { .accelbusy2a, .accelrs2a } = fn_accel_ppr_bypass (accelbypass_from_stage3, rs2, accelrs2_val); + match { .accelbusy2b, .accelrs2b } = fn_accel_ppr_bypass (accelbypass_from_stage2, rs2, accelrs2a); + Bool accelrs2_busy = (accelbusy2a || accelbusy2b); + WordAL accelrs2_val_bypassed = accelrs2b; +`endif + + // ALU function let alu_inputs = ALU_Inputs {cur_priv : cur_priv, pc : rg_stage_input.pc, is_i32_not_i16 : rg_stage_input.is_i32_not_i16, instr : 
rg_stage_input.instr, + instr : rg_stage_input.instr,//32 bit instruction `ifdef ISA_C instr_C : rg_stage_input.instr_C, `endif - decoded_instr : rg_stage_input.decoded_instr, + decoded_instr : rg_stage_input.decoded_instr,//decoded instruction rs1_val : rs1_val_bypassed, rs2_val : rs2_val_bypassed, `ifdef ISA_F @@ -196,6 +252,19 @@ module mkCPU_Stage1 #(Bit #(4) verbosity, prs1_val : prs1_val_bypassed, prs2_val : prs2_val_bypassed, `endif +`endif + +`ifdef ROCC + roccrs1_val : roccrs1_val_bypassed, + roccrs2_val : roccrs2_val_bypassed, + rocc_value_bit : rg_stage_input.rocc_value_bit, //value bit of RoCC +`endif + + +`ifdef ACCEL + accelrs1_val : accelrs1_val_bypassed, + accelrs2_val : accelrs2_val_bypassed, + rocc_value_bit : rg_stage_input.rocc_value_bit, //value bit of RoCC `endif mstatus : csr_regfile.read_mstatus, misa : csr_regfile.read_misa }; @@ -217,7 +286,7 @@ module mkCPU_Stage1 #(Bit #(4) verbosity, rs_frm_fpr : alu_outputs.rs_frm_fpr, val1_frm_gpr : alu_outputs.val1_frm_gpr, `ifdef POSIT - no_rd_upd : alu_outputs.no_rd_upd, + no_rd_upd : alu_outputs.no_rd_upd, rs_frm_ppr : alu_outputs.rs_frm_ppr, rd_in_ppr : alu_outputs.rd_in_ppr, pval1 : alu_outputs.pval1, @@ -225,6 +294,32 @@ module mkCPU_Stage1 #(Bit #(4) verbosity, `endif rounding_mode : alu_outputs.rm, `endif + +`ifdef ROCC //ToDo:include GPR when dealing with vectors + + roccval1 : alu_outputs.roccval1,//input1 + roccval2 : alu_outputs.roccval2,//input2 + rs_frm_ppr : alu_outputs.rs_frm_ppr, + rd_in_ppr : alu_outputs.rd_in_ppr, + funct3 : alu_outputs.funct3, //funct3 is referred as rg_sel=[xd xs1 xs2] + no_rd_upd : alu_outputs.no_rd_upd, + funct7 : alu_outputs.funct7, //funct7 acts as the opcode to determine what accelerator operation to be carried out + rocc_value_bit : alu_outputs.rocc_value_bit, //value bit for RoCC + rounding_mode : alu_outputs.rm, +`endif + +`ifdef ACCEL //ToDo:include GPR when dealing with vectors + + accelval1 : alu_outputs.accelval1,//input1 + accelval2 : 
alu_outputs.accelval2,//input2 + rs_frm_ppr : alu_outputs.rs_frm_ppr, + rd_in_ppr : alu_outputs.rd_in_ppr, + funct3 : alu_outputs.funct3, //In RoCC,funct3 is referred as rg_sel=[xd xs1 xs2] + no_rd_upd : alu_outputs.no_rd_upd, + funct7 : alu_outputs.funct7, //in accelerator funct7 acts as the opcode to determine what accelerator operation to be carried out + rocc_value_bit : alu_outputs.rocc_value_bit, //value bit for RoCC + rounding_mode : alu_outputs.rm, +`endif `ifdef INCLUDE_TANDEM_VERIF trace_data : alu_outputs.trace_data, `endif @@ -259,17 +354,47 @@ module mkCPU_Stage1 #(Bit #(4) verbosity, fval2 : ?, fval3 : ?, rd_in_fpr : ?, - rs_frm_fpr : ?, - val1_frm_gpr : ?, + rs_frm_fpr : ?, + val1_frm_gpr : ?, `ifdef POSIT pval1 : ?, pval2 : ?, - rs_frm_ppr : ?, + rs_frm_ppr : ?, rd_in_ppr : ?, - no_rd_upd : ?, + no_rd_upd : ?, `endif rounding_mode : ?, `endif + + +`ifdef ROCC + roccval1 : ?, + roccval2 : ?, + rs_frm_ppr : ?, + rd_in_ppr : ?, + no_rd_upd : ?, + funct3 : ?, + funct7 : ?, + rocc_value_bit : ?, + rounding_mode : ?, + +`endif + + +`ifdef ACCEL + accelval1 : ?, + accelval2 : ?, + rs_frm_ppr : ?, + rd_in_ppr : ?, + no_rd_upd : ?, + funct3 : ?, + funct7 : ?, + rocc_value_bit : ?, + rounding_mode : ?, + +`endif + + `ifdef INCLUDE_TANDEM_VERIF trace_data: alu_outputs.trace_data, `endif @@ -297,6 +422,20 @@ module mkCPU_Stage1 #(Bit #(4) verbosity, `endif `endif +`ifdef ROCC + // Stall if bypass pending for PPR rs1, rs2 or rs3 + else if (roccrs1_busy || roccrs2_busy) begin + output_stage1.ostatus = OSTATUS_BUSY; + end +`endif + +`ifdef ACCEL + // Stall if bypass pending for PPR rs1, rs2 or rs3 + else if (accelrs1_busy || accelrs2_busy) begin + output_stage1.ostatus = OSTATUS_BUSY; + end +`endif + // Trap on fetch-exception else if (rg_stage_input.exc) begin output_stage1.ostatus = OSTATUS_NONPIPE; diff --git a/src_Core/CPU/CPU_Stage2.bsv b/src_Core/CPU/CPU_Stage2.bsv index 1c0d0014..d46d1dc8 100644 --- a/src_Core/CPU/CPU_Stage2.bsv +++ 
b/src_Core/CPU/CPU_Stage2.bsv @@ -63,6 +63,11 @@ import RISCV_MBox :: *; import FBox_Top :: *; import FBox_Core :: *; // For fv_nanbox function `endif + +`ifdef ROCC //Stage 2 dispatches to PositAccel +import PositAccel :: *; +`endif + `ifdef ACCEL //Stage 2 dispatches to PositAccel import PositAccel :: *; `endif @@ -126,6 +131,10 @@ module mkCPU_Stage2 #(Bit #(4) verbosity, // ---------------- // PositAccel Wrapper +`ifdef ROCC//instantiate the wrapper PositAccel + Wrapper_IFC wrap <- mkPositAccel ; +`endif + `ifdef ACCEL//instantiate the wrapper PositAccel Wrapper_IFC wrap <- mkPositAccel ; `endif @@ -152,6 +161,13 @@ module mkCPU_Stage2 #(Bit #(4) verbosity, `endif `endif +`ifdef ROCC //Bypass Logic for ACCEL i.e PositAccel + let roccbypass_base = roccBypass {bypass_state: BYPASS_RD_NONE, + rd: rg_stage2.rd, + rd_val: rg_stage2.roccval1 + }; +`endif + `ifdef ACCEL //Bypass Logic for ACCEL i.e PositAccel let accelbypass_base = AccelBypass {bypass_state: BYPASS_RD_NONE, @@ -179,6 +195,13 @@ module mkCPU_Stage2 #(Bit #(4) verbosity, , rd: rg_stage2.rd , rd_val: rg_stage2.val1 +`ifdef ROCC //changes maybe required + , no_rd_upd: False + , rd_in_ppr: False + , prd_val : rg_stage2.roccval1 + +`endif + `ifdef ACCEL //changes maybe required , no_rd_upd: False , rd_in_ppr: False @@ -220,6 +243,12 @@ module mkCPU_Stage2 #(Bit #(4) verbosity, let res <- fbox.server_reset.response.get; `endif +`ifdef ROCC + let rocc_resp <-wrap.server_reset.response.get; //server response when PositAccel is set +`endif + + + `ifdef ACCEL let accel_resp <-wrap.server_reset.response.get; //server response when PositAccel is set `endif @@ -251,6 +280,11 @@ module mkCPU_Stage2 #(Bit #(4) verbosity, `endif `endif + +`ifdef ROCC + , roccbypass : no_roccbypass +`endif + `ifdef ACCEL , accelbypass : no_accelbypass `endif @@ -280,8 +314,8 @@ module mkCPU_Stage2 #(Bit #(4) verbosity, , pbypass : no_pbypass `endif `endif -`ifdef ACCEL - , accelbypass : no_accelbypass +`ifdef ROCC + , roccbypass : 
no_roccbypass `endif }; @@ -338,6 +372,17 @@ module mkCPU_Stage2 #(Bit #(4) verbosity, // GPR loads data_to_stage3.rd_val = result; + +`ifdef ROCC //need to change + // A PPR load + if (rg_stage2.rd_in_ppr) begin + // Only PLW is a legal instruction + //It needs to be checked + data_to_stage3.prd_val = truncate (dcache.word64); + end + data_to_stage3.rd_in_ppr = rg_stage2.rd_in_ppr; +`endif + `ifdef ACCEL //need to change // A PPR load if (rg_stage2.rd_in_ppr) begin @@ -357,6 +402,11 @@ module mkCPU_Stage2 #(Bit #(4) verbosity, let pbypass = pbypass_base; `endif `endif + +`ifdef ROCC + let roccbypass = roccbypass_base; +`endif + `ifdef ACCEL let accelbypass = accelbypass_base; `endif @@ -411,6 +461,23 @@ module mkCPU_Stage2 #(Bit #(4) verbosity, end end + +`ifdef ROCC + // Bypassing PPR value. + else if (rg_stage2.rd_in_ppr) begin + // Choose one of the following two options + + // Option 1: longer critical path, since the data is bypassed back into previous stage. + // We use data_to_stage3.rd_val since nanboxing has been done. + // pbypass.bypass_state = ((ostatus == OSTATUS_PIPE) ? BYPASS_RD_RDVAL : BYPASS_RD); + // pbypass.rd_val = data_to_stage3.prd_val; + + // Option 2: shorter critical path, since the data is not bypassed into previous stage, + // (the bypassing is effectively delayed until the next stage). + roccbypass.bypass_state = BYPASS_RD; + end +`endif + `ifdef ACCEL // Bypassing PPR value. 
else if (rg_stage2.rd_in_ppr) begin @@ -454,6 +521,11 @@ module mkCPU_Stage2 #(Bit #(4) verbosity, , pbypass : no_pbypass `endif `endif + +`ifdef ROCC + , roccbypass : no_roccbypass +`endif + `ifdef ACCEL , accelbypass : no_accelbypass `endif @@ -485,6 +557,9 @@ module mkCPU_Stage2 #(Bit #(4) verbosity, , pbypass : no_pbypass `endif `endif +`ifdef ROCC + , roccbypass : no_roccbypass +`endif `ifdef ACCEL , accelbypass : no_accelbypass `endif @@ -525,8 +600,8 @@ module mkCPU_Stage2 #(Bit #(4) verbosity, , pbypass : no_pbypass `endif `endif -`ifdef ACCEL - , accelbypass : no_accelbypass +`ifdef ROCC + , roccbypass : no_roccbypass `endif `ifdef INCLUDE_TANDEM_VERIF @@ -567,8 +642,8 @@ module mkCPU_Stage2 #(Bit #(4) verbosity, , pbypass : no_pbypass `endif `endif -`ifdef ACCEL - , accelbypass : no_accelbypass +`ifdef ROCC + , roccbypass : no_roccbypass `endif `ifdef INCLUDE_TANDEM_VERIF , trace_data : trace_data @@ -654,8 +729,63 @@ module mkCPU_Stage2 #(Bit #(4) verbosity, end data_to_stage3.trace_data = trace_data; + end +`endif + + //-------------------------------------------------------------------- +`ifdef ROCC + // This stage is doing Posit accelerator op + if (rg_stage2.op_stage2 == OP_Stage2_ROCC) begin + + + let ostatus = ((! wrap.valid) ? 
OSTATUS_BUSY : OSTATUS_PIPE); //set status as per posit accel +//instantiation of positaccel has been done using wrap + + + // Extract fields from PositAccel result which are necessary to be sent to writeback stage(Stage 3)(clarify this that what all to be sent) + +//if xd=0 do not wait for response from wrapper else wait for the response and then proceed + +//funct3 in Accelerator is termed as rg_sel-where rg_sel[2] is xd;rg_sel[1] is xs1;rg_sel[0] is xs2.(in terms of RoCC format) + + let data_to_stage3 = data_to_stage3_base; + let xd =rg_stage2.funct3[2]; //the msb bit of funct3,in RoCC terms its xd + + if(xd == 1'b1) begin + Wrapper_in_res rocc_resp= ff_ROCCRsp.first; //extracting the response from PositAccel in rocc_resp + data_to_stage3.prd_val = rocc_resp.result; //accelerator result + end + data_to_stage3.no_rd_upd= rg_stage2.no_rd_upd; + data_to_stage3.rd_valid = (ostatus == OSTATUS_PIPE); + data_to_stage3.rd_in_ppr= rg_stage2.rd_in_ppr; + + + let bypass = bypass_base; + let roccbypass = roccbypass_base; + + // result is meant for a PPR + if (rg_stage2.rd_in_ppr) begin + roccbypass.bypass_state = ((ostatus==OSTATUS_PIPE) ? BYPASS_RD_RDVAL + : BYPASS_RD); + roccbypass.rd_val = value; + + end + + + // result is meant for a GPR (while using vector operations) + else begin + bypass.bypass_state = ((ostatus==OSTATUS_PIPE) ? 
BYPASS_RD_RDVAL + : BYPASS_RD); +`ifdef RV64 + bypass.rd_val = (value); +`else + bypass.rd_val = truncate (value); +`endif + end + end `endif + // ----------------------------------------------------- //-------------------------------------------------------------------- `ifdef ACCEL // This stage is doing Posit accelerator op @@ -706,8 +836,9 @@ module mkCPU_Stage2 #(Bit #(4) verbosity, bypass.rd_val = truncate (value); `endif end + end `endif -end + // ----------------------------------------------------- @@ -722,6 +853,11 @@ end , pbypass : pbypass `endif `endif + +`ifdef ROCC + , roccbypass : roccbypass +`endif + `ifdef ACCEL , accelbypass : accelbypass `endif @@ -855,6 +991,31 @@ end end `endif +`ifdef ROCC + // If PositAccel op, initiate it + else if (x.op_stage2 == OP_Stage2_ROCC) begin + // Instr fields required for decode for opcodes + let opcode = instr_opcode (x.instr); + let funct7 = instr_funct7 (x.instr); + let funct3 = instr_funct3 (x.instr); + let rs2 = instr_rs2 (x.instr); + /*Bit #(32) val1 = x.val1_frm_gpr ? 
extend (x.val1) + : extend (x.fval1);*/ // may be used when dealing with vectors + + wrap.req ( opcode // request is issued to the PositAccel through wrap + , funct7 + , x.rounding_mode // rm + , funct3 + , x.rocc_value_bit //value bit (RoCC) + //,rs2 + ,x.rd + , x.roccval1 + , x.roccval2 + ); + end +`endif + + `ifdef ACCEL // If PositAccel op, initiate it else if (x.op_stage2 == OP_Stage2_ACCEL) begin diff --git a/src_Core/CPU/EX_ALU_functions.bsv b/src_Core/CPU/EX_ALU_functions.bsv index 2143a967..49d9e17a 100644 --- a/src_Core/CPU/EX_ALU_functions.bsv +++ b/src_Core/CPU/EX_ALU_functions.bsv @@ -30,6 +30,7 @@ import Vector :: *; import ISA_Decls :: *; import CPU_Globals :: *; import TV_Info :: *; +//import Accel_Defines :: *; //PositAccelerator definitions // ================================================================ // ALU inputs @@ -58,6 +59,11 @@ typedef struct { `ifdef POSIT WordPL prs1_val; WordPL prs2_val; +`endif +`ifdef ACCEL + WordAL accelrs1_val; + WordAL accelrs2_val; + Bit #(1) rocc_value_bit; `endif MISA misa; } ALU_Inputs @@ -88,7 +94,7 @@ typedef struct { Exc_Code exc_code; // Relevant if control == CONTROL_TRAP Op_Stage2 op_stage2; - RegName rd; + RegName rd; // destination register: bits [11:7] of the instruction Addr addr; // Branch, jump: newPC // Mem ops and AMOs: mem addr WordXL val1; // OP_Stage2_ALU: result for Rd (ALU ops: result, JAL/JALR: return PC) @@ -119,6 +125,21 @@ typedef struct { Bit #(3) rm; // rounding mode `endif +`ifdef ACCEL // accelerator fields. NOTE(review): rm is also declared just above this section; confirm both sections are never enabled together + Bool rd_in_ppr; // For instructions where the destn + // is in the Posit RF + Bool rs_frm_ppr; // src register is in ppr (for posit stores) + Bool no_rd_upd; // For instructions where the destn + // is quire, there will be no update + // of architectural state + WordAL accelval1; // OP_Stage2_ACCEL: arg1 + WordAL accelval2; // OP_Stage2_ACCEL: arg2 + Bit #(3) rm; // rounding mode + Bit #(3) funct3; // part of the custom instruction; selects whether a register address belongs to Flute or to the accelerator. In the
accelerator funct3 is referred to as rg_sel: rg_sel[2] is xd, rg_sel[1] is xs1, rg_sel[0] is xs2. + + Bit #(7) funct7; // funct7 selects which operation the accelerator performs; inside the accelerator it is referred to as opcode + Bit #(1) rocc_value_bit; // value bit of RoCC +`endif CF_Info cf_info; // For redirection and branch predictor `ifdef INCLUDE_TANDEM_VERIF @@ -156,6 +177,19 @@ ALU_Outputs alu_outputs_base pval2 : ?, `endif rm : ?, +`endif +`ifdef ACCEL + + no_rd_upd : False, + rs_frm_ppr : False, + rd_in_ppr : False, + accelval1 : ?, + accelval2 : ?, + funct3 : ?, + rm : ?, + funct7 : ?, + rocc_value_bit : ?, + `endif cf_info : cf_info_base @@ -702,41 +736,34 @@ function ALU_Outputs fv_LD (ALU_Inputs inputs); `endif `ifdef ISA_F || (funct3 == f3_FLW) -`ifdef POSIT - || (funct3 == f3_PLW) -`endif `endif `ifdef ISA_D || (funct3 == f3_FLD) `endif ); - let alu_outputs = alu_outputs_base; - // FP loads are not legal unless the MSTATUS.FS bit is set Bool legal_FP_LD = True; `ifdef ISA_F - if (opcode == op_LOAD_FP) begin + //if ((opcode == op_LOAD_FP) || (opcode == op_LOAD_P)) + if (opcode == op_LOAD_FP) legal_FP_LD = (fv_mstatus_fs (inputs.mstatus) != fs_xs_off); - -`ifdef POSIT - // when posit loads are in the picture, f3 == PLW is reserved - // for loads to PPR. All other loads are to FPR - alu_outputs.rd_in_fpr = (funct3 != f3_PLW); - alu_outputs.rd_in_ppr = (funct3 == f3_PLW); - `else - // note that the destination register for this load is in the FPR - alu_outputs.rd_in_fpr = True; -`endif - end `endif + let alu_outputs = alu_outputs_base; alu_outputs.control = ((legal_LD && legal_FP_LD) ?
CONTROL_STRAIGHT : CONTROL_TRAP); alu_outputs.op_stage2 = OP_Stage2_LD; alu_outputs.rd = inputs.decoded_instr.rd; alu_outputs.addr = eaddr; +`ifdef ISA_F + // the destination register for this load is in the FPR only when the opcode is op_LOAD_FP + alu_outputs.rd_in_fpr = (opcode == op_LOAD_FP); +`ifdef POSIT + alu_outputs.rd_in_ppr = (opcode == op_LOAD_P); +`endif +`endif `ifdef INCLUDE_TANDEM_VERIF // Normal trace output (if no trap) @@ -797,16 +824,17 @@ function ALU_Outputs fv_ST (ALU_Inputs inputs); if (opcode == op_STORE_FP) begin legal_FP_ST = (fv_mstatus_fs (inputs.mstatus) != fs_xs_off); -`ifdef POSIT - // when posit stores are in the picture, f3 == PSW is reserved - // for stores from PPR. All other stores are from FPR - alu_outputs.rs_frm_fpr = (funct3 != f3_PSW); - alu_outputs.rs_frm_ppr = (funct3 == f3_PSW); -`else // note that the source data register for this store is in the FPR alu_outputs.rs_frm_fpr = True; -`endif + end +`ifdef POSIT + else if (opcode == op_STORE_P) begin + legal_FP_ST = (fv_mstatus_fs (inputs.mstatus) != fs_xs_off); + + // note that the source data register for this store is in the PPR + alu_outputs.rs_frm_ppr = True; end +`endif `endif alu_outputs.control = ((legal_ST && legal_FP_ST) ?
CONTROL_STRAIGHT @@ -818,14 +846,10 @@ function ALU_Outputs fv_ST (ALU_Inputs inputs); `ifdef ISA_F alu_outputs.fval2 = inputs.frs2_val; -`ifdef POSIT - alu_outputs.pval2 = inputs.prs2_val; -`endif `endif `ifdef INCLUDE_TANDEM_VERIF // Normal trace output (if no trap) - // Posits are not supported in TV `ifdef ISA_F if (opcode == op_STORE_FP) alu_outputs.trace_data = mkTrace_F_STORE (fall_through_pc (inputs), @@ -1074,6 +1098,62 @@ function ALU_Outputs fv_FP (ALU_Inputs inputs); endfunction `endif + +// ---------------------------------------------------------------- +// ACCEL +// fv_ACCEL fills an ALU_Outputs record for the PositAccel: op_stage2, rd, funct3/funct7, the quire/PPR destination flags, the RoCC value bit and both operands are passed through; the legality checks are still commented out + +`ifdef ACCEL + +function ALU_Outputs fv_ACCEL (ALU_Inputs inputs); + let opcode = inputs.decoded_instr.opcode; // opcode that selects the accelerator + let funct3 = inputs.decoded_instr.funct3; + let funct7 = inputs.decoded_instr.funct7; + let rs2 = inputs.decoded_instr.rs2; + + /*// Check instruction legality for accelerator + +//analogy from ISA_F condition has been taken(few things are copied),changes are required. + + // Is the rounding mode legal + match {.rm, .rm_is_legal} = accel_rmode_check (funct3, inputs.frm); + + // Is the instruction legal -- if MSTATUS.FS = fs_xs_off, FP instructions + // are always illegal,Check for similar conditions for accelerator(if applicable) + + let inst_is_legal = ( (fv_mstatus_fs (inputs.mstatus) == fs_xs_off) + ? False + : accel_instr_legal (funct7, + rs2, + opcode));*/ + + +//ToDo: Instruction legality check w.r.t RoCC can be checked later(if applicable here) + + + let alu_outputs = alu_outputs_base; + /*alu_outputs.control = ((inst_is_legal && rm_is_legal) ?
CONTROL_STRAIGHT + : CONTROL_TRAP);*/ + + + alu_outputs.op_stage2 = OP_Stage2_ACCEL; + alu_outputs.rd = inputs.decoded_instr.rd; + alu_outputs.rm = 3'b000; // placeholder: the rm binding above is commented out, so a constant is used (3'b000 is the RISC-V round-to-nearest-even encoding) -- TODO confirm what the accelerator expects + alu_outputs.funct3 = funct3; + alu_outputs.funct7 = funct7; + alu_outputs.no_rd_upd = accel_is_destn_in_quire (opcode, funct7); // quire-updating posit instructions do not update the PPR state (helper declared in ISA_Decls) + alu_outputs.rd_in_ppr = ( accel_is_rd_in_PPR (opcode, funct7) + || accel_is_destn_in_quire (opcode, funct7)); + + + // Just copy the accelrs*_val values from inputs to outputs + alu_outputs.accelval1 = inputs.accelrs1_val; + alu_outputs.accelval2 = inputs.accelrs2_val; + alu_outputs.rocc_value_bit = inputs.rocc_value_bit; // forward the RoCC value bit instead of leaving it at the don't-care default + +return alu_outputs; +endfunction +`endif // ---------------------------------------------------------------- // AMO // Just pass through to the memory stage @@ -1231,6 +1311,14 @@ function ALU_Outputs fv_ALU (ALU_Inputs inputs); else if ( (inputs.decoded_instr.opcode == op_STORE_FP)) alu_outputs = fv_ST (inputs); +`ifdef POSIT + else if ( (inputs.decoded_instr.opcode == op_LOAD_P)) + alu_outputs = fv_LD (inputs); + + else if ( (inputs.decoded_instr.opcode == op_STORE_P)) + alu_outputs = fv_ST (inputs); +`endif + else if ( (inputs.decoded_instr.opcode == op_FP) || (inputs.decoded_instr.opcode == op_FMADD) || (inputs.decoded_instr.opcode == op_FMSUB) @@ -1240,6 +1328,21 @@ function ALU_Outputs fv_ALU (ALU_Inputs inputs); alu_outputs = fv_FP (inputs); `endif +`ifdef ACCEL // accelerator operations: dispatch on the accelerator opcode and a known funct7 + + else if (inputs.decoded_instr.opcode==accel_opc && ((inputs.decoded_instr.funct7 == f7_fma_p) + || (inputs.decoded_instr.funct7 == f7_fda_p) + || (inputs.decoded_instr.funct7 == f7_fms_p) + || (inputs.decoded_instr.funct7 == f7_fds_p) + || (inputs.decoded_instr.funct7 == f7_fcvt_p_s) + || (inputs.decoded_instr.funct7 == f7_fcvt_s_p) + || (inputs.decoded_instr.funct7 == f7_fcvt_p_r) + || (inputs.decoded_instr.funct7 == f7_fcvt_r_p)) + ) + alu_outputs = fv_ACCEL (inputs); +`endif + + else begin
alu_outputs.control = CONTROL_TRAP;