Skip to content

Commit

Permalink
[hardware] fix segment ops
Browse files Browse the repository at this point in the history
  • Loading branch information
mp-17 committed Oct 26, 2024
1 parent 83ad4a3 commit 6682824
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 41 deletions.
48 changes: 26 additions & 22 deletions hardware/src/ara_dispatcher.sv
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
/////////////////////////

ara_req_t ara_req, ara_req_d;
logic ara_req_valid_d;
logic ara_req_valid, ara_req_valid_d;

always_ff @(posedge clk_i or negedge rst_ni) begin
if (!rst_ni) begin
Expand Down Expand Up @@ -259,6 +259,8 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(

// The handshake signals are simply passed through when the instruction is not a segment memory operation
ara_resp_t ara_resp;
logic ara_resp_valid;

segment_sequencer #(
.SegSupport(SegSupport),
.ara_req_t (ara_req_t ),
Expand All @@ -277,6 +279,8 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
.store_complete_o(store_complete),
.ara_req_i(ara_req),
.ara_req_o(ara_req_d),
.ara_req_valid_i(ara_req_valid),
.ara_req_valid_o(ara_req_valid_d),
.ara_req_ready_i(ara_req_ready_i),
.ara_resp_i(ara_resp_i),
.ara_resp_o(ara_resp),
Expand Down Expand Up @@ -360,7 +364,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
cvt_resize : CVT_SAME,
default : '0
};
ara_req_valid_d = 1'b0;
ara_req_valid = 1'b0;

is_config = 1'b0;
ignore_zero_vl_check = 1'b0;
Expand Down Expand Up @@ -401,7 +405,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
// These generate a reshuffle request to Ara's backend.
// When LMUL > 1, not all of the registers that make up a
// register group necessarily need to be reshuffled
ara_req_valid_d = ~rs_mask_request_q;
ara_req_valid = ~rs_mask_request_q;
ara_req.use_scalar_op = 1'b1;
ara_req.vs2 = vs_buffer_q;
ara_req.eew_vs2 = eew_old_buffer_q;
Expand Down Expand Up @@ -602,7 +606,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
ara_req.vd = insn.varith_type.rd;
ara_req.use_vd = 1'b1;
ara_req.vm = insn.varith_type.vm;
ara_req_valid_d = 1'b1;
ara_req_valid = 1'b1;

// Decode based on the func6 field
unique case (insn.varith_type.func6)
Expand Down Expand Up @@ -818,7 +822,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
ara_req.use_vd = 1'b1;
ara_req.vm = insn.varith_type.vm;
ara_req.is_stride_np2 = is_stride_np2;
ara_req_valid_d = 1'b1;
ara_req_valid = 1'b1;

// Decode based on the func6 field
unique case (insn.varith_type.func6)
Expand Down Expand Up @@ -1031,7 +1035,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
ara_req.use_vd = 1'b1;
ara_req.vm = insn.varith_type.vm;
ara_req.is_stride_np2 = is_stride_np2;
ara_req_valid_d = 1'b1;
ara_req_valid = 1'b1;

// Decode based on the func6 field
unique case (insn.varith_type.func6)
Expand Down Expand Up @@ -1237,7 +1241,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
ara_req.vd = insn.varith_type.rd;
ara_req.use_vd = 1'b1;
ara_req.vm = insn.varith_type.vm;
ara_req_valid_d = 1'b1;
ara_req_valid = 1'b1;

// Assume an effective EMUL = LMUL1 by default (for the mask operations)
ara_req.emul = LMUL_1;
Expand Down Expand Up @@ -1333,7 +1337,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
acc_resp_o.resp_valid = 1'b1;
acc_resp_o.result = ara_resp.resp;
acc_resp_o.exception = ara_resp.exception;
ara_req_valid_d = 1'b0;
ara_req_valid = 1'b0;
end
end
6'b010100: begin
Expand Down Expand Up @@ -1685,7 +1689,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
ara_req.use_vd = 1'b1;
ara_req.vm = insn.varith_type.vm;
ara_req.is_stride_np2 = is_stride_np2;
ara_req_valid_d = 1'b1;
ara_req_valid = 1'b1;

// Decode based on the func6 field
unique case (insn.varith_type.func6)
Expand Down Expand Up @@ -1922,7 +1926,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
ara_req.use_vd = 1'b1;
ara_req.vm = insn.varith_type.vm;
ara_req.fp_rm = acc_req_i.frm;
ara_req_valid_d = 1'b1;
ara_req_valid = 1'b1;

// Decode based on the func6 field
unique case (insn.varith_type.func6)
Expand Down Expand Up @@ -2007,7 +2011,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
acc_resp_o.resp_valid = 1'b1;
acc_resp_o.result = vfmvfs_result;
acc_resp_o.exception = ara_resp.exception;
ara_req_valid_d = 1'b0;
ara_req_valid = 1'b0;
end
end
6'b011000: ara_req.op = ara_pkg::VMFEQ;
Expand Down Expand Up @@ -2344,7 +2348,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
ara_req.vm = insn.varith_type.vm;
ara_req.is_stride_np2 = is_stride_np2;
ara_req.fp_rm = acc_req_i.frm;
ara_req_valid_d = 1'b1;
ara_req_valid = 1'b1;

// Decode based on the func6 field
unique case (insn.varith_type.func6)
Expand Down Expand Up @@ -2593,7 +2597,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
ara_req.vm = insn.vmem_type.vm;
ara_req.scalar_op = acc_req_i.rs1;
ara_req.nf = insn.vmem_type.nf;
ara_req_valid_d = 1'b1;
ara_req_valid = 1'b1;

// Decode the element width
// Indexed memory operations follow a different rule
Expand Down Expand Up @@ -2634,7 +2638,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
acc_resp_o.req_ready = 1'b1;
acc_resp_o.resp_valid = 1'b1;
illegal_insn = 1'b1;
ara_req_valid_d = 1'b0;
ara_req_valid = 1'b0;
end
endcase

Expand Down Expand Up @@ -2749,7 +2753,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
ignore_zero_vl_check = 1'b1;
// The LMUL value is kept in the instruction itself
illegal_insn_load = 1'b0;
ara_req_valid_d = 1'b1;
ara_req_valid = 1'b1;

// Maximum vector length. VLMAX = nf * VLEN / EW8.
ara_req.vtype.vsew = EW8;
Expand Down Expand Up @@ -2782,7 +2786,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
acc_resp_o.req_ready = 1'b1;
acc_resp_o.resp_valid = 1'b1;
acc_resp_o.exception = ara_resp.exception;
ara_req_valid_d = 1'b0;
ara_req_valid = 1'b0;
// In case of exception, modify vstart
if ( ara_resp.exception.valid ) begin
csr_vstart_d = ara_resp.exception_vstart;
Expand Down Expand Up @@ -2823,7 +2827,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
ara_req.vm = insn.vmem_type.vm;
ara_req.scalar_op = acc_req_i.rs1;
ara_req.nf = insn.vmem_type.nf;
ara_req_valid_d = 1'b1;
ara_req_valid = 1'b1;

// Decode the element width
// Indexed memory operations follow a different rule
Expand Down Expand Up @@ -3000,15 +3004,15 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(

acc_resp_o.req_ready = 1'b0;
acc_resp_o.resp_valid = 1'b0;
ara_req_valid_d = 1'b1;
ara_req_valid = 1'b1;
end

// Wait until the back-end answers to acknowledge those instructions
if ( ara_resp_valid ) begin
acc_resp_o.req_ready = 1'b1;
acc_resp_o.resp_valid = 1'b1;
acc_resp_o.exception = ara_resp.exception;
ara_req_valid_d = 1'b0;
ara_req_valid = 1'b0;
// In case of exception, modify vstart and wait until the previous
// operations are over
if ( ara_resp.exception.valid ) begin
Expand Down Expand Up @@ -3273,7 +3277,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(

// Raise an illegal instruction exception
if ( illegal_insn || illegal_insn_load || illegal_insn_store ) begin
ara_req_valid_d = 1'b0;
ara_req_valid = 1'b0;
acc_resp_o.req_ready = 1'b1;
acc_resp_o.resp_valid = 1'b1;
acc_resp_o.exception.valid = 1'b1;
Expand Down Expand Up @@ -3332,7 +3336,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
// Stall the interface, and inject a reshuffling instruction
acc_resp_o.req_ready = 1'b0;
acc_resp_o.resp_valid = 1'b0;
ara_req_valid_d = 1'b0;
ara_req_valid = 1'b0;

// Initialize the reshuffle counter limit to handle LMUL > 1
unique case (ara_req.emul)
Expand Down Expand Up @@ -3398,7 +3402,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
// delay the zero_vl acknowledge by 1 cycle
acc_resp_o.req_ready = ~((is_vload & load_complete_q) | (is_vstore & store_complete_q));
acc_resp_o.resp_valid = ~((is_vload & load_complete_q) | (is_vstore & store_complete_q));
ara_req_valid_d = 1'b0;
ara_req_valid = 1'b0;
load_zero_vl = is_vload;
store_zero_vl = is_vstore;
end
Expand Down
62 changes: 43 additions & 19 deletions hardware/src/segment_sequencer.sv
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ module segment_sequencer import ara_pkg::*; import rvv_pkg::*; #(
// Ara frontend - backend info and handshakes
input ara_req_t ara_req_i,
output ara_req_t ara_req_o,
input logic ara_req_valid_i,
output logic ara_req_valid_o,
input logic ara_req_ready_i,
input ara_resp_t ara_resp_i,
output ara_resp_t ara_resp_o,
Expand All @@ -43,10 +45,12 @@ module segment_sequencer import ara_pkg::*; import rvv_pkg::*; #(
ara_resp_t ara_resp_d, ara_resp_q;
logic is_vload_d, is_vload_q;
logic [$bits(ara_req_i.vstart):0] next_vstart_cnt;
logic [2:0] nf_d, nf_q;

typedef enum logic [1:0] {
IDLE,
SEGMENT_MICRO_OPS,
SEGMENT_MICRO_OPS_WAIT_END,
SEGMENT_MICRO_OPS_END
} state_e;
state_e state_d, state_q;
Expand All @@ -70,7 +74,8 @@ module segment_sequencer import ara_pkg::*; import rvv_pkg::*; #(
.q_o(segment_cnt_q),
.overflow_o( /* Unused */ )
);
assign segment_cnt_clear = new_seg_mem_op | (segment_cnt_en & (segment_cnt_q == ara_req_i.nf));
assign segment_cnt_clear = (state_q == SEGMENT_MICRO_OPS_END)
| ((state_q != IDLE) & segment_cnt_en & (segment_cnt_q == nf_q));

// Track the number of segments
logic vstart_cnt_en;
Expand All @@ -91,7 +96,7 @@ module segment_sequencer import ara_pkg::*; import rvv_pkg::*; #(
.overflow_o( /* Unused */ )
);
// Change destination vector index when all the fields of the segment have been processed
assign vstart_cnt_en = segment_cnt_en & (segment_cnt_q == ara_req_i.nf);
assign vstart_cnt_en = segment_cnt_en & (segment_cnt_q == nf_q);

// Next vstart count
assign next_vstart_cnt = vstart_cnt_q + 1;
Expand All @@ -104,6 +109,7 @@ module segment_sequencer import ara_pkg::*; import rvv_pkg::*; #(

// Pass through
ara_req_o = ara_req_i;
ara_req_valid_o = ara_req_valid_i;
ara_resp_o = ara_resp_i;
ara_resp_valid_o = ara_resp_valid_i;
// Block load/store_complete
Expand All @@ -113,6 +119,7 @@ module segment_sequencer import ara_pkg::*; import rvv_pkg::*; #(
ara_resp_d = ara_resp_q;
ara_resp_valid_d = ara_resp_valid_q;
is_vload_d = is_vload_q;
nf_d = nf_q;

// Don't count up by default
new_seg_mem_op = 1'b0;
Expand All @@ -121,71 +128,87 @@ module segment_sequencer import ara_pkg::*; import rvv_pkg::*; #(
// Low-perf Moore's FSM
unique case (state_q)
IDLE: begin
// Pass-through
load_complete_o = load_complete_i;
store_complete_o = store_complete_i;
// Be ready to sample the next nf
nf_d = ara_req_i.nf;
// Send a first micro operation upon valid segment mem op
if (is_segment_mem_op_i && !illegal_insn_i) begin
// If we are here, the backend is able to accept the request
// Set-up sequencing
new_seg_mem_op = 1'b1;
// Set up the first micro operation
ara_req_o.vl = 1;
ara_req_o.vl = next_vstart_cnt;
// Pass to the next field if the previous micro op finished
segment_cnt_en = 1'b1;
// Start sequencing
state_d = SEGMENT_MICRO_OPS;
end
end
SEGMENT_MICRO_OPS: begin
// Manipulate the memory micro request in advance
ara_req_o.vl = 1;
ara_req_o.vl = next_vstart_cnt;
ara_req_o.vstart = vstart_cnt_q;
ara_req_o.vs1 = ara_req_i.vs1 + segment_cnt_q;
ara_req_o.vd = ara_req_i.vd + segment_cnt_q;

// Don't answer CVA6 yet
ara_resp_valid_o = 1'b0;

// Wait for an answer from Ara's backend
if (ara_resp_valid_i) begin
// Pass to the next field if the previous micro op finished
// Pass to the next field if the previous micro op finished
if (ara_req_valid_i && ara_req_ready_i) begin
segment_cnt_en = 1'b1;
// If exception, stop the execution
end

// Wait for an answer from Ara's backend
if (ara_resp_valid_i) begin // If exception, stop the execution
if (ara_resp_i.exception.valid) begin
ara_resp_valid_o = ara_resp_valid_i;
// If no exception, continue with the micro ops
end else begin
// If over - stop in the next cycle
if (segment_cnt_clear && (next_vstart_cnt == ara_req_i.vl)) begin
// Sample the last answer
ara_resp_d = ara_resp_i;
ara_resp_valid_d = ara_resp_valid_i;
is_vload_d = is_vload_i;
state_d = SEGMENT_MICRO_OPS_END;
state_d = SEGMENT_MICRO_OPS_WAIT_END;
end
end
end
end
SEGMENT_MICRO_OPS_END: begin
SEGMENT_MICRO_OPS_WAIT_END: begin
// Don't answer CVA6 yet
ara_resp_valid_o = 1'b0;
// Stop injecting micro instructions
ara_req_valid_o = 1'b0;
// Wait for idle to give the final load/store_complete
if (ara_idle_i) begin
ara_resp_o = ara_resp_q;
ara_resp_valid_o = ara_resp_valid_q;
load_complete_o = is_vload_q;
store_complete_o = ~is_vload_q;
state_d = IDLE;
if (ara_idle_i && ara_req_ready_i) begin
state_d = SEGMENT_MICRO_OPS_END;
end
end
SEGMENT_MICRO_OPS_END: begin
ara_resp_o = ara_resp_q;
ara_resp_valid_o = 1'b1;
load_complete_o = is_vload_q;
store_complete_o = ~is_vload_q;
state_d = IDLE;
end
default:;
endcase
end

always_ff @(posedge clk_i or negedge rst_ni) begin
if (!rst_ni) begin
state_q <= IDLE;
nf_q <= '0;
is_vload_q <= 1'b0;
ara_resp_q <= '0;
ara_resp_valid_q <= '0;
end else begin
state_q <= state_d;
nf_q <= nf_d;
is_vload_q <= is_vload_d;
ara_resp_q <= ara_resp_d;
ara_resp_valid_q <= ara_resp_valid_d;
end
end
end else begin : gen_no_segment_support
Expand All @@ -195,6 +218,7 @@ module segment_sequencer import ara_pkg::*; import rvv_pkg::*; #(
assign load_complete_o = load_complete_i;
assign store_complete_o = store_complete_i;
assign ara_req_o = ara_req_i;
assign ara_req_valid_o = ara_req_valid_i;
assign ara_resp_o = ara_resp_i;
assign ara_resp_valid_o = ara_resp_valid_i;
end
Expand Down

0 comments on commit 6682824

Please sign in to comment.