Refactor forwarding in issue_stage module (#2519)

This commit is contained in:
jzthales 2024-10-01 04:13:30 +00:00 committed by GitHub
parent 56532c6963
commit 6ccd8d8bfa
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 342 additions and 353 deletions

View file

@ -1,2 +1,2 @@
cv32a65x:
gates: 172749
gates: 171804

View file

@ -114,6 +114,12 @@ module cva6
logic is_double_rd_macro_instr; // is double move decoded 32bit instruction of macro definition
logic vfp; // is this a vector floating-point instruction?
},
// Write-back port information bundled into one packed struct so that it can be
// passed as a type parameter to the issue stage and carried inside the
// forwarding interface (one element per write-back port).
localparam type writeback_t = struct packed {
logic valid; // write-back data on this port is valid
logic [CVA6Cfg.XLEN-1:0] data; // write-back data (XLEN bits wide)
logic ex_valid; // the write-back carries a valid exception
logic [CVA6Cfg.TRANS_ID_BITS-1:0] trans_id; // transaction ID (indexes the scoreboard entry being written back)
},
// branch-predict
// this is the struct we get back from ex stage and we will use it to update
@ -785,6 +791,7 @@ module cva6
.exception_t(exception_t),
.fu_data_t(fu_data_t),
.scoreboard_entry_t(scoreboard_entry_t),
.writeback_t(writeback_t),
.x_issue_req_t(x_issue_req_t),
.x_issue_resp_t(x_issue_resp_t),
.x_register_t(x_register_t),

View file

@ -21,6 +21,8 @@ module issue_read_operands
parameter type branchpredict_sbe_t = logic,
parameter type fu_data_t = logic,
parameter type scoreboard_entry_t = logic,
parameter type forwarding_t = logic,
parameter type writeback_t = logic,
parameter type rs3_len_t = logic,
parameter type x_issue_req_t = logic,
parameter type x_issue_resp_t = logic,
@ -44,29 +46,8 @@ module issue_read_operands
input logic [CVA6Cfg.NrIssuePorts-1:0] issue_instr_valid_i,
// Issue stage acknowledge - TO_BE_COMPLETED
output logic [CVA6Cfg.NrIssuePorts-1:0] issue_ack_o,
// rs1 operand address - scoreboard
output logic [CVA6Cfg.NrIssuePorts-1:0][REG_ADDR_SIZE-1:0] rs1_o,
// rs1 operand - scoreboard
input logic [CVA6Cfg.NrIssuePorts-1:0][CVA6Cfg.XLEN-1:0] rs1_i,
// rs1 operand is valid - scoreboard
input logic [CVA6Cfg.NrIssuePorts-1:0] rs1_valid_i,
// rs2 operand address - scoreboard
output logic [CVA6Cfg.NrIssuePorts-1:0][REG_ADDR_SIZE-1:0] rs2_o,
// rs2 operand - scoreboard
input logic [CVA6Cfg.NrIssuePorts-1:0][CVA6Cfg.XLEN-1:0] rs2_i,
// rs2 operand is valid - scoreboard
input logic [CVA6Cfg.NrIssuePorts-1:0] rs2_valid_i,
// rs3 operand address - scoreboard
output logic [CVA6Cfg.NrIssuePorts-1:0][REG_ADDR_SIZE-1:0] rs3_o,
// rs3 operand - scoreboard
input rs3_len_t [CVA6Cfg.NrIssuePorts-1:0] rs3_i,
// rs3 operand is valid - scoreboard
input logic [CVA6Cfg.NrIssuePorts-1:0] rs3_valid_i,
// get clobber input
// TO_BE_COMPLETED - TO_BE_COMPLETED
input fu_t [2**REG_ADDR_SIZE-1:0] rd_clobber_gpr_i,
// TO_BE_COMPLETED - TO_BE_COMPLETED
input fu_t [2**REG_ADDR_SIZE-1:0] rd_clobber_fpr_i,
// Forwarding - SCOREBOARD
input forwarding_t fwd_i,
// TO_BE_COMPLETED - TO_BE_COMPLETED
output fu_data_t [CVA6Cfg.NrIssuePorts-1:0] fu_data_o,
// Unregistered version of fu_data_o.operanda - TO_BE_COMPLETED
@ -160,20 +141,49 @@ module issue_read_operands
rs3_len_t operand_c_fpr;
// output flipflop (ID <-> EX)
fu_data_t [CVA6Cfg.NrIssuePorts-1:0] fu_data_n, fu_data_q;
logic [ CVA6Cfg.XLEN-1:0] imm_forward_rs3;
logic [ CVA6Cfg.XLEN-1:0] imm_forward_rs3;
logic [CVA6Cfg.NrIssuePorts-1:0] alu_valid_q;
logic [CVA6Cfg.NrIssuePorts-1:0] mult_valid_q;
logic [CVA6Cfg.NrIssuePorts-1:0] fpu_valid_q;
logic [ 1:0] fpu_fmt_q;
logic [ 2:0] fpu_rm_q;
logic [CVA6Cfg.NrIssuePorts-1:0] alu2_valid_q;
logic [CVA6Cfg.NrIssuePorts-1:0] lsu_valid_q;
logic [CVA6Cfg.NrIssuePorts-1:0] csr_valid_q;
logic [CVA6Cfg.NrIssuePorts-1:0] branch_valid_q;
logic [CVA6Cfg.NrIssuePorts-1:0] cvxif_valid_q;
logic [ 31:0] cvxif_off_instr_q;
logic cvxif_instruction_valid;
logic [CVA6Cfg.NrIssuePorts-1:0] alu_valid_q;
logic [CVA6Cfg.NrIssuePorts-1:0] mult_valid_q;
logic [CVA6Cfg.NrIssuePorts-1:0] fpu_valid_q;
logic [ 1:0] fpu_fmt_q;
logic [ 2:0] fpu_rm_q;
logic [CVA6Cfg.NrIssuePorts-1:0] alu2_valid_q;
logic [CVA6Cfg.NrIssuePorts-1:0] lsu_valid_q;
logic [CVA6Cfg.NrIssuePorts-1:0] csr_valid_q;
logic [CVA6Cfg.NrIssuePorts-1:0] branch_valid_q;
logic [CVA6Cfg.NrIssuePorts-1:0] cvxif_valid_q;
logic [ 31:0] cvxif_off_instr_q;
logic cvxif_instruction_valid;
//fwd logic
logic [CVA6Cfg.NrIssuePorts-1:0] rs1_has_raw;
logic [CVA6Cfg.NrIssuePorts-1:0] rs2_has_raw;
logic [CVA6Cfg.NrIssuePorts-1:0] rs3_has_raw;
logic [CVA6Cfg.NrIssuePorts-1:0][CVA6Cfg.XLEN-1:0] rs3;
logic [CVA6Cfg.NrIssuePorts-1:0] rs3_fpr;
logic [CVA6Cfg.NrIssuePorts-1:0] rs1_valid;
logic [CVA6Cfg.NrIssuePorts-1:0] rs2_valid;
logic [CVA6Cfg.NrIssuePorts-1:0] rs3_valid;
logic [CVA6Cfg.NrIssuePorts-1:0][CVA6Cfg.XLEN-1:0] rs1_res;
logic [CVA6Cfg.NrIssuePorts-1:0][CVA6Cfg.XLEN-1:0] rs2_res;
logic [CVA6Cfg.NrIssuePorts-1:0][CVA6Cfg.XLEN-1:0] rs3_res;
// clobber
fu_t [2**ariane_pkg::REG_ADDR_SIZE-1:0] rd_clobber_gpr, rd_clobber_fpr;
logic [2**ariane_pkg::REG_ADDR_SIZE-1:0][CVA6Cfg.NR_SB_ENTRIES:0] gpr_clobber_vld;
logic [2**ariane_pkg::REG_ADDR_SIZE-1:0][CVA6Cfg.NR_SB_ENTRIES:0] fpr_clobber_vld;
ariane_pkg::fu_t [ CVA6Cfg.NR_SB_ENTRIES:0] clobber_fu;
//forward logic
logic [CVA6Cfg.NrIssuePorts-1:0][CVA6Cfg.NR_SB_ENTRIES+CVA6Cfg.NrWbPorts-1:0]
rs1_fwd_req, rs2_fwd_req, rs3_fwd_req;
logic [CVA6Cfg.NrIssuePorts-1:0] rs1_is_not_gpr0, rs2_is_not_gpr0, rs3_is_not_gpr0;
logic [CVA6Cfg.NrIssuePorts-1:0][CVA6Cfg.NR_SB_ENTRIES+CVA6Cfg.NrWbPorts-1:0][CVA6Cfg.XLEN-1:0] rs_data;
logic [CVA6Cfg.NrIssuePorts-1:0] rs1_available, rs2_available, rs3_available;
logic [CVA6Cfg.NrIssuePorts-1:0][31:0] tinst_n, tinst_q; // transformed instruction
@ -387,6 +397,228 @@ module issue_read_operands
end
end
// -------------------
// RD clobber process
// -------------------
// rd_clobber output: output currently clobbered destination registers
// Builds, for every register index, the request vector fed to the clobber
// arbiters below: bit [r][i] is set when scoreboard entry i is still issued and
// writes register r (GPR or FPR file, selected by is_rd_fpr on the entry's op).
// clobber_fu[i] carries the functional unit of entry i as the arbiter payload.
always_comb begin : clobber_assign
// start with no requests at all
gpr_clobber_vld = '0;
fpr_clobber_vld = '0;
// default (highest entry has lowest prio in arbiter tree below)
// the extra slot at index NR_SB_ENTRIES carries NONE and always requests
clobber_fu[CVA6Cfg.NR_SB_ENTRIES] = ariane_pkg::NONE;
for (int unsigned i = 0; i < 2 ** ariane_pkg::REG_ADDR_SIZE; i++) begin
gpr_clobber_vld[i][CVA6Cfg.NR_SB_ENTRIES] = 1'b1;
fpr_clobber_vld[i][CVA6Cfg.NR_SB_ENTRIES] = 1'b1;
end
// check for all valid entries and set the clobber accordingly
// an entry requests in exactly one of the two files: FPR when FP is present
// and the op writes an FP register, GPR otherwise
for (int unsigned i = 0; i < CVA6Cfg.NR_SB_ENTRIES; i++) begin
gpr_clobber_vld[fwd_i.sbe[i].rd][i] = fwd_i.still_issued[i] & ~(CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(
fwd_i.sbe[i].op));
fpr_clobber_vld[fwd_i.sbe[i].rd][i] = fwd_i.still_issued[i] & (CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(
fwd_i.sbe[i].op));
clobber_fu[i] = fwd_i.sbe[i].fu;
end
// GPR[0] is always free
// This must stay last: it clears every request bit of register 0, including
// the default NONE request set above.
// NOTE(review): with no request asserted, rd_clobber_gpr[0] depends on how
// rr_arb_tree resolves an empty request vector - confirm it reads as NONE.
gpr_clobber_vld[0] = '0;
end
// One arbiter per register index k (2**REG_ADDR_SIZE of them) selects the
// functional unit that currently clobbers GPR k, and - when FP is present -
// a second arbiter does the same for FPR k. Each arbiter has
// NR_SB_ENTRIES + 1 inputs: one per scoreboard entry plus the default NONE slot.
for (genvar k = 0; k < 2 ** ariane_pkg::REG_ADDR_SIZE; k++) begin : gen_sel_clobbers
// get fu that is going to clobber this register (there should be only one)
// external priority is enabled and tied to '0 (rr_i); grant and index outputs are unused
rr_arb_tree #(
.NumIn(CVA6Cfg.NR_SB_ENTRIES + 1),
.DataType(ariane_pkg::fu_t),
.ExtPrio(1'b1),
.AxiVldRdy(1'b1)
) i_sel_gpr_clobbers (
.clk_i (clk_i),
.rst_ni (rst_ni),
.flush_i(1'b0),
.rr_i ('0),
.req_i (gpr_clobber_vld[k]),
.gnt_o (),
.data_i (clobber_fu),
.gnt_i (1'b1),
.req_o (),
.data_o (rd_clobber_gpr[k]),
.idx_o ()
);
// FPR clobber tracking only exists when the configuration has an FPU
if (CVA6Cfg.FpPresent) begin
rr_arb_tree #(
.NumIn(CVA6Cfg.NR_SB_ENTRIES + 1),
.DataType(ariane_pkg::fu_t),
.ExtPrio(1'b1),
.AxiVldRdy(1'b1)
) i_sel_fpr_clobbers (
.clk_i (clk_i),
.rst_ni (rst_ni),
.flush_i(1'b0),
.rr_i ('0),
.req_i (fpr_clobber_vld[k]),
.gnt_o (),
.data_i (clobber_fu),
.gnt_i (1'b1),
.req_o (),
.data_o (rd_clobber_fpr[k]),
.idx_o ()
);
end else begin
// without FP support no FPR is ever reported as clobbered
assign rd_clobber_fpr[k] = NONE;
end
end
// ----------------------------------
// Read Operands (a.k.a forwarding)
// ----------------------------------
// read operand interface: same logic as register file
// WB ports have higher prio than entries
for (genvar i = 0; i < CVA6Cfg.NrIssuePorts; i++) begin
// Forwarding requests coming from the write-back ports; they occupy request
// indices [0, NrWbPorts) of issue port i. A WB port k requests for an operand when:
//   - the destination register of the scoreboard entry addressed by the port's
//     trans_id equals the operand's register index,
//   - that entry is still issued,
//   - the port carries valid data without an exception, and
//   - producer and consumer agree on the register file (FPR vs. GPR).
// rs3 takes its register index from the low REG_ADDR_SIZE bits of the
// instruction's result field. All three operands share the same data vector.
for (genvar k = 0; unsigned'(k) < CVA6Cfg.NrWbPorts; k++) begin : gen_rs_wb
assign rs1_fwd_req[i][k] = (fwd_i.sbe[fwd_i.wb[k].trans_id].rd == issue_instr_i[i].rs1) & (fwd_i.still_issued[fwd_i.wb[k].trans_id]) & fwd_i.wb[k].valid & (~fwd_i.wb[k].ex_valid) & ((CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(
fwd_i.sbe[fwd_i.wb[k].trans_id].op
)) == (CVA6Cfg.FpPresent && ariane_pkg::is_rs1_fpr(
issue_instr_i[i].op
)));
assign rs2_fwd_req[i][k] = (fwd_i.sbe[fwd_i.wb[k].trans_id].rd == issue_instr_i[i].rs2) & (fwd_i.still_issued[fwd_i.wb[k].trans_id]) & fwd_i.wb[k].valid & (~fwd_i.wb[k].ex_valid) & ((CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(
fwd_i.sbe[fwd_i.wb[k].trans_id].op
)) == (CVA6Cfg.FpPresent && ariane_pkg::is_rs2_fpr(
issue_instr_i[i].op
)));
assign rs3_fwd_req[i][k] = (fwd_i.sbe[fwd_i.wb[k].trans_id].rd == issue_instr_i[i].result[ariane_pkg::REG_ADDR_SIZE-1:0]) & (fwd_i.still_issued[fwd_i.wb[k].trans_id]) & fwd_i.wb[k].valid & (~fwd_i.wb[k].ex_valid) & ((CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(
fwd_i.sbe[fwd_i.wb[k].trans_id].op
)) == (CVA6Cfg.FpPresent && ariane_pkg::is_imm_fpr(
issue_instr_i[i].op
)));
// data candidate for this request slot: the value on write-back port k
assign rs_data[i][k] = fwd_i.wb[k].data;
end
// Forwarding requests coming from the scoreboard entries themselves; they are
// placed after the write-back ports, at request index k + NrWbPorts.
// Entry k requests for an operand when its destination register equals the
// operand's register index, the entry is still issued, its valid flag is set,
// and producer and consumer agree on the register file (FPR vs. GPR).
// rs3 takes its register index from the low REG_ADDR_SIZE bits of the
// instruction's result field.
for (genvar k = 0; unsigned'(k) < CVA6Cfg.NR_SB_ENTRIES; k++) begin : gen_rs_entries
assign rs1_fwd_req[i][k+CVA6Cfg.NrWbPorts] = (fwd_i.sbe[k].rd == issue_instr_i[i].rs1) & fwd_i.still_issued[k] & fwd_i.sbe[k].valid & ((CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(
fwd_i.sbe[k].op
)) == (CVA6Cfg.FpPresent && ariane_pkg::is_rs1_fpr(
issue_instr_i[i].op
)));
assign rs2_fwd_req[i][k+CVA6Cfg.NrWbPorts] = (fwd_i.sbe[k].rd == issue_instr_i[i].rs2) & fwd_i.still_issued[k] & fwd_i.sbe[k].valid & ((CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(
fwd_i.sbe[k].op
)) == (CVA6Cfg.FpPresent && ariane_pkg::is_rs2_fpr(
issue_instr_i[i].op
)));
assign rs3_fwd_req[i][k+CVA6Cfg.NrWbPorts] = (fwd_i.sbe[k].rd == issue_instr_i[i].result[ariane_pkg::REG_ADDR_SIZE-1:0]) & fwd_i.still_issued[k] & fwd_i.sbe[k].valid & ((CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(
fwd_i.sbe[k].op
)) == (CVA6Cfg.FpPresent && ariane_pkg::is_imm_fpr(
issue_instr_i[i].op
)));
// data candidate for this request slot: the result stored in entry k
assign rs_data[i][k+CVA6Cfg.NrWbPorts] = fwd_i.sbe[k].result;
end
// use fixed prio here
// this implicitly gives higher prio to WB ports
// Three identical arbiters (one per source operand) pick one forwarding
// candidate out of NrWbPorts + NR_SB_ENTRIES request slots. They share the same
// data vector rs_data[i] and differ only in their request vector. req_o tells
// whether any candidate requested (operand available); data_o is the selected value.

// operand rs1
rr_arb_tree #(
.NumIn(CVA6Cfg.NR_SB_ENTRIES + CVA6Cfg.NrWbPorts),
.DataWidth(CVA6Cfg.XLEN),
.ExtPrio(1'b1),
.AxiVldRdy(1'b1)
) i_sel_rs1 (
.clk_i (clk_i),
.rst_ni (rst_ni),
.flush_i(1'b0),
.rr_i ('0),
.req_i (rs1_fwd_req[i]),
.gnt_o (),
.data_i (rs_data[i]),
.gnt_i (1'b1),
.req_o (rs1_available[i]),
.data_o (rs1_res[i]),
.idx_o ()
);
// operand rs2
rr_arb_tree #(
.NumIn(CVA6Cfg.NR_SB_ENTRIES + CVA6Cfg.NrWbPorts),
.DataWidth(CVA6Cfg.XLEN),
.ExtPrio(1'b1),
.AxiVldRdy(1'b1)
) i_sel_rs2 (
.clk_i (clk_i),
.rst_ni (rst_ni),
.flush_i(1'b0),
.rr_i ('0),
.req_i (rs2_fwd_req[i]),
.gnt_o (),
.data_i (rs_data[i]),
.gnt_i (1'b1),
.req_o (rs2_available[i]),
.data_o (rs2_res[i]),
.idx_o ()
);
// operand rs3: the raw XLEN-wide selection goes to rs3[i] and is narrowed
// into rs3_res[i] right after this instance
rr_arb_tree #(
.NumIn(CVA6Cfg.NR_SB_ENTRIES + CVA6Cfg.NrWbPorts),
.DataWidth(CVA6Cfg.XLEN),
.ExtPrio(1'b1),
.AxiVldRdy(1'b1)
) i_sel_rs3 (
.clk_i (clk_i),
.rst_ni (rst_ni),
.flush_i(1'b0),
.rr_i ('0),
.req_i (rs3_fwd_req[i]),
.gnt_o (),
.data_i (rs_data[i]),
.gnt_i (1'b1),
.req_o (rs3_available[i]),
.data_o (rs3[i]),
.idx_o ()
);
// Narrow the XLEN-wide arbiter output rs3[i] into rs3_res[i].
// With three GPR read ports the third operand is a general-purpose register,
// so the full CVA6Cfg.XLEN bits are kept (rs3 and rs3_res are both declared
// with CVA6Cfg.XLEN, hence the slice uses the same configuration value).
// Otherwise the third operand is a floating-point register and only the low
// CVA6Cfg.FLen bits are taken; the assignment zero-extends them into rs3_res[i].
if (CVA6Cfg.NrRgprPorts == 3) begin : gen_gp_three_port
assign rs3_res[i] = rs3[i][CVA6Cfg.XLEN-1:0];
end else begin : gen_fp_three_port
assign rs3_res[i] = rs3[i][CVA6Cfg.FLen-1:0];
end
// Per-operand hazard summary for issue port i.
//   rsN_has_raw : the operand's source register is currently clobbered, i.e. the
//                 matching clobber list (FPR or GPR, chosen by the op) is not NONE.
//   rsN_valid   : a forwarding candidate is available and may be used.
// rs1 is never considered hazardous when the instruction uses the zimm immediate.
assign rs1_has_raw[i] = !issue_instr_i[i].use_zimm && ((CVA6Cfg.FpPresent && is_rs1_fpr(
issue_instr_i[i].op
)) ? rd_clobber_fpr[issue_instr_i[i].rs1] != NONE :
rd_clobber_gpr[issue_instr_i[i].rs1] != NONE);
// FPR operands only need an available candidate. GPR operands additionally
// require that the clobbering unit is not CSR, except for SFENCE_VMA when
// CVA6Cfg.RVS is set.
assign rs1_valid[i] = rs1_available[i] && (CVA6Cfg.FpPresent && is_rs1_fpr(
issue_instr_i[i].op
) ? 1'b1 : ((rd_clobber_gpr[issue_instr_i[i].rs1] != CSR) ||
(CVA6Cfg.RVS && issue_instr_i[i].op == SFENCE_VMA)));
// rs2: same scheme as rs1, without the zimm exemption
assign rs2_has_raw[i] = ((CVA6Cfg.FpPresent && is_rs2_fpr(
issue_instr_i[i].op
)) ? rd_clobber_fpr[issue_instr_i[i].rs2] != NONE :
rd_clobber_gpr[issue_instr_i[i].rs2] != NONE);
assign rs2_valid[i] = rs2_available[i] && (CVA6Cfg.FpPresent && is_rs2_fpr(
issue_instr_i[i].op
) ? 1'b1 : ((rd_clobber_gpr[issue_instr_i[i].rs2] != CSR) ||
(CVA6Cfg.RVS && issue_instr_i[i].op == SFENCE_VMA)));
// rs3: only checked against the FPR clobber list; the register index is taken
// from the low REG_ADDR_SIZE bits of the result field. Non-FPR rs3 never
// reports a hazard here.
assign rs3_has_raw[i] = ((CVA6Cfg.FpPresent && is_imm_fpr(
issue_instr_i[i].op
)) ? rd_clobber_fpr[issue_instr_i[i].result[REG_ADDR_SIZE-1:0]] != NONE : 0);
assign rs3_valid[i] = rs3_available[i];
// set when FP is present and the op reads its third operand from the FPR file
assign rs3_fpr[i] = (CVA6Cfg.FpPresent && ariane_pkg::is_imm_fpr(issue_instr_i[i].op));
end
// ---------------
// Register stage
// ---------------
@ -403,27 +635,8 @@ module issue_read_operands
forward_rs3 = '0; // FPR only
for (int unsigned i = 0; i < CVA6Cfg.NrIssuePorts; i++) begin
// poll the scoreboard for those values
rs1_o[i] = issue_instr_i[i].rs1;
rs2_o[i] = issue_instr_i[i].rs2;
rs3_o[i] = issue_instr_i[i].result[REG_ADDR_SIZE-1:0]; // rs3 is encoded in imm field
// 0. check that we are not using the zimm type in RS1
// as this is an immediate we do not have to wait on anything here
// 0.bis check that rs1 is required by coprocessor if not do not wait here
// 1. check if the source registers are clobbered --> check appropriate clobber list (gpr/fpr)
// 2. poll the scoreboard
if (!issue_instr_i[i].use_zimm && ((CVA6Cfg.FpPresent && is_rs1_fpr(
issue_instr_i[i].op
)) ? rd_clobber_fpr_i[issue_instr_i[i].rs1] != NONE :
rd_clobber_gpr_i[issue_instr_i[i].rs1] != NONE)) begin
// check if the clobbering instruction is not a CSR instruction, CSR instructions can only
// be fetched through the register file since they can't be forwarded
// if the operand is available, forward it. CSRs don't write to/from FPR
if (rs1_valid_i[i] && (CVA6Cfg.FpPresent && is_rs1_fpr(
issue_instr_i[i].op
) ? 1'b1 : ((rd_clobber_gpr_i[issue_instr_i[i].rs1] != CSR) ||
(CVA6Cfg.RVS && issue_instr_i[i].op == SFENCE_VMA)))) begin
if (rs1_has_raw[i]) begin
if (rs1_valid[i]) begin
forward_rs1[i] = 1'b1;
end else begin // the operand is not available -> stall
stall_raw[i] = 1'b1;
@ -431,15 +644,8 @@ module issue_read_operands
end
end
if (((CVA6Cfg.FpPresent && is_rs2_fpr(
issue_instr_i[i].op
)) ? rd_clobber_fpr_i[issue_instr_i[i].rs2] != NONE :
rd_clobber_gpr_i[issue_instr_i[i].rs2] != NONE)) begin
// if the operand is available, forward it. CSRs don't write to/from FPR
if (rs2_valid_i[i] && (CVA6Cfg.FpPresent && is_rs2_fpr(
issue_instr_i[i].op
) ? 1'b1 : ((rd_clobber_gpr_i[issue_instr_i[i].rs2] != CSR) ||
(CVA6Cfg.RVS && issue_instr_i[i].op == SFENCE_VMA)))) begin
if (rs2_has_raw[i]) begin
if (rs2_valid[i]) begin
forward_rs2[i] = 1'b1;
end else begin // the operand is not available -> stall
stall_raw[i] = 1'b1;
@ -447,12 +653,8 @@ module issue_read_operands
end
end
// Only check clobbered gpr for OFFLOADED instruction
if ((CVA6Cfg.FpPresent && is_imm_fpr(
issue_instr_i[i].op
)) ? rd_clobber_fpr_i[issue_instr_i[i].result[REG_ADDR_SIZE-1:0]] != NONE : 0) begin
// if the operand is available, forward it. CSRs don't write to/from FPR so no need to check
if (rs3_valid_i[i]) begin
if (rs3_has_raw[i] && rs3_fpr[i]) begin
if (rs3_valid[i]) begin
forward_rs3[i] = 1'b1;
end else begin // the operand is not available -> stall
stall_raw[i] = 1'b1;
@ -460,6 +662,7 @@ module issue_read_operands
end
end
end
if (CVA6Cfg.CvxifEn) begin
// Remove unnecessary forward and stall in case source register is not needed by coprocessor.
if (x_issue_valid_o && x_issue_resp_i.accept) begin
@ -511,9 +714,9 @@ module issue_read_operands
// third operand from fp regfile or gp regfile if NR_RGPR_PORTS == 3
if (OPERANDS_PER_INSTR == 3) begin : gen_gp_rs3
assign imm_forward_rs3 = rs3_i[0];
assign imm_forward_rs3 = rs3_res[0];
end else begin : gen_fp_rs3
assign imm_forward_rs3 = {{CVA6Cfg.XLEN - CVA6Cfg.FLen{1'b0}}, rs3_i[0]};
assign imm_forward_rs3 = {{CVA6Cfg.XLEN - CVA6Cfg.FLen{1'b0}}, rs3_res[0]};
end
// Forwarding/Output MUX
@ -542,10 +745,10 @@ module issue_read_operands
// or should we forward
if (forward_rs1[i]) begin
fu_data_n[i].operand_a = rs1_i[i];
fu_data_n[i].operand_a = rs1_res[i];
end
if (forward_rs2[i]) begin
fu_data_n[i].operand_b = rs2_i[i];
fu_data_n[i].operand_b = rs2_res[i];
end
if ((CVA6Cfg.FpPresent || (CVA6Cfg.CvxifEn && OPERANDS_PER_INSTR == 3)) && forward_rs3[i]) begin
fu_data_n[i].imm = imm_forward_rs3;
@ -676,7 +879,6 @@ module issue_read_operands
end
end
always_comb begin : gen_check_waw_dependencies
stall_waw = '1;
for (int unsigned i = 0; i < CVA6Cfg.NrIssuePorts; i++) begin
@ -687,8 +889,8 @@ module issue_read_operands
// no other instruction has the same destination register -> issue the instruction
if ((CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(
issue_instr_i[i].op
)) ? (rd_clobber_fpr_i[issue_instr_i[i].rd] == NONE) :
(rd_clobber_gpr_i[issue_instr_i[i].rd] == NONE)) begin
)) ? (rd_clobber_fpr[issue_instr_i[i].rd] == NONE) :
(rd_clobber_gpr[issue_instr_i[i].rd] == NONE)) begin
stall_waw[i] = 1'b0;
end
// or check that the target destination register will be written in this cycle by the
@ -709,6 +911,8 @@ module issue_read_operands
end
end
end
// We can issue an instruction if we do not detect that any other instruction is writing the same
// destination register.
// We also need to check if there is an unresolved branch in the scoreboard.

View file

@ -23,6 +23,7 @@ module issue_stage
parameter type exception_t = logic,
parameter type fu_data_t = logic,
parameter type scoreboard_entry_t = logic,
parameter type writeback_t = logic,
parameter type x_issue_req_t = logic,
parameter type x_issue_resp_t = logic,
parameter type x_register_t = logic,
@ -155,29 +156,21 @@ module issue_stage
// Scoreboard (SB) <-> Issue and Read Operands (IRO)
// ---------------------------------------------------
typedef logic [(CVA6Cfg.NrRgprPorts == 3 ? CVA6Cfg.XLEN : CVA6Cfg.FLen)-1:0] rs3_len_t;
// Bundle of scoreboard state handed from the scoreboard (fwd_o) to
// issue_read_operands (fwd_i), which uses it for clobber detection and
// operand forwarding.
typedef struct packed {
logic [CVA6Cfg.NR_SB_ENTRIES-1:0] still_issued; // per scoreboard entry: entry is still issued
logic [CVA6Cfg.TRANS_ID_BITS-1:0] issue_pointer; // scoreboard issue pointer
writeback_t [CVA6Cfg.NrWbPorts-1:0] wb; // one write-back record per write-back port
scoreboard_entry_t [CVA6Cfg.NR_SB_ENTRIES-1:0] sbe; // contents of every scoreboard entry
} forwarding_t;
fu_t [ 2**REG_ADDR_SIZE-1:0] rd_clobber_gpr_sb_iro;
fu_t [ 2**REG_ADDR_SIZE-1:0] rd_clobber_fpr_sb_iro;
forwarding_t fwd;
scoreboard_entry_t [CVA6Cfg.NrIssuePorts-1:0] issue_instr_sb_iro;
logic [CVA6Cfg.NrIssuePorts-1:0][ 31:0] orig_instr_sb_iro;
logic [CVA6Cfg.NrIssuePorts-1:0] issue_instr_valid_sb_iro;
logic [CVA6Cfg.NrIssuePorts-1:0] issue_ack_iro_sb;
logic [CVA6Cfg.NrIssuePorts-1:0][REG_ADDR_SIZE-1:0] rs1_iro_sb;
logic [CVA6Cfg.NrIssuePorts-1:0][ CVA6Cfg.XLEN-1:0] rs1_sb_iro;
logic [CVA6Cfg.NrIssuePorts-1:0] rs1_valid_sb_iro;
logic [CVA6Cfg.NrIssuePorts-1:0][REG_ADDR_SIZE-1:0] rs2_iro_sb;
logic [CVA6Cfg.NrIssuePorts-1:0][ CVA6Cfg.XLEN-1:0] rs2_sb_iro;
logic [CVA6Cfg.NrIssuePorts-1:0] rs2_valid_iro_sb;
logic [CVA6Cfg.NrIssuePorts-1:0][REG_ADDR_SIZE-1:0] rs3_iro_sb;
rs3_len_t [CVA6Cfg.NrIssuePorts-1:0] rs3_sb_iro;
logic [CVA6Cfg.NrIssuePorts-1:0] rs3_valid_iro_sb;
scoreboard_entry_t [CVA6Cfg.NrIssuePorts-1:0] issue_instr_sb_iro;
logic [CVA6Cfg.NrIssuePorts-1:0][ 31:0] orig_instr_sb_iro;
logic [CVA6Cfg.NrIssuePorts-1:0] issue_instr_valid_sb_iro;
logic [CVA6Cfg.NrIssuePorts-1:0] issue_ack_iro_sb;
logic [CVA6Cfg.NrIssuePorts-1:0][ CVA6Cfg.XLEN-1:0] rs1_forwarding_xlen;
logic [CVA6Cfg.NrIssuePorts-1:0][ CVA6Cfg.XLEN-1:0] rs2_forwarding_xlen;
logic [CVA6Cfg.NrIssuePorts-1:0][CVA6Cfg.XLEN-1:0] rs1_forwarding_xlen;
logic [CVA6Cfg.NrIssuePorts-1:0][CVA6Cfg.XLEN-1:0] rs2_forwarding_xlen;
for (genvar i = 0; i < CVA6Cfg.NrIssuePorts; i++) begin
assign rs1_forwarding_o[i] = rs1_forwarding_xlen[i][CVA6Cfg.VLEN-1:0];
@ -190,7 +183,6 @@ module issue_stage
logic x_transaction_accepted_iro_sb, x_issue_writeback_iro_sb;
logic [CVA6Cfg.TRANS_ID_BITS-1:0] x_id_iro_sb;
// ---------------------------------------------------------
// 2. Manage instructions in a scoreboard
// ---------------------------------------------------------
@ -198,32 +190,23 @@ module issue_stage
.CVA6Cfg (CVA6Cfg),
.rs3_len_t (rs3_len_t),
.bp_resolve_t(bp_resolve_t),
.writeback_t(writeback_t),
.forwarding_t(forwarding_t),
.exception_t(exception_t),
.scoreboard_entry_t(scoreboard_entry_t)
) i_scoreboard (
.sb_full_o (sb_full_o),
.rd_clobber_gpr_o (rd_clobber_gpr_sb_iro),
.rd_clobber_fpr_o (rd_clobber_fpr_sb_iro),
.x_transaction_accepted_i(x_transaction_accepted_iro_sb),
.x_issue_writeback_i (x_issue_writeback_iro_sb),
.x_id_i (x_id_iro_sb),
.rs1_i (rs1_iro_sb),
.rs1_o (rs1_sb_iro),
.rs1_valid_o (rs1_valid_sb_iro),
.rs2_i (rs2_iro_sb),
.rs2_o (rs2_sb_iro),
.rs2_valid_o (rs2_valid_iro_sb),
.rs3_i (rs3_iro_sb),
.rs3_o (rs3_sb_iro),
.rs3_valid_o (rs3_valid_iro_sb),
.decoded_instr_i (decoded_instr_i),
.decoded_instr_valid_i(decoded_instr_valid_i),
.decoded_instr_ack_o (decoded_instr_ack_o),
.issue_instr_o (issue_instr_sb_iro),
.orig_instr_o (orig_instr_sb_iro),
.issue_instr_valid_o (issue_instr_valid_sb_iro),
.issue_ack_i (issue_ack_iro_sb),
.fwd_o (fwd),
.decoded_instr_i (decoded_instr_i),
.decoded_instr_valid_i (decoded_instr_valid_i),
.decoded_instr_ack_o (decoded_instr_ack_o),
.issue_instr_o (issue_instr_sb_iro),
.orig_instr_o (orig_instr_sb_iro),
.issue_instr_valid_o (issue_instr_valid_sb_iro),
.issue_ack_i (issue_ack_iro_sb),
.resolved_branch_i(resolved_branch_i),
.trans_id_i (trans_id_i),
@ -241,6 +224,8 @@ module issue_stage
.fu_data_t(fu_data_t),
.scoreboard_entry_t(scoreboard_entry_t),
.rs3_len_t(rs3_len_t),
.writeback_t(writeback_t),
.forwarding_t(forwarding_t),
.x_issue_req_t(x_issue_req_t),
.x_issue_resp_t(x_issue_resp_t),
.x_register_t(x_register_t),
@ -253,17 +238,7 @@ module issue_stage
.issue_ack_o (issue_ack_iro_sb),
.fu_data_o (fu_data_o),
.flu_ready_i (flu_ready_i),
.rs1_o (rs1_iro_sb),
.rs1_i (rs1_sb_iro),
.rs1_valid_i (rs1_valid_sb_iro),
.rs2_o (rs2_iro_sb),
.rs2_i (rs2_sb_iro),
.rs2_valid_i (rs2_valid_iro_sb),
.rs3_o (rs3_iro_sb),
.rs3_i (rs3_sb_iro),
.rs3_valid_i (rs3_valid_iro_sb),
.rd_clobber_gpr_i (rd_clobber_gpr_sb_iro),
.rd_clobber_fpr_i (rd_clobber_fpr_sb_iro),
.fwd_i (fwd),
.alu_valid_o (alu_valid_o),
.alu2_valid_o (alu2_valid_o),
.branch_valid_o (branch_valid_o),

View file

@ -17,47 +17,24 @@ module scoreboard #(
parameter type bp_resolve_t = logic,
parameter type exception_t = logic,
parameter type scoreboard_entry_t = logic,
parameter type forwarding_t = logic,
parameter type writeback_t = logic,
parameter type rs3_len_t = logic
) (
// Subsystem Clock - SUBSYSTEM
input logic clk_i,
input logic clk_i,
// Asynchronous reset active low - SUBSYSTEM
input logic rst_ni,
input logic rst_ni,
// TO_BE_COMPLETED - TO_BE_COMPLETED
output logic sb_full_o,
output logic sb_full_o,
// Flush only un-issued instructions - TO_BE_COMPLETED
input logic flush_unissued_instr_i,
input logic flush_unissued_instr_i,
// Flush whole scoreboard - TO_BE_COMPLETED
input logic flush_i,
// TO_BE_COMPLETED - TO_BE_COMPLETED
output ariane_pkg::fu_t [2**ariane_pkg::REG_ADDR_SIZE-1:0] rd_clobber_gpr_o,
// TO_BE_COMPLETED - TO_BE_COMPLETED
output ariane_pkg::fu_t [2**ariane_pkg::REG_ADDR_SIZE-1:0] rd_clobber_fpr_o,
input logic flush_i,
// Writeback Handling of CVXIF
input logic x_transaction_accepted_i,
input logic x_issue_writeback_i,
input logic [CVA6Cfg.TRANS_ID_BITS-1:0] x_id_i,
// rs1 operand address - issue_read_operands
input logic [CVA6Cfg.NrIssuePorts-1:0][ariane_pkg::REG_ADDR_SIZE-1:0] rs1_i,
// rs1 operand - issue_read_operands
output logic [CVA6Cfg.NrIssuePorts-1:0][CVA6Cfg.XLEN-1:0] rs1_o,
// rs1 operand is valid - issue_read_operands
output logic [CVA6Cfg.NrIssuePorts-1:0] rs1_valid_o,
// rs2 operand address - issue_read_operands
input logic [CVA6Cfg.NrIssuePorts-1:0][ariane_pkg::REG_ADDR_SIZE-1:0] rs2_i,
// rs2 operand - issue_read_operands
output logic [CVA6Cfg.NrIssuePorts-1:0][ CVA6Cfg.XLEN-1:0] rs2_o,
// rs2 operand is valid - issue_read_operands
output logic [CVA6Cfg.NrIssuePorts-1:0] rs2_valid_o,
// rs3 operand address - issue_read_operands
input logic [CVA6Cfg.NrIssuePorts-1:0][ariane_pkg::REG_ADDR_SIZE-1:0] rs3_i,
// rs3 operand - issue_read_operands
output rs3_len_t [CVA6Cfg.NrIssuePorts-1:0] rs3_o,
// rs3 operand is valid - issue_read_operands
output logic [CVA6Cfg.NrIssuePorts-1:0] rs3_valid_o,
input logic x_transaction_accepted_i,
input logic x_issue_writeback_i,
input logic [CVA6Cfg.TRANS_ID_BITS-1:0] x_id_i,
// advertise instruction to commit stage, if commit_ack_i is asserted advance the commit pointer
// TO_BE_COMPLETED - TO_BE_COMPLETED
output scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr_o,
@ -86,6 +63,8 @@ module scoreboard #(
output logic [CVA6Cfg.NrIssuePorts-1:0] issue_instr_valid_o,
// TO_BE_COMPLETED - TO_BE_COMPLETED
input logic [CVA6Cfg.NrIssuePorts-1:0] issue_ack_i,
// Forwarding - issue_read_operands
output forwarding_t fwd_o,
// TO_BE_COMPLETED - TO_BE_COMPLETED
input bp_resolve_t resolved_branch_i,
@ -302,194 +281,22 @@ module scoreboard #(
assign commit_pointer_n[k] = (flush_i) ? '0 : commit_pointer_n[0] + unsigned'(k);
end
// -------------------
// RD clobber process
// -------------------
// rd_clobber output: output currently clobbered destination registers
logic [2**ariane_pkg::REG_ADDR_SIZE-1:0][CVA6Cfg.NR_SB_ENTRIES:0] gpr_clobber_vld;
logic [2**ariane_pkg::REG_ADDR_SIZE-1:0][CVA6Cfg.NR_SB_ENTRIES:0] fpr_clobber_vld;
ariane_pkg::fu_t [ CVA6Cfg.NR_SB_ENTRIES:0] clobber_fu;
always_comb begin : clobber_assign
gpr_clobber_vld = '0;
fpr_clobber_vld = '0;
// default (highest entry hast lowest prio in arbiter tree below)
clobber_fu[CVA6Cfg.NR_SB_ENTRIES] = ariane_pkg::NONE;
for (int unsigned i = 0; i < 2 ** ariane_pkg::REG_ADDR_SIZE; i++) begin
gpr_clobber_vld[i][CVA6Cfg.NR_SB_ENTRIES] = 1'b1;
fpr_clobber_vld[i][CVA6Cfg.NR_SB_ENTRIES] = 1'b1;
end
// check for all valid entries and set the clobber accordingly
for (int unsigned i = 0; i < CVA6Cfg.NR_SB_ENTRIES; i++) begin
gpr_clobber_vld[mem_q[i].sbe.rd][i] = still_issued[i] & ~mem_q[i].is_rd_fpr_flag;
fpr_clobber_vld[mem_q[i].sbe.rd][i] = still_issued[i] & mem_q[i].is_rd_fpr_flag;
clobber_fu[i] = mem_q[i].sbe.fu;
end
// GPR[0] is always free
gpr_clobber_vld[0] = '0;
// Forwarding logic
writeback_t [CVA6Cfg.NrWbPorts-1:0] wb;
for (genvar i = 0; i < CVA6Cfg.NrWbPorts; i++) begin
assign wb[i].valid = wt_valid_i[i];
assign wb[i].data = wbdata_i[i];
assign wb[i].ex_valid = ex_i[i].valid;
assign wb[i].trans_id = trans_id_i[i];
end
for (genvar k = 0; k < 2 ** ariane_pkg::REG_ADDR_SIZE; k++) begin : gen_sel_clobbers
// get fu that is going to clobber this register (there should be only one)
rr_arb_tree #(
.NumIn(CVA6Cfg.NR_SB_ENTRIES + 1),
.DataType(ariane_pkg::fu_t),
.ExtPrio(1'b1),
.AxiVldRdy(1'b1)
) i_sel_gpr_clobbers (
.clk_i (clk_i),
.rst_ni (rst_ni),
.flush_i(1'b0),
.rr_i ('0),
.req_i (gpr_clobber_vld[k]),
.gnt_o (),
.data_i (clobber_fu),
.gnt_i (1'b1),
.req_o (),
.data_o (rd_clobber_gpr_o[k]),
.idx_o ()
);
if (CVA6Cfg.FpPresent) begin
rr_arb_tree #(
.NumIn(CVA6Cfg.NR_SB_ENTRIES + 1),
.DataType(ariane_pkg::fu_t),
.ExtPrio(1'b1),
.AxiVldRdy(1'b1)
) i_sel_fpr_clobbers (
.clk_i (clk_i),
.rst_ni (rst_ni),
.flush_i(1'b0),
.rr_i ('0),
.req_i (fpr_clobber_vld[k]),
.gnt_o (),
.data_i (clobber_fu),
.gnt_i (1'b1),
.req_o (),
.data_o (rd_clobber_fpr_o[k]),
.idx_o ()
);
end
assign fwd_o.still_issued = still_issued;
assign fwd_o.issue_pointer = issue_pointer;
assign fwd_o.wb = wb;
for (genvar i = 0; i < CVA6Cfg.NR_SB_ENTRIES; i++) begin
assign fwd_o.sbe[i] = mem_q[i].sbe;
end
// ----------------------------------
// Read Operands (a.k.a forwarding)
// ----------------------------------
// read operand interface: same logic as register file
logic [CVA6Cfg.NrIssuePorts-1:0][CVA6Cfg.NR_SB_ENTRIES+CVA6Cfg.NrWbPorts-1:0]
rs1_fwd_req, rs2_fwd_req, rs3_fwd_req;
logic [CVA6Cfg.NrIssuePorts-1:0][CVA6Cfg.NR_SB_ENTRIES+CVA6Cfg.NrWbPorts-1:0][CVA6Cfg.XLEN-1:0] rs_data;
logic [CVA6Cfg.NrIssuePorts-1:0] rs1_valid, rs2_valid, rs3_valid;
// WB ports have higher prio than entries
for (genvar i = 0; i < CVA6Cfg.NrIssuePorts; i++) begin
for (genvar k = 0; unsigned'(k) < CVA6Cfg.NrWbPorts; k++) begin : gen_rs_wb
assign rs1_fwd_req[i][k] = (mem_q[trans_id_i[k]].sbe.rd == rs1_i[i]) & (~mem_q[trans_id_i[k]].cancelled) & wt_valid_i[k] & (~ex_i[k].valid) & (mem_q[trans_id_i[k]].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_rs1_fpr(
issue_instr_o[i].op
)));
assign rs2_fwd_req[i][k] = (mem_q[trans_id_i[k]].sbe.rd == rs2_i[i]) & (~mem_q[trans_id_i[k]].cancelled) & wt_valid_i[k] & (~ex_i[k].valid) & (mem_q[trans_id_i[k]].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_rs2_fpr(
issue_instr_o[i].op
)));
assign rs3_fwd_req[i][k] = (mem_q[trans_id_i[k]].sbe.rd == rs3_i[i]) & (~mem_q[trans_id_i[k]].cancelled) & wt_valid_i[k] & (~ex_i[k].valid) & (mem_q[trans_id_i[k]].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_imm_fpr(
issue_instr_o[i].op
)));
assign rs_data[i][k] = wbdata_i[k];
end
for (genvar k = 0; unsigned'(k) < CVA6Cfg.NR_SB_ENTRIES; k++) begin : gen_rs_entries
assign rs1_fwd_req[i][k+CVA6Cfg.NrWbPorts] = (mem_q[k].sbe.rd == rs1_i[i]) & still_issued[k] & mem_q[k].sbe.valid & (mem_q[k].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_rs1_fpr(
issue_instr_o[i].op
)));
assign rs2_fwd_req[i][k+CVA6Cfg.NrWbPorts] = (mem_q[k].sbe.rd == rs2_i[i]) & still_issued[k] & mem_q[k].sbe.valid & (mem_q[k].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_rs2_fpr(
issue_instr_o[i].op
)));
assign rs3_fwd_req[i][k+CVA6Cfg.NrWbPorts] = (mem_q[k].sbe.rd == rs3_i[i]) & still_issued[k] & mem_q[k].sbe.valid & (mem_q[k].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_imm_fpr(
issue_instr_o[i].op
)));
assign rs_data[i][k+CVA6Cfg.NrWbPorts] = mem_q[k].sbe.result;
end
// check whether we are accessing GPR[0]
assign rs1_valid_o[i] = rs1_valid[i] & ((|rs1_i[i]) | (CVA6Cfg.FpPresent && ariane_pkg::is_rs1_fpr(
issue_instr_o[i].op
)));
assign rs2_valid_o[i] = rs2_valid[i] & ((|rs2_i[i]) | (CVA6Cfg.FpPresent && ariane_pkg::is_rs2_fpr(
issue_instr_o[i].op
)));
assign rs3_valid_o[i] = CVA6Cfg.NrRgprPorts == 3 ? rs3_valid[i] & ((|rs3_i[i]) | (CVA6Cfg.FpPresent && ariane_pkg::is_imm_fpr(
issue_instr_o[i].op
))) : rs3_valid[i];
// use fixed prio here
// this implicitly gives higher prio to WB ports
rr_arb_tree #(
.NumIn(CVA6Cfg.NR_SB_ENTRIES + CVA6Cfg.NrWbPorts),
.DataWidth(CVA6Cfg.XLEN),
.ExtPrio(1'b1),
.AxiVldRdy(1'b1)
) i_sel_rs1 (
.clk_i (clk_i),
.rst_ni (rst_ni),
.flush_i(1'b0),
.rr_i ('0),
.req_i (rs1_fwd_req[i]),
.gnt_o (),
.data_i (rs_data[i]),
.gnt_i (1'b1),
.req_o (rs1_valid[i]),
.data_o (rs1_o[i]),
.idx_o ()
);
rr_arb_tree #(
.NumIn(CVA6Cfg.NR_SB_ENTRIES + CVA6Cfg.NrWbPorts),
.DataWidth(CVA6Cfg.XLEN),
.ExtPrio(1'b1),
.AxiVldRdy(1'b1)
) i_sel_rs2 (
.clk_i (clk_i),
.rst_ni (rst_ni),
.flush_i(1'b0),
.rr_i ('0),
.req_i (rs2_fwd_req[i]),
.gnt_o (),
.data_i (rs_data[i]),
.gnt_i (1'b1),
.req_o (rs2_valid[i]),
.data_o (rs2_o[i]),
.idx_o ()
);
logic [CVA6Cfg.NrIssuePorts-1:0][CVA6Cfg.XLEN-1:0] rs3;
rr_arb_tree #(
.NumIn(CVA6Cfg.NR_SB_ENTRIES + CVA6Cfg.NrWbPorts),
.DataWidth(CVA6Cfg.XLEN),
.ExtPrio(1'b1),
.AxiVldRdy(1'b1)
) i_sel_rs3 (
.clk_i (clk_i),
.rst_ni (rst_ni),
.flush_i(1'b0),
.rr_i ('0),
.req_i (rs3_fwd_req[i]),
.gnt_o (),
.data_i (rs_data[i]),
.gnt_i (1'b1),
.req_o (rs3_valid[i]),
.data_o (rs3[i]),
.idx_o ()
);
if (CVA6Cfg.NrRgprPorts == 3) begin : gen_gp_three_port
assign rs3_o[i] = rs3[i][riscv::XLEN-1:0];
end else begin : gen_fp_three_port
assign rs3_o[i] = rs3[i][CVA6Cfg.FLen-1:0];
end
end
// sequential process
always_ff @(posedge clk_i or negedge rst_ni) begin : regs
if (!rst_ni) begin
@ -513,10 +320,6 @@ module scoreboard #(
assert (CVA6Cfg.NR_SB_ENTRIES == 2 ** CVA6Cfg.TRANS_ID_BITS)
else $fatal(1, "Scoreboard size needs to be a power of two.");
end
// assert that zero is never set
assert property (@(posedge clk_i) disable iff (!rst_ni) (rd_clobber_gpr_o[0] == ariane_pkg::NONE))
else $fatal(1, "RD 0 should not bet set");
// assert that we never acknowledge a commit if the instruction is not valid
assert property (
@(posedge clk_i) disable iff (!rst_ni) commit_ack_i[0] |-> commit_instr_o[0].valid)