Separate RAW and WAW process to fix CVXIF with Superscalar (#2395)

This commit is contained in:
Guillaume Chauvon 2024-07-26 14:58:18 +02:00 committed by GitHub
parent 96b0508525
commit 211af02e5e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 194 additions and 177 deletions

View file

@ -7,9 +7,8 @@
//
// Original Author: Guillaume Chauvon
module compressed_instr_decoder
import cvxif_instr_pkg::*;
#(
module compressed_instr_decoder #(
parameter type copro_compressed_resp_t = logic,
parameter int NbInstr = 1,
parameter copro_compressed_resp_t CoproInstr [NbInstr] = {0},
parameter type x_compressed_req_t = logic,

View file

@ -71,6 +71,7 @@ module cvxif_example_coprocessor
assign register_valid = cvxif_req_i.register_valid;
compressed_instr_decoder #(
.copro_compressed_resp_t(cvxif_instr_pkg::copro_compressed_resp_t),
.NbInstr(cvxif_instr_pkg::NbCompInstr),
.CoproInstr(cvxif_instr_pkg::CoproCompInstr),
.x_compressed_req_t(x_compressed_req_t),
@ -85,6 +86,8 @@ module cvxif_example_coprocessor
);
instr_decoder #(
.copro_issue_resp_t (cvxif_instr_pkg::copro_issue_resp_t),
.opcode_t (cvxif_instr_pkg::opcode_t),
.NbInstr (cvxif_instr_pkg::NbInstr),
.CoproInstr(cvxif_instr_pkg::CoproInstr),
.NrRgprPorts(NrRgprPorts),

View file

@ -7,9 +7,9 @@
//
// Original Author: Guillaume Chauvon
module instr_decoder
import cvxif_instr_pkg::*;
#(
module instr_decoder #(
parameter type copro_issue_resp_t = logic,
parameter type opcode_t = logic,
parameter int NbInstr = 1,
parameter copro_issue_resp_t CoproInstr [NbInstr] = {0},
parameter int unsigned NrRgprPorts = 2,
@ -53,7 +53,7 @@ module instr_decoder
issue_resp_o.writeback = '0;
issue_resp_o.register_read = '0;
registers_o = '0;
opcode_o = ILLEGAL;
opcode_o = opcode_t'(0); // == ILLEGAL see cvxif_instr_pkg.sv
hartid_o = '0;
id_o = '0;
rd_o = '0;

View file

@ -18,7 +18,7 @@ module cvxif_issue_register_commit_if_driver #(
input logic clk_i,
input logic rst_ni,
input logic flush_i,
input logic [ CVA6Cfg.XLEN-1:0] hart_id_i,
input logic [CVA6Cfg.XLEN-1:0] hart_id_i,
// CVXIF Issue interface
input logic issue_ready_i,
input x_issue_resp_t issue_resp_i,
@ -33,10 +33,10 @@ module cvxif_issue_register_commit_if_driver #(
output x_commit_t commit_o,
// IRO in/out
input logic valid_i,
input logic [ 31:0] x_off_instr_i,
input logic [31:0] x_off_instr_i,
input logic [CVA6Cfg.TRANS_ID_BITS-1:0] x_trans_id_i,
input logic [ CVA6Cfg.NrRgprPorts-1:0][CVA6Cfg.XLEN-1:0] register_i,
input logic [ CVA6Cfg.NrRgprPorts-1:0] rs_valid_i,
input [(CVA6Cfg.NrRgprPorts/CVA6Cfg.NrIssuePorts)-1:0][CVA6Cfg.XLEN-1:0] register_i,
input logic [(CVA6Cfg.NrRgprPorts/CVA6Cfg.NrIssuePorts)-1:0] rs_valid_i,
output logic cvxif_busy_o
);
// X_ISSUE_REGISTER_SPLIT = 0 : Issue and register transactions are synchronous

View file

@ -167,7 +167,7 @@ package build_config_pkg;
cfg.VpnLen = VpnLen;
cfg.PtLevels = PtLevels;
cfg.X_NUM_RS = cfg.NrRgprPorts;
cfg.X_NUM_RS = cfg.NrRgprPorts / cfg.NrIssuePorts;
cfg.X_ID_WIDTH = cfg.TRANS_ID_BITS;
cfg.X_RFR_WIDTH = cfg.XLEN;
cfg.X_RFW_WIDTH = cfg.XLEN;

View file

@ -149,9 +149,10 @@ module issue_read_operands
logic none, load, store, alu, alu2, ctrl_flow, mult, csr, fpu, fpu_vec, cvxif, accel;
} fus_busy_t;
logic [CVA6Cfg.NrIssuePorts-1:0] stall, stall_rs1, stall_rs2, stall_rs3;
logic [CVA6Cfg.NrIssuePorts-1:0] stall_raw, stall_waw, stall_rs1, stall_rs2, stall_rs3;
logic [CVA6Cfg.NrIssuePorts-1:0] fu_busy; // functional unit is busy
fus_busy_t [CVA6Cfg.NrIssuePorts-1:0] fus_busy; // which functional units are considered busy
logic [CVA6Cfg.NrIssuePorts-1:0] issue_ack;
// operands coming from regfile
logic [CVA6Cfg.NrIssuePorts-1:0][CVA6Cfg.XLEN-1:0] operand_a_regfile, operand_b_regfile;
// third operand from fp regfile or gp regfile if NR_RGPR_PORTS == 3
@ -185,10 +186,10 @@ module issue_read_operands
assign orig_instr = riscv::instruction_t'(orig_instr_i[0]);
// CVXIF Signals
logic cvxif_busy;
logic cvxif_req_allowed;
logic x_transaction_rejected;
logic [CVA6Cfg.NrRgprPorts-1:0] rs_valid;
logic [CVA6Cfg.NrRgprPorts-1:0][CVA6Cfg.XLEN-1:0] rs;
logic [OPERANDS_PER_INSTR-1:0] rs_valid;
logic [OPERANDS_PER_INSTR-1:0][CVA6Cfg.XLEN-1:0] rs;
cvxif_issue_register_commit_if_driver #(
.CVA6Cfg (CVA6Cfg),
@ -215,7 +216,7 @@ module issue_read_operands
.x_trans_id_i (issue_instr_i[0].trans_id),
.register_i (rs),
.rs_valid_i (rs_valid),
.cvxif_busy_o (cvxif_busy)
.cvxif_busy_o ()
);
if (OPERANDS_PER_INSTR == 3) begin
assign rs_valid = {~stall_rs3[0], ~stall_rs2[0], ~stall_rs1[0]};
@ -226,7 +227,9 @@ module issue_read_operands
end
// TODO check only for 1st instruction ??
assign cvxif_instruction_valid = (!issue_instr_i[0].ex.valid && issue_instr_valid_i[0] && (issue_instr_i[0].fu == CVXIF));
// Allow a CVXIF transaction only if the WAW conditions are met (no WAW stall on issue port 0).
assign cvxif_req_allowed = (issue_instr_i[0].fu == CVXIF) && !stall_waw[0];
assign cvxif_instruction_valid = !issue_instr_i[0].ex.valid && issue_instr_valid_i[0] && cvxif_req_allowed;
assign x_transaction_accepted_o = x_issue_valid_o && x_issue_ready_i && x_issue_resp_i.accept;
assign x_transaction_rejected = x_issue_valid_o && x_issue_ready_i && ~x_issue_resp_i.accept;
assign x_issue_writeback_o = x_issue_resp_i.writeback;
@ -251,7 +254,7 @@ module issue_read_operands
assign alu2_valid_o = alu2_valid_q;
assign cvxif_valid_o = CVA6Cfg.CvxifEn ? cvxif_valid_q : '0;
assign cvxif_off_instr_o = CVA6Cfg.CvxifEn ? cvxif_off_instr_q : '0;
assign stall_issue_o = stall[0];
assign stall_issue_o = stall_raw[0];
assign tinst_o = CVA6Cfg.RVH ? tinst_q : '0;
// ---------------
// Issue Stage
@ -259,7 +262,10 @@ module issue_read_operands
always_comb begin : structural_hazards
fus_busy = '0;
// CVXIF is always ready to try a new transaction on 1st issue port
// If a transaction is already pending then we stall until the transaction is done (issue_ack_o[0] = 0).
// Since we cannot have two CVXIF instructions on the 1st issue port, CVXIF is always ready for the pending instruction.
fus_busy[0].cvxif = 1'b0;
if (!flu_ready_i) begin
fus_busy[0].alu = 1'b1;
fus_busy[0].ctrl_flow = 1'b1;
@ -286,15 +292,13 @@ module issue_read_operands
fus_busy[0].store = 1'b1;
end
if (cvxif_busy) begin
fus_busy[0].cvxif = 1'b1;
end
if (CVA6Cfg.SuperscalarEn) begin
fus_busy[1] = fus_busy[0];
// Never issue CSR instruction on second issue port.
fus_busy[1].csr = 1'b1;
// Never issue CVXIF instruction on second issue port.
fus_busy[1].cvxif = 1'b1;
unique case (issue_instr_i[0].fu)
NONE: fus_busy[1].none = 1'b1;
@ -346,7 +350,7 @@ module issue_read_operands
fus_busy[1].load = 1'b1;
fus_busy[1].store = 1'b1;
end
CVXIF: fus_busy[1].cvxif = 1'b1;
CVXIF: ;
endcase
end
end
@ -390,7 +394,7 @@ module issue_read_operands
// check that all operands are available, otherwise stall
// forward corresponding register
always_comb begin : operands_available
stall = '{default: stall_i};
stall_raw = '{default: stall_i};
stall_rs1 = '{default: stall_i};
stall_rs2 = '{default: stall_i};
stall_rs3 = '{default: stall_i};
@ -426,7 +430,7 @@ module issue_read_operands
(CVA6Cfg.RVS && issue_instr_i[i].op == SFENCE_VMA)))) begin
forward_rs1[i] = 1'b1;
end else begin // the operand is not available -> stall
stall[i] = 1'b1;
stall_raw[i] = 1'b1;
stall_rs1[i] = 1'b1;
end
end
@ -445,7 +449,7 @@ module issue_read_operands
(CVA6Cfg.RVS && issue_instr_i[i].op == SFENCE_VMA)))) begin
forward_rs2[i] = 1'b1;
end else begin // the operand is not available -> stall
stall[i] = 1'b1;
stall_raw[i] = 1'b1;
stall_rs2[i] = 1'b1;
end
end
@ -456,12 +460,12 @@ module issue_read_operands
)) ? rd_clobber_fpr_i[issue_instr_i[i].result[REG_ADDR_SIZE-1:0]] != NONE : 0) ||
((CVA6Cfg.CvxifEn && OPERANDS_PER_INSTR == 3 &&
x_issue_valid_o && x_issue_resp_i.accept && x_issue_resp_i.register_read[2]) &&
rd_clobber_gpr_i[issue_instr_i[i].result] != NONE)) begin
rd_clobber_gpr_i[issue_instr_i[i].result[REG_ADDR_SIZE-1:0]] != NONE)) begin
// if the operand is available, forward it. CSRs don't write to/from FPR so no need to check
if (rs3_valid_i[i]) begin
forward_rs3[i] = 1'b1;
end else begin // the operand is not available -> stall
stall[i] = 1'b1;
stall_raw[i] = 1'b1;
stall_rs3[i] = 1'b1;
end
end
@ -473,7 +477,7 @@ module issue_read_operands
) == is_rd_fpr(
issue_instr_i[0].op
))) && issue_instr_i[1].rs1 == issue_instr_i[0].rd && issue_instr_i[1].rs1 != '0) begin
stall[1] = 1'b1;
stall_raw[1] = 1'b1;
end
if ((!CVA6Cfg.FpPresent || (is_rs2_fpr(
@ -481,7 +485,7 @@ module issue_read_operands
) == is_rd_fpr(
issue_instr_i[0].op
))) && issue_instr_i[1].rs2 == issue_instr_i[0].rd && issue_instr_i[1].rs2 != '0) begin
stall[1] = 1'b1;
stall_raw[1] = 1'b1;
end
// Only check clobbered gpr for OFFLOADED instruction
@ -492,7 +496,7 @@ module issue_read_operands
) && issue_instr_i[0].rd == issue_instr_i[1].result[REG_ADDR_SIZE-1:0] :
issue_instr_i[1].op == OFFLOAD && OPERANDS_PER_INSTR == 3 ?
issue_instr_i[0].rd == issue_instr_i[1].result[REG_ADDR_SIZE-1:0] : 1'b0) begin
stall[1] = 1'b1;
stall_raw[1] = 1'b1;
end
end
end
@ -664,18 +668,11 @@ module issue_read_operands
end
end
// We can issue an instruction if we do not detect that any other instruction is writing the same
// destination register.
// We also need to check if there is an unresolved branch in the scoreboard.
always_comb begin : issue_scoreboard
always_comb begin : gen_check_waw_dependencies
stall_waw = '1;
for (int unsigned i = 0; i < CVA6Cfg.NrIssuePorts; i++) begin
// default assignment
issue_ack_o[i] = 1'b0;
// check that we didn't stall, that the instruction we got is valid
// and that the functional unit we need is not busy
if (issue_instr_valid_i[i] && !fu_busy[i]) begin
// check that the corresponding functional unit is not busy
if (!stall[i]) begin
// -----------------------------------------
// WAW - Write After Write Dependency Check
// -----------------------------------------
@ -684,7 +681,7 @@ module issue_read_operands
issue_instr_i[i].op
)) ? (rd_clobber_fpr_i[issue_instr_i[i].rd] == NONE) :
(rd_clobber_gpr_i[issue_instr_i[i].rd] == NONE)) begin
issue_ack_o[i] = 1'b1;
stall_waw[i] = 1'b0;
end
// or check that the target destination register will be written in this cycle by the
// commit stage
@ -693,38 +690,55 @@ module issue_read_operands
issue_instr_i[i].op
)) ? (we_fpr_i[c] && waddr_i[c] == issue_instr_i[i].rd[4:0]) :
(we_gpr_i[c] && waddr_i[c] == issue_instr_i[i].rd[4:0])) begin
issue_ack_o[i] = 1'b1;
stall_waw[i] = 1'b0;
end
end
if (i > 0) begin
if ((issue_instr_i[i].rd[4:0] == issue_instr_i[i-1].rd[4:0]) && (issue_instr_i[i].rd[4:0] != '0)) begin
issue_ack_o[i] = 1'b0;
stall_waw[i] = 1'b1;
end
end
end
end
end
// We can issue an instruction if we do not detect that any other instruction is writing the same
// destination register.
// We also need to check if there is an unresolved branch in the scoreboard.
always_comb begin : issue_scoreboard
for (int unsigned i = 0; i < CVA6Cfg.NrIssuePorts; i++) begin
// default assignment
issue_ack[i] = 1'b0;
// check that the instruction we got is valid
// and that the functional unit we need is not busy
if (issue_instr_valid_i[i] && !fu_busy[i]) begin
if (!stall_raw[i] && !stall_waw[i]) begin
issue_ack[i] = 1'b1;
end
// we can also issue the instruction under the following two circumstances:
// we can do this even if we are stalled or no functional unit is ready (as we don't need one)
// the decoder needs to make sure that the instruction is marked as valid when it does not
// need any functional unit or if an exception occurred previous to the execute stage.
// 1. we already got an exception
if (issue_instr_i[i].ex.valid) begin
issue_ack_o[i] = 1'b1;
issue_ack[i] = 1'b1;
end
// 2. it is an instruction which does not need any functional unit
if (issue_instr_i[i].fu == NONE) begin
issue_ack_o[i] = 1'b1;
end
if (issue_instr_i[i].fu == CVXIF) begin
issue_ack_o[i] = (x_transaction_accepted_o || x_transaction_rejected);
issue_ack[i] = 1'b1;
end
end
end
if (CVA6Cfg.SuperscalarEn) begin
if (!issue_ack_o[0]) begin
issue_ack_o[1] = 1'b0;
if (!issue_ack[0]) begin
issue_ack[1] = 1'b0;
end
end
issue_ack_o = issue_ack;
// Do not acknowledge the issued CVXIF instruction until its transaction has completed (accepted or rejected).
if (issue_instr_i[0].fu == CVXIF && !(x_transaction_accepted_o || x_transaction_rejected)) begin
issue_ack_o[0] = 1'b0;
end
end
// ----------------------

View file

@ -3,9 +3,9 @@
#
# This file has been generated by SpyGlass:
# Report Name : summary
# Report Created by: akassimi
# Report Created on: Tue Jul 16 15:53:46 2024
# Working Directory: /home/akassimi/rhel8/cva6_synthesis/cva6/spyglass
# Report Created by: runner_riscv-public
# Report Created on: Fri Jul 26 00:36:54 2024
# Working Directory: /gitlab-runner/runner_riscv-public/builds/yD5zmwgi3/0/riscv-ci/cva6/spyglass
# SpyGlass Version : SpyGlass_vS-2021.09-SP2-3
# Policy Name : SpyGlass(SpyGlass_vS-2021.09-SP2-03)
# erc(SpyGlass_vS-2021.09-SP2-03)
@ -17,9 +17,9 @@
# starc(SpyGlass_vS-2021.09-SP2-03)
# starc2005(SpyGlass_vS-2021.09-SP2-03)
#
# Total Number of Generated Messages : 1501
# Total Number of Generated Messages : 1521
# Number of Waived Messages : 2
# Number of Reported Messages : 1499
# Number of Reported Messages : 1519
# Number of Overlimit Messages : 0
#
#
@ -62,7 +62,7 @@ WARNING SYNTH_12608 1 The logic of the always block
Block
WARNING SYNTH_12611 2 Property blocks will be ignored for
synthesis
WARNING SYNTH_5064 37 Non-synthesizable statements are
WARNING SYNTH_5064 38 Non-synthesizable statements are
ignored for synthesis.
WARNING SYNTH_5143 11 Initial block is ignored for synthesis
WARNING SYNTH_89 4 Initial Assignment at Declaration is
@ -71,6 +71,7 @@ WARNING WRN_1024 3 Signed argument is passed to $signed
system function call, or unsigned
argument passed to $unsigned system
function call.
WARNING WRN_27 1 Bit-select should not be out-of-range.
INFO DetectTopDesignUnits 1 Identify the top-level design units in
user design.
INFO ElabSummary 1 Generates Elaborated design units
@ -112,17 +113,17 @@ WARNING STARC05-2.2.3.3 14 Do not assign over the same signal in
circuits
WARNING W224 1 Multi-bit expression found when one-bit
expression expected
WARNING W240 323 An input has been declared but is not
WARNING W240 322 An input has been declared but is not
read
WARNING W263 4 A case expression width does not match
case select expression width
WARNING W287b 32 Output port of an instance is not
WARNING W287b 36 Output port of an instance is not
connected
WARNING W415a 526 Signal may be multiply assigned (beside
WARNING W415a 537 Signal may be multiply assigned (beside
initialization) in the same scope.
WARNING W480 3 Loop index is not of type integer
WARNING W486 2 Shift overflow - some bits may be lost
WARNING W528 483 A signal or variable is set but never
WARNING W528 487 A signal or variable is set but never
read
INFO W240 1 An input has been declared but is not
read