Altera opt 3 (#2613)
Some checks are pending
bender-up-to-date / bender-up-to-date (push) Waiting to run
ci / build-riscv-tests (push) Waiting to run
ci / execute-riscv64-tests (push) Blocked by required conditions
ci / execute-riscv32-tests (push) Blocked by required conditions

The third optimization for Altera FPGAs moves the register file to LUTRAM. As before, the optimization previously done for Xilinx does not work here because it relies on asynchronous RAM primitives, which Altera does not support. This optimization therefore uses synchronous RAM for the register file.

The main changes to the existing code are:

Changes in ariane_regfile_fpga.sv file: The idea is the same as before. Since synchronous RAM takes one clock cycle to read, the data must be stored when it is written, in case it is read in the very next cycle. For this there is an auxiliary register (wdata_reg) that holds the last written data. On the read side, we need to determine whether the data to be read is already available in the RAM or is still only in the auxiliary register (read after write). To compensate for the synchronous RAM delay, the read address is presented one clock cycle in advance. Because a multiplexer at the output selects the block from which data is read, the read address must also be kept for one clock cycle (raddr_q) so that the right block is selected when the data becomes available.

Changes in issue_read_operands.sv file: Adjust the address used to read from the register file (when synchronous RAM is used, reads take one cycle, so the address is presented one cycle in advance). Since this address is an input, a new input port, “issue_instr_i_prev”, is needed to bring the address in advance.

Changes in issue_stage.sv file: Connect the new input port “decoded_instr_i_prev”, which brings the address in advance.

Changes in id_stage.sv file: Output the instruction to be issued before it is registered (one clock cycle in advance). A new output port, “issue_entry_o_prev”, is needed for this.

Changes in cva6.sv file: Connect the new output of the id_stage to the issue_stage (issue_entry_id_issue_prev) to bring the address in advance to the register file.
This commit is contained in:
AngelaGonzalezMarino 2024-11-28 14:26:29 +01:00 committed by GitHub
parent dd649f28ad
commit b718824e1e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 47 additions and 14 deletions

View file

@ -1,4 +1,5 @@
// Copyright 2018 ETH Zurich and University of Bologna. // Copyright 2018 ETH Zurich and University of Bologna.
// Copyright 2024 - PlanV Technologies for additionnal contribution.
// Copyright and related rights are licensed under the Solderpad Hardware // Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in // License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at // compliance with the License. You may obtain a copy of the License at
@ -15,7 +16,7 @@
// Noam Gallmann - gnoam@live.com // Noam Gallmann - gnoam@live.com
// Felipe Lisboa Malaquias // Felipe Lisboa Malaquias
// Henry Suzukawa // Henry Suzukawa
// // Angela Gonzalez - PlanV Technologies
// //
// Description: This register file is optimized for implementation on // Description: This register file is optimized for implementation on
// FPGAs. The register file features one distributed RAM block per implemented // FPGAs. The register file features one distributed RAM block per implemented
@ -55,6 +56,11 @@ module ariane_regfile_fpga #(
logic [CVA6Cfg.NrCommitPorts-1:0][NUM_WORDS-1:0] we_dec; logic [CVA6Cfg.NrCommitPorts-1:0][NUM_WORDS-1:0] we_dec;
logic [NUM_WORDS-1:0][LOG_NR_WRITE_PORTS-1:0] mem_block_sel; logic [NUM_WORDS-1:0][LOG_NR_WRITE_PORTS-1:0] mem_block_sel;
logic [NUM_WORDS-1:0][LOG_NR_WRITE_PORTS-1:0] mem_block_sel_q; logic [NUM_WORDS-1:0][LOG_NR_WRITE_PORTS-1:0] mem_block_sel_q;
logic [CVA6Cfg.NrCommitPorts-1:0][DATA_WIDTH-1:0] wdata_reg;
logic [NR_READ_PORTS-1:0] read_after_write;
logic [NR_READ_PORTS-1:0][4:0] raddr_q;
logic [NR_READ_PORTS-1:0][4:0] raddr;
// write adress decoder (for block selector) // write adress decoder (for block selector)
always_comb begin always_comb begin
@ -88,36 +94,55 @@ module ariane_regfile_fpga #(
always_ff @(posedge clk_i or negedge rst_ni) begin always_ff @(posedge clk_i or negedge rst_ni) begin
if (!rst_ni) begin if (!rst_ni) begin
mem_block_sel_q <= '0; mem_block_sel_q <= '0;
raddr_q <= '0;
end else begin end else begin
mem_block_sel_q <= mem_block_sel; mem_block_sel_q <= mem_block_sel;
if (CVA6Cfg.FpgaAlteraEn) raddr_q <= raddr_i;
else raddr_q <= '0;
end end
end end
// distributed RAM blocks // distributed RAM blocks
logic [NR_READ_PORTS-1:0][DATA_WIDTH-1:0] mem_read[CVA6Cfg.NrCommitPorts]; logic [NR_READ_PORTS-1:0][DATA_WIDTH-1:0] mem_read[CVA6Cfg.NrCommitPorts];
logic [NR_READ_PORTS-1:0][DATA_WIDTH-1:0] mem_read_sync[CVA6Cfg.NrCommitPorts];
for (genvar j = 0; j < CVA6Cfg.NrCommitPorts; j++) begin : regfile_ram_block for (genvar j = 0; j < CVA6Cfg.NrCommitPorts; j++) begin : regfile_ram_block
always_ff @(posedge clk_i) begin always_ff @(posedge clk_i) begin
if (we_i[j] && ~waddr_i[j] != 0) begin if (we_i[j] && ~waddr_i[j] != 0) begin
mem[j][waddr_i[j]] <= wdata_i[j]; mem[j][waddr_i[j]] <= wdata_i[j];
if (CVA6Cfg.FpgaAlteraEn)
wdata_reg[j] <= wdata_i[j]; // register data written in case is needed to read next cycle
else wdata_reg[j] <= '0;
end
if (CVA6Cfg.FpgaAlteraEn) begin
for (int k = 0; k < NR_READ_PORTS; k++) begin : block_read
mem_read_sync[j][k] = mem[j][raddr_i[k]]; // synchronous RAM
read_after_write[k] <= '0;
if (waddr_i[j] == raddr_i[k])
read_after_write[k] <= we_i[j] && ~waddr_i[j] != 0; // Identify if we need to read the content that was written
end
end end
end end
for (genvar k = 0; k < NR_READ_PORTS; k++) begin : block_read for (genvar k = 0; k < NR_READ_PORTS; k++) begin : block_read
assign mem_read[j][k] = mem[j][raddr_i[k]]; assign mem_read[j][k] = CVA6Cfg.FpgaAlteraEn ? ( read_after_write[k] ? wdata_reg[j]: mem_read_sync[j][k]) : mem[j][raddr_i[k]];
end end
end end
//with synchronous ram there is the need to adjust which address is used at the output MUX
assign raddr = CVA6Cfg.FpgaAlteraEn ? raddr_q : raddr_i;
// output MUX // output MUX
logic [NR_READ_PORTS-1:0][LOG_NR_WRITE_PORTS-1:0] block_addr; logic [NR_READ_PORTS-1:0][LOG_NR_WRITE_PORTS-1:0] block_addr;
for (genvar k = 0; k < NR_READ_PORTS; k++) begin : regfile_read_port for (genvar k = 0; k < NR_READ_PORTS; k++) begin : regfile_read_port
assign block_addr[k] = mem_block_sel_q[raddr_i[k]]; assign block_addr[k] = mem_block_sel_q[raddr[k]];
assign rdata_o[k] = (ZERO_REG_ZERO && raddr_i[k] == '0) ? '0 : mem_read[block_addr[k]][k]; assign rdata_o[k] = (ZERO_REG_ZERO && raddr[k] == '0) ? '0 : mem_read[block_addr[k]][k];
end end
// random initialization of the memory to suppress assert warnings on Questa. // random initialization of the memory to suppress assert warnings on Questa.
initial begin initial begin
for (int i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin for (int i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin
for (int j = 0; j < NUM_WORDS; j++) begin for (int j = 0; j < NUM_WORDS; j++) begin
mem[i][j] = $random(); if (!CVA6Cfg.FpgaAlteraEn)
mem[i][j] = $random(); //quartus does not support this random statement on synthesis
else mem[i][j] = '0;
end end
end end
end end

View file

@ -401,7 +401,7 @@ module cva6
// -------------- // --------------
// ID <-> ISSUE // ID <-> ISSUE
// -------------- // --------------
scoreboard_entry_t [CVA6Cfg.NrIssuePorts-1:0] issue_entry_id_issue; scoreboard_entry_t [CVA6Cfg.NrIssuePorts-1:0] issue_entry_id_issue, issue_entry_id_issue_prev;
logic [CVA6Cfg.NrIssuePorts-1:0][31:0] orig_instr_id_issue; logic [CVA6Cfg.NrIssuePorts-1:0][31:0] orig_instr_id_issue;
logic [CVA6Cfg.NrIssuePorts-1:0] issue_entry_valid_id_issue; logic [CVA6Cfg.NrIssuePorts-1:0] issue_entry_valid_id_issue;
logic [CVA6Cfg.NrIssuePorts-1:0] is_ctrl_fow_id_issue; logic [CVA6Cfg.NrIssuePorts-1:0] is_ctrl_fow_id_issue;
@ -690,6 +690,7 @@ module cva6
.fetch_entry_ready_o(fetch_ready_id_if), .fetch_entry_ready_o(fetch_ready_id_if),
.issue_entry_o (issue_entry_id_issue), .issue_entry_o (issue_entry_id_issue),
.issue_entry_o_prev (issue_entry_id_issue_prev),
.orig_instr_o (orig_instr_id_issue), .orig_instr_o (orig_instr_id_issue),
.issue_entry_valid_o(issue_entry_valid_id_issue), .issue_entry_valid_o(issue_entry_valid_id_issue),
.is_ctrl_flow_o (is_ctrl_fow_id_issue), .is_ctrl_flow_o (is_ctrl_fow_id_issue),
@ -806,6 +807,7 @@ module cva6
.stall_i (stall_acc_id), .stall_i (stall_acc_id),
// ID Stage // ID Stage
.decoded_instr_i (issue_entry_id_issue), .decoded_instr_i (issue_entry_id_issue),
.decoded_instr_i_prev (issue_entry_id_issue_prev),
.orig_instr_i (orig_instr_id_issue), .orig_instr_i (orig_instr_id_issue),
.decoded_instr_valid_i (issue_entry_valid_id_issue), .decoded_instr_valid_i (issue_entry_valid_id_issue),
.is_ctrl_flow_i (is_ctrl_fow_id_issue), .is_ctrl_flow_i (is_ctrl_fow_id_issue),

View file

@ -41,6 +41,7 @@ module id_stage #(
output logic [CVA6Cfg.NrIssuePorts-1:0] fetch_entry_ready_o, output logic [CVA6Cfg.NrIssuePorts-1:0] fetch_entry_ready_o,
// Handshake's data between decode and issue - ISSUE // Handshake's data between decode and issue - ISSUE
output scoreboard_entry_t [CVA6Cfg.NrIssuePorts-1:0] issue_entry_o, output scoreboard_entry_t [CVA6Cfg.NrIssuePorts-1:0] issue_entry_o,
output scoreboard_entry_t [CVA6Cfg.NrIssuePorts-1:0] issue_entry_o_prev,
// Instruction value - ISSUE // Instruction value - ISSUE
output logic [CVA6Cfg.NrIssuePorts-1:0][31:0] orig_instr_o, output logic [CVA6Cfg.NrIssuePorts-1:0][31:0] orig_instr_o,
// Handshake's valid between decode and issue - ISSUE // Handshake's valid between decode and issue - ISSUE
@ -266,6 +267,7 @@ module id_stage #(
// ------------------ // ------------------
for (genvar i = 0; i < CVA6Cfg.NrIssuePorts; i++) begin for (genvar i = 0; i < CVA6Cfg.NrIssuePorts; i++) begin
assign issue_entry_o[i] = issue_q[i].sbe; assign issue_entry_o[i] = issue_q[i].sbe;
assign issue_entry_o_prev[i] = CVA6Cfg.FpgaAlteraEn ? issue_n[i].sbe : '0;
assign issue_entry_valid_o[i] = issue_q[i].valid; assign issue_entry_valid_o[i] = issue_q[i].valid;
assign is_ctrl_flow_o[i] = issue_q[i].is_ctrl_flow; assign is_ctrl_flow_o[i] = issue_q[i].is_ctrl_flow;
assign orig_instr_o[i] = issue_q[i].orig_instr; assign orig_instr_o[i] = issue_q[i].orig_instr;

View file

@ -39,6 +39,7 @@ module issue_read_operands
input logic stall_i, input logic stall_i,
// Entry about the instruction to issue - SCOREBOARD // Entry about the instruction to issue - SCOREBOARD
input scoreboard_entry_t [CVA6Cfg.NrIssuePorts-1:0] issue_instr_i, input scoreboard_entry_t [CVA6Cfg.NrIssuePorts-1:0] issue_instr_i,
input scoreboard_entry_t [CVA6Cfg.NrIssuePorts-1:0] issue_instr_i_prev,
// Instruction to issue - SCOREBOARD // Instruction to issue - SCOREBOARD
input logic [CVA6Cfg.NrIssuePorts-1:0][31:0] orig_instr_i, input logic [CVA6Cfg.NrIssuePorts-1:0][31:0] orig_instr_i,
// Is there an instruction to issue - SCOREBOARD // Is there an instruction to issue - SCOREBOARD
@ -954,11 +955,12 @@ module issue_read_operands
logic [CVA6Cfg.NrCommitPorts-1:0][CVA6Cfg.XLEN-1:0] wdata_pack; logic [CVA6Cfg.NrCommitPorts-1:0][CVA6Cfg.XLEN-1:0] wdata_pack;
logic [CVA6Cfg.NrCommitPorts-1:0] we_pack; logic [CVA6Cfg.NrCommitPorts-1:0] we_pack;
for (genvar i = 0; i < CVA6Cfg.NrIssuePorts; i++) begin //adjust address to read from register file (when synchronous RAM is used reads take one cycle, so we advance the address)
assign raddr_pack[i*OPERANDS_PER_INSTR+0] = issue_instr_i[i].rs1; for (genvar i = 0; i <= CVA6Cfg.NrIssuePorts - 1; i++) begin
assign raddr_pack[i*OPERANDS_PER_INSTR+1] = issue_instr_i[i].rs2; assign raddr_pack[i*OPERANDS_PER_INSTR+0] = CVA6Cfg.FpgaEn && CVA6Cfg.FpgaAlteraEn ? issue_instr_i_prev[i].rs1[4:0] : issue_instr_i[i].rs1[4:0];
assign raddr_pack[i*OPERANDS_PER_INSTR+1] = CVA6Cfg.FpgaEn && CVA6Cfg.FpgaAlteraEn ? issue_instr_i_prev[i].rs2[4:0] : issue_instr_i[i].rs2[4:0];
if (OPERANDS_PER_INSTR == 3) begin if (OPERANDS_PER_INSTR == 3) begin
assign raddr_pack[i*OPERANDS_PER_INSTR+2] = issue_instr_i[i].result[4:0]; assign raddr_pack[i*OPERANDS_PER_INSTR+2] = CVA6Cfg.FpgaEn && CVA6Cfg.FpgaAlteraEn ? issue_instr_i_prev[i].result[4:0] : issue_instr_i[i].result[4:0];
end end
end end

View file

@ -43,6 +43,7 @@ module issue_stage
input logic stall_i, input logic stall_i,
// Handshake's data with decode stage - ID_STAGE // Handshake's data with decode stage - ID_STAGE
input scoreboard_entry_t [CVA6Cfg.NrIssuePorts-1:0] decoded_instr_i, input scoreboard_entry_t [CVA6Cfg.NrIssuePorts-1:0] decoded_instr_i,
input scoreboard_entry_t [CVA6Cfg.NrIssuePorts-1:0] decoded_instr_i_prev,
// instruction value - ID_STAGE // instruction value - ID_STAGE
input logic [CVA6Cfg.NrIssuePorts-1:0][31:0] orig_instr_i, input logic [CVA6Cfg.NrIssuePorts-1:0][31:0] orig_instr_i,
// Handshake's valid with decode stage - ID_STAGE // Handshake's valid with decode stage - ID_STAGE
@ -253,6 +254,7 @@ module issue_stage
.flush_i (flush_unissued_instr_i), .flush_i (flush_unissued_instr_i),
.stall_i, .stall_i,
.issue_instr_i (issue_instr_sb_iro), .issue_instr_i (issue_instr_sb_iro),
.issue_instr_i_prev (decoded_instr_i_prev),
.orig_instr_i (orig_instr_sb_iro), .orig_instr_i (orig_instr_sb_iro),
.issue_instr_valid_i (issue_instr_valid_sb_iro), .issue_instr_valid_i (issue_instr_valid_sb_iro),
.issue_ack_o (issue_ack_iro_sb), .issue_ack_o (issue_ack_iro_sb),