🚧 Parameterize register file (FPU preparation)

This commit is contained in:
Florian Zaruba 2018-03-29 13:40:46 +02:00 committed by Stefan Mach
parent 4e8aa09384
commit 259e089ae3
8 changed files with 177 additions and 236 deletions

View file

@ -12,7 +12,7 @@ max_cycles ?= 10000000
# Test case to run
test_case ?= core_test
# QuestaSim Version
questa_version ?=
questa_version ?= -10.6b
# verilator version
verilator ?= verilator
# preset which runs a single test

View file

@ -200,6 +200,7 @@ module ariane #(
logic tw_csr_id;
logic tsr_csr_id;
logic dcache_en_csr_nbdcache;
logic csr_write_fflags_commit_cs;
// ----------------------------
// Performance Counters <-> *
// ----------------------------
@ -368,7 +369,7 @@ module ariane #(
.waddr_i ( waddr_commit_id ),
.wdata_i ( wdata_commit_id ),
.we_i ( we_commit_id ),
.we_fpr_i ( ), // TODO
.commit_instr_o ( commit_instr_id_commit ),
.commit_ack_i ( commit_ack ),
.*
@ -474,6 +475,7 @@ module ariane #(
.waddr_o ( waddr_commit_id ),
.wdata_o ( wdata_commit_id ),
.we_o ( we_commit_id ),
.we_fpr_o ( ), // write FPU reg, TODO
.commit_lsu_o ( lsu_commit_commit_ex ),
.commit_lsu_ready_i ( lsu_commit_ready_ex_commit ),
.commit_csr_o ( csr_commit_commit_ex ),
@ -481,6 +483,7 @@ module ariane #(
.csr_op_o ( csr_op_commit_csr ),
.csr_wdata_o ( csr_wdata_commit_csr ),
.csr_rdata_i ( csr_rdata_csr_commit ),
.csr_write_fflags_o ( csr_write_fflags_commit_cs ),
.csr_exception_i ( csr_exception_csr_commit ),
.fence_i_o ( fence_i_commit_controller ),
.fence_o ( fence_commit_controller ),
@ -504,6 +507,7 @@ module ariane #(
.commit_ack_i ( commit_ack ),
.ex_i ( ex_commit ),
.csr_op_i ( csr_op_commit_csr ),
.csr_write_fflags_i ( csr_write_fflags_commit_cs ),
.csr_addr_i ( csr_addr_ex_csr ),
.csr_wdata_i ( csr_wdata_commit_csr ),
.csr_rdata_o ( csr_rdata_csr_commit ),
@ -511,6 +515,8 @@ module ariane #(
.csr_exception_o ( csr_exception_csr_commit ),
.epc_o ( epc_commit_pcgen ),
.eret_o ( eret ),
.fflags_o ( ), // FPU flags out
.frm_o ( ), // FPU rounding mode flags out TODO
.trap_vector_base_o ( trap_vector_base_commit_pcgen ),
.priv_lvl_o ( priv_lvl ),
.ld_st_priv_lvl_o ( ld_st_priv_lvl_csr_ex ),

View file

@ -23,151 +23,98 @@
// latches and is thus smaller than the flip-flop based RF.
//
module ariane_regfile #(
parameter DATA_WIDTH = 32
module ariane_regfile_latch #(
parameter int unsigned DATA_WIDTH = 32,
parameter int unsigned NR_READ_PORTS = 2,
parameter int unsigned NR_WRITE_PORTS = 2,
parameter bit ZERO_REG_ZERO = 0
)(
// Clock and Reset
input logic clk,
input logic rst_n,
input logic test_en_i,
//Read port R1
input logic [4:0] raddr_a_i,
output logic [DATA_WIDTH-1:0] rdata_a_o,
//Read port R2
input logic [4:0] raddr_b_i,
output logic [DATA_WIDTH-1:0] rdata_b_o,
// Write port W1
input logic [4:0] waddr_a_i,
input logic [DATA_WIDTH-1:0] wdata_a_i,
input logic we_a_i,
// Write port W2
input logic [4:0] waddr_b_i,
input logic [DATA_WIDTH-1:0] wdata_b_i,
input logic we_b_i
// clock and reset
input logic clk_i,
input logic rst_ni,
// disable clock gates for testing
input logic test_en_i,
// read port
input logic [NR_READ_PORTS-1:0][4:0] raddr_i,
output logic [NR_READ_PORTS-1:0][DATA_WIDTH-1:0] rdata_o,
// write port
input logic [NR_WRITE_PORTS-1:0][4:0] waddr_i,
input logic [NR_WRITE_PORTS-1:0][DATA_WIDTH-1:0] wdata_i,
input logic [NR_WRITE_PORTS-1:0] we_i
);
localparam ADDR_WIDTH = 5;;
localparam NUM_WORDS = 2**ADDR_WIDTH;
localparam ADDR_WIDTH = 5;;
localparam NUM_WORDS = 2**ADDR_WIDTH;
logic [DATA_WIDTH-1:0] mem[NUM_WORDS];
logic [NUM_WORDS-1:1] mem_clocks;
logic [NUM_WORDS-1:1] waddr_onehot_a;
logic [NUM_WORDS-1:1] waddr_onehot_b, waddr_onehot_b_q;
logic [DATA_WIDTH-1:0] mem[NUM_WORDS];
logic [NR_WRITE_PORTS-1:0][NUM_WORDS-1:1] waddr_onehot,waddr_onehot_q;
logic [NR_WRITE_PORTS-1:0][DATA_WIDTH-1:0] wdata_q;
logic [NUM_WORDS-1:1] mem_clocks;
logic [DATA_WIDTH-1:0] wdata_a_q;
logic [DATA_WIDTH-1:0] wdata_b_q;
// Write port W1
logic [ADDR_WIDTH-1:0] raddr_a_int, raddr_b_int, waddr_a_int;
// decode addresses
for (genvar i = 0; i < NR_READ_PORTS; i++)
assign rdata_o[i] = mem[raddr_i[i][ADDR_WIDTH-1:0]];
assign raddr_a_int = raddr_a_i[ADDR_WIDTH-1:0];
assign raddr_b_int = raddr_b_i[ADDR_WIDTH-1:0];
assign waddr_a_int = waddr_a_i[ADDR_WIDTH-1:0];
int unsigned i;
int unsigned j;
int unsigned k;
int unsigned l;
genvar x;
logic clk_int;
//-----------------------------------------------------------------------------
//-- READ : Read address decoder RAD
//-----------------------------------------------------------------------------
assign rdata_a_o = mem[raddr_a_int];
assign rdata_b_o = mem[raddr_b_int];
//-----------------------------------------------------------------------------
// WRITE : SAMPLE INPUT DATA
//---------------------------------------------------------------------------
cluster_clock_gating CG_WE_GLOBAL
(
.clk_i ( clk ),
.en_i ( we_a_i ),
.test_en_i ( test_en_i ),
.clk_o ( clk_int )
);
// use clk_int here, since otherwise we don't want to write anything anyway
always_ff @(posedge clk_int, negedge rst_n) begin : sample_waddr
if (~rst_n) begin
wdata_a_q <= '0;
wdata_b_q <= '0;
waddr_onehot_b_q <= '0;
always_ff @(posedge clk_i, negedge rst_ni) begin : sample_waddr
if (~rst_ni) begin
wdata_q <= '0;
end else begin
if (we_a_i)
wdata_a_q <= wdata_a_i;
if (we_b_i)
wdata_b_q <= wdata_b_i;
waddr_onehot_b_q <= waddr_onehot_b;
for (int unsigned i = 0; i < NR_WRITE_PORTS; i++)
// enable flipflop will most probably infer clock gating
if (we_i[i]) begin
wdata_q[i] <= wdata_i[i];
end
waddr_onehot_q <= waddr_onehot;
end
end
//-----------------------------------------------------------------------------
//-- WRITE : Write Address Decoder (WAD), combinatorial process
//-----------------------------------------------------------------------------
always_comb begin : p_WADa
for (i = 1; i < NUM_WORDS; i++) begin : p_WordItera
if ((we_a_i == 1'b1) && (waddr_a_i == i))
waddr_onehot_a[i] = 1'b1;
else
waddr_onehot_a[i] = 1'b0;
// WRITE : Write Address Decoder (WAD), combinatorial process
always_comb begin : decode_write_addess
for (int unsigned i = 0; i < NR_WRITE_PORTS; i++) begin
for (int unsigned j = 1; j < NUM_WORDS; j++) begin
if (we_i[i] && (waddr_i[i] == j))
waddr_onehot[i][j] = 1'b1;
else
waddr_onehot[i][j] = 1'b0;
end
end
end
always_comb begin : p_WADb
for (j = 1; j < NUM_WORDS; j++) begin : p_WordIterb
if ((we_b_i == 1'b1) && (waddr_b_i == j))
waddr_onehot_b[j] = 1'b1;
else
waddr_onehot_b[j] = 1'b0;
end
// WRITE : Clock gating (if integrated clock-gating cells are available)
for (genvar x = ZERO_REG_ZERO; x < NUM_WORDS; x++) begin
logic [NR_WRITE_PORTS-1:0] waddr_ored;
for (genvar i = 0; i < NR_WRITE_PORTS; i++)
assign waddr_ored[i] = waddr_onehot[i][x];
cluster_clock_gating i_cg (
.clk_i ( clk_i ),
.en_i ( |waddr_ored ),
.test_en_i ( test_en_i ),
.clk_o ( mem_clocks[x] )
);
end
//-----------------------------------------------------------------------------
//-- WRITE : Clock gating (if integrated clock-gating cells are available)
//-----------------------------------------------------------------------------
generate
for (x = 1; x < NUM_WORDS; x++)
begin : CG_CELL_WORD_ITER
cluster_clock_gating CG_Inst
(
.clk_i ( clk_int ),
.en_i ( waddr_onehot_a[x] | waddr_onehot_b[x] ),
.test_en_i ( test_en_i ),
.clk_o ( mem_clocks[x] )
);
end
endgenerate
//-----------------------------------------------------------------------------
//-- WRITE : Write operation
//-----------------------------------------------------------------------------
//-- Generate M = WORDS sequential processes, each of which describes one
//-- word of the memory. The processes are synchronized with the clocks
//-- ClocksxC(i), i = 0, 1, ..., M-1
//-- Use active low, i.e. transparent on low latches as storage elements
//-- Data is sampled on rising clock edge
// Generate M = WORDS sequential processes, each of which describes one
// word of the memory. The processes are synchronized with the clocks
// ClocksxC(i), i = 0, 1, ..., M-1
// Use active low, i.e. transparent on low latches as storage elements
// Data is sampled on rising clock edge
// Integer registers
always_latch begin : latch_wdata
// Note: The assignment has to be done inside this process or Modelsim complains about it
mem[0] = '0;
if (ZERO_REG_ZERO)
mem[0] = '0;
for(k = 1; k < NUM_WORDS; k++)
begin : w_WordIter
if (mem_clocks[k] == 1'b1)
mem[k] = waddr_onehot_b_q[k] ? wdata_b_q : wdata_a_q;
end
for (int unsigned i = 0; i < NR_WRITE_PORTS; i++) begin
for (int unsigned k = ZERO_REG_ZERO; k < NUM_WORDS; k++) begin
if (mem_clocks[k] && waddr_onehot_q[i][k])
mem[k] = wdata_q[i];
end
end
end
endmodule

View file

@ -23,87 +23,58 @@
//
module ariane_regfile #(
parameter DATA_WIDTH = 32
parameter int unsigned DATA_WIDTH = 32,
parameter int unsigned NR_READ_PORTS = 2,
parameter int unsigned NR_WRITE_PORTS = 2,
parameter bit ZERO_REG_ZERO = 0
)(
// Clock and Reset
input logic clk,
input logic rst_n,
input logic test_en_i,
//Read port R1
input logic [4:0] raddr_a_i,
output logic [DATA_WIDTH-1:0] rdata_a_o,
//Read port R2
input logic [4:0] raddr_b_i,
output logic [DATA_WIDTH-1:0] rdata_b_o,
// Write port W1
input logic [4:0] waddr_a_i,
input logic [DATA_WIDTH-1:0] wdata_a_i,
input logic we_a_i,
// Write port W2
input logic [4:0] waddr_b_i,
input logic [DATA_WIDTH-1:0] wdata_b_i,
input logic we_b_i
// clock and reset
input logic clk_i,
input logic rst_ni,
// disable clock gates for testing
input logic test_en_i,
// read port
input logic [NR_READ_PORTS-1:0][4:0] raddr_i,
output logic [NR_READ_PORTS-1:0][DATA_WIDTH-1:0] rdata_o,
// write port
input logic [NR_WRITE_PORTS-1:0][4:0] waddr_i,
input logic [NR_WRITE_PORTS-1:0][DATA_WIDTH-1:0] wdata_i,
input logic [NR_WRITE_PORTS-1:0] we_i
);
localparam ADDR_WIDTH = 5;
localparam NUM_WORDS = 2**ADDR_WIDTH;
logic [NUM_WORDS-1:0][DATA_WIDTH-1:0] rf_reg;
logic [NUM_WORDS-1:0] we_a_dec, we_b_dec;
logic [NUM_WORDS-1:0][DATA_WIDTH-1:0] mem;
logic [NR_WRITE_PORTS-1:0][NUM_WORDS-1:0] we_dec;
always_comb begin : we_a_decoder
for (int i = 0; i < NUM_WORDS; i++) begin
if (waddr_a_i == i)
we_a_dec[i] = we_a_i;
else
we_a_dec[i] = 1'b0;
end
end
always_comb begin : we_b_decoder
for (int i = 0; i < NUM_WORDS; i++) begin
if (waddr_b_i == i)
we_b_dec[i] = we_b_i;
else
we_b_dec[i] = 1'b0;
end
end
generate
// loop from 1 to NUM_WORDS-1 as R0 is nil
for (genvar i = 1; i < NUM_WORDS; i++) begin : rf_gen
always_ff @(posedge clk, negedge rst_n) begin : register_write_behavioral
if (rst_n==1'b0) begin
rf_reg[i] <= 'b0;
end else begin
if (we_a_dec[i])
rf_reg[i] <= wdata_a_i;
if (we_b_dec[i])
rf_reg[i] <= wdata_b_i;
always_comb begin : we_decoder
for (int unsigned j = 0; j < NR_WRITE_PORTS; j++) begin
for (int unsigned i = 0; i < NUM_WORDS; i++) begin
if (waddr_i[j] == i)
we_dec[j][i] = we_i[j];
else
we_dec[j][i] = 1'b0;
end
end
end
end
// R0 is nil
`ifdef verilator
always_ff @(posedge clk, negedge rst_n) begin
rf_reg[0] <= '0;
// loop from ZERO_REG_ZERO to NUM_WORDS-1: when ZERO_REG_ZERO is set, register 0 is never written and keeps its reset value of zero
always_ff @(posedge clk_i, negedge rst_ni) begin : register_write_behavioral
if (~rst_ni) begin
mem <= '{default: '0};
end else begin
for (int unsigned j = 0; j < NR_WRITE_PORTS; j++) begin
for (int unsigned i = ZERO_REG_ZERO; i < NUM_WORDS; i++) begin
if (we_dec[j][i])
mem[i] <= wdata_i[j];
end
end
end
end
`else
assign rf_reg[0] = '0;
`endif
endgenerate
assign rdata_a_o = rf_reg[raddr_a_i];
assign rdata_b_o = rf_reg[raddr_b_i];
for (genvar i = 0; i < NR_READ_PORTS; i++)
assign rdata_o[i] = mem[raddr_i[i]];
endmodule

View file

@ -21,16 +21,14 @@ module commit_stage #(
input logic halt_i, // request to halt the core
input logic flush_dcache_i, // request to flush dcache -> also flush the pipeline
output exception_t exception_o, // take exception to controller
// from scoreboard
input scoreboard_entry_t [NR_COMMIT_PORTS-1:0] commit_instr_i, // the instruction we want to commit
output logic [NR_COMMIT_PORTS-1:0] commit_ack_o, // acknowledge that we are indeed committing
// to register file
output logic [NR_COMMIT_PORTS-1:0][4:0] waddr_o, // register file write address
output logic [NR_COMMIT_PORTS-1:0][63:0] wdata_o, // register file write data
output logic [NR_COMMIT_PORTS-1:0] we_o, // register file write enable
output logic [NR_COMMIT_PORTS-1:0] we_fpr_o, // floating point register enable
// to CSR file and PC Gen (because on certain CSR instructions we'll need to flush the whole pipeline)
output logic [63:0] pc_o,
// to/from CSR file
@ -38,6 +36,7 @@ module commit_stage #(
output logic [63:0] csr_wdata_o, // data to write to CSR
input logic [63:0] csr_rdata_i, // data to read from CSR
input exception_t csr_exception_i, // exception or interrupt occurred in CSR stage (the same as commit)
output logic csr_write_fflags_o, // write the fflags CSR
// commit signals to ex
output logic commit_lsu_o, // commit the pending store
input logic commit_lsu_ready_i, // commit buffer of LSU is ready
@ -59,21 +58,22 @@ module commit_stage #(
// write register file or commit instruction in LSU or CSR Buffer
always_comb begin : commit
// default assignments
commit_ack_o[0] = 1'b0;
commit_ack_o[1] = 1'b0;
commit_ack_o[0] = 1'b0;
commit_ack_o[1] = 1'b0;
we_o[0] = 1'b0;
we_o[1] = 1'b0;
we_o[0] = 1'b0;
we_o[1] = 1'b0;
commit_lsu_o = 1'b0;
commit_csr_o = 1'b0;
wdata_o[0] = commit_instr_i[0].result;
wdata_o[1] = commit_instr_i[1].result;
csr_op_o = ADD; // this corresponds to a CSR NOP
csr_wdata_o = 64'b0;
fence_i_o = 1'b0;
fence_o = 1'b0;
sfence_vma_o = 1'b0;
commit_lsu_o = 1'b0;
commit_csr_o = 1'b0;
wdata_o[0] = commit_instr_i[0].result;
wdata_o[1] = commit_instr_i[1].result;
csr_op_o = ADD; // this corresponds to a CSR NOP
csr_wdata_o = 64'b0;
fence_i_o = 1'b0;
fence_o = 1'b0;
sfence_vma_o = 1'b0;
csr_write_fflags_o = 1'b0;
// we will not commit the instruction if we took an exception
// and we also do not commit the instruction if we requested a halt
@ -101,6 +101,16 @@ module commit_stage #(
end
end
// ---------
// FPU
// ---------
if (commit_instr_i[0].fu == FPU) begin
// write the CSR with potential exception flags from retiring floating point instruction
csr_op_o = CSR_SET;
csr_wdata_o = {59'b0, commit_instr_i[0].ex.cause[4:0]};
csr_write_fflags_o = 1'b1;
end
// ---------
// CSR Logic
// ---------
@ -145,10 +155,15 @@ module commit_stage #(
// check if the second instruction can be committed as well and the first wasn't a CSR instruction
if (commit_ack_o[0] && commit_instr_i[1].valid && !halt_i && !(commit_instr_i[0].fu inside {CSR}) && !flush_dcache_i) begin
// only if the first instruction didn't throw an exception and this instruction won't throw an exception
// and the operator is of type ALU, LOAD, CTRL_FLOW, MULT
if (!exception_o.valid && !commit_instr_i[1].ex.valid && (commit_instr_i[1].fu inside {ALU, LOAD, CTRL_FLOW, MULT})) begin
// and the functional unit is of type ALU, LOAD, CTRL_FLOW, MULT or FPU
if (!exception_o.valid && !commit_instr_i[1].ex.valid && (commit_instr_i[1].fu inside {ALU, LOAD, CTRL_FLOW, MULT, FPU})) begin
we_o[1] = 1'b1;
commit_ack_o[1] = 1'b1;
// additionally check if we are retiring an FPU instruction because we need to make sure that we write all
// exception flags
csr_op_o = CSR_SET;
csr_wdata_o = {59'b0, (commit_instr_i[0].ex.cause[4:0] | commit_instr_i[1].ex.cause[4:0])};
csr_write_fflags_o = (commit_instr_i[1].fu == FPU);
end
end
end

View file

@ -45,6 +45,7 @@ module csr_regfile #(
input logic [11:0] csr_addr_i, // Address of the register to read/write
input logic [63:0] csr_wdata_i, // Write data in
output logic [63:0] csr_rdata_o, // Read data out
input logic csr_write_fflags_i, // Write fflags register
input logic [63:0] pc_i, // PC of instruction accessing the CSR
output exception_t csr_exception_o, // attempts to access a CSR without appropriate privilege
// level or to write a read-only register also
@ -96,8 +97,8 @@ module csr_regfile #(
// ----------------
// Assignments
// ----------------
// Debug MUX
assign csr_addr = csr_t'(((debug_csr_req_i) ? debug_csr_addr_i : csr_addr_i));
// Debug MUX and fflags register
assign csr_addr = csr_t'(((debug_csr_req_i) ? debug_csr_addr_i : (csr_write_fflags_i) ? CSR_FFLAGS : csr_addr_i));
// Output the read data directly
assign debug_csr_rdata_o = csr_rdata;
@ -667,6 +668,9 @@ module csr_regfile #(
csr_exception_o.valid = 1'b1;
end
end
// in case we are writing the CSR flag no exception can ever occur, don't set the valid flag in that case
if (csr_write_fflags_i)
csr_exception_o.valid = 1'b0;
// -------------------
// Wait for Interrupt

View file

@ -70,7 +70,8 @@ module issue_read_operands #(
// commit port
input logic [NR_COMMIT_PORTS-1:0][4:0] waddr_i,
input logic [NR_COMMIT_PORTS-1:0][63:0] wdata_i,
input logic [NR_COMMIT_PORTS-1:0] we_i
input logic [NR_COMMIT_PORTS-1:0] we_i,
input logic [NR_COMMIT_PORTS-1:0] we_fpr_i
// committing instruction
// from scoreboard
// input scoreboard_entry commit_instr_i,
@ -307,27 +308,23 @@ module issue_read_operands #(
// ----------------------
// Integer Register File
// ----------------------
logic [1:0][63:0] rdata_o;
assign operand_a_regfile = rdata_o[0];
assign operand_b_regfile = rdata_o[1];
ariane_regfile #(
.DATA_WIDTH ( 64 )
) regfile_i (
// Clock and Reset
.clk ( clk_i ),
.rst_n ( rst_ni ),
.test_en_i ( test_en_i ),
.raddr_a_i ( raddr_a ),
.rdata_a_o ( operand_a_regfile ),
.raddr_b_i ( issue_instr_i.rs2[4:0] ),
.rdata_b_o ( operand_b_regfile ),
.waddr_a_i ( waddr ),
.wdata_a_i ( wdata ),
.we_a_i ( we ),
.waddr_b_i ( waddr_i[1] ),
.wdata_b_i ( wdata_i[1] ),
.we_b_i ( we_i[1] )
.DATA_WIDTH ( 64 ),
.NR_READ_PORTS ( 2 ),
.NR_WRITE_PORTS ( 2 ),
.ZERO_REG_ZERO ( 1 )
) i_ariane_regfile (
.raddr_i ( '{issue_instr_i.rs2[4:0], raddr_a} ),
.rdata_o ( rdata_o ),
.waddr_i ( '{waddr_i[1], waddr} ),
.wdata_i ( '{wdata_i[1], wdata} ),
.we_i ( '{we_i[1], we} ),
.*
);
// ----------------------

View file

@ -76,6 +76,7 @@ module issue_stage #(
input logic [NR_COMMIT_PORTS-1:0][4:0] waddr_i,
input logic [NR_COMMIT_PORTS-1:0][63:0] wdata_i,
input logic [NR_COMMIT_PORTS-1:0] we_i,
input logic [NR_COMMIT_PORTS-1:0] we_fpr_i,
output scoreboard_entry_t [NR_COMMIT_PORTS-1:0] commit_instr_o,
input logic [NR_COMMIT_PORTS-1:0] commit_ack_i