verible-verilog-format: apply it on core directory (#1540)

using verible-v0.0-3422-g520ca4b9/bin/verible-verilog-format
with default configuration

Note: two files are not correctly handled by verible
- core/include/std_cache_pkg.sv
- core/cache_subsystem/cva6_hpdcache_if_adapter.sv
This commit is contained in:
André Sintzoff 2023-10-18 16:36:00 +02:00 committed by GitHub
parent 3d47805dfc
commit 7cd183b710
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
94 changed files with 21423 additions and 19841 deletions

View file

@ -13,56 +13,59 @@
// Date: 20.11.2020
// Description: Functional unit that dispatches CVA6 instructions to accelerators.
module acc_dispatcher import ariane_pkg::*; import riscv::*; #(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter type acc_req_t = acc_pkg::accelerator_req_t,
parameter type acc_resp_t = acc_pkg::accelerator_resp_t,
parameter type acc_cfg_t = logic,
parameter acc_cfg_t AccCfg = '0
module acc_dispatcher
import ariane_pkg::*;
import riscv::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter type acc_req_t = acc_pkg::accelerator_req_t,
parameter type acc_resp_t = acc_pkg::accelerator_resp_t,
parameter type acc_cfg_t = logic,
parameter acc_cfg_t AccCfg = '0
) (
input logic clk_i,
input logic rst_ni,
input logic clk_i,
input logic rst_ni,
// Interface with the CSR regfile
input logic acc_cons_en_i, // Accelerator memory consistent mode
output logic acc_fflags_valid_o,
output logic [4:0] acc_fflags_o,
input logic acc_cons_en_i, // Accelerator memory consistent mode
output logic acc_fflags_valid_o,
output logic [4:0] acc_fflags_o,
// Interface with the CSRs
input logic [2:0] fcsr_frm_i,
output logic dirty_v_state_o,
input logic [2:0] fcsr_frm_i,
output logic dirty_v_state_o,
// Interface with the issue stage
input scoreboard_entry_t issue_instr_i,
input logic issue_instr_hs_i,
output logic issue_stall_o,
input fu_data_t fu_data_i,
input scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr_i,
output logic [TRANS_ID_BITS-1:0] acc_trans_id_o,
output xlen_t acc_result_o,
output logic acc_valid_o,
output exception_t acc_exception_o,
input scoreboard_entry_t issue_instr_i,
input logic issue_instr_hs_i,
output logic issue_stall_o,
input fu_data_t fu_data_i,
input scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr_i,
output logic [TRANS_ID_BITS-1:0] acc_trans_id_o,
output xlen_t acc_result_o,
output logic acc_valid_o,
output exception_t acc_exception_o,
// Interface with the execute stage
output logic acc_valid_ex_o, // FU executed
output logic acc_valid_ex_o, // FU executed
// Interface with the commit stage
input logic [CVA6Cfg.NrCommitPorts-1:0] commit_ack_i,
input logic commit_st_barrier_i, // A store barrier was committed
input logic [CVA6Cfg.NrCommitPorts-1:0] commit_ack_i,
input logic commit_st_barrier_i, // A store barrier was committed
// Interface with the load/store unit
output logic acc_stall_st_pending_o,
input logic acc_no_st_pending_i,
input dcache_req_i_t [2:0] dcache_req_ports_i,
output logic acc_stall_st_pending_o,
input logic acc_no_st_pending_i,
input dcache_req_i_t [2:0] dcache_req_ports_i,
// Interface with the controller
output logic ctrl_halt_o,
input logic flush_unissued_instr_i,
input logic flush_ex_i,
output logic flush_pipeline_o,
output logic ctrl_halt_o,
input logic flush_unissued_instr_i,
input logic flush_ex_i,
output logic flush_pipeline_o,
// Interface with cache subsystem
output dcache_req_i_t [1:0] acc_dcache_req_ports_o,
input dcache_req_o_t [1:0] acc_dcache_req_ports_i,
input logic inval_ready_i,
output logic inval_valid_o,
output logic [63:0] inval_addr_o,
output dcache_req_i_t [1:0] acc_dcache_req_ports_o,
input dcache_req_o_t [1:0] acc_dcache_req_ports_i,
input logic inval_ready_i,
output logic inval_valid_o,
output logic [63:0] inval_addr_o,
// Accelerator interface
output acc_req_t acc_req_o,
input acc_resp_t acc_resp_i
);
output acc_req_t acc_req_o,
input acc_resp_t acc_resp_i
);
`include "common_cells/registers.svh"
@ -96,16 +99,15 @@ module acc_dispatcher import ariane_pkg::*; import riscv::*; #(
always_comb begin : stall_issue
unique case (issue_instr_i.fu)
ACCEL:
// 1. We're issuing an accelerator instruction but the dispatcher isn't ready yet
issue_stall_o = ~acc_ready;
// 1. We're issuing an accelerator instruction but the dispatcher isn't ready yet
issue_stall_o = ~acc_ready;
LOAD:
// 2. We're issuing a scalar load but there is an inflight accelerator store.
issue_stall_o = acc_cons_en_i & ~acc_no_st_pending;
// 2. We're issuing a scalar load but there is an inflight accelerator store.
issue_stall_o = acc_cons_en_i & ~acc_no_st_pending;
STORE:
// 3. We're issuing a scalar store but there is an inflight accelerator load or store.
issue_stall_o = acc_cons_en_i & (~acc_no_st_pending | ~acc_no_ld_pending);
default:
issue_stall_o = 1'b0;
// 3. We're issuing a scalar store but there is an inflight accelerator load or store.
issue_stall_o = acc_cons_en_i & (~acc_no_st_pending | ~acc_no_ld_pending);
default: issue_stall_o = 1'b0;
endcase
end
@ -121,30 +123,30 @@ module acc_dispatcher import ariane_pkg::*; import riscv::*; #(
logic acc_insn_queue_empty;
logic [idx_width(InstructionQueueDepth)-1:0] acc_insn_queue_usage;
logic acc_commit;
logic [TRANS_ID_BITS-1:0] acc_commit_trans_id;
logic [ TRANS_ID_BITS-1:0] acc_commit_trans_id;
assign acc_data = acc_valid_ex_o ? fu_data_i : '0;
fifo_v3 #(
.DEPTH (InstructionQueueDepth),
.FALL_THROUGH(1'b1 ),
.dtype (fu_data_t )
.DEPTH (InstructionQueueDepth),
.FALL_THROUGH(1'b1),
.dtype (fu_data_t)
) i_acc_insn_queue (
.clk_i (clk_i ),
.rst_ni (rst_ni ),
.flush_i (flush_ex_i ),
.testmode_i(1'b0 ),
.data_i (fu_data_i ),
.push_i (acc_valid_q ),
.full_o (/* Unused */ ),
.data_o (acc_insn_queue_o ),
.pop_i (acc_insn_queue_pop ),
.empty_o (acc_insn_queue_empty),
.usage_o (acc_insn_queue_usage)
.clk_i (clk_i),
.rst_ni (rst_ni),
.flush_i (flush_ex_i),
.testmode_i(1'b0),
.data_i (fu_data_i),
.push_i (acc_valid_q),
.full_o ( /* Unused */),
.data_o (acc_insn_queue_o),
.pop_i (acc_insn_queue_pop),
.empty_o (acc_insn_queue_empty),
.usage_o (acc_insn_queue_usage)
);
// We are ready if the instruction queue is able to accept at least one more entry.
assign acc_ready = acc_insn_queue_usage < (InstructionQueueDepth-1);
assign acc_ready = acc_insn_queue_usage < (InstructionQueueDepth - 1);
/**********************************
* Non-speculative instructions *
@ -160,17 +162,15 @@ module acc_dispatcher import ariane_pkg::*; import riscv::*; #(
logic [NR_SB_ENTRIES-1:0] insn_ready_d, insn_ready_q;
`FF(insn_ready_q, insn_ready_d, '0)
always_comb begin: p_non_speculative_ff
always_comb begin : p_non_speculative_ff
// Maintain state
insn_pending_d = insn_pending_q;
insn_ready_d = insn_ready_q;
// We received a new instruction
if (acc_valid_q)
insn_pending_d[acc_data.trans_id] = 1'b1;
if (acc_valid_q) insn_pending_d[acc_data.trans_id] = 1'b1;
// Flush all received instructions
if (flush_ex_i)
insn_pending_d = '0;
if (flush_ex_i) insn_pending_d = '0;
// An accelerator instruction is no longer speculative.
if (acc_commit && insn_pending_q[acc_commit_trans_id]) begin
@ -179,9 +179,8 @@ module acc_dispatcher import ariane_pkg::*; import riscv::*; #(
end
// An accelerator instruction was issued.
if (acc_req_o.req_valid)
insn_ready_d[acc_req_o.trans_id] = 1'b0;
end: p_non_speculative_ff
if (acc_req_o.req_valid) insn_ready_d[acc_req_o.trans_id] = 1'b0;
end : p_non_speculative_ff
/*************************
* Accelerator request *
@ -193,18 +192,18 @@ module acc_dispatcher import ariane_pkg::*; import riscv::*; #(
acc_pkg::accelerator_req_t acc_req_int;
fall_through_register #(
.T(acc_pkg::accelerator_req_t)
.T(acc_pkg::accelerator_req_t)
) i_accelerator_req_register (
.clk_i (clk_i ),
.rst_ni (rst_ni ),
.clr_i (1'b0 ),
.testmode_i(1'b0 ),
.data_i (acc_req ),
.valid_i (acc_req_valid ),
.ready_o (acc_req_ready ),
.data_o (acc_req_int ),
.valid_o (acc_req_o.req_valid),
.ready_i (acc_resp_i.req_ready)
.clk_i (clk_i),
.rst_ni (rst_ni),
.clr_i (1'b0),
.testmode_i(1'b0),
.data_i (acc_req),
.valid_i (acc_req_valid),
.ready_o (acc_req_ready),
.data_o (acc_req_int),
.valid_o (acc_req_o.req_valid),
.ready_i (acc_resp_i.req_ready)
);
assign acc_req_o.insn = acc_req_int.insn;
@ -216,28 +215,33 @@ module acc_dispatcher import ariane_pkg::*; import riscv::*; #(
assign acc_req_o.acc_cons_en = acc_cons_en_i;
assign acc_req_o.inval_ready = inval_ready_i;
always_comb begin: accelerator_req_dispatcher
always_comb begin : accelerator_req_dispatcher
// Do not fetch from the instruction queue
acc_insn_queue_pop = 1'b0;
// Default values
acc_req = '0;
acc_req_valid = 1'b0;
acc_req = '0;
acc_req_valid = 1'b0;
// Unpack fu_data_t into accelerator_req_t
if (!acc_insn_queue_empty) begin
acc_req = '{
// Instruction is forwarded from the decoder as an immediate
// -
// frm rounding information is up to date during a valid request to the accelerator
// The scoreboard synchronizes it with previous fcsr writes, and future fcsr writes
// do not take place until the accelerator answers (Ariane commits in-order)
insn : acc_insn_queue_o.imm[31:0],
rs1 : acc_insn_queue_o.operand_a,
rs2 : acc_insn_queue_o.operand_b,
frm : fpnew_pkg::roundmode_e'(fcsr_frm_i),
trans_id: acc_insn_queue_o.trans_id,
default : '0
// Instruction is forwarded from the decoder as an immediate
// -
// frm rounding information is up to date during a valid request to the accelerator
// The scoreboard synchronizes it with previous fcsr writes, and future fcsr writes
// do not take place until the accelerator answers (Ariane commits in-order)
insn :
acc_insn_queue_o.imm[
31
:
0
],
rs1 : acc_insn_queue_o.operand_a,
rs2 : acc_insn_queue_o.operand_b,
frm : fpnew_pkg::roundmode_e'(fcsr_frm_i),
trans_id: acc_insn_queue_o.trans_id,
default: '0
};
// Wait until the instruction is no longer speculative.
acc_req_valid = insn_ready_q[acc_insn_queue_o.trans_id] ||
@ -254,26 +258,22 @@ module acc_dispatcher import ariane_pkg::*; import riscv::*; #(
logic acc_st_disp;
// Unpack the accelerator response
assign acc_trans_id_o = acc_resp_i.trans_id;
assign acc_result_o = acc_resp_i.result;
assign acc_valid_o = acc_resp_i.resp_valid;
assign acc_exception_o = '{
cause: riscv::ILLEGAL_INSTR,
tval : '0,
valid: acc_resp_i.error
};
assign acc_fflags_valid_o = acc_resp_i.fflags_valid;
assign acc_fflags_o = acc_resp_i.fflags;
assign acc_trans_id_o = acc_resp_i.trans_id;
assign acc_result_o = acc_resp_i.result;
assign acc_valid_o = acc_resp_i.resp_valid;
assign acc_exception_o = '{cause: riscv::ILLEGAL_INSTR, tval : '0, valid: acc_resp_i.error};
assign acc_fflags_valid_o = acc_resp_i.fflags_valid;
assign acc_fflags_o = acc_resp_i.fflags;
// Always ready to receive responses
assign acc_req_o.resp_ready = 1'b1;
// Signal dispatched load/store to issue stage
assign acc_ld_disp = acc_req_valid && (acc_insn_queue_o.operation == ACCEL_OP_LOAD);
assign acc_st_disp = acc_req_valid && (acc_insn_queue_o.operation == ACCEL_OP_STORE);
assign acc_ld_disp = acc_req_valid && (acc_insn_queue_o.operation == ACCEL_OP_LOAD);
assign acc_st_disp = acc_req_valid && (acc_insn_queue_o.operation == ACCEL_OP_STORE);
// Cache invalidation
assign inval_valid_o = acc_resp_i.inval_valid;
assign inval_addr_o = acc_resp_i.inval_addr;
assign inval_valid_o = acc_resp_i.inval_valid;
assign inval_addr_o = acc_resp_i.inval_addr;
/**************************
* Accelerator commit *
@ -282,13 +282,11 @@ module acc_dispatcher import ariane_pkg::*; import riscv::*; #(
// Instruction can be issued to the (in-order) back-end if
// it reached the top of the scoreboard and it hasn't been
// issued yet
always_comb begin: accelerator_commit
always_comb begin : accelerator_commit
acc_commit = 1'b0;
if (!commit_instr_i[0].valid && commit_instr_i[0].fu == ACCEL)
acc_commit = 1'b1;
if (commit_instr_i[0].valid &&
!commit_instr_i[1].valid && commit_instr_i[1].fu == ACCEL)
acc_commit = 1'b1;
if (!commit_instr_i[0].valid && commit_instr_i[0].fu == ACCEL) acc_commit = 1'b1;
if (commit_instr_i[0].valid && !commit_instr_i[1].valid && commit_instr_i[1].fu == ACCEL)
acc_commit = 1'b1;
end
// Dirty the V state if we are committing anything related to the vector accelerator
@ -330,37 +328,38 @@ module acc_dispatcher import ariane_pkg::*; import riscv::*; #(
// Count speculative loads. These can still be flushed.
counter #(
.WIDTH (3),
.STICKY_OVERFLOW (0)
.WIDTH (3),
.STICKY_OVERFLOW(0)
) i_acc_spec_loads (
.clk_i (clk_i ),
.rst_ni (rst_ni ),
.clear_i (flush_ex_i ),
.en_i ((acc_valid_d && issue_instr_i.op == ACCEL_OP_LOAD) ^ acc_ld_disp),
.load_i (1'b0 ),
.down_i (acc_ld_disp ),
.d_i ('0 ),
.q_o (acc_spec_loads_pending ),
.overflow_o (acc_spec_loads_overflow )
.clk_i (clk_i),
.rst_ni (rst_ni),
.clear_i (flush_ex_i),
.en_i ((acc_valid_d && issue_instr_i.op == ACCEL_OP_LOAD) ^ acc_ld_disp),
.load_i (1'b0),
.down_i (acc_ld_disp),
.d_i ('0),
.q_o (acc_spec_loads_pending),
.overflow_o(acc_spec_loads_overflow)
);
// Count dispatched loads. These cannot be flushed anymore.
counter #(
.WIDTH (3),
.STICKY_OVERFLOW (0)
.WIDTH (3),
.STICKY_OVERFLOW(0)
) i_acc_disp_loads (
.clk_i (clk_i ),
.rst_ni (rst_ni ),
.clear_i (1'b0 ),
.en_i (acc_ld_disp ^ acc_resp_i.load_complete),
.load_i (1'b0 ),
.down_i (acc_resp_i.load_complete),
.d_i ('0 ),
.q_o (acc_disp_loads_pending ),
.overflow_o (acc_disp_loads_overflow )
.clk_i (clk_i),
.rst_ni (rst_ni),
.clear_i (1'b0),
.en_i (acc_ld_disp ^ acc_resp_i.load_complete),
.load_i (1'b0),
.down_i (acc_resp_i.load_complete),
.d_i ('0),
.q_o (acc_disp_loads_pending),
.overflow_o(acc_disp_loads_overflow)
);
acc_dispatcher_no_load_overflow: assert property (
acc_dispatcher_no_load_overflow :
assert property (
@(posedge clk_i) disable iff (~rst_ni) (acc_spec_loads_overflow == 1'b0) && (acc_disp_loads_overflow == 1'b0) )
else $error("[acc_dispatcher] Too many pending loads.");
@ -374,37 +373,38 @@ module acc_dispatcher import ariane_pkg::*; import riscv::*; #(
// Count speculative stores. These can still be flushed.
counter #(
.WIDTH (3),
.STICKY_OVERFLOW (0)
.WIDTH (3),
.STICKY_OVERFLOW(0)
) i_acc_spec_stores (
.clk_i (clk_i ),
.rst_ni (rst_ni ),
.clear_i (flush_ex_i ),
.en_i ((acc_valid_d && issue_instr_i.op == ACCEL_OP_STORE) ^ acc_st_disp),
.load_i (1'b0 ),
.down_i (acc_st_disp ),
.d_i ('0 ),
.q_o (acc_spec_stores_pending ),
.overflow_o (acc_spec_stores_overflow)
.clk_i (clk_i),
.rst_ni (rst_ni),
.clear_i (flush_ex_i),
.en_i ((acc_valid_d && issue_instr_i.op == ACCEL_OP_STORE) ^ acc_st_disp),
.load_i (1'b0),
.down_i (acc_st_disp),
.d_i ('0),
.q_o (acc_spec_stores_pending),
.overflow_o(acc_spec_stores_overflow)
);
// Count dispatched stores. These cannot be flushed anymore.
counter #(
.WIDTH (3),
.STICKY_OVERFLOW (0)
.WIDTH (3),
.STICKY_OVERFLOW(0)
) i_acc_disp_stores (
.clk_i (clk_i ),
.rst_ni (rst_ni ),
.clear_i (1'b0 ),
.en_i (acc_st_disp ^ acc_resp_i.store_complete),
.load_i (1'b0 ),
.down_i (acc_resp_i.store_complete),
.d_i ('0 ),
.q_o (acc_disp_stores_pending ),
.overflow_o (acc_disp_stores_overflow )
.clk_i (clk_i),
.rst_ni (rst_ni),
.clear_i (1'b0),
.en_i (acc_st_disp ^ acc_resp_i.store_complete),
.load_i (1'b0),
.down_i (acc_resp_i.store_complete),
.d_i ('0),
.q_o (acc_disp_stores_pending),
.overflow_o(acc_disp_stores_overflow)
);
acc_dispatcher_no_store_overflow: assert property (
acc_dispatcher_no_store_overflow :
assert property (
@(posedge clk_i) disable iff (~rst_ni) (acc_spec_stores_overflow == 1'b0) && (acc_disp_stores_overflow == 1'b0) )
else $error("[acc_dispatcher] Too many pending stores.");

View file

@ -18,291 +18,305 @@
// Description: Ariane ALU based on RI5CY's ALU
module alu import ariane_pkg::*; #(
module alu
import ariane_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) (
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input fu_data_t fu_data_i,
output riscv::xlen_t result_o,
output logic alu_branch_res_o
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input fu_data_t fu_data_i,
output riscv::xlen_t result_o,
output logic alu_branch_res_o
);
riscv::xlen_t operand_a_rev;
logic [31:0] operand_a_rev32;
logic [riscv::XLEN:0] operand_b_neg;
logic [riscv::XLEN+1:0] adder_result_ext_o;
logic less; // handles both signed and unsigned forms
logic [31:0] rolw; // Rotate Left Word
logic [31:0] rorw; // Rotate Right Word
logic [31:0] orcbw, rev8w;
logic [$clog2(riscv::XLEN) :0] cpop; // Count Population
logic [$clog2(riscv::XLEN)-1 :0] lz_tz_count; // Count Leading Zeros
logic [4:0] lz_tz_wcount; // Count Leading Zeros Word
logic lz_tz_empty, lz_tz_wempty;
riscv::xlen_t operand_a_rev;
logic [ 31:0] operand_a_rev32;
logic [ riscv::XLEN:0] operand_b_neg;
logic [riscv::XLEN+1:0] adder_result_ext_o;
logic less; // handles both signed and unsigned forms
logic [ 31:0] rolw; // Rotate Left Word
logic [ 31:0] rorw; // Rotate Right Word
logic [31:0] orcbw, rev8w;
logic [ $clog2(riscv::XLEN) : 0] cpop; // Count Population
logic [$clog2(riscv::XLEN)-1 : 0] lz_tz_count; // Count Leading Zeros
logic [ 4:0] lz_tz_wcount; // Count Leading Zeros Word
logic lz_tz_empty, lz_tz_wempty;
// bit reverse operand_a for left shifts and bit counting
generate
genvar k;
for(k = 0; k < riscv::XLEN; k++)
assign operand_a_rev[k] = fu_data_i.operand_a[riscv::XLEN-1-k];
// bit reverse operand_a for left shifts and bit counting
generate
genvar k;
for (k = 0; k < riscv::XLEN; k++)
assign operand_a_rev[k] = fu_data_i.operand_a[riscv::XLEN-1-k];
for (k = 0; k < 32; k++)
assign operand_a_rev32[k] = fu_data_i.operand_a[31-k];
endgenerate
for (k = 0; k < 32; k++) assign operand_a_rev32[k] = fu_data_i.operand_a[31-k];
endgenerate
// ------
// Adder
// ------
logic adder_op_b_negate;
logic adder_z_flag;
logic [riscv::XLEN:0] adder_in_a, adder_in_b;
riscv::xlen_t adder_result;
logic [riscv::XLEN-1:0] operand_a_bitmanip, bit_indx;
// ------
// Adder
// ------
logic adder_op_b_negate;
logic adder_z_flag;
logic [riscv::XLEN:0] adder_in_a, adder_in_b;
riscv::xlen_t adder_result;
logic [riscv::XLEN-1:0] operand_a_bitmanip, bit_indx;
always_comb begin
adder_op_b_negate = 1'b0;
always_comb begin
adder_op_b_negate = 1'b0;
unique case (fu_data_i.operation)
// ADDER OPS
EQ, NE, SUB, SUBW, ANDN, ORN, XNOR: adder_op_b_negate = 1'b1;
default: ;
endcase
end
always_comb begin
operand_a_bitmanip = fu_data_i.operand_a;
if (ariane_pkg::BITMANIP) begin
unique case (fu_data_i.operation)
// ADDER OPS
EQ, NE,
SUB, SUBW,
ANDN, ORN, XNOR: adder_op_b_negate = 1'b1;
default: ;
SH1ADD: operand_a_bitmanip = fu_data_i.operand_a << 1;
SH2ADD: operand_a_bitmanip = fu_data_i.operand_a << 2;
SH3ADD: operand_a_bitmanip = fu_data_i.operand_a << 3;
SH1ADDUW: operand_a_bitmanip = fu_data_i.operand_a[31:0] << 1;
SH2ADDUW: operand_a_bitmanip = fu_data_i.operand_a[31:0] << 2;
SH3ADDUW: operand_a_bitmanip = fu_data_i.operand_a[31:0] << 3;
CTZ: operand_a_bitmanip = operand_a_rev;
CTZW: operand_a_bitmanip = operand_a_rev32;
ADDUW, CPOPW, CLZW: operand_a_bitmanip = fu_data_i.operand_a[31:0];
default: ;
endcase
end
end
always_comb begin
operand_a_bitmanip = fu_data_i.operand_a;
// prepare operand a
assign adder_in_a = {operand_a_bitmanip, 1'b1};
if (ariane_pkg::BITMANIP) begin
unique case (fu_data_i.operation)
SH1ADD : operand_a_bitmanip = fu_data_i.operand_a << 1;
SH2ADD : operand_a_bitmanip = fu_data_i.operand_a << 2;
SH3ADD : operand_a_bitmanip = fu_data_i.operand_a << 3;
SH1ADDUW : operand_a_bitmanip = fu_data_i.operand_a[31:0] << 1;
SH2ADDUW : operand_a_bitmanip = fu_data_i.operand_a[31:0] << 2;
SH3ADDUW : operand_a_bitmanip = fu_data_i.operand_a[31:0] << 3;
CTZ : operand_a_bitmanip = operand_a_rev;
CTZW : operand_a_bitmanip = operand_a_rev32;
ADDUW, CPOPW, CLZW : operand_a_bitmanip = fu_data_i.operand_a[31:0];
default : ;
endcase
end
end
// prepare operand b
assign operand_b_neg = {fu_data_i.operand_b, 1'b0} ^ {riscv::XLEN + 1{adder_op_b_negate}};
assign adder_in_b = operand_b_neg;
// prepare operand a
assign adder_in_a = {operand_a_bitmanip, 1'b1};
// actual adder
assign adder_result_ext_o = $unsigned(adder_in_a) + $unsigned(adder_in_b);
assign adder_result = adder_result_ext_o[riscv::XLEN:1];
assign adder_z_flag = ~|adder_result;
// prepare operand b
assign operand_b_neg = {fu_data_i.operand_b, 1'b0} ^ {riscv::XLEN+1{adder_op_b_negate}};
assign adder_in_b = operand_b_neg ;
// get the right branch comparison result
always_comb begin : branch_resolve
// set comparison by default
alu_branch_res_o = 1'b1;
case (fu_data_i.operation)
EQ: alu_branch_res_o = adder_z_flag;
NE: alu_branch_res_o = ~adder_z_flag;
LTS, LTU: alu_branch_res_o = less;
GES, GEU: alu_branch_res_o = ~less;
default: alu_branch_res_o = 1'b1;
endcase
end
// actual adder
assign adder_result_ext_o = $unsigned(adder_in_a) + $unsigned(adder_in_b);
assign adder_result = adder_result_ext_o[riscv::XLEN:1];
assign adder_z_flag = ~|adder_result;
// ---------
// Shifts
// ---------
// get the right branch comparison result
always_comb begin : branch_resolve
// set comparison by default
alu_branch_res_o = 1'b1;
case (fu_data_i.operation)
EQ: alu_branch_res_o = adder_z_flag;
NE: alu_branch_res_o = ~adder_z_flag;
LTS, LTU: alu_branch_res_o = less;
GES, GEU: alu_branch_res_o = ~less;
default: alu_branch_res_o = 1'b1;
endcase
end
// TODO: this can probably be optimized significantly
logic shift_left; // should we shift left
logic shift_arithmetic;
// ---------
// Shifts
// ---------
riscv::xlen_t shift_amt; // amount of shift, to the right
riscv::xlen_t shift_op_a; // input of the shifter
logic [ 31:0] shift_op_a32; // input to the 32 bit shift operation
// TODO: this can probably be optimized significantly
logic shift_left; // should we shift left
logic shift_arithmetic;
riscv::xlen_t shift_result;
logic [ 31:0] shift_result32;
riscv::xlen_t shift_amt; // amount of shift, to the right
riscv::xlen_t shift_op_a; // input of the shifter
logic [31:0] shift_op_a32; // input to the 32 bit shift operation
logic [riscv::XLEN:0] shift_right_result;
logic [ 32:0] shift_right_result32;
riscv::xlen_t shift_result;
logic [31:0] shift_result32;
riscv::xlen_t shift_left_result;
logic [ 31:0] shift_left_result32;
logic [riscv::XLEN:0] shift_right_result;
logic [32:0] shift_right_result32;
assign shift_amt = fu_data_i.operand_b;
riscv::xlen_t shift_left_result;
logic [31:0] shift_left_result32;
assign shift_left = (fu_data_i.operation == SLL) | (fu_data_i.operation == SLLW);
assign shift_amt = fu_data_i.operand_b;
assign shift_arithmetic = (fu_data_i.operation == SRA) | (fu_data_i.operation == SRAW);
assign shift_left = (fu_data_i.operation == SLL) | (fu_data_i.operation == SLLW);
// right shifts, we let the synthesizer optimize this
logic [riscv::XLEN:0] shift_op_a_64;
logic [32:0] shift_op_a_32;
assign shift_arithmetic = (fu_data_i.operation == SRA) | (fu_data_i.operation == SRAW);
// choose the bit reversed or the normal input for shift operand a
assign shift_op_a = shift_left ? operand_a_rev : fu_data_i.operand_a;
assign shift_op_a32 = shift_left ? operand_a_rev32 : fu_data_i.operand_a[31:0];
// right shifts, we let the synthesizer optimize this
logic [riscv::XLEN:0] shift_op_a_64;
logic [32:0] shift_op_a_32;
assign shift_op_a_64 = {shift_arithmetic & shift_op_a[riscv::XLEN-1], shift_op_a};
assign shift_op_a_32 = {shift_arithmetic & shift_op_a[31], shift_op_a32};
// choose the bit reversed or the normal input for shift operand a
assign shift_op_a = shift_left ? operand_a_rev : fu_data_i.operand_a;
assign shift_op_a32 = shift_left ? operand_a_rev32 : fu_data_i.operand_a[31:0];
assign shift_right_result = $unsigned($signed(shift_op_a_64) >>> shift_amt[5:0]);
assign shift_op_a_64 = { shift_arithmetic & shift_op_a[riscv::XLEN-1], shift_op_a};
assign shift_op_a_32 = { shift_arithmetic & shift_op_a[31], shift_op_a32};
assign shift_right_result32 = $unsigned($signed(shift_op_a_32) >>> shift_amt[4:0]);
// bit reverse the shift_right_result for left shifts
genvar j;
generate
for (j = 0; j < riscv::XLEN; j++)
assign shift_left_result[j] = shift_right_result[riscv::XLEN-1-j];
assign shift_right_result = $unsigned($signed(shift_op_a_64) >>> shift_amt[5:0]);
for (j = 0; j < 32; j++) assign shift_left_result32[j] = shift_right_result32[31-j];
assign shift_right_result32 = $unsigned($signed(shift_op_a_32) >>> shift_amt[4:0]);
// bit reverse the shift_right_result for left shifts
genvar j;
generate
for(j = 0; j < riscv::XLEN; j++)
assign shift_left_result[j] = shift_right_result[riscv::XLEN-1-j];
endgenerate
for(j = 0; j < 32; j++)
assign shift_left_result32[j] = shift_right_result32[31-j];
assign shift_result = shift_left ? shift_left_result : shift_right_result[riscv::XLEN-1:0];
assign shift_result32 = shift_left ? shift_left_result32 : shift_right_result32[31:0];
endgenerate
// ------------
// Comparisons
// ------------
assign shift_result = shift_left ? shift_left_result : shift_right_result[riscv::XLEN-1:0];
assign shift_result32 = shift_left ? shift_left_result32 : shift_right_result32[31:0];
always_comb begin
logic sgn;
sgn = 1'b0;
// ------------
// Comparisons
// ------------
always_comb begin
logic sgn;
sgn = 1'b0;
if ((fu_data_i.operation == SLTS) ||
if ((fu_data_i.operation == SLTS) ||
(fu_data_i.operation == LTS) ||
(fu_data_i.operation == GES) ||
(fu_data_i.operation == MAX) ||
(fu_data_i.operation == MIN))
sgn = 1'b1;
sgn = 1'b1;
less = ($signed({sgn & fu_data_i.operand_a[riscv::XLEN-1], fu_data_i.operand_a}) < $signed({sgn & fu_data_i.operand_b[riscv::XLEN-1], fu_data_i.operand_b}));
end
less = ($signed({sgn & fu_data_i.operand_a[riscv::XLEN-1], fu_data_i.operand_a}) <
$signed({sgn & fu_data_i.operand_b[riscv::XLEN-1], fu_data_i.operand_b}));
end
if (ariane_pkg::BITMANIP) begin : gen_bitmanip
// Count Population + Count population Word
if (ariane_pkg::BITMANIP) begin : gen_bitmanip
// Count Population + Count population Word
popcount #(
.INPUT_WIDTH(riscv::XLEN)
) i_cpop_count (
.data_i (operand_a_bitmanip),
.popcount_o (cpop)
);
popcount #(
.INPUT_WIDTH(riscv::XLEN)
) i_cpop_count (
.data_i (operand_a_bitmanip),
.popcount_o(cpop)
);
// Count Leading/Trailing Zeros
// 64b
lzc #(
.WIDTH(riscv::XLEN),
.MODE (1)
) i_clz_64b (
.in_i(operand_a_bitmanip),
.cnt_o(lz_tz_count),
.empty_o(lz_tz_empty)
);
//32b
lzc #(
.WIDTH(32),
.MODE (1)
) i_clz_32b (
.in_i(operand_a_bitmanip[31:0]),
.cnt_o(lz_tz_wcount),
.empty_o(lz_tz_wempty)
);
end
// -----------
// Result MUX
// -----------
always_comb begin
result_o = '0;
unique case (fu_data_i.operation)
// Standard Operations
ANDL, ANDN: result_o = fu_data_i.operand_a & operand_b_neg[riscv::XLEN:1];
ORL, ORN: result_o = fu_data_i.operand_a | operand_b_neg[riscv::XLEN:1];
XORL, XNOR: result_o = fu_data_i.operand_a ^ operand_b_neg[riscv::XLEN:1];
// Adder Operations
ADD, SUB, ADDUW, SH1ADD, SH2ADD, SH3ADD, SH1ADDUW, SH2ADDUW, SH3ADDUW:
result_o = adder_result;
// Add word: Ignore the upper bits and sign extend to 64 bit
ADDW, SUBW: result_o = {{riscv::XLEN - 32{adder_result[31]}}, adder_result[31:0]};
// Shift Operations
SLL, SRL, SRA: result_o = (riscv::XLEN == 64) ? shift_result : shift_result32;
// Shifts 32 bit
SLLW, SRLW, SRAW: result_o = {{riscv::XLEN - 32{shift_result32[31]}}, shift_result32[31:0]};
// Comparison Operations
SLTS, SLTU: result_o = {{riscv::XLEN - 1{1'b0}}, less};
default: ; // default case to suppress unique warning
endcase
if (ariane_pkg::BITMANIP) begin
// One-hot bit mask used by the single-bit instructions (BCLR/BEXT/BINV/BSET)
bit_indx = 1 << (fu_data_i.operand_b & (riscv::XLEN - 1));
orcbw = {
{8{|fu_data_i.operand_a[31:24]}},
{8{|fu_data_i.operand_a[23:16]}},
{8{|fu_data_i.operand_a[15:8]}},
{8{|fu_data_i.operand_a[7:0]}}
};
rev8w = {
{fu_data_i.operand_a[7:0]},
{fu_data_i.operand_a[15:8]},
{fu_data_i.operand_a[23:16]},
{fu_data_i.operand_a[31:24]}
};
// rolw, roriw, rorw
rolw = ({{riscv::XLEN-32{1'b0}},fu_data_i.operand_a[31:0]} << fu_data_i.operand_b[4:0]) | ({{riscv::XLEN-32{1'b0}},fu_data_i.operand_a[31:0]} >> (riscv::XLEN-32-fu_data_i.operand_b[4:0]));
rorw = ({{riscv::XLEN-32{1'b0}},fu_data_i.operand_a[31:0]} >> fu_data_i.operand_b[4:0]) | ({{riscv::XLEN-32{1'b0}},fu_data_i.operand_a[31:0]} << (riscv::XLEN-32-fu_data_i.operand_b[4:0]));
unique case (fu_data_i.operation)
// Left Shift 32 bit unsigned
SLLIUW:
result_o = {{riscv::XLEN-32{1'b0}}, fu_data_i.operand_a[31:0]} << fu_data_i.operand_b[5:0];
// Integer minimum/maximum
MAX: result_o = less ? fu_data_i.operand_b : fu_data_i.operand_a;
MAXU: result_o = less ? fu_data_i.operand_b : fu_data_i.operand_a;
MIN: result_o = ~less ? fu_data_i.operand_b : fu_data_i.operand_a;
MINU: result_o = ~less ? fu_data_i.operand_b : fu_data_i.operand_a;
// Single bit instructions operations
BCLR, BCLRI: result_o = fu_data_i.operand_a & ~bit_indx;
BEXT, BEXTI: result_o = {{riscv::XLEN - 1{1'b0}}, |(fu_data_i.operand_a & bit_indx)};
BINV, BINVI: result_o = fu_data_i.operand_a ^ bit_indx;
BSET, BSETI: result_o = fu_data_i.operand_a | bit_indx;
// Count Leading/Trailing Zeros
// 64b
lzc #(
.WIDTH(riscv::XLEN),
.MODE (1)
) i_clz_64b (
.in_i (operand_a_bitmanip),
.cnt_o (lz_tz_count),
.empty_o (lz_tz_empty)
);
//32b
lzc #(
.WIDTH(32),
.MODE (1)
) i_clz_32b (
.in_i (operand_a_bitmanip[31:0]),
.cnt_o (lz_tz_wcount),
.empty_o (lz_tz_wempty)
);
CLZ, CTZ:
result_o = (lz_tz_empty) ? ({{riscv::XLEN - $clog2(riscv::XLEN) {1'b0}}, lz_tz_count} + 1) :
{{riscv::XLEN - $clog2(riscv::XLEN) {1'b0}}, lz_tz_count};
CLZW, CTZW: result_o = (lz_tz_wempty) ? 32 : {{riscv::XLEN - 5{1'b0}}, lz_tz_wcount};
// Count population
CPOP, CPOPW: result_o = {{(riscv::XLEN - ($clog2(riscv::XLEN) + 1)) {1'b0}}, cpop};
// Sign and Zero Extend
SEXTB: result_o = {{riscv::XLEN - 8{fu_data_i.operand_a[7]}}, fu_data_i.operand_a[7:0]};
SEXTH: result_o = {{riscv::XLEN - 16{fu_data_i.operand_a[15]}}, fu_data_i.operand_a[15:0]};
ZEXTH: result_o = {{riscv::XLEN - 16{1'b0}}, fu_data_i.operand_a[15:0]};
// Bitwise Rotation
ROL:
result_o = (riscv::XLEN == 64) ? ((fu_data_i.operand_a << fu_data_i.operand_b[5:0]) | (fu_data_i.operand_a >> (riscv::XLEN-fu_data_i.operand_b[5:0]))) : ((fu_data_i.operand_a << fu_data_i.operand_b[4:0]) | (fu_data_i.operand_a >> (riscv::XLEN-fu_data_i.operand_b[4:0])));
ROLW: result_o = {{riscv::XLEN - 32{rolw[31]}}, rolw};
ROR, RORI:
result_o = (riscv::XLEN == 64) ? ((fu_data_i.operand_a >> fu_data_i.operand_b[5:0]) | (fu_data_i.operand_a << (riscv::XLEN-fu_data_i.operand_b[5:0]))) : ((fu_data_i.operand_a >> fu_data_i.operand_b[4:0]) | (fu_data_i.operand_a << (riscv::XLEN-fu_data_i.operand_b[4:0])));
RORW, RORIW: result_o = {{riscv::XLEN - 32{rorw[31]}}, rorw};
ORCB:
result_o = (riscv::XLEN == 64) ? ({{8{|fu_data_i.operand_a[63:56]}}, {8{|fu_data_i.operand_a[55:48]}}, {8{|fu_data_i.operand_a[47:40]}}, {8{|fu_data_i.operand_a[39:32]}}, orcbw}) : orcbw;
REV8:
result_o = (riscv::XLEN == 64) ? ({rev8w , {fu_data_i.operand_a[39:32]}, {fu_data_i.operand_a[47:40]}, {fu_data_i.operand_a[55:48]}, {fu_data_i.operand_a[63:56]}}) : rev8w;
default: ; // default case to suppress unique warning
endcase
end
// -----------
// Result MUX
// -----------
always_comb begin
result_o = '0;
unique case (fu_data_i.operation)
// Standard Operations
ANDL, ANDN: result_o = fu_data_i.operand_a & operand_b_neg[riscv::XLEN:1];
ORL, ORN : result_o = fu_data_i.operand_a | operand_b_neg[riscv::XLEN:1];
XORL, XNOR: result_o = fu_data_i.operand_a ^ operand_b_neg[riscv::XLEN:1];
// Adder Operations
ADD, SUB,
ADDUW,
SH1ADD, SH2ADD, SH3ADD,
SH1ADDUW, SH2ADDUW, SH3ADDUW: result_o = adder_result;
// Add word: Ignore the upper bits and sign extend to 64 bit
ADDW, SUBW: result_o = {{riscv::XLEN-32{adder_result[31]}}, adder_result[31:0]};
// Shift Operations
SLL,
SRL, SRA: result_o = (riscv::XLEN == 64) ? shift_result : shift_result32;
// Shifts 32 bit
SLLW,
SRLW, SRAW: result_o = {{riscv::XLEN-32{shift_result32[31]}}, shift_result32[31:0]};
// Comparison Operations
SLTS, SLTU: result_o = {{riscv::XLEN-1{1'b0}}, less};
default: ; // default case to suppress unique warning
endcase
if (ariane_pkg::BITMANIP) begin
// Index for Bitwise Rotation
bit_indx = 1 << (fu_data_i.operand_b & (riscv::XLEN-1));
orcbw = {{8{|fu_data_i.operand_a[31:24]}}, {8{|fu_data_i.operand_a[23:16]}}, {8{|fu_data_i.operand_a[15:8]}}, {8{|fu_data_i.operand_a[7:0]}}};
rev8w = {{fu_data_i.operand_a[7:0]}, {fu_data_i.operand_a[15:8]}, {fu_data_i.operand_a[23:16]}, {fu_data_i.operand_a[31:24]}};
// rolw, roriw, rorw
rolw = ({{riscv::XLEN-32{1'b0}},fu_data_i.operand_a[31:0]} << fu_data_i.operand_b[4:0]) | ({{riscv::XLEN-32{1'b0}},fu_data_i.operand_a[31:0]} >> (riscv::XLEN-32-fu_data_i.operand_b[4:0]));
rorw = ({{riscv::XLEN-32{1'b0}},fu_data_i.operand_a[31:0]} >> fu_data_i.operand_b[4:0]) | ({{riscv::XLEN-32{1'b0}},fu_data_i.operand_a[31:0]} << (riscv::XLEN-32-fu_data_i.operand_b[4:0]));
unique case (fu_data_i.operation)
// Left Shift 32 bit unsigned
SLLIUW: result_o = {{riscv::XLEN-32{1'b0}}, fu_data_i.operand_a[31:0]} << fu_data_i.operand_b[5:0];
// Integer minimum/maximum
MAX: result_o = less ? fu_data_i.operand_b : fu_data_i.operand_a;
MAXU: result_o = less ? fu_data_i.operand_b : fu_data_i.operand_a;
MIN: result_o = ~less ? fu_data_i.operand_b : fu_data_i.operand_a;
MINU: result_o = ~less ? fu_data_i.operand_b : fu_data_i.operand_a;
// Single bit instructions operations
BCLR, BCLRI: result_o = fu_data_i.operand_a & ~bit_indx;
BEXT, BEXTI: result_o = {{riscv::XLEN-1{1'b0}}, |(fu_data_i.operand_a & bit_indx)};
BINV, BINVI: result_o = fu_data_i.operand_a ^ bit_indx;
BSET, BSETI: result_o = fu_data_i.operand_a | bit_indx;
// Count Leading/Trailing Zeros
CLZ, CTZ : result_o = (lz_tz_empty) ? ({{riscv::XLEN-$clog2(riscv::XLEN){1'b0}}, lz_tz_count} + 1) : {{riscv::XLEN-$clog2(riscv::XLEN){1'b0}}, lz_tz_count};
CLZW, CTZW: result_o = (lz_tz_wempty) ? 32 : {{riscv::XLEN-5{1'b0}}, lz_tz_wcount};
// Count population
CPOP, CPOPW: result_o = {{(riscv::XLEN-($clog2(riscv::XLEN)+1)){1'b0}}, cpop};
// Sign and Zero Extend
SEXTB: result_o = {{riscv::XLEN-8{fu_data_i.operand_a[7]}}, fu_data_i.operand_a[7:0]};
SEXTH: result_o = {{riscv::XLEN-16{fu_data_i.operand_a[15]}}, fu_data_i.operand_a[15:0]};
ZEXTH: result_o = {{riscv::XLEN-16{1'b0}}, fu_data_i.operand_a[15:0]};
// Bitwise Rotation
ROL: result_o = (riscv::XLEN == 64) ? ((fu_data_i.operand_a << fu_data_i.operand_b[5:0]) | (fu_data_i.operand_a >> (riscv::XLEN-fu_data_i.operand_b[5:0]))) : ((fu_data_i.operand_a << fu_data_i.operand_b[4:0]) | (fu_data_i.operand_a >> (riscv::XLEN-fu_data_i.operand_b[4:0])));
ROLW: result_o = {{riscv::XLEN-32{rolw[31]}}, rolw};
ROR, RORI: result_o = (riscv::XLEN == 64) ? ((fu_data_i.operand_a >> fu_data_i.operand_b[5:0]) | (fu_data_i.operand_a << (riscv::XLEN-fu_data_i.operand_b[5:0]))) : ((fu_data_i.operand_a >> fu_data_i.operand_b[4:0]) | (fu_data_i.operand_a << (riscv::XLEN-fu_data_i.operand_b[4:0])));
RORW, RORIW: result_o = {{riscv::XLEN-32{rorw[31]}}, rorw};
ORCB: result_o = (riscv::XLEN == 64) ? ({{8{|fu_data_i.operand_a[63:56]}}, {8{|fu_data_i.operand_a[55:48]}}, {8{|fu_data_i.operand_a[47:40]}}, {8{|fu_data_i.operand_a[39:32]}}, orcbw}) : orcbw;
REV8: result_o = (riscv::XLEN == 64) ? ({rev8w , {fu_data_i.operand_a[39:32]}, {fu_data_i.operand_a[47:40]}, {fu_data_i.operand_a[55:48]}, {fu_data_i.operand_a[63:56]}}) : rev8w;
default: ; // default case to suppress unique warning
endcase
end
if (CVA6Cfg.ZiCondExtEn) begin
unique case (fu_data_i.operation)
CZERO_EQZ : result_o = (|fu_data_i.operand_b) ? fu_data_i.operand_a : '0; // move zero to rd if rs2 is equal to zero else rs1
CZERO_NEZ : result_o = (|fu_data_i.operand_b) ? '0 : fu_data_i.operand_a; // move zero to rd if rs2 is nonzero else rs1
default: ; // default case to suppress unique warning
endcase
end
if (CVA6Cfg.ZiCondExtEn) begin
unique case (fu_data_i.operation)
CZERO_EQZ:
result_o = (|fu_data_i.operand_b) ? fu_data_i.operand_a : '0; // move zero to rd if rs2 is equal to zero else rs1
CZERO_NEZ:
result_o = (|fu_data_i.operand_b) ? '0 : fu_data_i.operand_a; // move zero to rd if rs2 is nonzero else rs1
default: ; // default case to suppress unique warning
endcase
end
end
endmodule

View file

@ -17,66 +17,66 @@
// amo_buffer: holds a single AMO request in a depth-1 fifo_v3 instance and
// forwards it to the cache subsystem on amo_req_o.
// - ready_o is driven by the FIFO's empty_o, amo_valid by its full_o.
// - amo_req_o.req is asserted only while no_st_pending_i, amo_valid_commit_i
//   and amo_valid are all set.
// - The entry is popped when amo_resp_i.ack is set and flushed when flush_i
//   is set while amo_valid_commit_i is not.
module amo_buffer #(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) (
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic flush_i, // pipeline flush
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic flush_i, // pipeline flush
input logic valid_i, // AMO is valid
output logic ready_o, // AMO unit is ready
input ariane_pkg::amo_t amo_op_i, // AMO Operation
input logic valid_i, // AMO is valid
output logic ready_o, // AMO unit is ready
input ariane_pkg::amo_t amo_op_i, // AMO Operation
input logic [riscv::PLEN-1:0] paddr_i, // physical address of store which needs to be placed in the queue
input riscv::xlen_t data_i, // data which is placed in the queue
input logic [1:0] data_size_i, // type of request we are making (e.g.: bytes to write)
input riscv::xlen_t data_i, // data which is placed in the queue
input logic [1:0] data_size_i, // type of request we are making (e.g.: bytes to write)
// D$
output ariane_pkg::amo_req_t amo_req_o, // request to cache subsystem
input ariane_pkg::amo_resp_t amo_resp_i, // response from cache subsystem
output ariane_pkg::amo_req_t amo_req_o, // request to cache subsystem
input ariane_pkg::amo_resp_t amo_resp_i, // response from cache subsystem
// Auxiliary signals
input logic amo_valid_commit_i, // We have a valid AMO in the commit stage
input logic no_st_pending_i // there is currently no store pending anymore
input logic amo_valid_commit_i, // We have a valid AMO in the commit stage
input logic no_st_pending_i // there is currently no store pending anymore
);
logic flush_amo_buffer;
logic amo_valid;
logic flush_amo_buffer;
logic amo_valid;
// Payload stored in the FIFO: operation, physical address, data and size
typedef struct packed {
ariane_pkg::amo_t op;
logic [riscv::PLEN-1:0] paddr;
riscv::xlen_t data;
logic [1:0] size;
} amo_op_t ;
typedef struct packed {
ariane_pkg::amo_t op;
logic [riscv::PLEN-1:0] paddr;
riscv::xlen_t data;
logic [1:0] size;
} amo_op_t;
amo_op_t amo_data_in, amo_data_out;
amo_op_t amo_data_in, amo_data_out;
// validate this request as soon as all stores have drained and the AMO is in the commit stage
// operand_a / operand_b are the buffered address and data, zero-extended to 64 bit
assign amo_req_o.req = no_st_pending_i & amo_valid_commit_i & amo_valid;
assign amo_req_o.amo_op = amo_data_out.op;
assign amo_req_o.size = amo_data_out.size;
assign amo_req_o.operand_a = {{64-riscv::PLEN{1'b0}}, amo_data_out.paddr};
assign amo_req_o.operand_b = {{64-riscv::XLEN{1'b0}}, amo_data_out.data};
// validate this request as soon as all stores have drained and the AMO is in the commit stage
assign amo_req_o.req = no_st_pending_i & amo_valid_commit_i & amo_valid;
assign amo_req_o.amo_op = amo_data_out.op;
assign amo_req_o.size = amo_data_out.size;
assign amo_req_o.operand_a = {{64 - riscv::PLEN{1'b0}}, amo_data_out.paddr};
assign amo_req_o.operand_b = {{64 - riscv::XLEN{1'b0}}, amo_data_out.data};
assign amo_data_in.op = amo_op_i;
assign amo_data_in.data = data_i;
assign amo_data_in.paddr = paddr_i;
assign amo_data_in.size = data_size_i;
assign amo_data_in.op = amo_op_i;
assign amo_data_in.data = data_i;
assign amo_data_in.paddr = paddr_i;
assign amo_data_in.size = data_size_i;
// only flush if we are currently not committing the AMO
// e.g.: it is not speculative anymore
assign flush_amo_buffer = flush_i & !amo_valid_commit_i;
// only flush if we are currently not committing the AMO
// e.g.: it is not speculative anymore
assign flush_amo_buffer = flush_i & !amo_valid_commit_i;
// Single-entry FIFO: full_o means an AMO is buffered, empty_o means a new one can be accepted
fifo_v3 #(
.DEPTH ( 1 ),
.dtype ( amo_op_t )
) i_amo_fifo (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.flush_i ( flush_amo_buffer ),
.testmode_i ( 1'b0 ),
.full_o ( amo_valid ),
.empty_o ( ready_o ),
.usage_o ( ), // left open
.data_i ( amo_data_in ),
.push_i ( valid_i ),
.data_o ( amo_data_out ),
.pop_i ( amo_resp_i.ack )
);
fifo_v3 #(
.DEPTH(1),
.dtype(amo_op_t)
) i_amo_fifo (
.clk_i (clk_i),
.rst_ni (rst_ni),
.flush_i (flush_amo_buffer),
.testmode_i(1'b0),
.full_o (amo_valid),
.empty_o (ready_o),
.usage_o (), // left open
.data_i (amo_data_in),
.push_i (valid_i),
.data_o (amo_data_out),
.pop_i (amo_resp_i.ack)
);
endmodule

View file

@ -24,97 +24,91 @@
//
// ariane_regfile_lol: latch-based register file with NR_READ_PORTS read ports
// and CVA6Cfg.NrCommitPorts write ports.
// - Reads are continuous assignments indexing mem with raddr_i.
// - Writes: waddr_i/we_i are decoded to one-hot (waddr_onehot), the decoded
//   address and wdata_i are registered (waddr_onehot_q, wdata_q), and each word
//   gets its own gated clock from a cluster_clock_gating instance.
// - Storage is described in an always_latch process that copies wdata_q into
//   mem[k] while mem_clocks[k] and waddr_onehot_q[i][k] are both set.
// - When ZERO_REG_ZERO is set, mem[0] is forced to zero and the per-word loops
//   start at index 1.
module ariane_regfile_lol #(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int unsigned DATA_WIDTH = 32,
parameter int unsigned NR_READ_PORTS = 2,
parameter bit ZERO_REG_ZERO = 0
)(
// clock and reset
input logic clk_i,
input logic rst_ni,
// disable clock gates for testing
input logic test_en_i,
// read port
input logic [NR_READ_PORTS-1:0][4:0] raddr_i,
output logic [NR_READ_PORTS-1:0][DATA_WIDTH-1:0] rdata_o,
// write port
input logic [CVA6Cfg.NrCommitPorts-1:0][4:0] waddr_i,
input logic [CVA6Cfg.NrCommitPorts-1:0][DATA_WIDTH-1:0] wdata_i,
input logic [CVA6Cfg.NrCommitPorts-1:0] we_i
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int unsigned DATA_WIDTH = 32,
parameter int unsigned NR_READ_PORTS = 2,
parameter bit ZERO_REG_ZERO = 0
) (
// clock and reset
input logic clk_i,
input logic rst_ni,
// disable clock gates for testing
input logic test_en_i,
// read port
input logic [ NR_READ_PORTS-1:0][ 4:0] raddr_i,
output logic [ NR_READ_PORTS-1:0][DATA_WIDTH-1:0] rdata_o,
// write port
input logic [CVA6Cfg.NrCommitPorts-1:0][ 4:0] waddr_i,
input logic [CVA6Cfg.NrCommitPorts-1:0][DATA_WIDTH-1:0] wdata_i,
input logic [CVA6Cfg.NrCommitPorts-1:0] we_i
);
localparam ADDR_WIDTH = 5;
localparam NUM_WORDS = 2**ADDR_WIDTH;
localparam ADDR_WIDTH = 5;
localparam NUM_WORDS = 2 ** ADDR_WIDTH;
logic [NUM_WORDS-1:ZERO_REG_ZERO] mem_clocks;
logic [NUM_WORDS-1:ZERO_REG_ZERO] mem_clocks;
logic [DATA_WIDTH-1:0] mem[NUM_WORDS];
logic [CVA6Cfg.NrCommitPorts-1:0][NUM_WORDS-1:1] waddr_onehot,waddr_onehot_q;
logic [CVA6Cfg.NrCommitPorts-1:0][DATA_WIDTH-1:0] wdata_q;
logic [ DATA_WIDTH-1:0] mem [NUM_WORDS];
logic [CVA6Cfg.NrCommitPorts-1:0][NUM_WORDS-1:1] waddr_onehot, waddr_onehot_q;
logic [CVA6Cfg.NrCommitPorts-1:0][DATA_WIDTH-1:0] wdata_q;
// decode addresses
for (genvar i = 0; i < NR_READ_PORTS; i++)
assign rdata_o[i] = mem[raddr_i[i][ADDR_WIDTH-1:0]];
// decode addresses
for (genvar i = 0; i < NR_READ_PORTS; i++) assign rdata_o[i] = mem[raddr_i[i][ADDR_WIDTH-1:0]];
// Sample write data (only when the port's write enable is set) and the decoded write address.
// NOTE(review): only wdata_q is cleared in the reset branch; waddr_onehot_q has no reset value here.
always_ff @(posedge clk_i, negedge rst_ni) begin : sample_waddr
if (~rst_ni) begin
wdata_q <= '0;
end else begin
for (int unsigned i = 0; i < CVA6Cfg.NrCommitPorts; i++)
// enable flipflop will most probably infer clock gating
if (we_i[i]) begin
wdata_q[i] <= wdata_i[i];
end
waddr_onehot_q <= waddr_onehot;
end
always_ff @(posedge clk_i, negedge rst_ni) begin : sample_waddr
if (~rst_ni) begin
wdata_q <= '0;
end else begin
for (int unsigned i = 0; i < CVA6Cfg.NrCommitPorts; i++)
// enable flipflop will most probably infer clock gating
if (we_i[i]) begin
wdata_q[i] <= wdata_i[i];
end
waddr_onehot_q <= waddr_onehot;
end
end
// WRITE : Write Address Decoder (WAD), combinatorial process
// The decode loop starts at j = 1, so no one-hot bit is ever produced for address 0.
always_comb begin : decode_write_addess
for (int unsigned i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin
for (int unsigned j = 1; j < NUM_WORDS; j++) begin
if (we_i[i] && (waddr_i[i] == j))
waddr_onehot[i][j] = 1'b1;
else
waddr_onehot[i][j] = 1'b0;
end
end
// WRITE : Write Address Decoder (WAD), combinatorial process
always_comb begin : decode_write_addess
for (int unsigned i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin
for (int unsigned j = 1; j < NUM_WORDS; j++) begin
if (we_i[i] && (waddr_i[i] == j)) waddr_onehot[i][j] = 1'b1;
else waddr_onehot[i][j] = 1'b0;
end
end
end
// WRITE : Clock gating (if integrated clock-gating cells are available)
// NOTE(review): with ZERO_REG_ZERO == 0 this loop starts at x = 0 and reads waddr_onehot[i][0],
// which lies outside the declared [NUM_WORDS-1:1] range - confirm this configuration is intended.
for (genvar x = ZERO_REG_ZERO; x < NUM_WORDS; x++) begin
// WRITE : Clock gating (if integrated clock-gating cells are available)
for (genvar x = ZERO_REG_ZERO; x < NUM_WORDS; x++) begin
logic [CVA6Cfg.NrCommitPorts-1:0] waddr_ored;
logic [CVA6Cfg.NrCommitPorts-1:0] waddr_ored;
for (genvar i = 0; i < CVA6Cfg.NrCommitPorts; i++)
assign waddr_ored[i] = waddr_onehot[i][x];
for (genvar i = 0; i < CVA6Cfg.NrCommitPorts; i++) assign waddr_ored[i] = waddr_onehot[i][x];
cluster_clock_gating i_cg (
.clk_i ( clk_i ),
.en_i ( |waddr_ored ),
.test_en_i ( test_en_i ),
.clk_o ( mem_clocks[x] )
);
end
// Generate M = WORDS sequential processes, each of which describes one
// word of the memory. The processes are synchronized with the clocks
// ClocksxC(i), i = 0, 1, ..., M-1
// Use active low, i.e. transparent on low latches as storage elements
// Data is sampled on rising clock edge
// Integer registers
always_latch begin : latch_wdata
// Note: The assignment has to be done inside this process or Modelsim complains about it
if (ZERO_REG_ZERO)
mem[0] = '0;
for (int unsigned i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin
for (int unsigned k = ZERO_REG_ZERO; k < NUM_WORDS; k++) begin
if (mem_clocks[k] && waddr_onehot_q[i][k])
mem[k] = wdata_q[i];
end
end
cluster_clock_gating i_cg (
.clk_i (clk_i),
.en_i (|waddr_ored),
.test_en_i(test_en_i),
.clk_o (mem_clocks[x])
);
end
// Generate M = WORDS sequential processes, each of which describes one
// word of the memory. The processes are synchronized with the clocks
// ClocksxC(i), i = 0, 1, ..., M-1
// Use active low, i.e. transparent on low latches as storage elements
// Data is sampled on rising clock edge
// Integer registers
always_latch begin : latch_wdata
// Note: The assignment has to be done inside this process or Modelsim complains about it
if (ZERO_REG_ZERO) mem[0] = '0;
for (int unsigned i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin
for (int unsigned k = ZERO_REG_ZERO; k < NUM_WORDS; k++) begin
if (mem_clocks[k] && waddr_onehot_q[i][k]) mem[k] = wdata_q[i];
end
end
end
endmodule

View file

@ -23,60 +23,58 @@
//
module ariane_regfile #(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int unsigned DATA_WIDTH = 32,
parameter int unsigned NR_READ_PORTS = 2,
parameter bit ZERO_REG_ZERO = 0
)(
// clock and reset
input logic clk_i,
input logic rst_ni,
// disable clock gates for testing
input logic test_en_i,
// read port
input logic [NR_READ_PORTS-1:0][4:0] raddr_i,
output logic [NR_READ_PORTS-1:0][DATA_WIDTH-1:0] rdata_o,
// write port
input logic [CVA6Cfg.NrCommitPorts-1:0][4:0] waddr_i,
input logic [CVA6Cfg.NrCommitPorts-1:0][DATA_WIDTH-1:0] wdata_i,
input logic [CVA6Cfg.NrCommitPorts-1:0] we_i
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int unsigned DATA_WIDTH = 32,
parameter int unsigned NR_READ_PORTS = 2,
parameter bit ZERO_REG_ZERO = 0
) (
// clock and reset
input logic clk_i,
input logic rst_ni,
// disable clock gates for testing
input logic test_en_i,
// read port
input logic [ NR_READ_PORTS-1:0][ 4:0] raddr_i,
output logic [ NR_READ_PORTS-1:0][DATA_WIDTH-1:0] rdata_o,
// write port
input logic [CVA6Cfg.NrCommitPorts-1:0][ 4:0] waddr_i,
input logic [CVA6Cfg.NrCommitPorts-1:0][DATA_WIDTH-1:0] wdata_i,
input logic [CVA6Cfg.NrCommitPorts-1:0] we_i
);
localparam ADDR_WIDTH = 5;
localparam NUM_WORDS = 2**ADDR_WIDTH;
localparam ADDR_WIDTH = 5;
localparam NUM_WORDS = 2 ** ADDR_WIDTH;
logic [NUM_WORDS-1:0][DATA_WIDTH-1:0] mem;
logic [CVA6Cfg.NrCommitPorts-1:0][NUM_WORDS-1:0] we_dec;
logic [ NUM_WORDS-1:0][DATA_WIDTH-1:0] mem;
logic [CVA6Cfg.NrCommitPorts-1:0][ NUM_WORDS-1:0] we_dec;
always_comb begin : we_decoder
for (int unsigned j = 0; j < CVA6Cfg.NrCommitPorts; j++) begin
for (int unsigned i = 0; i < NUM_WORDS; i++) begin
if (waddr_i[j] == i)
we_dec[j][i] = we_i[j];
else
we_dec[j][i] = 1'b0;
end
end
always_comb begin : we_decoder
for (int unsigned j = 0; j < CVA6Cfg.NrCommitPorts; j++) begin
for (int unsigned i = 0; i < NUM_WORDS; i++) begin
if (waddr_i[j] == i) we_dec[j][i] = we_i[j];
else we_dec[j][i] = 1'b0;
end
end
end
// loop from 1 to NUM_WORDS-1 as R0 is nil
always_ff @(posedge clk_i, negedge rst_ni) begin : register_write_behavioral
if (~rst_ni) begin
mem <= '{default: '0};
end else begin
for (int unsigned j = 0; j < CVA6Cfg.NrCommitPorts; j++) begin
for (int unsigned i = 0; i < NUM_WORDS; i++) begin
if (we_dec[j][i]) begin
mem[i] <= wdata_i[j];
end
end
if (ZERO_REG_ZERO) begin
mem[0] <= '0;
end
end
// loop from 1 to NUM_WORDS-1 as R0 is nil
always_ff @(posedge clk_i, negedge rst_ni) begin : register_write_behavioral
if (~rst_ni) begin
mem <= '{default: '0};
end else begin
for (int unsigned j = 0; j < CVA6Cfg.NrCommitPorts; j++) begin
for (int unsigned i = 0; i < NUM_WORDS; i++) begin
if (we_dec[j][i]) begin
mem[i] <= wdata_i[j];
end
end
if (ZERO_REG_ZERO) begin
mem[0] <= '0;
end
end
end
end
for (genvar i = 0; i < NR_READ_PORTS; i++) begin
assign rdata_o[i] = mem[raddr_i[i]];

View file

@ -26,35 +26,35 @@
//
module ariane_regfile_fpga #(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int unsigned DATA_WIDTH = 32,
parameter int unsigned NR_READ_PORTS = 2,
parameter bit ZERO_REG_ZERO = 0
)(
// clock and reset
input logic clk_i,
input logic rst_ni,
// disable clock gates for testing
input logic test_en_i,
// read port
input logic [NR_READ_PORTS-1:0][4:0] raddr_i,
output logic [NR_READ_PORTS-1:0][DATA_WIDTH-1:0] rdata_o,
// write port
input logic [CVA6Cfg.NrCommitPorts-1:0][4:0] waddr_i,
input logic [CVA6Cfg.NrCommitPorts-1:0][DATA_WIDTH-1:0] wdata_i,
input logic [CVA6Cfg.NrCommitPorts-1:0] we_i
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int unsigned DATA_WIDTH = 32,
parameter int unsigned NR_READ_PORTS = 2,
parameter bit ZERO_REG_ZERO = 0
) (
// clock and reset
input logic clk_i,
input logic rst_ni,
// disable clock gates for testing
input logic test_en_i,
// read port
input logic [ NR_READ_PORTS-1:0][ 4:0] raddr_i,
output logic [ NR_READ_PORTS-1:0][DATA_WIDTH-1:0] rdata_o,
// write port
input logic [CVA6Cfg.NrCommitPorts-1:0][ 4:0] waddr_i,
input logic [CVA6Cfg.NrCommitPorts-1:0][DATA_WIDTH-1:0] wdata_i,
input logic [CVA6Cfg.NrCommitPorts-1:0] we_i
);
localparam ADDR_WIDTH = 5;
localparam NUM_WORDS = 2**ADDR_WIDTH;
localparam NUM_WORDS = 2 ** ADDR_WIDTH;
localparam LOG_NR_WRITE_PORTS = CVA6Cfg.NrCommitPorts == 1 ? 1 : $clog2(CVA6Cfg.NrCommitPorts);
// Distributed RAM usually supports one write port per block - duplicate for each write port.
logic [NUM_WORDS-1:0][DATA_WIDTH-1:0] mem [CVA6Cfg.NrCommitPorts];
logic [ NUM_WORDS-1:0][ DATA_WIDTH-1:0] mem [CVA6Cfg.NrCommitPorts];
logic [CVA6Cfg.NrCommitPorts-1:0][NUM_WORDS-1:0] we_dec;
logic [NUM_WORDS-1:0][LOG_NR_WRITE_PORTS-1:0] mem_block_sel;
logic [NUM_WORDS-1:0][LOG_NR_WRITE_PORTS-1:0] mem_block_sel_q;
logic [CVA6Cfg.NrCommitPorts-1:0][ NUM_WORDS-1:0] we_dec;
logic [ NUM_WORDS-1:0][LOG_NR_WRITE_PORTS-1:0] mem_block_sel;
logic [ NUM_WORDS-1:0][LOG_NR_WRITE_PORTS-1:0] mem_block_sel_q;
// write address decoder (for block selector)
always_comb begin
@ -75,8 +75,8 @@ module ariane_regfile_fpga #(
// index has priority.
always_comb begin
mem_block_sel = mem_block_sel_q;
for (int i = 0; i<NUM_WORDS; i++) begin
for (int j = 0; j<CVA6Cfg.NrCommitPorts; j++) begin
for (int i = 0; i < NUM_WORDS; i++) begin
for (int j = 0; j < CVA6Cfg.NrCommitPorts; j++) begin
if (we_dec[j][i] == 1'b1) begin
mem_block_sel[i] = LOG_NR_WRITE_PORTS'(j);
end
@ -94,14 +94,14 @@ module ariane_regfile_fpga #(
end
// distributed RAM blocks
logic [NR_READ_PORTS-1:0] [DATA_WIDTH-1:0] mem_read [CVA6Cfg.NrCommitPorts];
for (genvar j=0; j<CVA6Cfg.NrCommitPorts; j++) begin : regfile_ram_block
logic [NR_READ_PORTS-1:0][DATA_WIDTH-1:0] mem_read[CVA6Cfg.NrCommitPorts];
for (genvar j = 0; j < CVA6Cfg.NrCommitPorts; j++) begin : regfile_ram_block
always_ff @(posedge clk_i) begin
if (we_i[j] && ~waddr_i[j] != 0) begin
mem[j][waddr_i[j]] <= wdata_i[j];
end
end
for (genvar k=0; k<NR_READ_PORTS; k++) begin : block_read
for (genvar k = 0; k < NR_READ_PORTS; k++) begin : block_read
assign mem_read[j][k] = mem[j][raddr_i[k]];
end
end
@ -110,15 +110,13 @@ module ariane_regfile_fpga #(
logic [NR_READ_PORTS-1:0][LOG_NR_WRITE_PORTS-1:0] block_addr;
for (genvar k = 0; k < NR_READ_PORTS; k++) begin : regfile_read_port
assign block_addr[k] = mem_block_sel_q[raddr_i[k]];
assign rdata_o[k] =
(ZERO_REG_ZERO && raddr_i[k] == '0 ) ? '0 : mem_read[block_addr[k]][k];
assign rdata_o[k] = (ZERO_REG_ZERO && raddr_i[k] == '0) ? '0 : mem_read[block_addr[k]][k];
end
// random initialization of the memory to suppress assert warnings on Questa.
initial
begin
for(int i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin
for(int j = 0; j < NUM_WORDS; j++) begin
initial begin
for (int i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin
for (int j = 0; j < NUM_WORDS; j++) begin
mem[i][j] = $random();
end
end

View file

@ -20,69 +20,74 @@
module axi_shim #(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int unsigned AxiNumWords = 4, // data width in dwords, this is also the maximum burst length, must be >=2
parameter type axi_req_t = logic,
parameter type axi_rsp_t = logic
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int unsigned AxiNumWords = 4, // data width in dwords, this is also the maximum burst length, must be >=2
parameter type axi_req_t = logic,
parameter type axi_rsp_t = logic
) (
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
// read channel
// request
input logic rd_req_i,
output logic rd_gnt_o,
input logic [CVA6Cfg.AxiAddrWidth-1:0] rd_addr_i,
input logic [$clog2(AxiNumWords)-1:0] rd_blen_i, // axi convention: LEN-1
input logic [2:0] rd_size_i,
input logic [CVA6Cfg.AxiIdWidth-1:0] rd_id_i, // use same ID for reads, or make sure you only have one outstanding read tx
input logic rd_lock_i,
// read response (we have to unconditionally sink the response)
input logic rd_rdy_i,
output logic rd_last_o,
output logic rd_valid_o,
output logic [CVA6Cfg.AxiDataWidth-1:0] rd_data_o,
output logic [CVA6Cfg.AxiUserWidth-1:0] rd_user_o,
output logic [CVA6Cfg.AxiIdWidth-1:0] rd_id_o,
output logic rd_exokay_o, // indicates whether exclusive tx succeeded
// write channel
input logic wr_req_i,
output logic wr_gnt_o,
input logic [CVA6Cfg.AxiAddrWidth-1:0] wr_addr_i,
input logic [AxiNumWords-1:0][CVA6Cfg.AxiDataWidth-1:0] wr_data_i,
input logic [AxiNumWords-1:0][CVA6Cfg.AxiUserWidth-1:0] wr_user_i,
input logic [AxiNumWords-1:0][(CVA6Cfg.AxiDataWidth/8)-1:0] wr_be_i,
input logic [$clog2(AxiNumWords)-1:0] wr_blen_i, // axi convention: LEN-1
input logic [2:0] wr_size_i,
input logic [CVA6Cfg.AxiIdWidth-1:0] wr_id_i,
input logic wr_lock_i,
input logic [5:0] wr_atop_i,
// write response
input logic wr_rdy_i,
output logic wr_valid_o,
output logic [CVA6Cfg.AxiIdWidth-1:0] wr_id_o,
output logic wr_exokay_o, // indicates whether exclusive tx succeeded
// AXI port
output axi_req_t axi_req_o,
input axi_rsp_t axi_resp_i
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
// read channel
// request
input logic rd_req_i,
output logic rd_gnt_o,
input logic [CVA6Cfg.AxiAddrWidth-1:0] rd_addr_i,
input logic [$clog2(AxiNumWords)-1:0] rd_blen_i, // axi convention: LEN-1
input logic [2:0] rd_size_i,
input logic [CVA6Cfg.AxiIdWidth-1:0] rd_id_i, // use same ID for reads, or make sure you only have one outstanding read tx
input logic rd_lock_i,
// read response (we have to unconditionally sink the response)
input logic rd_rdy_i,
output logic rd_last_o,
output logic rd_valid_o,
output logic [CVA6Cfg.AxiDataWidth-1:0] rd_data_o,
output logic [CVA6Cfg.AxiUserWidth-1:0] rd_user_o,
output logic [CVA6Cfg.AxiIdWidth-1:0] rd_id_o,
output logic rd_exokay_o, // indicates whether exclusive tx succeeded
// write channel
input logic wr_req_i,
output logic wr_gnt_o,
input logic [CVA6Cfg.AxiAddrWidth-1:0] wr_addr_i,
input logic [AxiNumWords-1:0][CVA6Cfg.AxiDataWidth-1:0] wr_data_i,
input logic [AxiNumWords-1:0][CVA6Cfg.AxiUserWidth-1:0] wr_user_i,
input logic [AxiNumWords-1:0][(CVA6Cfg.AxiDataWidth/8)-1:0] wr_be_i,
input logic [$clog2(AxiNumWords)-1:0] wr_blen_i, // axi convention: LEN-1
input logic [2:0] wr_size_i,
input logic [CVA6Cfg.AxiIdWidth-1:0] wr_id_i,
input logic wr_lock_i,
input logic [5:0] wr_atop_i,
// write response
input logic wr_rdy_i,
output logic wr_valid_o,
output logic [CVA6Cfg.AxiIdWidth-1:0] wr_id_o,
output logic wr_exokay_o, // indicates whether exclusive tx succeeded
// AXI port
output axi_req_t axi_req_o,
input axi_rsp_t axi_resp_i
);
localparam AddrIndex = ($clog2(AxiNumWords) > 0) ? $clog2(AxiNumWords) : 1;
///////////////////////////////////////////////////////
// write channel
///////////////////////////////////////////////////////
///////////////////////////////////////////////////////
// write channel
///////////////////////////////////////////////////////
enum logic [3:0] {
IDLE, WAIT_AW_READY, WAIT_LAST_W_READY, WAIT_LAST_W_READY_AW_READY, WAIT_AW_READY_BURST
} wr_state_q, wr_state_d;
IDLE,
WAIT_AW_READY,
WAIT_LAST_W_READY,
WAIT_LAST_W_READY_AW_READY,
WAIT_AW_READY_BURST
}
wr_state_q, wr_state_d;
// AXI tx counter
logic [AddrIndex-1:0] wr_cnt_d, wr_cnt_q;
logic wr_single_req, wr_cnt_done, wr_cnt_clr, wr_cnt_en;
assign wr_single_req = (wr_blen_i == 0);
assign wr_single_req = (wr_blen_i == 0);
// address
assign axi_req_o.aw.burst = axi_pkg::BURST_INCR; // Use BURST_INCR for AXI regular transaction
assign axi_req_o.aw.burst = axi_pkg::BURST_INCR; // Use BURST_INCR for AXI regular transaction
assign axi_req_o.aw.addr = wr_addr_i[CVA6Cfg.AxiAddrWidth-1:0];
assign axi_req_o.aw.size = wr_size_i;
assign axi_req_o.aw.len = wr_blen_i;
@ -108,10 +113,8 @@ module axi_shim #(
assign wr_id_o = axi_resp_i.b.id;
// tx counter
assign wr_cnt_done = (wr_cnt_q == wr_blen_i);
assign wr_cnt_d = (wr_cnt_clr) ?
'0 : (wr_cnt_en) ?
wr_cnt_q+1 : wr_cnt_q;
assign wr_cnt_done = (wr_cnt_q == wr_blen_i);
assign wr_cnt_d = (wr_cnt_clr) ? '0 : (wr_cnt_en) ? wr_cnt_q + 1 : wr_cnt_q;
always_comb begin : p_axi_write_fsm
// default
@ -137,21 +140,25 @@ module axi_shim #(
if (wr_single_req) begin
wr_cnt_clr = 1'b1;
// single req can be granted here
wr_gnt_o = axi_resp_i.aw_ready & axi_resp_i.w_ready;
case ({axi_resp_i.aw_ready, axi_resp_i.w_ready})
2'b01: wr_state_d = WAIT_AW_READY;
2'b10: wr_state_d = WAIT_LAST_W_READY;
wr_gnt_o = axi_resp_i.aw_ready & axi_resp_i.w_ready;
case ({
axi_resp_i.aw_ready, axi_resp_i.w_ready
})
2'b01: wr_state_d = WAIT_AW_READY;
2'b10: wr_state_d = WAIT_LAST_W_READY;
default: wr_state_d = IDLE;
endcase
// it's a request for the whole cache line
// it's a request for the whole cache line
end else begin
wr_cnt_en = axi_resp_i.w_ready;
case ({axi_resp_i.aw_ready, axi_resp_i.w_ready})
2'b11: wr_state_d = WAIT_LAST_W_READY;
2'b01: wr_state_d = WAIT_LAST_W_READY_AW_READY;
2'b10: wr_state_d = WAIT_LAST_W_READY;
default:;
case ({
axi_resp_i.aw_ready, axi_resp_i.w_ready
})
2'b11: wr_state_d = WAIT_LAST_W_READY;
2'b01: wr_state_d = WAIT_LAST_W_READY_AW_READY;
2'b10: wr_state_d = WAIT_LAST_W_READY;
default: ;
endcase
end
end
@ -172,7 +179,9 @@ module axi_shim #(
axi_req_o.w_valid = 1'b1;
axi_req_o.aw_valid = 1'b1;
// we got an aw_ready
case ({axi_resp_i.aw_ready, axi_resp_i.w_ready})
case ({
axi_resp_i.aw_ready, axi_resp_i.w_ready
})
// we got an aw ready
2'b01: begin
// are there any outstanding transactions?
@ -180,25 +189,25 @@ module axi_shim #(
wr_state_d = WAIT_AW_READY_BURST;
wr_cnt_clr = 1'b1;
end else begin
// yes, so reduce the count and stay here
// yes, so reduce the count and stay here
wr_cnt_en = 1'b1;
end
end
2'b10: wr_state_d = WAIT_LAST_W_READY;
2'b10: wr_state_d = WAIT_LAST_W_READY;
2'b11: begin
// we are finished
if (wr_cnt_done) begin
wr_state_d = IDLE;
wr_gnt_o = 1'b1;
wr_cnt_clr = 1'b1;
// there are outstanding transactions
// there are outstanding transactions
end else begin
wr_state_d = WAIT_LAST_W_READY;
wr_cnt_en = 1'b1;
end
end
default:;
endcase
default: ;
endcase
end
///////////////////////////////////
// ~> all data has already been sent, we are only waiting for the aw_ready
@ -234,14 +243,14 @@ module axi_shim #(
end
///////////////////////////////////////////////////////
// read channel
///////////////////////////////////////////////////////
///////////////////////////////////////////////////////
// read channel
///////////////////////////////////////////////////////
// address
// in case of a wrapping transfer we can simply begin at the address, if we want to request a cache-line
// with an incremental transfer we need to output the corresponding base address of the cache line
assign axi_req_o.ar.burst = axi_pkg::BURST_INCR; // Use BURST_INCR for AXI regular transaction
assign axi_req_o.ar.burst = axi_pkg::BURST_INCR; // Use BURST_INCR for AXI regular transaction
assign axi_req_o.ar.addr = rd_addr_i[CVA6Cfg.AxiAddrWidth-1:0];
assign axi_req_o.ar.size = rd_size_i;
assign axi_req_o.ar.len = rd_blen_i;
@ -261,14 +270,14 @@ module axi_shim #(
assign axi_req_o.r_ready = rd_rdy_i;
assign rd_data_o = axi_resp_i.r.data;
if (ariane_pkg::AXI_USER_EN) begin
assign rd_user_o = axi_resp_i.r.user;
assign rd_user_o = axi_resp_i.r.user;
end else begin
assign rd_user_o = '0;
assign rd_user_o = '0;
end
assign rd_last_o = axi_resp_i.r.last;
assign rd_valid_o = axi_resp_i.r_valid;
assign rd_id_o = axi_resp_i.r.id;
assign rd_exokay_o = (axi_resp_i.r.resp == axi_pkg::RESP_EXOKAY);
assign rd_last_o = axi_resp_i.r.last;
assign rd_valid_o = axi_resp_i.r_valid;
assign rd_id_o = axi_resp_i.r.id;
assign rd_exokay_o = (axi_resp_i.r.resp == axi_pkg::RESP_EXOKAY);
// ----------------
@ -285,17 +294,17 @@ module axi_shim #(
end
end
// ----------------
// Assertions
// ----------------
// ----------------
// Assertions
// ----------------
//pragma translate_off
initial begin
assert (AxiNumWords >= 1) else
$fatal(1, "[axi adapter] AxiNumWords must be >= 1");
assert (CVA6Cfg.AxiIdWidth >= 2) else
$fatal(1, "[axi adapter] AXI id width must be at least 2 bit wide");
end
//pragma translate_on
//pragma translate_off
initial begin
assert (AxiNumWords >= 1)
else $fatal(1, "[axi adapter] AxiNumWords must be >= 1");
assert (CVA6Cfg.AxiIdWidth >= 2)
else $fatal(1, "[axi adapter] AXI id width must be at least 2 bit wide");
end
//pragma translate_on
endmodule // axi_adapter2
endmodule // axi_adapter2

View file

@ -15,92 +15,90 @@
module branch_unit #(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) (
input logic clk_i,
input logic rst_ni,
input logic debug_mode_i,
input ariane_pkg::fu_data_t fu_data_i,
input logic [riscv::VLEN-1:0] pc_i, // PC of instruction
input logic is_compressed_instr_i,
input logic clk_i,
input logic rst_ni,
input logic debug_mode_i,
input ariane_pkg::fu_data_t fu_data_i,
input logic [riscv::VLEN-1:0] pc_i, // PC of instruction
input logic is_compressed_instr_i,
input logic fu_valid_i, // any functional unit is valid, check that there is no accidental mis-predict
input logic branch_valid_i,
input logic branch_comp_res_i, // branch comparison result from ALU
output logic [riscv::VLEN-1:0] branch_result_o,
input logic branch_valid_i,
input logic branch_comp_res_i, // branch comparison result from ALU
output logic [riscv::VLEN-1:0] branch_result_o,
input ariane_pkg::branchpredict_sbe_t branch_predict_i, // this is the address we predicted
input ariane_pkg::branchpredict_sbe_t branch_predict_i, // this is the address we predicted
output ariane_pkg::bp_resolve_t resolved_branch_o, // this is the actual address we are targeting
output logic resolve_branch_o, // to ID to clear that we resolved the branch and we can
// accept new entries to the scoreboard
output ariane_pkg::exception_t branch_exception_o // branch exception out
output logic resolve_branch_o, // to ID to clear that we resolved the branch and we can
// accept new entries to the scoreboard
output ariane_pkg::exception_t branch_exception_o // branch exception out
);
logic [riscv::VLEN-1:0] target_address;
logic [riscv::VLEN-1:0] next_pc;
logic [riscv::VLEN-1:0] target_address;
logic [riscv::VLEN-1:0] next_pc;
// here we handle the various possibilities of mis-predicts
always_comb begin : mispredict_handler
// set the jump base, for JALR we need to look at the register, for all other control flow instructions we can take the current PC
automatic logic [riscv::VLEN-1:0] jump_base;
// TODO(zarubaf): The ALU can be used to calculate the branch target
jump_base = (fu_data_i.operation == ariane_pkg::JALR) ? fu_data_i.operand_a[riscv::VLEN-1:0] : pc_i;
// here we handle the various possibilities of mis-predicts
always_comb begin : mispredict_handler
// set the jump base, for JALR we need to look at the register, for all other control flow instructions we can take the current PC
automatic logic [riscv::VLEN-1:0] jump_base;
// TODO(zarubaf): The ALU can be used to calculate the branch target
jump_base = (fu_data_i.operation == ariane_pkg::JALR) ? fu_data_i.operand_a[riscv::VLEN-1:0] : pc_i;
target_address = {riscv::VLEN{1'b0}};
resolve_branch_o = 1'b0;
resolved_branch_o.target_address = {riscv::VLEN{1'b0}};
resolved_branch_o.is_taken = 1'b0;
resolved_branch_o.valid = branch_valid_i;
resolved_branch_o.is_mispredict = 1'b0;
resolved_branch_o.cf_type = branch_predict_i.cf;
// calculate next PC, depending on whether the instruction is compressed or not this may be different
// TODO(zarubaf): We already calculate this a couple of times, maybe re-use?
next_pc = pc_i + ((is_compressed_instr_i) ? {{riscv::VLEN-2{1'b0}}, 2'h2} : {{riscv::VLEN-3{1'b0}}, 3'h4});
// calculate target address simple 64 bit addition
target_address = $unsigned($signed(jump_base) + $signed(fu_data_i.imm[riscv::VLEN-1:0]));
// on a JALR we are supposed to reset the LSB to 0 (according to the specification)
if (fu_data_i.operation == ariane_pkg::JALR) target_address[0] = 1'b0;
// we need to put the branch target address into rd, this is the result of this unit
branch_result_o = next_pc;
resolved_branch_o.pc = pc_i;
// There are only two sources of mispredicts:
// 1. Branches
// 2. Jumps to register addresses
if (branch_valid_i) begin
// write target address which goes to PC Gen
resolved_branch_o.target_address = (branch_comp_res_i) ? target_address : next_pc;
resolved_branch_o.is_taken = branch_comp_res_i;
// check the outcome of the branch speculation
if ( ariane_pkg::op_is_branch(fu_data_i.operation) ) begin
// Set the `cf_type` of the output as `branch`, this will update the BHT.
resolved_branch_o.cf_type = ariane_pkg::Branch;
// If the ALU comparison does not agree with the BHT prediction set the resolution as mispredicted.
resolved_branch_o.is_mispredict = branch_comp_res_i != (branch_predict_i.cf == ariane_pkg::Branch);
end
if (fu_data_i.operation == ariane_pkg::JALR
// check if the address of the jump register is correct and that we actually predicted
&& (branch_predict_i.cf == ariane_pkg::NoCF || target_address != branch_predict_i.predict_address)) begin
resolved_branch_o.is_mispredict = 1'b1;
// update BTB only if this wasn't a return
if (branch_predict_i.cf != ariane_pkg::Return) resolved_branch_o.cf_type = ariane_pkg::JumpR;
end
// to resolve the branch in ID
resolve_branch_o = 1'b1;
end
end
// use ALU exception signal for storing instruction fetch exceptions if
// the target address is not aligned to a 2 byte boundary
//
logic jump_taken;
always_comb begin : exception_handling
// Do a jump if it is either unconditional jump (JAL | JALR) or `taken` conditional jump
jump_taken = !(ariane_pkg::op_is_branch(fu_data_i.operation)) ||
((ariane_pkg::op_is_branch(fu_data_i.operation)) && branch_comp_res_i);
branch_exception_o.cause = riscv::INSTR_ADDR_MISALIGNED;
branch_exception_o.valid = 1'b0;
branch_exception_o.tval = {{riscv::XLEN-riscv::VLEN{pc_i[riscv::VLEN-1]}}, pc_i};
// Only throw instruction address misaligned exception if this is indeed a `taken` conditional branch or
// an unconditional jump
if (branch_valid_i &&
target_address[0] != 1'b0 &&
jump_taken)
branch_exception_o.valid = 1'b1;
target_address = {riscv::VLEN{1'b0}};
resolve_branch_o = 1'b0;
resolved_branch_o.target_address = {riscv::VLEN{1'b0}};
resolved_branch_o.is_taken = 1'b0;
resolved_branch_o.valid = branch_valid_i;
resolved_branch_o.is_mispredict = 1'b0;
resolved_branch_o.cf_type = branch_predict_i.cf;
// calculate next PC, depending on whether the instruction is compressed or not this may be different
// TODO(zarubaf): We already calculate this a couple of times, maybe re-use?
next_pc = pc_i + ((is_compressed_instr_i) ? {{riscv::VLEN-2{1'b0}}, 2'h2} : {{riscv::VLEN-3{1'b0}}, 3'h4});
// calculate target address simple 64 bit addition
target_address = $unsigned($signed(jump_base) + $signed(fu_data_i.imm[riscv::VLEN-1:0]));
// on a JALR we are supposed to reset the LSB to 0 (according to the specification)
if (fu_data_i.operation == ariane_pkg::JALR) target_address[0] = 1'b0;
// we need to put the branch target address into rd, this is the result of this unit
branch_result_o = next_pc;
resolved_branch_o.pc = pc_i;
// There are only two sources of mispredicts:
// 1. Branches
// 2. Jumps to register addresses
if (branch_valid_i) begin
// write target address which goes to PC Gen
resolved_branch_o.target_address = (branch_comp_res_i) ? target_address : next_pc;
resolved_branch_o.is_taken = branch_comp_res_i;
// check the outcome of the branch speculation
if (ariane_pkg::op_is_branch(fu_data_i.operation)) begin
// Set the `cf_type` of the output as `branch`, this will update the BHT.
resolved_branch_o.cf_type = ariane_pkg::Branch;
// If the ALU comparison does not agree with the BHT prediction set the resolution as mispredicted.
resolved_branch_o.is_mispredict = branch_comp_res_i != (branch_predict_i.cf == ariane_pkg::Branch);
end
if (fu_data_i.operation == ariane_pkg::JALR
// check if the address of the jump register is correct and that we actually predicted
&& (branch_predict_i.cf == ariane_pkg::NoCF || target_address != branch_predict_i.predict_address)) begin
resolved_branch_o.is_mispredict = 1'b1;
// update BTB only if this wasn't a return
if (branch_predict_i.cf != ariane_pkg::Return)
resolved_branch_o.cf_type = ariane_pkg::JumpR;
end
// to resolve the branch in ID
resolve_branch_o = 1'b1;
end
end
// use ALU exception signal for storing instruction fetch exceptions if
// the target address is not aligned to a 2 byte boundary
//
logic jump_taken;
always_comb begin : exception_handling
// Do a jump if it is either unconditional jump (JAL | JALR) or `taken` conditional jump
jump_taken = !(ariane_pkg::op_is_branch(fu_data_i.operation)) ||
((ariane_pkg::op_is_branch(fu_data_i.operation)) && branch_comp_res_i);
branch_exception_o.cause = riscv::INSTR_ADDR_MISALIGNED;
branch_exception_o.valid = 1'b0;
branch_exception_o.tval = {{riscv::XLEN - riscv::VLEN{pc_i[riscv::VLEN-1]}}, pc_i};
// Only throw instruction address misaligned exception if this is indeed a `taken` conditional branch or
// an unconditional jump
if (branch_valid_i && target_address[0] != 1'b0 && jump_taken) branch_exception_o.valid = 1'b1;
end
endmodule

View file

@ -12,54 +12,54 @@
// Date: 15.09.2018
// Description: Combinatorial AMO unit
module amo_alu #(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) (
// AMO interface
input ariane_pkg::amo_t amo_op_i,
input logic [63:0] amo_operand_a_i,
input logic [63:0] amo_operand_b_i,
output logic [63:0] amo_result_o // result of atomic memory operation
// AMO interface
input ariane_pkg::amo_t amo_op_i,
input logic [63:0] amo_operand_a_i,
input logic [63:0] amo_operand_b_i,
output logic [63:0] amo_result_o // result of atomic memory operation
);
logic [64:0] adder_sum;
logic [64:0] adder_operand_a, adder_operand_b;
logic [64:0] adder_sum;
logic [64:0] adder_operand_a, adder_operand_b;
assign adder_sum = adder_operand_a + adder_operand_b;
assign adder_sum = adder_operand_a + adder_operand_b;
always_comb begin
always_comb begin
adder_operand_a = $signed(amo_operand_a_i);
adder_operand_b = $signed(amo_operand_b_i);
adder_operand_a = $signed(amo_operand_a_i);
adder_operand_b = $signed(amo_operand_b_i);
amo_result_o = amo_operand_b_i;
amo_result_o = amo_operand_b_i;
unique case (amo_op_i)
// the default is to output operand_b
ariane_pkg::AMO_SC:;
ariane_pkg::AMO_SWAP:;
ariane_pkg::AMO_ADD: amo_result_o = adder_sum[63:0];
ariane_pkg::AMO_AND: amo_result_o = amo_operand_a_i & amo_operand_b_i;
ariane_pkg::AMO_OR: amo_result_o = amo_operand_a_i | amo_operand_b_i;
ariane_pkg::AMO_XOR: amo_result_o = amo_operand_a_i ^ amo_operand_b_i;
ariane_pkg::AMO_MAX: begin
adder_operand_b = -$signed(amo_operand_b_i);
amo_result_o = adder_sum[64] ? amo_operand_b_i : amo_operand_a_i;
end
ariane_pkg::AMO_MIN: begin
adder_operand_b = -$signed(amo_operand_b_i);
amo_result_o = adder_sum[64] ? amo_operand_a_i : amo_operand_b_i;
end
ariane_pkg::AMO_MAXU: begin
adder_operand_a = $unsigned(amo_operand_a_i);
adder_operand_b = -$unsigned(amo_operand_b_i);
amo_result_o = adder_sum[64] ? amo_operand_b_i : amo_operand_a_i;
end
ariane_pkg::AMO_MINU: begin
adder_operand_a = $unsigned(amo_operand_a_i);
adder_operand_b = -$unsigned(amo_operand_b_i);
amo_result_o = adder_sum[64] ? amo_operand_a_i : amo_operand_b_i;
end
default: amo_result_o = '0;
endcase
end
unique case (amo_op_i)
// the default is to output operand_b
ariane_pkg::AMO_SC: ;
ariane_pkg::AMO_SWAP: ;
ariane_pkg::AMO_ADD: amo_result_o = adder_sum[63:0];
ariane_pkg::AMO_AND: amo_result_o = amo_operand_a_i & amo_operand_b_i;
ariane_pkg::AMO_OR: amo_result_o = amo_operand_a_i | amo_operand_b_i;
ariane_pkg::AMO_XOR: amo_result_o = amo_operand_a_i ^ amo_operand_b_i;
ariane_pkg::AMO_MAX: begin
adder_operand_b = -$signed(amo_operand_b_i);
amo_result_o = adder_sum[64] ? amo_operand_b_i : amo_operand_a_i;
end
ariane_pkg::AMO_MIN: begin
adder_operand_b = -$signed(amo_operand_b_i);
amo_result_o = adder_sum[64] ? amo_operand_a_i : amo_operand_b_i;
end
ariane_pkg::AMO_MAXU: begin
adder_operand_a = $unsigned(amo_operand_a_i);
adder_operand_b = -$unsigned(amo_operand_b_i);
amo_result_o = adder_sum[64] ? amo_operand_b_i : amo_operand_a_i;
end
ariane_pkg::AMO_MINU: begin
adder_operand_a = $unsigned(amo_operand_a_i);
adder_operand_b = -$unsigned(amo_operand_b_i);
amo_result_o = adder_sum[64] ? amo_operand_a_i : amo_operand_b_i;
end
default: amo_result_o = '0;
endcase
end
endmodule

View file

@ -17,52 +17,66 @@
//import std_cache_pkg::*;
module axi_adapter #(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int unsigned DATA_WIDTH = 256,
parameter logic CRITICAL_WORD_FIRST = 0, // the AXI subsystem needs to support wrapping reads for this feature
parameter int unsigned CACHELINE_BYTE_OFFSET = 8,
parameter type axi_req_t = logic,
parameter type axi_rsp_t = logic
)(
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int unsigned DATA_WIDTH = 256,
parameter logic CRITICAL_WORD_FIRST = 0, // the AXI subsystem needs to support wrapping reads for this feature
parameter int unsigned CACHELINE_BYTE_OFFSET = 8,
parameter type axi_req_t = logic,
parameter type axi_rsp_t = logic
) (
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic req_i,
input ariane_pkg::ad_req_t type_i,
input ariane_pkg::amo_t amo_i,
output logic gnt_o,
input logic [riscv::XLEN-1:0] addr_i,
input logic we_i,
input logic [(DATA_WIDTH/CVA6Cfg.AxiDataWidth)-1:0][CVA6Cfg.AxiDataWidth-1:0] wdata_i,
input logic [(DATA_WIDTH/CVA6Cfg.AxiDataWidth)-1:0][(CVA6Cfg.AxiDataWidth/8)-1:0] be_i,
input logic [1:0] size_i,
input logic [CVA6Cfg.AxiIdWidth-1:0] id_i,
// read port
output logic valid_o,
output logic [(DATA_WIDTH/CVA6Cfg.AxiDataWidth)-1:0][CVA6Cfg.AxiDataWidth-1:0] rdata_o,
output logic [CVA6Cfg.AxiIdWidth-1:0] id_o,
// critical word - read port
output logic [CVA6Cfg.AxiDataWidth-1:0] critical_word_o,
output logic critical_word_valid_o,
// AXI port
output axi_req_t axi_req_o,
input axi_rsp_t axi_resp_i
input logic req_i,
input ariane_pkg::ad_req_t type_i,
input ariane_pkg::amo_t amo_i,
output logic gnt_o,
input logic [riscv::XLEN-1:0] addr_i,
input logic we_i,
input logic [(DATA_WIDTH/CVA6Cfg.AxiDataWidth)-1:0][CVA6Cfg.AxiDataWidth-1:0] wdata_i,
input logic [(DATA_WIDTH/CVA6Cfg.AxiDataWidth)-1:0][(CVA6Cfg.AxiDataWidth/8)-1:0] be_i,
input logic [1:0] size_i,
input logic [CVA6Cfg.AxiIdWidth-1:0] id_i,
// read port
output logic valid_o,
output logic [(DATA_WIDTH/CVA6Cfg.AxiDataWidth)-1:0][CVA6Cfg.AxiDataWidth-1:0] rdata_o,
output logic [CVA6Cfg.AxiIdWidth-1:0] id_o,
// critical word - read port
output logic [CVA6Cfg.AxiDataWidth-1:0] critical_word_o,
output logic critical_word_valid_o,
// AXI port
output axi_req_t axi_req_o,
input axi_rsp_t axi_resp_i
);
localparam BURST_SIZE = (DATA_WIDTH/CVA6Cfg.AxiDataWidth)-1;
localparam ADDR_INDEX = ($clog2(DATA_WIDTH/CVA6Cfg.AxiDataWidth) > 0) ? $clog2(DATA_WIDTH/CVA6Cfg.AxiDataWidth) : 1;
localparam BURST_SIZE = (DATA_WIDTH / CVA6Cfg.AxiDataWidth) - 1;
localparam ADDR_INDEX = ($clog2(
DATA_WIDTH / CVA6Cfg.AxiDataWidth
) > 0) ? $clog2(
DATA_WIDTH / CVA6Cfg.AxiDataWidth
) : 1;
enum logic [3:0] {
IDLE, WAIT_B_VALID, WAIT_AW_READY, WAIT_LAST_W_READY, WAIT_LAST_W_READY_AW_READY, WAIT_AW_READY_BURST,
WAIT_R_VALID, WAIT_R_VALID_MULTIPLE, COMPLETE_READ, WAIT_AMO_R_VALID
} state_q, state_d;
IDLE,
WAIT_B_VALID,
WAIT_AW_READY,
WAIT_LAST_W_READY,
WAIT_LAST_W_READY_AW_READY,
WAIT_AW_READY_BURST,
WAIT_R_VALID,
WAIT_R_VALID_MULTIPLE,
COMPLETE_READ,
WAIT_AMO_R_VALID
}
state_q, state_d;
// counter for AXI transfers
logic [ADDR_INDEX-1:0] cnt_d, cnt_q;
logic [(DATA_WIDTH/CVA6Cfg.AxiDataWidth)-1:0][CVA6Cfg.AxiDataWidth-1:0] cache_line_d, cache_line_q;
logic [(DATA_WIDTH/CVA6Cfg.AxiDataWidth)-1:0][CVA6Cfg.AxiDataWidth-1:0]
cache_line_d, cache_line_q;
// save the address for a read, as we allow for non-cacheline aligned accesses
logic [(DATA_WIDTH/CVA6Cfg.AxiDataWidth)-1:0] addr_offset_d, addr_offset_q;
logic [CVA6Cfg.AxiIdWidth-1:0] id_d, id_q;
logic [ADDR_INDEX-1:0] index;
logic [CVA6Cfg.AxiIdWidth-1:0] id_d, id_q;
logic [ADDR_INDEX-1:0] index;
// save the atomic operation and size
ariane_pkg::amo_t amo_d, amo_q;
logic [1:0] size_d, size_q;
@ -75,8 +89,8 @@ module axi_adapter #(
axi_req_o.aw.prot = 3'b0;
axi_req_o.aw.region = 4'b0;
axi_req_o.aw.len = 8'b0;
axi_req_o.aw.size = {1'b0, size_i}; // 1, 2, 4 or 8 bytes
axi_req_o.aw.burst = axi_pkg::BURST_INCR; // Use BURST_INCR for AXI regular transaction
axi_req_o.aw.size = {1'b0, size_i}; // 1, 2, 4 or 8 bytes
axi_req_o.aw.burst = axi_pkg::BURST_INCR; // Use BURST_INCR for AXI regular transaction
axi_req_o.aw.lock = 1'b0;
axi_req_o.aw.cache = axi_pkg::CACHE_MODIFIABLE;
axi_req_o.aw.qos = 4'b0;
@ -92,42 +106,42 @@ module axi_adapter #(
if (!CRITICAL_WORD_FIRST && type_i != ariane_pkg::SINGLE_REQ) begin
axi_req_o.ar.addr[CACHELINE_BYTE_OFFSET-1:0] = '0;
end
axi_req_o.ar.prot = 3'b0;
axi_req_o.ar.prot = 3'b0;
axi_req_o.ar.region = 4'b0;
axi_req_o.ar.len = 8'b0;
axi_req_o.ar.size = {1'b0, size_i}; // 1, 2, 4 or 8 bytes
axi_req_o.ar.len = 8'b0;
axi_req_o.ar.size = {1'b0, size_i}; // 1, 2, 4 or 8 bytes
axi_req_o.ar.burst = (CRITICAL_WORD_FIRST ? axi_pkg::BURST_WRAP : axi_pkg::BURST_INCR); // wrapping transfer in case of a critical word first strategy
axi_req_o.ar.lock = 1'b0;
axi_req_o.ar.cache = axi_pkg::CACHE_MODIFIABLE;
axi_req_o.ar.qos = 4'b0;
axi_req_o.ar.id = id_i;
axi_req_o.ar.user = '0;
axi_req_o.ar.lock = 1'b0;
axi_req_o.ar.cache = axi_pkg::CACHE_MODIFIABLE;
axi_req_o.ar.qos = 4'b0;
axi_req_o.ar.id = id_i;
axi_req_o.ar.user = '0;
axi_req_o.w_valid = 1'b0;
axi_req_o.w.data = wdata_i[0];
axi_req_o.w.strb = be_i[0];
axi_req_o.w.last = 1'b0;
axi_req_o.w.user = '0;
axi_req_o.w_valid = 1'b0;
axi_req_o.w.data = wdata_i[0];
axi_req_o.w.strb = be_i[0];
axi_req_o.w.last = 1'b0;
axi_req_o.w.user = '0;
axi_req_o.b_ready = 1'b0;
axi_req_o.r_ready = 1'b0;
axi_req_o.b_ready = 1'b0;
axi_req_o.r_ready = 1'b0;
gnt_o = 1'b0;
valid_o = 1'b0;
id_o = axi_resp_i.r.id;
gnt_o = 1'b0;
valid_o = 1'b0;
id_o = axi_resp_i.r.id;
critical_word_o = axi_resp_i.r.data;
critical_word_o = axi_resp_i.r.data;
critical_word_valid_o = 1'b0;
rdata_o = cache_line_q;
rdata_o = cache_line_q;
state_d = state_q;
cnt_d = cnt_q;
cache_line_d = cache_line_q;
state_d = state_q;
cnt_d = cnt_q;
cache_line_d = cache_line_q;
addr_offset_d = addr_offset_q;
id_d = id_q;
amo_d = amo_q;
size_d = size_q;
index = '0;
id_d = id_q;
amo_d = amo_q;
size_d = size_q;
index = '0;
case (state_q)
@ -142,17 +156,19 @@ module axi_adapter #(
axi_req_o.aw_valid = 1'b1;
axi_req_o.w_valid = 1'b1;
// store-conditional requires exclusive access
axi_req_o.aw.lock = amo_i == ariane_pkg::AMO_SC;
axi_req_o.aw.lock = amo_i == ariane_pkg::AMO_SC;
// its a single write
if (type_i == ariane_pkg::SINGLE_REQ) begin
// only a single write so the data is already the last one
axi_req_o.w.last = 1'b1;
axi_req_o.w.last = 1'b1;
// single req can be granted here
gnt_o = axi_resp_i.aw_ready & axi_resp_i.w_ready;
case ({axi_resp_i.aw_ready, axi_resp_i.w_ready})
2'b11: state_d = WAIT_B_VALID;
2'b01: state_d = WAIT_AW_READY;
2'b10: state_d = WAIT_LAST_W_READY;
case ({
axi_resp_i.aw_ready, axi_resp_i.w_ready
})
2'b11: state_d = WAIT_B_VALID;
2'b01: state_d = WAIT_AW_READY;
2'b10: state_d = WAIT_LAST_W_READY;
default: state_d = IDLE;
endcase
@ -161,29 +177,29 @@ module axi_adapter #(
size_d = size_i;
end
// its a request for the whole cache line
// its a request for the whole cache line
end else begin
// bursts of AMOs unsupported
assert (amo_i == ariane_pkg::AMO_NONE)
else $fatal("Bursts of atomic operations are not supported");
else $fatal("Bursts of atomic operations are not supported");
axi_req_o.aw.len = BURST_SIZE[7:0]; // number of bursts to do
axi_req_o.aw.len = BURST_SIZE[7:0]; // number of bursts to do
axi_req_o.w.data = wdata_i[0];
axi_req_o.w.strb = be_i[0];
if (axi_resp_i.w_ready)
cnt_d = BURST_SIZE[ADDR_INDEX-1:0] - 1;
else
cnt_d = BURST_SIZE[ADDR_INDEX-1:0];
if (axi_resp_i.w_ready) cnt_d = BURST_SIZE[ADDR_INDEX-1:0] - 1;
else cnt_d = BURST_SIZE[ADDR_INDEX-1:0];
case ({axi_resp_i.aw_ready, axi_resp_i.w_ready})
2'b11: state_d = WAIT_LAST_W_READY;
2'b01: state_d = WAIT_LAST_W_READY_AW_READY;
2'b10: state_d = WAIT_LAST_W_READY;
default:;
case ({
axi_resp_i.aw_ready, axi_resp_i.w_ready
})
2'b11: state_d = WAIT_LAST_W_READY;
2'b01: state_d = WAIT_LAST_W_READY_AW_READY;
2'b10: state_d = WAIT_LAST_W_READY;
default: ;
endcase
end
// read
// read
end else begin
axi_req_o.ar_valid = 1'b1;
@ -193,7 +209,7 @@ module axi_adapter #(
gnt_o = axi_resp_i.ar_ready;
if (type_i != ariane_pkg::SINGLE_REQ) begin
assert (amo_i == ariane_pkg::AMO_NONE)
else $fatal("Bursts of atomic operations are not supported");
else $fatal("Bursts of atomic operations are not supported");
axi_req_o.ar.len = BURST_SIZE[7:0];
cnt_d = BURST_SIZE[ADDR_INDEX-1:0];
@ -221,8 +237,8 @@ module axi_adapter #(
// ~> we need to wait for an aw_ready and there is at least one outstanding write
WAIT_LAST_W_READY_AW_READY: begin
axi_req_o.w_valid = 1'b1;
axi_req_o.w.last = (cnt_q == '0);
axi_req_o.w_valid = 1'b1;
axi_req_o.w.last = (cnt_q == '0);
if (type_i == ariane_pkg::SINGLE_REQ) begin
axi_req_o.w.data = wdata_i[0];
axi_req_o.w.strb = be_i[0];
@ -234,29 +250,30 @@ module axi_adapter #(
// we are here because we want to write a cache line
axi_req_o.aw.len = BURST_SIZE[7:0];
// we got an aw_ready
case ({axi_resp_i.aw_ready, axi_resp_i.w_ready})
case ({
axi_resp_i.aw_ready, axi_resp_i.w_ready
})
// we got an aw ready
2'b01: begin
// are there any outstanding transactions?
if (cnt_q == 0)
state_d = WAIT_AW_READY_BURST;
else // yes, so reduce the count and stay here
if (cnt_q == 0) state_d = WAIT_AW_READY_BURST;
else // yes, so reduce the count and stay here
cnt_d = cnt_q - 1;
end
2'b10: state_d = WAIT_LAST_W_READY;
2'b10: state_d = WAIT_LAST_W_READY;
2'b11: begin
// we are finished
if (cnt_q == 0) begin
state_d = WAIT_B_VALID;
gnt_o = 1'b1;
// there are outstanding transactions
// there are outstanding transactions
end else begin
state_d = WAIT_LAST_W_READY;
cnt_d = cnt_q - 1;
end
end
default:;
endcase
default: ;
endcase
end
@ -266,8 +283,8 @@ module axi_adapter #(
axi_req_o.aw.len = BURST_SIZE[7:0];
if (axi_resp_i.aw_ready) begin
state_d = WAIT_B_VALID;
gnt_o = 1'b1;
state_d = WAIT_B_VALID;
gnt_o = 1'b1;
end
end
@ -347,10 +364,8 @@ module axi_adapter #(
// ~> cacheline read, single read
WAIT_R_VALID_MULTIPLE, WAIT_R_VALID: begin
if (CRITICAL_WORD_FIRST)
index = addr_offset_q + (BURST_SIZE[ADDR_INDEX-1:0]-cnt_q);
else
index = BURST_SIZE[ADDR_INDEX-1:0]-cnt_q;
if (CRITICAL_WORD_FIRST) index = addr_offset_q + (BURST_SIZE[ADDR_INDEX-1:0] - cnt_q);
else index = BURST_SIZE[ADDR_INDEX-1:0] - cnt_q;
// reads are always wrapping here
axi_req_o.r_ready = 1'b1;
@ -380,8 +395,7 @@ module axi_adapter #(
if (state_q == WAIT_R_VALID_MULTIPLE) begin
cache_line_d[index] = axi_resp_i.r.data;
end else
cache_line_d[0] = axi_resp_i.r.data;
end else cache_line_d[0] = axi_resp_i.r.data;
// Decrease the counter
cnt_d = cnt_q - 1;
@ -425,19 +439,27 @@ module axi_adapter #(
function automatic axi_pkg::atop_t atop_from_amo(ariane_pkg::amo_t amo);
axi_pkg::atop_t result = 6'b000000;
unique case(amo)
unique case (amo)
ariane_pkg::AMO_NONE: result = {axi_pkg::ATOP_NONE, 4'b0000};
ariane_pkg::AMO_SWAP: result = {axi_pkg::ATOP_ATOMICSWAP};
ariane_pkg::AMO_ADD : result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_ADD};
ariane_pkg::AMO_AND : result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_CLR};
ariane_pkg::AMO_OR : result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SET};
ariane_pkg::AMO_XOR : result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_EOR};
ariane_pkg::AMO_MAX : result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SMAX};
ariane_pkg::AMO_MAXU: result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_UMAX};
ariane_pkg::AMO_MIN : result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SMIN};
ariane_pkg::AMO_MINU: result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_UMIN};
ariane_pkg::AMO_CAS1: result = {axi_pkg::ATOP_NONE, 4'b0000}; // Unsupported
ariane_pkg::AMO_CAS2: result = {axi_pkg::ATOP_NONE, 4'b0000}; // Unsupported
ariane_pkg::AMO_ADD:
result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_ADD};
ariane_pkg::AMO_AND:
result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_CLR};
ariane_pkg::AMO_OR:
result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SET};
ariane_pkg::AMO_XOR:
result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_EOR};
ariane_pkg::AMO_MAX:
result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SMAX};
ariane_pkg::AMO_MAXU:
result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_UMAX};
ariane_pkg::AMO_MIN:
result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SMIN};
ariane_pkg::AMO_MINU:
result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_UMIN};
ariane_pkg::AMO_CAS1: result = {axi_pkg::ATOP_NONE, 4'b0000}; // Unsupported
ariane_pkg::AMO_CAS2: result = {axi_pkg::ATOP_NONE, 4'b0000}; // Unsupported
default: result = 6'b000000;
endcase
@ -445,8 +467,8 @@ module axi_adapter #(
endfunction
function automatic logic amo_returns_data(ariane_pkg::amo_t amo);
axi_pkg::atop_t atop = atop_from_amo(amo);
logic is_load = atop[5:4] == axi_pkg::ATOP_ATOMICLOAD;
axi_pkg::atop_t atop = atop_from_amo(amo);
logic is_load = atop[5:4] == axi_pkg::ATOP_ATOMICLOAD;
logic is_swap_or_cmp = atop[5:4] == axi_pkg::ATOP_ATOMICSWAP[5:4];
return is_load || is_swap_or_cmp;
endfunction

View file

@ -18,446 +18,453 @@
// Description: Cache controller
module cache_ctrl import ariane_pkg::*; import std_cache_pkg::*; #(
module cache_ctrl
import ariane_pkg::*;
import std_cache_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) (
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic flush_i,
input logic bypass_i, // enable cache
output logic busy_o,
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic flush_i,
input logic bypass_i, // enable cache
output logic busy_o,
// Core request ports
input dcache_req_i_t req_port_i,
output dcache_req_o_t req_port_o,
input dcache_req_i_t req_port_i,
output dcache_req_o_t req_port_o,
// SRAM interface
output logic [DCACHE_SET_ASSOC-1:0] req_o, // req is valid
output logic [DCACHE_INDEX_WIDTH-1:0] addr_o, // address into cache array
input logic gnt_i,
output cache_line_t data_o,
output cl_be_t be_o,
output logic [DCACHE_TAG_WIDTH-1:0] tag_o, //valid one cycle later
input cache_line_t [DCACHE_SET_ASSOC-1:0] data_i,
output logic we_o,
input logic [DCACHE_SET_ASSOC-1:0] hit_way_i,
output logic [DCACHE_SET_ASSOC-1:0] req_o, // req is valid
output logic [DCACHE_INDEX_WIDTH-1:0] addr_o, // address into cache array
input logic gnt_i,
output cache_line_t data_o,
output cl_be_t be_o,
output logic [DCACHE_TAG_WIDTH-1:0] tag_o, //valid one cycle later
input cache_line_t [DCACHE_SET_ASSOC-1:0] data_i,
output logic we_o,
input logic [DCACHE_SET_ASSOC-1:0] hit_way_i,
// Miss handling
output miss_req_t miss_req_o,
output miss_req_t miss_req_o,
// return
input logic miss_gnt_i,
input logic miss_gnt_i,
input logic active_serving_i, // the miss unit is currently active for this unit, serving the miss
input logic [63:0] critical_word_i,
input logic critical_word_valid_i,
input logic [63:0] critical_word_i,
input logic critical_word_valid_i,
// bypass ports
input logic bypass_gnt_i,
input logic bypass_valid_i,
input logic [63:0] bypass_data_i,
input logic bypass_gnt_i,
input logic bypass_valid_i,
input logic [63:0] bypass_data_i,
// check MSHR for aliasing
output logic [55:0] mshr_addr_o,
input logic mshr_addr_matches_i,
input logic mshr_index_matches_i
output logic [55:0] mshr_addr_o,
input logic mshr_addr_matches_i,
input logic mshr_index_matches_i
);
enum logic [3:0] {
IDLE, // 0
WAIT_TAG, // 1
WAIT_TAG_BYPASSED, // 2
WAIT_GNT, // 3
WAIT_GNT_SAVED, // 4
STORE_REQ, // 5
WAIT_REFILL_VALID, // 6
WAIT_REFILL_GNT, // 7
WAIT_TAG_SAVED, // 8
WAIT_MSHR, // 9
WAIT_CRITICAL_WORD // 10
} state_d, state_q;
enum logic [3:0] {
IDLE, // 0
WAIT_TAG, // 1
WAIT_TAG_BYPASSED, // 2
WAIT_GNT, // 3
WAIT_GNT_SAVED, // 4
STORE_REQ, // 5
WAIT_REFILL_VALID, // 6
WAIT_REFILL_GNT, // 7
WAIT_TAG_SAVED, // 8
WAIT_MSHR, // 9
WAIT_CRITICAL_WORD // 10
}
state_d, state_q;
typedef struct packed {
logic [DCACHE_INDEX_WIDTH-1:0] index;
logic [DCACHE_TAG_WIDTH-1:0] tag;
logic [DCACHE_TID_WIDTH-1:0] id;
logic [7:0] be;
logic [1:0] size;
logic we;
logic [63:0] wdata;
logic bypass;
logic killed;
} mem_req_t;
typedef struct packed {
logic [DCACHE_INDEX_WIDTH-1:0] index;
logic [DCACHE_TAG_WIDTH-1:0] tag;
logic [DCACHE_TID_WIDTH-1:0] id;
logic [7:0] be;
logic [1:0] size;
logic we;
logic [63:0] wdata;
logic bypass;
logic killed;
} mem_req_t;
logic [DCACHE_SET_ASSOC-1:0] hit_way_d, hit_way_q;
logic [DCACHE_SET_ASSOC-1:0] hit_way_d, hit_way_q;
mem_req_t mem_req_d, mem_req_q;
mem_req_t mem_req_d, mem_req_q;
assign busy_o = (state_q != IDLE);
assign tag_o = mem_req_d.tag;
assign busy_o = (state_q != IDLE);
assign tag_o = mem_req_d.tag;
logic [DCACHE_LINE_WIDTH-1:0] cl_i;
logic [DCACHE_LINE_WIDTH-1:0] cl_i;
always_comb begin : way_select
cl_i = '0;
for (int unsigned i = 0; i < DCACHE_SET_ASSOC; i++)
if (hit_way_i[i])
cl_i = data_i[i].data;
always_comb begin : way_select
cl_i = '0;
for (int unsigned i = 0; i < DCACHE_SET_ASSOC; i++) if (hit_way_i[i]) cl_i = data_i[i].data;
// cl_i = data_i[one_hot_to_bin(hit_way_i)].data;
end
// cl_i = data_i[one_hot_to_bin(hit_way_i)].data;
end
// --------------
// Cache FSM
// --------------
always_comb begin : cache_ctrl_fsm
automatic logic [$clog2(DCACHE_LINE_WIDTH)-1:0] cl_offset;
// incoming cache-line -> this is needed as synthesis is not supporting +: indexing in a multi-dimensional array
// cache-line offset -> multiple of 64
cl_offset = mem_req_q.index[DCACHE_BYTE_OFFSET-1:3] << 6; // shift by 6 to the left
// default assignments
state_d = state_q;
mem_req_d = mem_req_q;
hit_way_d = hit_way_q;
// output assignments
req_port_o.data_gnt = 1'b0;
req_port_o.data_rvalid = 1'b0;
req_port_o.data_rdata = '0;
req_port_o.data_rid = mem_req_q.id;
miss_req_o = '0;
mshr_addr_o = '0;
// Memory array communication
req_o = '0;
addr_o = req_port_i.address_index;
data_o = '0;
be_o = '0;
we_o = '0;
// --------------
// Cache FSM
// --------------
always_comb begin : cache_ctrl_fsm
automatic logic [$clog2(DCACHE_LINE_WIDTH)-1:0] cl_offset;
// incoming cache-line -> this is needed as synthesis is not supporting +: indexing in a multi-dimensional array
// cache-line offset -> multiple of 64
cl_offset = mem_req_q.index[DCACHE_BYTE_OFFSET-1:3] << 6; // shift by 6 to the left
// default assignments
state_d = state_q;
mem_req_d = mem_req_q;
hit_way_d = hit_way_q;
// output assignments
req_port_o.data_gnt = 1'b0;
req_port_o.data_rvalid = 1'b0;
req_port_o.data_rdata = '0;
req_port_o.data_rid = mem_req_q.id;
miss_req_o = '0;
mshr_addr_o = '0;
// Memory array communication
req_o = '0;
addr_o = req_port_i.address_index;
data_o = '0;
be_o = '0;
we_o = '0;
mem_req_d.killed |= req_port_i.kill_req;
mem_req_d.killed |= req_port_i.kill_req;
case (state_q)
case (state_q)
IDLE: begin
// a new request arrived
if (req_port_i.data_req && !flush_i) begin
// request the cache line - we can do this speculatively
req_o = '1;
IDLE: begin
// a new request arrived
if (req_port_i.data_req && !flush_i) begin
// request the cache line - we can do this speculatively
req_o = '1;
// save index, be and we
mem_req_d.index = req_port_i.address_index;
mem_req_d.id = req_port_i.data_id;
mem_req_d.be = req_port_i.data_be;
mem_req_d.size = req_port_i.data_size;
mem_req_d.we = req_port_i.data_we;
mem_req_d.wdata = req_port_i.data_wdata;
mem_req_d.killed = req_port_i.kill_req;
// save index, be and we
mem_req_d.index = req_port_i.address_index;
mem_req_d.id = req_port_i.data_id;
mem_req_d.be = req_port_i.data_be;
mem_req_d.size = req_port_i.data_size;
mem_req_d.we = req_port_i.data_we;
mem_req_d.wdata = req_port_i.data_wdata;
mem_req_d.killed = req_port_i.kill_req;
// Bypass mode, check for uncacheable address here as well
if (bypass_i) begin
state_d = WAIT_TAG_BYPASSED;
// grant this access only if it was a load
req_port_o.data_gnt = (req_port_i.data_we) ? 1'b0 : 1'b1;
mem_req_d.bypass = 1'b1;
// ------------------
// Cache is enabled
// ------------------
end else begin
// Wait that we have access on the memory array
if (gnt_i) begin
state_d = WAIT_TAG;
mem_req_d.bypass = 1'b0;
// only for a read
if (!req_port_i.data_we)
req_port_o.data_gnt = 1'b1;
end
end
end
// Bypass mode, check for uncacheable address here as well
if (bypass_i) begin
state_d = WAIT_TAG_BYPASSED;
// grant this access only if it was a load
req_port_o.data_gnt = (req_port_i.data_we) ? 1'b0 : 1'b1;
mem_req_d.bypass = 1'b1;
// ------------------
// Cache is enabled
// ------------------
end else begin
// Wait that we have access on the memory array
if (gnt_i) begin
state_d = WAIT_TAG;
mem_req_d.bypass = 1'b0;
// only for a read
if (!req_port_i.data_we) req_port_o.data_gnt = 1'b1;
end
end
end
end
// cache enabled and waiting for tag
WAIT_TAG, WAIT_TAG_SAVED: begin
// check that the client really wants to do the request and that we have a valid tag
if (!req_port_i.kill_req && (req_port_i.tag_valid || state_q == WAIT_TAG_SAVED || mem_req_q.we)) begin
// save tag if we didn't already save it
if (state_q != WAIT_TAG_SAVED) begin
mem_req_d.tag = req_port_i.address_tag;
end
// we speculatively request another transfer
if (req_port_i.data_req && !flush_i) begin
req_o = '1;
end
// ------------
// HIT CASE
// ------------
if (|hit_way_i) begin
// we can request another cache-line if this was a load
if (req_port_i.data_req && !mem_req_q.we && !flush_i) begin
state_d = WAIT_TAG; // switch back to WAIT_TAG
mem_req_d.index = req_port_i.address_index;
mem_req_d.id = req_port_i.data_id;
mem_req_d.be = req_port_i.data_be;
mem_req_d.size = req_port_i.data_size;
mem_req_d.we = req_port_i.data_we;
mem_req_d.wdata = req_port_i.data_wdata;
mem_req_d.killed = req_port_i.kill_req;
mem_req_d.bypass = 1'b0;
// cache enabled and waiting for tag
WAIT_TAG, WAIT_TAG_SAVED: begin
// check that the client really wants to do the request and that we have a valid tag
if (!req_port_i.kill_req && (req_port_i.tag_valid || state_q == WAIT_TAG_SAVED || mem_req_q.we)) begin
// save tag if we didn't already save it
if (state_q != WAIT_TAG_SAVED) begin
mem_req_d.tag = req_port_i.address_tag;
end
// we speculatively request another transfer
if (req_port_i.data_req && !flush_i) begin
req_o = '1;
end
// ------------
// HIT CASE
// ------------
if (|hit_way_i) begin
// we can request another cache-line if this was a load
if (req_port_i.data_req && !mem_req_q.we && !flush_i) begin
state_d = WAIT_TAG; // switch back to WAIT_TAG
mem_req_d.index = req_port_i.address_index;
mem_req_d.id = req_port_i.data_id;
mem_req_d.be = req_port_i.data_be;
mem_req_d.size = req_port_i.data_size;
mem_req_d.we = req_port_i.data_we;
mem_req_d.wdata = req_port_i.data_wdata;
mem_req_d.killed = req_port_i.kill_req;
mem_req_d.bypass = 1'b0;
req_port_o.data_gnt = gnt_i;
req_port_o.data_gnt = gnt_i;
if (!gnt_i) begin
state_d = IDLE;
end
end else begin
state_d = IDLE;
end
// this is timing critical
req_port_o.data_rdata = cl_i[cl_offset +: 64];
// report data for a read
if (!mem_req_q.we) begin
req_port_o.data_rvalid = ~mem_req_q.killed;
// else this was a store so we need an extra step to handle it
end else begin
state_d = STORE_REQ;
hit_way_d = hit_way_i;
end
// ------------
// MISS CASE
// ------------
end else begin
// make a miss request
state_d = WAIT_REFILL_GNT;
end
// ----------------------------------------------
// Check MSHR - Miss Status Handling Register
// ----------------------------------------------
mshr_addr_o = {tag_o, mem_req_q.index};
// 1. We've got a match on MSHR while we are going down the
// store path. This means that the miss controller is
// currently evicting our cache-line. As the store is
// non-atomic we need to constantly check whether we are
// matching the address the miss handler is serving.
// Furthermore we need to check for the whole index
// because a completely different memory line could alias
// with the cache-line we are evicting.
// 2. The second case is where we are currently loading and
// the address matches the exact CL the miss controller
// is currently serving. That means we need to wait for
// the miss controller to finish its request before we
// can continue to serve this CL. Otherwise we will fetch
// the cache-line again and potentially lose any
// content we've written so far. This as a consequence
// means we can't have a hit on the CL, which means the
// req_port_o.data_rvalid will be de-asserted.
if ((mshr_index_matches_i && mem_req_q.we) || mshr_addr_matches_i) begin
state_d = WAIT_MSHR;
end
// -------------------------
// Check for cache-ability
// -------------------------
if (!config_pkg::is_inside_cacheable_regions(CVA6Cfg, {{{64-riscv::PLEN}{1'b0}}, tag_o, {DCACHE_INDEX_WIDTH{1'b0}}})) begin
mem_req_d.bypass = 1'b1;
state_d = WAIT_REFILL_GNT;
end
// we are still waiting for a valid tag
end else begin
// request cache line for saved index
addr_o = mem_req_q.index;
req_o = '1;
// check that we still have a memory grant
if (!gnt_i) begin
state_d = WAIT_GNT;
end
end
end
// ~> we already granted the request but lost the memory grant while waiting for the tag
// WAIT_GNT:       no tag has been captured yet.
// WAIT_GNT_SAVED: the tag has already been captured into mem_req_d.tag.
WAIT_GNT, WAIT_GNT_SAVED: begin
// request cache line for saved index
addr_o = mem_req_q.index;
req_o = '1;
// if we get a valid tag while waiting for the memory grant, save it
if (req_port_i.tag_valid) begin
mem_req_d.tag = req_port_i.address_tag;
state_d = WAIT_GNT_SAVED;
end
// we have a memory grant again ~> go back to WAIT_TAG
// The comparison reads state_d, not state_q, so a tag captured by the `if`
// above in this same evaluation already selects WAIT_TAG_SAVED.
// NOTE(review): presumably state_d defaults to state_q before the case
// statement (not visible in this excerpt) - confirm.
if (gnt_i) begin
state_d = (state_d == WAIT_GNT) ? WAIT_TAG : WAIT_TAG_SAVED;
end
end
// ~> we are here as we need a second round of memory access for a store
STORE_REQ: begin
// check if the MSHR still doesn't match
mshr_addr_o = {mem_req_q.tag, mem_req_q.index};
// We need to re-check for MSHR aliasing here as the store requires at least
// two memory look-ups on a single-ported SRAM and therefore is non-atomic
if (!mshr_index_matches_i) begin
// store data, write dirty bit
// only the way(s) recorded in hit_way_q are requested and written
req_o = hit_way_q;
addr_o = mem_req_q.index;
we_o = 1'b1;
be_o.vldrty = hit_way_q;
// set the correct byte enable
// cl_offset is used as a bit offset for the data slice below, so
// cl_offset>>3 is the matching byte-enable offset
be_o.data[cl_offset>>3 +: 8] = mem_req_q.be;
data_o.data[cl_offset +: 64] = mem_req_q.wdata;
// ~> change the state: mark the written line dirty and valid
data_o.dirty = 1'b1;
data_o.valid = 1'b1;
// got a grant ~> this is finished now
// the requester is only granted once the write itself has been granted
if (gnt_i) begin
req_port_o.data_gnt = 1'b1;
state_d = IDLE;
end
end else begin
// MSHR index matches: do not write, wait for the miss unit instead
state_d = WAIT_MSHR;
end
end // case: STORE_REQ
// we've got a match on MSHR ~> miss unit is currently serving a request
// Stay in this state while mshr_index_matches_i is set for the saved address.
WAIT_MSHR: begin
mshr_addr_o = {mem_req_q.tag, mem_req_q.index};
// we can start a new request
if (!mshr_index_matches_i) begin
req_o = '1;
addr_o = mem_req_q.index;
// resume in the *_SAVED variant: the tag comes from mem_req_q.tag
if (gnt_i) state_d = WAIT_TAG_SAVED;
end
end
// it's for sure a miss: no hit check is made here, the request always
// continues to WAIT_REFILL_GNT
WAIT_TAG_BYPASSED: begin
// check that the client really wants to do the request and that we have a valid tag
// (a saved write proceeds even without tag_valid)
if (!req_port_i.kill_req && (req_port_i.tag_valid || mem_req_q.we)) begin
// save tag
mem_req_d.tag = req_port_i.address_tag;
state_d = WAIT_REFILL_GNT;
end
end
// ~> wait for grant from miss unit
// The three state_d decisions below are evaluated in order and the last
// assignment wins: bypass grant, then miss grant, then MSHR match.
WAIT_REFILL_GNT: begin
mshr_addr_o = {mem_req_q.tag, mem_req_q.index};
// forward the saved request to the miss unit
miss_req_o.valid = 1'b1;
miss_req_o.bypass = mem_req_q.bypass;
miss_req_o.addr = {mem_req_q.tag, mem_req_q.index};
miss_req_o.be = mem_req_q.be;
miss_req_o.size = mem_req_q.size;
miss_req_o.we = mem_req_q.we;
miss_req_o.wdata = mem_req_q.wdata;
// got a grant so go to valid
if (bypass_gnt_i) begin
state_d = WAIT_REFILL_VALID;
// if this was a write we still need to give a grant to the store unit
if (mem_req_q.we)
req_port_o.data_gnt = 1'b1;
end
// miss grant: a read waits for the critical word, a write is finished
// and granted to the requester
if (miss_gnt_i && !mem_req_q.we)
state_d = WAIT_CRITICAL_WORD;
else if (miss_gnt_i) begin
state_d = IDLE;
req_port_o.data_gnt = 1'b1;
end
// it can be the case that the miss unit is currently serving a
// request which matches ours
// so we need to check the MSHR for matching continuously
// if the MSHR matches we need to go to a different state -> we should never get a matching MSHR and a high miss_gnt_i
if (mshr_addr_matches_i && !active_serving_i) begin
state_d = WAIT_MSHR;
end
end
// ~> wait for critical word to arrive
WAIT_CRITICAL_WORD: begin
// speculatively request another word
if (req_port_i.data_req) begin
// request the cache line
req_o = '1;
end
if (critical_word_valid_i) begin
// return the critical word; rvalid is suppressed for a killed request
req_port_o.data_rvalid = ~mem_req_q.killed;
req_port_o.data_rdata = critical_word_i;
// we can make another request
if (req_port_i.data_req) begin
// save index, be and we
mem_req_d.index = req_port_i.address_index;
mem_req_d.id = req_port_i.data_id;
mem_req_d.be = req_port_i.data_be;
mem_req_d.size = req_port_i.data_size;
mem_req_d.we = req_port_i.data_we;
mem_req_d.wdata = req_port_i.data_wdata;
mem_req_d.killed = req_port_i.kill_req;
// default to IDLE; overridden just below when the memory array is granted
state_d = IDLE;
// Wait until we have access on the memory array
if (gnt_i) begin
state_d = WAIT_TAG;
mem_req_d.bypass = 1'b0;
req_port_o.data_gnt = 1'b1;
end
end else begin
state_d = IDLE;
end
end
end
// ~> wait until the bypass request is valid
WAIT_REFILL_VALID: begin
// got a valid answer
if (bypass_valid_i) begin
// hand the bypass data to the requester; rvalid is suppressed for a
// killed request
req_port_o.data_rdata = bypass_data_i;
req_port_o.data_rvalid = ~mem_req_q.killed;
state_d = IDLE;
end
end
endcase
if (req_port_i.kill_req) begin
req_port_o.data_rvalid = 1'b1;
if (!(state_q inside {
WAIT_REFILL_GNT,
WAIT_CRITICAL_WORD})) begin
if (!gnt_i) begin
state_d = IDLE;
end
end else begin
state_d = IDLE;
end
end
end
// --------------
// Registers
// --------------
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
state_q <= IDLE;
mem_req_q <= '0;
hit_way_q <= '0;
// this is timing critical
req_port_o.data_rdata = cl_i[cl_offset+:64];
// report data for a read
if (!mem_req_q.we) begin
req_port_o.data_rvalid = ~mem_req_q.killed;
// else this was a store so we need an extra step to handle it
end else begin
state_d = STORE_REQ;
hit_way_d = hit_way_i;
end
// ------------
// MISS CASE
// ------------
end else begin
// make a miss request
state_d = WAIT_REFILL_GNT;
end
// ----------------------------------------------
// Check MSHR - Miss Status Handling Register
// ----------------------------------------------
mshr_addr_o = {tag_o, mem_req_q.index};
// 1. We've got a match on MSHR while we are going down the
// store path. This means that the miss controller is
// currently evicting our cache-line. As the store is
// non-atomic we need to constantly check whether we are
// matching the address the miss handler is serving.
// Furthermore we need to check for the whole index
// because a completely different memory line could alias
// with the cache-line we are evicting.
// 2. The second case is where we are currently loading and
// the address matches the exact CL the miss controller
// is currently serving. That means we need to wait for
// the miss controller to finish its request before we
// can continue to serve this CL. Otherwise we will fetch
// the cache-line again and potentially lose any
// content we've written so far. This as a consequence
// means we can't have a hit on the CL, which means the
// req_port_o.data_rvalid will be de-asserted.
if ((mshr_index_matches_i && mem_req_q.we) || mshr_addr_matches_i) begin
state_d = WAIT_MSHR;
end
// -------------------------
// Check for cache-ability
// -------------------------
if (!config_pkg::is_inside_cacheable_regions(
CVA6Cfg, {{{64 - riscv::PLEN} {1'b0}}, tag_o, {DCACHE_INDEX_WIDTH{1'b0}}}
)) begin
mem_req_d.bypass = 1'b1;
state_d = WAIT_REFILL_GNT;
end
// we are still waiting for a valid tag
end else begin
state_q <= state_d;
mem_req_q <= mem_req_d;
hit_way_q <= hit_way_d;
end
end
// request cache line for saved index
addr_o = mem_req_q.index;
req_o = '1;
//pragma translate_off
`ifndef VERILATOR
initial begin
assert (DCACHE_LINE_WIDTH == 128) else $error ("Cacheline width has to be 128 for the moment. But only small changes required in data select logic");
// check that we still have a memory grant
if (!gnt_i) begin
state_d = WAIT_GNT;
end
end
// if the full MSHR address matches so should also match the partial one
partial_full_mshr_match: assert property(@(posedge clk_i) disable iff (~rst_ni) mshr_addr_matches_i -> mshr_index_matches_i) else $fatal (1, "partial mshr index doesn't match");
// there should never be a valid answer when the MSHR matches and we are not being served
no_valid_on_mshr_match: assert property(@(posedge clk_i) disable iff (~rst_ni) (mshr_addr_matches_i && !active_serving_i)-> !req_port_o.data_rvalid || req_port_i.kill_req) else $fatal (1, "rvalid_o should not be set on MSHR match");
`endif
//pragma translate_on
end
// ~> we already granted the request but lost the memory grant while waiting for the tag
// WAIT_GNT:       no tag has been captured yet.
// WAIT_GNT_SAVED: the tag has already been captured into mem_req_d.tag.
WAIT_GNT, WAIT_GNT_SAVED: begin
// request cache line for saved index
addr_o = mem_req_q.index;
req_o = '1;
// if we get a valid tag while waiting for the memory grant, save it
if (req_port_i.tag_valid) begin
mem_req_d.tag = req_port_i.address_tag;
state_d = WAIT_GNT_SAVED;
end
// we have a memory grant again ~> go back to WAIT_TAG
// The comparison reads state_d, not state_q, so a tag captured by the `if`
// above in this same evaluation already selects WAIT_TAG_SAVED.
// NOTE(review): presumably state_d defaults to state_q before the case
// statement (not visible in this excerpt) - confirm.
if (gnt_i) begin
state_d = (state_d == WAIT_GNT) ? WAIT_TAG : WAIT_TAG_SAVED;
end
end
// ~> we are here as we need a second round of memory access for a store
STORE_REQ: begin
// check if the MSHR still doesn't match
mshr_addr_o = {mem_req_q.tag, mem_req_q.index};
// We need to re-check for MSHR aliasing here as the store requires at least
// two memory look-ups on a single-ported SRAM and therefore is non-atomic
if (!mshr_index_matches_i) begin
// store data, write dirty bit
// only the way(s) recorded in hit_way_q are requested and written
req_o = hit_way_q;
addr_o = mem_req_q.index;
we_o = 1'b1;
be_o.vldrty = hit_way_q;
// set the correct byte enable
// cl_offset is used as a bit offset for the data slice below, so
// cl_offset>>3 is the matching byte-enable offset
be_o.data[cl_offset>>3+:8] = mem_req_q.be;
data_o.data[cl_offset+:64] = mem_req_q.wdata;
// ~> change the state: mark the written line dirty and valid
data_o.dirty = 1'b1;
data_o.valid = 1'b1;
// got a grant ~> this is finished now
// the requester is only granted once the write itself has been granted
if (gnt_i) begin
req_port_o.data_gnt = 1'b1;
state_d = IDLE;
end
end else begin
// MSHR index matches: do not write, wait for the miss unit instead
state_d = WAIT_MSHR;
end
end // case: STORE_REQ
// we've got a match on MSHR ~> miss unit is currently serving a request
// Stay in this state while mshr_index_matches_i is set for the saved address.
WAIT_MSHR: begin
mshr_addr_o = {mem_req_q.tag, mem_req_q.index};
// we can start a new request
if (!mshr_index_matches_i) begin
req_o = '1;
addr_o = mem_req_q.index;
// resume in the *_SAVED variant: the tag comes from mem_req_q.tag
if (gnt_i) state_d = WAIT_TAG_SAVED;
end
end
// it's for sure a miss: no hit check is made here, the request always
// continues to WAIT_REFILL_GNT
WAIT_TAG_BYPASSED: begin
// check that the client really wants to do the request and that we have a valid tag
// (a saved write proceeds even without tag_valid)
if (!req_port_i.kill_req && (req_port_i.tag_valid || mem_req_q.we)) begin
// save tag
mem_req_d.tag = req_port_i.address_tag;
state_d = WAIT_REFILL_GNT;
end
end
// ~> wait for grant from miss unit
// The three state_d decisions below are evaluated in order and the last
// assignment wins: bypass grant, then miss grant, then MSHR match.
WAIT_REFILL_GNT: begin
mshr_addr_o = {mem_req_q.tag, mem_req_q.index};
// forward the saved request to the miss unit
miss_req_o.valid = 1'b1;
miss_req_o.bypass = mem_req_q.bypass;
miss_req_o.addr = {mem_req_q.tag, mem_req_q.index};
miss_req_o.be = mem_req_q.be;
miss_req_o.size = mem_req_q.size;
miss_req_o.we = mem_req_q.we;
miss_req_o.wdata = mem_req_q.wdata;
// got a grant so go to valid
if (bypass_gnt_i) begin
state_d = WAIT_REFILL_VALID;
// if this was a write we still need to give a grant to the store unit
if (mem_req_q.we) req_port_o.data_gnt = 1'b1;
end
// miss grant: a read waits for the critical word, a write is finished
// and granted to the requester
if (miss_gnt_i && !mem_req_q.we) state_d = WAIT_CRITICAL_WORD;
else if (miss_gnt_i) begin
state_d = IDLE;
req_port_o.data_gnt = 1'b1;
end
// it can be the case that the miss unit is currently serving a
// request which matches ours
// so we need to check the MSHR for matching continuously
// if the MSHR matches we need to go to a different state -> we should never get a matching MSHR and a high miss_gnt_i
if (mshr_addr_matches_i && !active_serving_i) begin
state_d = WAIT_MSHR;
end
end
// ~> wait for critical word to arrive
WAIT_CRITICAL_WORD: begin
// speculatively request another word
if (req_port_i.data_req) begin
// request the cache line
req_o = '1;
end
if (critical_word_valid_i) begin
// return the critical word; rvalid is suppressed for a killed request
req_port_o.data_rvalid = ~mem_req_q.killed;
req_port_o.data_rdata = critical_word_i;
// we can make another request
if (req_port_i.data_req) begin
// save index, be and we
mem_req_d.index = req_port_i.address_index;
mem_req_d.id = req_port_i.data_id;
mem_req_d.be = req_port_i.data_be;
mem_req_d.size = req_port_i.data_size;
mem_req_d.we = req_port_i.data_we;
mem_req_d.wdata = req_port_i.data_wdata;
mem_req_d.killed = req_port_i.kill_req;
// default to IDLE; overridden just below when the memory array is granted
state_d = IDLE;
// Wait until we have access on the memory array
if (gnt_i) begin
state_d = WAIT_TAG;
mem_req_d.bypass = 1'b0;
req_port_o.data_gnt = 1'b1;
end
end else begin
state_d = IDLE;
end
end
end
// ~> wait until the bypass request is valid
WAIT_REFILL_VALID: begin
// got a valid answer
if (bypass_valid_i) begin
// hand the bypass data to the requester; rvalid is suppressed for a
// killed request
req_port_o.data_rdata = bypass_data_i;
req_port_o.data_rvalid = ~mem_req_q.killed;
state_d = IDLE;
end
end
endcase
if (req_port_i.kill_req) begin
req_port_o.data_rvalid = 1'b1;
if (!(state_q inside {WAIT_REFILL_GNT, WAIT_CRITICAL_WORD})) begin
state_d = IDLE;
end
end
end
// ------------------------------------------------------------
// Sequential state
// ------------------------------------------------------------
// Holds the FSM state, the saved memory request and the saved hit way.
// All three are cleared by the asynchronous active-low reset and otherwise
// take their next-state (_d) values on every rising clock edge.
always_ff @(posedge clk_i or negedge rst_ni) begin
  if (!rst_ni) begin
    hit_way_q <= '0;
    mem_req_q <= '0;
    state_q   <= IDLE;
  end else begin
    hit_way_q <= hit_way_d;
    mem_req_q <= mem_req_d;
    state_q   <= state_d;
  end
end
// Simulation-only checks: excluded from synthesis by the translate_off/on
// pragmas and from Verilator builds by the `ifndef below.
//pragma translate_off
`ifndef VERILATOR
// Elaboration-time sanity check on the cache-line width
initial begin
assert (DCACHE_LINE_WIDTH == 128)
else
$error(
"Cacheline width has to be 128 for the moment. But only small changes required in data select logic"
);
end
// if the full MSHR address matches so should also match the partial one
partial_full_mshr_match :
assert property(@(posedge clk_i) disable iff (~rst_ni) mshr_addr_matches_i -> mshr_index_matches_i)
else $fatal(1, "partial mshr index doesn't match");
// there should never be a valid answer when the MSHR matches and we are not being served
// (`->` binds weaker than `||`: the consequent is `!data_rvalid || kill_req`,
// i.e. a valid raised by a kill request is tolerated)
no_valid_on_mshr_match :
assert property(@(posedge clk_i) disable iff (~rst_ni) (mshr_addr_matches_i && !active_serving_i)-> !req_port_o.data_rvalid || req_port_i.kill_req)
else $fatal(1, "rvalid_o should not be set on MSHR match");
`endif
//pragma translate_on
endmodule

View file

@ -10,13 +10,13 @@
// Date: February, 2023
// Description: Interface adapter for the CVA6 core
module cva6_hpdcache_if_adapter
import hpdcache_pkg::*;
import hpdcache_pkg::*;
// Parameters
// {{{
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter bit is_load_port = 1'b1
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter bit is_load_port = 1'b1
)
// }}}
@ -24,180 +24,177 @@ import hpdcache_pkg::*;
// {{{
(
// Clock and active-low reset pins
input logic clk_i,
input logic rst_ni,
input logic clk_i,
input logic rst_ni,
// Port ID
input hpdcache_pkg::hpdcache_req_sid_t hpdcache_req_sid_i,
input hpdcache_pkg::hpdcache_req_sid_t hpdcache_req_sid_i,
// Request/response ports from/to the CVA6 core
input ariane_pkg::dcache_req_i_t cva6_req_i,
output ariane_pkg::dcache_req_o_t cva6_req_o,
input ariane_pkg::amo_req_t cva6_amo_req_i,
output ariane_pkg::amo_resp_t cva6_amo_resp_o,
input ariane_pkg::dcache_req_i_t cva6_req_i,
output ariane_pkg::dcache_req_o_t cva6_req_o,
input ariane_pkg::amo_req_t cva6_amo_req_i,
output ariane_pkg::amo_resp_t cva6_amo_resp_o,
// Request port to the L1 Dcache
output logic hpdcache_req_valid_o,
input logic hpdcache_req_ready_i,
output hpdcache_pkg::hpdcache_req_t hpdcache_req_o,
output logic hpdcache_req_abort_o,
output hpdcache_pkg::hpdcache_tag_t hpdcache_req_tag_o,
output hpdcache_pkg::hpdcache_pma_t hpdcache_req_pma_o,
output logic hpdcache_req_valid_o,
input logic hpdcache_req_ready_i,
output hpdcache_pkg::hpdcache_req_t hpdcache_req_o,
output logic hpdcache_req_abort_o,
output hpdcache_pkg::hpdcache_tag_t hpdcache_req_tag_o,
output hpdcache_pkg::hpdcache_pma_t hpdcache_req_pma_o,
// Response port from the L1 Dcache
input logic hpdcache_rsp_valid_i,
input hpdcache_pkg::hpdcache_rsp_t hpdcache_rsp_i
input logic hpdcache_rsp_valid_i,
input hpdcache_pkg::hpdcache_rsp_t hpdcache_rsp_i
);
// }}}
// }}}
// Internal nets and registers
// Internal nets and registers
// {{{
logic forward_store, forward_amo;
logic hpdcache_req_is_uncacheable;
// }}}
// Request forwarding
// {{{
generate
// LOAD request
// {{{
logic forward_store, forward_amo;
logic hpdcache_req_is_uncacheable;
// }}}
if (is_load_port == 1'b1) begin : load_port_gen
// A load is uncacheable when its physical address lies outside every
// cacheable region of CVA6Cfg. Only the tag is known here, so the address is
// rebuilt as {zero padding, tag, DCACHE_INDEX_WIDTH zero bits}.
// The padding is sized so that the operand is exactly 64 bits wide, like the
// operand cache_ctrl builds for the same function. The previous padding of
// (64 - DCACHE_TAG_WIDTH) made the operand DCACHE_INDEX_WIDTH bits too wide;
// only leading zeros are removed, so the value passed is unchanged.
assign hpdcache_req_is_uncacheable = !config_pkg::is_inside_cacheable_regions(
    CVA6Cfg,
    {
      {(64 - ariane_pkg::DCACHE_TAG_WIDTH - ariane_pkg::DCACHE_INDEX_WIDTH) {1'b0}},
      cva6_req_i.address_tag,
      {ariane_pkg::DCACHE_INDEX_WIDTH{1'b0}}
    }
);
// Request forwarding
// {{{
generate
// LOAD request
// {{{
if (is_load_port == 1'b1) begin : load_port_gen
// A load is uncacheable when its physical address lies outside every
// cacheable region of CVA6Cfg. Only the tag is known here, so the address is
// rebuilt as {zero padding, tag, DCACHE_INDEX_WIDTH zero bits}.
// The padding is sized so that the operand is exactly 64 bits wide, like the
// operand cache_ctrl builds for the same function. The previous padding of
// (64 - DCACHE_TAG_WIDTH) made the operand DCACHE_INDEX_WIDTH bits too wide;
// only leading zeros are removed, so the value passed is unchanged.
assign hpdcache_req_is_uncacheable =
    !config_pkg::is_inside_cacheable_regions(CVA6Cfg,
        {{(64 - ariane_pkg::DCACHE_TAG_WIDTH - ariane_pkg::DCACHE_INDEX_WIDTH){1'b0}}
        , cva6_req_i.address_tag
        , {ariane_pkg::DCACHE_INDEX_WIDTH{1'b0}}});
// Request forwarding (load port)
// One continuous assignment per request field. Every load request from the
// core is passed straight through; fields a load does not use are tied off.
assign hpdcache_req_valid_o        = cva6_req_i.data_req;
assign hpdcache_req_o.addr_offset  = cva6_req_i.address_index;
assign hpdcache_req_o.wdata        = '0;
assign hpdcache_req_o.op           = hpdcache_pkg::HPDCACHE_REQ_LOAD;
assign hpdcache_req_o.be           = cva6_req_i.data_be;
assign hpdcache_req_o.size         = cva6_req_i.data_size;
assign hpdcache_req_o.sid          = hpdcache_req_sid_i;
assign hpdcache_req_o.tid          = cva6_req_i.data_id;
assign hpdcache_req_o.need_rsp     = 1'b1;
assign hpdcache_req_o.phys_indexed = 1'b0;
assign hpdcache_req_o.addr_tag     = '0;  // unused on virtually indexed request
assign hpdcache_req_o.pma          = '0;  // unused on virtually indexed request
// Request forwarding (load port)
// One continuous assignment per request field. Every load request from the
// core is passed straight through; fields a load does not use are tied off.
assign hpdcache_req_valid_o        = cva6_req_i.data_req;
assign hpdcache_req_o.addr_offset  = cva6_req_i.address_index;
assign hpdcache_req_o.wdata        = '0;
assign hpdcache_req_o.op           = hpdcache_pkg::HPDCACHE_REQ_LOAD;
assign hpdcache_req_o.be           = cva6_req_i.data_be;
assign hpdcache_req_o.size         = cva6_req_i.data_size;
assign hpdcache_req_o.sid          = hpdcache_req_sid_i;
assign hpdcache_req_o.tid          = cva6_req_i.data_id;
assign hpdcache_req_o.need_rsp     = 1'b1;
assign hpdcache_req_o.phys_indexed = 1'b0;
assign hpdcache_req_o.addr_tag     = '0;  // unused on virtually indexed request
assign hpdcache_req_o.pma          = '0;  // unused on virtually indexed request
// Kill, tag and PMA attributes that accompany the load request
assign hpdcache_req_abort_o           = cva6_req_i.kill_req;
assign hpdcache_req_tag_o             = cva6_req_i.address_tag;
assign hpdcache_req_pma_o.uncacheable = hpdcache_req_is_uncacheable;
assign hpdcache_req_pma_o.io          = 1'b0;
// Kill, tag and PMA attributes that accompany the load request
assign hpdcache_req_abort_o           = cva6_req_i.kill_req;
assign hpdcache_req_tag_o             = cva6_req_i.address_tag;
assign hpdcache_req_pma_o.uncacheable = hpdcache_req_is_uncacheable;
assign hpdcache_req_pma_o.io          = 1'b0;
// Response forwarding
// Cache response and request-ready are handed back to the core unmodified
assign cva6_req_o.data_gnt    = hpdcache_req_ready_i;
assign cva6_req_o.data_rvalid = hpdcache_rsp_valid_i;
assign cva6_req_o.data_rid    = hpdcache_rsp_i.tid;
assign cva6_req_o.data_rdata  = hpdcache_rsp_i.rdata;
end // }}}
// Response forwarding
// Cache response and request-ready are handed back to the core unmodified
assign cva6_req_o.data_gnt    = hpdcache_req_ready_i;
assign cva6_req_o.data_rvalid = hpdcache_rsp_valid_i;
assign cva6_req_o.data_rid    = hpdcache_rsp_i.tid;
assign cva6_req_o.data_rdata  = hpdcache_rsp_i.rdata;
end
// }}}
// STORE/AMO request
// {{{
else begin : store_amo_gen
hpdcache_req_addr_t amo_addr;
hpdcache_req_offset_t amo_addr_offset;
hpdcache_tag_t amo_tag;
logic amo_is_word, amo_is_word_hi;
hpdcache_req_data_t amo_data;
hpdcache_req_be_t amo_data_be;
hpdcache_req_op_t amo_op;
logic [31:0] amo_resp_word;
// STORE/AMO request
// {{{
else begin : store_amo_gen
hpdcache_req_addr_t amo_addr;
hpdcache_req_offset_t amo_addr_offset;
hpdcache_tag_t amo_tag;
logic amo_is_word, amo_is_word_hi;
hpdcache_req_data_t amo_data;
hpdcache_req_be_t amo_data_be;
hpdcache_req_op_t amo_op;
logic [31:0] amo_resp_word;
// AMO logic
// {{{
always_comb begin : amo_op_comb
amo_addr = cva6_amo_req_i.operand_a;
amo_addr_offset = amo_addr[0+:HPDCACHE_REQ_OFFSET_WIDTH];
amo_tag = amo_addr[HPDCACHE_REQ_OFFSET_WIDTH+:HPDCACHE_TAG_WIDTH];
amo_is_word = (cva6_amo_req_i.size == 2'b10);
amo_is_word_hi = cva6_amo_req_i.operand_a[2];
// AMO logic
// {{{
always_comb
begin : amo_op_comb
amo_addr = cva6_amo_req_i.operand_a;
amo_addr_offset = amo_addr[0 +: HPDCACHE_REQ_OFFSET_WIDTH];
amo_tag = amo_addr[HPDCACHE_REQ_OFFSET_WIDTH +: HPDCACHE_TAG_WIDTH];
amo_is_word = (cva6_amo_req_i.size == 2'b10);
amo_is_word_hi = cva6_amo_req_i.operand_a[2];
amo_data = amo_is_word ? {2{cva6_amo_req_i.operand_b[0+:32]}} : cva6_amo_req_i.operand_b;
amo_data = amo_is_word ? {2{cva6_amo_req_i.operand_b[0 +: 32]}}
: cva6_amo_req_i.operand_b;
amo_data_be = amo_is_word_hi ? 8'hf0 : amo_is_word ? 8'h0f : 8'hff;
amo_data_be = amo_is_word_hi ? 8'hf0 :
amo_is_word ? 8'h0f : 8'hff;
unique case (cva6_amo_req_i.amo_op)
ariane_pkg::AMO_LR: amo_op = HPDCACHE_REQ_AMO_LR;
ariane_pkg::AMO_SC: amo_op = HPDCACHE_REQ_AMO_SC;
ariane_pkg::AMO_SWAP: amo_op = HPDCACHE_REQ_AMO_SWAP;
ariane_pkg::AMO_ADD: amo_op = HPDCACHE_REQ_AMO_ADD;
ariane_pkg::AMO_AND: amo_op = HPDCACHE_REQ_AMO_AND;
ariane_pkg::AMO_OR: amo_op = HPDCACHE_REQ_AMO_OR;
ariane_pkg::AMO_XOR: amo_op = HPDCACHE_REQ_AMO_XOR;
ariane_pkg::AMO_MAX: amo_op = HPDCACHE_REQ_AMO_MAX;
ariane_pkg::AMO_MAXU: amo_op = HPDCACHE_REQ_AMO_MAXU;
ariane_pkg::AMO_MIN: amo_op = HPDCACHE_REQ_AMO_MIN;
ariane_pkg::AMO_MINU: amo_op = HPDCACHE_REQ_AMO_MINU;
default: amo_op = HPDCACHE_REQ_LOAD;
endcase
end
unique case(cva6_amo_req_i.amo_op)
ariane_pkg::AMO_LR: amo_op = HPDCACHE_REQ_AMO_LR;
ariane_pkg::AMO_SC: amo_op = HPDCACHE_REQ_AMO_SC;
ariane_pkg::AMO_SWAP: amo_op = HPDCACHE_REQ_AMO_SWAP;
ariane_pkg::AMO_ADD: amo_op = HPDCACHE_REQ_AMO_ADD;
ariane_pkg::AMO_AND: amo_op = HPDCACHE_REQ_AMO_AND;
ariane_pkg::AMO_OR: amo_op = HPDCACHE_REQ_AMO_OR;
ariane_pkg::AMO_XOR: amo_op = HPDCACHE_REQ_AMO_XOR;
ariane_pkg::AMO_MAX: amo_op = HPDCACHE_REQ_AMO_MAX;
ariane_pkg::AMO_MAXU: amo_op = HPDCACHE_REQ_AMO_MAXU;
ariane_pkg::AMO_MIN: amo_op = HPDCACHE_REQ_AMO_MIN;
ariane_pkg::AMO_MINU: amo_op = HPDCACHE_REQ_AMO_MINU;
default: amo_op = HPDCACHE_REQ_LOAD;
endcase
end
assign amo_resp_word = amo_is_word_hi ? hpdcache_rsp_i.rdata[0][32 +: 32]
assign amo_resp_word = amo_is_word_hi ? hpdcache_rsp_i.rdata[0][32 +: 32]
: hpdcache_rsp_i.rdata[0][0 +: 32];
// }}}
// }}}
// Request forwarding
// {{{
// A store/AMO is uncacheable when its physical address lies outside every
// cacheable region of CVA6Cfg. The address is rebuilt from the forwarded tag
// as {zero padding, tag, DCACHE_INDEX_WIDTH zero bits}.
// The padding is sized so that the operand is exactly 64 bits wide, like the
// operand cache_ctrl builds for the same function. The previous padding of
// (64 - DCACHE_TAG_WIDTH) made the operand DCACHE_INDEX_WIDTH bits too wide;
// only leading zeros are removed, so the value passed is unchanged.
// NOTE(review): assumes hpdcache_req_o.addr_tag is DCACHE_TAG_WIDTH bits wide - confirm.
assign hpdcache_req_is_uncacheable =
    !config_pkg::is_inside_cacheable_regions(CVA6Cfg,
        {{(64 - ariane_pkg::DCACHE_TAG_WIDTH - ariane_pkg::DCACHE_INDEX_WIDTH){1'b0}}
        , hpdcache_req_o.addr_tag, {ariane_pkg::DCACHE_INDEX_WIDTH{1'b0}}});
// Request forwarding
// {{{
// A store/AMO is uncacheable when its physical address lies outside every
// cacheable region of CVA6Cfg. The address is rebuilt from the forwarded tag
// as {zero padding, tag, DCACHE_INDEX_WIDTH zero bits}.
// The padding is sized so that the operand is exactly 64 bits wide, like the
// operand cache_ctrl builds for the same function. The previous padding of
// (64 - DCACHE_TAG_WIDTH) made the operand DCACHE_INDEX_WIDTH bits too wide;
// only leading zeros are removed, so the value passed is unchanged.
// NOTE(review): assumes hpdcache_req_o.addr_tag is DCACHE_TAG_WIDTH bits wide - confirm.
assign hpdcache_req_is_uncacheable = !config_pkg::is_inside_cacheable_regions(
    CVA6Cfg,
    {
      {(64 - ariane_pkg::DCACHE_TAG_WIDTH - ariane_pkg::DCACHE_INDEX_WIDTH) {1'b0}},
      hpdcache_req_o.addr_tag,
      {ariane_pkg::DCACHE_INDEX_WIDTH{1'b0}}
    }
);
assign forward_store = cva6_req_i.data_req,
forward_amo = cva6_amo_req_i.req;
assign forward_store = cva6_req_i.data_req, forward_amo = cva6_amo_req_i.req;
// A request is issued for either a store or an AMO
assign hpdcache_req_valid_o = forward_store | forward_amo;

// Fields multiplexed between the AMO request and the store request
assign hpdcache_req_o.addr_offset = forward_amo ? amo_addr_offset : cva6_req_i.address_index;
assign hpdcache_req_o.addr_tag = forward_amo ? amo_tag : cva6_req_i.address_tag;
assign hpdcache_req_o.wdata = forward_amo ? amo_data : cva6_req_i.data_wdata;
assign hpdcache_req_o.be = forward_amo ? amo_data_be : cva6_req_i.data_be;
assign hpdcache_req_o.size = forward_amo ? cva6_amo_req_i.size : cva6_req_i.data_size;
assign hpdcache_req_o.op = forward_amo ? amo_op : hpdcache_pkg::HPDCACHE_REQ_STORE;

// AMOs use an all-ones transaction id and are the only requests that ask
// for a response
assign hpdcache_req_o.tid = forward_amo ? '1 : '0;
assign hpdcache_req_o.need_rsp = forward_amo;

// Fields that do not depend on the request type
assign hpdcache_req_o.sid = hpdcache_req_sid_i;
assign hpdcache_req_o.phys_indexed = 1'b1;
assign hpdcache_req_o.pma.uncacheable = hpdcache_req_is_uncacheable;
assign hpdcache_req_o.pma.io = 1'b0;

// Side-band ports are tied off
assign hpdcache_req_abort_o = 1'b0;  // unused on physically indexed requests
assign hpdcache_req_tag_o = '0;  // unused on physically indexed requests
assign hpdcache_req_pma_o = '0;  // unused on physically indexed requests
// }}}
// A request is issued for either a store or an AMO
assign hpdcache_req_valid_o = forward_store | forward_amo;

// Fields multiplexed between the AMO request and the store request
assign hpdcache_req_o.addr_offset = forward_amo ? amo_addr_offset : cva6_req_i.address_index;
assign hpdcache_req_o.addr_tag = forward_amo ? amo_tag : cva6_req_i.address_tag;
assign hpdcache_req_o.wdata = forward_amo ? amo_data : cva6_req_i.data_wdata;
assign hpdcache_req_o.be = forward_amo ? amo_data_be : cva6_req_i.data_be;
assign hpdcache_req_o.size = forward_amo ? cva6_amo_req_i.size : cva6_req_i.data_size;
assign hpdcache_req_o.op = forward_amo ? amo_op : hpdcache_pkg::HPDCACHE_REQ_STORE;

// AMOs use an all-ones transaction id and are the only requests that ask
// for a response
assign hpdcache_req_o.tid = forward_amo ? '1 : '0;
assign hpdcache_req_o.need_rsp = forward_amo;

// Fields that do not depend on the request type
assign hpdcache_req_o.sid = hpdcache_req_sid_i;
assign hpdcache_req_o.phys_indexed = 1'b1;
assign hpdcache_req_o.pma.uncacheable = hpdcache_req_is_uncacheable;
assign hpdcache_req_o.pma.io = 1'b0;

// Side-band ports are tied off
assign hpdcache_req_abort_o = 1'b0;  // unused on physically indexed requests
assign hpdcache_req_tag_o = '0;  // unused on physically indexed requests
assign hpdcache_req_pma_o = '0;  // unused on physically indexed requests
// }}}
// Response forwarding
// {{{
// Responses whose transaction id is all ones are not reported as valid on
// the regular request port
assign cva6_req_o.data_gnt    = hpdcache_req_ready_i;
assign cva6_req_o.data_rvalid = hpdcache_rsp_valid_i && (hpdcache_rsp_i.tid != '1);
assign cva6_req_o.data_rid    = hpdcache_rsp_i.tid;
assign cva6_req_o.data_rdata  = hpdcache_rsp_i.rdata;
// Response forwarding
// {{{
// Responses whose transaction id is all ones are not reported as valid on
// the regular request port
assign cva6_req_o.data_gnt    = hpdcache_req_ready_i;
assign cva6_req_o.data_rvalid = hpdcache_rsp_valid_i && (hpdcache_rsp_i.tid != '1);
assign cva6_req_o.data_rid    = hpdcache_rsp_i.tid;
assign cva6_req_o.data_rdata  = hpdcache_rsp_i.rdata;
assign cva6_amo_resp_o.ack = hpdcache_rsp_valid_i && (hpdcache_rsp_i.tid == '1),
cva6_amo_resp_o.result = amo_is_word ? {{32{amo_resp_word[31]}}, amo_resp_word}
assign cva6_amo_resp_o.ack = hpdcache_rsp_valid_i && (hpdcache_rsp_i.tid == '1),
cva6_amo_resp_o.result = amo_is_word ? {{32{amo_resp_word[31]}}, amo_resp_word}
: hpdcache_rsp_i.rdata[0][63:0];
// }}}
end
// }}}
endgenerate
// }}}
end
// }}}
endgenerate
// }}}
// Assertions
// {{{
// Simulation-only (excluded from synthesis by the translate_off/on pragmas).
// pragma translate_off
// On every rising clk_i edge at most one of forward_store / forward_amo may
// be set. Violations are reported with $error.
forward_one_request_assert: assert property (@(posedge clk_i)
($onehot0({forward_store, forward_amo}))) else
$error("Only one request shall be forwarded");
// pragma translate_on
// }}}
// Assertions
// {{{
// Simulation-only (excluded from synthesis by the translate_off/on pragmas).
// pragma translate_off
// On every rising clk_i edge at most one of forward_store / forward_amo may
// be set. Violations are reported with $error.
forward_one_request_assert :
assert property (@(posedge clk_i) ($onehot0({forward_store, forward_amo})))
else $error("Only one request shall be forwarded");
// pragma translate_on
// }}}
endmodule

View file

@ -16,83 +16,83 @@ module cva6_hpdcache_subsystem
// Parameters
// {{{
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int NumPorts = 4,
parameter int NrHwPrefetchers = 4,
parameter type noc_req_t = logic,
parameter type noc_resp_t = logic,
parameter type cmo_req_t = logic,
parameter type cmo_rsp_t = logic
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int NumPorts = 4,
parameter int NrHwPrefetchers = 4,
parameter type noc_req_t = logic,
parameter type noc_resp_t = logic,
parameter type cmo_req_t = logic,
parameter type cmo_rsp_t = logic
)
// }}}
// Ports
// {{{
(
input logic clk_i,
input logic rst_ni,
input logic clk_i,
input logic rst_ni,
// I$
// {{{
input logic icache_en_i, // enable icache (or bypass e.g: in debug mode)
input logic icache_flush_i, // flush the icache, flush and kill have to be asserted together
output logic icache_miss_o, // to performance counter
// address translation requests
input ariane_pkg::icache_areq_t icache_areq_i, // to/from frontend
output ariane_pkg::icache_arsp_t icache_areq_o,
// data requests
input ariane_pkg::icache_dreq_t icache_dreq_i, // to/from frontend
output ariane_pkg::icache_drsp_t icache_dreq_o,
// }}}
// I$
// {{{
input logic icache_en_i, // enable icache (or bypass e.g: in debug mode)
input logic icache_flush_i, // flush the icache, flush and kill have to be asserted together
output logic icache_miss_o, // to performance counter
// address translation requests
input ariane_pkg::icache_areq_t icache_areq_i, // to/from frontend
output ariane_pkg::icache_arsp_t icache_areq_o,
// data requests
input ariane_pkg::icache_dreq_t icache_dreq_i, // to/from frontend
output ariane_pkg::icache_drsp_t icache_dreq_o,
// }}}
// D$
// {{{
// Cache management
input logic dcache_enable_i, // from CSR
input logic dcache_flush_i, // high until acknowledged
output logic dcache_flush_ack_o, // send a single cycle acknowledge signal when the cache is flushed
output logic dcache_miss_o, // we missed on a ld/st
// D$
// {{{
// Cache management
input logic dcache_enable_i, // from CSR
input logic dcache_flush_i, // high until acknowledged
output logic dcache_flush_ack_o, // send a single cycle acknowledge signal when the cache is flushed
output logic dcache_miss_o, // we missed on a ld/st
// AMO interface
input ariane_pkg::amo_req_t dcache_amo_req_i, // from LSU
output ariane_pkg::amo_resp_t dcache_amo_resp_o, // to LSU
// CMO interface
input cmo_req_t dcache_cmo_req_i, // from CMO FU
output cmo_rsp_t dcache_cmo_resp_o, // to CMO FU
// Request ports
input ariane_pkg::dcache_req_i_t [NumPorts-1:0] dcache_req_ports_i, // from LSU
output ariane_pkg::dcache_req_o_t [NumPorts-1:0] dcache_req_ports_o, // to LSU
// Write Buffer status
output logic wbuffer_empty_o,
output logic wbuffer_not_ni_o,
// AMO interface
input ariane_pkg::amo_req_t dcache_amo_req_i, // from LSU
output ariane_pkg::amo_resp_t dcache_amo_resp_o, // to LSU
// CMO interface
input cmo_req_t dcache_cmo_req_i, // from CMO FU
output cmo_rsp_t dcache_cmo_resp_o, // to CMO FU
// Request ports
input ariane_pkg::dcache_req_i_t [NumPorts-1:0] dcache_req_ports_i, // from LSU
output ariane_pkg::dcache_req_o_t [NumPorts-1:0] dcache_req_ports_o, // to LSU
// Write Buffer status
output logic wbuffer_empty_o,
output logic wbuffer_not_ni_o,
// Hardware memory prefetcher configuration
input logic [NrHwPrefetchers-1:0] hwpf_base_set_i,
input logic [NrHwPrefetchers-1:0][63:0] hwpf_base_i,
output logic [NrHwPrefetchers-1:0][63:0] hwpf_base_o,
input logic [NrHwPrefetchers-1:0] hwpf_param_set_i,
input logic [NrHwPrefetchers-1:0][63:0] hwpf_param_i,
output logic [NrHwPrefetchers-1:0][63:0] hwpf_param_o,
input logic [NrHwPrefetchers-1:0] hwpf_throttle_set_i,
input logic [NrHwPrefetchers-1:0][63:0] hwpf_throttle_i,
output logic [NrHwPrefetchers-1:0][63:0] hwpf_throttle_o,
output logic [63:0] hwpf_status_o,
// }}}
// Hardware memory prefetcher configuration
input logic [NrHwPrefetchers-1:0] hwpf_base_set_i,
input logic [NrHwPrefetchers-1:0][63:0] hwpf_base_i,
output logic [NrHwPrefetchers-1:0][63:0] hwpf_base_o,
input logic [NrHwPrefetchers-1:0] hwpf_param_set_i,
input logic [NrHwPrefetchers-1:0][63:0] hwpf_param_i,
output logic [NrHwPrefetchers-1:0][63:0] hwpf_param_o,
input logic [NrHwPrefetchers-1:0] hwpf_throttle_set_i,
input logic [NrHwPrefetchers-1:0][63:0] hwpf_throttle_i,
output logic [NrHwPrefetchers-1:0][63:0] hwpf_throttle_o,
output logic [ 63:0] hwpf_status_o,
// }}}
// AXI port to upstream memory/peripherals
// {{{
output noc_req_t noc_req_o,
input noc_resp_t noc_resp_i
// }}}
// AXI port to upstream memory/peripherals
// {{{
output noc_req_t noc_req_o,
input noc_resp_t noc_resp_i
// }}}
);
// }}}
// }}}
`include "axi/typedef.svh"
// I$ instantiation
// {{{
logic icache_miss_valid, icache_miss_ready;
wt_cache_pkg::icache_req_t icache_miss;
wt_cache_pkg::icache_req_t icache_miss;
logic icache_miss_resp_valid;
wt_cache_pkg::icache_rtrn_t icache_miss_resp;
@ -100,23 +100,23 @@ module cva6_hpdcache_subsystem
localparam int ICACHE_RDTXID = 1 << (ariane_pkg::MEM_TID_WIDTH - 1);
// Instruction cache instance. RdTxId is set to ICACHE_RDTXID. Miss requests leave on
// icache_miss_valid / icache_miss (acknowledged by icache_miss_ready) and refill data
// comes back on icache_miss_resp_valid / icache_miss_resp.
// NOTE(review): this listing shows each reformatted line twice (original spacing, then
// the aligned spacing), so the parameter and port lists below appear duplicated.
cva6_icache #(
.CVA6Cfg (CVA6Cfg),
.RdTxId (ICACHE_RDTXID)
.CVA6Cfg(CVA6Cfg),
.RdTxId (ICACHE_RDTXID)
) i_cva6_icache (
.clk_i (clk_i),
.rst_ni (rst_ni),
.flush_i (icache_flush_i),
.en_i (icache_en_i),
.miss_o (icache_miss_o),
.areq_i (icache_areq_i),
.areq_o (icache_areq_o),
.dreq_i (icache_dreq_i),
.dreq_o (icache_dreq_o),
.mem_rtrn_vld_i (icache_miss_resp_valid),
.mem_rtrn_i (icache_miss_resp),
.mem_data_req_o (icache_miss_valid),
.mem_data_ack_i (icache_miss_ready),
.mem_data_o (icache_miss)
.clk_i (clk_i),
.rst_ni (rst_ni),
.flush_i (icache_flush_i),
.en_i (icache_en_i),
.miss_o (icache_miss_o),
.areq_i (icache_areq_i),
.areq_o (icache_areq_o),
.dreq_i (icache_dreq_i),
.dreq_o (icache_dreq_o),
.mem_rtrn_vld_i(icache_miss_resp_valid),
.mem_rtrn_i (icache_miss_resp),
.mem_data_req_o(icache_miss_valid),
.mem_data_ack_i(icache_miss_ready),
.mem_data_o (icache_miss)
);
// }}}
@ -135,10 +135,10 @@ module cva6_hpdcache_subsystem
// NumPorts + 1: Hardware Memory Prefetcher (hwpf)
localparam int HPDCACHE_NREQUESTERS = NumPorts + 2;
typedef logic [CVA6Cfg.AxiAddrWidth-1:0] hpdcache_mem_addr_t;
typedef logic [CVA6Cfg.AxiAddrWidth-1:0] hpdcache_mem_addr_t;
typedef logic [ariane_pkg::MEM_TID_WIDTH-1:0] hpdcache_mem_id_t;
typedef logic [CVA6Cfg.AxiDataWidth-1:0] hpdcache_mem_data_t;
typedef logic [CVA6Cfg.AxiDataWidth/8-1:0] hpdcache_mem_be_t;
typedef logic [CVA6Cfg.AxiDataWidth-1:0] hpdcache_mem_data_t;
typedef logic [CVA6Cfg.AxiDataWidth/8-1:0] hpdcache_mem_be_t;
`HPDCACHE_TYPEDEF_MEM_REQ_T(hpdcache_mem_req_t, hpdcache_mem_addr_t, hpdcache_mem_id_t);
`HPDCACHE_TYPEDEF_MEM_RESP_R_T(hpdcache_mem_resp_r_t, hpdcache_mem_id_t, hpdcache_mem_data_t);
`HPDCACHE_TYPEDEF_MEM_REQ_W_T(hpdcache_mem_req_w_t, hpdcache_mem_data_t, hpdcache_mem_be_t);
@ -146,63 +146,63 @@ module cva6_hpdcache_subsystem
typedef logic [63:0] hwpf_stride_param_t;
logic dcache_req_valid [HPDCACHE_NREQUESTERS-1:0];
logic dcache_req_ready [HPDCACHE_NREQUESTERS-1:0];
hpdcache_pkg::hpdcache_req_t dcache_req [HPDCACHE_NREQUESTERS-1:0];
logic dcache_req_abort [HPDCACHE_NREQUESTERS-1:0];
hpdcache_pkg::hpdcache_tag_t dcache_req_tag [HPDCACHE_NREQUESTERS-1:0];
hpdcache_pkg::hpdcache_pma_t dcache_req_pma [HPDCACHE_NREQUESTERS-1:0];
logic dcache_rsp_valid [HPDCACHE_NREQUESTERS-1:0];
hpdcache_pkg::hpdcache_rsp_t dcache_rsp [HPDCACHE_NREQUESTERS-1:0];
logic dcache_read_miss, dcache_write_miss;
logic dcache_req_valid[HPDCACHE_NREQUESTERS-1:0];
logic dcache_req_ready[HPDCACHE_NREQUESTERS-1:0];
hpdcache_pkg::hpdcache_req_t dcache_req [HPDCACHE_NREQUESTERS-1:0];
logic dcache_req_abort[HPDCACHE_NREQUESTERS-1:0];
hpdcache_pkg::hpdcache_tag_t dcache_req_tag [HPDCACHE_NREQUESTERS-1:0];
hpdcache_pkg::hpdcache_pma_t dcache_req_pma [HPDCACHE_NREQUESTERS-1:0];
logic dcache_rsp_valid[HPDCACHE_NREQUESTERS-1:0];
hpdcache_pkg::hpdcache_rsp_t dcache_rsp [HPDCACHE_NREQUESTERS-1:0];
logic dcache_read_miss, dcache_write_miss;
logic [2:0] snoop_valid;
logic [2:0] snoop_abort;
hpdcache_pkg::hpdcache_req_offset_t [2:0] snoop_addr_offset;
hpdcache_pkg::hpdcache_tag_t [2:0] snoop_addr_tag;
logic [2:0] snoop_phys_indexed;
logic [ 2:0] snoop_valid;
logic [ 2:0] snoop_abort;
hpdcache_pkg::hpdcache_req_offset_t [ 2:0] snoop_addr_offset;
hpdcache_pkg::hpdcache_tag_t [ 2:0] snoop_addr_tag;
logic [ 2:0] snoop_phys_indexed;
logic dcache_cmo_req_is_prefetch;
logic dcache_cmo_req_is_prefetch;
logic dcache_miss_ready;
logic dcache_miss_valid;
hpdcache_mem_req_t dcache_miss;
logic dcache_miss_ready;
logic dcache_miss_valid;
hpdcache_mem_req_t dcache_miss;
logic dcache_miss_resp_ready;
logic dcache_miss_resp_valid;
hpdcache_mem_resp_r_t dcache_miss_resp;
logic dcache_miss_resp_ready;
logic dcache_miss_resp_valid;
hpdcache_mem_resp_r_t dcache_miss_resp;
logic dcache_wbuf_ready;
logic dcache_wbuf_valid;
hpdcache_mem_req_t dcache_wbuf;
logic dcache_wbuf_ready;
logic dcache_wbuf_valid;
hpdcache_mem_req_t dcache_wbuf;
logic dcache_wbuf_data_ready;
logic dcache_wbuf_data_valid;
hpdcache_mem_req_w_t dcache_wbuf_data;
logic dcache_wbuf_data_ready;
logic dcache_wbuf_data_valid;
hpdcache_mem_req_w_t dcache_wbuf_data;
logic dcache_wbuf_resp_ready;
logic dcache_wbuf_resp_valid;
hpdcache_mem_resp_w_t dcache_wbuf_resp;
logic dcache_wbuf_resp_ready;
logic dcache_wbuf_resp_valid;
hpdcache_mem_resp_w_t dcache_wbuf_resp;
logic dcache_uc_read_ready;
logic dcache_uc_read_valid;
hpdcache_mem_req_t dcache_uc_read;
logic dcache_uc_read_ready;
logic dcache_uc_read_valid;
hpdcache_mem_req_t dcache_uc_read;
logic dcache_uc_read_resp_ready;
logic dcache_uc_read_resp_valid;
hpdcache_mem_resp_r_t dcache_uc_read_resp;
logic dcache_uc_read_resp_ready;
logic dcache_uc_read_resp_valid;
hpdcache_mem_resp_r_t dcache_uc_read_resp;
logic dcache_uc_write_ready;
logic dcache_uc_write_valid;
hpdcache_mem_req_t dcache_uc_write;
logic dcache_uc_write_ready;
logic dcache_uc_write_valid;
hpdcache_mem_req_t dcache_uc_write;
logic dcache_uc_write_data_ready;
logic dcache_uc_write_data_valid;
hpdcache_mem_req_w_t dcache_uc_write_data;
logic dcache_uc_write_data_ready;
logic dcache_uc_write_data_valid;
hpdcache_mem_req_w_t dcache_uc_write_data;
logic dcache_uc_write_resp_ready;
logic dcache_uc_write_resp_valid;
hpdcache_mem_resp_w_t dcache_uc_write_resp;
logic dcache_uc_write_resp_ready;
logic dcache_uc_write_resp_valid;
hpdcache_mem_resp_w_t dcache_uc_write_resp;
hwpf_stride_pkg::hwpf_stride_throttle_t [NrHwPrefetchers-1:0] hwpf_throttle_in;
hwpf_stride_pkg::hwpf_stride_throttle_t [NrHwPrefetchers-1:0] hwpf_throttle_out;
@ -210,263 +210,262 @@ module cva6_hpdcache_subsystem
// Core-side request adapters feeding the dcache_req* requester arrays:
//   indices 0 .. NumPorts-2 : one cva6_hpdcache_if_adapter each, is_load_port = 1'b1,
//                             AMO request tied to '0 and AMO response left open
//   index   NumPorts-1      : cva6_hpdcache_if_adapter with is_load_port = 1'b0, which
//                             also carries dcache_amo_req_i / dcache_amo_resp_o
//   index   NumPorts        : CMO adapter when HPDCACHE_ENABLE_CMO is defined, otherwise
//                             the request-side signals of that index are tied to zero
// Each adapter receives its own requester index as hpdcache_req_sid_i / dcache_req_sid_i.
generate
// Local copy of the LSU request ports; in the code visible here only entries
// 0 .. NumPorts-2 are assigned (inside the loop below).
ariane_pkg::dcache_req_i_t dcache_req_ports[HPDCACHE_NREQUESTERS-1:0];
for (genvar r = 0; r < (NumPorts-1); r++) begin : cva6_hpdcache_load_if_adapter_gen
for (genvar r = 0; r < (NumPorts - 1); r++) begin : cva6_hpdcache_load_if_adapter_gen
assign dcache_req_ports[r] = dcache_req_ports_i[r];
cva6_hpdcache_if_adapter #(
.CVA6Cfg (CVA6Cfg),
.is_load_port (1'b1)
.CVA6Cfg (CVA6Cfg),
.is_load_port(1'b1)
) i_cva6_hpdcache_load_if_adapter (
.clk_i,
.rst_ni,
.clk_i,
.rst_ni,
.hpdcache_req_sid_i (hpdcache_pkg::hpdcache_req_sid_t'(r)),
.hpdcache_req_sid_i(hpdcache_pkg::hpdcache_req_sid_t'(r)),
.cva6_req_i (dcache_req_ports[r]),
.cva6_req_o (dcache_req_ports_o[r]),
.cva6_amo_req_i ('0),
.cva6_amo_resp_o (/* unused */),
.cva6_req_i (dcache_req_ports[r]),
.cva6_req_o (dcache_req_ports_o[r]),
.cva6_amo_req_i ('0),
.cva6_amo_resp_o( /* unused */),
.hpdcache_req_valid_o (dcache_req_valid[r]),
.hpdcache_req_ready_i (dcache_req_ready[r]),
.hpdcache_req_o (dcache_req[r]),
.hpdcache_req_abort_o (dcache_req_abort[r]),
.hpdcache_req_tag_o (dcache_req_tag[r]),
.hpdcache_req_pma_o (dcache_req_pma[r]),
.hpdcache_req_valid_o(dcache_req_valid[r]),
.hpdcache_req_ready_i(dcache_req_ready[r]),
.hpdcache_req_o (dcache_req[r]),
.hpdcache_req_abort_o(dcache_req_abort[r]),
.hpdcache_req_tag_o (dcache_req_tag[r]),
.hpdcache_req_pma_o (dcache_req_pma[r]),
.hpdcache_rsp_valid_i (dcache_rsp_valid[r]),
.hpdcache_rsp_i (dcache_rsp[r])
.hpdcache_rsp_valid_i(dcache_rsp_valid[r]),
.hpdcache_rsp_i (dcache_rsp[r])
);
end
// Store/AMO adapter: takes dcache_req_ports_i[NumPorts-1] directly (not the local copy)
// and is the only adapter wired to the AMO request/response ports.
cva6_hpdcache_if_adapter #(
.CVA6Cfg (CVA6Cfg),
.is_load_port (1'b0)
.CVA6Cfg (CVA6Cfg),
.is_load_port(1'b0)
) i_cva6_hpdcache_store_if_adapter (
.clk_i,
.rst_ni,
.clk_i,
.rst_ni,
.hpdcache_req_sid_i (hpdcache_pkg::hpdcache_req_sid_t'(NumPorts-1)),
.hpdcache_req_sid_i(hpdcache_pkg::hpdcache_req_sid_t'(NumPorts - 1)),
.cva6_req_i (dcache_req_ports_i[NumPorts-1]),
.cva6_req_o (dcache_req_ports_o[NumPorts-1]),
.cva6_amo_req_i (dcache_amo_req_i),
.cva6_amo_resp_o (dcache_amo_resp_o),
.cva6_req_i (dcache_req_ports_i[NumPorts-1]),
.cva6_req_o (dcache_req_ports_o[NumPorts-1]),
.cva6_amo_req_i (dcache_amo_req_i),
.cva6_amo_resp_o(dcache_amo_resp_o),
.hpdcache_req_valid_o (dcache_req_valid[NumPorts-1]),
.hpdcache_req_ready_i (dcache_req_ready[NumPorts-1]),
.hpdcache_req_o (dcache_req[NumPorts-1]),
.hpdcache_req_abort_o (dcache_req_abort[NumPorts-1]),
.hpdcache_req_tag_o (dcache_req_tag[NumPorts-1]),
.hpdcache_req_pma_o (dcache_req_pma[NumPorts-1]),
.hpdcache_req_valid_o(dcache_req_valid[NumPorts-1]),
.hpdcache_req_ready_i(dcache_req_ready[NumPorts-1]),
.hpdcache_req_o (dcache_req[NumPorts-1]),
.hpdcache_req_abort_o(dcache_req_abort[NumPorts-1]),
.hpdcache_req_tag_o (dcache_req_tag[NumPorts-1]),
.hpdcache_req_pma_o (dcache_req_pma[NumPorts-1]),
.hpdcache_rsp_valid_i (dcache_rsp_valid[NumPorts-1]),
.hpdcache_rsp_i (dcache_rsp[NumPorts-1])
.hpdcache_rsp_valid_i(dcache_rsp_valid[NumPorts-1]),
.hpdcache_rsp_i (dcache_rsp[NumPorts-1])
);
// Requester index NumPorts: driven by the CMO adapter only when HPDCACHE_ENABLE_CMO is
// defined; the `else branch ties valid/req/abort/tag/pma of that index to zero instead.
`ifdef HPDCACHE_ENABLE_CMO
cva6_hpdcache_cmo_if_adapter #(
.cmo_req_t (cmo_req_t),
.cmo_rsp_t (cmo_rsp_t)
.cmo_req_t(cmo_req_t),
.cmo_rsp_t(cmo_rsp_t)
) i_cva6_hpdcache_cmo_if_adapter (
.clk_i,
.rst_ni,
.clk_i,
.rst_ni,
.dcache_req_sid_i (hpdcache_pkg::hpdcache_req_sid_t'(NumPorts)),
.dcache_req_sid_i(hpdcache_pkg::hpdcache_req_sid_t'(NumPorts)),
.cva6_cmo_req_i (dcache_cmo_req_i),
.cva6_cmo_resp_o (dcache_cmo_resp_o),
.cva6_cmo_req_i (dcache_cmo_req_i),
.cva6_cmo_resp_o(dcache_cmo_resp_o),
.dcache_req_valid_o (dcache_req_valid[NumPorts]),
.dcache_req_ready_i (dcache_req_ready[NumPorts]),
.dcache_req_o (dcache_req[NumPorts]),
.dcache_req_abort_o (dcache_req_abort[NumPorts]),
.dcache_req_tag_o (dcache_req_tag[NumPorts]),
.dcache_req_pma_o (dcache_req_pma[NumPorts]),
.dcache_req_valid_o(dcache_req_valid[NumPorts]),
.dcache_req_ready_i(dcache_req_ready[NumPorts]),
.dcache_req_o (dcache_req[NumPorts]),
.dcache_req_abort_o(dcache_req_abort[NumPorts]),
.dcache_req_tag_o (dcache_req_tag[NumPorts]),
.dcache_req_pma_o (dcache_req_pma[NumPorts]),
.dcache_rsp_valid_i (dcache_rsp_valid[NumPorts]),
.dcache_rsp_i (dcache_rsp[NumPorts])
.dcache_rsp_valid_i(dcache_rsp_valid[NumPorts]),
.dcache_rsp_i (dcache_rsp[NumPorts])
);
`else
assign dcache_req_valid[NumPorts] = 1'b0,
dcache_req [NumPorts] = '0,
dcache_req_abort[NumPorts] = 1'b0,
dcache_req_tag [NumPorts] = '0,
dcache_req_pma [NumPorts] = '0;
dcache_req[NumPorts] = '0,
dcache_req_abort[NumPorts] = 1'b0,
dcache_req_tag[NumPorts] = '0,
dcache_req_pma[NumPorts] = '0;
`endif
endgenerate
// Snoop ports handed to the hardware prefetcher wrapper (snoop_*[2:0]). Each entry
// mirrors the abort flag, address offset, tag and phys_indexed flag of one requester;
// snoop_valid is the request handshake (valid & ready) of that requester.
// Snoop load port
// NOTE(review): entry 0 watches requester index 1, hard-coded; presumably that is the
// load unit's port — confirm against the LSU port ordering.
assign snoop_valid[0] = dcache_req_valid[1] & dcache_req_ready[1],
snoop_abort[0] = dcache_req_abort[1],
snoop_addr_offset[0] = dcache_req[1].addr_offset,
snoop_addr_tag[0] = dcache_req_tag[1],
snoop_phys_indexed[0] = dcache_req[1].phys_indexed;
assign snoop_valid[0] = dcache_req_valid[1] & dcache_req_ready[1],
snoop_abort[0] = dcache_req_abort[1],
snoop_addr_offset[0] = dcache_req[1].addr_offset,
snoop_addr_tag[0] = dcache_req_tag[1],
snoop_phys_indexed[0] = dcache_req[1].phys_indexed;
// Snoop Store/AMO port
assign snoop_valid[1] = dcache_req_valid[NumPorts-1] & dcache_req_ready[NumPorts-1],
snoop_abort[1] = dcache_req_abort[NumPorts-1],
snoop_addr_offset[1] = dcache_req[NumPorts-1].addr_offset,
snoop_addr_tag[1] = dcache_req_tag[NumPorts-1],
snoop_phys_indexed[1] = dcache_req[NumPorts-1].phys_indexed;
assign snoop_valid[1] = dcache_req_valid[NumPorts-1] & dcache_req_ready[NumPorts-1],
snoop_abort[1] = dcache_req_abort[NumPorts-1],
snoop_addr_offset[1] = dcache_req[NumPorts-1].addr_offset,
snoop_addr_tag[1] = dcache_req_tag[NumPorts-1],
snoop_phys_indexed[1] = dcache_req[NumPorts-1].phys_indexed;
`ifdef HPDCACHE_ENABLE_CMO
// Snoop CMO port (in case of read prefetch accesses)
// Entry 2 is only valid when the request at index NumPorts handshakes AND
// is_cmo_prefetch() reports its op/size as a prefetch.
assign dcache_cmo_req_is_prefetch =
hpdcache_pkg::is_cmo_prefetch(dcache_req[NumPorts].op, dcache_req[NumPorts].size);
assign dcache_cmo_req_is_prefetch = hpdcache_pkg::is_cmo_prefetch(
dcache_req[NumPorts].op, dcache_req[NumPorts].size
);
assign snoop_valid[2] = dcache_req_valid[NumPorts]
& dcache_req_ready[NumPorts]
& dcache_cmo_req_is_prefetch,
snoop_abort[2] = dcache_req_abort[NumPorts],
snoop_addr_offset[2] = dcache_req[NumPorts].addr_offset,
snoop_addr_tag[2] = dcache_req_tag[NumPorts],
snoop_phys_indexed[2] = dcache_req[NumPorts].phys_indexed;
snoop_abort[2] = dcache_req_abort[NumPorts],
snoop_addr_offset[2] = dcache_req[NumPorts].addr_offset,
snoop_addr_tag[2] = dcache_req_tag[NumPorts],
snoop_phys_indexed[2] = dcache_req[NumPorts].phys_indexed;
`else
// Without CMO support the third snoop entry is tied off (never valid).
assign snoop_valid[2] = 1'b0,
snoop_abort[2] = 1'b0,
snoop_addr_offset[2] = '0,
snoop_addr_tag[2] = '0,
snoop_phys_indexed[2] = 1'b0;
assign snoop_valid[2] = 1'b0,
snoop_abort[2] = 1'b0,
snoop_addr_offset[2] = '0,
snoop_addr_tag[2] = '0,
snoop_phys_indexed[2] = 1'b0;
`endif
// Per-prefetcher type conversion of the throttle configuration: the 64-bit
// hwpf_throttle_i[h] port value is cast to hwpf_stride_throttle_t for the wrapper, and
// the wrapper's hwpf_throttle_out[h] is cast back onto the 64-bit hwpf_throttle_o[h].
// NOTE(review): the output direction casts through hwpf_stride_pkg::hwpf_stride_param_t
// rather than a throttle-specific type — confirm this is intended and width-compatible.
generate
for (genvar h = 0; h < NrHwPrefetchers; h++) begin : hwpf_throttle_gen
assign hwpf_throttle_in[h] = hwpf_stride_pkg::hwpf_stride_throttle_t'(hwpf_throttle_i[h]),
hwpf_throttle_o[h] = hwpf_stride_pkg::hwpf_stride_param_t'(hwpf_throttle_out[h]);
hwpf_throttle_o[h] = hwpf_stride_pkg::hwpf_stride_param_t'(hwpf_throttle_out[h]);
end
endgenerate
// Hardware stride prefetcher wrapper. It is configured through the hwpf_base/param/
// throttle ports of this module, observes the three snoop_* entries
// (NUM_SNOOP_PORTS = 3) and issues its own requests as requester index NumPorts+1.
hwpf_stride_wrapper #(
.NUM_HW_PREFETCH (NrHwPrefetchers),
.NUM_SNOOP_PORTS (3)
.NUM_HW_PREFETCH(NrHwPrefetchers),
.NUM_SNOOP_PORTS(3)
) i_hwpf_stride_wrapper (
.clk_i,
.rst_ni,
.clk_i,
.rst_ni,
.hwpf_stride_base_set_i (hwpf_base_set_i),
.hwpf_stride_base_i (hwpf_base_i),
.hwpf_stride_base_o (hwpf_base_o),
.hwpf_stride_param_set_i (hwpf_param_set_i),
.hwpf_stride_param_i (hwpf_param_i),
.hwpf_stride_param_o (hwpf_param_o),
.hwpf_stride_throttle_set_i (hwpf_throttle_set_i),
.hwpf_stride_throttle_i (hwpf_throttle_in),
.hwpf_stride_throttle_o (hwpf_throttle_out),
.hwpf_stride_status_o (hwpf_status_o),
.hwpf_stride_base_set_i (hwpf_base_set_i),
.hwpf_stride_base_i (hwpf_base_i),
.hwpf_stride_base_o (hwpf_base_o),
.hwpf_stride_param_set_i (hwpf_param_set_i),
.hwpf_stride_param_i (hwpf_param_i),
.hwpf_stride_param_o (hwpf_param_o),
.hwpf_stride_throttle_set_i(hwpf_throttle_set_i),
.hwpf_stride_throttle_i (hwpf_throttle_in),
.hwpf_stride_throttle_o (hwpf_throttle_out),
.hwpf_stride_status_o (hwpf_status_o),
.snoop_valid_i (snoop_valid),
.snoop_abort_i (snoop_abort),
.snoop_addr_offset_i (snoop_addr_offset),
.snoop_addr_tag_i (snoop_addr_tag),
.snoop_phys_indexed_i (snoop_phys_indexed),
.snoop_valid_i (snoop_valid),
.snoop_abort_i (snoop_abort),
.snoop_addr_offset_i (snoop_addr_offset),
.snoop_addr_tag_i (snoop_addr_tag),
.snoop_phys_indexed_i(snoop_phys_indexed),
.hpdcache_req_sid_i (hpdcache_pkg::hpdcache_req_sid_t'(NumPorts+1)),
.hpdcache_req_sid_i(hpdcache_pkg::hpdcache_req_sid_t'(NumPorts + 1)),
.hpdcache_req_valid_o (dcache_req_valid[NumPorts+1]),
.hpdcache_req_ready_i (dcache_req_ready[NumPorts+1]),
.hpdcache_req_o (dcache_req[NumPorts+1]),
.hpdcache_req_abort_o (dcache_req_abort[NumPorts+1]),
.hpdcache_req_tag_o (dcache_req_tag[NumPorts+1]),
.hpdcache_req_pma_o (dcache_req_pma[NumPorts+1]),
.hpdcache_rsp_valid_i (dcache_rsp_valid[NumPorts+1]),
.hpdcache_rsp_i (dcache_rsp[NumPorts+1])
.hpdcache_req_valid_o(dcache_req_valid[NumPorts+1]),
.hpdcache_req_ready_i(dcache_req_ready[NumPorts+1]),
.hpdcache_req_o (dcache_req[NumPorts+1]),
.hpdcache_req_abort_o(dcache_req_abort[NumPorts+1]),
.hpdcache_req_tag_o (dcache_req_tag[NumPorts+1]),
.hpdcache_req_pma_o (dcache_req_pma[NumPorts+1]),
.hpdcache_rsp_valid_i(dcache_rsp_valid[NumPorts+1]),
.hpdcache_rsp_i (dcache_rsp[NumPorts+1])
);
// Data cache instance, serving HPDCACHE_NREQUESTERS requesters through the dcache_req* /
// dcache_rsp* arrays. Memory-side traffic is split into five channels (miss read,
// write-buffer write, uncached read, uncached write, plus their responses) that are
// forwarded to the AXI arbiter. dcache_flush_i is wired to the write-buffer flush input.
hpdcache #(
.NREQUESTERS (HPDCACHE_NREQUESTERS),
.HPDcacheMemIdWidth (ariane_pkg::MEM_TID_WIDTH),
.HPDcacheMemDataWidth (CVA6Cfg.AxiDataWidth),
.hpdcache_mem_req_t (hpdcache_mem_req_t),
.hpdcache_mem_req_w_t (hpdcache_mem_req_w_t),
.hpdcache_mem_resp_r_t (hpdcache_mem_resp_r_t),
.hpdcache_mem_resp_w_t (hpdcache_mem_resp_w_t)
) i_hpdcache(
.clk_i,
.rst_ni,
.NREQUESTERS (HPDCACHE_NREQUESTERS),
.HPDcacheMemIdWidth (ariane_pkg::MEM_TID_WIDTH),
.HPDcacheMemDataWidth (CVA6Cfg.AxiDataWidth),
.hpdcache_mem_req_t (hpdcache_mem_req_t),
.hpdcache_mem_req_w_t (hpdcache_mem_req_w_t),
.hpdcache_mem_resp_r_t(hpdcache_mem_resp_r_t),
.hpdcache_mem_resp_w_t(hpdcache_mem_resp_w_t)
) i_hpdcache (
.clk_i,
.rst_ni,
.wbuf_flush_i (dcache_flush_i),
.wbuf_flush_i(dcache_flush_i),
.core_req_valid_i (dcache_req_valid),
.core_req_ready_o (dcache_req_ready),
.core_req_i (dcache_req),
.core_req_abort_i (dcache_req_abort),
.core_req_tag_i (dcache_req_tag),
.core_req_pma_i (dcache_req_pma),
.core_req_valid_i(dcache_req_valid),
.core_req_ready_o(dcache_req_ready),
.core_req_i (dcache_req),
.core_req_abort_i(dcache_req_abort),
.core_req_tag_i (dcache_req_tag),
.core_req_pma_i (dcache_req_pma),
.core_rsp_valid_o (dcache_rsp_valid),
.core_rsp_o (dcache_rsp),
.core_rsp_valid_o(dcache_rsp_valid),
.core_rsp_o (dcache_rsp),
.mem_req_miss_read_ready_i (dcache_miss_ready),
.mem_req_miss_read_valid_o (dcache_miss_valid),
.mem_req_miss_read_o (dcache_miss),
.mem_req_miss_read_ready_i(dcache_miss_ready),
.mem_req_miss_read_valid_o(dcache_miss_valid),
.mem_req_miss_read_o (dcache_miss),
.mem_resp_miss_read_ready_o (dcache_miss_resp_ready),
.mem_resp_miss_read_valid_i (dcache_miss_resp_valid),
.mem_resp_miss_read_i (dcache_miss_resp),
.mem_resp_miss_read_ready_o(dcache_miss_resp_ready),
.mem_resp_miss_read_valid_i(dcache_miss_resp_valid),
.mem_resp_miss_read_i (dcache_miss_resp),
.mem_req_wbuf_write_ready_i (dcache_wbuf_ready),
.mem_req_wbuf_write_valid_o (dcache_wbuf_valid),
.mem_req_wbuf_write_o (dcache_wbuf),
.mem_req_wbuf_write_ready_i(dcache_wbuf_ready),
.mem_req_wbuf_write_valid_o(dcache_wbuf_valid),
.mem_req_wbuf_write_o (dcache_wbuf),
.mem_req_wbuf_write_data_ready_i (dcache_wbuf_data_ready),
.mem_req_wbuf_write_data_valid_o (dcache_wbuf_data_valid),
.mem_req_wbuf_write_data_o (dcache_wbuf_data),
.mem_req_wbuf_write_data_ready_i(dcache_wbuf_data_ready),
.mem_req_wbuf_write_data_valid_o(dcache_wbuf_data_valid),
.mem_req_wbuf_write_data_o (dcache_wbuf_data),
.mem_resp_wbuf_write_ready_o (dcache_wbuf_resp_ready),
.mem_resp_wbuf_write_valid_i (dcache_wbuf_resp_valid),
.mem_resp_wbuf_write_i (dcache_wbuf_resp),
.mem_resp_wbuf_write_ready_o(dcache_wbuf_resp_ready),
.mem_resp_wbuf_write_valid_i(dcache_wbuf_resp_valid),
.mem_resp_wbuf_write_i (dcache_wbuf_resp),
.mem_req_uc_read_ready_i (dcache_uc_read_ready),
.mem_req_uc_read_valid_o (dcache_uc_read_valid),
.mem_req_uc_read_o (dcache_uc_read),
.mem_req_uc_read_ready_i(dcache_uc_read_ready),
.mem_req_uc_read_valid_o(dcache_uc_read_valid),
.mem_req_uc_read_o (dcache_uc_read),
.mem_resp_uc_read_ready_o (dcache_uc_read_resp_ready),
.mem_resp_uc_read_valid_i (dcache_uc_read_resp_valid),
.mem_resp_uc_read_i (dcache_uc_read_resp),
.mem_resp_uc_read_ready_o(dcache_uc_read_resp_ready),
.mem_resp_uc_read_valid_i(dcache_uc_read_resp_valid),
.mem_resp_uc_read_i (dcache_uc_read_resp),
.mem_req_uc_write_ready_i (dcache_uc_write_ready),
.mem_req_uc_write_valid_o (dcache_uc_write_valid),
.mem_req_uc_write_o (dcache_uc_write),
.mem_req_uc_write_ready_i(dcache_uc_write_ready),
.mem_req_uc_write_valid_o(dcache_uc_write_valid),
.mem_req_uc_write_o (dcache_uc_write),
.mem_req_uc_write_data_ready_i (dcache_uc_write_data_ready),
.mem_req_uc_write_data_valid_o (dcache_uc_write_data_valid),
.mem_req_uc_write_data_o (dcache_uc_write_data),
.mem_req_uc_write_data_ready_i(dcache_uc_write_data_ready),
.mem_req_uc_write_data_valid_o(dcache_uc_write_data_valid),
.mem_req_uc_write_data_o (dcache_uc_write_data),
.mem_resp_uc_write_ready_o (dcache_uc_write_resp_ready),
.mem_resp_uc_write_valid_i (dcache_uc_write_resp_valid),
.mem_resp_uc_write_i (dcache_uc_write_resp),
.mem_resp_uc_write_ready_o(dcache_uc_write_resp_ready),
.mem_resp_uc_write_valid_i(dcache_uc_write_resp_valid),
.mem_resp_uc_write_i (dcache_uc_write_resp),
// Event outputs: only the read/write miss events are captured; the rest are left open.
// In the code visible here, dcache_read_miss drives dcache_miss_o while
// dcache_write_miss has no visible consumer.
.evt_cache_write_miss_o (dcache_write_miss),
.evt_cache_read_miss_o (dcache_read_miss),
.evt_uncached_req_o (/* unused */),
.evt_cmo_req_o (/* unused */),
.evt_write_req_o (/* unused */),
.evt_read_req_o (/* unused */),
.evt_prefetch_req_o (/* unused */),
.evt_req_on_hold_o (/* unused */),
.evt_rtab_rollback_o (/* unused */),
.evt_stall_refill_o (/* unused */),
.evt_stall_o (/* unused */),
.evt_cache_write_miss_o(dcache_write_miss),
.evt_cache_read_miss_o (dcache_read_miss),
.evt_uncached_req_o ( /* unused */),
.evt_cmo_req_o ( /* unused */),
.evt_write_req_o ( /* unused */),
.evt_read_req_o ( /* unused */),
.evt_prefetch_req_o ( /* unused */),
.evt_req_on_hold_o ( /* unused */),
.evt_rtab_rollback_o ( /* unused */),
.evt_stall_refill_o ( /* unused */),
.evt_stall_o ( /* unused */),
.wbuf_empty_o (wbuffer_empty_o),
.wbuf_empty_o(wbuffer_empty_o),
// Static configuration: only cfg_enable_i follows a module input (dcache_enable_i);
// every other cfg_* input is tied to a constant here.
.cfg_enable_i (dcache_enable_i),
.cfg_wbuf_threshold_i (4'd2),
.cfg_wbuf_reset_timecnt_on_write_i (1'b1),
.cfg_wbuf_sequential_waw_i (1'b0),
.cfg_wbuf_inhibit_write_coalescing_i (1'b0),
.cfg_prefetch_updt_plru_i (1'b1),
.cfg_error_on_cacheable_amo_i (1'b0),
.cfg_rtab_single_entry_i (1'b0)
.cfg_enable_i (dcache_enable_i),
.cfg_wbuf_threshold_i (4'd2),
.cfg_wbuf_reset_timecnt_on_write_i (1'b1),
.cfg_wbuf_sequential_waw_i (1'b0),
.cfg_wbuf_inhibit_write_coalescing_i(1'b0),
.cfg_prefetch_updt_plru_i (1'b1),
.cfg_error_on_cacheable_amo_i (1'b0),
.cfg_rtab_single_entry_i (1'b0)
);
// dcache_miss_o reports read misses only. wbuffer_not_ni_o is driven by the very same
// signal as wbuffer_empty_o.
// NOTE(review): presumably a simplification of the "not non-idempotent" status —
// confirm that aliasing it to the empty flag is intended.
assign dcache_miss_o = dcache_read_miss,
wbuffer_not_ni_o = wbuffer_empty_o;
assign dcache_miss_o = dcache_read_miss, wbuffer_not_ni_o = wbuffer_empty_o;
// Flush acknowledge: cleared by the asynchronous active-low reset; otherwise it goes
// high in the cycle after dcache_flush_i is sampled high while the ack is low, and drops
// again on the next cycle (it toggles for as long as dcache_flush_i stays asserted).
always_ff @(posedge clk_i or negedge rst_ni)
begin : dcache_flush_ff
always_ff @(posedge clk_i or negedge rst_ni) begin : dcache_flush_ff
if (!rst_ni) dcache_flush_ack_o <= 1'b0;
else dcache_flush_ack_o <= ~dcache_flush_ack_o & dcache_flush_i;
else dcache_flush_ack_o <= ~dcache_flush_ack_o & dcache_flush_i;
end
// }}}
@ -485,102 +484,124 @@ module cva6_hpdcache_subsystem
`AXI_TYPEDEF_R_CHAN_T(axi_r_chan_t, axi_data_t, axi_id_t, axi_user_t)
// Arbiter that funnels the I$ miss channel and the D$ memory channels (miss read,
// write-buffer write, uncached read, uncached write) onto the single noc_req_o /
// noc_resp_i port. The I$ miss channel uses ICACHE_RDTXID as its transaction ID; the
// uncached read and write channels are given an all-ones ID ('1).
cva6_hpdcache_subsystem_axi_arbiter #(
.HPDcacheMemIdWidth (ariane_pkg::MEM_TID_WIDTH),
.HPDcacheMemDataWidth (CVA6Cfg.AxiDataWidth),
.hpdcache_mem_req_t (hpdcache_mem_req_t),
.hpdcache_mem_req_w_t (hpdcache_mem_req_w_t),
.hpdcache_mem_resp_r_t (hpdcache_mem_resp_r_t),
.hpdcache_mem_resp_w_t (hpdcache_mem_resp_w_t),
.HPDcacheMemIdWidth (ariane_pkg::MEM_TID_WIDTH),
.HPDcacheMemDataWidth (CVA6Cfg.AxiDataWidth),
.hpdcache_mem_req_t (hpdcache_mem_req_t),
.hpdcache_mem_req_w_t (hpdcache_mem_req_w_t),
.hpdcache_mem_resp_r_t(hpdcache_mem_resp_r_t),
.hpdcache_mem_resp_w_t(hpdcache_mem_resp_w_t),
.AxiAddrWidth (CVA6Cfg.AxiAddrWidth),
.AxiDataWidth (CVA6Cfg.AxiDataWidth),
.AxiIdWidth (CVA6Cfg.AxiIdWidth),
.AxiUserWidth (CVA6Cfg.AxiUserWidth),
.axi_ar_chan_t (axi_ar_chan_t),
.axi_aw_chan_t (axi_aw_chan_t),
.axi_w_chan_t (axi_w_chan_t),
.axi_req_t (noc_req_t),
.axi_rsp_t (noc_resp_t)
.AxiAddrWidth (CVA6Cfg.AxiAddrWidth),
.AxiDataWidth (CVA6Cfg.AxiDataWidth),
.AxiIdWidth (CVA6Cfg.AxiIdWidth),
.AxiUserWidth (CVA6Cfg.AxiUserWidth),
.axi_ar_chan_t(axi_ar_chan_t),
.axi_aw_chan_t(axi_aw_chan_t),
.axi_w_chan_t (axi_w_chan_t),
.axi_req_t (noc_req_t),
.axi_rsp_t (noc_resp_t)
) i_axi_arbiter (
.clk_i,
.rst_ni,
.clk_i,
.rst_ni,
.icache_miss_valid_i (icache_miss_valid),
.icache_miss_ready_o (icache_miss_ready),
.icache_miss_i (icache_miss),
.icache_miss_id_i (hpdcache_mem_id_t'(ICACHE_RDTXID)),
.icache_miss_valid_i(icache_miss_valid),
.icache_miss_ready_o(icache_miss_ready),
.icache_miss_i (icache_miss),
.icache_miss_id_i (hpdcache_mem_id_t'(ICACHE_RDTXID)),
.icache_miss_resp_valid_o (icache_miss_resp_valid),
.icache_miss_resp_o (icache_miss_resp),
.icache_miss_resp_valid_o(icache_miss_resp_valid),
.icache_miss_resp_o (icache_miss_resp),
.dcache_miss_ready_o (dcache_miss_ready),
.dcache_miss_valid_i (dcache_miss_valid),
.dcache_miss_i (dcache_miss),
.dcache_miss_ready_o(dcache_miss_ready),
.dcache_miss_valid_i(dcache_miss_valid),
.dcache_miss_i (dcache_miss),
.dcache_miss_resp_ready_i (dcache_miss_resp_ready),
.dcache_miss_resp_valid_o (dcache_miss_resp_valid),
.dcache_miss_resp_o (dcache_miss_resp),
.dcache_miss_resp_ready_i(dcache_miss_resp_ready),
.dcache_miss_resp_valid_o(dcache_miss_resp_valid),
.dcache_miss_resp_o (dcache_miss_resp),
.dcache_wbuf_ready_o (dcache_wbuf_ready),
.dcache_wbuf_valid_i (dcache_wbuf_valid),
.dcache_wbuf_i (dcache_wbuf),
.dcache_wbuf_ready_o(dcache_wbuf_ready),
.dcache_wbuf_valid_i(dcache_wbuf_valid),
.dcache_wbuf_i (dcache_wbuf),
.dcache_wbuf_data_ready_o (dcache_wbuf_data_ready),
.dcache_wbuf_data_valid_i (dcache_wbuf_data_valid),
.dcache_wbuf_data_i (dcache_wbuf_data),
.dcache_wbuf_data_ready_o(dcache_wbuf_data_ready),
.dcache_wbuf_data_valid_i(dcache_wbuf_data_valid),
.dcache_wbuf_data_i (dcache_wbuf_data),
.dcache_wbuf_resp_ready_i (dcache_wbuf_resp_ready),
.dcache_wbuf_resp_valid_o (dcache_wbuf_resp_valid),
.dcache_wbuf_resp_o (dcache_wbuf_resp),
.dcache_wbuf_resp_ready_i(dcache_wbuf_resp_ready),
.dcache_wbuf_resp_valid_o(dcache_wbuf_resp_valid),
.dcache_wbuf_resp_o (dcache_wbuf_resp),
.dcache_uc_read_ready_o (dcache_uc_read_ready),
.dcache_uc_read_valid_i (dcache_uc_read_valid),
.dcache_uc_read_i (dcache_uc_read),
.dcache_uc_read_id_i ('1),
.dcache_uc_read_ready_o(dcache_uc_read_ready),
.dcache_uc_read_valid_i(dcache_uc_read_valid),
.dcache_uc_read_i (dcache_uc_read),
.dcache_uc_read_id_i ('1),
.dcache_uc_read_resp_ready_i (dcache_uc_read_resp_ready),
.dcache_uc_read_resp_valid_o (dcache_uc_read_resp_valid),
.dcache_uc_read_resp_o (dcache_uc_read_resp),
.dcache_uc_read_resp_ready_i(dcache_uc_read_resp_ready),
.dcache_uc_read_resp_valid_o(dcache_uc_read_resp_valid),
.dcache_uc_read_resp_o (dcache_uc_read_resp),
.dcache_uc_write_ready_o (dcache_uc_write_ready),
.dcache_uc_write_valid_i (dcache_uc_write_valid),
.dcache_uc_write_i (dcache_uc_write),
.dcache_uc_write_id_i ('1),
.dcache_uc_write_ready_o(dcache_uc_write_ready),
.dcache_uc_write_valid_i(dcache_uc_write_valid),
.dcache_uc_write_i (dcache_uc_write),
.dcache_uc_write_id_i ('1),
.dcache_uc_write_data_ready_o (dcache_uc_write_data_ready),
.dcache_uc_write_data_valid_i (dcache_uc_write_data_valid),
.dcache_uc_write_data_i (dcache_uc_write_data),
.dcache_uc_write_data_ready_o(dcache_uc_write_data_ready),
.dcache_uc_write_data_valid_i(dcache_uc_write_data_valid),
.dcache_uc_write_data_i (dcache_uc_write_data),
.dcache_uc_write_resp_ready_i (dcache_uc_write_resp_ready),
.dcache_uc_write_resp_valid_o (dcache_uc_write_resp_valid),
.dcache_uc_write_resp_o (dcache_uc_write_resp),
.dcache_uc_write_resp_ready_i(dcache_uc_write_resp_ready),
.dcache_uc_write_resp_valid_o(dcache_uc_write_resp_valid),
.dcache_uc_write_resp_o (dcache_uc_write_resp),
.axi_req_o (noc_req_o),
.axi_resp_i (noc_resp_i)
.axi_req_o (noc_req_o),
.axi_resp_i(noc_resp_i)
);
// }}}
// Assertions
// {{{
// Simulation-only checks (excluded from synthesis by the translate_off/on pragmas):
//   - elaboration-time check that the requester source-ID field is wide enough
//   - X checks on fetched instruction data, on store data, and on load data
// NOTE(review): the $warning calls below pass a leading `1` like $fatal does; $warning
// has no finish_number argument in IEEE 1800, so the `1` is likely printed as part of
// the message — confirm and consider dropping it.
// pragma translate_off
// The source ID must be able to encode every requester index.
initial assert (hpdcache_pkg::HPDCACHE_REQ_SRC_ID_WIDTH >= $clog2(HPDCACHE_NREQUESTERS))
initial
assert (hpdcache_pkg::HPDCACHE_REQ_SRC_ID_WIDTH >= $clog2(HPDCACHE_NREQUESTERS))
else $fatal(1, "HPDCACHE_REQ_SRC_ID_WIDTH is not wide enough");
// Valid instruction-fetch responses must not carry X data.
// NOTE(review): the message is tagged "[l1 dcache]" although it checks icache_dreq_o —
// presumably a copy/paste leftover; confirm before changing the runtime string.
a_invalid_instruction_fetch: assert property (
a_invalid_instruction_fetch :
assert property (
@(posedge clk_i) disable iff (!rst_ni) icache_dreq_o.valid |-> (|icache_dreq_o.data) !== 1'hX)
else $warning(1,"[l1 dcache] reading invalid instructions: vaddr=%08X, data=%08X",
icache_dreq_o.vaddr, icache_dreq_o.data);
else
$warning(
1,
"[l1 dcache] reading invalid instructions: vaddr=%08X, data=%08X",
icache_dreq_o.vaddr,
icache_dreq_o.data
);
// Write data with any byte enable set must not be X.
// NOTE(review): port index 2 is hard-coded here, whereas the store adapter is wired to
// index NumPorts-1; presumably this assumes NumPorts == 3 — confirm.
a_invalid_write_data: assert property (
a_invalid_write_data :
assert property (
@(posedge clk_i) disable iff (!rst_ni) dcache_req_ports_i[2].data_req |-> |dcache_req_ports_i[2].data_be |-> (|dcache_req_ports_i[2].data_wdata) !== 1'hX)
else $warning(1,"[l1 dcache] writing invalid data: paddr=%016X, be=%02X, data=%016X",
{dcache_req_ports_i[2].address_tag, dcache_req_ports_i[2].address_index}, dcache_req_ports_i[2].data_be, dcache_req_ports_i[2].data_wdata);
else
$warning(
1,
"[l1 dcache] writing invalid data: paddr=%016X, be=%02X, data=%016X",
{
dcache_req_ports_i[2].address_tag, dcache_req_ports_i[2].address_index
},
dcache_req_ports_i[2].data_be,
dcache_req_ports_i[2].data_wdata
);
// Read data returned on ports 0 and 1 must not be X unless the request was killed.
// NOTE(review): the loop bound 2 is hard-coded; presumably it stands for NumPorts-1
// (the number of load-side ports) — confirm.
for (genvar j=0; j<2; j++) begin : gen_assertion
a_invalid_read_data: assert property (
for (genvar j = 0; j < 2; j++) begin : gen_assertion
a_invalid_read_data :
assert property (
@(posedge clk_i) disable iff (!rst_ni) dcache_req_ports_o[j].data_rvalid && ~dcache_req_ports_i[j].kill_req |-> (|dcache_req_ports_o[j].data_rdata) !== 1'hX)
else $warning(1,"[l1 dcache] reading invalid data on port %01d: data=%016X",
j, dcache_req_ports_o[j].data_rdata);
else
$warning(
1,
"[l1 dcache] reading invalid data on port %01d: data=%016X",
j,
dcache_req_ports_o[j].data_rdata
);
end
// pragma translate_on
// }}}

View file

@ -16,114 +16,114 @@ module cva6_hpdcache_subsystem_axi_arbiter
// Parameters
// {{{
#(
parameter int HPDcacheMemIdWidth = 8,
parameter int HPDcacheMemDataWidth = 512,
parameter type hpdcache_mem_req_t = logic,
parameter type hpdcache_mem_req_w_t = logic,
parameter type hpdcache_mem_resp_r_t = logic,
parameter type hpdcache_mem_resp_w_t = logic,
parameter int HPDcacheMemIdWidth = 8,
parameter int HPDcacheMemDataWidth = 512,
parameter type hpdcache_mem_req_t = logic,
parameter type hpdcache_mem_req_w_t = logic,
parameter type hpdcache_mem_resp_r_t = logic,
parameter type hpdcache_mem_resp_w_t = logic,
parameter int unsigned AxiAddrWidth = 1,
parameter int unsigned AxiDataWidth = 1,
parameter int unsigned AxiIdWidth = 1,
parameter int unsigned AxiUserWidth = 1,
parameter type axi_ar_chan_t = logic,
parameter type axi_aw_chan_t = logic,
parameter type axi_w_chan_t = logic,
parameter type axi_req_t = logic,
parameter type axi_rsp_t = logic,
parameter int unsigned AxiAddrWidth = 1,
parameter int unsigned AxiDataWidth = 1,
parameter int unsigned AxiIdWidth = 1,
parameter int unsigned AxiUserWidth = 1,
parameter type axi_ar_chan_t = logic,
parameter type axi_aw_chan_t = logic,
parameter type axi_w_chan_t = logic,
parameter type axi_req_t = logic,
parameter type axi_rsp_t = logic,
localparam type hpdcache_mem_id_t = logic [HPDcacheMemIdWidth-1:0]
localparam type hpdcache_mem_id_t = logic [HPDcacheMemIdWidth-1:0]
)
// }}}
// Ports
// {{{
(
input logic clk_i,
input logic rst_ni,
input logic clk_i,
input logic rst_ni,
// Interfaces from/to I$
// {{{
input logic icache_miss_valid_i,
output logic icache_miss_ready_o,
input wt_cache_pkg::icache_req_t icache_miss_i,
input hpdcache_mem_id_t icache_miss_id_i,
// Interfaces from/to I$
// {{{
input logic icache_miss_valid_i,
output logic icache_miss_ready_o,
input wt_cache_pkg::icache_req_t icache_miss_i,
input hpdcache_mem_id_t icache_miss_id_i,
output logic icache_miss_resp_valid_o,
output wt_cache_pkg::icache_rtrn_t icache_miss_resp_o,
// }}}
output logic icache_miss_resp_valid_o,
output wt_cache_pkg::icache_rtrn_t icache_miss_resp_o,
// }}}
// Interfaces from/to D$
// {{{
output logic dcache_miss_ready_o,
input logic dcache_miss_valid_i,
input hpdcache_mem_req_t dcache_miss_i,
// Interfaces from/to D$
// {{{
output logic dcache_miss_ready_o,
input logic dcache_miss_valid_i,
input hpdcache_mem_req_t dcache_miss_i,
input logic dcache_miss_resp_ready_i,
output logic dcache_miss_resp_valid_o,
output hpdcache_mem_resp_r_t dcache_miss_resp_o,
input logic dcache_miss_resp_ready_i,
output logic dcache_miss_resp_valid_o,
output hpdcache_mem_resp_r_t dcache_miss_resp_o,
// Write-buffer write interface
output logic dcache_wbuf_ready_o,
input logic dcache_wbuf_valid_i,
input hpdcache_mem_req_t dcache_wbuf_i,
// Write-buffer write interface
output logic dcache_wbuf_ready_o,
input logic dcache_wbuf_valid_i,
input hpdcache_mem_req_t dcache_wbuf_i,
output logic dcache_wbuf_data_ready_o,
input logic dcache_wbuf_data_valid_i,
input hpdcache_mem_req_w_t dcache_wbuf_data_i,
output logic dcache_wbuf_data_ready_o,
input logic dcache_wbuf_data_valid_i,
input hpdcache_mem_req_w_t dcache_wbuf_data_i,
input logic dcache_wbuf_resp_ready_i,
output logic dcache_wbuf_resp_valid_o,
output hpdcache_mem_resp_w_t dcache_wbuf_resp_o,
input logic dcache_wbuf_resp_ready_i,
output logic dcache_wbuf_resp_valid_o,
output hpdcache_mem_resp_w_t dcache_wbuf_resp_o,
// Uncached read interface
output logic dcache_uc_read_ready_o,
input logic dcache_uc_read_valid_i,
input hpdcache_mem_req_t dcache_uc_read_i,
input hpdcache_mem_id_t dcache_uc_read_id_i,
// Uncached read interface
output logic dcache_uc_read_ready_o,
input logic dcache_uc_read_valid_i,
input hpdcache_mem_req_t dcache_uc_read_i,
input hpdcache_mem_id_t dcache_uc_read_id_i,
input logic dcache_uc_read_resp_ready_i,
output logic dcache_uc_read_resp_valid_o,
output hpdcache_mem_resp_r_t dcache_uc_read_resp_o,
input logic dcache_uc_read_resp_ready_i,
output logic dcache_uc_read_resp_valid_o,
output hpdcache_mem_resp_r_t dcache_uc_read_resp_o,
// Uncached write interface
output logic dcache_uc_write_ready_o,
input logic dcache_uc_write_valid_i,
input hpdcache_mem_req_t dcache_uc_write_i,
input hpdcache_mem_id_t dcache_uc_write_id_i,
// Uncached write interface
output logic dcache_uc_write_ready_o,
input logic dcache_uc_write_valid_i,
input hpdcache_mem_req_t dcache_uc_write_i,
input hpdcache_mem_id_t dcache_uc_write_id_i,
output logic dcache_uc_write_data_ready_o,
input logic dcache_uc_write_data_valid_i,
input hpdcache_mem_req_w_t dcache_uc_write_data_i,
output logic dcache_uc_write_data_ready_o,
input logic dcache_uc_write_data_valid_i,
input hpdcache_mem_req_w_t dcache_uc_write_data_i,
input logic dcache_uc_write_resp_ready_i,
output logic dcache_uc_write_resp_valid_o,
output hpdcache_mem_resp_w_t dcache_uc_write_resp_o,
// }}}
input logic dcache_uc_write_resp_ready_i,
output logic dcache_uc_write_resp_valid_o,
output hpdcache_mem_resp_w_t dcache_uc_write_resp_o,
// }}}
// AXI port to upstream memory/peripherals
// {{{
output axi_req_t axi_req_o,
input axi_rsp_t axi_resp_i
// }}}
// AXI port to upstream memory/peripherals
// {{{
output axi_req_t axi_req_o,
input axi_rsp_t axi_resp_i
// }}}
);
// }}}
// }}}
// Internal type definitions
// {{{
typedef struct packed {
logic [AxiIdWidth-1:0] id;
logic [AxiDataWidth-1:0] data;
axi_pkg::resp_t resp;
logic last;
logic [AxiUserWidth-1:0] user;
logic [AxiIdWidth-1:0] id;
logic [AxiDataWidth-1:0] data;
axi_pkg::resp_t resp;
logic last;
logic [AxiUserWidth-1:0] user;
} axi_r_chan_t;
typedef struct packed {
logic [AxiIdWidth-1:0] id;
axi_pkg::resp_t resp;
logic [AxiUserWidth-1:0] user;
logic [AxiIdWidth-1:0] id;
axi_pkg::resp_t resp;
logic [AxiUserWidth-1:0] user;
} axi_b_chan_t;
localparam int MEM_RESP_RT_DEPTH = (1 << HPDcacheMemIdWidth);
@ -133,24 +133,26 @@ module cva6_hpdcache_subsystem_axi_arbiter
// Adapt the I$ interface to the HPDcache memory interface
// {{{
localparam int ICACHE_CL_WORDS = ariane_pkg::ICACHE_LINE_WIDTH/64;
localparam int ICACHE_CL_WORDS = ariane_pkg::ICACHE_LINE_WIDTH / 64;
localparam int ICACHE_CL_WORD_INDEX = $clog2(ICACHE_CL_WORDS);
localparam int ICACHE_CL_SIZE = $clog2(ariane_pkg::ICACHE_LINE_WIDTH/8);
localparam int ICACHE_CL_SIZE = $clog2(ariane_pkg::ICACHE_LINE_WIDTH / 8);
localparam int ICACHE_WORD_SIZE = 3;
localparam int ICACHE_MEM_REQ_CL_LEN =
(ariane_pkg::ICACHE_LINE_WIDTH + HPDcacheMemDataWidth - 1)/HPDcacheMemDataWidth;
localparam int ICACHE_MEM_REQ_CL_SIZE =
(HPDcacheMemDataWidth <= ariane_pkg::ICACHE_LINE_WIDTH) ?
$clog2(HPDcacheMemDataWidth/8) : ICACHE_CL_SIZE;
$clog2(
HPDcacheMemDataWidth / 8
) : ICACHE_CL_SIZE;
// I$ request
hpdcache_mem_req_t icache_miss_req_wdata;
logic icache_miss_req_w, icache_miss_req_wok;
logic icache_miss_req_w, icache_miss_req_wok;
hpdcache_mem_req_t icache_miss_req_rdata;
logic icache_miss_req_r, icache_miss_req_rok;
logic icache_miss_req_r, icache_miss_req_rok;
logic icache_miss_pending_q;
logic icache_miss_pending_q;
// This FIFO has two functionalities:
// - Stabilize the ready-valid protocol. The ICACHE can abort a valid
@ -158,88 +160,85 @@ module cva6_hpdcache_subsystem_axi_arbiter
// behavior is not supported by AXI.
// - Cut a possible long timing path.
hpdcache_fifo_reg #(
.FIFO_DEPTH (1),
.fifo_data_t (hpdcache_mem_req_t)
) i_icache_miss_req_fifo(
.FIFO_DEPTH (1),
.fifo_data_t(hpdcache_mem_req_t)
) i_icache_miss_req_fifo (
.clk_i,
.rst_ni,
.w_i (icache_miss_req_w),
.wok_o (icache_miss_req_wok),
.wdata_i (icache_miss_req_wdata),
.w_i (icache_miss_req_w),
.wok_o (icache_miss_req_wok),
.wdata_i(icache_miss_req_wdata),
.r_i (icache_miss_req_r),
.rok_o (icache_miss_req_rok),
.rdata_o (icache_miss_req_rdata)
.r_i (icache_miss_req_r),
.rok_o (icache_miss_req_rok),
.rdata_o(icache_miss_req_rdata)
);
assign icache_miss_req_w = icache_miss_valid_i,
icache_miss_ready_o = icache_miss_req_wok;
assign icache_miss_req_w = icache_miss_valid_i, icache_miss_ready_o = icache_miss_req_wok;
assign icache_miss_req_wdata.mem_req_addr = icache_miss_i.paddr,
icache_miss_req_wdata.mem_req_len = icache_miss_i.nc ? 0 : ICACHE_MEM_REQ_CL_LEN - 1,
icache_miss_req_wdata.mem_req_size = icache_miss_i.nc ? ICACHE_WORD_SIZE : ICACHE_MEM_REQ_CL_SIZE,
icache_miss_req_wdata.mem_req_id = icache_miss_i.tid,
icache_miss_req_wdata.mem_req_command = hpdcache_pkg::HPDCACHE_MEM_READ,
icache_miss_req_wdata.mem_req_atomic = hpdcache_pkg::hpdcache_mem_atomic_e'(0),
icache_miss_req_wdata.mem_req_cacheable = ~icache_miss_i.nc;
assign icache_miss_req_wdata.mem_req_addr = icache_miss_i.paddr,
icache_miss_req_wdata.mem_req_len = icache_miss_i.nc ? 0 : ICACHE_MEM_REQ_CL_LEN - 1,
icache_miss_req_wdata.mem_req_size = icache_miss_i.nc ? ICACHE_WORD_SIZE : ICACHE_MEM_REQ_CL_SIZE,
icache_miss_req_wdata.mem_req_id = icache_miss_i.tid,
icache_miss_req_wdata.mem_req_command = hpdcache_pkg::HPDCACHE_MEM_READ,
icache_miss_req_wdata.mem_req_atomic = hpdcache_pkg::hpdcache_mem_atomic_e'(0),
icache_miss_req_wdata.mem_req_cacheable = ~icache_miss_i.nc;
// I$ response
logic icache_miss_resp_w, icache_miss_resp_wok;
hpdcache_mem_resp_r_t icache_miss_resp_wdata;
logic icache_miss_resp_w, icache_miss_resp_wok;
hpdcache_mem_resp_r_t icache_miss_resp_wdata;
logic icache_miss_resp_data_w, icache_miss_resp_data_wok;
logic icache_miss_resp_data_r, icache_miss_resp_data_rok;
icache_resp_data_t icache_miss_resp_data_rdata;
logic icache_miss_resp_data_w, icache_miss_resp_data_wok;
logic icache_miss_resp_data_r, icache_miss_resp_data_rok;
icache_resp_data_t icache_miss_resp_data_rdata;
logic icache_miss_resp_meta_w, icache_miss_resp_meta_wok;
logic icache_miss_resp_meta_r, icache_miss_resp_meta_rok;
hpdcache_mem_id_t icache_miss_resp_meta_id;
logic icache_miss_resp_meta_w, icache_miss_resp_meta_wok;
logic icache_miss_resp_meta_r, icache_miss_resp_meta_rok;
hpdcache_mem_id_t icache_miss_resp_meta_id;
icache_resp_data_t icache_miss_rdata;
icache_resp_data_t icache_miss_rdata;
generate
if (HPDcacheMemDataWidth < ariane_pkg::ICACHE_LINE_WIDTH) begin
hpdcache_fifo_reg #(
.FIFO_DEPTH (1),
.fifo_data_t (hpdcache_mem_id_t)
.FIFO_DEPTH (1),
.fifo_data_t(hpdcache_mem_id_t)
) i_icache_refill_meta_fifo (
.clk_i,
.rst_ni,
.w_i (icache_miss_resp_meta_w),
.wok_o (icache_miss_resp_meta_wok),
.wdata_i (icache_miss_resp_wdata.mem_resp_r_id),
.w_i (icache_miss_resp_meta_w),
.wok_o (icache_miss_resp_meta_wok),
.wdata_i(icache_miss_resp_wdata.mem_resp_r_id),
.r_i (icache_miss_resp_meta_r),
.rok_o (icache_miss_resp_meta_rok),
.rdata_o (icache_miss_resp_meta_id)
.r_i (icache_miss_resp_meta_r),
.rok_o (icache_miss_resp_meta_rok),
.rdata_o(icache_miss_resp_meta_id)
);
hpdcache_data_upsize #(
.WR_WIDTH (HPDcacheMemDataWidth),
.RD_WIDTH (ariane_pkg::ICACHE_LINE_WIDTH),
.DEPTH (1)
.WR_WIDTH(HPDcacheMemDataWidth),
.RD_WIDTH(ariane_pkg::ICACHE_LINE_WIDTH),
.DEPTH (1)
) i_icache_hpdcache_data_upsize (
.clk_i,
.rst_ni,
.w_i (icache_miss_resp_data_w),
.wlast_i (icache_miss_resp_wdata.mem_resp_r_last),
.wok_o (icache_miss_resp_data_wok),
.wdata_i (icache_miss_resp_wdata.mem_resp_r_data),
.w_i (icache_miss_resp_data_w),
.wlast_i(icache_miss_resp_wdata.mem_resp_r_last),
.wok_o (icache_miss_resp_data_wok),
.wdata_i(icache_miss_resp_wdata.mem_resp_r_data),
.r_i (icache_miss_resp_data_r),
.rok_o (icache_miss_resp_data_rok),
.rdata_o (icache_miss_resp_data_rdata)
.r_i (icache_miss_resp_data_r),
.rok_o (icache_miss_resp_data_rok),
.rdata_o(icache_miss_resp_data_rdata)
);
assign icache_miss_resp_meta_r = 1'b1,
icache_miss_resp_data_r = 1'b1;
assign icache_miss_resp_meta_r = 1'b1, icache_miss_resp_data_r = 1'b1;
assign icache_miss_resp_meta_w = icache_miss_resp_w &
icache_miss_resp_wdata.mem_resp_r_last;
assign icache_miss_resp_meta_w = icache_miss_resp_w & icache_miss_resp_wdata.mem_resp_r_last;
assign icache_miss_resp_data_w = icache_miss_resp_w;
@ -256,14 +255,13 @@ module cva6_hpdcache_subsystem_axi_arbiter
assign icache_miss_resp_data_rdata = icache_miss_resp_wdata.mem_resp_r_data;
// In the case of uncacheable accesses, the Icache expects the data to be right-aligned
always_comb
begin : icache_miss_resp_data_comb
always_comb begin : icache_miss_resp_data_comb
if (!icache_miss_req_rdata.mem_req_cacheable) begin
automatic logic [ICACHE_CL_WORD_INDEX - 1: 0] icache_miss_word_index;
automatic logic [ICACHE_CL_WORD_INDEX - 1:0] icache_miss_word_index;
automatic logic [63:0] icache_miss_word;
icache_miss_word_index = icache_miss_req_rdata.mem_req_addr[3 +: ICACHE_CL_WORD_INDEX];
icache_miss_word = icache_miss_resp_data_rdata[icache_miss_word_index*64 +: 64];
icache_miss_rdata = {{ariane_pkg::ICACHE_LINE_WIDTH-64{1'b0}}, icache_miss_word};
icache_miss_word_index = icache_miss_req_rdata.mem_req_addr[3+:ICACHE_CL_WORD_INDEX];
icache_miss_word = icache_miss_resp_data_rdata[icache_miss_word_index*64+:64];
icache_miss_rdata = {{ariane_pkg::ICACHE_LINE_WIDTH - 64{1'b0}}, icache_miss_word};
end else begin
icache_miss_rdata = icache_miss_resp_data_rdata;
end
@ -272,11 +270,11 @@ module cva6_hpdcache_subsystem_axi_arbiter
endgenerate
assign icache_miss_resp_valid_o = icache_miss_resp_meta_rok,
icache_miss_resp_o.rtype = wt_cache_pkg::ICACHE_IFILL_ACK,
icache_miss_resp_o.user = '0,
icache_miss_resp_o.inv = '0,
icache_miss_resp_o.tid = icache_miss_resp_meta_id,
icache_miss_resp_o.data = icache_miss_rdata;
icache_miss_resp_o.rtype = wt_cache_pkg::ICACHE_IFILL_ACK,
icache_miss_resp_o.user = '0,
icache_miss_resp_o.inv = '0,
icache_miss_resp_o.tid = icache_miss_resp_meta_id,
icache_miss_resp_o.data = icache_miss_rdata;
// consume the Icache miss on the arrival of the response. The request
// metadata is decoded to forward the correct word in case of uncacheable
@ -286,39 +284,39 @@ module cva6_hpdcache_subsystem_axi_arbiter
// Read request arbiter
// {{{
logic mem_req_read_ready [2:0];
logic mem_req_read_valid [2:0];
hpdcache_mem_req_t mem_req_read [2:0];
logic mem_req_read_ready [2:0];
logic mem_req_read_valid [2:0];
hpdcache_mem_req_t mem_req_read [2:0];
logic mem_req_read_ready_arb;
logic mem_req_read_valid_arb;
hpdcache_mem_req_t mem_req_read_arb;
logic mem_req_read_ready_arb;
logic mem_req_read_valid_arb;
hpdcache_mem_req_t mem_req_read_arb;
assign mem_req_read_valid[0] = icache_miss_req_rok & ~icache_miss_pending_q,
mem_req_read[0] = icache_miss_req_rdata;
assign mem_req_read_valid[0] = icache_miss_req_rok & ~icache_miss_pending_q,
mem_req_read[0] = icache_miss_req_rdata;
assign dcache_miss_ready_o = mem_req_read_ready[1],
mem_req_read_valid[1] = dcache_miss_valid_i,
mem_req_read[1] = dcache_miss_i;
assign dcache_miss_ready_o = mem_req_read_ready[1],
mem_req_read_valid[1] = dcache_miss_valid_i,
mem_req_read[1] = dcache_miss_i;
assign dcache_uc_read_ready_o = mem_req_read_ready[2],
mem_req_read_valid[2] = dcache_uc_read_valid_i,
mem_req_read[2] = dcache_uc_read_i;
mem_req_read_valid[2] = dcache_uc_read_valid_i,
mem_req_read[2] = dcache_uc_read_i;
hpdcache_mem_req_read_arbiter #(
.N (3),
.hpdcache_mem_req_t (hpdcache_mem_req_t)
.N (3),
.hpdcache_mem_req_t(hpdcache_mem_req_t)
) i_mem_req_read_arbiter (
.clk_i,
.rst_ni,
.clk_i,
.rst_ni,
.mem_req_read_ready_o (mem_req_read_ready),
.mem_req_read_valid_i (mem_req_read_valid),
.mem_req_read_i (mem_req_read),
.mem_req_read_ready_o(mem_req_read_ready),
.mem_req_read_valid_i(mem_req_read_valid),
.mem_req_read_i (mem_req_read),
.mem_req_read_ready_i (mem_req_read_ready_arb),
.mem_req_read_valid_o (mem_req_read_valid_arb),
.mem_req_read_o (mem_req_read_arb)
.mem_req_read_ready_i(mem_req_read_ready_arb),
.mem_req_read_valid_o(mem_req_read_valid_arb),
.mem_req_read_o (mem_req_read_arb)
);
// }}}
@ -328,14 +326,13 @@ module cva6_hpdcache_subsystem_axi_arbiter
logic mem_resp_read_valid;
hpdcache_mem_resp_r_t mem_resp_read;
logic mem_resp_read_ready_arb [2:0];
logic mem_resp_read_valid_arb [2:0];
hpdcache_mem_resp_r_t mem_resp_read_arb [2:0];
logic mem_resp_read_ready_arb[2:0];
logic mem_resp_read_valid_arb[2:0];
hpdcache_mem_resp_r_t mem_resp_read_arb [2:0];
mem_resp_rt_t mem_resp_read_rt;
mem_resp_rt_t mem_resp_read_rt;
always_comb
begin
always_comb begin
for (int i = 0; i < MEM_RESP_RT_DEPTH; i++) begin
mem_resp_read_rt[i] = (i == int'( icache_miss_id_i)) ? 0 :
(i == int'(dcache_uc_read_id_i)) ? 2 : 1;
@ -343,95 +340,95 @@ module cva6_hpdcache_subsystem_axi_arbiter
end
hpdcache_mem_resp_demux #(
.N (3),
.resp_t (hpdcache_mem_resp_r_t),
.resp_id_t (hpdcache_mem_id_t)
.N (3),
.resp_t (hpdcache_mem_resp_r_t),
.resp_id_t(hpdcache_mem_id_t)
) i_mem_resp_read_demux (
.clk_i,
.rst_ni,
.clk_i,
.rst_ni,
.mem_resp_ready_o (mem_resp_read_ready),
.mem_resp_valid_i (mem_resp_read_valid),
.mem_resp_id_i (mem_resp_read.mem_resp_r_id),
.mem_resp_i (mem_resp_read),
.mem_resp_ready_o(mem_resp_read_ready),
.mem_resp_valid_i(mem_resp_read_valid),
.mem_resp_id_i (mem_resp_read.mem_resp_r_id),
.mem_resp_i (mem_resp_read),
.mem_resp_ready_i (mem_resp_read_ready_arb),
.mem_resp_valid_o (mem_resp_read_valid_arb),
.mem_resp_o (mem_resp_read_arb),
.mem_resp_ready_i(mem_resp_read_ready_arb),
.mem_resp_valid_o(mem_resp_read_valid_arb),
.mem_resp_o (mem_resp_read_arb),
.mem_resp_rt_i (mem_resp_read_rt)
.mem_resp_rt_i(mem_resp_read_rt)
);
assign icache_miss_resp_w = mem_resp_read_valid_arb[0],
icache_miss_resp_wdata = mem_resp_read_arb[0],
mem_resp_read_ready_arb[0] = icache_miss_resp_wok;
assign icache_miss_resp_w = mem_resp_read_valid_arb[0],
icache_miss_resp_wdata = mem_resp_read_arb[0],
mem_resp_read_ready_arb[0] = icache_miss_resp_wok;
assign dcache_miss_resp_valid_o = mem_resp_read_valid_arb[1],
dcache_miss_resp_o = mem_resp_read_arb[1],
mem_resp_read_ready_arb[1] = dcache_miss_resp_ready_i;
assign dcache_miss_resp_valid_o = mem_resp_read_valid_arb[1],
dcache_miss_resp_o = mem_resp_read_arb[1],
mem_resp_read_ready_arb[1] = dcache_miss_resp_ready_i;
assign dcache_uc_read_resp_valid_o = mem_resp_read_valid_arb[2],
dcache_uc_read_resp_o = mem_resp_read_arb[2],
mem_resp_read_ready_arb[2] = dcache_uc_read_resp_ready_i;
dcache_uc_read_resp_o = mem_resp_read_arb[2],
mem_resp_read_ready_arb[2] = dcache_uc_read_resp_ready_i;
// }}}
// Write request arbiter
// {{{
logic mem_req_write_ready [1:0];
logic mem_req_write_valid [1:0];
hpdcache_mem_req_t mem_req_write [1:0];
logic mem_req_write_ready [1:0];
logic mem_req_write_valid [1:0];
hpdcache_mem_req_t mem_req_write [1:0];
logic mem_req_write_data_ready [1:0];
logic mem_req_write_data_valid [1:0];
hpdcache_mem_req_w_t mem_req_write_data [1:0];
logic mem_req_write_data_ready [1:0];
logic mem_req_write_data_valid [1:0];
hpdcache_mem_req_w_t mem_req_write_data [1:0];
logic mem_req_write_ready_arb;
logic mem_req_write_valid_arb;
hpdcache_mem_req_t mem_req_write_arb;
logic mem_req_write_ready_arb;
logic mem_req_write_valid_arb;
hpdcache_mem_req_t mem_req_write_arb;
logic mem_req_write_data_ready_arb;
logic mem_req_write_data_valid_arb;
hpdcache_mem_req_w_t mem_req_write_data_arb;
logic mem_req_write_data_ready_arb;
logic mem_req_write_data_valid_arb;
hpdcache_mem_req_w_t mem_req_write_data_arb;
assign dcache_wbuf_ready_o = mem_req_write_ready[0],
mem_req_write_valid[0] = dcache_wbuf_valid_i,
mem_req_write[0] = dcache_wbuf_i;
assign dcache_wbuf_ready_o = mem_req_write_ready[0],
mem_req_write_valid[0] = dcache_wbuf_valid_i,
mem_req_write[0] = dcache_wbuf_i;
assign dcache_wbuf_data_ready_o = mem_req_write_data_ready[0],
mem_req_write_data_valid[0] = dcache_wbuf_data_valid_i,
mem_req_write_data[0] = dcache_wbuf_data_i;
assign dcache_wbuf_data_ready_o = mem_req_write_data_ready[0],
mem_req_write_data_valid[0] = dcache_wbuf_data_valid_i,
mem_req_write_data[0] = dcache_wbuf_data_i;
assign dcache_uc_write_ready_o = mem_req_write_ready[1],
mem_req_write_valid[1] = dcache_uc_write_valid_i,
mem_req_write[1] = dcache_uc_write_i;
assign dcache_uc_write_ready_o = mem_req_write_ready[1],
mem_req_write_valid[1] = dcache_uc_write_valid_i,
mem_req_write[1] = dcache_uc_write_i;
assign dcache_uc_write_data_ready_o = mem_req_write_data_ready[1],
mem_req_write_data_valid[1] = dcache_uc_write_data_valid_i,
mem_req_write_data[1] = dcache_uc_write_data_i;
mem_req_write_data_valid[1] = dcache_uc_write_data_valid_i,
mem_req_write_data[1] = dcache_uc_write_data_i;
hpdcache_mem_req_write_arbiter #(
.N (2),
.hpdcache_mem_req_t (hpdcache_mem_req_t),
.hpdcache_mem_req_w_t (hpdcache_mem_req_w_t)
.N (2),
.hpdcache_mem_req_t (hpdcache_mem_req_t),
.hpdcache_mem_req_w_t(hpdcache_mem_req_w_t)
) i_mem_req_write_arbiter (
.clk_i,
.rst_ni,
.clk_i,
.rst_ni,
.mem_req_write_ready_o (mem_req_write_ready),
.mem_req_write_valid_i (mem_req_write_valid),
.mem_req_write_i (mem_req_write),
.mem_req_write_ready_o(mem_req_write_ready),
.mem_req_write_valid_i(mem_req_write_valid),
.mem_req_write_i (mem_req_write),
.mem_req_write_data_ready_o (mem_req_write_data_ready),
.mem_req_write_data_valid_i (mem_req_write_data_valid),
.mem_req_write_data_i (mem_req_write_data),
.mem_req_write_data_ready_o(mem_req_write_data_ready),
.mem_req_write_data_valid_i(mem_req_write_data_valid),
.mem_req_write_data_i (mem_req_write_data),
.mem_req_write_ready_i (mem_req_write_ready_arb),
.mem_req_write_valid_o (mem_req_write_valid_arb),
.mem_req_write_o (mem_req_write_arb),
.mem_req_write_ready_i(mem_req_write_ready_arb),
.mem_req_write_valid_o(mem_req_write_valid_arb),
.mem_req_write_o (mem_req_write_arb),
.mem_req_write_data_ready_i (mem_req_write_data_ready_arb),
.mem_req_write_data_valid_o (mem_req_write_data_valid_arb),
.mem_req_write_data_o (mem_req_write_data_arb)
.mem_req_write_data_ready_i(mem_req_write_data_ready_arb),
.mem_req_write_data_valid_o(mem_req_write_data_valid_arb),
.mem_req_write_data_o (mem_req_write_data_arb)
);
// }}}
@ -441,52 +438,50 @@ module cva6_hpdcache_subsystem_axi_arbiter
logic mem_resp_write_valid;
hpdcache_mem_resp_w_t mem_resp_write;
logic mem_resp_write_ready_arb [1:0];
logic mem_resp_write_valid_arb [1:0];
hpdcache_mem_resp_w_t mem_resp_write_arb [1:0];
logic mem_resp_write_ready_arb[1:0];
logic mem_resp_write_valid_arb[1:0];
hpdcache_mem_resp_w_t mem_resp_write_arb [1:0];
mem_resp_rt_t mem_resp_write_rt;
mem_resp_rt_t mem_resp_write_rt;
always_comb
begin
always_comb begin
for (int i = 0; i < MEM_RESP_RT_DEPTH; i++) begin
mem_resp_write_rt[i] = (i == int'(dcache_uc_write_id_i)) ? 1 : 0;
end
end
hpdcache_mem_resp_demux #(
.N (2),
.resp_t (hpdcache_mem_resp_w_t),
.resp_id_t (hpdcache_mem_id_t)
.N (2),
.resp_t (hpdcache_mem_resp_w_t),
.resp_id_t(hpdcache_mem_id_t)
) i_hpdcache_mem_resp_write_demux (
.clk_i,
.rst_ni,
.clk_i,
.rst_ni,
.mem_resp_ready_o (mem_resp_write_ready),
.mem_resp_valid_i (mem_resp_write_valid),
.mem_resp_id_i (mem_resp_write.mem_resp_w_id),
.mem_resp_i (mem_resp_write),
.mem_resp_ready_o(mem_resp_write_ready),
.mem_resp_valid_i(mem_resp_write_valid),
.mem_resp_id_i (mem_resp_write.mem_resp_w_id),
.mem_resp_i (mem_resp_write),
.mem_resp_ready_i (mem_resp_write_ready_arb),
.mem_resp_valid_o (mem_resp_write_valid_arb),
.mem_resp_o (mem_resp_write_arb),
.mem_resp_ready_i(mem_resp_write_ready_arb),
.mem_resp_valid_o(mem_resp_write_valid_arb),
.mem_resp_o (mem_resp_write_arb),
.mem_resp_rt_i (mem_resp_write_rt)
.mem_resp_rt_i(mem_resp_write_rt)
);
assign dcache_wbuf_resp_valid_o = mem_resp_write_valid_arb[0],
dcache_wbuf_resp_o = mem_resp_write_arb[0],
mem_resp_write_ready_arb[0] = dcache_wbuf_resp_ready_i;
assign dcache_wbuf_resp_valid_o = mem_resp_write_valid_arb[0],
dcache_wbuf_resp_o = mem_resp_write_arb[0],
mem_resp_write_ready_arb[0] = dcache_wbuf_resp_ready_i;
assign dcache_uc_write_resp_valid_o = mem_resp_write_valid_arb[1],
dcache_uc_write_resp_o = mem_resp_write_arb[1],
mem_resp_write_ready_arb[1] = dcache_uc_write_resp_ready_i;
dcache_uc_write_resp_o = mem_resp_write_arb[1],
mem_resp_write_ready_arb[1] = dcache_uc_write_resp_ready_i;
// }}}
// I$ miss pending
// {{{
always_ff @(posedge clk_i or negedge rst_ni)
begin : icache_miss_pending_ff
always_ff @(posedge clk_i or negedge rst_ni) begin : icache_miss_pending_ff
if (!rst_ni) begin
icache_miss_pending_q <= 1'b0;
end else begin
@ -502,59 +497,59 @@ module cva6_hpdcache_subsystem_axi_arbiter
axi_rsp_t axi_resp;
hpdcache_mem_to_axi_write #(
.hpdcache_mem_req_t (hpdcache_mem_req_t),
.hpdcache_mem_req_w_t (hpdcache_mem_req_w_t),
.hpdcache_mem_resp_w_t (hpdcache_mem_resp_w_t),
.aw_chan_t (axi_aw_chan_t),
.w_chan_t (axi_w_chan_t),
.b_chan_t (axi_b_chan_t)
.hpdcache_mem_req_t (hpdcache_mem_req_t),
.hpdcache_mem_req_w_t (hpdcache_mem_req_w_t),
.hpdcache_mem_resp_w_t(hpdcache_mem_resp_w_t),
.aw_chan_t (axi_aw_chan_t),
.w_chan_t (axi_w_chan_t),
.b_chan_t (axi_b_chan_t)
) i_hpdcache_mem_to_axi_write (
.req_ready_o (mem_req_write_ready_arb),
.req_valid_i (mem_req_write_valid_arb),
.req_i (mem_req_write_arb),
.req_ready_o(mem_req_write_ready_arb),
.req_valid_i(mem_req_write_valid_arb),
.req_i (mem_req_write_arb),
.req_data_ready_o (mem_req_write_data_ready_arb),
.req_data_valid_i (mem_req_write_data_valid_arb),
.req_data_i (mem_req_write_data_arb),
.req_data_ready_o(mem_req_write_data_ready_arb),
.req_data_valid_i(mem_req_write_data_valid_arb),
.req_data_i (mem_req_write_data_arb),
.resp_ready_i (mem_resp_write_ready),
.resp_valid_o (mem_resp_write_valid),
.resp_o (mem_resp_write),
.resp_ready_i(mem_resp_write_ready),
.resp_valid_o(mem_resp_write_valid),
.resp_o (mem_resp_write),
.axi_aw_valid_o (axi_req.aw_valid),
.axi_aw_o (axi_req.aw),
.axi_aw_ready_i (axi_resp.aw_ready),
.axi_aw_valid_o(axi_req.aw_valid),
.axi_aw_o (axi_req.aw),
.axi_aw_ready_i(axi_resp.aw_ready),
.axi_w_valid_o (axi_req.w_valid),
.axi_w_o (axi_req.w),
.axi_w_ready_i (axi_resp.w_ready),
.axi_w_valid_o(axi_req.w_valid),
.axi_w_o (axi_req.w),
.axi_w_ready_i(axi_resp.w_ready),
.axi_b_valid_i (axi_resp.b_valid),
.axi_b_i (axi_resp.b),
.axi_b_ready_o (axi_req.b_ready)
.axi_b_valid_i(axi_resp.b_valid),
.axi_b_i (axi_resp.b),
.axi_b_ready_o(axi_req.b_ready)
);
hpdcache_mem_to_axi_read #(
.hpdcache_mem_req_t (hpdcache_mem_req_t),
.hpdcache_mem_resp_r_t (hpdcache_mem_resp_r_t),
.ar_chan_t (axi_ar_chan_t),
.r_chan_t (axi_r_chan_t)
.hpdcache_mem_req_t (hpdcache_mem_req_t),
.hpdcache_mem_resp_r_t(hpdcache_mem_resp_r_t),
.ar_chan_t (axi_ar_chan_t),
.r_chan_t (axi_r_chan_t)
) i_hpdcache_mem_to_axi_read (
.req_ready_o (mem_req_read_ready_arb),
.req_valid_i (mem_req_read_valid_arb),
.req_i (mem_req_read_arb),
.req_ready_o(mem_req_read_ready_arb),
.req_valid_i(mem_req_read_valid_arb),
.req_i (mem_req_read_arb),
.resp_ready_i (mem_resp_read_ready),
.resp_valid_o (mem_resp_read_valid),
.resp_o (mem_resp_read),
.resp_ready_i(mem_resp_read_ready),
.resp_valid_o(mem_resp_read_valid),
.resp_o (mem_resp_read),
.axi_ar_valid_o (axi_req.ar_valid),
.axi_ar_o (axi_req.ar),
.axi_ar_ready_i (axi_resp.ar_ready),
.axi_ar_valid_o(axi_req.ar_valid),
.axi_ar_o (axi_req.ar),
.axi_ar_ready_i(axi_resp.ar_ready),
.axi_r_valid_i (axi_resp.r_valid),
.axi_r_i (axi_resp.r),
.axi_r_ready_o (axi_req.r_ready)
.axi_r_valid_i(axi_resp.r_valid),
.axi_r_i (axi_resp.r),
.axi_r_ready_o(axi_req.r_ready)
);
assign axi_req_o = axi_req;
@ -564,16 +559,27 @@ module cva6_hpdcache_subsystem_axi_arbiter
// Assertions
// {{{
// pragma translate_off
initial assert (HPDcacheMemIdWidth <= AxiIdWidth) else
$fatal("HPDcacheMemIdWidth shall be less or equal to AxiIdWidth");
initial assert (HPDcacheMemIdWidth >= (hpdcache_pkg::HPDCACHE_MSHR_SET_WIDTH + hpdcache_pkg::HPDCACHE_MSHR_WAY_WIDTH + 1)) else
$fatal("HPDcacheMemIdWidth shall be wide enough to identify all pending HPDcache misses and Icache misses");
initial assert (HPDcacheMemIdWidth >= (hpdcache_pkg::HPDCACHE_WBUF_DIR_PTR_WIDTH + 1)) else
$fatal("HPDcacheMemIdWidth shall be wide enough to identify all pending HPDcache cacheable writes and uncacheable writes");
initial assert (HPDcacheMemDataWidth <= ariane_pkg::ICACHE_LINE_WIDTH) else
$fatal("HPDcacheMemDataWidth shall be less or equal to the width of a Icache line");
initial assert (HPDcacheMemDataWidth <= ariane_pkg::DCACHE_LINE_WIDTH) else
$fatal("HPDcacheMemDataWidth shall be less or equal to the width of a Dcache line");
// Elaboration-time checks of the memory-interface parameters. Each check stops
// simulation through $fatal when the parameterization is inconsistent.
// The message is passed as the second argument because the first argument of
// $fatal is the finish number (IEEE 1800 severity system tasks); a bare string
// in that position is non-standard and may be rejected or dropped by tools.
initial
  assert (HPDcacheMemIdWidth <= AxiIdWidth)
  else $fatal(1, "HPDcacheMemIdWidth shall be less or equal to AxiIdWidth");
// The memory ID must cover the MSHR set and way widths plus one extra bit.
initial
  assert (HPDcacheMemIdWidth >= (hpdcache_pkg::HPDCACHE_MSHR_SET_WIDTH + hpdcache_pkg::HPDCACHE_MSHR_WAY_WIDTH + 1))
  else
    $fatal(
        1,
        "HPDcacheMemIdWidth shall be wide enough to identify all pending HPDcache misses and Icache misses"
    );
// The memory ID must cover the write-buffer directory pointer plus one extra bit.
initial
  assert (HPDcacheMemIdWidth >= (hpdcache_pkg::HPDCACHE_WBUF_DIR_PTR_WIDTH + 1))
  else
    $fatal(
        1,
        "HPDcacheMemIdWidth shall be wide enough to identify all pending HPDcache cacheable writes and uncacheable writes"
    );
// The memory data bus may not be wider than an instruction-cache line.
initial
  assert (HPDcacheMemDataWidth <= ariane_pkg::ICACHE_LINE_WIDTH)
  else $fatal(1, "HPDcacheMemDataWidth shall be less or equal to the width of a Icache line");
// The memory data bus may not be wider than a data-cache line.
initial
  assert (HPDcacheMemDataWidth <= ariane_pkg::DCACHE_LINE_WIDTH)
  else $fatal(1, "HPDcacheMemDataWidth shall be less or equal to the width of a Dcache line");
// pragma translate_on
// }}}

View file

@ -25,38 +25,40 @@
//
module cva6_icache import ariane_pkg::*; import wt_cache_pkg::*; #(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
/// ID to be used for read transactions
parameter logic [MEM_TID_WIDTH-1:0] RdTxId = 0
module cva6_icache
import ariane_pkg::*;
import wt_cache_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
/// ID to be used for read transactions
parameter logic [MEM_TID_WIDTH-1:0] RdTxId = 0
) (
input logic clk_i,
input logic rst_ni,
input logic clk_i,
input logic rst_ni,
/// flush the icache, flush and kill have to be asserted together
input logic flush_i,
/// enable icache
input logic en_i,
/// to performance counter
output logic miss_o,
// address translation requests
input icache_areq_t areq_i,
output icache_arsp_t areq_o,
// data requests
input icache_dreq_t dreq_i,
output icache_drsp_t dreq_o,
// refill port
input logic mem_rtrn_vld_i,
input icache_rtrn_t mem_rtrn_i,
output logic mem_data_req_o,
input logic mem_data_ack_i,
output icache_req_t mem_data_o
/// flush the icache, flush and kill have to be asserted together
input logic flush_i,
/// enable icache
input logic en_i,
/// to performance counter
output logic miss_o,
// address translation requests
input icache_areq_t areq_i,
output icache_arsp_t areq_o,
// data requests
input icache_dreq_t dreq_i,
output icache_drsp_t dreq_o,
// refill port
input logic mem_rtrn_vld_i,
input icache_rtrn_t mem_rtrn_i,
output logic mem_data_req_o,
input logic mem_data_ack_i,
output icache_req_t mem_data_o
);
// functions
function automatic logic [ariane_pkg::ICACHE_SET_ASSOC-1:0] icache_way_bin2oh (
input logic [L1I_WAY_WIDTH-1:0] in
);
function automatic logic [ariane_pkg::ICACHE_SET_ASSOC-1:0] icache_way_bin2oh(
input logic [L1I_WAY_WIDTH-1:0] in);
logic [ariane_pkg::ICACHE_SET_ASSOC-1:0] out;
out = '0;
out[in] = 1'b1;
@ -64,59 +66,70 @@ module cva6_icache import ariane_pkg::*; import wt_cache_pkg::*; #(
endfunction
// signals
logic cache_en_d, cache_en_q; // cache is enabled
logic [riscv::VLEN-1:0] vaddr_d, vaddr_q;
logic paddr_is_nc; // asserted if physical address is non-cacheable
logic [ICACHE_SET_ASSOC-1:0] cl_hit; // hit from tag compare
logic cache_rden; // triggers cache lookup
logic cache_wren; // triggers write to cacheline
logic cmp_en_d, cmp_en_q; // enable tag comparison in next cycle. used to cut long path due to NC signal.
logic flush_d, flush_q; // used to register and signal pending flushes
logic cache_en_d, cache_en_q; // cache is enabled
logic [riscv::VLEN-1:0] vaddr_d, vaddr_q;
logic paddr_is_nc; // asserted if physical address is non-cacheable
logic [ICACHE_SET_ASSOC-1:0] cl_hit; // hit from tag compare
logic cache_rden; // triggers cache lookup
logic cache_wren; // triggers write to cacheline
logic
cmp_en_d,
cmp_en_q; // enable tag comparison in next cycle. used to cut long path due to NC signal.
logic flush_d, flush_q; // used to register and signal pending flushes
// replacement strategy
logic update_lfsr; // shift the LFSR
logic [$clog2(ICACHE_SET_ASSOC)-1:0] inv_way; // first non-valid encountered
logic [$clog2(ICACHE_SET_ASSOC)-1:0] rnd_way; // random index for replacement
logic [$clog2(ICACHE_SET_ASSOC)-1:0] repl_way; // way to replace
logic [ICACHE_SET_ASSOC-1:0] repl_way_oh_d, repl_way_oh_q; // way to replace (onehot)
logic all_ways_valid; // we need to switch repl strategy since all are valid
logic update_lfsr; // shift the LFSR
logic [$clog2(ICACHE_SET_ASSOC)-1:0] inv_way; // first non-valid encountered
logic [$clog2(ICACHE_SET_ASSOC)-1:0] rnd_way; // random index for replacement
logic [$clog2(ICACHE_SET_ASSOC)-1:0] repl_way; // way to replace
logic [ICACHE_SET_ASSOC-1:0] repl_way_oh_d, repl_way_oh_q; // way to replace (onehot)
logic all_ways_valid; // we need to switch repl strategy since all are valid
// invalidations / flushing
logic inv_en; // incoming invalidations
logic inv_d, inv_q; // invalidation in progress
logic flush_en, flush_done; // used to flush cache entries
logic [ICACHE_CL_IDX_WIDTH-1:0] flush_cnt_d, flush_cnt_q; // used to flush cache entries
logic inv_en; // incoming invalidations
logic inv_d, inv_q; // invalidation in progress
logic flush_en, flush_done; // used to flush cache entries
logic [ICACHE_CL_IDX_WIDTH-1:0] flush_cnt_d, flush_cnt_q; // used to flush cache entries
// mem arrays
logic cl_we; // write enable to memory array
logic [ICACHE_SET_ASSOC-1:0] cl_req; // request to memory array
logic [ICACHE_CL_IDX_WIDTH-1:0] cl_index; // this is a cache-line index, to memory array
logic [ICACHE_OFFSET_WIDTH-1:0] cl_offset_d, cl_offset_q; // offset in cache line
logic [ICACHE_TAG_WIDTH-1:0] cl_tag_d, cl_tag_q; // this is the cache tag
logic cl_we; // write enable to memory array
logic [ ICACHE_SET_ASSOC-1:0] cl_req; // request to memory array
logic [ICACHE_CL_IDX_WIDTH-1:0] cl_index; // this is a cache-line index, to memory array
logic [ICACHE_OFFSET_WIDTH-1:0] cl_offset_d, cl_offset_q; // offset in cache line
logic [ICACHE_TAG_WIDTH-1:0] cl_tag_d, cl_tag_q; // this is the cache tag
logic [ICACHE_TAG_WIDTH-1:0] cl_tag_rdata [ICACHE_SET_ASSOC-1:0]; // these are the tags coming from the tagmem
logic [ICACHE_LINE_WIDTH-1:0] cl_rdata [ICACHE_SET_ASSOC-1:0]; // these are the cachelines coming from the cache
logic [ICACHE_USER_LINE_WIDTH-1:0] cl_ruser[ICACHE_SET_ASSOC-1:0]; // these are the cachelines coming from the user cache
logic [ICACHE_SET_ASSOC-1:0][FETCH_WIDTH-1:0]cl_sel; // selected word from each cacheline
logic [ICACHE_SET_ASSOC-1:0][FETCH_USER_WIDTH-1:0] cl_user; // selected word from each cacheline
logic [ICACHE_SET_ASSOC-1:0] vld_req; // bit enable for valid regs
logic vld_we; // valid bits write enable
logic [ICACHE_SET_ASSOC-1:0] vld_wdata; // valid bits to write
logic [ICACHE_SET_ASSOC-1:0] vld_rdata; // valid bits coming from valid regs
logic [ICACHE_CL_IDX_WIDTH-1:0] vld_addr; // valid bit
logic [ICACHE_SET_ASSOC-1:0][FETCH_WIDTH-1:0] cl_sel; // selected word from each cacheline
logic [ICACHE_SET_ASSOC-1:0][FETCH_USER_WIDTH-1:0] cl_user; // selected word from each cacheline
logic [ICACHE_SET_ASSOC-1:0] vld_req; // bit enable for valid regs
logic vld_we; // valid bits write enable
logic [ICACHE_SET_ASSOC-1:0] vld_wdata; // valid bits to write
logic [ICACHE_SET_ASSOC-1:0] vld_rdata; // valid bits coming from valid regs
logic [ICACHE_CL_IDX_WIDTH-1:0] vld_addr; // valid bit
// controller FSM
typedef enum logic[2:0] {FLUSH, IDLE, READ, MISS, KILL_ATRANS, KILL_MISS} state_e;
typedef enum logic [2:0] {
FLUSH,
IDLE,
READ,
MISS,
KILL_ATRANS,
KILL_MISS
} state_e;
state_e state_d, state_q;
///////////////////////////////////////////////////////
// address -> cl_index mapping, interface plumbing
///////////////////////////////////////////////////////
///////////////////////////////////////////////////////
// address -> cl_index mapping, interface plumbing
///////////////////////////////////////////////////////
// extract tag from physical address, check if NC
assign cl_tag_d = (areq_i.fetch_valid) ? areq_i.fetch_paddr[ICACHE_TAG_WIDTH+ICACHE_INDEX_WIDTH-1:ICACHE_INDEX_WIDTH] : cl_tag_q;
// noncacheable if request goes to I/O space, or if cache is disabled
assign paddr_is_nc = (~cache_en_q) | (~config_pkg::is_inside_cacheable_regions(CVA6Cfg, {{{64-riscv::PLEN}{1'b0}}, cl_tag_d, {ICACHE_INDEX_WIDTH{1'b0}}}));
assign paddr_is_nc = (~cache_en_q) | (~config_pkg::is_inside_cacheable_regions(
CVA6Cfg, {{{64 - riscv::PLEN} {1'b0}}, cl_tag_d, {ICACHE_INDEX_WIDTH{1'b0}}}
));
// pass exception through
assign dreq_o.ex = areq_i.fetch_exception;
@ -124,65 +137,64 @@ module cva6_icache import ariane_pkg::*; import wt_cache_pkg::*; #(
// latch this in case we have to stall later on
// make sure this is 32bit aligned
assign vaddr_d = (dreq_o.ready & dreq_i.req) ? dreq_i.vaddr : vaddr_q;
assign areq_o.fetch_vaddr = {vaddr_q>>2, 2'b0};
assign areq_o.fetch_vaddr = {vaddr_q >> 2, 2'b0};
// split virtual address into index and offset to address cache arrays
assign cl_index = vaddr_d[ICACHE_INDEX_WIDTH-1:ICACHE_OFFSET_WIDTH];
assign cl_index = vaddr_d[ICACHE_INDEX_WIDTH-1:ICACHE_OFFSET_WIDTH];
if (CVA6Cfg.NOCType == config_pkg::NOC_TYPE_AXI4_ATOP) begin : gen_axi_offset
// if we generate a noncacheable access, the word will be at offset 0 or 4 in the cl coming from memory
assign cl_offset_d = ( dreq_o.ready & dreq_i.req) ? {dreq_i.vaddr>>2, 2'b0} :
( paddr_is_nc & mem_data_req_o ) ? cl_offset_q[2]<<2 : // needed since we transfer 32bit over a 64bit AXI bus in this case
cl_offset_q;
cl_offset_q;
// request word address instead of cl address in case of NC access
assign mem_data_o.paddr = (paddr_is_nc) ? {cl_tag_d, vaddr_q[ICACHE_INDEX_WIDTH-1:3], 3'b0} : // align to 64bit
{cl_tag_d, vaddr_q[ICACHE_INDEX_WIDTH-1:ICACHE_OFFSET_WIDTH], {ICACHE_OFFSET_WIDTH{1'b0}}}; // align to cl
end else begin : gen_piton_offset
{cl_tag_d, vaddr_q[ICACHE_INDEX_WIDTH-1:ICACHE_OFFSET_WIDTH], {ICACHE_OFFSET_WIDTH{1'b0}}}; // align to cl
end else begin : gen_piton_offset
// icache fills are either cachelines or 4byte fills, depending on whether they go to the Piton I/O space or not.
// since the piton cache system replicates the data, we can always index the full CL
assign cl_offset_d = ( dreq_o.ready & dreq_i.req) ? {dreq_i.vaddr>>2, 2'b0} :
cl_offset_q;
assign cl_offset_d = (dreq_o.ready & dreq_i.req) ? {dreq_i.vaddr >> 2, 2'b0} : cl_offset_q;
// request word address instead of cl address in case of NC access
assign mem_data_o.paddr = (paddr_is_nc) ? {cl_tag_d, vaddr_q[ICACHE_INDEX_WIDTH-1:2], 2'b0} : // align to 32bit
{cl_tag_d, vaddr_q[ICACHE_INDEX_WIDTH-1:ICACHE_OFFSET_WIDTH], {ICACHE_OFFSET_WIDTH{1'b0}}}; // align to cl
{cl_tag_d, vaddr_q[ICACHE_INDEX_WIDTH-1:ICACHE_OFFSET_WIDTH], {ICACHE_OFFSET_WIDTH{1'b0}}}; // align to cl
end
assign mem_data_o.tid = RdTxId;
assign mem_data_o.tid = RdTxId;
assign mem_data_o.nc = paddr_is_nc;
assign mem_data_o.nc = paddr_is_nc;
// way that is being replaced
assign mem_data_o.way = repl_way;
assign dreq_o.vaddr = vaddr_q;
assign mem_data_o.way = repl_way;
assign dreq_o.vaddr = vaddr_q;
// invalidations take two cycles
assign inv_d = inv_en;
assign inv_d = inv_en;
///////////////////////////////////////////////////////
// main control logic
///////////////////////////////////////////////////////
///////////////////////////////////////////////////////
// main control logic
///////////////////////////////////////////////////////
logic addr_ni;
assign addr_ni = config_pkg::is_inside_nonidempotent_regions(CVA6Cfg, areq_i.fetch_paddr);
always_comb begin : p_fsm
// default assignment
state_d = state_q;
state_d = state_q;
cache_en_d = cache_en_q & en_i;// disabling the cache is always possible, enable needs to go via flush
flush_en = 1'b0;
cmp_en_d = 1'b0;
cache_rden = 1'b0;
cache_wren = 1'b0;
inv_en = 1'b0;
flush_d = flush_q | flush_i; // register incoming flush
flush_en = 1'b0;
cmp_en_d = 1'b0;
cache_rden = 1'b0;
cache_wren = 1'b0;
inv_en = 1'b0;
flush_d = flush_q | flush_i; // register incoming flush
// interfaces
dreq_o.ready = 1'b0;
dreq_o.ready = 1'b0;
areq_o.fetch_req = 1'b0;
dreq_o.valid = 1'b0;
mem_data_req_o = 1'b0;
dreq_o.valid = 1'b0;
mem_data_req_o = 1'b0;
// performance counter
miss_o = 1'b0;
miss_o = 1'b0;
// handle invalidations unconditionally
// note: invals are mutually exclusive with
@ -197,7 +209,7 @@ end else begin : gen_piton_offset
//////////////////////////////////
// this clears all valid bits
FLUSH: begin
flush_en = 1'b1;
flush_en = 1'b1;
if (flush_done) begin
state_d = IDLE;
flush_d = 1'b0;
@ -208,27 +220,27 @@ end else begin : gen_piton_offset
//////////////////////////////////
// wait for an incoming request
IDLE: begin
// only enable tag comparison if cache is enabled
cmp_en_d = cache_en_q;
// only enable tag comparison if cache is enabled
cmp_en_d = cache_en_q;
// handle pending flushes, or perform cache clear upon enable
if (flush_d || (en_i && !cache_en_q)) begin
state_d = FLUSH;
// handle pending flushes, or perform cache clear upon enable
if (flush_d || (en_i && !cache_en_q)) begin
state_d = FLUSH;
// wait for incoming requests
end else begin
// mem requests are for sure invals here
if (!mem_rtrn_vld_i) begin
dreq_o.ready = 1'b1;
// we have a new request
if (dreq_i.req) begin
cache_rden = 1'b1;
state_d = READ;
end
end
if (dreq_i.kill_s1) begin
state_d = IDLE;
end else begin
// mem requests are for sure invals here
if (!mem_rtrn_vld_i) begin
dreq_o.ready = 1'b1;
// we have a new request
if (dreq_i.req) begin
cache_rden = 1'b1;
state_d = READ;
end
end
if (dreq_i.kill_s1) begin
state_d = IDLE;
end
end
end
//////////////////////////////////
// check whether we have a hit
@ -237,53 +249,53 @@ end else begin : gen_piton_offset
// reuse the miss mechanism to handle
// the request
READ: begin
areq_o.fetch_req = '1;
// only enable tag comparison if cache is enabled
cmp_en_d = cache_en_q;
// readout speculatively
cache_rden = cache_en_q;
areq_o.fetch_req = '1;
// only enable tag comparison if cache is enabled
cmp_en_d = cache_en_q;
// readout speculatively
cache_rden = cache_en_q;
if (areq_i.fetch_valid && (!dreq_i.spec || !addr_ni) ) begin
// check if we have to flush
if (flush_d) begin
state_d = IDLE;
if (areq_i.fetch_valid && (!dreq_i.spec || !addr_ni)) begin
// check if we have to flush
if (flush_d) begin
state_d = IDLE;
// we have a hit or an exception output valid result
end else if (((|cl_hit && cache_en_q) || areq_i.fetch_exception.valid) && !inv_q) begin
dreq_o.valid = ~dreq_i.kill_s2;// just don't output in this case
state_d = IDLE;
end else if (((|cl_hit && cache_en_q) || areq_i.fetch_exception.valid) && !inv_q) begin
dreq_o.valid = ~dreq_i.kill_s2; // just don't output in this case
state_d = IDLE;
// we can accept another request
// and stay here, but only if no inval is coming in
// note: we are not expecting ifill return packets here...
if (!mem_rtrn_vld_i) begin
dreq_o.ready = 1'b1;
if (dreq_i.req) begin
state_d = READ;
end
end
// if a request is being killed at this stage,
// we have to bail out and wait for the address translation to complete
if (dreq_i.kill_s1) begin
state_d = IDLE;
end
// we have a miss / NC transaction
end else if (dreq_i.kill_s2) begin
state_d = IDLE;
end else if (!inv_q) begin
cmp_en_d = 1'b0;
// only count this as a miss if the cache is enabled, and
// the address is cacheable
// send out ifill request
mem_data_req_o = 1'b1;
if (mem_data_ack_i) begin
miss_o = ~paddr_is_nc;
state_d = MISS;
// we can accept another request
// and stay here, but only if no inval is coming in
// note: we are not expecting ifill return packets here...
if (!mem_rtrn_vld_i) begin
dreq_o.ready = 1'b1;
if (dreq_i.req) begin
state_d = READ;
end
end
// bail out if this request is being killed (and we missed on the TLB)
end else if (dreq_i.kill_s2 || flush_d) begin
state_d = KILL_ATRANS;
// if a request is being killed at this stage,
// we have to bail out and wait for the address translation to complete
if (dreq_i.kill_s1) begin
state_d = IDLE;
end
// we have a miss / NC transaction
end else if (dreq_i.kill_s2) begin
state_d = IDLE;
end else if (!inv_q) begin
cmp_en_d = 1'b0;
// only count this as a miss if the cache is enabled, and
// the address is cacheable
// send out ifill request
mem_data_req_o = 1'b1;
if (mem_data_ack_i) begin
miss_o = ~paddr_is_nc;
state_d = MISS;
end
end
// bail out if this request is being killed (and we missed on the TLB)
end else if (dreq_i.kill_s2 || flush_d) begin
state_d = KILL_ATRANS;
end
end
//////////////////////////////////
// wait until the memory transaction
@ -293,16 +305,16 @@ end else begin : gen_piton_offset
// note: this is mutually exclusive with ICACHE_INV_REQ,
// so we do not have to check for invals here
if (mem_rtrn_vld_i && mem_rtrn_i.rtype == ICACHE_IFILL_ACK) begin
state_d = IDLE;
state_d = IDLE;
// only return data if request is not being killed
if (!(dreq_i.kill_s2 || flush_d)) begin
dreq_o.valid = 1'b1;
// only write to cache if this address is cacheable
cache_wren = ~paddr_is_nc;
end
// bail out if this request is being killed
// bail out if this request is being killed
end else if (dreq_i.kill_s2 || flush_d) begin
state_d = KILL_MISS;
state_d = KILL_MISS;
end
end
//////////////////////////////////
@ -312,7 +324,7 @@ end else begin : gen_piton_offset
KILL_ATRANS: begin
areq_o.fetch_req = '1;
if (areq_i.fetch_valid) begin
state_d = IDLE;
state_d = IDLE;
end
end
//////////////////////////////////
@ -321,30 +333,28 @@ end else begin : gen_piton_offset
// go back to idle
KILL_MISS: begin
if (mem_rtrn_vld_i && mem_rtrn_i.rtype == ICACHE_IFILL_ACK) begin
state_d = IDLE;
state_d = IDLE;
end
end
default: begin
// we should never get here
state_d = FLUSH;
end
endcase // state_q
endcase // state_q
end
///////////////////////////////////////////////////////
// valid bit invalidation and replacement strategy
///////////////////////////////////////////////////////
///////////////////////////////////////////////////////
// valid bit invalidation and replacement strategy
///////////////////////////////////////////////////////
// note: it cannot happen that we get an invalidation + a cl replacement
// in the same cycle as these requests arrive via the same interface
// flushes take precedence over invalidations (it is ok if we ignore
// the inval since the cache is cleared anyway)
assign flush_cnt_d = (flush_done) ? '0 :
(flush_en) ? flush_cnt_q + 1 :
flush_cnt_q;
assign flush_cnt_d = (flush_done) ? '0 : (flush_en) ? flush_cnt_q + 1 : flush_cnt_q;
assign flush_done = (flush_cnt_q==(ICACHE_NUM_WORDS-1));
assign flush_done = (flush_cnt_q == (ICACHE_NUM_WORDS - 1));
// invalidation/clearing address
// flushing takes precedence over invals
@ -354,67 +364,66 @@ end else begin : gen_piton_offset
assign vld_req = (flush_en || cache_rden) ? '1 :
(mem_rtrn_i.inv.all && inv_en) ? '1 :
(mem_rtrn_i.inv.vld && inv_en) ? icache_way_bin2oh(mem_rtrn_i.inv.way) :
repl_way_oh_q;
(mem_rtrn_i.inv.vld && inv_en) ? icache_way_bin2oh(
mem_rtrn_i.inv.way
) : repl_way_oh_q;
assign vld_wdata = (cache_wren) ? '1 : '0;
assign vld_we = (cache_wren | inv_en | flush_en);
assign vld_we = (cache_wren | inv_en | flush_en);
// assign vld_req = (vld_we | cache_rden);
// choose random replacement if all are valid
assign update_lfsr = cache_wren & all_ways_valid;
assign repl_way = (all_ways_valid) ? rnd_way : inv_way;
assign update_lfsr = cache_wren & all_ways_valid;
assign repl_way = (all_ways_valid) ? rnd_way : inv_way;
assign repl_way_oh_d = (cmp_en_q) ? icache_way_bin2oh(repl_way) : repl_way_oh_q;
// enable signals for memory arrays
assign cl_req = (cache_rden) ? '1 :
(cache_wren) ? repl_way_oh_q :
'0;
assign cl_we = cache_wren;
assign cl_req = (cache_rden) ? '1 : (cache_wren) ? repl_way_oh_q : '0;
assign cl_we = cache_wren;
// find invalid cache line
lzc #(
.WIDTH ( ICACHE_SET_ASSOC )
.WIDTH(ICACHE_SET_ASSOC)
) i_lzc (
.in_i ( ~vld_rdata ),
.cnt_o ( inv_way ),
.empty_o ( all_ways_valid )
.in_i (~vld_rdata),
.cnt_o (inv_way),
.empty_o(all_ways_valid)
);
// generate random cacheline index
lfsr #(
.LfsrWidth ( 8 ),
.OutWidth ( $clog2(ariane_pkg::ICACHE_SET_ASSOC))
.LfsrWidth(8),
.OutWidth ($clog2(ariane_pkg::ICACHE_SET_ASSOC))
) i_lfsr (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.en_i ( update_lfsr ),
.out_o ( rnd_way )
.clk_i (clk_i),
.rst_ni(rst_ni),
.en_i (update_lfsr),
.out_o (rnd_way)
);
///////////////////////////////////////////////////////
// tag comparison, hit generation
///////////////////////////////////////////////////////
///////////////////////////////////////////////////////
// tag comparison, hit generation
///////////////////////////////////////////////////////
logic [$clog2(ICACHE_SET_ASSOC)-1:0] hit_idx;
for (genvar i=0;i<ICACHE_SET_ASSOC;i++) begin : gen_tag_cmpsel
assign cl_hit[i] = (cl_tag_rdata[i] == cl_tag_d) & vld_rdata[i];
assign cl_sel[i] = cl_rdata[i][{cl_offset_q,3'b0} +: FETCH_WIDTH];
assign cl_user[i] = cl_ruser[i][{cl_offset_q,3'b0} +: FETCH_USER_WIDTH];
for (genvar i = 0; i < ICACHE_SET_ASSOC; i++) begin : gen_tag_cmpsel
assign cl_hit[i] = (cl_tag_rdata[i] == cl_tag_d) & vld_rdata[i];
assign cl_sel[i] = cl_rdata[i][{cl_offset_q, 3'b0}+:FETCH_WIDTH];
assign cl_user[i] = cl_ruser[i][{cl_offset_q, 3'b0}+:FETCH_USER_WIDTH];
end
lzc #(
.WIDTH ( ICACHE_SET_ASSOC )
.WIDTH(ICACHE_SET_ASSOC)
) i_lzc_hit (
.in_i ( cl_hit ),
.cnt_o ( hit_idx ),
.empty_o ( )
.in_i (cl_hit),
.cnt_o (hit_idx),
.empty_o()
);
always_comb begin
@ -422,37 +431,37 @@ end else begin : gen_piton_offset
dreq_o.data = cl_sel[hit_idx];
dreq_o.user = cl_user[hit_idx];
end else begin
dreq_o.data = mem_rtrn_i.data[{cl_offset_q,3'b0} +: FETCH_WIDTH];
dreq_o.user = mem_rtrn_i.user[{cl_offset_q,3'b0} +: FETCH_USER_WIDTH];
dreq_o.data = mem_rtrn_i.data[{cl_offset_q, 3'b0}+:FETCH_WIDTH];
dreq_o.user = mem_rtrn_i.user[{cl_offset_q, 3'b0}+:FETCH_USER_WIDTH];
end
end
///////////////////////////////////////////////////////
// memory arrays and regs
///////////////////////////////////////////////////////
///////////////////////////////////////////////////////
// memory arrays and regs
///////////////////////////////////////////////////////
logic [ICACHE_TAG_WIDTH:0] cl_tag_valid_rdata [ICACHE_SET_ASSOC-1:0];
logic [ICACHE_TAG_WIDTH:0] cl_tag_valid_rdata[ICACHE_SET_ASSOC-1:0];
for (genvar i = 0; i < ICACHE_SET_ASSOC; i++) begin : gen_sram
// Tag RAM
sram #(
// tag + valid bit
.DATA_WIDTH ( ICACHE_TAG_WIDTH+1 ),
.NUM_WORDS ( ICACHE_NUM_WORDS )
// tag + valid bit
.DATA_WIDTH(ICACHE_TAG_WIDTH + 1),
.NUM_WORDS (ICACHE_NUM_WORDS)
) tag_sram (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.req_i ( vld_req[i] ),
.we_i ( vld_we ),
.addr_i ( vld_addr ),
// we can always use the saved tag here since it takes a
// couple of cycles until we write to the cache upon a miss
.wuser_i ( '0 ),
.wdata_i ( {vld_wdata[i], cl_tag_q} ),
.be_i ( '1 ),
.ruser_o ( ),
.rdata_o ( cl_tag_valid_rdata[i] )
.clk_i (clk_i),
.rst_ni (rst_ni),
.req_i (vld_req[i]),
.we_i (vld_we),
.addr_i (vld_addr),
// we can always use the saved tag here since it takes a
// couple of cycles until we write to the cache upon a miss
.wuser_i('0),
.wdata_i({vld_wdata[i], cl_tag_q}),
.be_i ('1),
.ruser_o(),
.rdata_o(cl_tag_valid_rdata[i])
);
assign cl_tag_rdata[i] = cl_tag_valid_rdata[i][ICACHE_TAG_WIDTH-1:0];
@ -460,27 +469,27 @@ end else begin : gen_piton_offset
// Data RAM
sram #(
.USER_WIDTH ( ICACHE_USER_LINE_WIDTH ),
.DATA_WIDTH ( ICACHE_LINE_WIDTH ),
.USER_EN ( ariane_pkg::FETCH_USER_EN ),
.NUM_WORDS ( ICACHE_NUM_WORDS )
.USER_WIDTH(ICACHE_USER_LINE_WIDTH),
.DATA_WIDTH(ICACHE_LINE_WIDTH),
.USER_EN (ariane_pkg::FETCH_USER_EN),
.NUM_WORDS (ICACHE_NUM_WORDS)
) data_sram (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.req_i ( cl_req[i] ),
.we_i ( cl_we ),
.addr_i ( cl_index ),
.wuser_i ( mem_rtrn_i.user ),
.wdata_i ( mem_rtrn_i.data ),
.be_i ( '1 ),
.ruser_o ( cl_ruser[i] ),
.rdata_o ( cl_rdata[i] )
.clk_i (clk_i),
.rst_ni (rst_ni),
.req_i (cl_req[i]),
.we_i (cl_we),
.addr_i (cl_index),
.wuser_i(mem_rtrn_i.user),
.wdata_i(mem_rtrn_i.data),
.be_i ('1),
.ruser_o(cl_ruser[i]),
.rdata_o(cl_rdata[i])
);
end
always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
if(!rst_ni) begin
if (!rst_ni) begin
cl_tag_q <= '0;
flush_cnt_q <= '0;
vaddr_q <= '0;
@ -505,40 +514,46 @@ end else begin : gen_piton_offset
end
end
///////////////////////////////////////////////////////
// assertions
///////////////////////////////////////////////////////
///////////////////////////////////////////////////////
// assertions
///////////////////////////////////////////////////////
//pragma translate_off
//pragma translate_off
`ifndef VERILATOR
repl_inval0: assert property (
repl_inval0 :
assert property (
@(posedge clk_i) disable iff (!rst_ni) cache_wren |-> !(mem_rtrn_i.inv.all | mem_rtrn_i.inv.vld))
else $fatal(1,"[l1 icache] cannot replace cacheline and invalidate cacheline simultaneously");
else $fatal(1, "[l1 icache] cannot replace cacheline and invalidate cacheline simultaneously");
repl_inval1: assert property (
repl_inval1 :
assert property (
@(posedge clk_i) disable iff (!rst_ni) (mem_rtrn_i.inv.all | mem_rtrn_i.inv.vld) |-> !cache_wren)
else $fatal(1,"[l1 icache] cannot replace cacheline and invalidate cacheline simultaneously");
else $fatal(1, "[l1 icache] cannot replace cacheline and invalidate cacheline simultaneously");
invalid_state: assert property (
invalid_state :
assert property (
@(posedge clk_i) disable iff (!rst_ni) (state_q inside {FLUSH, IDLE, READ, MISS, KILL_ATRANS, KILL_MISS}))
else $fatal(1,"[l1 icache] fsm reached an invalid state");
else $fatal(1, "[l1 icache] fsm reached an invalid state");
hot1: assert property (
@(posedge clk_i) disable iff (!rst_ni) (!inv_en) |-> cache_rden |=> cmp_en_q |-> $onehot0(cl_hit))
else $fatal(1,"[l1 icache] cl_hit signal must be hot1");
hot1 :
assert property (
@(posedge clk_i) disable iff (!rst_ni) (!inv_en) |-> cache_rden |=> cmp_en_q |-> $onehot0(
cl_hit
))
else $fatal(1, "[l1 icache] cl_hit signal must be hot1");
// this is only used for verification!
logic vld_mirror[wt_cache_pkg::ICACHE_NUM_WORDS-1:0][ariane_pkg::ICACHE_SET_ASSOC-1:0];
logic vld_mirror[wt_cache_pkg::ICACHE_NUM_WORDS-1:0][ariane_pkg::ICACHE_SET_ASSOC-1:0];
logic [ariane_pkg::ICACHE_TAG_WIDTH-1:0] tag_mirror[wt_cache_pkg::ICACHE_NUM_WORDS-1:0][ariane_pkg::ICACHE_SET_ASSOC-1:0];
logic [ariane_pkg::ICACHE_SET_ASSOC-1:0] tag_write_duplicate_test;
always_ff @(posedge clk_i or negedge rst_ni) begin : p_mirror
if(!rst_ni) begin
vld_mirror <= '{default:'0};
tag_mirror <= '{default:'0};
if (!rst_ni) begin
vld_mirror <= '{default: '0};
tag_mirror <= '{default: '0};
end else begin
for (int i = 0; i < ICACHE_SET_ASSOC; i++) begin
if(vld_req[i] & vld_we) begin
if (vld_req[i] & vld_we) begin
vld_mirror[vld_addr][i] <= vld_wdata[i];
tag_mirror[vld_addr][i] <= cl_tag_q;
end
@ -550,17 +565,18 @@ end else begin : gen_piton_offset
assign tag_write_duplicate_test[i] = (tag_mirror[vld_addr][i] == cl_tag_q) & vld_mirror[vld_addr][i] & (|vld_wdata);
end
tag_write_duplicate: assert property (
tag_write_duplicate :
assert property (
@(posedge clk_i) disable iff (!rst_ni) |vld_req |-> vld_we |-> !(|tag_write_duplicate_test))
else $fatal(1,"[l1 icache] cannot allocate a CL that is already present in the cache");
else $fatal(1, "[l1 icache] cannot allocate a CL that is already present in the cache");
initial begin
// assert wrong parameterizations
assert (ICACHE_INDEX_WIDTH<=12)
else $fatal(1,"[l1 icache] cache index width can be maximum 12bit since VM uses 4kB pages");
end
initial begin
// assert wrong parameterizations
assert (ICACHE_INDEX_WIDTH <= 12)
else $fatal(1, "[l1 icache] cache index width can be maximum 12bit since VM uses 4kB pages");
end
`endif
//pragma translate_on
//pragma translate_on
endmodule // cva6_icache
endmodule // cva6_icache

View file

@ -13,56 +13,60 @@
// Description: wrapper module to connect the L1I$ to a 64bit AXI bus.
//
module cva6_icache_axi_wrapper import ariane_pkg::*; import wt_cache_pkg::*; #(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter type axi_req_t = logic,
parameter type axi_rsp_t = logic
module cva6_icache_axi_wrapper
import ariane_pkg::*;
import wt_cache_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter type axi_req_t = logic,
parameter type axi_rsp_t = logic
) (
input logic clk_i,
input logic rst_ni,
input riscv::priv_lvl_t priv_lvl_i,
input logic clk_i,
input logic rst_ni,
input riscv::priv_lvl_t priv_lvl_i,
input logic flush_i, // flush the icache, flush and kill have to be asserted together
input logic en_i, // enable icache
output logic miss_o, // to performance counter
// address translation requests
input icache_areq_t areq_i,
output icache_arsp_t areq_o,
// data requests
input icache_dreq_t dreq_i,
output icache_drsp_t dreq_o,
// AXI refill port
output axi_req_t axi_req_o,
input axi_rsp_t axi_resp_i
input logic flush_i, // flush the icache, flush and kill have to be asserted together
input logic en_i, // enable icache
output logic miss_o, // to performance counter
// address translation requests
input icache_areq_t areq_i,
output icache_arsp_t areq_o,
// data requests
input icache_dreq_t dreq_i,
output icache_drsp_t dreq_o,
// AXI refill port
output axi_req_t axi_req_o,
input axi_rsp_t axi_resp_i
);
localparam AxiNumWords = (ICACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth) * (ICACHE_LINE_WIDTH > DCACHE_LINE_WIDTH) +
(DCACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth) * (ICACHE_LINE_WIDTH <= DCACHE_LINE_WIDTH) ;
logic icache_mem_rtrn_vld;
icache_rtrn_t icache_mem_rtrn;
logic icache_mem_data_req;
logic icache_mem_data_ack;
icache_req_t icache_mem_data;
logic icache_mem_rtrn_vld;
icache_rtrn_t icache_mem_rtrn;
logic icache_mem_data_req;
logic icache_mem_data_ack;
icache_req_t icache_mem_data;
logic axi_rd_req;
logic axi_rd_gnt;
logic [CVA6Cfg.AxiAddrWidth-1:0] axi_rd_addr;
logic [$clog2(AxiNumWords)-1:0] axi_rd_blen;
logic [2:0] axi_rd_size;
logic [CVA6Cfg.AxiIdWidth-1:0] axi_rd_id_in;
logic axi_rd_rdy;
logic axi_rd_lock;
logic axi_rd_last;
logic axi_rd_valid;
logic [CVA6Cfg.AxiDataWidth-1:0] axi_rd_data;
logic [CVA6Cfg.AxiIdWidth-1:0] axi_rd_id_out;
logic axi_rd_exokay;
logic axi_rd_req;
logic axi_rd_gnt;
logic [CVA6Cfg.AxiAddrWidth-1:0] axi_rd_addr;
logic [ $clog2(AxiNumWords)-1:0] axi_rd_blen;
logic [ 2:0] axi_rd_size;
logic [ CVA6Cfg.AxiIdWidth-1:0] axi_rd_id_in;
logic axi_rd_rdy;
logic axi_rd_lock;
logic axi_rd_last;
logic axi_rd_valid;
logic [CVA6Cfg.AxiDataWidth-1:0] axi_rd_data;
logic [ CVA6Cfg.AxiIdWidth-1:0] axi_rd_id_out;
logic axi_rd_exokay;
logic req_valid_d, req_valid_q;
icache_req_t req_data_d, req_data_q;
logic first_d, first_q;
logic [ICACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1:0][CVA6Cfg.AxiDataWidth-1:0] rd_shift_d, rd_shift_q;
logic req_valid_d, req_valid_q;
icache_req_t req_data_d, req_data_q;
logic first_d, first_q;
logic [ICACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1:0][CVA6Cfg.AxiDataWidth-1:0]
rd_shift_d, rd_shift_q;
// Keep read request asserted until we have an AXI grant. This is not guaranteed by icache (but
// required by AXI).
@ -76,8 +80,8 @@ module cva6_icache_axi_wrapper import ariane_pkg::*; import wt_cache_pkg::*; #(
assign axi_rd_addr = CVA6Cfg.AxiAddrWidth'(req_data_d.paddr);
// Fetch a full cache line on a cache miss, or a single word on a bypassed access
assign axi_rd_blen = (req_data_d.nc) ? '0 : ariane_pkg::ICACHE_LINE_WIDTH/64-1;
assign axi_rd_size = $clog2(CVA6Cfg.AxiDataWidth/8); // Maximum
assign axi_rd_blen = (req_data_d.nc) ? '0 : ariane_pkg::ICACHE_LINE_WIDTH / 64 - 1;
assign axi_rd_size = $clog2(CVA6Cfg.AxiDataWidth / 8); // Maximum
assign axi_rd_id_in = req_data_d.tid;
assign axi_rd_rdy = 1'b1;
assign axi_rd_lock = 1'b0;
@ -96,68 +100,68 @@ module cva6_icache_axi_wrapper import ariane_pkg::*; import wt_cache_pkg::*; #(
// I-Cache
// -------
cva6_icache #(
// use ID 0 for icache reads
.CVA6Cfg ( CVA6Cfg ),
.RdTxId ( 0 )
// use ID 0 for icache reads
.CVA6Cfg(CVA6Cfg),
.RdTxId (0)
) i_cva6_icache (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.flush_i ( flush_i ),
.en_i ( en_i ),
.miss_o ( miss_o ),
.areq_i ( areq_i ),
.areq_o ( areq_o ),
.dreq_i ( dreq_i ),
.dreq_o ( dreq_o ),
.mem_rtrn_vld_i ( icache_mem_rtrn_vld ),
.mem_rtrn_i ( icache_mem_rtrn ),
.mem_data_req_o ( icache_mem_data_req ),
.mem_data_ack_i ( icache_mem_data_ack ),
.mem_data_o ( icache_mem_data )
.clk_i (clk_i),
.rst_ni (rst_ni),
.flush_i (flush_i),
.en_i (en_i),
.miss_o (miss_o),
.areq_i (areq_i),
.areq_o (areq_o),
.dreq_i (dreq_i),
.dreq_o (dreq_o),
.mem_rtrn_vld_i(icache_mem_rtrn_vld),
.mem_rtrn_i (icache_mem_rtrn),
.mem_data_req_o(icache_mem_data_req),
.mem_data_ack_i(icache_mem_data_ack),
.mem_data_o (icache_mem_data)
);
// --------
// AXI shim
// --------
axi_shim #(
.CVA6Cfg ( CVA6Cfg ),
.AxiNumWords ( AxiNumWords ),
.axi_req_t ( axi_req_t ),
.axi_rsp_t ( axi_rsp_t )
axi_shim #(
.CVA6Cfg (CVA6Cfg),
.AxiNumWords(AxiNumWords),
.axi_req_t (axi_req_t),
.axi_rsp_t (axi_rsp_t)
) i_axi_shim (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.rd_req_i ( axi_rd_req ),
.rd_gnt_o ( axi_rd_gnt ),
.rd_addr_i ( axi_rd_addr ),
.rd_blen_i ( axi_rd_blen ),
.rd_size_i ( axi_rd_size ),
.rd_id_i ( axi_rd_id_in ),
.rd_rdy_i ( axi_rd_rdy ),
.rd_lock_i ( axi_rd_lock ),
.rd_last_o ( axi_rd_last ),
.rd_valid_o ( axi_rd_valid ),
.rd_data_o ( axi_rd_data ),
.rd_user_o ( ),
.rd_id_o ( axi_rd_id_out ),
.rd_exokay_o ( axi_rd_exokay ),
.wr_req_i ( '0 ),
.wr_gnt_o ( ),
.wr_addr_i ( '0 ),
.wr_data_i ( '0 ),
.wr_user_i ( '0 ),
.wr_be_i ( '0 ),
.wr_blen_i ( '0 ),
.wr_size_i ( '0 ),
.wr_id_i ( '0 ),
.wr_lock_i ( '0 ),
.wr_atop_i ( '0 ),
.wr_rdy_i ( '0 ),
.wr_valid_o ( ),
.wr_id_o ( ),
.wr_exokay_o ( ),
.axi_req_o ( axi_req_o ),
.axi_resp_i ( axi_resp_i )
.clk_i (clk_i),
.rst_ni (rst_ni),
.rd_req_i (axi_rd_req),
.rd_gnt_o (axi_rd_gnt),
.rd_addr_i (axi_rd_addr),
.rd_blen_i (axi_rd_blen),
.rd_size_i (axi_rd_size),
.rd_id_i (axi_rd_id_in),
.rd_rdy_i (axi_rd_rdy),
.rd_lock_i (axi_rd_lock),
.rd_last_o (axi_rd_last),
.rd_valid_o (axi_rd_valid),
.rd_data_o (axi_rd_data),
.rd_user_o (),
.rd_id_o (axi_rd_id_out),
.rd_exokay_o(axi_rd_exokay),
.wr_req_i ('0),
.wr_gnt_o (),
.wr_addr_i ('0),
.wr_data_i ('0),
.wr_user_i ('0),
.wr_be_i ('0),
.wr_blen_i ('0),
.wr_size_i ('0),
.wr_id_i ('0),
.wr_lock_i ('0),
.wr_atop_i ('0),
.wr_rdy_i ('0),
.wr_valid_o (),
.wr_id_o (),
.wr_exokay_o(),
.axi_req_o (axi_req_o),
.axi_resp_i (axi_resp_i)
);
// Buffer burst data in shift register
@ -166,7 +170,7 @@ module cva6_icache_axi_wrapper import ariane_pkg::*; import wt_cache_pkg::*; #(
rd_shift_d = rd_shift_q;
if (axi_rd_valid) begin
first_d = axi_rd_last;
first_d = axi_rd_last;
if (ICACHE_LINE_WIDTH == CVA6Cfg.AxiDataWidth) begin
rd_shift_d = axi_rd_data;
end else begin
@ -195,4 +199,4 @@ module cva6_icache_axi_wrapper import ariane_pkg::*; import wt_cache_pkg::*; #(
end
end
endmodule // cva6_icache_axi_wrapper
endmodule // cva6_icache_axi_wrapper

File diff suppressed because it is too large Load diff

View file

@ -15,277 +15,301 @@
// write-back data cache.
module std_cache_subsystem import ariane_pkg::*; import std_cache_pkg::*; #(
module std_cache_subsystem
import ariane_pkg::*;
import std_cache_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int unsigned NumPorts = 4,
parameter type axi_ar_chan_t = logic,
parameter type axi_aw_chan_t = logic,
parameter type axi_w_chan_t = logic,
parameter type axi_w_chan_t = logic,
parameter type axi_req_t = logic,
parameter type axi_rsp_t = logic
) (
input logic clk_i,
input logic rst_ni,
input riscv::priv_lvl_t priv_lvl_i,
input logic clk_i,
input logic rst_ni,
input riscv::priv_lvl_t priv_lvl_i,
// I$
input logic icache_en_i, // enable icache (or bypass e.g: in debug mode)
input logic icache_flush_i, // flush the icache, flush and kill have to be asserted together
output logic icache_miss_o, // to performance counter
input logic icache_en_i, // enable icache (or bypass e.g: in debug mode)
input logic icache_flush_i, // flush the icache, flush and kill have to be asserted together
output logic icache_miss_o, // to performance counter
// address translation requests
input icache_areq_t icache_areq_i, // to/from frontend
output icache_arsp_t icache_areq_o,
input icache_areq_t icache_areq_i, // to/from frontend
output icache_arsp_t icache_areq_o,
// data requests
input icache_dreq_t icache_dreq_i, // to/from frontend
output icache_drsp_t icache_dreq_o,
input icache_dreq_t icache_dreq_i, // to/from frontend
output icache_drsp_t icache_dreq_o,
// AMOs
input amo_req_t amo_req_i,
output amo_resp_t amo_resp_o,
input amo_req_t amo_req_i,
output amo_resp_t amo_resp_o,
// D$
// Cache management
input logic dcache_enable_i, // from CSR
input logic dcache_flush_i, // high until acknowledged
input logic dcache_enable_i, // from CSR
input logic dcache_flush_i, // high until acknowledged
output logic dcache_flush_ack_o, // send a single cycle acknowledge signal when the cache is flushed
output logic dcache_miss_o, // we missed on a ld/st
output logic dcache_miss_o, // we missed on a ld/st
output logic wbuffer_empty_o, // statically set to 1, as there is no wbuffer in this cache system
// Request ports
input dcache_req_i_t [NumPorts-1:0] dcache_req_ports_i, // to/from LSU
output dcache_req_o_t [NumPorts-1:0] dcache_req_ports_o, // to/from LSU
input dcache_req_i_t [NumPorts-1:0] dcache_req_ports_i, // to/from LSU
output dcache_req_o_t [NumPorts-1:0] dcache_req_ports_o, // to/from LSU
// memory side
output axi_req_t axi_req_o,
input axi_rsp_t axi_resp_i
output axi_req_t axi_req_o,
input axi_rsp_t axi_resp_i
);
assign wbuffer_empty_o = 1'b1;
axi_req_t axi_req_icache;
axi_rsp_t axi_resp_icache;
axi_req_t axi_req_bypass;
axi_rsp_t axi_resp_bypass;
axi_req_t axi_req_data;
axi_rsp_t axi_resp_data;
axi_req_t axi_req_icache;
axi_rsp_t axi_resp_icache;
axi_req_t axi_req_bypass;
axi_rsp_t axi_resp_bypass;
axi_req_t axi_req_data;
axi_rsp_t axi_resp_data;
cva6_icache_axi_wrapper #(
.CVA6Cfg ( CVA6Cfg ),
.axi_req_t ( axi_req_t ),
.axi_rsp_t ( axi_rsp_t )
) i_cva6_icache_axi_wrapper (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.priv_lvl_i ( priv_lvl_i ),
.flush_i ( icache_flush_i ),
.en_i ( icache_en_i ),
.miss_o ( icache_miss_o ),
.areq_i ( icache_areq_i ),
.areq_o ( icache_areq_o ),
.dreq_i ( icache_dreq_i ),
.dreq_o ( icache_dreq_o ),
.axi_req_o ( axi_req_icache ),
.axi_resp_i ( axi_resp_icache )
);
cva6_icache_axi_wrapper #(
.CVA6Cfg (CVA6Cfg),
.axi_req_t(axi_req_t),
.axi_rsp_t(axi_rsp_t)
) i_cva6_icache_axi_wrapper (
.clk_i (clk_i),
.rst_ni (rst_ni),
.priv_lvl_i(priv_lvl_i),
.flush_i (icache_flush_i),
.en_i (icache_en_i),
.miss_o (icache_miss_o),
.areq_i (icache_areq_i),
.areq_o (icache_areq_o),
.dreq_i (icache_dreq_i),
.dreq_o (icache_dreq_o),
.axi_req_o (axi_req_icache),
.axi_resp_i(axi_resp_icache)
);
// decreasing priority
// Port 0: PTW
// Port 1: Load Unit
// Port 2: Accelerator
// Port 3: Store Unit
std_nbdcache #(
.CVA6Cfg ( CVA6Cfg ),
.NumPorts ( NumPorts ),
.axi_req_t ( axi_req_t ),
.axi_rsp_t ( axi_rsp_t )
) i_nbdcache (
// decreasing priority
// Port 0: PTW
// Port 1: Load Unit
// Port 2: Accelerator
// Port 3: Store Unit
std_nbdcache #(
.CVA6Cfg (CVA6Cfg),
.NumPorts (NumPorts),
.axi_req_t(axi_req_t),
.axi_rsp_t(axi_rsp_t)
) i_nbdcache (
.clk_i,
.rst_ni,
.enable_i ( dcache_enable_i ),
.flush_i ( dcache_flush_i ),
.flush_ack_o ( dcache_flush_ack_o ),
.miss_o ( dcache_miss_o ),
.axi_bypass_o ( axi_req_bypass ),
.axi_bypass_i ( axi_resp_bypass ),
.axi_data_o ( axi_req_data ),
.axi_data_i ( axi_resp_data ),
.req_ports_i ( dcache_req_ports_i ),
.req_ports_o ( dcache_req_ports_o ),
.enable_i (dcache_enable_i),
.flush_i (dcache_flush_i),
.flush_ack_o (dcache_flush_ack_o),
.miss_o (dcache_miss_o),
.axi_bypass_o(axi_req_bypass),
.axi_bypass_i(axi_resp_bypass),
.axi_data_o (axi_req_data),
.axi_data_i (axi_resp_data),
.req_ports_i (dcache_req_ports_i),
.req_ports_o (dcache_req_ports_o),
.amo_req_i,
.amo_resp_o
);
);
// -----------------------
// Arbitrate AXI Ports
// -----------------------
logic [1:0] w_select, w_select_fifo, w_select_arbiter;
logic [1:0] w_fifo_usage;
logic w_fifo_empty, w_fifo_full;
// -----------------------
// Arbitrate AXI Ports
// -----------------------
logic [1:0] w_select, w_select_fifo, w_select_arbiter;
logic [1:0] w_fifo_usage;
logic w_fifo_empty, w_fifo_full;
// AR Channel
stream_arbiter #(
.DATA_T ( axi_ar_chan_t ),
.N_INP ( 3 )
) i_stream_arbiter_ar (
.clk_i,
.rst_ni,
.inp_data_i ( {axi_req_icache.ar, axi_req_bypass.ar, axi_req_data.ar} ),
.inp_valid_i ( {axi_req_icache.ar_valid, axi_req_bypass.ar_valid, axi_req_data.ar_valid} ),
.inp_ready_o ( {axi_resp_icache.ar_ready, axi_resp_bypass.ar_ready, axi_resp_data.ar_ready} ),
.oup_data_o ( axi_req_o.ar ),
.oup_valid_o ( axi_req_o.ar_valid ),
.oup_ready_i ( axi_resp_i.ar_ready )
);
// AR Channel
stream_arbiter #(
.DATA_T(axi_ar_chan_t),
.N_INP (3)
) i_stream_arbiter_ar (
.clk_i,
.rst_ni,
.inp_data_i ({axi_req_icache.ar, axi_req_bypass.ar, axi_req_data.ar}),
.inp_valid_i({axi_req_icache.ar_valid, axi_req_bypass.ar_valid, axi_req_data.ar_valid}),
.inp_ready_o({axi_resp_icache.ar_ready, axi_resp_bypass.ar_ready, axi_resp_data.ar_ready}),
.oup_data_o (axi_req_o.ar),
.oup_valid_o(axi_req_o.ar_valid),
.oup_ready_i(axi_resp_i.ar_ready)
);
// AW Channel
stream_arbiter #(
.DATA_T ( axi_aw_chan_t ),
.N_INP ( 3 )
) i_stream_arbiter_aw (
.clk_i,
.rst_ni,
.inp_data_i ( {axi_req_icache.aw, axi_req_bypass.aw, axi_req_data.aw} ),
.inp_valid_i ( {axi_req_icache.aw_valid, axi_req_bypass.aw_valid, axi_req_data.aw_valid} ),
.inp_ready_o ( {axi_resp_icache.aw_ready, axi_resp_bypass.aw_ready, axi_resp_data.aw_ready} ),
.oup_data_o ( axi_req_o.aw ),
.oup_valid_o ( axi_req_o.aw_valid ),
.oup_ready_i ( axi_resp_i.aw_ready )
);
// AW Channel
stream_arbiter #(
.DATA_T(axi_aw_chan_t),
.N_INP (3)
) i_stream_arbiter_aw (
.clk_i,
.rst_ni,
.inp_data_i ({axi_req_icache.aw, axi_req_bypass.aw, axi_req_data.aw}),
.inp_valid_i({axi_req_icache.aw_valid, axi_req_bypass.aw_valid, axi_req_data.aw_valid}),
.inp_ready_o({axi_resp_icache.aw_ready, axi_resp_bypass.aw_ready, axi_resp_data.aw_ready}),
.oup_data_o (axi_req_o.aw),
.oup_valid_o(axi_req_o.aw_valid),
.oup_ready_i(axi_resp_i.aw_ready)
);
// WID has been removed in AXI 4 so we need to keep track which AW request has been accepted
// to forward the correct write data.
always_comb begin
w_select = 0;
unique casez (axi_req_o.aw.id)
4'b0111: w_select = 2; // dcache
4'b1???: w_select = 1; // bypass
default: w_select = 0; // icache
endcase
end
// WID has been removed in AXI 4 so we need to keep track which AW request has been accepted
// to forward the correct write data.
always_comb begin
w_select = 0;
unique casez (axi_req_o.aw.id)
4'b0111: w_select = 2; // dcache
4'b1???: w_select = 1; // bypass
default: w_select = 0; // icache
endcase
end
// W Channel
fifo_v3 #(
.DATA_WIDTH ( 2 ),
// W Channel
fifo_v3 #(
.DATA_WIDTH (2),
// we can have a maximum of 4 outstanding transactions as each port is blocking
.DEPTH ( 4 ),
.FALL_THROUGH ( 1'b1 )
) i_fifo_w_channel (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.flush_i ( 1'b0 ),
.testmode_i ( 1'b0 ),
.full_o ( w_fifo_full ),
.empty_o ( ), // leave open
.usage_o ( w_fifo_usage ),
.data_i ( w_select ),
.DEPTH (4),
.FALL_THROUGH(1'b1)
) i_fifo_w_channel (
.clk_i (clk_i),
.rst_ni (rst_ni),
.flush_i (1'b0),
.testmode_i(1'b0),
.full_o (w_fifo_full),
.empty_o (), // leave open
.usage_o (w_fifo_usage),
.data_i (w_select),
// a new transaction was requested and granted
.push_i ( axi_req_o.aw_valid & axi_resp_i.aw_ready ),
.push_i (axi_req_o.aw_valid & axi_resp_i.aw_ready),
// write ID to select the output MUX
.data_o ( w_select_fifo ),
.data_o (w_select_fifo),
// transaction has finished
.pop_i ( axi_req_o.w_valid & axi_resp_i.w_ready & axi_req_o.w.last )
);
.pop_i (axi_req_o.w_valid & axi_resp_i.w_ready & axi_req_o.w.last)
);
// In fall-through mode, the empty_o will be low when push_i is high (on zero usage).
// We do not want this here. Also, usage_o is missing the MSB, so on full fifo, usage_o is zero.
assign w_fifo_empty = w_fifo_usage == 0 && !w_fifo_full;
// In fall-through mode, the empty_o will be low when push_i is high (on zero usage).
// We do not want this here. Also, usage_o is missing the MSB, so on full fifo, usage_o is zero.
assign w_fifo_empty = w_fifo_usage == 0 && !w_fifo_full;
// icache will never write so select it as default (e.g.: when no arbitration is active)
// this is equal to setting it to zero
assign w_select_arbiter = w_fifo_empty ? (axi_req_o.aw_valid ? w_select : 0) : w_select_fifo;
// icache will never write so select it as default (e.g.: when no arbitration is active)
// this is equal to setting it to zero
assign w_select_arbiter = w_fifo_empty ? (axi_req_o.aw_valid ? w_select : 0) : w_select_fifo;
stream_mux #(
.DATA_T ( axi_w_chan_t ),
.N_INP ( 3 )
) i_stream_mux_w (
.inp_data_i ( {axi_req_data.w, axi_req_bypass.w, axi_req_icache.w} ),
.inp_valid_i ( {axi_req_data.w_valid, axi_req_bypass.w_valid, axi_req_icache.w_valid} ),
.inp_ready_o ( {axi_resp_data.w_ready, axi_resp_bypass.w_ready, axi_resp_icache.w_ready} ),
.inp_sel_i ( w_select_arbiter ),
.oup_data_o ( axi_req_o.w ),
.oup_valid_o ( axi_req_o.w_valid ),
.oup_ready_i ( axi_resp_i.w_ready )
);
stream_mux #(
.DATA_T(axi_w_chan_t),
.N_INP (3)
) i_stream_mux_w (
.inp_data_i ({axi_req_data.w, axi_req_bypass.w, axi_req_icache.w}),
.inp_valid_i({axi_req_data.w_valid, axi_req_bypass.w_valid, axi_req_icache.w_valid}),
.inp_ready_o({axi_resp_data.w_ready, axi_resp_bypass.w_ready, axi_resp_icache.w_ready}),
.inp_sel_i (w_select_arbiter),
.oup_data_o (axi_req_o.w),
.oup_valid_o(axi_req_o.w_valid),
.oup_ready_i(axi_resp_i.w_ready)
);
// Route responses based on ID
// 0000 -> I$
// 0111 -> D$
// 1??? -> Bypass
// R Channel
assign axi_resp_icache.r = axi_resp_i.r;
assign axi_resp_bypass.r = axi_resp_i.r;
assign axi_resp_data.r = axi_resp_i.r;
// Route responses based on ID
// 0000 -> I$
// 0111 -> D$
// 1??? -> Bypass
// R Channel
assign axi_resp_icache.r = axi_resp_i.r;
assign axi_resp_bypass.r = axi_resp_i.r;
assign axi_resp_data.r = axi_resp_i.r;
logic [1:0] r_select;
logic [1:0] r_select;
always_comb begin
r_select = 0;
unique casez (axi_resp_i.r.id)
4'b0111: r_select = 0; // dcache
4'b1???: r_select = 1; // bypass
4'b0000: r_select = 2; // icache
default: r_select = 0;
endcase
end
always_comb begin
r_select = 0;
unique casez (axi_resp_i.r.id)
4'b0111: r_select = 0; // dcache
4'b1???: r_select = 1; // bypass
4'b0000: r_select = 2; // icache
default: r_select = 0;
endcase
end
stream_demux #(
.N_OUP ( 3 )
) i_stream_demux_r (
.inp_valid_i ( axi_resp_i.r_valid ),
.inp_ready_o ( axi_req_o.r_ready ),
.oup_sel_i ( r_select ),
.oup_valid_o ( {axi_resp_icache.r_valid, axi_resp_bypass.r_valid, axi_resp_data.r_valid} ),
.oup_ready_i ( {axi_req_icache.r_ready, axi_req_bypass.r_ready, axi_req_data.r_ready} )
);
stream_demux #(
.N_OUP(3)
) i_stream_demux_r (
.inp_valid_i(axi_resp_i.r_valid),
.inp_ready_o(axi_req_o.r_ready),
.oup_sel_i (r_select),
.oup_valid_o({axi_resp_icache.r_valid, axi_resp_bypass.r_valid, axi_resp_data.r_valid}),
.oup_ready_i({axi_req_icache.r_ready, axi_req_bypass.r_ready, axi_req_data.r_ready})
);
// B Channel
logic [1:0] b_select;
// B Channel
logic [1:0] b_select;
assign axi_resp_icache.b = axi_resp_i.b;
assign axi_resp_bypass.b = axi_resp_i.b;
assign axi_resp_data.b = axi_resp_i.b;
assign axi_resp_icache.b = axi_resp_i.b;
assign axi_resp_bypass.b = axi_resp_i.b;
assign axi_resp_data.b = axi_resp_i.b;
always_comb begin
b_select = 0;
unique casez (axi_resp_i.b.id)
4'b0111: b_select = 0; // dcache
4'b1???: b_select = 1; // bypass
4'b0000: b_select = 2; // icache
default: b_select = 0;
endcase
end
always_comb begin
b_select = 0;
unique casez (axi_resp_i.b.id)
4'b0111: b_select = 0; // dcache
4'b1???: b_select = 1; // bypass
4'b0000: b_select = 2; // icache
default: b_select = 0;
endcase
end
stream_demux #(
.N_OUP ( 3 )
) i_stream_demux_b (
.inp_valid_i ( axi_resp_i.b_valid ),
.inp_ready_o ( axi_req_o.b_ready ),
.oup_sel_i ( b_select ),
.oup_valid_o ( {axi_resp_icache.b_valid, axi_resp_bypass.b_valid, axi_resp_data.b_valid} ),
.oup_ready_i ( {axi_req_icache.b_ready, axi_req_bypass.b_ready, axi_req_data.b_ready} )
);
stream_demux #(
.N_OUP(3)
) i_stream_demux_b (
.inp_valid_i(axi_resp_i.b_valid),
.inp_ready_o(axi_req_o.b_ready),
.oup_sel_i (b_select),
.oup_valid_o({axi_resp_icache.b_valid, axi_resp_bypass.b_valid, axi_resp_data.b_valid}),
.oup_ready_i({axi_req_icache.b_ready, axi_req_bypass.b_ready, axi_req_data.b_ready})
);
///////////////////////////////////////////////////////
// assertions
///////////////////////////////////////////////////////
///////////////////////////////////////////////////////
// assertions
///////////////////////////////////////////////////////
//pragma translate_off
//pragma translate_off
`ifndef VERILATOR
a_invalid_instruction_fetch: assert property (
a_invalid_instruction_fetch :
assert property (
@(posedge clk_i) disable iff (~rst_ni) icache_dreq_o.valid |-> (|icache_dreq_o.data) !== 1'hX)
else $warning(1,"[l1 dcache] reading invalid instructions: vaddr=%08X, data=%08X",
icache_dreq_o.vaddr, icache_dreq_o.data);
else
$warning(
1,
"[l1 dcache] reading invalid instructions: vaddr=%08X, data=%08X",
icache_dreq_o.vaddr,
icache_dreq_o.data
);
a_invalid_write_data: assert property (
a_invalid_write_data :
assert property (
@(posedge clk_i) disable iff (~rst_ni) dcache_req_ports_i[NumPorts-1].data_req |-> |dcache_req_ports_i[NumPorts-1].data_be |-> (|dcache_req_ports_i[NumPorts-1].data_wdata) !== 1'hX)
else $warning(1,"[l1 dcache] writing invalid data: paddr=%016X, be=%02X, data=%016X",
{dcache_req_ports_i[NumPorts-1].address_tag, dcache_req_ports_i[NumPorts-1].address_index}, dcache_req_ports_i[NumPorts-1].data_be, dcache_req_ports_i[NumPorts-1].data_wdata);
else
$warning(
1,
"[l1 dcache] writing invalid data: paddr=%016X, be=%02X, data=%016X",
{
dcache_req_ports_i[NumPorts-1].address_tag, dcache_req_ports_i[NumPorts-1].address_index
},
dcache_req_ports_i[NumPorts-1].data_be,
dcache_req_ports_i[NumPorts-1].data_wdata
);
generate
for(genvar j=0; j<NumPorts-1; j++) begin
a_invalid_read_data: assert property (
for (genvar j = 0; j < NumPorts - 1; j++) begin
a_invalid_read_data :
assert property (
@(posedge clk_i) disable iff (~rst_ni) dcache_req_ports_o[j].data_rvalid |-> (|dcache_req_ports_o[j].data_rdata) !== 1'hX)
else $warning(1,"[l1 dcache] reading invalid data on port %01d: data=%016X",
j, dcache_req_ports_o[j].data_rdata);
end
else
$warning(
1,
"[l1 dcache] reading invalid data on port %01d: data=%016X",
j,
dcache_req_ports_o[j].data_rdata
);
end
endgenerate
`endif
//pragma translate_on
endmodule // std_cache_subsystem
//pragma translate_on
endmodule // std_cache_subsystem

View file

@ -13,263 +13,267 @@
// Description: Nonblocking private L1 dcache
// Non-blocking private L1 data cache.
//
// Structure (everything listed here is instantiated in this module):
//   - one cache_ctrl per request port (generate block `master_ports`)
//   - one miss_handler shared by all controllers, which also owns both AXI ports
//   - per-way data and tag SRAMs (generate block `sram_block`) plus one
//     shared valid/dirty SRAM
//   - tag_cmp, which arbitrates the NumPorts+1 requesters onto the SRAMs and
//     produces the per-way hit vector
//
// Requester numbering on req/addr/tag/wdata/we/be/gnt:
//   index 0      -> miss handler
//   index i + 1  -> cache controller of request port i
//
// Parameters:
//   CVA6Cfg    - core configuration struct, forwarded to the sub-modules
//   NumPorts   - number of request ports (and therefore cache controllers)
//   axi_req_t  - AXI request struct type, forwarded to miss_handler
//   axi_rsp_t  - AXI response struct type, forwarded to miss_handler
module std_nbdcache
  import std_cache_pkg::*;
  import ariane_pkg::*;
#(
    parameter config_pkg::cva6_cfg_t CVA6Cfg   = config_pkg::cva6_cfg_empty,
    parameter int unsigned           NumPorts  = 4,
    parameter type                   axi_req_t = logic,
    parameter type                   axi_rsp_t = logic
) (
    input  logic                         clk_i,        // Clock
    input  logic                         rst_ni,       // Asynchronous reset active low
    // Cache management
    input  logic                         enable_i,     // from CSR
    input  logic                         flush_i,      // high until acknowledged
    output logic                         flush_ack_o,  // send a single cycle acknowledge signal when the cache is flushed
    output logic                         miss_o,       // we missed on a LD/ST
    // AMOs
    input  amo_req_t                     amo_req_i,
    output amo_resp_t                    amo_resp_o,
    // Request ports
    input  dcache_req_i_t [NumPorts-1:0] req_ports_i,  // request ports
    output dcache_req_o_t [NumPorts-1:0] req_ports_o,  // request ports
    // Cache AXI refill port
    output axi_req_t                     axi_data_o,
    input  axi_rsp_t                     axi_data_i,
    output axi_req_t                     axi_bypass_o,
    input  axi_rsp_t                     axi_bypass_i
);

  // Redundant with the import in the module header; retained unchanged.
  import std_cache_pkg::*;

  // -------------------------------
  // Controller <-> Arbiter
  // -------------------------------
  // 1. Miss handler
  // 2. PTW
  // 3. Load Unit
  // 4. Accelerator
  // 5. Store unit
  logic        [            NumPorts:0][  DCACHE_SET_ASSOC-1:0] req;
  logic        [            NumPorts:0][DCACHE_INDEX_WIDTH-1:0] addr;
  logic        [            NumPorts:0]                         gnt;
  cache_line_t [DCACHE_SET_ASSOC-1:0]                           rdata;
  logic        [            NumPorts:0][  DCACHE_TAG_WIDTH-1:0] tag;

  cache_line_t [            NumPorts:0]                         wdata;
  logic        [            NumPorts:0]                         we;
  cl_be_t      [            NumPorts:0]                         be;
  logic        [DCACHE_SET_ASSOC-1:0]                           hit_way;
  // -------------------------------
  // Controller <-> Miss unit
  // -------------------------------
  logic        [          NumPorts-1:0]                         busy;
  logic        [          NumPorts-1:0][                  55:0] mshr_addr;
  logic        [          NumPorts-1:0]                         mshr_addr_matches;
  logic        [          NumPorts-1:0]                         mshr_index_matches;
  logic        [                  63:0]                         critical_word;
  logic                                                         critical_word_valid;

  // Miss requests are carried as flat bit vectors sized from miss_req_t.
  logic        [          NumPorts-1:0][ $bits(miss_req_t)-1:0] miss_req;
  logic        [          NumPorts-1:0]                         miss_gnt;
  logic        [          NumPorts-1:0]                         active_serving;

  logic        [          NumPorts-1:0]                         bypass_gnt;
  logic        [          NumPorts-1:0]                         bypass_valid;
  logic        [          NumPorts-1:0][                  63:0] bypass_data;
  // -------------------------------
  // Arbiter <-> Data RAM
  // -------------------------------
  logic        [  DCACHE_SET_ASSOC-1:0]                         req_ram;
  logic        [DCACHE_INDEX_WIDTH-1:0]                         addr_ram;
  logic                                                         we_ram;
  cache_line_t                                                  wdata_ram;
  cache_line_t [  DCACHE_SET_ASSOC-1:0]                         rdata_ram;
  cl_be_t                                                       be_ram;

  // ------------------
  // Cache Controller
  // ------------------
  // One controller per request port. Controller i drives requester slot i+1;
  // slot 0 is reserved for the miss handler. The controller is put into
  // bypass mode whenever the cache is disabled (~enable_i).
  generate
    for (genvar i = 0; i < NumPorts; i++) begin : master_ports
      cache_ctrl #(
          .CVA6Cfg(CVA6Cfg)
      ) i_cache_ctrl (
          .bypass_i  (~enable_i),
          .busy_o    (busy[i]),
          // from core
          .req_port_i(req_ports_i[i]),
          .req_port_o(req_ports_o[i]),
          // to SRAM array
          .req_o     (req[i+1]),
          .addr_o    (addr[i+1]),
          .gnt_i     (gnt[i+1]),
          .data_i    (rdata),
          .tag_o     (tag[i+1]),
          .data_o    (wdata[i+1]),
          .we_o      (we[i+1]),
          .be_o      (be[i+1]),
          .hit_way_i (hit_way),

          .miss_req_o           (miss_req[i]),
          .miss_gnt_i           (miss_gnt[i]),
          .active_serving_i     (active_serving[i]),
          .critical_word_i      (critical_word),
          .critical_word_valid_i(critical_word_valid),
          .bypass_gnt_i         (bypass_gnt[i]),
          .bypass_valid_i       (bypass_valid[i]),
          .bypass_data_i        (bypass_data[i]),

          .mshr_addr_o         (mshr_addr[i]),
          .mshr_addr_matches_i (mshr_addr_matches[i]),
          .mshr_index_matches_i(mshr_index_matches[i]),
          // any remaining ports are bound by name
          .*
      );
    end
  endgenerate

  // ------------------
  // Miss Handling Unit
  // ------------------
  // Shared by all controllers; uses requester slot 0 towards the SRAMs.
  // busy_i is the OR-reduction of the per-controller busy flags.
  miss_handler #(
      .CVA6Cfg  (CVA6Cfg),
      .NR_PORTS (NumPorts),
      .axi_req_t(axi_req_t),
      .axi_rsp_t(axi_rsp_t)
  ) i_miss_handler (
      .flush_i              (flush_i),
      .busy_i               (|busy),
      // AMOs
      .amo_req_i            (amo_req_i),
      .amo_resp_o           (amo_resp_o),
      .miss_req_i           (miss_req),
      .miss_gnt_o           (miss_gnt),
      .bypass_gnt_o         (bypass_gnt),
      .bypass_valid_o       (bypass_valid),
      .bypass_data_o        (bypass_data),
      .critical_word_o      (critical_word),
      .critical_word_valid_o(critical_word_valid),
      .mshr_addr_i          (mshr_addr),
      .mshr_addr_matches_o  (mshr_addr_matches),
      .mshr_index_matches_o (mshr_index_matches),
      .active_serving_o     (active_serving),
      .req_o                (req[0]),
      .addr_o               (addr[0]),
      .data_i               (rdata),
      .be_o                 (be[0]),
      .data_o               (wdata[0]),
      .we_o                 (we[0]),
      .axi_bypass_o,
      .axi_bypass_i,
      .axi_data_o,
      .axi_data_i,
      // any remaining ports are bound by name
      .*
  );

  // The miss handler supplies no tag for the comparison, so slot 0 is tied off.
  assign tag[0] = '0;

  // --------------
  // Memory Arrays
  // --------------
  // One data SRAM and one tag SRAM per way. All ways share address, write
  // enable, write data and byte enables; only the request bit is per way.
  for (genvar i = 0; i < DCACHE_SET_ASSOC; i++) begin : sram_block
    sram #(
        .DATA_WIDTH(DCACHE_LINE_WIDTH),
        .NUM_WORDS (DCACHE_NUM_WORDS)
    ) data_sram (
        .req_i  (req_ram[i]),
        .rst_ni (rst_ni),
        .we_i   (we_ram),
        .addr_i (addr_ram[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET]),
        .wuser_i('0),
        .wdata_i(wdata_ram.data),
        .be_i   (be_ram.data),
        .ruser_o(),
        .rdata_o(rdata_ram[i].data),
        // remaining ports (presumably clk_i) are bound by name
        .*
    );

    sram #(
        .DATA_WIDTH(DCACHE_TAG_WIDTH),
        .NUM_WORDS (DCACHE_NUM_WORDS)
    ) tag_sram (
        .req_i  (req_ram[i]),
        .rst_ni (rst_ni),
        .we_i   (we_ram),
        .addr_i (addr_ram[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET]),
        .wuser_i('0),
        .wdata_i(wdata_ram.tag),
        .be_i   (be_ram.tag),
        .ruser_o(),
        .rdata_o(rdata_ram[i].tag),
        // remaining ports (presumably clk_i) are bound by name
        .*
    );
  end

  // ----------------
  // Valid/Dirty Regs
  // ----------------

  // align each valid/dirty bit pair to a byte boundary in order to leverage byte enable signals.
  // note: if you have an SRAM that supports flat bit enables for your target technology,
  // you can use it here to save the extra 4x overhead introduced by this workaround.
  //
  // Layout per way i: bit 8*i holds dirty, bit 8*i+1 holds valid. The other
  // bits of dirty_wdata are not driven in this module.
  logic [4*DCACHE_DIRTY_WIDTH-1:0] dirty_wdata, dirty_rdata;

  for (genvar i = 0; i < DCACHE_SET_ASSOC; i++) begin
    assign dirty_wdata[8*i]   = wdata_ram.dirty;
    assign dirty_wdata[8*i+1] = wdata_ram.valid;
    assign rdata_ram[i].dirty = dirty_rdata[8*i];
    assign rdata_ram[i].valid = dirty_rdata[8*i+1];
  end

  // Single SRAM shared by all ways; it is accessed whenever any way is requested.
  sram #(
      .USER_WIDTH(1),
      .DATA_WIDTH(4 * DCACHE_DIRTY_WIDTH),
      .NUM_WORDS (DCACHE_NUM_WORDS)
  ) valid_dirty_sram (
      .clk_i  (clk_i),
      .rst_ni (rst_ni),
      .req_i  (|req_ram),
      .we_i   (we_ram),
      .addr_i (addr_ram[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET]),
      .wuser_i('0),
      .wdata_i(dirty_wdata),
      .be_i   (be_ram.vldrty),
      .ruser_o(),
      .rdata_o(dirty_rdata)
  );

  // ------------------------------------------------
  // Tag Comparison and memory arbitration
  // ------------------------------------------------
  // NR_PORTS is NumPorts + 1 because the miss handler is an extra requester.
  tag_cmp #(
      .CVA6Cfg         (CVA6Cfg),
      .NR_PORTS        (NumPorts + 1),
      .ADDR_WIDTH      (DCACHE_INDEX_WIDTH),
      .DCACHE_SET_ASSOC(DCACHE_SET_ASSOC)
  ) i_tag_cmp (
      .req_i    (req),
      .gnt_o    (gnt),
      .addr_i   (addr),
      .wdata_i  (wdata),
      .we_i     (we),
      .be_i     (be),
      .rdata_o  (rdata),
      .tag_i    (tag),
      .hit_way_o(hit_way),

      .req_o  (req_ram),
      .addr_o (addr_ram),
      .wdata_o(wdata_ram),
      .we_o   (we_ram),
      .be_o   (be_ram),
      .rdata_i(rdata_ram),
      // clk_i / rst_ni are bound by name
      .*
  );

  //pragma translate_off
  // Simulation-only sanity check on the cache-line / AXI data width ratio.
  initial begin
    assert (DCACHE_LINE_WIDTH / CVA6Cfg.AxiDataWidth inside {2, 4, 8, 16})
    else $fatal(1, "Cache line size needs to be a power of two multiple of AxiDataWidth");
  end
  //pragma translate_on
endmodule

View file

@ -16,92 +16,91 @@
// checks for hit or miss on cache
//
// Arbitrates NR_PORTS requesters onto the shared cache SRAMs and compares the
// winning requester's tag against the tags read from every way.
//
// Request side (combinational): the lowest-indexed port with any bit set in
// req_i[port] wins; its request, address, write data, write enable and byte
// enables are forwarded to the *_o outputs.
//
// Response side: the one-hot winner is registered in id_q, so in the following
// cycle tag_i of that same port is compared against rdata_i[way].tag. A way
// reports a hit only if its tag matches and its valid bit is set.
//
// Parameters:
//   CVA6Cfg          - core configuration struct (not referenced in this module body)
//   NR_PORTS         - number of requesters
//   ADDR_WIDTH       - width of the SRAM address
//   l_data_t         - cache line type; must provide .tag and .valid fields
//   l_be_t           - byte-enable type matching l_data_t
//   DCACHE_SET_ASSOC - number of ways
module tag_cmp #(
    parameter config_pkg::cva6_cfg_t CVA6Cfg          = config_pkg::cva6_cfg_empty,
    parameter int unsigned           NR_PORTS         = 3,
    parameter int unsigned           ADDR_WIDTH       = 64,
    parameter type                   l_data_t         = std_cache_pkg::cache_line_t,
    parameter type                   l_be_t           = std_cache_pkg::cl_be_t,
    parameter int unsigned           DCACHE_SET_ASSOC = 8
) (
    input logic clk_i,
    input logic rst_ni,

    input  logic    [NR_PORTS-1:0][DCACHE_SET_ASSOC-1:0] req_i,
    output logic    [NR_PORTS-1:0]                       gnt_o,
    input  logic    [NR_PORTS-1:0][      ADDR_WIDTH-1:0] addr_i,
    input  l_data_t [NR_PORTS-1:0]                       wdata_i,
    input  logic    [NR_PORTS-1:0]                       we_i,
    input  l_be_t   [NR_PORTS-1:0]                       be_i,
    output l_data_t [DCACHE_SET_ASSOC-1:0]               rdata_o,
    input  logic    [NR_PORTS-1:0][ariane_pkg::DCACHE_TAG_WIDTH-1:0] tag_i, // tag in - comes one cycle later
    output logic [DCACHE_SET_ASSOC-1:0] hit_way_o,  // we've got a hit on the corresponding way

    output logic    [DCACHE_SET_ASSOC-1:0] req_o,
    output logic    [      ADDR_WIDTH-1:0] addr_o,
    output l_data_t                        wdata_o,
    output logic                           we_o,
    output l_be_t                          be_o,
    input  l_data_t [DCACHE_SET_ASSOC-1:0] rdata_i
);

  // Read data is passed through to every requester unmodified.
  assign rdata_o = rdata_i;
  // one hot encoded
  logic [NR_PORTS-1:0] id_d, id_q;
  logic [ariane_pkg::DCACHE_TAG_WIDTH-1:0] sel_tag;

  // Pick the tag of the port that was selected in the previous cycle (id_q).
  // If several bits of id_q were set, the highest index would win because
  // later loop iterations overwrite earlier ones.
  always_comb begin : tag_sel
    sel_tag = '0;
    for (int unsigned i = 0; i < NR_PORTS; i++) if (id_q[i]) sel_tag = tag_i[i];
  end

  // Per-way hit: tag match qualified by the way's valid bit.
  for (genvar j = 0; j < DCACHE_SET_ASSOC; j++) begin : tag_cmp
    assign hit_way_o[j] = (sel_tag == rdata_i[j].tag) ? rdata_i[j].valid : 1'b0;
  end

  always_comb begin

    gnt_o   = '0;
    id_d    = '0;
    wdata_o = '0;
    req_o   = '0;
    addr_o  = '0;
    be_o    = '0;
    we_o    = '0;
    // Request Side
    // priority select
    //
    // Each iteration overwrites the outputs with port i's values and the loop
    // stops at the first port that has any request bit set, so the lowest
    // requesting index wins.
    // NOTE: gnt_o[i] is set for every port visited, i.e. for all indices up to
    // and including the winner. If no port requests, the loop runs to
    // completion: every gnt_o bit is set and the outputs carry the last port's
    // values (req_o is then all zeros, as that port is not requesting).
    for (int unsigned i = 0; i < NR_PORTS; i++) begin
      req_o    = req_i[i];
      id_d     = (1'b1 << i);
      gnt_o[i] = 1'b1;
      addr_o   = addr_i[i];
      be_o     = be_i[i];
      we_o     = we_i[i];
      wdata_o  = wdata_i[i];

      if (req_i[i]) break;
    end

`ifndef SYNTHESIS
`ifndef VERILATOR
    // assert that cache only hits on one way
    // this only needs to be checked one cycle after all ways have been requested
    onehot :
    assert property (@(posedge clk_i) disable iff (!rst_ni) &req_i |=> $onehot0(hit_way_o))
    else begin
      $fatal(1, "Hit should be one-hot encoded");
    end
`endif
`endif
  end

  // Remember which port was selected so its tag can be used one cycle later.
  always_ff @(posedge clk_i or negedge rst_ni) begin
    if (~rst_ni) begin
      id_q <= 0;
    end else begin
      id_q <= id_d;
    end
  end

endmodule

View file

@ -14,40 +14,43 @@
//
module wt_axi_adapter import ariane_pkg::*; import wt_cache_pkg::*; #(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int unsigned ReqFifoDepth = 2,
parameter int unsigned MetaFifoDepth = wt_cache_pkg::DCACHE_MAX_TX,
parameter type axi_req_t = logic,
parameter type axi_rsp_t = logic
module wt_axi_adapter
import ariane_pkg::*;
import wt_cache_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int unsigned ReqFifoDepth = 2,
parameter int unsigned MetaFifoDepth = wt_cache_pkg::DCACHE_MAX_TX,
parameter type axi_req_t = logic,
parameter type axi_rsp_t = logic
) (
input logic clk_i,
input logic rst_ni,
input logic clk_i,
input logic rst_ni,
// icache
input logic icache_data_req_i,
output logic icache_data_ack_o,
input icache_req_t icache_data_i,
// returning packets must be consumed immediately
output logic icache_rtrn_vld_o,
output icache_rtrn_t icache_rtrn_o,
// icache
input logic icache_data_req_i,
output logic icache_data_ack_o,
input icache_req_t icache_data_i,
// returning packets must be consumed immediately
output logic icache_rtrn_vld_o,
output icache_rtrn_t icache_rtrn_o,
// dcache
input logic dcache_data_req_i,
output logic dcache_data_ack_o,
input dcache_req_t dcache_data_i,
// returning packets must be consumed immediately
output logic dcache_rtrn_vld_o,
output dcache_rtrn_t dcache_rtrn_o,
// dcache
input logic dcache_data_req_i,
output logic dcache_data_ack_o,
input dcache_req_t dcache_data_i,
// returning packets must be consumed immediately
output logic dcache_rtrn_vld_o,
output dcache_rtrn_t dcache_rtrn_o,
// AXI port
output axi_req_t axi_req_o,
input axi_rsp_t axi_resp_i,
// AXI port
output axi_req_t axi_req_o,
input axi_rsp_t axi_resp_i,
// Invalidations
input logic [63:0] inval_addr_i,
input logic inval_valid_i,
output logic inval_ready_o
// Invalidations
input logic [63:0] inval_addr_i,
input logic inval_valid_i,
output logic inval_ready_o
);
// support up to 512bit cache lines
@ -65,7 +68,7 @@ module wt_axi_adapter import ariane_pkg::*; import wt_cache_pkg::*; #(
logic dcache_data_full, dcache_data_empty;
logic [1:0] arb_req, arb_ack;
logic arb_idx, arb_gnt;
logic arb_idx, arb_gnt;
logic axi_rd_req, axi_rd_gnt;
logic axi_wr_req, axi_wr_gnt;
@ -74,12 +77,13 @@ module wt_axi_adapter import ariane_pkg::*; import wt_cache_pkg::*; #(
logic [CVA6Cfg.AxiAddrWidth-1:0] axi_rd_addr, axi_wr_addr;
logic [$clog2(AxiNumWords)-1:0] axi_rd_blen, axi_wr_blen;
logic [2:0] axi_rd_size, axi_wr_size;
logic [CVA6Cfg.AxiIdWidth-1:0] axi_rd_id_in, axi_wr_id_in, axi_rd_id_out, axi_wr_id_out, wr_id_out;
logic [CVA6Cfg.AxiIdWidth-1:0]
axi_rd_id_in, axi_wr_id_in, axi_rd_id_out, axi_wr_id_out, wr_id_out;
logic [AxiNumWords-1:0][CVA6Cfg.AxiDataWidth-1:0] axi_wr_data;
logic [AxiNumWords-1:0][CVA6Cfg.AxiUserWidth-1:0] axi_wr_user;
logic [CVA6Cfg.AxiDataWidth-1:0] axi_rd_data;
logic [CVA6Cfg.AxiUserWidth-1:0] axi_rd_user;
logic [AxiNumWords-1:0][(CVA6Cfg.AxiDataWidth/8)-1:0] axi_wr_be;
logic [AxiNumWords-1:0][(CVA6Cfg.AxiDataWidth/8)-1:0] axi_wr_be;
logic [5:0] axi_wr_atop;
logic invalidate;
logic [$clog2(CVA6Cfg.AxiDataWidth/8)-1:0] amo_off_d, amo_off_q;
@ -94,35 +98,33 @@ module wt_axi_adapter import ariane_pkg::*; import wt_cache_pkg::*; #(
logic dcache_rd_full, dcache_rd_empty;
logic dcache_wr_full, dcache_wr_empty;
assign icache_data_ack_o = icache_data_req_i & ~icache_data_full;
assign dcache_data_ack_o = dcache_data_req_i & ~dcache_data_full;
assign icache_data_ack_o = icache_data_req_i & ~icache_data_full;
assign dcache_data_ack_o = dcache_data_req_i & ~dcache_data_full;
// arbiter
assign arb_req = {~(dcache_data_empty |
dcache_wr_full |
dcache_rd_full),
~(icache_data_empty |
icache_rd_full)};
assign arb_req = {
~(dcache_data_empty | dcache_wr_full | dcache_rd_full), ~(icache_data_empty | icache_rd_full)
};
assign arb_gnt = axi_rd_gnt | axi_wr_gnt;
assign arb_gnt = axi_rd_gnt | axi_wr_gnt;
rr_arb_tree #(
.NumIn (2),
.DataWidth (1),
.AxiVldRdy (1'b1),
.LockIn (1'b1)
.NumIn (2),
.DataWidth(1),
.AxiVldRdy(1'b1),
.LockIn (1'b1)
) i_rr_arb_tree (
.clk_i (clk_i ),
.rst_ni (rst_ni ),
.flush_i('0 ),
.rr_i ('0 ),
.req_i (arb_req ),
.gnt_o (arb_ack ),
.data_i ('0 ),
.gnt_i (arb_gnt ),
.req_o ( ),
.data_o ( ),
.idx_o (arb_idx )
.clk_i (clk_i),
.rst_ni (rst_ni),
.flush_i('0),
.rr_i ('0),
.req_i (arb_req),
.gnt_o (arb_ack),
.data_i ('0),
.gnt_i (arb_gnt),
.req_o (),
.data_o (),
.idx_o (arb_idx)
);
// request side
@ -149,9 +151,9 @@ module wt_axi_adapter import ariane_pkg::*; import wt_cache_pkg::*; #(
axi_rd_blen = '0;
if (dcache_data.paddr[2] == 1'b0) begin
axi_wr_user = {{64-CVA6Cfg.AxiUserWidth{1'b0}}, dcache_data.user};
axi_wr_user = {{64 - CVA6Cfg.AxiUserWidth{1'b0}}, dcache_data.user};
end else begin
axi_wr_user = {dcache_data.user, {64-CVA6Cfg.AxiUserWidth{1'b0}}};
axi_wr_user = {dcache_data.user, {64 - CVA6Cfg.AxiUserWidth{1'b0}}};
end
// arbiter mux
@ -159,45 +161,50 @@ module wt_axi_adapter import ariane_pkg::*; import wt_cache_pkg::*; #(
// Cast to AXI address width
axi_rd_addr = dcache_data.paddr;
// If dcache_data.size MSB is set, we want to read as much as possible
axi_rd_size = dcache_data.size[2] ? $clog2(CVA6Cfg.AxiDataWidth/8) : dcache_data.size;
axi_rd_size = dcache_data.size[2] ? $clog2(CVA6Cfg.AxiDataWidth / 8) : dcache_data.size;
if (dcache_data.size[2]) begin
axi_rd_blen = ariane_pkg::DCACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1;
axi_rd_blen = ariane_pkg::DCACHE_LINE_WIDTH / CVA6Cfg.AxiDataWidth - 1;
end
end else begin
// Cast to AXI address width
axi_rd_addr = icache_data.paddr;
axi_rd_size = $clog2(CVA6Cfg.AxiDataWidth/8); // always request max number of words in case of ifill
axi_rd_size =
$clog2(CVA6Cfg.AxiDataWidth / 8); // always request max number of words in case of ifill
if (!icache_data.nc) begin
axi_rd_blen = ariane_pkg::ICACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1;
axi_rd_blen = ariane_pkg::ICACHE_LINE_WIDTH / CVA6Cfg.AxiDataWidth - 1;
end
end
// signal that an invalidation message
// needs to be generated
invalidate = 1'b0;
invalidate = 1'b0;
// decode message type
if (|arb_req) begin
if (arb_idx == 0) begin
//////////////////////////////////////
// IMISS
axi_rd_req = 1'b1;
axi_rd_req = 1'b1;
//////////////////////////////////////
end else begin
unique case (dcache_data.rtype)
//////////////////////////////////////
wt_cache_pkg::DCACHE_LOAD_REQ: begin
axi_rd_req = 1'b1;
axi_rd_req = 1'b1;
end
//////////////////////////////////////
wt_cache_pkg::DCACHE_STORE_REQ: begin
axi_wr_req = 1'b1;
axi_wr_be = '0;
unique case(dcache_data.size[1:0])
2'b00: axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0]] = '1; // byte
2'b01: axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0] +:2 ] = '1; // hword
2'b10: axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0] +:4 ] = '1; // word
default: axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0] +:8 ] = '1; // dword
axi_wr_req = 1'b1;
axi_wr_be = '0;
unique case (dcache_data.size[1:0])
2'b00:
axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0]] = '1; // byte
2'b01:
axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0]+:2] = '1; // hword
2'b10:
axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0]+:4] = '1; // word
default:
axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0]+:8] = '1; // dword
endcase
end
//////////////////////////////////////
@ -207,16 +214,20 @@ module wt_axi_adapter import ariane_pkg::*; import wt_cache_pkg::*; #(
// since we only keep one read tx in flight, and since
// the dcache drains all writes/reads before executing
// an atomic, this is safe.
invalidate = arb_gnt;
axi_wr_req = 1'b1;
axi_wr_be = '0;
unique case(dcache_data.size[1:0])
2'b00: axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0]] = '1; // byte
2'b01: axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0] +:2 ] = '1; // hword
2'b10: axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0] +:4 ] = '1; // word
default: axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0] +:8 ] = '1; // dword
invalidate = arb_gnt;
axi_wr_req = 1'b1;
axi_wr_be = '0;
unique case (dcache_data.size[1:0])
2'b00:
axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0]] = '1; // byte
2'b01:
axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0]+:2] = '1; // hword
2'b10:
axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0]+:4] = '1; // word
default:
axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0]+:8] = '1; // dword
endcase
amo_gen_r_d = 1'b1;
amo_gen_r_d = 1'b1;
// need to use a separate ID here, so concat an additional bit
axi_wr_id_in[1] = 1'b1;
@ -226,206 +237,235 @@ module wt_axi_adapter import ariane_pkg::*; import wt_cache_pkg::*; #(
axi_rd_req = 1'b1;
axi_rd_id_in[1] = 1'b1;
// tie to zero in this special case
axi_wr_req = 1'b0;
axi_wr_be = '0;
axi_wr_req = 1'b0;
axi_wr_be = '0;
end
AMO_SC: begin
axi_wr_lock = 1'b1;
amo_gen_r_d = 1'b0;
axi_wr_lock = 1'b1;
amo_gen_r_d = 1'b0;
// needed to properly encode success. store the result at offset within the returned
// AXI data word aligned with the requested word size.
amo_off_d = dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0] & ~((1 << dcache_data.size[1:0]) - 1);
amo_off_d = dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-
1:0] & ~((1 << dcache_data.size[1:0]) - 1);
end
// RISC-V atops have a load semantic
AMO_SWAP: axi_wr_atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_ATOMICSWAP};
AMO_ADD: axi_wr_atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_ADD};
AMO_AND: begin
AMO_SWAP:
axi_wr_atop = {
axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_ATOMICSWAP
};
AMO_ADD:
axi_wr_atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_ADD};
AMO_AND: begin
// in this case we need to invert the data to get a "CLR"
axi_wr_data = ~{(CVA6Cfg.AxiDataWidth/riscv::XLEN){dcache_data.data}};
axi_wr_user = ~{(CVA6Cfg.AxiDataWidth/riscv::XLEN){dcache_data.user}};
axi_wr_atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_CLR};
axi_wr_data = ~{(CVA6Cfg.AxiDataWidth / riscv::XLEN) {dcache_data.data}};
axi_wr_user = ~{(CVA6Cfg.AxiDataWidth / riscv::XLEN) {dcache_data.user}};
axi_wr_atop = {
axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_CLR
};
end
AMO_OR: axi_wr_atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SET};
AMO_XOR: axi_wr_atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_EOR};
AMO_MAX: axi_wr_atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SMAX};
AMO_MAXU: axi_wr_atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_UMAX};
AMO_MIN: axi_wr_atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SMIN};
AMO_MINU: axi_wr_atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_UMIN};
default: ; // Do nothing
AMO_OR:
axi_wr_atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SET};
AMO_XOR:
axi_wr_atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_EOR};
AMO_MAX:
axi_wr_atop = {
axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SMAX
};
AMO_MAXU:
axi_wr_atop = {
axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_UMAX
};
AMO_MIN:
axi_wr_atop = {
axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SMIN
};
AMO_MINU:
axi_wr_atop = {
axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_UMIN
};
default: ; // Do nothing
endcase
end
default: ; // Do nothing
//////////////////////////////////////
default: ; // Do nothing
//////////////////////////////////////
endcase
end
end
end
fifo_v3 #(
.dtype ( icache_req_t ),
.DEPTH ( ReqFifoDepth )
.dtype(icache_req_t),
.DEPTH(ReqFifoDepth)
) i_icache_data_fifo (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.flush_i ( 1'b0 ),
.testmode_i ( 1'b0 ),
.full_o ( icache_data_full ),
.empty_o ( icache_data_empty ),
.usage_o ( ),
.data_i ( icache_data_i ),
.push_i ( icache_data_ack_o ),
.data_o ( icache_data ),
.pop_i ( arb_ack[0] )
.clk_i (clk_i),
.rst_ni (rst_ni),
.flush_i (1'b0),
.testmode_i(1'b0),
.full_o (icache_data_full),
.empty_o (icache_data_empty),
.usage_o (),
.data_i (icache_data_i),
.push_i (icache_data_ack_o),
.data_o (icache_data),
.pop_i (arb_ack[0])
);
fifo_v3 #(
.dtype ( dcache_req_t ),
.DEPTH ( ReqFifoDepth )
.dtype(dcache_req_t),
.DEPTH(ReqFifoDepth)
) i_dcache_data_fifo (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.flush_i ( 1'b0 ),
.testmode_i ( 1'b0 ),
.full_o ( dcache_data_full ),
.empty_o ( dcache_data_empty ),
.usage_o ( ),
.data_i ( dcache_data_i ),
.push_i ( dcache_data_ack_o ),
.data_o ( dcache_data ),
.pop_i ( arb_ack[1] )
.clk_i (clk_i),
.rst_ni (rst_ni),
.flush_i (1'b0),
.testmode_i(1'b0),
.full_o (dcache_data_full),
.empty_o (dcache_data_empty),
.usage_o (),
.data_i (dcache_data_i),
.push_i (dcache_data_ack_o),
.data_o (dcache_data),
.pop_i (arb_ack[1])
);
///////////////////////////////////////////////////////
// meta info feedback fifos
///////////////////////////////////////////////////////
///////////////////////////////////////////////////////
// meta info feedback fifos
///////////////////////////////////////////////////////
logic icache_rtrn_rd_en, dcache_rtrn_rd_en;
logic icache_rtrn_vld_d, icache_rtrn_vld_q, dcache_rtrn_vld_d, dcache_rtrn_vld_q;
fifo_v3 #(
.DATA_WIDTH ( wt_cache_pkg::CACHE_ID_WIDTH ),
.DEPTH ( MetaFifoDepth )
.DATA_WIDTH(wt_cache_pkg::CACHE_ID_WIDTH),
.DEPTH (MetaFifoDepth)
) i_rd_icache_id (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.flush_i ( 1'b0 ),
.testmode_i ( 1'b0 ),
.full_o ( icache_rd_full ),
.empty_o ( icache_rd_empty ),
.usage_o ( ),
.data_i ( icache_data.tid ),
.push_i ( arb_ack[0] & axi_rd_gnt ),
.data_o ( icache_rtrn_tid_d ),
.pop_i ( icache_rtrn_vld_d )
.clk_i (clk_i),
.rst_ni (rst_ni),
.flush_i (1'b0),
.testmode_i(1'b0),
.full_o (icache_rd_full),
.empty_o (icache_rd_empty),
.usage_o (),
.data_i (icache_data.tid),
.push_i (arb_ack[0] & axi_rd_gnt),
.data_o (icache_rtrn_tid_d),
.pop_i (icache_rtrn_vld_d)
);
fifo_v3 #(
.DATA_WIDTH ( wt_cache_pkg::CACHE_ID_WIDTH ),
.DEPTH ( MetaFifoDepth )
.DATA_WIDTH(wt_cache_pkg::CACHE_ID_WIDTH),
.DEPTH (MetaFifoDepth)
) i_rd_dcache_id (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.flush_i ( 1'b0 ),
.testmode_i ( 1'b0 ),
.full_o ( dcache_rd_full ),
.empty_o ( dcache_rd_empty ),
.usage_o ( ),
.data_i ( dcache_data.tid ),
.push_i ( arb_ack[1] & axi_rd_gnt ),
.data_o ( dcache_rtrn_rd_tid ),
.pop_i ( dcache_rd_pop )
.clk_i (clk_i),
.rst_ni (rst_ni),
.flush_i (1'b0),
.testmode_i(1'b0),
.full_o (dcache_rd_full),
.empty_o (dcache_rd_empty),
.usage_o (),
.data_i (dcache_data.tid),
.push_i (arb_ack[1] & axi_rd_gnt),
.data_o (dcache_rtrn_rd_tid),
.pop_i (dcache_rd_pop)
);
fifo_v3 #(
.DATA_WIDTH ( wt_cache_pkg::CACHE_ID_WIDTH ),
.DEPTH ( MetaFifoDepth )
.DATA_WIDTH(wt_cache_pkg::CACHE_ID_WIDTH),
.DEPTH (MetaFifoDepth)
) i_wr_dcache_id (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.flush_i ( 1'b0 ),
.testmode_i ( 1'b0 ),
.full_o ( dcache_wr_full ),
.empty_o ( dcache_wr_empty ),
.usage_o ( ),
.data_i ( dcache_data.tid ),
.push_i ( arb_ack[1] & axi_wr_gnt ),
.data_o ( dcache_rtrn_wr_tid ),
.pop_i ( dcache_wr_pop )
.clk_i (clk_i),
.rst_ni (rst_ni),
.flush_i (1'b0),
.testmode_i(1'b0),
.full_o (dcache_wr_full),
.empty_o (dcache_wr_empty),
.usage_o (),
.data_i (dcache_data.tid),
.push_i (arb_ack[1] & axi_wr_gnt),
.data_o (dcache_rtrn_wr_tid),
.pop_i (dcache_wr_pop)
);
// select correct tid to return
assign dcache_rtrn_tid_d = (dcache_wr_pop) ? dcache_rtrn_wr_tid : dcache_rtrn_rd_tid;
///////////////////////////////////////////////////////
// return path
///////////////////////////////////////////////////////
///////////////////////////////////////////////////////
// return path
///////////////////////////////////////////////////////
// buffer write responses
logic b_full, b_empty, b_push, b_pop;
assign axi_wr_rdy = ~b_full;
assign b_push = axi_wr_valid & axi_wr_rdy;
assign axi_wr_rdy = ~b_full;
assign b_push = axi_wr_valid & axi_wr_rdy;
fifo_v3 #(
.DATA_WIDTH ( CVA6Cfg.AxiIdWidth + 1 ),
.DEPTH ( MetaFifoDepth ),
.FALL_THROUGH ( 1'b1 )
.DATA_WIDTH (CVA6Cfg.AxiIdWidth + 1),
.DEPTH (MetaFifoDepth),
.FALL_THROUGH(1'b1)
) i_b_fifo (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.flush_i ( 1'b0 ),
.testmode_i ( 1'b0 ),
.full_o ( b_full ),
.empty_o ( b_empty ),
.usage_o ( ),
.data_i ( {axi_wr_exokay, axi_wr_id_out} ),
.push_i ( b_push ),
.data_o ( {wr_exokay, wr_id_out} ),
.pop_i ( b_pop )
.clk_i (clk_i),
.rst_ni (rst_ni),
.flush_i (1'b0),
.testmode_i(1'b0),
.full_o (b_full),
.empty_o (b_empty),
.usage_o (),
.data_i ({axi_wr_exokay, axi_wr_id_out}),
.push_i (b_push),
.data_o ({wr_exokay, wr_id_out}),
.pop_i (b_pop)
);
// buffer read responses in shift regs
logic icache_first_d, icache_first_q, dcache_first_d, dcache_first_q;
logic [ICACHE_USER_LINE_WIDTH/CVA6Cfg.AxiUserWidth-1:0][CVA6Cfg.AxiUserWidth-1:0] icache_rd_shift_user_d, icache_rd_shift_user_q;
logic [DCACHE_USER_LINE_WIDTH/CVA6Cfg.AxiUserWidth-1:0][CVA6Cfg.AxiUserWidth-1:0] dcache_rd_shift_user_d, dcache_rd_shift_user_q;
logic [ICACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1:0][CVA6Cfg.AxiDataWidth-1:0] icache_rd_shift_d, icache_rd_shift_q;
logic [DCACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1:0][CVA6Cfg.AxiDataWidth-1:0] dcache_rd_shift_d, dcache_rd_shift_q;
logic [ICACHE_USER_LINE_WIDTH/CVA6Cfg.AxiUserWidth-1:0][CVA6Cfg.AxiUserWidth-1:0]
icache_rd_shift_user_d, icache_rd_shift_user_q;
logic [DCACHE_USER_LINE_WIDTH/CVA6Cfg.AxiUserWidth-1:0][CVA6Cfg.AxiUserWidth-1:0]
dcache_rd_shift_user_d, dcache_rd_shift_user_q;
logic [ICACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1:0][CVA6Cfg.AxiDataWidth-1:0]
icache_rd_shift_d, icache_rd_shift_q;
logic [DCACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1:0][CVA6Cfg.AxiDataWidth-1:0]
dcache_rd_shift_d, dcache_rd_shift_q;
wt_cache_pkg::dcache_in_t dcache_rtrn_type_d, dcache_rtrn_type_q;
wt_cache_pkg::dcache_inval_t dcache_rtrn_inv_d, dcache_rtrn_inv_q;
logic dcache_sc_rtrn, axi_rd_last;
always_comb begin : p_axi_rtrn_shift
// output directly from regs
icache_rtrn_o = '0;
icache_rtrn_o.rtype = wt_cache_pkg::ICACHE_IFILL_ACK;
icache_rtrn_o.tid = icache_rtrn_tid_q;
icache_rtrn_o.data = icache_rd_shift_q;
icache_rtrn_o.user = icache_rd_shift_user_q;
icache_rtrn_vld_o = icache_rtrn_vld_q;
icache_rtrn_o = '0;
icache_rtrn_o.rtype = wt_cache_pkg::ICACHE_IFILL_ACK;
icache_rtrn_o.tid = icache_rtrn_tid_q;
icache_rtrn_o.data = icache_rd_shift_q;
icache_rtrn_o.user = icache_rd_shift_user_q;
icache_rtrn_vld_o = icache_rtrn_vld_q;
dcache_rtrn_o = '0;
dcache_rtrn_o.rtype = dcache_rtrn_type_q;
dcache_rtrn_o.inv = dcache_rtrn_inv_q;
dcache_rtrn_o.tid = dcache_rtrn_tid_q;
dcache_rtrn_o.data = dcache_rd_shift_q;
dcache_rtrn_o.user = dcache_rd_shift_user_q;
dcache_rtrn_vld_o = dcache_rtrn_vld_q;
dcache_rtrn_o = '0;
dcache_rtrn_o.rtype = dcache_rtrn_type_q;
dcache_rtrn_o.inv = dcache_rtrn_inv_q;
dcache_rtrn_o.tid = dcache_rtrn_tid_q;
dcache_rtrn_o.data = dcache_rd_shift_q;
dcache_rtrn_o.user = dcache_rd_shift_user_q;
dcache_rtrn_vld_o = dcache_rtrn_vld_q;
// read shift registers
icache_rd_shift_d = icache_rd_shift_q;
icache_rd_shift_d = icache_rd_shift_q;
icache_rd_shift_user_d = icache_rd_shift_user_q;
dcache_rd_shift_d = dcache_rd_shift_q;
dcache_rd_shift_d = dcache_rd_shift_q;
dcache_rd_shift_user_d = dcache_rd_shift_user_q;
icache_first_d = icache_first_q;
dcache_first_d = dcache_first_q;
icache_first_d = icache_first_q;
dcache_first_d = dcache_first_q;
if (icache_rtrn_rd_en) begin
icache_first_d = axi_rd_last;
icache_first_d = axi_rd_last;
if (ICACHE_LINE_WIDTH == CVA6Cfg.AxiDataWidth) begin
icache_rd_shift_d = axi_rd_data;
end else begin
icache_rd_shift_d = {axi_rd_data, icache_rd_shift_q[ICACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1:1]};
icache_rd_shift_d = {
axi_rd_data, icache_rd_shift_q[ICACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1:1]
};
end
icache_rd_shift_user_d = {axi_rd_user, icache_rd_shift_user_q[ICACHE_USER_LINE_WIDTH/CVA6Cfg.AxiUserWidth-1:1]};
icache_rd_shift_user_d = {
axi_rd_user, icache_rd_shift_user_q[ICACHE_USER_LINE_WIDTH/CVA6Cfg.AxiUserWidth-1:1]
};
// if this is a single word transaction, we need to make sure that word is placed at offset 0
if (icache_first_q) begin
icache_rd_shift_d[0] = axi_rd_data;
@ -434,13 +474,17 @@ module wt_axi_adapter import ariane_pkg::*; import wt_cache_pkg::*; #(
end
if (dcache_rtrn_rd_en) begin
dcache_first_d = axi_rd_last;
dcache_first_d = axi_rd_last;
if (DCACHE_LINE_WIDTH == CVA6Cfg.AxiDataWidth) begin
dcache_rd_shift_d = axi_rd_data;
end else begin
dcache_rd_shift_d = {axi_rd_data, dcache_rd_shift_q[DCACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1:1]};
dcache_rd_shift_d = {
axi_rd_data, dcache_rd_shift_q[DCACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1:1]
};
end
dcache_rd_shift_user_d = {axi_rd_user, dcache_rd_shift_user_q[DCACHE_USER_LINE_WIDTH/CVA6Cfg.AxiUserWidth-1:1]};
dcache_rd_shift_user_d = {
axi_rd_user, dcache_rd_shift_user_q[DCACHE_USER_LINE_WIDTH/CVA6Cfg.AxiUserWidth-1:1]
};
// if this is a single word transaction, we need to make sure that word is placed at offset 0
if (dcache_first_q) begin
dcache_rd_shift_d[0] = axi_rd_data;
@ -471,64 +515,64 @@ module wt_axi_adapter import ariane_pkg::*; import wt_cache_pkg::*; #(
icache_rtrn_vld_d = axi_rd_last;
end
dcache_rtrn_rd_en = 1'b0;
dcache_rtrn_vld_d = 1'b0;
dcache_rd_pop = 1'b0;
dcache_wr_pop = 1'b0;
dcache_rtrn_inv_d = '0;
dcache_rtrn_type_d = wt_cache_pkg::DCACHE_LOAD_ACK;
b_pop = 1'b0;
dcache_sc_rtrn = 1'b0;
dcache_rtrn_rd_en = 1'b0;
dcache_rtrn_vld_d = 1'b0;
dcache_rd_pop = 1'b0;
dcache_wr_pop = 1'b0;
dcache_rtrn_inv_d = '0;
dcache_rtrn_type_d = wt_cache_pkg::DCACHE_LOAD_ACK;
b_pop = 1'b0;
dcache_sc_rtrn = 1'b0;
// External invalidation requests (from coprocessor). This is safe as
// there are no other transactions when a coprocessor has pending stores.
inval_ready_o = 1'b0;
inval_ready_o = 1'b0;
if (inval_valid_i) begin
inval_ready_o = 1'b1;
dcache_rtrn_type_d = wt_cache_pkg::DCACHE_INV_REQ;
dcache_rtrn_vld_d = 1'b1;
dcache_rtrn_inv_d.all = 1'b1;
dcache_rtrn_inv_d.idx = inval_addr_i[ariane_pkg::DCACHE_INDEX_WIDTH-1:0];
//////////////////////////////////////
// dcache needs some special treatment
// for arbitration and decoding of atomics
//////////////////////////////////////
// this is safe, there is no other read tx in flight than this atomic.
// note that this self invalidation is handled in this way due to the
// write-through cache architecture, which is aligned with the openpiton
// cache subsystem.
inval_ready_o = 1'b1;
dcache_rtrn_type_d = wt_cache_pkg::DCACHE_INV_REQ;
dcache_rtrn_vld_d = 1'b1;
dcache_rtrn_inv_d.all = 1'b1;
dcache_rtrn_inv_d.idx = inval_addr_i[ariane_pkg::DCACHE_INDEX_WIDTH-1:0];
//////////////////////////////////////
// dcache needs some special treatment
// for arbitration and decoding of atomics
//////////////////////////////////////
// this is safe, there is no other read tx in flight than this atomic.
// note that this self invalidation is handled in this way due to the
// write-through cache architecture, which is aligned with the openpiton
// cache subsystem.
end else if (invalidate) begin
dcache_rtrn_type_d = wt_cache_pkg::DCACHE_INV_REQ;
dcache_rtrn_vld_d = 1'b1;
dcache_rtrn_type_d = wt_cache_pkg::DCACHE_INV_REQ;
dcache_rtrn_vld_d = 1'b1;
dcache_rtrn_inv_d.all = 1'b1;
dcache_rtrn_inv_d.idx = dcache_data.paddr[ariane_pkg::DCACHE_INDEX_WIDTH-1:0];
//////////////////////////////////////
// read responses
// note that in case of atomics, the dcache sequentializes requests and
// guarantees that there are no other pending transactions in flight
dcache_rtrn_inv_d.all = 1'b1;
dcache_rtrn_inv_d.idx = dcache_data.paddr[ariane_pkg::DCACHE_INDEX_WIDTH-1:0];
//////////////////////////////////////
// read responses
// note that in case of atomics, the dcache sequentializes requests and
// guarantees that there are no other pending transactions in flight
end else if (axi_rd_valid && axi_rd_id_out[0] && axi_rd_rdy) begin
dcache_rtrn_rd_en = 1'b1;
dcache_rtrn_vld_d = axi_rd_last;
dcache_rtrn_rd_en = 1'b1;
dcache_rtrn_vld_d = axi_rd_last;
// if this was an atomic op
if (axi_rd_id_out[1]) begin
dcache_rtrn_type_d = wt_cache_pkg::DCACHE_ATOMIC_ACK;
dcache_rtrn_type_d = wt_cache_pkg::DCACHE_ATOMIC_ACK;
// check if transaction was issued over write channel and pop that ID
if (!dcache_wr_empty) begin
dcache_wr_pop = axi_rd_last;
// if this is not the case, there MUST be an id in the read channel (LR)
dcache_wr_pop = axi_rd_last;
// if this is not the case, there MUST be an id in the read channel (LR)
end else begin
dcache_rd_pop = axi_rd_last;
dcache_rd_pop = axi_rd_last;
end
end else begin
dcache_rd_pop = axi_rd_last;
dcache_rd_pop = axi_rd_last;
end
//////////////////////////////////////
// write responses, check b fifo
//////////////////////////////////////
// write responses, check b fifo
end else if (!b_empty) begin
b_pop = 1'b1;
b_pop = 1'b1;
// this was an atomic
if (wr_id_out[1]) begin
@ -537,9 +581,9 @@ module wt_axi_adapter import ariane_pkg::*; import wt_cache_pkg::*; #(
// silently discard b response if we already popped the fifo
// with a R beat (iff the amo transaction generated an R beat)
if (!amo_gen_r_q) begin
dcache_rtrn_vld_d = 1'b1;
dcache_wr_pop = 1'b1;
dcache_sc_rtrn = 1'b1;
dcache_rtrn_vld_d = 1'b1;
dcache_wr_pop = 1'b1;
dcache_sc_rtrn = 1'b1;
end
end else begin
// regular response
@ -565,92 +609,92 @@ module wt_axi_adapter import ariane_pkg::*; import wt_cache_pkg::*; #(
always_ff @(posedge clk_i or negedge rst_ni) begin : p_rd_buf
if (!rst_ni) begin
icache_first_q <= 1'b1;
dcache_first_q <= 1'b1;
icache_rd_shift_q <= '0;
icache_rd_shift_user_q <= '0;
dcache_rd_shift_q <= '0;
dcache_rd_shift_user_q <= '0;
icache_rtrn_vld_q <= '0;
dcache_rtrn_vld_q <= '0;
icache_rtrn_tid_q <= '0;
dcache_rtrn_tid_q <= '0;
dcache_rtrn_type_q <= wt_cache_pkg::DCACHE_LOAD_ACK;
dcache_rtrn_inv_q <= '0;
amo_off_q <= '0;
amo_gen_r_q <= 1'b0;
icache_first_q <= 1'b1;
dcache_first_q <= 1'b1;
icache_rd_shift_q <= '0;
icache_rd_shift_user_q <= '0;
dcache_rd_shift_q <= '0;
dcache_rd_shift_user_q <= '0;
icache_rtrn_vld_q <= '0;
dcache_rtrn_vld_q <= '0;
icache_rtrn_tid_q <= '0;
dcache_rtrn_tid_q <= '0;
dcache_rtrn_type_q <= wt_cache_pkg::DCACHE_LOAD_ACK;
dcache_rtrn_inv_q <= '0;
amo_off_q <= '0;
amo_gen_r_q <= 1'b0;
end else begin
icache_first_q <= icache_first_d;
dcache_first_q <= dcache_first_d;
icache_rd_shift_q <= icache_rd_shift_d;
icache_rd_shift_user_q <= icache_rd_shift_user_d;
dcache_rd_shift_q <= dcache_rd_shift_d;
dcache_rd_shift_user_q <= dcache_rd_shift_user_d;
icache_rtrn_vld_q <= icache_rtrn_vld_d;
dcache_rtrn_vld_q <= dcache_rtrn_vld_d;
icache_rtrn_tid_q <= icache_rtrn_tid_d;
dcache_rtrn_tid_q <= dcache_rtrn_tid_d;
dcache_rtrn_type_q <= dcache_rtrn_type_d;
dcache_rtrn_inv_q <= dcache_rtrn_inv_d;
amo_off_q <= amo_off_d;
amo_gen_r_q <= amo_gen_r_d;
icache_first_q <= icache_first_d;
dcache_first_q <= dcache_first_d;
icache_rd_shift_q <= icache_rd_shift_d;
icache_rd_shift_user_q <= icache_rd_shift_user_d;
dcache_rd_shift_q <= dcache_rd_shift_d;
dcache_rd_shift_user_q <= dcache_rd_shift_user_d;
icache_rtrn_vld_q <= icache_rtrn_vld_d;
dcache_rtrn_vld_q <= dcache_rtrn_vld_d;
icache_rtrn_tid_q <= icache_rtrn_tid_d;
dcache_rtrn_tid_q <= dcache_rtrn_tid_d;
dcache_rtrn_type_q <= dcache_rtrn_type_d;
dcache_rtrn_inv_q <= dcache_rtrn_inv_d;
amo_off_q <= amo_off_d;
amo_gen_r_q <= amo_gen_r_d;
end
end
///////////////////////////////////////////////////////
// axi protocol shim
///////////////////////////////////////////////////////
///////////////////////////////////////////////////////
// axi protocol shim
///////////////////////////////////////////////////////
axi_shim #(
.CVA6Cfg ( CVA6Cfg ),
.AxiNumWords ( AxiNumWords ),
.axi_req_t ( axi_req_t ),
.axi_rsp_t ( axi_rsp_t )
.CVA6Cfg (CVA6Cfg),
.AxiNumWords(AxiNumWords),
.axi_req_t (axi_req_t),
.axi_rsp_t (axi_rsp_t)
) i_axi_shim (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.rd_req_i ( axi_rd_req ),
.rd_gnt_o ( axi_rd_gnt ),
.rd_addr_i ( axi_rd_addr ),
.rd_blen_i ( axi_rd_blen ),
.rd_size_i ( axi_rd_size ),
.rd_id_i ( axi_rd_id_in ),
.rd_rdy_i ( axi_rd_rdy ),
.rd_lock_i ( axi_rd_lock ),
.rd_last_o ( axi_rd_last ),
.rd_valid_o ( axi_rd_valid ),
.rd_data_o ( axi_rd_data ),
.rd_user_o ( axi_rd_user ),
.rd_id_o ( axi_rd_id_out ),
.rd_exokay_o ( axi_rd_exokay ),
.wr_req_i ( axi_wr_req ),
.wr_gnt_o ( axi_wr_gnt ),
.wr_addr_i ( axi_wr_addr ),
.wr_data_i ( axi_wr_data ),
.wr_user_i ( axi_wr_user ),
.wr_be_i ( axi_wr_be ),
.wr_blen_i ( axi_wr_blen ),
.wr_size_i ( axi_wr_size ),
.wr_id_i ( axi_wr_id_in ),
.wr_lock_i ( axi_wr_lock ),
.wr_atop_i ( axi_wr_atop ),
.wr_rdy_i ( axi_wr_rdy ),
.wr_valid_o ( axi_wr_valid ),
.wr_id_o ( axi_wr_id_out ),
.wr_exokay_o ( axi_wr_exokay ),
.axi_req_o ( axi_req_o ),
.axi_resp_i ( axi_resp_i )
.clk_i (clk_i),
.rst_ni (rst_ni),
.rd_req_i (axi_rd_req),
.rd_gnt_o (axi_rd_gnt),
.rd_addr_i (axi_rd_addr),
.rd_blen_i (axi_rd_blen),
.rd_size_i (axi_rd_size),
.rd_id_i (axi_rd_id_in),
.rd_rdy_i (axi_rd_rdy),
.rd_lock_i (axi_rd_lock),
.rd_last_o (axi_rd_last),
.rd_valid_o (axi_rd_valid),
.rd_data_o (axi_rd_data),
.rd_user_o (axi_rd_user),
.rd_id_o (axi_rd_id_out),
.rd_exokay_o(axi_rd_exokay),
.wr_req_i (axi_wr_req),
.wr_gnt_o (axi_wr_gnt),
.wr_addr_i (axi_wr_addr),
.wr_data_i (axi_wr_data),
.wr_user_i (axi_wr_user),
.wr_be_i (axi_wr_be),
.wr_blen_i (axi_wr_blen),
.wr_size_i (axi_wr_size),
.wr_id_i (axi_wr_id_in),
.wr_lock_i (axi_wr_lock),
.wr_atop_i (axi_wr_atop),
.wr_rdy_i (axi_wr_rdy),
.wr_valid_o (axi_wr_valid),
.wr_id_o (axi_wr_id_out),
.wr_exokay_o(axi_wr_exokay),
.axi_req_o (axi_req_o),
.axi_resp_i (axi_resp_i)
);
///////////////////////////////////////////////////////
// assertions
///////////////////////////////////////////////////////
///////////////////////////////////////////////////////
// assertions
///////////////////////////////////////////////////////
//pragma translate_off
//pragma translate_off
`ifndef VERILATOR
`endif
//pragma translate_on
//pragma translate_on
endmodule // wt_axi_adapter
endmodule // wt_axi_adapter

View file

@ -19,49 +19,52 @@
// L1.5 interface.
module wt_cache_subsystem import ariane_pkg::*; import wt_cache_pkg::*; #(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int unsigned NumPorts = 4,
parameter type noc_req_t = logic,
parameter type noc_resp_t = logic
module wt_cache_subsystem
import ariane_pkg::*;
import wt_cache_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int unsigned NumPorts = 4,
parameter type noc_req_t = logic,
parameter type noc_resp_t = logic
) (
input logic clk_i,
input logic rst_ni,
// I$
input logic icache_en_i, // enable icache (or bypass e.g: in debug mode)
input logic icache_flush_i, // flush the icache, flush and kill have to be asserted together
output logic icache_miss_o, // to performance counter
// address translation requests
input icache_areq_t icache_areq_i, // to/from frontend
output icache_arsp_t icache_areq_o,
// data requests
input icache_dreq_t icache_dreq_i, // to/from frontend
output icache_drsp_t icache_dreq_o,
// D$
// Cache management
input logic dcache_enable_i, // from CSR
input logic dcache_flush_i, // high until acknowledged
output logic dcache_flush_ack_o, // send a single cycle acknowledge signal when the cache is flushed
output logic dcache_miss_o, // we missed on a ld/st
// For Performance Counter
output logic [NumPorts-1:0][DCACHE_SET_ASSOC-1:0] miss_vld_bits_o,
// AMO interface
input amo_req_t dcache_amo_req_i,
output amo_resp_t dcache_amo_resp_o,
// Request ports
input dcache_req_i_t [NumPorts-1:0] dcache_req_ports_i, // to/from LSU
output dcache_req_o_t [NumPorts-1:0] dcache_req_ports_o, // to/from LSU
// writebuffer status
output logic wbuffer_empty_o,
output logic wbuffer_not_ni_o,
// memory side
output noc_req_t noc_req_o,
input noc_resp_t noc_resp_i,
// Invalidations
input logic [63:0] inval_addr_i,
input logic inval_valid_i,
output logic inval_ready_o
// TODO: interrupt interface
input logic clk_i,
input logic rst_ni,
// I$
input logic icache_en_i, // enable icache (or bypass e.g: in debug mode)
input logic icache_flush_i, // flush the icache, flush and kill have to be asserted together
output logic icache_miss_o, // to performance counter
// address translation requests
input icache_areq_t icache_areq_i, // to/from frontend
output icache_arsp_t icache_areq_o,
// data requests
input icache_dreq_t icache_dreq_i, // to/from frontend
output icache_drsp_t icache_dreq_o,
// D$
// Cache management
input logic dcache_enable_i, // from CSR
input logic dcache_flush_i, // high until acknowledged
output logic dcache_flush_ack_o, // send a single cycle acknowledge signal when the cache is flushed
output logic dcache_miss_o, // we missed on a ld/st
// For Performance Counter
output logic [NumPorts-1:0][DCACHE_SET_ASSOC-1:0] miss_vld_bits_o,
// AMO interface
input amo_req_t dcache_amo_req_i,
output amo_resp_t dcache_amo_resp_o,
// Request ports
input dcache_req_i_t [NumPorts-1:0] dcache_req_ports_i, // to/from LSU
output dcache_req_o_t [NumPorts-1:0] dcache_req_ports_o, // to/from LSU
// writebuffer status
output logic wbuffer_empty_o,
output logic wbuffer_not_ni_o,
// memory side
output noc_req_t noc_req_o,
input noc_resp_t noc_resp_i,
// Invalidations
input logic [63:0] inval_addr_i,
input logic inval_valid_i,
output logic inval_ready_o
// TODO: interrupt interface
);
logic icache_adapter_data_req, adapter_icache_data_ack, adapter_icache_rtrn_vld;
@ -74,24 +77,24 @@ module wt_cache_subsystem import ariane_pkg::*; import wt_cache_pkg::*; #(
wt_cache_pkg::dcache_rtrn_t adapter_dcache;
cva6_icache #(
// use ID 0 for icache reads
.CVA6Cfg ( CVA6Cfg ),
.RdTxId ( 0 )
// use ID 0 for icache reads
.CVA6Cfg(CVA6Cfg),
.RdTxId (0)
) i_cva6_icache (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.flush_i ( icache_flush_i ),
.en_i ( icache_en_i ),
.miss_o ( icache_miss_o ),
.areq_i ( icache_areq_i ),
.areq_o ( icache_areq_o ),
.dreq_i ( icache_dreq_i ),
.dreq_o ( icache_dreq_o ),
.mem_rtrn_vld_i ( adapter_icache_rtrn_vld ),
.mem_rtrn_i ( adapter_icache ),
.mem_data_req_o ( icache_adapter_data_req ),
.mem_data_ack_i ( adapter_icache_data_ack ),
.mem_data_o ( icache_adapter )
.clk_i (clk_i),
.rst_ni (rst_ni),
.flush_i (icache_flush_i),
.en_i (icache_en_i),
.miss_o (icache_miss_o),
.areq_i (icache_areq_i),
.areq_o (icache_areq_o),
.dreq_i (icache_dreq_i),
.dreq_o (icache_dreq_o),
.mem_rtrn_vld_i(adapter_icache_rtrn_vld),
.mem_rtrn_i (adapter_icache),
.mem_data_req_o(icache_adapter_data_req),
.mem_data_ack_i(adapter_icache_data_ack),
.mem_data_o (icache_adapter)
);
@ -100,109 +103,131 @@ module wt_cache_subsystem import ariane_pkg::*; import wt_cache_pkg::*; #(
// they have equal priority and are round-robin arbitrated
// Port 2 is write only and goes into the merging write buffer
// Write-through data cache instance.
// Memory side: the request channel (dcache_adapter_data_req / dcache_adapter)
// and the return channel (adapter_dcache_rtrn_vld / adapter_dcache) are wired
// to the i_adapter instance below.
wt_dcache #(
    .CVA6Cfg  (CVA6Cfg),
    // Forward the port count so the width of req_ports_i/req_ports_o inside
    // wt_dcache always matches dcache_req_ports_i/o here, instead of relying
    // on both modules happening to use the same default.
    .NumPorts (NumPorts),
    // use ID 1 for dcache reads and amos. note that the writebuffer
    // uses all IDs up to DCACHE_MAX_TX-1 for write transactions.
    .RdAmoTxId(1)
) i_wt_dcache (
    .clk_i           (clk_i),
    .rst_ni          (rst_ni),
    .enable_i        (dcache_enable_i),
    .flush_i         (dcache_flush_i),
    .flush_ack_o     (dcache_flush_ack_o),
    .miss_o          (dcache_miss_o),
    .wbuffer_empty_o (wbuffer_empty_o),
    .wbuffer_not_ni_o(wbuffer_not_ni_o),
    .amo_req_i       (dcache_amo_req_i),
    .amo_resp_o      (dcache_amo_resp_o),
    .req_ports_i     (dcache_req_ports_i),
    .req_ports_o     (dcache_req_ports_o),
    .miss_vld_bits_o (miss_vld_bits_o),
    .mem_rtrn_vld_i  (adapter_dcache_rtrn_vld),
    .mem_rtrn_i      (adapter_dcache),
    .mem_data_req_o  (dcache_adapter_data_req),
    .mem_data_ack_i  (adapter_dcache_data_ack),
    .mem_data_o      (dcache_adapter)
);
///////////////////////////////////////////////////////
// memory plumbing, either use 64bit AXI port or native
// L15 cache interface (derived from OpenSPARC CCX).
///////////////////////////////////////////////////////

// Exactly one adapter named i_adapter is instantiated, selected at compile
// time by PITON_ARIANE. Both variants expose the same icache_*/dcache_*
// request and return channels; they differ only on the NoC side.
`ifdef PITON_ARIANE
// NOTE(review): inval_addr_i / inval_valid_i / inval_ready_o are not connected
// in this branch within the visible code - confirm inval_ready_o is driven
// elsewhere when PITON_ARIANE is defined.
wt_l15_adapter #(
    // No trailing comma after the last parameter: an empty parameter
    // assignment is not legal SystemVerilog and broke this branch.
    .CVA6Cfg(CVA6Cfg)
) i_adapter (
    .clk_i            (clk_i),
    .rst_ni           (rst_ni),
    .icache_data_req_i(icache_adapter_data_req),
    .icache_data_ack_o(adapter_icache_data_ack),
    .icache_data_i    (icache_adapter),
    .icache_rtrn_vld_o(adapter_icache_rtrn_vld),
    .icache_rtrn_o    (adapter_icache),
    .dcache_data_req_i(dcache_adapter_data_req),
    .dcache_data_ack_o(adapter_dcache_data_ack),
    .dcache_data_i    (dcache_adapter),
    .dcache_rtrn_vld_o(adapter_dcache_rtrn_vld),
    .dcache_rtrn_o    (adapter_dcache),
    .l15_req_o        (noc_req_o),
    .l15_rtrn_i       (noc_resp_i)
);
`else
wt_axi_adapter #(
    .CVA6Cfg  (CVA6Cfg),
    .axi_req_t(noc_req_t),
    .axi_rsp_t(noc_resp_t)
) i_adapter (
    .clk_i            (clk_i),
    .rst_ni           (rst_ni),
    .icache_data_req_i(icache_adapter_data_req),
    .icache_data_ack_o(adapter_icache_data_ack),
    .icache_data_i    (icache_adapter),
    .icache_rtrn_vld_o(adapter_icache_rtrn_vld),
    .icache_rtrn_o    (adapter_icache),
    .dcache_data_req_i(dcache_adapter_data_req),
    .dcache_data_ack_o(adapter_dcache_data_ack),
    .dcache_data_i    (dcache_adapter),
    .dcache_rtrn_vld_o(adapter_dcache_rtrn_vld),
    .dcache_rtrn_o    (adapter_dcache),
    .axi_req_o        (noc_req_o),
    .axi_resp_i       (noc_resp_i),
    // external invalidation channel (AXI variant only)
    .inval_addr_i     (inval_addr_i),
    .inval_valid_i    (inval_valid_i),
    .inval_ready_o    (inval_ready_o)
);
`endif
///////////////////////////////////////////////////////
// assertions
///////////////////////////////////////////////////////

// Simulation-only X checks, excluded from synthesis by the translate_off
// pragma and from Verilator builds by the `ifndef guard. Each check emits a
// warning (not an error) when valid data reduces to X.
//
// Note: unlike $fatal, $warning takes no leading finish number; its arguments
// are formatted like $display, so a leading literal would be printed as part
// of the message. The messages therefore start directly with the format
// string.

//pragma translate_off
`ifndef VERILATOR
// A valid instruction fetch response must not reduce to X.
a_invalid_instruction_fetch :
assert property (
    @(posedge clk_i) disable iff (!rst_ni) icache_dreq_o.valid |-> (|icache_dreq_o.data) !== 1'hX)
else
  $warning(
      "[l1 icache] reading invalid instructions: vaddr=%08X, data=%08X",
      icache_dreq_o.vaddr,
      icache_dreq_o.data
  );

// Per byte lane of the last request port (index NumPorts-1): when a request
// is issued and the lane's byte enable is set, the lane's write data must not
// reduce to X.
for (genvar j = 0; j < riscv::XLEN / 8; j++) begin : gen_invalid_write_assertion
  a_invalid_write_data :
  assert property (
      @(posedge clk_i) disable iff (!rst_ni) dcache_req_ports_i[NumPorts-1].data_req |-> dcache_req_ports_i[NumPorts-1].data_be[j] |-> (|dcache_req_ports_i[NumPorts-1].data_wdata[j*8+:8] !== 1'hX))
  else
    $warning(
        "[l1 dcache] writing invalid data: paddr=%016X, be=%02X, data=%016X, databe=%016X",
        {
          dcache_req_ports_i[NumPorts-1].address_tag, dcache_req_ports_i[NumPorts-1].address_index
        },
        dcache_req_ports_i[NumPorts-1].data_be,
        dcache_req_ports_i[NumPorts-1].data_wdata,
        dcache_req_ports_i[NumPorts-1].data_be & dcache_req_ports_i[NumPorts-1].data_wdata
    );
end

// Per remaining port (0 .. NumPorts-2): read data flagged valid and not
// killed must not reduce to X.
for (genvar j = 0; j < NumPorts - 1; j++) begin : gen_assertion
  a_invalid_read_data :
  assert property (
      @(posedge clk_i) disable iff (!rst_ni) dcache_req_ports_o[j].data_rvalid && ~dcache_req_ports_i[j].kill_req |-> (|dcache_req_ports_o[j].data_rdata) !== 1'hX)
  else
    $warning(
        "[l1 dcache] reading invalid data on port %01d: data=%016X",
        j,
        dcache_req_ports_o[j].data_rdata
    );
end
`endif
//pragma translate_on
endmodule // wt_cache_subsystem
endmodule // wt_cache_subsystem

View file

@ -13,325 +13,329 @@
// Description: Write-Through Data cache that is compatible with openpiton.
// Top level of the write-through data cache. It only wires sub-blocks
// together and contains no logic of its own apart from the read-priority
// constants:
//   - i_wt_dcache_missunit : miss handling, AMOs, flush, memory-side interface
//   - gen_rd_ports[k]      : one wt_dcache_ctrl per read port, k = 0..NumPorts-2
//   - i_wt_dcache_wbuffer  : write buffer, attached to the last port (NumPorts-1)
//   - i_wt_dcache_mem      : memory arrays, arbitration and tag comparison
module wt_dcache
  import ariane_pkg::*;
  import wt_cache_pkg::*;
#(
    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
    parameter int unsigned NumPorts = 4,  // number of miss ports
    // ID to be used for read and AMO transactions.
    // note that the write buffer uses all IDs up to DCACHE_MAX_TX-1 for write transactions
    parameter logic [CACHE_ID_WIDTH-1:0] RdAmoTxId = 1
) (
    input logic clk_i,  // Clock
    input logic rst_ni,  // Asynchronous reset active low

    // Cache management
    input logic enable_i,  // from CSR
    input logic flush_i,  // high until acknowledged
    output logic flush_ack_o,  // send a single cycle acknowledge signal when the cache is flushed
    output logic miss_o,  // we missed on a ld/st
    output logic wbuffer_empty_o,
    output logic wbuffer_not_ni_o,

    // AMO interface
    input amo_req_t amo_req_i,
    output amo_resp_t amo_resp_o,

    // Request ports
    input dcache_req_i_t [NumPorts-1:0] req_ports_i,
    output dcache_req_o_t [NumPorts-1:0] req_ports_o,

    // per-port valid bits at the missed index (also fed back to the miss unit)
    output logic [NumPorts-1:0][DCACHE_SET_ASSOC-1:0] miss_vld_bits_o,

    // memory-side request/return channels (driven/consumed by the miss unit)
    input logic mem_rtrn_vld_i,
    input dcache_rtrn_t mem_rtrn_i,
    output logic mem_data_req_o,
    input logic mem_data_ack_i,
    output dcache_req_t mem_data_o
);

  // miss unit <-> read controllers
  logic cache_en;

  // miss unit <-> memory
  logic wr_cl_vld;
  logic wr_cl_nc;
  logic [DCACHE_SET_ASSOC-1:0] wr_cl_we;
  logic [DCACHE_TAG_WIDTH-1:0] wr_cl_tag;
  logic [DCACHE_CL_IDX_WIDTH-1:0] wr_cl_idx;
  logic [DCACHE_OFFSET_WIDTH-1:0] wr_cl_off;
  logic [DCACHE_LINE_WIDTH-1:0] wr_cl_data;
  logic [DCACHE_USER_LINE_WIDTH-1:0] wr_cl_user;
  logic [DCACHE_LINE_WIDTH/8-1:0] wr_cl_data_be;
  logic [DCACHE_SET_ASSOC-1:0] wr_vld_bits;
  logic [DCACHE_SET_ASSOC-1:0] wr_req;
  logic wr_ack;
  logic [DCACHE_CL_IDX_WIDTH-1:0] wr_idx;
  logic [DCACHE_OFFSET_WIDTH-1:0] wr_off;
  riscv::xlen_t wr_data;
  logic [(riscv::XLEN/8)-1:0] wr_data_be;
  logic [DCACHE_USER_WIDTH-1:0] wr_user;

  // miss unit <-> controllers/wbuffer
  logic [NumPorts-1:0] miss_req;
  logic [NumPorts-1:0] miss_ack;
  logic [NumPorts-1:0] miss_nc;
  logic [NumPorts-1:0] miss_we;
  logic [NumPorts-1:0][riscv::XLEN-1:0] miss_wdata;
  logic [NumPorts-1:0][DCACHE_USER_WIDTH-1:0] miss_wuser;
  logic [NumPorts-1:0][riscv::PLEN-1:0] miss_paddr;
  logic [NumPorts-1:0][2:0] miss_size;
  logic [NumPorts-1:0][CACHE_ID_WIDTH-1:0] miss_id;
  logic [NumPorts-1:0] miss_replay;
  logic [NumPorts-1:0] miss_rtrn_vld;
  logic [CACHE_ID_WIDTH-1:0] miss_rtrn_id;

  // memory <-> read controllers/miss unit
  logic [NumPorts-1:0] rd_prio;
  logic [NumPorts-1:0] rd_tag_only;
  logic [NumPorts-1:0] rd_req;
  logic [NumPorts-1:0] rd_ack;
  logic [NumPorts-1:0][DCACHE_TAG_WIDTH-1:0] rd_tag;
  logic [NumPorts-1:0][DCACHE_CL_IDX_WIDTH-1:0] rd_idx;
  logic [NumPorts-1:0][DCACHE_OFFSET_WIDTH-1:0] rd_off;
  riscv::xlen_t rd_data;
  logic [DCACHE_USER_WIDTH-1:0] rd_user;
  logic [DCACHE_SET_ASSOC-1:0] rd_vld_bits;
  logic [DCACHE_SET_ASSOC-1:0] rd_hit_oh;

  // miss unit <-> wbuffer
  logic [DCACHE_MAX_TX-1:0][riscv::PLEN-1:0] tx_paddr;
  logic [DCACHE_MAX_TX-1:0] tx_vld;

  // wbuffer <-> memory
  wbuffer_t [DCACHE_WBUF_DEPTH-1:0] wbuffer_data;

  ///////////////////////////////////////////////////////
  // miss handling unit
  ///////////////////////////////////////////////////////

  wt_dcache_missunit #(
      .CVA6Cfg (CVA6Cfg),
      .AmoTxId (RdAmoTxId),
      .NumPorts(NumPorts)
  ) i_wt_dcache_missunit (
      .clk_i          (clk_i),
      .rst_ni         (rst_ni),
      .enable_i       (enable_i),
      .flush_i        (flush_i),
      .flush_ack_o    (flush_ack_o),
      .miss_o         (miss_o),
      .wbuffer_empty_i(wbuffer_empty_o),
      .cache_en_o     (cache_en),
      // amo interface
      .amo_req_i      (amo_req_i),
      .amo_resp_o     (amo_resp_o),
      // miss handling interface
      .miss_req_i     (miss_req),
      .miss_ack_o     (miss_ack),
      .miss_nc_i      (miss_nc),
      .miss_we_i      (miss_we),
      .miss_wdata_i   (miss_wdata),
      .miss_wuser_i   (miss_wuser),
      .miss_paddr_i   (miss_paddr),
      .miss_vld_bits_i(miss_vld_bits_o),
      .miss_size_i    (miss_size),
      .miss_id_i      (miss_id),
      .miss_replay_o  (miss_replay),
      .miss_rtrn_vld_o(miss_rtrn_vld),
      .miss_rtrn_id_o (miss_rtrn_id),
      // from writebuffer
      .tx_paddr_i     (tx_paddr),
      .tx_vld_i       (tx_vld),
      // cache memory interface
      .wr_cl_vld_o    (wr_cl_vld),
      .wr_cl_nc_o     (wr_cl_nc),
      .wr_cl_we_o     (wr_cl_we),
      .wr_cl_tag_o    (wr_cl_tag),
      .wr_cl_idx_o    (wr_cl_idx),
      .wr_cl_off_o    (wr_cl_off),
      .wr_cl_data_o   (wr_cl_data),
      .wr_cl_user_o   (wr_cl_user),
      .wr_cl_data_be_o(wr_cl_data_be),
      .wr_vld_bits_o  (wr_vld_bits),
      // memory interface
      .mem_rtrn_vld_i (mem_rtrn_vld_i),
      .mem_rtrn_i     (mem_rtrn_i),
      .mem_data_req_o (mem_data_req_o),
      .mem_data_ack_i (mem_data_ack_i),
      .mem_data_o     (mem_data_o)
  );

  ///////////////////////////////////////////////////////
  // read controllers (LD unit and PTW/MMU)
  ///////////////////////////////////////////////////////

  // note: last read port is used by the write buffer
  for (genvar k = 0; k < NumPorts - 1; k++) begin : gen_rd_ports
    // set these to high prio ports
    assign rd_prio[k] = 1'b1;

    // All controllers share the single rd_data/rd_user/rd_vld_bits/rd_hit_oh
    // return bus from the memory block; rd_ack[k] tells port k when its
    // request was the one served.
    wt_dcache_ctrl #(
        .CVA6Cfg(CVA6Cfg),
        .RdTxId (RdAmoTxId)
    ) i_wt_dcache_ctrl (
        .clk_i          (clk_i),
        .rst_ni         (rst_ni),
        .cache_en_i     (cache_en),
        // reqs from core
        .req_port_i     (req_ports_i[k]),
        .req_port_o     (req_ports_o[k]),
        // miss interface
        .miss_req_o     (miss_req[k]),
        .miss_ack_i     (miss_ack[k]),
        .miss_we_o      (miss_we[k]),
        .miss_wdata_o   (miss_wdata[k]),
        .miss_wuser_o   (miss_wuser[k]),
        .miss_vld_bits_o(miss_vld_bits_o[k]),
        .miss_paddr_o   (miss_paddr[k]),
        .miss_nc_o      (miss_nc[k]),
        .miss_size_o    (miss_size[k]),
        .miss_id_o      (miss_id[k]),
        .miss_replay_i  (miss_replay[k]),
        .miss_rtrn_vld_i(miss_rtrn_vld[k]),
        // used to detect readout mux collisions
        .wr_cl_vld_i    (wr_cl_vld),
        // cache mem interface
        .rd_tag_o       (rd_tag[k]),
        .rd_idx_o       (rd_idx[k]),
        .rd_off_o       (rd_off[k]),
        .rd_req_o       (rd_req[k]),
        .rd_tag_only_o  (rd_tag_only[k]),
        .rd_ack_i       (rd_ack[k]),
        .rd_data_i      (rd_data),
        .rd_user_i      (rd_user),
        .rd_vld_bits_i  (rd_vld_bits),
        .rd_hit_oh_i    (rd_hit_oh)
    );
  end

  ///////////////////////////////////////////////////////
  // store unit controller
  ///////////////////////////////////////////////////////

  // set read port to low priority
  assign rd_prio[NumPorts-1] = 1'b0;

  wt_dcache_wbuffer #(
      .CVA6Cfg(CVA6Cfg)
  ) i_wt_dcache_wbuffer (
      .clk_i          (clk_i),
      .rst_ni         (rst_ni),
      .empty_o        (wbuffer_empty_o),
      .not_ni_o       (wbuffer_not_ni_o),
      // TODO: fix this
      .cache_en_i     (cache_en),
      // .cache_en_i      ( '0                  ),
      // request ports from core (store unit)
      .req_port_i     (req_ports_i[NumPorts-1]),
      .req_port_o     (req_ports_o[NumPorts-1]),
      // miss unit interface
      .miss_req_o     (miss_req[NumPorts-1]),
      .miss_ack_i     (miss_ack[NumPorts-1]),
      .miss_we_o      (miss_we[NumPorts-1]),
      .miss_wdata_o   (miss_wdata[NumPorts-1]),
      .miss_wuser_o   (miss_wuser[NumPorts-1]),
      .miss_vld_bits_o(miss_vld_bits_o[NumPorts-1]),
      .miss_paddr_o   (miss_paddr[NumPorts-1]),
      .miss_nc_o      (miss_nc[NumPorts-1]),
      .miss_size_o    (miss_size[NumPorts-1]),
      .miss_id_o      (miss_id[NumPorts-1]),
      .miss_rtrn_vld_i(miss_rtrn_vld[NumPorts-1]),
      .miss_rtrn_id_i (miss_rtrn_id),
      // cache read interface
      .rd_tag_o       (rd_tag[NumPorts-1]),
      .rd_idx_o       (rd_idx[NumPorts-1]),
      .rd_off_o       (rd_off[NumPorts-1]),
      .rd_req_o       (rd_req[NumPorts-1]),
      .rd_tag_only_o  (rd_tag_only[NumPorts-1]),
      .rd_ack_i       (rd_ack[NumPorts-1]),
      .rd_data_i      (rd_data),
      .rd_vld_bits_i  (rd_vld_bits),
      .rd_hit_oh_i    (rd_hit_oh),
      // incoming invalidations/cache refills
      .wr_cl_vld_i    (wr_cl_vld),
      .wr_cl_idx_i    (wr_cl_idx),
      // single word write interface
      .wr_req_o       (wr_req),
      .wr_ack_i       (wr_ack),
      .wr_idx_o       (wr_idx),
      .wr_off_o       (wr_off),
      .wr_data_o      (wr_data),
      .wr_user_o      (wr_user),
      .wr_data_be_o   (wr_data_be),
      // write buffer forwarding
      .wbuffer_data_o (wbuffer_data),
      .tx_paddr_o     (tx_paddr),
      .tx_vld_o       (tx_vld)
  );

  ///////////////////////////////////////////////////////
  // memory arrays, arbitration and tag comparison
  ///////////////////////////////////////////////////////

  wt_dcache_mem #(
      .CVA6Cfg (CVA6Cfg),
      .NumPorts(NumPorts)
  ) i_wt_dcache_mem (
      .clk_i          (clk_i),
      .rst_ni         (rst_ni),
      // read ports
      .rd_prio_i      (rd_prio),
      .rd_tag_i       (rd_tag),
      .rd_idx_i       (rd_idx),
      .rd_off_i       (rd_off),
      .rd_req_i       (rd_req),
      .rd_tag_only_i  (rd_tag_only),
      .rd_ack_o       (rd_ack),
      .rd_vld_bits_o  (rd_vld_bits),
      .rd_hit_oh_o    (rd_hit_oh),
      .rd_data_o      (rd_data),
      .rd_user_o      (rd_user),
      // cacheline write port
      .wr_cl_vld_i    (wr_cl_vld),
      .wr_cl_nc_i     (wr_cl_nc),
      .wr_cl_we_i     (wr_cl_we),
      .wr_cl_tag_i    (wr_cl_tag),
      .wr_cl_idx_i    (wr_cl_idx),
      .wr_cl_off_i    (wr_cl_off),
      .wr_cl_data_i   (wr_cl_data),
      .wr_cl_user_i   (wr_cl_user),
      .wr_cl_data_be_i(wr_cl_data_be),
      .wr_vld_bits_i  (wr_vld_bits),
      // single word write port
      .wr_req_i       (wr_req),
      .wr_ack_o       (wr_ack),
      .wr_idx_i       (wr_idx),
      .wr_off_i       (wr_off),
      .wr_data_i      (wr_data),
      .wr_user_i      (wr_user),
      .wr_data_be_i   (wr_data_be),
      // write buffer forwarding
      .wbuffer_data_i (wbuffer_data)
  );

  ///////////////////////////////////////////////////////
  // assertions
  ///////////////////////////////////////////////////////

  // check for concurrency issues

  //pragma translate_off
`ifndef VERILATOR

  // While a flush is requested, an acknowledge may only be given when the
  // write buffer reports empty.
  flush :
  assert property (
      @(posedge clk_i) disable iff (!rst_ni) flush_i |-> flush_ack_o |-> wbuffer_empty_o)
  else $fatal(1, "[l1 dcache] flushed cache implies flushed wbuffer");

  initial begin
    // assert wrong parameterizations
    assert (DCACHE_INDEX_WIDTH <= 12)
    else $fatal(1, "[l1 dcache] cache index width can be maximum 12bit since VM uses 4kB pages");
  end
`endif
  //pragma translate_on

endmodule  // wt_dcache

View file

@ -13,95 +13,110 @@
// Description: DCache controller for read port
module wt_dcache_ctrl import ariane_pkg::*; import wt_cache_pkg::*; #(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter logic [CACHE_ID_WIDTH-1:0] RdTxId = 1
module wt_dcache_ctrl
import ariane_pkg::*;
import wt_cache_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter logic [CACHE_ID_WIDTH-1:0] RdTxId = 1
) (
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic cache_en_i,
// core request ports
input dcache_req_i_t req_port_i,
output dcache_req_o_t req_port_o,
// interface to miss handler
output logic miss_req_o,
input logic miss_ack_i,
output logic miss_we_o, // unused (set to 0)
output riscv::xlen_t miss_wdata_o, // unused (set to 0)
output logic [DCACHE_USER_WIDTH-1:0] miss_wuser_o, // unused (set to 0)
output logic [DCACHE_SET_ASSOC-1:0] miss_vld_bits_o, // valid bits at the missed index
output logic [riscv::PLEN-1:0] miss_paddr_o,
output logic miss_nc_o, // request to I/O space
output logic [2:0] miss_size_o, // 00: 1byte, 01: 2byte, 10: 4byte, 11: 8byte, 111: cacheline
output logic [CACHE_ID_WIDTH-1:0] miss_id_o, // set to constant ID
input logic miss_replay_i, // request collided with pending miss - have to replay the request
input logic miss_rtrn_vld_i, // signals that the miss has been served, asserted in the same cycle as when the data returns from memory
// used to detect readout mux collisions
input logic wr_cl_vld_i,
// cache memory interface
output logic [DCACHE_TAG_WIDTH-1:0] rd_tag_o, // tag in - comes one cycle later
output logic [DCACHE_CL_IDX_WIDTH-1:0] rd_idx_o,
output logic [DCACHE_OFFSET_WIDTH-1:0] rd_off_o,
output logic rd_req_o, // read the word at offset off_i[:3] in all ways
output logic rd_tag_only_o, // set to zero here
input logic rd_ack_i,
input riscv::xlen_t rd_data_i,
input logic [DCACHE_USER_WIDTH-1:0] rd_user_i,
input logic [DCACHE_SET_ASSOC-1:0] rd_vld_bits_i,
input logic [DCACHE_SET_ASSOC-1:0] rd_hit_oh_i
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic cache_en_i,
// core request ports
input dcache_req_i_t req_port_i,
output dcache_req_o_t req_port_o,
// interface to miss handler
output logic miss_req_o,
input logic miss_ack_i,
output logic miss_we_o, // unused (set to 0)
output riscv::xlen_t miss_wdata_o, // unused (set to 0)
output logic [DCACHE_USER_WIDTH-1:0] miss_wuser_o, // unused (set to 0)
output logic [DCACHE_SET_ASSOC-1:0] miss_vld_bits_o, // valid bits at the missed index
output logic [riscv::PLEN-1:0] miss_paddr_o,
output logic miss_nc_o, // request to I/O space
output logic [2:0] miss_size_o, // 00: 1byte, 01: 2byte, 10: 4byte, 11: 8byte, 111: cacheline
output logic [CACHE_ID_WIDTH-1:0] miss_id_o, // set to constant ID
input logic miss_replay_i, // request collided with pending miss - have to replay the request
input logic miss_rtrn_vld_i, // signals that the miss has been served, asserted in the same cycle as when the data returns from memory
// used to detect readout mux collisions
input logic wr_cl_vld_i,
// cache memory interface
output logic [DCACHE_TAG_WIDTH-1:0] rd_tag_o, // tag in - comes one cycle later
output logic [DCACHE_CL_IDX_WIDTH-1:0] rd_idx_o,
output logic [DCACHE_OFFSET_WIDTH-1:0] rd_off_o,
output logic rd_req_o, // read the word at offset off_i[:3] in all ways
output logic rd_tag_only_o, // set to zero here
input logic rd_ack_i,
input riscv::xlen_t rd_data_i,
input logic [DCACHE_USER_WIDTH-1:0] rd_user_i,
input logic [DCACHE_SET_ASSOC-1:0] rd_vld_bits_i,
input logic [DCACHE_SET_ASSOC-1:0] rd_hit_oh_i
);
// controller FSM
// Eight states encoded in 3 bits. IDLE waits for an incoming request; READ and
// REPLAY_READ share the hit-check branch of the FSM below.
typedef enum logic [2:0] {
  IDLE,
  READ,
  MISS_REQ,
  MISS_WAIT,
  KILL_MISS,
  KILL_MISS_ACK,
  REPLAY_REQ,
  REPLAY_READ
} state_e;
state_e state_d, state_q;

// Request bookkeeping as *_d (next) / *_q (registered) pairs.
// address_{tag,idx,off}: the three fields that are concatenated into miss_paddr_o.
logic [DCACHE_TAG_WIDTH-1:0] address_tag_d, address_tag_q;
logic [DCACHE_CL_IDX_WIDTH-1:0] address_idx_d, address_idx_q;
logic [DCACHE_OFFSET_WIDTH-1:0] address_off_d, address_off_q;
// id: request ID, returned to the core as data_rid
logic [DCACHE_TID_WIDTH-1:0] id_d, id_q;
// vld_data: valid bits sampled from rd_vld_bits_i, forwarded as miss_vld_bits_o
logic [DCACHE_SET_ASSOC-1:0] vld_data_d, vld_data_q;
// save_tag: selects req_port_i.address_tag as the next tag value
// rd_req / rd_ack: next/registered copies of rd_req_o and rd_ack_i
logic save_tag, rd_req_d, rd_req_q, rd_ack_d, rd_ack_q;
// data_size: request size, used as miss_size_o when miss_nc_o is set
logic [1:0] data_size_d, data_size_q;
///////////////////////////////////////////////////////
// misc
///////////////////////////////////////////////////////

// map address to tag/idx/offset and save
// Valid bits are sampled when rd_req_q is set; the tag is captured when the
// FSM asserts save_tag; index, offset, ID and size are captured when the
// request is granted (data_gnt). Otherwise each value holds its registered copy.
assign vld_data_d = (rd_req_q) ? rd_vld_bits_i : vld_data_q;
assign address_tag_d = (save_tag) ? req_port_i.address_tag : address_tag_q;
assign address_idx_d = (req_port_o.data_gnt) ? req_port_i.address_index[DCACHE_INDEX_WIDTH-1:DCACHE_OFFSET_WIDTH] : address_idx_q;
assign address_off_d = (req_port_o.data_gnt) ? req_port_i.address_index[DCACHE_OFFSET_WIDTH-1:0] : address_off_q;
assign id_d = (req_port_o.data_gnt) ? req_port_i.data_id : id_q;
assign data_size_d = (req_port_o.data_gnt) ? req_port_i.data_size : data_size_q;

// The cache memory is addressed with the *_d (next-state) values, not the
// registered ones.
assign rd_tag_o = address_tag_d;
assign rd_idx_o = address_idx_d;
assign rd_off_o = address_off_d;

// Read data and user bits are passed through from the memory; the response ID
// is the registered request ID.
assign req_port_o.data_rdata = rd_data_i;
assign req_port_o.data_ruser = rd_user_i;
assign req_port_o.data_rid = id_q;

// to miss unit
assign miss_vld_bits_o = vld_data_q;
assign miss_paddr_o = {address_tag_q, address_idx_q, address_off_q};
// non-cacheable requests use the size of the original request (the 2-bit
// data_size_q is zero-extended to 3 bits), everything else uses 3'b111
assign miss_size_o = (miss_nc_o) ? data_size_q : 3'b111;

// noncacheable if request goes to I/O space, or if cache is disabled
// The lookup address is the registered tag, zero-padded on the left to 64 bits
// and with the index/offset bits set to zero.
assign miss_nc_o = (~cache_en_i) | (~config_pkg::is_inside_cacheable_regions(
    CVA6Cfg,
    {{{64-DCACHE_TAG_WIDTH-DCACHE_INDEX_WIDTH}{1'b0}}, address_tag_q, {DCACHE_INDEX_WIDTH{1'b0}}}
));

// This controller only issues reads: the write-related miss outputs are tied
// to zero and a constant transaction ID is used.
assign miss_we_o = '0;
assign miss_wdata_o = '0;
assign miss_wuser_o = '0;
assign miss_id_o = RdTxId;

// inputs of the rd_req_q / rd_ack_q registers
assign rd_req_d = rd_req_o;
assign rd_ack_d = rd_ack_i;

// set to zero here
assign rd_tag_only_o = '0;
///////////////////////////////////////////////////////
// main control logic
///////////////////////////////////////////////////////
///////////////////////////////////////////////////////
// main control logic
///////////////////////////////////////////////////////
always_comb begin : p_fsm
// default assignment
@ -114,168 +129,171 @@ module wt_dcache_ctrl import ariane_pkg::*; import wt_cache_pkg::*; #(
// interfaces
unique case (state_q)
//////////////////////////////////
// wait for an incoming request
IDLE: begin
if (req_port_i.data_req) begin
rd_req_o = 1'b1;
// if read ack then ack the `req_port_o`, and goto `READ` state
if (rd_ack_i) begin
//////////////////////////////////
// wait for an incoming request
IDLE: begin
if (req_port_i.data_req) begin
rd_req_o = 1'b1;
// if read ack then ack the `req_port_o`, and goto `READ` state
if (rd_ack_i) begin
state_d = READ;
req_port_o.data_gnt = 1'b1;
end
end
end
//////////////////////////////////
// check whether we have a hit
// in case the cache is disabled,
// or in case the address is NC, we
// reuse the miss mechanism to handle
// the request
READ, REPLAY_READ: begin
// speculatively request cache line
rd_req_o = 1'b1;
// kill -> go back to IDLE
if (req_port_i.kill_req) begin
state_d = IDLE;
req_port_o.data_rvalid = 1'b1;
end else if (req_port_i.tag_valid | state_q == REPLAY_READ) begin
save_tag = (state_q != REPLAY_READ);
if (wr_cl_vld_i || !rd_ack_q) begin
state_d = REPLAY_REQ;
// we've got a hit
end else if ((|rd_hit_oh_i) && cache_en_i) begin
state_d = IDLE;
req_port_o.data_rvalid = 1'b1;
// we can handle another request
if (rd_ack_i && req_port_i.data_req) begin
state_d = READ;
req_port_o.data_gnt = 1'b1;
end
end
end
//////////////////////////////////
// check whether we have a hit
// in case the cache is disabled,
// or in case the address is NC, we
// reuse the miss mechanism to handle
// the request
READ, REPLAY_READ: begin
// speculatively request cache line
rd_req_o = 1'b1;
// kill -> go back to IDLE
if(req_port_i.kill_req) begin
state_d = IDLE;
req_port_o.data_rvalid = 1'b1;
end else if(req_port_i.tag_valid | state_q==REPLAY_READ) begin
save_tag = (state_q!=REPLAY_READ);
if(wr_cl_vld_i || !rd_ack_q) begin
state_d = REPLAY_REQ;
// we've got a hit
end else if((|rd_hit_oh_i) && cache_en_i) begin
state_d = IDLE;
req_port_o.data_rvalid = 1'b1;
// we can handle another request
if (rd_ack_i && req_port_i.data_req) begin
state_d = READ;
req_port_o.data_gnt = 1'b1;
end
// we've got a miss
end else begin
state_d = MISS_REQ;
end
end else begin
state_d = MISS_REQ;
end
end
//////////////////////////////////
// issue request
MISS_REQ: begin
miss_req_o = 1'b1;
end
//////////////////////////////////
// issue request
MISS_REQ: begin
miss_req_o = 1'b1;
if(req_port_i.kill_req) begin
req_port_o.data_rvalid = 1'b1;
if(miss_ack_i) begin
state_d = KILL_MISS;
end else begin
state_d = KILL_MISS_ACK;
end
end else if(miss_replay_i) begin
state_d = REPLAY_REQ;
end else if(miss_ack_i) begin
state_d = MISS_WAIT;
end
end
//////////////////////////////////
// wait until the memory transaction
// returns.
MISS_WAIT: begin
if(req_port_i.kill_req) begin
req_port_o.data_rvalid = 1'b1;
if(miss_rtrn_vld_i) begin
state_d = IDLE;
end else begin
state_d = KILL_MISS;
end
end else if(miss_rtrn_vld_i) begin
state_d = IDLE;
req_port_o.data_rvalid = 1'b1;
end
end
//////////////////////////////////
// replay read request
REPLAY_REQ: begin
rd_req_o = 1'b1;
if (req_port_i.kill_req) begin
req_port_o.data_rvalid = 1'b1;
state_d = IDLE;
end else if(rd_ack_i) begin
state_d = REPLAY_READ;
end
end
//////////////////////////////////
KILL_MISS_ACK: begin
miss_req_o = 1'b1;
// in this case the miss handler did not issue
// a transaction and we can safely go to idle
if(miss_replay_i) begin
state_d = IDLE;
end else if(miss_ack_i) begin
if (req_port_i.kill_req) begin
req_port_o.data_rvalid = 1'b1;
if (miss_ack_i) begin
state_d = KILL_MISS;
end else begin
state_d = KILL_MISS_ACK;
end
end else if (miss_replay_i) begin
state_d = REPLAY_REQ;
end else if (miss_ack_i) begin
state_d = MISS_WAIT;
end
//////////////////////////////////
// killed miss,
// wait until miss unit responds and
// go back to idle
KILL_MISS: begin
end
//////////////////////////////////
// wait until the memory transaction
// returns.
MISS_WAIT: begin
if (req_port_i.kill_req) begin
req_port_o.data_rvalid = 1'b1;
if (miss_rtrn_vld_i) begin
state_d = IDLE;
end else begin
state_d = KILL_MISS;
end
end else if (miss_rtrn_vld_i) begin
state_d = IDLE;
req_port_o.data_rvalid = 1'b1;
end
default: begin
// we should never get here
end
//////////////////////////////////
// replay read request
REPLAY_REQ: begin
rd_req_o = 1'b1;
if (req_port_i.kill_req) begin
req_port_o.data_rvalid = 1'b1;
state_d = IDLE;
end else if (rd_ack_i) begin
state_d = REPLAY_READ;
end
end
//////////////////////////////////
KILL_MISS_ACK: begin
miss_req_o = 1'b1;
// in this case the miss handler did not issue
// a transaction and we can safely go to idle
if (miss_replay_i) begin
state_d = IDLE;
end else if (miss_ack_i) begin
state_d = KILL_MISS;
end
end
//////////////////////////////////
// killed miss,
// wait until miss unit responds and
// go back to idle
KILL_MISS: begin
if (miss_rtrn_vld_i) begin
state_d = IDLE;
end
endcase // state_q
end
default: begin
// we should never get here
state_d = IDLE;
end
endcase // state_q
end
///////////////////////////////////////////////////////
// ff's
///////////////////////////////////////////////////////
///////////////////////////////////////////////////////
// ff's
///////////////////////////////////////////////////////
always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
if(!rst_ni) begin
state_q <= IDLE;
address_tag_q <= '0;
address_idx_q <= '0;
address_off_q <= '0;
id_q <= '0;
vld_data_q <= '0;
data_size_q <= '0;
rd_req_q <= '0;
rd_ack_q <= '0;
if (!rst_ni) begin
state_q <= IDLE;
address_tag_q <= '0;
address_idx_q <= '0;
address_off_q <= '0;
id_q <= '0;
vld_data_q <= '0;
data_size_q <= '0;
rd_req_q <= '0;
rd_ack_q <= '0;
end else begin
state_q <= state_d;
address_tag_q <= address_tag_d;
address_idx_q <= address_idx_d;
address_off_q <= address_off_d;
id_q <= id_d;
vld_data_q <= vld_data_d;
data_size_q <= data_size_d;
rd_req_q <= rd_req_d;
rd_ack_q <= rd_ack_d;
state_q <= state_d;
address_tag_q <= address_tag_d;
address_idx_q <= address_idx_d;
address_off_q <= address_off_d;
id_q <= id_d;
vld_data_q <= vld_data_d;
data_size_q <= data_size_d;
rd_req_q <= rd_req_d;
rd_ack_q <= rd_ack_d;
end
end
///////////////////////////////////////////////////////
// assertions
///////////////////////////////////////////////////////
///////////////////////////////////////////////////////
// assertions
///////////////////////////////////////////////////////
//pragma translate_off
//pragma translate_off
`ifndef VERILATOR
hot1: assert property (
@(posedge clk_i) disable iff (!rst_ni) (!rd_ack_i) |=> cache_en_i |-> $onehot0(rd_hit_oh_i))
else $fatal(1,"[l1 dcache ctrl] rd_hit_oh_i signal must be hot1");
hot1 :
assert property (@(posedge clk_i) disable iff (!rst_ni) (!rd_ack_i) |=> cache_en_i |-> $onehot0(
rd_hit_oh_i
))
else $fatal(1, "[l1 dcache ctrl] rd_hit_oh_i signal must be hot1");
initial begin
// assert wrong parameterizations
assert (DCACHE_INDEX_WIDTH<=12)
else $fatal(1,"[l1 dcache ctrl] cache index width can be maximum 12bit since VM uses 4kB pages");
assert (DCACHE_INDEX_WIDTH <= 12)
else
$fatal(1, "[l1 dcache ctrl] cache index width can be maximum 12bit since VM uses 4kB pages");
end
`endif
//pragma translate_on
//pragma translate_on
endmodule // wt_dcache_ctrl
endmodule // wt_dcache_ctrl

View file

@ -26,55 +26,57 @@
// low prio ports (rd_prio_i[port_nr] = 1'b0)
module wt_dcache_mem import ariane_pkg::*; import wt_cache_pkg::*; #(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int unsigned NumPorts = 3
module wt_dcache_mem
import ariane_pkg::*;
import wt_cache_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int unsigned NumPorts = 3
) (
input logic clk_i,
input logic rst_ni,
input logic clk_i,
input logic rst_ni,
// ports
input logic [NumPorts-1:0][DCACHE_TAG_WIDTH-1:0] rd_tag_i, // tag in - comes one cycle later
input logic [NumPorts-1:0][DCACHE_CL_IDX_WIDTH-1:0] rd_idx_i,
input logic [NumPorts-1:0][DCACHE_OFFSET_WIDTH-1:0] rd_off_i,
input logic [NumPorts-1:0] rd_req_i, // read the word at offset off_i[:3] in all ways
input logic [NumPorts-1:0] rd_tag_only_i, // only do a tag/valid lookup, no access to data arrays
input logic [NumPorts-1:0] rd_prio_i, // 0: low prio, 1: high prio
output logic [NumPorts-1:0] rd_ack_o,
output logic [DCACHE_SET_ASSOC-1:0] rd_vld_bits_o,
output logic [DCACHE_SET_ASSOC-1:0] rd_hit_oh_o,
output riscv::xlen_t rd_data_o,
output logic [DCACHE_USER_WIDTH-1:0] rd_user_o,
// ports
input logic [NumPorts-1:0][DCACHE_TAG_WIDTH-1:0] rd_tag_i, // tag in - comes one cycle later
input logic [NumPorts-1:0][DCACHE_CL_IDX_WIDTH-1:0] rd_idx_i,
input logic [NumPorts-1:0][DCACHE_OFFSET_WIDTH-1:0] rd_off_i,
input logic [NumPorts-1:0] rd_req_i, // read the word at offset off_i[:3] in all ways
input logic [NumPorts-1:0] rd_tag_only_i, // only do a tag/valid lookup, no access to data arrays
input logic [NumPorts-1:0] rd_prio_i, // 0: low prio, 1: high prio
output logic [NumPorts-1:0] rd_ack_o,
output logic [DCACHE_SET_ASSOC-1:0] rd_vld_bits_o,
output logic [DCACHE_SET_ASSOC-1:0] rd_hit_oh_o,
output riscv::xlen_t rd_data_o,
output logic [DCACHE_USER_WIDTH-1:0] rd_user_o,
// only available on port 0, uses address signals of port 0
input logic wr_cl_vld_i,
input logic wr_cl_nc_i, // noncacheable access
input logic [DCACHE_SET_ASSOC-1:0] wr_cl_we_i, // writes a full cacheline
input logic [DCACHE_TAG_WIDTH-1:0] wr_cl_tag_i,
input logic [DCACHE_CL_IDX_WIDTH-1:0] wr_cl_idx_i,
input logic [DCACHE_OFFSET_WIDTH-1:0] wr_cl_off_i,
input logic [DCACHE_LINE_WIDTH-1:0] wr_cl_data_i,
input logic [DCACHE_USER_LINE_WIDTH-1:0] wr_cl_user_i,
input logic [DCACHE_LINE_WIDTH/8-1:0] wr_cl_data_be_i,
input logic [DCACHE_SET_ASSOC-1:0] wr_vld_bits_i,
// only available on port 0, uses address signals of port 0
input logic wr_cl_vld_i,
input logic wr_cl_nc_i, // noncacheable access
input logic [ DCACHE_SET_ASSOC-1:0] wr_cl_we_i, // writes a full cacheline
input logic [ DCACHE_TAG_WIDTH-1:0] wr_cl_tag_i,
input logic [ DCACHE_CL_IDX_WIDTH-1:0] wr_cl_idx_i,
input logic [ DCACHE_OFFSET_WIDTH-1:0] wr_cl_off_i,
input logic [ DCACHE_LINE_WIDTH-1:0] wr_cl_data_i,
input logic [DCACHE_USER_LINE_WIDTH-1:0] wr_cl_user_i,
input logic [ DCACHE_LINE_WIDTH/8-1:0] wr_cl_data_be_i,
input logic [ DCACHE_SET_ASSOC-1:0] wr_vld_bits_i,
// separate port for single word write, no tag access
input logic [DCACHE_SET_ASSOC-1:0] wr_req_i, // write a single word to offset off_i[:3]
output logic wr_ack_o,
input logic [DCACHE_CL_IDX_WIDTH-1:0] wr_idx_i,
input logic [DCACHE_OFFSET_WIDTH-1:0] wr_off_i,
input riscv::xlen_t wr_data_i,
input logic [DCACHE_USER_WIDTH-1:0] wr_user_i,
input logic [(riscv::XLEN/8)-1:0] wr_data_be_i,
// separate port for single word write, no tag access
input logic [DCACHE_SET_ASSOC-1:0] wr_req_i, // write a single word to offset off_i[:3]
output logic wr_ack_o,
input logic [DCACHE_CL_IDX_WIDTH-1:0] wr_idx_i,
input logic [DCACHE_OFFSET_WIDTH-1:0] wr_off_i,
input riscv::xlen_t wr_data_i,
input logic [DCACHE_USER_WIDTH-1:0] wr_user_i,
input logic [(riscv::XLEN/8)-1:0] wr_data_be_i,
// forwarded wbuffer
input wbuffer_t [DCACHE_WBUF_DEPTH-1:0] wbuffer_data_i
// forwarded wbuffer
input wbuffer_t [DCACHE_WBUF_DEPTH-1:0] wbuffer_data_i
);
// functions
function automatic logic [DCACHE_NUM_BANKS-1:0] dcache_cl_bin2oh (
input logic [DCACHE_NUM_BANKS_WIDTH-1:0] in
);
function automatic logic [DCACHE_NUM_BANKS-1:0] dcache_cl_bin2oh(
input logic [DCACHE_NUM_BANKS_WIDTH-1:0] in);
logic [DCACHE_NUM_BANKS-1:0] out;
out = '0;
out[in] = 1'b1;
@ -83,44 +85,48 @@ module wt_dcache_mem import ariane_pkg::*; import wt_cache_pkg::*; #(
// number of bits needed to address AXI data. If AxiDataWidth equals XLEN this parameter
// is not needed. Therefore, increment it by one to avoid reverse range select during elaboration.
localparam AXI_OFFSET_WIDTH = CVA6Cfg.AxiDataWidth == riscv::XLEN ? $clog2(CVA6Cfg.AxiDataWidth/8)+1 : $clog2(CVA6Cfg.AxiDataWidth/8);
localparam AXI_OFFSET_WIDTH = CVA6Cfg.AxiDataWidth == riscv::XLEN ? $clog2(
CVA6Cfg.AxiDataWidth / 8
) + 1 : $clog2(
CVA6Cfg.AxiDataWidth / 8
);
logic [DCACHE_NUM_BANKS-1:0] bank_req;
logic [DCACHE_NUM_BANKS-1:0] bank_we;
logic [DCACHE_NUM_BANKS-1:0][DCACHE_SET_ASSOC-1:0][(riscv::XLEN/8)-1:0] bank_be;
logic [DCACHE_NUM_BANKS-1:0][DCACHE_CL_IDX_WIDTH-1:0] bank_idx;
logic [DCACHE_CL_IDX_WIDTH-1:0] bank_idx_d, bank_idx_q;
logic [DCACHE_OFFSET_WIDTH-1:0] bank_off_d, bank_off_q;
logic [DCACHE_NUM_BANKS-1:0] bank_req;
logic [DCACHE_NUM_BANKS-1:0] bank_we;
logic [DCACHE_NUM_BANKS-1:0][ DCACHE_SET_ASSOC-1:0][(riscv::XLEN/8)-1:0] bank_be;
logic [DCACHE_NUM_BANKS-1:0][DCACHE_CL_IDX_WIDTH-1:0] bank_idx;
logic [DCACHE_CL_IDX_WIDTH-1:0] bank_idx_d, bank_idx_q;
logic [DCACHE_OFFSET_WIDTH-1:0] bank_off_d, bank_off_q;
logic [DCACHE_NUM_BANKS-1:0][DCACHE_SET_ASSOC-1:0][riscv::XLEN-1:0] bank_wdata; //
logic [DCACHE_NUM_BANKS-1:0][DCACHE_SET_ASSOC-1:0][riscv::XLEN-1:0] bank_rdata; //
logic [DCACHE_SET_ASSOC-1:0][riscv::XLEN-1:0] rdata_cl; // selected word from each cacheline
logic [DCACHE_NUM_BANKS-1:0][DCACHE_SET_ASSOC-1:0][DCACHE_USER_WIDTH-1:0] bank_wuser; //
logic [DCACHE_NUM_BANKS-1:0][DCACHE_SET_ASSOC-1:0][DCACHE_USER_WIDTH-1:0] bank_ruser; //
logic [DCACHE_NUM_BANKS-1:0][DCACHE_SET_ASSOC-1:0][riscv::XLEN-1:0] bank_wdata; //
logic [DCACHE_NUM_BANKS-1:0][DCACHE_SET_ASSOC-1:0][riscv::XLEN-1:0] bank_rdata; //
logic [DCACHE_SET_ASSOC-1:0][riscv::XLEN-1:0] rdata_cl; // selected word from each cacheline
logic [DCACHE_NUM_BANKS-1:0][DCACHE_SET_ASSOC-1:0][DCACHE_USER_WIDTH-1:0] bank_wuser; //
logic [DCACHE_NUM_BANKS-1:0][DCACHE_SET_ASSOC-1:0][DCACHE_USER_WIDTH-1:0] bank_ruser; //
logic [DCACHE_SET_ASSOC-1:0][DCACHE_USER_WIDTH-1:0] ruser_cl; // selected word from each cacheline
logic [DCACHE_TAG_WIDTH-1:0] rd_tag;
logic [DCACHE_SET_ASSOC-1:0] vld_req; // bit enable for valid regs
logic vld_we; // valid bits write enable
logic [DCACHE_SET_ASSOC-1:0] vld_wdata; // valid bits to write
logic [DCACHE_TAG_WIDTH-1:0] rd_tag;
logic [DCACHE_SET_ASSOC-1:0] vld_req; // bit enable for valid regs
logic vld_we; // valid bits write enable
logic [DCACHE_SET_ASSOC-1:0] vld_wdata; // valid bits to write
logic [DCACHE_SET_ASSOC-1:0][DCACHE_TAG_WIDTH-1:0] tag_rdata; // these are the tags coming from the tagmem
logic [DCACHE_CL_IDX_WIDTH-1:0] vld_addr; // valid bit
logic [DCACHE_CL_IDX_WIDTH-1:0] vld_addr; // valid bit
logic [$clog2(NumPorts)-1:0] vld_sel_d, vld_sel_q;
logic [$clog2(NumPorts)-1:0] vld_sel_d, vld_sel_q;
logic [DCACHE_WBUF_DEPTH-1:0] wbuffer_hit_oh;
logic [(riscv::XLEN/8)-1:0] wbuffer_be;
riscv::xlen_t wbuffer_rdata, rdata;
logic [DCACHE_USER_WIDTH-1:0] wbuffer_ruser, ruser;
logic [riscv::PLEN-1:0] wbuffer_cmp_addr;
logic [DCACHE_WBUF_DEPTH-1:0] wbuffer_hit_oh;
logic [ (riscv::XLEN/8)-1:0] wbuffer_be;
riscv::xlen_t wbuffer_rdata, rdata;
logic [DCACHE_USER_WIDTH-1:0] wbuffer_ruser, ruser;
logic [riscv::PLEN-1:0] wbuffer_cmp_addr;
logic cmp_en_d, cmp_en_q;
logic rd_acked;
logic [NumPorts-1:0] bank_collision, rd_req_masked, rd_req_prio;
logic cmp_en_d, cmp_en_q;
logic rd_acked;
logic [NumPorts-1:0] bank_collision, rd_req_masked, rd_req_prio;
///////////////////////////////////////////////////////
// arbiter
///////////////////////////////////////////////////////
///////////////////////////////////////////////////////
// arbiter
///////////////////////////////////////////////////////
// Priority is highest for lowest read port index
//
@ -130,8 +136,8 @@ module wt_dcache_mem import ariane_pkg::*; import wt_cache_pkg::*; #(
// [way0, w0] [way1, w0] .. [way0, w1] [way1, w1] ..
// byte enable mapping
for (genvar k=0;k<DCACHE_NUM_BANKS;k++) begin : gen_bank
for (genvar j=0;j<DCACHE_SET_ASSOC;j++) begin : gen_bank_way
for (genvar k = 0; k < DCACHE_NUM_BANKS; k++) begin : gen_bank
for (genvar j = 0; j < DCACHE_SET_ASSOC; j++) begin : gen_bank_way
assign bank_be[k][j] = (wr_cl_we_i[j] & wr_cl_vld_i) ? wr_cl_data_be_i[k*(riscv::XLEN/8) +: (riscv::XLEN/8)] :
(wr_req_i[j] & wr_ack_o) ? wr_data_be_i :
'0;
@ -142,12 +148,12 @@ module wt_dcache_mem import ariane_pkg::*; import wt_cache_pkg::*; #(
end
end
assign vld_wdata = wr_vld_bits_i;
assign vld_addr = (wr_cl_vld_i) ? wr_cl_idx_i : rd_idx_i[vld_sel_d];
assign rd_tag = rd_tag_i[vld_sel_q]; //delayed by one cycle
assign bank_off_d = (wr_cl_vld_i) ? wr_cl_off_i : rd_off_i[vld_sel_d];
assign bank_idx_d = (wr_cl_vld_i) ? wr_cl_idx_i : rd_idx_i[vld_sel_d];
assign vld_req = (wr_cl_vld_i) ? wr_cl_we_i : (rd_acked) ? '1 : '0;
assign vld_wdata = wr_vld_bits_i;
assign vld_addr = (wr_cl_vld_i) ? wr_cl_idx_i : rd_idx_i[vld_sel_d];
assign rd_tag = rd_tag_i[vld_sel_q]; //delayed by one cycle
assign bank_off_d = (wr_cl_vld_i) ? wr_cl_off_i : rd_off_i[vld_sel_d];
assign bank_idx_d = (wr_cl_vld_i) ? wr_cl_idx_i : rd_idx_i[vld_sel_d];
assign vld_req = (wr_cl_vld_i) ? wr_cl_we_i : (rd_acked) ? '1 : '0;
// priority masking
@ -157,20 +163,20 @@ module wt_dcache_mem import ariane_pkg::*; import wt_cache_pkg::*; #(
logic rd_req;
rr_arb_tree #(
.NumIn (NumPorts),
.DataWidth (1)
.NumIn (NumPorts),
.DataWidth(1)
) i_rr_arb_tree (
.clk_i (clk_i ),
.rst_ni (rst_ni ),
.flush_i('0 ),
.rr_i ('0 ),
.req_i (rd_req_masked ),
.gnt_o (rd_ack_o ),
.data_i ('0 ),
.gnt_i (~wr_cl_vld_i ),
.req_o (rd_req ),
.data_o ( ),
.idx_o (vld_sel_d )
.clk_i (clk_i),
.rst_ni (rst_ni),
.flush_i('0),
.rr_i ('0),
.req_i (rd_req_masked),
.gnt_o (rd_ack_o),
.data_i ('0),
.gnt_i (~wr_cl_vld_i),
.req_o (rd_req),
.data_o (),
.idx_o (vld_sel_d)
);
assign rd_acked = rd_req & ~wr_cl_vld_i;
@ -180,42 +186,43 @@ module wt_dcache_mem import ariane_pkg::*; import wt_cache_pkg::*; #(
bank_req = '0;
wr_ack_o = '0;
bank_we = '0;
bank_idx = '{default:wr_idx_i};
bank_idx = '{default: wr_idx_i};
for(int k=0; k<NumPorts; k++) begin
for (int k = 0; k < NumPorts; k++) begin
bank_collision[k] = rd_off_i[k][DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES] == wr_off_i[DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES];
end
if(wr_cl_vld_i & |wr_cl_we_i) begin
if (wr_cl_vld_i & |wr_cl_we_i) begin
bank_req = '1;
bank_we = '1;
bank_idx = '{default:wr_cl_idx_i};
bank_idx = '{default: wr_cl_idx_i};
end else begin
if(rd_acked) begin
if(!rd_tag_only_i[vld_sel_d]) begin
bank_req = dcache_cl_bin2oh(rd_off_i[vld_sel_d][DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]);
if (rd_acked) begin
if (!rd_tag_only_i[vld_sel_d]) begin
bank_req =
dcache_cl_bin2oh(rd_off_i[vld_sel_d][DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]);
bank_idx[rd_off_i[vld_sel_d][DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]] = rd_idx_i[vld_sel_d];
end
end
if(|wr_req_i) begin
if(rd_tag_only_i[vld_sel_d] || !(rd_ack_o[vld_sel_d] && bank_collision[vld_sel_d])) begin
if (|wr_req_i) begin
if (rd_tag_only_i[vld_sel_d] || !(rd_ack_o[vld_sel_d] && bank_collision[vld_sel_d])) begin
wr_ack_o = 1'b1;
bank_req |= dcache_cl_bin2oh(wr_off_i[DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]);
bank_we = dcache_cl_bin2oh(wr_off_i[DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]);
bank_we = dcache_cl_bin2oh(wr_off_i[DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]);
end
end
end
end
///////////////////////////////////////////////////////
// tag comparison, hit generation, readout muxes
///////////////////////////////////////////////////////
///////////////////////////////////////////////////////
// tag comparison, hit generation, readout muxes
///////////////////////////////////////////////////////
logic [DCACHE_OFFSET_WIDTH-riscv::XLEN_ALIGN_BYTES-1:0] wr_cl_off;
logic [DCACHE_OFFSET_WIDTH-riscv::XLEN_ALIGN_BYTES-1:0] wr_cl_nc_off;
logic [$clog2(DCACHE_WBUF_DEPTH)-1:0] wbuffer_hit_idx;
logic [$clog2(DCACHE_SET_ASSOC)-1:0] rd_hit_idx;
logic [DCACHE_OFFSET_WIDTH-riscv::XLEN_ALIGN_BYTES-1:0] wr_cl_off;
logic [DCACHE_OFFSET_WIDTH-riscv::XLEN_ALIGN_BYTES-1:0] wr_cl_nc_off;
logic [ $clog2(DCACHE_WBUF_DEPTH)-1:0] wbuffer_hit_idx;
logic [ $clog2(DCACHE_SET_ASSOC)-1:0] rd_hit_idx;
assign cmp_en_d = (|vld_req) & ~vld_we;
@ -223,32 +230,32 @@ module wt_dcache_mem import ariane_pkg::*; import wt_cache_pkg::*; #(
assign wbuffer_cmp_addr = (wr_cl_vld_i) ? {wr_cl_tag_i, wr_cl_idx_i, wr_cl_off_i} :
{rd_tag, bank_idx_q, bank_off_q};
// hit generation
for (genvar i=0;i<DCACHE_SET_ASSOC;i++) begin : gen_tag_cmpsel
for (genvar i = 0; i < DCACHE_SET_ASSOC; i++) begin : gen_tag_cmpsel
// tag comparison of ways >0
assign rd_hit_oh_o[i] = (rd_tag == tag_rdata[i]) & rd_vld_bits_o[i] & cmp_en_q;
assign rd_hit_oh_o[i] = (rd_tag == tag_rdata[i]) & rd_vld_bits_o[i] & cmp_en_q;
// byte offset mux of ways >0
assign rdata_cl[i] = bank_rdata[bank_off_q[DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]][i];
assign ruser_cl[i] = bank_ruser[bank_off_q[DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]][i];
end
for(genvar k=0; k<DCACHE_WBUF_DEPTH; k++) begin : gen_wbuffer_hit
for (genvar k = 0; k < DCACHE_WBUF_DEPTH; k++) begin : gen_wbuffer_hit
assign wbuffer_hit_oh[k] = (|wbuffer_data_i[k].valid) & (wbuffer_data_i[k].wtag == (wbuffer_cmp_addr >> riscv::XLEN_ALIGN_BYTES));
end
lzc #(
.WIDTH ( DCACHE_WBUF_DEPTH )
.WIDTH(DCACHE_WBUF_DEPTH)
) i_lzc_wbuffer_hit (
.in_i ( wbuffer_hit_oh ),
.cnt_o ( wbuffer_hit_idx ),
.empty_o ( )
.in_i (wbuffer_hit_oh),
.cnt_o (wbuffer_hit_idx),
.empty_o()
);
lzc #(
.WIDTH ( DCACHE_SET_ASSOC )
.WIDTH(DCACHE_SET_ASSOC)
) i_lzc_rd_hit (
.in_i ( rd_hit_oh_o ),
.cnt_o ( rd_hit_idx ),
.empty_o ( )
.in_i (rd_hit_oh_o),
.cnt_o (rd_hit_idx),
.empty_o()
);
assign wbuffer_rdata = wbuffer_data_i[wbuffer_hit_idx].data;
@ -256,18 +263,18 @@ module wt_dcache_mem import ariane_pkg::*; import wt_cache_pkg::*; #(
assign wbuffer_be = (|wbuffer_hit_oh) ? wbuffer_data_i[wbuffer_hit_idx].valid : '0;
if (CVA6Cfg.NOCType == config_pkg::NOC_TYPE_AXI4_ATOP) begin : gen_axi_offset
// In case of an uncached read, return the desired XLEN-bit segment of the most recent AXI read
assign wr_cl_off = (wr_cl_nc_i) ? (CVA6Cfg.AxiDataWidth == riscv::XLEN) ? '0 :
// In case of an uncached read, return the desired XLEN-bit segment of the most recent AXI read
assign wr_cl_off = (wr_cl_nc_i) ? (CVA6Cfg.AxiDataWidth == riscv::XLEN) ? '0 :
wr_cl_off_i[AXI_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES] :
wr_cl_off_i[DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES];
end else begin : gen_piton_offset
assign wr_cl_off = wr_cl_off_i[DCACHE_OFFSET_WIDTH-1:3];
end else begin : gen_piton_offset
assign wr_cl_off = wr_cl_off_i[DCACHE_OFFSET_WIDTH-1:3];
end
always_comb begin
if (wr_cl_vld_i) begin
rdata = wr_cl_data_i[wr_cl_off*riscv::XLEN +: riscv::XLEN];
ruser = wr_cl_user_i[wr_cl_off*DCACHE_USER_WIDTH +: DCACHE_USER_WIDTH];
rdata = wr_cl_data_i[wr_cl_off*riscv::XLEN+:riscv::XLEN];
ruser = wr_cl_user_i[wr_cl_off*DCACHE_USER_WIDTH+:DCACHE_USER_WIDTH];
end else begin
rdata = rdata_cl[rd_hit_idx];
ruser = ruser_cl[rd_hit_idx];
@ -275,37 +282,37 @@ module wt_dcache_mem import ariane_pkg::*; import wt_cache_pkg::*; #(
end
// overlay bytes that hit in the write buffer
for(genvar k=0; k<(riscv::XLEN/8); k++) begin : gen_rd_data
assign rd_data_o[8*k +: 8] = (wbuffer_be[k]) ? wbuffer_rdata[8*k +: 8] : rdata[8*k +: 8];
for (genvar k = 0; k < (riscv::XLEN / 8); k++) begin : gen_rd_data
assign rd_data_o[8*k+:8] = (wbuffer_be[k]) ? wbuffer_rdata[8*k+:8] : rdata[8*k+:8];
end
for(genvar k=0; k<DCACHE_USER_WIDTH/8; k++) begin : gen_rd_user
assign rd_user_o[8*k +: 8] = (wbuffer_be[k]) ? wbuffer_ruser[8*k +: 8] : ruser[8*k +: 8];
for (genvar k = 0; k < DCACHE_USER_WIDTH / 8; k++) begin : gen_rd_user
assign rd_user_o[8*k+:8] = (wbuffer_be[k]) ? wbuffer_ruser[8*k+:8] : ruser[8*k+:8];
end
///////////////////////////////////////////////////////
// memory arrays and regs
///////////////////////////////////////////////////////
///////////////////////////////////////////////////////
// memory arrays and regs
///////////////////////////////////////////////////////
logic [DCACHE_TAG_WIDTH:0] vld_tag_rdata [DCACHE_SET_ASSOC-1:0];
logic [DCACHE_TAG_WIDTH:0] vld_tag_rdata[DCACHE_SET_ASSOC-1:0];
for (genvar k = 0; k < DCACHE_NUM_BANKS; k++) begin : gen_data_banks
// Data RAM
sram #(
.USER_WIDTH ( ariane_pkg::DCACHE_SET_ASSOC * DATA_USER_WIDTH ),
.DATA_WIDTH ( ariane_pkg::DCACHE_SET_ASSOC * riscv::XLEN ),
.USER_EN ( ariane_pkg::DATA_USER_EN ),
.NUM_WORDS ( wt_cache_pkg::DCACHE_NUM_WORDS )
.USER_WIDTH(ariane_pkg::DCACHE_SET_ASSOC * DATA_USER_WIDTH),
.DATA_WIDTH(ariane_pkg::DCACHE_SET_ASSOC * riscv::XLEN),
.USER_EN (ariane_pkg::DATA_USER_EN),
.NUM_WORDS (wt_cache_pkg::DCACHE_NUM_WORDS)
) i_data_sram (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.req_i ( bank_req [k] ),
.we_i ( bank_we [k] ),
.addr_i ( bank_idx [k] ),
.wuser_i ( bank_wuser [k] ),
.wdata_i ( bank_wdata [k] ),
.be_i ( bank_be [k] ),
.ruser_o ( bank_ruser [k] ),
.rdata_o ( bank_rdata [k] )
.clk_i (clk_i),
.rst_ni (rst_ni),
.req_i (bank_req[k]),
.we_i (bank_we[k]),
.addr_i (bank_idx[k]),
.wuser_i(bank_wuser[k]),
.wdata_i(bank_wdata[k]),
.be_i (bank_be[k]),
.ruser_o(bank_ruser[k]),
.rdata_o(bank_rdata[k])
);
end
@ -316,25 +323,25 @@ module wt_dcache_mem import ariane_pkg::*; import wt_cache_pkg::*; #(
// Tag RAM
sram #(
// tag + valid bit
.DATA_WIDTH ( ariane_pkg::DCACHE_TAG_WIDTH + 1 ),
.NUM_WORDS ( wt_cache_pkg::DCACHE_NUM_WORDS )
// tag + valid bit
.DATA_WIDTH(ariane_pkg::DCACHE_TAG_WIDTH + 1),
.NUM_WORDS (wt_cache_pkg::DCACHE_NUM_WORDS)
) i_tag_sram (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.req_i ( vld_req[i] ),
.we_i ( vld_we ),
.addr_i ( vld_addr ),
.wuser_i ( '0 ),
.wdata_i ( {vld_wdata[i], wr_cl_tag_i} ),
.be_i ( '1 ),
.ruser_o ( ),
.rdata_o ( vld_tag_rdata[i] )
.clk_i (clk_i),
.rst_ni (rst_ni),
.req_i (vld_req[i]),
.we_i (vld_we),
.addr_i (vld_addr),
.wuser_i('0),
.wdata_i({vld_wdata[i], wr_cl_tag_i}),
.be_i ('1),
.ruser_o(),
.rdata_o(vld_tag_rdata[i])
);
end
always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
if(!rst_ni) begin
if (!rst_ni) begin
bank_idx_q <= '0;
bank_off_q <= '0;
vld_sel_q <= '0;
@ -342,56 +349,63 @@ module wt_dcache_mem import ariane_pkg::*; import wt_cache_pkg::*; #(
end else begin
bank_idx_q <= bank_idx_d;
bank_off_q <= bank_off_d;
vld_sel_q <= vld_sel_d ;
vld_sel_q <= vld_sel_d;
cmp_en_q <= cmp_en_d;
end
end
///////////////////////////////////////////////////////
// assertions
///////////////////////////////////////////////////////
///////////////////////////////////////////////////////
// assertions
///////////////////////////////////////////////////////
//pragma translate_off
//pragma translate_off
`ifndef VERILATOR
initial begin
cach_line_width_axi: assert (DCACHE_LINE_WIDTH >= CVA6Cfg.AxiDataWidth)
else $fatal(1, "[l1 dcache] cache line size needs to be greater or equal AXI data width");
cach_line_width_axi :
assert (DCACHE_LINE_WIDTH >= CVA6Cfg.AxiDataWidth)
else $fatal(1, "[l1 dcache] cache line size needs to be greater or equal AXI data width");
end
initial begin
axi_xlen: assert (CVA6Cfg.AxiDataWidth >= riscv::XLEN)
else $fatal(1, "[l1 dcache] AXI data width needs to be greater or equal XLEN");
axi_xlen :
assert (CVA6Cfg.AxiDataWidth >= riscv::XLEN)
else $fatal(1, "[l1 dcache] AXI data width needs to be greater or equal XLEN");
end
initial begin
cach_line_width_xlen: assert (DCACHE_LINE_WIDTH > riscv::XLEN)
else $fatal(1, "[l1 dcache] cache_line_size needs to be greater than XLEN");
cach_line_width_xlen :
assert (DCACHE_LINE_WIDTH > riscv::XLEN)
else $fatal(1, "[l1 dcache] cache_line_size needs to be greater than XLEN");
end
hit_hot1: assert property (
@(posedge clk_i) disable iff (!rst_ni) &vld_req |-> !vld_we |=> $onehot0(rd_hit_oh_o))
else $fatal(1,"[l1 dcache] rd_hit_oh_o signal must be hot1");
hit_hot1 :
assert property (@(posedge clk_i) disable iff (!rst_ni) &vld_req |-> !vld_we |=> $onehot0(
rd_hit_oh_o
))
else $fatal(1, "[l1 dcache] rd_hit_oh_o signal must be hot1");
word_write_hot1: assert property (
@(posedge clk_i) disable iff (!rst_ni) wr_ack_o |-> $onehot0(wr_req_i))
else $fatal(1,"[l1 dcache] wr_req_i signal must be hot1");
word_write_hot1 :
assert property (@(posedge clk_i) disable iff (!rst_ni) wr_ack_o |-> $onehot0(wr_req_i))
else $fatal(1, "[l1 dcache] wr_req_i signal must be hot1");
wbuffer_hit_hot1: assert property (
@(posedge clk_i) disable iff (!rst_ni) &vld_req |-> !vld_we |=> $onehot0(wbuffer_hit_oh))
else $fatal(1,"[l1 dcache] wbuffer_hit_oh signal must be hot1");
wbuffer_hit_hot1 :
assert property (@(posedge clk_i) disable iff (!rst_ni) &vld_req |-> !vld_we |=> $onehot0(
wbuffer_hit_oh
))
else $fatal(1, "[l1 dcache] wbuffer_hit_oh signal must be hot1");
// this is only used for verification!
logic vld_mirror[wt_cache_pkg::DCACHE_NUM_WORDS-1:0][ariane_pkg::DCACHE_SET_ASSOC-1:0];
logic vld_mirror[wt_cache_pkg::DCACHE_NUM_WORDS-1:0][ariane_pkg::DCACHE_SET_ASSOC-1:0];
logic [ariane_pkg::DCACHE_TAG_WIDTH-1:0] tag_mirror[wt_cache_pkg::DCACHE_NUM_WORDS-1:0][ariane_pkg::DCACHE_SET_ASSOC-1:0];
logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] tag_write_duplicate_test;
always_ff @(posedge clk_i or negedge rst_ni) begin : p_mirror
if(!rst_ni) begin
vld_mirror <= '{default:'0};
tag_mirror <= '{default:'0};
if (!rst_ni) begin
vld_mirror <= '{default: '0};
tag_mirror <= '{default: '0};
end else begin
for (int i = 0; i < DCACHE_SET_ASSOC; i++) begin
if(vld_req[i] & vld_we) begin
if (vld_req[i] & vld_we) begin
vld_mirror[vld_addr][i] <= vld_wdata[i];
tag_mirror[vld_addr][i] <= wr_cl_tag_i;
end
@ -400,14 +414,15 @@ module wt_dcache_mem import ariane_pkg::*; import wt_cache_pkg::*; #(
end
for (genvar i = 0; i < DCACHE_SET_ASSOC; i++) begin : gen_tag_dubl_test
assign tag_write_duplicate_test[i] = (tag_mirror[vld_addr][i] == wr_cl_tag_i) & vld_mirror[vld_addr][i] & (|vld_wdata);
assign tag_write_duplicate_test[i] = (tag_mirror[vld_addr][i] == wr_cl_tag_i) & vld_mirror[vld_addr][i] & (|vld_wdata);
end
tag_write_duplicate: assert property (
tag_write_duplicate :
assert property (
@(posedge clk_i) disable iff (!rst_ni) |vld_req |-> vld_we |-> !(|tag_write_duplicate_test))
else $fatal(1,"[l1 dcache] cannot allocate a CL that is already present in the cache");
else $fatal(1, "[l1 dcache] cannot allocate a CL that is already present in the cache");
`endif
//pragma translate_on
//pragma translate_on
endmodule // wt_dcache_mem
endmodule // wt_dcache_mem

View file

@ -14,66 +14,68 @@
// is that the port with the highest index issues writes instead of reads.
module wt_dcache_missunit import ariane_pkg::*; import wt_cache_pkg::*; #(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter logic [CACHE_ID_WIDTH-1:0] AmoTxId = 1, // TX id to be used for AMOs
parameter int unsigned NumPorts = 4 // number of miss ports
module wt_dcache_missunit
import ariane_pkg::*;
import wt_cache_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter logic [CACHE_ID_WIDTH-1:0] AmoTxId = 1, // TX id to be used for AMOs
parameter int unsigned NumPorts = 4 // number of miss ports
) (
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
// cache management, signals from/to core
input logic enable_i, // from CSR
input logic flush_i, // flush request, this waits for pending tx (write, read) to finish and will clear the cache
output logic flush_ack_o, // send a single cycle acknowledge signal when the cache is flushed
output logic miss_o, // we missed on a ld/st
// local cache management signals
input logic wbuffer_empty_i,
output logic cache_en_o, // local cache enable signal
// AMO interface
input amo_req_t amo_req_i,
output amo_resp_t amo_resp_o,
// miss handling interface (ld, ptw, wbuffer)
input logic [NumPorts-1:0] miss_req_i,
output logic [NumPorts-1:0] miss_ack_o,
input logic [NumPorts-1:0] miss_nc_i,
input logic [NumPorts-1:0] miss_we_i,
input logic [NumPorts-1:0][riscv::XLEN-1:0] miss_wdata_i,
input logic [NumPorts-1:0][DCACHE_USER_WIDTH-1:0] miss_wuser_i,
input logic [NumPorts-1:0][riscv::PLEN-1:0] miss_paddr_i,
input logic [NumPorts-1:0][DCACHE_SET_ASSOC-1:0] miss_vld_bits_i,
input logic [NumPorts-1:0][2:0] miss_size_i,
input logic [NumPorts-1:0][CACHE_ID_WIDTH-1:0] miss_id_i, // used as transaction ID
// signals that the request collided with a pending read
output logic [NumPorts-1:0] miss_replay_o,
// signals response from memory
output logic [NumPorts-1:0] miss_rtrn_vld_o,
output logic [CACHE_ID_WIDTH-1:0] miss_rtrn_id_o, // only used for writes, set to zero for reads
// from writebuffer
input logic [DCACHE_MAX_TX-1:0][riscv::PLEN-1:0] tx_paddr_i, // used to check for address collisions with read operations
input logic [DCACHE_MAX_TX-1:0] tx_vld_i, // used to check for address collisions with read operations
// write interface to cache memory
output logic wr_cl_vld_o, // writes a full cacheline
output logic wr_cl_nc_o, // writes a full cacheline
output logic [DCACHE_SET_ASSOC-1:0] wr_cl_we_o, // writes a full cacheline
output logic [DCACHE_TAG_WIDTH-1:0] wr_cl_tag_o,
output logic [DCACHE_CL_IDX_WIDTH-1:0] wr_cl_idx_o,
output logic [DCACHE_OFFSET_WIDTH-1:0] wr_cl_off_o,
output logic [DCACHE_LINE_WIDTH-1:0] wr_cl_data_o,
output logic [DCACHE_USER_LINE_WIDTH-1:0] wr_cl_user_o,
output logic [DCACHE_LINE_WIDTH/8-1:0] wr_cl_data_be_o,
output logic [DCACHE_SET_ASSOC-1:0] wr_vld_bits_o,
// memory interface
input logic mem_rtrn_vld_i,
input dcache_rtrn_t mem_rtrn_i,
output logic mem_data_req_o,
input logic mem_data_ack_i,
output dcache_req_t mem_data_o
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
// cache management, signals from/to core
input logic enable_i, // from CSR
input logic flush_i, // flush request, this waits for pending tx (write, read) to finish and will clear the cache
output logic flush_ack_o, // send a single cycle acknowledge signal when the cache is flushed
output logic miss_o, // we missed on a ld/st
// local cache management signals
input logic wbuffer_empty_i,
output logic cache_en_o, // local cache enable signal
// AMO interface
input amo_req_t amo_req_i,
output amo_resp_t amo_resp_o,
// miss handling interface (ld, ptw, wbuffer)
input logic [NumPorts-1:0] miss_req_i,
output logic [NumPorts-1:0] miss_ack_o,
input logic [NumPorts-1:0] miss_nc_i,
input logic [NumPorts-1:0] miss_we_i,
input logic [NumPorts-1:0][riscv::XLEN-1:0] miss_wdata_i,
input logic [NumPorts-1:0][DCACHE_USER_WIDTH-1:0] miss_wuser_i,
input logic [NumPorts-1:0][riscv::PLEN-1:0] miss_paddr_i,
input logic [NumPorts-1:0][DCACHE_SET_ASSOC-1:0] miss_vld_bits_i,
input logic [NumPorts-1:0][2:0] miss_size_i,
input logic [NumPorts-1:0][CACHE_ID_WIDTH-1:0] miss_id_i, // used as transaction ID
// signals that the request collided with a pending read
output logic [NumPorts-1:0] miss_replay_o,
// signals response from memory
output logic [NumPorts-1:0] miss_rtrn_vld_o,
output logic [CACHE_ID_WIDTH-1:0] miss_rtrn_id_o, // only used for writes, set to zero for reads
// from writebuffer
input logic [DCACHE_MAX_TX-1:0][riscv::PLEN-1:0] tx_paddr_i, // used to check for address collisions with read operations
input logic [DCACHE_MAX_TX-1:0] tx_vld_i, // used to check for address collisions with read operations
// write interface to cache memory
output logic wr_cl_vld_o, // writes a full cacheline
output logic wr_cl_nc_o, // writes a full cacheline
output logic [DCACHE_SET_ASSOC-1:0] wr_cl_we_o, // writes a full cacheline
output logic [DCACHE_TAG_WIDTH-1:0] wr_cl_tag_o,
output logic [DCACHE_CL_IDX_WIDTH-1:0] wr_cl_idx_o,
output logic [DCACHE_OFFSET_WIDTH-1:0] wr_cl_off_o,
output logic [DCACHE_LINE_WIDTH-1:0] wr_cl_data_o,
output logic [DCACHE_USER_LINE_WIDTH-1:0] wr_cl_user_o,
output logic [DCACHE_LINE_WIDTH/8-1:0] wr_cl_data_be_o,
output logic [DCACHE_SET_ASSOC-1:0] wr_vld_bits_o,
// memory interface
input logic mem_rtrn_vld_i,
input dcache_rtrn_t mem_rtrn_i,
output logic mem_data_req_o,
input logic mem_data_ack_i,
output dcache_req_t mem_data_o
);
// functions
function automatic logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] dcache_way_bin2oh (
input logic [L1D_WAY_WIDTH-1:0] in
);
function automatic logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] dcache_way_bin2oh(
input logic [L1D_WAY_WIDTH-1:0] in);
logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] out;
out = '0;
out[in] = 1'b1;
@ -86,35 +88,41 @@ module wt_dcache_missunit import ariane_pkg::*; import wt_cache_pkg::*; #(
// 010: word
// 011: dword
// 111: DCACHE line
function automatic logic [riscv::PLEN-1:0] paddrSizeAlign(
input logic [riscv::PLEN-1:0] paddr,
input logic [2:0] size
);
function automatic logic [riscv::PLEN-1:0] paddrSizeAlign(input logic [riscv::PLEN-1:0] paddr,
input logic [2:0] size);
logic [riscv::PLEN-1:0] out;
out = paddr;
unique case (size)
3'b001: out[0:0] = '0;
3'b010: out[1:0] = '0;
3'b011: out[2:0] = '0;
3'b111: out[DCACHE_OFFSET_WIDTH-1:0] = '0;
3'b001: out[0:0] = '0;
3'b010: out[1:0] = '0;
3'b011: out[2:0] = '0;
3'b111: out[DCACHE_OFFSET_WIDTH-1:0] = '0;
default: ;
endcase
return out;
endfunction : paddrSizeAlign
// controller FSM
typedef enum logic[2:0] {IDLE, DRAIN, AMO, FLUSH, STORE_WAIT, LOAD_WAIT, AMO_WAIT} state_e;
typedef enum logic [2:0] {
IDLE,
DRAIN,
AMO,
FLUSH,
STORE_WAIT,
LOAD_WAIT,
AMO_WAIT
} state_e;
state_e state_d, state_q;
// MSHR for reads
typedef struct packed {
logic [riscv::PLEN-1:0] paddr ;
logic [2:0] size ;
logic [riscv::PLEN-1:0] paddr;
logic [2:0] size;
logic [DCACHE_SET_ASSOC-1:0] vld_bits;
logic [CACHE_ID_WIDTH-1:0] id ;
logic nc ;
logic [CACHE_ID_WIDTH-1:0] id;
logic nc;
logic [$clog2(DCACHE_SET_ASSOC)-1:0] repl_way;
logic [$clog2(NumPorts)-1:0] miss_port_idx;
logic [$clog2(NumPorts)-1:0] miss_port_idx;
} mshr_t;
mshr_t mshr_d, mshr_q;
@ -131,7 +139,7 @@ module wt_dcache_missunit import ariane_pkg::*; import wt_cache_pkg::*; #(
logic amo_req_d, amo_req_q;
logic [63:0] amo_rtrn_mux;
riscv::xlen_t amo_data;
logic [63:0] amo_user; //DCACHE USER ? DATA_USER_WIDTH
logic [63:0] amo_user; //DCACHE USER ? DATA_USER_WIDTH
logic [riscv::PLEN-1:0] tmp_paddr;
logic [$clog2(NumPorts)-1:0] miss_port_idx;
logic [DCACHE_CL_IDX_WIDTH-1:0] cnt_d, cnt_q;
@ -144,25 +152,25 @@ module wt_dcache_missunit import ariane_pkg::*; import wt_cache_pkg::*; #(
logic [NumPorts-1:0] mshr_rdrd_collision;
logic tx_rdwr_collision, mshr_rdwr_collision;
///////////////////////////////////////////////////////
// input arbitration and general control sigs
///////////////////////////////////////////////////////
///////////////////////////////////////////////////////
// input arbitration and general control sigs
///////////////////////////////////////////////////////
assign cache_en_o = enable_q;
assign cnt_d = (flush_en) ? cnt_q + 1 : '0;
assign flush_done = (cnt_q == wt_cache_pkg::DCACHE_NUM_WORDS-1);
assign cache_en_o = enable_q;
assign cnt_d = (flush_en) ? cnt_q + 1 : '0;
assign flush_done = (cnt_q == wt_cache_pkg::DCACHE_NUM_WORDS - 1);
assign miss_req_masked_d = (lock_reqs) ? miss_req_masked_q :
(mask_reads) ? miss_we_i & miss_req_i : miss_req_i;
assign miss_is_write = miss_we_i[miss_port_idx];
assign miss_is_write = miss_we_i[miss_port_idx];
// read port arbiter
lzc #(
.WIDTH ( NumPorts )
.WIDTH(NumPorts)
) i_lzc_reqs (
.in_i ( miss_req_masked_d ),
.cnt_o ( miss_port_idx ),
.empty_o ( )
.in_i (miss_req_masked_d),
.cnt_o (miss_port_idx),
.empty_o()
);
always_comb begin : p_ack
@ -172,49 +180,47 @@ module wt_dcache_missunit import ariane_pkg::*; import wt_cache_pkg::*; #(
end
end
///////////////////////////////////////////////////////
// MSHR and way replacement logic (only for read ops)
///////////////////////////////////////////////////////
///////////////////////////////////////////////////////
// MSHR and way replacement logic (only for read ops)
///////////////////////////////////////////////////////
// find invalid cache line
lzc #(
.WIDTH ( ariane_pkg::DCACHE_SET_ASSOC )
.WIDTH(ariane_pkg::DCACHE_SET_ASSOC)
) i_lzc_inv (
.in_i ( ~miss_vld_bits_i[miss_port_idx] ),
.cnt_o ( inv_way ),
.empty_o ( all_ways_valid )
.in_i (~miss_vld_bits_i[miss_port_idx]),
.cnt_o (inv_way),
.empty_o(all_ways_valid)
);
// generate random cacheline index
lfsr #(
.LfsrWidth ( 8 ),
.OutWidth ( $clog2(ariane_pkg::DCACHE_SET_ASSOC))
.LfsrWidth(8),
.OutWidth ($clog2(ariane_pkg::DCACHE_SET_ASSOC))
) i_lfsr_inv (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.en_i ( update_lfsr ),
.out_o ( rnd_way )
.clk_i (clk_i),
.rst_ni(rst_ni),
.en_i (update_lfsr),
.out_o (rnd_way)
);
assign repl_way = (all_ways_valid) ? rnd_way : inv_way;
assign repl_way = (all_ways_valid) ? rnd_way : inv_way;
assign mshr_d.size = (mshr_allocate) ? miss_size_i [miss_port_idx] : mshr_q.size;
assign mshr_d.paddr = (mshr_allocate) ? miss_paddr_i [miss_port_idx] : mshr_q.paddr;
assign mshr_d.vld_bits = (mshr_allocate) ? miss_vld_bits_i[miss_port_idx] : mshr_q.vld_bits;
assign mshr_d.id = (mshr_allocate) ? miss_id_i [miss_port_idx] : mshr_q.id;
assign mshr_d.nc = (mshr_allocate) ? miss_nc_i [miss_port_idx] : mshr_q.nc;
assign mshr_d.repl_way = (mshr_allocate) ? repl_way : mshr_q.repl_way;
assign mshr_d.miss_port_idx = (mshr_allocate) ? miss_port_idx : mshr_q.miss_port_idx;
assign mshr_d.size = (mshr_allocate) ? miss_size_i[miss_port_idx] : mshr_q.size;
assign mshr_d.paddr = (mshr_allocate) ? miss_paddr_i[miss_port_idx] : mshr_q.paddr;
assign mshr_d.vld_bits = (mshr_allocate) ? miss_vld_bits_i[miss_port_idx] : mshr_q.vld_bits;
assign mshr_d.id = (mshr_allocate) ? miss_id_i[miss_port_idx] : mshr_q.id;
assign mshr_d.nc = (mshr_allocate) ? miss_nc_i[miss_port_idx] : mshr_q.nc;
assign mshr_d.repl_way = (mshr_allocate) ? repl_way : mshr_q.repl_way;
assign mshr_d.miss_port_idx = (mshr_allocate) ? miss_port_idx : mshr_q.miss_port_idx;
// currently we only have one outstanding read TX, hence an incoming load clears the MSHR
assign mshr_vld_d = (mshr_allocate) ? 1'b1 :
(load_ack) ? 1'b0 :
mshr_vld_q;
assign mshr_vld_d = (mshr_allocate) ? 1'b1 : (load_ack) ? 1'b0 : mshr_vld_q;
assign miss_o = (mshr_allocate) ? ~miss_nc_i[miss_port_idx] : 1'b0;
assign miss_o = (mshr_allocate) ? ~miss_nc_i[miss_port_idx] : 1'b0;
for(genvar k=0; k<NumPorts; k++) begin : gen_rdrd_collision
for (genvar k = 0; k < NumPorts; k++) begin : gen_rdrd_collision
assign mshr_rdrd_collision[k] = (mshr_q.paddr[riscv::PLEN-1:DCACHE_OFFSET_WIDTH] == miss_paddr_i[k][riscv::PLEN-1:DCACHE_OFFSET_WIDTH]) && (mshr_vld_q | mshr_vld_q1);
assign mshr_rdrd_collision_d[k] = (!miss_req_i[k]) ? 1'b0 : mshr_rdrd_collision_q[k] | mshr_rdrd_collision[k];
end
@ -226,25 +232,25 @@ module wt_dcache_missunit import ariane_pkg::*; import wt_cache_pkg::*; #(
// read collides with inflight TX
always_comb begin : p_tx_coll
tx_rdwr_collision = 1'b0;
for(int k=0; k<DCACHE_MAX_TX; k++) begin
for (int k = 0; k < DCACHE_MAX_TX; k++) begin
tx_rdwr_collision |= (miss_paddr_i[miss_port_idx][riscv::PLEN-1:DCACHE_OFFSET_WIDTH] == tx_paddr_i[k][riscv::PLEN-1:DCACHE_OFFSET_WIDTH]) && tx_vld_i[k];
end
end
///////////////////////////////////////////////////////
// to memory
///////////////////////////////////////////////////////
///////////////////////////////////////////////////////
// to memory
///////////////////////////////////////////////////////
// if size = 32bit word, select appropriate offset, replicate for openpiton...
always_comb begin
if (riscv::IS_XLEN64) begin
if (amo_req_i.size==2'b10) begin
amo_data = {amo_req_i.operand_b[0 +: 32], amo_req_i.operand_b[0 +: 32]};
if (amo_req_i.size == 2'b10) begin
amo_data = {amo_req_i.operand_b[0+:32], amo_req_i.operand_b[0+:32]};
end else begin
amo_data = amo_req_i.operand_b;
end
end else begin
amo_data = amo_req_i.operand_b[0 +: 32];
amo_data = amo_req_i.operand_b[0+:32];
end
if (ariane_pkg::DATA_USER_EN) begin
amo_user = amo_data;
@ -256,12 +262,14 @@ module wt_dcache_missunit import ariane_pkg::*; import wt_cache_pkg::*; #(
// note: openpiton returns a full cacheline!
if (CVA6Cfg.NOCType == config_pkg::NOC_TYPE_AXI4_ATOP) begin : gen_axi_rtrn_mux
if (CVA6Cfg.AxiDataWidth > 64) begin
assign amo_rtrn_mux = mem_rtrn_i.data[amo_req_i.operand_a[$clog2(CVA6Cfg.AxiDataWidth/8)-1:3]*64 +: 64];
assign amo_rtrn_mux = mem_rtrn_i.data[amo_req_i.operand_a[$clog2(
CVA6Cfg.AxiDataWidth/8
)-1:3]*64+:64];
end else begin
assign amo_rtrn_mux = mem_rtrn_i.data[0 +: 64];
assign amo_rtrn_mux = mem_rtrn_i.data[0+:64];
end
end else begin : gen_piton_rtrn_mux
assign amo_rtrn_mux = mem_rtrn_i.data[amo_req_i.operand_a[DCACHE_OFFSET_WIDTH-1:3]*64 +: 64];
assign amo_rtrn_mux = mem_rtrn_i.data[amo_req_i.operand_a[DCACHE_OFFSET_WIDTH-1:3]*64+:64];
end
// always sign extend 32bit values
@ -273,41 +281,41 @@ module wt_dcache_missunit import ariane_pkg::*; import wt_cache_pkg::*; #(
assign amo_req_d = amo_req_i.req;
// outgoing memory requests (AMOs are always uncached)
assign mem_data_o.tid = (amo_sel) ? AmoTxId : miss_id_i[miss_port_idx];
assign mem_data_o.nc = (amo_sel) ? 1'b1 : miss_nc_i[miss_port_idx];
assign mem_data_o.way = (amo_sel) ? '0 : repl_way;
assign mem_data_o.data = (amo_sel) ? amo_data : miss_wdata_i[miss_port_idx];
assign mem_data_o.user = (amo_sel) ? amo_user : miss_wuser_i[miss_port_idx];
assign mem_data_o.size = (amo_sel) ? amo_req_i.size : miss_size_i [miss_port_idx];
assign mem_data_o.amo_op = (amo_sel) ? amo_req_i.amo_op : AMO_NONE;
assign mem_data_o.tid = (amo_sel) ? AmoTxId : miss_id_i[miss_port_idx];
assign mem_data_o.nc = (amo_sel) ? 1'b1 : miss_nc_i[miss_port_idx];
assign mem_data_o.way = (amo_sel) ? '0 : repl_way;
assign mem_data_o.data = (amo_sel) ? amo_data : miss_wdata_i[miss_port_idx];
assign mem_data_o.user = (amo_sel) ? amo_user : miss_wuser_i[miss_port_idx];
assign mem_data_o.size = (amo_sel) ? amo_req_i.size : miss_size_i[miss_port_idx];
assign mem_data_o.amo_op = (amo_sel) ? amo_req_i.amo_op : AMO_NONE;
assign tmp_paddr = (amo_sel) ? amo_req_i.operand_a[riscv::PLEN-1:0] : miss_paddr_i[miss_port_idx];
assign mem_data_o.paddr = paddrSizeAlign(tmp_paddr, mem_data_o.size);
assign tmp_paddr = (amo_sel) ? amo_req_i.operand_a[riscv::PLEN-1:0] : miss_paddr_i[miss_port_idx];
assign mem_data_o.paddr = paddrSizeAlign(tmp_paddr, mem_data_o.size);
///////////////////////////////////////////////////////
// back-off mechanism for LR/SC completion guarantee
///////////////////////////////////////////////////////
///////////////////////////////////////////////////////
// back-off mechanism for LR/SC completion guarantee
///////////////////////////////////////////////////////
logic sc_fail, sc_pass, sc_backoff_over;
exp_backoff #(
.Seed(3),
.MaxExp(16)
.Seed (3),
.MaxExp(16)
) i_exp_backoff (
.clk_i,
.rst_ni,
.set_i ( sc_fail ),
.clr_i ( sc_pass ),
.is_zero_o ( sc_backoff_over )
.clk_i,
.rst_ni,
.set_i (sc_fail),
.clr_i (sc_pass),
.is_zero_o(sc_backoff_over)
);
///////////////////////////////////////////////////////
// responses from memory
///////////////////////////////////////////////////////
///////////////////////////////////////////////////////
// responses from memory
///////////////////////////////////////////////////////
// keep track of pending stores
logic store_sent;
logic [$clog2(wt_cache_pkg::DCACHE_MAX_TX + 1)-1:0] stores_inflight_d, stores_inflight_q;
assign store_sent = mem_data_req_o & mem_data_ack_i & (mem_data_o.rtype == DCACHE_STORE_REQ);
assign store_sent = mem_data_req_o & mem_data_ack_i & (mem_data_o.rtype == DCACHE_STORE_REQ);
assign stores_inflight_d = (store_ack && store_sent) ? stores_inflight_q :
(store_ack) ? stores_inflight_q - 1 :
@ -323,7 +331,7 @@ module wt_dcache_missunit import ariane_pkg::*; import wt_cache_pkg::*; #(
inv_vld_all = 1'b0;
sc_fail = 1'b0;
sc_pass = 1'b0;
miss_rtrn_vld_o ='0;
miss_rtrn_vld_o = '0;
if (mem_rtrn_vld_i) begin
unique case (mem_rtrn_i.rtype)
DCACHE_LOAD_ACK: begin
@ -333,7 +341,7 @@ module wt_dcache_missunit import ariane_pkg::*; import wt_cache_pkg::*; #(
end
end
DCACHE_STORE_ACK: begin
if (stores_inflight_q>0) begin
if (stores_inflight_q > 0) begin
store_ack = 1'b1;
miss_rtrn_vld_o[NumPorts-1] = 1'b1;
end
@ -344,7 +352,7 @@ module wt_dcache_missunit import ariane_pkg::*; import wt_cache_pkg::*; #(
// need to set SC backoff counter if
// this op failed
if (amo_req_i.amo_op == AMO_SC) begin
if (amo_resp_o.result>0) begin
if (amo_resp_o.result > 0) begin
sc_fail = 1'b1;
end else begin
sc_pass = 1'b1;
@ -359,50 +367,49 @@ module wt_dcache_missunit import ariane_pkg::*; import wt_cache_pkg::*; #(
// TODO:
// DCACHE_INT_REQ: begin
// end
default : begin
default: begin
end
endcase
end
end
// to write buffer
assign miss_rtrn_id_o = mem_rtrn_i.tid;
assign miss_rtrn_id_o = mem_rtrn_i.tid;
///////////////////////////////////////////////////////
// writes to cache memory
///////////////////////////////////////////////////////
///////////////////////////////////////////////////////
// writes to cache memory
///////////////////////////////////////////////////////
// cacheline write port
assign wr_cl_nc_o = mshr_q.nc;
assign wr_cl_vld_o = load_ack | (| wr_cl_we_o);
assign wr_cl_nc_o = mshr_q.nc;
assign wr_cl_vld_o = load_ack | (|wr_cl_we_o);
assign wr_cl_we_o = (flush_en ) ? '1 :
(inv_vld_all) ? '1 :
(inv_vld ) ? dcache_way_bin2oh(mem_rtrn_i.inv.way) :
(cl_write_en) ? dcache_way_bin2oh(mshr_q.repl_way) :
'0;
assign wr_cl_we_o = (flush_en) ? '1 : (inv_vld_all) ? '1 : (inv_vld) ? dcache_way_bin2oh(
mem_rtrn_i.inv.way
) : (cl_write_en) ? dcache_way_bin2oh(
mshr_q.repl_way
) : '0;
assign wr_vld_bits_o = (flush_en ) ? '0 :
(inv_vld ) ? '0 :
(cl_write_en) ? dcache_way_bin2oh(mshr_q.repl_way) :
'0;
assign wr_vld_bits_o = (flush_en) ? '0 : (inv_vld) ? '0 : (cl_write_en) ? dcache_way_bin2oh(
mshr_q.repl_way
) : '0;
assign wr_cl_idx_o = (flush_en) ? cnt_q :
(inv_vld) ? mem_rtrn_i.inv.idx[DCACHE_INDEX_WIDTH-1:DCACHE_OFFSET_WIDTH] :
mshr_q.paddr[DCACHE_INDEX_WIDTH-1:DCACHE_OFFSET_WIDTH];
assign wr_cl_tag_o = mshr_q.paddr[DCACHE_TAG_WIDTH+DCACHE_INDEX_WIDTH-1:DCACHE_INDEX_WIDTH];
assign wr_cl_off_o = mshr_q.paddr[DCACHE_OFFSET_WIDTH-1:0];
assign wr_cl_data_o = mem_rtrn_i.data;
assign wr_cl_user_o = mem_rtrn_i.user;
assign wr_cl_tag_o = mshr_q.paddr[DCACHE_TAG_WIDTH+DCACHE_INDEX_WIDTH-1:DCACHE_INDEX_WIDTH];
assign wr_cl_off_o = mshr_q.paddr[DCACHE_OFFSET_WIDTH-1:0];
assign wr_cl_data_o = mem_rtrn_i.data;
assign wr_cl_user_o = mem_rtrn_i.user;
assign wr_cl_data_be_o = (cl_write_en) ? '1 : '0;// we only write complete cachelines into the memory
// only non-NC responses write to the cache
assign cl_write_en = load_ack & ~mshr_q.nc;
assign cl_write_en = load_ack & ~mshr_q.nc;
///////////////////////////////////////////////////////
// main control logic for generating tx
///////////////////////////////////////////////////////
///////////////////////////////////////////////////////
// main control logic for generating tx
///////////////////////////////////////////////////////
always_comb begin : p_fsm
// default assignment
@ -434,39 +441,39 @@ module wt_dcache_missunit import ariane_pkg::*; import wt_cache_pkg::*; #(
flush_ack_d = flush_i;
state_d = FLUSH;
end else begin
state_d = DRAIN;
state_d = DRAIN;
end
end else if (amo_req_i.req) begin
if (wbuffer_empty_i && !mshr_vld_q) begin
state_d = AMO;
state_d = AMO;
end else begin
state_d = DRAIN;
state_d = DRAIN;
end
// we've got a miss to handle
// we've got a miss to handle
end else if (|miss_req_masked_d) begin
// this is a write miss, just pass through (but check whether write collides with MSHR)
if (miss_is_write) begin
// stall in case this write collides with the MSHR address
if (!mshr_rdwr_collision) begin
mem_data_req_o = 1'b1;
mem_data_o.rtype = DCACHE_STORE_REQ;
mem_data_req_o = 1'b1;
mem_data_o.rtype = DCACHE_STORE_REQ;
if (!mem_data_ack_i) begin
state_d = STORE_WAIT;
end
end
// this is a read miss, can only allocate 1 MSHR
// in case of a load_ack we can accept a new miss, since the MSHR is being cleared
// this is a read miss, can only allocate 1 MSHR
// in case of a load_ack we can accept a new miss, since the MSHR is being cleared
end else if (!mshr_vld_q || load_ack) begin
// replay the read request in case the address has collided with MSHR during the time the request was pending
// i.e., the cache state may have been updated in the meantime due to a refill at the same CL address
if (mshr_rdrd_collision_d[miss_port_idx]) begin
miss_replay_o[miss_port_idx] = 1'b1;
// stall in case this CL address overlaps with a write TX that is in flight
// stall in case this CL address overlaps with a write TX that is in flight
end else if (!tx_rdwr_collision) begin
mem_data_req_o = 1'b1;
mem_data_o.rtype = DCACHE_LOAD_REQ;
update_lfsr = all_ways_valid & mem_data_ack_i;// need to evict a random way
mshr_allocate = mem_data_ack_i;
mem_data_req_o = 1'b1;
mem_data_o.rtype = DCACHE_LOAD_REQ;
update_lfsr = all_ways_valid & mem_data_ack_i; // need to evict a random way
mshr_allocate = mem_data_ack_i;
if (!mem_data_ack_i) begin
state_d = LOAD_WAIT;
end
@ -477,9 +484,9 @@ module wt_dcache_missunit import ariane_pkg::*; import wt_cache_pkg::*; #(
//////////////////////////////////
// wait until this request is acked
STORE_WAIT: begin
lock_reqs = 1'b1;
mem_data_req_o = 1'b1;
mem_data_o.rtype = DCACHE_STORE_REQ;
lock_reqs = 1'b1;
mem_data_req_o = 1'b1;
mem_data_o.rtype = DCACHE_STORE_REQ;
if (mem_data_ack_i) begin
state_d = IDLE;
end
@ -487,11 +494,11 @@ module wt_dcache_missunit import ariane_pkg::*; import wt_cache_pkg::*; #(
//////////////////////////////////
// wait until this request is acked
LOAD_WAIT: begin
lock_reqs = 1'b1;
mem_data_req_o = 1'b1;
mem_data_o.rtype = DCACHE_LOAD_REQ;
lock_reqs = 1'b1;
mem_data_req_o = 1'b1;
mem_data_o.rtype = DCACHE_LOAD_REQ;
if (mem_data_ack_i) begin
update_lfsr = all_ways_valid;// need to evict a random way
update_lfsr = all_ways_valid; // need to evict a random way
mshr_allocate = 1'b1;
state_d = IDLE;
end
@ -503,8 +510,8 @@ module wt_dcache_missunit import ariane_pkg::*; import wt_cache_pkg::*; #(
mask_reads = 1'b1;
// these are writes, check whether they collide with MSHR
if (|miss_req_masked_d && !mshr_rdwr_collision) begin
mem_data_req_o = 1'b1;
mem_data_o.rtype = DCACHE_STORE_REQ;
mem_data_req_o = 1'b1;
mem_data_o.rtype = DCACHE_STORE_REQ;
end
if (wbuffer_empty_i && !mshr_vld_q) begin
@ -515,7 +522,7 @@ module wt_dcache_missunit import ariane_pkg::*; import wt_cache_pkg::*; #(
// flush the cache
FLUSH: begin
// internal flush signal
flush_en = 1'b1;
flush_en = 1'b1;
if (flush_done) begin
state_d = IDLE;
flush_ack_o = flush_ack_q;
@ -530,7 +537,7 @@ module wt_dcache_missunit import ariane_pkg::*; import wt_cache_pkg::*; #(
amo_sel = 1'b1;
// if this is an LR, we need to consult the backoff counter
if ((amo_req_i.amo_op != AMO_LR) || sc_backoff_over) begin
mem_data_req_o = 1'b1;
mem_data_req_o = 1'b1;
if (mem_data_ack_i) begin
state_d = AMO_WAIT;
end
@ -550,66 +557,72 @@ module wt_dcache_missunit import ariane_pkg::*; import wt_cache_pkg::*; #(
// we should never get here
state_d = IDLE;
end
endcase // state_q
endcase // state_q
end
///////////////////////////////////////////////////////
// ff's
///////////////////////////////////////////////////////
///////////////////////////////////////////////////////
// ff's
///////////////////////////////////////////////////////
always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
if (!rst_ni) begin
state_q <= FLUSH;
cnt_q <= '0;
enable_q <= '0;
flush_ack_q <= '0;
mshr_vld_q <= '0;
mshr_vld_q1 <= '0;
mshr_q <= '0;
mshr_rdrd_collision_q <= '0;
miss_req_masked_q <= '0;
amo_req_q <= '0;
stores_inflight_q <= '0;
end else begin
state_q <= state_d;
cnt_q <= cnt_d;
enable_q <= enable_d;
flush_ack_q <= flush_ack_d;
mshr_vld_q <= mshr_vld_d;
mshr_vld_q1 <= mshr_vld_q;
mshr_q <= mshr_d;
mshr_rdrd_collision_q <= mshr_rdrd_collision_d;
miss_req_masked_q <= miss_req_masked_d;
amo_req_q <= amo_req_d;
stores_inflight_q <= stores_inflight_d;
always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
if (!rst_ni) begin
state_q <= FLUSH;
cnt_q <= '0;
enable_q <= '0;
flush_ack_q <= '0;
mshr_vld_q <= '0;
mshr_vld_q1 <= '0;
mshr_q <= '0;
mshr_rdrd_collision_q <= '0;
miss_req_masked_q <= '0;
amo_req_q <= '0;
stores_inflight_q <= '0;
end else begin
state_q <= state_d;
cnt_q <= cnt_d;
enable_q <= enable_d;
flush_ack_q <= flush_ack_d;
mshr_vld_q <= mshr_vld_d;
mshr_vld_q1 <= mshr_vld_q;
mshr_q <= mshr_d;
mshr_rdrd_collision_q <= mshr_rdrd_collision_d;
miss_req_masked_q <= miss_req_masked_d;
amo_req_q <= amo_req_d;
stores_inflight_q <= stores_inflight_d;
end
end
end
///////////////////////////////////////////////////////
// assertions
///////////////////////////////////////////////////////
///////////////////////////////////////////////////////
// assertions
///////////////////////////////////////////////////////
//pragma translate_off
//pragma translate_off
`ifndef VERILATOR
read_tid : assert property (
read_tid :
assert property (
@(posedge clk_i) disable iff (!rst_ni) mshr_vld_q |-> mem_rtrn_vld_i |-> load_ack |-> mem_rtrn_i.tid == mshr_q.id)
else $fatal(1,"[l1 dcache missunit] TID of load response doesn't match");
else $fatal(1, "[l1 dcache missunit] TID of load response doesn't match");
read_ports : assert property (
read_ports :
assert property (
@(posedge clk_i) disable iff (!rst_ni) |miss_req_i[NumPorts-2:0] |-> miss_we_i[NumPorts-2:0] == 0)
else $fatal(1,"[l1 dcache missunit] only last port can issue write requests");
else $fatal(1, "[l1 dcache missunit] only last port can issue write requests");
write_port : assert property (
write_port :
assert property (
@(posedge clk_i) disable iff (!rst_ni) miss_req_i[NumPorts-1] |-> miss_we_i[NumPorts-1])
else $fatal(1,"[l1 dcache missunit] last port can only issue write requests");
else $fatal(1, "[l1 dcache missunit] last port can only issue write requests");
initial begin
initial begin
// assert wrong parameterizations
assert (NumPorts>=2)
else $fatal(1,"[l1 dcache missunit] at least two ports are required (one read port, one write port)");
end
assert (NumPorts >= 2)
else
$fatal(
1, "[l1 dcache missunit] at least two ports are required (one read port, one write port)"
);
end
`endif
//pragma translate_on
//pragma translate_on
endmodule // wt_dcache_missunit
endmodule // wt_dcache_missunit

View file

@ -49,69 +49,73 @@
// word has been evicted from the write buffer.
module wt_dcache_wbuffer import ariane_pkg::*; import wt_cache_pkg::*; #(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
module wt_dcache_wbuffer
import ariane_pkg::*;
import wt_cache_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) (
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic cache_en_i, // writes are treated as NC if disabled
output logic empty_o, // asserted if no data is present in write buffer
output logic not_ni_o, // asserted if no non-idempotent (NI) data is present in write buffer
// core request ports
input dcache_req_i_t req_port_i,
output dcache_req_o_t req_port_o,
// interface to miss handler
input logic miss_ack_i,
output logic [riscv::PLEN-1:0] miss_paddr_o,
output logic miss_req_o,
output logic miss_we_o, // always 1 here
output riscv::xlen_t miss_wdata_o,
output logic [DCACHE_USER_WIDTH-1:0] miss_wuser_o,
output logic [DCACHE_SET_ASSOC-1:0] miss_vld_bits_o, // unused here (set to 0)
output logic miss_nc_o, // request to I/O space
output logic [2:0] miss_size_o, //
output logic [CACHE_ID_WIDTH-1:0] miss_id_o, // ID of this transaction (wbuffer uses all IDs from 0 to DCACHE_MAX_TX-1)
// write responses from memory
input logic miss_rtrn_vld_i,
input logic [CACHE_ID_WIDTH-1:0] miss_rtrn_id_i, // transaction ID to clear
// cache read interface
output logic [DCACHE_TAG_WIDTH-1:0] rd_tag_o, // tag in - comes one cycle later
output logic [DCACHE_CL_IDX_WIDTH-1:0] rd_idx_o,
output logic [DCACHE_OFFSET_WIDTH-1:0] rd_off_o,
output logic rd_req_o, // read the word at offset off_i[:3] in all ways
output logic rd_tag_only_o, // set to 1 here as we do not have to read the data arrays
input logic rd_ack_i,
input riscv::xlen_t rd_data_i, // unused
input logic [DCACHE_SET_ASSOC-1:0] rd_vld_bits_i, // unused
input logic [DCACHE_SET_ASSOC-1:0] rd_hit_oh_i,
// cacheline writes
input logic wr_cl_vld_i,
input logic [DCACHE_CL_IDX_WIDTH-1:0] wr_cl_idx_i,
// cache word write interface
output logic [DCACHE_SET_ASSOC-1:0] wr_req_o,
input logic wr_ack_i,
output logic [DCACHE_CL_IDX_WIDTH-1:0] wr_idx_o,
output logic [DCACHE_OFFSET_WIDTH-1:0] wr_off_o,
output riscv::xlen_t wr_data_o,
output logic [(riscv::XLEN/8)-1:0] wr_data_be_o,
output logic [DCACHE_USER_WIDTH-1:0] wr_user_o,
// to forwarding logic and miss unit
output wbuffer_t [DCACHE_WBUF_DEPTH-1:0] wbuffer_data_o,
output logic [DCACHE_MAX_TX-1:0][riscv::PLEN-1:0] tx_paddr_o, // used to check for address collisions with read operations
output logic [DCACHE_MAX_TX-1:0] tx_vld_o
input logic cache_en_i, // writes are treated as NC if disabled
output logic empty_o, // asserted if no data is present in write buffer
output logic not_ni_o, // asserted if no non-idempotent (NI) data is present in write buffer
// core request ports
input dcache_req_i_t req_port_i,
output dcache_req_o_t req_port_o,
// interface to miss handler
input logic miss_ack_i,
output logic [riscv::PLEN-1:0] miss_paddr_o,
output logic miss_req_o,
output logic miss_we_o, // always 1 here
output riscv::xlen_t miss_wdata_o,
output logic [DCACHE_USER_WIDTH-1:0] miss_wuser_o,
output logic [DCACHE_SET_ASSOC-1:0] miss_vld_bits_o, // unused here (set to 0)
output logic miss_nc_o, // request to I/O space
output logic [2:0] miss_size_o, //
output logic [CACHE_ID_WIDTH-1:0] miss_id_o, // ID of this transaction (wbuffer uses all IDs from 0 to DCACHE_MAX_TX-1)
// write responses from memory
input logic miss_rtrn_vld_i,
input logic [CACHE_ID_WIDTH-1:0] miss_rtrn_id_i, // transaction ID to clear
// cache read interface
output logic [DCACHE_TAG_WIDTH-1:0] rd_tag_o, // tag in - comes one cycle later
output logic [DCACHE_CL_IDX_WIDTH-1:0] rd_idx_o,
output logic [DCACHE_OFFSET_WIDTH-1:0] rd_off_o,
output logic rd_req_o, // read the word at offset off_i[:3] in all ways
output logic rd_tag_only_o, // set to 1 here as we do not have to read the data arrays
input logic rd_ack_i,
input riscv::xlen_t rd_data_i, // unused
input logic [DCACHE_SET_ASSOC-1:0] rd_vld_bits_i, // unused
input logic [DCACHE_SET_ASSOC-1:0] rd_hit_oh_i,
// cacheline writes
input logic wr_cl_vld_i,
input logic [DCACHE_CL_IDX_WIDTH-1:0] wr_cl_idx_i,
// cache word write interface
output logic [DCACHE_SET_ASSOC-1:0] wr_req_o,
input logic wr_ack_i,
output logic [DCACHE_CL_IDX_WIDTH-1:0] wr_idx_o,
output logic [DCACHE_OFFSET_WIDTH-1:0] wr_off_o,
output riscv::xlen_t wr_data_o,
output logic [(riscv::XLEN/8)-1:0] wr_data_be_o,
output logic [DCACHE_USER_WIDTH-1:0] wr_user_o,
// to forwarding logic and miss unit
output wbuffer_t [DCACHE_WBUF_DEPTH-1:0] wbuffer_data_o,
output logic [DCACHE_MAX_TX-1:0][riscv::PLEN-1:0] tx_paddr_o, // used to check for address collisions with read operations
output logic [DCACHE_MAX_TX-1:0] tx_vld_o
);
tx_stat_t [DCACHE_MAX_TX-1:0] tx_stat_d, tx_stat_q;
wbuffer_t [DCACHE_WBUF_DEPTH-1:0] wbuffer_d, wbuffer_q;
logic [DCACHE_WBUF_DEPTH-1:0] valid;
logic [DCACHE_WBUF_DEPTH-1:0] dirty;
logic [DCACHE_WBUF_DEPTH-1:0] tocheck;
logic [DCACHE_WBUF_DEPTH-1:0] wbuffer_hit_oh, inval_hit;
tx_stat_t [DCACHE_MAX_TX-1:0] tx_stat_d, tx_stat_q;
wbuffer_t [DCACHE_WBUF_DEPTH-1:0] wbuffer_d, wbuffer_q;
logic [DCACHE_WBUF_DEPTH-1:0] valid;
logic [DCACHE_WBUF_DEPTH-1:0] dirty;
logic [DCACHE_WBUF_DEPTH-1:0] tocheck;
logic [DCACHE_WBUF_DEPTH-1:0] wbuffer_hit_oh, inval_hit;
//logic [DCACHE_WBUF_DEPTH-1:0][7:0] bdirty;
logic [DCACHE_WBUF_DEPTH-1:0][(riscv::XLEN/8)-1:0] bdirty;
logic [DCACHE_WBUF_DEPTH-1:0][(riscv::XLEN/8)-1:0] bdirty;
logic [$clog2(DCACHE_WBUF_DEPTH)-1:0] next_ptr, dirty_ptr, hit_ptr, wr_ptr, check_ptr_d, check_ptr_q, check_ptr_q1, rtrn_ptr;
logic [$clog2(DCACHE_WBUF_DEPTH)-1:0]
next_ptr, dirty_ptr, hit_ptr, wr_ptr, check_ptr_d, check_ptr_q, check_ptr_q1, rtrn_ptr;
logic [CACHE_ID_WIDTH-1:0] tx_id, rtrn_id;
logic [riscv::XLEN_ALIGN_BYTES-1:0] bdirty_off;
@ -129,54 +133,66 @@ module wt_dcache_wbuffer import ariane_pkg::*; import wt_cache_pkg::*; #(
logic wr_cl_vld_q, wr_cl_vld_d;
logic [DCACHE_CL_IDX_WIDTH-1:0] wr_cl_idx_q, wr_cl_idx_d;
logic [riscv::PLEN-1:0] debug_paddr [DCACHE_WBUF_DEPTH-1:0];
logic [riscv::PLEN-1:0] debug_paddr[DCACHE_WBUF_DEPTH-1:0];
wbuffer_t wbuffer_check_mux, wbuffer_dirty_mux;
///////////////////////////////////////////////////////
// misc
///////////////////////////////////////////////////////
///////////////////////////////////////////////////////
// misc
///////////////////////////////////////////////////////
logic [ariane_pkg::DCACHE_TAG_WIDTH-1:0] miss_tag;
logic is_nc_miss;
logic is_ni;
assign miss_tag = miss_paddr_o[ariane_pkg::DCACHE_INDEX_WIDTH+:ariane_pkg::DCACHE_TAG_WIDTH];
assign is_nc_miss = !config_pkg::is_inside_cacheable_regions(CVA6Cfg, {{64-DCACHE_TAG_WIDTH-DCACHE_INDEX_WIDTH{1'b0}}, miss_tag, {DCACHE_INDEX_WIDTH{1'b0}}});
assign is_nc_miss = !config_pkg::is_inside_cacheable_regions(
CVA6Cfg,
{
{64 - DCACHE_TAG_WIDTH - DCACHE_INDEX_WIDTH{1'b0}}, miss_tag, {DCACHE_INDEX_WIDTH{1'b0}}
}
);
assign miss_nc_o = !cache_en_i || is_nc_miss;
// Non-idempotent if request goes to NI region
assign is_ni = config_pkg::is_inside_nonidempotent_regions(CVA6Cfg, {{64-DCACHE_TAG_WIDTH-DCACHE_INDEX_WIDTH{1'b0}}, req_port_i.address_tag, {DCACHE_INDEX_WIDTH{1'b0}}});
assign is_ni = config_pkg::is_inside_nonidempotent_regions(
CVA6Cfg,
{
{64 - DCACHE_TAG_WIDTH - DCACHE_INDEX_WIDTH{1'b0}},
req_port_i.address_tag,
{DCACHE_INDEX_WIDTH{1'b0}}
}
);
assign miss_we_o = 1'b1;
assign miss_we_o = 1'b1;
assign miss_vld_bits_o = '0;
assign wbuffer_data_o = wbuffer_q;
assign wbuffer_data_o = wbuffer_q;
for (genvar k=0; k<DCACHE_MAX_TX;k++) begin : gen_tx_vld
for (genvar k = 0; k < DCACHE_MAX_TX; k++) begin : gen_tx_vld
assign tx_vld_o[k] = tx_stat_q[k].vld;
assign tx_paddr_o[k] = wbuffer_q[tx_stat_q[k].ptr].wtag<<riscv::XLEN_ALIGN_BYTES;
assign tx_paddr_o[k] = wbuffer_q[tx_stat_q[k].ptr].wtag << riscv::XLEN_ALIGN_BYTES;
end
///////////////////////////////////////////////////////
// openpiton does not understand byte enable sigs
// need to convert to the four cases:
// 00: byte
// 01: halfword
// 10: word
// 11: dword
// non-contiguous writes need to be serialized!
// e.g. merged dwords with BE like this: 8'b01001100
///////////////////////////////////////////////////////
///////////////////////////////////////////////////////
// openpiton does not understand byte enable sigs
// need to convert to the four cases:
// 00: byte
// 01: halfword
// 10: word
// 11: dword
// non-contiguous writes need to be serialized!
// e.g. merged dwords with BE like this: 8'b01001100
///////////////////////////////////////////////////////
// get byte offset
lzc #(
.WIDTH ( riscv::XLEN/8 )
.WIDTH(riscv::XLEN / 8)
) i_vld_bdirty (
.in_i ( bdirty[dirty_ptr] ),
.cnt_o ( bdirty_off ),
.empty_o ( )
.in_i (bdirty[dirty_ptr]),
.cnt_o (bdirty_off),
.empty_o()
);
// add the offset to the physical base address of this buffer entry
assign miss_paddr_o = {wbuffer_dirty_mux.wtag, bdirty_off};
assign miss_id_o = tx_id;
assign miss_id_o = tx_id;
// is there any dirty word to be transmitted, and is there a free TX slot?
assign miss_req_o = (|dirty) && free_tx_slots;
@ -186,43 +202,51 @@ module wt_dcache_wbuffer import ariane_pkg::*; import wt_cache_pkg::*; #(
// we have to split unaligned data into multiple transfers (see toSize64)
// e.g. if we have the following valid bytes: 0011_1001 -> TX0: 0000_0001, TX1: 0000_1000, TX2: 0011_0000
assign miss_size_o = riscv::IS_XLEN64 ? toSize64(bdirty[dirty_ptr]):
toSize32(bdirty[dirty_ptr]);
assign miss_size_o = riscv::IS_XLEN64 ? toSize64(bdirty[dirty_ptr]) : toSize32(bdirty[dirty_ptr]);
// replicate transfers shorter than a dword
assign miss_wdata_o = riscv::IS_XLEN64 ? repData64(wbuffer_dirty_mux.data, bdirty_off, miss_size_o[1:0]):
repData32(wbuffer_dirty_mux.data, bdirty_off, miss_size_o[1:0]);
assign miss_wdata_o = riscv::IS_XLEN64 ? repData64(
wbuffer_dirty_mux.data, bdirty_off, miss_size_o[1:0]
) : repData32(
wbuffer_dirty_mux.data, bdirty_off, miss_size_o[1:0]
);
if (ariane_pkg::DATA_USER_EN) begin
assign miss_wuser_o = riscv::IS_XLEN64 ? repData64(wbuffer_dirty_mux.user, bdirty_off, miss_size_o[1:0]):
repData32(wbuffer_dirty_mux.user, bdirty_off, miss_size_o[1:0]);
assign miss_wuser_o = riscv::IS_XLEN64 ? repData64(
wbuffer_dirty_mux.user, bdirty_off, miss_size_o[1:0]
) : repData32(
wbuffer_dirty_mux.user, bdirty_off, miss_size_o[1:0]
);
end else begin
assign miss_wuser_o = '0;
end
assign tx_be = riscv::IS_XLEN64 ? to_byte_enable8(bdirty_off, miss_size_o[1:0]):
to_byte_enable4(bdirty_off, miss_size_o[1:0]);
assign tx_be = riscv::IS_XLEN64 ? to_byte_enable8(
bdirty_off, miss_size_o[1:0]
) : to_byte_enable4(
bdirty_off, miss_size_o[1:0]
);
///////////////////////////////////////////////////////
// TX status registers and ID counters
///////////////////////////////////////////////////////
///////////////////////////////////////////////////////
// TX status registers and ID counters
///////////////////////////////////////////////////////
// TODO: todo: make this fall through if timing permits it
fifo_v3 #(
.FALL_THROUGH ( 1'b0 ),
.DATA_WIDTH ( $clog2(DCACHE_MAX_TX) ),
.DEPTH ( DCACHE_MAX_TX )
.FALL_THROUGH(1'b0),
.DATA_WIDTH ($clog2(DCACHE_MAX_TX)),
.DEPTH (DCACHE_MAX_TX)
) i_rtrn_id_fifo (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.flush_i ( 1'b0 ),
.testmode_i ( 1'b0 ),
.full_o ( ),
.empty_o ( rtrn_empty ),
.usage_o ( ),
.data_i ( miss_rtrn_id_i ),
.push_i ( miss_rtrn_vld_i ),
.data_o ( rtrn_id ),
.pop_i ( evict )
.clk_i (clk_i),
.rst_ni (rst_ni),
.flush_i (1'b0),
.testmode_i(1'b0),
.full_o (),
.empty_o (rtrn_empty),
.usage_o (),
.data_i (miss_rtrn_id_i),
.push_i (miss_rtrn_vld_i),
.data_o (rtrn_id),
.pop_i (evict)
);
always_comb begin : p_tx_stat
@ -258,61 +282,61 @@ module wt_dcache_wbuffer import ariane_pkg::*; import wt_cache_pkg::*; #(
// next word to lookup in the cache
rr_arb_tree #(
.NumIn (DCACHE_MAX_TX),
.LockIn (1'b1),
.DataWidth (1)
.NumIn (DCACHE_MAX_TX),
.LockIn (1'b1),
.DataWidth(1)
) i_tx_id_rr (
.clk_i (clk_i ),
.rst_ni (rst_ni ),
.flush_i('0 ),
.rr_i ('0 ),
.req_i (~tx_vld_o ),
.gnt_o ( ),
.data_i ('0 ),
.gnt_i (dirty_rd_en ),
.req_o ( ),
.data_o ( ),
.idx_o (tx_id )
.clk_i (clk_i),
.rst_ni (rst_ni),
.flush_i('0),
.rr_i ('0),
.req_i (~tx_vld_o),
.gnt_o (),
.data_i ('0),
.gnt_i (dirty_rd_en),
.req_o (),
.data_o (),
.idx_o (tx_id)
);
///////////////////////////////////////////////////////
// cache readout & update
///////////////////////////////////////////////////////
///////////////////////////////////////////////////////
// cache readout & update
///////////////////////////////////////////////////////
assign extract_tag = rd_paddr>>DCACHE_INDEX_WIDTH;
assign extract_tag = rd_paddr >> DCACHE_INDEX_WIDTH;
assign rd_tag_d = extract_tag[DCACHE_TAG_WIDTH-1:0];
// trigger TAG readout in cache
assign rd_tag_only_o = 1'b1;
assign rd_paddr = wbuffer_check_mux.wtag<<riscv::XLEN_ALIGN_BYTES;
assign rd_req_o = |tocheck;
assign rd_tag_o = rd_tag_q;//delay by one cycle
assign rd_idx_o = rd_paddr[DCACHE_INDEX_WIDTH-1:DCACHE_OFFSET_WIDTH];
assign rd_off_o = rd_paddr[DCACHE_OFFSET_WIDTH-1:0];
assign check_en_d = rd_req_o & rd_ack_i;
assign rd_paddr = wbuffer_check_mux.wtag << riscv::XLEN_ALIGN_BYTES;
assign rd_req_o = |tocheck;
assign rd_tag_o = rd_tag_q; //delay by one cycle
assign rd_idx_o = rd_paddr[DCACHE_INDEX_WIDTH-1:DCACHE_OFFSET_WIDTH];
assign rd_off_o = rd_paddr[DCACHE_OFFSET_WIDTH-1:0];
assign check_en_d = rd_req_o & rd_ack_i;
// cache update port
assign rtrn_ptr = tx_stat_q[rtrn_id].ptr;
assign rtrn_ptr = tx_stat_q[rtrn_id].ptr;
// if we wrote into a word while it was in-flight, we cannot write the dirty bytes to the cache
// when the TX returns
assign wr_data_be_o = tx_stat_q[rtrn_id].be & (~wbuffer_q[rtrn_ptr].dirty);
assign wr_paddr = wbuffer_q[rtrn_ptr].wtag<<riscv::XLEN_ALIGN_BYTES;
assign wr_idx_o = wr_paddr[DCACHE_INDEX_WIDTH-1:DCACHE_OFFSET_WIDTH];
assign wr_off_o = wr_paddr[DCACHE_OFFSET_WIDTH-1:0];
assign wr_data_o = wbuffer_q[rtrn_ptr].data;
assign wr_user_o = wbuffer_q[rtrn_ptr].user;
assign wr_data_be_o = tx_stat_q[rtrn_id].be & (~wbuffer_q[rtrn_ptr].dirty);
assign wr_paddr = wbuffer_q[rtrn_ptr].wtag << riscv::XLEN_ALIGN_BYTES;
assign wr_idx_o = wr_paddr[DCACHE_INDEX_WIDTH-1:DCACHE_OFFSET_WIDTH];
assign wr_off_o = wr_paddr[DCACHE_OFFSET_WIDTH-1:0];
assign wr_data_o = wbuffer_q[rtrn_ptr].data;
assign wr_user_o = wbuffer_q[rtrn_ptr].user;
///////////////////////////////////////////////////////
// readout of status bits, index calculation
///////////////////////////////////////////////////////
///////////////////////////////////////////////////////
// readout of status bits, index calculation
///////////////////////////////////////////////////////
logic [DCACHE_WBUF_DEPTH-1:0][DCACHE_CL_IDX_WIDTH-1:0] wtag_comp;
assign wr_cl_vld_d = wr_cl_vld_i;
assign wr_cl_idx_d = wr_cl_idx_i;
for (genvar k=0; k<DCACHE_WBUF_DEPTH; k++) begin : gen_flags
for (genvar k = 0; k < DCACHE_WBUF_DEPTH; k++) begin : gen_flags
// only for debug, will be pruned
assign debug_paddr[k] = wbuffer_q[k].wtag << riscv::XLEN_ALIGN_BYTES;
@ -322,8 +346,8 @@ module wt_dcache_wbuffer import ariane_pkg::*; import wt_cache_pkg::*; #(
assign bdirty[k] = (|wbuffer_q[k].txblock) ? '0 : wbuffer_q[k].dirty & wbuffer_q[k].valid;
assign dirty[k] = |bdirty[k];
assign valid[k] = |wbuffer_q[k].valid;
assign dirty[k] = |bdirty[k];
assign valid[k] = |wbuffer_q[k].valid;
assign wbuffer_hit_oh[k] = valid[k] & (wbuffer_q[k].wtag == {req_port_i.address_tag, req_port_i.address_index[DCACHE_INDEX_WIDTH-1:riscv::XLEN_ALIGN_BYTES]});
// checks if an invalidation/cache refill hits a particular word
@ -334,70 +358,70 @@ module wt_dcache_wbuffer import ariane_pkg::*; import wt_cache_pkg::*; #(
(wr_cl_vld_q & valid[k] & (wtag_comp[k] == wr_cl_idx_q));
// these word have to be looked up in the cache
assign tocheck[k] = (~wbuffer_q[k].checked) & valid[k];
assign tocheck[k] = (~wbuffer_q[k].checked) & valid[k];
end
assign wr_ptr = (|wbuffer_hit_oh) ? hit_ptr : next_ptr;
assign rdy = (|wbuffer_hit_oh) | (~full);
assign wr_ptr = (|wbuffer_hit_oh) ? hit_ptr : next_ptr;
assign rdy = (|wbuffer_hit_oh) | (~full);
// next free entry in the buffer
lzc #(
.WIDTH ( DCACHE_WBUF_DEPTH )
.WIDTH(DCACHE_WBUF_DEPTH)
) i_vld_lzc (
.in_i ( ~valid ),
.cnt_o ( next_ptr ),
.empty_o ( full )
.in_i (~valid),
.cnt_o (next_ptr),
.empty_o(full)
);
// get index of hit
lzc #(
.WIDTH ( DCACHE_WBUF_DEPTH )
.WIDTH(DCACHE_WBUF_DEPTH)
) i_hit_lzc (
.in_i ( wbuffer_hit_oh ),
.cnt_o ( hit_ptr ),
.empty_o ( )
.in_i (wbuffer_hit_oh),
.cnt_o (hit_ptr),
.empty_o()
);
// next dirty word to serve
rr_arb_tree #(
.NumIn ( DCACHE_WBUF_DEPTH ),
.LockIn ( 1'b1 ),
.DataType ( wbuffer_t )
.NumIn (DCACHE_WBUF_DEPTH),
.LockIn (1'b1),
.DataType(wbuffer_t)
) i_dirty_rr (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.flush_i( '0 ),
.rr_i ( '0 ),
.req_i ( dirty ),
.gnt_o ( ),
.data_i ( wbuffer_q ),
.gnt_i ( dirty_rd_en ),
.req_o ( ),
.data_o ( wbuffer_dirty_mux ),
.idx_o ( dirty_ptr )
.clk_i (clk_i),
.rst_ni (rst_ni),
.flush_i('0),
.rr_i ('0),
.req_i (dirty),
.gnt_o (),
.data_i (wbuffer_q),
.gnt_i (dirty_rd_en),
.req_o (),
.data_o (wbuffer_dirty_mux),
.idx_o (dirty_ptr)
);
// next word to lookup in the cache
rr_arb_tree #(
.NumIn ( DCACHE_WBUF_DEPTH ),
.DataType ( wbuffer_t )
.NumIn (DCACHE_WBUF_DEPTH),
.DataType(wbuffer_t)
) i_clean_rr (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.flush_i( '0 ),
.rr_i ( '0 ),
.req_i ( tocheck ),
.gnt_o ( ),
.data_i ( wbuffer_q ),
.gnt_i ( check_en_d ),
.req_o ( ),
.data_o ( wbuffer_check_mux ),
.idx_o ( check_ptr_d )
.clk_i (clk_i),
.rst_ni (rst_ni),
.flush_i('0),
.rr_i ('0),
.req_i (tocheck),
.gnt_o (),
.data_i (wbuffer_q),
.gnt_i (check_en_d),
.req_o (),
.data_o (wbuffer_check_mux),
.idx_o (check_ptr_d)
);
///////////////////////////////////////////////////////
// update logic
///////////////////////////////////////////////////////
///////////////////////////////////////////////////////
// update logic
///////////////////////////////////////////////////////
assign req_port_o.data_rvalid = '0;
assign req_port_o.data_rdata = '0;
@ -406,7 +430,7 @@ module wt_dcache_wbuffer import ariane_pkg::*; import wt_cache_pkg::*; #(
assign rd_hit_oh_d = rd_hit_oh_i;
logic ni_inside,ni_conflict;
logic ni_inside, ni_conflict;
assign ni_inside = |ni_pending_q;
assign ni_conflict = is_ni && ni_inside;
assign not_ni_o = !ni_inside;
@ -424,13 +448,13 @@ module wt_dcache_wbuffer import ariane_pkg::*; import wt_cache_pkg::*; #(
if (check_en_q1) begin
if (|wbuffer_q[check_ptr_q1].valid) begin
wbuffer_d[check_ptr_q1].checked = 1'b1;
wbuffer_d[check_ptr_q1].hit_oh = rd_hit_oh_q;
wbuffer_d[check_ptr_q1].hit_oh = rd_hit_oh_q;
end
end
// if an invalidation or cache line refill comes in and hits on the write buffer,
// we have to discard our knowledge of the corresponding cacheline state
for (int k=0; k<DCACHE_WBUF_DEPTH; k++) begin
for (int k = 0; k < DCACHE_WBUF_DEPTH; k++) begin
if (inval_hit[k]) begin
wbuffer_d[k].checked = 1'b0;
end
@ -439,7 +463,7 @@ module wt_dcache_wbuffer import ariane_pkg::*; import wt_cache_pkg::*; #(
// once TX write response came back, we can clear the TX block. if it was not dirty, we
// can completely evict it - otherwise we have to leave it there for retransmission
if (evict) begin
for (int k=0; k<(riscv::XLEN/8); k++) begin
for (int k = 0; k < (riscv::XLEN / 8); k++) begin
if (tx_stat_q[rtrn_id].be[k]) begin
wbuffer_d[rtrn_ptr].txblock[k] = 1'b0;
if (!wbuffer_q[rtrn_ptr].dirty[k]) begin
@ -461,7 +485,7 @@ module wt_dcache_wbuffer import ariane_pkg::*; import wt_cache_pkg::*; #(
// mark bytes sent out to the memory system
if (miss_req_o && miss_ack_i) begin
dirty_rd_en = 1'b1;
for (int k=0; k<(riscv::XLEN/8); k++) begin
for (int k = 0; k < (riscv::XLEN / 8); k++) begin
if (tx_be[k]) begin
wbuffer_d[dirty_ptr].dirty[k] = 1'b0;
wbuffer_d[dirty_ptr].txblock[k] = 1'b1;
@ -473,25 +497,28 @@ module wt_dcache_wbuffer import ariane_pkg::*; import wt_cache_pkg::*; #(
if (req_port_i.data_req && rdy) begin
// in case we have an NI address, need to drain the buffer first
// in case we are serving an NI address, we block until it is written to memory
if (!ni_conflict) begin //empty of NI operations
wbuffer_wren = 1'b1;
if (!ni_conflict) begin //empty of NI operations
wbuffer_wren = 1'b1;
req_port_o.data_gnt = 1'b1;
ni_pending_d[wr_ptr] = is_ni;
req_port_o.data_gnt = 1'b1;
ni_pending_d[wr_ptr] = is_ni;
wbuffer_d[wr_ptr].checked = 1'b0;
wbuffer_d[wr_ptr].wtag = {req_port_i.address_tag, req_port_i.address_index[DCACHE_INDEX_WIDTH-1:riscv::XLEN_ALIGN_BYTES]};
wbuffer_d[wr_ptr].wtag = {
req_port_i.address_tag,
req_port_i.address_index[DCACHE_INDEX_WIDTH-1:riscv::XLEN_ALIGN_BYTES]
};
// mark bytes as dirty
for (int k=0; k<(riscv::XLEN/8); k++) begin
for (int k = 0; k < (riscv::XLEN / 8); k++) begin
if (req_port_i.data_be[k]) begin
wbuffer_d[wr_ptr].valid[k] = 1'b1;
wbuffer_d[wr_ptr].dirty[k] = 1'b1;
wbuffer_d[wr_ptr].data[k*8 +: 8] = req_port_i.data_wdata[k*8 +: 8];
wbuffer_d[wr_ptr].valid[k] = 1'b1;
wbuffer_d[wr_ptr].dirty[k] = 1'b1;
wbuffer_d[wr_ptr].data[k*8+:8] = req_port_i.data_wdata[k*8+:8];
if (ariane_pkg::DATA_USER_EN) begin
wbuffer_d[wr_ptr].user[k*8 +: 8] = req_port_i.data_wuser[k*8 +: 8];
wbuffer_d[wr_ptr].user[k*8+:8] = req_port_i.data_wuser[k*8+:8];
end else begin
wbuffer_d[wr_ptr].user[k*8 +: 8] = '0;
wbuffer_d[wr_ptr].user[k*8+:8] = '0;
end
end
end
@ -500,86 +527,96 @@ module wt_dcache_wbuffer import ariane_pkg::*; import wt_cache_pkg::*; #(
end
///////////////////////////////////////////////////////
// ff's
///////////////////////////////////////////////////////
///////////////////////////////////////////////////////
// ff's
///////////////////////////////////////////////////////
always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
if (!rst_ni) begin
wbuffer_q <= '{default: '0};
tx_stat_q <= '{default: '0};
ni_pending_q <= '0;
check_ptr_q <= '0;
check_ptr_q1 <= '0;
check_en_q <= '0;
check_en_q1 <= '0;
rd_tag_q <= '0;
rd_hit_oh_q <= '0;
wr_cl_vld_q <= '0;
wr_cl_idx_q <= '0;
wbuffer_q <= '{default: '0};
tx_stat_q <= '{default: '0};
ni_pending_q <= '0;
check_ptr_q <= '0;
check_ptr_q1 <= '0;
check_en_q <= '0;
check_en_q1 <= '0;
rd_tag_q <= '0;
rd_hit_oh_q <= '0;
wr_cl_vld_q <= '0;
wr_cl_idx_q <= '0;
end else begin
wbuffer_q <= wbuffer_d;
tx_stat_q <= tx_stat_d;
ni_pending_q <= ni_pending_d;
check_ptr_q <= check_ptr_d;
check_ptr_q1 <= check_ptr_q;
check_en_q <= check_en_d;
check_en_q1 <= check_en_q;
rd_tag_q <= rd_tag_d;
rd_hit_oh_q <= rd_hit_oh_d;
wr_cl_vld_q <= wr_cl_vld_d;
wr_cl_idx_q <= wr_cl_idx_d;
wbuffer_q <= wbuffer_d;
tx_stat_q <= tx_stat_d;
ni_pending_q <= ni_pending_d;
check_ptr_q <= check_ptr_d;
check_ptr_q1 <= check_ptr_q;
check_en_q <= check_en_d;
check_en_q1 <= check_en_q;
rd_tag_q <= rd_tag_d;
rd_hit_oh_q <= rd_hit_oh_d;
wr_cl_vld_q <= wr_cl_vld_d;
wr_cl_idx_q <= wr_cl_idx_d;
end
end
///////////////////////////////////////////////////////
// assertions
///////////////////////////////////////////////////////
///////////////////////////////////////////////////////
// assertions
///////////////////////////////////////////////////////
//pragma translate_off
//pragma translate_off
`ifndef VERILATOR
hot1: assert property (
@(posedge clk_i) disable iff (!rst_ni) req_port_i.data_req |-> $onehot0(wbuffer_hit_oh))
else $fatal(1,"[l1 dcache wbuffer] wbuffer_hit_oh signal must be hot1");
hot1 :
assert property (@(posedge clk_i) disable iff (!rst_ni) req_port_i.data_req |-> $onehot0(
wbuffer_hit_oh
))
else $fatal(1, "[l1 dcache wbuffer] wbuffer_hit_oh signal must be hot1");
tx_status: assert property (
tx_status :
assert property (
@(posedge clk_i) disable iff (!rst_ni) evict && miss_ack_i && miss_req_o |-> (tx_id != rtrn_id))
else $fatal(1,"[l1 dcache wbuffer] cannot allocate and clear same tx slot id in the same cycle");
else $fatal(1, "[l1 dcache wbuffer] cannot allocate and clear same tx slot id in the same cycle");
tx_valid0: assert property (
@(posedge clk_i) disable iff (!rst_ni) evict |-> tx_stat_q[rtrn_id].vld)
else $fatal(1,"[l1 dcache wbuffer] evicting invalid transaction slot");
tx_valid0 :
assert property (@(posedge clk_i) disable iff (!rst_ni) evict |-> tx_stat_q[rtrn_id].vld)
else $fatal(1, "[l1 dcache wbuffer] evicting invalid transaction slot");
tx_valid1: assert property (
@(posedge clk_i) disable iff (!rst_ni) evict |-> |wbuffer_q[rtrn_ptr].valid)
else $fatal(1,"[l1 dcache wbuffer] wbuffer entry corresponding to this transaction is invalid");
tx_valid1 :
assert property (@(posedge clk_i) disable iff (!rst_ni) evict |-> |wbuffer_q[rtrn_ptr].valid)
else $fatal(1, "[l1 dcache wbuffer] wbuffer entry corresponding to this transaction is invalid");
write_full: assert property (
write_full :
assert property (
@(posedge clk_i) disable iff (!rst_ni) req_port_i.data_req |-> req_port_o.data_gnt |-> ((!full) || (|wbuffer_hit_oh)))
else $fatal(1,"[l1 dcache wbuffer] cannot write if full or no hit");
else $fatal(1, "[l1 dcache wbuffer] cannot write if full or no hit");
unused0: assert property (
@(posedge clk_i) disable iff (!rst_ni) !req_port_i.tag_valid)
else $fatal(1,"[l1 dcache wbuffer] req_port_i.tag_valid should not be asserted");
unused0 :
assert property (@(posedge clk_i) disable iff (!rst_ni) !req_port_i.tag_valid)
else $fatal(1, "[l1 dcache wbuffer] req_port_i.tag_valid should not be asserted");
unused1: assert property (
@(posedge clk_i) disable iff (!rst_ni) !req_port_i.kill_req)
else $fatal(1,"[l1 dcache wbuffer] req_port_i.kill_req should not be asserted");
unused1 :
assert property (@(posedge clk_i) disable iff (!rst_ni) !req_port_i.kill_req)
else $fatal(1, "[l1 dcache wbuffer] req_port_i.kill_req should not be asserted");
for (genvar k=0; k<DCACHE_WBUF_DEPTH; k++) begin : gen_assert1
for (genvar j=0; j<(riscv::XLEN/8); j++) begin : gen_assert2
byteStates: assert property (
for (genvar k = 0; k < DCACHE_WBUF_DEPTH; k++) begin : gen_assert1
for (genvar j = 0; j < (riscv::XLEN / 8); j++) begin : gen_assert2
byteStates :
assert property (
@(posedge clk_i) disable iff (!rst_ni) {wbuffer_q[k].valid[j], wbuffer_q[k].dirty[j], wbuffer_q[k].txblock[j]} inside {3'b000, 3'b110, 3'b101, 3'b111} )
else $fatal(1,"[l1 dcache wbuffer] byte %02d of wbuffer entry %02d has invalid state: valid=%01b, dirty=%01b, txblock=%01b",
j,k,
else
$fatal(
1,
"[l1 dcache wbuffer] byte %02d of wbuffer entry %02d has invalid state: valid=%01b, dirty=%01b, txblock=%01b",
j,
k,
wbuffer_q[k].valid[j],
wbuffer_q[k].dirty[j],
wbuffer_q[k].txblock[j]);
wbuffer_q[k].txblock[j]
);
end
end
`endif
//pragma translate_on
//pragma translate_on
endmodule // wt_dcache_wbuffer
endmodule // wt_dcache_wbuffer

View file

@ -49,51 +49,54 @@
//
module wt_l15_adapter import ariane_pkg::*; import wt_cache_pkg::*; #(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
module wt_l15_adapter
import ariane_pkg::*;
import wt_cache_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) (
input logic clk_i,
input logic rst_ni,
input logic clk_i,
input logic rst_ni,
// icache
input logic icache_data_req_i,
output logic icache_data_ack_o,
input icache_req_t icache_data_i,
// returning packets must be consumed immediately
output logic icache_rtrn_vld_o,
output icache_rtrn_t icache_rtrn_o,
// icache
input logic icache_data_req_i,
output logic icache_data_ack_o,
input icache_req_t icache_data_i,
// returning packets must be consumed immediately
output logic icache_rtrn_vld_o,
output icache_rtrn_t icache_rtrn_o,
// dcache
input logic dcache_data_req_i,
output logic dcache_data_ack_o,
input dcache_req_t dcache_data_i,
// returning packets must be consumed immediately
output logic dcache_rtrn_vld_o,
output dcache_rtrn_t dcache_rtrn_o,
// dcache
input logic dcache_data_req_i,
output logic dcache_data_ack_o,
input dcache_req_t dcache_data_i,
// returning packets must be consumed immediately
output logic dcache_rtrn_vld_o,
output dcache_rtrn_t dcache_rtrn_o,
// L15
output l15_req_t l15_req_o,
input l15_rtrn_t l15_rtrn_i
// L15
output l15_req_t l15_req_o,
input l15_rtrn_t l15_rtrn_i
);
// request path
icache_req_t icache_data;
logic icache_data_full, icache_data_empty;
// request path
icache_req_t icache_data;
logic icache_data_full, icache_data_empty;
dcache_req_t dcache_data;
logic dcache_data_full, dcache_data_empty;
dcache_req_t dcache_data;
logic dcache_data_full, dcache_data_empty;
logic [1:0] arb_req, arb_ack;
logic arb_idx;
logic [1:0] arb_req, arb_ack;
logic arb_idx;
// return path
logic rtrn_fifo_empty, rtrn_fifo_full, rtrn_fifo_pop;
l15_rtrn_t rtrn_fifo_data;
// return path
logic rtrn_fifo_empty, rtrn_fifo_full, rtrn_fifo_pop;
l15_rtrn_t rtrn_fifo_data;
///////////////////////////////////////////////////////
// request path to L15
///////////////////////////////////////////////////////
///////////////////////////////////////////////////////
// request path to L15
///////////////////////////////////////////////////////
// relevant l15 signals
// l15_req_t l15_req_o.l15_rqtype; // see below for encoding
@ -108,58 +111,58 @@ l15_rtrn_t rtrn_fifo_data;
// logic [L15_TLB_CSM_WIDTH-1:0] l15_req_o.l15_csm_data;
assign icache_data_ack_o = icache_data_req_i & ~icache_data_full;
assign dcache_data_ack_o = dcache_data_req_i & ~dcache_data_full;
assign icache_data_ack_o = icache_data_req_i & ~icache_data_full;
assign dcache_data_ack_o = dcache_data_req_i & ~dcache_data_full;
// data mux
assign l15_req_o.l15_nc = (arb_idx) ? dcache_data.nc : icache_data.nc;
assign l15_req_o.l15_nc = (arb_idx) ? dcache_data.nc : icache_data.nc;
// icache fills are either cachelines or 4byte fills, depending on whether they go to the Piton I/O space or not.
assign l15_req_o.l15_size = (arb_idx) ? dcache_data.size :
(icache_data.nc) ? 3'b010 : 3'b111;
assign l15_req_o.l15_threadid = (arb_idx) ? dcache_data.tid : icache_data.tid;
assign l15_req_o.l15_prefetch = '0; // unused in openpiton
assign l15_req_o.l15_size = (arb_idx) ? dcache_data.size : (icache_data.nc) ? 3'b010 : 3'b111;
assign l15_req_o.l15_threadid = (arb_idx) ? dcache_data.tid : icache_data.tid;
assign l15_req_o.l15_prefetch = '0; // unused in openpiton
assign l15_req_o.l15_invalidate_cacheline = '0; // unused by Ariane as L1 has no ECC at the moment
assign l15_req_o.l15_blockstore = '0; // unused in openpiton
assign l15_req_o.l15_blockinitstore = '0; // unused in openpiton
assign l15_req_o.l15_l1rplway = (arb_idx) ? dcache_data.way : icache_data.way;
assign l15_req_o.l15_blockstore = '0; // unused in openpiton
assign l15_req_o.l15_blockinitstore = '0; // unused in openpiton
assign l15_req_o.l15_l1rplway = (arb_idx) ? dcache_data.way : icache_data.way;
assign l15_req_o.l15_address = (arb_idx) ? dcache_data.paddr :
icache_data.paddr;
assign l15_req_o.l15_address = (arb_idx) ? dcache_data.paddr : icache_data.paddr;
assign l15_req_o.l15_data_next_entry = '0; // unused in Ariane (only used for CAS atomic requests)
assign l15_req_o.l15_csm_data = '0; // unused in Ariane (only used for coherence domain restriction features)
assign l15_req_o.l15_amo_op = dcache_data.amo_op;
assign l15_req_o.l15_amo_op = dcache_data.amo_op;
// openpiton is big endian
if (CVA6Cfg.NOCType == config_pkg::NOC_TYPE_L15_BIG_ENDIAN) assign l15_req_o.l15_data = swendian64(dcache_data.data);
else if (CVA6Cfg.NOCType == config_pkg::NOC_TYPE_L15_LITTLE_ENDIAN) assign l15_req_o.l15_data = dcache_data.data;
else $fatal(1,"[wt_l15_adapter] Unsupported NOC type");
if (CVA6Cfg.NOCType == config_pkg::NOC_TYPE_L15_BIG_ENDIAN)
assign l15_req_o.l15_data = swendian64(dcache_data.data);
else if (CVA6Cfg.NOCType == config_pkg::NOC_TYPE_L15_LITTLE_ENDIAN)
assign l15_req_o.l15_data = dcache_data.data;
else $fatal(1, "[wt_l15_adapter] Unsupported NOC type");
// arbiter
rrarbiter #(
.NUM_REQ(2),
.LOCK_IN(1)
.NUM_REQ(2),
.LOCK_IN(1)
) i_rrarbiter (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.flush_i( '0 ),
.en_i ( l15_rtrn_i.l15_ack ),
.req_i ( arb_req ),
.ack_o ( arb_ack ),
.vld_o ( ),
.idx_o ( arb_idx )
.clk_i (clk_i),
.rst_ni (rst_ni),
.flush_i('0),
.en_i (l15_rtrn_i.l15_ack),
.req_i (arb_req),
.ack_o (arb_ack),
.vld_o (),
.idx_o (arb_idx)
);
assign arb_req = {~dcache_data_empty, ~icache_data_empty};
assign l15_req_o.l15_val = (|arb_req);// & ~header_ack_q;
assign l15_req_o.l15_val = (|arb_req); // & ~header_ack_q;
// encode packet type
always_comb begin : p_req
l15_req_o.l15_rqtype = L15_LOAD_RQ;
unique case (arb_idx)
0: begin// icache
0: begin // icache
l15_req_o.l15_rqtype = L15_IMISS_RQ;
end
1: begin
@ -179,53 +182,53 @@ l15_rtrn_t rtrn_fifo_data;
default: begin
;
end
endcase // dcache_data.rtype
endcase // dcache_data.rtype
end
default: begin
;
end
endcase
end // p_req
end // p_req
fifo_v2 #(
.dtype ( icache_req_t ),
.DEPTH ( ADAPTER_REQ_FIFO_DEPTH )
) i_icache_data_fifo (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.flush_i ( 1'b0 ),
.testmode_i ( 1'b0 ),
.full_o ( icache_data_full ),
.empty_o ( icache_data_empty ),
.alm_full_o ( ),
.alm_empty_o ( ),
.data_i ( icache_data_i ),
.push_i ( icache_data_ack_o ),
.data_o ( icache_data ),
.pop_i ( arb_ack[0] )
.dtype(icache_req_t),
.DEPTH(ADAPTER_REQ_FIFO_DEPTH)
) i_icache_data_fifo (
.clk_i (clk_i),
.rst_ni (rst_ni),
.flush_i (1'b0),
.testmode_i (1'b0),
.full_o (icache_data_full),
.empty_o (icache_data_empty),
.alm_full_o (),
.alm_empty_o(),
.data_i (icache_data_i),
.push_i (icache_data_ack_o),
.data_o (icache_data),
.pop_i (arb_ack[0])
);
fifo_v2 #(
.dtype ( dcache_req_t ),
.DEPTH ( ADAPTER_REQ_FIFO_DEPTH )
) i_dcache_data_fifo (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.flush_i ( 1'b0 ),
.testmode_i ( 1'b0 ),
.full_o ( dcache_data_full ),
.empty_o ( dcache_data_empty ),
.alm_full_o ( ),
.alm_empty_o ( ),
.data_i ( dcache_data_i ),
.push_i ( dcache_data_ack_o ),
.data_o ( dcache_data ),
.pop_i ( arb_ack[1] )
.dtype(dcache_req_t),
.DEPTH(ADAPTER_REQ_FIFO_DEPTH)
) i_dcache_data_fifo (
.clk_i (clk_i),
.rst_ni (rst_ni),
.flush_i (1'b0),
.testmode_i (1'b0),
.full_o (dcache_data_full),
.empty_o (dcache_data_empty),
.alm_full_o (),
.alm_empty_o(),
.data_i (dcache_data_i),
.push_i (dcache_data_ack_o),
.data_o (dcache_data),
.pop_i (arb_ack[1])
);
///////////////////////////////////////////////////////
// return path from L15
///////////////////////////////////////////////////////
///////////////////////////////////////////////////////
// return path from L15
///////////////////////////////////////////////////////
// relevant l15 signals
// l15_rtrn_i.l15_returntype; // see below for encoding
@ -253,13 +256,13 @@ l15_rtrn_t rtrn_fifo_data;
dcache_rtrn_o.rtype = DCACHE_LOAD_ACK;
icache_rtrn_vld_o = 1'b0;
dcache_rtrn_vld_o = 1'b0;
if(!rtrn_fifo_empty) begin
if (!rtrn_fifo_empty) begin
unique case (rtrn_fifo_data.l15_returntype)
L15_LOAD_RET: begin
L15_LOAD_RET: begin
dcache_rtrn_o.rtype = DCACHE_LOAD_ACK;
dcache_rtrn_vld_o = 1'b1;
end
L15_ST_ACK: begin
L15_ST_ACK: begin
dcache_rtrn_o.rtype = DCACHE_STORE_ACK;
dcache_rtrn_vld_o = 1'b1;
end
@ -282,107 +285,126 @@ l15_rtrn_t rtrn_fifo_data;
// dcache_rtrn_o.reqType = DCACHE_INT_ACK;
// end
default: begin
;
;
end
endcase // rtrn_fifo_data.l15_returntype
endcase // rtrn_fifo_data.l15_returntype
end
end
// openpiton is big endian
if (SwapEndianess) begin : gen_swap
assign dcache_rtrn_o.data = { swendian64(rtrn_fifo_data.l15_data_1),
swendian64(rtrn_fifo_data.l15_data_0) };
assign dcache_rtrn_o.data = {
swendian64(rtrn_fifo_data.l15_data_1), swendian64(rtrn_fifo_data.l15_data_0)
};
assign icache_rtrn_o.data = { swendian64(rtrn_fifo_data.l15_data_3),
swendian64(rtrn_fifo_data.l15_data_2),
swendian64(rtrn_fifo_data.l15_data_1),
swendian64(rtrn_fifo_data.l15_data_0) };
assign icache_rtrn_o.data = {
swendian64(rtrn_fifo_data.l15_data_3),
swendian64(rtrn_fifo_data.l15_data_2),
swendian64(rtrn_fifo_data.l15_data_1),
swendian64(rtrn_fifo_data.l15_data_0)
};
end else begin : gen_no_swap
assign dcache_rtrn_o.data = { rtrn_fifo_data.l15_data_1,
rtrn_fifo_data.l15_data_0 };
assign dcache_rtrn_o.data = {rtrn_fifo_data.l15_data_1, rtrn_fifo_data.l15_data_0};
assign icache_rtrn_o.data = { rtrn_fifo_data.l15_data_3,
rtrn_fifo_data.l15_data_2,
rtrn_fifo_data.l15_data_1,
rtrn_fifo_data.l15_data_0 };
assign icache_rtrn_o.data = {
rtrn_fifo_data.l15_data_3,
rtrn_fifo_data.l15_data_2,
rtrn_fifo_data.l15_data_1,
rtrn_fifo_data.l15_data_0
};
end
// fifo signals
assign icache_rtrn_o.tid = rtrn_fifo_data.l15_threadid;
assign dcache_rtrn_o.tid = rtrn_fifo_data.l15_threadid;
assign icache_rtrn_o.tid = rtrn_fifo_data.l15_threadid;
assign dcache_rtrn_o.tid = rtrn_fifo_data.l15_threadid;
// invalidation signal mapping
assign icache_rtrn_o.inv.idx = {rtrn_fifo_data.l15_inval_address_15_4, 4'b0000};
assign icache_rtrn_o.inv.way = rtrn_fifo_data.l15_inval_way;
assign icache_rtrn_o.inv.vld = rtrn_fifo_data.l15_inval_icache_inval;
assign icache_rtrn_o.inv.all = rtrn_fifo_data.l15_inval_icache_all_way;
assign icache_rtrn_o.inv.idx = {rtrn_fifo_data.l15_inval_address_15_4, 4'b0000};
assign icache_rtrn_o.inv.way = rtrn_fifo_data.l15_inval_way;
assign icache_rtrn_o.inv.vld = rtrn_fifo_data.l15_inval_icache_inval;
assign icache_rtrn_o.inv.all = rtrn_fifo_data.l15_inval_icache_all_way;
assign dcache_rtrn_o.inv.idx = {rtrn_fifo_data.l15_inval_address_15_4, 4'b0000};
assign dcache_rtrn_o.inv.way = rtrn_fifo_data.l15_inval_way;
assign dcache_rtrn_o.inv.vld = rtrn_fifo_data.l15_inval_dcache_inval;
assign dcache_rtrn_o.inv.all = rtrn_fifo_data.l15_inval_dcache_all_way;
assign dcache_rtrn_o.inv.idx = {rtrn_fifo_data.l15_inval_address_15_4, 4'b0000};
assign dcache_rtrn_o.inv.way = rtrn_fifo_data.l15_inval_way;
assign dcache_rtrn_o.inv.vld = rtrn_fifo_data.l15_inval_dcache_inval;
assign dcache_rtrn_o.inv.all = rtrn_fifo_data.l15_inval_dcache_all_way;
fifo_v2 #(
.dtype ( l15_rtrn_t ),
.DEPTH ( ADAPTER_RTRN_FIFO_DEPTH )
.dtype(l15_rtrn_t),
.DEPTH(ADAPTER_RTRN_FIFO_DEPTH)
) i_rtrn_fifo (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.flush_i ( 1'b0 ),
.testmode_i ( 1'b0 ),
.full_o ( rtrn_fifo_full ),
.empty_o ( rtrn_fifo_empty ),
.alm_full_o ( ),
.alm_empty_o ( ),
.data_i ( l15_rtrn_i ),
.push_i ( l15_req_o.l15_req_ack ),
.data_o ( rtrn_fifo_data ),
.pop_i ( rtrn_fifo_pop )
.clk_i (clk_i),
.rst_ni (rst_ni),
.flush_i (1'b0),
.testmode_i (1'b0),
.full_o (rtrn_fifo_full),
.empty_o (rtrn_fifo_empty),
.alm_full_o (),
.alm_empty_o(),
.data_i (l15_rtrn_i),
.push_i (l15_req_o.l15_req_ack),
.data_o (rtrn_fifo_data),
.pop_i (rtrn_fifo_pop)
);
///////////////////////////////////////////////////////
// assertions
///////////////////////////////////////////////////////
///////////////////////////////////////////////////////
// assertions
///////////////////////////////////////////////////////
//pragma translate_off
//pragma translate_off
`ifndef VERILATOR
  // Simulation-only protocol and parameter checks for the L1.5 adapter.
  // All concurrent assertions below are sampled on the rising edge of clk_i
  // and are disabled while rst_ni is low.

  // A valid return of type L15_EVICT_REQ must have at least one of the four
  // icache/dcache invalidation flags set.
  invalidations :
  assert property (
    @(posedge clk_i) disable iff (!rst_ni)
    l15_rtrn_i.l15_val |-> l15_rtrn_i.l15_returntype == L15_EVICT_REQ |->
      (l15_rtrn_i.l15_inval_icache_inval |
       l15_rtrn_i.l15_inval_dcache_inval |
       l15_rtrn_i.l15_inval_icache_all_way |
       l15_rtrn_i.l15_inval_dcache_all_way))
  else $fatal(1, "[l15_adapter] got invalidation package with zero invalidation flags");

  // A valid outgoing store request must not set either block-store flag.
  blockstore_o :
  assert property (
    @(posedge clk_i) disable iff (!rst_ni)
    l15_req_o.l15_val |-> l15_req_o.l15_rqtype == L15_STORE_RQ |->
      !(l15_req_o.l15_blockstore || l15_req_o.l15_blockinitstore))
  else $fatal(1, "[l15_adapter] blockstores are not supported (out)");

  // A valid incoming store acknowledge must not have l15_blockinitstore set.
  // NOTE(review): the original condition was `inside {L15_ST_ACK, L15_ST_ACK}`,
  // i.e. the same member listed twice. It is reduced here to the equivalent
  // equality check; a second return type may have been intended - confirm.
  blockstore_i :
  assert property (
    @(posedge clk_i) disable iff (!rst_ni)
    l15_rtrn_i.l15_val |-> l15_rtrn_i.l15_returntype == L15_ST_ACK |->
      !l15_rtrn_i.l15_blockinitstore)
  else $fatal(1, "[l15_adapter] blockstores are not supported (in)");

  // Any valid return must be one of the listed return types. A violation only
  // raises a warning. The format specifier is %04X; the previous "%X04"
  // printed the hex value followed by a literal "04".
  unsuported_rtrn_types :
  assert property (
    @(posedge clk_i) disable iff (!rst_ni)
    (l15_rtrn_i.l15_val |-> l15_rtrn_i.l15_returntype inside {
       L15_LOAD_RET, L15_ST_ACK, L15_IFILL_RET, L15_EVICT_REQ, L15_CPX_RESTYPE_ATOMIC_RES
     }))
  else
    $warning(
        "[l15_adapter] return type %04X is not (yet) supported by l15 adapter.",
        l15_rtrn_i.l15_returntype
    );

  // A valid return of type L15_CPX_RESTYPE_ATOMIC_RES must have l15_atomic set.
  amo_type :
  assert property (
    @(posedge clk_i) disable iff (!rst_ni)
    (l15_rtrn_i.l15_val |->
     l15_rtrn_i.l15_returntype inside {L15_CPX_RESTYPE_ATOMIC_RES} |->
     l15_rtrn_i.l15_atomic))
  else $fatal(1, "[l15_adapter] l15_atomic must be asserted when the return type is an ATOMIC_RES");

  // Parameter checks, evaluated once at the start of simulation.
  initial begin
    // the icache must not have more ways than the L1.5
    assert (L15_SET_ASSOC >= ICACHE_SET_ASSOC)
    else
      $fatal(
          1, "[l15_adapter] number of icache ways must be smaller or equal the number of L15 ways"
      );
    // the dcache must not have more ways than the L1.5
    assert (L15_SET_ASSOC >= DCACHE_SET_ASSOC)
    else
      $fatal(
          1, "[l15_adapter] number of dcache ways must be smaller or equal the number of L15 ways"
      );
    // invalidation address returned by L1.5 is 16 bit
    assert (16 >= DCACHE_INDEX_WIDTH && 16 >= ICACHE_INDEX_WIDTH)
    else $fatal(1, "[l15_adapter] maximum number of index bits supported by L1.5 is 16");
  end
`endif
//pragma translate_on
//pragma translate_on
endmodule // wt_l15_adapter
endmodule // wt_l15_adapter

View file

@ -13,282 +13,286 @@
// Description: Commits to the architectural state resulting from the scoreboard.
module commit_stage import ariane_pkg::*; #(
module commit_stage
import ariane_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
)(
input logic clk_i,
input logic rst_ni,
input logic halt_i, // request to halt the core
input logic flush_dcache_i, // request to flush dcache -> also flush the pipeline
output exception_t exception_o, // take exception to controller
output logic dirty_fp_state_o, // mark the F state as dirty
input logic single_step_i, // we are in single step debug mode
) (
input logic clk_i,
input logic rst_ni,
input logic halt_i, // request to halt the core
input logic flush_dcache_i, // request to flush dcache -> also flush the pipeline
output exception_t exception_o, // take exception to controller
output logic dirty_fp_state_o, // mark the F state as dirty
input logic single_step_i, // we are in single step debug mode
// from scoreboard
input scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr_i, // the instruction we want to commit
output logic [CVA6Cfg.NrCommitPorts-1:0] commit_ack_o, // acknowledge that we are indeed committing
// to register file
output logic [CVA6Cfg.NrCommitPorts-1:0][4:0] waddr_o, // register file write address
output logic [CVA6Cfg.NrCommitPorts-1:0][riscv::XLEN-1:0] wdata_o, // register file write data
output logic [CVA6Cfg.NrCommitPorts-1:0] we_gpr_o, // register file write enable
output logic [CVA6Cfg.NrCommitPorts-1:0] we_fpr_o, // floating point register enable
output logic [CVA6Cfg.NrCommitPorts-1:0][4:0] waddr_o, // register file write address
output logic [CVA6Cfg.NrCommitPorts-1:0][riscv::XLEN-1:0] wdata_o, // register file write data
output logic [CVA6Cfg.NrCommitPorts-1:0] we_gpr_o, // register file write enable
output logic [CVA6Cfg.NrCommitPorts-1:0] we_fpr_o, // floating point register enable
// Atomic memory operations
input amo_resp_t amo_resp_i, // result of AMO operation
input amo_resp_t amo_resp_i, // result of AMO operation
// to CSR file and PC Gen (because on certain CSR instructions we'll need to flush the whole pipeline)
output logic [riscv::VLEN-1:0] pc_o,
output logic [riscv::VLEN-1:0] pc_o,
// to/from CSR file
output fu_op csr_op_o, // decoded CSR operation
output riscv::xlen_t csr_wdata_o, // data to write to CSR
input riscv::xlen_t csr_rdata_i, // data to read from CSR
output fu_op csr_op_o, // decoded CSR operation
output riscv::xlen_t csr_wdata_o, // data to write to CSR
input riscv::xlen_t csr_rdata_i, // data to read from CSR
input exception_t csr_exception_i, // exception or interrupt occurred in CSR stage (the same as commit)
output logic csr_write_fflags_o, // write the fflags CSR
output logic csr_write_fflags_o, // write the fflags CSR
// commit signals to ex
output logic commit_lsu_o, // commit the pending store
input logic commit_lsu_ready_i, // commit buffer of LSU is ready
output logic [TRANS_ID_BITS-1:0] commit_tran_id_o, // transaction id of first commit port
output logic amo_valid_commit_o, // valid AMO in commit stage
input logic no_st_pending_i, // there is no store pending
output logic commit_csr_o, // commit the pending CSR instruction
output logic fence_i_o, // flush I$ and pipeline
output logic fence_o, // flush D$ and pipeline
output logic flush_commit_o, // request a pipeline flush
output logic sfence_vma_o // flush TLBs and pipeline
output logic commit_lsu_o, // commit the pending store
input logic commit_lsu_ready_i, // commit buffer of LSU is ready
output logic [TRANS_ID_BITS-1:0] commit_tran_id_o, // transaction id of first commit port
output logic amo_valid_commit_o, // valid AMO in commit stage
input logic no_st_pending_i, // there is no store pending
output logic commit_csr_o, // commit the pending CSR instruction
output logic fence_i_o, // flush I$ and pipeline
output logic fence_o, // flush D$ and pipeline
output logic flush_commit_o, // request a pipeline flush
output logic sfence_vma_o // flush TLBs and pipeline
);
// ila_0 i_ila_commit (
// .clk(clk_i), // input wire clk
// .probe0(commit_instr_i[0].pc), // input wire [63:0] probe0
// .probe1(commit_instr_i[1].pc), // input wire [63:0] probe1
// .probe2(commit_instr_i[0].valid), // input wire [0:0] probe2
// .probe3(commit_instr_i[1].valid), // input wire [0:0] probe3
// .probe4(commit_ack_o[0]), // input wire [0:0] probe4
// .probe5(commit_ack_o[0]), // input wire [0:0] probe5
// .probe6(1'b0), // input wire [0:0] probe6
// .probe7(1'b0), // input wire [0:0] probe7
// .probe8(1'b0), // input wire [0:0] probe8
// .probe9(1'b0) // input wire [0:0] probe9
// );
// ila_0 i_ila_commit (
// .clk(clk_i), // input wire clk
// .probe0(commit_instr_i[0].pc), // input wire [63:0] probe0
// .probe1(commit_instr_i[1].pc), // input wire [63:0] probe1
// .probe2(commit_instr_i[0].valid), // input wire [0:0] probe2
// .probe3(commit_instr_i[1].valid), // input wire [0:0] probe3
// .probe4(commit_ack_o[0]), // input wire [0:0] probe4
// .probe5(commit_ack_o[0]), // input wire [0:0] probe5
// .probe6(1'b0), // input wire [0:0] probe6
// .probe7(1'b0), // input wire [0:0] probe7
// .probe8(1'b0), // input wire [0:0] probe8
// .probe9(1'b0) // input wire [0:0] probe9
// );
for (genvar i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin : gen_waddr
assign waddr_o[i] = commit_instr_i[i].rd[4:0];
for (genvar i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin : gen_waddr
assign waddr_o[i] = commit_instr_i[i].rd[4:0];
end
assign pc_o = commit_instr_i[0].pc;
// Dirty the FP state if we are committing anything related to the FPU
always_comb begin : dirty_fp_state
dirty_fp_state_o = 1'b0;
for (int i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin
dirty_fp_state_o |= commit_ack_o[i] & (commit_instr_i[i].fu inside {FPU, FPU_VEC} || (CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(
commit_instr_i[i].op
)));
// Check if we issued a vector floating-point instruction to the accelerator
dirty_fp_state_o |= commit_instr_i[i].fu == ACCEL && commit_instr_i[i].vfp;
end
end
assign pc_o = commit_instr_i[0].pc;
// Dirty the FP state if we are committing anything related to the FPU
always_comb begin : dirty_fp_state
dirty_fp_state_o = 1'b0;
for (int i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin
dirty_fp_state_o |= commit_ack_o[i] & (commit_instr_i[i].fu inside {FPU, FPU_VEC} || (CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(commit_instr_i[i].op)));
// Check if we issued a vector floating-point instruction to the accelerator
dirty_fp_state_o |= commit_instr_i[i].fu == ACCEL && commit_instr_i[i].vfp;
assign commit_tran_id_o = commit_instr_i[0].trans_id;
logic instr_0_is_amo;
assign instr_0_is_amo = is_amo(commit_instr_i[0].op);
// -------------------
// Commit Instruction
// -------------------
// write register file or commit instruction in LSU or CSR Buffer
always_comb begin : commit
// default assignments
commit_ack_o[0] = 1'b0;
amo_valid_commit_o = 1'b0;
we_gpr_o[0] = 1'b0;
we_fpr_o = '{default: 1'b0};
commit_lsu_o = 1'b0;
commit_csr_o = 1'b0;
// amos will commit on port 0
wdata_o[0] = (amo_resp_i.ack) ? amo_resp_i.result[riscv::XLEN-1:0] : commit_instr_i[0].result;
csr_op_o = ADD; // this corresponds to a CSR NOP
csr_wdata_o = {riscv::XLEN{1'b0}};
fence_i_o = 1'b0;
fence_o = 1'b0;
sfence_vma_o = 1'b0;
csr_write_fflags_o = 1'b0;
flush_commit_o = 1'b0;
// we will not commit the instruction if we took an exception
// and we do not commit the instruction if we requested a halt
if (commit_instr_i[0].valid && !commit_instr_i[0].ex.valid && !halt_i) begin
// we can definitely write the register file
// if the instruction is not committing anything the destination
commit_ack_o[0] = 1'b1;
if (CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(commit_instr_i[0].op)) begin
we_fpr_o[0] = 1'b1;
end else begin
we_gpr_o[0] = 1'b1;
end
// check whether the instruction we retire was a store
if (commit_instr_i[0].fu == STORE && !instr_0_is_amo) begin
// check if the LSU is ready to accept another commit entry (e.g.: a non-speculative store)
if (commit_lsu_ready_i) begin
commit_ack_o[0] = 1'b1;
commit_lsu_o = 1'b1;
// stall in case the store buffer is not able to accept anymore instructions
end else begin
commit_ack_o[0] = 1'b0;
end
end
// ---------
// FPU Flags
// ---------
if (CVA6Cfg.FpPresent) begin
if (commit_instr_i[0].fu inside {FPU, FPU_VEC}) begin
// write the CSR with potential exception flags from retiring floating point instruction
csr_wdata_o = {{riscv::XLEN - 5{1'b0}}, commit_instr_i[0].ex.cause[4:0]};
csr_write_fflags_o = 1'b1;
commit_ack_o[0] = 1'b1;
end
end
// ---------
// CSR Logic
// ---------
// check whether the instruction we retire was a CSR instruction and it did not
// throw an exception
if (commit_instr_i[0].fu == CSR) begin
// write the CSR file
csr_op_o = commit_instr_i[0].op;
csr_wdata_o = commit_instr_i[0].result;
if (!csr_exception_i.valid) begin
commit_csr_o = 1'b1;
wdata_o[0] = csr_rdata_i;
commit_ack_o[0] = 1'b1;
end else begin
commit_ack_o[0] = 1'b0;
we_gpr_o[0] = 1'b0;
end
end
// ------------------
// SFENCE.VMA Logic
// ------------------
// sfence.vma is idempotent so we can safely re-execute it after returning
// from interrupt service routine
// check if this instruction was a SFENCE_VMA
if (commit_instr_i[0].op == SFENCE_VMA) begin
// no store pending so we can flush the TLBs and pipeline
sfence_vma_o = no_st_pending_i;
// wait for the store buffer to drain until flushing the pipeline
commit_ack_o[0] = no_st_pending_i;
end
// ------------------
// FENCE.I Logic
// ------------------
// fence.i is idempotent so we can safely re-execute it after returning
// from interrupt service routine
// Fence synchronizes data and instruction streams. That means that we need to flush the private icache
// and the private dcache. This is the most expensive instruction.
if (commit_instr_i[0].op == FENCE_I || (flush_dcache_i && commit_instr_i[0].fu != STORE)) begin
commit_ack_o[0] = no_st_pending_i;
// tell the controller to flush the I$
fence_i_o = no_st_pending_i;
end
// ------------------
// FENCE Logic
// ------------------
// fence is idempotent so we can safely re-execute it after returning
// from interrupt service routine
if (commit_instr_i[0].op == FENCE) begin
commit_ack_o[0] = no_st_pending_i;
// tell the controller to flush the D$
fence_o = no_st_pending_i;
end
// ------------------
// AMO
// ------------------
if (CVA6Cfg.RVA && instr_0_is_amo) begin
// AMO finished
commit_ack_o[0] = amo_resp_i.ack;
// flush the pipeline
flush_commit_o = amo_resp_i.ack;
amo_valid_commit_o = 1'b1;
we_gpr_o[0] = amo_resp_i.ack;
end
end
assign commit_tran_id_o = commit_instr_i[0].trans_id;
if (CVA6Cfg.NrCommitPorts > 1) begin
logic instr_0_is_amo;
assign instr_0_is_amo = is_amo(commit_instr_i[0].op);
// -------------------
// Commit Instruction
// -------------------
// write register file or commit instruction in LSU or CSR Buffer
always_comb begin : commit
// default assignments
commit_ack_o[0] = 1'b0;
commit_ack_o[1] = 1'b0;
we_gpr_o[1] = 1'b0;
wdata_o[1] = commit_instr_i[1].result;
amo_valid_commit_o = 1'b0;
we_gpr_o[0] = 1'b0;
we_fpr_o = '{default: 1'b0};
commit_lsu_o = 1'b0;
commit_csr_o = 1'b0;
// amos will commit on port 0
wdata_o[0] = (amo_resp_i.ack) ? amo_resp_i.result[riscv::XLEN-1:0] : commit_instr_i[0].result;
csr_op_o = ADD; // this corresponds to a CSR NOP
csr_wdata_o = {riscv::XLEN{1'b0}};
fence_i_o = 1'b0;
fence_o = 1'b0;
sfence_vma_o = 1'b0;
csr_write_fflags_o = 1'b0;
flush_commit_o = 1'b0;
// we will not commit the instruction if we took an exception
// and we do not commit the instruction if we requested a halt
if (commit_instr_i[0].valid && !commit_instr_i[0].ex.valid && !halt_i) begin
// we can definitely write the register file
// if the instruction is not committing anything the destination
commit_ack_o[0] = 1'b1;
if (CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(commit_instr_i[0].op)) begin
we_fpr_o[0] = 1'b1;
end else begin
we_gpr_o[0] = 1'b1;
end
// check whether the instruction we retire was a store
if (commit_instr_i[0].fu == STORE && !instr_0_is_amo) begin
// check if the LSU is ready to accept another commit entry (e.g.: a non-speculative store)
if (commit_lsu_ready_i) begin
commit_ack_o[0] = 1'b1;
commit_lsu_o = 1'b1;
// stall in case the store buffer is not able to accept anymore instructions
end else begin
commit_ack_o[0] = 1'b0;
end
end
// ---------
// FPU Flags
// ---------
if(CVA6Cfg.FpPresent) begin
if (commit_instr_i[0].fu inside {FPU, FPU_VEC}) begin
// write the CSR with potential exception flags from retiring floating point instruction
csr_wdata_o = {{riscv::XLEN-5{1'b0}}, commit_instr_i[0].ex.cause[4:0]};
csr_write_fflags_o = 1'b1;
commit_ack_o[0] = 1'b1;
end
end
// ---------
// CSR Logic
// ---------
// check whether the instruction we retire was a CSR instruction and it did not
// throw an exception
if (commit_instr_i[0].fu == CSR) begin
// write the CSR file
csr_op_o = commit_instr_i[0].op;
csr_wdata_o = commit_instr_i[0].result;
if (!csr_exception_i.valid) begin
commit_csr_o = 1'b1;
wdata_o[0] = csr_rdata_i;
commit_ack_o[0] = 1'b1;
end else begin
commit_ack_o[0] = 1'b0;
we_gpr_o[0] = 1'b0;
end
end
// ------------------
// SFENCE.VMA Logic
// ------------------
// sfence.vma is idempotent so we can safely re-execute it after returning
// from interrupt service routine
// check if this instruction was a SFENCE_VMA
if (commit_instr_i[0].op == SFENCE_VMA) begin
// no store pending so we can flush the TLBs and pipeline
sfence_vma_o = no_st_pending_i;
// wait for the store buffer to drain until flushing the pipeline
commit_ack_o[0] = no_st_pending_i;
end
// ------------------
// FENCE.I Logic
// ------------------
// fence.i is idempotent so we can safely re-execute it after returning
// from interrupt service routine
// Fence synchronizes data and instruction streams. That means that we need to flush the private icache
// and the private dcache. This is the most expensive instruction.
if (commit_instr_i[0].op == FENCE_I || (flush_dcache_i && commit_instr_i[0].fu != STORE)) begin
commit_ack_o[0] = no_st_pending_i;
// tell the controller to flush the I$
fence_i_o = no_st_pending_i;
end
// ------------------
// FENCE Logic
// ------------------
// fence is idempotent so we can safely re-execute it after returning
// from interrupt service routine
if (commit_instr_i[0].op == FENCE) begin
commit_ack_o[0] = no_st_pending_i;
// tell the controller to flush the D$
fence_o = no_st_pending_i;
end
// ------------------
// AMO
// ------------------
if (CVA6Cfg.RVA && instr_0_is_amo) begin
// AMO finished
commit_ack_o[0] = amo_resp_i.ack;
// flush the pipeline
flush_commit_o = amo_resp_i.ack;
amo_valid_commit_o = 1'b1;
we_gpr_o[0] = amo_resp_i.ack;
end
end
if (CVA6Cfg.NrCommitPorts > 1) begin
commit_ack_o[1] = 1'b0;
we_gpr_o[1] = 1'b0;
wdata_o[1] = commit_instr_i[1].result;
// -----------------
// Commit Port 2
// -----------------
// check if the second instruction can be committed as well and the first wasn't a CSR instruction
// also if we are in single step mode don't retire the second instruction
if (commit_ack_o[0] && commit_instr_i[1].valid
// -----------------
// Commit Port 2
// -----------------
// check if the second instruction can be committed as well and the first wasn't a CSR instruction
// also if we are in single step mode don't retire the second instruction
if (commit_ack_o[0] && commit_instr_i[1].valid
&& !halt_i
&& !(commit_instr_i[0].fu inside {CSR})
&& !flush_dcache_i
&& !instr_0_is_amo
&& !single_step_i) begin
// only if the first instruction didn't throw an exception and this instruction won't throw an exception
// and the functional unit is of type ALU, LOAD, CTRL_FLOW, MULT, FPU or FPU_VEC
if (!exception_o.valid && !commit_instr_i[1].ex.valid
// only if the first instruction didn't throw an exception and this instruction won't throw an exception
// and the functional unit is of type ALU, LOAD, CTRL_FLOW, MULT, FPU or FPU_VEC
if (!exception_o.valid && !commit_instr_i[1].ex.valid
&& (commit_instr_i[1].fu inside {ALU, LOAD, CTRL_FLOW, MULT, FPU, FPU_VEC})) begin
if (CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(commit_instr_i[1].op))
we_fpr_o[1] = 1'b1;
else
we_gpr_o[1] = 1'b1;
if (CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(commit_instr_i[1].op)) we_fpr_o[1] = 1'b1;
else we_gpr_o[1] = 1'b1;
commit_ack_o[1] = 1'b1;
commit_ack_o[1] = 1'b1;
// additionally check if we are retiring an FPU instruction because we need to make sure that we write all
// exception flags
if (CVA6Cfg.FpPresent && commit_instr_i[1].fu inside {FPU, FPU_VEC}) begin
if (csr_write_fflags_o)
csr_wdata_o = {{riscv::XLEN-5{1'b0}}, (commit_instr_i[0].ex.cause[4:0] | commit_instr_i[1].ex.cause[4:0])};
else
csr_wdata_o = {{riscv::XLEN-5{1'b0}}, commit_instr_i[1].ex.cause[4:0]};
// additionally check if we are retiring an FPU instruction because we need to make sure that we write all
// exception flags
if (CVA6Cfg.FpPresent && commit_instr_i[1].fu inside {FPU, FPU_VEC}) begin
if (csr_write_fflags_o)
csr_wdata_o = {
{riscv::XLEN - 5{1'b0}},
(commit_instr_i[0].ex.cause[4:0] | commit_instr_i[1].ex.cause[4:0])
};
else csr_wdata_o = {{riscv::XLEN - 5{1'b0}}, commit_instr_i[1].ex.cause[4:0]};
csr_write_fflags_o = 1'b1;
end
end
end
csr_write_fflags_o = 1'b1;
end
end
end
end
end
// -----------------------------
// Exception & Interrupt Logic
// -----------------------------
// here we know for sure that we are taking the exception
always_comb begin : exception_handling
// Multiple simultaneous interrupts and traps at the same privilege level are handled in the following decreasing
// priority order: external interrupts, software interrupts, timer interrupts, then finally any synchronous traps. (1.10 p.30)
// interrupts are correctly prioritized in the CSR reg file, exceptions are prioritized here
exception_o.valid = 1'b0;
exception_o.cause = '0;
exception_o.tval = '0;
// we need a valid instruction in the commit stage
if (commit_instr_i[0].valid) begin
// ------------------------
// check for CSR exception
// ------------------------
if (csr_exception_i.valid) begin
exception_o = csr_exception_i;
// if no earlier exception happened the commit instruction will still contain
// the instruction bits from the ID stage. If an earlier exception happened we don't care
// as we will overwrite it anyway in the next IF bl
exception_o.tval = commit_instr_i[0].ex.tval;
end
// ------------------------
// Earlier Exceptions
// ------------------------
// but we give precedence to exceptions which happened earlier e.g.: instruction page
// faults for example
if (commit_instr_i[0].ex.valid) begin
exception_o = commit_instr_i[0].ex;
end
end
// Don't take any exceptions iff:
// - If we halted the processor
if (halt_i) begin
exception_o.valid = 1'b0;
end
// -----------------------------
// Exception & Interrupt Logic
// -----------------------------
// here we know for sure that we are taking the exception
always_comb begin : exception_handling
// Multiple simultaneous interrupts and traps at the same privilege level are handled in the following decreasing
// priority order: external interrupts, software interrupts, timer interrupts, then finally any synchronous traps. (1.10 p.30)
// interrupts are correctly prioritized in the CSR reg file, exceptions are prioritized here
exception_o.valid = 1'b0;
exception_o.cause = '0;
exception_o.tval = '0;
// we need a valid instruction in the commit stage
if (commit_instr_i[0].valid) begin
// ------------------------
// check for CSR exception
// ------------------------
if (csr_exception_i.valid) begin
exception_o = csr_exception_i;
// if no earlier exception happened the commit instruction will still contain
// the instruction bits from the ID stage. If an earlier exception happened we don't care
// as we will overwrite it anyway in the next IF bl
exception_o.tval = commit_instr_i[0].ex.tval;
end
// ------------------------
// Earlier Exceptions
// ------------------------
// but we give precedence to exceptions which happened earlier e.g.: instruction page
// faults for example
if (commit_instr_i[0].ex.valid) begin
exception_o = commit_instr_i[0].ex;
end
end
// Don't take any exceptions iff:
// - If we halted the processor
if (halt_i) begin
exception_o.valid = 1'b0;
end
end
endmodule

File diff suppressed because it is too large Load diff

View file

@ -13,174 +13,176 @@
// Description: Flush controller
module controller import ariane_pkg::*; #(
module controller
import ariane_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) (
input logic clk_i,
input logic rst_ni,
output logic set_pc_commit_o, // Set PC om PC Gen
output logic flush_if_o, // Flush the IF stage
output logic flush_unissued_instr_o, // Flush un-issued instructions of the scoreboard
output logic flush_id_o, // Flush ID stage
output logic flush_ex_o, // Flush EX stage
output logic flush_bp_o, // Flush branch predictors
output logic flush_icache_o, // Flush ICache
output logic flush_dcache_o, // Flush DCache
input logic flush_dcache_ack_i, // Acknowledge the whole DCache Flush
output logic flush_tlb_o, // Flush TLBs
input logic clk_i,
input logic rst_ni,
output logic set_pc_commit_o, // Set PC om PC Gen
output logic flush_if_o, // Flush the IF stage
output logic flush_unissued_instr_o, // Flush un-issued instructions of the scoreboard
output logic flush_id_o, // Flush ID stage
output logic flush_ex_o, // Flush EX stage
output logic flush_bp_o, // Flush branch predictors
output logic flush_icache_o, // Flush ICache
output logic flush_dcache_o, // Flush DCache
input logic flush_dcache_ack_i, // Acknowledge the whole DCache Flush
output logic flush_tlb_o, // Flush TLBs
input logic halt_csr_i, // Halt request from CSR (WFI instruction)
input logic halt_acc_i, // Halt request from accelerator dispatcher
output logic halt_o, // Halt signal to commit stage
input logic eret_i, // Return from exception
input logic ex_valid_i, // We got an exception, flush the pipeline
input logic set_debug_pc_i, // set the debug pc from CSR
input logic halt_csr_i, // Halt request from CSR (WFI instruction)
input logic halt_acc_i, // Halt request from accelerator dispatcher
output logic halt_o, // Halt signal to commit stage
input logic eret_i, // Return from exception
input logic ex_valid_i, // We got an exception, flush the pipeline
input logic set_debug_pc_i, // set the debug pc from CSR
input bp_resolve_t resolved_branch_i, // We got a resolved branch, check if we need to flush the front-end
input logic flush_csr_i, // We got an instruction which altered the CSR, flush the pipeline
input logic fence_i_i, // fence.i in
input logic fence_i, // fence in
input logic sfence_vma_i, // We got an instruction to flush the TLBs and pipeline
input logic flush_commit_i, // Flush request from commit stage
input logic flush_acc_i // Flush request from accelerator
input logic flush_csr_i, // We got an instruction which altered the CSR, flush the pipeline
input logic fence_i_i, // fence.i in
input logic fence_i, // fence in
input logic sfence_vma_i, // We got an instruction to flush the TLBs and pipeline
input logic flush_commit_i, // Flush request from commit stage
input logic flush_acc_i // Flush request from accelerator
);
// active fence - high if we are currently flushing the dcache
logic fence_active_d, fence_active_q;
logic flush_dcache;
// active fence - high if we are currently flushing the dcache
logic fence_active_d, fence_active_q;
logic flush_dcache;
// ------------
// Flush CTRL
// ------------
always_comb begin : flush_ctrl
fence_active_d = fence_active_q;
set_pc_commit_o = 1'b0;
flush_if_o = 1'b0;
flush_unissued_instr_o = 1'b0;
flush_id_o = 1'b0;
flush_ex_o = 1'b0;
flush_dcache = 1'b0;
flush_icache_o = 1'b0;
flush_tlb_o = 1'b0;
flush_bp_o = 1'b0;
// ------------
// Flush CTRL
// Mis-predict
// ------------
always_comb begin : flush_ctrl
fence_active_d = fence_active_q;
set_pc_commit_o = 1'b0;
flush_if_o = 1'b0;
flush_unissued_instr_o = 1'b0;
flush_id_o = 1'b0;
flush_ex_o = 1'b0;
flush_dcache = 1'b0;
flush_icache_o = 1'b0;
flush_tlb_o = 1'b0;
flush_bp_o = 1'b0;
// ------------
// Mis-predict
// ------------
// flush on mispredict
if (resolved_branch_i.is_mispredict) begin
// flush only un-issued instructions
flush_unissued_instr_o = 1'b1;
// and if stage
flush_if_o = 1'b1;
end
// flush on mispredict
if (resolved_branch_i.is_mispredict) begin
// flush only un-issued instructions
flush_unissued_instr_o = 1'b1;
// and if stage
flush_if_o = 1'b1;
end
// ---------------------------------
// FENCE
// ---------------------------------
if (fence_i) begin
// this can be seen as a CSR instruction with side-effect
set_pc_commit_o = 1'b1;
flush_if_o = 1'b1;
flush_unissued_instr_o = 1'b1;
flush_id_o = 1'b1;
flush_ex_o = 1'b1;
// flushing the dcache is only needed for a write-back cache;
// it is not needed when we have a write-through cache
if (DCACHE_TYPE == int'(config_pkg::WB)) begin
flush_dcache = 1'b1;
fence_active_d = 1'b1;
end
end
// ---------------------------------
// FENCE.I
// ---------------------------------
if (fence_i_i) begin
set_pc_commit_o = 1'b1;
flush_if_o = 1'b1;
flush_unissued_instr_o = 1'b1;
flush_id_o = 1'b1;
flush_ex_o = 1'b1;
flush_icache_o = 1'b1;
// flushing the dcache is only needed for a write-back cache;
// it is not needed when we have a write-through cache
if (DCACHE_TYPE == int'(config_pkg::WB)) begin
flush_dcache = 1'b1;
fence_active_d = 1'b1;
end
end
// waiting for the flush acknowledge is only needed for a write-back cache;
// it is not needed when we have a write-through cache
// ---------------------------------
// FENCE
// ---------------------------------
if (fence_i) begin
// this can be seen as a CSR instruction with side-effect
set_pc_commit_o = 1'b1;
flush_if_o = 1'b1;
flush_unissued_instr_o = 1'b1;
flush_id_o = 1'b1;
flush_ex_o = 1'b1;
// flushing the dcache is only needed for a write-back cache;
// it is not needed when we have a write-through cache
if (DCACHE_TYPE == int'(config_pkg::WB)) begin
// wait for the acknowledge here
if (flush_dcache_ack_i && fence_active_q) begin
fence_active_d = 1'b0;
// keep the flush dcache signal high as long as we didn't get the acknowledge from the cache
end else if (fence_active_q) begin
flush_dcache = 1'b1;
end
flush_dcache = 1'b1;
fence_active_d = 1'b1;
end
// ---------------------------------
// SFENCE.VMA
// ---------------------------------
if (sfence_vma_i) begin
set_pc_commit_o = 1'b1;
flush_if_o = 1'b1;
flush_unissued_instr_o = 1'b1;
flush_id_o = 1'b1;
flush_ex_o = 1'b1;
flush_tlb_o = 1'b1;
end
// Set PC to commit stage and flush pipeline
if (flush_csr_i || flush_commit_i || flush_acc_i) begin
set_pc_commit_o = 1'b1;
flush_if_o = 1'b1;
flush_unissued_instr_o = 1'b1;
flush_id_o = 1'b1;
flush_ex_o = 1'b1;
end
// ---------------------------------
// 1. Exception
// 2. Return from exception
// ---------------------------------
if (ex_valid_i || eret_i || set_debug_pc_i) begin
// don't flush pcgen as we want to take the exception: Flush PCGen is not a flush signal
// for the PC Gen stage but instead tells it to take the PC we gave it
set_pc_commit_o = 1'b0;
flush_if_o = 1'b1;
flush_unissued_instr_o = 1'b1;
flush_id_o = 1'b1;
flush_ex_o = 1'b1;
// this potentially reduces performance, but is needed
// to suppress speculative fetches to virtual memory from
// machine mode. TODO: remove when PMA checkers have been
// added to the system
flush_bp_o = 1'b1;
end
end
// ----------------------
// Halt Logic
// ----------------------
always_comb begin
// halt the core if the fence is active
halt_o = halt_csr_i || halt_acc_i || fence_active_q;
// ---------------------------------
// FENCE.I
// ---------------------------------
if (fence_i_i) begin
set_pc_commit_o = 1'b1;
flush_if_o = 1'b1;
flush_unissued_instr_o = 1'b1;
flush_id_o = 1'b1;
flush_ex_o = 1'b1;
flush_icache_o = 1'b1;
// flushing the dcache is only needed for a write-back cache;
// it is not needed when we have a write-through cache
if (DCACHE_TYPE == int'(config_pkg::WB)) begin
flush_dcache = 1'b1;
fence_active_d = 1'b1;
end
end
// ----------------------
// Registers
// ----------------------
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
fence_active_q <= 1'b0;
flush_dcache_o <= 1'b0;
end else begin
fence_active_q <= fence_active_d;
// register on the flush signal, this signal might be critical
flush_dcache_o <= flush_dcache;
end
// waiting for the flush acknowledge is only needed for a write-back cache;
// it is not needed when we have a write-through cache
if (DCACHE_TYPE == int'(config_pkg::WB)) begin
// wait for the acknowledge here
if (flush_dcache_ack_i && fence_active_q) begin
fence_active_d = 1'b0;
// keep the flush dcache signal high as long as we didn't get the acknowledge from the cache
end else if (fence_active_q) begin
flush_dcache = 1'b1;
end
end
// ---------------------------------
// SFENCE.VMA
// ---------------------------------
if (sfence_vma_i) begin
set_pc_commit_o = 1'b1;
flush_if_o = 1'b1;
flush_unissued_instr_o = 1'b1;
flush_id_o = 1'b1;
flush_ex_o = 1'b1;
flush_tlb_o = 1'b1;
end
// Set PC to commit stage and flush pipeline
if (flush_csr_i || flush_commit_i || flush_acc_i) begin
set_pc_commit_o = 1'b1;
flush_if_o = 1'b1;
flush_unissued_instr_o = 1'b1;
flush_id_o = 1'b1;
flush_ex_o = 1'b1;
end
// ---------------------------------
// 1. Exception
// 2. Return from exception
// ---------------------------------
if (ex_valid_i || eret_i || set_debug_pc_i) begin
// don't flush pcgen as we want to take the exception: Flush PCGen is not a flush signal
// for the PC Gen stage but instead tells it to take the PC we gave it
set_pc_commit_o = 1'b0;
flush_if_o = 1'b1;
flush_unissued_instr_o = 1'b1;
flush_id_o = 1'b1;
flush_ex_o = 1'b1;
// this potentially reduces performance, but is needed
// to suppress speculative fetches to virtual memory from
// machine mode. TODO: remove when PMA checkers have been
// added to the system
flush_bp_o = 1'b1;
end
end
// ----------------------
// Halt Logic
// ----------------------
always_comb begin
  // The core is halted while a fence is still active, or whenever one of
  // the two halt inputs (halt_csr_i / halt_acc_i) is asserted.
  halt_o = fence_active_q || halt_acc_i || halt_csr_i;
end
// ----------------------
// Registers
// ----------------------
always_ff @(posedge clk_i or negedge rst_ni) begin
  if (~rst_ni) begin
    // asynchronous, active-low reset: no flush request, no fence in flight
    flush_dcache_o <= 1'b0;
    fence_active_q <= 1'b0;
  end else begin
    // the data cache flush request leaves the module through a flop,
    // as this signal might be timing critical
    flush_dcache_o <= flush_dcache;
    fence_active_q <= fence_active_d;
  end
end
endmodule

View file

@ -14,62 +14,63 @@
// to the scoreboard.
module csr_buffer import ariane_pkg::*; #(
module csr_buffer
import ariane_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) (
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic flush_i,
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic flush_i,
input fu_data_t fu_data_i,
input fu_data_t fu_data_i,
output logic csr_ready_o, // FU is ready e.g. not busy
input logic csr_valid_i, // Input is valid
output riscv::xlen_t csr_result_o,
input logic csr_commit_i, // commit the pending CSR OP
output logic csr_ready_o, // FU is ready e.g. not busy
input logic csr_valid_i, // Input is valid
output riscv::xlen_t csr_result_o,
input logic csr_commit_i, // commit the pending CSR OP
// to CSR file
output logic [11:0] csr_addr_o // CSR address to commit stage
output logic [11:0] csr_addr_o // CSR address to commit stage
);
// this is a single entry store buffer for the address of the CSR
// which we are going to need in the commit stage
struct packed {
logic [11:0] csr_address;
logic valid;
} csr_reg_n, csr_reg_q;
// this is a single entry store buffer for the address of the CSR
// which we are going to need in the commit stage
struct packed {
logic [11:0] csr_address;
logic valid;
}
csr_reg_n, csr_reg_q;
// control logic, scoreboard signals
assign csr_result_o = fu_data_i.operand_a;
assign csr_addr_o = csr_reg_q.csr_address;
// control logic, scoreboard signals
assign csr_result_o = fu_data_i.operand_a;
assign csr_addr_o = csr_reg_q.csr_address;
// write logic
always_comb begin : write
csr_reg_n = csr_reg_q;
// by default we are ready
csr_ready_o = 1'b1;
// if we hold a valid uncommitted CSR request, or are just receiving one, WITHOUT a commit coming in, we are not ready
if ((csr_reg_q.valid || csr_valid_i) && ~csr_commit_i)
csr_ready_o = 1'b0;
// if we got a valid from the scoreboard
// store the CSR address
if (csr_valid_i) begin
csr_reg_n.csr_address = fu_data_i.operand_b[11:0];
csr_reg_n.valid = 1'b1;
end
// if we get a commit and no new valid instruction -> clear the valid bit
if (csr_commit_i && ~csr_valid_i) begin
csr_reg_n.valid = 1'b0;
end
// clear the buffer if we flushed
if (flush_i)
csr_reg_n.valid = 1'b0;
// write logic
always_comb begin : write
csr_reg_n = csr_reg_q;
// by default we are ready
csr_ready_o = 1'b1;
// if we hold a valid uncommitted CSR request, or are just receiving one, WITHOUT a commit coming in, we are not ready
if ((csr_reg_q.valid || csr_valid_i) && ~csr_commit_i) csr_ready_o = 1'b0;
// if we got a valid from the scoreboard
// store the CSR address
if (csr_valid_i) begin
csr_reg_n.csr_address = fu_data_i.operand_b[11:0];
csr_reg_n.valid = 1'b1;
end
// sequential process
always_ff @(posedge clk_i or negedge rst_ni) begin
if(~rst_ni) begin
csr_reg_q <= '{default: 0};
end else begin
csr_reg_q <= csr_reg_n;
end
// if we get a commit and no new valid instruction -> clear the valid bit
if (csr_commit_i && ~csr_valid_i) begin
csr_reg_n.valid = 1'b0;
end
// clear the buffer if we flushed
if (flush_i) csr_reg_n.valid = 1'b0;
end
// sequential process
always_ff @(posedge clk_i or negedge rst_ni) begin
  // Single-entry buffer state: cleared (address and valid bit) by the
  // asynchronous active-low reset, otherwise loaded with the next-state
  // value computed by the write logic.
  if (!rst_ni) csr_reg_q <= '0;
  else csr_reg_q <= csr_reg_n;
end
endmodule

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -7,14 +7,16 @@
// Module stub for the cva6_accel_first_pass_decoder. Replace this with your accelerator's
// first pass decoder.
module cva6_accel_first_pass_decoder import ariane_pkg::*; (
input logic [31:0] instruction_i, // instruction from IF
input riscv::xs_t fs_i, // floating point extension status
input riscv::xs_t vs_i, // vector extension status
output logic is_accel_o, // is an accelerator instruction
output scoreboard_entry_t instruction_o, // predecoded instruction
output logic illegal_instr_o, // is an illegal instruction
output logic is_control_flow_instr_o // is a control flow instruction
module cva6_accel_first_pass_decoder
import ariane_pkg::*;
(
input logic [31:0] instruction_i, // instruction from IF
input riscv::xs_t fs_i, // floating point extension status
input riscv::xs_t vs_i, // vector extension status
output logic is_accel_o, // is an accelerator instruction
output scoreboard_entry_t instruction_o, // predecoded instruction
output logic illegal_instr_o, // is an illegal instruction
output logic is_control_flow_instr_o // is a control flow instruction
);
assign is_accel_o = 1'b0;

View file

@ -9,51 +9,53 @@
// Example coprocessor adds rs1,rs2(,rs3) together and gives back the result to the CPU via the CoreV-X-Interface.
// Coprocessor delays the sending of the result depending on result least significant bits.
module cvxif_example_coprocessor import cvxif_pkg::*;
import cvxif_instr_pkg::*;(
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input cvxif_req_t cvxif_req_i,
output cvxif_resp_t cvxif_resp_o
module cvxif_example_coprocessor
import cvxif_pkg::*;
import cvxif_instr_pkg::*;
(
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input cvxif_req_t cvxif_req_i,
output cvxif_resp_t cvxif_resp_o
);
//Compressed interface
logic x_compressed_valid_i;
logic x_compressed_ready_o;
x_compressed_req_t x_compressed_req_i;
x_compressed_resp_t x_compressed_resp_o;
logic x_compressed_valid_i;
logic x_compressed_ready_o;
x_compressed_req_t x_compressed_req_i;
x_compressed_resp_t x_compressed_resp_o;
//Issue interface
logic x_issue_valid_i;
logic x_issue_ready_o;
x_issue_req_t x_issue_req_i;
x_issue_resp_t x_issue_resp_o;
logic x_issue_valid_i;
logic x_issue_ready_o;
x_issue_req_t x_issue_req_i;
x_issue_resp_t x_issue_resp_o;
//Commit interface
logic x_commit_valid_i;
x_commit_t x_commit_i;
logic x_commit_valid_i;
x_commit_t x_commit_i;
//Memory interface
logic x_mem_valid_o;
logic x_mem_ready_i;
x_mem_req_t x_mem_req_o;
x_mem_resp_t x_mem_resp_i;
logic x_mem_valid_o;
logic x_mem_ready_i;
x_mem_req_t x_mem_req_o;
x_mem_resp_t x_mem_resp_i;
//Memory result interface
logic x_mem_result_valid_i;
x_mem_result_t x_mem_result_i;
logic x_mem_result_valid_i;
x_mem_result_t x_mem_result_i;
//Result interface
logic x_result_valid_o;
logic x_result_ready_i;
x_result_t x_result_o;
logic x_result_valid_o;
logic x_result_ready_i;
x_result_t x_result_o;
assign x_compressed_valid_i = cvxif_req_i.x_compressed_valid;
assign x_compressed_req_i = cvxif_req_i.x_compressed_req;
assign x_issue_valid_i = cvxif_req_i.x_issue_valid;
assign x_issue_req_i = cvxif_req_i.x_issue_req;
assign x_commit_valid_i = cvxif_req_i.x_commit_valid;
assign x_commit_i = cvxif_req_i.x_commit;
assign x_mem_ready_i = cvxif_req_i.x_mem_ready;
assign x_mem_resp_i = cvxif_req_i.x_mem_resp;
assign x_mem_result_valid_i = cvxif_req_i.x_mem_result_valid;
assign x_mem_result_i = cvxif_req_i.x_mem_result;
assign x_result_ready_i = cvxif_req_i.x_result_ready;
assign x_compressed_valid_i = cvxif_req_i.x_compressed_valid;
assign x_compressed_req_i = cvxif_req_i.x_compressed_req;
assign x_issue_valid_i = cvxif_req_i.x_issue_valid;
assign x_issue_req_i = cvxif_req_i.x_issue_req;
assign x_commit_valid_i = cvxif_req_i.x_commit_valid;
assign x_commit_i = cvxif_req_i.x_commit;
assign x_mem_ready_i = cvxif_req_i.x_mem_ready;
assign x_mem_resp_i = cvxif_req_i.x_mem_resp;
assign x_mem_result_valid_i = cvxif_req_i.x_mem_result_valid;
assign x_mem_result_i = cvxif_req_i.x_mem_result;
assign x_result_ready_i = cvxif_req_i.x_result_ready;
assign cvxif_resp_o.x_compressed_ready = x_compressed_ready_o;
assign cvxif_resp_o.x_compressed_resp = x_compressed_resp_o;
@ -65,17 +67,17 @@ module cvxif_example_coprocessor import cvxif_pkg::*;
assign cvxif_resp_o.x_result = x_result_o;
//Compressed interface
assign x_compressed_ready_o = '0;
assign x_compressed_resp_o.instr = '0;
assign x_compressed_resp_o.accept = '0;
assign x_compressed_ready_o = '0;
assign x_compressed_resp_o.instr = '0;
assign x_compressed_resp_o.accept = '0;
instr_decoder #(
.NbInstr ( cvxif_instr_pkg::NbInstr ),
.CoproInstr ( cvxif_instr_pkg::CoproInstr )
.NbInstr (cvxif_instr_pkg::NbInstr),
.CoproInstr(cvxif_instr_pkg::CoproInstr)
) instr_decoder_i (
.clk_i ( clk_i ),
.x_issue_req_i ( x_issue_req_i ),
.x_issue_resp_o ( x_issue_resp_o )
.clk_i (clk_i),
.x_issue_req_i (x_issue_req_i),
.x_issue_resp_o(x_issue_resp_o)
);
typedef struct packed {
@ -86,20 +88,20 @@ module cvxif_example_coprocessor import cvxif_pkg::*;
logic fifo_full, fifo_empty;
logic x_issue_ready_q;
logic instr_push, instr_pop;
x_issue_t req_i;
x_issue_t req_o;
x_issue_t req_i;
x_issue_t req_o;
assign instr_push = x_issue_resp_o.accept ? 1 : 0 ;
assign instr_push = x_issue_resp_o.accept ? 1 : 0;
assign instr_pop = (x_commit_i.x_commit_kill && x_commit_valid_i) || x_result_valid_o;
assign x_issue_ready_q = ~fifo_full; // if something is in the fifo, the instruction is being processed
// so we can't receive anything else
// so we can't receive anything else
assign req_i.req = x_issue_req_i;
assign req_i.resp = x_issue_resp_o;
always_ff @(posedge clk_i or negedge rst_ni) begin : regs
if(!rst_ni) begin
if (!rst_ni) begin
x_issue_ready_o <= 1;
end else begin
x_issue_ready_o <= x_issue_ready_q;
@ -107,47 +109,47 @@ module cvxif_example_coprocessor import cvxif_pkg::*;
end
fifo_v3 #(
.FALL_THROUGH ( 1 ), //data_o ready and pop in the same cycle
.DATA_WIDTH ( 64 ),
.DEPTH ( 8 ),
.dtype ( x_issue_t )
) fifo_commit_i (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.flush_i ( 1'b0 ),
.testmode_i ( 1'b0 ),
.full_o ( fifo_full ),
.empty_o ( fifo_empty ),
.usage_o ( ),
.data_i ( req_i ),
.push_i ( instr_push ),
.data_o ( req_o ),
.pop_i ( instr_pop )
.FALL_THROUGH(1), //data_o ready and pop in the same cycle
.DATA_WIDTH (64),
.DEPTH (8),
.dtype (x_issue_t)
) fifo_commit_i (
.clk_i (clk_i),
.rst_ni (rst_ni),
.flush_i (1'b0),
.testmode_i(1'b0),
.full_o (fifo_full),
.empty_o (fifo_empty),
.usage_o (),
.data_i (req_i),
.push_i (instr_push),
.data_o (req_o),
.pop_i (instr_pop)
);
logic [3:0] c;
counter #(
.WIDTH(4)
) counter_i(
.clk_i ( clk_i),
.rst_ni ( rst_ni),
.clear_i ( ~x_commit_i.x_commit_kill && x_commit_valid_i ),
.en_i ( 1'b1 ),
.load_i ( ),
.down_i ( ),
.d_i ( ),
.q_o ( c ),
.overflow_o ( )
.WIDTH(4)
) counter_i (
.clk_i (clk_i),
.rst_ni (rst_ni),
.clear_i (~x_commit_i.x_commit_kill && x_commit_valid_i),
.en_i (1'b1),
.load_i (),
.down_i (),
.d_i (),
.q_o (c),
.overflow_o()
);
always_comb begin
x_result_o.data = req_o.req.rs[0] + req_o.req.rs[1] + ( X_NUM_RS == 3 ? req_o.req.rs[2] : 0);
x_result_valid_o = (c == x_result_o.data[3:0]) && ~fifo_empty ? 1 : 0;
x_result_o.id = req_o.req.id;
x_result_o.rd = req_o.req.instr[11:7];
x_result_o.we = req_o.resp.writeback & x_result_valid_o;
x_result_o.exc = 0;
x_result_o.exccode = 0;
x_result_o.data = req_o.req.rs[0] + req_o.req.rs[1] + (X_NUM_RS == 3 ? req_o.req.rs[2] : 0);
x_result_valid_o = (c == x_result_o.data[3:0]) && ~fifo_empty ? 1 : 0;
x_result_o.id = req_o.req.id;
x_result_o.rd = req_o.req.instr[11:7];
x_result_o.we = req_o.resp.writeback & x_result_valid_o;
x_result_o.exc = 0;
x_result_o.exccode = 0;
end
endmodule
endmodule

View file

@ -10,38 +10,38 @@
package cvxif_instr_pkg;
typedef struct packed {
logic [31:0] instr;
logic [31:0] mask;
cvxif_pkg::x_issue_resp_t resp;
logic [31:0] instr;
logic [31:0] mask;
cvxif_pkg::x_issue_resp_t resp;
} copro_issue_resp_t;
// 2 Possible RISCV instructions for Coprocessor
parameter int unsigned NbInstr = 2;
parameter copro_issue_resp_t CoproInstr[NbInstr] = '{
'{
instr: 32'b 00000_00_00000_00000_0_00_00000_0101011, // custom1 opcode
mask: 32'b 00000_00_00000_00000_0_00_00000_1111111,
resp : '{
accept : 1'b1,
writeback : 1'b0,
dualwrite : 1'b0,
dualread : 1'b0,
loadstore : 1'b0,
exc : 1'b0
'{
instr: 32'b00000_00_00000_00000_0_00_00000_0101011, // custom1 opcode
mask: 32'b00000_00_00000_00000_0_00_00000_1111111,
resp : '{
accept : 1'b1,
writeback : 1'b0,
dualwrite : 1'b0,
dualread : 1'b0,
loadstore : 1'b0,
exc : 1'b0
}
},
'{
instr: 32'b00000_00_00000_00000_0_00_00000_1011011, // custom2 opcode
mask: 32'b00000_00_00000_00000_0_00_00000_1111111,
resp : '{
accept : 1'b1,
writeback : 1'b1,
dualwrite : 1'b0,
dualread : 1'b0,
loadstore : 1'b0,
exc : 1'b0
}
}
},
'{
instr: 32'b 00000_00_00000_00000_0_00_00000_1011011, // custom2 opcode
mask: 32'b 00000_00_00000_00000_0_00_00000_1111111,
resp : '{
accept : 1'b1,
writeback : 1'b1,
dualwrite : 1'b0,
dualread : 1'b0,
loadstore : 1'b0,
exc : 1'b0
}
}
};
endpackage

View file

@ -7,42 +7,43 @@
//
// Original Author: Guillaume Chauvon (guillaume.chauvon@thalesgroup.com)
module instr_decoder import cvxif_pkg::*; #(
parameter int NbInstr = 1,
parameter cvxif_instr_pkg::copro_issue_resp_t CoproInstr[NbInstr] = {0}
)
(
input logic clk_i,
input x_issue_req_t x_issue_req_i,
output x_issue_resp_t x_issue_resp_o
module instr_decoder
import cvxif_pkg::*;
#(
parameter int NbInstr = 1,
parameter cvxif_instr_pkg::copro_issue_resp_t CoproInstr[NbInstr] = {0}
) (
input logic clk_i,
input x_issue_req_t x_issue_req_i,
output x_issue_resp_t x_issue_resp_o
);
logic [NbInstr-1:0] sel;
for (genvar i = 0; i < NbInstr; i++) begin : gen_predecoder_selector
assign sel[i] =
((CoproInstr[i].mask & x_issue_req_i.instr) == CoproInstr[i].instr);
assign sel[i] = ((CoproInstr[i].mask & x_issue_req_i.instr) == CoproInstr[i].instr);
end
always_comb begin
x_issue_resp_o.accept = '0;
x_issue_resp_o.writeback = '0;
x_issue_resp_o.dualwrite = '0;
x_issue_resp_o.dualread = '0;
x_issue_resp_o.loadstore = '0;
x_issue_resp_o.exc = '0;
x_issue_resp_o.accept = '0;
x_issue_resp_o.writeback = '0;
x_issue_resp_o.dualwrite = '0;
x_issue_resp_o.dualread = '0;
x_issue_resp_o.loadstore = '0;
x_issue_resp_o.exc = '0;
for (int unsigned i = 0; i < NbInstr; i++) begin
if (sel[i]) begin
x_issue_resp_o.accept = CoproInstr[i].resp.accept;
x_issue_resp_o.writeback = CoproInstr[i].resp.writeback;
x_issue_resp_o.dualwrite = CoproInstr[i].resp.dualwrite;
x_issue_resp_o.dualread = CoproInstr[i].resp.dualread;
x_issue_resp_o.loadstore = CoproInstr[i].resp.loadstore;
x_issue_resp_o.exc = CoproInstr[i].resp.exc;
x_issue_resp_o.accept = CoproInstr[i].resp.accept;
x_issue_resp_o.writeback = CoproInstr[i].resp.writeback;
x_issue_resp_o.dualwrite = CoproInstr[i].resp.dualwrite;
x_issue_resp_o.dualread = CoproInstr[i].resp.dualread;
x_issue_resp_o.loadstore = CoproInstr[i].resp.loadstore;
x_issue_resp_o.exc = CoproInstr[i].resp.exc;
end
end
end
assert property( @(posedge clk_i) $onehot0(sel)) else $warning("This offloaded instruction is valid for multiple coprocessor instructions !");
assert property (@(posedge clk_i) $onehot0(sel))
else $warning("This offloaded instruction is valid for multiple coprocessor instructions !");
endmodule

View file

@ -10,93 +10,95 @@
// Functional Unit for the logic of the CoreV-X-Interface
module cvxif_fu import ariane_pkg::*; #(
module cvxif_fu
import ariane_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) (
input logic clk_i,
input logic rst_ni,
input fu_data_t fu_data_i,
input riscv::priv_lvl_t priv_lvl_i,
input logic clk_i,
input logic rst_ni,
input fu_data_t fu_data_i,
input riscv::priv_lvl_t priv_lvl_i,
//from issue
input logic x_valid_i,
output logic x_ready_o,
input logic [31:0] x_off_instr_i,
input logic x_valid_i,
output logic x_ready_o,
input logic [ 31:0] x_off_instr_i,
//to writeback
output logic [TRANS_ID_BITS-1:0] x_trans_id_o,
output exception_t x_exception_o,
output riscv::xlen_t x_result_o,
output logic x_valid_o,
output logic x_we_o,
output logic [TRANS_ID_BITS-1:0] x_trans_id_o,
output exception_t x_exception_o,
output riscv::xlen_t x_result_o,
output logic x_valid_o,
output logic x_we_o,
//to coprocessor
output cvxif_pkg::cvxif_req_t cvxif_req_o,
input cvxif_pkg::cvxif_resp_t cvxif_resp_i
output cvxif_pkg::cvxif_req_t cvxif_req_o,
input cvxif_pkg::cvxif_resp_t cvxif_resp_i
);
logic illegal_n, illegal_q;
logic [TRANS_ID_BITS-1:0] illegal_id_n, illegal_id_q;
logic [31:0] illegal_instr_n, illegal_instr_q;
logic illegal_n, illegal_q;
logic [TRANS_ID_BITS-1:0] illegal_id_n, illegal_id_q;
logic [31:0] illegal_instr_n, illegal_instr_q;
always_comb begin
cvxif_req_o = '0;
cvxif_req_o.x_result_ready = 1'b1;
x_ready_o = cvxif_resp_i.x_issue_ready;
if (x_valid_i) begin
cvxif_req_o.x_issue_valid = x_valid_i;
cvxif_req_o.x_issue_req.instr = x_off_instr_i;
cvxif_req_o.x_issue_req.mode = priv_lvl_i;
cvxif_req_o.x_issue_req.id = fu_data_i.trans_id;
cvxif_req_o.x_issue_req.rs[0] = fu_data_i.operand_a;
cvxif_req_o.x_issue_req.rs[1] = fu_data_i.operand_b;
if (cvxif_pkg::X_NUM_RS == 3) begin
cvxif_req_o.x_issue_req.rs[2] = fu_data_i.imm;
end
cvxif_req_o.x_issue_req.rs_valid = cvxif_pkg::X_NUM_RS == 3 ? 3'b111 : 2'b11;
cvxif_req_o.x_commit_valid = x_valid_i;
cvxif_req_o.x_commit.id = fu_data_i.trans_id;
cvxif_req_o.x_commit.x_commit_kill = 1'b0;
// Builds the request sent to the coprocessor (cvxif_req_o) from the issue-side
// inputs and forwards the coprocessor's issue-ready back as x_ready_o.
always_comb begin
// Default: every request field is zero (nothing issued, nothing committed) ...
cvxif_req_o = '0;
// ... except result_ready, which is held high unconditionally.
cvxif_req_o.x_result_ready = 1'b1;
x_ready_o = cvxif_resp_i.x_issue_ready;
if (x_valid_i) begin
// Issue request: instruction word, privilege level, transaction id and operands.
cvxif_req_o.x_issue_valid = x_valid_i;
cvxif_req_o.x_issue_req.instr = x_off_instr_i;
cvxif_req_o.x_issue_req.mode = priv_lvl_i;
cvxif_req_o.x_issue_req.id = fu_data_i.trans_id;
cvxif_req_o.x_issue_req.rs[0] = fu_data_i.operand_a;
cvxif_req_o.x_issue_req.rs[1] = fu_data_i.operand_b;
// The third source operand, when the interface has one, is carried in the imm field.
if (cvxif_pkg::X_NUM_RS == 3) begin
cvxif_req_o.x_issue_req.rs[2] = fu_data_i.imm;
end
// All source operands are flagged valid.
cvxif_req_o.x_issue_req.rs_valid = cvxif_pkg::X_NUM_RS == 3 ? 3'b111 : 2'b11;
// The commit is raised in the same cycle as the issue, with the same id,
// and is never a kill.
cvxif_req_o.x_commit_valid = x_valid_i;
cvxif_req_o.x_commit.id = fu_data_i.trans_id;
cvxif_req_o.x_commit.x_commit_kill = 1'b0;
end
end
always_comb begin
illegal_n = illegal_q;
illegal_id_n = illegal_id_q;
illegal_instr_n = illegal_instr_q;
if (~cvxif_resp_i.x_issue_resp.accept && cvxif_req_o.x_issue_valid && cvxif_resp_i.x_issue_ready && ~illegal_n) begin
illegal_n = 1'b1;
illegal_id_n = cvxif_req_o.x_issue_req.id;
illegal_instr_n = cvxif_req_o.x_issue_req.instr;
end
x_valid_o = cvxif_resp_i.x_result_valid; //Read result only when CVXIF is enabled
x_trans_id_o = x_valid_o ? cvxif_resp_i.x_result.id : '0;
x_result_o = x_valid_o ? cvxif_resp_i.x_result.data : '0;
x_exception_o.cause = x_valid_o ? {{(riscv::XLEN-6){1'b0}}, cvxif_resp_i.x_result.exccode} : '0;
x_exception_o.valid = x_valid_o ? cvxif_resp_i.x_result.exc : '0;
x_exception_o.tval = '0;
x_we_o = x_valid_o ? cvxif_resp_i.x_result.we : '0;
if (illegal_n) begin
if (~x_valid_o) begin
x_trans_id_o = illegal_id_n;
x_result_o = '0;
x_valid_o = 1'b1;
x_exception_o.cause = riscv::ILLEGAL_INSTR;
x_exception_o.valid = 1'b1;
x_exception_o.tval = illegal_instr_n;
x_we_o = '0;
illegal_n = '0; // Reset flag for illegal instr. illegal_id and illegal instr values are a don't care, no need to reset it.
end
end
end
// Drives the writeback outputs (x_*_o) and the next state of the
// "illegal instruction pending" record (illegal_n / illegal_id_n / illegal_instr_n).
// Statement order matters: illegal_n and x_valid_o are read after being assigned.
always_comb begin
// Default: hold the current record.
illegal_n = illegal_q;
illegal_id_n = illegal_id_q;
illegal_instr_n = illegal_instr_q;
// An instruction that is issued (valid and ready) but not accepted by the
// coprocessor is recorded, unless a record is already pending
// (illegal_n still equals illegal_q at this point).
if (~cvxif_resp_i.x_issue_resp.accept && cvxif_req_o.x_issue_valid && cvxif_resp_i.x_issue_ready && ~illegal_n) begin
illegal_n = 1'b1;
illegal_id_n = cvxif_req_o.x_issue_req.id;
illegal_instr_n = cvxif_req_o.x_issue_req.instr;
end
// Default writeback: forward the coprocessor result; every field is zero
// when the coprocessor presents no valid result.
x_valid_o = cvxif_resp_i.x_result_valid; //Read result only when CVXIF is enabled
x_trans_id_o = x_valid_o ? cvxif_resp_i.x_result.id : '0;
x_result_o = x_valid_o ? cvxif_resp_i.x_result.data : '0;
// exccode is left-padded with XLEN-6 zero bits (presumably a 6-bit code - confirm in cvxif_pkg)
x_exception_o.cause = x_valid_o ? {{(riscv::XLEN-6){1'b0}}, cvxif_resp_i.x_result.exccode} : '0;
x_exception_o.valid = x_valid_o ? cvxif_resp_i.x_result.exc : '0;
x_exception_o.tval = '0;
x_we_o = x_valid_o ? cvxif_resp_i.x_result.we : '0;
// A pending (or just detected) illegal instruction is reported only in a cycle
// where the coprocessor has no valid result; otherwise it stays recorded.
if (illegal_n) begin
if (~x_valid_o) begin
// Report an illegal-instruction exception for the recorded transaction,
// with the offending instruction word as tval and no register write.
x_trans_id_o = illegal_id_n;
x_result_o = '0;
x_valid_o = 1'b1;
x_exception_o.cause = riscv::ILLEGAL_INSTR;
x_exception_o.valid = 1'b1;
x_exception_o.tval = illegal_instr_n;
x_we_o = '0;
illegal_n = '0; // Reset flag for illegal instr. illegal_id and illegal instr values are a don't care, no need to reset it.
end
end
end
always_ff @(posedge clk_i, negedge rst_ni) begin
if (~rst_ni) begin
illegal_q <= 1'b0;
illegal_id_q <= '0;
illegal_instr_q <= '0;
end else begin
illegal_q <= illegal_n;
illegal_id_q <= illegal_id_n;
illegal_instr_q <= illegal_instr_n;
end
always_ff @(posedge clk_i or negedge rst_ni) begin
  if (!rst_ni) begin
    // asynchronous, active-low reset: no illegal instruction recorded
    illegal_instr_q <= '0;
    illegal_id_q    <= '0;
    illegal_q       <= 1'b0;
  end else begin
    // latch the record computed by the combinational logic
    illegal_instr_q <= illegal_instr_n;
    illegal_id_q    <= illegal_id_n;
    illegal_q       <= illegal_n;
  end
end
endmodule

File diff suppressed because it is too large Load diff

View file

@ -14,398 +14,400 @@
// Description: Instantiation of all functional units residing in the execute stage
module ex_stage import ariane_pkg::*; #(
module ex_stage
import ariane_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int unsigned ASID_WIDTH = 1
) (
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic flush_i,
input logic debug_mode_i,
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic flush_i,
input logic debug_mode_i,
input logic [riscv::VLEN-1:0] rs1_forwarding_i,
input logic [riscv::VLEN-1:0] rs2_forwarding_i,
input fu_data_t fu_data_i,
input logic [riscv::VLEN-1:0] pc_i, // PC of current instruction
input logic is_compressed_instr_i, // we need to know if this was a compressed instruction
// in order to calculate the next PC on a mis-predict
input logic [riscv::VLEN-1:0] rs1_forwarding_i,
input logic [riscv::VLEN-1:0] rs2_forwarding_i,
input fu_data_t fu_data_i,
input logic [riscv::VLEN-1:0] pc_i, // PC of current instruction
input logic is_compressed_instr_i, // we need to know if this was a compressed instruction
// in order to calculate the next PC on a mis-predict
// Fixed latency unit(s)
output riscv::xlen_t flu_result_o,
output riscv::xlen_t flu_result_o,
output logic [TRANS_ID_BITS-1:0] flu_trans_id_o, // ID of scoreboard entry at which to write back
output exception_t flu_exception_o,
output logic flu_ready_o, // FLU is ready
output logic flu_valid_o, // FLU result is valid
output exception_t flu_exception_o,
output logic flu_ready_o, // FLU is ready
output logic flu_valid_o, // FLU result is valid
// Branches and Jumps
// ALU 1
input logic alu_valid_i, // Output is valid
input logic alu_valid_i, // Output is valid
// Branch Unit
input logic branch_valid_i, // we are using the branch unit
input branchpredict_sbe_t branch_predict_i,
output bp_resolve_t resolved_branch_o, // the branch engine uses the write back from the ALU
output logic resolve_branch_o, // to ID signaling that we resolved the branch
input logic branch_valid_i, // we are using the branch unit
input branchpredict_sbe_t branch_predict_i,
output bp_resolve_t resolved_branch_o, // the branch engine uses the write back from the ALU
output logic resolve_branch_o, // to ID signaling that we resolved the branch
// CSR
input logic csr_valid_i,
output logic [11:0] csr_addr_o,
input logic csr_commit_i,
input logic csr_valid_i,
output logic [11:0] csr_addr_o,
input logic csr_commit_i,
// MULT
input logic mult_valid_i, // Output is valid
input logic mult_valid_i, // Output is valid
// LSU
output logic lsu_ready_o, // FU is ready
input logic lsu_valid_i, // Input is valid
output logic lsu_ready_o, // FU is ready
input logic lsu_valid_i, // Input is valid
output logic load_valid_o,
output riscv::xlen_t load_result_o,
output logic [TRANS_ID_BITS-1:0] load_trans_id_o,
output exception_t load_exception_o,
output logic store_valid_o,
output riscv::xlen_t store_result_o,
output logic [TRANS_ID_BITS-1:0] store_trans_id_o,
output exception_t store_exception_o,
output logic load_valid_o,
output riscv::xlen_t load_result_o,
output logic [TRANS_ID_BITS-1:0] load_trans_id_o,
output exception_t load_exception_o,
output logic store_valid_o,
output riscv::xlen_t store_result_o,
output logic [TRANS_ID_BITS-1:0] store_trans_id_o,
output exception_t store_exception_o,
input logic lsu_commit_i,
output logic lsu_commit_ready_o, // commit queue is ready to accept another commit request
input logic [TRANS_ID_BITS-1:0] commit_tran_id_i,
input logic stall_st_pending_i,
output logic no_st_pending_o,
input logic amo_valid_commit_i,
input logic lsu_commit_i,
output logic lsu_commit_ready_o, // commit queue is ready to accept another commit request
input logic [TRANS_ID_BITS-1:0] commit_tran_id_i,
input logic stall_st_pending_i,
output logic no_st_pending_o,
input logic amo_valid_commit_i,
// FPU
output logic fpu_ready_o, // FU is ready
input logic fpu_valid_i, // Output is valid
input logic [1:0] fpu_fmt_i, // FP format
input logic [2:0] fpu_rm_i, // FP rm
input logic [2:0] fpu_frm_i, // FP frm csr
input logic [6:0] fpu_prec_i, // FP precision control
output logic [TRANS_ID_BITS-1:0] fpu_trans_id_o,
output riscv::xlen_t fpu_result_o,
output logic fpu_valid_o,
output exception_t fpu_exception_o,
output logic fpu_ready_o, // FU is ready
input logic fpu_valid_i, // Output is valid
input logic [1:0] fpu_fmt_i, // FP format
input logic [2:0] fpu_rm_i, // FP rm
input logic [2:0] fpu_frm_i, // FP frm csr
input logic [6:0] fpu_prec_i, // FP precision control
output logic [TRANS_ID_BITS-1:0] fpu_trans_id_o,
output riscv::xlen_t fpu_result_o,
output logic fpu_valid_o,
output exception_t fpu_exception_o,
// CoreV-X-Interface
input logic x_valid_i,
output logic x_ready_o,
input logic [31:0] x_off_instr_i,
output logic [TRANS_ID_BITS-1:0] x_trans_id_o,
output exception_t x_exception_o,
output riscv::xlen_t x_result_o,
output logic x_valid_o,
output logic x_we_o,
output cvxif_pkg::cvxif_req_t cvxif_req_o,
input cvxif_pkg::cvxif_resp_t cvxif_resp_i,
input logic acc_valid_i, // Output is valid
input logic x_valid_i,
output logic x_ready_o,
input logic [31:0] x_off_instr_i,
output logic [TRANS_ID_BITS-1:0] x_trans_id_o,
output exception_t x_exception_o,
output riscv::xlen_t x_result_o,
output logic x_valid_o,
output logic x_we_o,
output cvxif_pkg::cvxif_req_t cvxif_req_o,
input cvxif_pkg::cvxif_resp_t cvxif_resp_i,
input logic acc_valid_i, // Output is valid
// Memory Management
input logic enable_translation_i,
input logic en_ld_st_translation_i,
input logic flush_tlb_i,
input logic enable_translation_i,
input logic en_ld_st_translation_i,
input logic flush_tlb_i,
input riscv::priv_lvl_t priv_lvl_i,
input riscv::priv_lvl_t ld_st_priv_lvl_i,
input logic sum_i,
input logic mxr_i,
input logic [riscv::PPNW-1:0] satp_ppn_i,
input logic [ASID_WIDTH-1:0] asid_i,
input riscv::priv_lvl_t priv_lvl_i,
input riscv::priv_lvl_t ld_st_priv_lvl_i,
input logic sum_i,
input logic mxr_i,
input logic [riscv::PPNW-1:0] satp_ppn_i,
input logic [ ASID_WIDTH-1:0] asid_i,
// icache translation requests
input icache_arsp_t icache_areq_i,
output icache_areq_t icache_areq_o,
input icache_arsp_t icache_areq_i,
output icache_areq_t icache_areq_o,
// interface to dcache
input dcache_req_o_t [2:0] dcache_req_ports_i,
output dcache_req_i_t [2:0] dcache_req_ports_o,
input logic dcache_wbuffer_empty_i,
input logic dcache_wbuffer_not_ni_i,
output amo_req_t amo_req_o, // request to cache subsytem
input amo_resp_t amo_resp_i, // response from cache subsystem
input dcache_req_o_t [2:0] dcache_req_ports_i,
output dcache_req_i_t [2:0] dcache_req_ports_o,
input logic dcache_wbuffer_empty_i,
input logic dcache_wbuffer_not_ni_i,
output amo_req_t amo_req_o, // request to cache subsytem
input amo_resp_t amo_resp_i, // response from cache subsystem
// Performance counters
output logic itlb_miss_o,
output logic dtlb_miss_o,
output logic itlb_miss_o,
output logic dtlb_miss_o,
// PMPs
input riscv::pmpcfg_t [15:0] pmpcfg_i,
input logic[15:0][riscv::PLEN-3:0] pmpaddr_i,
input riscv::pmpcfg_t [15:0] pmpcfg_i,
input logic [15:0][riscv::PLEN-3:0] pmpaddr_i,
// RVFI
output [riscv::VLEN-1:0] lsu_addr_o,
output [riscv::PLEN-1:0] mem_paddr_o,
output [(riscv::XLEN/8)-1:0] lsu_rmask_o,
output [(riscv::XLEN/8)-1:0] lsu_wmask_o,
output [ariane_pkg::TRANS_ID_BITS-1:0] lsu_addr_trans_id_o
output [ riscv::VLEN-1:0] lsu_addr_o,
output [ riscv::PLEN-1:0] mem_paddr_o,
output [ (riscv::XLEN/8)-1:0] lsu_rmask_o,
output [ (riscv::XLEN/8)-1:0] lsu_wmask_o,
output [ariane_pkg::TRANS_ID_BITS-1:0] lsu_addr_trans_id_o
);
// -------------------------
// Fixed Latency Units
// -------------------------
// all fixed latency units share a single issue port and a single write
// port into the scoreboard. At the moment those are:
// 1. ALU - all operations are single cycle
// 2. Branch unit: operation is single cycle, the ALU is needed
// for comparison
// 3. CSR: This is a small buffer which saves the address of the CSR.
// The value is then re-fetched once the instruction retires. The buffer
// is only a single entry deep, hence this operation will block all
// other operations once this buffer is full. This should not be a major
// concern though as CSRs are infrequent.
// 4. Multiplier/Divider: The multiplier has a fixed latency of 1 cycle.
// The issue logic will take care of not issuing
// another instruction if it will collide on the
// output port. Divisions are arbitrary in length
// they will simply block the issue of all other
// instructions.
// -------------------------
// Fixed Latency Units
// -------------------------
// all fixed latency units share a single issue port and a single write
// port into the scoreboard. At the moment those are:
// 1. ALU - all operations are single cycle
// 2. Branch unit: operation is single cycle, the ALU is needed
// for comparison
// 3. CSR: This is a small buffer which saves the address of the CSR.
// The value is then re-fetched once the instruction retires. The buffer
// is only a single entry deep, hence this operation will block all
// other operations once this buffer is full. This should not be a major
// concern though as CSRs are infrequent.
// 4. Multiplier/Divider: The multiplier has a fixed latency of 1 cycle.
// The issue logic will take care of not issuing
// another instruction if it will collide on the
// output port. Divisions are arbitrary in length
// they will simply block the issue of all other
// instructions.
logic current_instruction_is_sfence_vma;
// These two register store the rs1 and rs2 parameters in case of `SFENCE_VMA`
// instruction to be used for TLB flush in the next clock cycle.
logic [ASID_WIDTH-1:0] asid_to_be_flushed;
logic [riscv::VLEN-1:0] vaddr_to_be_flushed;
logic current_instruction_is_sfence_vma;
// These two register store the rs1 and rs2 parameters in case of `SFENCE_VMA`
// instruction to be used for TLB flush in the next clock cycle.
logic [ASID_WIDTH-1:0] asid_to_be_flushed;
logic [riscv::VLEN-1:0] vaddr_to_be_flushed;
// from ALU to branch unit
logic alu_branch_res; // branch comparison result
riscv::xlen_t alu_result, csr_result, mult_result;
logic [riscv::VLEN-1:0] branch_result;
logic csr_ready, mult_ready;
logic [TRANS_ID_BITS-1:0] mult_trans_id;
logic mult_valid;
// from ALU to branch unit
logic alu_branch_res; // branch comparison result
riscv::xlen_t alu_result, csr_result, mult_result;
logic [riscv::VLEN-1:0] branch_result;
logic csr_ready, mult_ready;
logic [TRANS_ID_BITS-1:0] mult_trans_id;
logic mult_valid;
// 1. ALU (combinatorial)
// data silence operation
fu_data_t alu_data;
assign alu_data = (alu_valid_i | branch_valid_i) ? fu_data_i : '0;
// 1. ALU (combinatorial)
// data silence operation
fu_data_t alu_data;
assign alu_data = (alu_valid_i | branch_valid_i) ? fu_data_i : '0;
alu #(
.CVA6Cfg ( CVA6Cfg )
) alu_i (
alu #(
.CVA6Cfg(CVA6Cfg)
) alu_i (
.clk_i,
.rst_ni,
.fu_data_i (alu_data),
.result_o (alu_result),
.alu_branch_res_o(alu_branch_res)
);
// 2. Branch Unit (combinatorial)
// we don't silence the branch unit as this is already critical and we do
// not want to add another layer of logic
branch_unit #(
.CVA6Cfg(CVA6Cfg)
) branch_unit_i (
.clk_i,
.rst_ni,
.debug_mode_i,
.fu_data_i,
.pc_i,
.is_compressed_instr_i,
// any functional unit is valid, check that there is no accidental mis-predict
.fu_valid_i ( alu_valid_i || lsu_valid_i || csr_valid_i || mult_valid_i || fpu_valid_i || acc_valid_i ) ,
.branch_valid_i,
.branch_comp_res_i(alu_branch_res),
.branch_result_o(branch_result),
.branch_predict_i,
.resolved_branch_o,
.resolve_branch_o,
.branch_exception_o(flu_exception_o)
);
// 3. CSR (sequential)
csr_buffer #(
.CVA6Cfg(CVA6Cfg)
) csr_buffer_i (
.clk_i,
.rst_ni,
.flush_i,
.fu_data_i,
.csr_valid_i,
.csr_ready_o (csr_ready),
.csr_result_o(csr_result),
.csr_commit_i,
.csr_addr_o
);
assign flu_valid_o = alu_valid_i | branch_valid_i | csr_valid_i | mult_valid;
// result MUX
// Selects which fixed-latency unit drives flu_result_o / flu_trans_id_o.
// Priority (highest first): ALU, CSR buffer, multiplier; the branch result
// is used when none of those valid signals is asserted.
always_comb begin
// Branch result as default case
// (branch_result is VLEN bits wide and is zero-extended to XLEN here)
flu_result_o = {{riscv::XLEN - riscv::VLEN{1'b0}}, branch_result};
// By default the transaction ID is taken from the incoming fu_data_i
flu_trans_id_o = fu_data_i.trans_id;
// ALU result
if (alu_valid_i) begin
flu_result_o = alu_result;
// CSR result
end else if (csr_valid_i) begin
flu_result_o = csr_result;
// Multiplier result: keyed on mult_valid (the multiplier's output valid),
// not mult_valid_i, and it is the only case that overrides the
// transaction ID with the one reported by the multiplier.
end else if (mult_valid) begin
flu_result_o = mult_result;
flu_trans_id_o = mult_trans_id;
end
end
// ready flags for FLU
always_comb begin
flu_ready_o = csr_ready & mult_ready;
end
// 4. Multiplication (Sequential)
fu_data_t mult_data;
// input silencing of multiplier
assign mult_data = mult_valid_i ? fu_data_i : '0;
mult #(
.CVA6Cfg(CVA6Cfg)
) i_mult (
.clk_i,
.rst_ni,
.flush_i,
.mult_valid_i,
.fu_data_i (mult_data),
.result_o (mult_result),
.mult_valid_o (mult_valid),
.mult_ready_o (mult_ready),
.mult_trans_id_o(mult_trans_id)
);
// ----------------
// FPU
// ----------------
generate
if (CVA6Cfg.FpPresent) begin : fpu_gen
fu_data_t fpu_data;
assign fpu_data = fpu_valid_i ? fu_data_i : '0;
fpu_wrap #(
.CVA6Cfg(CVA6Cfg)
) fpu_i (
.clk_i,
.rst_ni,
.flush_i,
.fpu_valid_i,
.fpu_ready_o,
.fu_data_i(fpu_data),
.fpu_fmt_i,
.fpu_rm_i,
.fpu_frm_i,
.fpu_prec_i,
.fpu_trans_id_o,
.result_o (fpu_result_o),
.fpu_valid_o,
.fpu_exception_o
);
end else begin : no_fpu_gen
assign fpu_ready_o = '0;
assign fpu_trans_id_o = '0;
assign fpu_result_o = '0;
assign fpu_valid_o = '0;
assign fpu_exception_o = '0;
end
endgenerate
// ----------------
// Load-Store Unit
// ----------------
fu_data_t lsu_data;
assign lsu_data = lsu_valid_i ? fu_data_i : '0;
load_store_unit #(
.CVA6Cfg (CVA6Cfg),
.ASID_WIDTH(ASID_WIDTH)
) lsu_i (
.clk_i,
.rst_ni,
.flush_i,
.stall_st_pending_i,
.no_st_pending_o,
.fu_data_i (lsu_data),
.lsu_ready_o,
.lsu_valid_i,
.load_trans_id_o,
.load_result_o,
.load_valid_o,
.load_exception_o,
.store_trans_id_o,
.store_result_o,
.store_valid_o,
.store_exception_o,
.commit_i (lsu_commit_i),
.commit_ready_o (lsu_commit_ready_o),
.commit_tran_id_i,
.enable_translation_i,
.en_ld_st_translation_i,
.icache_areq_i,
.icache_areq_o,
.priv_lvl_i,
.ld_st_priv_lvl_i,
.sum_i,
.mxr_i,
.satp_ppn_i,
.asid_i,
.asid_to_be_flushed_i (asid_to_be_flushed),
.vaddr_to_be_flushed_i(vaddr_to_be_flushed),
.flush_tlb_i,
.itlb_miss_o,
.dtlb_miss_o,
.dcache_req_ports_i,
.dcache_req_ports_o,
.dcache_wbuffer_empty_i,
.dcache_wbuffer_not_ni_i,
.amo_valid_commit_i,
.amo_req_o,
.amo_resp_i,
.pmpcfg_i,
.pmpaddr_i,
.lsu_addr_o,
.mem_paddr_o,
.lsu_rmask_o,
.lsu_wmask_o,
.lsu_addr_trans_id_o
);
if (CVA6Cfg.CvxifEn) begin : gen_cvxif
fu_data_t cvxif_data;
assign cvxif_data = x_valid_i ? fu_data_i : '0;
cvxif_fu #(
.CVA6Cfg(CVA6Cfg)
) cvxif_fu_i (
.clk_i,
.rst_ni,
.fu_data_i ( alu_data ),
.result_o ( alu_result ),
.alu_branch_res_o ( alu_branch_res )
);
// 2. Branch Unit (combinatorial)
// we don't silence the branch unit as this is already critical and we do
// not want to add another layer of logic
branch_unit #(
.CVA6Cfg ( CVA6Cfg )
) branch_unit_i (
.clk_i,
.rst_ni,
.debug_mode_i,
.fu_data_i,
.pc_i,
.is_compressed_instr_i,
// any functional unit is valid, check that there is no accidental mis-predict
.fu_valid_i ( alu_valid_i || lsu_valid_i || csr_valid_i || mult_valid_i || fpu_valid_i || acc_valid_i ) ,
.branch_valid_i,
.branch_comp_res_i ( alu_branch_res ),
.branch_result_o ( branch_result ),
.branch_predict_i,
.resolved_branch_o,
.resolve_branch_o,
.branch_exception_o ( flu_exception_o )
.priv_lvl_i(ld_st_priv_lvl_i),
.x_valid_i,
.x_ready_o,
.x_off_instr_i,
.x_trans_id_o,
.x_exception_o,
.x_result_o,
.x_valid_o,
.x_we_o,
.cvxif_req_o,
.cvxif_resp_i
);
end else begin : gen_no_cvxif
assign cvxif_req_o = '0;
assign x_trans_id_o = '0;
assign x_exception_o = '0;
assign x_result_o = '0;
assign x_valid_o = '0;
end
// 3. CSR (sequential)
csr_buffer #(
.CVA6Cfg ( CVA6Cfg )
) csr_buffer_i (
.clk_i,
.rst_ni,
.flush_i,
.fu_data_i,
.csr_valid_i,
.csr_ready_o ( csr_ready ),
.csr_result_o ( csr_result ),
.csr_commit_i,
.csr_addr_o
);
assign flu_valid_o = alu_valid_i | branch_valid_i | csr_valid_i | mult_valid;
// result MUX
always_comb begin
// Branch result as default case
flu_result_o = {{riscv::XLEN-riscv::VLEN{1'b0}}, branch_result};
flu_trans_id_o = fu_data_i.trans_id;
// ALU result
if (alu_valid_i) begin
flu_result_o = alu_result;
// CSR result
end else if (csr_valid_i) begin
flu_result_o = csr_result;
end else if (mult_valid) begin
flu_result_o = mult_result;
flu_trans_id_o = mult_trans_id;
end
end
// ready flags for FLU
always_comb begin
flu_ready_o = csr_ready & mult_ready;
end
// 4. Multiplication (Sequential)
fu_data_t mult_data;
// input silencing of multiplier
assign mult_data = mult_valid_i ? fu_data_i : '0;
mult #(
.CVA6Cfg ( CVA6Cfg )
) i_mult (
.clk_i,
.rst_ni,
.flush_i,
.mult_valid_i,
.fu_data_i ( mult_data ),
.result_o ( mult_result ),
.mult_valid_o ( mult_valid ),
.mult_ready_o ( mult_ready ),
.mult_trans_id_o ( mult_trans_id )
);
// ----------------
// FPU
// ----------------
generate
if (CVA6Cfg.FpPresent) begin : fpu_gen
fu_data_t fpu_data;
assign fpu_data = fpu_valid_i ? fu_data_i : '0;
fpu_wrap #(
.CVA6Cfg ( CVA6Cfg )
) fpu_i (
.clk_i,
.rst_ni,
.flush_i,
.fpu_valid_i,
.fpu_ready_o,
.fu_data_i ( fpu_data ),
.fpu_fmt_i,
.fpu_rm_i,
.fpu_frm_i,
.fpu_prec_i,
.fpu_trans_id_o,
.result_o ( fpu_result_o ),
.fpu_valid_o,
.fpu_exception_o
);
end else begin : no_fpu_gen
assign fpu_ready_o = '0;
assign fpu_trans_id_o = '0;
assign fpu_result_o = '0;
assign fpu_valid_o = '0;
assign fpu_exception_o = '0;
end
endgenerate
// ----------------
// Load-Store Unit
// ----------------
fu_data_t lsu_data;
assign lsu_data = lsu_valid_i ? fu_data_i : '0;
load_store_unit #(
.CVA6Cfg ( CVA6Cfg ),
.ASID_WIDTH ( ASID_WIDTH )
) lsu_i (
.clk_i,
.rst_ni,
.flush_i,
.stall_st_pending_i,
.no_st_pending_o,
.fu_data_i ( lsu_data ),
.lsu_ready_o,
.lsu_valid_i,
.load_trans_id_o,
.load_result_o,
.load_valid_o,
.load_exception_o,
.store_trans_id_o,
.store_result_o,
.store_valid_o,
.store_exception_o,
.commit_i ( lsu_commit_i ),
.commit_ready_o ( lsu_commit_ready_o ),
.commit_tran_id_i,
.enable_translation_i,
.en_ld_st_translation_i,
.icache_areq_i,
.icache_areq_o,
.priv_lvl_i,
.ld_st_priv_lvl_i,
.sum_i,
.mxr_i,
.satp_ppn_i,
.asid_i,
.asid_to_be_flushed_i (asid_to_be_flushed),
.vaddr_to_be_flushed_i (vaddr_to_be_flushed),
.flush_tlb_i,
.itlb_miss_o,
.dtlb_miss_o,
.dcache_req_ports_i,
.dcache_req_ports_o,
.dcache_wbuffer_empty_i,
.dcache_wbuffer_not_ni_i,
.amo_valid_commit_i,
.amo_req_o,
.amo_resp_i,
.pmpcfg_i,
.pmpaddr_i,
.lsu_addr_o,
.mem_paddr_o,
.lsu_rmask_o,
.lsu_wmask_o,
.lsu_addr_trans_id_o
);
// ----------------
// CVXIF functional unit
// ----------------
// Instantiated only when CVA6Cfg.CvxifEn is set; otherwise the x_* / cvxif
// outputs listed in gen_no_cvxif are tied to zero.
if (CVA6Cfg.CvxifEn) begin : gen_cvxif
// Silenced copy of fu_data_i: zero unless x_valid_i is asserted.
// NOTE(review): cvxif_data is not referenced by the port list below --
// `.fu_data_i,` is an implicit connection to the unsilenced fu_data_i.
// Confirm whether `.fu_data_i(cvxif_data)` was intended or whether this
// signal can be removed.
fu_data_t cvxif_data;
assign cvxif_data = x_valid_i ? fu_data_i : '0;
cvxif_fu #(
.CVA6Cfg ( CVA6Cfg )
) cvxif_fu_i (
.clk_i,
.rst_ni,
.fu_data_i,
// the unit's privilege level input is driven by the load/store privilege level
.priv_lvl_i (ld_st_priv_lvl_i),
.x_valid_i,
.x_ready_o,
.x_off_instr_i,
.x_trans_id_o,
.x_exception_o,
.x_result_o,
.x_valid_o,
.x_we_o,
.cvxif_req_o,
.cvxif_resp_i
);
end else begin : gen_no_cvxif
// NOTE(review): x_ready_o and x_we_o are connected to the instance above
// but have no tie-off in this branch -- confirm they are driven elsewhere
// when CvxifEn is disabled.
assign cvxif_req_o = '0;
assign x_trans_id_o = '0;
assign x_exception_o = '0;
assign x_result_o = '0;
assign x_valid_o = '0;
end
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
current_instruction_is_sfence_vma <= 1'b0;
end else begin
if (flush_i) begin
current_instruction_is_sfence_vma <= 1'b0;
end else if ((fu_data_i.operation == SFENCE_VMA) && csr_valid_i) begin
current_instruction_is_sfence_vma <= 1'b1;
end
// Sticky flag marking that an SFENCE_VMA has been seen.
// Cleared by reset (active-low rst_ni) and by flush_i; set when the incoming
// operation is SFENCE_VMA and csr_valid_i is asserted; holds its value
// otherwise. flush_i takes priority over the set condition.
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
current_instruction_is_sfence_vma <= 1'b0;
end else begin
if (flush_i) begin
current_instruction_is_sfence_vma <= 1'b0;
end else if ((fu_data_i.operation == SFENCE_VMA) && csr_valid_i) begin
current_instruction_is_sfence_vma <= 1'b1;
end
end
end
// This process stores the rs1 and rs2 parameters of a SFENCE_VMA instruction.
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
asid_to_be_flushed <= '0;
vaddr_to_be_flushed <= '0;
// if the current instruction in EX_STAGE is a sfence.vma, in the next cycle no writes will happen
end else if ((~current_instruction_is_sfence_vma) && (~((fu_data_i.operation == SFENCE_VMA) && csr_valid_i))) begin
vaddr_to_be_flushed <= rs1_forwarding_i;
asid_to_be_flushed <= rs2_forwarding_i[ASID_WIDTH-1:0];
end
// Capture registers for the TLB-flush operands.
// While no SFENCE_VMA is flagged or currently presented, the registers follow
// rs1_forwarding_i (virtual address) and the low ASID_WIDTH bits of
// rs2_forwarding_i (ASID) every cycle. Once an SFENCE_VMA is presented with
// csr_valid_i, or current_instruction_is_sfence_vma is set, they stop updating
// and keep the last captured values.
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
asid_to_be_flushed <= '0;
vaddr_to_be_flushed <= '0;
// if the current instruction in EX_STAGE is a sfence.vma, in the next cycle no writes will happen
end else if ((~current_instruction_is_sfence_vma) && (~((fu_data_i.operation == SFENCE_VMA) && csr_valid_i))) begin
vaddr_to_be_flushed <= rs1_forwarding_i;
asid_to_be_flushed <= rs2_forwarding_i[ASID_WIDTH-1:0];
end
end
endmodule

View file

@ -13,29 +13,35 @@
// Description: Wrapper for the floating-point unit
module fpu_wrap import ariane_pkg::*; #(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
module fpu_wrap
import ariane_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) (
input logic clk_i,
input logic rst_ni,
input logic flush_i,
input logic fpu_valid_i,
output logic fpu_ready_o,
input fu_data_t fu_data_i,
input logic clk_i,
input logic rst_ni,
input logic flush_i,
input logic fpu_valid_i,
output logic fpu_ready_o,
input fu_data_t fu_data_i,
input logic [1:0] fpu_fmt_i,
input logic [2:0] fpu_rm_i,
input logic [2:0] fpu_frm_i,
input logic [6:0] fpu_prec_i,
output logic [TRANS_ID_BITS-1:0] fpu_trans_id_o,
output logic [CVA6Cfg.FLen-1:0] result_o,
output logic fpu_valid_o,
output exception_t fpu_exception_o
input logic [ 1:0] fpu_fmt_i,
input logic [ 2:0] fpu_rm_i,
input logic [ 2:0] fpu_frm_i,
input logic [ 6:0] fpu_prec_i,
output logic [TRANS_ID_BITS-1:0] fpu_trans_id_o,
output logic [ CVA6Cfg.FLen-1:0] result_o,
output logic fpu_valid_o,
output exception_t fpu_exception_o
);
// this is a workaround
// otherwise compilation might issue an error if FLEN=0
enum logic {READY, STALL} state_q, state_d;
enum logic {
READY,
STALL
}
state_q, state_d;
if (CVA6Cfg.FpPresent) begin : fpu_gen
logic [CVA6Cfg.FLen-1:0] operand_a_i;
logic [CVA6Cfg.FLen-1:0] operand_b_i;
@ -47,50 +53,60 @@ module fpu_wrap import ariane_pkg::*; #(
//-----------------------------------
// FPnew config from FPnew package
//-----------------------------------
localparam OPBITS = fpnew_pkg::OP_BITS;
localparam FMTBITS = $clog2(fpnew_pkg::NUM_FP_FORMATS);
localparam OPBITS = fpnew_pkg::OP_BITS;
localparam FMTBITS = $clog2(fpnew_pkg::NUM_FP_FORMATS);
localparam IFMTBITS = $clog2(fpnew_pkg::NUM_INT_FORMATS);
// Features (enabled formats, vectors etc.)
localparam fpnew_pkg::fpu_features_t FPU_FEATURES = '{
Width: unsigned'(riscv::XLEN), // parameterized using XLEN
EnableVectors: CVA6Cfg.XFVec,
EnableNanBox: 1'b1,
FpFmtMask: {CVA6Cfg.RVF, CVA6Cfg.RVD, CVA6Cfg.XF16, CVA6Cfg.XF8, CVA6Cfg.XF16ALT},
IntFmtMask: {CVA6Cfg.XFVec && CVA6Cfg.XF8, CVA6Cfg.XFVec && (CVA6Cfg.XF16 || CVA6Cfg.XF16ALT), 1'b1, 1'b1}
Width: unsigned'(riscv::XLEN), // parameterized using XLEN
EnableVectors: CVA6Cfg.XFVec,
EnableNanBox: 1'b1,
FpFmtMask: {CVA6Cfg.RVF, CVA6Cfg.RVD, CVA6Cfg.XF16, CVA6Cfg.XF8, CVA6Cfg.XF16ALT},
IntFmtMask: {
CVA6Cfg.XFVec && CVA6Cfg.XF8,
CVA6Cfg.XFVec && (CVA6Cfg.XF16 || CVA6Cfg.XF16ALT),
1'b1,
1'b1
}
};
// Implementation (number of registers etc)
localparam fpnew_pkg::fpu_implementation_t FPU_IMPLEMENTATION = '{
PipeRegs: '{// FP32, FP64, FP16, FP8, FP16alt
'{unsigned'(LAT_COMP_FP32 ),
unsigned'(LAT_COMP_FP64 ),
unsigned'(LAT_COMP_FP16 ),
unsigned'(LAT_COMP_FP8 ),
unsigned'(LAT_COMP_FP16ALT)}, // ADDMUL
'{default: unsigned'(LAT_DIVSQRT)}, // DIVSQRT
'{default: unsigned'(LAT_NONCOMP)}, // NONCOMP
'{default: unsigned'(LAT_CONV)}}, // CONV
UnitTypes: '{'{default: fpnew_pkg::PARALLEL}, // ADDMUL
'{default: fpnew_pkg::MERGED}, // DIVSQRT
'{default: fpnew_pkg::PARALLEL}, // NONCOMP
'{default: fpnew_pkg::MERGED}}, // CONV
PipeConfig: fpnew_pkg::DISTRIBUTED
PipeRegs: '{ // FP32, FP64, FP16, FP8, FP16alt
'{
unsigned'(LAT_COMP_FP32),
unsigned'(LAT_COMP_FP64),
unsigned'(LAT_COMP_FP16),
unsigned'(LAT_COMP_FP8),
unsigned'(LAT_COMP_FP16ALT)
}, // ADDMUL
'{default: unsigned'(LAT_DIVSQRT)}, // DIVSQRT
'{default: unsigned'(LAT_NONCOMP)}, // NONCOMP
'{default: unsigned'(LAT_CONV)}
}, // CONV
UnitTypes: '{
'{default: fpnew_pkg::PARALLEL}, // ADDMUL
'{default: fpnew_pkg::MERGED}, // DIVSQRT
'{default: fpnew_pkg::PARALLEL}, // NONCOMP
'{default: fpnew_pkg::MERGED}
}, // CONV
PipeConfig: fpnew_pkg::DISTRIBUTED
};
//-------------------------------------------------
// Inputs to the FPU and protocol inversion buffer
//-------------------------------------------------
logic [CVA6Cfg.FLen-1:0] operand_a_d, operand_a_q, operand_a;
logic [CVA6Cfg.FLen-1:0] operand_b_d, operand_b_q, operand_b;
logic [CVA6Cfg.FLen-1:0] operand_c_d, operand_c_q, operand_c;
logic [OPBITS-1:0] fpu_op_d, fpu_op_q, fpu_op;
logic fpu_op_mod_d, fpu_op_mod_q, fpu_op_mod;
logic [FMTBITS-1:0] fpu_srcfmt_d, fpu_srcfmt_q, fpu_srcfmt;
logic [FMTBITS-1:0] fpu_dstfmt_d, fpu_dstfmt_q, fpu_dstfmt;
logic [IFMTBITS-1:0] fpu_ifmt_d, fpu_ifmt_q, fpu_ifmt;
logic [2:0] fpu_rm_d, fpu_rm_q, fpu_rm;
logic fpu_vec_op_d, fpu_vec_op_q, fpu_vec_op;
logic [CVA6Cfg.FLen-1:0] operand_a_d, operand_a_q, operand_a;
logic [CVA6Cfg.FLen-1:0] operand_b_d, operand_b_q, operand_b;
logic [CVA6Cfg.FLen-1:0] operand_c_d, operand_c_q, operand_c;
logic [OPBITS-1:0] fpu_op_d, fpu_op_q, fpu_op;
logic fpu_op_mod_d, fpu_op_mod_q, fpu_op_mod;
logic [FMTBITS-1:0] fpu_srcfmt_d, fpu_srcfmt_q, fpu_srcfmt;
logic [FMTBITS-1:0] fpu_dstfmt_d, fpu_dstfmt_q, fpu_dstfmt;
logic [IFMTBITS-1:0] fpu_ifmt_d, fpu_ifmt_q, fpu_ifmt;
logic [2:0] fpu_rm_d, fpu_rm_q, fpu_rm;
logic fpu_vec_op_d, fpu_vec_op_q, fpu_vec_op;
logic [TRANS_ID_BITS-1:0] fpu_tag_d, fpu_tag_q, fpu_tag;
@ -109,45 +125,41 @@ module fpu_wrap import ariane_pkg::*; #(
always_comb begin : input_translation
automatic logic vec_replication; // control honoring of replication flag
automatic logic replicate_c; // replicate operand C instead of B (for ADD/SUB)
automatic logic check_ah; // Decide for AH from RM field encoding
automatic logic vec_replication; // control honoring of replication flag
automatic logic replicate_c; // replicate operand C instead of B (for ADD/SUB)
automatic logic check_ah; // Decide for AH from RM field encoding
// Default Values
operand_a_d = operand_a_i;
operand_b_d = operand_b_i; // immediates come through this port unless used as operand
operand_c_d = operand_c_i; // immediates come through this port unless used as operand
fpu_op_d = fpnew_pkg::SGNJ; // sign injection by default
fpu_op_mod_d = 1'b0;
fpu_dstfmt_d = fpnew_pkg::FP32;
fpu_ifmt_d = fpnew_pkg::INT32;
fpu_rm_d = fpu_rm_i;
fpu_vec_op_d = fu_data_i.fu == FPU_VEC;
fpu_tag_d = fu_data_i.trans_id;
vec_replication = fpu_rm_i[0]; // replication bit is sent via rm field
replicate_c = 1'b0;
check_ah = 1'b0; // whether set scalar AH encoding from MSB of rm_i
operand_a_d = operand_a_i;
operand_b_d = operand_b_i; // immediates come through this port unless used as operand
operand_c_d = operand_c_i; // immediates come through this port unless used as operand
fpu_op_d = fpnew_pkg::SGNJ; // sign injection by default
fpu_op_mod_d = 1'b0;
fpu_dstfmt_d = fpnew_pkg::FP32;
fpu_ifmt_d = fpnew_pkg::INT32;
fpu_rm_d = fpu_rm_i;
fpu_vec_op_d = fu_data_i.fu == FPU_VEC;
fpu_tag_d = fu_data_i.trans_id;
vec_replication = fpu_rm_i[0]; // replication bit is sent via rm field
replicate_c = 1'b0;
check_ah = 1'b0; // whether set scalar AH encoding from MSB of rm_i
// Scalar Rounding Modes - some ops encode inside RM but use smaller range
if (!(fpu_rm_i inside {[3'b000:3'b100]}))
fpu_rm_d = fpu_frm_i;
if (!(fpu_rm_i inside {[3'b000 : 3'b100]})) fpu_rm_d = fpu_frm_i;
// Vectorial ops always consult FRM
if (fpu_vec_op_d)
fpu_rm_d = fpu_frm_i;
if (fpu_vec_op_d) fpu_rm_d = fpu_frm_i;
// Formats
unique case (fpu_fmt_i)
// FP32
2'b00: fpu_dstfmt_d = fpnew_pkg::FP32;
2'b00: fpu_dstfmt_d = fpnew_pkg::FP32;
// FP64 or FP16ALT (vectorial)
2'b01: fpu_dstfmt_d = fpu_vec_op_d ? fpnew_pkg::FP16ALT : fpnew_pkg::FP64;
2'b01: fpu_dstfmt_d = fpu_vec_op_d ? fpnew_pkg::FP16ALT : fpnew_pkg::FP64;
// FP16 or FP16ALT (scalar)
2'b10: begin
if (!fpu_vec_op_d && fpu_rm_i==3'b101)
fpu_dstfmt_d = fpnew_pkg::FP16ALT;
else
fpu_dstfmt_d = fpnew_pkg::FP16;
if (!fpu_vec_op_d && fpu_rm_i == 3'b101) fpu_dstfmt_d = fpnew_pkg::FP16ALT;
else fpu_dstfmt_d = fpnew_pkg::FP16;
end
// FP8
default: fpu_dstfmt_d = fpnew_pkg::FP8;
@ -167,29 +179,29 @@ module fpu_wrap import ariane_pkg::*; #(
FSUB: begin
fpu_op_d = fpnew_pkg::ADD;
fpu_op_mod_d = 1'b1;
replicate_c = 1'b1; // second operand is in C
replicate_c = 1'b1; // second operand is in C
end
// Multiplication
FMUL: fpu_op_d = fpnew_pkg::MUL;
FMUL: fpu_op_d = fpnew_pkg::MUL;
// Division
FDIV: fpu_op_d = fpnew_pkg::DIV;
FDIV: fpu_op_d = fpnew_pkg::DIV;
// Min/Max - OP is encoded in rm (000-001)
FMIN_MAX: begin
fpu_op_d = fpnew_pkg::MINMAX;
fpu_rm_d = {1'b0, fpu_rm_i[1:0]}; // mask out AH encoding bit
check_ah = 1'b1; // AH has RM MSB encoding
fpu_rm_d = {1'b0, fpu_rm_i[1:0]}; // mask out AH encoding bit
check_ah = 1'b1; // AH has RM MSB encoding
end
// Square Root
FSQRT: fpu_op_d = fpnew_pkg::SQRT;
FSQRT: fpu_op_d = fpnew_pkg::SQRT;
// Fused Multiply Add
FMADD: fpu_op_d = fpnew_pkg::FMADD;
FMADD: fpu_op_d = fpnew_pkg::FMADD;
// Fused Multiply Subtract is modified FMADD
FMSUB: begin
fpu_op_d = fpnew_pkg::FMADD;
fpu_op_mod_d = 1'b1;
end
// Fused Negated Multiply Subtract
FNMSUB: fpu_op_d = fpnew_pkg::FNMSUB;
FNMSUB: fpu_op_d = fpnew_pkg::FNMSUB;
// Fused Negated Multiply Add is modified FNMSUB
FNMADD: begin
fpu_op_d = fpnew_pkg::FNMSUB;
@ -197,24 +209,21 @@ module fpu_wrap import ariane_pkg::*; #(
end
// Float to Int Cast - Op encoded in lowest two imm bits or rm
FCVT_F2I: begin
fpu_op_d = fpnew_pkg::F2I;
fpu_op_d = fpnew_pkg::F2I;
// Vectorial Ops encoded in R bit
if (fpu_vec_op_d) begin
fpu_op_mod_d = fpu_rm_i[0];
vec_replication = 1'b0; // no replication, R bit used for op
fpu_op_mod_d = fpu_rm_i[0];
vec_replication = 1'b0; // no replication, R bit used for op
unique case (fpu_fmt_i)
2'b00: fpu_ifmt_d = fpnew_pkg::INT32;
2'b01,
2'b10: fpu_ifmt_d = fpnew_pkg::INT16;
2'b01, 2'b10: fpu_ifmt_d = fpnew_pkg::INT16;
2'b11: fpu_ifmt_d = fpnew_pkg::INT8;
endcase
// Scalar casts encoded in imm
// Scalar casts encoded in imm
end else begin
fpu_op_mod_d = operand_c_i[0];
if (operand_c_i[1])
fpu_ifmt_d = fpnew_pkg::INT64;
else
fpu_ifmt_d = fpnew_pkg::INT32;
if (operand_c_i[1]) fpu_ifmt_d = fpnew_pkg::INT64;
else fpu_ifmt_d = fpnew_pkg::INT32;
end
end
// Int to Float Cast - Op encoded in lowest two imm bits or rm
@ -222,21 +231,18 @@ module fpu_wrap import ariane_pkg::*; #(
fpu_op_d = fpnew_pkg::I2F;
// Vectorial Ops encoded in R bit
if (fpu_vec_op_d) begin
fpu_op_mod_d = fpu_rm_i[0];
vec_replication = 1'b0; // no replication, R bit used for op
fpu_op_mod_d = fpu_rm_i[0];
vec_replication = 1'b0; // no replication, R bit used for op
unique case (fpu_fmt_i)
2'b00: fpu_ifmt_d = fpnew_pkg::INT32;
2'b01,
2'b10: fpu_ifmt_d = fpnew_pkg::INT16;
2'b01, 2'b10: fpu_ifmt_d = fpnew_pkg::INT16;
2'b11: fpu_ifmt_d = fpnew_pkg::INT8;
endcase
// Scalar casts encoded in imm
// Scalar casts encoded in imm
end else begin
fpu_op_mod_d = operand_c_i[0];
if (operand_c_i[1])
fpu_ifmt_d = fpnew_pkg::INT64;
else
fpu_ifmt_d = fpnew_pkg::INT32;
if (operand_c_i[1]) fpu_ifmt_d = fpnew_pkg::INT64;
else fpu_ifmt_d = fpnew_pkg::INT32;
end
end
// Float to Float Cast - Source format encoded in lowest two/three imm bits
@ -244,171 +250,173 @@ module fpu_wrap import ariane_pkg::*; #(
fpu_op_d = fpnew_pkg::F2F;
// Vectorial ops encoded in lowest two imm bits
if (fpu_vec_op_d) begin
vec_replication = 1'b0; // no replication for casts (not needed)
vec_replication = 1'b0; // no replication for casts (not needed)
unique case (operand_c_i[1:0])
2'b00: fpu_srcfmt_d = fpnew_pkg::FP32;
2'b01: fpu_srcfmt_d = fpnew_pkg::FP16ALT;
2'b10: fpu_srcfmt_d = fpnew_pkg::FP16;
2'b11: fpu_srcfmt_d = fpnew_pkg::FP8;
endcase
// Scalar ops encoded in lowest three imm bits
// Scalar ops encoded in lowest three imm bits
end else begin
unique case (operand_c_i[2:0])
3'b000: fpu_srcfmt_d = fpnew_pkg::FP32;
3'b001: fpu_srcfmt_d = fpnew_pkg::FP64;
3'b010: fpu_srcfmt_d = fpnew_pkg::FP16;
3'b110: fpu_srcfmt_d = fpnew_pkg::FP16ALT;
3'b011: fpu_srcfmt_d = fpnew_pkg::FP8;
default: ; // Do nothing
3'b000: fpu_srcfmt_d = fpnew_pkg::FP32;
3'b001: fpu_srcfmt_d = fpnew_pkg::FP64;
3'b010: fpu_srcfmt_d = fpnew_pkg::FP16;
3'b110: fpu_srcfmt_d = fpnew_pkg::FP16ALT;
3'b011: fpu_srcfmt_d = fpnew_pkg::FP8;
default: ; // Do nothing
endcase
end
end
// Scalar Sign Injection - op encoded in rm (000-010)
FSGNJ: begin
fpu_op_d = fpnew_pkg::SGNJ;
fpu_rm_d = {1'b0, fpu_rm_i[1:0]}; // mask out AH encoding bit
check_ah = 1'b1; // AH has RM MSB encoding
fpu_rm_d = {1'b0, fpu_rm_i[1:0]}; // mask out AH encoding bit
check_ah = 1'b1; // AH has RM MSB encoding
end
// Move from FPR to GPR - mapped to SGNJ-passthrough since no recoding
FMV_F2X: begin
fpu_op_d = fpnew_pkg::SGNJ;
fpu_rm_d = 3'b011; // passthrough without checking nan-box
fpu_op_mod_d = 1'b1; // no NaN-Boxing
check_ah = 1'b1; // AH has RM MSB encoding
vec_replication = 1'b0; // no replication, we set second operand
fpu_op_d = fpnew_pkg::SGNJ;
fpu_rm_d = 3'b011; // passthrough without checking nan-box
fpu_op_mod_d = 1'b1; // no NaN-Boxing
check_ah = 1'b1; // AH has RM MSB encoding
vec_replication = 1'b0; // no replication, we set second operand
end
// Move from GPR to FPR - mapped to NOP since no recoding
FMV_X2F: begin
fpu_op_d = fpnew_pkg::SGNJ;
fpu_rm_d = 3'b011; // passthrough without checking nan-box
check_ah = 1'b1; // AH has RM MSB encoding
vec_replication = 1'b0; // no replication, we set second operand
fpu_op_d = fpnew_pkg::SGNJ;
fpu_rm_d = 3'b011; // passthrough without checking nan-box
check_ah = 1'b1; // AH has RM MSB encoding
vec_replication = 1'b0; // no replication, we set second operand
end
// Scalar Comparisons - op encoded in rm (000-010)
FCMP: begin
fpu_op_d = fpnew_pkg::CMP;
fpu_rm_d = {1'b0, fpu_rm_i[1:0]}; // mask out AH encoding bit
check_ah = 1'b1; // AH has RM MSB encoding
fpu_rm_d = {1'b0, fpu_rm_i[1:0]}; // mask out AH encoding bit
check_ah = 1'b1; // AH has RM MSB encoding
end
// Classification
FCLASS: begin
fpu_op_d = fpnew_pkg::CLASSIFY;
fpu_rm_d = {1'b0, fpu_rm_i[1:0]}; // mask out AH encoding bit - CLASS doesn't care anyways
check_ah = 1'b1; // AH has RM MSB encoding
fpu_rm_d = {
1'b0, fpu_rm_i[1:0]
}; // mask out AH encoding bit - CLASS doesn't care anyways
check_ah = 1'b1; // AH has RM MSB encoding
end
// Vectorial Minimum - set up scalar encoding in rm
VFMIN: begin
fpu_op_d = fpnew_pkg::MINMAX;
fpu_rm_d = 3'b000; // min
fpu_rm_d = 3'b000; // min
end
// Vectorial Maximum - set up scalar encoding in rm
VFMAX: begin
fpu_op_d = fpnew_pkg::MINMAX;
fpu_rm_d = 3'b001; // max
fpu_rm_d = 3'b001; // max
end
// Vectorial Sign Injection - set up scalar encoding in rm
VFSGNJ: begin
fpu_op_d = fpnew_pkg::SGNJ;
fpu_rm_d = 3'b000; // sgnj
fpu_rm_d = 3'b000; // sgnj
end
// Vectorial Negated Sign Injection - set up scalar encoding in rm
VFSGNJN: begin
fpu_op_d = fpnew_pkg::SGNJ;
fpu_rm_d = 3'b001; // sgnjn
fpu_rm_d = 3'b001; // sgnjn
end
// Vectorial Xored Sign Injection - set up scalar encoding in rm
VFSGNJX: begin
fpu_op_d = fpnew_pkg::SGNJ;
fpu_rm_d = 3'b010; // sgnjx
fpu_rm_d = 3'b010; // sgnjx
end
// Vectorial Equals - set up scalar encoding in rm
VFEQ: begin
fpu_op_d = fpnew_pkg::CMP;
fpu_rm_d = 3'b010; // eq
fpu_rm_d = 3'b010; // eq
end
// Vectorial Not Equals - set up scalar encoding in rm
VFNE: begin
fpu_op_d = fpnew_pkg::CMP;
fpu_op_mod_d = 1'b1; // invert output
fpu_rm_d = 3'b010; // eq
end
fpu_op_mod_d = 1'b1; // invert output
fpu_rm_d = 3'b010; // eq
end
// Vectorial Less Than - set up scalar encoding in rm
VFLT: begin
fpu_op_d = fpnew_pkg::CMP;
fpu_rm_d = 3'b001; // lt
fpu_rm_d = 3'b001; // lt
end
// Vectorial Greater or Equal - set up scalar encoding in rm
VFGE: begin
fpu_op_d = fpnew_pkg::CMP;
fpu_op_mod_d = 1'b1; // invert output
fpu_rm_d = 3'b001; // lt
fpu_op_mod_d = 1'b1; // invert output
fpu_rm_d = 3'b001; // lt
end
// Vectorial Less or Equal - set up scalar encoding in rm
VFLE: begin
fpu_op_d = fpnew_pkg::CMP;
fpu_rm_d = 3'b000; // le
fpu_rm_d = 3'b000; // le
end
// Vectorial Greater Than - set up scalar encoding in rm
VFGT: begin
fpu_op_d = fpnew_pkg::CMP;
fpu_op_mod_d = 1'b1; // invert output
fpu_rm_d = 3'b000; // le
fpu_op_mod_d = 1'b1; // invert output
fpu_rm_d = 3'b000; // le
end
// Vectorial Convert-and-Pack from FP32, lower 4 entries
VFCPKAB_S: begin
fpu_op_d = fpnew_pkg::CPKAB;
fpu_op_mod_d = fpu_rm_i[0]; // A/B selection from R bit
vec_replication = 1'b0; // no replication, R bit used for op
fpu_srcfmt_d = fpnew_pkg::FP32; // Cast from FP32
fpu_op_mod_d = fpu_rm_i[0]; // A/B selection from R bit
vec_replication = 1'b0; // no replication, R bit used for op
fpu_srcfmt_d = fpnew_pkg::FP32; // Cast from FP32
end
// Vectorial Convert-and-Pack from FP32, upper 4 entries
VFCPKCD_S: begin
fpu_op_d = fpnew_pkg::CPKCD;
fpu_op_mod_d = fpu_rm_i[0]; // C/D selection from R bit
vec_replication = 1'b0; // no replication, R bit used for op
fpu_srcfmt_d = fpnew_pkg::FP32; // Cast from FP32
fpu_op_mod_d = fpu_rm_i[0]; // C/D selection from R bit
vec_replication = 1'b0; // no replication, R bit used for op
fpu_srcfmt_d = fpnew_pkg::FP32; // Cast from FP32
end
// Vectorial Convert-and-Pack from FP64, lower 4 entries
VFCPKAB_D: begin
fpu_op_d = fpnew_pkg::CPKAB;
fpu_op_mod_d = fpu_rm_i[0]; // A/B selection from R bit
vec_replication = 1'b0; // no replication, R bit used for op
fpu_srcfmt_d = fpnew_pkg::FP64; // Cast from FP64
fpu_op_mod_d = fpu_rm_i[0]; // A/B selection from R bit
vec_replication = 1'b0; // no replication, R bit used for op
fpu_srcfmt_d = fpnew_pkg::FP64; // Cast from FP64
end
// Vectorial Convert-and-Pack from FP64, upper 4 entries
VFCPKCD_D: begin
fpu_op_d = fpnew_pkg::CPKCD;
fpu_op_mod_d = fpu_rm_i[0]; // C/D selection from R bit
vec_replication = 1'b0; // no replication, R bit used for op
fpu_srcfmt_d = fpnew_pkg::FP64; // Cast from FP64
fpu_op_mod_d = fpu_rm_i[0]; // C/D selection from R bit
vec_replication = 1'b0; // no replication, R bit used for op
fpu_srcfmt_d = fpnew_pkg::FP64; // Cast from FP64
end
// No changes per default
default: ; //nothing
default: ; //nothing
endcase
// Scalar AH encoding fixing
if (!fpu_vec_op_d && check_ah)
if (fpu_rm_i[2])
fpu_dstfmt_d = fpnew_pkg::FP16ALT;
if (!fpu_vec_op_d && check_ah) if (fpu_rm_i[2]) fpu_dstfmt_d = fpnew_pkg::FP16ALT;
// Replication
if (fpu_vec_op_d && vec_replication) begin
if (replicate_c) begin
unique case (fpu_dstfmt_d)
fpnew_pkg::FP32: operand_c_d = CVA6Cfg.RVD ? {2{operand_c_i[31:0]}} : operand_c_i;
fpnew_pkg::FP16,
fpnew_pkg::FP16ALT: operand_c_d = CVA6Cfg.RVD ? {4{operand_c_i[15:0]}} : {2{operand_c_i[15:0]}};
fpnew_pkg::FP8: operand_c_d = CVA6Cfg.RVD ? {8{operand_c_i[7:0]}} : {4{operand_c_i[7:0]}};
default: ; // Do nothing
endcase // fpu_dstfmt_d
fpnew_pkg::FP32: operand_c_d = CVA6Cfg.RVD ? {2{operand_c_i[31:0]}} : operand_c_i;
fpnew_pkg::FP16, fpnew_pkg::FP16ALT:
operand_c_d = CVA6Cfg.RVD ? {4{operand_c_i[15:0]}} : {2{operand_c_i[15:0]}};
fpnew_pkg::FP8:
operand_c_d = CVA6Cfg.RVD ? {8{operand_c_i[7:0]}} : {4{operand_c_i[7:0]}};
default: ; // Do nothing
endcase // fpu_dstfmt_d
end else begin
unique case (fpu_dstfmt_d)
fpnew_pkg::FP32: operand_b_d = CVA6Cfg.RVD ? {2{operand_b_i[31:0]}} : operand_b_i;
fpnew_pkg::FP16,
fpnew_pkg::FP16ALT: operand_b_d = CVA6Cfg.RVD ? {4{operand_b_i[15:0]}} : {2{operand_b_i[15:0]}};
fpnew_pkg::FP8: operand_b_d = CVA6Cfg.RVD ? {8{operand_b_i[7:0]}} : {4{operand_b_i[7:0]}};
default: ; // Do nothing
endcase // fpu_dstfmt_d
fpnew_pkg::FP32: operand_b_d = CVA6Cfg.RVD ? {2{operand_b_i[31:0]}} : operand_b_i;
fpnew_pkg::FP16, fpnew_pkg::FP16ALT:
operand_b_d = CVA6Cfg.RVD ? {4{operand_b_i[15:0]}} : {2{operand_b_i[15:0]}};
fpnew_pkg::FP8:
operand_b_d = CVA6Cfg.RVD ? {8{operand_b_i[7:0]}} : {4{operand_b_i[7:0]}};
default: ; // Do nothing
endcase // fpu_dstfmt_d
end
end
end
@ -422,31 +430,31 @@ module fpu_wrap import ariane_pkg::*; #(
// Default Values
fpu_ready_o = 1'b0;
fpu_in_valid = 1'b0;
hold_inputs = 1'b0; // hold register disabled
use_hold = 1'b0; // inputs go directly to unit
state_d = state_q; // stay in the same state
hold_inputs = 1'b0; // hold register disabled
use_hold = 1'b0; // inputs go directly to unit
state_d = state_q; // stay in the same state
// FSM
unique case (state_q)
// Default state, ready for instructions
READY: begin
fpu_ready_o = 1'b1; // Act as if FPU ready
fpu_in_valid = fpu_valid_i; // Forward input valid to FPU
fpu_ready_o = 1'b1; // Act as if FPU ready
fpu_in_valid = fpu_valid_i; // Forward input valid to FPU
// There is a transaction but the FPU can't handle it
if (fpu_valid_i & ~fpu_in_ready) begin
fpu_ready_o = 1'b0; // No token given to Issue
hold_inputs = 1'b1; // save inputs to the holding register
state_d = STALL; // stall future incoming requests
state_d = STALL; // stall future incoming requests
end
end
// We're stalling the upstream (ready=0)
STALL: begin
fpu_in_valid = 1'b1; // we have data for the FPU
use_hold = 1'b1; // the data comes from the hold reg
fpu_in_valid = 1'b1; // we have data for the FPU
use_hold = 1'b1; // the data comes from the hold reg
// Wait until it's consumed
if (fpu_in_ready) begin
fpu_ready_o = 1'b1; // Give a token to issue
state_d = READY; // accept future requests
state_d = READY; // accept future requests
end
end
// Default: emit default values
@ -462,50 +470,50 @@ module fpu_wrap import ariane_pkg::*; #(
// Buffer register and FSM state holding
always_ff @(posedge clk_i or negedge rst_ni) begin : fp_hold_reg
if(~rst_ni) begin
state_q <= READY;
operand_a_q <= '0;
operand_b_q <= '0;
operand_c_q <= '0;
fpu_op_q <= '0;
fpu_op_mod_q <= '0;
fpu_srcfmt_q <= '0;
fpu_dstfmt_q <= '0;
fpu_ifmt_q <= '0;
fpu_rm_q <= '0;
fpu_vec_op_q <= '0;
fpu_tag_q <= '0;
if (~rst_ni) begin
state_q <= READY;
operand_a_q <= '0;
operand_b_q <= '0;
operand_c_q <= '0;
fpu_op_q <= '0;
fpu_op_mod_q <= '0;
fpu_srcfmt_q <= '0;
fpu_dstfmt_q <= '0;
fpu_ifmt_q <= '0;
fpu_rm_q <= '0;
fpu_vec_op_q <= '0;
fpu_tag_q <= '0;
end else begin
state_q <= state_d;
state_q <= state_d;
// Hold register is [TRIGGERED] by FSM
if (hold_inputs) begin
operand_a_q <= operand_a_d;
operand_b_q <= operand_b_d;
operand_c_q <= operand_c_d;
fpu_op_q <= fpu_op_d;
fpu_op_mod_q <= fpu_op_mod_d;
fpu_srcfmt_q <= fpu_srcfmt_d;
fpu_dstfmt_q <= fpu_dstfmt_d;
fpu_ifmt_q <= fpu_ifmt_d;
fpu_rm_q <= fpu_rm_d;
fpu_vec_op_q <= fpu_vec_op_d;
fpu_tag_q <= fpu_tag_d;
operand_a_q <= operand_a_d;
operand_b_q <= operand_b_d;
operand_c_q <= operand_c_d;
fpu_op_q <= fpu_op_d;
fpu_op_mod_q <= fpu_op_mod_d;
fpu_srcfmt_q <= fpu_srcfmt_d;
fpu_dstfmt_q <= fpu_dstfmt_d;
fpu_ifmt_q <= fpu_ifmt_d;
fpu_rm_q <= fpu_rm_d;
fpu_vec_op_q <= fpu_vec_op_d;
fpu_tag_q <= fpu_tag_d;
end
end
end
// Select FPU input data: from register if valid data in register, else directly from input
assign operand_a = use_hold ? operand_a_q : operand_a_d;
assign operand_b = use_hold ? operand_b_q : operand_b_d;
assign operand_c = use_hold ? operand_c_q : operand_c_d;
assign fpu_op = use_hold ? fpu_op_q : fpu_op_d;
assign operand_a = use_hold ? operand_a_q : operand_a_d;
assign operand_b = use_hold ? operand_b_q : operand_b_d;
assign operand_c = use_hold ? operand_c_q : operand_c_d;
assign fpu_op = use_hold ? fpu_op_q : fpu_op_d;
assign fpu_op_mod = use_hold ? fpu_op_mod_q : fpu_op_mod_d;
assign fpu_srcfmt = use_hold ? fpu_srcfmt_q : fpu_srcfmt_d;
assign fpu_dstfmt = use_hold ? fpu_dstfmt_q : fpu_dstfmt_d;
assign fpu_ifmt = use_hold ? fpu_ifmt_q : fpu_ifmt_d;
assign fpu_rm = use_hold ? fpu_rm_q : fpu_rm_d;
assign fpu_ifmt = use_hold ? fpu_ifmt_q : fpu_ifmt_d;
assign fpu_rm = use_hold ? fpu_rm_q : fpu_rm_d;
assign fpu_vec_op = use_hold ? fpu_vec_op_q : fpu_vec_op_d;
assign fpu_tag = use_hold ? fpu_tag_q : fpu_tag_d;
assign fpu_tag = use_hold ? fpu_tag_q : fpu_tag_d;
// Consolidate operands
logic [2:0][CVA6Cfg.FLen-1:0] fpu_operands;
@ -519,31 +527,31 @@ module fpu_wrap import ariane_pkg::*; #(
//---------------
fpnew_top #(
.Features ( FPU_FEATURES ),
.Implementation ( FPU_IMPLEMENTATION ),
.TagType ( logic [TRANS_ID_BITS-1:0] )
.Features (FPU_FEATURES),
.Implementation(FPU_IMPLEMENTATION),
.TagType (logic [TRANS_ID_BITS-1:0])
) i_fpnew_bulk (
.clk_i,
.rst_ni,
.operands_i ( fpu_operands ),
.rnd_mode_i ( fpnew_pkg::roundmode_e'(fpu_rm) ),
.op_i ( fpnew_pkg::operation_e'(fpu_op) ),
.op_mod_i ( fpu_op_mod ),
.src_fmt_i ( fpnew_pkg::fp_format_e'(fpu_srcfmt) ),
.dst_fmt_i ( fpnew_pkg::fp_format_e'(fpu_dstfmt) ),
.int_fmt_i ( fpnew_pkg::int_format_e'(fpu_ifmt) ),
.vectorial_op_i ( fpu_vec_op ),
.tag_i ( fpu_tag ),
.simd_mask_i ( 1'b1 ),
.in_valid_i ( fpu_in_valid ),
.in_ready_o ( fpu_in_ready ),
.flush_i,
.result_o,
.status_o ( fpu_status ),
.tag_o ( fpu_trans_id_o ),
.out_valid_o ( fpu_out_valid ),
.out_ready_i ( fpu_out_ready ),
.busy_o ( /* unused */ )
.clk_i,
.rst_ni,
.operands_i (fpu_operands),
.rnd_mode_i (fpnew_pkg::roundmode_e'(fpu_rm)),
.op_i (fpnew_pkg::operation_e'(fpu_op)),
.op_mod_i (fpu_op_mod),
.src_fmt_i (fpnew_pkg::fp_format_e'(fpu_srcfmt)),
.dst_fmt_i (fpnew_pkg::fp_format_e'(fpu_dstfmt)),
.int_fmt_i (fpnew_pkg::int_format_e'(fpu_ifmt)),
.vectorial_op_i(fpu_vec_op),
.tag_i (fpu_tag),
.simd_mask_i (1'b1),
.in_valid_i (fpu_in_valid),
.in_ready_o (fpu_in_ready),
.flush_i,
.result_o,
.status_o (fpu_status),
.tag_o (fpu_trans_id_o),
.out_valid_o (fpu_out_valid),
.out_ready_i (fpu_out_ready),
.busy_o ( /* unused */)
);
// Pack status flag into exception cause, tval ignored in wb, exception is always invalid

View file

@ -21,195 +21,195 @@
module bht #(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int unsigned NR_ENTRIES = 1024
)(
input logic clk_i,
input logic rst_ni,
input logic flush_i,
input logic debug_mode_i,
input logic [riscv::VLEN-1:0] vpc_i,
input ariane_pkg::bht_update_t bht_update_i,
) (
input logic clk_i,
input logic rst_ni,
input logic flush_i,
input logic debug_mode_i,
input logic [ riscv::VLEN-1:0] vpc_i,
input ariane_pkg::bht_update_t bht_update_i,
// we potentially need INSTR_PER_FETCH predictions/cycle
output ariane_pkg::bht_prediction_t [ariane_pkg::INSTR_PER_FETCH-1:0] bht_prediction_o
);
// the last bit is always zero, we don't need it for indexing
localparam OFFSET = CVA6Cfg.RVC == 1'b1 ? 1 : 2;
// re-shape the branch history table
localparam NR_ROWS = NR_ENTRIES / ariane_pkg::INSTR_PER_FETCH;
// number of bits needed to index the row
localparam ROW_ADDR_BITS = $clog2(ariane_pkg::INSTR_PER_FETCH);
localparam ROW_INDEX_BITS = CVA6Cfg.RVC == 1'b1 ? $clog2(ariane_pkg::INSTR_PER_FETCH) : 1;
// number of bits we should use for prediction
localparam PREDICTION_BITS = $clog2(NR_ROWS) + OFFSET + ROW_ADDR_BITS;
// we are not interested in all bits of the address
unread i_unread (.d_i(|vpc_i));
// the last bit is always zero, we don't need it for indexing
localparam OFFSET = CVA6Cfg.RVC == 1'b1 ? 1 : 2;
// re-shape the branch history table
localparam NR_ROWS = NR_ENTRIES / ariane_pkg::INSTR_PER_FETCH;
// number of bits needed to index the row
localparam ROW_ADDR_BITS = $clog2(ariane_pkg::INSTR_PER_FETCH);
localparam ROW_INDEX_BITS = CVA6Cfg.RVC == 1'b1 ? $clog2(ariane_pkg::INSTR_PER_FETCH) : 1;
// number of bits we should use for prediction
localparam PREDICTION_BITS = $clog2(NR_ROWS) + OFFSET + ROW_ADDR_BITS;
// we are not interested in all bits of the address
unread i_unread (.d_i(|vpc_i));
struct packed {
logic valid;
logic [1:0] saturation_counter;
} bht_d[NR_ROWS-1:0][ariane_pkg::INSTR_PER_FETCH-1:0], bht_q[NR_ROWS-1:0][ariane_pkg::INSTR_PER_FETCH-1:0];
struct packed {
logic valid;
logic [1:0] saturation_counter;
}
bht_d[NR_ROWS-1:0][ariane_pkg::INSTR_PER_FETCH-1:0],
bht_q[NR_ROWS-1:0][ariane_pkg::INSTR_PER_FETCH-1:0];
logic [$clog2(NR_ROWS)-1:0] index, update_pc;
logic [ROW_INDEX_BITS-1:0] update_row_index;
logic [$clog2(NR_ROWS)-1:0] index, update_pc;
logic [ROW_INDEX_BITS-1:0] update_row_index;
assign index = vpc_i[PREDICTION_BITS - 1:ROW_ADDR_BITS + OFFSET];
assign update_pc = bht_update_i.pc[PREDICTION_BITS - 1:ROW_ADDR_BITS + OFFSET];
if (CVA6Cfg.RVC) begin : gen_update_row_index
assign update_row_index = bht_update_i.pc[ROW_ADDR_BITS + OFFSET - 1:OFFSET];
end else begin
assign update_row_index = '0;
assign index = vpc_i[PREDICTION_BITS-1:ROW_ADDR_BITS+OFFSET];
assign update_pc = bht_update_i.pc[PREDICTION_BITS-1:ROW_ADDR_BITS+OFFSET];
if (CVA6Cfg.RVC) begin : gen_update_row_index
assign update_row_index = bht_update_i.pc[ROW_ADDR_BITS+OFFSET-1:OFFSET];
end else begin
assign update_row_index = '0;
end
if (!ariane_pkg::FPGA_EN) begin : gen_asic_bht // ASIC TARGET
logic [1:0] saturation_counter;
// prediction assignment
for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_bht_output
assign bht_prediction_o[i].valid = bht_q[index][i].valid;
assign bht_prediction_o[i].taken = bht_q[index][i].saturation_counter[1] == 1'b1;
end
if (!ariane_pkg::FPGA_EN) begin : gen_asic_bht // ASIC TARGET
always_comb begin : update_bht
bht_d = bht_q;
saturation_counter = bht_q[update_pc][update_row_index].saturation_counter;
logic [1:0] saturation_counter;
// prediction assignment
for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_bht_output
assign bht_prediction_o[i].valid = bht_q[index][i].valid;
assign bht_prediction_o[i].taken = bht_q[index][i].saturation_counter[1] == 1'b1;
end
if (bht_update_i.valid && !debug_mode_i) begin
bht_d[update_pc][update_row_index].valid = 1'b1;
always_comb begin : update_bht
bht_d = bht_q;
saturation_counter = bht_q[update_pc][update_row_index].saturation_counter;
if (bht_update_i.valid && !debug_mode_i) begin
bht_d[update_pc][update_row_index].valid = 1'b1;
if (saturation_counter == 2'b11) begin
// we can safely decrease it
if (!bht_update_i.taken)
bht_d[update_pc][update_row_index].saturation_counter = saturation_counter - 1;
if (saturation_counter == 2'b11) begin
// we can safely decrease it
if (!bht_update_i.taken)
bht_d[update_pc][update_row_index].saturation_counter = saturation_counter - 1;
// then check if it saturated in the negative regime e.g.: branch not taken
end else if (saturation_counter == 2'b00) begin
// we can safely increase it
if (bht_update_i.taken)
bht_d[update_pc][update_row_index].saturation_counter = saturation_counter + 1;
end else begin // otherwise we are not in any boundaries and can decrease or increase it
if (bht_update_i.taken)
bht_d[update_pc][update_row_index].saturation_counter = saturation_counter + 1;
else
bht_d[update_pc][update_row_index].saturation_counter = saturation_counter - 1;
end
end else if (saturation_counter == 2'b00) begin
// we can safely increase it
if (bht_update_i.taken)
bht_d[update_pc][update_row_index].saturation_counter = saturation_counter + 1;
end else begin // otherwise we are not in any boundaries and can decrease or increase it
if (bht_update_i.taken)
bht_d[update_pc][update_row_index].saturation_counter = saturation_counter + 1;
else bht_d[update_pc][update_row_index].saturation_counter = saturation_counter - 1;
end
end
end
always_ff @(posedge clk_i or negedge rst_ni) begin
if (!rst_ni) begin
for (int unsigned i = 0; i < NR_ROWS; i++) begin
always_ff @(posedge clk_i or negedge rst_ni) begin
if (!rst_ni) begin
for (int unsigned i = 0; i < NR_ROWS; i++) begin
for (int j = 0; j < ariane_pkg::INSTR_PER_FETCH; j++) begin
bht_q[i][j] <= '0;
end
end
end else begin
// evict all entries
if (flush_i) begin
for (int i = 0; i < NR_ROWS; i++) begin
for (int j = 0; j < ariane_pkg::INSTR_PER_FETCH; j++) begin
bht_q[i][j] <= '0;
bht_q[i][j].valid <= 1'b0;
bht_q[i][j].saturation_counter <= 2'b10;
end
end
end else begin
// evict all entries
if (flush_i) begin
for (int i = 0; i < NR_ROWS; i++) begin
for (int j = 0; j < ariane_pkg::INSTR_PER_FETCH; j++) begin
bht_q[i][j].valid <= 1'b0;
bht_q[i][j].saturation_counter <= 2'b10;
end
end
end else begin
bht_q <= bht_d;
end
bht_q <= bht_d;
end
end
end else begin : gen_fpga_bht //FPGA TARGETS
// number of bits per word in the BRAM
localparam BRAM_WORD_BITS = $bits(ariane_pkg::bht_t);
logic [ROW_INDEX_BITS-1:0] row_index;
logic [ariane_pkg::INSTR_PER_FETCH-1:0] bht_ram_we;
logic [ariane_pkg::INSTR_PER_FETCH*$clog2(NR_ROWS)-1:0] bht_ram_read_address_0;
logic [ariane_pkg::INSTR_PER_FETCH*$clog2(NR_ROWS)-1:0] bht_ram_read_address_1;
logic [ariane_pkg::INSTR_PER_FETCH*$clog2(NR_ROWS)-1:0] bht_ram_write_address;
logic [ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] bht_ram_wdata;
logic [ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] bht_ram_rdata_0;
logic [ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] bht_ram_rdata_1;
ariane_pkg::bht_t [ariane_pkg::INSTR_PER_FETCH-1:0] bht;
ariane_pkg::bht_t [ariane_pkg::INSTR_PER_FETCH-1:0] bht_updated;
if (CVA6Cfg.RVC) begin : gen_row_index
assign row_index = vpc_i[ROW_ADDR_BITS + OFFSET - 1:OFFSET];
end else begin
assign row_index = '0;
end
// -------------------------
// prediction assignment & update Branch History Table
// -------------------------
always_comb begin : prediction_update_bht
  // Combinational prediction read and counter update for the FPGA (RAM-based) BHT.
  //
  // Every signal written by this process is given a default first, so no path
  // leaves a signal unassigned and no latch is inferred.
  bht_ram_we = '0;
  bht_ram_read_address_0 = '0;
  bht_ram_read_address_1 = '0;
  bht_ram_write_address = '0;
  bht_ram_wdata = '0;
  bht_updated = '0;
  bht = '0;
  // Fix: bht_prediction_o used to be written only for the slot selected by
  // row_index, which made the remaining slots hold their previous value
  // (inferred latch). Unselected slots now report valid = 0 / taken = 0.
  bht_prediction_o = '0;

  // Prediction read: only the RAM of the slot addressed by row_index is read
  // (read port 0) at row `index`.
  for (int i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin
    if (row_index == i) begin
      bht_ram_read_address_0[i*$clog2(NR_ROWS) +: $clog2(NR_ROWS)] = index;
      // Word layout matches the write below: {valid, saturation_counter[1:0]}
      // (assumes BRAM_WORD_BITS == 3 -- confirm against ariane_pkg::bht_t).
      bht_prediction_o[i].valid = bht_ram_rdata_0[i*BRAM_WORD_BITS+2];
      // The counter MSB is the taken/not-taken prediction.
      bht_prediction_o[i].taken = bht_ram_rdata_0[i*BRAM_WORD_BITS+1];
    end
  end

  // Update: read the current counter through read port 1, step it with
  // saturation at 2'b00 / 2'b11 and write it back to the same row.
  if (bht_update_i.valid && !debug_mode_i) begin
    for (int i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin
      if (update_row_index == i) begin
        bht_ram_read_address_1[i*$clog2(NR_ROWS) +: $clog2(NR_ROWS)] = update_pc;
        bht[i].saturation_counter = bht_ram_rdata_1[i*BRAM_WORD_BITS +: 2];

        if (bht[i].saturation_counter == 2'b11) begin
          // saturated high: only a not-taken branch moves the counter
          if (!bht_update_i.taken)
            bht_updated[i].saturation_counter = bht[i].saturation_counter - 1;
          else
            bht_updated[i].saturation_counter = 2'b11;
        end else if (bht[i].saturation_counter == 2'b00) begin
          // saturated low: only a taken branch moves the counter
          if (bht_update_i.taken)
            bht_updated[i].saturation_counter = bht[i].saturation_counter + 1;
          else
            bht_updated[i].saturation_counter = 2'b00;
        end else begin
          // not at a boundary: count up on taken, down on not taken
          if (bht_update_i.taken)
            bht_updated[i].saturation_counter = bht[i].saturation_counter + 1;
          else
            bht_updated[i].saturation_counter = bht[i].saturation_counter - 1;
        end

        bht_updated[i].valid = 1'b1;
        bht_ram_we[i] = 1'b1;
        bht_ram_write_address[i*$clog2(NR_ROWS) +: $clog2(NR_ROWS)] = update_pc;
        // valid ends up in the MSB of the written word
        bht_ram_wdata[i*BRAM_WORD_BITS +: BRAM_WORD_BITS] = {
          bht_updated[i].valid, bht_updated[i].saturation_counter
        };
      end
    end
  end
end
// One AsyncThreePortRam instance per fetch slot. Slot i uses slice i of the
// flattened write-enable / address / data vectors driven by prediction_update_bht.
for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_bht_ram
AsyncThreePortRam #(
.ADDR_WIDTH($clog2(NR_ROWS)),
.DATA_DEPTH (NR_ROWS),
.DATA_WIDTH(BRAM_WORD_BITS)
) i_bht_ram (
.Clk_CI ( clk_i ),
// write port: updated counter for the row addressed by update_pc
.WrEn_SI ( bht_ram_we[i] ),
.WrAddr_DI ( bht_ram_write_address[i*$clog2(NR_ROWS) +: $clog2(NR_ROWS)] ),
.WrData_DI ( bht_ram_wdata[i*BRAM_WORD_BITS +: BRAM_WORD_BITS] ),
// read port 0: prediction lookup, read port 1: current counter for the update
.RdAddr_DI_0 ( bht_ram_read_address_0[i*$clog2(NR_ROWS) +: $clog2(NR_ROWS)] ),
.RdAddr_DI_1 ( bht_ram_read_address_1[i*$clog2(NR_ROWS) +: $clog2(NR_ROWS)] ),
.RdData_DO_0 ( bht_ram_rdata_0[i*BRAM_WORD_BITS +: BRAM_WORD_BITS] ),
.RdData_DO_1 ( bht_ram_rdata_1[i*BRAM_WORD_BITS +: BRAM_WORD_BITS] )
);
end
end
end else begin : gen_fpga_bht //FPGA TARGETS
// number of bits per word in the BRAM
localparam BRAM_WORD_BITS = $bits(ariane_pkg::bht_t);
logic [ ROW_INDEX_BITS-1:0] row_index;
logic [ ariane_pkg::INSTR_PER_FETCH-1:0] bht_ram_we;
logic [ariane_pkg::INSTR_PER_FETCH*$clog2(NR_ROWS)-1:0] bht_ram_read_address_0;
logic [ariane_pkg::INSTR_PER_FETCH*$clog2(NR_ROWS)-1:0] bht_ram_read_address_1;
logic [ariane_pkg::INSTR_PER_FETCH*$clog2(NR_ROWS)-1:0] bht_ram_write_address;
logic [ ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] bht_ram_wdata;
logic [ ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] bht_ram_rdata_0;
logic [ ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] bht_ram_rdata_1;
ariane_pkg::bht_t [ ariane_pkg::INSTR_PER_FETCH-1:0] bht;
ariane_pkg::bht_t [ ariane_pkg::INSTR_PER_FETCH-1:0] bht_updated;
if (CVA6Cfg.RVC) begin : gen_row_index
assign row_index = vpc_i[ROW_ADDR_BITS+OFFSET-1:OFFSET];
end else begin
assign row_index = '0;
end
// -------------------------
// prediction assignment & update Branch History Table
// -------------------------
always_comb begin : prediction_update_bht
  // Combinational prediction read and counter update for the FPGA (RAM-based) BHT.
  //
  // Every signal written by this process is given a default first, so no path
  // leaves a signal unassigned and no latch is inferred.
  bht_ram_we             = '0;
  bht_ram_read_address_0 = '0;
  bht_ram_read_address_1 = '0;
  bht_ram_write_address  = '0;
  bht_ram_wdata          = '0;
  bht_updated            = '0;
  bht                    = '0;
  // Fix: bht_prediction_o used to be written only for the slot selected by
  // row_index, which made the remaining slots hold their previous value
  // (inferred latch). Unselected slots now report valid = 0 / taken = 0.
  bht_prediction_o       = '0;

  // Prediction read: only the RAM of the slot addressed by row_index is read
  // (read port 0) at row `index`.
  for (int i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin
    if (row_index == i) begin
      bht_ram_read_address_0[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)] = index;
      // Word layout matches the write below: {valid, saturation_counter[1:0]}
      // (assumes BRAM_WORD_BITS == 3 -- confirm against ariane_pkg::bht_t).
      bht_prediction_o[i].valid = bht_ram_rdata_0[i*BRAM_WORD_BITS+2];
      // The counter MSB is the taken/not-taken prediction.
      bht_prediction_o[i].taken = bht_ram_rdata_0[i*BRAM_WORD_BITS+1];
    end
  end

  // Update: read the current counter through read port 1, step it with
  // saturation at 2'b00 / 2'b11 and write it back to the same row.
  if (bht_update_i.valid && !debug_mode_i) begin
    for (int i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin
      if (update_row_index == i) begin
        bht_ram_read_address_1[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)] = update_pc;
        bht[i].saturation_counter = bht_ram_rdata_1[i*BRAM_WORD_BITS+:2];

        if (bht[i].saturation_counter == 2'b11) begin
          // saturated high: only a not-taken branch moves the counter
          if (!bht_update_i.taken)
            bht_updated[i].saturation_counter = bht[i].saturation_counter - 1;
          else bht_updated[i].saturation_counter = 2'b11;
        end else if (bht[i].saturation_counter == 2'b00) begin
          // saturated low: only a taken branch moves the counter
          if (bht_update_i.taken)
            bht_updated[i].saturation_counter = bht[i].saturation_counter + 1;
          else bht_updated[i].saturation_counter = 2'b00;
        end else begin
          // not at a boundary: count up on taken, down on not taken
          if (bht_update_i.taken)
            bht_updated[i].saturation_counter = bht[i].saturation_counter + 1;
          else bht_updated[i].saturation_counter = bht[i].saturation_counter - 1;
        end

        bht_updated[i].valid = 1'b1;
        bht_ram_we[i] = 1'b1;
        bht_ram_write_address[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)] = update_pc;
        // valid ends up in the MSB of the written word
        bht_ram_wdata[i*BRAM_WORD_BITS+:BRAM_WORD_BITS] = {
          bht_updated[i].valid, bht_updated[i].saturation_counter
        };
      end
    end
  end
end
for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_bht_ram
  // Per-slot RAM: slot i owns slice i of every flattened address/data vector.
  // The slice origins are named once here instead of being repeated per port.
  localparam ADDR_LSB = i * $clog2(NR_ROWS);
  localparam WORD_LSB = i * BRAM_WORD_BITS;

  AsyncThreePortRam #(
      .ADDR_WIDTH($clog2(NR_ROWS)),
      .DATA_DEPTH(NR_ROWS),
      .DATA_WIDTH(BRAM_WORD_BITS)
  ) i_bht_ram (
      .Clk_CI     (clk_i),
      // write port: updated counter word
      .WrEn_SI    (bht_ram_we[i]),
      .WrAddr_DI  (bht_ram_write_address[ADDR_LSB+:$clog2(NR_ROWS)]),
      .WrData_DI  (bht_ram_wdata[WORD_LSB+:BRAM_WORD_BITS]),
      // read port 0: prediction lookup
      .RdAddr_DI_0(bht_ram_read_address_0[ADDR_LSB+:$clog2(NR_ROWS)]),
      .RdData_DO_0(bht_ram_rdata_0[WORD_LSB+:BRAM_WORD_BITS]),
      // read port 1: current counter value for the update path
      .RdAddr_DI_1(bht_ram_read_address_1[ADDR_LSB+:$clog2(NR_ROWS)]),
      .RdData_DO_1(bht_ram_rdata_1[WORD_LSB+:BRAM_WORD_BITS])
  );
end
end
endmodule

View file

@ -28,156 +28,158 @@
module btb #(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int NR_ENTRIES = 8
)(
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic flush_i, // flush the btb
input logic debug_mode_i,
) (
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic flush_i, // flush the btb
input logic debug_mode_i,
input logic [riscv::VLEN-1:0] vpc_i, // virtual PC from IF stage
input ariane_pkg::btb_update_t btb_update_i, // update btb with this information
input logic [riscv::VLEN-1:0] vpc_i, // virtual PC from IF stage
input ariane_pkg::btb_update_t btb_update_i, // update btb with this information
output ariane_pkg::btb_prediction_t [ariane_pkg::INSTR_PER_FETCH-1:0] btb_prediction_o // prediction from btb
);
// the last bit is always zero, we don't need it for indexing
localparam OFFSET = CVA6Cfg.RVC == 1'b1 ? 1 : 2;
// re-shape the branch target buffer into rows of INSTR_PER_FETCH entries
localparam NR_ROWS = NR_ENTRIES / ariane_pkg::INSTR_PER_FETCH;
// number of bits needed to index the row
localparam ROW_ADDR_BITS = $clog2(ariane_pkg::INSTR_PER_FETCH);
localparam ROW_INDEX_BITS = CVA6Cfg.RVC == 1'b1 ? $clog2(ariane_pkg::INSTR_PER_FETCH) : 1;
// number of bits we should use for prediction
localparam PREDICTION_BITS = $clog2(NR_ROWS) + OFFSET + ROW_ADDR_BITS;
// prevent aliasing to degrade performance
localparam ANTIALIAS_BITS = 8;
// number of bits per word in the BRAM
localparam BRAM_WORD_BITS = $bits(ariane_pkg::btb_prediction_t);
// we are not interested in all bits of the address
unread i_unread (.d_i(|vpc_i));
// the last bit is always zero, we don't need it for indexing
localparam OFFSET = CVA6Cfg.RVC == 1'b1 ? 1 : 2;
// re-shape the branch target buffer into rows of INSTR_PER_FETCH entries
localparam NR_ROWS = NR_ENTRIES / ariane_pkg::INSTR_PER_FETCH;
// number of bits needed to index the row
localparam ROW_ADDR_BITS = $clog2(ariane_pkg::INSTR_PER_FETCH);
localparam ROW_INDEX_BITS = CVA6Cfg.RVC == 1'b1 ? $clog2(ariane_pkg::INSTR_PER_FETCH) : 1;
// number of bits we should use for prediction
localparam PREDICTION_BITS = $clog2(NR_ROWS) + OFFSET + ROW_ADDR_BITS;
// prevent aliasing to degrade performance
localparam ANTIALIAS_BITS = 8;
// number of bits per word in the BRAM
localparam BRAM_WORD_BITS = $bits(ariane_pkg::btb_prediction_t);
// we are not interested in all bits of the address
unread i_unread (.d_i(|vpc_i));
logic [$clog2(NR_ROWS)-1:0] index, update_pc;
logic [ROW_INDEX_BITS-1:0] update_row_index;
logic [$clog2(NR_ROWS)-1:0] index, update_pc;
logic [ROW_INDEX_BITS-1:0] update_row_index;
assign index = vpc_i[PREDICTION_BITS - 1:ROW_ADDR_BITS + OFFSET];
assign update_pc = btb_update_i.pc[PREDICTION_BITS - 1:ROW_ADDR_BITS + OFFSET];
if (CVA6Cfg.RVC) begin : gen_update_row_index
assign update_row_index = btb_update_i.pc[ROW_ADDR_BITS + OFFSET - 1:OFFSET];
end else begin
assign update_row_index = '0;
assign index = vpc_i[PREDICTION_BITS-1:ROW_ADDR_BITS+OFFSET];
assign update_pc = btb_update_i.pc[PREDICTION_BITS-1:ROW_ADDR_BITS+OFFSET];
if (CVA6Cfg.RVC) begin : gen_update_row_index
assign update_row_index = btb_update_i.pc[ROW_ADDR_BITS+OFFSET-1:OFFSET];
end else begin
assign update_row_index = '0;
end
if (ariane_pkg::FPGA_EN) begin : gen_fpga_btb //FPGA TARGETS
logic [ ariane_pkg::INSTR_PER_FETCH-1:0] btb_ram_csel_prediction;
logic [ ariane_pkg::INSTR_PER_FETCH-1:0] btb_ram_we_prediction;
logic [ariane_pkg::INSTR_PER_FETCH*$clog2(NR_ROWS)-1:0] btb_ram_addr_prediction;
logic [ ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] btb_ram_wdata_prediction;
logic [ ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] btb_ram_rdata_prediction;
logic [ ariane_pkg::INSTR_PER_FETCH-1:0] btb_ram_csel_update;
logic [ ariane_pkg::INSTR_PER_FETCH-1:0] btb_ram_we_update;
logic [ariane_pkg::INSTR_PER_FETCH*$clog2(NR_ROWS)-1:0] btb_ram_addr_update;
logic [ ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] btb_ram_wdata_update;
// output matching prediction
for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_btb_output
  // Prediction port of slot i: always selected, never written, addressed by `index`.
  assign btb_ram_csel_prediction[i] = 1'b1;
  assign btb_ram_we_prediction[i] = 1'b0;
  // Fix: drive only this slot's slice. Assigning the whole vector inside the
  // generate loop created one continuous assignment per iteration on the same
  // variable (multiple drivers). The slices together still cover every bit.
  assign btb_ram_wdata_prediction[i*BRAM_WORD_BITS+:BRAM_WORD_BITS] = '0;
  assign btb_ram_addr_prediction[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)] = index;
  // The RAM word read for slot i is forwarded unchanged as the prediction.
  assign btb_prediction_o[i] = btb_ram_rdata_prediction[i*BRAM_WORD_BITS+:BRAM_WORD_BITS];
end
if (ariane_pkg::FPGA_EN) begin : gen_fpga_btb //FPGA TARGETS
logic [ariane_pkg::INSTR_PER_FETCH-1:0] btb_ram_csel_prediction;
logic [ariane_pkg::INSTR_PER_FETCH-1:0] btb_ram_we_prediction;
logic [ariane_pkg::INSTR_PER_FETCH*$clog2(NR_ROWS)-1:0] btb_ram_addr_prediction;
logic [ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] btb_ram_wdata_prediction;
logic [ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] btb_ram_rdata_prediction;
// -------------------------
// Update Branch Prediction
// -------------------------
// update on a mis-predict
always_comb begin : update_branch_predict
btb_ram_csel_update = '0;
btb_ram_we_update = '0;
btb_ram_addr_update = '0;
btb_ram_wdata_update = '0;
logic [ariane_pkg::INSTR_PER_FETCH-1:0] btb_ram_csel_update;
logic [ariane_pkg::INSTR_PER_FETCH-1:0] btb_ram_we_update;
logic [ariane_pkg::INSTR_PER_FETCH*$clog2(NR_ROWS)-1:0] btb_ram_addr_update;
logic [ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] btb_ram_wdata_update;
// output matching prediction
for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_btb_output
  // Prediction port of slot i: always selected, never written, addressed by `index`.
  assign btb_ram_csel_prediction[i] = 1'b1;
  assign btb_ram_we_prediction[i] = 1'b0;
  // Fix: drive only this slot's slice. Assigning the whole vector inside the
  // generate loop created one continuous assignment per iteration on the same
  // variable (multiple drivers). The slices together still cover every bit.
  assign btb_ram_wdata_prediction[i*BRAM_WORD_BITS +: BRAM_WORD_BITS] = '0;
  assign btb_ram_addr_prediction[i*$clog2(NR_ROWS) +: $clog2(NR_ROWS)] = index;
  // The RAM word read for slot i is forwarded unchanged as the prediction.
  assign btb_prediction_o[i] = btb_ram_rdata_prediction[i*BRAM_WORD_BITS +: BRAM_WORD_BITS];
end
// -------------------------
// Update Branch Prediction
// -------------------------
// update on a mis-predict
always_comb begin : update_branch_predict
btb_ram_csel_update = '0;
btb_ram_we_update = '0;
btb_ram_addr_update = '0;
btb_ram_wdata_update = '0;
if (btb_update_i.valid && !debug_mode_i) begin
for (int i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin
if (update_row_index == i) begin
btb_ram_csel_update[i] = 1'b1;
btb_ram_we_update[i] = 1'b1;
btb_ram_addr_update[i*$clog2(NR_ROWS) +: $clog2(NR_ROWS)] = update_pc;
btb_ram_wdata_update[i*BRAM_WORD_BITS +: BRAM_WORD_BITS] = {1'b1 , btb_update_i.target_address};
end
if (btb_update_i.valid && !debug_mode_i) begin
for (int i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin
if (update_row_index == i) begin
btb_ram_csel_update[i] = 1'b1;
btb_ram_we_update[i] = 1'b1;
btb_ram_addr_update[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)] = update_pc;
btb_ram_wdata_update[i*BRAM_WORD_BITS+:BRAM_WORD_BITS] = {
1'b1, btb_update_i.target_address
};
end
end
end
end
for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_btb_ram
SyncDpRam #(
for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_btb_ram
SyncDpRam #(
.ADDR_WIDTH($clog2(NR_ROWS)),
.DATA_DEPTH(NR_ROWS),
.DATA_WIDTH(BRAM_WORD_BITS),
.OUT_REGS (0),
.SIM_INIT (1)
) i_btb_ram (
.Clk_CI ( clk_i ),
.Rst_RBI ( rst_ni ),
.OUT_REGS (0),
.SIM_INIT (1)
) i_btb_ram (
.Clk_CI (clk_i),
.Rst_RBI (rst_ni),
//----------------------------
.CSelA_SI ( btb_ram_csel_update[i] ),
.WrEnA_SI ( btb_ram_we_update[i] ),
.AddrA_DI ( btb_ram_addr_update[i*$clog2(NR_ROWS) +: $clog2(NR_ROWS)] ),
.WrDataA_DI ( btb_ram_wdata_update[i*BRAM_WORD_BITS +: BRAM_WORD_BITS] ),
.RdDataA_DO ( ),
.CSelA_SI (btb_ram_csel_update[i]),
.WrEnA_SI (btb_ram_we_update[i]),
.AddrA_DI (btb_ram_addr_update[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)]),
.WrDataA_DI(btb_ram_wdata_update[i*BRAM_WORD_BITS+:BRAM_WORD_BITS]),
.RdDataA_DO(),
//-----------------------------
.CSelB_SI ( btb_ram_csel_prediction[i] ),
.WrEnB_SI ( btb_ram_we_prediction[i] ),
.AddrB_DI ( btb_ram_addr_prediction[i*$clog2(NR_ROWS) +: $clog2(NR_ROWS)] ),
.WrDataB_DI ( btb_ram_wdata_prediction[i*BRAM_WORD_BITS +: BRAM_WORD_BITS] ),
.RdDataB_DO ( btb_ram_rdata_prediction[i*BRAM_WORD_BITS +: BRAM_WORD_BITS] )
);
.CSelB_SI (btb_ram_csel_prediction[i]),
.WrEnB_SI (btb_ram_we_prediction[i]),
.AddrB_DI (btb_ram_addr_prediction[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)]),
.WrDataB_DI(btb_ram_wdata_prediction[i*BRAM_WORD_BITS+:BRAM_WORD_BITS]),
.RdDataB_DO(btb_ram_rdata_prediction[i*BRAM_WORD_BITS+:BRAM_WORD_BITS])
);
end
end else begin : gen_asic_btb // ASIC TARGET
// typedef for all branch target entries
// we may want to try to put a tag field that fills the rest of the PC in-order to mitigate aliasing effects
ariane_pkg::btb_prediction_t
btb_d[NR_ROWS-1:0][ariane_pkg::INSTR_PER_FETCH-1:0],
btb_q[NR_ROWS-1:0][ariane_pkg::INSTR_PER_FETCH-1:0];
// output matching prediction
for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_btb_output
assign btb_prediction_o[i] = btb_q[index][i]; // workaround
end
// -------------------------
// Update Branch Prediction
// -------------------------
// update on a mis-predict
always_comb begin : update_branch_predict
  // Next-state table: start from the registered contents so that every entry
  // that is not being updated keeps its value.
  btb_d = btb_q;

  // Outside debug mode, a valid update overwrites exactly one entry,
  // selected by update_pc (row) and update_row_index (slot in the row).
  if (!debug_mode_i && btb_update_i.valid) begin
    // store the new target as-is ...
    btb_d[update_pc][update_row_index].target_address = btb_update_i.target_address;
    // ... and mark the entry usable
    btb_d[update_pc][update_row_index].valid = 1'b1;
  end
end
end else begin : gen_asic_btb // ASIC TARGET
// typedef for all branch target entries
// we may want to try to put a tag field that fills the rest of the PC in-order to mitigate aliasing effects
ariane_pkg::btb_prediction_t btb_d [NR_ROWS-1:0][ariane_pkg::INSTR_PER_FETCH-1:0],
btb_q [NR_ROWS-1:0][ariane_pkg::INSTR_PER_FETCH-1:0];
// output matching prediction
for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_btb_output
assign btb_prediction_o[i] = btb_q[index][i]; // workaround
end
// -------------------------
// Update Branch Prediction
// -------------------------
// update on a mis-predict
// Computes the next BTB contents (btb_d) from the registered table (btb_q).
always_comb begin : update_branch_predict
// default: keep every entry unchanged
btb_d = btb_q;
// a valid update outside debug mode overwrites the entry selected by
// update_pc (row) and update_row_index (slot within the row)
if (btb_update_i.valid && !debug_mode_i) begin
btb_d[update_pc][update_row_index].valid = 1'b1;
// the target address is simply updated
btb_d[update_pc][update_row_index].target_address = btb_update_i.target_address;
end
end
// sequential process
always_ff @(posedge clk_i or negedge rst_ni) begin
if (!rst_ni) begin
// Reset: clear every BTB entry (all fields zero, so no prediction is valid)
for (int i = 0; i < NR_ROWS; i++)
btb_q[i] <= '{default: 0};
end else begin
// evict all entries
if (flush_i) begin
for (int i = 0; i < NR_ROWS; i++) begin
for (int j = 0; j < ariane_pkg::INSTR_PER_FETCH; j++) begin
btb_q[i][j].valid <= 1'b0;
end
// sequential process
always_ff @(posedge clk_i or negedge rst_ni) begin
if (!rst_ni) begin
// Reset: clear every BTB entry (all fields zero, so no prediction is valid)
for (int i = 0; i < NR_ROWS; i++) btb_q[i] <= '{default: 0};
end else begin
// evict all entries
if (flush_i) begin
for (int i = 0; i < NR_ROWS; i++) begin
for (int j = 0; j < ariane_pkg::INSTR_PER_FETCH; j++) begin
btb_q[i][j].valid <= 1'b0;
end
end else begin
btb_q <= btb_d;
end
end else begin
btb_q <= btb_d;
end
end
end
end
endmodule

View file

@ -15,492 +15,502 @@
// This module interfaces with the instruction cache, handles control
// change request from the back-end and does branch prediction.
module frontend import ariane_pkg::*; #(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
module frontend
import ariane_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) (
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic flush_i, // flush request for PCGEN
input logic flush_bp_i, // flush branch prediction
input logic halt_i, // halt commit stage
input logic debug_mode_i,
// global input
input logic [riscv::VLEN-1:0] boot_addr_i,
// Set a new PC
// mispredict
input bp_resolve_t resolved_branch_i, // from controller signaling a branch_predict -> update BTB
// from commit, when flushing the whole pipeline
input logic set_pc_commit_i, // Take the PC from commit stage
input logic [riscv::VLEN-1:0] pc_commit_i, // PC of instruction in commit stage
// CSR input
input logic [riscv::VLEN-1:0] epc_i, // exception PC which we need to return to
input logic eret_i, // return from exception
input logic [riscv::VLEN-1:0] trap_vector_base_i, // base of trap vector
input logic ex_valid_i, // exception is valid - from commit
input logic set_debug_pc_i, // jump to debug address
// Instruction Fetch
output icache_dreq_t icache_dreq_o,
input icache_drsp_t icache_dreq_i,
// instruction output port -> to processor back-end
output fetch_entry_t fetch_entry_o, // fetch entry containing all relevant data for the ID stage
output logic fetch_entry_valid_o, // instruction in IF is valid
input logic fetch_entry_ready_i // ID acknowledged this instruction
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic flush_i, // flush request for PCGEN
input logic flush_bp_i, // flush branch prediction
input logic halt_i, // halt commit stage
input logic debug_mode_i,
// global input
input logic [riscv::VLEN-1:0] boot_addr_i,
// Set a new PC
// mispredict
input bp_resolve_t resolved_branch_i, // from controller signaling a branch_predict -> update BTB
// from commit, when flushing the whole pipeline
input logic set_pc_commit_i, // Take the PC from commit stage
input logic [riscv::VLEN-1:0] pc_commit_i, // PC of instruction in commit stage
// CSR input
input logic [riscv::VLEN-1:0] epc_i, // exception PC which we need to return to
input logic eret_i, // return from exception
input logic [riscv::VLEN-1:0] trap_vector_base_i, // base of trap vector
input logic ex_valid_i, // exception is valid - from commit
input logic set_debug_pc_i, // jump to debug address
// Instruction Fetch
output icache_dreq_t icache_dreq_o,
input icache_drsp_t icache_dreq_i,
// instruction output port -> to processor back-end
output fetch_entry_t fetch_entry_o, // fetch entry containing all relevant data for the ID stage
output logic fetch_entry_valid_o, // instruction in IF is valid
input logic fetch_entry_ready_i // ID acknowledged this instruction
);
// Instruction Cache Registers, from I$
logic [FETCH_WIDTH-1:0] icache_data_q;
logic icache_valid_q;
ariane_pkg::frontend_exception_t icache_ex_valid_q;
logic [riscv::VLEN-1:0] icache_vaddr_q;
logic instr_queue_ready;
logic [ariane_pkg::INSTR_PER_FETCH-1:0] instr_queue_consumed;
// upper-most branch-prediction from last cycle
btb_prediction_t btb_q;
bht_prediction_t bht_q;
// instruction fetch is ready
logic if_ready;
logic [riscv::VLEN-1:0] npc_d, npc_q; // next PC
// Instruction Cache Registers, from I$
logic [ FETCH_WIDTH-1:0] icache_data_q;
logic icache_valid_q;
ariane_pkg::frontend_exception_t icache_ex_valid_q;
logic [ riscv::VLEN-1:0] icache_vaddr_q;
logic instr_queue_ready;
logic [ariane_pkg::INSTR_PER_FETCH-1:0] instr_queue_consumed;
// upper-most branch-prediction from last cycle
btb_prediction_t btb_q;
bht_prediction_t bht_q;
// instruction fetch is ready
logic if_ready;
logic [riscv::VLEN-1:0] npc_d, npc_q; // next PC
// indicates whether we come out of reset (then we need to load boot_addr_i)
logic npc_rst_load_q;
// indicates whether we come out of reset (then we need to load boot_addr_i)
logic npc_rst_load_q;
logic replay;
logic [riscv::VLEN-1:0] replay_addr;
logic replay;
logic [ riscv::VLEN-1:0] replay_addr;
// shift amount
logic [$clog2(ariane_pkg::INSTR_PER_FETCH)-1:0] shamt;
// address will always be 16 bit aligned, make this explicit here
if (CVA6Cfg.RVC) begin : gen_shamt
assign shamt = icache_dreq_i.vaddr[$clog2(ariane_pkg::INSTR_PER_FETCH):1];
end else begin
assign shamt = 1'b0;
// shift amount
logic [$clog2(ariane_pkg::INSTR_PER_FETCH)-1:0] shamt;
// address will always be 16 bit aligned, make this explicit here
if (CVA6Cfg.RVC) begin : gen_shamt
assign shamt = icache_dreq_i.vaddr[$clog2(ariane_pkg::INSTR_PER_FETCH):1];
end else begin
assign shamt = 1'b0;
end
// -----------------------
// Ctrl Flow Speculation
// -----------------------
// RVI ctrl flow prediction
logic [INSTR_PER_FETCH-1:0] rvi_return, rvi_call, rvi_branch, rvi_jalr, rvi_jump;
logic [INSTR_PER_FETCH-1:0][riscv::VLEN-1:0] rvi_imm;
// RVC branching
logic [INSTR_PER_FETCH-1:0] rvc_branch, rvc_jump, rvc_jr, rvc_return, rvc_jalr, rvc_call;
logic [INSTR_PER_FETCH-1:0][riscv::VLEN-1:0] rvc_imm;
// re-aligned instruction and address (coming from cache - combinationally)
logic [INSTR_PER_FETCH-1:0][ 31:0] instr;
logic [INSTR_PER_FETCH-1:0][riscv::VLEN-1:0] addr;
logic [INSTR_PER_FETCH-1:0] instruction_valid;
// BHT, BTB and RAS prediction
bht_prediction_t [INSTR_PER_FETCH-1:0] bht_prediction;
btb_prediction_t [INSTR_PER_FETCH-1:0] btb_prediction;
bht_prediction_t [INSTR_PER_FETCH-1:0] bht_prediction_shifted;
btb_prediction_t [INSTR_PER_FETCH-1:0] btb_prediction_shifted;
ras_t ras_predict;
logic [ riscv::VLEN-1:0] vpc_btb;
// branch-predict update
logic is_mispredict;
logic ras_push, ras_pop;
logic [ riscv::VLEN-1:0] ras_update;
// Instruction FIFO
logic [ riscv::VLEN-1:0] predict_address;
cf_t [ariane_pkg::INSTR_PER_FETCH-1:0] cf_type;
logic [ariane_pkg::INSTR_PER_FETCH-1:0] taken_rvi_cf;
logic [ariane_pkg::INSTR_PER_FETCH-1:0] taken_rvc_cf;
logic serving_unaligned;
// Re-align instructions
// Re-aligner instance: takes the registered I$ response (icache_valid_q,
// icache_vaddr_q, icache_data_q) and produces the per-slot instr/addr/valid
// vectors that the instr_scan instances and the instruction queue consume.
instr_realign #(
.CVA6Cfg(CVA6Cfg)
) i_instr_realign (
.clk_i (clk_i),
.rst_ni (rst_ni),
// flushed by the same signal that kills the second I$ stage
.flush_i (icache_dreq_o.kill_s2),
.valid_i (icache_valid_q),
// selects the saved prediction (bht_q/btb_q) for slot 0 in the prediction mux
.serving_unaligned_o(serving_unaligned),
.address_i (icache_vaddr_q),
.data_i (icache_data_q),
.valid_o (instruction_valid),
.addr_o (addr),
.instr_o (instr)
);
// --------------------
// Branch Prediction
// --------------------
// select the right branch prediction result
// in case we are serving an unaligned instruction in instr[0] we need to take
// the prediction we saved from the previous fetch
// Per-slot prediction select. Slot 0 uses the prediction saved from the previous
// fetch (bht_q/btb_q) whenever the re-aligner reports serving_unaligned; otherwise
// the prediction arrays are indexed with low-order bits of the slot's address.
if (CVA6Cfg.RVC) begin : gen_btb_prediction_shifted
// index = addr[0][$clog2(INSTR_PER_FETCH):1], i.e. bit 0 of the address is dropped
assign bht_prediction_shifted[0] = (serving_unaligned) ? bht_q : bht_prediction[addr[0][$clog2(
INSTR_PER_FETCH
):1]];
assign btb_prediction_shifted[0] = (serving_unaligned) ? btb_q : btb_prediction[addr[0][$clog2(
INSTR_PER_FETCH
):1]];
// for all other predictions we can use the generated address to index
// into the branch prediction data structures
for (genvar i = 1; i < INSTR_PER_FETCH; i++) begin : gen_prediction_address
assign bht_prediction_shifted[i] = bht_prediction[addr[i][$clog2(INSTR_PER_FETCH):1]];
assign btb_prediction_shifted[i] = btb_prediction[addr[i][$clog2(INSTR_PER_FETCH):1]];
end
end else begin
// NOTE(review): only slot 0 is driven in this branch and it is indexed with the
// single bit addr[0][1]; presumably INSTR_PER_FETCH is 1 when RVC is off - confirm,
// otherwise slots [1..] of the *_shifted vectors are left undriven here.
assign bht_prediction_shifted[0] = (serving_unaligned) ? bht_q : bht_prediction[addr[0][1]];
assign btb_prediction_shifted[0] = (serving_unaligned) ? btb_q : btb_prediction[addr[0][1]];
end
;
// -----------------------
// Ctrl Flow Speculation
// -----------------------
// RVI ctrl flow prediction
logic [INSTR_PER_FETCH-1:0] rvi_return, rvi_call, rvi_branch,
rvi_jalr, rvi_jump;
logic [INSTR_PER_FETCH-1:0][riscv::VLEN-1:0] rvi_imm;
// RVC branching
logic [INSTR_PER_FETCH-1:0] rvc_branch, rvc_jump, rvc_jr, rvc_return,
rvc_jalr, rvc_call;
logic [INSTR_PER_FETCH-1:0][riscv::VLEN-1:0] rvc_imm;
// re-aligned instruction and address (coming from cache - combinationally)
logic [INSTR_PER_FETCH-1:0][31:0] instr;
logic [INSTR_PER_FETCH-1:0][riscv::VLEN-1:0] addr;
logic [INSTR_PER_FETCH-1:0] instruction_valid;
// BHT, BTB and RAS prediction
bht_prediction_t [INSTR_PER_FETCH-1:0] bht_prediction;
btb_prediction_t [INSTR_PER_FETCH-1:0] btb_prediction;
bht_prediction_t [INSTR_PER_FETCH-1:0] bht_prediction_shifted;
btb_prediction_t [INSTR_PER_FETCH-1:0] btb_prediction_shifted;
ras_t ras_predict;
logic [riscv::VLEN-1:0] vpc_btb;
// for the return address stack it doesn't matter as we have the
// address of the call/return already
logic bp_valid;
// branch-predict update
logic is_mispredict;
logic ras_push, ras_pop;
logic [riscv::VLEN-1:0] ras_update;
logic [INSTR_PER_FETCH-1:0] is_branch;
logic [INSTR_PER_FETCH-1:0] is_call;
logic [INSTR_PER_FETCH-1:0] is_jump;
logic [INSTR_PER_FETCH-1:0] is_return;
logic [INSTR_PER_FETCH-1:0] is_jalr;
// Instruction FIFO
logic [riscv::VLEN-1:0] predict_address;
cf_t [ariane_pkg::INSTR_PER_FETCH-1:0] cf_type;
logic [ariane_pkg::INSTR_PER_FETCH-1:0] taken_rvi_cf;
logic [ariane_pkg::INSTR_PER_FETCH-1:0] taken_rvc_cf;
for (genvar i = 0; i < INSTR_PER_FETCH; i++) begin
// branch history table -> BHT
assign is_branch[i] = instruction_valid[i] & (rvi_branch[i] | rvc_branch[i]);
// function calls -> RAS
assign is_call[i] = instruction_valid[i] & (rvi_call[i] | rvc_call[i]);
// function return -> RAS
assign is_return[i] = instruction_valid[i] & (rvi_return[i] | rvc_return[i]);
// unconditional jumps with known target -> immediately resolved
assign is_jump[i] = instruction_valid[i] & (rvi_jump[i] | rvc_jump[i]);
// unconditional jumps with unknown target -> BTB
assign is_jalr[i] = instruction_valid[i] & ~is_return[i] & (rvi_jalr[i] | rvc_jalr[i] | rvc_jr[i]);
end
logic serving_unaligned;
// Re-align instructions
instr_realign #(
.CVA6Cfg ( CVA6Cfg )
) i_instr_realign (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.flush_i ( icache_dreq_o.kill_s2 ),
.valid_i ( icache_valid_q ),
.serving_unaligned_o ( serving_unaligned ),
.address_i ( icache_vaddr_q ),
.data_i ( icache_data_q ),
.valid_o ( instruction_valid ),
.addr_o ( addr ),
.instr_o ( instr )
);
// taken/not taken
always_comb begin
taken_rvi_cf = '0;
taken_rvc_cf = '0;
predict_address = '0;
// --------------------
// Branch Prediction
// --------------------
// select the right branch prediction result
// in case we are serving an unaligned instruction in instr[0] we need to take
// the prediction we saved from the previous fetch
if (CVA6Cfg.RVC) begin : gen_btb_prediction_shifted
assign bht_prediction_shifted[0] = (serving_unaligned) ? bht_q : bht_prediction[addr[0][$clog2(INSTR_PER_FETCH):1]];
assign btb_prediction_shifted[0] = (serving_unaligned) ? btb_q : btb_prediction[addr[0][$clog2(INSTR_PER_FETCH):1]];
for (int i = 0; i < INSTR_PER_FETCH; i++) cf_type[i] = ariane_pkg::NoCF;
// for all other predictions we can use the generated address to index
// into the branch prediction data structures
for (genvar i = 1; i < INSTR_PER_FETCH; i++) begin : gen_prediction_address
assign bht_prediction_shifted[i] = bht_prediction[addr[i][$clog2(INSTR_PER_FETCH):1]];
assign btb_prediction_shifted[i] = btb_prediction[addr[i][$clog2(INSTR_PER_FETCH):1]];
end
end else begin
assign bht_prediction_shifted[0] = (serving_unaligned) ? bht_q : bht_prediction[addr[0][1]];
assign btb_prediction_shifted[0] = (serving_unaligned) ? btb_q : btb_prediction[addr[0][1]];
end;
ras_push = 1'b0;
ras_pop = 1'b0;
ras_update = '0;
// for the return address stack it doesn't matter as we have the
// address of the call/return already
logic bp_valid;
logic [INSTR_PER_FETCH-1:0] is_branch;
logic [INSTR_PER_FETCH-1:0] is_call;
logic [INSTR_PER_FETCH-1:0] is_jump;
logic [INSTR_PER_FETCH-1:0] is_return;
logic [INSTR_PER_FETCH-1:0] is_jalr;
// Per-slot control-flow classification: each is_* bit is the OR of the matching
// RVI and RVC scanner outputs, gated by instruction_valid[i] so that invalid
// slots never report a control-flow instruction.
for (genvar i = 0; i < INSTR_PER_FETCH; i++) begin
// branch history table -> BHT
assign is_branch[i] = instruction_valid[i] & (rvi_branch[i] | rvc_branch[i]);
// function calls -> RAS
assign is_call[i] = instruction_valid[i] & (rvi_call[i] | rvc_call[i]);
// function return -> RAS
assign is_return[i] = instruction_valid[i] & (rvi_return[i] | rvc_return[i]);
// unconditional jumps with known target -> immediately resolved
assign is_jump[i] = instruction_valid[i] & (rvi_jump[i] | rvc_jump[i]);
// unconditional jumps with unknown target -> BTB
// (a slot already classified as a return is excluded, so is_return and is_jalr
// are never both set for the same slot)
assign is_jalr[i] = instruction_valid[i] & ~is_return[i] & (rvi_jalr[i] | rvc_jalr[i] | rvc_jr[i]);
end
// taken/not taken
always_comb begin
taken_rvi_cf = '0;
taken_rvc_cf = '0;
predict_address = '0;
for (int i = 0; i < INSTR_PER_FETCH; i++) cf_type[i] = ariane_pkg::NoCF;
ras_push = 1'b0;
ras_pop = 1'b0;
ras_update = '0;
// lower most prediction gets precedence
for (int i = INSTR_PER_FETCH - 1; i >= 0 ; i--) begin
unique case ({is_branch[i], is_return[i], is_jump[i], is_jalr[i]})
4'b0000:; // regular instruction e.g.: no branch
// unconditional jump to register, we need the BTB to resolve this
4'b0001: begin
ras_pop = 1'b0;
ras_push = 1'b0;
if (btb_prediction_shifted[i].valid) begin
predict_address = btb_prediction_shifted[i].target_address;
cf_type[i] = ariane_pkg::JumpR;
end
// lower most prediction gets precedence
for (int i = INSTR_PER_FETCH - 1; i >= 0; i--) begin
unique case ({
is_branch[i], is_return[i], is_jump[i], is_jalr[i]
})
4'b0000: ; // regular instruction e.g.: no branch
// unconditional jump to register, we need the BTB to resolve this
4'b0001: begin
ras_pop = 1'b0;
ras_push = 1'b0;
if (btb_prediction_shifted[i].valid) begin
predict_address = btb_prediction_shifted[i].target_address;
cf_type[i] = ariane_pkg::JumpR;
end
// it's an unconditional jump to an immediate
4'b0010: begin
ras_pop = 1'b0;
ras_push = 1'b0;
taken_rvi_cf[i] = rvi_jump[i];
taken_rvc_cf[i] = rvc_jump[i];
cf_type[i] = ariane_pkg::Jump;
end
// return
4'b0100: begin
// make sure to only alter the RAS if we actually consumed the instruction
ras_pop = ras_predict.valid & instr_queue_consumed[i];
ras_push = 1'b0;
predict_address = ras_predict.ra;
cf_type[i] = ariane_pkg::Return;
end
// branch prediction
4'b1000: begin
ras_pop = 1'b0;
ras_push = 1'b0;
// if we have a valid dynamic prediction use it
if (bht_prediction_shifted[i].valid) begin
taken_rvi_cf[i] = rvi_branch[i] & bht_prediction_shifted[i].taken;
taken_rvc_cf[i] = rvc_branch[i] & bht_prediction_shifted[i].taken;
end
// it's an unconditional jump to an immediate
4'b0010: begin
ras_pop = 1'b0;
ras_push = 1'b0;
taken_rvi_cf[i] = rvi_jump[i];
taken_rvc_cf[i] = rvc_jump[i];
cf_type[i] = ariane_pkg::Jump;
end
// return
4'b0100: begin
// make sure to only alter the RAS if we actually consumed the instruction
ras_pop = ras_predict.valid & instr_queue_consumed[i];
ras_push = 1'b0;
predict_address = ras_predict.ra;
cf_type[i] = ariane_pkg::Return;
end
// branch prediction
4'b1000: begin
ras_pop = 1'b0;
ras_push = 1'b0;
// if we have a valid dynamic prediction use it
if (bht_prediction_shifted[i].valid) begin
taken_rvi_cf[i] = rvi_branch[i] & bht_prediction_shifted[i].taken;
taken_rvc_cf[i] = rvc_branch[i] & bht_prediction_shifted[i].taken;
// otherwise default to static prediction
end else begin
// set if immediate is negative - static prediction
taken_rvi_cf[i] = rvi_branch[i] & rvi_imm[i][riscv::VLEN-1];
taken_rvc_cf[i] = rvc_branch[i] & rvc_imm[i][riscv::VLEN-1];
end
if (taken_rvi_cf[i] || taken_rvc_cf[i]) begin
cf_type[i] = ariane_pkg::Branch;
end
end else begin
// set if immediate is negative - static prediction
taken_rvi_cf[i] = rvi_branch[i] & rvi_imm[i][riscv::VLEN-1];
taken_rvc_cf[i] = rvc_branch[i] & rvc_imm[i][riscv::VLEN-1];
end
default:;
// default: $error("Decoded more than one control flow");
endcase
// if this instruction, in addition, is a call, save the resulting address
// but only if we actually consumed the address
if (is_call[i]) begin
ras_push = instr_queue_consumed[i];
ras_update = addr[i] + (rvc_call[i] ? 2 : 4);
end
// calculate the jump target address
if (taken_rvc_cf[i] || taken_rvi_cf[i]) begin
predict_address = addr[i] + (taken_rvc_cf[i] ? rvc_imm[i] : rvi_imm[i]);
if (taken_rvi_cf[i] || taken_rvc_cf[i]) begin
cf_type[i] = ariane_pkg::Branch;
end
end
default: ;
// default: $error("Decoded more than one control flow");
endcase
// if this instruction, in addition, is a call, save the resulting address
// but only if we actually consumed the address
if (is_call[i]) begin
ras_push = instr_queue_consumed[i];
ras_update = addr[i] + (rvc_call[i] ? 2 : 4);
end
// calculate the jump target address
if (taken_rvc_cf[i] || taken_rvi_cf[i]) begin
predict_address = addr[i] + (taken_rvc_cf[i] ? rvc_imm[i] : rvi_imm[i]);
end
end
// or reduce struct
always_comb begin
bp_valid = 1'b0;
// BP cannot be valid if we have a return instruction and the RAS is not giving a valid address
// Check that we encountered a control flow and that for a return the RAS
// contains a valid prediction.
for (int i = 0; i < INSTR_PER_FETCH; i++) bp_valid |= ((cf_type[i] != NoCF & cf_type[i] != Return) | ((cf_type[i] == Return) & ras_predict.valid));
end
assign is_mispredict = resolved_branch_i.valid & resolved_branch_i.is_mispredict;
end
// or reduce struct
// OR-reduction over all fetch slots: bp_valid is set when at least one slot
// carries a predicted control-flow type. bp_valid in turn kills the second I$
// stage (kill_s2) and redirects fetch_address/npc_d to predict_address.
always_comb begin
bp_valid = 1'b0;
// BP cannot be valid if we have a return instruction and the RAS is not giving a valid address
// Check that we encountered a control flow and that for a return the RAS
// contains a valid prediction.
// A slot contributes if its type is neither NoCF nor Return, or if it is a
// Return and ras_predict.valid is set.
for (int i = 0; i < INSTR_PER_FETCH; i++)
bp_valid |= ((cf_type[i] != NoCF & cf_type[i] != Return) | ((cf_type[i] == Return) & ras_predict.valid));
end
assign is_mispredict = resolved_branch_i.valid & resolved_branch_i.is_mispredict;
// Cache interface
assign icache_dreq_o.req = instr_queue_ready;
assign if_ready = icache_dreq_i.ready & instr_queue_ready;
// We need to flush the cache pipeline if:
// 1. We mispredicted
// 2. Want to flush the whole processor front-end
// 3. Need to replay an instruction because the fetch-fifo was full
assign icache_dreq_o.kill_s1 = is_mispredict | flush_i | replay;
// if we have a valid branch-prediction we need to only kill the last cache request
// also if we killed the first stage we also need to kill the second stage (inclusive flush)
assign icache_dreq_o.kill_s2 = icache_dreq_o.kill_s1 | bp_valid;
// Cache interface
assign icache_dreq_o.req = instr_queue_ready;
assign if_ready = icache_dreq_i.ready & instr_queue_ready;
// We need to flush the cache pipeline if:
// 1. We mispredicted
// 2. Want to flush the whole processor front-end
// 3. Need to replay an instruction because the fetch-fifo was full
assign icache_dreq_o.kill_s1 = is_mispredict | flush_i | replay;
// if we have a valid branch-prediction we need to only kill the last cache request
// also if we killed the first stage we also need to kill the second stage (inclusive flush)
assign icache_dreq_o.kill_s2 = icache_dreq_o.kill_s1 | bp_valid;
// Update Control Flow Predictions
bht_update_t bht_update;
btb_update_t btb_update;
// Update Control Flow Predictions
bht_update_t bht_update;
btb_update_t btb_update;
// assert on branch, deassert when resolved
logic speculative_q,speculative_d;
assign speculative_d = (speculative_q && !resolved_branch_i.valid || |is_branch || |is_return || |is_jalr) && !flush_i;
assign icache_dreq_o.spec = speculative_d;
// assert on branch, deassert when resolved
logic speculative_q, speculative_d;
assign speculative_d = (speculative_q && !resolved_branch_i.valid || |is_branch || |is_return || |is_jalr) && !flush_i;
assign icache_dreq_o.spec = speculative_d;
assign bht_update.valid = resolved_branch_i.valid
assign bht_update.valid = resolved_branch_i.valid
& (resolved_branch_i.cf_type == ariane_pkg::Branch);
assign bht_update.pc = resolved_branch_i.pc;
assign bht_update.taken = resolved_branch_i.is_taken;
// only update mispredicted branches e.g. no returns from the RAS
assign btb_update.valid = resolved_branch_i.valid
assign bht_update.pc = resolved_branch_i.pc;
assign bht_update.taken = resolved_branch_i.is_taken;
// only update mispredicted branches e.g. no returns from the RAS
assign btb_update.valid = resolved_branch_i.valid
& resolved_branch_i.is_mispredict
& (resolved_branch_i.cf_type == ariane_pkg::JumpR);
assign btb_update.pc = resolved_branch_i.pc;
assign btb_update.target_address = resolved_branch_i.target_address;
assign btb_update.pc = resolved_branch_i.pc;
assign btb_update.target_address = resolved_branch_i.target_address;
// -------------------
// Next PC
// -------------------
// next PC (NPC) can come from (in order of precedence):
// 0. Default assignment/replay instruction
// 1. Branch Predict taken
// 2. Control flow change request (misprediction)
// 3. Return from environment call
// 4. Exception/Interrupt
// 5. Pipeline Flush because of CSR side effects
// Mis-predict handling is a little bit different
// select PC a.k.a PC Gen
always_comb begin : npc_select
automatic logic [riscv::VLEN-1:0] fetch_address;
// check whether we come out of reset
// this is a workaround. some tools have issues
// having boot_addr_i in the asynchronous
// reset assignment to npc_q, even though
// boot_addr_i will be assigned a constant
// on the top-level.
if (npc_rst_load_q) begin
npc_d = boot_addr_i;
fetch_address = boot_addr_i;
end else begin
fetch_address = npc_q;
// keep stable by default
npc_d = npc_q;
end
// 0. Branch Prediction
if (bp_valid) begin
fetch_address = predict_address;
npc_d = predict_address;
end
// 1. Default assignment
if (if_ready) begin
npc_d = {fetch_address[riscv::VLEN-1:2], 2'b0} + 'h4;
end
// 2. Replay instruction fetch
if (replay) begin
npc_d = replay_addr;
end
// 3. Control flow change request
if (is_mispredict) begin
npc_d = resolved_branch_i.target_address;
end
// 4. Return from environment call
if (eret_i) begin
npc_d = epc_i;
end
// 5. Exception/Interrupt
if (ex_valid_i) begin
npc_d = trap_vector_base_i;
end
// 6. Pipeline Flush because of CSR side effects
// On a pipeline flush start fetching from the next address
// of the instruction in the commit stage
// we either came here from a flush request of a CSR instruction or AMO,
// so as CSR or AMO instructions do not exist in a compressed form
// we can unconditionally do PC + 4 here
// or if the commit stage is halted, just take the current pc of the
// instruction in the commit stage
// TODO(zarubaf) This adder can at least be merged with the one in the csr_regfile stage
if (set_pc_commit_i) begin
npc_d = pc_commit_i + (halt_i ? '0 : {{riscv::VLEN-3{1'b0}}, 3'b100});
end
// 7. Debug
// enter debug on a hard-coded base-address
if (set_debug_pc_i) npc_d = CVA6Cfg.DmBaseAddress[riscv::VLEN-1:0] + CVA6Cfg.HaltAddress[riscv::VLEN-1:0];
icache_dreq_o.vaddr = fetch_address;
// -------------------
// Next PC
// -------------------
// next PC (NPC) can come from (in order of precedence):
// 0. Default assignment/replay instruction
// 1. Branch Predict taken
// 2. Control flow change request (misprediction)
// 3. Return from environment call
// 4. Exception/Interrupt
// 5. Pipeline Flush because of CSR side effects
// Mis-predict handling is a little bit different
// select PC a.k.a PC Gen
// Next-PC selection.
// Every write to npc_d below is a blocking assignment inside one always_comb, so
// when several conditions hold at once the LAST matching `if` in source order
// determines npc_d (the numbered steps therefore go from lowest to highest priority).
// fetch_address is what is driven to the I$ as icache_dreq_o.vaddr; it is only ever
// boot_addr_i, npc_q or predict_address - none of steps 1-7 change it.
always_comb begin : npc_select
automatic logic [riscv::VLEN-1:0] fetch_address;
// check whether we come out of reset
// this is a workaround. some tools have issues
// having boot_addr_i in the asynchronous
// reset assignment to npc_q, even though
// boot_addr_i will be assigned a constant
// on the top-level.
if (npc_rst_load_q) begin
npc_d = boot_addr_i;
fetch_address = boot_addr_i;
end else begin
fetch_address = npc_q;
// keep stable by default
npc_d = npc_q;
end
// 0. Branch Prediction
if (bp_valid) begin
fetch_address = predict_address;
npc_d = predict_address;
end
// 1. Default assignment
// Clear the two low address bits of fetch_address and add 4. If step 0 also
// fired, the predicted target is still fetched this cycle (via fetch_address)
// and npc_d already points at the word after it.
if (if_ready) begin
npc_d = {fetch_address[riscv::VLEN-1:2], 2'b0} + 'h4;
end
// 2. Replay instruction fetch
if (replay) begin
npc_d = replay_addr;
end
// 3. Control flow change request
if (is_mispredict) begin
npc_d = resolved_branch_i.target_address;
end
// 4. Return from environment call
if (eret_i) begin
npc_d = epc_i;
end
// 5. Exception/Interrupt
if (ex_valid_i) begin
npc_d = trap_vector_base_i;
end
// 6. Pipeline Flush because of CSR side effects
// On a pipeline flush start fetching from the next address
// of the instruction in the commit stage
// we either came here from a flush request of a CSR instruction or AMO,
// so as CSR or AMO instructions do not exist in a compressed form
// we can unconditionally do PC + 4 here
// or if the commit stage is halted, just take the current pc of the
// instruction in the commit stage
// TODO(zarubaf) This adder can at least be merged with the one in the csr_regfile stage
if (set_pc_commit_i) begin
// halt_i selects an increment of 0 instead of 4
npc_d = pc_commit_i + (halt_i ? '0 : {{riscv::VLEN - 3{1'b0}}, 3'b100});
end
// 7. Debug
// enter debug on a hard-coded base-address
// (highest priority: this is the last assignment to npc_d in the block)
if (set_debug_pc_i)
npc_d = CVA6Cfg.DmBaseAddress[riscv::VLEN-1:0] + CVA6Cfg.HaltAddress[riscv::VLEN-1:0];
icache_dreq_o.vaddr = fetch_address;
end
logic [FETCH_WIDTH-1:0] icache_data;
// re-align the cache line
assign icache_data = icache_dreq_i.data >> {shamt, 4'b0};
logic [FETCH_WIDTH-1:0] icache_data;
// re-align the cache line
assign icache_data = icache_dreq_i.data >> {shamt, 4'b0};
always_ff @(posedge clk_i or negedge rst_ni) begin
if (!rst_ni) begin
npc_rst_load_q <= 1'b1;
npc_q <= '0;
speculative_q <= '0;
icache_data_q <= '0;
icache_valid_q <= 1'b0;
icache_vaddr_q <= 'b0;
icache_ex_valid_q <= ariane_pkg::FE_NONE;
btb_q <= '0;
bht_q <= '0;
end else begin
npc_rst_load_q <= 1'b0;
npc_q <= npc_d;
speculative_q <= speculative_d;
icache_valid_q <= icache_dreq_i.valid;
if (icache_dreq_i.valid) begin
icache_data_q <= icache_data;
icache_vaddr_q <= icache_dreq_i.vaddr;
// Map the only three exceptions which can occur in the frontend to a two bit enum
if (icache_dreq_i.ex.cause == riscv::INSTR_PAGE_FAULT) begin
icache_ex_valid_q <= ariane_pkg::FE_INSTR_PAGE_FAULT;
end else if (icache_dreq_i.ex.cause == riscv::INSTR_ACCESS_FAULT) begin
icache_ex_valid_q <= ariane_pkg::FE_INSTR_ACCESS_FAULT;
end else begin
icache_ex_valid_q <= ariane_pkg::FE_NONE;
end
// save the uppermost prediction
btb_q <= btb_prediction[INSTR_PER_FETCH-1];
bht_q <= bht_prediction[INSTR_PER_FETCH-1];
// Front-end state registers (asynchronous active-low reset).
// npc_q and speculative_q update every cycle; the I$ response registers
// (data, vaddr, exception class) and the saved uppermost predictions only
// update in cycles where icache_dreq_i.valid is set.
always_ff @(posedge clk_i or negedge rst_ni) begin
if (!rst_ni) begin
// set during reset so that npc_select loads boot_addr_i while it is still high
npc_rst_load_q <= 1'b1;
npc_q <= '0;
speculative_q <= '0;
icache_data_q <= '0;
icache_valid_q <= 1'b0;
icache_vaddr_q <= 'b0;
icache_ex_valid_q <= ariane_pkg::FE_NONE;
btb_q <= '0;
bht_q <= '0;
end else begin
// cleared on the first clock edge after reset and never set again
npc_rst_load_q <= 1'b0;
npc_q <= npc_d;
speculative_q <= speculative_d;
// the valid flag follows the I$ every cycle, unlike the payload registers below
icache_valid_q <= icache_dreq_i.valid;
if (icache_dreq_i.valid) begin
// icache_data is the I$ data already shifted right by {shamt, 4'b0} bits
icache_data_q <= icache_data;
icache_vaddr_q <= icache_dreq_i.vaddr;
// Map the only three exceptions which can occur in the frontend to a two bit enum
// (page fault, access fault, and FE_NONE for any other cause value)
if (icache_dreq_i.ex.cause == riscv::INSTR_PAGE_FAULT) begin
icache_ex_valid_q <= ariane_pkg::FE_INSTR_PAGE_FAULT;
end else if (icache_dreq_i.ex.cause == riscv::INSTR_ACCESS_FAULT) begin
icache_ex_valid_q <= ariane_pkg::FE_INSTR_ACCESS_FAULT;
end else begin
icache_ex_valid_q <= ariane_pkg::FE_NONE;
end
// save the uppermost prediction
// (used for slot 0 of the next fetch when the re-aligner is serving an unaligned instruction)
btb_q <= btb_prediction[INSTR_PER_FETCH-1];
bht_q <= bht_prediction[INSTR_PER_FETCH-1];
end
end
end
if (CVA6Cfg.RASDepth == 0) begin
assign ras_predict = '0;
end else begin : ras_gen
ras #(
.CVA6Cfg ( CVA6Cfg ),
.DEPTH ( CVA6Cfg.RASDepth )
) i_ras (
if (CVA6Cfg.RASDepth == 0) begin
assign ras_predict = '0;
end else begin : ras_gen
ras #(
.CVA6Cfg(CVA6Cfg),
.DEPTH (CVA6Cfg.RASDepth)
) i_ras (
.clk_i,
.rst_ni,
.flush_i( flush_bp_i ),
.push_i ( ras_push ),
.pop_i ( ras_pop ),
.data_i ( ras_update ),
.data_o ( ras_predict )
);
end
//For FPGA, BTB is implemented in read synchronous BRAM
//while for ASIC, BTB is implemented in D flip-flop
//and can be read at the same cycle.
assign vpc_btb = (ariane_pkg::FPGA_EN) ? icache_dreq_i.vaddr : icache_vaddr_q;
if (CVA6Cfg.BTBEntries == 0) begin
assign btb_prediction = '0;
end else begin : btb_gen
btb #(
.CVA6Cfg ( CVA6Cfg ),
.NR_ENTRIES ( CVA6Cfg.BTBEntries )
) i_btb (
.clk_i,
.rst_ni,
.flush_i ( flush_bp_i ),
.debug_mode_i,
.vpc_i ( vpc_btb ),
.btb_update_i ( btb_update ),
.btb_prediction_o ( btb_prediction )
);
end
if (CVA6Cfg.BHTEntries == 0) begin
assign bht_prediction = '0;
end else begin : bht_gen
bht #(
.CVA6Cfg ( CVA6Cfg ),
.NR_ENTRIES ( CVA6Cfg.BHTEntries )
) i_bht (
.clk_i,
.rst_ni,
.flush_i ( flush_bp_i ),
.debug_mode_i,
.vpc_i ( icache_vaddr_q ),
.bht_update_i ( bht_update ),
.bht_prediction_o ( bht_prediction )
);
end
// we need to inspect up to INSTR_PER_FETCH instructions for branches
// and jumps
for (genvar i = 0; i < INSTR_PER_FETCH; i++) begin : gen_instr_scan
instr_scan #(
.CVA6Cfg ( CVA6Cfg )
) i_instr_scan (
.instr_i ( instr[i] ),
.rvi_return_o ( rvi_return[i] ),
.rvi_call_o ( rvi_call[i] ),
.rvi_branch_o ( rvi_branch[i] ),
.rvi_jalr_o ( rvi_jalr[i] ),
.rvi_jump_o ( rvi_jump[i] ),
.rvi_imm_o ( rvi_imm[i] ),
.rvc_branch_o ( rvc_branch[i] ),
.rvc_jump_o ( rvc_jump[i] ),
.rvc_jr_o ( rvc_jr[i] ),
.rvc_return_o ( rvc_return[i] ),
.rvc_jalr_o ( rvc_jalr[i] ),
.rvc_call_o ( rvc_call[i] ),
.rvc_imm_o ( rvc_imm[i] )
);
end
instr_queue #(
.CVA6Cfg ( CVA6Cfg )
) i_instr_queue (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.flush_i ( flush_i ),
.instr_i ( instr ), // from re-aligner
.addr_i ( addr ), // from re-aligner
.exception_i ( icache_ex_valid_q ), // from I$
.exception_addr_i ( icache_vaddr_q ),
.predict_address_i ( predict_address ),
.cf_type_i ( cf_type ),
.valid_i ( instruction_valid ), // from re-aligner
.consumed_o ( instr_queue_consumed ),
.ready_o ( instr_queue_ready ),
.replay_o ( replay ),
.replay_addr_o ( replay_addr ),
.fetch_entry_o ( fetch_entry_o ), // to back-end
.fetch_entry_valid_o ( fetch_entry_valid_o ), // to back-end
.fetch_entry_ready_i ( fetch_entry_ready_i ) // to back-end
.flush_i(flush_bp_i),
.push_i (ras_push),
.pop_i (ras_pop),
.data_i (ras_update),
.data_o (ras_predict)
);
end
// pragma translate_off
`ifndef VERILATOR
initial begin
assert (FETCH_WIDTH == 32 || FETCH_WIDTH == 64) else $fatal(1, "[frontend] fetch width != not supported");
end
`endif
// pragma translate_on
//For FPGA, BTB is implemented in read synchronous BRAM
//while for ASIC, BTB is implemented in D flip-flop
//and can be read at the same cycle.
assign vpc_btb = (ariane_pkg::FPGA_EN) ? icache_dreq_i.vaddr : icache_vaddr_q;
// Branch target buffer, instantiated only when BTBEntries is non-zero.
// With zero entries the prediction vector is tied to '0, so every
// btb_prediction[i].valid reads as 0 and no JumpR prediction is ever made.
if (CVA6Cfg.BTBEntries == 0) begin
assign btb_prediction = '0;
end else begin : btb_gen
btb #(
.CVA6Cfg (CVA6Cfg),
.NR_ENTRIES(CVA6Cfg.BTBEntries)
) i_btb (
// .clk_i / .rst_ni / .debug_mode_i are implicit named connections
// (connected to the identically named signals of this module)
.clk_i,
.rst_ni,
.flush_i (flush_bp_i),
.debug_mode_i,
// lookup address: vpc_btb (I$ response vaddr on FPGA, registered vaddr otherwise)
.vpc_i (vpc_btb),
// written only for mispredicted JumpR resolutions (see btb_update.valid)
.btb_update_i (btb_update),
.btb_prediction_o(btb_prediction)
);
end
// Branch history table, instantiated only when BHTEntries is non-zero.
// With zero entries the prediction vector is tied to '0, so
// bht_prediction[i].valid reads as 0 and branches fall back to the static
// (sign-of-immediate) prediction in the taken/not-taken logic.
if (CVA6Cfg.BHTEntries == 0) begin
assign bht_prediction = '0;
end else begin : bht_gen
bht #(
.CVA6Cfg (CVA6Cfg),
.NR_ENTRIES(CVA6Cfg.BHTEntries)
) i_bht (
// implicit named connections for clk_i, rst_ni and debug_mode_i
.clk_i,
.rst_ni,
.flush_i (flush_bp_i),
.debug_mode_i,
// always looked up with the registered fetch address (the BTB uses vpc_btb instead)
.vpc_i (icache_vaddr_q),
// written for every resolved instruction whose cf_type is Branch (see bht_update.valid)
.bht_update_i (bht_update),
.bht_prediction_o(bht_prediction)
);
end
// we need to inspect up to INSTR_PER_FETCH instructions for branches
// and jumps
// One instr_scan instance per fetch slot. Instance i looks at instr[i] and drives
// bit/entry i of the rvi_* and rvc_* vectors, which feed the is_branch/is_call/
// is_return/is_jump/is_jalr classification and the predicted-target computation.
for (genvar i = 0; i < INSTR_PER_FETCH; i++) begin : gen_instr_scan
instr_scan #(
.CVA6Cfg(CVA6Cfg)
) i_instr_scan (
.instr_i (instr[i]),
// outputs for the 32-bit (RVI) encoding
.rvi_return_o(rvi_return[i]),
.rvi_call_o (rvi_call[i]),
.rvi_branch_o(rvi_branch[i]),
.rvi_jalr_o (rvi_jalr[i]),
.rvi_jump_o (rvi_jump[i]),
.rvi_imm_o (rvi_imm[i]),
// outputs for the compressed (RVC) encoding
.rvc_branch_o(rvc_branch[i]),
.rvc_jump_o (rvc_jump[i]),
.rvc_jr_o (rvc_jr[i]),
.rvc_return_o(rvc_return[i]),
.rvc_jalr_o (rvc_jalr[i]),
.rvc_call_o (rvc_call[i]),
.rvc_imm_o (rvc_imm[i])
);
end
// Instruction queue between the re-aligner and the back-end.
// Its ready_o gates the I$ request (icache_dreq_o.req) and if_ready, its
// replay_o/replay_addr_o feed kill_s1 and the next-PC selection, and its
// consumed_o qualifies RAS push/pop.
instr_queue #(
.CVA6Cfg(CVA6Cfg)
) i_instr_queue (
.clk_i (clk_i),
.rst_ni (rst_ni),
// front-end flush (flush_i), not the branch-predictor flush (flush_bp_i)
.flush_i (flush_i),
.instr_i (instr), // from re-aligner
.addr_i (addr), // from re-aligner
.exception_i (icache_ex_valid_q), // from I$
.exception_addr_i (icache_vaddr_q),
.predict_address_i (predict_address),
.cf_type_i (cf_type),
.valid_i (instruction_valid), // from re-aligner
.consumed_o (instr_queue_consumed),
.ready_o (instr_queue_ready),
.replay_o (replay),
.replay_addr_o (replay_addr),
.fetch_entry_o (fetch_entry_o), // to back-end
.fetch_entry_valid_o(fetch_entry_valid_o), // to back-end
.fetch_entry_ready_i(fetch_entry_ready_i) // to back-end
);
// pragma translate_off
`ifndef VERILATOR
// Simulation start-up check (compiled out for Verilator and for synthesis by the
// surrounding `ifndef / translate_off pragmas): stop with $fatal unless
// FETCH_WIDTH is 32 or 64.
// NOTE(review): the message text "fetch width != not supported" reads as garbled;
// consider rewording it (e.g. naming the offending width) in a functional change.
initial begin
assert (FETCH_WIDTH == 32 || FETCH_WIDTH == 64)
else $fatal(1, "[frontend] fetch width != not supported");
end
`endif
// pragma translate_on
endmodule

View file

@ -43,64 +43,67 @@
// the replay mechanism gets more complicated as it can be that a 32 bit instruction
// can not be pushed at once.
module instr_queue import ariane_pkg::*; #(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
module instr_queue
import ariane_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) (
input logic clk_i,
input logic rst_ni,
input logic flush_i,
input logic [ariane_pkg::INSTR_PER_FETCH-1:0][31:0] instr_i,
input logic [ariane_pkg::INSTR_PER_FETCH-1:0][riscv::VLEN-1:0] addr_i,
input logic [ariane_pkg::INSTR_PER_FETCH-1:0] valid_i,
output logic ready_o,
output logic [ariane_pkg::INSTR_PER_FETCH-1:0] consumed_o,
// we've encountered an exception, at this point the only possible exceptions are page-table faults
input ariane_pkg::frontend_exception_t exception_i,
input logic [riscv::VLEN-1:0] exception_addr_i,
// branch predict
input logic [riscv::VLEN-1:0] predict_address_i,
input ariane_pkg::cf_t [ariane_pkg::INSTR_PER_FETCH-1:0] cf_type_i,
// replay instruction because one of the FIFO was already full
output logic replay_o,
output logic [riscv::VLEN-1:0] replay_addr_o, // address at which to replay this instruction
// to processor backend
output ariane_pkg::fetch_entry_t fetch_entry_o,
output logic fetch_entry_valid_o,
input logic fetch_entry_ready_i
input logic clk_i,
input logic rst_ni,
input logic flush_i,
input logic [ariane_pkg::INSTR_PER_FETCH-1:0][31:0] instr_i,
input logic [ariane_pkg::INSTR_PER_FETCH-1:0][riscv::VLEN-1:0] addr_i,
input logic [ariane_pkg::INSTR_PER_FETCH-1:0] valid_i,
output logic ready_o,
output logic [ariane_pkg::INSTR_PER_FETCH-1:0] consumed_o,
// we've encountered an exception, at this point the only possible exceptions are page-table faults
input ariane_pkg::frontend_exception_t exception_i,
input logic [riscv::VLEN-1:0] exception_addr_i,
// branch predict
input logic [riscv::VLEN-1:0] predict_address_i,
input ariane_pkg::cf_t [ariane_pkg::INSTR_PER_FETCH-1:0] cf_type_i,
// replay instruction because one of the FIFO was already full
output logic replay_o,
output logic [riscv::VLEN-1:0] replay_addr_o, // address at which to replay this instruction
// to processor backend
output ariane_pkg::fetch_entry_t fetch_entry_o,
output logic fetch_entry_valid_o,
input logic fetch_entry_ready_i
);
typedef struct packed {
logic [31:0] instr; // instruction word
ariane_pkg::cf_t cf; // branch was taken
ariane_pkg::frontend_exception_t ex; // exception happened
logic [riscv::VLEN-1:0] ex_vaddr; // lower VLEN bits of tval for exception
logic [31:0] instr; // instruction word
ariane_pkg::cf_t cf; // branch was taken
ariane_pkg::frontend_exception_t ex; // exception happened
logic [riscv::VLEN-1:0] ex_vaddr; // lower VLEN bits of tval for exception
} instr_data_t;
logic [ariane_pkg::LOG2_INSTR_PER_FETCH-1:0] branch_index;
// instruction queues
logic [ariane_pkg::INSTR_PER_FETCH-1:0]
[$clog2(ariane_pkg::FETCH_FIFO_DEPTH)-1:0] instr_queue_usage;
instr_data_t [ariane_pkg::INSTR_PER_FETCH-1:0] instr_data_in, instr_data_out;
logic [ariane_pkg::INSTR_PER_FETCH-1:0] push_instr, push_instr_fifo;
logic [ariane_pkg::INSTR_PER_FETCH-1:0] pop_instr;
logic [ariane_pkg::INSTR_PER_FETCH-1:0] instr_queue_full;
logic [ariane_pkg::INSTR_PER_FETCH-1:0] instr_queue_empty;
logic instr_overflow;
logic [ariane_pkg::INSTR_PER_FETCH-1:0][$clog2(
ariane_pkg::FETCH_FIFO_DEPTH
)-1:0] instr_queue_usage;
instr_data_t [ariane_pkg::INSTR_PER_FETCH-1:0] instr_data_in, instr_data_out;
logic [ariane_pkg::INSTR_PER_FETCH-1:0] push_instr, push_instr_fifo;
logic [ ariane_pkg::INSTR_PER_FETCH-1:0] pop_instr;
logic [ ariane_pkg::INSTR_PER_FETCH-1:0] instr_queue_full;
logic [ ariane_pkg::INSTR_PER_FETCH-1:0] instr_queue_empty;
logic instr_overflow;
// address queue
logic [$clog2(ariane_pkg::FETCH_FIFO_DEPTH)-1:0] address_queue_usage;
logic [riscv::VLEN-1:0] address_out;
logic pop_address;
logic push_address;
logic full_address;
logic empty_address;
logic address_overflow;
logic [ riscv::VLEN-1:0] address_out;
logic pop_address;
logic push_address;
logic full_address;
logic empty_address;
logic address_overflow;
// input stream counter
logic [ariane_pkg::LOG2_INSTR_PER_FETCH-1:0] idx_is_d, idx_is_q;
// Registers
// output FIFO select, one-hot
logic [ariane_pkg::INSTR_PER_FETCH-1:0] idx_ds_d, idx_ds_q;
logic [riscv::VLEN-1:0] pc_d, pc_q; // current PC
logic reset_address_d, reset_address_q; // we need to re-set the address because of a flush
logic [riscv::VLEN-1:0] pc_d, pc_q; // current PC
logic reset_address_d, reset_address_q; // we need to re-set the address because of a flush
logic [ariane_pkg::INSTR_PER_FETCH*2-2:0] branch_mask_extended;
logic [ariane_pkg::INSTR_PER_FETCH-1:0] branch_mask;
@ -129,12 +132,12 @@ module instr_queue import ariane_pkg::*; #(
// calculate a branch mask, e.g.: get the first taken branch
lzc #(
.WIDTH ( ariane_pkg::INSTR_PER_FETCH ),
.MODE ( 0 ) // count trailing zeros
.WIDTH(ariane_pkg::INSTR_PER_FETCH),
.MODE (0) // count trailing zeros
) i_lzc_branch_index (
.in_i ( taken ), // we want to count trailing zeros
.cnt_o ( branch_index ), // first branch on branch_index
.empty_o ( branch_empty )
.in_i (taken), // we want to count trailing zeros
.cnt_o (branch_index), // first branch on branch_index
.empty_o(branch_empty)
);
@ -154,10 +157,10 @@ module instr_queue import ariane_pkg::*; #(
assign consumed_o = consumed_extended[ariane_pkg::INSTR_PER_FETCH-1:0];
// count the numbers of valid instructions we've pushed from this package
popcount #(
.INPUT_WIDTH ( ariane_pkg::INSTR_PER_FETCH )
.INPUT_WIDTH(ariane_pkg::INSTR_PER_FETCH)
) i_popcount (
.data_i ( push_instr_fifo ),
.popcount_o ( popcount )
.data_i (push_instr_fifo),
.popcount_o(popcount)
);
assign shamt = popcount[$bits(shamt)-1:0];
@ -168,7 +171,7 @@ module instr_queue import ariane_pkg::*; #(
// Input interface
// ----------------------
// rotate left by the current position
assign fifo_pos_extended = { valid, valid } << idx_is_q;
assign fifo_pos_extended = {valid, valid} << idx_is_q;
// we just care about the upper bits
assign fifo_pos = fifo_pos_extended[ariane_pkg::INSTR_PER_FETCH*2-1:ariane_pkg::INSTR_PER_FETCH];
// the fifo_position signal can directly be used to guide the push signal of each FIFO
@ -178,17 +181,17 @@ module instr_queue import ariane_pkg::*; #(
// duplicate the entries for easier selection e.g.: 3 2 1 0 3 2 1 0
for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_duplicate_instr_input
assign instr[i] = instr_i[i];
assign instr[i + ariane_pkg::INSTR_PER_FETCH] = instr_i[i];
assign instr[i+ariane_pkg::INSTR_PER_FETCH] = instr_i[i];
assign cf[i] = cf_type_i[i];
assign cf[i + ariane_pkg::INSTR_PER_FETCH] = cf_type_i[i];
assign cf[i+ariane_pkg::INSTR_PER_FETCH] = cf_type_i[i];
end
// shift the inputs
for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_fifo_input_select
/* verilator lint_off WIDTH */
assign instr_data_in[i].instr = instr[i + idx_is_q];
assign instr_data_in[i].cf = cf[i + idx_is_q];
assign instr_data_in[i].ex = exception_i; // exceptions hold for the whole fetch packet
assign instr_data_in[i].instr = instr[i+idx_is_q];
assign instr_data_in[i].cf = cf[i+idx_is_q];
assign instr_data_in[i].ex = exception_i; // exceptions hold for the whole fetch packet
assign instr_data_in[i].ex_vaddr = exception_addr_i;
/* verilator lint_on WIDTH */
end
@ -217,7 +220,7 @@ module instr_queue import ariane_pkg::*; #(
/* verilator lint_off WIDTH */
assign instr_data_in[0].instr = instr_i[0];
assign instr_data_in[0].cf = cf_type_i[0];
assign instr_data_in[0].ex = exception_i; // exceptions hold for the whole fetch packet
assign instr_data_in[0].ex = exception_i; // exceptions hold for the whole fetch packet
assign instr_data_in[0].ex_vaddr = exception_addr_i;
/* verilator lint_on WIDTH */
end
@ -235,7 +238,7 @@ module instr_queue import ariane_pkg::*; #(
end else begin : gen_instr_overflow_fifo_without_C
assign instr_overflow_fifo = instr_queue_full & valid_i;
end
assign instr_overflow = |instr_overflow_fifo; // at least one instruction overflowed
assign instr_overflow = |instr_overflow_fifo; // at least one instruction overflowed
assign address_overflow = full_address & push_address;
assign replay_o = instr_overflow | address_overflow;
@ -279,14 +282,18 @@ module instr_queue import ariane_pkg::*; #(
end
fetch_entry_o.instruction = instr_data_out[i].instr;
fetch_entry_o.ex.valid = instr_data_out[i].ex != ariane_pkg::FE_NONE;
fetch_entry_o.ex.tval = {{(riscv::XLEN-riscv::VLEN){1'b0}}, instr_data_out[i].ex_vaddr};
fetch_entry_o.ex.tval = {
{(riscv::XLEN - riscv::VLEN) {1'b0}}, instr_data_out[i].ex_vaddr
};
fetch_entry_o.branch_predict.cf = instr_data_out[i].cf;
pop_instr[i] = fetch_entry_valid_o & fetch_entry_ready_i;
end
end
// rotate the pointer left
if (fetch_entry_ready_i) begin
idx_ds_d = {idx_ds_q[ariane_pkg::INSTR_PER_FETCH-2:0], idx_ds_q[ariane_pkg::INSTR_PER_FETCH-1]};
idx_ds_d = {
idx_ds_q[ariane_pkg::INSTR_PER_FETCH-2:0], idx_ds_q[ariane_pkg::INSTR_PER_FETCH-1]
};
end
end
end else begin : gen_downstream_itf_without_c
@ -302,7 +309,7 @@ module instr_queue import ariane_pkg::*; #(
end else begin
fetch_entry_o.ex.cause = riscv::INSTR_PAGE_FAULT;
end
fetch_entry_o.ex.tval = {{64-riscv::VLEN{1'b0}}, instr_data_out[0].ex_vaddr};
fetch_entry_o.ex.tval = {{64 - riscv::VLEN{1'b0}}, instr_data_out[0].ex_vaddr};
fetch_entry_o.branch_predict.predict_address = address_out;
fetch_entry_o.branch_predict.cf = instr_data_out[0].cf;
@ -326,15 +333,15 @@ module instr_queue import ariane_pkg::*; #(
// TODO(zarubaf): This needs to change for a dual issue implementation
// advance the PC
if (ariane_pkg::RVC == 1'b1) begin : gen_pc_with_c_extension
pc_d = pc_q + ((fetch_entry_o.instruction[1:0] != 2'b11) ? 'd2 : 'd4);
pc_d = pc_q + ((fetch_entry_o.instruction[1:0] != 2'b11) ? 'd2 : 'd4);
end else begin : gen_pc_without_c_extension
pc_d = pc_q + 'd4;
pc_d = pc_q + 'd4;
end
end
if (pop_address) pc_d = address_out;
// we previously flushed so we need to reset the address
// we previously flushed so we need to reset the address
if (valid_i[0] && reset_address_q) begin
// this is the base of the first instruction
pc_d = addr_i[0];
@ -347,20 +354,20 @@ module instr_queue import ariane_pkg::*; #(
// Make sure we don't save any instructions if we couldn't save the address
assign push_instr_fifo[i] = push_instr[i] & ~address_overflow;
fifo_v3 #(
.DEPTH ( ariane_pkg::FETCH_FIFO_DEPTH ),
.dtype ( instr_data_t )
.DEPTH(ariane_pkg::FETCH_FIFO_DEPTH),
.dtype(instr_data_t)
) i_fifo_instr_data (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.flush_i ( flush_i ),
.testmode_i ( 1'b0 ),
.full_o ( instr_queue_full[i] ),
.empty_o ( instr_queue_empty[i] ),
.usage_o ( instr_queue_usage[i] ),
.data_i ( instr_data_in[i] ),
.push_i ( push_instr_fifo[i] ),
.data_o ( instr_data_out[i] ),
.pop_i ( pop_instr[i] )
.clk_i (clk_i),
.rst_ni (rst_ni),
.flush_i (flush_i),
.testmode_i(1'b0),
.full_o (instr_queue_full[i]),
.empty_o (instr_queue_empty[i]),
.usage_o (instr_queue_usage[i]),
.data_i (instr_data_in[i]),
.push_i (push_instr_fifo[i]),
.data_o (instr_data_out[i]),
.pop_i (pop_instr[i])
);
end
// or reduce and check whether we are retiring a taken branch (might be that the corresponding)
@ -374,26 +381,26 @@ module instr_queue import ariane_pkg::*; #(
end
fifo_v3 #(
.DEPTH ( ariane_pkg::FETCH_FIFO_DEPTH ), // TODO(zarubaf): Fork out to separate param
.DATA_WIDTH ( riscv::VLEN )
.DEPTH (ariane_pkg::FETCH_FIFO_DEPTH), // TODO(zarubaf): Fork out to separate param
.DATA_WIDTH(riscv::VLEN)
) i_fifo_address (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.flush_i ( flush_i ),
.testmode_i ( 1'b0 ),
.full_o ( full_address ),
.empty_o ( empty_address ),
.usage_o ( address_queue_usage ),
.data_i ( predict_address_i ),
.push_i ( push_address & ~full_address ),
.data_o ( address_out ),
.pop_i ( pop_address )
.clk_i (clk_i),
.rst_ni (rst_ni),
.flush_i (flush_i),
.testmode_i(1'b0),
.full_o (full_address),
.empty_o (empty_address),
.usage_o (address_queue_usage),
.data_i (predict_address_i),
.push_i (push_address & ~full_address),
.data_o (address_out),
.pop_i (pop_address)
);
unread i_unread_address_fifo (.d_i(|{empty_address, address_queue_usage}));
unread i_unread_branch_mask (.d_i(|branch_mask_extended));
unread i_unread_lzc (.d_i(|{branch_empty}));
unread i_unread_fifo_pos (.d_i(|fifo_pos_extended)); // we don't care about the lower signals
unread i_unread_fifo_pos (.d_i(|fifo_pos_extended)); // we don't care about the lower signals
unread i_unread_instr_fifo (.d_i(|instr_queue_usage));
if (ariane_pkg::RVC) begin : gen_pc_q_with_c
@ -413,8 +420,8 @@ module instr_queue import ariane_pkg::*; #(
idx_is_q <= '0;
reset_address_q <= 1'b1;
end else begin
idx_ds_q <= idx_ds_d;
idx_is_q <= idx_is_d;
idx_ds_q <= idx_ds_d;
idx_is_q <= idx_is_d;
end
end
end
@ -436,14 +443,17 @@ module instr_queue import ariane_pkg::*; #(
end
// pragma translate_off
`ifndef VERILATOR
replay_address_fifo: assert property (
@(posedge clk_i) disable iff (!rst_ni) replay_o |-> !i_fifo_address.push_i
) else $fatal(1,"[instr_queue] Pushing address although replay asserted");
`ifndef VERILATOR
replay_address_fifo :
assert property (@(posedge clk_i) disable iff (!rst_ni) replay_o |-> !i_fifo_address.push_i)
else $fatal(1, "[instr_queue] Pushing address although replay asserted");
output_select_onehot: assert property (
@(posedge clk_i) $onehot0(idx_ds_q)
) else begin $error("Output select should be one-hot encoded"); $stop(); end
`endif
output_select_onehot :
assert property (@(posedge clk_i) $onehot0(idx_ds_q))
else begin
$error("Output select should be one-hot encoded");
$stop();
end
`endif
// pragma translate_on
endmodule

View file

@ -18,62 +18,66 @@
// Combinational pre-decoder for one instruction word.
// Every output is a continuous assignment derived from instr_i only:
//  - rvi_* outputs classify the word as an uncompressed branch / JAL / JALR /
//    call / return and provide the matching immediate (rvi_imm_o),
//  - rvc_* outputs do the same for compressed encodings (rvc_imm_o).
// The module contains no state and no clock.
module instr_scan #(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) (
input logic [31:0] instr_i, // expect aligned instruction, compressed or not
output logic rvi_return_o,
output logic rvi_call_o,
output logic rvi_branch_o,
output logic rvi_jalr_o,
output logic rvi_jump_o,
input logic [ 31:0] instr_i, // expect aligned instruction, compressed or not
output logic rvi_return_o,
output logic rvi_call_o,
output logic rvi_branch_o,
output logic rvi_jalr_o,
output logic rvi_jump_o,
output logic [riscv::VLEN-1:0] rvi_imm_o,
output logic rvc_branch_o,
output logic rvc_jump_o,
output logic rvc_jr_o,
output logic rvc_return_o,
output logic rvc_jalr_o,
output logic rvc_call_o,
output logic rvc_branch_o,
output logic rvc_jump_o,
output logic rvc_jr_o,
output logic rvc_return_o,
output logic rvc_jalr_o,
output logic rvc_call_o,
output logic [riscv::VLEN-1:0] rvc_imm_o
);
logic is_rvc;
assign is_rvc = (instr_i[1:0] != 2'b11);
// an instruction is compressed when its two lowest bits are not 2'b11
logic is_rvc;
assign is_rvc = (instr_i[1:0] != 2'b11);
logic rv32_rvc_jal;
assign rv32_rvc_jal = (riscv::XLEN == 32) & ((instr_i[15:13] == riscv::OpcodeC1Jal) & is_rvc & (instr_i[1:0] == riscv::OpcodeC1));
// the compressed JAL encoding is only recognised when XLEN is 32
logic rv32_rvc_jal;
assign rv32_rvc_jal = (riscv::XLEN == 32) & ((instr_i[15:13] == riscv::OpcodeC1Jal) & is_rvc & (instr_i[1:0] == riscv::OpcodeC1));
logic is_xret;
assign is_xret = logic'(instr_i[31:30] == 2'b00) & logic'(instr_i[28:0] == 29'b10000001000000000000001110011);
// bit 29 is deliberately not compared, so two encodings that differ only in
// that bit match (these correspond to the SRET and MRET encodings of the
// RISC-V privileged specification)
logic is_xret;
assign is_xret = logic'(instr_i[31:30] == 2'b00) & logic'(instr_i[28:0] == 29'b10000001000000000000001110011);
// check that rs1 is either x1 or x5 and that rd is not rs1
assign rvi_return_o = rvi_jalr_o & ((instr_i[19:15] == 5'd1) | instr_i[19:15] == 5'd5)
// check that rs1 is either x1 or x5 and that rd is not rs1
assign rvi_return_o = rvi_jalr_o & ((instr_i[19:15] == 5'd1) | instr_i[19:15] == 5'd5)
& (instr_i[19:15] != instr_i[11:7]);
// Opcode is JAL[R] and destination register is either x1 or x5
assign rvi_call_o = (rvi_jalr_o | rvi_jump_o) & ((instr_i[11:7] == 5'd1) | instr_i[11:7] == 5'd5);
// differentiates between JAL and BRANCH opcode, JALR comes from BHT
assign rvi_imm_o = is_xret ? '0 : (instr_i[3]) ? ariane_pkg::uj_imm(instr_i) : ariane_pkg::sb_imm(instr_i);
assign rvi_branch_o = (instr_i[6:0] == riscv::OpcodeBranch);
assign rvi_jalr_o = (instr_i[6:0] == riscv::OpcodeJalr);
assign rvi_jump_o = logic'(instr_i[6:0] == riscv::OpcodeJal) | is_xret;
// Opcode is JAL[R] and destination register is either x1 or x5
assign rvi_call_o = (rvi_jalr_o | rvi_jump_o) & ((instr_i[11:7] == 5'd1) | instr_i[11:7] == 5'd5);
// differentiates between JAL and BRANCH opcode, JALR comes from BHT
// (the immediate is forced to zero for the xret match; otherwise instr_i[3]
// selects between the uj_imm and sb_imm helpers)
assign rvi_imm_o = is_xret ? '0 : (instr_i[3]) ? ariane_pkg::uj_imm(
instr_i
) : ariane_pkg::sb_imm(
instr_i
);
assign rvi_branch_o = (instr_i[6:0] == riscv::OpcodeBranch);
assign rvi_jalr_o = (instr_i[6:0] == riscv::OpcodeJalr);
// an xret match is reported as a jump as well
assign rvi_jump_o = logic'(instr_i[6:0] == riscv::OpcodeJal) | is_xret;
// opcode JAL
assign rvc_jump_o = ((instr_i[15:13] == riscv::OpcodeC1J) & is_rvc & (instr_i[1:0] == riscv::OpcodeC1)) | rv32_rvc_jal;
// opcode JAL
assign rvc_jump_o = ((instr_i[15:13] == riscv::OpcodeC1J) & is_rvc & (instr_i[1:0] == riscv::OpcodeC1)) | rv32_rvc_jal;
// always links to register 0
logic is_jal_r;
assign is_jal_r = (instr_i[15:13] == riscv::OpcodeC2JalrMvAdd)
// always links to register 0
logic is_jal_r;
assign is_jal_r = (instr_i[15:13] == riscv::OpcodeC2JalrMvAdd)
& (instr_i[6:2] == 5'b00000)
& (instr_i[1:0] == riscv::OpcodeC2)
& is_rvc;
assign rvc_jr_o = is_jal_r & ~instr_i[12];
// always links to register 1 e.g.: it is a jump
assign rvc_jalr_o = is_jal_r & instr_i[12];
assign rvc_call_o = rvc_jalr_o | rv32_rvc_jal;
// instr_i[12] distinguishes the two forms: clear -> rvc_jr_o, set -> rvc_jalr_o
assign rvc_jr_o = is_jal_r & ~instr_i[12];
// always links to register 1 e.g.: it is a jump
assign rvc_jalr_o = is_jal_r & instr_i[12];
assign rvc_call_o = rvc_jalr_o | rv32_rvc_jal;
assign rvc_branch_o = ((instr_i[15:13] == riscv::OpcodeC1Beqz) | (instr_i[15:13] == riscv::OpcodeC1Bnez))
assign rvc_branch_o = ((instr_i[15:13] == riscv::OpcodeC1Beqz) | (instr_i[15:13] == riscv::OpcodeC1Bnez))
& (instr_i[1:0] == riscv::OpcodeC1)
& is_rvc;
// check that rs1 is x1 or x5
assign rvc_return_o = ((instr_i[11:7] == 5'd1) | (instr_i[11:7] == 5'd5)) & rvc_jr_o ;
// check that rs1 is x1 or x5
assign rvc_return_o = ((instr_i[11:7] == 5'd1) | (instr_i[11:7] == 5'd5)) & rvc_jr_o;
// differentiates between JAL and BRANCH opcode, JALR comes from BHT
assign rvc_imm_o = (instr_i[14]) ? {{56+riscv::VLEN-64{instr_i[12]}}, instr_i[6:5], instr_i[2], instr_i[11:10], instr_i[4:3], 1'b0}
// differentiates between JAL and BRANCH opcode, JALR comes from BHT
// (instr_i[14] selects the immediate layout; both layouts replicate
// instr_i[12] into the upper bits and append a zero LSB)
assign rvc_imm_o = (instr_i[14]) ? {{56+riscv::VLEN-64{instr_i[12]}}, instr_i[6:5], instr_i[2], instr_i[11:10], instr_i[4:3], 1'b0}
: {{53+riscv::VLEN-64{instr_i[12]}}, instr_i[8], instr_i[10:9], instr_i[6], instr_i[7], instr_i[2], instr_i[11], instr_i[5:3], 1'b0};
endmodule

View file

@ -17,55 +17,55 @@
// Return address stack with DEPTH entries of type ariane_pkg::ras_t.
//  - data_o always reflects entry 0 of the registered stack.
//  - push_i writes data_i (marked valid) into entry 0 and shifts the other
//    entries one position towards the higher indices.
//  - pop_i shifts all entries one position towards index 0 and clears the
//    last entry.
//  - push_i and pop_i together leave the stack as it was and only overwrite
//    entry 0 with data_i.
//  - flush_i is evaluated last and clears the complete stack.
// The stack register is reset to all zeros while rst_ni is low.
// NOTE(review): the slices [DEPTH-1:1] and [DEPTH-2:0] look like they assume
// DEPTH >= 2 - confirm before instantiating with DEPTH = 1.
module ras #(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int unsigned DEPTH = 2
)(
input logic clk_i,
input logic rst_ni,
input logic flush_i,
input logic push_i,
input logic pop_i,
input logic [riscv::VLEN-1:0] data_i,
output ariane_pkg::ras_t data_o
) (
input logic clk_i,
input logic rst_ni,
input logic flush_i,
input logic push_i,
input logic pop_i,
input logic [riscv::VLEN-1:0] data_i,
output ariane_pkg::ras_t data_o
);
ariane_pkg::ras_t [DEPTH-1:0] stack_d, stack_q;
ariane_pkg::ras_t [DEPTH-1:0] stack_d, stack_q;
assign data_o = stack_q[0];
// the top of the stack is entry 0
assign data_o = stack_q[0];
always_comb begin
stack_d = stack_q;
always_comb begin
// default: keep the current contents
stack_d = stack_q;
// push on the stack
if (push_i) begin
stack_d[0].ra = data_i;
// mark the new return address as valid
stack_d[0].valid = 1'b1;
stack_d[DEPTH-1:1] = stack_q[DEPTH-2:0];
end
if (pop_i) begin
stack_d[DEPTH-2:0] = stack_q[DEPTH-1:1];
// we popped the value so invalidate the end of the stack
stack_d[DEPTH-1].valid = 1'b0;
stack_d[DEPTH-1].ra = 'b0;
end
// leave everything untouched and just push the latest value to the
// top of the stack
if (pop_i && push_i) begin
stack_d = stack_q;
stack_d[0].ra = data_i;
stack_d[0].valid = 1'b1;
end
if (flush_i) begin
stack_d = '0;
end
// push on the stack
if (push_i) begin
stack_d[0].ra = data_i;
// mark the new return address as valid
stack_d[0].valid = 1'b1;
stack_d[DEPTH-1:1] = stack_q[DEPTH-2:0];
end
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
stack_q <= '0;
end else begin
stack_q <= stack_d;
end
if (pop_i) begin
stack_d[DEPTH-2:0] = stack_q[DEPTH-1:1];
// we popped the value so invalidate the end of the stack
stack_d[DEPTH-1].valid = 1'b0;
stack_d[DEPTH-1].ra = 'b0;
end
// leave everything untouched and just push the latest value to the
// top of the stack
if (pop_i && push_i) begin
stack_d = stack_q;
stack_d[0].ra = data_i;
stack_d[0].valid = 1'b1;
end
// a flush wins over push and pop because it is assigned last
if (flush_i) begin
stack_d = '0;
end
end
// stack register, asynchronously cleared while rst_ni is low
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
stack_q <= '0;
end else begin
stack_q <= stack_d;
end
end
endmodule

View file

@ -16,127 +16,125 @@
// Instruction decode stage.
//  1. When CVA6Cfg.RVC is set, the fetched instruction passes through
//     compressed_decoder; otherwise it is forwarded unchanged and the
//     is_illegal / is_compressed flags are tied to zero.
//  2. The decoder instance turns the instruction into a scoreboard entry.
//  3. A single-entry register (issue_q) holds the decoded entry for the
//     issue stage. A new fetch entry is accepted (fetch_entry_ready_o = 1)
//     when fetch_entry_valid_i is high and the register is either empty or
//     acknowledged by issue_instr_ack_i in the same cycle. flush_i clears
//     the valid bit and is evaluated last.
module id_stage #(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) (
input logic clk_i,
input logic rst_ni,
input logic clk_i,
input logic rst_ni,
input logic flush_i,
input logic debug_req_i,
input logic flush_i,
input logic debug_req_i,
// from IF
input ariane_pkg::fetch_entry_t fetch_entry_i,
input logic fetch_entry_valid_i,
output logic fetch_entry_ready_o, // acknowledge the instruction (fetch entry)
input ariane_pkg::fetch_entry_t fetch_entry_i,
input logic fetch_entry_valid_i,
output logic fetch_entry_ready_o, // acknowledge the instruction (fetch entry)
// to ID
output ariane_pkg::scoreboard_entry_t issue_entry_o, // a decoded instruction
output logic issue_entry_valid_o, // issue entry is valid
output logic is_ctrl_flow_o, // the instruction we issue is a ctrl flow instructions
input logic issue_instr_ack_i, // issue stage acknowledged sampling of instructions
output ariane_pkg::scoreboard_entry_t issue_entry_o, // a decoded instruction
output logic issue_entry_valid_o, // issue entry is valid
output logic is_ctrl_flow_o, // the instruction we issue is a ctrl flow instructions
input logic issue_instr_ack_i, // issue stage acknowledged sampling of instructions
// from CSR file
input riscv::priv_lvl_t priv_lvl_i, // current privilege level
input riscv::xs_t fs_i, // floating point extension status
input logic [2:0] frm_i, // floating-point dynamic rounding mode
input riscv::xs_t vs_i, // vector extension status
input logic [1:0] irq_i,
input ariane_pkg::irq_ctrl_t irq_ctrl_i,
input logic debug_mode_i, // we are in debug mode
input logic tvm_i,
input logic tw_i,
input logic tsr_i
input riscv::priv_lvl_t priv_lvl_i, // current privilege level
input riscv::xs_t fs_i, // floating point extension status
input logic [2:0] frm_i, // floating-point dynamic rounding mode
input riscv::xs_t vs_i, // vector extension status
input logic [1:0] irq_i,
input ariane_pkg::irq_ctrl_t irq_ctrl_i,
input logic debug_mode_i, // we are in debug mode
input logic tvm_i,
input logic tw_i,
input logic tsr_i
);
// ID/ISSUE register stage
typedef struct packed {
logic valid;
ariane_pkg::scoreboard_entry_t sbe;
logic is_ctrl_flow;
} issue_struct_t;
issue_struct_t issue_n, issue_q;
// ID/ISSUE register stage
typedef struct packed {
logic valid;
ariane_pkg::scoreboard_entry_t sbe;
logic is_ctrl_flow;
} issue_struct_t;
issue_struct_t issue_n, issue_q;
logic is_control_flow_instr;
ariane_pkg::scoreboard_entry_t decoded_instruction;
logic is_control_flow_instr;
ariane_pkg::scoreboard_entry_t decoded_instruction;
logic is_illegal;
logic [31:0] instruction;
logic is_compressed;
logic is_illegal;
logic [31:0] instruction;
logic is_compressed;
if (CVA6Cfg.RVC) begin
// ---------------------------------------------------------
// 1. Check if they are compressed and expand in case they are
// ---------------------------------------------------------
compressed_decoder #(
.CVA6Cfg ( CVA6Cfg )
) compressed_decoder_i (
.instr_i ( fetch_entry_i.instruction ),
.instr_o ( instruction ),
.illegal_instr_o ( is_illegal ),
.is_compressed_o ( is_compressed )
);
end else begin
assign instruction = fetch_entry_i.instruction;
assign is_illegal = '0;
assign is_compressed = '0;
end
if (CVA6Cfg.RVC) begin
// ---------------------------------------------------------
// 2. Decode and emit instruction to issue stage
// 1. Check if they are compressed and expand in case they are
// ---------------------------------------------------------
decoder #(
.CVA6Cfg ( CVA6Cfg )
) decoder_i (
.debug_req_i,
.irq_ctrl_i,
.irq_i,
.pc_i ( fetch_entry_i.address ),
.is_compressed_i ( is_compressed ),
.is_illegal_i ( is_illegal ),
.instruction_i ( instruction ),
.compressed_instr_i ( fetch_entry_i.instruction[15:0] ),
.branch_predict_i ( fetch_entry_i.branch_predict ),
.ex_i ( fetch_entry_i.ex ),
.priv_lvl_i ( priv_lvl_i ),
.debug_mode_i ( debug_mode_i ),
.fs_i,
.frm_i,
.vs_i,
.tvm_i,
.tw_i,
.tsr_i,
.instruction_o ( decoded_instruction ),
.is_control_flow_instr_o ( is_control_flow_instr )
compressed_decoder #(
.CVA6Cfg(CVA6Cfg)
) compressed_decoder_i (
.instr_i (fetch_entry_i.instruction),
.instr_o (instruction),
.illegal_instr_o(is_illegal),
.is_compressed_o(is_compressed)
);
end else begin
// without RVC the fetched word is used as is and never flagged
assign instruction = fetch_entry_i.instruction;
assign is_illegal = '0;
assign is_compressed = '0;
end
// ---------------------------------------------------------
// 2. Decode and emit instruction to issue stage
// ---------------------------------------------------------
decoder #(
.CVA6Cfg(CVA6Cfg)
) decoder_i (
.debug_req_i,
.irq_ctrl_i,
.irq_i,
.pc_i (fetch_entry_i.address),
.is_compressed_i (is_compressed),
.is_illegal_i (is_illegal),
.instruction_i (instruction),
.compressed_instr_i (fetch_entry_i.instruction[15:0]),
.branch_predict_i (fetch_entry_i.branch_predict),
.ex_i (fetch_entry_i.ex),
.priv_lvl_i (priv_lvl_i),
.debug_mode_i (debug_mode_i),
.fs_i,
.frm_i,
.vs_i,
.tvm_i,
.tw_i,
.tsr_i,
.instruction_o (decoded_instruction),
.is_control_flow_instr_o(is_control_flow_instr)
);
// ------------------
// Pipeline Register
// ------------------
assign issue_entry_o = issue_q.sbe;
assign issue_entry_valid_o = issue_q.valid;
assign is_ctrl_flow_o = issue_q.is_ctrl_flow;
// ------------------
// Pipeline Register
// ------------------
assign issue_entry_o = issue_q.sbe;
assign issue_entry_valid_o = issue_q.valid;
assign is_ctrl_flow_o = issue_q.is_ctrl_flow;
always_comb begin
issue_n = issue_q;
fetch_entry_ready_o = 1'b0;
always_comb begin
// defaults: hold the register, do not accept a new fetch entry
issue_n = issue_q;
fetch_entry_ready_o = 1'b0;
// Clear the valid flag if issue has acknowledged the instruction
if (issue_instr_ack_i)
issue_n.valid = 1'b0;
// Clear the valid flag if issue has acknowledged the instruction
if (issue_instr_ack_i) issue_n.valid = 1'b0;
// if we have a space in the register and the fetch is valid, go get it
// or the issue stage is currently acknowledging an instruction, which means that we will have space
// for a new instruction
if ((!issue_q.valid || issue_instr_ack_i) && fetch_entry_valid_i) begin
fetch_entry_ready_o = 1'b1;
issue_n = '{1'b1, decoded_instruction, is_control_flow_instr};
end
// invalidate the pipeline register on a flush
if (flush_i)
issue_n.valid = 1'b0;
// if we have a space in the register and the fetch is valid, go get it
// or the issue stage is currently acknowledging an instruction, which means that we will have space
// for a new instruction
if ((!issue_q.valid || issue_instr_ack_i) && fetch_entry_valid_i) begin
fetch_entry_ready_o = 1'b1;
issue_n = '{1'b1, decoded_instruction, is_control_flow_instr};
end
// -------------------------
// Registers (ID <-> Issue)
// -------------------------
always_ff @(posedge clk_i or negedge rst_ni) begin
if(~rst_ni) begin
issue_q <= '0;
end else begin
issue_q <= issue_n;
end
// invalidate the pipeline register on a flush
if (flush_i) issue_n.valid = 1'b0;
end
// -------------------------
// Registers (ID <-> Issue)
// -------------------------
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
issue_q <= '0;
end else begin
issue_q <= issue_n;
end
end
endmodule

File diff suppressed because it is too large Load diff

View file

@ -9,161 +9,161 @@
package config_pkg;
// ---------------
// Global Config
// ---------------
localparam int unsigned ILEN = 32;
localparam int unsigned NRET = 1;
// ---------------
// Global Config
// ---------------
localparam int unsigned ILEN = 32;
localparam int unsigned NRET = 1;
/// The NoC type is a top-level parameter, hence we need a bit more
/// information on what protocol those type parameters are supporting.
/// Currently three values are supported:
typedef enum {
/// The "classic" AXI4 protocol.
NOC_TYPE_AXI4_ATOP,
/// In the OpenPiton setting the WT cache is connected to the L15.
NOC_TYPE_L15_BIG_ENDIAN,
NOC_TYPE_L15_LITTLE_ENDIAN
} noc_type_e;
/// The NoC type is a top-level parameter, hence we need a bit more
/// information on what protocol those type parameters are supporting.
/// Currently three values are supported:
typedef enum {
/// The "classic" AXI4 protocol.
NOC_TYPE_AXI4_ATOP,
/// In the OpenPiton setting the WT cache is connected to the L15.
NOC_TYPE_L15_BIG_ENDIAN,
NOC_TYPE_L15_LITTLE_ENDIAN
} noc_type_e;
/// Cache type parameter
typedef enum logic [1:0] {
WB = 0,
WT = 1,
HPDCACHE = 2
} cache_type_t ;
/// Cache type parameter
typedef enum logic [1:0] {
WB = 0,
WT = 1,
HPDCACHE = 2
} cache_type_t;
localparam NrMaxRules = 16;
localparam NrMaxRules = 16;
typedef struct packed {
/// Number of commit ports, i.e., maximum number of instructions that the
/// core can retire per cycle. It can be beneficial to have more commit
/// ports than issue ports, for the scoreboard to empty out in case one
/// instruction stalls a little longer.
int unsigned NrCommitPorts;
/// AXI parameters.
int unsigned AxiAddrWidth;
int unsigned AxiDataWidth;
int unsigned AxiIdWidth;
int unsigned AxiUserWidth;
int unsigned NrLoadBufEntries;
bit FpuEn;
bit XF16;
bit XF16ALT;
bit XF8;
bit RVA;
bit RVV;
bit RVC;
bit RVZCB;
bit XFVec;
bit CvxifEn;
bit ZiCondExtEn;
// Calculated
bit RVF;
bit RVD;
bit FpPresent;
bit NSX;
int unsigned FLen;
bit RVFVec;
bit XF16Vec;
bit XF16ALTVec;
bit XF8Vec;
int unsigned NrRgprPorts;
int unsigned NrWbPorts;
bit EnableAccelerator;
// Debug Module
// address to which a hart should jump when it was requested to halt
logic [63:0] HaltAddress;
logic [63:0] ExceptionAddress;
/// Return address stack depth, good values are around 2 to 4.
int unsigned RASDepth;
/// Branch target buffer entries.
int unsigned BTBEntries;
/// Branch history (2-bit saturation counter) size, to keep track of
/// branch outcomes.
int unsigned BHTEntries;
/// Offset of the debug module.
logic [63:0] DmBaseAddress;
/// Number of PMP entries.
int unsigned NrPMPEntries;
/// Set to the bus type in use.
noc_type_e NOCType;
/// Physical Memory Attributes (PMAs)
/// Number of non idempotent rules.
int unsigned NrNonIdempotentRules;
/// Base which needs to match.
logic [NrMaxRules-1:0][63:0] NonIdempotentAddrBase;
/// Bit mask which bits to consider when matching the rule.
logic [NrMaxRules-1:0][63:0] NonIdempotentLength;
/// Number of regions which have execute property.
int unsigned NrExecuteRegionRules;
/// Base which needs to match.
logic [NrMaxRules-1:0][63:0] ExecuteRegionAddrBase;
/// Bit mask which bits to consider when matching the rule.
logic [NrMaxRules-1:0][63:0] ExecuteRegionLength;
/// Number of regions which have cached property.
int unsigned NrCachedRegionRules;
/// Base which needs to match.
logic [NrMaxRules-1:0][63:0] CachedRegionAddrBase;
/// Bit mask which bits to consider when matching the rule.
logic [NrMaxRules-1:0][63:0] CachedRegionLength;
} cva6_cfg_t;
/// Empty configuration to sanity check proper parameter passing. Whenever
/// you develop a module that resides within the core, assign this constant.
localparam cva6_cfg_t cva6_cfg_empty = '0;
typedef struct packed {
/// Number of commit ports, i.e., maximum number of instructions that the
/// core can retire per cycle. It can be beneficial to have more commit
/// ports than issue ports, for the scoreboard to empty out in case one
/// instruction stalls a little longer.
int unsigned NrCommitPorts;
/// AXI parameters.
int unsigned AxiAddrWidth;
int unsigned AxiDataWidth;
int unsigned AxiIdWidth;
int unsigned AxiUserWidth;
int unsigned NrLoadBufEntries;
bit FpuEn;
bit XF16;
bit XF16ALT;
bit XF8;
bit RVA;
bit RVV;
bit RVC;
bit RVZCB;
bit XFVec;
bit CvxifEn;
bit ZiCondExtEn;
// Calculated
bit RVF;
bit RVD;
bit FpPresent;
bit NSX;
int unsigned FLen;
bit RVFVec;
bit XF16Vec;
bit XF16ALTVec;
bit XF8Vec;
int unsigned NrRgprPorts;
int unsigned NrWbPorts;
bit EnableAccelerator;
// Debug Module
// address to which a hart should jump when it was requested to halt
logic [63:0] HaltAddress;
logic [63:0] ExceptionAddress;
/// Return address stack depth, good values are around 2 to 4.
int unsigned RASDepth;
/// Branch target buffer entries.
int unsigned BTBEntries;
/// Branch history (2-bit saturation counter) size, to keep track of
/// branch outcomes.
int unsigned BHTEntries;
/// Offset of the debug module.
logic [63:0] DmBaseAddress;
/// Number of PMP entries.
int unsigned NrPMPEntries;
/// Set to the bus type in use.
noc_type_e NOCType;
/// Physical Memory Attributes (PMAs)
/// Number of non idempotent rules.
int unsigned NrNonIdempotentRules;
/// Base which needs to match.
logic [NrMaxRules-1:0][63:0] NonIdempotentAddrBase;
/// Bit mask which bits to consider when matching the rule.
logic [NrMaxRules-1:0][63:0] NonIdempotentLength;
/// Number of regions which have execute property.
int unsigned NrExecuteRegionRules;
/// Base which needs to match.
logic [NrMaxRules-1:0][63:0] ExecuteRegionAddrBase;
/// Bit mask which bits to consider when matching the rule.
logic [NrMaxRules-1:0][63:0] ExecuteRegionLength;
/// Number of regions which have cached property.
int unsigned NrCachedRegionRules;
/// Base which needs to match.
logic [NrMaxRules-1:0][63:0] CachedRegionAddrBase;
/// Bit mask which bits to consider when matching the rule.
logic [NrMaxRules-1:0][63:0] CachedRegionLength;
} cva6_cfg_t;
/// Utility function being called to check parameters. Not all values make
/// sense for all parameters, here is the place to sanity check them.
/// The checks are immediate assertions; they are only compiled when
/// VERILATOR is not defined and are wrapped in translate_off/translate_on
/// pragmas.
function automatic void check_cfg (cva6_cfg_t Cfg);
// pragma translate_off
`ifndef VERILATOR
// the return address stack needs at least one entry
assert(Cfg.RASDepth > 0);
// BTB and BHT entry counts must be powers of two
// (a value of 0 does not pass, since 2**$clog2(0) evaluates to 1)
assert(2**$clog2(Cfg.BTBEntries) == Cfg.BTBEntries);
assert(2**$clog2(Cfg.BHTEntries) == Cfg.BHTEntries);
// the number of rules in use may not exceed NrMaxRules
assert(Cfg.NrNonIdempotentRules <= NrMaxRules);
assert(Cfg.NrExecuteRegionRules <= NrMaxRules);
assert(Cfg.NrCachedRegionRules <= NrMaxRules);
// at most 16 PMP entries are accepted
assert(Cfg.NrPMPEntries <= 16);
`endif
// pragma translate_on
endfunction
// Returns 1 when `address` lies in the half-open interval [base, base + len).
// A `len` of zero therefore never matches.
function automatic logic range_check(logic[63:0] base, logic[63:0] len, logic[63:0] address);
// if len is a power of two, and base is properly aligned, this check could be simplified
// Extend base by one bit to prevent an overflow.
// (both sides of the upper-bound comparison are 65 bits wide)
return (address >= base) && (({1'b0, address}) < (65'(base)+len));
endfunction : range_check
/// Empty configuration to sanity check proper parameter passing. Whenever
/// you develop a module that resides within the core, assign this constant.
localparam cva6_cfg_t cva6_cfg_empty = '0;
// Returns 1 when `address` falls inside at least one of the first
// Cfg.NrNonIdempotentRules (base, length) pairs of the configuration.
// With zero rules the result is 0.
function automatic logic is_inside_nonidempotent_regions (cva6_cfg_t Cfg, logic[63:0] address);
// one match flag per rule; flags of unused rules stay 0
logic[NrMaxRules-1:0] pass;
pass = '0;
for (int unsigned k = 0; k < Cfg.NrNonIdempotentRules; k++) begin
pass[k] = range_check(Cfg.NonIdempotentAddrBase[k], Cfg.NonIdempotentLength[k], address);
end
// OR-reduce: any matching rule is sufficient
return |pass;
endfunction : is_inside_nonidempotent_regions
/// Utility function being called to check parameters. Not all values make
/// sense for all parameters, here is the place to sanity check them.
/// The checks are immediate assertions; they are only compiled when
/// VERILATOR is not defined and are wrapped in translate_off/translate_on
/// pragmas.
function automatic void check_cfg(cva6_cfg_t Cfg);
// pragma translate_off
`ifndef VERILATOR
// the return address stack needs at least one entry
assert (Cfg.RASDepth > 0);
// BTB and BHT entry counts must be powers of two
// (a value of 0 does not pass, since 2 ** $clog2(0) evaluates to 1)
assert (2 ** $clog2(Cfg.BTBEntries) == Cfg.BTBEntries);
assert (2 ** $clog2(Cfg.BHTEntries) == Cfg.BHTEntries);
// the number of rules in use may not exceed NrMaxRules
assert (Cfg.NrNonIdempotentRules <= NrMaxRules);
assert (Cfg.NrExecuteRegionRules <= NrMaxRules);
assert (Cfg.NrCachedRegionRules <= NrMaxRules);
// at most 16 PMP entries are accepted
assert (Cfg.NrPMPEntries <= 16);
`endif
// pragma translate_on
endfunction
function automatic logic is_inside_execute_regions (cva6_cfg_t Cfg, logic[63:0] address);
  // Returns 1 when `address` lies in at least one of the first
  // Cfg.NrExecuteRegionRules execute regions, 0 when it lies in none.
  // NOTE(review): with zero configured rules no flag is ever set, so the
  // result is 0 (no address counts as executable) - confirm that this is
  // the intended policy.
  logic [NrMaxRules-1:0] rule_hit;
  rule_hit = '0;
  for (int unsigned rule = 0; rule < Cfg.NrExecuteRegionRules; rule++) begin
    rule_hit[rule] = range_check(
        Cfg.ExecuteRegionAddrBase[rule], Cfg.ExecuteRegionLength[rule], address
    );
  end
  return |rule_hit;
endfunction : is_inside_execute_regions
function automatic logic range_check(logic [63:0] base, logic [63:0] len, logic [63:0] address);
  // Exclusive upper bound of the region (base + len), computed on 65 bits so
  // that a region reaching the top of the 64-bit address space does not wrap.
  logic [64:0] region_end;
  region_end = {1'b0, base} + {1'b0, len};
  // Inside the region iff base <= address < base + len.
  // If len is a power of two and base is properly aligned, this check could
  // be simplified.
  return (base <= address) && ({1'b0, address} < region_end);
endfunction : range_check
function automatic logic is_inside_cacheable_regions (cva6_cfg_t Cfg, logic[63:0] address);
  // Returns 1 when `address` lies in at least one of the first
  // Cfg.NrCachedRegionRules cached regions, 0 when it lies in none.
  // One match flag per rule slot; slots that are not visited stay 0.
  // (Locals of an automatic function are automatic without the keyword.)
  logic [NrMaxRules-1:0] rule_hit;
  rule_hit = '0;
  for (int unsigned rule = 0; rule < Cfg.NrCachedRegionRules; rule++) begin
    rule_hit[rule] = range_check(
        Cfg.CachedRegionAddrBase[rule], Cfg.CachedRegionLength[rule], address
    );
  end
  return |rule_hit;
endfunction : is_inside_cacheable_regions
function automatic logic is_inside_nonidempotent_regions(cva6_cfg_t Cfg, logic [63:0] address);
  // Returns 1 when `address` lies in at least one of the first
  // Cfg.NrNonIdempotentRules non-idempotent regions, 0 when it lies in none.
  // One match flag per rule slot; slots that are not visited stay 0.
  logic [NrMaxRules-1:0] rule_hit;
  rule_hit = '0;
  for (int unsigned rule = 0; rule < Cfg.NrNonIdempotentRules; rule++) begin
    rule_hit[rule] = range_check(
        Cfg.NonIdempotentAddrBase[rule], Cfg.NonIdempotentLength[rule], address
    );
  end
  return |rule_hit;
endfunction : is_inside_nonidempotent_regions
function automatic logic is_inside_execute_regions(cva6_cfg_t Cfg, logic [63:0] address);
  // Returns 1 when `address` lies in at least one of the first
  // Cfg.NrExecuteRegionRules execute regions, 0 when it lies in none.
  // NOTE(review): with zero configured rules no flag is ever set, so the
  // result is 0 (no address counts as executable) - confirm that this is
  // the intended policy.
  logic [NrMaxRules-1:0] rule_hit;
  rule_hit = '0;
  for (int unsigned rule = 0; rule < Cfg.NrExecuteRegionRules; rule++) begin
    rule_hit[rule] = range_check(
        Cfg.ExecuteRegionAddrBase[rule], Cfg.ExecuteRegionLength[rule], address
    );
  end
  return |rule_hit;
endfunction : is_inside_execute_regions
function automatic logic is_inside_cacheable_regions(cva6_cfg_t Cfg, logic [63:0] address);
  // Returns 1 when `address` lies in at least one of the first
  // Cfg.NrCachedRegionRules cached regions, 0 when it lies in none.
  // One match flag per rule slot; slots that are not visited stay 0.
  // (Locals of an automatic function are automatic without the keyword.)
  logic [NrMaxRules-1:0] rule_hit;
  rule_hit = '0;
  for (int unsigned rule = 0; rule < Cfg.NrCachedRegionRules; rule++) begin
    rule_hit[rule] = range_check(
        Cfg.CachedRegionAddrBase[rule], Cfg.CachedRegionLength[rule], address
    );
  end
  return |rule_hit;
endfunction : is_inside_cacheable_regions
endpackage

View file

@ -10,69 +10,69 @@
package cva6_config_pkg;
localparam CVA6ConfigXlen = 32;
localparam CVA6ConfigXlen = 32;
localparam CVA6ConfigFpuEn = 0;
localparam CVA6ConfigF16En = 0;
localparam CVA6ConfigF16AltEn = 0;
localparam CVA6ConfigF8En = 0;
localparam CVA6ConfigFVecEn = 0;
localparam CVA6ConfigFpuEn = 0;
localparam CVA6ConfigF16En = 0;
localparam CVA6ConfigF16AltEn = 0;
localparam CVA6ConfigF8En = 0;
localparam CVA6ConfigFVecEn = 0;
localparam CVA6ConfigCvxifEn = 1;
localparam CVA6ConfigCExtEn = 1;
localparam CVA6ConfigZcbExtEn = 1;
localparam CVA6ConfigAExtEn = 1;
localparam CVA6ConfigBExtEn = 1;
localparam CVA6ConfigVExtEn = 0;
localparam CVA6ConfigZiCondExtEn = 1;
localparam CVA6ConfigCvxifEn = 1;
localparam CVA6ConfigCExtEn = 1;
localparam CVA6ConfigZcbExtEn = 1;
localparam CVA6ConfigAExtEn = 1;
localparam CVA6ConfigBExtEn = 1;
localparam CVA6ConfigVExtEn = 0;
localparam CVA6ConfigZiCondExtEn = 1;
localparam CVA6ConfigAxiIdWidth = 4;
localparam CVA6ConfigAxiAddrWidth = 64;
localparam CVA6ConfigAxiDataWidth = 64;
localparam CVA6ConfigFetchUserEn = 0;
localparam CVA6ConfigFetchUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigDataUserEn = 0;
localparam CVA6ConfigDataUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigAxiIdWidth = 4;
localparam CVA6ConfigAxiAddrWidth = 64;
localparam CVA6ConfigAxiDataWidth = 64;
localparam CVA6ConfigFetchUserEn = 0;
localparam CVA6ConfigFetchUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigDataUserEn = 0;
localparam CVA6ConfigDataUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigIcacheByteSize = 16384;
localparam CVA6ConfigIcacheSetAssoc = 4;
localparam CVA6ConfigIcacheLineWidth = 128;
localparam CVA6ConfigDcacheByteSize = 32768;
localparam CVA6ConfigDcacheSetAssoc = 8;
localparam CVA6ConfigDcacheLineWidth = 128;
localparam CVA6ConfigIcacheByteSize = 16384;
localparam CVA6ConfigIcacheSetAssoc = 4;
localparam CVA6ConfigIcacheLineWidth = 128;
localparam CVA6ConfigDcacheByteSize = 32768;
localparam CVA6ConfigDcacheSetAssoc = 8;
localparam CVA6ConfigDcacheLineWidth = 128;
localparam CVA6ConfigDcacheIdWidth = 1;
localparam CVA6ConfigMemTidWidth = 2;
localparam CVA6ConfigDcacheIdWidth = 1;
localparam CVA6ConfigMemTidWidth = 2;
localparam CVA6ConfigWtDcacheWbufDepth = 8;
localparam CVA6ConfigWtDcacheWbufDepth = 8;
localparam CVA6ConfigNrCommitPorts = 1;
localparam CVA6ConfigNrScoreboardEntries = 4;
localparam CVA6ConfigNrCommitPorts = 1;
localparam CVA6ConfigNrScoreboardEntries = 4;
localparam CVA6ConfigFPGAEn = 0;
localparam CVA6ConfigFPGAEn = 0;
localparam CVA6ConfigNrLoadPipeRegs = 1;
localparam CVA6ConfigNrStorePipeRegs = 0;
localparam CVA6ConfigNrLoadBufEntries = 2;
localparam CVA6ConfigNrLoadPipeRegs = 1;
localparam CVA6ConfigNrStorePipeRegs = 0;
localparam CVA6ConfigNrLoadBufEntries = 2;
localparam CVA6ConfigInstrTlbEntries = 2;
localparam CVA6ConfigDataTlbEntries = 2;
localparam CVA6ConfigInstrTlbEntries = 2;
localparam CVA6ConfigDataTlbEntries = 2;
localparam CVA6ConfigRASDepth = 0;
localparam CVA6ConfigBTBEntries = 0;
localparam CVA6ConfigBHTEntries = 0;
localparam CVA6ConfigRASDepth = 0;
localparam CVA6ConfigBTBEntries = 0;
localparam CVA6ConfigBHTEntries = 0;
localparam CVA6ConfigNrPMPEntries = 8;
localparam CVA6ConfigNrPMPEntries = 8;
localparam CVA6ConfigPerfCounterEn = 0;
localparam CVA6ConfigPerfCounterEn = 0;
localparam config_pkg::cache_type_t CVA6ConfigDcacheType = config_pkg::WT;
localparam config_pkg::cache_type_t CVA6ConfigDcacheType = config_pkg::WT;
localparam CVA6ConfigMmuPresent = 1;
localparam CVA6ConfigMmuPresent = 1;
localparam CVA6ConfigRvfiTrace = 1;
localparam CVA6ConfigRvfiTrace = 1;
localparam config_pkg::cva6_cfg_t cva6_cfg = '{
localparam config_pkg::cva6_cfg_t cva6_cfg = '{
NrCommitPorts: unsigned'(CVA6ConfigNrCommitPorts),
AxiAddrWidth: unsigned'(CVA6ConfigAxiAddrWidth),
AxiDataWidth: unsigned'(CVA6ConfigAxiDataWidth),
@ -91,7 +91,10 @@ package cva6_config_pkg;
CvxifEn: bit'(CVA6ConfigCvxifEn),
ZiCondExtEn: bit'(CVA6ConfigZiCondExtEn),
// Extended
RVF: bit'(0),
RVF:
bit'(
0
),
RVD: bit'(0),
FpPresent: bit'(0),
NSX: bit'(0),
@ -105,24 +108,33 @@ package cva6_config_pkg;
EnableAccelerator: bit'(0),
HaltAddress: 64'h800,
ExceptionAddress: 64'h808,
RASDepth: unsigned'(CVA6ConfigRASDepth),
BTBEntries: unsigned'(CVA6ConfigBTBEntries),
BHTEntries: unsigned'(CVA6ConfigBHTEntries),
RASDepth: unsigned'(CVA6ConfigRASDepth),
BTBEntries: unsigned'(CVA6ConfigBTBEntries),
BHTEntries: unsigned'(CVA6ConfigBHTEntries),
DmBaseAddress: 64'h0,
NrPMPEntries: unsigned'(CVA6ConfigNrPMPEntries),
NOCType: config_pkg::NOC_TYPE_AXI4_ATOP,
NrPMPEntries: unsigned'(CVA6ConfigNrPMPEntries),
NOCType: config_pkg::NOC_TYPE_AXI4_ATOP,
// idempotent region
NrNonIdempotentRules: unsigned'(2),
NrNonIdempotentRules:
unsigned'(
2
),
NonIdempotentAddrBase: 1024'({64'b0, 64'b0}),
NonIdempotentLength: 1024'({64'b0, 64'b0}),
NrExecuteRegionRules: unsigned'(3),
NonIdempotentLength: 1024'({64'b0, 64'b0}),
NrExecuteRegionRules: unsigned'(3),
// DRAM, Boot ROM, Debug Module
ExecuteRegionAddrBase: 1024'({64'h8000_0000, 64'h1_0000, 64'h0}),
ExecuteRegionLength: 1024'({64'h40000000, 64'h10000, 64'h1000}),
ExecuteRegionAddrBase:
1024'(
{64'h8000_0000, 64'h1_0000, 64'h0}
),
ExecuteRegionLength: 1024'({64'h40000000, 64'h10000, 64'h1000}),
// cached region
NrCachedRegionRules: unsigned'(1),
CachedRegionAddrBase: 1024'({64'h8000_0000}),
CachedRegionLength: 1024'({64'h40000000})
};
NrCachedRegionRules:
unsigned'(
1
),
CachedRegionAddrBase: 1024'({64'h8000_0000}),
CachedRegionLength: 1024'({64'h40000000})
};
endpackage

View file

@ -9,69 +9,69 @@
package cva6_config_pkg;
localparam CVA6ConfigXlen = 32;
localparam CVA6ConfigXlen = 32;
localparam CVA6ConfigFpuEn = 0;
localparam CVA6ConfigF16En = 0;
localparam CVA6ConfigF16AltEn = 0;
localparam CVA6ConfigF8En = 0;
localparam CVA6ConfigFVecEn = 0;
localparam CVA6ConfigFpuEn = 0;
localparam CVA6ConfigF16En = 0;
localparam CVA6ConfigF16AltEn = 0;
localparam CVA6ConfigF8En = 0;
localparam CVA6ConfigFVecEn = 0;
localparam CVA6ConfigCvxifEn = 1;
localparam CVA6ConfigCExtEn = 1;
localparam CVA6ConfigZcbExtEn = 1;
localparam CVA6ConfigAExtEn = 0;
localparam CVA6ConfigBExtEn = 1;
localparam CVA6ConfigVExtEn = 0;
localparam CVA6ConfigZiCondExtEn = 0;
localparam CVA6ConfigCvxifEn = 1;
localparam CVA6ConfigCExtEn = 1;
localparam CVA6ConfigZcbExtEn = 1;
localparam CVA6ConfigAExtEn = 0;
localparam CVA6ConfigBExtEn = 1;
localparam CVA6ConfigVExtEn = 0;
localparam CVA6ConfigZiCondExtEn = 0;
localparam CVA6ConfigAxiIdWidth = 4;
localparam CVA6ConfigAxiAddrWidth = 64;
localparam CVA6ConfigAxiDataWidth = 64;
localparam CVA6ConfigFetchUserEn = 0;
localparam CVA6ConfigFetchUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigDataUserEn = 0;
localparam CVA6ConfigDataUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigAxiIdWidth = 4;
localparam CVA6ConfigAxiAddrWidth = 64;
localparam CVA6ConfigAxiDataWidth = 64;
localparam CVA6ConfigFetchUserEn = 0;
localparam CVA6ConfigFetchUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigDataUserEn = 0;
localparam CVA6ConfigDataUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigIcacheByteSize = 16384;
localparam CVA6ConfigIcacheSetAssoc = 4;
localparam CVA6ConfigIcacheLineWidth = 128;
localparam CVA6ConfigDcacheByteSize = 32768;
localparam CVA6ConfigDcacheSetAssoc = 8;
localparam CVA6ConfigDcacheLineWidth = 128;
localparam CVA6ConfigIcacheByteSize = 16384;
localparam CVA6ConfigIcacheSetAssoc = 4;
localparam CVA6ConfigIcacheLineWidth = 128;
localparam CVA6ConfigDcacheByteSize = 32768;
localparam CVA6ConfigDcacheSetAssoc = 8;
localparam CVA6ConfigDcacheLineWidth = 128;
localparam CVA6ConfigDcacheIdWidth = 1;
localparam CVA6ConfigMemTidWidth = 2;
localparam CVA6ConfigDcacheIdWidth = 1;
localparam CVA6ConfigMemTidWidth = 2;
localparam CVA6ConfigWtDcacheWbufDepth = 2;
localparam CVA6ConfigWtDcacheWbufDepth = 2;
localparam CVA6ConfigNrCommitPorts = 1;
localparam CVA6ConfigNrScoreboardEntries = 4;
localparam CVA6ConfigNrCommitPorts = 1;
localparam CVA6ConfigNrScoreboardEntries = 4;
localparam CVA6ConfigFPGAEn = 0;
localparam CVA6ConfigFPGAEn = 0;
localparam CVA6ConfigNrLoadPipeRegs = 1;
localparam CVA6ConfigNrStorePipeRegs = 0;
localparam CVA6ConfigNrLoadBufEntries = 2;
localparam CVA6ConfigNrLoadPipeRegs = 1;
localparam CVA6ConfigNrStorePipeRegs = 0;
localparam CVA6ConfigNrLoadBufEntries = 2;
localparam CVA6ConfigInstrTlbEntries = 2;
localparam CVA6ConfigDataTlbEntries = 2;
localparam CVA6ConfigInstrTlbEntries = 2;
localparam CVA6ConfigDataTlbEntries = 2;
localparam CVA6ConfigRASDepth = 0;
localparam CVA6ConfigBTBEntries = 0;
localparam CVA6ConfigBHTEntries = 0;
localparam CVA6ConfigRASDepth = 0;
localparam CVA6ConfigBTBEntries = 0;
localparam CVA6ConfigBHTEntries = 0;
localparam CVA6ConfigNrPMPEntries = 8;
localparam CVA6ConfigNrPMPEntries = 8;
localparam CVA6ConfigPerfCounterEn = 0;
localparam CVA6ConfigPerfCounterEn = 0;
localparam config_pkg::cache_type_t CVA6ConfigDcacheType = config_pkg::WT;
localparam config_pkg::cache_type_t CVA6ConfigDcacheType = config_pkg::WT;
localparam CVA6ConfigMmuPresent = 0;
localparam CVA6ConfigMmuPresent = 0;
localparam CVA6ConfigRvfiTrace = 1;
localparam CVA6ConfigRvfiTrace = 1;
localparam config_pkg::cva6_cfg_t cva6_cfg = '{
localparam config_pkg::cva6_cfg_t cva6_cfg = '{
NrCommitPorts: unsigned'(CVA6ConfigNrCommitPorts),
AxiAddrWidth: unsigned'(CVA6ConfigAxiAddrWidth),
AxiDataWidth: unsigned'(CVA6ConfigAxiDataWidth),
@ -90,7 +90,10 @@ package cva6_config_pkg;
CvxifEn: bit'(CVA6ConfigCvxifEn),
ZiCondExtEn: bit'(CVA6ConfigZiCondExtEn),
// Extended
RVF: bit'(0),
RVF:
bit'(
0
),
RVD: bit'(0),
FpPresent: bit'(0),
NSX: bit'(0),
@ -104,24 +107,33 @@ package cva6_config_pkg;
EnableAccelerator: bit'(0),
HaltAddress: 64'h800,
ExceptionAddress: 64'h808,
RASDepth: unsigned'(CVA6ConfigRASDepth),
BTBEntries: unsigned'(CVA6ConfigBTBEntries),
BHTEntries: unsigned'(CVA6ConfigBHTEntries),
RASDepth: unsigned'(CVA6ConfigRASDepth),
BTBEntries: unsigned'(CVA6ConfigBTBEntries),
BHTEntries: unsigned'(CVA6ConfigBHTEntries),
DmBaseAddress: 64'h0,
NrPMPEntries: unsigned'(CVA6ConfigNrPMPEntries),
NOCType: config_pkg::NOC_TYPE_AXI4_ATOP,
NrPMPEntries: unsigned'(CVA6ConfigNrPMPEntries),
NOCType: config_pkg::NOC_TYPE_AXI4_ATOP,
// idempotent region
NrNonIdempotentRules: unsigned'(2),
NrNonIdempotentRules:
unsigned'(
2
),
NonIdempotentAddrBase: 1024'({64'b0, 64'b0}),
NonIdempotentLength: 1024'({64'b0, 64'b0}),
NrExecuteRegionRules: unsigned'(3),
NonIdempotentLength: 1024'({64'b0, 64'b0}),
NrExecuteRegionRules: unsigned'(3),
// DRAM, Boot ROM, Debug Module
ExecuteRegionAddrBase: 1024'({64'h8000_0000, 64'h1_0000, 64'h0}),
ExecuteRegionLength: 1024'({64'h40000000, 64'h10000, 64'h1000}),
ExecuteRegionAddrBase:
1024'(
{64'h8000_0000, 64'h1_0000, 64'h0}
),
ExecuteRegionLength: 1024'({64'h40000000, 64'h10000, 64'h1000}),
// cached region
NrCachedRegionRules: unsigned'(1),
CachedRegionAddrBase: 1024'({64'h8000_0000}),
CachedRegionLength: 1024'({64'h40000000})
};
NrCachedRegionRules:
unsigned'(
1
),
CachedRegionAddrBase: 1024'({64'h8000_0000}),
CachedRegionLength: 1024'({64'h40000000})
};
endpackage

View file

@ -10,69 +10,69 @@
package cva6_config_pkg;
localparam CVA6ConfigXlen = 32;
localparam CVA6ConfigXlen = 32;
localparam CVA6ConfigFpuEn = 0;
localparam CVA6ConfigF16En = 0;
localparam CVA6ConfigF16AltEn = 0;
localparam CVA6ConfigF8En = 0;
localparam CVA6ConfigFVecEn = 0;
localparam CVA6ConfigFpuEn = 0;
localparam CVA6ConfigF16En = 0;
localparam CVA6ConfigF16AltEn = 0;
localparam CVA6ConfigF8En = 0;
localparam CVA6ConfigFVecEn = 0;
localparam CVA6ConfigCvxifEn = 0;
localparam CVA6ConfigCExtEn = 0;
localparam CVA6ConfigZcbExtEn = 0;
localparam CVA6ConfigAExtEn = 1;
localparam CVA6ConfigBExtEn = 0;
localparam CVA6ConfigVExtEn = 0;
localparam CVA6ConfigZiCondExtEn = 0;
localparam CVA6ConfigCvxifEn = 0;
localparam CVA6ConfigCExtEn = 0;
localparam CVA6ConfigZcbExtEn = 0;
localparam CVA6ConfigAExtEn = 1;
localparam CVA6ConfigBExtEn = 0;
localparam CVA6ConfigVExtEn = 0;
localparam CVA6ConfigZiCondExtEn = 0;
localparam CVA6ConfigAxiIdWidth = 4;
localparam CVA6ConfigAxiAddrWidth = 64;
localparam CVA6ConfigAxiDataWidth = 64;
localparam CVA6ConfigFetchUserEn = 0;
localparam CVA6ConfigFetchUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigDataUserEn = 0;
localparam CVA6ConfigDataUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigAxiIdWidth = 4;
localparam CVA6ConfigAxiAddrWidth = 64;
localparam CVA6ConfigAxiDataWidth = 64;
localparam CVA6ConfigFetchUserEn = 0;
localparam CVA6ConfigFetchUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigDataUserEn = 0;
localparam CVA6ConfigDataUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigIcacheByteSize = 8192;
localparam CVA6ConfigIcacheSetAssoc = 2;
localparam CVA6ConfigIcacheLineWidth = 128;
localparam CVA6ConfigDcacheByteSize = 8192;
localparam CVA6ConfigDcacheSetAssoc = 2;
localparam CVA6ConfigDcacheLineWidth = 128;
localparam CVA6ConfigIcacheByteSize = 8192;
localparam CVA6ConfigIcacheSetAssoc = 2;
localparam CVA6ConfigIcacheLineWidth = 128;
localparam CVA6ConfigDcacheByteSize = 8192;
localparam CVA6ConfigDcacheSetAssoc = 2;
localparam CVA6ConfigDcacheLineWidth = 128;
localparam CVA6ConfigDcacheIdWidth = 1;
localparam CVA6ConfigMemTidWidth = 2;
localparam CVA6ConfigDcacheIdWidth = 1;
localparam CVA6ConfigMemTidWidth = 2;
localparam CVA6ConfigWtDcacheWbufDepth = 2;
localparam CVA6ConfigWtDcacheWbufDepth = 2;
localparam CVA6ConfigNrCommitPorts = 1;
localparam CVA6ConfigNrScoreboardEntries = 4;
localparam CVA6ConfigNrCommitPorts = 1;
localparam CVA6ConfigNrScoreboardEntries = 4;
localparam CVA6ConfigFPGAEn = 1;
localparam CVA6ConfigFPGAEn = 1;
localparam CVA6ConfigNrLoadPipeRegs = 1;
localparam CVA6ConfigNrStorePipeRegs = 0;
localparam CVA6ConfigNrLoadBufEntries = 2;
localparam CVA6ConfigNrLoadPipeRegs = 1;
localparam CVA6ConfigNrStorePipeRegs = 0;
localparam CVA6ConfigNrLoadBufEntries = 2;
localparam CVA6ConfigInstrTlbEntries = 2;
localparam CVA6ConfigDataTlbEntries = 2;
localparam CVA6ConfigInstrTlbEntries = 2;
localparam CVA6ConfigDataTlbEntries = 2;
localparam CVA6ConfigRASDepth = 2;
localparam CVA6ConfigBTBEntries = 32;
localparam CVA6ConfigBHTEntries = 128;
localparam CVA6ConfigRASDepth = 2;
localparam CVA6ConfigBTBEntries = 32;
localparam CVA6ConfigBHTEntries = 128;
localparam CVA6ConfigNrPMPEntries = 0;
localparam CVA6ConfigNrPMPEntries = 0;
localparam CVA6ConfigPerfCounterEn = 0;
localparam CVA6ConfigPerfCounterEn = 0;
localparam config_pkg::cache_type_t CVA6ConfigDcacheType = config_pkg::WT;
localparam config_pkg::cache_type_t CVA6ConfigDcacheType = config_pkg::WT;
localparam CVA6ConfigMmuPresent = 1;
localparam CVA6ConfigMmuPresent = 1;
localparam CVA6ConfigRvfiTrace = 1;
localparam CVA6ConfigRvfiTrace = 1;
localparam config_pkg::cva6_cfg_t cva6_cfg = '{
localparam config_pkg::cva6_cfg_t cva6_cfg = '{
NrCommitPorts: unsigned'(CVA6ConfigNrCommitPorts),
AxiAddrWidth: unsigned'(CVA6ConfigAxiAddrWidth),
AxiDataWidth: unsigned'(CVA6ConfigAxiDataWidth),
@ -91,7 +91,10 @@ package cva6_config_pkg;
CvxifEn: bit'(CVA6ConfigCvxifEn),
ZiCondExtEn: bit'(CVA6ConfigZiCondExtEn),
// Extended
RVF: bit'(0),
RVF:
bit'(
0
),
RVD: bit'(0),
FpPresent: bit'(0),
NSX: bit'(0),
@ -105,24 +108,33 @@ package cva6_config_pkg;
EnableAccelerator: bit'(0),
HaltAddress: 64'h800,
ExceptionAddress: 64'h808,
RASDepth: unsigned'(CVA6ConfigRASDepth),
BTBEntries: unsigned'(CVA6ConfigBTBEntries),
BHTEntries: unsigned'(CVA6ConfigBHTEntries),
RASDepth: unsigned'(CVA6ConfigRASDepth),
BTBEntries: unsigned'(CVA6ConfigBTBEntries),
BHTEntries: unsigned'(CVA6ConfigBHTEntries),
DmBaseAddress: 64'h0,
NrPMPEntries: unsigned'(CVA6ConfigNrPMPEntries),
NOCType: config_pkg::NOC_TYPE_AXI4_ATOP,
NrPMPEntries: unsigned'(CVA6ConfigNrPMPEntries),
NOCType: config_pkg::NOC_TYPE_AXI4_ATOP,
// idempotent region
NrNonIdempotentRules: unsigned'(2),
NrNonIdempotentRules:
unsigned'(
2
),
NonIdempotentAddrBase: 1024'({64'b0, 64'b0}),
NonIdempotentLength: 1024'({64'b0, 64'b0}),
NrExecuteRegionRules: unsigned'(3),
NonIdempotentLength: 1024'({64'b0, 64'b0}),
NrExecuteRegionRules: unsigned'(3),
// DRAM, Boot ROM, Debug Module
ExecuteRegionAddrBase: 1024'({64'h8000_0000, 64'h1_0000, 64'h0}),
ExecuteRegionLength: 1024'({64'h40000000, 64'h10000, 64'h1000}),
ExecuteRegionAddrBase:
1024'(
{64'h8000_0000, 64'h1_0000, 64'h0}
),
ExecuteRegionLength: 1024'({64'h40000000, 64'h10000, 64'h1000}),
// cached region
NrCachedRegionRules: unsigned'(1),
CachedRegionAddrBase: 1024'({64'h8000_0000}),
CachedRegionLength: 1024'({64'h40000000})
};
NrCachedRegionRules:
unsigned'(
1
),
CachedRegionAddrBase: 1024'({64'h8000_0000}),
CachedRegionLength: 1024'({64'h40000000})
};
endpackage

View file

@ -10,69 +10,69 @@
package cva6_config_pkg;
localparam CVA6ConfigXlen = 32;
localparam CVA6ConfigXlen = 32;
localparam CVA6ConfigFpuEn = 0;
localparam CVA6ConfigF16En = 0;
localparam CVA6ConfigF16AltEn = 0;
localparam CVA6ConfigF8En = 0;
localparam CVA6ConfigFVecEn = 0;
localparam CVA6ConfigFpuEn = 0;
localparam CVA6ConfigF16En = 0;
localparam CVA6ConfigF16AltEn = 0;
localparam CVA6ConfigF8En = 0;
localparam CVA6ConfigFVecEn = 0;
localparam CVA6ConfigCvxifEn = 0;
localparam CVA6ConfigCExtEn = 1;
localparam CVA6ConfigZcbExtEn = 0;
localparam CVA6ConfigAExtEn = 1;
localparam CVA6ConfigBExtEn = 0;
localparam CVA6ConfigVExtEn = 0;
localparam CVA6ConfigZiCondExtEn = 0;
localparam CVA6ConfigCvxifEn = 0;
localparam CVA6ConfigCExtEn = 1;
localparam CVA6ConfigZcbExtEn = 0;
localparam CVA6ConfigAExtEn = 1;
localparam CVA6ConfigBExtEn = 0;
localparam CVA6ConfigVExtEn = 0;
localparam CVA6ConfigZiCondExtEn = 0;
localparam CVA6ConfigAxiIdWidth = 4;
localparam CVA6ConfigAxiAddrWidth = 64;
localparam CVA6ConfigAxiDataWidth = 64;
localparam CVA6ConfigFetchUserEn = 0;
localparam CVA6ConfigFetchUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigDataUserEn = 0;
localparam CVA6ConfigDataUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigAxiIdWidth = 4;
localparam CVA6ConfigAxiAddrWidth = 64;
localparam CVA6ConfigAxiDataWidth = 64;
localparam CVA6ConfigFetchUserEn = 0;
localparam CVA6ConfigFetchUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigDataUserEn = 0;
localparam CVA6ConfigDataUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigIcacheByteSize = 16384;
localparam CVA6ConfigIcacheSetAssoc = 4;
localparam CVA6ConfigIcacheLineWidth = 128;
localparam CVA6ConfigDcacheByteSize = 32768;
localparam CVA6ConfigDcacheSetAssoc = 8;
localparam CVA6ConfigDcacheLineWidth = 128;
localparam CVA6ConfigIcacheByteSize = 16384;
localparam CVA6ConfigIcacheSetAssoc = 4;
localparam CVA6ConfigIcacheLineWidth = 128;
localparam CVA6ConfigDcacheByteSize = 32768;
localparam CVA6ConfigDcacheSetAssoc = 8;
localparam CVA6ConfigDcacheLineWidth = 128;
localparam CVA6ConfigDcacheIdWidth = 1;
localparam CVA6ConfigMemTidWidth = 2;
localparam CVA6ConfigDcacheIdWidth = 1;
localparam CVA6ConfigMemTidWidth = 2;
localparam CVA6ConfigWtDcacheWbufDepth = 8;
localparam CVA6ConfigWtDcacheWbufDepth = 8;
localparam CVA6ConfigNrCommitPorts = 2;
localparam CVA6ConfigNrScoreboardEntries = 8;
localparam CVA6ConfigNrLoadBufEntries = 2;
localparam CVA6ConfigNrCommitPorts = 2;
localparam CVA6ConfigNrScoreboardEntries = 8;
localparam CVA6ConfigNrLoadBufEntries = 2;
localparam CVA6ConfigFPGAEn = 0;
localparam CVA6ConfigFPGAEn = 0;
localparam CVA6ConfigNrLoadPipeRegs = 1;
localparam CVA6ConfigNrStorePipeRegs = 0;
localparam CVA6ConfigNrLoadPipeRegs = 1;
localparam CVA6ConfigNrStorePipeRegs = 0;
localparam CVA6ConfigInstrTlbEntries = 2;
localparam CVA6ConfigDataTlbEntries = 2;
localparam CVA6ConfigInstrTlbEntries = 2;
localparam CVA6ConfigDataTlbEntries = 2;
localparam CVA6ConfigRASDepth = 2;
localparam CVA6ConfigBTBEntries = 32;
localparam CVA6ConfigBHTEntries = 128;
localparam CVA6ConfigRASDepth = 2;
localparam CVA6ConfigBTBEntries = 32;
localparam CVA6ConfigBHTEntries = 128;
localparam CVA6ConfigNrPMPEntries = 8;
localparam CVA6ConfigNrPMPEntries = 8;
localparam CVA6ConfigPerfCounterEn = 1;
localparam CVA6ConfigPerfCounterEn = 1;
localparam config_pkg::cache_type_t CVA6ConfigDcacheType = config_pkg::WT;
localparam config_pkg::cache_type_t CVA6ConfigDcacheType = config_pkg::WT;
localparam CVA6ConfigMmuPresent = 1;
localparam CVA6ConfigMmuPresent = 1;
localparam CVA6ConfigRvfiTrace = 1;
localparam CVA6ConfigRvfiTrace = 1;
localparam config_pkg::cva6_cfg_t cva6_cfg = '{
localparam config_pkg::cva6_cfg_t cva6_cfg = '{
NrCommitPorts: unsigned'(CVA6ConfigNrCommitPorts),
AxiAddrWidth: unsigned'(CVA6ConfigAxiAddrWidth),
AxiDataWidth: unsigned'(CVA6ConfigAxiDataWidth),
@ -91,7 +91,10 @@ package cva6_config_pkg;
CvxifEn: bit'(CVA6ConfigCvxifEn),
ZiCondExtEn: bit'(CVA6ConfigZiCondExtEn),
// Extended
RVF: bit'(0),
RVF:
bit'(
0
),
RVD: bit'(0),
FpPresent: bit'(0),
NSX: bit'(0),
@ -105,23 +108,32 @@ package cva6_config_pkg;
EnableAccelerator: bit'(0),
HaltAddress: 64'h800,
ExceptionAddress: 64'h808,
RASDepth: unsigned'(CVA6ConfigRASDepth),
BTBEntries: unsigned'(CVA6ConfigBTBEntries),
BHTEntries: unsigned'(CVA6ConfigBHTEntries),
RASDepth: unsigned'(CVA6ConfigRASDepth),
BTBEntries: unsigned'(CVA6ConfigBTBEntries),
BHTEntries: unsigned'(CVA6ConfigBHTEntries),
DmBaseAddress: 64'h0,
NrPMPEntries: unsigned'(CVA6ConfigNrPMPEntries),
NOCType: config_pkg::NOC_TYPE_AXI4_ATOP,
NrPMPEntries: unsigned'(CVA6ConfigNrPMPEntries),
NOCType: config_pkg::NOC_TYPE_AXI4_ATOP,
// idempotent region
NrNonIdempotentRules: unsigned'(2),
NrNonIdempotentRules:
unsigned'(
2
),
NonIdempotentAddrBase: 1024'({64'b0, 64'b0}),
NonIdempotentLength: 1024'({64'b0, 64'b0}),
NrExecuteRegionRules: unsigned'(3),
NonIdempotentLength: 1024'({64'b0, 64'b0}),
NrExecuteRegionRules: unsigned'(3),
// DRAM, Boot ROM, Debug Module
ExecuteRegionAddrBase: 1024'({64'h8000_0000, 64'h1_0000, 64'h0}),
ExecuteRegionLength: 1024'({64'h40000000, 64'h10000, 64'h1000}),
ExecuteRegionAddrBase:
1024'(
{64'h8000_0000, 64'h1_0000, 64'h0}
),
ExecuteRegionLength: 1024'({64'h40000000, 64'h10000, 64'h1000}),
// cached region
NrCachedRegionRules: unsigned'(1),
CachedRegionAddrBase: 1024'({64'h8000_0000}),
CachedRegionLength: 1024'({64'h40000000})
};
NrCachedRegionRules:
unsigned'(
1
),
CachedRegionAddrBase: 1024'({64'h8000_0000}),
CachedRegionLength: 1024'({64'h40000000})
};
endpackage

View file

@ -10,69 +10,69 @@
package cva6_config_pkg;
localparam CVA6ConfigXlen = 32;
localparam CVA6ConfigXlen = 32;
localparam CVA6ConfigFpuEn = 0;
localparam CVA6ConfigF16En = 0;
localparam CVA6ConfigF16AltEn = 0;
localparam CVA6ConfigF8En = 0;
localparam CVA6ConfigFVecEn = 0;
localparam CVA6ConfigFpuEn = 0;
localparam CVA6ConfigF16En = 0;
localparam CVA6ConfigF16AltEn = 0;
localparam CVA6ConfigF8En = 0;
localparam CVA6ConfigFVecEn = 0;
localparam CVA6ConfigCvxifEn = 0;
localparam CVA6ConfigCExtEn = 1;
localparam CVA6ConfigZcbExtEn = 0;
localparam CVA6ConfigAExtEn = 1;
localparam CVA6ConfigBExtEn = 0;
localparam CVA6ConfigVExtEn = 0;
localparam CVA6ConfigZiCondExtEn = 0;
localparam CVA6ConfigCvxifEn = 0;
localparam CVA6ConfigCExtEn = 1;
localparam CVA6ConfigZcbExtEn = 0;
localparam CVA6ConfigAExtEn = 1;
localparam CVA6ConfigBExtEn = 0;
localparam CVA6ConfigVExtEn = 0;
localparam CVA6ConfigZiCondExtEn = 0;
localparam CVA6ConfigAxiIdWidth = 4;
localparam CVA6ConfigAxiAddrWidth = 64;
localparam CVA6ConfigAxiDataWidth = 64;
localparam CVA6ConfigFetchUserEn = 0;
localparam CVA6ConfigFetchUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigDataUserEn = 0;
localparam CVA6ConfigDataUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigAxiIdWidth = 4;
localparam CVA6ConfigAxiAddrWidth = 64;
localparam CVA6ConfigAxiDataWidth = 64;
localparam CVA6ConfigFetchUserEn = 0;
localparam CVA6ConfigFetchUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigDataUserEn = 0;
localparam CVA6ConfigDataUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigIcacheByteSize = 16384;
localparam CVA6ConfigIcacheSetAssoc = 4;
localparam CVA6ConfigIcacheLineWidth = 128;
localparam CVA6ConfigDcacheByteSize = 32768;
localparam CVA6ConfigDcacheSetAssoc = 8;
localparam CVA6ConfigDcacheLineWidth = 128;
localparam CVA6ConfigIcacheByteSize = 16384;
localparam CVA6ConfigIcacheSetAssoc = 4;
localparam CVA6ConfigIcacheLineWidth = 128;
localparam CVA6ConfigDcacheByteSize = 32768;
localparam CVA6ConfigDcacheSetAssoc = 8;
localparam CVA6ConfigDcacheLineWidth = 128;
localparam CVA6ConfigDcacheIdWidth = 1;
localparam CVA6ConfigMemTidWidth = 2;
localparam CVA6ConfigDcacheIdWidth = 1;
localparam CVA6ConfigMemTidWidth = 2;
localparam CVA6ConfigWtDcacheWbufDepth = 8;
localparam CVA6ConfigWtDcacheWbufDepth = 8;
localparam CVA6ConfigNrCommitPorts = 2;
localparam CVA6ConfigNrScoreboardEntries = 8;
localparam CVA6ConfigNrCommitPorts = 2;
localparam CVA6ConfigNrScoreboardEntries = 8;
localparam CVA6ConfigFPGAEn = 0;
localparam CVA6ConfigFPGAEn = 0;
localparam CVA6ConfigNrLoadPipeRegs = 1;
localparam CVA6ConfigNrStorePipeRegs = 0;
localparam CVA6ConfigNrLoadBufEntries = 2;
localparam CVA6ConfigNrLoadPipeRegs = 1;
localparam CVA6ConfigNrStorePipeRegs = 0;
localparam CVA6ConfigNrLoadBufEntries = 2;
localparam CVA6ConfigInstrTlbEntries = 2;
localparam CVA6ConfigDataTlbEntries = 2;
localparam CVA6ConfigInstrTlbEntries = 2;
localparam CVA6ConfigDataTlbEntries = 2;
localparam CVA6ConfigRASDepth = 2;
localparam CVA6ConfigBTBEntries = 32;
localparam CVA6ConfigBHTEntries = 128;
localparam CVA6ConfigRASDepth = 2;
localparam CVA6ConfigBTBEntries = 32;
localparam CVA6ConfigBHTEntries = 128;
localparam CVA6ConfigNrPMPEntries = 8;
localparam CVA6ConfigNrPMPEntries = 8;
localparam CVA6ConfigPerfCounterEn = 1;
localparam CVA6ConfigPerfCounterEn = 1;
localparam config_pkg::cache_type_t CVA6ConfigDcacheType = config_pkg::WT;
localparam config_pkg::cache_type_t CVA6ConfigDcacheType = config_pkg::WT;
localparam CVA6ConfigMmuPresent = 1;
localparam CVA6ConfigMmuPresent = 1;
localparam CVA6ConfigRvfiTrace = 1;
localparam CVA6ConfigRvfiTrace = 1;
localparam config_pkg::cva6_cfg_t cva6_cfg = '{
localparam config_pkg::cva6_cfg_t cva6_cfg = '{
NrCommitPorts: unsigned'(CVA6ConfigNrCommitPorts),
AxiAddrWidth: unsigned'(CVA6ConfigAxiAddrWidth),
AxiDataWidth: unsigned'(CVA6ConfigAxiDataWidth),
@ -91,7 +91,10 @@ package cva6_config_pkg;
CvxifEn: bit'(CVA6ConfigCvxifEn),
ZiCondExtEn: bit'(CVA6ConfigZiCondExtEn),
// Extended
RVF: bit'(0),
RVF:
bit'(
0
),
RVD: bit'(0),
FpPresent: bit'(0),
NSX: bit'(0),
@ -105,24 +108,33 @@ package cva6_config_pkg;
EnableAccelerator: bit'(0),
HaltAddress: 64'h800,
ExceptionAddress: 64'h808,
RASDepth: unsigned'(CVA6ConfigRASDepth),
BTBEntries: unsigned'(CVA6ConfigBTBEntries),
BHTEntries: unsigned'(CVA6ConfigBHTEntries),
RASDepth: unsigned'(CVA6ConfigRASDepth),
BTBEntries: unsigned'(CVA6ConfigBTBEntries),
BHTEntries: unsigned'(CVA6ConfigBHTEntries),
DmBaseAddress: 64'h0,
NrPMPEntries: unsigned'(CVA6ConfigNrPMPEntries),
NOCType: config_pkg::NOC_TYPE_AXI4_ATOP,
NrPMPEntries: unsigned'(CVA6ConfigNrPMPEntries),
NOCType: config_pkg::NOC_TYPE_AXI4_ATOP,
// idempotent region
NrNonIdempotentRules: unsigned'(2),
NrNonIdempotentRules:
unsigned'(
2
),
NonIdempotentAddrBase: 1024'({64'b0, 64'b0}),
NonIdempotentLength: 1024'({64'b0, 64'b0}),
NrExecuteRegionRules: unsigned'(3),
NonIdempotentLength: 1024'({64'b0, 64'b0}),
NrExecuteRegionRules: unsigned'(3),
// DRAM, Boot ROM, Debug Module
ExecuteRegionAddrBase: 1024'({64'h8000_0000, 64'h1_0000, 64'h0}),
ExecuteRegionLength: 1024'({64'h40000000, 64'h10000, 64'h1000}),
ExecuteRegionAddrBase:
1024'(
{64'h8000_0000, 64'h1_0000, 64'h0}
),
ExecuteRegionLength: 1024'({64'h40000000, 64'h10000, 64'h1000}),
// cached region
NrCachedRegionRules: unsigned'(1),
CachedRegionAddrBase: 1024'({64'h8000_0000}),
CachedRegionLength: 1024'({64'h40000000})
};
NrCachedRegionRules:
unsigned'(
1
),
CachedRegionAddrBase: 1024'({64'h8000_0000}),
CachedRegionLength: 1024'({64'h40000000})
};
endpackage

View file

@ -10,69 +10,69 @@
package cva6_config_pkg;
localparam CVA6ConfigXlen = 32;
localparam CVA6ConfigXlen = 32;
localparam CVA6ConfigFpuEn = 1;
localparam CVA6ConfigF16En = 0;
localparam CVA6ConfigF16AltEn = 0;
localparam CVA6ConfigF8En = 0;
localparam CVA6ConfigFVecEn = 0;
localparam CVA6ConfigFpuEn = 1;
localparam CVA6ConfigF16En = 0;
localparam CVA6ConfigF16AltEn = 0;
localparam CVA6ConfigF8En = 0;
localparam CVA6ConfigFVecEn = 0;
localparam CVA6ConfigCvxifEn = 0;
localparam CVA6ConfigCExtEn = 1;
localparam CVA6ConfigZcbExtEn = 0;
localparam CVA6ConfigAExtEn = 1;
localparam CVA6ConfigBExtEn = 0;
localparam CVA6ConfigVExtEn = 0;
localparam CVA6ConfigZiCondExtEn = 0;
localparam CVA6ConfigCvxifEn = 0;
localparam CVA6ConfigCExtEn = 1;
localparam CVA6ConfigZcbExtEn = 0;
localparam CVA6ConfigAExtEn = 1;
localparam CVA6ConfigBExtEn = 0;
localparam CVA6ConfigVExtEn = 0;
localparam CVA6ConfigZiCondExtEn = 0;
localparam CVA6ConfigAxiIdWidth = 4;
localparam CVA6ConfigAxiAddrWidth = 64;
localparam CVA6ConfigAxiDataWidth = 64;
localparam CVA6ConfigFetchUserEn = 0;
localparam CVA6ConfigFetchUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigDataUserEn = 0;
localparam CVA6ConfigDataUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigAxiIdWidth = 4;
localparam CVA6ConfigAxiAddrWidth = 64;
localparam CVA6ConfigAxiDataWidth = 64;
localparam CVA6ConfigFetchUserEn = 0;
localparam CVA6ConfigFetchUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigDataUserEn = 0;
localparam CVA6ConfigDataUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigIcacheByteSize = 16384;
localparam CVA6ConfigIcacheSetAssoc = 4;
localparam CVA6ConfigIcacheLineWidth = 128;
localparam CVA6ConfigDcacheByteSize = 32768;
localparam CVA6ConfigDcacheSetAssoc = 8;
localparam CVA6ConfigDcacheLineWidth = 128;
localparam CVA6ConfigIcacheByteSize = 16384;
localparam CVA6ConfigIcacheSetAssoc = 4;
localparam CVA6ConfigIcacheLineWidth = 128;
localparam CVA6ConfigDcacheByteSize = 32768;
localparam CVA6ConfigDcacheSetAssoc = 8;
localparam CVA6ConfigDcacheLineWidth = 128;
localparam CVA6ConfigDcacheIdWidth = 1;
localparam CVA6ConfigMemTidWidth = 2;
localparam CVA6ConfigDcacheIdWidth = 1;
localparam CVA6ConfigMemTidWidth = 2;
localparam CVA6ConfigWtDcacheWbufDepth = 8;
localparam CVA6ConfigWtDcacheWbufDepth = 8;
localparam CVA6ConfigNrCommitPorts = 2;
localparam CVA6ConfigNrScoreboardEntries = 8;
localparam CVA6ConfigNrCommitPorts = 2;
localparam CVA6ConfigNrScoreboardEntries = 8;
localparam CVA6ConfigFPGAEn = 0;
localparam CVA6ConfigFPGAEn = 0;
localparam CVA6ConfigNrLoadPipeRegs = 1;
localparam CVA6ConfigNrStorePipeRegs = 0;
localparam CVA6ConfigNrLoadBufEntries = 2;
localparam CVA6ConfigNrLoadPipeRegs = 1;
localparam CVA6ConfigNrStorePipeRegs = 0;
localparam CVA6ConfigNrLoadBufEntries = 2;
localparam CVA6ConfigInstrTlbEntries = 2;
localparam CVA6ConfigDataTlbEntries = 2;
localparam CVA6ConfigInstrTlbEntries = 2;
localparam CVA6ConfigDataTlbEntries = 2;
localparam CVA6ConfigRASDepth = 2;
localparam CVA6ConfigBTBEntries = 32;
localparam CVA6ConfigBHTEntries = 128;
localparam CVA6ConfigRASDepth = 2;
localparam CVA6ConfigBTBEntries = 32;
localparam CVA6ConfigBHTEntries = 128;
localparam CVA6ConfigNrPMPEntries = 8;
localparam CVA6ConfigNrPMPEntries = 8;
localparam CVA6ConfigPerfCounterEn = 1;
localparam CVA6ConfigPerfCounterEn = 1;
localparam config_pkg::cache_type_t CVA6ConfigDcacheType = config_pkg::WB;
localparam config_pkg::cache_type_t CVA6ConfigDcacheType = config_pkg::WB;
localparam CVA6ConfigMmuPresent = 1;
localparam CVA6ConfigMmuPresent = 1;
localparam CVA6ConfigRvfiTrace = 1;
localparam CVA6ConfigRvfiTrace = 1;
localparam config_pkg::cva6_cfg_t cva6_cfg = '{
localparam config_pkg::cva6_cfg_t cva6_cfg = '{
NrCommitPorts: unsigned'(CVA6ConfigNrCommitPorts),
AxiAddrWidth: unsigned'(CVA6ConfigAxiAddrWidth),
AxiDataWidth: unsigned'(CVA6ConfigAxiDataWidth),
@ -91,7 +91,10 @@ package cva6_config_pkg;
CvxifEn: bit'(CVA6ConfigCvxifEn),
ZiCondExtEn: bit'(CVA6ConfigZiCondExtEn),
// Extended
RVF: bit'(0),
RVF:
bit'(
0
),
RVD: bit'(0),
FpPresent: bit'(0),
NSX: bit'(0),
@ -105,24 +108,33 @@ package cva6_config_pkg;
EnableAccelerator: bit'(0),
HaltAddress: 64'h800,
ExceptionAddress: 64'h808,
RASDepth: unsigned'(CVA6ConfigRASDepth),
BTBEntries: unsigned'(CVA6ConfigBTBEntries),
BHTEntries: unsigned'(CVA6ConfigBHTEntries),
RASDepth: unsigned'(CVA6ConfigRASDepth),
BTBEntries: unsigned'(CVA6ConfigBTBEntries),
BHTEntries: unsigned'(CVA6ConfigBHTEntries),
DmBaseAddress: 64'h0,
NrPMPEntries: unsigned'(CVA6ConfigNrPMPEntries),
NOCType: config_pkg::NOC_TYPE_AXI4_ATOP,
NrPMPEntries: unsigned'(CVA6ConfigNrPMPEntries),
NOCType: config_pkg::NOC_TYPE_AXI4_ATOP,
// idempotent region
NrNonIdempotentRules: unsigned'(2),
NrNonIdempotentRules:
unsigned'(
2
),
NonIdempotentAddrBase: 1024'({64'b0, 64'b0}),
NonIdempotentLength: 1024'({64'b0, 64'b0}),
NrExecuteRegionRules: unsigned'(3),
NonIdempotentLength: 1024'({64'b0, 64'b0}),
NrExecuteRegionRules: unsigned'(3),
// DRAM, Boot ROM, Debug Module
ExecuteRegionAddrBase: 1024'({64'h8000_0000, 64'h1_0000, 64'h0}),
ExecuteRegionLength: 1024'({64'h40000000, 64'h10000, 64'h1000}),
ExecuteRegionAddrBase:
1024'(
{64'h8000_0000, 64'h1_0000, 64'h0}
),
ExecuteRegionLength: 1024'({64'h40000000, 64'h10000, 64'h1000}),
// cached region
NrCachedRegionRules: unsigned'(1),
CachedRegionAddrBase: 1024'({64'h8000_0000}),
CachedRegionLength: 1024'({64'h40000000})
};
NrCachedRegionRules:
unsigned'(
1
),
CachedRegionAddrBase: 1024'({64'h8000_0000}),
CachedRegionLength: 1024'({64'h40000000})
};
endpackage

View file

@ -10,68 +10,68 @@
package cva6_config_pkg;
localparam CVA6ConfigXlen = 64;
localparam CVA6ConfigXlen = 64;
localparam CVA6ConfigFpuEn = 1;
localparam CVA6ConfigF16En = 1;
localparam CVA6ConfigF16AltEn = 0;
localparam CVA6ConfigF8En = 0;
localparam CVA6ConfigFVecEn = 0;
localparam CVA6ConfigFpuEn = 1;
localparam CVA6ConfigF16En = 1;
localparam CVA6ConfigF16AltEn = 0;
localparam CVA6ConfigF8En = 0;
localparam CVA6ConfigFVecEn = 0;
localparam CVA6ConfigCvxifEn = 0;
localparam CVA6ConfigCExtEn = 1;
localparam CVA6ConfigZcbExtEn = 0;
localparam CVA6ConfigAExtEn = 1;
localparam CVA6ConfigBExtEn = 0;
localparam CVA6ConfigVExtEn = 1;
localparam CVA6ConfigCvxifEn = 0;
localparam CVA6ConfigCExtEn = 1;
localparam CVA6ConfigZcbExtEn = 0;
localparam CVA6ConfigAExtEn = 1;
localparam CVA6ConfigBExtEn = 0;
localparam CVA6ConfigVExtEn = 1;
localparam CVA6ConfigAxiIdWidth = 4;
localparam CVA6ConfigAxiAddrWidth = 64;
localparam CVA6ConfigAxiDataWidth = 64;
localparam CVA6ConfigFetchUserEn = 0;
localparam CVA6ConfigFetchUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigDataUserEn = 0;
localparam CVA6ConfigDataUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigAxiIdWidth = 4;
localparam CVA6ConfigAxiAddrWidth = 64;
localparam CVA6ConfigAxiDataWidth = 64;
localparam CVA6ConfigFetchUserEn = 0;
localparam CVA6ConfigFetchUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigDataUserEn = 0;
localparam CVA6ConfigDataUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigIcacheByteSize = 16384;
localparam CVA6ConfigIcacheSetAssoc = 4;
localparam CVA6ConfigIcacheLineWidth = 128;
localparam CVA6ConfigDcacheByteSize = 32768;
localparam CVA6ConfigDcacheSetAssoc = 8;
localparam CVA6ConfigDcacheLineWidth = 128;
localparam CVA6ConfigIcacheByteSize = 16384;
localparam CVA6ConfigIcacheSetAssoc = 4;
localparam CVA6ConfigIcacheLineWidth = 128;
localparam CVA6ConfigDcacheByteSize = 32768;
localparam CVA6ConfigDcacheSetAssoc = 8;
localparam CVA6ConfigDcacheLineWidth = 128;
localparam CVA6ConfigDcacheIdWidth = 1;
localparam CVA6ConfigMemTidWidth = 2;
localparam CVA6ConfigDcacheIdWidth = 1;
localparam CVA6ConfigMemTidWidth = 2;
localparam CVA6ConfigWtDcacheWbufDepth = 8;
localparam CVA6ConfigWtDcacheWbufDepth = 8;
localparam CVA6ConfigNrCommitPorts = 2;
localparam CVA6ConfigNrScoreboardEntries = 8;
localparam CVA6ConfigNrCommitPorts = 2;
localparam CVA6ConfigNrScoreboardEntries = 8;
localparam CVA6ConfigFPGAEn = 0;
localparam CVA6ConfigFPGAEn = 0;
localparam CVA6ConfigNrLoadPipeRegs = 1;
localparam CVA6ConfigNrStorePipeRegs = 0;
localparam CVA6ConfigNrLoadBufEntries = 2;
localparam CVA6ConfigNrLoadPipeRegs = 1;
localparam CVA6ConfigNrStorePipeRegs = 0;
localparam CVA6ConfigNrLoadBufEntries = 2;
localparam CVA6ConfigInstrTlbEntries = 16;
localparam CVA6ConfigDataTlbEntries = 16;
localparam CVA6ConfigInstrTlbEntries = 16;
localparam CVA6ConfigDataTlbEntries = 16;
localparam CVA6ConfigRASDepth = 2;
localparam CVA6ConfigBTBEntries = 32;
localparam CVA6ConfigBHTEntries = 128;
localparam CVA6ConfigRASDepth = 2;
localparam CVA6ConfigBTBEntries = 32;
localparam CVA6ConfigBHTEntries = 128;
localparam CVA6ConfigNrPMPEntries = 8;
localparam CVA6ConfigNrPMPEntries = 8;
localparam CVA6ConfigPerfCounterEn = 1;
localparam CVA6ConfigPerfCounterEn = 1;
localparam config_pkg::cache_type_t CVA6ConfigDcacheType = config_pkg::WT;
localparam config_pkg::cache_type_t CVA6ConfigDcacheType = config_pkg::WT;
localparam CVA6ConfigMmuPresent = 1;
localparam CVA6ConfigMmuPresent = 1;
localparam CVA6ConfigRvfiTrace = 1;
localparam CVA6ConfigRvfiTrace = 1;
localparam config_pkg::cva6_cfg_t cva6_cfg = '{
localparam config_pkg::cva6_cfg_t cva6_cfg = '{
NrCommitPorts: unsigned'(CVA6ConfigNrCommitPorts),
AxiAddrWidth: unsigned'(CVA6ConfigAxiAddrWidth),
AxiDataWidth: unsigned'(CVA6ConfigAxiDataWidth),
@ -90,7 +90,10 @@ package cva6_config_pkg;
CvxifEn: bit'(CVA6ConfigCvxifEn),
ZiCondExtEn: bit'(CVA6ConfigZiCondExtEn),
// Extended
RVF: bit'(0),
RVF:
bit'(
0
),
RVD: bit'(0),
FpPresent: bit'(0),
NSX: bit'(0),
@ -104,24 +107,33 @@ package cva6_config_pkg;
EnableAccelerator: bit'(0),
HaltAddress: 64'h800,
ExceptionAddress: 64'h808,
RASDepth: unsigned'(CVA6ConfigRASDepth),
BTBEntries: unsigned'(CVA6ConfigBTBEntries),
BHTEntries: unsigned'(CVA6ConfigBHTEntries),
RASDepth: unsigned'(CVA6ConfigRASDepth),
BTBEntries: unsigned'(CVA6ConfigBTBEntries),
BHTEntries: unsigned'(CVA6ConfigBHTEntries),
DmBaseAddress: 64'h0,
NrPMPEntries: unsigned'(CVA6ConfigNrPMPEntries),
NOCType: config_pkg::NOC_TYPE_L15_BIG_ENDIAN,
NrPMPEntries: unsigned'(CVA6ConfigNrPMPEntries),
NOCType: config_pkg::NOC_TYPE_L15_BIG_ENDIAN,
// idempotent region
NrNonIdempotentRules: unsigned'(2),
NrNonIdempotentRules:
unsigned'(
2
),
NonIdempotentAddrBase: 1024'({64'b0, 64'b0}),
NonIdempotentLength: 1024'({64'b0, 64'b0}),
NrExecuteRegionRules: unsigned'(3),
NonIdempotentLength: 1024'({64'b0, 64'b0}),
NrExecuteRegionRules: unsigned'(3),
// DRAM, Boot ROM, Debug Module
ExecuteRegionAddrBase: 1024'({64'h8000_0000, 64'h1_0000, 64'h0}),
ExecuteRegionLength: 1024'({64'h40000000, 64'h10000, 64'h1000}),
ExecuteRegionAddrBase:
1024'(
{64'h8000_0000, 64'h1_0000, 64'h0}
),
ExecuteRegionLength: 1024'({64'h40000000, 64'h10000, 64'h1000}),
// cached region
NrCachedRegionRules: unsigned'(1),
CachedRegionAddrBase: 1024'({64'h8000_0000}),
CachedRegionLength: 1024'({64'h40000000})
};
NrCachedRegionRules:
unsigned'(
1
),
CachedRegionAddrBase: 1024'({64'h8000_0000}),
CachedRegionLength: 1024'({64'h40000000})
};
endpackage

View file

@ -10,69 +10,69 @@
package cva6_config_pkg;
localparam CVA6ConfigXlen = 64;
localparam CVA6ConfigXlen = 64;
localparam CVA6ConfigFpuEn = 1;
localparam CVA6ConfigF16En = 0;
localparam CVA6ConfigF16AltEn = 0;
localparam CVA6ConfigF8En = 0;
localparam CVA6ConfigFVecEn = 0;
localparam CVA6ConfigFpuEn = 1;
localparam CVA6ConfigF16En = 0;
localparam CVA6ConfigF16AltEn = 0;
localparam CVA6ConfigF8En = 0;
localparam CVA6ConfigFVecEn = 0;
localparam CVA6ConfigCvxifEn = 1;
localparam CVA6ConfigCExtEn = 1;
localparam CVA6ConfigZcbExtEn = 1;
localparam CVA6ConfigAExtEn = 1;
localparam CVA6ConfigBExtEn = 1;
localparam CVA6ConfigVExtEn = 0;
localparam CVA6ConfigZiCondExtEn = 1;
localparam CVA6ConfigCvxifEn = 1;
localparam CVA6ConfigCExtEn = 1;
localparam CVA6ConfigZcbExtEn = 1;
localparam CVA6ConfigAExtEn = 1;
localparam CVA6ConfigBExtEn = 1;
localparam CVA6ConfigVExtEn = 0;
localparam CVA6ConfigZiCondExtEn = 1;
localparam CVA6ConfigAxiIdWidth = 4;
localparam CVA6ConfigAxiAddrWidth = 64;
localparam CVA6ConfigAxiDataWidth = 64;
localparam CVA6ConfigFetchUserEn = 0;
localparam CVA6ConfigFetchUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigDataUserEn = 0;
localparam CVA6ConfigDataUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigAxiIdWidth = 4;
localparam CVA6ConfigAxiAddrWidth = 64;
localparam CVA6ConfigAxiDataWidth = 64;
localparam CVA6ConfigFetchUserEn = 0;
localparam CVA6ConfigFetchUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigDataUserEn = 0;
localparam CVA6ConfigDataUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigIcacheByteSize = 16384;
localparam CVA6ConfigIcacheSetAssoc = 4;
localparam CVA6ConfigIcacheLineWidth = 128;
localparam CVA6ConfigDcacheByteSize = 32768;
localparam CVA6ConfigDcacheSetAssoc = 8;
localparam CVA6ConfigDcacheLineWidth = 128;
localparam CVA6ConfigIcacheByteSize = 16384;
localparam CVA6ConfigIcacheSetAssoc = 4;
localparam CVA6ConfigIcacheLineWidth = 128;
localparam CVA6ConfigDcacheByteSize = 32768;
localparam CVA6ConfigDcacheSetAssoc = 8;
localparam CVA6ConfigDcacheLineWidth = 128;
localparam CVA6ConfigDcacheIdWidth = 1;
localparam CVA6ConfigMemTidWidth = 2;
localparam CVA6ConfigDcacheIdWidth = 1;
localparam CVA6ConfigMemTidWidth = 2;
localparam CVA6ConfigWtDcacheWbufDepth = 8;
localparam CVA6ConfigWtDcacheWbufDepth = 8;
localparam CVA6ConfigNrCommitPorts = 2;
localparam CVA6ConfigNrScoreboardEntries = 8;
localparam CVA6ConfigNrCommitPorts = 2;
localparam CVA6ConfigNrScoreboardEntries = 8;
localparam CVA6ConfigFPGAEn = 0;
localparam CVA6ConfigFPGAEn = 0;
localparam CVA6ConfigNrLoadPipeRegs = 1;
localparam CVA6ConfigNrStorePipeRegs = 0;
localparam CVA6ConfigNrLoadBufEntries = 2;
localparam CVA6ConfigNrLoadPipeRegs = 1;
localparam CVA6ConfigNrStorePipeRegs = 0;
localparam CVA6ConfigNrLoadBufEntries = 2;
localparam CVA6ConfigInstrTlbEntries = 16;
localparam CVA6ConfigDataTlbEntries = 16;
localparam CVA6ConfigInstrTlbEntries = 16;
localparam CVA6ConfigDataTlbEntries = 16;
localparam CVA6ConfigRASDepth = 2;
localparam CVA6ConfigBTBEntries = 32;
localparam CVA6ConfigBHTEntries = 128;
localparam CVA6ConfigRASDepth = 2;
localparam CVA6ConfigBTBEntries = 32;
localparam CVA6ConfigBHTEntries = 128;
localparam CVA6ConfigNrPMPEntries = 8;
localparam CVA6ConfigNrPMPEntries = 8;
localparam CVA6ConfigPerfCounterEn = 1;
localparam CVA6ConfigPerfCounterEn = 1;
localparam config_pkg::cache_type_t CVA6ConfigDcacheType = config_pkg::WT;
localparam config_pkg::cache_type_t CVA6ConfigDcacheType = config_pkg::WT;
localparam CVA6ConfigMmuPresent = 1;
localparam CVA6ConfigMmuPresent = 1;
localparam CVA6ConfigRvfiTrace = 1;
localparam CVA6ConfigRvfiTrace = 1;
localparam config_pkg::cva6_cfg_t cva6_cfg = '{
localparam config_pkg::cva6_cfg_t cva6_cfg = '{
NrCommitPorts: unsigned'(CVA6ConfigNrCommitPorts),
AxiAddrWidth: unsigned'(CVA6ConfigAxiAddrWidth),
AxiDataWidth: unsigned'(CVA6ConfigAxiDataWidth),
@ -91,7 +91,10 @@ package cva6_config_pkg;
CvxifEn: bit'(CVA6ConfigCvxifEn),
ZiCondExtEn: bit'(CVA6ConfigZiCondExtEn),
// Extended
RVF: bit'(0),
RVF:
bit'(
0
),
RVD: bit'(0),
FpPresent: bit'(0),
NSX: bit'(0),
@ -105,24 +108,33 @@ package cva6_config_pkg;
EnableAccelerator: bit'(0),
HaltAddress: 64'h800,
ExceptionAddress: 64'h808,
RASDepth: unsigned'(CVA6ConfigRASDepth),
BTBEntries: unsigned'(CVA6ConfigBTBEntries),
BHTEntries: unsigned'(CVA6ConfigBHTEntries),
RASDepth: unsigned'(CVA6ConfigRASDepth),
BTBEntries: unsigned'(CVA6ConfigBTBEntries),
BHTEntries: unsigned'(CVA6ConfigBHTEntries),
DmBaseAddress: 64'h0,
NrPMPEntries: unsigned'(CVA6ConfigNrPMPEntries),
NOCType: config_pkg::NOC_TYPE_AXI4_ATOP,
NrPMPEntries: unsigned'(CVA6ConfigNrPMPEntries),
NOCType: config_pkg::NOC_TYPE_AXI4_ATOP,
// idempotent region
NrNonIdempotentRules: unsigned'(2),
NrNonIdempotentRules:
unsigned'(
2
),
NonIdempotentAddrBase: 1024'({64'b0, 64'b0}),
NonIdempotentLength: 1024'({64'b0, 64'b0}),
NrExecuteRegionRules: unsigned'(3),
NonIdempotentLength: 1024'({64'b0, 64'b0}),
NrExecuteRegionRules: unsigned'(3),
// DRAM, Boot ROM, Debug Module
ExecuteRegionAddrBase: 1024'({64'h8000_0000, 64'h1_0000, 64'h0}),
ExecuteRegionLength: 1024'({64'h40000000, 64'h10000, 64'h1000}),
ExecuteRegionAddrBase:
1024'(
{64'h8000_0000, 64'h1_0000, 64'h0}
),
ExecuteRegionLength: 1024'({64'h40000000, 64'h10000, 64'h1000}),
// cached region
NrCachedRegionRules: unsigned'(1),
CachedRegionAddrBase: 1024'({64'h8000_0000}),
CachedRegionLength: 1024'({64'h40000000})
};
NrCachedRegionRules:
unsigned'(
1
),
CachedRegionAddrBase: 1024'({64'h8000_0000}),
CachedRegionLength: 1024'({64'h40000000})
};
endpackage

View file

@ -17,69 +17,69 @@
package cva6_config_pkg;
localparam CVA6ConfigXlen = 64;
localparam CVA6ConfigXlen = 64;
localparam CVA6ConfigFpuEn = 1;
localparam CVA6ConfigF16En = 0;
localparam CVA6ConfigF16AltEn = 0;
localparam CVA6ConfigF8En = 0;
localparam CVA6ConfigFVecEn = 0;
localparam CVA6ConfigFpuEn = 1;
localparam CVA6ConfigF16En = 0;
localparam CVA6ConfigF16AltEn = 0;
localparam CVA6ConfigF8En = 0;
localparam CVA6ConfigFVecEn = 0;
localparam CVA6ConfigCvxifEn = 1;
localparam CVA6ConfigCExtEn = 1;
localparam CVA6ConfigZcbExtEn = 1;
localparam CVA6ConfigAExtEn = 1;
localparam CVA6ConfigBExtEn = 1;
localparam CVA6ConfigVExtEn = 0;
localparam CVA6ConfigZiCondExtEn = 1;
localparam CVA6ConfigCvxifEn = 1;
localparam CVA6ConfigCExtEn = 1;
localparam CVA6ConfigZcbExtEn = 1;
localparam CVA6ConfigAExtEn = 1;
localparam CVA6ConfigBExtEn = 1;
localparam CVA6ConfigVExtEn = 0;
localparam CVA6ConfigZiCondExtEn = 1;
localparam CVA6ConfigAxiIdWidth = 4;
localparam CVA6ConfigAxiAddrWidth = 64;
localparam CVA6ConfigAxiDataWidth = 64;
localparam CVA6ConfigFetchUserEn = 0;
localparam CVA6ConfigFetchUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigDataUserEn = 0;
localparam CVA6ConfigDataUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigAxiIdWidth = 4;
localparam CVA6ConfigAxiAddrWidth = 64;
localparam CVA6ConfigAxiDataWidth = 64;
localparam CVA6ConfigFetchUserEn = 0;
localparam CVA6ConfigFetchUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigDataUserEn = 0;
localparam CVA6ConfigDataUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigIcacheByteSize = 16384;
localparam CVA6ConfigIcacheSetAssoc = 4;
localparam CVA6ConfigIcacheLineWidth = 128;
localparam CVA6ConfigDcacheByteSize = 32768;
localparam CVA6ConfigDcacheSetAssoc = 8;
localparam CVA6ConfigDcacheLineWidth = 128;
localparam CVA6ConfigIcacheByteSize = 16384;
localparam CVA6ConfigIcacheSetAssoc = 4;
localparam CVA6ConfigIcacheLineWidth = 128;
localparam CVA6ConfigDcacheByteSize = 32768;
localparam CVA6ConfigDcacheSetAssoc = 8;
localparam CVA6ConfigDcacheLineWidth = 128;
localparam CVA6ConfigDcacheIdWidth = 3;
localparam CVA6ConfigMemTidWidth = CVA6ConfigAxiIdWidth;
localparam CVA6ConfigDcacheIdWidth = 3;
localparam CVA6ConfigMemTidWidth = CVA6ConfigAxiIdWidth;
localparam CVA6ConfigWtDcacheWbufDepth = 8;
localparam CVA6ConfigWtDcacheWbufDepth = 8;
localparam CVA6ConfigNrCommitPorts = 2;
localparam CVA6ConfigNrScoreboardEntries = 8;
localparam CVA6ConfigNrCommitPorts = 2;
localparam CVA6ConfigNrScoreboardEntries = 8;
localparam CVA6ConfigFPGAEn = 0;
localparam CVA6ConfigFPGAEn = 0;
localparam CVA6ConfigNrLoadPipeRegs = 1;
localparam CVA6ConfigNrStorePipeRegs = 0;
localparam CVA6ConfigNrLoadBufEntries = 8;
localparam CVA6ConfigNrLoadPipeRegs = 1;
localparam CVA6ConfigNrStorePipeRegs = 0;
localparam CVA6ConfigNrLoadBufEntries = 8;
localparam CVA6ConfigInstrTlbEntries = 16;
localparam CVA6ConfigDataTlbEntries = 16;
localparam CVA6ConfigInstrTlbEntries = 16;
localparam CVA6ConfigDataTlbEntries = 16;
localparam CVA6ConfigRASDepth = 2;
localparam CVA6ConfigBTBEntries = 32;
localparam CVA6ConfigBHTEntries = 128;
localparam CVA6ConfigRASDepth = 2;
localparam CVA6ConfigBTBEntries = 32;
localparam CVA6ConfigBHTEntries = 128;
localparam CVA6ConfigNrPMPEntries = 8;
localparam CVA6ConfigNrPMPEntries = 8;
localparam CVA6ConfigPerfCounterEn = 1;
localparam CVA6ConfigPerfCounterEn = 1;
localparam config_pkg::cache_type_t CVA6ConfigDcacheType = config_pkg::HPDCACHE;
localparam config_pkg::cache_type_t CVA6ConfigDcacheType = config_pkg::HPDCACHE;
localparam CVA6ConfigMmuPresent = 1;
localparam CVA6ConfigMmuPresent = 1;
localparam CVA6ConfigRvfiTrace = 1;
localparam CVA6ConfigRvfiTrace = 1;
localparam config_pkg::cva6_cfg_t cva6_cfg = '{
localparam config_pkg::cva6_cfg_t cva6_cfg = '{
NrCommitPorts: unsigned'(CVA6ConfigNrCommitPorts),
AxiAddrWidth: unsigned'(CVA6ConfigAxiAddrWidth),
AxiDataWidth: unsigned'(CVA6ConfigAxiDataWidth),
@ -98,7 +98,10 @@ package cva6_config_pkg;
CvxifEn: bit'(CVA6ConfigCvxifEn),
ZiCondExtEn: bit'(CVA6ConfigZiCondExtEn),
// Extended
RVF: bit'(0),
RVF:
bit'(
0
),
RVD: bit'(0),
FpPresent: bit'(0),
NSX: bit'(0),
@ -119,17 +122,26 @@ package cva6_config_pkg;
NrPMPEntries: unsigned'(CVA6ConfigNrPMPEntries),
NOCType: config_pkg::NOC_TYPE_AXI4_ATOP,
// idempotent region
NrNonIdempotentRules: unsigned'(2),
NrNonIdempotentRules:
unsigned'(
2
),
NonIdempotentAddrBase: 1024'({64'b0, 64'b0}),
NonIdempotentLength: 1024'({64'b0, 64'b0}),
NrExecuteRegionRules: unsigned'(3),
NonIdempotentLength: 1024'({64'b0, 64'b0}),
NrExecuteRegionRules: unsigned'(3),
// DRAM, Boot ROM, Debug Module
ExecuteRegionAddrBase: 1024'({64'h8000_0000, 64'h1_0000, 64'h0}),
ExecuteRegionLength: 1024'({64'h40000000, 64'h10000, 64'h1000}),
ExecuteRegionAddrBase:
1024'(
{64'h8000_0000, 64'h1_0000, 64'h0}
),
ExecuteRegionLength: 1024'({64'h40000000, 64'h10000, 64'h1000}),
// cached region
NrCachedRegionRules: unsigned'(1),
CachedRegionAddrBase: 1024'({64'h8000_0000}),
CachedRegionLength: 1024'({64'h40000000})
};
NrCachedRegionRules:
unsigned'(
1
),
CachedRegionAddrBase: 1024'({64'h8000_0000}),
CachedRegionLength: 1024'({64'h40000000})
};
endpackage

View file

@ -10,69 +10,69 @@
package cva6_config_pkg;
localparam CVA6ConfigXlen = 64;
localparam CVA6ConfigXlen = 64;
localparam CVA6ConfigFpuEn = 1;
localparam CVA6ConfigF16En = 0;
localparam CVA6ConfigF16AltEn = 0;
localparam CVA6ConfigF8En = 0;
localparam CVA6ConfigFVecEn = 0;
localparam CVA6ConfigFpuEn = 1;
localparam CVA6ConfigF16En = 0;
localparam CVA6ConfigF16AltEn = 0;
localparam CVA6ConfigF8En = 0;
localparam CVA6ConfigFVecEn = 0;
localparam CVA6ConfigCvxifEn = 0;
localparam CVA6ConfigCExtEn = 1;
localparam CVA6ConfigZcbExtEn = 0;
localparam CVA6ConfigAExtEn = 1;
localparam CVA6ConfigBExtEn = 0;
localparam CVA6ConfigVExtEn = 0;
localparam CVA6ConfigZiCondExtEn = 0;
localparam CVA6ConfigCvxifEn = 0;
localparam CVA6ConfigCExtEn = 1;
localparam CVA6ConfigZcbExtEn = 0;
localparam CVA6ConfigAExtEn = 1;
localparam CVA6ConfigBExtEn = 0;
localparam CVA6ConfigVExtEn = 0;
localparam CVA6ConfigZiCondExtEn = 0;
localparam CVA6ConfigAxiIdWidth = 4;
localparam CVA6ConfigAxiAddrWidth = 64;
localparam CVA6ConfigAxiDataWidth = 64;
localparam CVA6ConfigFetchUserEn = 0;
localparam CVA6ConfigFetchUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigDataUserEn = 0;
localparam CVA6ConfigDataUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigAxiIdWidth = 4;
localparam CVA6ConfigAxiAddrWidth = 64;
localparam CVA6ConfigAxiDataWidth = 64;
localparam CVA6ConfigFetchUserEn = 0;
localparam CVA6ConfigFetchUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigDataUserEn = 0;
localparam CVA6ConfigDataUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigIcacheByteSize = 16384;
localparam CVA6ConfigIcacheSetAssoc = 4;
localparam CVA6ConfigIcacheLineWidth = 128;
localparam CVA6ConfigDcacheByteSize = 32768;
localparam CVA6ConfigDcacheSetAssoc = 8;
localparam CVA6ConfigDcacheLineWidth = 128;
localparam CVA6ConfigIcacheByteSize = 16384;
localparam CVA6ConfigIcacheSetAssoc = 4;
localparam CVA6ConfigIcacheLineWidth = 128;
localparam CVA6ConfigDcacheByteSize = 32768;
localparam CVA6ConfigDcacheSetAssoc = 8;
localparam CVA6ConfigDcacheLineWidth = 128;
localparam CVA6ConfigDcacheIdWidth = 1;
localparam CVA6ConfigMemTidWidth = 2;
localparam CVA6ConfigDcacheIdWidth = 1;
localparam CVA6ConfigMemTidWidth = 2;
localparam CVA6ConfigWtDcacheWbufDepth = 8;
localparam CVA6ConfigWtDcacheWbufDepth = 8;
localparam CVA6ConfigNrCommitPorts = 2;
localparam CVA6ConfigNrScoreboardEntries = 8;
localparam CVA6ConfigNrCommitPorts = 2;
localparam CVA6ConfigNrScoreboardEntries = 8;
localparam CVA6ConfigFPGAEn = 0;
localparam CVA6ConfigFPGAEn = 0;
localparam CVA6ConfigNrLoadPipeRegs = 1;
localparam CVA6ConfigNrStorePipeRegs = 0;
localparam CVA6ConfigNrLoadBufEntries = 2;
localparam CVA6ConfigNrLoadPipeRegs = 1;
localparam CVA6ConfigNrStorePipeRegs = 0;
localparam CVA6ConfigNrLoadBufEntries = 2;
localparam CVA6ConfigInstrTlbEntries = 16;
localparam CVA6ConfigDataTlbEntries = 16;
localparam CVA6ConfigInstrTlbEntries = 16;
localparam CVA6ConfigDataTlbEntries = 16;
localparam CVA6ConfigRASDepth = 2;
localparam CVA6ConfigBTBEntries = 32;
localparam CVA6ConfigBHTEntries = 128;
localparam CVA6ConfigRASDepth = 2;
localparam CVA6ConfigBTBEntries = 32;
localparam CVA6ConfigBHTEntries = 128;
localparam CVA6ConfigNrPMPEntries = 8;
localparam CVA6ConfigNrPMPEntries = 8;
localparam CVA6ConfigPerfCounterEn = 1;
localparam CVA6ConfigPerfCounterEn = 1;
localparam config_pkg::cache_type_t CVA6ConfigDcacheType = config_pkg::WT;
localparam config_pkg::cache_type_t CVA6ConfigDcacheType = config_pkg::WT;
localparam CVA6ConfigMmuPresent = 1;
localparam CVA6ConfigMmuPresent = 1;
localparam CVA6ConfigRvfiTrace = 1;
localparam CVA6ConfigRvfiTrace = 1;
localparam config_pkg::cva6_cfg_t cva6_cfg = '{
localparam config_pkg::cva6_cfg_t cva6_cfg = '{
NrCommitPorts: unsigned'(CVA6ConfigNrCommitPorts),
AxiAddrWidth: unsigned'(CVA6ConfigAxiAddrWidth),
AxiDataWidth: unsigned'(CVA6ConfigAxiDataWidth),
@ -91,7 +91,10 @@ package cva6_config_pkg;
CvxifEn: bit'(CVA6ConfigCvxifEn),
ZiCondExtEn: bit'(CVA6ConfigZiCondExtEn),
// Extended
RVF: bit'(0),
RVF:
bit'(
0
),
RVD: bit'(0),
FpPresent: bit'(0),
NSX: bit'(0),
@ -105,24 +108,33 @@ package cva6_config_pkg;
EnableAccelerator: bit'(0),
HaltAddress: 64'h800,
ExceptionAddress: 64'h808,
RASDepth: unsigned'(CVA6ConfigRASDepth),
BTBEntries: unsigned'(CVA6ConfigBTBEntries),
BHTEntries: unsigned'(CVA6ConfigBHTEntries),
RASDepth: unsigned'(CVA6ConfigRASDepth),
BTBEntries: unsigned'(CVA6ConfigBTBEntries),
BHTEntries: unsigned'(CVA6ConfigBHTEntries),
DmBaseAddress: 64'h0,
NrPMPEntries: unsigned'(CVA6ConfigNrPMPEntries),
NOCType: config_pkg::NOC_TYPE_L15_BIG_ENDIAN,
NrPMPEntries: unsigned'(CVA6ConfigNrPMPEntries),
NOCType: config_pkg::NOC_TYPE_L15_BIG_ENDIAN,
// idempotent region
NrNonIdempotentRules: unsigned'(2),
NrNonIdempotentRules:
unsigned'(
2
),
NonIdempotentAddrBase: 1024'({64'b0, 64'b0}),
NonIdempotentLength: 1024'({64'b0, 64'b0}),
NrExecuteRegionRules: unsigned'(3),
NonIdempotentLength: 1024'({64'b0, 64'b0}),
NrExecuteRegionRules: unsigned'(3),
// DRAM, Boot ROM, Debug Module
ExecuteRegionAddrBase: 1024'({64'h8000_0000, 64'h1_0000, 64'h0}),
ExecuteRegionLength: 1024'({64'h40000000, 64'h10000, 64'h1000}),
ExecuteRegionAddrBase:
1024'(
{64'h8000_0000, 64'h1_0000, 64'h0}
),
ExecuteRegionLength: 1024'({64'h40000000, 64'h10000, 64'h1000}),
// cached region
NrCachedRegionRules: unsigned'(1),
CachedRegionAddrBase: 1024'({64'h8000_0000}),
CachedRegionLength: 1024'({64'h40000000})
};
NrCachedRegionRules:
unsigned'(
1
),
CachedRegionAddrBase: 1024'({64'h8000_0000}),
CachedRegionLength: 1024'({64'h40000000})
};
endpackage

View file

@ -10,69 +10,69 @@
package cva6_config_pkg;
localparam CVA6ConfigXlen = 64;
localparam CVA6ConfigXlen = 64;
localparam CVA6ConfigFpuEn = 1;
localparam CVA6ConfigF16En = 0;
localparam CVA6ConfigF16AltEn = 0;
localparam CVA6ConfigF8En = 0;
localparam CVA6ConfigFVecEn = 0;
localparam CVA6ConfigFpuEn = 1;
localparam CVA6ConfigF16En = 0;
localparam CVA6ConfigF16AltEn = 0;
localparam CVA6ConfigF8En = 0;
localparam CVA6ConfigFVecEn = 0;
localparam CVA6ConfigCvxifEn = 1;
localparam CVA6ConfigCExtEn = 1;
localparam CVA6ConfigZcbExtEn = 1;
localparam CVA6ConfigAExtEn = 1;
localparam CVA6ConfigBExtEn = 1;
localparam CVA6ConfigVExtEn = 0;
localparam CVA6ConfigZiCondExtEn = 1;
localparam CVA6ConfigCvxifEn = 1;
localparam CVA6ConfigCExtEn = 1;
localparam CVA6ConfigZcbExtEn = 1;
localparam CVA6ConfigAExtEn = 1;
localparam CVA6ConfigBExtEn = 1;
localparam CVA6ConfigVExtEn = 0;
localparam CVA6ConfigZiCondExtEn = 1;
localparam CVA6ConfigAxiIdWidth = 4;
localparam CVA6ConfigAxiAddrWidth = 64;
localparam CVA6ConfigAxiDataWidth = 64;
localparam CVA6ConfigFetchUserEn = 0;
localparam CVA6ConfigFetchUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigDataUserEn = 0;
localparam CVA6ConfigDataUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigAxiIdWidth = 4;
localparam CVA6ConfigAxiAddrWidth = 64;
localparam CVA6ConfigAxiDataWidth = 64;
localparam CVA6ConfigFetchUserEn = 0;
localparam CVA6ConfigFetchUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigDataUserEn = 0;
localparam CVA6ConfigDataUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigIcacheByteSize = 16384;
localparam CVA6ConfigIcacheSetAssoc = 4;
localparam CVA6ConfigIcacheLineWidth = 128;
localparam CVA6ConfigDcacheByteSize = 32768;
localparam CVA6ConfigDcacheSetAssoc = 8;
localparam CVA6ConfigDcacheLineWidth = 128;
localparam CVA6ConfigIcacheByteSize = 16384;
localparam CVA6ConfigIcacheSetAssoc = 4;
localparam CVA6ConfigIcacheLineWidth = 128;
localparam CVA6ConfigDcacheByteSize = 32768;
localparam CVA6ConfigDcacheSetAssoc = 8;
localparam CVA6ConfigDcacheLineWidth = 128;
localparam CVA6ConfigDcacheIdWidth = 1;
localparam CVA6ConfigMemTidWidth = 2;
localparam CVA6ConfigDcacheIdWidth = 1;
localparam CVA6ConfigMemTidWidth = 2;
localparam CVA6ConfigWtDcacheWbufDepth = 8;
localparam CVA6ConfigWtDcacheWbufDepth = 8;
localparam CVA6ConfigNrCommitPorts = 2;
localparam CVA6ConfigNrScoreboardEntries = 8;
localparam CVA6ConfigNrCommitPorts = 2;
localparam CVA6ConfigNrScoreboardEntries = 8;
localparam CVA6ConfigFPGAEn = 0;
localparam CVA6ConfigFPGAEn = 0;
localparam CVA6ConfigNrLoadPipeRegs = 1;
localparam CVA6ConfigNrStorePipeRegs = 0;
localparam CVA6ConfigNrLoadBufEntries = 2;
localparam CVA6ConfigNrLoadPipeRegs = 1;
localparam CVA6ConfigNrStorePipeRegs = 0;
localparam CVA6ConfigNrLoadBufEntries = 2;
localparam CVA6ConfigInstrTlbEntries = 16;
localparam CVA6ConfigDataTlbEntries = 16;
localparam CVA6ConfigInstrTlbEntries = 16;
localparam CVA6ConfigDataTlbEntries = 16;
localparam CVA6ConfigRASDepth = 2;
localparam CVA6ConfigBTBEntries = 32;
localparam CVA6ConfigBHTEntries = 128;
localparam CVA6ConfigRASDepth = 2;
localparam CVA6ConfigBTBEntries = 32;
localparam CVA6ConfigBHTEntries = 128;
localparam CVA6ConfigNrPMPEntries = 8;
localparam CVA6ConfigNrPMPEntries = 8;
localparam CVA6ConfigPerfCounterEn = 1;
localparam CVA6ConfigPerfCounterEn = 1;
localparam config_pkg::cache_type_t CVA6ConfigDcacheType = config_pkg::WB;
localparam config_pkg::cache_type_t CVA6ConfigDcacheType = config_pkg::WB;
localparam CVA6ConfigMmuPresent = 1;
localparam CVA6ConfigMmuPresent = 1;
localparam CVA6ConfigRvfiTrace = 1;
localparam CVA6ConfigRvfiTrace = 1;
localparam config_pkg::cva6_cfg_t cva6_cfg = '{
localparam config_pkg::cva6_cfg_t cva6_cfg = '{
NrCommitPorts: unsigned'(CVA6ConfigNrCommitPorts),
AxiAddrWidth: unsigned'(CVA6ConfigAxiAddrWidth),
AxiDataWidth: unsigned'(CVA6ConfigAxiDataWidth),
@ -91,7 +91,10 @@ package cva6_config_pkg;
CvxifEn: bit'(CVA6ConfigCvxifEn),
ZiCondExtEn: bit'(CVA6ConfigZiCondExtEn),
// Extended
RVF: bit'(0),
RVF:
bit'(
0
),
RVD: bit'(0),
FpPresent: bit'(0),
NSX: bit'(0),
@ -105,24 +108,33 @@ package cva6_config_pkg;
EnableAccelerator: bit'(0),
HaltAddress: 64'h800,
ExceptionAddress: 64'h808,
RASDepth: unsigned'(CVA6ConfigRASDepth),
BTBEntries: unsigned'(CVA6ConfigBTBEntries),
BHTEntries: unsigned'(CVA6ConfigBHTEntries),
RASDepth: unsigned'(CVA6ConfigRASDepth),
BTBEntries: unsigned'(CVA6ConfigBTBEntries),
BHTEntries: unsigned'(CVA6ConfigBHTEntries),
DmBaseAddress: 64'h0,
NrPMPEntries: unsigned'(CVA6ConfigNrPMPEntries),
NOCType: config_pkg::NOC_TYPE_AXI4_ATOP,
NrPMPEntries: unsigned'(CVA6ConfigNrPMPEntries),
NOCType: config_pkg::NOC_TYPE_AXI4_ATOP,
// idempotent region
NrNonIdempotentRules: unsigned'(2),
NrNonIdempotentRules:
unsigned'(
2
),
NonIdempotentAddrBase: 1024'({64'b0, 64'b0}),
NonIdempotentLength: 1024'({64'b0, 64'b0}),
NrExecuteRegionRules: unsigned'(3),
NonIdempotentLength: 1024'({64'b0, 64'b0}),
NrExecuteRegionRules: unsigned'(3),
// DRAM, Boot ROM, Debug Module
ExecuteRegionAddrBase: 1024'({64'h8000_0000, 64'h1_0000, 64'h0}),
ExecuteRegionLength: 1024'({64'h40000000, 64'h10000, 64'h1000}),
ExecuteRegionAddrBase:
1024'(
{64'h8000_0000, 64'h1_0000, 64'h0}
),
ExecuteRegionLength: 1024'({64'h40000000, 64'h10000, 64'h1000}),
// cached region
NrCachedRegionRules: unsigned'(1),
CachedRegionAddrBase: 1024'({64'h8000_0000}),
CachedRegionLength: 1024'({64'h40000000})
};
NrCachedRegionRules:
unsigned'(
1
),
CachedRegionAddrBase: 1024'({64'h8000_0000}),
CachedRegionLength: 1024'({64'h40000000})
};
endpackage

View file

@ -10,68 +10,68 @@
package cva6_config_pkg;
localparam CVA6ConfigXlen = 64;
localparam CVA6ConfigXlen = 64;
localparam CVA6ConfigFpuEn = 1;
localparam CVA6ConfigF16En = 0;
localparam CVA6ConfigF16AltEn = 0;
localparam CVA6ConfigF8En = 0;
localparam CVA6ConfigFVecEn = 0;
localparam CVA6ConfigFpuEn = 1;
localparam CVA6ConfigF16En = 0;
localparam CVA6ConfigF16AltEn = 0;
localparam CVA6ConfigF8En = 0;
localparam CVA6ConfigFVecEn = 0;
localparam CVA6ConfigCvxifEn = 0;
localparam CVA6ConfigCExtEn = 1;
localparam CVA6ConfigZcbExtEn = 0;
localparam CVA6ConfigAExtEn = 1;
localparam CVA6ConfigBExtEn = 0;
localparam CVA6ConfigVExtEn = 1;
localparam CVA6ConfigCvxifEn = 0;
localparam CVA6ConfigCExtEn = 1;
localparam CVA6ConfigZcbExtEn = 0;
localparam CVA6ConfigAExtEn = 1;
localparam CVA6ConfigBExtEn = 0;
localparam CVA6ConfigVExtEn = 1;
localparam CVA6ConfigAxiIdWidth = 4;
localparam CVA6ConfigAxiAddrWidth = 64;
localparam CVA6ConfigAxiDataWidth = 64;
localparam CVA6ConfigFetchUserEn = 0;
localparam CVA6ConfigFetchUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigDataUserEn = 0;
localparam CVA6ConfigDataUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigAxiIdWidth = 4;
localparam CVA6ConfigAxiAddrWidth = 64;
localparam CVA6ConfigAxiDataWidth = 64;
localparam CVA6ConfigFetchUserEn = 0;
localparam CVA6ConfigFetchUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigDataUserEn = 0;
localparam CVA6ConfigDataUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigIcacheByteSize = 16384;
localparam CVA6ConfigIcacheSetAssoc = 4;
localparam CVA6ConfigIcacheLineWidth = 128;
localparam CVA6ConfigDcacheByteSize = 16384;
localparam CVA6ConfigDcacheSetAssoc = 4;
localparam CVA6ConfigDcacheLineWidth = 128;
localparam CVA6ConfigIcacheByteSize = 16384;
localparam CVA6ConfigIcacheSetAssoc = 4;
localparam CVA6ConfigIcacheLineWidth = 128;
localparam CVA6ConfigDcacheByteSize = 16384;
localparam CVA6ConfigDcacheSetAssoc = 4;
localparam CVA6ConfigDcacheLineWidth = 128;
localparam CVA6ConfigDcacheIdWidth = 1;
localparam CVA6ConfigMemTidWidth = 2;
localparam CVA6ConfigDcacheIdWidth = 1;
localparam CVA6ConfigMemTidWidth = 2;
localparam CVA6ConfigWtDcacheWbufDepth = 8;
localparam CVA6ConfigWtDcacheWbufDepth = 8;
localparam CVA6ConfigNrCommitPorts = 1;
localparam CVA6ConfigNrScoreboardEntries = 8;
localparam CVA6ConfigNrCommitPorts = 1;
localparam CVA6ConfigNrScoreboardEntries = 8;
localparam CVA6ConfigFPGAEn = 0;
localparam CVA6ConfigFPGAEn = 0;
localparam CVA6ConfigNrLoadPipeRegs = 1;
localparam CVA6ConfigNrStorePipeRegs = 0;
localparam CVA6ConfigNrLoadBufEntries = 2;
localparam CVA6ConfigNrLoadPipeRegs = 1;
localparam CVA6ConfigNrStorePipeRegs = 0;
localparam CVA6ConfigNrLoadBufEntries = 2;
localparam CVA6ConfigInstrTlbEntries = 16;
localparam CVA6ConfigDataTlbEntries = 16;
localparam CVA6ConfigInstrTlbEntries = 16;
localparam CVA6ConfigDataTlbEntries = 16;
localparam CVA6ConfigRASDepth = 2;
localparam CVA6ConfigBTBEntries = 32;
localparam CVA6ConfigBHTEntries = 128;
localparam CVA6ConfigRASDepth = 2;
localparam CVA6ConfigBTBEntries = 32;
localparam CVA6ConfigBHTEntries = 128;
localparam CVA6ConfigNrPMPEntries = 8;
localparam CVA6ConfigNrPMPEntries = 8;
localparam CVA6ConfigPerfCounterEn = 1;
localparam CVA6ConfigPerfCounterEn = 1;
localparam config_pkg::cache_type_t CVA6ConfigDcacheType = config_pkg::WT;
localparam config_pkg::cache_type_t CVA6ConfigDcacheType = config_pkg::WT;
localparam CVA6ConfigMmuPresent = 1;
localparam CVA6ConfigMmuPresent = 1;
localparam CVA6ConfigRvfiTrace = 1;
localparam CVA6ConfigRvfiTrace = 1;
localparam config_pkg::cva6_cfg_t cva6_cfg = '{
localparam config_pkg::cva6_cfg_t cva6_cfg = '{
NrCommitPorts: unsigned'(CVA6ConfigNrCommitPorts),
AxiAddrWidth: unsigned'(CVA6ConfigAxiAddrWidth),
AxiDataWidth: unsigned'(CVA6ConfigAxiDataWidth),
@ -90,7 +90,10 @@ package cva6_config_pkg;
CvxifEn: bit'(CVA6ConfigCvxifEn),
ZiCondExtEn: bit'(0),
// Extended
RVF: bit'(0),
RVF:
bit'(
0
),
RVD: bit'(0),
FpPresent: bit'(0),
NSX: bit'(0),
@ -104,23 +107,32 @@ package cva6_config_pkg;
EnableAccelerator: bit'(0),
HaltAddress: 64'h800,
ExceptionAddress: 64'h808,
RASDepth: unsigned'(CVA6ConfigRASDepth),
BTBEntries: unsigned'(CVA6ConfigBTBEntries),
BHTEntries: unsigned'(CVA6ConfigBHTEntries),
RASDepth: unsigned'(CVA6ConfigRASDepth),
BTBEntries: unsigned'(CVA6ConfigBTBEntries),
BHTEntries: unsigned'(CVA6ConfigBHTEntries),
DmBaseAddress: 64'h0,
NrPMPEntries: unsigned'(CVA6ConfigNrPMPEntries),
NOCType: config_pkg::NOC_TYPE_AXI4_ATOP,
NrPMPEntries: unsigned'(CVA6ConfigNrPMPEntries),
NOCType: config_pkg::NOC_TYPE_AXI4_ATOP,
// idempotent region
NrNonIdempotentRules: unsigned'(2),
NrNonIdempotentRules:
unsigned'(
2
),
NonIdempotentAddrBase: 1024'({64'b0, 64'b0}),
NonIdempotentLength: 1024'({64'b0, 64'b0}),
NrExecuteRegionRules: unsigned'(3),
NonIdempotentLength: 1024'({64'b0, 64'b0}),
NrExecuteRegionRules: unsigned'(3),
// DRAM, Boot ROM, Debug Module
ExecuteRegionAddrBase: 1024'({64'h8000_0000, 64'h1_0000, 64'h0}),
ExecuteRegionLength: 1024'({64'h40000000, 64'h10000, 64'h1000}),
ExecuteRegionAddrBase:
1024'(
{64'h8000_0000, 64'h1_0000, 64'h0}
),
ExecuteRegionLength: 1024'({64'h40000000, 64'h10000, 64'h1000}),
// cached region
NrCachedRegionRules: unsigned'(1),
CachedRegionAddrBase: 1024'({64'h8000_0000}),
CachedRegionLength: 1024'({64'h40000000})
};
NrCachedRegionRules:
unsigned'(
1
),
CachedRegionAddrBase: 1024'({64'h8000_0000}),
CachedRegionLength: 1024'({64'h40000000})
};
endpackage

View file

@ -14,110 +14,108 @@
// needed.
package hpdcache_params_pkg;
// Imports from the CVA6 configuration package
// {{{
import cva6_config_pkg::CVA6ConfigXlen;
import cva6_config_pkg::CVA6ConfigDcacheByteSize;
import cva6_config_pkg::CVA6ConfigDcacheSetAssoc;
import cva6_config_pkg::CVA6ConfigDcacheLineWidth;
import cva6_config_pkg::CVA6ConfigDcacheIdWidth;
import cva6_config_pkg::CVA6ConfigWtDcacheWbufDepth;
import cva6_config_pkg::CVA6ConfigNrLoadBufEntries;
// }}}
// Imports from the CVA6 configuration package
// {{{
import cva6_config_pkg::CVA6ConfigXlen;
import cva6_config_pkg::CVA6ConfigDcacheByteSize;
import cva6_config_pkg::CVA6ConfigDcacheSetAssoc;
import cva6_config_pkg::CVA6ConfigDcacheLineWidth;
import cva6_config_pkg::CVA6ConfigDcacheIdWidth;
import cva6_config_pkg::CVA6ConfigWtDcacheWbufDepth;
import cva6_config_pkg::CVA6ConfigNrLoadBufEntries;
// }}}
// Definition of constants used only in this file
// {{{
localparam int unsigned __BYTES_PER_WAY =
CVA6ConfigDcacheByteSize/CVA6ConfigDcacheSetAssoc;
// Definition of constants used only in this file
// {{{
localparam int unsigned __BYTES_PER_WAY = CVA6ConfigDcacheByteSize / CVA6ConfigDcacheSetAssoc;
localparam int unsigned __BYTES_PER_CACHELINE =
CVA6ConfigDcacheLineWidth/8;
// }}}
localparam int unsigned __BYTES_PER_CACHELINE = CVA6ConfigDcacheLineWidth / 8;
// }}}
// Definition of global constants for the HPDcache data and directory
// {{{
// HPDcache physical address width (in bits)
localparam int unsigned PARAM_PA_WIDTH = riscv::PLEN;
// Definition of global constants for the HPDcache data and directory
// {{{
// HPDcache physical address width (in bits)
localparam int unsigned PARAM_PA_WIDTH = riscv::PLEN;
// HPDcache number of sets
localparam int unsigned PARAM_SETS = __BYTES_PER_WAY/__BYTES_PER_CACHELINE;
// HPDcache number of sets
localparam int unsigned PARAM_SETS = __BYTES_PER_WAY / __BYTES_PER_CACHELINE;
// HPDcache number of ways
localparam int unsigned PARAM_WAYS = CVA6ConfigDcacheSetAssoc;
// HPDcache number of ways
localparam int unsigned PARAM_WAYS = CVA6ConfigDcacheSetAssoc;
// HPDcache word width (bits)
localparam int unsigned PARAM_WORD_WIDTH = CVA6ConfigXlen;
// HPDcache word width (bits)
localparam int unsigned PARAM_WORD_WIDTH = CVA6ConfigXlen;
// HPDcache number of words per cache line
localparam int unsigned PARAM_CL_WORDS = CVA6ConfigDcacheLineWidth/PARAM_WORD_WIDTH;
// HPDcache number of words per cache line
localparam int unsigned PARAM_CL_WORDS = CVA6ConfigDcacheLineWidth / PARAM_WORD_WIDTH;
// HPDcache number of words in the request data channels (request and response)
localparam int unsigned PARAM_REQ_WORDS = 1;
// HPDcache number of words in the request data channels (request and response)
localparam int unsigned PARAM_REQ_WORDS = 1;
// HPDcache request transaction ID width (bits)
localparam int unsigned PARAM_REQ_TRANS_ID_WIDTH = CVA6ConfigDcacheIdWidth;
// HPDcache request transaction ID width (bits)
localparam int unsigned PARAM_REQ_TRANS_ID_WIDTH = CVA6ConfigDcacheIdWidth;
// HPDcache request source ID width (bits)
localparam int unsigned PARAM_REQ_SRC_ID_WIDTH = 3;
// }}}
// HPDcache request source ID width (bits)
localparam int unsigned PARAM_REQ_SRC_ID_WIDTH = 3;
// }}}
// Definition of constants and types for HPDcache data memory
// {{{
localparam int unsigned PARAM_DATA_WAYS_PER_RAM_WORD = 128/PARAM_WORD_WIDTH;
localparam int unsigned PARAM_DATA_SETS_PER_RAM = PARAM_SETS;
// Definition of constants and types for HPDcache data memory
// {{{
localparam int unsigned PARAM_DATA_WAYS_PER_RAM_WORD = 128 / PARAM_WORD_WIDTH;
localparam int unsigned PARAM_DATA_SETS_PER_RAM = PARAM_SETS;
// Whether the HPDcache DATA RAM macros implement:
// - Write byte enable (1'b1)
// - Write bit mask (1'b0)
localparam bit PARAM_DATA_RAM_WBYTEENABLE = 1'b1;
// Whether the HPDcache DATA RAM macros implement:
// - Write byte enable (1'b1)
// - Write bit mask (1'b0)
localparam bit PARAM_DATA_RAM_WBYTEENABLE = 1'b1;
// Number of contiguous memory words that can be accessed
// simultaneously from the cache.
// - This limits the maximum width for the data channel from requesters
// - This impacts the refill latency (more ACCESS_WORDS -> less REFILL LATENCY)
localparam int unsigned PARAM_ACCESS_WORDS = PARAM_CL_WORDS/2;
// }}}
// Number of contiguous memory words that can be accessed
// simultaneously from the cache.
// - This limits the maximum width for the data channel from requesters
// - This impacts the refill latency (more ACCESS_WORDS -> less REFILL LATENCY)
localparam int unsigned PARAM_ACCESS_WORDS = PARAM_CL_WORDS / 2;
// }}}
// Definition of constants and types for the Miss Status Holding Register (MSHR)
// {{{
// HPDcache MSHR number of sets
localparam int unsigned PARAM_MSHR_SETS = 2;
// Definition of constants and types for the Miss Status Holding Register (MSHR)
// {{{
// HPDcache MSHR number of sets
localparam int unsigned PARAM_MSHR_SETS = 2;
// HPDcache MSHR number of ways
localparam int unsigned PARAM_MSHR_WAYS = (CVA6ConfigNrLoadBufEntries > 4) ? 4 : 2;
// HPDcache MSHR number of ways
localparam int unsigned PARAM_MSHR_WAYS = (CVA6ConfigNrLoadBufEntries > 4) ? 4 : 2;
// HPDcache MSHR number of ways in the same SRAM word
localparam int unsigned PARAM_MSHR_WAYS_PER_RAM_WORD = PARAM_MSHR_WAYS > 1 ? 2 : 1;
// HPDcache MSHR number of ways in the same SRAM word
localparam int unsigned PARAM_MSHR_WAYS_PER_RAM_WORD = PARAM_MSHR_WAYS > 1 ? 2 : 1;
// HPDcache MSHR number of sets in the same SRAM
localparam int unsigned PARAM_MSHR_SETS_PER_RAM = PARAM_MSHR_SETS;
// HPDcache MSHR number of sets in the same SRAM
localparam int unsigned PARAM_MSHR_SETS_PER_RAM = PARAM_MSHR_SETS;
// Whether the HPDcache MSHR RAM implements:
// - Write byte enable (1'b1)
// - Write bit mask (1'b0)
localparam bit PARAM_MSHR_RAM_WBYTEENABLE = 1'b1;
// Whether the HPDcache MSHR RAM implements:
// - Write byte enable (1'b1)
// - Write bit mask (1'b0)
localparam bit PARAM_MSHR_RAM_WBYTEENABLE = 1'b1;
// Whether the HPDcache MSHR uses FFs or SRAM
localparam bit PARAM_MSHR_USE_REGBANK = (PARAM_MSHR_SETS*PARAM_MSHR_WAYS) <= 16;
// }}}
// Whether the HPDcache MSHR uses FFs or SRAM
localparam bit PARAM_MSHR_USE_REGBANK = (PARAM_MSHR_SETS * PARAM_MSHR_WAYS) <= 16;
// }}}
// Definition of constants and types for the Write Buffer (WBUF)
// {{{
// HPDcache Write-Buffer number of entries in the directory
localparam int unsigned PARAM_WBUF_DIR_ENTRIES = CVA6ConfigWtDcacheWbufDepth;
// Definition of constants and types for the Write Buffer (WBUF)
// {{{
// HPDcache Write-Buffer number of entries in the directory
localparam int unsigned PARAM_WBUF_DIR_ENTRIES = CVA6ConfigWtDcacheWbufDepth;
// HPDcache Write-Buffer number of entries in the data buffer
localparam int unsigned PARAM_WBUF_DATA_ENTRIES = CVA6ConfigWtDcacheWbufDepth;
// HPDcache Write-Buffer number of entries in the data buffer
localparam int unsigned PARAM_WBUF_DATA_ENTRIES = CVA6ConfigWtDcacheWbufDepth;
// HPDcache Write-Buffer number of words per entry
localparam int unsigned PARAM_WBUF_WORDS = PARAM_REQ_WORDS;
// HPDcache Write-Buffer number of words per entry
localparam int unsigned PARAM_WBUF_WORDS = PARAM_REQ_WORDS;
// HPDcache Write-Buffer threshold counter width (in bits)
localparam int unsigned PARAM_WBUF_TIMECNT_WIDTH = 3;
// }}}
// HPDcache Write-Buffer threshold counter width (in bits)
localparam int unsigned PARAM_WBUF_TIMECNT_WIDTH = 3;
// }}}
// Definition of constants and types for the Replay Table (RTAB)
// {{{
localparam int PARAM_RTAB_ENTRIES = 4;
// }}}
// Definition of constants and types for the Replay Table (RTAB)
// {{{
localparam int PARAM_RTAB_ENTRIES = 4;
// }}}
endpackage

View file

@ -11,100 +11,100 @@
package cvxif_pkg;
localparam X_DATAWIDTH = riscv::XLEN;
localparam X_NUM_RS = ariane_pkg::NR_RGPR_PORTS; //2 or 3
localparam X_ID_WIDTH = ariane_pkg::TRANS_ID_BITS;
localparam X_MEM_WIDTH = 64;
localparam X_RFR_WIDTH = riscv::XLEN;
localparam X_RFW_WIDTH = riscv::XLEN;
localparam X_DATAWIDTH = riscv::XLEN;
localparam X_NUM_RS = ariane_pkg::NR_RGPR_PORTS; //2 or 3
localparam X_ID_WIDTH = ariane_pkg::TRANS_ID_BITS;
localparam X_MEM_WIDTH = 64;
localparam X_RFR_WIDTH = riscv::XLEN;
localparam X_RFW_WIDTH = riscv::XLEN;
typedef struct packed {
logic [15:0] instr;
logic [1:0] mode;
logic [X_ID_WIDTH-1:0] id;
} x_compressed_req_t;
typedef struct packed {
logic [15:0] instr;
logic [1:0] mode;
logic [X_ID_WIDTH-1:0] id;
} x_compressed_req_t;
typedef struct packed {
logic [31:0] instr;
logic accept;
} x_compressed_resp_t;
typedef struct packed {
logic [31:0] instr;
logic accept;
} x_compressed_resp_t;
typedef struct packed {
logic [31:0] instr;
logic [1:0] mode;
logic [X_ID_WIDTH-1:0] id;
logic [X_NUM_RS-1:0][X_RFR_WIDTH-1:0] rs;
logic [X_NUM_RS-1:0] rs_valid;
} x_issue_req_t;
typedef struct packed {
logic [31:0] instr;
logic [1:0] mode;
logic [X_ID_WIDTH-1:0] id;
logic [X_NUM_RS-1:0][X_RFR_WIDTH-1:0] rs;
logic [X_NUM_RS-1:0] rs_valid;
} x_issue_req_t;
typedef struct packed {
logic accept;
logic writeback;
logic dualwrite;
logic dualread;
logic loadstore;
logic exc;
} x_issue_resp_t;
typedef struct packed {
logic accept;
logic writeback;
logic dualwrite;
logic dualread;
logic loadstore;
logic exc;
} x_issue_resp_t;
typedef struct packed {
logic [X_ID_WIDTH-1:0] id;
logic x_commit_kill;
} x_commit_t;
typedef struct packed {
logic [X_ID_WIDTH-1:0] id;
logic x_commit_kill;
} x_commit_t;
typedef struct packed {
logic [X_ID_WIDTH-1:0] id;
logic [31:0] addr;
logic [1:0] mode;
logic we;
logic [1:0] size;
logic [X_MEM_WIDTH-1:0] wdata;
logic last;
logic spec;
} x_mem_req_t;
typedef struct packed {
logic [X_ID_WIDTH-1:0] id;
logic [31:0] addr;
logic [1:0] mode;
logic we;
logic [1:0] size;
logic [X_MEM_WIDTH-1:0] wdata;
logic last;
logic spec;
} x_mem_req_t;
typedef struct packed {
logic exc;
logic [5:0] exccode;
} x_mem_resp_t;
typedef struct packed {
logic exc;
logic [5:0] exccode;
} x_mem_resp_t;
typedef struct packed {
logic [X_ID_WIDTH-1:0] id;
logic [X_MEM_WIDTH-1:0] rdata;
logic err;
} x_mem_result_t ;
typedef struct packed {
logic [X_ID_WIDTH-1:0] id;
logic [X_MEM_WIDTH-1:0] rdata;
logic err;
} x_mem_result_t;
typedef struct packed {
logic [X_ID_WIDTH-1:0] id;
logic [X_RFW_WIDTH-1:0] data;
logic [4:0] rd;
logic we;
logic exc;
logic [5:0] exccode;
} x_result_t ;
typedef struct packed {
logic [X_ID_WIDTH-1:0] id;
logic [X_RFW_WIDTH-1:0] data;
logic [4:0] rd;
logic we;
logic exc;
logic [5:0] exccode;
} x_result_t;
typedef struct packed {
logic x_compressed_valid;
x_compressed_req_t x_compressed_req;
logic x_issue_valid;
x_issue_req_t x_issue_req;
logic x_commit_valid;
x_commit_t x_commit;
logic x_mem_ready;
x_mem_resp_t x_mem_resp;
logic x_mem_result_valid;
x_mem_result_t x_mem_result;
logic x_result_ready;
} cvxif_req_t;
typedef struct packed {
logic x_compressed_valid;
x_compressed_req_t x_compressed_req;
logic x_issue_valid;
x_issue_req_t x_issue_req;
logic x_commit_valid;
x_commit_t x_commit;
logic x_mem_ready;
x_mem_resp_t x_mem_resp;
logic x_mem_result_valid;
x_mem_result_t x_mem_result;
logic x_result_ready;
} cvxif_req_t;
typedef struct packed {
logic x_compressed_ready;
x_compressed_resp_t x_compressed_resp;
logic x_issue_ready;
x_issue_resp_t x_issue_resp;
logic x_mem_valid;
x_mem_req_t x_mem_req;
logic x_result_valid;
x_result_t x_result;
} cvxif_resp_t;
typedef struct packed {
logic x_compressed_ready;
x_compressed_resp_t x_compressed_resp;
logic x_issue_ready;
x_issue_resp_t x_issue_resp;
logic x_mem_valid;
x_mem_req_t x_mem_req;
logic x_result_valid;
x_result_t x_result;
} cvxif_resp_t;
endpackage

View file

@ -15,188 +15,188 @@
`ifndef VERILATOR
package instr_tracer_pkg;
parameter INSTR_NOP = 32'h00_00_00_13;
parameter INSTR_NOP = 32'h00_00_00_13;
parameter INSTR_LUI = { 25'b?, riscv::OpcodeLui };
parameter INSTR_AUIPC = { 25'b?, riscv::OpcodeAuipc };
parameter INSTR_JAL = { 25'b?, riscv::OpcodeJal };
parameter INSTR_JALR = { 17'b?, 3'b000, 5'b?, riscv::OpcodeJalr };
parameter INSTR_LUI = {25'b?, riscv::OpcodeLui};
parameter INSTR_AUIPC = {25'b?, riscv::OpcodeAuipc};
parameter INSTR_JAL = {25'b?, riscv::OpcodeJal};
parameter INSTR_JALR = {17'b?, 3'b000, 5'b?, riscv::OpcodeJalr};
// BRANCH
parameter INSTR_BEQZ = { 7'b?, 5'b0, 5'b?, 3'b000, 5'b?, riscv::OpcodeBranch };
parameter INSTR_BEQ = { 7'b?, 5'b?, 5'b?, 3'b000, 5'b?, riscv::OpcodeBranch };
parameter INSTR_BNEZ = { 7'b?, 5'b0, 5'b?, 3'b001, 5'b?, riscv::OpcodeBranch };
parameter INSTR_BNE = { 7'b?, 5'b?, 5'b?, 3'b001, 5'b?, riscv::OpcodeBranch };
parameter INSTR_BLTZ = { 7'b?, 5'b0, 5'b?, 3'b100, 5'b?, riscv::OpcodeBranch };
parameter INSTR_BLT = { 7'b?, 5'b?, 5'b?, 3'b100, 5'b?, riscv::OpcodeBranch };
parameter INSTR_BGEZ = { 7'b?, 5'b0, 5'b?, 3'b101, 5'b?, riscv::OpcodeBranch };
parameter INSTR_BGE = { 7'b?, 5'b?, 5'b?, 3'b101, 5'b?, riscv::OpcodeBranch };
parameter INSTR_BLTU = { 7'b?, 5'b?, 5'b?, 3'b110, 5'b?, riscv::OpcodeBranch };
parameter INSTR_BGEU = { 7'b?, 5'b?, 5'b?, 3'b111, 5'b?, riscv::OpcodeBranch };
parameter INSTR_BEQZ = {7'b?, 5'b0, 5'b?, 3'b000, 5'b?, riscv::OpcodeBranch};
parameter INSTR_BEQ = {7'b?, 5'b?, 5'b?, 3'b000, 5'b?, riscv::OpcodeBranch};
parameter INSTR_BNEZ = {7'b?, 5'b0, 5'b?, 3'b001, 5'b?, riscv::OpcodeBranch};
parameter INSTR_BNE = {7'b?, 5'b?, 5'b?, 3'b001, 5'b?, riscv::OpcodeBranch};
parameter INSTR_BLTZ = {7'b?, 5'b0, 5'b?, 3'b100, 5'b?, riscv::OpcodeBranch};
parameter INSTR_BLT = {7'b?, 5'b?, 5'b?, 3'b100, 5'b?, riscv::OpcodeBranch};
parameter INSTR_BGEZ = {7'b?, 5'b0, 5'b?, 3'b101, 5'b?, riscv::OpcodeBranch};
parameter INSTR_BGE = {7'b?, 5'b?, 5'b?, 3'b101, 5'b?, riscv::OpcodeBranch};
parameter INSTR_BLTU = {7'b?, 5'b?, 5'b?, 3'b110, 5'b?, riscv::OpcodeBranch};
parameter INSTR_BGEU = {7'b?, 5'b?, 5'b?, 3'b111, 5'b?, riscv::OpcodeBranch};
// OP-IMM
parameter INSTR_LI = { 12'b?, 5'b0, 3'b000, 5'b?, riscv::OpcodeOpImm };
parameter INSTR_ADDI = { 17'b?, 3'b000, 5'b?, riscv::OpcodeOpImm };
parameter INSTR_SLTI = { 17'b?, 3'b010, 5'b?, riscv::OpcodeOpImm };
parameter INSTR_SLTIU = { 17'b?, 3'b011, 5'b?, riscv::OpcodeOpImm };
parameter INSTR_XORI = { 17'b?, 3'b100, 5'b?, riscv::OpcodeOpImm };
parameter INSTR_ORI = { 17'b?, 3'b110, 5'b?, riscv::OpcodeOpImm };
parameter INSTR_ANDI = { 17'b?, 3'b111, 5'b?, riscv::OpcodeOpImm };
parameter INSTR_SLLI = { 6'b000000, 11'b?, 3'b001, 5'b?, riscv::OpcodeOpImm };
parameter INSTR_SRLI = { 6'b000000, 11'b?, 3'b101, 5'b?, riscv::OpcodeOpImm };
parameter INSTR_SRAI = { 6'b010000, 11'b?, 3'b101, 5'b?, riscv::OpcodeOpImm };
parameter INSTR_LI = {12'b?, 5'b0, 3'b000, 5'b?, riscv::OpcodeOpImm};
parameter INSTR_ADDI = {17'b?, 3'b000, 5'b?, riscv::OpcodeOpImm};
parameter INSTR_SLTI = {17'b?, 3'b010, 5'b?, riscv::OpcodeOpImm};
parameter INSTR_SLTIU = {17'b?, 3'b011, 5'b?, riscv::OpcodeOpImm};
parameter INSTR_XORI = {17'b?, 3'b100, 5'b?, riscv::OpcodeOpImm};
parameter INSTR_ORI = {17'b?, 3'b110, 5'b?, riscv::OpcodeOpImm};
parameter INSTR_ANDI = {17'b?, 3'b111, 5'b?, riscv::OpcodeOpImm};
parameter INSTR_SLLI = {6'b000000, 11'b?, 3'b001, 5'b?, riscv::OpcodeOpImm};
parameter INSTR_SRLI = {6'b000000, 11'b?, 3'b101, 5'b?, riscv::OpcodeOpImm};
parameter INSTR_SRAI = {6'b010000, 11'b?, 3'b101, 5'b?, riscv::OpcodeOpImm};
// OP-IMM-32
parameter INSTR_ADDIW = { 17'b?, 3'b000, 5'b?, riscv::OpcodeOpImm32 };
parameter INSTR_SLLIW = { 7'b0000000, 10'b?, 3'b001, 5'b?, riscv::OpcodeOpImm32 };
parameter INSTR_SRLIW = { 7'b0000000, 10'b?, 3'b101, 5'b?, riscv::OpcodeOpImm32 };
parameter INSTR_SRAIW = { 7'b0100000, 10'b?, 3'b101, 5'b?, riscv::OpcodeOpImm32 };
parameter INSTR_ADDIW = {17'b?, 3'b000, 5'b?, riscv::OpcodeOpImm32};
parameter INSTR_SLLIW = {7'b0000000, 10'b?, 3'b001, 5'b?, riscv::OpcodeOpImm32};
parameter INSTR_SRLIW = {7'b0000000, 10'b?, 3'b101, 5'b?, riscv::OpcodeOpImm32};
parameter INSTR_SRAIW = {7'b0100000, 10'b?, 3'b101, 5'b?, riscv::OpcodeOpImm32};
// OP
parameter INSTR_ADD = { 7'b0000000, 10'b?, 3'b000, 5'b?, riscv::OpcodeOp };
parameter INSTR_SUB = { 7'b0100000, 10'b?, 3'b000, 5'b?, riscv::OpcodeOp };
parameter INSTR_SLL = { 7'b0000000, 10'b?, 3'b001, 5'b?, riscv::OpcodeOp };
parameter INSTR_SLT = { 7'b0000000, 10'b?, 3'b010, 5'b?, riscv::OpcodeOp };
parameter INSTR_SLTU = { 7'b0000000, 10'b?, 3'b011, 5'b?, riscv::OpcodeOp };
parameter INSTR_XOR = { 7'b0000000, 10'b?, 3'b100, 5'b?, riscv::OpcodeOp };
parameter INSTR_SRL = { 7'b0000000, 10'b?, 3'b101, 5'b?, riscv::OpcodeOp };
parameter INSTR_SRA = { 7'b0100000, 10'b?, 3'b101, 5'b?, riscv::OpcodeOp };
parameter INSTR_OR = { 7'b0000000, 10'b?, 3'b110, 5'b?, riscv::OpcodeOp };
parameter INSTR_AND = { 7'b0000000, 10'b?, 3'b111, 5'b?, riscv::OpcodeOp };
parameter INSTR_MUL = { 7'b0000001, 10'b?, 3'b???, 5'b?, riscv::OpcodeOp };
parameter INSTR_ADD = {7'b0000000, 10'b?, 3'b000, 5'b?, riscv::OpcodeOp};
parameter INSTR_SUB = {7'b0100000, 10'b?, 3'b000, 5'b?, riscv::OpcodeOp};
parameter INSTR_SLL = {7'b0000000, 10'b?, 3'b001, 5'b?, riscv::OpcodeOp};
parameter INSTR_SLT = {7'b0000000, 10'b?, 3'b010, 5'b?, riscv::OpcodeOp};
parameter INSTR_SLTU = {7'b0000000, 10'b?, 3'b011, 5'b?, riscv::OpcodeOp};
parameter INSTR_XOR = {7'b0000000, 10'b?, 3'b100, 5'b?, riscv::OpcodeOp};
parameter INSTR_SRL = {7'b0000000, 10'b?, 3'b101, 5'b?, riscv::OpcodeOp};
parameter INSTR_SRA = {7'b0100000, 10'b?, 3'b101, 5'b?, riscv::OpcodeOp};
parameter INSTR_OR = {7'b0000000, 10'b?, 3'b110, 5'b?, riscv::OpcodeOp};
parameter INSTR_AND = {7'b0000000, 10'b?, 3'b111, 5'b?, riscv::OpcodeOp};
parameter INSTR_MUL = {7'b0000001, 10'b?, 3'b???, 5'b?, riscv::OpcodeOp};
// OP32
parameter INSTR_ADDW = { 7'b0000000, 10'b?, 3'b000, 5'b?, riscv::OpcodeOp32 };
parameter INSTR_SUBW = { 7'b0100000, 10'b?, 3'b000, 5'b?, riscv::OpcodeOp32 };
parameter INSTR_SLLW = { 7'b0000000, 10'b?, 3'b001, 5'b?, riscv::OpcodeOp32 };
parameter INSTR_SRLW = { 7'b0000000, 10'b?, 3'b101, 5'b?, riscv::OpcodeOp32 };
parameter INSTR_SRAW = { 7'b0100000, 10'b?, 3'b101, 5'b?, riscv::OpcodeOp32 };
parameter INSTR_MULW = { 7'b0000001, 10'b?, 3'b???, 5'b?, riscv::OpcodeOp32 };
parameter INSTR_ADDW = {7'b0000000, 10'b?, 3'b000, 5'b?, riscv::OpcodeOp32};
parameter INSTR_SUBW = {7'b0100000, 10'b?, 3'b000, 5'b?, riscv::OpcodeOp32};
parameter INSTR_SLLW = {7'b0000000, 10'b?, 3'b001, 5'b?, riscv::OpcodeOp32};
parameter INSTR_SRLW = {7'b0000000, 10'b?, 3'b101, 5'b?, riscv::OpcodeOp32};
parameter INSTR_SRAW = {7'b0100000, 10'b?, 3'b101, 5'b?, riscv::OpcodeOp32};
parameter INSTR_MULW = {7'b0000001, 10'b?, 3'b???, 5'b?, riscv::OpcodeOp32};
// MISC-MEM
parameter INSTR_FENCE = { 4'b0, 8'b?, 13'b0, riscv::OpcodeMiscMem };
parameter INSTR_FENCEI = { 17'b0, 3'b001, 5'b0, riscv::OpcodeMiscMem };
parameter INSTR_FENCE = {4'b0, 8'b?, 13'b0, riscv::OpcodeMiscMem};
parameter INSTR_FENCEI = {17'b0, 3'b001, 5'b0, riscv::OpcodeMiscMem};
// SYSTEM
parameter INSTR_CSRW = { 12'b?, 5'b?, 3'b001, 5'b0, riscv::OpcodeSystem };
parameter INSTR_CSRRW = { 12'b?, 5'b?, 3'b001, 5'b?, riscv::OpcodeSystem };
parameter INSTR_CSRR = { 12'b?, 5'b0, 3'b010, 5'b?, riscv::OpcodeSystem };
parameter INSTR_CSRRS = { 12'b?, 5'b?, 3'b010, 5'b?, riscv::OpcodeSystem };
parameter INSTR_CSRS = { 12'b?, 5'b?, 3'b010, 5'b0, riscv::OpcodeSystem };
parameter INSTR_CSRRC = { 12'b?, 5'b?, 3'b011, 5'b?, riscv::OpcodeSystem };
parameter INSTR_CSRC = { 12'b?, 5'b?, 3'b011, 5'b0, riscv::OpcodeSystem };
parameter INSTR_CSRW = {12'b?, 5'b?, 3'b001, 5'b0, riscv::OpcodeSystem};
parameter INSTR_CSRRW = {12'b?, 5'b?, 3'b001, 5'b?, riscv::OpcodeSystem};
parameter INSTR_CSRR = {12'b?, 5'b0, 3'b010, 5'b?, riscv::OpcodeSystem};
parameter INSTR_CSRRS = {12'b?, 5'b?, 3'b010, 5'b?, riscv::OpcodeSystem};
parameter INSTR_CSRS = {12'b?, 5'b?, 3'b010, 5'b0, riscv::OpcodeSystem};
parameter INSTR_CSRRC = {12'b?, 5'b?, 3'b011, 5'b?, riscv::OpcodeSystem};
parameter INSTR_CSRC = {12'b?, 5'b?, 3'b011, 5'b0, riscv::OpcodeSystem};
parameter INSTR_CSRWI = { 17'b?, 3'b101, 5'b0, riscv::OpcodeSystem };
parameter INSTR_CSRRWI = { 17'b?, 3'b101, 5'b?, riscv::OpcodeSystem };
parameter INSTR_CSRSI = { 17'b?, 3'b110, 5'b0, riscv::OpcodeSystem };
parameter INSTR_CSRRSI = { 17'b?, 3'b110, 5'b?, riscv::OpcodeSystem };
parameter INSTR_CSRCI = { 17'b?, 3'b111, 5'b0, riscv::OpcodeSystem };
parameter INSTR_CSRRCI = { 17'b?, 3'b111, 5'b?, riscv::OpcodeSystem };
parameter INSTR_CSRWI = {17'b?, 3'b101, 5'b0, riscv::OpcodeSystem};
parameter INSTR_CSRRWI = {17'b?, 3'b101, 5'b?, riscv::OpcodeSystem};
parameter INSTR_CSRSI = {17'b?, 3'b110, 5'b0, riscv::OpcodeSystem};
parameter INSTR_CSRRSI = {17'b?, 3'b110, 5'b?, riscv::OpcodeSystem};
parameter INSTR_CSRCI = {17'b?, 3'b111, 5'b0, riscv::OpcodeSystem};
parameter INSTR_CSRRCI = {17'b?, 3'b111, 5'b?, riscv::OpcodeSystem};
parameter INSTR_ECALL = { 12'b000000000000, 13'b0, riscv::OpcodeSystem };
parameter INSTR_EBREAK = { 12'b000000000001, 13'b0, riscv::OpcodeSystem };
parameter INSTR_MRET = { 12'b001100000010, 13'b0, riscv::OpcodeSystem };
parameter INSTR_SRET = { 12'b000100000010, 13'b0, riscv::OpcodeSystem };
parameter INSTR_DRET = { 12'b011110110010, 13'b0, riscv::OpcodeSystem };
parameter INSTR_WFI = { 12'b000100000101, 13'b0, riscv::OpcodeSystem };
parameter INSTR_SFENCE = { 12'b0001001?????, 13'b?, riscv::OpcodeSystem };
parameter INSTR_ECALL = {12'b000000000000, 13'b0, riscv::OpcodeSystem};
parameter INSTR_EBREAK = {12'b000000000001, 13'b0, riscv::OpcodeSystem};
parameter INSTR_MRET = {12'b001100000010, 13'b0, riscv::OpcodeSystem};
parameter INSTR_SRET = {12'b000100000010, 13'b0, riscv::OpcodeSystem};
parameter INSTR_DRET = {12'b011110110010, 13'b0, riscv::OpcodeSystem};
parameter INSTR_WFI = {12'b000100000101, 13'b0, riscv::OpcodeSystem};
parameter INSTR_SFENCE = {12'b0001001?????, 13'b?, riscv::OpcodeSystem};
// RV32M
parameter INSTR_PMUL = { 7'b0000001, 10'b?, 3'b000, 5'b?, riscv::OpcodeOp };
parameter INSTR_DIV = { 7'b0000001, 10'b?, 3'b100, 5'b?, riscv::OpcodeOp };
parameter INSTR_DIVU = { 7'b0000001, 10'b?, 3'b101, 5'b?, riscv::OpcodeOp };
parameter INSTR_REM = { 7'b0000001, 10'b?, 3'b110, 5'b?, riscv::OpcodeOp };
parameter INSTR_REMU = { 7'b0000001, 10'b?, 3'b111, 5'b?, riscv::OpcodeOp };
parameter INSTR_PMUL = {7'b0000001, 10'b?, 3'b000, 5'b?, riscv::OpcodeOp};
parameter INSTR_DIV = {7'b0000001, 10'b?, 3'b100, 5'b?, riscv::OpcodeOp};
parameter INSTR_DIVU = {7'b0000001, 10'b?, 3'b101, 5'b?, riscv::OpcodeOp};
parameter INSTR_REM = {7'b0000001, 10'b?, 3'b110, 5'b?, riscv::OpcodeOp};
parameter INSTR_REMU = {7'b0000001, 10'b?, 3'b111, 5'b?, riscv::OpcodeOp};
// RVFD
parameter INSTR_FMADD = { 5'b?, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeMadd};
parameter INSTR_FMSUB = { 5'b?, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeMsub};
parameter INSTR_FNSMSUB = { 5'b?, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeNmsub};
parameter INSTR_FNMADD = { 5'b?, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeNmadd};
parameter INSTR_FMADD = {5'b?, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeMadd};
parameter INSTR_FMSUB = {5'b?, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeMsub};
parameter INSTR_FNSMSUB = {5'b?, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeNmsub};
parameter INSTR_FNMADD = {5'b?, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeNmadd};
parameter INSTR_FADD = { 5'b00000, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FSUB = { 5'b00001, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FMUL = { 5'b00010, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FDIV = { 5'b00011, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FSQRT = { 5'b01011, 2'b?, 5'b0, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FSGNJ = { 5'b00100, 2'b?, 5'b?, 5'b?, 3'b000, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FSGNJN = { 5'b00100, 2'b?, 5'b?, 5'b?, 3'b001, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FSGNJX = { 5'b00100, 2'b?, 5'b?, 5'b?, 3'b010, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FMIN = { 5'b00101, 2'b?, 5'b?, 5'b?, 3'b000, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FMAX = { 5'b00101, 2'b?, 5'b?, 5'b?, 3'b001, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FLE = { 5'b10100, 2'b?, 5'b?, 5'b?, 3'b000, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FLT = { 5'b10100, 2'b?, 5'b?, 5'b?, 3'b001, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FEQ = { 5'b10100, 2'b?, 5'b?, 5'b?, 3'b010, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FADD = {5'b00000, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FSUB = {5'b00001, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FMUL = {5'b00010, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FDIV = {5'b00011, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FSQRT = {5'b01011, 2'b?, 5'b0, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FSGNJ = {5'b00100, 2'b?, 5'b?, 5'b?, 3'b000, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FSGNJN = {5'b00100, 2'b?, 5'b?, 5'b?, 3'b001, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FSGNJX = {5'b00100, 2'b?, 5'b?, 5'b?, 3'b010, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FMIN = {5'b00101, 2'b?, 5'b?, 5'b?, 3'b000, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FMAX = {5'b00101, 2'b?, 5'b?, 5'b?, 3'b001, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FLE = {5'b10100, 2'b?, 5'b?, 5'b?, 3'b000, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FLT = {5'b10100, 2'b?, 5'b?, 5'b?, 3'b001, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FEQ = {5'b10100, 2'b?, 5'b?, 5'b?, 3'b010, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FCVT_F2F = { 5'b01000, 2'b?, 5'b000??, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FMV_F2X = { 5'b11100, 2'b?, 5'b0, 5'b?, 3'b000, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FCLASS = { 5'b11100, 2'b?, 5'b0, 5'b?, 3'b001, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FMV_X2F = { 5'b11110, 2'b?, 5'b0, 5'b?, 3'b000, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FCVT_F2I = { 5'b11000, 2'b?, 5'b000??, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FCVT_I2F = { 5'b11010, 2'b?, 5'b000??, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FCVT_F2F = {5'b01000, 2'b?, 5'b000??, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FMV_F2X = {5'b11100, 2'b?, 5'b0, 5'b?, 3'b000, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FCLASS = {5'b11100, 2'b?, 5'b0, 5'b?, 3'b001, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FMV_X2F = {5'b11110, 2'b?, 5'b0, 5'b?, 3'b000, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FCVT_F2I = {5'b11000, 2'b?, 5'b000??, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FCVT_I2F = {5'b11010, 2'b?, 5'b000??, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp};
// A
parameter INSTR_AMO = {25'b?, riscv::OpcodeAmo };
parameter INSTR_AMO = {25'b?, riscv::OpcodeAmo};
// Load/Stores
parameter [31:0] LB = 32'b?????????????????000?????0000011;
parameter [31:0] LH = 32'b?????????????????001?????0000011;
parameter [31:0] LW = 32'b?????????????????010?????0000011;
parameter [31:0] LD = 32'b?????????????????011?????0000011;
parameter [31:0] LBU = 32'b?????????????????100?????0000011;
parameter [31:0] LHU = 32'b?????????????????101?????0000011;
parameter [31:0] LWU = 32'b?????????????????110?????0000011;
parameter [31:0] FLW = 32'b?????????????????010?????0000111;
parameter [31:0] FLD = 32'b?????????????????011?????0000111;
parameter [31:0] FLQ = 32'b?????????????????100?????0000111;
parameter [31:0] SB = 32'b?????????????????000?????0100011;
parameter [31:0] SH = 32'b?????????????????001?????0100011;
parameter [31:0] SW = 32'b?????????????????010?????0100011;
parameter [31:0] SD = 32'b?????????????????011?????0100011;
parameter [31:0] FSW = 32'b?????????????????010?????0100111;
parameter [31:0] FSD = 32'b?????????????????011?????0100111;
parameter [31:0] FSQ = 32'b?????????????????100?????0100111;
parameter [31:0] LB = 32'b?????????????????000?????0000011;
parameter [31:0] LH = 32'b?????????????????001?????0000011;
parameter [31:0] LW = 32'b?????????????????010?????0000011;
parameter [31:0] LD = 32'b?????????????????011?????0000011;
parameter [31:0] LBU = 32'b?????????????????100?????0000011;
parameter [31:0] LHU = 32'b?????????????????101?????0000011;
parameter [31:0] LWU = 32'b?????????????????110?????0000011;
parameter [31:0] FLW = 32'b?????????????????010?????0000111;
parameter [31:0] FLD = 32'b?????????????????011?????0000111;
parameter [31:0] FLQ = 32'b?????????????????100?????0000111;
parameter [31:0] SB = 32'b?????????????????000?????0100011;
parameter [31:0] SH = 32'b?????????????????001?????0100011;
parameter [31:0] SW = 32'b?????????????????010?????0100011;
parameter [31:0] SD = 32'b?????????????????011?????0100011;
parameter [31:0] FSW = 32'b?????????????????010?????0100111;
parameter [31:0] FSD = 32'b?????????????????011?????0100111;
parameter [31:0] FSQ = 32'b?????????????????100?????0100111;
parameter [31:0] C_ADDI4SPN = 32'b????????????????000???????????00;
parameter [31:0] C_FLD = 32'b????????????????001???????????00;
parameter [31:0] C_LW = 32'b????????????????010???????????00;
parameter [31:0] C_FLW = 32'b????????????????011???????????00;
parameter [31:0] C_FSD = 32'b????????????????101???????????00;
parameter [31:0] C_SW = 32'b????????????????110???????????00;
parameter [31:0] C_FSW = 32'b????????????????111???????????00;
parameter [31:0] C_ADDI = 32'b????????????????000???????????01;
parameter [31:0] C_JAL = 32'b????????????????001???????????01;
parameter [31:0] C_LI = 32'b????????????????010???????????01;
parameter [31:0] C_LUI = 32'b????????????????011???????????01;
parameter [31:0] C_SRLI = 32'b????????????????100?00????????01;
parameter [31:0] C_SRAI = 32'b????????????????100?01????????01;
parameter [31:0] C_ANDI = 32'b????????????????100?10????????01;
parameter [31:0] C_SUB = 32'b????????????????100011???00???01;
parameter [31:0] C_XOR = 32'b????????????????100011???01???01;
parameter [31:0] C_OR = 32'b????????????????100011???10???01;
parameter [31:0] C_AND = 32'b????????????????100011???11???01;
parameter [31:0] C_SUBW = 32'b????????????????100111???00???01;
parameter [31:0] C_ADDW = 32'b????????????????100111???01???01;
parameter [31:0] C_J = 32'b????????????????101???????????01;
parameter [31:0] C_BEQZ = 32'b????????????????110???????????01;
parameter [31:0] C_BNEZ = 32'b????????????????111???????????01;
parameter [31:0] C_SLLI = 32'b????????????????000???????????10;
parameter [31:0] C_FLDSP = 32'b????????????????001???????????10;
parameter [31:0] C_LWSP = 32'b????????????????010???????????10;
parameter [31:0] C_FLWSP = 32'b????????????????011???????????10;
parameter [31:0] C_MV = 32'b????????????????1000??????????10;
parameter [31:0] C_ADD = 32'b????????????????1001??????????10;
parameter [31:0] C_FSDSP = 32'b????????????????101???????????10;
parameter [31:0] C_SWSP = 32'b????????????????110???????????10;
parameter [31:0] C_FSWSP = 32'b????????????????111???????????10;
parameter [31:0] C_NOP = 32'b????????????????0000000000000001;
parameter [31:0] C_FLD = 32'b????????????????001???????????00;
parameter [31:0] C_LW = 32'b????????????????010???????????00;
parameter [31:0] C_FLW = 32'b????????????????011???????????00;
parameter [31:0] C_FSD = 32'b????????????????101???????????00;
parameter [31:0] C_SW = 32'b????????????????110???????????00;
parameter [31:0] C_FSW = 32'b????????????????111???????????00;
parameter [31:0] C_ADDI = 32'b????????????????000???????????01;
parameter [31:0] C_JAL = 32'b????????????????001???????????01;
parameter [31:0] C_LI = 32'b????????????????010???????????01;
parameter [31:0] C_LUI = 32'b????????????????011???????????01;
parameter [31:0] C_SRLI = 32'b????????????????100?00????????01;
parameter [31:0] C_SRAI = 32'b????????????????100?01????????01;
parameter [31:0] C_ANDI = 32'b????????????????100?10????????01;
parameter [31:0] C_SUB = 32'b????????????????100011???00???01;
parameter [31:0] C_XOR = 32'b????????????????100011???01???01;
parameter [31:0] C_OR = 32'b????????????????100011???10???01;
parameter [31:0] C_AND = 32'b????????????????100011???11???01;
parameter [31:0] C_SUBW = 32'b????????????????100111???00???01;
parameter [31:0] C_ADDW = 32'b????????????????100111???01???01;
parameter [31:0] C_J = 32'b????????????????101???????????01;
parameter [31:0] C_BEQZ = 32'b????????????????110???????????01;
parameter [31:0] C_BNEZ = 32'b????????????????111???????????01;
parameter [31:0] C_SLLI = 32'b????????????????000???????????10;
parameter [31:0] C_FLDSP = 32'b????????????????001???????????10;
parameter [31:0] C_LWSP = 32'b????????????????010???????????10;
parameter [31:0] C_FLWSP = 32'b????????????????011???????????10;
parameter [31:0] C_MV = 32'b????????????????1000??????????10;
parameter [31:0] C_ADD = 32'b????????????????1001??????????10;
parameter [31:0] C_FSDSP = 32'b????????????????101???????????10;
parameter [31:0] C_SWSP = 32'b????????????????110???????????10;
parameter [31:0] C_FSWSP = 32'b????????????????111???????????10;
parameter [31:0] C_NOP = 32'b????????????????0000000000000001;
parameter [31:0] C_ADDI16SP = 32'b????????????????011?00010?????01;
parameter [31:0] C_JR = 32'b????????????????1000?????0000010;
parameter [31:0] C_JALR = 32'b????????????????1001?????0000010;
parameter [31:0] C_EBREAK = 32'b????????????????1001000000000010;
parameter [31:0] C_LD = 32'b????????????????011???????????00;
parameter [31:0] C_SD = 32'b????????????????111???????????00;
parameter [31:0] C_ADDIW = 32'b????????????????001???????????01;
parameter [31:0] C_LDSP = 32'b????????????????011???????????10;
parameter [31:0] C_SDSP = 32'b????????????????111???????????10;
parameter [31:0] C_JR = 32'b????????????????1000?????0000010;
parameter [31:0] C_JALR = 32'b????????????????1001?????0000010;
parameter [31:0] C_EBREAK = 32'b????????????????1001000000000010;
parameter [31:0] C_LD = 32'b????????????????011???????????00;
parameter [31:0] C_SD = 32'b????????????????111???????????00;
parameter [31:0] C_ADDIW = 32'b????????????????001???????????01;
parameter [31:0] C_LDSP = 32'b????????????????011???????????10;
parameter [31:0] C_SDSP = 32'b????????????????111???????????10;
endpackage
`endif

File diff suppressed because it is too large Load diff

View file

@ -17,84 +17,81 @@
// Shared types and helper functions for the standard (write-back) data cache.
// All widths are derived from the cache geometry parameters in ariane_pkg.
package std_cache_pkg;

  // Calculated parameter
  // Number of address bits selecting a byte inside one cache line.
  localparam DCACHE_BYTE_OFFSET = $clog2(ariane_pkg::DCACHE_LINE_WIDTH / 8);
  // 2 ** (index bits remaining above the byte offset).
  localparam DCACHE_NUM_WORDS = 2 ** (ariane_pkg::DCACHE_INDEX_WIDTH - DCACHE_BYTE_OFFSET);
  // Two state bits per way.
  localparam DCACHE_DIRTY_WIDTH = ariane_pkg::DCACHE_SET_ASSOC * 2;
  // localparam DECISION_BIT = 30; // bit on which to decide whether the request is cache-able or not

  // Miss status holding register entry.
  typedef struct packed {
    logic [1:0]      id;     // id for which we handle the miss
    logic            valid;
    logic            we;
    logic [55:0]     addr;
    logic [7:0][7:0] wdata;
    logic [7:0]      be;
  } mshr_t;

  // Miss request record.
  typedef struct packed {
    logic        valid;
    logic [63:0] addr;
    logic [7:0]  be;
    logic [1:0]  size;
    logic        we;
    logic [63:0] wdata;
    logic        bypass;
  } miss_req_t;

  // Bypass request record.
  typedef struct packed {
    logic                req;
    ariane_pkg::ad_req_t reqtype;
    ariane_pkg::amo_t    amo;
    logic [3:0]          id;
    logic [63:0]         addr;
    logic [63:0]         wdata;
    logic                we;
    logic [7:0]          be;
    logic [1:0]          size;
  } bypass_req_t;

  // Bypass response record.
  typedef struct packed {
    logic        gnt;
    logic        valid;
    logic [63:0] rdata;
  } bypass_rsp_t;

  // One cache line as stored across the tag, data and state arrays.
  typedef struct packed {
    logic [ariane_pkg::DCACHE_TAG_WIDTH-1:0]  tag;    // tag array
    logic [ariane_pkg::DCACHE_LINE_WIDTH-1:0] data;   // data array
    logic                                     valid;  // state array
    logic                                     dirty;  // state array
  } cache_line_t;

  // cache line byte enable
  typedef struct packed {
    logic [(ariane_pkg::DCACHE_TAG_WIDTH+7)/8-1:0] tag;  // byte enable into tag array
    logic [(ariane_pkg::DCACHE_LINE_WIDTH+7)/8-1:0] data;  // byte enable into data array
    logic [ariane_pkg::DCACHE_SET_ASSOC-1:0]        vldrty; // bit enable into state array (valid for a pair of dirty/valid bits)
  } cl_be_t;

  // convert one hot to bin for -> needed for cache replacement
  // Returns the index of the lowest set bit of `in`.
  // If no bit is set, 0 is returned (previously the function fell off the end
  // without a return statement, yielding an undefined/X result).
  function automatic logic [$clog2(ariane_pkg::DCACHE_SET_ASSOC)-1:0] one_hot_to_bin(
      input logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] in);
    for (int unsigned i = 0; i < ariane_pkg::DCACHE_SET_ASSOC; i++) begin
      if (in[i]) return i;
    end
    return '0;
  endfunction

  // get the first bit set, returns one hot value
  // Scans `valid_dirty` from bit 0 upwards and returns a vector with only that
  // first set bit asserted. If no bit is set, all-zero is returned (previously
  // this path had no return statement, yielding an undefined/X result).
  function automatic logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] get_victim_cl(
      input logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] valid_dirty);
    // one-hot return vector
    logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] oh = '0;
    for (int unsigned i = 0; i < ariane_pkg::DCACHE_SET_ASSOC; i++) begin
      if (valid_dirty[i]) begin
        oh[i] = 1'b1;
        return oh;
      end
    end
    return oh;
  endfunction

endpackage : std_cache_pkg

View file

@ -16,8 +16,8 @@
// configuration in case Ariane is
// instantiated in OpenPiton
`ifdef PITON_ARIANE
`include "l15.tmp.h"
`include "define.tmp.h"
`include "l15.tmp.h"
`include "define.tmp.h"
`endif
package wt_cache_pkg;
@ -27,56 +27,56 @@ package wt_cache_pkg;
`ifdef PITON_ARIANE

// Fallback values, used only when the macros are not already defined.
`ifndef CONFIG_L15_ASSOCIATIVITY
`define CONFIG_L15_ASSOCIATIVITY 4
`endif

`ifndef TLB_CSM_WIDTH
`define TLB_CSM_WIDTH 33
`endif

// PITON_ARIANE build: take the L1.5 geometry from the macros.
localparam L15_SET_ASSOC = `CONFIG_L15_ASSOCIATIVITY;
localparam L15_TLB_CSM_WIDTH = `TLB_CSM_WIDTH;
`else
localparam L15_SET_ASSOC = ariane_pkg::DCACHE_SET_ASSOC;// align with dcache for compatibility with the standard Ariane setup
localparam L15_TLB_CSM_WIDTH = 33;
`endif
// Transaction ID width and way-index widths (log2 of the respective associativity).
localparam L15_TID_WIDTH = ariane_pkg::MEM_TID_WIDTH;
localparam L15_WAY_WIDTH = $clog2(L15_SET_ASSOC);
localparam L1I_WAY_WIDTH = $clog2(ariane_pkg::ICACHE_SET_ASSOC);
localparam L1D_WAY_WIDTH = $clog2(ariane_pkg::DCACHE_SET_ASSOC);

// FIFO depths of L15 adapter
localparam ADAPTER_REQ_FIFO_DEPTH = 2;
localparam ADAPTER_RTRN_FIFO_DEPTH = 2;

// Calculated parameter
localparam ICACHE_OFFSET_WIDTH = $clog2(ariane_pkg::ICACHE_LINE_WIDTH / 8);
localparam ICACHE_NUM_WORDS = 2 ** (ariane_pkg::ICACHE_INDEX_WIDTH - ICACHE_OFFSET_WIDTH);
localparam ICACHE_CL_IDX_WIDTH = $clog2(ICACHE_NUM_WORDS);  // excluding byte offset

localparam DCACHE_OFFSET_WIDTH = $clog2(ariane_pkg::DCACHE_LINE_WIDTH / 8);
localparam DCACHE_NUM_WORDS = 2 ** (ariane_pkg::DCACHE_INDEX_WIDTH - DCACHE_OFFSET_WIDTH);
localparam DCACHE_CL_IDX_WIDTH = $clog2(DCACHE_NUM_WORDS);  // excluding byte offset

// Number of XLEN-wide words per data cache line, and the bits needed to index them.
localparam DCACHE_NUM_BANKS = ariane_pkg::DCACHE_LINE_WIDTH / riscv::XLEN;
localparam DCACHE_NUM_BANKS_WIDTH = $clog2(DCACHE_NUM_BANKS);

// write buffer parameterization
localparam DCACHE_WBUF_DEPTH = ariane_pkg::WT_DCACHE_WBUF_DEPTH;
localparam DCACHE_MAX_TX = 2 ** L15_TID_WIDTH;
localparam CACHE_ID_WIDTH = L15_TID_WIDTH;
// One write buffer entry. The dirty/valid/txblock vectors hold one bit per
// byte of the XLEN-wide data word.
typedef struct packed {
  // DCACHE_TAG_WIDTH + (DCACHE_INDEX_WIDTH - XLEN_ALIGN_BYTES) bits wide
  logic [ariane_pkg::DCACHE_TAG_WIDTH+(ariane_pkg::DCACHE_INDEX_WIDTH-riscv::XLEN_ALIGN_BYTES)-1:0] wtag;
  riscv::xlen_t data;
  logic [ariane_pkg::DCACHE_USER_WIDTH-1:0] user;
  logic [(riscv::XLEN/8)-1:0] dirty;  // byte is dirty
  logic [(riscv::XLEN/8)-1:0] valid;  // byte is valid
  logic [(riscv::XLEN/8)-1:0] txblock;  // byte is part of transaction in-flight
  logic checked;  // if cache state of this word has been checked
  logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] hit_oh;  // valid way in the cache
} wbuffer_t;
// TX status registers are indexed with the transaction ID
@ -95,263 +95,245 @@ package wt_cache_pkg;
DCACHE_LOAD_REQ,
DCACHE_ATOMIC_REQ,
DCACHE_INT_REQ
} dcache_out_t;
} dcache_out_t;
// Return packet types for the data cache (used as dcache_rtrn_t.rtype).
// Enumerators use the default consecutive encoding starting at 0.
typedef enum logic [2:0] {
  DCACHE_INV_REQ,  // no ack from the core required
  DCACHE_STORE_ACK,  // note: this may contain an invalidation vector, too
  DCACHE_LOAD_ACK,
  DCACHE_ATOMIC_ACK,
  DCACHE_INT_ACK
} dcache_in_t;
// Return packet types for the instruction cache (used as icache_rtrn_t.rtype).
// Enumerators use the default consecutive encoding starting at 0.
typedef enum logic [0:0] {
  ICACHE_INV_REQ,  // no ack from the core required
  ICACHE_IFILL_ACK
} icache_in_t;
// icache interface
// Invalidation vector carried in icache return packets.
typedef struct packed {
  logic                                      vld;  // invalidate only affected way
  logic                                      all;  // invalidate all ways
  logic [ariane_pkg::ICACHE_INDEX_WIDTH-1:0] idx;  // physical address to invalidate
  logic [L15_WAY_WIDTH-1:0]                  way;  // way to invalidate
} icache_inval_t;
// Request packet issued by the instruction cache.
typedef struct packed {
  logic [$clog2(ariane_pkg::ICACHE_SET_ASSOC)-1:0] way;  // way to replace
  logic [riscv::PLEN-1:0] paddr;  // physical address
  logic nc;  // noncacheable
  logic [CACHE_ID_WIDTH-1:0] tid;  // thread id (used as transaction id in Ariane)
} icache_req_t;
// Return packet delivered to the instruction cache.
typedef struct packed {
  icache_in_t rtype;  // see definitions above
  logic [ariane_pkg::ICACHE_LINE_WIDTH-1:0] data;  // full cache line width
  logic [ariane_pkg::ICACHE_USER_LINE_WIDTH-1:0] user;  // user bits
  icache_inval_t inv;  // invalidation vector
  logic [CACHE_ID_WIDTH-1:0] tid;  // thread id (used as transaction id in Ariane)
} icache_rtrn_t;
// dcache interface
// Invalidation vector carried in dcache return packets.
typedef struct packed {
  logic                                      vld;  // invalidate only affected way
  logic                                      all;  // invalidate all ways
  logic [ariane_pkg::DCACHE_INDEX_WIDTH-1:0] idx;  // physical address to invalidate
  logic [L15_WAY_WIDTH-1:0]                  way;  // way to invalidate
} dcache_inval_t;
// Request packet issued by the data cache.
typedef struct packed {
  dcache_out_t rtype;  // see definitions above
  logic [2:0] size; // transaction size: 000=Byte 001=2Byte; 010=4Byte; 011=8Byte; 111=Cache line (16/32Byte)
  logic [L1D_WAY_WIDTH-1:0] way;  // way to replace
  logic [riscv::PLEN-1:0] paddr;  // physical address
  riscv::xlen_t data;  // word width of processor (no block stores at the moment)
  logic [ariane_pkg::DATA_USER_WIDTH-1:0] user; // user width of processor (no block stores at the moment)
  logic nc;  // noncacheable
  logic [CACHE_ID_WIDTH-1:0] tid;  // thread id (used as transaction id in Ariane)
  ariane_pkg::amo_t amo_op;  // amo opcode
} dcache_req_t;
// Return packet delivered to the data cache.
typedef struct packed {
  dcache_in_t rtype;  // see definitions above
  logic [ariane_pkg::DCACHE_LINE_WIDTH-1:0] data;  // full cache line width
  logic [ariane_pkg::DCACHE_USER_LINE_WIDTH-1:0] user;  // user bits
  dcache_inval_t inv;  // invalidation vector
  logic [CACHE_ID_WIDTH-1:0] tid;  // thread id (used as transaction id in Ariane)
} dcache_rtrn_t;
// taken from iop.h in openpiton
// to l1.5 (only marked subset is used)
typedef enum logic [4:0] {
  L15_LOAD_RQ    = 5'b00000,  // load request
  L15_IMISS_RQ   = 5'b10000,  // instruction fill request
  L15_STORE_RQ   = 5'b00001,  // store request
  L15_ATOMIC_RQ  = 5'b00110,  // atomic op
  //L15_CAS1_RQ = 5'b00010, // compare and swap1 packet (OpenSparc atomics)
  //L15_CAS2_RQ = 5'b00011, // compare and swap2 packet (OpenSparc atomics)
  //L15_SWAP_RQ = 5'b00110, // swap packet (OpenSparc atomics)
  L15_STRLOAD_RQ = 5'b00100,  // unused
  L15_STRST_RQ   = 5'b00101,  // unused
  L15_STQ_RQ     = 5'b00111,  // unused
  L15_INT_RQ     = 5'b01001,  // interrupt request
  L15_FWD_RQ     = 5'b01101,  // unused
  L15_FWD_RPY    = 5'b01110,  // unused
  L15_RSVD_RQ    = 5'b11111   // unused
} l15_reqtypes_t;
// from l1.5 (only marked subset is used)
typedef enum logic [3:0] {
  L15_LOAD_RET               = 4'b0000,  // load packet
  // L15_INV_RET = 4'b0011, // invalidate packet, not unique...
  L15_ST_ACK                 = 4'b0100,  // store ack packet
  //L15_AT_ACK = 4'b0011, // unused, not unique...
  L15_INT_RET                = 4'b0111,  // interrupt packet
  L15_TEST_RET               = 4'b0101,  // unused
  L15_FP_RET                 = 4'b1000,  // unused
  L15_IFILL_RET              = 4'b0001,  // instruction fill packet
  L15_EVICT_REQ              = 4'b0011,  // eviction request
  L15_ERR_RET                = 4'b1100,  // unused
  L15_STRLOAD_RET            = 4'b0010,  // unused
  L15_STRST_ACK              = 4'b0110,  // unused
  L15_FWD_RQ_RET             = 4'b1010,  // unused
  L15_FWD_RPY_RET            = 4'b1011,  // unused
  L15_RSVD_RET               = 4'b1111,  // unused
  L15_CPX_RESTYPE_ATOMIC_RES = 4'b1110   // custom type for atomic responses
} l15_rtrntypes_t;
// Request struct sent to the L1.5.
typedef struct packed {
  logic l15_val;  // valid signal, asserted with request
  logic l15_req_ack;  // ack for response
  l15_reqtypes_t l15_rqtype;  // see l15_reqtypes_t above for encoding
  logic l15_nc;  // non-cacheable bit
  logic [2:0] l15_size; // transaction size: 000=Byte 001=2Byte; 010=4Byte; 011=8Byte; 111=Cache line (16/32Byte)
  logic [L15_TID_WIDTH-1:0] l15_threadid;  // currently 0 or 1
  logic l15_prefetch;  // unused in openpiton
  logic l15_invalidate_cacheline;  // unused by Ariane as L1 has no ECC at the moment
  logic l15_blockstore;  // unused in openpiton
  logic l15_blockinitstore;  // unused in openpiton
  logic [L15_WAY_WIDTH-1:0] l15_l1rplway;  // way to replace
  logic [39:0] l15_address;  // physical address
  logic [63:0] l15_data;  // word to write
  logic [63:0] l15_data_next_entry;  // unused in Ariane (only used for CAS atomic requests)
  logic [L15_TLB_CSM_WIDTH-1:0] l15_csm_data;  // unused in Ariane
  logic [3:0] l15_amo_op;  // atomic operation type
} l15_req_t;
// Return struct received from the L1.5.
typedef struct packed {
  logic l15_ack;  // ack for request struct
  logic l15_header_ack;  // ack for request struct
  logic l15_val;  // valid signal for return struct
  l15_rtrntypes_t l15_returntype;  // see l15_rtrntypes_t above for encoding
  logic l15_l2miss;  // unused in Ariane
  logic [1:0] l15_error;  // unused in openpiton
  logic l15_noncacheable;  // non-cacheable bit
  logic l15_atomic;  // asserted in load return and store ack packets of atomic tx
  logic [L15_TID_WIDTH-1:0] l15_threadid;  // used as transaction ID
  logic l15_prefetch;  // unused in openpiton
  logic l15_f4b;  // 4byte instruction fill from I/O space (nc).
  logic [63:0] l15_data_0;  // used for both caches
  logic [63:0] l15_data_1;  // used for both caches
  logic [63:0] l15_data_2;  // currently only used for I$
  logic [63:0] l15_data_3;  // currently only used for I$
  logic l15_inval_icache_all_way;  // invalidate all ways
  logic l15_inval_dcache_all_way;  // unused in openpiton
  logic [15:4] l15_inval_address_15_4;  // invalidate selected cacheline
  logic l15_cross_invalidate;  // unused in openpiton
  logic [L15_WAY_WIDTH-1:0] l15_cross_invalidate_way;  // unused in openpiton
  logic l15_inval_dcache_inval;  // invalidate selected cacheline and way
  logic l15_inval_icache_inval;  // unused in openpiton
  logic [L15_WAY_WIDTH-1:0] l15_inval_way;  // way to invalidate
  logic l15_blockinitstore;  // unused in openpiton
} l15_rtrn_t;
// swap endianness in a 64bit word
// Reverses the byte order of `in`: result byte 0 is input byte 7, result
// byte 1 is input byte 6, and so on. Bit order inside each byte is kept.
function automatic logic [63:0] swendian64(input logic [63:0] in);
  automatic logic [63:0] out;
  for (int k = 0; k < 64; k += 8) begin
    // destination byte starts at bit k, source byte ends at bit 63-k
    out[k+:8] = in[63-k-:8];
  end
  return out;
endfunction
// Counts the bits set in `in`.
// NOTE: the result is only 6 bits wide, so it can represent 0..63; an input
// with all 64 bits set wraps around to 0.
function automatic logic [5:0] popcnt64(input logic [63:0] in);
  logic [5:0] cnt = 0;
  foreach (in[k]) begin
    cnt += 6'(in[k]);
  end
  return cnt;
endfunction : popcnt64
// Builds an 8-bit byte-enable mask for an access of the given size that
// starts at byte `offset`.
//   size 2'b00: 1 byte, 2'b01: 2 bytes, 2'b10: 4 bytes, otherwise all 8 bytes.
// The mask is anchored at `offset`; any bits that would land above be[7] are
// not part of the result.
function automatic logic [7:0] to_byte_enable8(input logic [2:0] offset, input logic [1:0] size);
  logic [7:0] be;
  be = '0;
  unique case (size)
    2'b00:   be[offset] = '1;  // byte
    2'b01:   be[offset+:2] = '1;  // hword
    2'b10:   be[offset+:4] = '1;  // word
    default: be = '1;  // dword
  endcase  // size
  return be;
endfunction : to_byte_enable8
// Builds a 4-bit byte-enable mask for an access of the given size that
// starts at byte `offset`.
//   size 2'b00: 1 byte, 2'b01: 2 bytes, otherwise all 4 bytes.
// The mask is anchored at `offset`; any bits that would land above be[3] are
// not part of the result.
function automatic logic [3:0] to_byte_enable4(input logic [1:0] offset, input logic [1:0] size);
  logic [3:0] be;
  be = '0;
  unique case (size)
    2'b00:   be[offset] = '1;  // byte
    2'b01:   be[offset+:2] = '1;  // hword
    default: be = '1;  // word
  endcase  // size
  return be;
endfunction : to_byte_enable4
// openpiton requires the data to be replicated in case of smaller sizes than dwords
// Selects the byte / half word / word of `data` that starts at byte `offset`
// and replicates it across the whole 64-bit result. For any other size the
// input is passed through unchanged.
function automatic logic [63:0] repData64(input logic [63:0] data, input logic [2:0] offset,
                                          input logic [1:0] size);
  logic [63:0] out;
  unique case (size)
    2'b00:   for (int k = 0; k < 8; k++) out[k*8+:8] = data[offset*8+:8];  // byte
    2'b01:   for (int k = 0; k < 4; k++) out[k*16+:16] = data[offset*8+:16];  // hword
    2'b10:   for (int k = 0; k < 2; k++) out[k*32+:32] = data[offset*8+:32];  // word
    default: out = data;  // dword
  endcase  // size
  return out;
endfunction : repData64
// 32-bit variant of repData64: selects the byte / half word of `data` that
// starts at byte `offset` and replicates it across the whole 32-bit result.
// For any other size the input is passed through unchanged.
function automatic logic [31:0] repData32(input logic [31:0] data, input logic [1:0] offset,
                                          input logic [1:0] size);
  logic [31:0] out;
  unique case (size)
    2'b00:   for (int k = 0; k < 4; k++) out[k*8+:8] = data[offset*8+:8];  // byte
    2'b01:   for (int k = 0; k < 2; k++) out[k*16+:16] = data[offset*8+:16];  // hword
    default: out = data;  // word
  endcase  // size
  return out;
endfunction : repData32
// note: this is openpiton specific. cannot transmit unaligned words.
// hence we default to individual bytes in that case, and they have to be transmitted
// one after the other
// Maps an 8-bit byte-enable pattern to a size code:
//   2'b11 = all 8 bytes, 2'b10 = one aligned 4-byte half, 2'b01 = one aligned
//   2-byte pair, 2'b00 = anything else.
function automatic logic [1:0] toSize64(input logic [7:0] be);
  logic [1:0] size;
  unique case (be)
    8'b1111_1111: size = 2'b11;  // dword
    8'b0000_1111, 8'b1111_0000: size = 2'b10;  // word
    8'b1100_0000, 8'b0011_0000, 8'b0000_1100, 8'b0000_0011: size = 2'b01;  // hword
    default: size = 2'b00;  // individual bytes
  endcase  // be
  return size;
endfunction : toSize64
// 32-bit variant of toSize64. Maps a 4-bit byte-enable pattern to a size code:
//   2'b10 = all 4 bytes, 2'b01 = one aligned 2-byte pair, 2'b00 = anything else.
function automatic logic [1:0] toSize32(input logic [3:0] be);
  logic [1:0] size;
  unique case (be)
    4'b1111: size = 2'b10;  // word
    4'b1100, 4'b0011: size = 2'b01;  // hword
    default: size = 2'b00;  // individual bytes
  endcase  // be
  return size;
endfunction : toSize32

View file

@ -20,340 +20,342 @@
// instruction e.g. a branch.
module instr_realign import ariane_pkg::*; #(
module instr_realign
import ariane_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) (
input logic clk_i,
input logic rst_ni,
input logic flush_i,
input logic valid_i,
output logic serving_unaligned_o, // we have an unaligned instruction in [0]
input logic [riscv::VLEN-1:0] address_i,
input logic [FETCH_WIDTH-1:0] data_i,
output logic [INSTR_PER_FETCH-1:0] valid_o,
output logic [INSTR_PER_FETCH-1:0][riscv::VLEN-1:0] addr_o,
output logic [INSTR_PER_FETCH-1:0][31:0] instr_o
input logic clk_i,
input logic rst_ni,
input logic flush_i,
input logic valid_i,
output logic serving_unaligned_o, // we have an unaligned instruction in [0]
input logic [riscv::VLEN-1:0] address_i,
input logic [FETCH_WIDTH-1:0] data_i,
output logic [INSTR_PER_FETCH-1:0] valid_o,
output logic [INSTR_PER_FETCH-1:0][riscv::VLEN-1:0] addr_o,
output logic [INSTR_PER_FETCH-1:0][31:0] instr_o
);
// as a maximum we support a fetch width of 64-bit, hence there can be 4 compressed instructions
logic [3:0] instr_is_compressed;
// as a maximum we support a fetch width of 64-bit, hence there can be 4 compressed instructions
logic [3:0] instr_is_compressed;
for (genvar i = 0; i < INSTR_PER_FETCH; i ++) begin
// LSB != 2'b11
assign instr_is_compressed[i] = ~&data_i[i * 16 +: 2];
end
for (genvar i = 0; i < INSTR_PER_FETCH; i++) begin
// LSB != 2'b11
assign instr_is_compressed[i] = ~&data_i[i*16+:2];
end
// save the unaligned part of the instruction to this ff
logic [15:0] unaligned_instr_d, unaligned_instr_q;
// the last instruction was unaligned
logic unaligned_d, unaligned_q;
// register to save the unaligned address
logic [riscv::VLEN-1:0] unaligned_address_d, unaligned_address_q;
// we have an unaligned instruction
assign serving_unaligned_o = unaligned_q;
// save the unaligned part of the instruction to this ff
logic [15:0] unaligned_instr_d, unaligned_instr_q;
// the last instruction was unaligned
logic unaligned_d, unaligned_q;
// register to save the unaligned address
logic [riscv::VLEN-1:0] unaligned_address_d, unaligned_address_q;
// we have an unaligned instruction
assign serving_unaligned_o = unaligned_q;
// Instruction re-alignment
if (FETCH_WIDTH == 32) begin : realign_bp_32
always_comb begin : re_align
unaligned_d = unaligned_q;
unaligned_address_d = {address_i[riscv::VLEN-1:2], 2'b10};
unaligned_instr_d = data_i[31:16];
// Instruction re-alignment
if (FETCH_WIDTH == 32) begin : realign_bp_32
always_comb begin : re_align
unaligned_d = unaligned_q;
unaligned_address_d = {address_i[riscv::VLEN-1:2], 2'b10};
unaligned_instr_d = data_i[31:16];
valid_o[0] = valid_i;
instr_o[0] = (unaligned_q) ? {data_i[15:0], unaligned_instr_q} : data_i[31:0];
addr_o[0] = (unaligned_q) ? unaligned_address_q : address_i;
valid_o[0] = valid_i;
instr_o[0] = (unaligned_q) ? {data_i[15:0], unaligned_instr_q} : data_i[31:0];
addr_o[0] = (unaligned_q) ? unaligned_address_q : address_i;
valid_o[1] = 1'b0;
instr_o[1] = '0;
addr_o[1] = {address_i[riscv::VLEN-1:2], 2'b10};
valid_o[1] = 1'b0;
instr_o[1] = '0;
addr_o[1] = {address_i[riscv::VLEN-1:2], 2'b10};
// this instruction is compressed or the last instruction was unaligned
if (instr_is_compressed[0] || unaligned_q) begin
// check if this is instruction is still unaligned e.g.: it is not compressed
// if its compressed re-set unaligned flag
// for 32 bit we can simply check the next instruction and whether it is compressed or not
// if it is compressed the next fetch will contain an aligned instruction
// is instruction 1 also compressed
// yes? -> no problem, no -> we've got an unaligned instruction
if (instr_is_compressed[1]) begin
unaligned_d = 1'b0;
valid_o[1] = valid_i;
instr_o[1] = {16'b0, data_i[31:16]};
end else begin
// save the upper bits for next cycle
unaligned_d = 1'b1;
unaligned_instr_d = data_i[31:16];
unaligned_address_d = {address_i[riscv::VLEN-1:2], 2'b10};
end
end // else -> normal fetch
// we started to fetch on a unaligned boundary with a whole instruction -> wait until we've
// received the next instruction
if (valid_i && address_i[1]) begin
// the instruction is not compressed so we can't do anything in this cycle
if (!instr_is_compressed[0]) begin
valid_o = '0;
unaligned_d = 1'b1;
unaligned_address_d = {address_i[riscv::VLEN-1:2], 2'b10};
unaligned_instr_d = data_i[15:0];
// the instruction isn't compressed but only the lower is ready
end else begin
valid_o = 1'b1;
end
end
end
// TODO(zarubaf): Fix 64 bit FETCH_WIDTH, maybe generalize to arbitrary fetch width
end else if (FETCH_WIDTH == 64) begin : realign_bp_64
initial begin
$error("Not propperly implemented");
end
always_comb begin : re_align
unaligned_d = unaligned_q;
unaligned_address_d = unaligned_address_q;
unaligned_instr_d = unaligned_instr_q;
valid_o = '0;
valid_o[0] = valid_i;
instr_o[0] = data_i[31:0];
addr_o[0] = address_i;
instr_o[1] = '0;
addr_o[1] = {address_i[riscv::VLEN-1:3], 3'b010};
instr_o[2] = {16'b0, data_i[47:32]};
addr_o[2] = {address_i[riscv::VLEN-1:3], 3'b100};
instr_o[3] = {16'b0, data_i[63:48]};
addr_o[3] = {address_i[riscv::VLEN-1:3], 3'b110};
// last instruction was unaligned
if (unaligned_q) begin
instr_o[0] = {data_i[15:0], unaligned_instr_q};
addr_o[0] = unaligned_address_q;
// for 64 bit there exist the following options:
// 64 32 0
// | 3 | 2 | 1 | 0 | <- instruction slot
// | I | I | U | -> again unaligned
// | * | C | I | U | -> aligned
// | * | I | C | U | -> aligned
// | I | C | C | U | -> again unaligned
// | * | C | C | C | U | -> aligned
// Legend: C = compressed, I = 32 bit instruction, U = unaligned upper half
// * = don't care
if (instr_is_compressed[1]) begin
instr_o[1] = {16'b0, data_i[31:16]};
valid_o[1] = valid_i;
if (instr_is_compressed[2]) begin
if (instr_is_compressed[3]) begin
unaligned_d = 1'b0;
valid_o[3] = valid_i;
end else begin
// continues to be unaligned
end
end else begin
unaligned_d = 1'b0;
instr_o[2] = data_i[63:32];
valid_o[2] = valid_i;
end
// instruction 1 is not compressed
end else begin
instr_o[1] = data_i[47:16];
valid_o[1] = valid_i;
addr_o[2] = {address_i[riscv::VLEN-1:3], 3'b110};
if (instr_is_compressed[2]) begin
unaligned_d = 1'b0;
instr_o[2] = {16'b0, data_i[63:48]};
valid_o[2] = valid_i;
end else begin
// continues to be unaligned
end
end
end else if (instr_is_compressed[0]) begin // instruction zero is RVC
// 64 32 0
// | 3 | 2 | 1 | 0 | <- instruction slot
// | I | I | C | -> again unaligned
// | * | C | I | C | -> aligned
// | * | I | C | C | -> aligned
// | I | C | C | C | -> again unaligned
// | * | C | C | C | C | -> aligned
if (instr_is_compressed[1]) begin
instr_o[1] = {16'b0, data_i[31:16]};
valid_o[1] = valid_i;
if (instr_is_compressed[2]) begin
valid_o[2] = valid_i;
if (instr_is_compressed[3]) begin
valid_o[3] = valid_i;
end else begin
// this instruction is unaligned
unaligned_d = 1'b1;
unaligned_instr_d = data_i[63:48];
unaligned_address_d = addr_o[3];
end
end else begin
instr_o[2] = data_i[63:32];
valid_o[2] = valid_i;
end
// instruction 1 is not compressed -> check slot 3
end else begin
instr_o[1] = data_i[47:16];
valid_o[1] = valid_i;
addr_o[2] = {address_i[riscv::VLEN-1:3], 3'b110};
if (instr_is_compressed[3]) begin
instr_o[2] = data_i[63:48];
valid_o[2] = valid_i;
end else begin
unaligned_d = 1'b1;
unaligned_instr_d = data_i[63:48];
unaligned_address_d = addr_o[2];
end
end
// Full instruction in slot zero
// 64 32 0
// | 3 | 2 | 1 | 0 | <- instruction slot
// | I | C | I |
// | * | C | C | I |
// | * | I | I |
end else begin
addr_o[1] = {address_i[riscv::VLEN-1:3], 3'b100};
if (instr_is_compressed[2]) begin
instr_o[1] = {16'b0, data_i[47:32]};
valid_o[1] = valid_i;
addr_o[2] = {address_i[riscv::VLEN-1:3], 3'b110};
if (instr_is_compressed[3]) begin
// | * | C | C | I |
valid_o[2] = valid_i;
addr_o[2] = {16'b0, data_i[63:48]};
end else begin
// this instruction is unaligned
unaligned_d = 1'b1;
unaligned_instr_d = data_i[63:48];
unaligned_address_d = addr_o[2];
end
end else begin
// two regular instructions back-to-back
instr_o[1] = data_i[63:32];
valid_o[1] = valid_i;
end
end
// --------------------------
// Unaligned fetch
// --------------------------
// Address was not 64 bit aligned
case (address_i[2:1])
// this means the previous instruction was either compressed or unaligned
// in any case we don't care
2'b01: begin
// 64 32 0
// | 3 | 2 | 1 | 0 | <- instruction slot
// | I | I | x -> again unaligned
// | * | C | I | x -> aligned
// | * | I | C | x -> aligned
// | I | C | C | x -> again unaligned
// | * | C | C | C | x -> aligned
addr_o[0] = {address_i[riscv::VLEN-1:3], 3'b010};
if (instr_is_compressed[1]) begin
instr_o[0] = {16'b0, data_i[31:16]};
valid_o[0] = valid_i;
if (instr_is_compressed[2]) begin
valid_o[1] = valid_i;
instr_o[1] = {16'b0, data_i[47:32]};
addr_o[1] = {address_i[riscv::VLEN-1:3], 3'b100};
if (instr_is_compressed[3]) begin
instr_o[2] = {16'b0, data_i[63:48]};
addr_o[2] = {address_i[riscv::VLEN-1:3], 3'b110};
valid_o[2] = valid_i;
end else begin
// this instruction is unaligned
unaligned_d = 1'b1;
unaligned_instr_d = data_i[63:48];
unaligned_address_d = addr_o[3];
end
end else begin
instr_o[1] = data_i[63:32];
addr_o[1] = {address_i[riscv::VLEN-1:3], 3'b100};
valid_o[1] = valid_i;
end
// instruction 1 is not compressed -> check slot 3
end else begin
instr_o[0] = data_i[47:16];
valid_o[0] = valid_i;
addr_o[1] = {address_i[riscv::VLEN-1:3], 3'b110};
if (instr_is_compressed[3]) begin
instr_o[1] = data_i[63:48];
valid_o[1] = valid_i;
end else begin
unaligned_d = 1'b1;
unaligned_instr_d = data_i[63:48];
unaligned_address_d = addr_o[1];
end
end
end
2'b10: begin
valid_o = '0;
// 64 32 0
// | 3 | 2 | 1 | 0 | <- instruction slot
// | I | C | * | <- unaligned
// | C | C | * | <- aligned
// | I | * | <- aligned
if (instr_is_compressed[2]) begin
valid_o[0] = valid_i;
instr_o[0] = data_i[47:32];
// second instruction is also compressed
if (instr_is_compressed[3]) begin
valid_o[1] = valid_i;
instr_o[1] = data_i[63:48];
// regular instruction -> unaligned
end else begin
unaligned_d = 1'b1;
unaligned_address_d = {address_i[riscv::VLEN-1:3], 3'b110};
unaligned_instr_d = data_i[63:48];
end
// instruction is a regular instruction
end else begin
valid_o[0] = valid_i;
instr_o[0] = data_i[63:32];
addr_o[0] = address_i;
end
end
// we started to fetch on a unaligned boundary with a whole instruction -> wait until we've
// received the next instruction
2'b11: begin
valid_o = '0;
if (!instr_is_compressed[3]) begin
unaligned_d = 1'b1;
unaligned_address_d = {address_i[riscv::VLEN-1:3], 3'b110};
unaligned_instr_d = data_i[63:48];
end else begin
valid_o[3] = valid_i;
end
end
endcase
end
end
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
unaligned_q <= 1'b0;
unaligned_address_q <= '0;
unaligned_instr_q <= '0;
// this instruction is compressed or the last instruction was unaligned
if (instr_is_compressed[0] || unaligned_q) begin
// check if this instruction is still unaligned, i.e. it is not compressed
// if it is compressed, reset the unaligned flag
// for 32 bit we can simply check the next instruction and whether it is compressed or not
// if it is compressed the next fetch will contain an aligned instruction
// is instruction 1 also compressed
// yes? -> no problem, no -> we've got an unaligned instruction
if (instr_is_compressed[1]) begin
unaligned_d = 1'b0;
valid_o[1] = valid_i;
instr_o[1] = {16'b0, data_i[31:16]};
end else begin
if (valid_i) begin
unaligned_address_q <= unaligned_address_d;
unaligned_instr_q <= unaligned_instr_d;
end
if (flush_i) begin
unaligned_q <= 1'b0;
end else if (valid_i) begin
unaligned_q <= unaligned_d;
end
// save the upper bits for next cycle
unaligned_d = 1'b1;
unaligned_instr_d = data_i[31:16];
unaligned_address_d = {address_i[riscv::VLEN-1:2], 2'b10};
end
end // else -> normal fetch
// we started to fetch on a unaligned boundary with a whole instruction -> wait until we've
// received the next instruction
if (valid_i && address_i[1]) begin
// the instruction is not compressed so we can't do anything in this cycle
if (!instr_is_compressed[0]) begin
valid_o = '0;
unaligned_d = 1'b1;
unaligned_address_d = {address_i[riscv::VLEN-1:2], 2'b10};
unaligned_instr_d = data_i[15:0];
// the instruction isn't compressed but only the lower is ready
end else begin
valid_o = 1'b1;
end
end
end
// TODO(zarubaf): Fix 64 bit FETCH_WIDTH, maybe generalize to arbitrary fetch width
end else if (FETCH_WIDTH == 64) begin : realign_bp_64
// Guard for the FETCH_WIDTH == 64 generate branch: raises an error at the start of
// simulation because the 64-bit realignment logic below is flagged as unfinished
// (see the TODO on the enclosing branch).
initial begin
$error("Not propperly implemented");
end
// Combinational re-alignment for a 64-bit fetch word.
//
// data_i is treated as four 16-bit slots (slot 0 = data_i[15:0] ... slot 3 = data_i[63:48]).
// Depending on instr_is_compressed[3:0], on whether the previous fetch left the lower half
// of a 32-bit instruction behind (unaligned_q) and on address_i[2:1], the block drives
//   - instr_o[k] / addr_o[k] / valid_o[k] for the instructions found in this word, and
//   - unaligned_d / unaligned_instr_d / unaligned_address_d, the next value of the
//     "upper half still missing" state.
// Statement order matters: defaults are assigned first, then the if/else chain, and the
// final case on address_i[2:1] overrides earlier assignments for non-64-bit-aligned fetches.
//
// NOTE: the enclosing generate branch reports "Not propperly implemented" via $error;
// several paths below carry review notes for that reason.
always_comb begin : re_align
  // by default keep the unaligned state
  unaligned_d = unaligned_q;
  unaligned_address_d = unaligned_address_q;
  unaligned_instr_d = unaligned_instr_q;

  // default: only slot 0 is valid
  valid_o = '0;
  valid_o[0] = valid_i;

  instr_o[0] = data_i[31:0];
  addr_o[0] = address_i;

  instr_o[1] = '0;
  addr_o[1] = {address_i[riscv::VLEN-1:3], 3'b010};

  instr_o[2] = {16'b0, data_i[47:32]};
  addr_o[2] = {address_i[riscv::VLEN-1:3], 3'b100};

  instr_o[3] = {16'b0, data_i[63:48]};
  addr_o[3] = {address_i[riscv::VLEN-1:3], 3'b110};

  // last instruction was unaligned
  if (unaligned_q) begin
    // stitch the saved lower half together with the first 16 bits of this word
    instr_o[0] = {data_i[15:0], unaligned_instr_q};
    addr_o[0] = unaligned_address_q;
    // for 64 bit there exist the following options:
    //     64      32      0
    //     | 3 | 2 | 1 | 0 | <- instruction slot
    // |   I   |   I   | U | -> again unaligned
    // | * | C |   I   | U | -> aligned
    // | * |   I   | C | U | -> aligned
    // |   I   | C | C | U | -> again unaligned
    // | * | C | C | C | U | -> aligned
    // Legend: C = compressed, I = 32 bit instruction, U = unaligned upper half
    //         * = don't care
    if (instr_is_compressed[1]) begin
      instr_o[1] = {16'b0, data_i[31:16]};
      valid_o[1] = valid_i;

      if (instr_is_compressed[2]) begin
        if (instr_is_compressed[3]) begin
          unaligned_d = 1'b0;
          // NOTE(review): valid_o[2] is not asserted on this path although slot 2 is
          // compressed -- confirm whether that is intended.
          valid_o[3] = valid_i;
        end else begin
          // continues to be unaligned
          // NOTE(review): unaligned_instr_d / unaligned_address_d keep their previous
          // values here -- confirm.
        end
      end else begin
        unaligned_d = 1'b0;
        instr_o[2] = data_i[63:32];
        valid_o[2] = valid_i;
      end
      // instruction 1 is not compressed
    end else begin
      instr_o[1] = data_i[47:16];
      valid_o[1] = valid_i;
      addr_o[2] = {address_i[riscv::VLEN-1:3], 3'b110};
      // NOTE(review): slot 2 is the upper half of the 32-bit instruction taken from
      // data_i[47:16]; the corresponding RVC branch below tests instr_is_compressed[3]
      // at this point -- confirm which index is intended.
      if (instr_is_compressed[2]) begin
        unaligned_d = 1'b0;
        instr_o[2] = {16'b0, data_i[63:48]};
        valid_o[2] = valid_i;
      end else begin
        // continues to be unaligned
      end
    end
  end else if (instr_is_compressed[0]) begin  // instruction zero is RVC
    //     64      32      0
    //     | 3 | 2 | 1 | 0 | <- instruction slot
    // |   I   |   I   | C | -> again unaligned
    // | * | C |   I   | C | -> aligned
    // | * |   I   | C | C | -> aligned
    // |   I   | C | C | C | -> again unaligned
    // | * | C | C | C | C | -> aligned
    if (instr_is_compressed[1]) begin
      instr_o[1] = {16'b0, data_i[31:16]};
      valid_o[1] = valid_i;

      if (instr_is_compressed[2]) begin
        valid_o[2] = valid_i;
        if (instr_is_compressed[3]) begin
          valid_o[3] = valid_i;
        end else begin
          // this instruction is unaligned: save its lower half for the next fetch
          unaligned_d = 1'b1;
          unaligned_instr_d = data_i[63:48];
          unaligned_address_d = addr_o[3];
        end
      end else begin
        instr_o[2] = data_i[63:32];
        valid_o[2] = valid_i;
      end
      // instruction 1 is not compressed -> check slot 3
    end else begin
      instr_o[1] = data_i[47:16];
      valid_o[1] = valid_i;
      addr_o[2] = {address_i[riscv::VLEN-1:3], 3'b110};
      if (instr_is_compressed[3]) begin
        // 16-bit value, implicitly zero-extended to the width of instr_o[2]
        instr_o[2] = data_i[63:48];
        valid_o[2] = valid_i;
      end else begin
        unaligned_d = 1'b1;
        unaligned_instr_d = data_i[63:48];
        unaligned_address_d = addr_o[2];
      end
    end
    // Full instruction in slot zero
    //     64      32      0
    //     | 3 | 2 | 1 | 0 | <- instruction slot
    // |   I   | C |   I   |
    // | * | C | C |   I   |
    // | * |   I   |   I   |
  end else begin
    addr_o[1] = {address_i[riscv::VLEN-1:3], 3'b100};

    if (instr_is_compressed[2]) begin
      instr_o[1] = {16'b0, data_i[47:32]};
      valid_o[1] = valid_i;
      addr_o[2] = {address_i[riscv::VLEN-1:3], 3'b110};
      if (instr_is_compressed[3]) begin
        // | * | C | C |   I   |
        valid_o[2] = valid_i;
        // Fix: the compressed instruction in slot 3 belongs on instr_o[2]. The previous
        // code assigned these bits to addr_o[2], clobbering the address set just above
        // and leaving instr_o[2] at its default (data_i[47:32]).
        instr_o[2] = {16'b0, data_i[63:48]};
      end else begin
        // this instruction is unaligned
        unaligned_d = 1'b1;
        unaligned_instr_d = data_i[63:48];
        unaligned_address_d = addr_o[2];
      end
    end else begin
      // two regular instructions back-to-back
      instr_o[1] = data_i[63:32];
      valid_o[1] = valid_i;
    end
  end

  // --------------------------
  // Unaligned fetch
  // --------------------------
  // Address was not 64 bit aligned
  case (address_i[2:1])
    // this means the previous instruction was either compressed or unaligned
    // in any case we don't care
    2'b01: begin
      //     64      32      0
      //     | 3 | 2 | 1 | 0 | <- instruction slot
      // |   I   |   I   | x  -> again unaligned
      // | * | C |   I   | x  -> aligned
      // | * |   I   | C | x  -> aligned
      // |   I   | C | C | x  -> again unaligned
      // | * | C | C | C | x  -> aligned
      addr_o[0] = {address_i[riscv::VLEN-1:3], 3'b010};

      if (instr_is_compressed[1]) begin
        instr_o[0] = {16'b0, data_i[31:16]};
        valid_o[0] = valid_i;

        if (instr_is_compressed[2]) begin
          valid_o[1] = valid_i;
          instr_o[1] = {16'b0, data_i[47:32]};
          addr_o[1] = {address_i[riscv::VLEN-1:3], 3'b100};
          if (instr_is_compressed[3]) begin
            instr_o[2] = {16'b0, data_i[63:48]};
            addr_o[2] = {address_i[riscv::VLEN-1:3], 3'b110};
            valid_o[2] = valid_i;
          end else begin
            // this instruction is unaligned
            unaligned_d = 1'b1;
            unaligned_instr_d = data_i[63:48];
            unaligned_address_d = addr_o[3];
          end
        end else begin
          instr_o[1] = data_i[63:32];
          addr_o[1] = {address_i[riscv::VLEN-1:3], 3'b100};
          valid_o[1] = valid_i;
        end
        // instruction 1 is not compressed -> check slot 3
      end else begin
        instr_o[0] = data_i[47:16];
        valid_o[0] = valid_i;
        addr_o[1] = {address_i[riscv::VLEN-1:3], 3'b110};
        if (instr_is_compressed[3]) begin
          instr_o[1] = data_i[63:48];
          valid_o[1] = valid_i;
        end else begin
          unaligned_d = 1'b1;
          unaligned_instr_d = data_i[63:48];
          unaligned_address_d = addr_o[1];
        end
      end
    end
    2'b10: begin
      valid_o = '0;
      //     64      32      0
      //     | 3 | 2 | 1 | 0 | <- instruction slot
      // |   I   | C |   *   | <- unaligned
      //     | C | C |   *   | <- aligned
      //     |   I   |   *   | <- aligned
      if (instr_is_compressed[2]) begin
        valid_o[0] = valid_i;
        instr_o[0] = data_i[47:32];

        // second instruction is also compressed
        if (instr_is_compressed[3]) begin
          valid_o[1] = valid_i;
          instr_o[1] = data_i[63:48];
          // regular instruction -> unaligned
        end else begin
          unaligned_d = 1'b1;
          unaligned_address_d = {address_i[riscv::VLEN-1:3], 3'b110};
          unaligned_instr_d = data_i[63:48];
        end
        // instruction is a regular instruction
      end else begin
        valid_o[0] = valid_i;
        instr_o[0] = data_i[63:32];
        addr_o[0] = address_i;
      end
    end
    // we started to fetch on a unaligned boundary with a whole instruction -> wait until we've
    // received the next instruction
    2'b11: begin
      valid_o = '0;
      if (!instr_is_compressed[3]) begin
        unaligned_d = 1'b1;
        unaligned_address_d = {address_i[riscv::VLEN-1:3], 3'b110};
        unaligned_instr_d = data_i[63:48];
      end else begin
        valid_o[3] = valid_i;
      end
    end
    // 2'b00: fetch address is 64-bit aligned, nothing to override
    default: ;
  endcase
end
end
// State registers for the re-aligner: the "lower half pending" flag plus the saved
// half-instruction and its address. Asynchronous active-low reset clears all three.
always_ff @(posedge clk_i or negedge rst_ni) begin
  if (~rst_ni) begin
    unaligned_instr_q   <= '0;
    unaligned_address_q <= '0;
    unaligned_q         <= 1'b0;
  end else begin
    // flag: a flush clears it and takes priority over a regular update
    if (flush_i) begin
      unaligned_q <= 1'b0;
    end else if (valid_i) begin
      unaligned_q <= unaligned_d;
    end

    // payload: captured whenever the incoming fetch data is valid
    if (valid_i) begin
      unaligned_instr_q   <= unaligned_instr_d;
      unaligned_address_q <= unaligned_address_d;
    end
  end
end
endmodule

File diff suppressed because it is too large Load diff

View file

@ -14,196 +14,198 @@
// in a scoreboard like data-structure.
module issue_stage import ariane_pkg::*; #(
module issue_stage
import ariane_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter bit IsRVFI = bit'(0),
parameter int unsigned NR_ENTRIES = 8
)(
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
) (
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
output logic sb_full_o,
input logic flush_unissued_instr_i,
input logic flush_i,
input logic stall_i, // Stall issue stage
output logic sb_full_o,
input logic flush_unissued_instr_i,
input logic flush_i,
input logic stall_i, // Stall issue stage
// from ISSUE
input scoreboard_entry_t decoded_instr_i,
input logic decoded_instr_valid_i,
input logic is_ctrl_flow_i,
output logic decoded_instr_ack_o,
input scoreboard_entry_t decoded_instr_i,
input logic decoded_instr_valid_i,
input logic is_ctrl_flow_i,
output logic decoded_instr_ack_o,
// to EX
output [riscv::VLEN-1:0] rs1_forwarding_o, // unregistered version of fu_data_o.operanda
output [riscv::VLEN-1:0] rs2_forwarding_o, // unregistered version of fu_data_o.operandb
output fu_data_t fu_data_o,
output logic [riscv::VLEN-1:0] pc_o,
output logic is_compressed_instr_o,
input logic flu_ready_i,
output logic alu_valid_o,
output [riscv::VLEN-1:0] rs1_forwarding_o, // unregistered version of fu_data_o.operanda
output [riscv::VLEN-1:0] rs2_forwarding_o, // unregistered version of fu_data_o.operandb
output fu_data_t fu_data_o,
output logic [riscv::VLEN-1:0] pc_o,
output logic is_compressed_instr_o,
input logic flu_ready_i,
output logic alu_valid_o,
// ex just resolved our predicted branch, we are ready to accept new requests
input logic resolve_branch_i,
input logic resolve_branch_i,
input logic lsu_ready_i,
output logic lsu_valid_o,
input logic lsu_ready_i,
output logic lsu_valid_o,
// branch prediction
output logic branch_valid_o, // use branch prediction unit
output branchpredict_sbe_t branch_predict_o, // Branch predict Out
output logic branch_valid_o, // use branch prediction unit
output branchpredict_sbe_t branch_predict_o, // Branch predict Out
output logic mult_valid_o,
output logic mult_valid_o,
input logic fpu_ready_i,
output logic fpu_valid_o,
output logic [1:0] fpu_fmt_o, // FP fmt field from instr.
output logic [2:0] fpu_rm_o, // FP rm field from instr.
input logic fpu_ready_i,
output logic fpu_valid_o,
output logic [1:0] fpu_fmt_o, // FP fmt field from instr.
output logic [2:0] fpu_rm_o, // FP rm field from instr.
output logic csr_valid_o,
output logic csr_valid_o,
// CVXIF
//Issue interface
output logic x_issue_valid_o,
input logic x_issue_ready_i,
output logic [31:0] x_off_instr_o,
output logic x_issue_valid_o,
input logic x_issue_ready_i,
output logic [31:0] x_off_instr_o,
// to accelerator dispatcher
output scoreboard_entry_t issue_instr_o,
output logic issue_instr_hs_o,
output scoreboard_entry_t issue_instr_o,
output logic issue_instr_hs_o,
// write back port
input logic [CVA6Cfg.NrWbPorts-1:0][TRANS_ID_BITS-1:0] trans_id_i,
input bp_resolve_t resolved_branch_i,
input logic [CVA6Cfg.NrWbPorts-1:0][riscv::XLEN-1:0] wbdata_i,
input bp_resolve_t resolved_branch_i,
input logic [CVA6Cfg.NrWbPorts-1:0][riscv::XLEN-1:0] wbdata_i,
input exception_t [CVA6Cfg.NrWbPorts-1:0] ex_ex_i, // exception from execute stage or CVXIF offloaded instruction
input logic [CVA6Cfg.NrWbPorts-1:0] wt_valid_i,
input logic x_we_i,
input logic [CVA6Cfg.NrWbPorts-1:0] wt_valid_i,
input logic x_we_i,
// commit port
input logic [CVA6Cfg.NrCommitPorts-1:0][4:0] waddr_i,
input logic [CVA6Cfg.NrCommitPorts-1:0][riscv::XLEN-1:0] wdata_i,
input logic [CVA6Cfg.NrCommitPorts-1:0] we_gpr_i,
input logic [CVA6Cfg.NrCommitPorts-1:0] we_fpr_i,
input logic [CVA6Cfg.NrCommitPorts-1:0][ 4:0] waddr_i,
input logic [CVA6Cfg.NrCommitPorts-1:0][riscv::XLEN-1:0] wdata_i,
input logic [CVA6Cfg.NrCommitPorts-1:0] we_gpr_i,
input logic [CVA6Cfg.NrCommitPorts-1:0] we_fpr_i,
output scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr_o,
input logic [CVA6Cfg.NrCommitPorts-1:0] commit_ack_i,
output scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr_o,
input logic [CVA6Cfg.NrCommitPorts-1:0] commit_ack_i,
output logic stall_issue_o, // Used in Performance Counters
output logic stall_issue_o, // Used in Performance Counters
//RVFI
input [riscv::VLEN-1:0] lsu_addr_i,
input [(riscv::XLEN/8)-1:0] lsu_rmask_i,
input [(riscv::XLEN/8)-1:0] lsu_wmask_i,
input [ariane_pkg::TRANS_ID_BITS-1:0] lsu_addr_trans_id_i
input [ riscv::VLEN-1:0] lsu_addr_i,
input [ (riscv::XLEN/8)-1:0] lsu_rmask_i,
input [ (riscv::XLEN/8)-1:0] lsu_wmask_i,
input [ariane_pkg::TRANS_ID_BITS-1:0] lsu_addr_trans_id_i
);
// ---------------------------------------------------
// Scoreboard (SB) <-> Issue and Read Operands (IRO)
// ---------------------------------------------------
typedef logic [(CVA6Cfg.NrRgprPorts == 3 ? riscv::XLEN : CVA6Cfg.FLen)-1:0] rs3_len_t;
// ---------------------------------------------------
// Scoreboard (SB) <-> Issue and Read Operands (IRO)
// ---------------------------------------------------
typedef logic [(CVA6Cfg.NrRgprPorts == 3 ? riscv::XLEN : CVA6Cfg.FLen)-1:0] rs3_len_t;
fu_t [2**REG_ADDR_SIZE-1:0] rd_clobber_gpr_sb_iro;
fu_t [2**REG_ADDR_SIZE-1:0] rd_clobber_fpr_sb_iro;
fu_t [2**REG_ADDR_SIZE-1:0] rd_clobber_gpr_sb_iro;
fu_t [2**REG_ADDR_SIZE-1:0] rd_clobber_fpr_sb_iro;
logic [REG_ADDR_SIZE-1:0] rs1_iro_sb;
riscv::xlen_t rs1_sb_iro;
logic rs1_valid_sb_iro;
logic [ REG_ADDR_SIZE-1:0] rs1_iro_sb;
riscv::xlen_t rs1_sb_iro;
logic rs1_valid_sb_iro;
logic [REG_ADDR_SIZE-1:0] rs2_iro_sb;
riscv::xlen_t rs2_sb_iro;
logic rs2_valid_iro_sb;
logic [ REG_ADDR_SIZE-1:0] rs2_iro_sb;
riscv::xlen_t rs2_sb_iro;
logic rs2_valid_iro_sb;
logic [REG_ADDR_SIZE-1:0] rs3_iro_sb;
rs3_len_t rs3_sb_iro;
logic rs3_valid_iro_sb;
logic [ REG_ADDR_SIZE-1:0] rs3_iro_sb;
rs3_len_t rs3_sb_iro;
logic rs3_valid_iro_sb;
scoreboard_entry_t issue_instr_sb_iro;
logic issue_instr_valid_sb_iro;
logic issue_ack_iro_sb;
scoreboard_entry_t issue_instr_sb_iro;
logic issue_instr_valid_sb_iro;
logic issue_ack_iro_sb;
riscv::xlen_t rs1_forwarding_xlen;
riscv::xlen_t rs2_forwarding_xlen;
riscv::xlen_t rs1_forwarding_xlen;
riscv::xlen_t rs2_forwarding_xlen;
assign rs1_forwarding_o = rs1_forwarding_xlen[riscv::VLEN-1:0];
assign rs2_forwarding_o = rs2_forwarding_xlen[riscv::VLEN-1:0];
assign rs1_forwarding_o = rs1_forwarding_xlen[riscv::VLEN-1:0];
assign rs2_forwarding_o = rs2_forwarding_xlen[riscv::VLEN-1:0];
assign issue_instr_o = issue_instr_sb_iro;
assign issue_instr_hs_o = issue_instr_valid_sb_iro & issue_ack_iro_sb;
assign issue_instr_o = issue_instr_sb_iro;
assign issue_instr_hs_o = issue_instr_valid_sb_iro & issue_ack_iro_sb;
// ---------------------------------------------------------
// 2. Manage instructions in a scoreboard
// ---------------------------------------------------------
scoreboard #(
.CVA6Cfg ( CVA6Cfg ),
.IsRVFI ( IsRVFI ),
.rs3_len_t ( rs3_len_t ),
.NR_ENTRIES (NR_ENTRIES )
) i_scoreboard (
.sb_full_o ( sb_full_o ),
.unresolved_branch_i ( 1'b0 ),
.rd_clobber_gpr_o ( rd_clobber_gpr_sb_iro ),
.rd_clobber_fpr_o ( rd_clobber_fpr_sb_iro ),
.rs1_i ( rs1_iro_sb ),
.rs1_o ( rs1_sb_iro ),
.rs1_valid_o ( rs1_valid_sb_iro ),
.rs2_i ( rs2_iro_sb ),
.rs2_o ( rs2_sb_iro ),
.rs2_valid_o ( rs2_valid_iro_sb ),
.rs3_i ( rs3_iro_sb ),
.rs3_o ( rs3_sb_iro ),
.rs3_valid_o ( rs3_valid_iro_sb ),
// ---------------------------------------------------------
// 2. Manage instructions in a scoreboard
// ---------------------------------------------------------
scoreboard #(
.CVA6Cfg (CVA6Cfg),
.IsRVFI (IsRVFI),
.rs3_len_t (rs3_len_t),
.NR_ENTRIES(NR_ENTRIES)
) i_scoreboard (
.sb_full_o (sb_full_o),
.unresolved_branch_i(1'b0),
.rd_clobber_gpr_o (rd_clobber_gpr_sb_iro),
.rd_clobber_fpr_o (rd_clobber_fpr_sb_iro),
.rs1_i (rs1_iro_sb),
.rs1_o (rs1_sb_iro),
.rs1_valid_o (rs1_valid_sb_iro),
.rs2_i (rs2_iro_sb),
.rs2_o (rs2_sb_iro),
.rs2_valid_o (rs2_valid_iro_sb),
.rs3_i (rs3_iro_sb),
.rs3_o (rs3_sb_iro),
.rs3_valid_o (rs3_valid_iro_sb),
.decoded_instr_i ( decoded_instr_i ),
.decoded_instr_valid_i ( decoded_instr_valid_i ),
.decoded_instr_ack_o ( decoded_instr_ack_o ),
.issue_instr_o ( issue_instr_sb_iro ),
.issue_instr_valid_o ( issue_instr_valid_sb_iro ),
.issue_ack_i ( issue_ack_iro_sb ),
.decoded_instr_i (decoded_instr_i),
.decoded_instr_valid_i(decoded_instr_valid_i),
.decoded_instr_ack_o (decoded_instr_ack_o),
.issue_instr_o (issue_instr_sb_iro),
.issue_instr_valid_o (issue_instr_valid_sb_iro),
.issue_ack_i (issue_ack_iro_sb),
.resolved_branch_i ( resolved_branch_i ),
.trans_id_i ( trans_id_i ),
.wbdata_i ( wbdata_i ),
.ex_i ( ex_ex_i ),
.lsu_addr_i ( lsu_addr_i ),
.lsu_rmask_i ( lsu_rmask_i ),
.lsu_wmask_i ( lsu_wmask_i ),
.lsu_addr_trans_id_i ( lsu_addr_trans_id_i ),
.rs1_forwarding_i ( rs1_forwarding_xlen ),
.rs2_forwarding_i ( rs2_forwarding_xlen ),
.*
);
.resolved_branch_i (resolved_branch_i),
.trans_id_i (trans_id_i),
.wbdata_i (wbdata_i),
.ex_i (ex_ex_i),
.lsu_addr_i (lsu_addr_i),
.lsu_rmask_i (lsu_rmask_i),
.lsu_wmask_i (lsu_wmask_i),
.lsu_addr_trans_id_i(lsu_addr_trans_id_i),
.rs1_forwarding_i (rs1_forwarding_xlen),
.rs2_forwarding_i (rs2_forwarding_xlen),
.*
);
// ---------------------------------------------------------
// 3. Issue instruction and read operand, also commit
// ---------------------------------------------------------
issue_read_operands #(
.CVA6Cfg ( CVA6Cfg ),
.rs3_len_t ( rs3_len_t )
)i_issue_read_operands (
.flush_i ( flush_unissued_instr_i ),
.issue_instr_i ( issue_instr_sb_iro ),
.issue_instr_valid_i ( issue_instr_valid_sb_iro ),
.issue_ack_o ( issue_ack_iro_sb ),
.fu_data_o ( fu_data_o ),
.flu_ready_i ( flu_ready_i ),
.rs1_o ( rs1_iro_sb ),
.rs1_i ( rs1_sb_iro ),
.rs1_valid_i ( rs1_valid_sb_iro ),
.rs2_o ( rs2_iro_sb ),
.rs2_i ( rs2_sb_iro ),
.rs2_valid_i ( rs2_valid_iro_sb ),
.rs3_o ( rs3_iro_sb ),
.rs3_i ( rs3_sb_iro ),
.rs3_valid_i ( rs3_valid_iro_sb ),
.rd_clobber_gpr_i ( rd_clobber_gpr_sb_iro ),
.rd_clobber_fpr_i ( rd_clobber_fpr_sb_iro ),
.alu_valid_o ( alu_valid_o ),
.branch_valid_o ( branch_valid_o ),
.csr_valid_o ( csr_valid_o ),
.cvxif_valid_o ( x_issue_valid_o ),
.cvxif_ready_i ( x_issue_ready_i ),
.cvxif_off_instr_o ( x_off_instr_o ),
.mult_valid_o ( mult_valid_o ),
.rs1_forwarding_o ( rs1_forwarding_xlen ),
.rs2_forwarding_o ( rs2_forwarding_xlen ),
.stall_issue_o ( stall_issue_o ),
.*
);
// ---------------------------------------------------------
// 3. Issue instruction and read operand, also commit
// ---------------------------------------------------------
issue_read_operands #(
.CVA6Cfg (CVA6Cfg),
.rs3_len_t(rs3_len_t)
) i_issue_read_operands (
.flush_i (flush_unissued_instr_i),
.issue_instr_i (issue_instr_sb_iro),
.issue_instr_valid_i(issue_instr_valid_sb_iro),
.issue_ack_o (issue_ack_iro_sb),
.fu_data_o (fu_data_o),
.flu_ready_i (flu_ready_i),
.rs1_o (rs1_iro_sb),
.rs1_i (rs1_sb_iro),
.rs1_valid_i (rs1_valid_sb_iro),
.rs2_o (rs2_iro_sb),
.rs2_i (rs2_sb_iro),
.rs2_valid_i (rs2_valid_iro_sb),
.rs3_o (rs3_iro_sb),
.rs3_i (rs3_sb_iro),
.rs3_valid_i (rs3_valid_iro_sb),
.rd_clobber_gpr_i (rd_clobber_gpr_sb_iro),
.rd_clobber_fpr_i (rd_clobber_fpr_sb_iro),
.alu_valid_o (alu_valid_o),
.branch_valid_o (branch_valid_o),
.csr_valid_o (csr_valid_o),
.cvxif_valid_o (x_issue_valid_o),
.cvxif_ready_i (x_issue_ready_i),
.cvxif_off_instr_o (x_off_instr_o),
.mult_valid_o (mult_valid_o),
.rs1_forwarding_o (rs1_forwarding_xlen),
.rs2_forwarding_o (rs2_forwarding_xlen),
.stall_issue_o (stall_issue_o),
.*
);
endmodule

View file

@ -13,484 +13,486 @@
// Description: Load Store Unit, handles address calculation and memory interface signals
module load_store_unit import ariane_pkg::*; #(
module load_store_unit
import ariane_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int unsigned ASID_WIDTH = 1
)(
input logic clk_i,
input logic rst_ni,
input logic flush_i,
input logic stall_st_pending_i,
output logic no_st_pending_o,
input logic amo_valid_commit_i,
) (
input logic clk_i,
input logic rst_ni,
input logic flush_i,
input logic stall_st_pending_i,
output logic no_st_pending_o,
input logic amo_valid_commit_i,
input fu_data_t fu_data_i,
output logic lsu_ready_o, // FU is ready e.g. not busy
input logic lsu_valid_i, // Input is valid
input fu_data_t fu_data_i,
output logic lsu_ready_o, // FU is ready e.g. not busy
input logic lsu_valid_i, // Input is valid
output logic [TRANS_ID_BITS-1:0] load_trans_id_o, // ID of scoreboard entry at which to write back
output riscv::xlen_t load_result_o,
output logic load_valid_o,
output exception_t load_exception_o, // to WB, signal exception status LD exception
output riscv::xlen_t load_result_o,
output logic load_valid_o,
output exception_t load_exception_o, // to WB, signal exception status LD exception
output logic [TRANS_ID_BITS-1:0] store_trans_id_o, // ID of scoreboard entry at which to write back
output riscv::xlen_t store_result_o,
output logic store_valid_o,
output exception_t store_exception_o, // to WB, signal exception status ST exception
output riscv::xlen_t store_result_o,
output logic store_valid_o,
output exception_t store_exception_o, // to WB, signal exception status ST exception
input logic commit_i, // commit the pending store
output logic commit_ready_o, // commit queue is ready to accept another commit request
input logic [TRANS_ID_BITS-1:0] commit_tran_id_i,
input logic commit_i, // commit the pending store
output logic commit_ready_o, // commit queue is ready to accept another commit request
input logic [TRANS_ID_BITS-1:0] commit_tran_id_i,
input logic enable_translation_i, // enable virtual memory translation
input logic en_ld_st_translation_i, // enable virtual memory translation for load/stores
input logic enable_translation_i, // enable virtual memory translation
input logic en_ld_st_translation_i, // enable virtual memory translation for load/stores
// icache translation requests
input icache_arsp_t icache_areq_i,
output icache_areq_t icache_areq_o,
input icache_arsp_t icache_areq_i,
output icache_areq_t icache_areq_o,
input riscv::priv_lvl_t priv_lvl_i, // From CSR register file
input riscv::priv_lvl_t ld_st_priv_lvl_i, // From CSR register file
input logic sum_i, // From CSR register file
input logic mxr_i, // From CSR register file
input logic [riscv::PPNW-1:0] satp_ppn_i, // From CSR register file
input logic [ASID_WIDTH-1:0] asid_i, // From CSR register file
input logic [ASID_WIDTH-1:0] asid_to_be_flushed_i,
input logic [riscv::VLEN-1:0] vaddr_to_be_flushed_i,
input logic flush_tlb_i,
input riscv::priv_lvl_t priv_lvl_i, // From CSR register file
input riscv::priv_lvl_t ld_st_priv_lvl_i, // From CSR register file
input logic sum_i, // From CSR register file
input logic mxr_i, // From CSR register file
input logic [riscv::PPNW-1:0] satp_ppn_i, // From CSR register file
input logic [ ASID_WIDTH-1:0] asid_i, // From CSR register file
input logic [ ASID_WIDTH-1:0] asid_to_be_flushed_i,
input logic [riscv::VLEN-1:0] vaddr_to_be_flushed_i,
input logic flush_tlb_i,
// Performance counters
output logic itlb_miss_o,
output logic dtlb_miss_o,
output logic itlb_miss_o,
output logic dtlb_miss_o,
// interface to dcache
input dcache_req_o_t [2:0] dcache_req_ports_i,
output dcache_req_i_t [2:0] dcache_req_ports_o,
input logic dcache_wbuffer_empty_i,
input logic dcache_wbuffer_not_ni_i,
input dcache_req_o_t [ 2:0] dcache_req_ports_i,
output dcache_req_i_t [ 2:0] dcache_req_ports_o,
input logic dcache_wbuffer_empty_i,
input logic dcache_wbuffer_not_ni_i,
// AMO interface
output amo_req_t amo_req_o,
input amo_resp_t amo_resp_i,
output amo_req_t amo_req_o,
input amo_resp_t amo_resp_i,
// PMP
input riscv::pmpcfg_t [15:0] pmpcfg_i,
input logic [15:0][riscv::PLEN-3:0] pmpaddr_i,
input riscv::pmpcfg_t [15:0] pmpcfg_i,
input logic [15:0][riscv::PLEN-3:0] pmpaddr_i,
//RVFI
output [riscv::VLEN-1:0] lsu_addr_o,
output [riscv::PLEN-1:0] mem_paddr_o,
output [(riscv::XLEN/8)-1:0] lsu_rmask_o,
output [(riscv::XLEN/8)-1:0] lsu_wmask_o,
output [ riscv::VLEN-1:0] lsu_addr_o,
output [ riscv::PLEN-1:0] mem_paddr_o,
output [ (riscv::XLEN/8)-1:0] lsu_rmask_o,
output [ (riscv::XLEN/8)-1:0] lsu_wmask_o,
output [ariane_pkg::TRANS_ID_BITS-1:0] lsu_addr_trans_id_o
);
// data is misaligned
logic data_misaligned;
// --------------------------------------
// 1st register stage - (stall registers)
// --------------------------------------
// those are the signals which are always correct
// e.g.: they keep the value in the stall case
lsu_ctrl_t lsu_ctrl;
// data is misaligned
logic data_misaligned;
// --------------------------------------
// 1st register stage - (stall registers)
// --------------------------------------
// those are the signals which are always correct
// e.g.: they keep the value in the stall case
lsu_ctrl_t lsu_ctrl;
logic pop_st;
logic pop_ld;
logic pop_st;
logic pop_ld;
// ------------------------------
// Address Generation Unit (AGU)
// ------------------------------
// virtual address as calculated by the AGU in the first cycle
logic [riscv::VLEN-1:0] vaddr_i;
riscv::xlen_t vaddr_xlen;
logic overflow;
logic [(riscv::XLEN/8)-1:0] be_i;
// ------------------------------
// Address Generation Unit (AGU)
// ------------------------------
// virtual address as calculated by the AGU in the first cycle
logic [ riscv::VLEN-1:0] vaddr_i;
riscv::xlen_t vaddr_xlen;
logic overflow;
logic [(riscv::XLEN/8)-1:0] be_i;
assign vaddr_xlen = $unsigned($signed(fu_data_i.imm) + $signed(fu_data_i.operand_a));
assign vaddr_i = vaddr_xlen[riscv::VLEN-1:0];
// we work with SV39 or SV32, so if VM is enabled, check that all bits [XLEN-1:38] or [XLEN-1:31] are equal
assign overflow = !((&vaddr_xlen[riscv::XLEN-1:riscv::SV-1]) == 1'b1 || (|vaddr_xlen[riscv::XLEN-1:riscv::SV-1]) == 1'b0);
assign vaddr_xlen = $unsigned($signed(fu_data_i.imm) + $signed(fu_data_i.operand_a));
assign vaddr_i = vaddr_xlen[riscv::VLEN-1:0];
// we work with SV39 or SV32, so if VM is enabled, check that all bits [XLEN-1:38] or [XLEN-1:31] are equal
assign overflow = !((&vaddr_xlen[riscv::XLEN-1:riscv::SV-1]) == 1'b1 || (|vaddr_xlen[riscv::XLEN-1:riscv::SV-1]) == 1'b0);
logic st_valid_i;
logic ld_valid_i;
logic ld_translation_req;
logic st_translation_req;
logic [riscv::VLEN-1:0] ld_vaddr;
logic [riscv::VLEN-1:0] st_vaddr;
logic translation_req;
logic translation_valid;
logic [riscv::VLEN-1:0] mmu_vaddr;
logic [riscv::PLEN-1:0] mmu_paddr, mmu_vaddr_plen, fetch_vaddr_plen;
exception_t mmu_exception;
logic dtlb_hit;
logic [riscv::PPNW-1:0] dtlb_ppn;
logic st_valid_i;
logic ld_valid_i;
logic ld_translation_req;
logic st_translation_req;
logic [riscv::VLEN-1:0] ld_vaddr;
logic [riscv::VLEN-1:0] st_vaddr;
logic translation_req;
logic translation_valid;
logic [riscv::VLEN-1:0] mmu_vaddr;
logic [riscv::PLEN-1:0] mmu_paddr, mmu_vaddr_plen, fetch_vaddr_plen;
exception_t mmu_exception;
logic dtlb_hit;
logic [ riscv::PPNW-1:0] dtlb_ppn;
logic ld_valid;
logic [TRANS_ID_BITS-1:0] ld_trans_id;
riscv::xlen_t ld_result;
logic st_valid;
logic [TRANS_ID_BITS-1:0] st_trans_id;
riscv::xlen_t st_result;
logic ld_valid;
logic [TRANS_ID_BITS-1:0] ld_trans_id;
riscv::xlen_t ld_result;
logic st_valid;
logic [TRANS_ID_BITS-1:0] st_trans_id;
riscv::xlen_t st_result;
logic [11:0] page_offset;
logic page_offset_matches;
logic [ 11:0] page_offset;
logic page_offset_matches;
exception_t misaligned_exception;
exception_t ld_ex;
exception_t st_ex;
exception_t misaligned_exception;
exception_t ld_ex;
exception_t st_ex;
// -------------------
// MMU e.g.: TLBs/PTW
// -------------------
// Exactly one of the three generate branches below is elaborated:
//   gen_mmu_sv39 : MMU_PRESENT and XLEN == 64 -> instantiates `mmu`
//   gen_mmu_sv32 : MMU_PRESENT and XLEN == 32 -> instantiates `cva6_mmu_sv32`
//   gen_no_mmu   : otherwise -> no translation, addresses are passed through
// In every branch index 0 of dcache_req_ports_i/o is the port used (or tied off) here.
if (MMU_PRESENT && (riscv::XLEN == 64)) begin : gen_mmu_sv39
mmu #(
.CVA6Cfg ( CVA6Cfg ),
.INSTR_TLB_ENTRIES ( ariane_pkg::INSTR_TLB_ENTRIES ),
.DATA_TLB_ENTRIES ( ariane_pkg::DATA_TLB_ENTRIES ),
.ASID_WIDTH ( ASID_WIDTH )
) i_cva6_mmu (
// misaligned bypass
.misaligned_ex_i ( misaligned_exception ),
.lsu_is_store_i ( st_translation_req ),
.lsu_req_i ( translation_req ),
.lsu_vaddr_i ( mmu_vaddr ),
.lsu_valid_o ( translation_valid ),
.lsu_paddr_o ( mmu_paddr ),
.lsu_exception_o ( mmu_exception ),
.lsu_dtlb_hit_o ( dtlb_hit ), // send in the same cycle as the request
.lsu_dtlb_ppn_o ( dtlb_ppn ), // send in the same cycle as the request
// connecting PTW to D$ IF
.req_port_i ( dcache_req_ports_i [0] ),
.req_port_o ( dcache_req_ports_o [0] ),
// icache address translation requests
.icache_areq_i ( icache_areq_i ),
.asid_to_be_flushed_i,
.vaddr_to_be_flushed_i,
.icache_areq_o ( icache_areq_o ),
.pmpcfg_i,
.pmpaddr_i,
// all remaining ports are connected implicitly to same-named signals
.*
);
end else if (MMU_PRESENT && (riscv::XLEN == 32)) begin : gen_mmu_sv32
// same port connections as the sv39 branch, only the instantiated module differs
cva6_mmu_sv32 #(
.CVA6Cfg ( CVA6Cfg ),
.INSTR_TLB_ENTRIES ( ariane_pkg::INSTR_TLB_ENTRIES ),
.DATA_TLB_ENTRIES ( ariane_pkg::DATA_TLB_ENTRIES ),
.ASID_WIDTH ( ASID_WIDTH )
) i_cva6_mmu (
// misaligned bypass
.misaligned_ex_i ( misaligned_exception ),
.lsu_is_store_i ( st_translation_req ),
.lsu_req_i ( translation_req ),
.lsu_vaddr_i ( mmu_vaddr ),
.lsu_valid_o ( translation_valid ),
.lsu_paddr_o ( mmu_paddr ),
.lsu_exception_o ( mmu_exception ),
.lsu_dtlb_hit_o ( dtlb_hit ), // send in the same cycle as the request
.lsu_dtlb_ppn_o ( dtlb_ppn ), // send in the same cycle as the request
// connecting PTW to D$ IF
.req_port_i ( dcache_req_ports_i [0] ),
.req_port_o ( dcache_req_ports_o [0] ),
// icache address translation requests
.icache_areq_i ( icache_areq_i ),
.asid_to_be_flushed_i,
.vaddr_to_be_flushed_i,
.icache_areq_o ( icache_areq_o ),
.pmpcfg_i,
.pmpaddr_i,
// all remaining ports are connected implicitly to same-named signals
.*
);
end else begin : gen_no_mmu
// Resize the virtual addresses to PLEN bits:
// truncate when VLEN > PLEN, zero-extend otherwise.
if (riscv::VLEN > riscv::PLEN) begin
assign mmu_vaddr_plen = mmu_vaddr[riscv::PLEN-1:0];
assign fetch_vaddr_plen = icache_areq_i.fetch_vaddr[riscv::PLEN-1:0];
end else begin
assign mmu_vaddr_plen = {{{riscv::PLEN-riscv::VLEN}{1'b0}}, mmu_vaddr};
assign fetch_vaddr_plen = {{{riscv::PLEN-riscv::VLEN}{1'b0}}, icache_areq_i.fetch_vaddr};
end
// Instruction fetch: the request is acknowledged combinationally, the physical
// address is the resized virtual address and no exception is ever raised.
assign icache_areq_o.fetch_valid = icache_areq_i.fetch_req;
assign icache_areq_o.fetch_paddr = fetch_vaddr_plen;
assign icache_areq_o.fetch_exception = '0;
// D$ port 0 is tied off: data_req is constant 0, so it never issues a request.
assign dcache_req_ports_o[0].address_index = '0;
assign dcache_req_ports_o[0].address_tag = '0;
assign dcache_req_ports_o[0].data_wdata = '0;
assign dcache_req_ports_o[0].data_req = 1'b0;
assign dcache_req_ports_o[0].data_be = '1;
assign dcache_req_ports_o[0].data_size = 2'b11;
assign dcache_req_ports_o[0].data_we = 1'b0;
assign dcache_req_ports_o[0].kill_req = '0;
assign dcache_req_ports_o[0].tag_valid = 1'b0;
// No TLBs: misses are never reported, the DTLB interface always signals a hit and
// the PPN is taken directly from the address bits above the 12-bit page offset.
assign itlb_miss_o = 1'b0;
assign dtlb_miss_o = 1'b0;
assign dtlb_ppn = mmu_vaddr_plen[riscv::PLEN-1:12];
assign dtlb_hit = 1'b1;
// Physical address, valid flag and the misaligned exception are registered,
// i.e. they appear one clock cycle after the request (all cleared on reset).
// NOTE(review): presumably this mirrors the latency of the lsu_*_o outputs of the
// real MMU so that load/store units see identical timing -- confirm against mmu.
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
mmu_paddr <= '0;
translation_valid <= '0;
mmu_exception <= '0;
end else begin
mmu_paddr <= mmu_vaddr_plen;
translation_valid <= translation_req;
mmu_exception <= misaligned_exception;
end
end
end
logic store_buffer_empty;
// ------------------
// Store Unit
// ------------------
store_unit #(
.CVA6Cfg ( CVA6Cfg )
) i_store_unit (
.clk_i,
.rst_ni,
.flush_i,
.stall_st_pending_i,
.no_st_pending_o,
.store_buffer_empty_o ( store_buffer_empty ),
.valid_i ( st_valid_i ),
.lsu_ctrl_i ( lsu_ctrl ),
.pop_st_o ( pop_st ),
.commit_i,
.commit_ready_o,
.amo_valid_commit_i,
.valid_o ( st_valid ),
.trans_id_o ( st_trans_id ),
.result_o ( st_result ),
.ex_o ( st_ex ),
// MMU port
.translation_req_o ( st_translation_req ),
.vaddr_o ( st_vaddr ),
.mem_paddr_o ( mem_paddr_o ),
.paddr_i ( mmu_paddr ),
.ex_i ( mmu_exception ),
.dtlb_hit_i ( dtlb_hit ),
// Load Unit
.page_offset_i ( page_offset ),
.page_offset_matches_o ( page_offset_matches ),
// AMOs
.amo_req_o,
.amo_resp_i,
// to memory arbiter
.req_port_i ( dcache_req_ports_i [2] ),
.req_port_o ( dcache_req_ports_o [2] )
);
// ------------------
// Load Unit
// ------------------
load_unit #(
.CVA6Cfg ( CVA6Cfg )
) i_load_unit (
.valid_i ( ld_valid_i ),
.lsu_ctrl_i ( lsu_ctrl ),
.pop_ld_o ( pop_ld ),
.valid_o ( ld_valid ),
.trans_id_o ( ld_trans_id ),
.result_o ( ld_result ),
.ex_o ( ld_ex ),
// MMU port
.translation_req_o ( ld_translation_req ),
.vaddr_o ( ld_vaddr ),
.paddr_i ( mmu_paddr ),
.ex_i ( mmu_exception ),
.dtlb_hit_i ( dtlb_hit ),
.dtlb_ppn_i ( dtlb_ppn ),
// to store unit
.page_offset_o ( page_offset ),
.page_offset_matches_i ( page_offset_matches ),
.store_buffer_empty_i ( store_buffer_empty ),
// to memory arbiter
.req_port_i ( dcache_req_ports_i [1] ),
.req_port_o ( dcache_req_ports_o [1] ),
.dcache_wbuffer_not_ni_i,
.commit_tran_id_i,
// -------------------
// MMU e.g.: TLBs/PTW
// -------------------
if (MMU_PRESENT && (riscv::XLEN == 64)) begin : gen_mmu_sv39
mmu #(
.CVA6Cfg (CVA6Cfg),
.INSTR_TLB_ENTRIES(ariane_pkg::INSTR_TLB_ENTRIES),
.DATA_TLB_ENTRIES (ariane_pkg::DATA_TLB_ENTRIES),
.ASID_WIDTH (ASID_WIDTH)
) i_cva6_mmu (
// misaligned bypass
.misaligned_ex_i(misaligned_exception),
.lsu_is_store_i (st_translation_req),
.lsu_req_i (translation_req),
.lsu_vaddr_i (mmu_vaddr),
.lsu_valid_o (translation_valid),
.lsu_paddr_o (mmu_paddr),
.lsu_exception_o(mmu_exception),
.lsu_dtlb_hit_o (dtlb_hit), // send in the same cycle as the request
.lsu_dtlb_ppn_o (dtlb_ppn), // send in the same cycle as the request
// connecting PTW to D$ IF
.req_port_i (dcache_req_ports_i[0]),
.req_port_o (dcache_req_ports_o[0]),
// icache address translation requests
.icache_areq_i (icache_areq_i),
.asid_to_be_flushed_i,
.vaddr_to_be_flushed_i,
.icache_areq_o (icache_areq_o),
.pmpcfg_i,
.pmpaddr_i,
.*
);
// ----------------------------
// Output Pipeline Register
// ----------------------------
// amount of pipeline registers inserted for load/store return path
// can be tuned to trade-off IPC vs. cycle time
shift_reg #(
.dtype ( logic[$bits(ld_valid) + $bits(ld_trans_id) + $bits(ld_result) + $bits(ld_ex) - 1: 0]),
.Depth ( cva6_config_pkg::CVA6ConfigNrLoadPipeRegs )
) i_pipe_reg_load (
.clk_i,
.rst_ni,
.d_i ( {ld_valid, ld_trans_id, ld_result, ld_ex} ),
.d_o ( {load_valid_o, load_trans_id_o, load_result_o, load_exception_o} )
end else if (MMU_PRESENT && (riscv::XLEN == 32)) begin : gen_mmu_sv32
cva6_mmu_sv32 #(
.CVA6Cfg (CVA6Cfg),
.INSTR_TLB_ENTRIES(ariane_pkg::INSTR_TLB_ENTRIES),
.DATA_TLB_ENTRIES (ariane_pkg::DATA_TLB_ENTRIES),
.ASID_WIDTH (ASID_WIDTH)
) i_cva6_mmu (
// misaligned bypass
.misaligned_ex_i(misaligned_exception),
.lsu_is_store_i (st_translation_req),
.lsu_req_i (translation_req),
.lsu_vaddr_i (mmu_vaddr),
.lsu_valid_o (translation_valid),
.lsu_paddr_o (mmu_paddr),
.lsu_exception_o(mmu_exception),
.lsu_dtlb_hit_o (dtlb_hit), // send in the same cycle as the request
.lsu_dtlb_ppn_o (dtlb_ppn), // send in the same cycle as the request
// connecting PTW to D$ IF
.req_port_i (dcache_req_ports_i[0]),
.req_port_o (dcache_req_ports_o[0]),
// icache address translation requests
.icache_areq_i (icache_areq_i),
.asid_to_be_flushed_i,
.vaddr_to_be_flushed_i,
.icache_areq_o (icache_areq_o),
.pmpcfg_i,
.pmpaddr_i,
.*
);
end else begin : gen_no_mmu
shift_reg #(
.dtype ( logic[$bits(st_valid) + $bits(st_trans_id) + $bits(st_result) + $bits(st_ex) - 1: 0]),
.Depth ( cva6_config_pkg::CVA6ConfigNrStorePipeRegs )
) i_pipe_reg_store (
.clk_i,
.rst_ni,
.d_i ( {st_valid, st_trans_id, st_result, st_ex} ),
.d_o ( {store_valid_o, store_trans_id_o, store_result_o, store_exception_o} )
);
// determine whether this is a load or store
always_comb begin : which_op
ld_valid_i = 1'b0;
st_valid_i = 1'b0;
translation_req = 1'b0;
mmu_vaddr = {riscv::VLEN{1'b0}};
// check the operation to activate the right functional unit accordingly
unique case (lsu_ctrl.fu)
// all loads go here
LOAD: begin
ld_valid_i = lsu_ctrl.valid;
translation_req = ld_translation_req;
mmu_vaddr = ld_vaddr;
end
// all stores go here
STORE: begin
st_valid_i = lsu_ctrl.valid;
translation_req = st_translation_req;
mmu_vaddr = st_vaddr;
end
// not relevant for the LSU
default: ;
endcase
if (riscv::VLEN > riscv::PLEN) begin
assign mmu_vaddr_plen = mmu_vaddr[riscv::PLEN-1:0];
assign fetch_vaddr_plen = icache_areq_i.fetch_vaddr[riscv::PLEN-1:0];
end else begin
assign mmu_vaddr_plen = {{{riscv::PLEN - riscv::VLEN} {1'b0}}, mmu_vaddr};
assign fetch_vaddr_plen = {{{riscv::PLEN - riscv::VLEN} {1'b0}}, icache_areq_i.fetch_vaddr};
end
assign icache_areq_o.fetch_valid = icache_areq_i.fetch_req;
assign icache_areq_o.fetch_paddr = fetch_vaddr_plen;
assign icache_areq_o.fetch_exception = '0;
// ---------------
// Byte Enable
// ---------------
// we can generate the byte enable from the virtual address since the last
// 12 bit are the same anyway
// and we can always generate the byte enable from the address at hand
assign be_i = riscv::IS_XLEN64 ? be_gen(vaddr_i[2:0], extract_transfer_size(fu_data_i.operation)):
be_gen_32(vaddr_i[1:0], extract_transfer_size(fu_data_i.operation));
assign dcache_req_ports_o[0].address_index = '0;
assign dcache_req_ports_o[0].address_tag = '0;
assign dcache_req_ports_o[0].data_wdata = '0;
assign dcache_req_ports_o[0].data_req = 1'b0;
assign dcache_req_ports_o[0].data_be = '1;
assign dcache_req_ports_o[0].data_size = 2'b11;
assign dcache_req_ports_o[0].data_we = 1'b0;
assign dcache_req_ports_o[0].kill_req = '0;
assign dcache_req_ports_o[0].tag_valid = 1'b0;
// ------------------------
// Misaligned Exception
// ------------------------
// we can detect a misaligned exception immediately
// the misaligned exception is passed to the functional unit via the MMU, which in case
// can augment the exception if other memory related exceptions like a page fault or access errors
always_comb begin : data_misaligned_detection
assign itlb_miss_o = 1'b0;
assign dtlb_miss_o = 1'b0;
assign dtlb_ppn = mmu_vaddr_plen[riscv::PLEN-1:12];
assign dtlb_hit = 1'b1;
misaligned_exception = {
{riscv::XLEN{1'b0}},
{riscv::XLEN{1'b0}},
1'b0
};
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
mmu_paddr <= '0;
translation_valid <= '0;
mmu_exception <= '0;
end else begin
mmu_paddr <= mmu_vaddr_plen;
translation_valid <= translation_req;
mmu_exception <= misaligned_exception;
end
end
end
data_misaligned = 1'b0;
if (lsu_ctrl.valid) begin
case (lsu_ctrl.operation)
// double word
LD, SD, FLD, FSD,
// Driven by the store unit (store_buffer_empty_o) and forwarded to the load unit
// (store_buffer_empty_i).
logic store_buffer_empty;
// ------------------
// Store Unit
// ------------------
// Ports written without parentheses (.clk_i, .flush_i, ...) are implicit named
// connections to the identically named signal of this module.
store_unit #(
.CVA6Cfg(CVA6Cfg)
) i_store_unit (
.clk_i,
.rst_ni,
.flush_i,
.stall_st_pending_i,
.no_st_pending_o,
.store_buffer_empty_o(store_buffer_empty),
// request side: st_valid_i is only set by which_op when lsu_ctrl.fu == STORE
.valid_i (st_valid_i),
.lsu_ctrl_i(lsu_ctrl),
.pop_st_o (pop_st),
.commit_i,
.commit_ready_o,
.amo_valid_commit_i,
// result side: feeds the store output pipeline register (i_pipe_reg_store)
.valid_o (st_valid),
.trans_id_o (st_trans_id),
.result_o (st_result),
.ex_o (st_ex),
// MMU port
.translation_req_o (st_translation_req),
.vaddr_o (st_vaddr),
.mem_paddr_o (mem_paddr_o),
.paddr_i (mmu_paddr),
.ex_i (mmu_exception),
.dtlb_hit_i (dtlb_hit),
// Load Unit
// page offset of the current load goes in, a match flag comes back out
.page_offset_i (page_offset),
.page_offset_matches_o(page_offset_matches),
// AMOs
.amo_req_o,
.amo_resp_i,
// to memory arbiter
// the store unit uses index 2 of the D$ request ports
.req_port_i (dcache_req_ports_i[2]),
.req_port_o (dcache_req_ports_o[2])
);
// ------------------
// Load Unit
// ------------------
// Ports that are not listed explicitly (e.g. clock, reset, flush) are picked up
// by the trailing `.*` wildcard from same-named signals of this module.
load_unit #(
.CVA6Cfg(CVA6Cfg)
) i_load_unit (
// request side: ld_valid_i is only set by which_op when lsu_ctrl.fu == LOAD
.valid_i (ld_valid_i),
.lsu_ctrl_i(lsu_ctrl),
.pop_ld_o (pop_ld),
// result side: feeds the load output pipeline register (i_pipe_reg_load)
.valid_o (ld_valid),
.trans_id_o (ld_trans_id),
.result_o (ld_result),
.ex_o (ld_ex),
// MMU port
.translation_req_o (ld_translation_req),
.vaddr_o (ld_vaddr),
.paddr_i (mmu_paddr),
.ex_i (mmu_exception),
.dtlb_hit_i (dtlb_hit),
.dtlb_ppn_i (dtlb_ppn),
// to store unit
// page offset of the load goes out, the store unit answers with a match flag
.page_offset_o (page_offset),
.page_offset_matches_i(page_offset_matches),
.store_buffer_empty_i (store_buffer_empty),
// to memory arbiter
// the load unit uses index 1 of the D$ request ports
.req_port_i (dcache_req_ports_i[1]),
.req_port_o (dcache_req_ports_o[1]),
.dcache_wbuffer_not_ni_i,
.commit_tran_id_i,
.*
);
// ----------------------------
// Output Pipeline Register
// ----------------------------
// amount of pipeline registers inserted for load/store return path
// can be tuned to trade-off IPC vs. cycle time
// Each shift register carries the concatenation {valid, trans_id, result, exception};
// its element type is a plain logic vector sized with $bits() of those four signals,
// and its depth comes from cva6_config_pkg.
// Load return path: load unit outputs -> load_*_o module outputs.
shift_reg #(
.dtype(logic [$bits(ld_valid) + $bits(ld_trans_id) + $bits(ld_result) + $bits(ld_ex) - 1:0]),
.Depth(cva6_config_pkg::CVA6ConfigNrLoadPipeRegs)
) i_pipe_reg_load (
.clk_i,
.rst_ni,
.d_i({ld_valid, ld_trans_id, ld_result, ld_ex}),
.d_o({load_valid_o, load_trans_id_o, load_result_o, load_exception_o})
);
// Store return path: store unit outputs -> store_*_o module outputs.
shift_reg #(
.dtype(logic [$bits(st_valid) + $bits(st_trans_id) + $bits(st_result) + $bits(st_ex) - 1:0]),
.Depth(cva6_config_pkg::CVA6ConfigNrStorePipeRegs)
) i_pipe_reg_store (
.clk_i,
.rst_ni,
.d_i({st_valid, st_trans_id, st_result, st_ex}),
.d_o({store_valid_o, store_trans_id_o, store_result_o, store_exception_o})
);
// Route the current LSU request to either the load unit or the store unit and
// forward the selected unit's translation request / virtual address to the MMU.
always_comb begin : which_op
  // defaults: neither unit is activated and nothing is requested from the MMU
  translation_req = 1'b0;
  mmu_vaddr       = {riscv::VLEN{1'b0}};
  ld_valid_i      = 1'b0;
  st_valid_i      = 1'b0;

  if (lsu_ctrl.fu == LOAD) begin
    // loads: activate the load unit and let it talk to the MMU
    mmu_vaddr       = ld_vaddr;
    translation_req = ld_translation_req;
    ld_valid_i      = lsu_ctrl.valid;
  end else if (lsu_ctrl.fu == STORE) begin
    // stores: activate the store unit and let it talk to the MMU
    mmu_vaddr       = st_vaddr;
    translation_req = st_translation_req;
    st_valid_i      = lsu_ctrl.valid;
  end
  // any other functional unit is not relevant for the LSU: keep the defaults
end
// ---------------
// Byte Enable
// ---------------
// The byte enable is derived from the low bits of the virtual address: the lower
// 12 bits (page offset) are the same in the virtual and the physical address,
// so it can always be generated from the address at hand.
// 64-bit configurations use be_gen with a 3-bit offset, all others use be_gen_32
// with a 2-bit offset.
assign be_i = riscv::IS_XLEN64
    ? be_gen(vaddr_i[2:0], extract_transfer_size(fu_data_i.operation))
    : be_gen_32(vaddr_i[1:0], extract_transfer_size(fu_data_i.operation));
// ------------------------
// Misaligned Exception
// ------------------------
// we can detect a misaligned exception immediately
// the misaligned exception is passed to the functional unit via the MMU, which
// can augment it if other memory-related exceptions (e.g. a page fault or an access fault) occur
always_comb begin : data_misaligned_detection
misaligned_exception = {{riscv::XLEN{1'b0}}, {riscv::XLEN{1'b0}}, 1'b0};
data_misaligned = 1'b0;
if (lsu_ctrl.valid) begin
case (lsu_ctrl.operation)
// double word
LD, SD, FLD, FSD,
AMO_LRD, AMO_SCD,
AMO_SWAPD, AMO_ADDD, AMO_ANDD, AMO_ORD,
AMO_XORD, AMO_MAXD, AMO_MAXDU, AMO_MIND,
AMO_MINDU: begin
if (lsu_ctrl.vaddr[2:0] != 3'b000) begin
data_misaligned = 1'b1;
end
end
// word
LW, LWU, SW, FLW, FSW,
if (lsu_ctrl.vaddr[2:0] != 3'b000) begin
data_misaligned = 1'b1;
end
end
// word
LW, LWU, SW, FLW, FSW,
AMO_LRW, AMO_SCW,
AMO_SWAPW, AMO_ADDW, AMO_ANDW, AMO_ORW,
AMO_XORW, AMO_MAXW, AMO_MAXWU, AMO_MINW,
AMO_MINWU: begin
if (lsu_ctrl.vaddr[1:0] != 2'b00) begin
data_misaligned = 1'b1;
end
end
// half word
LH, LHU, SH, FLH, FSH: begin
if (lsu_ctrl.vaddr[0] != 1'b0) begin
data_misaligned = 1'b1;
end
end
// byte -> is always aligned
default:;
endcase
if (lsu_ctrl.vaddr[1:0] != 2'b00) begin
data_misaligned = 1'b1;
end
end
if (data_misaligned) begin
if (lsu_ctrl.fu == LOAD) begin
misaligned_exception = {
riscv::LD_ADDR_MISALIGNED,
{{riscv::XLEN-riscv::VLEN{1'b0}},lsu_ctrl.vaddr},
1'b1
};
end else if (lsu_ctrl.fu == STORE) begin
misaligned_exception = {
riscv::ST_ADDR_MISALIGNED,
{{riscv::XLEN-riscv::VLEN{1'b0}},lsu_ctrl.vaddr},
1'b1
};
end
end
if (ariane_pkg::MMU_PRESENT && en_ld_st_translation_i && lsu_ctrl.overflow) begin
if (lsu_ctrl.fu == LOAD) begin
misaligned_exception = {
riscv::LD_ACCESS_FAULT,
{{riscv::XLEN-riscv::VLEN{1'b0}},lsu_ctrl.vaddr},
1'b1
};
end else if (lsu_ctrl.fu == STORE) begin
misaligned_exception = {
riscv::ST_ACCESS_FAULT,
{{riscv::XLEN-riscv::VLEN{1'b0}},lsu_ctrl.vaddr},
1'b1
};
end
// half word
LH, LHU, SH, FLH, FSH: begin
if (lsu_ctrl.vaddr[0] != 1'b0) begin
data_misaligned = 1'b1;
end
end
// byte -> is always aligned
default: ;
endcase
end
// ------------------
// LSU Control
// ------------------
// new data arrives here
lsu_ctrl_t lsu_req_i;
if (data_misaligned) begin
assign lsu_req_i = {lsu_valid_i, vaddr_i, overflow, fu_data_i.operand_b, be_i, fu_data_i.fu, fu_data_i.operation, fu_data_i.trans_id};
if (lsu_ctrl.fu == LOAD) begin
misaligned_exception = {
riscv::LD_ADDR_MISALIGNED, {{riscv::XLEN - riscv::VLEN{1'b0}}, lsu_ctrl.vaddr}, 1'b1
};
lsu_bypass #(
.CVA6Cfg ( CVA6Cfg )
) lsu_bypass_i (
.lsu_req_i ( lsu_req_i ),
.lsu_req_valid_i ( lsu_valid_i ),
.pop_ld_i ( pop_ld ),
.pop_st_i ( pop_st ),
end else if (lsu_ctrl.fu == STORE) begin
misaligned_exception = {
riscv::ST_ADDR_MISALIGNED, {{riscv::XLEN - riscv::VLEN{1'b0}}, lsu_ctrl.vaddr}, 1'b1
};
end
end
.lsu_ctrl_o ( lsu_ctrl ),
.ready_o ( lsu_ready_o ),
.*
);
if (ariane_pkg::MMU_PRESENT && en_ld_st_translation_i && lsu_ctrl.overflow) begin
assign lsu_addr_o = lsu_ctrl.vaddr;
assign lsu_rmask_o = lsu_ctrl.fu == LOAD ? lsu_ctrl.be : '0;
assign lsu_wmask_o = lsu_ctrl.fu == STORE ? lsu_ctrl.be : '0;
assign lsu_addr_trans_id_o = lsu_ctrl.trans_id;
if (lsu_ctrl.fu == LOAD) begin
misaligned_exception = {
riscv::LD_ACCESS_FAULT, {{riscv::XLEN - riscv::VLEN{1'b0}}, lsu_ctrl.vaddr}, 1'b1
};
end else if (lsu_ctrl.fu == STORE) begin
misaligned_exception = {
riscv::ST_ACCESS_FAULT, {{riscv::XLEN - riscv::VLEN{1'b0}}, lsu_ctrl.vaddr}, 1'b1
};
end
end
end
// ------------------
// LSU Control
// ------------------
// new data arrives here
// The incoming request is packed into an lsu_ctrl_t by plain concatenation, so the
// operands below are matched to the struct fields purely by position and width.
// NOTE(review): lsu_ctrl_t is declared outside this chunk -- the operand order here
// must stay in sync with its field order; confirm whenever the struct changes.
lsu_ctrl_t lsu_req_i;
assign lsu_req_i = {
lsu_valid_i,
vaddr_i,
overflow,
fu_data_i.operand_b,
be_i,
fu_data_i.fu,
fu_data_i.operation,
fu_data_i.trans_id
};
// Takes the freshly packed request (lsu_req_i) and produces lsu_ctrl, the request
// record consumed by the load unit, the store unit and the misaligned check.
// pop_ld / pop_st are the pop strobes coming back from the load and store units.
// NOTE(review): the buffering/bypass policy lives inside lsu_bypass and is not
// visible from here.
lsu_bypass #(
.CVA6Cfg(CVA6Cfg)
) lsu_bypass_i (
.lsu_req_i (lsu_req_i),
.lsu_req_valid_i(lsu_valid_i),
.pop_ld_i (pop_ld),
.pop_st_i (pop_st),
.lsu_ctrl_o(lsu_ctrl),
.ready_o (lsu_ready_o),
// remaining ports are connected implicitly to same-named signals
.*
);
// Export address and transaction id of the current LSU request; the byte enable
// is exported as read mask for loads and as write mask for stores (zero otherwise).
assign lsu_addr_trans_id_o = lsu_ctrl.trans_id;
assign lsu_addr_o          = lsu_ctrl.vaddr;
assign lsu_wmask_o         = (lsu_ctrl.fu == STORE) ? lsu_ctrl.be : '0;
assign lsu_rmask_o         = (lsu_ctrl.fu == LOAD) ? lsu_ctrl.be : '0;
endmodule

View file

@ -18,411 +18,420 @@
// Modification: add support for multiple outstanding load operations
// to the data cache
module load_unit import ariane_pkg::*; #(
module load_unit
import ariane_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) (
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic flush_i,
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic flush_i,
// load unit input port
input logic valid_i,
input lsu_ctrl_t lsu_ctrl_i,
output logic pop_ld_o,
input logic valid_i,
input lsu_ctrl_t lsu_ctrl_i,
output logic pop_ld_o,
// load unit output port
output logic valid_o,
output logic valid_o,
output logic [TRANS_ID_BITS-1:0] trans_id_o,
output riscv::xlen_t result_o,
output exception_t ex_o,
output riscv::xlen_t result_o,
output exception_t ex_o,
// MMU -> Address Translation
output logic translation_req_o, // request address translation
output logic [riscv::VLEN-1:0] vaddr_o, // virtual address out
input logic [riscv::PLEN-1:0] paddr_i, // physical address in
output logic translation_req_o, // request address translation
output logic [riscv::VLEN-1:0] vaddr_o, // virtual address out
input logic [riscv::PLEN-1:0] paddr_i, // physical address in
input exception_t ex_i, // exception which may has happened earlier. for example: mis-aligned exception
input logic dtlb_hit_i, // hit on the dtlb, send in the same cycle as the request
input logic dtlb_hit_i, // hit on the dtlb, send in the same cycle as the request
input logic [riscv::PPNW-1:0] dtlb_ppn_i, // ppn on the dtlb, send in the same cycle as the request
// address checker
output logic [11:0] page_offset_o,
input logic page_offset_matches_i,
input logic store_buffer_empty_i, // the entire store-buffer is empty
input logic [TRANS_ID_BITS-1:0] commit_tran_id_i,
output logic [11:0] page_offset_o,
input logic page_offset_matches_i,
input logic store_buffer_empty_i, // the entire store-buffer is empty
input logic [TRANS_ID_BITS-1:0] commit_tran_id_i,
// D$ interface
input dcache_req_o_t req_port_i,
output dcache_req_i_t req_port_o,
input logic dcache_wbuffer_not_ni_i
input dcache_req_o_t req_port_i,
output dcache_req_i_t req_port_o,
input logic dcache_wbuffer_not_ni_i
);
enum logic [3:0] { IDLE, WAIT_GNT, SEND_TAG, WAIT_PAGE_OFFSET,
ABORT_TRANSACTION, ABORT_TRANSACTION_NI, WAIT_TRANSLATION, WAIT_FLUSH,
WAIT_WB_EMPTY
} state_d, state_q;
// States of the load-control FSM (state_q = current state, state_d = next state).
enum logic [3:0] {
IDLE,  // ready to accept a new load request
WAIT_GNT,  // request is held until the D$ grants it (data_gnt)
SEND_TAG,  // tag is signalled valid (tag_valid); a further request may be accepted
WAIT_PAGE_OFFSET,  // stalled until page_offset_matches_i is deasserted
ABORT_TRANSACTION,  // kill the granted request after a DTLB miss
ABORT_TRANSACTION_NI,  // kill the granted request because stall_ni was set
WAIT_TRANSLATION,  // translation request held until DTLB hit or exception
WAIT_FLUSH,  // NOTE(review): handling of this state is outside the visible chunk
WAIT_WB_EMPTY  // wait for dcache_wbuffer_not_ni_i before re-doing the translation
}
state_d, state_q;
// in order to decouple the response interface from the request interface,
// we need a buffer which can hold all inflight memory load requests
typedef struct packed {
logic [TRANS_ID_BITS-1:0] trans_id; // scoreboard identifier
logic [riscv::XLEN_ALIGN_BYTES-1:0] address_offset; // least significant bits of the address
fu_op operation; // type of load
} ldbuf_t;
// in order to decouple the response interface from the request interface,
// we need a buffer which can hold all inflight memory load requests
// One ldbuf_t entry is written per accepted load; it is composed from
// lsu_ctrl_i.trans_id, the low address bits of lsu_ctrl_i.vaddr and lsu_ctrl_i.operation.
typedef struct packed {
logic [TRANS_ID_BITS-1:0] trans_id; // scoreboard identifier
logic [riscv::XLEN_ALIGN_BYTES-1:0] address_offset; // least significant bits of the address
fu_op operation; // type of load
} ldbuf_t;
// to support a throughput of one load per cycle, if the number of entries
// of the load buffer is 1, implement a fall-through mode. This however
// adds a combinational path between the request and response interfaces
// towards the cache.
localparam logic LDBUF_FALLTHROUGH = (CVA6Cfg.NrLoadBufEntries == 1);
localparam int unsigned REQ_ID_BITS = CVA6Cfg.NrLoadBufEntries > 1 ?
$clog2(CVA6Cfg.NrLoadBufEntries) : 1;
// A load buffer with a single entry operates in fall-through mode so that a
// throughput of one load per cycle is still possible. The price is a
// combinational path between the request and the response interface
// towards the cache.
localparam logic LDBUF_FALLTHROUGH = (CVA6Cfg.NrLoadBufEntries == 1);
// Width of a load-buffer index / request id: at least one bit, even for one entry.
localparam int unsigned REQ_ID_BITS =
    (CVA6Cfg.NrLoadBufEntries > 1) ? $clog2(CVA6Cfg.NrLoadBufEntries) : 1;
typedef logic [REQ_ID_BITS-1:0] ldbuf_id_t;
typedef logic [REQ_ID_BITS-1:0] ldbuf_id_t;
logic [CVA6Cfg.NrLoadBufEntries-1:0] ldbuf_valid_q, ldbuf_valid_d;
logic [CVA6Cfg.NrLoadBufEntries-1:0] ldbuf_flushed_q, ldbuf_flushed_d;
ldbuf_t [CVA6Cfg.NrLoadBufEntries-1:0] ldbuf_q;
logic ldbuf_empty, ldbuf_full;
ldbuf_id_t ldbuf_free_index;
logic ldbuf_w;
ldbuf_t ldbuf_wdata;
ldbuf_id_t ldbuf_windex;
logic ldbuf_r;
ldbuf_t ldbuf_rdata;
ldbuf_id_t ldbuf_rindex;
ldbuf_id_t ldbuf_last_id_q;
logic [CVA6Cfg.NrLoadBufEntries-1:0] ldbuf_valid_q, ldbuf_valid_d;
logic [CVA6Cfg.NrLoadBufEntries-1:0] ldbuf_flushed_q, ldbuf_flushed_d;
ldbuf_t [CVA6Cfg.NrLoadBufEntries-1:0] ldbuf_q;
logic ldbuf_empty, ldbuf_full;
ldbuf_id_t ldbuf_free_index;
logic ldbuf_w;
ldbuf_t ldbuf_wdata;
ldbuf_id_t ldbuf_windex;
logic ldbuf_r;
ldbuf_t ldbuf_rdata;
ldbuf_id_t ldbuf_rindex;
ldbuf_id_t ldbuf_last_id_q;
assign ldbuf_full = &ldbuf_valid_q;
assign ldbuf_full = &ldbuf_valid_q;
//
// buffer of outstanding loads
//
// buffer of outstanding loads
// write in the first available slot
generate
if (CVA6Cfg.NrLoadBufEntries > 1) begin : ldbuf_free_index_multi_gen
lzc #(
.WIDTH (CVA6Cfg.NrLoadBufEntries),
.MODE (1'b0) // Count leading zeros
) lzc_windex_i (
.in_i (~ldbuf_valid_q),
.cnt_o (ldbuf_free_index),
.empty_o (ldbuf_empty)
);
end else begin : ldbuf_free_index_single_gen
assign ldbuf_free_index = 1'b0;
end
endgenerate
assign ldbuf_windex = (LDBUF_FALLTHROUGH && ldbuf_r) ? ldbuf_rindex : ldbuf_free_index;
always_comb
begin : ldbuf_comb
ldbuf_flushed_d = ldbuf_flushed_q;
ldbuf_valid_d = ldbuf_valid_q;
// In case of flush, raise the flushed flag in all slots.
if (flush_i) begin
ldbuf_flushed_d = '1;
end
// Free read entry (in the case of fall-through mode, free the entry
// only if there is no pending load)
if (ldbuf_r && (!LDBUF_FALLTHROUGH || !ldbuf_w)) begin
ldbuf_valid_d[ldbuf_rindex] = 1'b0;
end
// Track a new outstanding operation in the load buffer
if (ldbuf_w) begin
ldbuf_flushed_d[ldbuf_windex] = 1'b0;
ldbuf_valid_d[ldbuf_windex] = 1'b1;
end
// write in the first available slot
// With more than one entry a leading-zero counter over the inverted valid vector
// yields the index of a free slot. With a single entry the index is constant 0.
// NOTE: despite its name, ldbuf_empty follows the lzc `empty_o` convention, i.e. it
// is asserted when the lzc input (~ldbuf_valid_q) has no bit set -- when no slot
// is free.
generate
  if (CVA6Cfg.NrLoadBufEntries > 1) begin : ldbuf_free_index_multi_gen
    lzc #(
        .WIDTH(CVA6Cfg.NrLoadBufEntries),
        .MODE (1'b0)  // Count leading zeros
    ) lzc_windex_i (
        .in_i   (~ldbuf_valid_q),
        .cnt_o  (ldbuf_free_index),
        .empty_o(ldbuf_empty)
    );
  end else begin : ldbuf_free_index_single_gen
    assign ldbuf_free_index = 1'b0;
    // Previously left undriven in the single-entry configuration; drive it with
    // the same value the lzc would produce so the signal is defined in both branches.
    assign ldbuf_empty = ~(|(~ldbuf_valid_q));
  end
endgenerate
always_ff @(posedge clk_i or negedge rst_ni)
begin : ldbuf_ff
if (!rst_ni) begin
ldbuf_flushed_q <= '0;
ldbuf_valid_q <= '0;
ldbuf_last_id_q <= '0;
ldbuf_q <= '0;
end else begin
ldbuf_flushed_q <= ldbuf_flushed_d;
ldbuf_valid_q <= ldbuf_valid_d;
if (ldbuf_w) begin
ldbuf_last_id_q <= ldbuf_windex;
ldbuf_q[ldbuf_windex] <= ldbuf_wdata;
end
end
assign ldbuf_windex = (LDBUF_FALLTHROUGH && ldbuf_r) ? ldbuf_rindex : ldbuf_free_index;
// Next-state logic for the per-slot "flushed" and "valid" flags of the load buffer.
always_comb begin : ldbuf_comb
  // flushed flags: hold by default, a flush marks every slot as flushed and a
  // newly written slot is un-flushed again (the write has priority)
  ldbuf_flushed_d = ldbuf_flushed_q;
  if (flush_i) ldbuf_flushed_d = '1;
  if (ldbuf_w) ldbuf_flushed_d[ldbuf_windex] = 1'b0;

  // valid flags: hold by default, the slot being read is released -- except in
  // fall-through mode when a new load is written in the same cycle -- and the
  // slot being written becomes valid (the write has priority)
  ldbuf_valid_d = ldbuf_valid_q;
  if (ldbuf_r && !(LDBUF_FALLTHROUGH && ldbuf_w)) ldbuf_valid_d[ldbuf_rindex] = 1'b0;
  if (ldbuf_w) ldbuf_valid_d[ldbuf_windex] = 1'b1;
end
// page offset is defined as the lower 12 bits, feed through for address checker
assign page_offset_o = lsu_ctrl_i.vaddr[11:0];
// feed-through the virtual address for VA translation
assign vaddr_o = lsu_ctrl_i.vaddr;
// this is a read-only interface so set the write enable to 0
assign req_port_o.data_we = 1'b0;
assign req_port_o.data_wdata = '0;
// compose the load buffer write data, control is handled in the FSM
assign ldbuf_wdata = {lsu_ctrl_i.trans_id, lsu_ctrl_i.vaddr[riscv::XLEN_ALIGN_BYTES-1:0], lsu_ctrl_i.operation};
// output address
// we can now output the lower 12 bit as the index to the cache
assign req_port_o.address_index = lsu_ctrl_i.vaddr[ariane_pkg::DCACHE_INDEX_WIDTH-1:0];
// translation from last cycle, again: control is handled in the FSM
assign req_port_o.address_tag = paddr_i[ariane_pkg::DCACHE_TAG_WIDTH +
// Load buffer state registers (asynchronous, active-low reset clears everything).
always_ff @(posedge clk_i or negedge rst_ni) begin : ldbuf_ff
  if (!rst_ni) begin
    ldbuf_q         <= '0;
    ldbuf_last_id_q <= '0;
    ldbuf_valid_q   <= '0;
    ldbuf_flushed_q <= '0;
  end else begin
    // payload and "last written id" are only updated when a new load is tracked
    if (ldbuf_w) begin
      ldbuf_q[ldbuf_windex] <= ldbuf_wdata;
      ldbuf_last_id_q       <= ldbuf_windex;
    end
    // per-slot flags follow the combinational next-state every cycle
    ldbuf_valid_q   <= ldbuf_valid_d;
    ldbuf_flushed_q <= ldbuf_flushed_d;
  end
end
// page offset is defined as the lower 12 bits, feed through for address checker
assign page_offset_o = lsu_ctrl_i.vaddr[11:0];
// feed-through the virtual address for VA translation
assign vaddr_o = lsu_ctrl_i.vaddr;
// this is a read-only interface so set the write enable to 0
assign req_port_o.data_we = 1'b0;
assign req_port_o.data_wdata = '0;
// compose the load buffer write data, control is handled in the FSM
// (operand order: trans_id, low address bits, operation)
assign ldbuf_wdata = {
lsu_ctrl_i.trans_id, lsu_ctrl_i.vaddr[riscv::XLEN_ALIGN_BYTES-1:0], lsu_ctrl_i.operation
};
// output address
// the lower DCACHE_INDEX_WIDTH bits of the virtual address are output as the cache index
assign req_port_o.address_index = lsu_ctrl_i.vaddr[ariane_pkg::DCACHE_INDEX_WIDTH-1:0];
// translation from last cycle, again: control is handled in the FSM
assign req_port_o.address_tag = paddr_i[ariane_pkg::DCACHE_TAG_WIDTH +
ariane_pkg::DCACHE_INDEX_WIDTH-1 :
ariane_pkg::DCACHE_INDEX_WIDTH];
// request id = index of the load buffer's entry
assign req_port_o.data_id = ldbuf_windex;
// directly forward exception fields (valid bit is set below)
assign ex_o.cause = ex_i.cause;
assign ex_o.tval = ex_i.tval;
// request id = index of the load buffer's entry
assign req_port_o.data_id = ldbuf_windex;
// directly forward exception fields (valid bit is set below)
assign ex_o.cause = ex_i.cause;
assign ex_o.tval = ex_i.tval;
// Check that NI operations follow the necessary conditions
logic paddr_ni;
logic not_commit_time;
logic inflight_stores;
logic stall_ni;
assign paddr_ni = config_pkg::is_inside_nonidempotent_regions(CVA6Cfg, {dtlb_ppn_i,12'd0});
assign not_commit_time = commit_tran_id_i != lsu_ctrl_i.trans_id;
assign inflight_stores = (!dcache_wbuffer_not_ni_i || !store_buffer_empty_i);
assign stall_ni = (inflight_stores || not_commit_time) && paddr_ni;
// Conditions under which a load to a non-idempotent (NI) region must be stalled.
logic paddr_ni, not_commit_time, inflight_stores, stall_ni;
// the page addressed by the DTLB's PPN lies inside a non-idempotent region
assign paddr_ni = config_pkg::is_inside_nonidempotent_regions(CVA6Cfg, {dtlb_ppn_i, 12'd0});
// this load is not the instruction identified by commit_tran_id_i
assign not_commit_time = (lsu_ctrl_i.trans_id != commit_tran_id_i);
// stores are still pending in the store buffer or in the D$ write buffer
assign inflight_stores = !(dcache_wbuffer_not_ni_i && store_buffer_empty_i);
// stall NI loads while either condition holds
assign stall_ni = paddr_ni && (not_commit_time || inflight_stores);
// ---------------
// Load Control
// ---------------
always_comb begin : load_control
automatic logic accept_req;
// ---------------
// Load Control
// ---------------
always_comb begin : load_control
automatic logic accept_req;
// default assignments
state_d = state_q;
translation_req_o = 1'b0;
req_port_o.data_req = 1'b0;
// tag control
req_port_o.kill_req = 1'b0;
req_port_o.tag_valid = 1'b0;
req_port_o.data_be = lsu_ctrl_i.be;
req_port_o.data_size = extract_transfer_size(lsu_ctrl_i.operation);
pop_ld_o = 1'b0;
// default assignments
state_d = state_q;
translation_req_o = 1'b0;
req_port_o.data_req = 1'b0;
// tag control
req_port_o.kill_req = 1'b0;
req_port_o.tag_valid = 1'b0;
req_port_o.data_be = lsu_ctrl_i.be;
req_port_o.data_size = extract_transfer_size(lsu_ctrl_i.operation);
pop_ld_o = 1'b0;
// In IDLE and SEND_TAG states, this unit can accept a new load request
// when the load buffer is not full or if there is a response and the
// load buffer is in fall-through mode
accept_req = (valid_i && (!ldbuf_full || (LDBUF_FALLTHROUGH && ldbuf_r)));
// In IDLE and SEND_TAG states, this unit can accept a new load request
// when the load buffer is not full or if there is a response and the
// load buffer is in fall-through mode
accept_req = (valid_i && (!ldbuf_full || (LDBUF_FALLTHROUGH && ldbuf_r)));
case (state_q)
IDLE: begin
if (accept_req) begin
// start the translation process even though we do not know if the addresses match
// this should ease timing
translation_req_o = 1'b1;
// check if the page offset matches with a store, if it does then stall and wait
if (!page_offset_matches_i) begin
// make a load request to memory
req_port_o.data_req = 1'b1;
// we got no data grant so wait for the grant before sending the tag
if (!req_port_i.data_gnt) begin
state_d = WAIT_GNT;
end else begin
if (ariane_pkg::MMU_PRESENT && !dtlb_hit_i) begin
state_d = ABORT_TRANSACTION;
end else begin
if (!stall_ni) begin
// we got a grant and a hit on the DTLB so we can send the tag in the next cycle
state_d = SEND_TAG;
pop_ld_o = 1'b1;
// translation valid but this is to NC and the WB is not yet empty.
end else begin
state_d = ABORT_TRANSACTION_NI;
end
end
end
end else begin
// wait for the store buffer to drain and the page offset to not match anymore
state_d = WAIT_PAGE_OFFSET;
end
case (state_q)
IDLE: begin
if (accept_req) begin
// start the translation process even though we do not know if the addresses match
// this should ease timing
translation_req_o = 1'b1;
// check if the page offset matches with a store, if it does then stall and wait
if (!page_offset_matches_i) begin
// make a load request to memory
req_port_o.data_req = 1'b1;
// we got no data grant so wait for the grant before sending the tag
if (!req_port_i.data_gnt) begin
state_d = WAIT_GNT;
end else begin
if (ariane_pkg::MMU_PRESENT && !dtlb_hit_i) begin
state_d = ABORT_TRANSACTION;
end else begin
if (!stall_ni) begin
// we got a grant and a hit on the DTLB so we can send the tag in the next cycle
state_d = SEND_TAG;
pop_ld_o = 1'b1;
// translation valid but this is to NC and the WB is not yet empty.
end else begin
state_d = ABORT_TRANSACTION_NI;
end
end
end
// wait here for the page offset to not match anymore
WAIT_PAGE_OFFSET: begin
// we make a new request as soon as the page offset does not match anymore
if (!page_offset_matches_i) begin
state_d = WAIT_GNT;
end
end
// abort the previous request - free the D$ arbiter
// we are here because of a TLB miss, we need to abort the current request and give way for the
// PTW walker to satisfy the TLB miss
ABORT_TRANSACTION, ABORT_TRANSACTION_NI: begin
req_port_o.kill_req = 1'b1;
req_port_o.tag_valid = 1'b1;
// either re-do the request or wait until the WB is empty (depending on where we came from).
state_d = (state_q == ABORT_TRANSACTION_NI) ? WAIT_WB_EMPTY : WAIT_TRANSLATION;
end
// Wait until the write-back buffer is empty in the data cache.
WAIT_WB_EMPTY: begin
// the write buffer is empty, so lets go and re-do the translation.
if (dcache_wbuffer_not_ni_i) state_d = WAIT_TRANSLATION;
end
WAIT_TRANSLATION: begin
translation_req_o = 1'b1;
// we've got a hit and we can continue with the request process
if (dtlb_hit_i)
state_d = WAIT_GNT;
// we got an exception
if (ex_i.valid) begin
// the next state will be the idle state
state_d = IDLE;
// pop load - but only if we are not getting an rvalid in here - otherwise we will over-write an incoming transaction
pop_ld_o = ~req_port_i.data_rvalid;
end
end
WAIT_GNT: begin
// keep the translation request up
translation_req_o = 1'b1;
// keep the request up
req_port_o.data_req = 1'b1;
// we finally got a data grant
if (req_port_i.data_gnt) begin
// so we send the tag in the next cycle
if (ariane_pkg::MMU_PRESENT && !dtlb_hit_i) begin
state_d = ABORT_TRANSACTION;
end else begin
if (!stall_ni) begin
// we got a grant and a hit on the DTLB so we can send the tag in the next cycle
state_d = SEND_TAG;
pop_ld_o = 1'b1;
// translation valid but this is to NC and the WB is not yet empty.
end else begin
state_d = ABORT_TRANSACTION_NI;
end
end
end
// otherwise we keep waiting on our grant
end
// we know for sure that the tag we want to send is valid
SEND_TAG: begin
req_port_o.tag_valid = 1'b1;
state_d = IDLE;
if (accept_req) begin
// start the translation process even though we do not know if the addresses match
// this should ease timing
translation_req_o = 1'b1;
// check if the page offset matches with a store, if it does stall and wait
if (!page_offset_matches_i) begin
// make a load request to memory
req_port_o.data_req = 1'b1;
// we got no data grant so wait for the grant before sending the tag
if (!req_port_i.data_gnt) begin
state_d = WAIT_GNT;
end else begin
// we got a grant so we can send the tag in the next cycle
if (ariane_pkg::MMU_PRESENT && !dtlb_hit_i) begin
state_d = ABORT_TRANSACTION;
end else begin
if (!stall_ni) begin
// we got a grant and a hit on the DTLB so we can send the tag in the next cycle
state_d = SEND_TAG;
pop_ld_o = 1'b1;
// translation valid but this is to NC and the WB is not yet empty.
end else begin
state_d = ABORT_TRANSACTION_NI;
end
end
end
end else begin
// wait for the store buffer to train and the page offset to not match anymore
state_d = WAIT_PAGE_OFFSET;
end
end
// ----------
// Exception
// ----------
// if we got an exception we need to kill the request immediately
if (ex_i.valid) begin
req_port_o.kill_req = 1'b1;
end
end
WAIT_FLUSH: begin
// the D$ arbiter will take care of presenting this to the memory only in case we
// have an outstanding request
req_port_o.kill_req = 1'b1;
req_port_o.tag_valid = 1'b1;
// we've killed the current request so we can go back to idle
state_d = IDLE;
end
default: state_d = IDLE;
endcase
// if we just flushed and the queue is not empty or we are getting an rvalid this cycle, wait in an extra stage
if (flush_i) begin
state_d = WAIT_FLUSH;
end else begin
// wait for the store buffer to train and the page offset to not match anymore
state_d = WAIT_PAGE_OFFSET;
end
end
end
// wait here for the page offset to not match anymore
WAIT_PAGE_OFFSET: begin
// we make a new request as soon as the page offset does not match anymore
if (!page_offset_matches_i) begin
state_d = WAIT_GNT;
end
end
// abort the previous request - free the D$ arbiter
// we are here because of a TLB miss, we need to abort the current request and give way for the
// PTW walker to satisfy the TLB miss
ABORT_TRANSACTION, ABORT_TRANSACTION_NI: begin
req_port_o.kill_req = 1'b1;
req_port_o.tag_valid = 1'b1;
// either re-do the request or wait until the WB is empty (depending on where we came from).
state_d = (state_q == ABORT_TRANSACTION_NI) ? WAIT_WB_EMPTY : WAIT_TRANSLATION;
end
// Wait until the write-back buffer is empty in the data cache.
WAIT_WB_EMPTY: begin
// the write buffer is empty, so lets go and re-do the translation.
if (dcache_wbuffer_not_ni_i) state_d = WAIT_TRANSLATION;
end
WAIT_TRANSLATION: begin
translation_req_o = 1'b1;
// we've got a hit and we can continue with the request process
if (dtlb_hit_i) state_d = WAIT_GNT;
// we got an exception
if (ex_i.valid) begin
// the next state will be the idle state
state_d = IDLE;
// pop load - but only if we are not getting an rvalid in here - otherwise we will over-write an incoming transaction
pop_ld_o = ~req_port_i.data_rvalid;
end
end
WAIT_GNT: begin
// keep the translation request up
translation_req_o = 1'b1;
// keep the request up
req_port_o.data_req = 1'b1;
// we finally got a data grant
if (req_port_i.data_gnt) begin
// so we send the tag in the next cycle
if (ariane_pkg::MMU_PRESENT && !dtlb_hit_i) begin
state_d = ABORT_TRANSACTION;
end else begin
if (!stall_ni) begin
// we got a grant and a hit on the DTLB so we can send the tag in the next cycle
state_d = SEND_TAG;
pop_ld_o = 1'b1;
// translation valid but this is to NC and the WB is not yet empty.
end else begin
state_d = ABORT_TRANSACTION_NI;
end
end
end
// otherwise we keep waiting on our grant
end
// we know for sure that the tag we want to send is valid
SEND_TAG: begin
req_port_o.tag_valid = 1'b1;
state_d = IDLE;
if (accept_req) begin
// start the translation process even though we do not know if the addresses match
// this should ease timing
translation_req_o = 1'b1;
// check if the page offset matches with a store, if it does stall and wait
if (!page_offset_matches_i) begin
// make a load request to memory
req_port_o.data_req = 1'b1;
// we got no data grant so wait for the grant before sending the tag
if (!req_port_i.data_gnt) begin
state_d = WAIT_GNT;
end else begin
// we got a grant so we can send the tag in the next cycle
if (ariane_pkg::MMU_PRESENT && !dtlb_hit_i) begin
state_d = ABORT_TRANSACTION;
end else begin
if (!stall_ni) begin
// we got a grant and a hit on the DTLB so we can send the tag in the next cycle
state_d = SEND_TAG;
pop_ld_o = 1'b1;
// translation valid but this is to NC and the WB is not yet empty.
end else begin
state_d = ABORT_TRANSACTION_NI;
end
end
end
end else begin
// wait for the store buffer to train and the page offset to not match anymore
state_d = WAIT_PAGE_OFFSET;
end
end
// ----------
// Exception
// ----------
// if we got an exception we need to kill the request immediately
if (ex_i.valid) begin
req_port_o.kill_req = 1'b1;
end
end
WAIT_FLUSH: begin
// the D$ arbiter will take care of presenting this to the memory only in case we
// have an outstanding request
req_port_o.kill_req = 1'b1;
req_port_o.tag_valid = 1'b1;
// we've killed the current request so we can go back to idle
state_d = IDLE;
end
default: state_d = IDLE;
endcase
// if we just flushed and the queue is not empty or we are getting an rvalid this cycle, wait in an extra stage
if (flush_i) begin
state_d = WAIT_FLUSH;
end
end
// track the load data for later usage
assign ldbuf_w = req_port_o.data_req & req_port_i.data_gnt;
// ---------------
// Retire Load
// ---------------
assign ldbuf_rindex = (CVA6Cfg.NrLoadBufEntries > 1) ? ldbuf_id_t'(req_port_i.data_rid) : 1'b0,
ldbuf_rdata = ldbuf_q[ldbuf_rindex];
// decoupled rvalid process
always_comb begin : rvalid_output
// read the pending load buffer
ldbuf_r = req_port_i.data_rvalid;
trans_id_o = ldbuf_q[ldbuf_rindex].trans_id;
valid_o = 1'b0;
ex_o.valid = 1'b0;
// we got an rvalid and its corresponding request was not flushed
if (req_port_i.data_rvalid && !ldbuf_flushed_q[ldbuf_rindex]) begin
// if the response corresponds to the last request, check that we are not killing it
if ((ldbuf_last_id_q != ldbuf_rindex) || !req_port_o.kill_req) valid_o = 1'b1;
// the output is also valid if we got an exception. An exception arrives one cycle after
// dtlb_hit_i is asserted, i.e. when we are in SEND_TAG. Otherwise, the exception
// corresponds to the next request that is already being translated (see below).
if (ex_i.valid && (state_q == SEND_TAG)) begin
valid_o = 1'b1;
ex_o.valid = 1'b1;
end
end
// track the load data for later usage
assign ldbuf_w = req_port_o.data_req & req_port_i.data_gnt;
// ---------------
// Retire Load
// ---------------
assign ldbuf_rindex = (CVA6Cfg.NrLoadBufEntries > 1) ? ldbuf_id_t'(req_port_i.data_rid) : 1'b0,
ldbuf_rdata = ldbuf_q[ldbuf_rindex];
// decoupled rvalid process
always_comb begin : rvalid_output
// read the pending load buffer
ldbuf_r = req_port_i.data_rvalid;
trans_id_o = ldbuf_q[ldbuf_rindex].trans_id;
valid_o = 1'b0;
ex_o.valid = 1'b0;
// we got an rvalid and its corresponding request was not flushed
if (req_port_i.data_rvalid && !ldbuf_flushed_q[ldbuf_rindex]) begin
// if the response corresponds to the last request, check that we are not killing it
if((ldbuf_last_id_q != ldbuf_rindex) || !req_port_o.kill_req)
valid_o = 1'b1;
// the output is also valid if we got an exception. An exception arrives one cycle after
// dtlb_hit_i is asserted, i.e. when we are in SEND_TAG. Otherwise, the exception
// corresponds to the next request that is already being translated (see below).
if (ex_i.valid && (state_q == SEND_TAG)) begin
valid_o = 1'b1;
ex_o.valid = 1'b1;
end
end
// an exception occurred during translation
// exceptions can retire out-of-order -> but we need to give priority to non-excepting load and stores
// so we simply check if we got an rvalid if so we prioritize it by not retiring the exception - we simply go for another
// round in the load FSM
if ((state_q == WAIT_TRANSLATION) && !req_port_i.data_rvalid && ex_i.valid && valid_i) begin
trans_id_o = lsu_ctrl_i.trans_id;
valid_o = 1'b1;
ex_o.valid = 1'b1;
end
// an exception occurred during translation
// exceptions can retire out-of-order -> but we need to give priority to non-excepting load and stores
// so we simply check if we got an rvalid if so we prioritize it by not retiring the exception - we simply go for another
// round in the load FSM
if ((state_q == WAIT_TRANSLATION) && !req_port_i.data_rvalid && ex_i.valid && valid_i) begin
trans_id_o = lsu_ctrl_i.trans_id;
valid_o = 1'b1;
ex_o.valid = 1'b1;
end
end
// latch physical address for the tag cycle (one cycle after applying the index)
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
state_q <= IDLE;
end else begin
state_q <= state_d;
end
// latch physical address for the tag cycle (one cycle after applying the index)
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
state_q <= IDLE;
end else begin
state_q <= state_d;
end
end
// ---------------
// Sign Extend
// ---------------
riscv::xlen_t shifted_data;
// ---------------
// Sign Extend
// ---------------
riscv::xlen_t shifted_data;
// realign as needed
assign shifted_data = req_port_i.data_rdata >> {ldbuf_rdata.address_offset, 3'b000};
// realign as needed
assign shifted_data = req_port_i.data_rdata >> {ldbuf_rdata.address_offset, 3'b000};
/* // result mux (leaner code, but more logic stages.
/* // result mux (leaner code, but more logic stages.
// can be used instead of the code below (in between //result mux fast) if timing is not so critical)
always_comb begin
unique case (ldbuf_rdata.operation)
@ -436,74 +445,81 @@ module load_unit import ariane_pkg::*; #(
endcase
end */
// result mux fast
logic [(riscv::XLEN/8)-1:0] rdata_sign_bits;
logic [riscv::XLEN_ALIGN_BYTES-1:0] rdata_offset;
logic rdata_sign_bit, rdata_is_signed, rdata_is_fp_signed;
// result mux fast
logic [ (riscv::XLEN/8)-1:0] rdata_sign_bits;
logic [riscv::XLEN_ALIGN_BYTES-1:0] rdata_offset;
logic rdata_sign_bit, rdata_is_signed, rdata_is_fp_signed;
// prepare these signals for faster selection in the next cycle
assign rdata_is_signed = ldbuf_rdata.operation inside {ariane_pkg::LW, ariane_pkg::LH, ariane_pkg::LB};
assign rdata_is_fp_signed = ldbuf_rdata.operation inside {ariane_pkg::FLW, ariane_pkg::FLH, ariane_pkg::FLB};
assign rdata_offset = ((ldbuf_rdata.operation inside {ariane_pkg::LW, ariane_pkg::FLW}) & riscv::IS_XLEN64) ? ldbuf_rdata.address_offset + 3 :
// prepare these signals for faster selection in the next cycle
assign rdata_is_signed = ldbuf_rdata.operation inside {ariane_pkg::LW, ariane_pkg::LH, ariane_pkg::LB};
assign rdata_is_fp_signed = ldbuf_rdata.operation inside {ariane_pkg::FLW, ariane_pkg::FLH, ariane_pkg::FLB};
assign rdata_offset = ((ldbuf_rdata.operation inside {ariane_pkg::LW, ariane_pkg::FLW}) & riscv::IS_XLEN64) ? ldbuf_rdata.address_offset + 3 :
( ldbuf_rdata.operation inside {ariane_pkg::LH, ariane_pkg::FLH}) ? ldbuf_rdata.address_offset + 1 :
ldbuf_rdata.address_offset;
for (genvar i = 0; i < (riscv::XLEN/8); i++) begin : gen_sign_bits
assign rdata_sign_bits[i] = req_port_i.data_rdata[(i+1)*8-1];
end
for (genvar i = 0; i < (riscv::XLEN / 8); i++) begin : gen_sign_bits
assign rdata_sign_bits[i] = req_port_i.data_rdata[(i+1)*8-1];
end
// select correct sign bit in parallel to result shifter above
// pull to 0 if unsigned
assign rdata_sign_bit = rdata_is_signed & rdata_sign_bits[rdata_offset] | rdata_is_fp_signed;
// select correct sign bit in parallel to result shifter above
// pull to 0 if unsigned
assign rdata_sign_bit = rdata_is_signed & rdata_sign_bits[rdata_offset] | rdata_is_fp_signed;
// result mux
always_comb begin
unique case (ldbuf_rdata.operation)
ariane_pkg::LW, ariane_pkg::LWU: result_o = {{riscv::XLEN-32{rdata_sign_bit}}, shifted_data[31:0]};
ariane_pkg::LH, ariane_pkg::LHU: result_o = {{riscv::XLEN-32+16{rdata_sign_bit}}, shifted_data[15:0]};
ariane_pkg::LB, ariane_pkg::LBU: result_o = {{riscv::XLEN-32+24{rdata_sign_bit}}, shifted_data[7:0]};
ariane_pkg::FLW: begin
if(CVA6Cfg.FpPresent) begin
result_o = {{riscv::XLEN-32{rdata_sign_bit}}, shifted_data[31:0]};
end
end
ariane_pkg::FLH: begin
if(CVA6Cfg.FpPresent) begin
result_o = {{riscv::XLEN-32+16{rdata_sign_bit}}, shifted_data[15:0]};
end
end
ariane_pkg::FLB: begin
if(CVA6Cfg.FpPresent) begin
result_o = {{riscv::XLEN-32+24{rdata_sign_bit}}, shifted_data[7:0]};
end
end
// result mux
always_comb begin
unique case (ldbuf_rdata.operation)
ariane_pkg::LW, ariane_pkg::LWU:
result_o = {{riscv::XLEN - 32{rdata_sign_bit}}, shifted_data[31:0]};
ariane_pkg::LH, ariane_pkg::LHU:
result_o = {{riscv::XLEN - 32 + 16{rdata_sign_bit}}, shifted_data[15:0]};
ariane_pkg::LB, ariane_pkg::LBU:
result_o = {{riscv::XLEN - 32 + 24{rdata_sign_bit}}, shifted_data[7:0]};
ariane_pkg::FLW: begin
if (CVA6Cfg.FpPresent) begin
result_o = {{riscv::XLEN - 32{rdata_sign_bit}}, shifted_data[31:0]};
end
end
ariane_pkg::FLH: begin
if (CVA6Cfg.FpPresent) begin
result_o = {{riscv::XLEN - 32 + 16{rdata_sign_bit}}, shifted_data[15:0]};
end
end
ariane_pkg::FLB: begin
if (CVA6Cfg.FpPresent) begin
result_o = {{riscv::XLEN - 32 + 24{rdata_sign_bit}}, shifted_data[7:0]};
end
end
default: result_o = shifted_data[riscv::XLEN-1:0];
endcase
end
// end result mux fast
default: result_o = shifted_data[riscv::XLEN-1:0];
endcase
end
// end result mux fast
///////////////////////////////////////////////////////
// assertions
///////////////////////////////////////////////////////
///////////////////////////////////////////////////////
// assertions
///////////////////////////////////////////////////////
//pragma translate_off
//pragma translate_off
`ifndef VERILATOR
initial assert (ariane_pkg::DCACHE_TID_WIDTH >= REQ_ID_BITS) else
$fatal(1, "CVA6ConfigDcacheIdWidth parameter is not wide enough to encode pending loads");
// check invalid offsets (these conditions actually trigger a load address misaligned exception); NOTE: the assertions below call $fatal, not a warning
addr_offset0: assert property (@(posedge clk_i) disable iff (~rst_ni)
ldbuf_w |-> (ldbuf_wdata.operation inside {ariane_pkg::LW, ariane_pkg::LWU}) |-> ldbuf_wdata.address_offset < 5) else
$fatal(1, "invalid address offset used with {LW, LWU}");
addr_offset1: assert property (@(posedge clk_i) disable iff (~rst_ni)
ldbuf_w |-> (ldbuf_wdata.operation inside {ariane_pkg::LH, ariane_pkg::LHU}) |-> ldbuf_wdata.address_offset < 7) else
$fatal(1, "invalid address offset used with {LH, LHU}");
addr_offset2: assert property (@(posedge clk_i) disable iff (~rst_ni)
ldbuf_w |-> (ldbuf_wdata.operation inside {ariane_pkg::LB, ariane_pkg::LBU}) |-> ldbuf_wdata.address_offset < 8) else
$fatal(1, "invalid address offset used with {LB, LBU}");
initial
assert (ariane_pkg::DCACHE_TID_WIDTH >= REQ_ID_BITS)
else $fatal(1, "CVA6ConfigDcacheIdWidth parameter is not wide enough to encode pending loads");
// check invalid offsets (these conditions actually trigger a load address misaligned exception); NOTE: the assertions below call $fatal, not a warning
addr_offset0 :
assert property (@(posedge clk_i) disable iff (~rst_ni)
ldbuf_w |-> (ldbuf_wdata.operation inside {ariane_pkg::LW, ariane_pkg::LWU}) |-> ldbuf_wdata.address_offset < 5)
else $fatal(1, "invalid address offset used with {LW, LWU}");
addr_offset1 :
assert property (@(posedge clk_i) disable iff (~rst_ni)
ldbuf_w |-> (ldbuf_wdata.operation inside {ariane_pkg::LH, ariane_pkg::LHU}) |-> ldbuf_wdata.address_offset < 7)
else $fatal(1, "invalid address offset used with {LH, LHU}");
addr_offset2 :
assert property (@(posedge clk_i) disable iff (~rst_ni)
ldbuf_w |-> (ldbuf_wdata.operation inside {ariane_pkg::LB, ariane_pkg::LBU}) |-> ldbuf_wdata.address_offset < 8)
else $fatal(1, "invalid address offset used with {LB, LBU}");
`endif
//pragma translate_on
//pragma translate_on
endmodule

View file

@ -23,99 +23,100 @@
// the LSU control should sample it and store it for later application to the units. It does so, by storing it in a
// two element FIFO. This is necessary as we only know very late in the cycle whether the load/store will succeed (address check,
// TLB hit mainly). So we better unconditionally allow another request to arrive and store this request in case we need to.
// lsu_bypass: two-entry buffer for LSU requests with a combinational bypass path.
// While the buffer is empty the incoming request (lsu_req_i) is forwarded directly to
// lsu_ctrl_o; otherwise the entry at the read pointer is presented.
// NOTE(review): this span is rendered from a formatting-only diff, so each statement
// appears twice (pre-format copy followed by post-format copy) and the two copies are
// interleaved hunk by hunk. The added comments describe the logic of either copy.
module lsu_bypass import ariane_pkg::*; #(
module lsu_bypass
import ariane_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) (
input logic clk_i,
input logic rst_ni,
input logic flush_i,
input logic clk_i,
input logic rst_ni,
input logic flush_i,
input lsu_ctrl_t lsu_req_i,
input logic lsu_req_valid_i,
input logic pop_ld_i,
input logic pop_st_i,
input lsu_ctrl_t lsu_req_i,
input logic lsu_req_valid_i,
input logic pop_ld_i,
input logic pop_st_i,
output lsu_ctrl_t lsu_ctrl_o,
output logic ready_o
);
);
// two-entry request storage, 1-bit read/write pointers and a 2-bit occupancy counter
// (`_n` = next-state value computed combinationally, `_q` = registered value)
lsu_ctrl_t [1:0] mem_n, mem_q;
logic read_pointer_n, read_pointer_q;
logic write_pointer_n, write_pointer_q;
logic [1:0] status_cnt_n, status_cnt_q;
lsu_ctrl_t [1:0] mem_n, mem_q;
logic read_pointer_n, read_pointer_q;
logic write_pointer_n, write_pointer_q;
logic [1:0] status_cnt_n, status_cnt_q;
// ready_o is asserted only while no request is buffered
logic empty;
assign empty = (status_cnt_q == 0);
assign ready_o = empty;
logic empty;
assign empty = (status_cnt_q == 0);
assign ready_o = empty;
// next-state logic: operates on local copies of the counter and pointers and
// commits them to the *_n signals at the end of the block
always_comb begin
automatic logic [1:0] status_cnt;
automatic logic write_pointer;
automatic logic read_pointer;
always_comb begin
automatic logic [1:0] status_cnt;
automatic logic write_pointer;
automatic logic read_pointer;
status_cnt = status_cnt_q;
write_pointer = write_pointer_q;
read_pointer = read_pointer_q;
status_cnt = status_cnt_q;
write_pointer = write_pointer_q;
read_pointer = read_pointer_q;
mem_n = mem_q;
// we've got a valid LSU request
if (lsu_req_valid_i) begin
mem_n[write_pointer_q] = lsu_req_i;
write_pointer++;
status_cnt++;
end
if (pop_ld_i) begin
// invalidate the result
mem_n[read_pointer_q].valid = 1'b0;
read_pointer++;
status_cnt--;
end
if (pop_st_i) begin
// invalidate the result
mem_n[read_pointer_q].valid = 1'b0;
read_pointer++;
status_cnt--;
end
// load and store pop in the same cycle: clear the whole storage
if (pop_st_i && pop_ld_i)
mem_n = '0;
// flush has the last word: reset counter, pointers and storage
if (flush_i) begin
status_cnt = '0;
write_pointer = '0;
read_pointer = '0;
mem_n = '0;
end
// commit the locally updated values to the next-state signals
read_pointer_n = read_pointer;
write_pointer_n = write_pointer;
status_cnt_n = status_cnt;
mem_n = mem_q;
// we've got a valid LSU request: store it at the registered write pointer
// (this happens regardless of whether the request is also bypassed to the output)
if (lsu_req_valid_i) begin
mem_n[write_pointer_q] = lsu_req_i;
write_pointer++;
status_cnt++;
end
// output assignment
always_comb begin : output_assignments
if (empty) begin
lsu_ctrl_o = lsu_req_i;
end else begin
lsu_ctrl_o = mem_q[read_pointer_q];
end
if (pop_ld_i) begin
// invalidate the result
mem_n[read_pointer_q].valid = 1'b0;
read_pointer++;
status_cnt--;
end
// registers
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
mem_q <= '0;
status_cnt_q <= '0;
write_pointer_q <= '0;
read_pointer_q <= '0;
end else begin
mem_q <= mem_n;
status_cnt_q <= status_cnt_n;
write_pointer_q <= write_pointer_n;
read_pointer_q <= read_pointer_n;
end
if (pop_st_i) begin
// invalidate the result
mem_n[read_pointer_q].valid = 1'b0;
read_pointer++;
status_cnt--;
end
// load and store pop in the same cycle: clear the whole storage
if (pop_st_i && pop_ld_i) mem_n = '0;
// flush has the last word: reset counter, pointers and storage
if (flush_i) begin
status_cnt = '0;
write_pointer = '0;
read_pointer = '0;
mem_n = '0;
end
// commit the locally updated values to the next-state signals
read_pointer_n = read_pointer;
write_pointer_n = write_pointer;
status_cnt_n = status_cnt;
end
// output assignment: bypass the incoming request while nothing is buffered,
// otherwise present the buffered entry selected by the read pointer
always_comb begin : output_assignments
if (empty) begin
lsu_ctrl_o = lsu_req_i;
end else begin
lsu_ctrl_o = mem_q[read_pointer_q];
end
end
// registers: asynchronous active-low reset clears storage, counter and pointers;
// otherwise the next-state values are taken on the rising clock edge
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
mem_q <= '0;
status_cnt_q <= '0;
write_pointer_q <= '0;
read_pointer_q <= '0;
end else begin
mem_q <= mem_n;
status_cnt_q <= status_cnt_n;
write_pointer_q <= write_pointer_n;
read_pointer_q <= read_pointer_n;
end
end
endmodule

View file

@ -26,495 +26,538 @@
// 2020-02-17 0.1 S.Jacq MMU Sv32 for CV32A6
// =========================================================================== //
module cva6_mmu_sv32 import ariane_pkg::*; #(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int unsigned INSTR_TLB_ENTRIES = 2,
parameter int unsigned DATA_TLB_ENTRIES = 2,
parameter int unsigned ASID_WIDTH = 1
module cva6_mmu_sv32
import ariane_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int unsigned INSTR_TLB_ENTRIES = 2,
parameter int unsigned DATA_TLB_ENTRIES = 2,
parameter int unsigned ASID_WIDTH = 1
) (
input logic clk_i,
input logic rst_ni,
input logic flush_i,
input logic enable_translation_i,
input logic en_ld_st_translation_i, // enable virtual memory translation for load/stores
input logic clk_i,
input logic rst_ni,
input logic flush_i,
input logic enable_translation_i,
input logic en_ld_st_translation_i, // enable virtual memory translation for load/stores
// IF interface
input icache_arsp_t icache_areq_i,
output icache_areq_t icache_areq_o,
input icache_arsp_t icache_areq_i,
output icache_areq_t icache_areq_o,
// LSU interface
// this is a more minimalistic interface because the actual addressing logic is handled
// in the LSU as we distinguish load and stores, what we do here is simple address translation
input exception_t misaligned_ex_i,
input logic lsu_req_i, // request address translation
input logic [riscv::VLEN-1:0] lsu_vaddr_i, // virtual address in
input logic lsu_is_store_i, // the translation is requested by a store
input exception_t misaligned_ex_i,
input logic lsu_req_i, // request address translation
input logic [riscv::VLEN-1:0] lsu_vaddr_i, // virtual address in
input logic lsu_is_store_i, // the translation is requested by a store
// if we need to walk the page table we can't grant in the same cycle
// Cycle 0
output logic lsu_dtlb_hit_o, // sent in the same cycle as the request if translation hits in the DTLB
output logic [riscv::PPNW-1:0] lsu_dtlb_ppn_o, // ppn (send same cycle as hit)
output logic [riscv::PPNW-1:0] lsu_dtlb_ppn_o, // ppn (send same cycle as hit)
// Cycle 1
output logic lsu_valid_o, // translation is valid
output logic [riscv::PLEN-1:0] lsu_paddr_o, // translated address
output exception_t lsu_exception_o, // address translation threw an exception
output logic lsu_valid_o, // translation is valid
output logic [riscv::PLEN-1:0] lsu_paddr_o, // translated address
output exception_t lsu_exception_o, // address translation threw an exception
// General control signals
input riscv::priv_lvl_t priv_lvl_i,
input riscv::priv_lvl_t ld_st_priv_lvl_i,
input logic sum_i,
input logic mxr_i,
input riscv::priv_lvl_t priv_lvl_i,
input riscv::priv_lvl_t ld_st_priv_lvl_i,
input logic sum_i,
input logic mxr_i,
// input logic flag_mprv_i,
input logic [riscv::PPNW-1:0] satp_ppn_i,
input logic [ASID_WIDTH-1:0] asid_i,
input logic [ASID_WIDTH-1:0] asid_to_be_flushed_i,
input logic [riscv::VLEN-1:0] vaddr_to_be_flushed_i,
input logic flush_tlb_i,
input logic [riscv::PPNW-1:0] satp_ppn_i,
input logic [ASID_WIDTH-1:0] asid_i,
input logic [ASID_WIDTH-1:0] asid_to_be_flushed_i,
input logic [riscv::VLEN-1:0] vaddr_to_be_flushed_i,
input logic flush_tlb_i,
// Performance counters
output logic itlb_miss_o,
output logic dtlb_miss_o,
output logic itlb_miss_o,
output logic dtlb_miss_o,
// PTW memory interface
input dcache_req_o_t req_port_i,
output dcache_req_i_t req_port_o,
input dcache_req_o_t req_port_i,
output dcache_req_i_t req_port_o,
// PMP
input riscv::pmpcfg_t [15:0] pmpcfg_i,
input logic [15:0][riscv::PLEN-3:0] pmpaddr_i
input riscv::pmpcfg_t [15:0] pmpcfg_i,
input logic [15:0][riscv::PLEN-3:0] pmpaddr_i
);
logic iaccess_err; // insufficient privilege to access this instruction page
logic daccess_err; // insufficient privilege to access this data page
logic ptw_active; // PTW is currently walking a page table
logic walking_instr; // PTW is walking because of an ITLB miss
logic ptw_error; // PTW threw an exception
logic ptw_access_exception; // PTW threw an access exception (PMPs)
logic [riscv::PLEN-1:0] ptw_bad_paddr; // PTW PMP exception bad physical addr
logic iaccess_err; // insufficient privilege to access this instruction page
logic daccess_err; // insufficient privilege to access this data page
logic ptw_active; // PTW is currently walking a page table
logic walking_instr; // PTW is walking because of an ITLB miss
logic ptw_error; // PTW threw an exception
logic ptw_access_exception; // PTW threw an access exception (PMPs)
logic [riscv::PLEN-1:0] ptw_bad_paddr; // PTW PMP exception bad physical addr
logic [riscv::VLEN-1:0] update_vaddr;
tlb_update_sv32_t update_itlb, update_dtlb, update_shared_tlb;
logic [riscv::VLEN-1:0] update_vaddr;
tlb_update_sv32_t update_itlb, update_dtlb, update_shared_tlb;
logic itlb_lu_access;
riscv::pte_sv32_t itlb_content;
logic itlb_is_4M;
logic itlb_lu_hit;
logic itlb_lu_access;
riscv::pte_sv32_t itlb_content;
logic itlb_is_4M;
logic itlb_lu_hit;
logic dtlb_lu_access;
riscv::pte_sv32_t dtlb_content;
logic dtlb_is_4M;
logic dtlb_lu_hit;
logic dtlb_lu_access;
riscv::pte_sv32_t dtlb_content;
logic dtlb_is_4M;
logic dtlb_lu_hit;
logic shared_tlb_access;
logic [riscv::VLEN-1:0] shared_tlb_vaddr;
logic shared_tlb_hit;
logic shared_tlb_access;
logic [riscv::VLEN-1:0] shared_tlb_vaddr;
logic shared_tlb_hit;
logic itlb_req;
logic itlb_req;
// Assignments
assign itlb_lu_access = icache_areq_i.fetch_req;
assign dtlb_lu_access = lsu_req_i;
// Assignments
assign itlb_lu_access = icache_areq_i.fetch_req;
assign dtlb_lu_access = lsu_req_i;
cva6_tlb_sv32 #(
.CVA6Cfg ( CVA6Cfg ),
.TLB_ENTRIES ( INSTR_TLB_ENTRIES ),
.ASID_WIDTH ( ASID_WIDTH )
) i_itlb (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.flush_i ( flush_tlb_i ),
cva6_tlb_sv32 #(
.CVA6Cfg (CVA6Cfg),
.TLB_ENTRIES(INSTR_TLB_ENTRIES),
.ASID_WIDTH (ASID_WIDTH)
) i_itlb (
.clk_i (clk_i),
.rst_ni (rst_ni),
.flush_i(flush_tlb_i),
.update_i ( update_itlb ),
.update_i(update_itlb),
.lu_access_i ( itlb_lu_access ),
.lu_asid_i ( asid_i ),
.asid_to_be_flushed_i ( asid_to_be_flushed_i ),
.vaddr_to_be_flushed_i ( vaddr_to_be_flushed_i ),
.lu_vaddr_i ( icache_areq_i.fetch_vaddr ),
.lu_content_o ( itlb_content ),
.lu_access_i (itlb_lu_access),
.lu_asid_i (asid_i),
.asid_to_be_flushed_i (asid_to_be_flushed_i),
.vaddr_to_be_flushed_i(vaddr_to_be_flushed_i),
.lu_vaddr_i (icache_areq_i.fetch_vaddr),
.lu_content_o (itlb_content),
.lu_is_4M_o ( itlb_is_4M ),
.lu_hit_o ( itlb_lu_hit )
);
.lu_is_4M_o(itlb_is_4M),
.lu_hit_o (itlb_lu_hit)
);
cva6_tlb_sv32 #(
.CVA6Cfg ( CVA6Cfg ),
.TLB_ENTRIES ( DATA_TLB_ENTRIES ),
.ASID_WIDTH ( ASID_WIDTH )
) i_dtlb (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.flush_i ( flush_tlb_i ),
cva6_tlb_sv32 #(
.CVA6Cfg (CVA6Cfg),
.TLB_ENTRIES(DATA_TLB_ENTRIES),
.ASID_WIDTH (ASID_WIDTH)
) i_dtlb (
.clk_i (clk_i),
.rst_ni (rst_ni),
.flush_i(flush_tlb_i),
.update_i ( update_dtlb ),
.update_i(update_dtlb),
.lu_access_i ( dtlb_lu_access ),
.lu_asid_i ( asid_i ),
.asid_to_be_flushed_i ( asid_to_be_flushed_i ),
.vaddr_to_be_flushed_i ( vaddr_to_be_flushed_i ),
.lu_vaddr_i ( lsu_vaddr_i ),
.lu_content_o ( dtlb_content ),
.lu_access_i (dtlb_lu_access),
.lu_asid_i (asid_i),
.asid_to_be_flushed_i (asid_to_be_flushed_i),
.vaddr_to_be_flushed_i(vaddr_to_be_flushed_i),
.lu_vaddr_i (lsu_vaddr_i),
.lu_content_o (dtlb_content),
.lu_is_4M_o ( dtlb_is_4M ),
.lu_hit_o ( dtlb_lu_hit )
);
.lu_is_4M_o(dtlb_is_4M),
.lu_hit_o (dtlb_lu_hit)
);
cva6_shared_tlb_sv32 #(
.CVA6Cfg ( CVA6Cfg ),
.SHARED_TLB_DEPTH ( 64 ),
.SHARED_TLB_WAYS ( 2 ),
.ASID_WIDTH ( ASID_WIDTH )
) i_shared_tlb (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.flush_i ( flush_tlb_i ),
cva6_shared_tlb_sv32 #(
.CVA6Cfg (CVA6Cfg),
.SHARED_TLB_DEPTH(64),
.SHARED_TLB_WAYS (2),
.ASID_WIDTH (ASID_WIDTH)
) i_shared_tlb (
.clk_i (clk_i),
.rst_ni (rst_ni),
.flush_i(flush_tlb_i),
.enable_translation_i ( enable_translation_i ),
.en_ld_st_translation_i ( en_ld_st_translation_i),
.enable_translation_i (enable_translation_i),
.en_ld_st_translation_i(en_ld_st_translation_i),
.asid_i (asid_i ),
// from TLBs
// did we miss?
.itlb_access_i ( itlb_lu_access ),
.itlb_hit_i ( itlb_lu_hit ),
.itlb_vaddr_i ( icache_areq_i.fetch_vaddr ),
.asid_i (asid_i),
// from TLBs
// did we miss?
.itlb_access_i(itlb_lu_access),
.itlb_hit_i (itlb_lu_hit),
.itlb_vaddr_i (icache_areq_i.fetch_vaddr),
.dtlb_access_i ( dtlb_lu_access ),
.dtlb_hit_i ( dtlb_lu_hit ),
.dtlb_vaddr_i ( lsu_vaddr_i ),
.dtlb_access_i(dtlb_lu_access),
.dtlb_hit_i (dtlb_lu_hit),
.dtlb_vaddr_i (lsu_vaddr_i),
// to TLBs, update logic
.itlb_update_o ( update_itlb ),
.dtlb_update_o ( update_dtlb ),
// to TLBs, update logic
.itlb_update_o(update_itlb),
.dtlb_update_o(update_dtlb),
// Performance counters
.itlb_miss_o (itlb_miss_o ),
.dtlb_miss_o (dtlb_miss_o ),
// Performance counters
.itlb_miss_o(itlb_miss_o),
.dtlb_miss_o(dtlb_miss_o),
.shared_tlb_access_o ( shared_tlb_access ),
.shared_tlb_hit_o ( shared_tlb_hit ),
.shared_tlb_vaddr_o ( shared_tlb_vaddr ),
.shared_tlb_access_o(shared_tlb_access),
.shared_tlb_hit_o (shared_tlb_hit),
.shared_tlb_vaddr_o (shared_tlb_vaddr),
.itlb_req_o ( itlb_req ),
// to update shared tlb
.shared_tlb_update_i (update_shared_tlb )
);
.itlb_req_o (itlb_req),
// to update shared tlb
.shared_tlb_update_i(update_shared_tlb)
);
cva6_ptw_sv32 #(
.CVA6Cfg ( CVA6Cfg ),
.ASID_WIDTH ( ASID_WIDTH )
) i_ptw (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.flush_i ( flush_i ),
cva6_ptw_sv32 #(
.CVA6Cfg (CVA6Cfg),
.ASID_WIDTH(ASID_WIDTH)
) i_ptw (
.clk_i (clk_i),
.rst_ni (rst_ni),
.flush_i(flush_i),
.ptw_active_o ( ptw_active ),
.walking_instr_o ( walking_instr ),
.ptw_error_o ( ptw_error ),
.ptw_access_exception_o ( ptw_access_exception ),
.ptw_active_o (ptw_active),
.walking_instr_o (walking_instr),
.ptw_error_o (ptw_error),
.ptw_access_exception_o(ptw_access_exception),
.lsu_is_store_i ( lsu_is_store_i ),
// PTW memory interface
.req_port_i ( req_port_i ),
.req_port_o ( req_port_o ),
.lsu_is_store_i(lsu_is_store_i),
// PTW memory interface
.req_port_i (req_port_i),
.req_port_o (req_port_o),
// to Shared TLB, update logic
.shared_tlb_update_o ( update_shared_tlb ),
// to Shared TLB, update logic
.shared_tlb_update_o(update_shared_tlb),
.update_vaddr_o ( update_vaddr ),
.update_vaddr_o(update_vaddr),
.asid_i ( asid_i ),
.asid_i(asid_i),
// from shared TLB
// did we miss?
.shared_tlb_access_i ( shared_tlb_access ),
.shared_tlb_hit_i ( shared_tlb_hit ),
.shared_tlb_vaddr_i ( shared_tlb_vaddr ),
// from shared TLB
// did we miss?
.shared_tlb_access_i(shared_tlb_access),
.shared_tlb_hit_i (shared_tlb_hit),
.shared_tlb_vaddr_i (shared_tlb_vaddr),
.itlb_req_i ( itlb_req ),
.itlb_req_i(itlb_req),
// from CSR file
.satp_ppn_i ( satp_ppn_i ), // ppn from satp
.mxr_i ( mxr_i ),
// from CSR file
.satp_ppn_i(satp_ppn_i), // ppn from satp
.mxr_i (mxr_i),
// Performance counters
.shared_tlb_miss_o ( ), //open for now
// Performance counters
.shared_tlb_miss_o(), //open for now
// PMP
.pmpcfg_i ( pmpcfg_i ),
.pmpaddr_i ( pmpaddr_i ),
.bad_paddr_o ( ptw_bad_paddr )
// PMP
.pmpcfg_i (pmpcfg_i),
.pmpaddr_i (pmpaddr_i),
.bad_paddr_o(ptw_bad_paddr)
);
);
// ila_1 i_ila_1 (
// .clk(clk_i), // input wire clk
// .probe0({req_port_o.address_tag, req_port_o.address_index}),
// .probe1(req_port_o.data_req), // input wire [63:0] probe1
// .probe2(req_port_i.data_gnt), // input wire [0:0] probe2
// .probe3(req_port_i.data_rdata), // input wire [0:0] probe3
// .probe4(req_port_i.data_rvalid), // input wire [0:0] probe4
// .probe5(ptw_error), // input wire [1:0] probe5
// .probe6(update_vaddr), // input wire [0:0] probe6
// .probe7(update_itlb.valid), // input wire [0:0] probe7
// .probe8(update_dtlb.valid), // input wire [0:0] probe8
// .probe9(dtlb_lu_access), // input wire [0:0] probe9
// .probe10(lsu_vaddr_i), // input wire [0:0] probe10
// .probe11(dtlb_lu_hit), // input wire [0:0] probe11
// .probe12(itlb_lu_access), // input wire [0:0] probe12
// .probe13(icache_areq_i.fetch_vaddr), // input wire [0:0] probe13
// .probe14(itlb_lu_hit) // input wire [0:0] probe14
// );
// ila_1 i_ila_1 (
// .clk(clk_i), // input wire clk
// .probe0({req_port_o.address_tag, req_port_o.address_index}),
// .probe1(req_port_o.data_req), // input wire [63:0] probe1
// .probe2(req_port_i.data_gnt), // input wire [0:0] probe2
// .probe3(req_port_i.data_rdata), // input wire [0:0] probe3
// .probe4(req_port_i.data_rvalid), // input wire [0:0] probe4
// .probe5(ptw_error), // input wire [1:0] probe5
// .probe6(update_vaddr), // input wire [0:0] probe6
// .probe7(update_itlb.valid), // input wire [0:0] probe7
// .probe8(update_dtlb.valid), // input wire [0:0] probe8
// .probe9(dtlb_lu_access), // input wire [0:0] probe9
// .probe10(lsu_vaddr_i), // input wire [0:0] probe10
// .probe11(dtlb_lu_hit), // input wire [0:0] probe11
// .probe12(itlb_lu_access), // input wire [0:0] probe12
// .probe13(icache_areq_i.fetch_vaddr), // input wire [0:0] probe13
// .probe14(itlb_lu_hit) // input wire [0:0] probe14
// );
//-----------------------
// Instruction Interface
//-----------------------
logic match_any_execute_region;
logic pmp_instr_allow;
//-----------------------
// Instruction Interface
//-----------------------
logic match_any_execute_region;
logic pmp_instr_allow;
// The instruction interface is a simple request response interface
always_comb begin : instr_interface
// MMU disabled: just pass through
icache_areq_o.fetch_valid = icache_areq_i.fetch_req;
if (riscv::PLEN > riscv::VLEN)
icache_areq_o.fetch_paddr = {{riscv::PLEN-riscv::VLEN{1'b0}}, icache_areq_i.fetch_vaddr};// play through in case we disabled address translation
else
icache_areq_o.fetch_paddr = icache_areq_i.fetch_vaddr[riscv::PLEN-1:0];// play through in case we disabled address translation
// two potential exception sources:
// 1. HPTW threw an exception -> signal with a page fault exception
// 2. We got an access error because of insufficient permissions -> throw an access exception
icache_areq_o.fetch_exception = '0;
// Check whether we are allowed to access this memory region from a fetch perspective
iaccess_err = icache_areq_i.fetch_req && (((priv_lvl_i == riscv::PRIV_LVL_U) && ~itlb_content.u)
// The instruction interface is a simple request response interface
always_comb begin : instr_interface
// MMU disabled: just pass through
icache_areq_o.fetch_valid = icache_areq_i.fetch_req;
if (riscv::PLEN > riscv::VLEN)
icache_areq_o.fetch_paddr = {
{riscv::PLEN - riscv::VLEN{1'b0}}, icache_areq_i.fetch_vaddr
}; // play through in case we disabled address translation
else
icache_areq_o.fetch_paddr = icache_areq_i.fetch_vaddr[riscv::PLEN-1:0];// play through in case we disabled address translation
// two potential exception sources:
// 1. HPTW threw an exception -> signal with a page fault exception
// 2. We got an access error because of insufficient permissions -> throw an access exception
icache_areq_o.fetch_exception = '0;
// Check whether we are allowed to access this memory region from a fetch perspective
iaccess_err = icache_areq_i.fetch_req && (((priv_lvl_i == riscv::PRIV_LVL_U) && ~itlb_content.u)
|| ((priv_lvl_i == riscv::PRIV_LVL_S) && itlb_content.u));
// MMU enabled: address from TLB, request delayed until hit. Error when TLB
// hit and no access right or TLB hit and translated address not valid (e.g.
// AXI decode error), or when PTW performs walk due to ITLB miss and raises
// an error.
if (enable_translation_i) begin
// we work with SV32, so if VM is enabled, check that all bits [riscv::VLEN-1:riscv::SV-1] are equal
if (icache_areq_i.fetch_req && !((&icache_areq_i.fetch_vaddr[riscv::VLEN-1:riscv::SV-1]) == 1'b1 || (|icache_areq_i.fetch_vaddr[riscv::VLEN-1:riscv::SV-1]) == 1'b0)) begin
icache_areq_o.fetch_exception = {riscv::INSTR_ACCESS_FAULT, {{riscv::XLEN-riscv::VLEN{1'b0}}, icache_areq_i.fetch_vaddr}, 1'b1};
end
// MMU enabled: address from TLB, request delayed until hit. Error when TLB
// hit and no access right or TLB hit and translated address not valid (e.g.
// AXI decode error), or when PTW performs walk due to ITLB miss and raises
// an error.
if (enable_translation_i) begin
// we work with SV32, so if VM is enabled, check that all bits [riscv::VLEN-1:riscv::SV-1] are equal
if (icache_areq_i.fetch_req && !((&icache_areq_i.fetch_vaddr[riscv::VLEN-1:riscv::SV-1]) == 1'b1 || (|icache_areq_i.fetch_vaddr[riscv::VLEN-1:riscv::SV-1]) == 1'b0)) begin
icache_areq_o.fetch_exception = {
riscv::INSTR_ACCESS_FAULT,
{{riscv::XLEN - riscv::VLEN{1'b0}}, icache_areq_i.fetch_vaddr},
1'b1
};
end
icache_areq_o.fetch_valid = 1'b0;
icache_areq_o.fetch_valid = 1'b0;
// 4K page
icache_areq_o.fetch_paddr = {itlb_content.ppn, icache_areq_i.fetch_vaddr[11:0]};
// Mega page
if (itlb_is_4M) begin
icache_areq_o.fetch_paddr[21:12] = icache_areq_i.fetch_vaddr[21:12];
end
// 4K page
icache_areq_o.fetch_paddr = {itlb_content.ppn, icache_areq_i.fetch_vaddr[11:0]};
// Mega page
if (itlb_is_4M) begin
icache_areq_o.fetch_paddr[21:12] = icache_areq_i.fetch_vaddr[21:12];
end
// ---------
// ITLB Hit
// --------
// if we hit the ITLB output the request signal immediately
if (itlb_lu_hit) begin
icache_areq_o.fetch_valid = icache_areq_i.fetch_req;
// we got an access error
if (iaccess_err) begin
// throw a page fault
icache_areq_o.fetch_exception = {riscv::INSTR_PAGE_FAULT, {{riscv::XLEN-riscv::VLEN{1'b0}}, icache_areq_i.fetch_vaddr}, 1'b1};//to check on wave --> not connected
end else if (!pmp_instr_allow) begin
icache_areq_o.fetch_exception = {riscv::INSTR_ACCESS_FAULT, icache_areq_i.fetch_vaddr, 1'b1};//to check on wave --> not connected
end
end else
// ---------
// ITLB Miss
// ---------
// watch out for exceptions happening during walking the page table
if (ptw_active && walking_instr) begin
icache_areq_o.fetch_valid = ptw_error | ptw_access_exception;
if (ptw_error) icache_areq_o.fetch_exception = {riscv::INSTR_PAGE_FAULT, {{riscv::XLEN-riscv::VLEN{1'b0}}, update_vaddr}, 1'b1};//to check on wave
// TODO(moschn,zarubaf): What should the value of tval be in this case?
else icache_areq_o.fetch_exception = {riscv::INSTR_ACCESS_FAULT, ptw_bad_paddr[riscv::PLEN-1:2], 1'b1};//to check on wave --> not connected
end
end
// if it didn't match any execute region throw an `Instruction Access Fault`
// or: if we are not translating, check PMPs immediately on the paddr
if (!match_any_execute_region || (!enable_translation_i && !pmp_instr_allow)) begin
icache_areq_o.fetch_exception = {riscv::INSTR_ACCESS_FAULT, icache_areq_o.fetch_paddr[riscv::PLEN-1:2], 1'b1};//to check on wave --> not connected
// ---------
// ITLB Hit
// --------
// if we hit the ITLB output the request signal immediately
if (itlb_lu_hit) begin
icache_areq_o.fetch_valid = icache_areq_i.fetch_req;
// we got an access error
if (iaccess_err) begin
// throw a page fault
icache_areq_o.fetch_exception = {
riscv::INSTR_PAGE_FAULT,
{{riscv::XLEN - riscv::VLEN{1'b0}}, icache_areq_i.fetch_vaddr},
1'b1
}; //to check on wave --> not connected
end else if (!pmp_instr_allow) begin
icache_areq_o.fetch_exception = {
riscv::INSTR_ACCESS_FAULT, icache_areq_i.fetch_vaddr, 1'b1
}; //to check on wave --> not connected
end
end else
// ---------
// ITLB Miss
// ---------
// watch out for exceptions happening during walking the page table
if (ptw_active && walking_instr) begin
icache_areq_o.fetch_valid = ptw_error | ptw_access_exception;
if (ptw_error)
icache_areq_o.fetch_exception = {
riscv::INSTR_PAGE_FAULT, {{riscv::XLEN - riscv::VLEN{1'b0}}, update_vaddr}, 1'b1
}; //to check on wave
// TODO(moschn,zarubaf): What should the value of tval be in this case?
else
icache_areq_o.fetch_exception = {
riscv::INSTR_ACCESS_FAULT, ptw_bad_paddr[riscv::PLEN-1:2], 1'b1
}; //to check on wave --> not connected
end
end
// if it didn't match any execute region throw an `Instruction Access Fault`
// or: if we are not translating, check PMPs immediately on the paddr
if (!match_any_execute_region || (!enable_translation_i && !pmp_instr_allow)) begin
icache_areq_o.fetch_exception = {
riscv::INSTR_ACCESS_FAULT, icache_areq_o.fetch_paddr[riscv::PLEN-1:2], 1'b1
}; //to check on wave --> not connected
end
end
// check for execute flag on memory
assign match_any_execute_region = config_pkg::is_inside_execute_regions(CVA6Cfg, {{64-riscv::PLEN{1'b0}}, icache_areq_o.fetch_paddr});
// check for execute flag on memory
assign match_any_execute_region = config_pkg::is_inside_execute_regions(
CVA6Cfg, {{64 - riscv::PLEN{1'b0}}, icache_areq_o.fetch_paddr}
);
// Instruction fetch PMP check.
// Checks the physical fetch address (icache_areq_o.fetch_paddr) and drives
// pmp_instr_allow, which the instr_interface block uses to decide whether to
// raise an INSTR_ACCESS_FAULT.
pmp #(
.PLEN ( riscv::PLEN ),
.PMP_LEN ( riscv::PLEN - 2 ),
.NR_ENTRIES ( CVA6Cfg.NrPMPEntries )
) i_pmp_if (
// address under check
.addr_i ( icache_areq_o.fetch_paddr ),
// implicit named connection: binds to the signal priv_lvl_i of this module
.priv_lvl_i,
// we will always execute on the instruction fetch port
.access_type_i ( riscv::ACCESS_EXEC ),
// Configuration
.conf_addr_i ( pmpaddr_i ),
.conf_i ( pmpcfg_i ),
// result of the check
.allow_o ( pmp_instr_allow )
);
// PMP check for the instruction-fetch path.
// The fetch physical address is checked as an execute access; the verdict
// (pmp_instr_allow) is consumed by the instr_interface block above to raise
// an INSTR_ACCESS_FAULT.
pmp #(
  .NR_ENTRIES(CVA6Cfg.NrPMPEntries),
  .PMP_LEN   (riscv::PLEN - 2),
  .PLEN      (riscv::PLEN)
) i_pmp_if (
  // Configuration
  .conf_i       (pmpcfg_i),
  .conf_addr_i  (pmpaddr_i),
  // Access under check: the instruction fetch port only ever executes
  .access_type_i(riscv::ACCESS_EXEC),
  .priv_lvl_i   (priv_lvl_i),
  .addr_i       (icache_areq_o.fetch_paddr),
  // Verdict of the check
  .allow_o      (pmp_instr_allow)
);
//-----------------------
// Data Interface
//-----------------------
logic [riscv::VLEN-1:0] lsu_vaddr_n, lsu_vaddr_q;
riscv::pte_sv32_t dtlb_pte_n, dtlb_pte_q;
exception_t misaligned_ex_n, misaligned_ex_q;
logic lsu_req_n, lsu_req_q;
logic lsu_is_store_n, lsu_is_store_q;
logic dtlb_hit_n, dtlb_hit_q;
logic dtlb_is_4M_n, dtlb_is_4M_q;
//-----------------------
// Data Interface
//-----------------------
logic [riscv::VLEN-1:0] lsu_vaddr_n, lsu_vaddr_q;
riscv::pte_sv32_t dtlb_pte_n, dtlb_pte_q;
exception_t misaligned_ex_n, misaligned_ex_q;
logic lsu_req_n, lsu_req_q;
logic lsu_is_store_n, lsu_is_store_q;
logic dtlb_hit_n, dtlb_hit_q;
logic dtlb_is_4M_n, dtlb_is_4M_q;
// check if we need to do translation or if we are always ready (e.g.: we are not translating anything)
assign lsu_dtlb_hit_o = (en_ld_st_translation_i) ? dtlb_lu_hit : 1'b1;
// check if we need to do translation or if we are always ready (e.g.: we are not translating anything)
assign lsu_dtlb_hit_o = (en_ld_st_translation_i) ? dtlb_lu_hit : 1'b1;
// Wires to PMP checks
riscv::pmp_access_t pmp_access_type;
logic pmp_data_allow;
localparam PPNWMin = (riscv::PPNW-1 > 29) ? 29 : riscv::PPNW-1;
// The data interface is simpler and only consists of a request/response interface
always_comb begin : data_interface
// save request and DTLB response
lsu_vaddr_n = lsu_vaddr_i;
lsu_req_n = lsu_req_i;
misaligned_ex_n = misaligned_ex_i;
dtlb_pte_n = dtlb_content;
dtlb_hit_n = dtlb_lu_hit;
lsu_is_store_n = lsu_is_store_i;
dtlb_is_4M_n = dtlb_is_4M;
// Wires to PMP checks
riscv::pmp_access_t pmp_access_type;
logic pmp_data_allow;
localparam PPNWMin = (riscv::PPNW - 1 > 29) ? 29 : riscv::PPNW - 1;
// The data interface is simpler and only consists of a request/response interface
always_comb begin : data_interface
// save request and DTLB response
lsu_vaddr_n = lsu_vaddr_i;
lsu_req_n = lsu_req_i;
misaligned_ex_n = misaligned_ex_i;
dtlb_pte_n = dtlb_content;
dtlb_hit_n = dtlb_lu_hit;
lsu_is_store_n = lsu_is_store_i;
dtlb_is_4M_n = dtlb_is_4M;
if (riscv::PLEN > riscv::VLEN) begin
lsu_paddr_o = {{riscv::PLEN-riscv::VLEN{1'b0}}, lsu_vaddr_q};
lsu_dtlb_ppn_o = {{riscv::PLEN-riscv::VLEN{1'b0}},lsu_vaddr_n[riscv::VLEN-1:12]};
if (riscv::PLEN > riscv::VLEN) begin
lsu_paddr_o = {{riscv::PLEN - riscv::VLEN{1'b0}}, lsu_vaddr_q};
lsu_dtlb_ppn_o = {{riscv::PLEN - riscv::VLEN{1'b0}}, lsu_vaddr_n[riscv::VLEN-1:12]};
end else begin
lsu_paddr_o = lsu_vaddr_q[riscv::PLEN-1:0];
lsu_dtlb_ppn_o = lsu_vaddr_n[riscv::PPNW-1:0];
end
lsu_valid_o = lsu_req_q;
lsu_exception_o = misaligned_ex_q;
pmp_access_type = lsu_is_store_q ? riscv::ACCESS_WRITE : riscv::ACCESS_READ;
// mute misaligned exceptions if there is no request otherwise they will throw accidental exceptions
misaligned_ex_n.valid = misaligned_ex_i.valid & lsu_req_i;
// Check if the User flag is set, then we may only access it in supervisor mode
// if SUM is enabled
daccess_err = (ld_st_priv_lvl_i == riscv::PRIV_LVL_S && !sum_i && dtlb_pte_q.u) || // SUM is not set and we are trying to access a user page in supervisor mode
(ld_st_priv_lvl_i == riscv::PRIV_LVL_U && !dtlb_pte_q.u); // this is not a user page but we are in user mode and trying to access it
// translation is enabled and no misaligned exception occurred
if (en_ld_st_translation_i && !misaligned_ex_q.valid) begin
lsu_valid_o = 1'b0;
// 4K page
lsu_paddr_o = {dtlb_pte_q.ppn, lsu_vaddr_q[11:0]};
lsu_dtlb_ppn_o = dtlb_content.ppn;
// Mega page
if (dtlb_is_4M_q) begin
lsu_paddr_o[21:12] = lsu_vaddr_q[21:12];
lsu_dtlb_ppn_o[21:12] = lsu_vaddr_n[21:12];
end
// ---------
// DTLB Hit
// --------
if (dtlb_hit_q && lsu_req_q) begin
lsu_valid_o = 1'b1;
// exception priority:
// PAGE_FAULTS have higher priority than ACCESS_FAULTS
// virtual memory based exceptions are PAGE_FAULTS
// physical memory based exceptions are ACCESS_FAULTS (PMA/PMP)
// this is a store
if (lsu_is_store_q) begin
// check if the page is write-able and we are not violating privileges
// also check if the dirty flag is set
if (!dtlb_pte_q.w || daccess_err || !dtlb_pte_q.d) begin
lsu_exception_o = {
riscv::STORE_PAGE_FAULT,
{{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}}, lsu_vaddr_q},
1'b1
}; //to check on wave
// Check if any PMPs are violated
end else if (!pmp_data_allow) begin
lsu_exception_o = {
riscv::ST_ACCESS_FAULT, lsu_paddr_o[riscv::PLEN-1:2], 1'b1
}; //only 32 bits on 34b of lsu_paddr_o are returned.
end
// this is a load
end else begin
lsu_paddr_o = lsu_vaddr_q[riscv::PLEN-1:0];
lsu_dtlb_ppn_o = lsu_vaddr_n[riscv::PPNW-1:0];
// check for sufficient access privileges - throw a page fault if necessary
if (daccess_err) begin
lsu_exception_o = {
riscv::LOAD_PAGE_FAULT,
{{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}}, lsu_vaddr_q},
1'b1
};
// Check if any PMPs are violated
end else if (!pmp_data_allow) begin
lsu_exception_o = {
riscv::LD_ACCESS_FAULT, lsu_paddr_o[riscv::PLEN-1:2], 1'b1
}; //only 32 bits on 34b of lsu_paddr_o are returned.
end
end
lsu_valid_o = lsu_req_q;
lsu_exception_o = misaligned_ex_q;
pmp_access_type = lsu_is_store_q ? riscv::ACCESS_WRITE : riscv::ACCESS_READ;
end else
// mute misaligned exceptions if there is no request otherwise they will throw accidental exceptions
misaligned_ex_n.valid = misaligned_ex_i.valid & lsu_req_i;
// Check if the User flag is set, then we may only access it in supervisor mode
// if SUM is enabled
daccess_err = (ld_st_priv_lvl_i == riscv::PRIV_LVL_S && !sum_i && dtlb_pte_q.u) || // SUM is not set and we are trying to access a user page in supervisor mode
(ld_st_priv_lvl_i == riscv::PRIV_LVL_U && !dtlb_pte_q.u); // this is not a user page but we are in user mode and trying to access it
// translation is enabled and no misaligned exception occurred
if (en_ld_st_translation_i && !misaligned_ex_q.valid) begin
lsu_valid_o = 1'b0;
// 4K page
lsu_paddr_o = {dtlb_pte_q.ppn, lsu_vaddr_q[11:0]};
lsu_dtlb_ppn_o = dtlb_content.ppn;
// Mega page
if (dtlb_is_4M_q) begin
lsu_paddr_o[21:12] = lsu_vaddr_q[21:12];
lsu_dtlb_ppn_o[21:12] = lsu_vaddr_n[21:12];
end
// ---------
// DTLB Hit
// --------
if (dtlb_hit_q && lsu_req_q) begin
lsu_valid_o = 1'b1;
// exception priority:
// PAGE_FAULTS have higher priority than ACCESS_FAULTS
// virtual memory based exceptions are PAGE_FAULTS
// physical memory based exceptions are ACCESS_FAULTS (PMA/PMP)
// this is a store
if (lsu_is_store_q) begin
// check if the page is write-able and we are not violating privileges
// also check if the dirty flag is set
if (!dtlb_pte_q.w || daccess_err || !dtlb_pte_q.d) begin
lsu_exception_o = {riscv::STORE_PAGE_FAULT, {{riscv::XLEN-riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}},lsu_vaddr_q}, 1'b1}; //to check on wave
// Check if any PMPs are violated
end else if (!pmp_data_allow) begin
lsu_exception_o = {riscv::ST_ACCESS_FAULT, lsu_paddr_o[riscv::PLEN-1:2], 1'b1}; //only 32 bits on 34b of lsu_paddr_o are returned.
end
// this is a load
end else begin
// check for sufficient access privileges - throw a page fault if necessary
if (daccess_err) begin
lsu_exception_o = {riscv::LOAD_PAGE_FAULT, {{riscv::XLEN-riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}},lsu_vaddr_q}, 1'b1};
// Check if any PMPs are violated
end else if (!pmp_data_allow) begin
lsu_exception_o = {riscv::LD_ACCESS_FAULT, lsu_paddr_o[riscv::PLEN-1:2], 1'b1}; //only 32 bits on 34b of lsu_paddr_o are returned.
end
end
end else
// ---------
// DTLB Miss
// ---------
// watch out for exceptions
if (ptw_active && !walking_instr) begin
// page table walker threw an exception
if (ptw_error) begin
// an error makes the translation valid
lsu_valid_o = 1'b1;
// the page table walker can only throw page faults
if (lsu_is_store_q) begin
lsu_exception_o = {riscv::STORE_PAGE_FAULT, {{riscv::XLEN-riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}},update_vaddr}, 1'b1};
end else begin
lsu_exception_o = {riscv::LOAD_PAGE_FAULT, {{riscv::XLEN-riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}},update_vaddr}, 1'b1};
end
end
if (ptw_access_exception) begin
// an error makes the translation valid
lsu_valid_o = 1'b1;
// a PMP access exception was raised during the page table walk -> report an access fault
lsu_exception_o = {riscv::LD_ACCESS_FAULT, ptw_bad_paddr[riscv::PLEN-1:2], 1'b1};
end
end
// ---------
// DTLB Miss
// ---------
// watch out for exceptions
if (ptw_active && !walking_instr) begin
// page table walker threw an exception
if (ptw_error) begin
// an error makes the translation valid
lsu_valid_o = 1'b1;
// the page table walker can only throw page faults
if (lsu_is_store_q) begin
lsu_exception_o = {
riscv::STORE_PAGE_FAULT,
{{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}}, update_vaddr},
1'b1
};
end else begin
lsu_exception_o = {
riscv::LOAD_PAGE_FAULT,
{{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}}, update_vaddr},
1'b1
};
end
end
// If translation is not enabled, check the paddr immediately against PMPs
else if (lsu_req_q && !misaligned_ex_q.valid && !pmp_data_allow) begin
if (lsu_is_store_q) begin
lsu_exception_o = {riscv::ST_ACCESS_FAULT, lsu_paddr_o[riscv::PLEN-1:2], 1'b1};
end else begin
lsu_exception_o = {riscv::LD_ACCESS_FAULT, lsu_paddr_o[riscv::PLEN-1:2], 1'b1};
end
if (ptw_access_exception) begin
// an error makes the translation valid
lsu_valid_o = 1'b1;
// a PMP access exception was raised during the page table walk -> report an access fault
lsu_exception_o = {riscv::LD_ACCESS_FAULT, ptw_bad_paddr[riscv::PLEN-1:2], 1'b1};
end
end
end // If translation is not enabled, check the paddr immediately against PMPs
else if (lsu_req_q && !misaligned_ex_q.valid && !pmp_data_allow) begin
if (lsu_is_store_q) begin
lsu_exception_o = {riscv::ST_ACCESS_FAULT, lsu_paddr_o[riscv::PLEN-1:2], 1'b1};
end else begin
lsu_exception_o = {riscv::LD_ACCESS_FAULT, lsu_paddr_o[riscv::PLEN-1:2], 1'b1};
end
end
end
// Load/store PMP check.
// Checks lsu_paddr_o and drives pmp_data_allow, which the data_interface
// block uses to raise ST_ACCESS_FAULT / LD_ACCESS_FAULT.
pmp #(
.PLEN ( riscv::PLEN ),
.PMP_LEN ( riscv::PLEN - 2 ),
.NR_ENTRIES ( CVA6Cfg.NrPMPEntries )
) i_pmp_data (
// address under check
.addr_i ( lsu_paddr_o ),
.priv_lvl_i ( ld_st_priv_lvl_i ),
// ACCESS_WRITE for stores, ACCESS_READ otherwise (selected by lsu_is_store_q in data_interface)
.access_type_i ( pmp_access_type ),
// Configuration
.conf_addr_i ( pmpaddr_i ),
.conf_i ( pmpcfg_i ),
// result of the check
.allow_o ( pmp_data_allow )
);
// PMP check for the load/store path.
// lsu_paddr_o is checked with the access type chosen in data_interface
// (ACCESS_WRITE for stores, ACCESS_READ otherwise); the verdict
// (pmp_data_allow) is used there to raise ST_/LD_ACCESS_FAULT.
pmp #(
  .NR_ENTRIES(CVA6Cfg.NrPMPEntries),
  .PMP_LEN   (riscv::PLEN - 2),
  .PLEN      (riscv::PLEN)
) i_pmp_data (
  // Configuration
  .conf_i       (pmpcfg_i),
  .conf_addr_i  (pmpaddr_i),
  // Access under check
  .access_type_i(pmp_access_type),
  .priv_lvl_i   (ld_st_priv_lvl_i),
  .addr_i       (lsu_paddr_o),
  // Verdict of the check
  .allow_o      (pmp_data_allow)
);
// ----------
// Registers
// ----------
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
lsu_vaddr_q <= '0;
lsu_req_q <= '0;
misaligned_ex_q <= '0;
dtlb_pte_q <= '0;
dtlb_hit_q <= '0;
lsu_is_store_q <= '0;
dtlb_is_4M_q <= '0;
end else begin
lsu_vaddr_q <= lsu_vaddr_n;
lsu_req_q <= lsu_req_n;
misaligned_ex_q <= misaligned_ex_n;
dtlb_pte_q <= dtlb_pte_n;
dtlb_hit_q <= dtlb_hit_n;
lsu_is_store_q <= lsu_is_store_n;
dtlb_is_4M_q <= dtlb_is_4M_n;
end
// ---------------------------------------------------------------
// Pipeline registers of the data interface
// Every *_q register samples its *_n counterpart on the rising
// clock edge; all of them are cleared by the asynchronous,
// active-low reset.
// ---------------------------------------------------------------
always_ff @(posedge clk_i or negedge rst_ni) begin
  if (!rst_ni) begin
    // request bookkeeping
    lsu_req_q       <= '0;
    lsu_is_store_q  <= '0;
    lsu_vaddr_q     <= '0;
    misaligned_ex_q <= '0;
    // registered DTLB lookup result
    dtlb_hit_q      <= '0;
    dtlb_is_4M_q    <= '0;
    dtlb_pte_q      <= '0;
  end else begin
    // request bookkeeping
    lsu_req_q       <= lsu_req_n;
    lsu_is_store_q  <= lsu_is_store_n;
    lsu_vaddr_q     <= lsu_vaddr_n;
    misaligned_ex_q <= misaligned_ex_n;
    // registered DTLB lookup result
    dtlb_hit_q      <= dtlb_hit_n;
    dtlb_is_4M_q    <= dtlb_is_4M_n;
    dtlb_pte_q      <= dtlb_pte_n;
  end
end
endmodule

View file

@ -26,374 +26,375 @@
/* verilator lint_off WIDTH */
module cva6_ptw_sv32 import ariane_pkg::*; #(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int ASID_WIDTH = 1
module cva6_ptw_sv32
import ariane_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int ASID_WIDTH = 1
) (
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic flush_i, // flush everything, we need to do this because
// actually everything we do is speculative at this stage
// e.g.: there could be a CSR instruction that changes everything
output logic ptw_active_o,
output logic walking_instr_o, // set when walking for TLB
output logic ptw_error_o, // set when an error occurred
output logic ptw_access_exception_o, // set when a PMP access exception occurred
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic flush_i, // flush everything, we need to do this because
// actually everything we do is speculative at this stage
// e.g.: there could be a CSR instruction that changes everything
output logic ptw_active_o,
output logic walking_instr_o, // set when walking for TLB
output logic ptw_error_o, // set when an error occurred
output logic ptw_access_exception_o, // set when a PMP access exception occurred
input logic lsu_is_store_i, // this translation was triggered by a store
input logic lsu_is_store_i, // this translation was triggered by a store
// PTW memory interface
input dcache_req_o_t req_port_i,
output dcache_req_i_t req_port_o,
input dcache_req_o_t req_port_i,
output dcache_req_i_t req_port_o,
// to Shared TLB, update logic
output tlb_update_sv32_t shared_tlb_update_o,
output tlb_update_sv32_t shared_tlb_update_o,
output logic [riscv::VLEN-1:0] update_vaddr_o,
output logic [riscv::VLEN-1:0] update_vaddr_o,
input logic [ASID_WIDTH-1:0] asid_i,
input logic [ASID_WIDTH-1:0] asid_i,
// from shared TLB
input logic shared_tlb_access_i,
input logic shared_tlb_hit_i,
input logic [riscv::VLEN-1:0] shared_tlb_vaddr_i,
input logic shared_tlb_access_i,
input logic shared_tlb_hit_i,
input logic [riscv::VLEN-1:0] shared_tlb_vaddr_i,
input logic itlb_req_i,
input logic itlb_req_i,
// from CSR file
input logic [riscv::PPNW-1:0] satp_ppn_i, // ppn from satp
input logic mxr_i,
input logic [riscv::PPNW-1:0] satp_ppn_i, // ppn from satp
input logic mxr_i,
// Performance counters
output logic shared_tlb_miss_o,
output logic shared_tlb_miss_o,
// PMP
input riscv::pmpcfg_t [15:0] pmpcfg_i,
input logic [15:0][riscv::PLEN-3:0] pmpaddr_i,
output logic [riscv::PLEN-1:0] bad_paddr_o
input riscv::pmpcfg_t [15:0] pmpcfg_i,
input logic [15:0][riscv::PLEN-3:0] pmpaddr_i,
output logic [riscv::PLEN-1:0] bad_paddr_o
);
// input registers
logic data_rvalid_q;
riscv::xlen_t data_rdata_q;
riscv::pte_sv32_t pte;
assign pte = riscv::pte_sv32_t'(data_rdata_q);
enum logic [2:0] {
IDLE,
WAIT_GRANT,
PTE_LOOKUP,
WAIT_RVALID,
PROPAGATE_ERROR,
PROPAGATE_ACCESS_ERROR,
LATENCY
}
state_q, state_d;
// SV32 defines two levels of page tables
enum logic {
LVL1,
LVL2
}
ptw_lvl_q, ptw_lvl_n;
// is this an instruction page table walk?
logic is_instr_ptw_q, is_instr_ptw_n;
logic global_mapping_q, global_mapping_n;
// latched tag signal
logic tag_valid_n, tag_valid_q;
// register the ASID
logic [ASID_WIDTH-1:0] tlb_update_asid_q, tlb_update_asid_n;
// register the VPN we need to walk, SV32 defines a 32 bit virtual address
logic [riscv::VLEN-1:0] vaddr_q, vaddr_n;
// 4 byte aligned physical pointer
logic [riscv::PLEN-1:0] ptw_pptr_q, ptw_pptr_n;
// Assignments
assign update_vaddr_o = vaddr_q;
assign ptw_active_o = (state_q != IDLE);
//assign walking_instr_o = is_instr_ptw_q;
assign walking_instr_o = is_instr_ptw_q;
// directly output the correct physical address
assign req_port_o.address_index = ptw_pptr_q[DCACHE_INDEX_WIDTH-1:0];
assign req_port_o.address_tag = ptw_pptr_q[DCACHE_INDEX_WIDTH+DCACHE_TAG_WIDTH-1:DCACHE_INDEX_WIDTH];
// we are never going to kill this request
assign req_port_o.kill_req = '0;
// we are never going to write with the HPTW
assign req_port_o.data_wdata = '0;
// we only issue one single request at a time
assign req_port_o.data_id = '0;
// -----------
// Shared TLB Update
// -----------
assign shared_tlb_update_o.vpn = vaddr_q[riscv::SV-1:12];
// update the correct page table level
assign shared_tlb_update_o.is_4M = (ptw_lvl_q == LVL1);
// output the correct ASID
assign shared_tlb_update_o.asid = tlb_update_asid_q;
// set the global mapping bit
assign shared_tlb_update_o.content = pte | (global_mapping_q << 5);
assign req_port_o.tag_valid = tag_valid_q;
logic allow_access;
assign bad_paddr_o = ptw_access_exception_o ? ptw_pptr_q : 'b0;
// PMP check for the page table walker's own memory accesses.
// The current PTE pointer (ptw_pptr_q) is checked; the verdict is
// returned on allow_access.
pmp #(
  .NR_ENTRIES(CVA6Cfg.NrPMPEntries),
  .PMP_LEN   (riscv::PLEN - 2),
  .PLEN      (riscv::PLEN),
  .CVA6Cfg   (CVA6Cfg)
) i_pmp_ptw (
  // Configuration
  .conf_i       (pmpcfg_i),
  .conf_addr_i  (pmpaddr_i),
  // PTW accesses are always loads...
  .access_type_i(riscv::ACCESS_READ),
  // ...and are always checked as if in S-Mode
  .priv_lvl_i   (riscv::PRIV_LVL_S),
  .addr_i       (ptw_pptr_q),
  // Verdict of the check
  .allow_o      (allow_access)
);
assign req_port_o.data_be = be_gen_32(req_port_o.address_index[1:0], req_port_o.data_size);
//-------------------
// Page table walker
//-------------------
// A virtual address va is translated into a physical address pa as follows:
// 1. Let a be satp.ppn × PAGESIZE, and let i = LEVELS-1. (For Sv32,
// PAGESIZE=2^12 and LEVELS=2.)
// 2. Let pte be the value of the PTE at address a+va.vpn[i]×PTESIZE. (For
// Sv32, PTESIZE=4.)
// 3. If pte.v = 0, or if pte.r = 0 and pte.w = 1, stop and raise an access
// exception.
// 4. Otherwise, the PTE is valid. If pte.r = 1 or pte.x = 1, go to step 5.
// Otherwise, this PTE is a pointer to the next level of the page table.
// Let i=i-1. If i < 0, stop and raise an access exception. Otherwise, let
// a = pte.ppn × PAGESIZE and go to step 2.
// 5. A leaf PTE has been found. Determine if the requested memory access
// is allowed by the pte.r, pte.w, and pte.x bits. If not, stop and
// raise an access exception. Otherwise, the translation is successful.
// Set pte.a to 1, and, if the memory access is a store, set pte.d to 1.
// The translated physical address is given as follows:
// - pa.pgoff = va.pgoff.
// - If i > 0, then this is a superpage translation and
// pa.ppn[i-1:0] = va.vpn[i-1:0].
// - pa.ppn[LEVELS-1:i] = pte.ppn[LEVELS-1:i].
always_comb begin : ptw
// default assignments
// PTW memory interface
tag_valid_n = 1'b0;
req_port_o.data_req = 1'b0;
req_port_o.data_size = 2'b10;
req_port_o.data_we = 1'b0;
ptw_error_o = 1'b0;
ptw_access_exception_o = 1'b0;
shared_tlb_update_o.valid = 1'b0;
is_instr_ptw_n = is_instr_ptw_q;
ptw_lvl_n = ptw_lvl_q;
ptw_pptr_n = ptw_pptr_q;
state_d = state_q;
global_mapping_n = global_mapping_q;
// input registers
logic data_rvalid_q;
riscv::xlen_t data_rdata_q;
tlb_update_asid_n = tlb_update_asid_q;
vaddr_n = vaddr_q;
riscv::pte_sv32_t pte;
assign pte = riscv::pte_sv32_t'(data_rdata_q);
shared_tlb_miss_o = 1'b0;
case (state_q)
enum logic[2:0] {
IDLE,
WAIT_GRANT,
PTE_LOOKUP,
WAIT_RVALID,
PROPAGATE_ERROR,
PROPAGATE_ACCESS_ERROR,
LATENCY
} state_q, state_d;
IDLE: begin
// by default we start with the top-most page table
ptw_lvl_n = LVL1;
global_mapping_n = 1'b0;
is_instr_ptw_n = 1'b0;
// if we got a Shared TLB miss
if (shared_tlb_access_i & ~shared_tlb_hit_i) begin
ptw_pptr_n = {
satp_ppn_i, shared_tlb_vaddr_i[riscv::SV-1:22], 2'b0
}; // SATP.PPN * PAGESIZE + VPN*PTESIZE = SATP.PPN * 2^(12) + VPN*4
is_instr_ptw_n = itlb_req_i;
tlb_update_asid_n = asid_i;
vaddr_n = shared_tlb_vaddr_i;
state_d = WAIT_GRANT;
shared_tlb_miss_o = 1'b1;
end
end
// SV32 defines two levels of page tables
enum logic {
LVL1, LVL2
} ptw_lvl_q, ptw_lvl_n;
WAIT_GRANT: begin
// send a request out
req_port_o.data_req = 1'b1;
// stay in WAIT_GRANT until the request is granted
if (req_port_i.data_gnt) begin
// send the tag valid signal one cycle later
tag_valid_n = 1'b1;
state_d = PTE_LOOKUP;
end
end
// is this an instruction page table walk?
logic is_instr_ptw_q, is_instr_ptw_n;
logic global_mapping_q, global_mapping_n;
// latched tag signal
logic tag_valid_n, tag_valid_q;
// register the ASID
logic [ASID_WIDTH-1:0] tlb_update_asid_q, tlb_update_asid_n;
// register the VPN we need to walk, SV32 defines a 32 bit virtual address
logic [riscv::VLEN-1:0] vaddr_q, vaddr_n;
// 4 byte aligned physical pointer
logic [riscv::PLEN-1:0] ptw_pptr_q, ptw_pptr_n;
PTE_LOOKUP: begin
// we wait for the valid signal
if (data_rvalid_q) begin
// Assignments
assign update_vaddr_o = vaddr_q;
// check if the global mapping bit is set
if (pte.g) global_mapping_n = 1'b1;
assign ptw_active_o = (state_q != IDLE);
//assign walking_instr_o = is_instr_ptw_q;
assign walking_instr_o = is_instr_ptw_q;
// directly output the correct physical address
assign req_port_o.address_index = ptw_pptr_q[DCACHE_INDEX_WIDTH-1:0];
assign req_port_o.address_tag = ptw_pptr_q[DCACHE_INDEX_WIDTH+DCACHE_TAG_WIDTH-1:DCACHE_INDEX_WIDTH];
// we are never going to kill this request
assign req_port_o.kill_req = '0;
// we are never going to write with the HPTW
assign req_port_o.data_wdata = '0;
// we only issue one single request at a time
assign req_port_o.data_id = '0;
// -------------
// Invalid PTE
// -------------
// If pte.v = 0, or if pte.r = 0 and pte.w = 1, stop and raise a page-fault exception.
if (!pte.v || (!pte.r && pte.w)) state_d = PROPAGATE_ERROR;
// -----------
// Valid PTE
// -----------
else begin
//state_d = IDLE;
state_d = LATENCY;
// it is a valid PTE
// if pte.r = 1 or pte.x = 1 it is a valid PTE
if (pte.r || pte.x) begin
// Valid translation found (either 4M or 4K entry)
if (is_instr_ptw_q) begin
// ------------
// Update ITLB
// ------------
// If page is not executable, we can directly raise an error. This
// doesn't put a useless entry into the TLB. The same idea applies
// to the access flag since we let the access flag be managed by SW.
if (!pte.x || !pte.a) state_d = PROPAGATE_ERROR;
else shared_tlb_update_o.valid = 1'b1;
// -----------
// Shared TLB Update
// -----------
assign shared_tlb_update_o.vpn = vaddr_q[riscv::SV-1:12];
// update the correct page table level
assign shared_tlb_update_o.is_4M = (ptw_lvl_q == LVL1);
// output the correct ASID
assign shared_tlb_update_o.asid = tlb_update_asid_q;
// set the global mapping bit
assign shared_tlb_update_o.content = pte | (global_mapping_q << 5);
assign req_port_o.tag_valid = tag_valid_q;
logic allow_access;
assign bad_paddr_o = ptw_access_exception_o ? ptw_pptr_q : 'b0;
// PMP check for the page table walker's own memory accesses.
// The current PTE pointer (ptw_pptr_q) is checked against the PMP
// configuration (pmpaddr_i / pmpcfg_i) as an S-mode read access; the
// verdict is returned on allow_access.
pmp #(
.CVA6Cfg ( CVA6Cfg ),
.PLEN ( riscv::PLEN ),
.PMP_LEN ( riscv::PLEN - 2 ),
.NR_ENTRIES ( CVA6Cfg.NrPMPEntries )
) i_pmp_ptw (
// address under check: the physical pointer of the PTE being fetched
.addr_i ( ptw_pptr_q ),
// PTW access are always checked as if in S-Mode...
.priv_lvl_i ( riscv::PRIV_LVL_S ),
// ...and they are always loads
.access_type_i ( riscv::ACCESS_READ ),
// Configuration
.conf_addr_i ( pmpaddr_i ),
.conf_i ( pmpcfg_i ),
.allow_o ( allow_access )
);
assign req_port_o.data_be = be_gen_32(req_port_o.address_index[1:0],req_port_o.data_size );
//-------------------
// Page table walker
//-------------------
// A virtual address va is translated into a physical address pa as follows:
// 1. Let a be sptbr.ppn × PAGESIZE, and let i = LEVELS-1. (For Sv39,
// PAGESIZE=2^12 and LEVELS=3.)
// 2. Let pte be the value of the PTE at address a+va.vpn[i]×PTESIZE. (For
// Sv32, PTESIZE=4.)
// 3. If pte.v = 0, or if pte.r = 0 and pte.w = 1, stop and raise an access
// exception.
// 4. Otherwise, the PTE is valid. If pte.r = 1 or pte.x = 1, go to step 5.
// Otherwise, this PTE is a pointer to the next level of the page table.
// Let i=i-1. If i < 0, stop and raise an access exception. Otherwise, let
// a = pte.ppn × PAGESIZE and go to step 2.
// 5. A leaf PTE has been found. Determine if the requested memory access
// is allowed by the pte.r, pte.w, and pte.x bits. If not, stop and
// raise an access exception. Otherwise, the translation is successful.
// Set pte.a to 1, and, if the memory access is a store, set pte.d to 1.
// The translated physical address is given as follows:
// - pa.pgoff = va.pgoff.
// - If i > 0, then this is a superpage translation and
// pa.ppn[i-1:0] = va.vpn[i-1:0].
// - pa.ppn[LEVELS-1:i] = pte.ppn[LEVELS-1:i].
always_comb begin : ptw
// default assignments
// PTW memory interface
tag_valid_n = 1'b0;
req_port_o.data_req = 1'b0;
req_port_o.data_size = 2'b10;
req_port_o.data_we = 1'b0;
ptw_error_o = 1'b0;
ptw_access_exception_o = 1'b0;
shared_tlb_update_o.valid = 1'b0;
is_instr_ptw_n = is_instr_ptw_q;
ptw_lvl_n = ptw_lvl_q;
ptw_pptr_n = ptw_pptr_q;
state_d = state_q;
global_mapping_n = global_mapping_q;
// input registers
tlb_update_asid_n = tlb_update_asid_q;
vaddr_n = vaddr_q;
shared_tlb_miss_o = 1'b0;
case (state_q)
IDLE: begin
// by default we start with the top-most page table
ptw_lvl_n = LVL1;
global_mapping_n = 1'b0;
is_instr_ptw_n = 1'b0;
// if we got a Shared TLB miss
if (shared_tlb_access_i & ~shared_tlb_hit_i) begin
ptw_pptr_n = {satp_ppn_i, shared_tlb_vaddr_i[riscv::SV-1:22], 2'b0}; // SATP.PPN * PAGESIZE + VPN*PTESIZE = SATP.PPN * 2^(12) + VPN*4
is_instr_ptw_n = itlb_req_i;
tlb_update_asid_n = asid_i;
vaddr_n = shared_tlb_vaddr_i;
state_d = WAIT_GRANT;
shared_tlb_miss_o = 1'b1;
end else begin
// ------------
// Update DTLB
// ------------
// Check if the access flag has been set, otherwise throw a page-fault
// and let the software handle those bits.
// If page is not readable (there are no write-only pages)
// we can directly raise an error. This doesn't put a useless
// entry into the TLB.
if (pte.a && (pte.r || (pte.x && mxr_i))) begin
shared_tlb_update_o.valid = 1'b1;
end else begin
state_d = PROPAGATE_ERROR;
end
end
WAIT_GRANT: begin
// send a request out
req_port_o.data_req = 1'b1;
// wait for the grant
if (req_port_i.data_gnt) begin
// send the tag valid signal one cycle later
tag_valid_n = 1'b1;
state_d = PTE_LOOKUP;
// Request is a store: perform some additional checks
// If the request was a store and the page is not write-able, raise an error
// the same applies if the dirty flag is not set
if (lsu_is_store_i && (!pte.w || !pte.d)) begin
shared_tlb_update_o.valid = 1'b0;
state_d = PROPAGATE_ERROR;
end
end
// check if the ppn is correctly aligned:
// 6. If i > 0 and pa.ppn[i-1 : 0] != 0, this is a misaligned superpage; stop and raise a page-fault
// exception.
if (ptw_lvl_q == LVL1 && pte.ppn[9:0] != '0) begin
state_d = PROPAGATE_ERROR;
shared_tlb_update_o.valid = 1'b0;
end
// this is a pointer to the next TLB level
end else begin
// pointer to next level of page table
if (ptw_lvl_q == LVL1) begin
// we are in the second level now
ptw_lvl_n = LVL2;
ptw_pptr_n = {pte.ppn, vaddr_q[21:12], 2'b0};
end
state_d = WAIT_GRANT;
if (ptw_lvl_q == LVL2) begin
// Should already be the last level page table => Error
ptw_lvl_n = LVL2;
state_d = PROPAGATE_ERROR;
end
end
end
PTE_LOOKUP: begin
// we wait for the valid signal
if (data_rvalid_q) begin
// Check if this access was actually allowed from a PMP perspective
if (!allow_access) begin
shared_tlb_update_o.valid = 1'b0;
// we have to return the failed address in bad_addr
ptw_pptr_n = ptw_pptr_q;
state_d = PROPAGATE_ACCESS_ERROR;
end
end
// we've got a data grant, so tell the cache that the tag is valid
end
// Propagate error to MMU/LSU
PROPAGATE_ERROR: begin
state_d = LATENCY;
ptw_error_o = 1'b1;
end
PROPAGATE_ACCESS_ERROR: begin
state_d = LATENCY;
ptw_access_exception_o = 1'b1;
end
// wait for the rvalid before going back to IDLE
WAIT_RVALID: begin
if (data_rvalid_q) state_d = IDLE;
end
LATENCY: begin
state_d = IDLE;
end
default: begin
state_d = IDLE;
end
endcase
// check if the global mapping bit is set
if (pte.g)
global_mapping_n = 1'b1;
// -------------
// Invalid PTE
// -------------
// If pte.v = 0, or if pte.r = 0 and pte.w = 1, stop and raise a page-fault exception.
if (!pte.v || (!pte.r && pte.w))
state_d = PROPAGATE_ERROR;
// -----------
// Valid PTE
// -----------
else begin
//state_d = IDLE;
state_d = LATENCY;
// it is a valid PTE
// if pte.r = 1 or pte.x = 1 it is a valid PTE
if (pte.r || pte.x) begin
// Valid translation found (either 4M or 4K entry)
if (is_instr_ptw_q) begin
// ------------
// Update ITLB
// ------------
// If page is not executable, we can directly raise an error. This
// doesn't put a useless entry into the TLB. The same idea applies
// to the access flag since we let the access flag be managed by SW.
if (!pte.x || !pte.a)
state_d = PROPAGATE_ERROR;
else
shared_tlb_update_o.valid = 1'b1;
end else begin
// ------------
// Update DTLB
// ------------
// Check if the access flag has been set, otherwise throw a page-fault
// and let the software handle those bits.
// If page is not readable (there are no write-only pages)
// we can directly raise an error. This doesn't put a useless
// entry into the TLB.
if (pte.a && (pte.r || (pte.x && mxr_i))) begin
shared_tlb_update_o.valid = 1'b1;
end else begin
state_d = PROPAGATE_ERROR;
end
// Request is a store: perform some additional checks
// If the request was a store and the page is not write-able, raise an error
// the same applies if the dirty flag is not set
if (lsu_is_store_i && (!pte.w || !pte.d)) begin
shared_tlb_update_o.valid = 1'b0;
state_d = PROPAGATE_ERROR;
end
end
// check if the ppn is correctly aligned:
// 6. If i > 0 and pa.ppn[i-1 : 0] != 0, this is a misaligned superpage; stop and raise a page-fault
// exception.
if (ptw_lvl_q == LVL1 && pte.ppn[9:0] != '0) begin
state_d = PROPAGATE_ERROR;
shared_tlb_update_o.valid = 1'b0;
end
// this is a pointer to the next TLB level
end else begin
// pointer to next level of page table
if (ptw_lvl_q == LVL1) begin
// we are in the second level now
ptw_lvl_n = LVL2;
ptw_pptr_n = {pte.ppn, vaddr_q[21:12], 2'b0};
end
state_d = WAIT_GRANT;
if (ptw_lvl_q == LVL2) begin
// Should already be the last level page table => Error
ptw_lvl_n = LVL2;
state_d = PROPAGATE_ERROR;
end
end
end
// Check if this access was actually allowed from a PMP perspective
if (!allow_access) begin
shared_tlb_update_o.valid = 1'b0;
// we have to return the failed address in bad_addr
ptw_pptr_n = ptw_pptr_q;
state_d = PROPAGATE_ACCESS_ERROR;
end
end
// we've got a data grant, so tell the cache that the tag is valid
end
// Propagate error to MMU/LSU
PROPAGATE_ERROR: begin
state_d = LATENCY;
ptw_error_o = 1'b1;
end
PROPAGATE_ACCESS_ERROR: begin
state_d = LATENCY;
ptw_access_exception_o = 1'b1;
end
// wait for the rvalid before going back to IDLE
WAIT_RVALID: begin
if (data_rvalid_q)
state_d = IDLE;
end
LATENCY: begin
state_d = IDLE;
end
default: begin
state_d = IDLE;
end
endcase
// -------
// Flush
// -------
// should we have flushed before we got an rvalid, wait for it until going back to IDLE
if (flush_i) begin
// on a flush check whether we are
// 1. in the PTE Lookup check whether we still need to wait for an rvalid
// 2. waiting for a grant, if so: wait for it
// if not, go back to idle
if (((state_q inside {PTE_LOOKUP, WAIT_RVALID}) && !data_rvalid_q) ||
// -------
// Flush
// -------
// should we have flushed before we got an rvalid, wait for it until going back to IDLE
if (flush_i) begin
// on a flush check whether we are
// 1. in the PTE Lookup check whether we still need to wait for an rvalid
// 2. waiting for a grant, if so: wait for it
// if not, go back to idle
if (((state_q inside {PTE_LOOKUP, WAIT_RVALID}) && !data_rvalid_q) ||
((state_q == WAIT_GRANT) && req_port_i.data_gnt))
state_d = WAIT_RVALID;
else
state_d = LATENCY;
end
state_d = WAIT_RVALID;
else state_d = LATENCY;
end
end
// sequential process
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
state_q <= IDLE;
is_instr_ptw_q <= 1'b0;
ptw_lvl_q <= LVL1;
tag_valid_q <= 1'b0;
tlb_update_asid_q <= '0;
vaddr_q <= '0;
ptw_pptr_q <= '0;
global_mapping_q <= 1'b0;
data_rdata_q <= '0;
data_rvalid_q <= 1'b0;
end else begin
state_q <= state_d;
ptw_pptr_q <= ptw_pptr_n;
is_instr_ptw_q <= is_instr_ptw_n;
ptw_lvl_q <= ptw_lvl_n;
tag_valid_q <= tag_valid_n;
tlb_update_asid_q <= tlb_update_asid_n;
vaddr_q <= vaddr_n;
global_mapping_q <= global_mapping_n;
data_rdata_q <= req_port_i.data_rdata;
data_rvalid_q <= req_port_i.data_rvalid;
end
// sequential process
// State registers of the page table walker.
// Asynchronous active-low reset; on every rising clock edge the registers
// take over the next-state values computed by the combinational process,
// and the cache response (read data / read valid) is sampled.
always_ff @(posedge clk_i or negedge rst_ni) begin
  if (~rst_ni) begin
    // FSM and walk bookkeeping
    state_q           <= IDLE;
    ptw_lvl_q         <= LVL1;
    is_instr_ptw_q    <= 1'b0;
    global_mapping_q  <= 1'b0;
    // request context
    vaddr_q           <= '0;
    tlb_update_asid_q <= '0;
    ptw_pptr_q        <= '0;
    tag_valid_q       <= 1'b0;
    // sampled cache response
    data_rvalid_q     <= 1'b0;
    data_rdata_q      <= '0;
  end else begin
    // FSM and walk bookkeeping
    state_q           <= state_d;
    ptw_lvl_q         <= ptw_lvl_n;
    is_instr_ptw_q    <= is_instr_ptw_n;
    global_mapping_q  <= global_mapping_n;
    // request context
    vaddr_q           <= vaddr_n;
    tlb_update_asid_q <= tlb_update_asid_n;
    ptw_pptr_q        <= ptw_pptr_n;
    tag_valid_q       <= tag_valid_n;
    // sampled cache response
    data_rvalid_q     <= req_port_i.data_rvalid;
    data_rdata_q      <= req_port_i.data_rdata;
  end
end
endmodule
/* verilator lint_on WIDTH */

View file

@ -17,348 +17,351 @@
/* verilator lint_off WIDTH */
module cva6_shared_tlb_sv32 import ariane_pkg::*; #(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int SHARED_TLB_DEPTH = 64,
parameter int SHARED_TLB_WAYS = 2,
parameter int ASID_WIDTH = 1
module cva6_shared_tlb_sv32
import ariane_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int SHARED_TLB_DEPTH = 64,
parameter int SHARED_TLB_WAYS = 2,
parameter int ASID_WIDTH = 1
) (
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic flush_i,
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic flush_i,
input logic enable_translation_i, // CSRs indicate to enable SV32
input logic en_ld_st_translation_i, // enable virtual memory translation for load/stores
input logic enable_translation_i, // CSRs indicate to enable SV32
input logic en_ld_st_translation_i, // enable virtual memory translation for load/stores
input logic [ASID_WIDTH-1:0] asid_i,
input logic [ASID_WIDTH-1:0] asid_i,
// from TLBs
// did we miss?
input logic itlb_access_i,
input logic itlb_hit_i,
input logic [riscv::VLEN-1:0] itlb_vaddr_i,
input logic itlb_access_i,
input logic itlb_hit_i,
input logic [riscv::VLEN-1:0] itlb_vaddr_i,
input logic dtlb_access_i,
input logic dtlb_hit_i,
input logic [riscv::VLEN-1:0] dtlb_vaddr_i,
input logic dtlb_access_i,
input logic dtlb_hit_i,
input logic [riscv::VLEN-1:0] dtlb_vaddr_i,
// to TLBs, update logic
output tlb_update_sv32_t itlb_update_o,
output tlb_update_sv32_t dtlb_update_o,
output tlb_update_sv32_t itlb_update_o,
output tlb_update_sv32_t dtlb_update_o,
// Performance counters
output logic itlb_miss_o,
output logic dtlb_miss_o,
output logic itlb_miss_o,
output logic dtlb_miss_o,
output logic shared_tlb_access_o,
output logic shared_tlb_hit_o,
output logic [riscv::VLEN-1:0] shared_tlb_vaddr_o,
output logic shared_tlb_access_o,
output logic shared_tlb_hit_o,
output logic [riscv::VLEN-1:0] shared_tlb_vaddr_o,
output logic itlb_req_o,
output logic itlb_req_o,
// Update shared TLB in case of miss
input tlb_update_sv32_t shared_tlb_update_i
input tlb_update_sv32_t shared_tlb_update_i
);
// Convert a binary way index into a one-hot vector.
//   in     : binary index of the selected way
//   return : SHARED_TLB_WAYS-bit vector with only bit `in` set
function logic [SHARED_TLB_WAYS-1:0] shared_tlb_way_bin2oh ( input logic [$clog2(SHARED_TLB_WAYS)-1:0] in);
logic [SHARED_TLB_WAYS-1:0] out;
// start from all zeros, then set the single selected bit
out = '0;
out[in] = 1'b1;
return out;
endfunction
// Binary-to-one-hot conversion of a way index.
//   in     : binary index of the selected way
//   return : SHARED_TLB_WAYS-bit vector in which only bit `in` is set
function logic [SHARED_TLB_WAYS-1:0] shared_tlb_way_bin2oh(
    input logic [$clog2(SHARED_TLB_WAYS)-1:0] in
);
  logic [SHARED_TLB_WAYS-1:0] onehot;
  // clear everything, then raise the selected bit
  onehot     = '0;
  onehot[in] = 1'b1;
  return onehot;
endfunction
// Tag stored alongside each shared-TLB entry: the address space ID, the two
// Sv32 virtual page number fields and the page-size flag of the entry.
typedef struct packed {
logic [8:0] asid; //9 bits wide
logic [9:0] vpn1; //10 bits wide
logic [9:0] vpn0; //10 bits wide
// set for 4M entries; when set, vpn0 is ignored by the tag comparison
logic is_4M;
} shared_tag_t;
// Tag stored alongside each shared-TLB entry: the address space ID, the two
// Sv32 virtual page number fields and the page-size flag of the entry.
typedef struct packed {
logic [8:0] asid; //9 bits wide
logic [9:0] vpn1; //10 bits wide
logic [9:0] vpn0; //10 bits wide
// set for 4M entries; when set, vpn0 is ignored by the tag comparison
logic is_4M;
} shared_tag_t;
shared_tag_t shared_tag_wr;
shared_tag_t [SHARED_TLB_WAYS-1:0] shared_tag_rd;
shared_tag_t shared_tag_wr;
shared_tag_t [SHARED_TLB_WAYS-1:0] shared_tag_rd;
logic [SHARED_TLB_DEPTH-1:0][SHARED_TLB_WAYS-1:0] shared_tag_valid_q, shared_tag_valid_d ;
logic [SHARED_TLB_DEPTH-1:0][SHARED_TLB_WAYS-1:0] shared_tag_valid_q, shared_tag_valid_d;
logic [SHARED_TLB_WAYS-1:0] shared_tag_valid;
logic [ SHARED_TLB_WAYS-1:0] shared_tag_valid;
logic [SHARED_TLB_WAYS-1:0] tag_wr_en;
logic [$clog2(SHARED_TLB_DEPTH)-1:0] tag_wr_addr;
logic [$bits(shared_tag_t)-1:0] tag_wr_data;
logic [ SHARED_TLB_WAYS-1:0] tag_wr_en;
logic [$clog2(SHARED_TLB_DEPTH)-1:0] tag_wr_addr;
logic [ $bits(shared_tag_t)-1:0] tag_wr_data;
logic [SHARED_TLB_WAYS-1:0] tag_rd_en;
logic [$clog2(SHARED_TLB_DEPTH)-1:0] tag_rd_addr;
logic [$bits(shared_tag_t)-1:0] tag_rd_data [SHARED_TLB_WAYS-1:0];
logic [ SHARED_TLB_WAYS-1:0] tag_rd_en;
logic [$clog2(SHARED_TLB_DEPTH)-1:0] tag_rd_addr;
logic [ $bits(shared_tag_t)-1:0] tag_rd_data [SHARED_TLB_WAYS-1:0];
logic [SHARED_TLB_WAYS-1:0] tag_req;
logic [SHARED_TLB_WAYS-1:0] tag_we;
logic [$clog2(SHARED_TLB_DEPTH)-1:0] tag_addr;
logic [ SHARED_TLB_WAYS-1:0] tag_req;
logic [ SHARED_TLB_WAYS-1:0] tag_we;
logic [$clog2(SHARED_TLB_DEPTH)-1:0] tag_addr;
logic [SHARED_TLB_WAYS-1:0] pte_wr_en;
logic [$clog2(SHARED_TLB_DEPTH)-1:0] pte_wr_addr;
logic [$bits(riscv::pte_sv32_t)-1:0] pte_wr_data;
logic [ SHARED_TLB_WAYS-1:0] pte_wr_en;
logic [$clog2(SHARED_TLB_DEPTH)-1:0] pte_wr_addr;
logic [$bits(riscv::pte_sv32_t)-1:0] pte_wr_data;
logic [SHARED_TLB_WAYS-1:0] pte_rd_en;
logic [$clog2(SHARED_TLB_DEPTH)-1:0] pte_rd_addr;
logic [$bits(riscv::pte_sv32_t)-1:0] pte_rd_data [SHARED_TLB_WAYS-1:0];
logic [ SHARED_TLB_WAYS-1:0] pte_rd_en;
logic [$clog2(SHARED_TLB_DEPTH)-1:0] pte_rd_addr;
logic [$bits(riscv::pte_sv32_t)-1:0] pte_rd_data [SHARED_TLB_WAYS-1:0];
logic [SHARED_TLB_WAYS-1:0] pte_req;
logic [SHARED_TLB_WAYS-1:0] pte_we;
logic [$clog2(SHARED_TLB_DEPTH)-1:0] pte_addr;
logic [ SHARED_TLB_WAYS-1:0] pte_req;
logic [ SHARED_TLB_WAYS-1:0] pte_we;
logic [$clog2(SHARED_TLB_DEPTH)-1:0] pte_addr;
logic [9:0] vpn0_d, vpn1_d, vpn0_q, vpn1_q;
logic [9:0] vpn0_d, vpn1_d, vpn0_q, vpn1_q;
riscv::pte_sv32_t [SHARED_TLB_WAYS-1:0] pte;
riscv::pte_sv32_t [SHARED_TLB_WAYS-1:0] pte;
logic [riscv::VLEN-1-12:0] itlb_vpn_q;
logic [riscv::VLEN-1-12:0] dtlb_vpn_q;
logic [riscv::VLEN-1-12:0] itlb_vpn_q;
logic [riscv::VLEN-1-12:0] dtlb_vpn_q;
logic [ASID_WIDTH-1:0] tlb_update_asid_q, tlb_update_asid_d;
logic [ASID_WIDTH-1:0] tlb_update_asid_q, tlb_update_asid_d;
logic shared_tlb_access_q, shared_tlb_access_d;
logic shared_tlb_hit_d;
logic [riscv::VLEN-1:0] shared_tlb_vaddr_q, shared_tlb_vaddr_d;
logic shared_tlb_access_q, shared_tlb_access_d;
logic shared_tlb_hit_d;
logic [riscv::VLEN-1:0] shared_tlb_vaddr_q, shared_tlb_vaddr_d;
logic itlb_req_d, itlb_req_q;
logic dtlb_req_d, dtlb_req_q;
logic itlb_req_d, itlb_req_q;
logic dtlb_req_d, dtlb_req_q;
// replacement strategy
logic [SHARED_TLB_WAYS-1:0] way_valid;
logic update_lfsr; // shift the LFSR
logic [$clog2(SHARED_TLB_WAYS)-1:0] inv_way; // first non-valid encountered
logic [$clog2(SHARED_TLB_WAYS)-1:0] rnd_way; // random index for replacement
logic [$clog2(SHARED_TLB_WAYS)-1:0] repl_way; // way to replace
logic [SHARED_TLB_WAYS-1:0] repl_way_oh_d; // way to replace (onehot)
logic all_ways_valid; // we need to switch repl strategy since all are valid
// replacement strategy
logic [SHARED_TLB_WAYS-1:0] way_valid;
logic update_lfsr; // shift the LFSR
logic [$clog2(SHARED_TLB_WAYS)-1:0] inv_way; // first non-valid encountered
logic [$clog2(SHARED_TLB_WAYS)-1:0] rnd_way; // random index for replacement
logic [$clog2(SHARED_TLB_WAYS)-1:0] repl_way; // way to replace
logic [SHARED_TLB_WAYS-1:0] repl_way_oh_d; // way to replace (onehot)
logic all_ways_valid; // we need to switch repl strategy since all are valid
assign shared_tlb_access_o = shared_tlb_access_q;
assign shared_tlb_hit_o = shared_tlb_hit_d;
assign shared_tlb_vaddr_o = shared_tlb_vaddr_q;
assign shared_tlb_access_o = shared_tlb_access_q;
assign shared_tlb_hit_o = shared_tlb_hit_d;
assign shared_tlb_vaddr_o = shared_tlb_vaddr_q;
assign itlb_req_o = itlb_req_q;
assign itlb_req_o = itlb_req_q;
///////////////////////////////////////////////////////
// tag comparison, hit generation
///////////////////////////////////////////////////////
always_comb begin : itlb_dtlb_miss
itlb_miss_o = 1'b0;
dtlb_miss_o = 1'b0;
vpn0_d = vpn0_q;
vpn1_d = vpn1_q;
///////////////////////////////////////////////////////
// tag comparison, hit generation
///////////////////////////////////////////////////////
always_comb begin : itlb_dtlb_miss
itlb_miss_o = 1'b0;
dtlb_miss_o = 1'b0;
vpn0_d = vpn0_q;
vpn1_d = vpn1_q;
tag_rd_en = '0;
pte_rd_en = '0;
tag_rd_en = '0;
pte_rd_en = '0;
itlb_req_d = 1'b0;
dtlb_req_d = 1'b0;
itlb_req_d = 1'b0;
dtlb_req_d = 1'b0;
tlb_update_asid_d = tlb_update_asid_q;
tlb_update_asid_d = tlb_update_asid_q;
shared_tlb_access_d = '0;
shared_tlb_vaddr_d = shared_tlb_vaddr_q;
shared_tlb_access_d = '0;
shared_tlb_vaddr_d = shared_tlb_vaddr_q;
tag_rd_addr = '0;
pte_rd_addr = '0;
tag_rd_addr = '0;
pte_rd_addr = '0;
// if we got an ITLB miss
if (enable_translation_i & itlb_access_i & ~itlb_hit_i & ~dtlb_access_i) begin
tag_rd_en = '1;
tag_rd_addr = itlb_vaddr_i[12+:$clog2(SHARED_TLB_DEPTH)];
pte_rd_en = '1;
pte_rd_addr = itlb_vaddr_i[12+:$clog2(SHARED_TLB_DEPTH)];
// if we got an ITLB miss
if (enable_translation_i & itlb_access_i & ~itlb_hit_i & ~dtlb_access_i) begin
tag_rd_en = '1;
tag_rd_addr = itlb_vaddr_i[12+:$clog2(SHARED_TLB_DEPTH)];
pte_rd_en = '1;
pte_rd_addr = itlb_vaddr_i[12+:$clog2(SHARED_TLB_DEPTH)];
vpn0_d = itlb_vaddr_i[21:12];
vpn1_d = itlb_vaddr_i[31:22];
vpn0_d = itlb_vaddr_i[21:12];
vpn1_d = itlb_vaddr_i[31:22];
itlb_miss_o = 1'b1;
itlb_req_d = 1'b1;
itlb_miss_o = 1'b1;
itlb_req_d = 1'b1;
tlb_update_asid_d = asid_i;
tlb_update_asid_d = asid_i;
shared_tlb_access_d = 1'b1;
shared_tlb_vaddr_d = itlb_vaddr_i;
shared_tlb_access_d = 1'b1;
shared_tlb_vaddr_d = itlb_vaddr_i;
// we got an DTLB miss
end else if (en_ld_st_translation_i & dtlb_access_i & ~dtlb_hit_i) begin
tag_rd_en = '1;
tag_rd_addr = dtlb_vaddr_i[12+:$clog2(SHARED_TLB_DEPTH)];
pte_rd_en = '1;
pte_rd_addr = dtlb_vaddr_i[12+:$clog2(SHARED_TLB_DEPTH)];
// we got an DTLB miss
end else if (en_ld_st_translation_i & dtlb_access_i & ~dtlb_hit_i) begin
tag_rd_en = '1;
tag_rd_addr = dtlb_vaddr_i[12+:$clog2(SHARED_TLB_DEPTH)];
pte_rd_en = '1;
pte_rd_addr = dtlb_vaddr_i[12+:$clog2(SHARED_TLB_DEPTH)];
vpn0_d = dtlb_vaddr_i[21:12];
vpn1_d = dtlb_vaddr_i[31:22];
vpn0_d = dtlb_vaddr_i[21:12];
vpn1_d = dtlb_vaddr_i[31:22];
dtlb_miss_o = 1'b1;
dtlb_req_d = 1'b1;
dtlb_miss_o = 1'b1;
dtlb_req_d = 1'b1;
tlb_update_asid_d = asid_i;
tlb_update_asid_d = asid_i;
shared_tlb_access_d = 1'b1;
shared_tlb_vaddr_d = dtlb_vaddr_i;
shared_tlb_access_d = 1'b1;
shared_tlb_vaddr_d = dtlb_vaddr_i;
end
end //itlb_dtlb_miss
always_comb begin : tag_comparison
shared_tlb_hit_d = 1'b0;
dtlb_update_o = '0;
itlb_update_o = '0;
//number of ways
for (int unsigned i = 0; i < SHARED_TLB_WAYS; i++) begin
if (shared_tag_valid[i] && ((tlb_update_asid_q == shared_tag_rd[i].asid) || pte[i].g) && vpn1_q == shared_tag_rd[i].vpn1) begin
if (shared_tag_rd[i].is_4M || vpn0_q == shared_tag_rd[i].vpn0) begin
shared_tlb_hit_d = 1'b1;
if (itlb_req_q) begin
itlb_update_o.valid = 1'b1;
itlb_update_o.vpn = itlb_vpn_q;
itlb_update_o.is_4M = shared_tag_rd[i].is_4M;
itlb_update_o.asid = tlb_update_asid_q;
itlb_update_o.content = pte[i];
end else if (dtlb_req_q) begin
dtlb_update_o.valid = 1'b1;
dtlb_update_o.vpn = dtlb_vpn_q;
dtlb_update_o.is_4M = shared_tag_rd[i].is_4M;
dtlb_update_o.asid = tlb_update_asid_q;
dtlb_update_o.content = pte[i];
end
end
end //itlb_dtlb_miss
// Tag comparison and hit generation.
// Every way of the set read in the previous cycle is compared against the
// registered request (ASID, VPN1 and - unless the entry is a 4M page - VPN0).
// A global entry (pte.g) matches regardless of the ASID. On a hit the entry
// is forwarded to the TLB that issued the request: the ITLB if itlb_req_q
// is set, otherwise the DTLB if dtlb_req_q is set.
always_comb begin : tag_comparison
  // defaults: no hit and no update towards either TLB
  itlb_update_o    = '0;
  dtlb_update_o    = '0;
  shared_tlb_hit_d = 1'b0;

  // walk over all ways; a later matching way overrides an earlier one
  for (int unsigned way = 0; way < SHARED_TLB_WAYS; way++) begin
    if (shared_tag_valid[way]
        && ((tlb_update_asid_q == shared_tag_rd[way].asid) || pte[way].g)
        && (vpn1_q == shared_tag_rd[way].vpn1)
        && (shared_tag_rd[way].is_4M || (vpn0_q == shared_tag_rd[way].vpn0))) begin
      shared_tlb_hit_d = 1'b1;
      if (itlb_req_q) begin
        // refill the instruction TLB
        itlb_update_o.valid   = 1'b1;
        itlb_update_o.vpn     = itlb_vpn_q;
        itlb_update_o.is_4M   = shared_tag_rd[way].is_4M;
        itlb_update_o.asid    = tlb_update_asid_q;
        itlb_update_o.content = pte[way];
      end else if (dtlb_req_q) begin
        // refill the data TLB
        dtlb_update_o.valid   = 1'b1;
        dtlb_update_o.vpn     = dtlb_vpn_q;
        dtlb_update_o.is_4M   = shared_tag_rd[way].is_4M;
        dtlb_update_o.asid    = tlb_update_asid_q;
        dtlb_update_o.content = pte[way];
      end
    end
  end
end  //tag_comparison
// sequential process
// Registers of the shared TLB lookup stage (asynchronous active-low reset).
// Holds the request captured by the miss logic (ASID, VPN fields, vaddr,
// requester flags), the per-entry valid bits, and the valid bits of the
// set that was addressed for reading in this cycle.
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
itlb_vpn_q <= '0;
dtlb_vpn_q <= '0;
tlb_update_asid_q <= '0;
shared_tlb_access_q <= '0;
shared_tlb_vaddr_q <= '0;
shared_tag_valid_q <= '0;
vpn0_q <= '0;
vpn1_q <= '0;
itlb_req_q <= '0;
dtlb_req_q <= '0;
shared_tag_valid <= '0;
end else begin
// the VPNs of both TLB request addresses are sampled unconditionally
itlb_vpn_q <= itlb_vaddr_i[riscv::SV-1:12];
dtlb_vpn_q <= dtlb_vaddr_i[riscv::SV-1:12];
tlb_update_asid_q <= tlb_update_asid_d;
shared_tlb_access_q <= shared_tlb_access_d;
shared_tlb_vaddr_q <= shared_tlb_vaddr_d;
shared_tag_valid_q <= shared_tag_valid_d;
vpn0_q <= vpn0_d;
vpn1_q <= vpn1_d;
itlb_req_q <= itlb_req_d;
dtlb_req_q <= dtlb_req_d;
// valid bits of the set being read; registered, presumably so they line up
// with the RAM read data in the next cycle - TODO confirm against sram timing
shared_tag_valid <= shared_tag_valid_q[tag_rd_addr];
end
end
end //tag_comparison
// ------------------
// Update and Flush
// ------------------
// Computes the next valid bits and the per-way RAM write enables.
// A flush clears every valid bit and takes priority over an update. An
// update marks the entry selected by the low VPN bits as valid in the way
// chosen by the replacement logic (repl_way_oh_d) and enables the tag and
// PTE RAM write for that way.
always_comb begin : update_flush
// defaults: keep the valid bits, no write
shared_tag_valid_d = shared_tag_valid_q;
tag_wr_en = '0;
pte_wr_en = '0;
if (flush_i) begin
shared_tag_valid_d = '0;
end else if (shared_tlb_update_i.valid) begin
for (int unsigned i = 0; i < SHARED_TLB_WAYS; i++) begin
if (repl_way_oh_d[i]) begin
shared_tag_valid_d[shared_tlb_update_i.vpn[$clog2(SHARED_TLB_DEPTH)-1:0]][i] = 1'b1;
tag_wr_en[i] = 1'b1;
pte_wr_en[i] = 1'b1;
end
end
end
end //update_flush
// Tag written on an update: assembled from the incoming update request
assign shared_tag_wr.asid = shared_tlb_update_i.asid;
assign shared_tag_wr.vpn1 = shared_tlb_update_i.vpn[19:10];
assign shared_tag_wr.vpn0 = shared_tlb_update_i.vpn[9:0];
assign shared_tag_wr.is_4M = shared_tlb_update_i.is_4M;
// Tag and PTE RAMs are written at the same address: the low VPN bits
assign tag_wr_addr = shared_tlb_update_i.vpn[$clog2(SHARED_TLB_DEPTH)-1:0];
assign tag_wr_data = shared_tag_wr;
assign pte_wr_addr = shared_tlb_update_i.vpn[$clog2(SHARED_TLB_DEPTH)-1:0];
assign pte_wr_data = shared_tlb_update_i.content;
// Replacement: valid bits of the set targeted by the update
assign way_valid = shared_tag_valid_q[shared_tlb_update_i.vpn[$clog2(SHARED_TLB_DEPTH)-1:0]];
// use the way reported by the lzc unless all ways are valid, then the LFSR output
assign repl_way = (all_ways_valid) ? rnd_way : inv_way;
// the LFSR only advances when its output is actually used for an update
assign update_lfsr = shared_tlb_update_i.valid & all_ways_valid;
assign repl_way_oh_d = (shared_tlb_update_i.valid) ? shared_tlb_way_bin2oh(repl_way) : '0;
// Fed with the inverted valid bits; per the signal declarations cnt_o is the
// first non-valid way and empty_o flags that all ways are valid
// (NOTE(review): exact lzc semantics are defined outside this file - confirm)
lzc #(
.WIDTH ( SHARED_TLB_WAYS )
) i_lzc (
.in_i ( ~way_valid ),
.cnt_o ( inv_way ),
.empty_o ( all_ways_valid )
);
// Source of the replacement index used when every way is valid
lfsr #(
.LfsrWidth ( 8 ),
.OutWidth ( $clog2(SHARED_TLB_WAYS))
) i_lfsr (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.en_i ( update_lfsr ),
.out_o ( rnd_way )
);
///////////////////////////////////////////////////////
// memory arrays and regs
///////////////////////////////////////////////////////
// Each RAM has a single address input shared by reads and writes.
// A way is requested when it is either written or read; the write address
// is selected as soon as any way's write enable is set, otherwise the read
// address is used.
assign tag_req = tag_wr_en | tag_rd_en;
assign tag_we = tag_wr_en;
assign tag_addr = tag_wr_en ? tag_wr_addr : tag_rd_addr;
assign pte_req = pte_wr_en | pte_rd_en;
assign pte_we = pte_wr_en;
assign pte_addr = pte_wr_en ? pte_wr_addr : pte_rd_addr;
for (genvar i = 0; i < SHARED_TLB_WAYS; i++) begin : gen_sram
// Tag RAM
sram #(
.DATA_WIDTH ( $bits(shared_tag_t) ),
.NUM_WORDS ( SHARED_TLB_DEPTH )
) tag_sram (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.req_i ( tag_req[i] ),
.we_i ( tag_we[i] ),
.addr_i ( tag_addr ),
.wuser_i ( '0 ),
.wdata_i ( tag_wr_data ),
.be_i ( '1 ),
.ruser_o ( ),
.rdata_o ( tag_rd_data[i] )
);
assign shared_tag_rd[i] = shared_tag_t'(tag_rd_data[i]);
// PTE RAM
sram #(
.DATA_WIDTH ( $bits(riscv::pte_sv32_t) ),
.NUM_WORDS ( SHARED_TLB_DEPTH )
) pte_sram (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.req_i ( pte_req[i] ),
.we_i ( pte_we[i] ),
.addr_i ( pte_addr ),
.wuser_i ( '0 ),
.wdata_i ( pte_wr_data ),
.be_i ( '1 ),
.ruser_o ( ),
.rdata_o ( pte_rd_data[i] )
);
assign pte[i] = riscv::pte_sv32_t'(pte_rd_data[i]);
// sequential process
// Registers of the shared TLB lookup stage (asynchronous active-low reset).
// Captures the request selected by the miss logic, the per-entry valid bits
// and the valid bits of the set that is addressed for reading.
always_ff @(posedge clk_i or negedge rst_ni) begin
  if (!rst_ni) begin
    // captured request
    itlb_req_q          <= '0;
    dtlb_req_q          <= '0;
    vpn1_q              <= '0;
    vpn0_q              <= '0;
    tlb_update_asid_q   <= '0;
    shared_tlb_access_q <= '0;
    shared_tlb_vaddr_q  <= '0;
    // VPNs handed back with a TLB update
    itlb_vpn_q          <= '0;
    dtlb_vpn_q          <= '0;
    // valid bits
    shared_tag_valid_q  <= '0;
    shared_tag_valid    <= '0;
  end else begin
    // captured request
    itlb_req_q          <= itlb_req_d;
    dtlb_req_q          <= dtlb_req_d;
    vpn1_q              <= vpn1_d;
    vpn0_q              <= vpn0_d;
    tlb_update_asid_q   <= tlb_update_asid_d;
    shared_tlb_access_q <= shared_tlb_access_d;
    shared_tlb_vaddr_q  <= shared_tlb_vaddr_d;
    // VPNs of both request addresses are sampled unconditionally
    itlb_vpn_q          <= itlb_vaddr_i[riscv::SV-1:12];
    dtlb_vpn_q          <= dtlb_vaddr_i[riscv::SV-1:12];
    // valid bits: full array, and the set currently addressed for reading
    shared_tag_valid_q  <= shared_tag_valid_d;
    shared_tag_valid    <= shared_tag_valid_q[tag_rd_addr];
  end
end
// ------------------
// Update and Flush
// ------------------
// Next-state logic of the valid bits plus the per-way RAM write enables.
// A flush invalidates every entry and wins over a simultaneous update.
// An update validates the entry indexed by the low VPN bits in the way
// picked by the replacement logic and enables the tag/PTE write of that way.
always_comb begin : update_flush
  // defaults: no write, valid bits unchanged
  pte_wr_en          = '0;
  tag_wr_en          = '0;
  shared_tag_valid_d = shared_tag_valid_q;

  if (flush_i) begin
    shared_tag_valid_d = '0;
  end else if (shared_tlb_update_i.valid) begin
    for (int unsigned way = 0; way < SHARED_TLB_WAYS; way++) begin
      if (repl_way_oh_d[way]) begin
        pte_wr_en[way] = 1'b1;
        tag_wr_en[way] = 1'b1;
        shared_tag_valid_d[shared_tlb_update_i.vpn[$clog2(SHARED_TLB_DEPTH)-1:0]][way] = 1'b1;
      end
    end
  end
end  //update_flush
// ---- write port: address and data of an update ----
// both RAMs are indexed by the low bits of the updated VPN
assign tag_wr_addr = shared_tlb_update_i.vpn[$clog2(SHARED_TLB_DEPTH)-1:0];
assign pte_wr_addr = tag_wr_addr;

// tag assembled from the update request
assign shared_tag_wr.is_4M = shared_tlb_update_i.is_4M;
assign shared_tag_wr.vpn0  = shared_tlb_update_i.vpn[9:0];
assign shared_tag_wr.vpn1  = shared_tlb_update_i.vpn[19:10];
assign shared_tag_wr.asid  = shared_tlb_update_i.asid;

assign tag_wr_data = shared_tag_wr;
assign pte_wr_data = shared_tlb_update_i.content;

// ---- replacement ----
// valid bits of the set that the update targets
assign way_valid = shared_tag_valid_q[tag_wr_addr];
// the LFSR only advances when its output is used, i.e. on an update into a full set
assign update_lfsr = shared_tlb_update_i.valid & all_ways_valid;
// take the way reported by the lzc unless all ways are valid, then the LFSR output
assign repl_way = all_ways_valid ? rnd_way : inv_way;
assign repl_way_oh_d = shared_tlb_update_i.valid ? shared_tlb_way_bin2oh(repl_way) : '0;
// Fed with the inverted valid bits of the targeted set; per the signal
// declarations cnt_o is the first non-valid way and empty_o flags that all
// ways are valid
// (NOTE(review): exact lzc semantics are defined outside this file - confirm)
lzc #(
.WIDTH(SHARED_TLB_WAYS)
) i_lzc (
.in_i (~way_valid),
.cnt_o (inv_way),
.empty_o(all_ways_valid)
);
// Source of the replacement index used when every way is valid; it is only
// enabled by update_lfsr
lfsr #(
.LfsrWidth(8),
.OutWidth ($clog2(SHARED_TLB_WAYS))
) i_lfsr (
.clk_i (clk_i),
.rst_ni(rst_ni),
.en_i (update_lfsr),
.out_o (rnd_way)
);
///////////////////////////////////////////////////////
// memory arrays and regs
///////////////////////////////////////////////////////
// Each RAM has one address input shared by reads and writes: a way is
// requested when it is read or written, and the write address is selected
// whenever any way's write enable is set.

// tag RAM
assign tag_we   = tag_wr_en;
assign tag_req  = tag_rd_en | tag_wr_en;
assign tag_addr = (|tag_wr_en) ? tag_wr_addr : tag_rd_addr;

// PTE RAM
assign pte_we   = pte_wr_en;
assign pte_req  = pte_rd_en | pte_wr_en;
assign pte_addr = (|pte_wr_en) ? pte_wr_addr : pte_rd_addr;
// One tag RAM and one PTE RAM per way, each SHARED_TLB_DEPTH words deep.
// All ways share the address and write data; request and write enable are
// per way. The raw read data is cast back to the tag / PTE struct types.
for (genvar i = 0; i < SHARED_TLB_WAYS; i++) begin : gen_sram
// Tag RAM
sram #(
.DATA_WIDTH($bits(shared_tag_t)),
.NUM_WORDS (SHARED_TLB_DEPTH)
) tag_sram (
.clk_i (clk_i),
.rst_ni (rst_ni),
.req_i (tag_req[i]),
.we_i (tag_we[i]),
.addr_i (tag_addr),
.wuser_i('0),
.wdata_i(tag_wr_data),
.be_i ('1),
.ruser_o(),
.rdata_o(tag_rd_data[i])
);
// reinterpret the read word as a tag struct
assign shared_tag_rd[i] = shared_tag_t'(tag_rd_data[i]);
// PTE RAM
sram #(
.DATA_WIDTH($bits(riscv::pte_sv32_t)),
.NUM_WORDS (SHARED_TLB_DEPTH)
) pte_sram (
.clk_i (clk_i),
.rst_ni (rst_ni),
.req_i (pte_req[i]),
.we_i (pte_we[i]),
.addr_i (pte_addr),
.wuser_i('0),
.wdata_i(pte_wr_data),
.be_i ('1),
.ruser_o(),
.rdata_o(pte_rd_data[i])
);
// reinterpret the read word as an Sv32 PTE
assign pte[i] = riscv::pte_sv32_t'(pte_rd_data[i]);
end
endmodule
/* verilator lint_on WIDTH */

View file

@ -24,134 +24,136 @@
// 2020-02-17 0.1 S.Jacq TLB Sv32 for CV32A6
// =========================================================================== //
module cva6_tlb_sv32 import ariane_pkg::*; #(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int unsigned TLB_ENTRIES = 4,
parameter int unsigned ASID_WIDTH = 1
)(
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic flush_i, // Flush signal
module cva6_tlb_sv32
import ariane_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int unsigned TLB_ENTRIES = 4,
parameter int unsigned ASID_WIDTH = 1
) (
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic flush_i, // Flush signal
// Update TLB
input tlb_update_sv32_t update_i,
input tlb_update_sv32_t update_i,
// Lookup signals
input logic lu_access_i,
input logic [ASID_WIDTH-1:0] lu_asid_i,
input logic [riscv::VLEN-1:0] lu_vaddr_i,
output riscv::pte_sv32_t lu_content_o,
input logic [ASID_WIDTH-1:0] asid_to_be_flushed_i,
input logic [riscv::VLEN-1:0] vaddr_to_be_flushed_i,
output logic lu_is_4M_o,
output logic lu_hit_o
input logic lu_access_i,
input logic [ASID_WIDTH-1:0] lu_asid_i,
input logic [riscv::VLEN-1:0] lu_vaddr_i,
output riscv::pte_sv32_t lu_content_o,
input logic [ASID_WIDTH-1:0] asid_to_be_flushed_i,
input logic [riscv::VLEN-1:0] vaddr_to_be_flushed_i,
output logic lu_is_4M_o,
output logic lu_hit_o
);
// Sv32 defines two levels of page tables
struct packed {
logic [8:0] asid; //9 bits wide
logic [9:0] vpn1; //10 bits wide
logic [9:0] vpn0; //10 bits wide
logic is_4M;
logic valid;
} [TLB_ENTRIES-1:0] tags_q, tags_n;
// Sv32 defines two levels of page tables
struct packed {
logic [8:0] asid; //9 bits wide
logic [9:0] vpn1; //10 bits wide
logic [9:0] vpn0; //10 bits wide
logic is_4M;
logic valid;
} [TLB_ENTRIES-1:0]
tags_q, tags_n;
riscv::pte_sv32_t [TLB_ENTRIES-1:0] content_q, content_n;
logic [9:0] vpn0, vpn1;
logic [TLB_ENTRIES-1:0] lu_hit; // to replacement logic
logic [TLB_ENTRIES-1:0] replace_en; // replace the following entry, set by replacement strategy
//-------------
// Translation
//-------------
always_comb begin : translation
vpn0 = lu_vaddr_i[21:12];
vpn1 = lu_vaddr_i[31:22];
riscv::pte_sv32_t [TLB_ENTRIES-1:0] content_q, content_n;
logic [9:0] vpn0, vpn1;
logic [TLB_ENTRIES-1:0] lu_hit; // to replacement logic
logic [TLB_ENTRIES-1:0] replace_en; // replace the following entry, set by replacement strategy
//-------------
// Translation
//-------------
always_comb begin : translation
vpn0 = lu_vaddr_i[21:12];
vpn1 = lu_vaddr_i[31:22];
// default assignment
lu_hit = '{default: 0};
lu_hit_o = 1'b0;
lu_content_o = '{default: 0};
lu_is_4M_o = 1'b0;
// default assignment
lu_hit = '{default: 0};
lu_hit_o = 1'b0;
lu_content_o = '{default: 0};
lu_is_4M_o = 1'b0;
for (int unsigned i = 0; i < TLB_ENTRIES; i++) begin
// first level match, this may be a mega page, check the ASID flags as well
// if the entry is associated to a global address, don't match the ASID (ASID is don't care)
if (tags_q[i].valid && ((lu_asid_i == tags_q[i].asid[ASID_WIDTH-1:0]) || content_q[i].g) && vpn1 == tags_q[i].vpn1) begin
if (tags_q[i].is_4M || vpn0 == tags_q[i].vpn0) begin
lu_is_4M_o = tags_q[i].is_4M;
lu_content_o = content_q[i];
lu_hit_o = 1'b1;
lu_hit[i] = 1'b1;
end
end
for (int unsigned i = 0; i < TLB_ENTRIES; i++) begin
// first level match, this may be a mega page, check the ASID flags as well
// if the entry is associated to a global address, don't match the ASID (ASID is don't care)
if (tags_q[i].valid && ((lu_asid_i == tags_q[i].asid[ASID_WIDTH-1:0]) || content_q[i].g) && vpn1 == tags_q[i].vpn1) begin
if (tags_q[i].is_4M || vpn0 == tags_q[i].vpn0) begin
lu_is_4M_o = tags_q[i].is_4M;
lu_content_o = content_q[i];
lu_hit_o = 1'b1;
lu_hit[i] = 1'b1;
end
end
end
end
logic asid_to_be_flushed_is0; // indicates that the ASID provided by SFENCE.VMA (rs2)is 0, active high
logic vaddr_to_be_flushed_is0; // indicates that the VADDR provided by SFENCE.VMA (rs1)is 0, active high
logic [TLB_ENTRIES-1:0] vaddr_vpn0_match;
logic [TLB_ENTRIES-1:0] vaddr_vpn1_match;
logic asid_to_be_flushed_is0; // indicates that the ASID provided by SFENCE.VMA (rs2)is 0, active high
logic vaddr_to_be_flushed_is0; // indicates that the VADDR provided by SFENCE.VMA (rs1)is 0, active high
logic [TLB_ENTRIES-1:0] vaddr_vpn0_match;
logic [TLB_ENTRIES-1:0] vaddr_vpn1_match;
assign asid_to_be_flushed_is0 = ~(|asid_to_be_flushed_i);
assign vaddr_to_be_flushed_is0 = ~(|vaddr_to_be_flushed_i);
assign asid_to_be_flushed_is0 = ~(|asid_to_be_flushed_i);
assign vaddr_to_be_flushed_is0 = ~(|vaddr_to_be_flushed_i);
// ------------------
// Update and Flush
// ------------------
always_comb begin : update_flush
tags_n = tags_q;
content_n = content_q;
// ------------------
// Update and Flush
// ------------------
always_comb begin : update_flush
tags_n = tags_q;
content_n = content_q;
for (int unsigned i = 0; i < TLB_ENTRIES; i++) begin
for (int unsigned i = 0; i < TLB_ENTRIES; i++) begin
vaddr_vpn0_match[i] = (vaddr_to_be_flushed_i[21:12] == tags_q[i].vpn0);
vaddr_vpn1_match[i] = (vaddr_to_be_flushed_i[31:22] == tags_q[i].vpn1);
vaddr_vpn0_match[i] = (vaddr_to_be_flushed_i[21:12] == tags_q[i].vpn0);
vaddr_vpn1_match[i] = (vaddr_to_be_flushed_i[31:22] == tags_q[i].vpn1);
if (flush_i) begin
// invalidate logic
// flush everything if ASID is 0 and vaddr is 0 ("SFENCE.VMA x0 x0" case)
if (asid_to_be_flushed_is0 && vaddr_to_be_flushed_is0 )
tags_n[i].valid = 1'b0;
// flush vaddr in all addressing space ("SFENCE.VMA vaddr x0" case), it should happen only for leaf pages
else if (asid_to_be_flushed_is0 && ( (vaddr_vpn0_match[i] && vaddr_vpn1_match[i]) || (vaddr_vpn1_match[i] && tags_q[i].is_4M) ) && (~vaddr_to_be_flushed_is0))
tags_n[i].valid = 1'b0;
// the entry is flushed if it's not global and asid and vaddr both matches with the entry to be flushed ("SFENCE.VMA vaddr asid" case)
else if ((!content_q[i].g) && ((vaddr_vpn0_match[i] && vaddr_vpn1_match[i]) || (vaddr_vpn1_match[i] && tags_q[i].is_4M)) && (asid_to_be_flushed_i == tags_q[i].asid[ASID_WIDTH-1:0]) && (!vaddr_to_be_flushed_is0) && (!asid_to_be_flushed_is0))
tags_n[i].valid = 1'b0;
// the entry is flushed if it's not global, and the asid matches and vaddr is 0. ("SFENCE.VMA 0 asid" case)
else if ((!content_q[i].g) && (vaddr_to_be_flushed_is0) && (asid_to_be_flushed_i == tags_q[i].asid[ASID_WIDTH-1:0]) && (!asid_to_be_flushed_is0))
tags_n[i].valid = 1'b0;
// normal replacement
end else if (update_i.valid & replace_en[i]) begin
// update tag array
tags_n[i] = '{
asid: update_i.asid,
vpn1: update_i.vpn [19:10],
vpn0: update_i.vpn [9:0],
is_4M: update_i.is_4M,
valid: 1'b1
};
// and content as well
content_n[i] = update_i.content;
end
end
if (flush_i) begin
// invalidate logic
// flush everything if ASID is 0 and vaddr is 0 ("SFENCE.VMA x0 x0" case)
if (asid_to_be_flushed_is0 && vaddr_to_be_flushed_is0) tags_n[i].valid = 1'b0;
// flush vaddr in all addressing space ("SFENCE.VMA vaddr x0" case), it should happen only for leaf pages
else if (asid_to_be_flushed_is0 && ( (vaddr_vpn0_match[i] && vaddr_vpn1_match[i]) || (vaddr_vpn1_match[i] && tags_q[i].is_4M) ) && (~vaddr_to_be_flushed_is0))
tags_n[i].valid = 1'b0;
// the entry is flushed if it's not global and asid and vaddr both matches with the entry to be flushed ("SFENCE.VMA vaddr asid" case)
else if ((!content_q[i].g) && ((vaddr_vpn0_match[i] && vaddr_vpn1_match[i]) || (vaddr_vpn1_match[i] && tags_q[i].is_4M)) && (asid_to_be_flushed_i == tags_q[i].asid[ASID_WIDTH-1:0]) && (!vaddr_to_be_flushed_is0) && (!asid_to_be_flushed_is0))
tags_n[i].valid = 1'b0;
// the entry is flushed if it's not global, and the asid matches and vaddr is 0. ("SFENCE.VMA 0 asid" case)
else if ((!content_q[i].g) && (vaddr_to_be_flushed_is0) && (asid_to_be_flushed_i == tags_q[i].asid[ASID_WIDTH-1:0]) && (!asid_to_be_flushed_is0))
tags_n[i].valid = 1'b0;
// normal replacement
end else if (update_i.valid & replace_en[i]) begin
// update tag array
tags_n[i] = '{
asid: update_i.asid,
vpn1: update_i.vpn[19:10],
vpn0: update_i.vpn[9:0],
is_4M: update_i.is_4M,
valid: 1'b1
};
// and content as well
content_n[i] = update_i.content;
end
end
end
// -----------------------------------------------
// PLRU - Pseudo Least Recently Used Replacement
// -----------------------------------------------
logic[2*(TLB_ENTRIES-1)-1:0] plru_tree_q, plru_tree_n;
logic en;
int unsigned idx_base, shift, new_index;
always_comb begin : plru_replacement
plru_tree_n = plru_tree_q;
en = '0;
idx_base = '0;
shift = '0;
new_index = '0;
// The PLRU-tree indexing:
// lvl0 0
// / \
// -----------------------------------------------
// PLRU - Pseudo Least Recently Used Replacement
// -----------------------------------------------
logic [2*(TLB_ENTRIES-1)-1:0] plru_tree_q, plru_tree_n;
logic en;
int unsigned idx_base, shift, new_index;
always_comb begin : plru_replacement
plru_tree_n = plru_tree_q;
en = '0;
idx_base = '0;
shift = '0;
new_index = '0;
// The PLRU-tree indexing:
// lvl0 0
// / \
// / \
// lvl1 1 2
// / \ / \
@ -172,94 +174,108 @@ module cva6_tlb_sv32 import ariane_pkg::*; #(
// lu_hit[0]: plru_tree_n[0, 1, 3] = {0, 0, 0};
// default: begin /* No hit */ end
// endcase
for (int unsigned i = 0; i < TLB_ENTRIES; i++) begin
// we got a hit: this entry is now the most recently used, so point the tree nodes away from it
if (lu_hit[i] & lu_access_i) begin
// Set the nodes to the values we would expect
for (int unsigned lvl = 0; lvl < $clog2(TLB_ENTRIES); lvl++) begin
idx_base = $unsigned((2**lvl)-1);
// lvl0 <=> MSB, lvl1 <=> MSB-1, ...
shift = $clog2(TLB_ENTRIES) - lvl;
// to circumvent the 32 bit integer arithmetic assignment
new_index = ~((i >> (shift-1)) & 32'b1);
plru_tree_n[idx_base + (i >> shift)] = new_index[0];
end
end
for (
int unsigned i = 0; i < TLB_ENTRIES; i++
) begin
// we got a hit: this entry is now the most recently used, so point the tree nodes away from it
if (lu_hit[i] & lu_access_i) begin
// Set the nodes to the values we would expect
for (int unsigned lvl = 0; lvl < $clog2(TLB_ENTRIES); lvl++) begin
idx_base = $unsigned((2 ** lvl) - 1);
// lvl0 <=> MSB, lvl1 <=> MSB-1, ...
shift = $clog2(TLB_ENTRIES) - lvl;
// to circumvent the 32 bit integer arithmetic assignment
new_index = ~((i >> (shift - 1)) & 32'b1);
plru_tree_n[idx_base+(i>>shift)] = new_index[0];
end
// Decode tree to write enable signals
// Next for-loop basically creates the following logic for e.g. an 8 entry
// TLB (note: pseudo-code obviously):
// replace_en[7] = &plru_tree_q[ 6, 2, 0]; //plru_tree_q[0,2,6]=={1,1,1}
// replace_en[6] = &plru_tree_q[~6, 2, 0]; //plru_tree_q[0,2,6]=={1,1,0}
// replace_en[5] = &plru_tree_q[ 5,~2, 0]; //plru_tree_q[0,2,5]=={1,0,1}
// replace_en[4] = &plru_tree_q[~5,~2, 0]; //plru_tree_q[0,2,5]=={1,0,0}
// replace_en[3] = &plru_tree_q[ 4, 1,~0]; //plru_tree_q[0,1,4]=={0,1,1}
// replace_en[2] = &plru_tree_q[~4, 1,~0]; //plru_tree_q[0,1,4]=={0,1,0}
// replace_en[1] = &plru_tree_q[ 3,~1,~0]; //plru_tree_q[0,1,3]=={0,0,1}
// replace_en[0] = &plru_tree_q[~3,~1,~0]; //plru_tree_q[0,1,3]=={0,0,0}
// For each entry traverse the tree. If every tree-node on the path matches
// the corresponding bit of the entry's index, this is
// the next entry to replace.
for (int unsigned i = 0; i < TLB_ENTRIES; i += 1) begin
en = 1'b1;
for (int unsigned lvl = 0; lvl < $clog2(TLB_ENTRIES); lvl++) begin
idx_base = $unsigned((2**lvl)-1);
// lvl0 <=> MSB, lvl1 <=> MSB-1, ...
shift = $clog2(TLB_ENTRIES) - lvl;
// en &= plru_tree_q[idx_base + (i>>shift)] == ((i >> (shift-1)) & 1'b1);
new_index = (i >> (shift-1)) & 32'b1;
if (new_index[0]) begin
en &= plru_tree_q[idx_base + (i>>shift)];
end else begin
en &= ~plru_tree_q[idx_base + (i>>shift)];
end
end
replace_en[i] = en;
end
end
// sequential process
always_ff @(posedge clk_i or negedge rst_ni) begin
if(~rst_ni) begin
tags_q <= '{default: 0};
content_q <= '{default: 0};
plru_tree_q <= '{default: 0};
end else begin
tags_q <= tags_n;
content_q <= content_n;
plru_tree_q <= plru_tree_n;
end
end
//--------------
// Sanity checks
//--------------
//pragma translate_off
`ifndef VERILATOR
initial begin : p_assertions
assert ((TLB_ENTRIES % 2 == 0) && (TLB_ENTRIES > 1))
else begin $error("TLB size must be a multiple of 2 and greater than 1"); $stop(); end
assert (ASID_WIDTH >= 1)
else begin $error("ASID width must be at least 1"); $stop(); end
end
// Just for checking
function int countSetBits(logic[TLB_ENTRIES-1:0] vector);
automatic int count = 0;
foreach (vector[idx]) begin
count += vector[idx];
end
return count;
endfunction
end
// Decode tree to write enable signals
// Next for-loop basically creates the following logic for e.g. an 8 entry
// TLB (note: pseudo-code obviously):
// replace_en[7] = &plru_tree_q[ 6, 2, 0]; //plru_tree_q[0,2,6]=={1,1,1}
// replace_en[6] = &plru_tree_q[~6, 2, 0]; //plru_tree_q[0,2,6]=={1,1,0}
// replace_en[5] = &plru_tree_q[ 5,~2, 0]; //plru_tree_q[0,2,5]=={1,0,1}
// replace_en[4] = &plru_tree_q[~5,~2, 0]; //plru_tree_q[0,2,5]=={1,0,0}
// replace_en[3] = &plru_tree_q[ 4, 1,~0]; //plru_tree_q[0,1,4]=={0,1,1}
// replace_en[2] = &plru_tree_q[~4, 1,~0]; //plru_tree_q[0,1,4]=={0,1,0}
// replace_en[1] = &plru_tree_q[ 3,~1,~0]; //plru_tree_q[0,1,3]=={0,0,1}
// replace_en[0] = &plru_tree_q[~3,~1,~0]; //plru_tree_q[0,1,3]=={0,0,0}
// For each entry traverse the tree. If every tree-node on the path matches
// the corresponding bit of the entry's index, this is
// the next entry to replace.
for (int unsigned i = 0; i < TLB_ENTRIES; i += 1) begin
en = 1'b1;
for (int unsigned lvl = 0; lvl < $clog2(TLB_ENTRIES); lvl++) begin
idx_base = $unsigned((2 ** lvl) - 1);
// lvl0 <=> MSB, lvl1 <=> MSB-1, ...
shift = $clog2(TLB_ENTRIES) - lvl;
assert property (@(posedge clk_i)(countSetBits(lu_hit) <= 1))
else begin $error("More then one hit in TLB!"); $stop(); end
assert property (@(posedge clk_i)(countSetBits(replace_en) <= 1))
else begin $error("More then one TLB entry selected for next replace!"); $stop(); end
// en &= plru_tree_q[idx_base + (i>>shift)] == ((i >> (shift-1)) & 1'b1);
new_index = (i >> (shift - 1)) & 32'b1;
if (new_index[0]) begin
en &= plru_tree_q[idx_base+(i>>shift)];
end else begin
en &= ~plru_tree_q[idx_base+(i>>shift)];
end
end
replace_en[i] = en;
end
end
`endif
//pragma translate_on
// State registers: TLB tags, cached PTE contents and the PLRU tree bits.
// Asynchronous active-low reset clears every bit; otherwise the
// combinationally computed next-state values are latched each clock.
always_ff @(posedge clk_i or negedge rst_ni) begin
  if (!rst_ni) begin
    tags_q      <= '0;
    content_q   <= '0;
    plru_tree_q <= '0;
  end else begin
    plru_tree_q <= plru_tree_n;
    content_q   <= content_n;
    tags_q      <= tags_n;
  end
end
//--------------
// Sanity checks
//--------------
//pragma translate_off
`ifndef VERILATOR
// Elaboration-time parameter checks (simulation only).
initial begin : p_assertions
  // The PLRU logic in this module walks $clog2(TLB_ENTRIES) tree levels and
  // picks a victim by matching every bit of the entry index. That only covers
  // all reachable tree states when TLB_ENTRIES is a power of two: with an even
  // but non-power-of-two size the tree can point at a non-existent entry and
  // no replace_en bit is set. Hence check for a power of two, not just "even".
  assert ((TLB_ENTRIES > 1) && ((TLB_ENTRIES & (TLB_ENTRIES - 1)) == 0))
  else begin
    $error("TLB size must be a power of 2 and greater than 1");
    $stop();
  end
  assert (ASID_WIDTH >= 1)
  else begin
    $error("ASID width must be at least 1");
    $stop();
  end
  // The tag's asid field is declared 9 bits wide and is sliced with
  // [ASID_WIDTH-1:0] in the lookup and flush logic, so wider ASIDs would
  // index outside the field.
  assert (ASID_WIDTH <= 9)
  else begin
    $error("ASID width must not exceed the 9-bit ASID tag field");
    $stop();
  end
end
// Assertion helper: returns how many bits of `vector` are set.
// Only used by the one-hot sanity checks in this module.
function int countSetBits(logic [TLB_ENTRIES-1:0] vector);
  automatic int ones = 0;
  for (int bit_idx = 0; bit_idx < TLB_ENTRIES; bit_idx++) begin
    ones = ones + vector[bit_idx];
  end
  return ones;
endfunction
// Checked on every rising clock edge: at most one bit of lu_hit may be set,
// i.e. a lookup must not match more than one TLB entry. Stops simulation on failure.
assert property (@(posedge clk_i) (countSetBits(lu_hit) <= 1))
else begin
$error("More then one hit in TLB!");
$stop();
end
// Checked on every rising clock edge: at most one bit of replace_en may be set,
// i.e. the PLRU decode must not select more than one entry for the next replacement.
assert property (@(posedge clk_i) (countSetBits(replace_en) <= 1))
else begin
$error("More then one TLB entry selected for next replace!");
$stop();
end
`endif
//pragma translate_on
endmodule

View file

@ -15,448 +15,505 @@
// privilege specification 1.11-WIP
module mmu import ariane_pkg::*; #(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int unsigned INSTR_TLB_ENTRIES = 4,
parameter int unsigned DATA_TLB_ENTRIES = 4,
parameter int unsigned ASID_WIDTH = 1
module mmu
import ariane_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int unsigned INSTR_TLB_ENTRIES = 4,
parameter int unsigned DATA_TLB_ENTRIES = 4,
parameter int unsigned ASID_WIDTH = 1
) (
input logic clk_i,
input logic rst_ni,
input logic flush_i,
input logic enable_translation_i,
input logic en_ld_st_translation_i, // enable virtual memory translation for load/stores
input logic clk_i,
input logic rst_ni,
input logic flush_i,
input logic enable_translation_i,
input logic en_ld_st_translation_i, // enable virtual memory translation for load/stores
// IF interface
input icache_arsp_t icache_areq_i,
output icache_areq_t icache_areq_o,
input icache_arsp_t icache_areq_i,
output icache_areq_t icache_areq_o,
// LSU interface
// this is a more minimalistic interface because the actual addressing logic is handled
// in the LSU as we distinguish load and stores, what we do here is simple address translation
input exception_t misaligned_ex_i,
input logic lsu_req_i, // request address translation
input logic [riscv::VLEN-1:0] lsu_vaddr_i, // virtual address in
input logic lsu_is_store_i, // the translation is requested by a store
input exception_t misaligned_ex_i,
input logic lsu_req_i, // request address translation
input logic [riscv::VLEN-1:0] lsu_vaddr_i, // virtual address in
input logic lsu_is_store_i, // the translation is requested by a store
// if we need to walk the page table we can't grant in the same cycle
// Cycle 0
output logic lsu_dtlb_hit_o, // sent in the same cycle as the request if translation hits in the DTLB
output logic [riscv::PPNW-1:0] lsu_dtlb_ppn_o, // ppn (send same cycle as hit)
output logic [riscv::PPNW-1:0] lsu_dtlb_ppn_o, // ppn (send same cycle as hit)
// Cycle 1
output logic lsu_valid_o, // translation is valid
output logic [riscv::PLEN-1:0] lsu_paddr_o, // translated address
output exception_t lsu_exception_o, // address translation threw an exception
output logic lsu_valid_o, // translation is valid
output logic [riscv::PLEN-1:0] lsu_paddr_o, // translated address
output exception_t lsu_exception_o, // address translation threw an exception
// General control signals
input riscv::priv_lvl_t priv_lvl_i,
input riscv::priv_lvl_t ld_st_priv_lvl_i,
input logic sum_i,
input logic mxr_i,
input riscv::priv_lvl_t priv_lvl_i,
input riscv::priv_lvl_t ld_st_priv_lvl_i,
input logic sum_i,
input logic mxr_i,
// input logic flag_mprv_i,
input logic [riscv::PPNW-1:0] satp_ppn_i,
input logic [ASID_WIDTH-1:0] asid_i,
input logic [ASID_WIDTH-1:0] asid_to_be_flushed_i,
input logic [riscv::VLEN-1:0] vaddr_to_be_flushed_i,
input logic flush_tlb_i,
input logic [riscv::PPNW-1:0] satp_ppn_i,
input logic [ASID_WIDTH-1:0] asid_i,
input logic [ASID_WIDTH-1:0] asid_to_be_flushed_i,
input logic [riscv::VLEN-1:0] vaddr_to_be_flushed_i,
input logic flush_tlb_i,
// Performance counters
output logic itlb_miss_o,
output logic dtlb_miss_o,
output logic itlb_miss_o,
output logic dtlb_miss_o,
// PTW memory interface
input dcache_req_o_t req_port_i,
output dcache_req_i_t req_port_o,
input dcache_req_o_t req_port_i,
output dcache_req_i_t req_port_o,
// PMP
input riscv::pmpcfg_t [15:0] pmpcfg_i,
input logic [15:0][riscv::PLEN-3:0] pmpaddr_i
input riscv::pmpcfg_t [15:0] pmpcfg_i,
input logic [15:0][riscv::PLEN-3:0] pmpaddr_i
);
logic iaccess_err; // insufficient privilege to access this instruction page
logic daccess_err; // insufficient privilege to access this data page
logic ptw_active; // PTW is currently walking a page table
logic walking_instr; // PTW is walking because of an ITLB miss
logic ptw_error; // PTW threw an exception
logic ptw_access_exception; // PTW threw an access exception (PMPs)
logic [riscv::PLEN-1:0] ptw_bad_paddr; // PTW PMP exception bad physical addr
logic iaccess_err; // insufficient privilege to access this instruction page
logic daccess_err; // insufficient privilege to access this data page
logic ptw_active; // PTW is currently walking a page table
logic walking_instr; // PTW is walking because of an ITLB miss
logic ptw_error; // PTW threw an exception
logic ptw_access_exception; // PTW threw an access exception (PMPs)
logic [riscv::PLEN-1:0] ptw_bad_paddr; // PTW PMP exception bad physical addr
logic [riscv::VLEN-1:0] update_vaddr;
tlb_update_t update_ptw_itlb, update_ptw_dtlb;
logic [riscv::VLEN-1:0] update_vaddr;
tlb_update_t update_ptw_itlb, update_ptw_dtlb;
logic itlb_lu_access;
riscv::pte_t itlb_content;
logic itlb_is_2M;
logic itlb_is_1G;
logic itlb_lu_hit;
logic itlb_lu_access;
riscv::pte_t itlb_content;
logic itlb_is_2M;
logic itlb_is_1G;
logic itlb_lu_hit;
logic dtlb_lu_access;
riscv::pte_t dtlb_content;
logic dtlb_is_2M;
logic dtlb_is_1G;
logic dtlb_lu_hit;
logic dtlb_lu_access;
riscv::pte_t dtlb_content;
logic dtlb_is_2M;
logic dtlb_is_1G;
logic dtlb_lu_hit;
// Assignments
assign itlb_lu_access = icache_areq_i.fetch_req;
assign dtlb_lu_access = lsu_req_i;
// Assignments
assign itlb_lu_access = icache_areq_i.fetch_req;
assign dtlb_lu_access = lsu_req_i;
tlb #(
.CVA6Cfg ( CVA6Cfg ),
.TLB_ENTRIES ( INSTR_TLB_ENTRIES ),
.ASID_WIDTH ( ASID_WIDTH )
) i_itlb (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.flush_i ( flush_tlb_i ),
tlb #(
.CVA6Cfg (CVA6Cfg),
.TLB_ENTRIES(INSTR_TLB_ENTRIES),
.ASID_WIDTH (ASID_WIDTH)
) i_itlb (
.clk_i (clk_i),
.rst_ni (rst_ni),
.flush_i(flush_tlb_i),
.update_i ( update_ptw_itlb ),
.update_i(update_ptw_itlb),
.lu_access_i ( itlb_lu_access ),
.lu_asid_i ( asid_i ),
.asid_to_be_flushed_i ( asid_to_be_flushed_i ),
.vaddr_to_be_flushed_i ( vaddr_to_be_flushed_i ),
.lu_vaddr_i ( icache_areq_i.fetch_vaddr ),
.lu_content_o ( itlb_content ),
.lu_access_i (itlb_lu_access),
.lu_asid_i (asid_i),
.asid_to_be_flushed_i (asid_to_be_flushed_i),
.vaddr_to_be_flushed_i(vaddr_to_be_flushed_i),
.lu_vaddr_i (icache_areq_i.fetch_vaddr),
.lu_content_o (itlb_content),
.lu_is_2M_o ( itlb_is_2M ),
.lu_is_1G_o ( itlb_is_1G ),
.lu_hit_o ( itlb_lu_hit )
);
.lu_is_2M_o(itlb_is_2M),
.lu_is_1G_o(itlb_is_1G),
.lu_hit_o (itlb_lu_hit)
);
tlb #(
.CVA6Cfg ( CVA6Cfg ),
.TLB_ENTRIES ( DATA_TLB_ENTRIES ),
.ASID_WIDTH ( ASID_WIDTH )
) i_dtlb (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.flush_i ( flush_tlb_i ),
tlb #(
.CVA6Cfg (CVA6Cfg),
.TLB_ENTRIES(DATA_TLB_ENTRIES),
.ASID_WIDTH (ASID_WIDTH)
) i_dtlb (
.clk_i (clk_i),
.rst_ni (rst_ni),
.flush_i(flush_tlb_i),
.update_i ( update_ptw_dtlb ),
.update_i(update_ptw_dtlb),
.lu_access_i ( dtlb_lu_access ),
.lu_asid_i ( asid_i ),
.asid_to_be_flushed_i ( asid_to_be_flushed_i ),
.vaddr_to_be_flushed_i ( vaddr_to_be_flushed_i ),
.lu_vaddr_i ( lsu_vaddr_i ),
.lu_content_o ( dtlb_content ),
.lu_access_i (dtlb_lu_access),
.lu_asid_i (asid_i),
.asid_to_be_flushed_i (asid_to_be_flushed_i),
.vaddr_to_be_flushed_i(vaddr_to_be_flushed_i),
.lu_vaddr_i (lsu_vaddr_i),
.lu_content_o (dtlb_content),
.lu_is_2M_o ( dtlb_is_2M ),
.lu_is_1G_o ( dtlb_is_1G ),
.lu_hit_o ( dtlb_lu_hit )
);
.lu_is_2M_o(dtlb_is_2M),
.lu_is_1G_o(dtlb_is_1G),
.lu_hit_o (dtlb_lu_hit)
);
ptw #(
.CVA6Cfg ( CVA6Cfg ),
.ASID_WIDTH ( ASID_WIDTH )
) i_ptw (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.ptw_active_o ( ptw_active ),
.walking_instr_o ( walking_instr ),
.ptw_error_o ( ptw_error ),
.ptw_access_exception_o ( ptw_access_exception ),
.enable_translation_i ( enable_translation_i ),
ptw #(
.CVA6Cfg (CVA6Cfg),
.ASID_WIDTH(ASID_WIDTH)
) i_ptw (
.clk_i (clk_i),
.rst_ni (rst_ni),
.ptw_active_o (ptw_active),
.walking_instr_o (walking_instr),
.ptw_error_o (ptw_error),
.ptw_access_exception_o(ptw_access_exception),
.enable_translation_i (enable_translation_i),
.update_vaddr_o ( update_vaddr ),
.itlb_update_o ( update_ptw_itlb ),
.dtlb_update_o ( update_ptw_dtlb ),
.update_vaddr_o(update_vaddr),
.itlb_update_o (update_ptw_itlb),
.dtlb_update_o (update_ptw_dtlb),
.itlb_access_i ( itlb_lu_access ),
.itlb_hit_i ( itlb_lu_hit ),
.itlb_vaddr_i ( icache_areq_i.fetch_vaddr ),
.itlb_access_i(itlb_lu_access),
.itlb_hit_i (itlb_lu_hit),
.itlb_vaddr_i (icache_areq_i.fetch_vaddr),
.dtlb_access_i ( dtlb_lu_access ),
.dtlb_hit_i ( dtlb_lu_hit ),
.dtlb_vaddr_i ( lsu_vaddr_i ),
.dtlb_access_i(dtlb_lu_access),
.dtlb_hit_i (dtlb_lu_hit),
.dtlb_vaddr_i (lsu_vaddr_i),
.req_port_i ( req_port_i ),
.req_port_o ( req_port_o ),
.pmpcfg_i,
.pmpaddr_i,
.bad_paddr_o ( ptw_bad_paddr ),
.*
);
.req_port_i (req_port_i),
.req_port_o (req_port_o),
.pmpcfg_i,
.pmpaddr_i,
.bad_paddr_o(ptw_bad_paddr),
.*
);
// ila_1 i_ila_1 (
// .clk(clk_i), // input wire clk
// .probe0({req_port_o.address_tag, req_port_o.address_index}),
// .probe1(req_port_o.data_req), // input wire [63:0] probe1
// .probe2(req_port_i.data_gnt), // input wire [0:0] probe2
// .probe3(req_port_i.data_rdata), // input wire [0:0] probe3
// .probe4(req_port_i.data_rvalid), // input wire [0:0] probe4
// .probe5(ptw_error), // input wire [1:0] probe5
// .probe6(update_vaddr), // input wire [0:0] probe6
// .probe7(update_ptw_itlb.valid), // input wire [0:0] probe7
// .probe8(update_ptw_dtlb.valid), // input wire [0:0] probe8
// .probe9(dtlb_lu_access), // input wire [0:0] probe9
// .probe10(lsu_vaddr_i), // input wire [0:0] probe10
// .probe11(dtlb_lu_hit), // input wire [0:0] probe11
// .probe12(itlb_lu_access), // input wire [0:0] probe12
// .probe13(icache_areq_i.fetch_vaddr), // input wire [0:0] probe13
// .probe14(itlb_lu_hit) // input wire [0:0] probe13
// );
// ila_1 i_ila_1 (
// .clk(clk_i), // input wire clk
// .probe0({req_port_o.address_tag, req_port_o.address_index}),
// .probe1(req_port_o.data_req), // input wire [63:0] probe1
// .probe2(req_port_i.data_gnt), // input wire [0:0] probe2
// .probe3(req_port_i.data_rdata), // input wire [0:0] probe3
// .probe4(req_port_i.data_rvalid), // input wire [0:0] probe4
// .probe5(ptw_error), // input wire [1:0] probe5
// .probe6(update_vaddr), // input wire [0:0] probe6
// .probe7(update_ptw_itlb.valid), // input wire [0:0] probe7
// .probe8(update_ptw_dtlb.valid), // input wire [0:0] probe8
// .probe9(dtlb_lu_access), // input wire [0:0] probe9
// .probe10(lsu_vaddr_i), // input wire [0:0] probe10
// .probe11(dtlb_lu_hit), // input wire [0:0] probe11
// .probe12(itlb_lu_access), // input wire [0:0] probe12
// .probe13(icache_areq_i.fetch_vaddr), // input wire [0:0] probe13
// .probe14(itlb_lu_hit) // input wire [0:0] probe13
// );
//-----------------------
// Instruction Interface
//-----------------------
logic match_any_execute_region;
logic pmp_instr_allow;
//-----------------------
// Instruction Interface
//-----------------------
logic match_any_execute_region;
logic pmp_instr_allow;
// The instruction interface is a simple request response interface
always_comb begin : instr_interface
// MMU disabled: just pass through
icache_areq_o.fetch_valid = icache_areq_i.fetch_req;
icache_areq_o.fetch_paddr = icache_areq_i.fetch_vaddr[riscv::PLEN-1:0]; // play through in case we disabled address translation
// two potential exception sources:
// 1. HPTW threw an exception -> signal with a page fault exception
// 2. We got an access error because of insufficient permissions -> throw an access exception
icache_areq_o.fetch_exception = '0;
// Check whether we are allowed to access this memory region from a fetch perspective
iaccess_err = icache_areq_i.fetch_req && enable_translation_i
// The instruction interface is a simple request response interface
always_comb begin : instr_interface
// MMU disabled: just pass through
icache_areq_o.fetch_valid = icache_areq_i.fetch_req;
icache_areq_o.fetch_paddr = icache_areq_i.fetch_vaddr[riscv::PLEN-1:0]; // play through in case we disabled address translation
// two potential exception sources:
// 1. HPTW threw an exception -> signal with a page fault exception
// 2. We got an access error because of insufficient permissions -> throw an access exception
icache_areq_o.fetch_exception = '0;
// Check whether we are allowed to access this memory region from a fetch perspective
iaccess_err = icache_areq_i.fetch_req && enable_translation_i
&& (((priv_lvl_i == riscv::PRIV_LVL_U) && ~itlb_content.u)
|| ((priv_lvl_i == riscv::PRIV_LVL_S) && itlb_content.u));
// MMU enabled: address from TLB, request delayed until hit. Error when TLB
// hit and no access right or TLB hit and translated address not valid (e.g.
// AXI decode error), or when PTW performs walk due to ITLB miss and raises
// an error.
if (enable_translation_i) begin
// we work with SV39 or SV32, so if VM is enabled, check that all bits [riscv::VLEN-1:riscv::SV-1] are equal
if (icache_areq_i.fetch_req && !((&icache_areq_i.fetch_vaddr[riscv::VLEN-1:riscv::SV-1]) == 1'b1 || (|icache_areq_i.fetch_vaddr[riscv::VLEN-1:riscv::SV-1]) == 1'b0)) begin
icache_areq_o.fetch_exception = {riscv::INSTR_ACCESS_FAULT, {{riscv::XLEN-riscv::VLEN{1'b0}}, icache_areq_i.fetch_vaddr}, 1'b1};
end
// MMU enabled: address from TLB, request delayed until hit. Error when TLB
// hit and no access right or TLB hit and translated address not valid (e.g.
// AXI decode error), or when PTW performs walk due to ITLB miss and raises
// an error.
if (enable_translation_i) begin
// we work with SV39 or SV32, so if VM is enabled, check that all bits [riscv::VLEN-1:riscv::SV-1] are equal
if (icache_areq_i.fetch_req && !((&icache_areq_i.fetch_vaddr[riscv::VLEN-1:riscv::SV-1]) == 1'b1 || (|icache_areq_i.fetch_vaddr[riscv::VLEN-1:riscv::SV-1]) == 1'b0)) begin
icache_areq_o.fetch_exception = {
riscv::INSTR_ACCESS_FAULT,
{{riscv::XLEN - riscv::VLEN{1'b0}}, icache_areq_i.fetch_vaddr},
1'b1
};
end
icache_areq_o.fetch_valid = 1'b0;
icache_areq_o.fetch_valid = 1'b0;
// 4K page
icache_areq_o.fetch_paddr = {itlb_content.ppn, icache_areq_i.fetch_vaddr[11:0]};
// Mega page
if (itlb_is_2M) begin
icache_areq_o.fetch_paddr[20:12] = icache_areq_i.fetch_vaddr[20:12];
end
// Giga page
if (itlb_is_1G) begin
icache_areq_o.fetch_paddr[29:12] = icache_areq_i.fetch_vaddr[29:12];
end
// 4K page
icache_areq_o.fetch_paddr = {itlb_content.ppn, icache_areq_i.fetch_vaddr[11:0]};
// Mega page
if (itlb_is_2M) begin
icache_areq_o.fetch_paddr[20:12] = icache_areq_i.fetch_vaddr[20:12];
end
// Giga page
if (itlb_is_1G) begin
icache_areq_o.fetch_paddr[29:12] = icache_areq_i.fetch_vaddr[29:12];
end
// ---------
// ITLB Hit
// --------
// if we hit the ITLB output the request signal immediately
if (itlb_lu_hit) begin
icache_areq_o.fetch_valid = icache_areq_i.fetch_req;
// we got an access error
if (iaccess_err) begin
// throw a page fault
icache_areq_o.fetch_exception = {riscv::INSTR_PAGE_FAULT, {{riscv::XLEN-riscv::VLEN{1'b0}}, icache_areq_i.fetch_vaddr}, 1'b1};
end else if (!pmp_instr_allow) begin
icache_areq_o.fetch_exception = {riscv::INSTR_ACCESS_FAULT, {{riscv::XLEN-riscv::PLEN{1'b0}}, icache_areq_i.fetch_vaddr}, 1'b1};
end
end else
// ---------
// ITLB Miss
// ---------
// watch out for exceptions happening during walking the page table
if (ptw_active && walking_instr) begin
icache_areq_o.fetch_valid = ptw_error | ptw_access_exception;
if (ptw_error) icache_areq_o.fetch_exception = {riscv::INSTR_PAGE_FAULT, {{riscv::XLEN-riscv::VLEN{1'b0}}, update_vaddr}, 1'b1};
else icache_areq_o.fetch_exception = {riscv::INSTR_ACCESS_FAULT, {{riscv::XLEN-riscv::VLEN{1'b0}}, update_vaddr}, 1'b1};
end
end
// if it didn't match any execute region throw an `Instruction Access Fault`
// or: if we are not translating, check PMPs immediately on the paddr
if ((!match_any_execute_region && !ptw_error) || (!enable_translation_i && !pmp_instr_allow)) begin
icache_areq_o.fetch_exception = {riscv::INSTR_ACCESS_FAULT, {{riscv::XLEN-riscv::PLEN{1'b0}}, icache_areq_o.fetch_paddr}, 1'b1};
// ---------
// ITLB Hit
// --------
// if we hit the ITLB output the request signal immediately
if (itlb_lu_hit) begin
icache_areq_o.fetch_valid = icache_areq_i.fetch_req;
// we got an access error
if (iaccess_err) begin
// throw a page fault
icache_areq_o.fetch_exception = {
riscv::INSTR_PAGE_FAULT,
{{riscv::XLEN - riscv::VLEN{1'b0}}, icache_areq_i.fetch_vaddr},
1'b1
};
end else if (!pmp_instr_allow) begin
icache_areq_o.fetch_exception = {
riscv::INSTR_ACCESS_FAULT,
{{riscv::XLEN - riscv::PLEN{1'b0}}, icache_areq_i.fetch_vaddr},
1'b1
};
end
end else
// ---------
// ITLB Miss
// ---------
// watch out for exceptions happening during walking the page table
if (ptw_active && walking_instr) begin
icache_areq_o.fetch_valid = ptw_error | ptw_access_exception;
if (ptw_error)
icache_areq_o.fetch_exception = {
riscv::INSTR_PAGE_FAULT, {{riscv::XLEN - riscv::VLEN{1'b0}}, update_vaddr}, 1'b1
};
else
icache_areq_o.fetch_exception = {
riscv::INSTR_ACCESS_FAULT, {{riscv::XLEN - riscv::VLEN{1'b0}}, update_vaddr}, 1'b1
};
end
end
// check for execute flag on memory
assign match_any_execute_region = config_pkg::is_inside_execute_regions(CVA6Cfg, {{64-riscv::PLEN{1'b0}}, icache_areq_o.fetch_paddr});
// Instruction fetch
// PMP check for the instruction-fetch path: evaluates the fetch physical
// address as an execute access and reports the verdict on pmp_instr_allow,
// which the instruction interface uses to raise INSTR_ACCESS_FAULT.
pmp #(
.CVA6Cfg ( CVA6Cfg ),
.PLEN ( riscv::PLEN ),
// matches the width of the pmpaddr_i entries ([riscv::PLEN-3:0])
.PMP_LEN ( riscv::PLEN - 2 ),
.NR_ENTRIES ( CVA6Cfg.NrPMPEntries )
) i_pmp_if (
// address being checked: the physical fetch address driven to the I-cache
.addr_i ( icache_areq_o.fetch_paddr ),
// implicit named connection to this module's priv_lvl_i input
.priv_lvl_i,
// we will always execute on the instruction fetch port
.access_type_i ( riscv::ACCESS_EXEC ),
// Configuration
.conf_addr_i ( pmpaddr_i ),
.conf_i ( pmpcfg_i ),
// high when the access is permitted
.allow_o ( pmp_instr_allow )
);
//-----------------------
// Data Interface
//-----------------------
logic [riscv::VLEN-1:0] lsu_vaddr_n, lsu_vaddr_q;
riscv::pte_t dtlb_pte_n, dtlb_pte_q;
exception_t misaligned_ex_n, misaligned_ex_q;
logic lsu_req_n, lsu_req_q;
logic lsu_is_store_n, lsu_is_store_q;
logic dtlb_hit_n, dtlb_hit_q;
logic dtlb_is_2M_n, dtlb_is_2M_q;
logic dtlb_is_1G_n, dtlb_is_1G_q;
// check if we need to do translation or if we are always ready (e.g.: we are not translating anything)
assign lsu_dtlb_hit_o = (en_ld_st_translation_i) ? dtlb_lu_hit : 1'b1;
// Wires to PMP checks
riscv::pmp_access_t pmp_access_type;
logic pmp_data_allow;
localparam PPNWMin = (riscv::PPNW-1 > 29) ? 29 : riscv::PPNW-1;
// The data interface is simpler and only consists of a request/response interface
always_comb begin : data_interface
// save request and DTLB response
lsu_vaddr_n = lsu_vaddr_i;
lsu_req_n = lsu_req_i;
misaligned_ex_n = misaligned_ex_i;
dtlb_pte_n = dtlb_content;
dtlb_hit_n = dtlb_lu_hit;
lsu_is_store_n = lsu_is_store_i;
dtlb_is_2M_n = dtlb_is_2M;
dtlb_is_1G_n = dtlb_is_1G;
lsu_paddr_o = lsu_vaddr_q[riscv::PLEN-1:0];
lsu_dtlb_ppn_o = lsu_vaddr_n[riscv::PLEN-1:12];
lsu_valid_o = lsu_req_q;
lsu_exception_o = misaligned_ex_q;
pmp_access_type = lsu_is_store_q ? riscv::ACCESS_WRITE : riscv::ACCESS_READ;
// mute misaligned exceptions if there is no request otherwise they will throw accidental exceptions
misaligned_ex_n.valid = misaligned_ex_i.valid & lsu_req_i;
// Check if the User flag is set, then we may only access it in supervisor mode
// if SUM is enabled
daccess_err = en_ld_st_translation_i && ((ld_st_priv_lvl_i == riscv::PRIV_LVL_S && !sum_i && dtlb_pte_q.u) || // SUM is not set and we are trying to access a user page in supervisor mode
(ld_st_priv_lvl_i == riscv::PRIV_LVL_U && !dtlb_pte_q.u)); // this is not a user page but we are in user mode and trying to access it
// translation is enabled and no misaligned exception occurred
if (en_ld_st_translation_i && !misaligned_ex_q.valid) begin
lsu_valid_o = 1'b0;
// 4K page
lsu_paddr_o = {dtlb_pte_q.ppn, lsu_vaddr_q[11:0]};
lsu_dtlb_ppn_o = dtlb_content.ppn;
// Mega page
if (dtlb_is_2M_q) begin
lsu_paddr_o[20:12] = lsu_vaddr_q[20:12];
lsu_dtlb_ppn_o[20:12] = lsu_vaddr_n[20:12];
end
// Giga page
if (dtlb_is_1G_q) begin
lsu_paddr_o[PPNWMin:12] = lsu_vaddr_q[PPNWMin:12];
lsu_dtlb_ppn_o[PPNWMin:12] = lsu_vaddr_n[PPNWMin:12];
end
// ---------
// DTLB Hit
// --------
if (dtlb_hit_q && lsu_req_q) begin
lsu_valid_o = 1'b1;
// exception priority:
// PAGE_FAULTS have higher priority than ACCESS_FAULTS
// virtual memory based exceptions are PAGE_FAULTS
// physical memory based exceptions are ACCESS_FAULTS (PMA/PMP)
// this is a store
if (lsu_is_store_q) begin
// check if the page is write-able and we are not violating privileges
// also check if the dirty flag is set
if (!dtlb_pte_q.w || daccess_err || !dtlb_pte_q.d) begin
lsu_exception_o = {riscv::STORE_PAGE_FAULT, {{riscv::XLEN-riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}},lsu_vaddr_q}, 1'b1};
// Check if any PMPs are violated
end else if (!pmp_data_allow) begin
lsu_exception_o = {riscv::ST_ACCESS_FAULT, {{riscv::XLEN-riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}},lsu_vaddr_q}, 1'b1};
end
// this is a load
end else begin
// check for sufficient access privileges - throw a page fault if necessary
if (daccess_err) begin
lsu_exception_o = {riscv::LOAD_PAGE_FAULT, {{riscv::XLEN-riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}},lsu_vaddr_q}, 1'b1};
// Check if any PMPs are violated
end else if (!pmp_data_allow) begin
lsu_exception_o = {riscv::LD_ACCESS_FAULT, {{riscv::XLEN-riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}},lsu_vaddr_q}, 1'b1};
end
end
end else
// ---------
// DTLB Miss
// ---------
// watch out for exceptions
if (ptw_active && !walking_instr) begin
// page table walker threw an exception
if (ptw_error) begin
// an error makes the translation valid
lsu_valid_o = 1'b1;
// the page table walker can only throw page faults
if (lsu_is_store_q) begin
lsu_exception_o = {riscv::STORE_PAGE_FAULT, {{riscv::XLEN-riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}},update_vaddr}, 1'b1};
end else begin
lsu_exception_o = {riscv::LOAD_PAGE_FAULT, {{riscv::XLEN-riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}},update_vaddr}, 1'b1};
end
end
if (ptw_access_exception) begin
// an error makes the translation valid
lsu_valid_o = 1'b1;
// Any fault of the page table walk should be based on the original access type
if (lsu_is_store_q) begin
lsu_exception_o = {riscv::ST_ACCESS_FAULT, {{riscv::XLEN-riscv::VLEN{1'b0}}, lsu_vaddr_n}, 1'b1};
end else begin
lsu_exception_o = {riscv::LD_ACCESS_FAULT, {{riscv::XLEN-riscv::VLEN{1'b0}}, lsu_vaddr_n}, 1'b1};
end
end
end
end
// If translation is not enabled, check the paddr immediately against PMPs
else if (lsu_req_q && !misaligned_ex_q.valid && !pmp_data_allow) begin
if (lsu_is_store_q) begin
lsu_exception_o = {riscv::ST_ACCESS_FAULT, {{riscv::XLEN-riscv::PLEN{1'b0}}, lsu_paddr_o}, 1'b1};
end else begin
lsu_exception_o = {riscv::LD_ACCESS_FAULT, {{riscv::XLEN-riscv::PLEN{1'b0}}, lsu_paddr_o}, 1'b1};
end
end
// if it didn't match any execute region throw an `Instruction Access Fault`
// or: if we are not translating, check PMPs immediately on the paddr
if ((!match_any_execute_region && !ptw_error) || (!enable_translation_i && !pmp_instr_allow)) begin
icache_areq_o.fetch_exception = {
riscv::INSTR_ACCESS_FAULT,
{{riscv::XLEN - riscv::PLEN{1'b0}}, icache_areq_o.fetch_paddr},
1'b1
};
end
end
// Load/store PMP check
pmp #(
.CVA6Cfg ( CVA6Cfg ),
.PLEN ( riscv::PLEN ),
.PMP_LEN ( riscv::PLEN - 2 ),
.NR_ENTRIES ( CVA6Cfg.NrPMPEntries )
) i_pmp_data (
.addr_i ( lsu_paddr_o ),
.priv_lvl_i ( ld_st_priv_lvl_i ),
.access_type_i ( pmp_access_type ),
// Configuration
.conf_addr_i ( pmpaddr_i ),
.conf_i ( pmpcfg_i ),
.allow_o ( pmp_data_allow )
);
// check for execute flag on memory
assign match_any_execute_region = config_pkg::is_inside_execute_regions(
CVA6Cfg, {{64 - riscv::PLEN{1'b0}}, icache_areq_o.fetch_paddr}
);
// ----------
// Registers
// ----------
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
lsu_vaddr_q <= '0;
lsu_req_q <= '0;
misaligned_ex_q <= '0;
dtlb_pte_q <= '0;
dtlb_hit_q <= '0;
lsu_is_store_q <= '0;
dtlb_is_2M_q <= '0;
dtlb_is_1G_q <= '0;
// Instruction fetch
// PMP check for the instruction-fetch path: the fetch physical address is
// checked as an execute access and the verdict is returned on pmp_instr_allow.
pmp #(
.CVA6Cfg (CVA6Cfg),
.PLEN (riscv::PLEN),
.PMP_LEN (riscv::PLEN - 2),
.NR_ENTRIES(CVA6Cfg.NrPMPEntries)
) i_pmp_if (
// physical address handed to the instruction cache
.addr_i (icache_areq_o.fetch_paddr),
// implicit named connection: binds to the signal called priv_lvl_i in this scope
.priv_lvl_i,
// we will always execute on the instruction fetch port
.access_type_i(riscv::ACCESS_EXEC),
// Configuration
.conf_addr_i (pmpaddr_i),
.conf_i (pmpcfg_i),
// 1 when the access is permitted
.allow_o (pmp_instr_allow)
);
//-----------------------
// Data Interface
//-----------------------
logic [riscv::VLEN-1:0] lsu_vaddr_n, lsu_vaddr_q;
riscv::pte_t dtlb_pte_n, dtlb_pte_q;
exception_t misaligned_ex_n, misaligned_ex_q;
logic lsu_req_n, lsu_req_q;
logic lsu_is_store_n, lsu_is_store_q;
logic dtlb_hit_n, dtlb_hit_q;
logic dtlb_is_2M_n, dtlb_is_2M_q;
logic dtlb_is_1G_n, dtlb_is_1G_q;
// check if we need to do translation or if we are always ready (e.g.: we are not translating anything)
assign lsu_dtlb_hit_o = (en_ld_st_translation_i) ? dtlb_lu_hit : 1'b1;
// Wires to PMP checks
riscv::pmp_access_t pmp_access_type;
logic pmp_data_allow;
localparam PPNWMin = (riscv::PPNW - 1 > 29) ? 29 : riscv::PPNW - 1;
// The data interface is simpler and only consists of a request/response interface
always_comb begin : data_interface
// save request and DTLB response
lsu_vaddr_n = lsu_vaddr_i;
lsu_req_n = lsu_req_i;
misaligned_ex_n = misaligned_ex_i;
dtlb_pte_n = dtlb_content;
dtlb_hit_n = dtlb_lu_hit;
lsu_is_store_n = lsu_is_store_i;
dtlb_is_2M_n = dtlb_is_2M;
dtlb_is_1G_n = dtlb_is_1G;
lsu_paddr_o = lsu_vaddr_q[riscv::PLEN-1:0];
lsu_dtlb_ppn_o = lsu_vaddr_n[riscv::PLEN-1:12];
lsu_valid_o = lsu_req_q;
lsu_exception_o = misaligned_ex_q;
pmp_access_type = lsu_is_store_q ? riscv::ACCESS_WRITE : riscv::ACCESS_READ;
// mute misaligned exceptions if there is no request otherwise they will throw accidental exceptions
misaligned_ex_n.valid = misaligned_ex_i.valid & lsu_req_i;
// Check if the User flag is set, then we may only access it in supervisor mode
// if SUM is enabled
daccess_err = en_ld_st_translation_i && ((ld_st_priv_lvl_i == riscv::PRIV_LVL_S && !sum_i && dtlb_pte_q.u) || // SUM is not set and we are trying to access a user page in supervisor mode
(ld_st_priv_lvl_i == riscv::PRIV_LVL_U && !dtlb_pte_q.u)); // this is not a user page but we are in user mode and trying to access it
// translation is enabled and no misaligned exception occurred
if (en_ld_st_translation_i && !misaligned_ex_q.valid) begin
lsu_valid_o = 1'b0;
// 4K page
lsu_paddr_o = {dtlb_pte_q.ppn, lsu_vaddr_q[11:0]};
lsu_dtlb_ppn_o = dtlb_content.ppn;
// Mega page
if (dtlb_is_2M_q) begin
lsu_paddr_o[20:12] = lsu_vaddr_q[20:12];
lsu_dtlb_ppn_o[20:12] = lsu_vaddr_n[20:12];
end
// Giga page
if (dtlb_is_1G_q) begin
lsu_paddr_o[PPNWMin:12] = lsu_vaddr_q[PPNWMin:12];
lsu_dtlb_ppn_o[PPNWMin:12] = lsu_vaddr_n[PPNWMin:12];
end
// ---------
// DTLB Hit
// --------
if (dtlb_hit_q && lsu_req_q) begin
lsu_valid_o = 1'b1;
// exception priority:
// PAGE_FAULTS have higher priority than ACCESS_FAULTS
// virtual memory based exceptions are PAGE_FAULTS
// physical memory based exceptions are ACCESS_FAULTS (PMA/PMP)
// this is a store
if (lsu_is_store_q) begin
// check if the page is write-able and we are not violating privileges
// also check if the dirty flag is set
if (!dtlb_pte_q.w || daccess_err || !dtlb_pte_q.d) begin
lsu_exception_o = {
riscv::STORE_PAGE_FAULT,
{{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}}, lsu_vaddr_q},
1'b1
};
// Check if any PMPs are violated
end else if (!pmp_data_allow) begin
lsu_exception_o = {
riscv::ST_ACCESS_FAULT,
{{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}}, lsu_vaddr_q},
1'b1
};
end
// this is a load
end else begin
lsu_vaddr_q <= lsu_vaddr_n;
lsu_req_q <= lsu_req_n;
misaligned_ex_q <= misaligned_ex_n;
dtlb_pte_q <= dtlb_pte_n;
dtlb_hit_q <= dtlb_hit_n;
lsu_is_store_q <= lsu_is_store_n;
dtlb_is_2M_q <= dtlb_is_2M_n;
dtlb_is_1G_q <= dtlb_is_1G_n;
// check for sufficient access privileges - throw a page fault if necessary
if (daccess_err) begin
lsu_exception_o = {
riscv::LOAD_PAGE_FAULT,
{{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}}, lsu_vaddr_q},
1'b1
};
// Check if any PMPs are violated
end else if (!pmp_data_allow) begin
lsu_exception_o = {
riscv::LD_ACCESS_FAULT,
{{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}}, lsu_vaddr_q},
1'b1
};
end
end
end else
// ---------
// DTLB Miss
// ---------
// watch out for exceptions
if (ptw_active && !walking_instr) begin
// page table walker threw an exception
if (ptw_error) begin
// an error makes the translation valid
lsu_valid_o = 1'b1;
// the page table walker can only throw page faults
if (lsu_is_store_q) begin
lsu_exception_o = {
riscv::STORE_PAGE_FAULT,
{{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}}, update_vaddr},
1'b1
};
end else begin
lsu_exception_o = {
riscv::LOAD_PAGE_FAULT,
{{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}}, update_vaddr},
1'b1
};
end
end
if (ptw_access_exception) begin
// an error makes the translation valid
lsu_valid_o = 1'b1;
// Any fault of the page table walk should be based on the original access type
if (lsu_is_store_q) begin
lsu_exception_o = {
riscv::ST_ACCESS_FAULT, {{riscv::XLEN - riscv::VLEN{1'b0}}, lsu_vaddr_n}, 1'b1
};
end else begin
lsu_exception_o = {
riscv::LD_ACCESS_FAULT, {{riscv::XLEN - riscv::VLEN{1'b0}}, lsu_vaddr_n}, 1'b1
};
end
end
end
end // If translation is not enabled, check the paddr immediately against PMPs
else if (lsu_req_q && !misaligned_ex_q.valid && !pmp_data_allow) begin
if (lsu_is_store_q) begin
lsu_exception_o = {
riscv::ST_ACCESS_FAULT, {{riscv::XLEN - riscv::PLEN{1'b0}}, lsu_paddr_o}, 1'b1
};
end else begin
lsu_exception_o = {
riscv::LD_ACCESS_FAULT, {{riscv::XLEN - riscv::PLEN{1'b0}}, lsu_paddr_o}, 1'b1
};
end
end
end
// Load/store PMP check
// Checks the physical address produced by the data interface against the PMP
// configuration. The verdict (pmp_data_allow) is consumed by the
// data_interface process, which turns a denied access into a
// ST_ACCESS_FAULT / LD_ACCESS_FAULT exception.
pmp #(
.CVA6Cfg (CVA6Cfg),
.PLEN (riscv::PLEN),
.PMP_LEN (riscv::PLEN - 2),
.NR_ENTRIES(CVA6Cfg.NrPMPEntries)
) i_pmp_data (
// translated (or pass-through) physical address of the load/store
.addr_i (lsu_paddr_o),
.priv_lvl_i (ld_st_priv_lvl_i),
// ACCESS_WRITE for stores, ACCESS_READ otherwise (derived from lsu_is_store_q)
.access_type_i(pmp_access_type),
// Configuration
.conf_addr_i (pmpaddr_i),
.conf_i (pmpcfg_i),
// 1 when the access is permitted
.allow_o (pmp_data_allow)
);
// ----------
// Registers
// ----------
// Register stage of the data interface: on every rising clock edge the `_n`
// values computed in the `data_interface` process are captured into their `_q`
// counterparts. All registers are cleared by the asynchronous, active-low
// reset rst_ni.
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
// reset: no pending request, no latched exception, no latched DTLB response
lsu_vaddr_q <= '0;
lsu_req_q <= '0;
misaligned_ex_q <= '0;
dtlb_pte_q <= '0;
dtlb_hit_q <= '0;
lsu_is_store_q <= '0;
dtlb_is_2M_q <= '0;
dtlb_is_1G_q <= '0;
end else begin
// capture the request and the DTLB lookup result for use in the next cycle
lsu_vaddr_q <= lsu_vaddr_n;
lsu_req_q <= lsu_req_n;
misaligned_ex_q <= misaligned_ex_n;
dtlb_pte_q <= dtlb_pte_n;
dtlb_hit_q <= dtlb_hit_n;
lsu_is_store_q <= lsu_is_store_n;
dtlb_is_2M_q <= dtlb_is_2M_n;
dtlb_is_1G_q <= dtlb_is_1G_n;
end
end
endmodule

View file

@ -15,395 +15,395 @@
/* verilator lint_off WIDTH */
module ptw import ariane_pkg::*; #(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int ASID_WIDTH = 1
module ptw
import ariane_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int ASID_WIDTH = 1
) (
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic flush_i, // flush everything, we need to do this because
// actually everything we do is speculative at this stage
// e.g.: there could be a CSR instruction that changes everything
output logic ptw_active_o,
output logic walking_instr_o, // set when walking for TLB
output logic ptw_error_o, // set when an error occurred
output logic ptw_access_exception_o, // set when an PMP access exception occured
input logic enable_translation_i, // CSRs indicate to enable SV39
input logic en_ld_st_translation_i, // enable virtual memory translation for load/stores
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic flush_i, // flush everything, we need to do this because
// actually everything we do is speculative at this stage
// e.g.: there could be a CSR instruction that changes everything
output logic ptw_active_o,
output logic walking_instr_o, // set when walking for TLB
output logic ptw_error_o, // set when an error occurred
output logic ptw_access_exception_o, // set when an PMP access exception occured
input logic enable_translation_i, // CSRs indicate to enable SV39
input logic en_ld_st_translation_i, // enable virtual memory translation for load/stores
input logic lsu_is_store_i, // this translation was triggered by a store
input logic lsu_is_store_i, // this translation was triggered by a store
// PTW memory interface
input dcache_req_o_t req_port_i,
output dcache_req_i_t req_port_o,
input dcache_req_o_t req_port_i,
output dcache_req_i_t req_port_o,
// to TLBs, update logic
output tlb_update_t itlb_update_o,
output tlb_update_t dtlb_update_o,
output tlb_update_t itlb_update_o,
output tlb_update_t dtlb_update_o,
output logic [riscv::VLEN-1:0] update_vaddr_o,
output logic [riscv::VLEN-1:0] update_vaddr_o,
input logic [ASID_WIDTH-1:0] asid_i,
input logic [ ASID_WIDTH-1:0] asid_i,
// from TLBs
// did we miss?
input logic itlb_access_i,
input logic itlb_hit_i,
input logic [riscv::VLEN-1:0] itlb_vaddr_i,
input logic itlb_access_i,
input logic itlb_hit_i,
input logic [riscv::VLEN-1:0] itlb_vaddr_i,
input logic dtlb_access_i,
input logic dtlb_hit_i,
input logic [riscv::VLEN-1:0] dtlb_vaddr_i,
input logic dtlb_access_i,
input logic dtlb_hit_i,
input logic [riscv::VLEN-1:0] dtlb_vaddr_i,
// from CSR file
input logic [riscv::PPNW-1:0] satp_ppn_i, // ppn from satp
input logic mxr_i,
input logic [riscv::PPNW-1:0] satp_ppn_i, // ppn from satp
input logic mxr_i,
// Performance counters
output logic itlb_miss_o,
output logic dtlb_miss_o,
output logic itlb_miss_o,
output logic dtlb_miss_o,
// PMP
input riscv::pmpcfg_t [15:0] pmpcfg_i,
input logic [15:0][riscv::PLEN-3:0] pmpaddr_i,
output logic [riscv::PLEN-1:0] bad_paddr_o
input riscv::pmpcfg_t [15:0] pmpcfg_i,
input logic [15:0][riscv::PLEN-3:0] pmpaddr_i,
output logic [riscv::PLEN-1:0] bad_paddr_o
);
// input registers
logic data_rvalid_q;
logic [63:0] data_rdata_q;
riscv::pte_t pte;
assign pte = riscv::pte_t'(data_rdata_q);
enum logic [2:0] {
IDLE,
WAIT_GRANT,
PTE_LOOKUP,
WAIT_RVALID,
PROPAGATE_ERROR,
PROPAGATE_ACCESS_ERROR
}
state_q, state_d;
// SV39 defines three levels of page tables
enum logic [1:0] {
LVL1,
LVL2,
LVL3
}
ptw_lvl_q, ptw_lvl_n;
// is this an instruction page table walk?
logic is_instr_ptw_q, is_instr_ptw_n;
logic global_mapping_q, global_mapping_n;
// latched tag signal
logic tag_valid_n, tag_valid_q;
// register the ASID
logic [ASID_WIDTH-1:0] tlb_update_asid_q, tlb_update_asid_n;
// register the VPN we need to walk, SV39 defines a 39 bit virtual address
logic [riscv::VLEN-1:0] vaddr_q, vaddr_n;
// 4 byte aligned physical pointer
logic [riscv::PLEN-1:0] ptw_pptr_q, ptw_pptr_n;
// Assignments
assign update_vaddr_o = vaddr_q;
assign ptw_active_o = (state_q != IDLE);
assign walking_instr_o = is_instr_ptw_q;
// directly output the correct physical address
assign req_port_o.address_index = ptw_pptr_q[DCACHE_INDEX_WIDTH-1:0];
assign req_port_o.address_tag = ptw_pptr_q[DCACHE_INDEX_WIDTH+DCACHE_TAG_WIDTH-1:DCACHE_INDEX_WIDTH];
// we are never going to kill this request
assign req_port_o.kill_req = '0;
// we are never going to write with the HPTW
assign req_port_o.data_wdata = 64'b0;
// we only issue one single request at a time
assign req_port_o.data_id = '0;
// -----------
// TLB Update
// -----------
assign itlb_update_o.vpn = {{39 - riscv::SV{1'b0}}, vaddr_q[riscv::SV-1:12]};
assign dtlb_update_o.vpn = {{39 - riscv::SV{1'b0}}, vaddr_q[riscv::SV-1:12]};
// update the correct page table level
assign itlb_update_o.is_2M = (ptw_lvl_q == LVL2);
assign itlb_update_o.is_1G = (ptw_lvl_q == LVL1);
assign dtlb_update_o.is_2M = (ptw_lvl_q == LVL2);
assign dtlb_update_o.is_1G = (ptw_lvl_q == LVL1);
// output the correct ASID
assign itlb_update_o.asid = tlb_update_asid_q;
assign dtlb_update_o.asid = tlb_update_asid_q;
// set the global mapping bit
assign itlb_update_o.content = pte | (global_mapping_q << 5);
assign dtlb_update_o.content = pte | (global_mapping_q << 5);
assign req_port_o.tag_valid = tag_valid_q;
logic allow_access;
assign bad_paddr_o = ptw_access_exception_o ? ptw_pptr_q : 'b0;
// PMP check for the page table walker's own memory reads. The address checked
// is the registered PTE pointer (ptw_pptr_q); the verdict (allow_access) is
// evaluated in the PTE_LOOKUP state, where a denied access moves the FSM to
// PROPAGATE_ACCESS_ERROR.
pmp #(
.CVA6Cfg (CVA6Cfg),
.PLEN (riscv::PLEN),
.PMP_LEN (riscv::PLEN - 2),
.NR_ENTRIES(CVA6Cfg.NrPMPEntries)
) i_pmp_ptw (
.addr_i (ptw_pptr_q),
// PTW access are always checked as if in S-Mode...
.priv_lvl_i (riscv::PRIV_LVL_S),
// ...and they are always loads
.access_type_i(riscv::ACCESS_READ),
// Configuration
.conf_addr_i (pmpaddr_i),
.conf_i (pmpcfg_i),
// 1 when the access is permitted
.allow_o (allow_access)
);
//-------------------
// Page table walker
//-------------------
// A virtual address va is translated into a physical address pa as follows:
// 1. Let a be sptbr.ppn × PAGESIZE, and let i = LEVELS-1. (For Sv39,
// PAGESIZE=2^12 and LEVELS=3.)
// 2. Let pte be the value of the PTE at address a+va.vpn[i]×PTESIZE. (For
// Sv32, PTESIZE=4.)
// 3. If pte.v = 0, or if pte.r = 0 and pte.w = 1, stop and raise an access
// exception.
// 4. Otherwise, the PTE is valid. If pte.r = 1 or pte.x = 1, go to step 5.
// Otherwise, this PTE is a pointer to the next level of the page table.
// Let i=i-1. If i < 0, stop and raise an access exception. Otherwise, let
// a = pte.ppn × PAGESIZE and go to step 2.
// 5. A leaf PTE has been found. Determine if the requested memory access
// is allowed by the pte.r, pte.w, and pte.x bits. If not, stop and
// raise an access exception. Otherwise, the translation is successful.
// Set pte.a to 1, and, if the memory access is a store, set pte.d to 1.
// The translated physical address is given as follows:
// - pa.pgoff = va.pgoff.
// - If i > 0, then this is a superpage translation and
// pa.ppn[i-1:0] = va.vpn[i-1:0].
// - pa.ppn[LEVELS-1:i] = pte.ppn[LEVELS-1:i].
always_comb begin : ptw
// default assignments
// PTW memory interface
tag_valid_n = 1'b0;
req_port_o.data_req = 1'b0;
req_port_o.data_be = 8'hFF;
req_port_o.data_size = 2'b11;
req_port_o.data_we = 1'b0;
ptw_error_o = 1'b0;
ptw_access_exception_o = 1'b0;
itlb_update_o.valid = 1'b0;
dtlb_update_o.valid = 1'b0;
is_instr_ptw_n = is_instr_ptw_q;
ptw_lvl_n = ptw_lvl_q;
ptw_pptr_n = ptw_pptr_q;
state_d = state_q;
global_mapping_n = global_mapping_q;
// input registers
logic data_rvalid_q;
logic [63:0] data_rdata_q;
tlb_update_asid_n = tlb_update_asid_q;
vaddr_n = vaddr_q;
riscv::pte_t pte;
assign pte = riscv::pte_t'(data_rdata_q);
itlb_miss_o = 1'b0;
dtlb_miss_o = 1'b0;
enum logic[2:0] {
IDLE,
WAIT_GRANT,
PTE_LOOKUP,
WAIT_RVALID,
PROPAGATE_ERROR,
PROPAGATE_ACCESS_ERROR
} state_q, state_d;
case (state_q)
// SV39 defines three levels of page tables
enum logic [1:0] {
LVL1, LVL2, LVL3
} ptw_lvl_q, ptw_lvl_n;
IDLE: begin
// by default we start with the top-most page table
ptw_lvl_n = LVL1;
global_mapping_n = 1'b0;
is_instr_ptw_n = 1'b0;
// if we got an ITLB miss
if (enable_translation_i & itlb_access_i & ~itlb_hit_i & ~dtlb_access_i) begin
ptw_pptr_n = {satp_ppn_i, itlb_vaddr_i[riscv::SV-1:30], 3'b0};
is_instr_ptw_n = 1'b1;
tlb_update_asid_n = asid_i;
vaddr_n = itlb_vaddr_i;
state_d = WAIT_GRANT;
itlb_miss_o = 1'b1;
// we got a DTLB miss
end else if (en_ld_st_translation_i & dtlb_access_i & ~dtlb_hit_i) begin
ptw_pptr_n = {satp_ppn_i, dtlb_vaddr_i[riscv::SV-1:30], 3'b0};
tlb_update_asid_n = asid_i;
vaddr_n = dtlb_vaddr_i;
state_d = WAIT_GRANT;
dtlb_miss_o = 1'b1;
end
end
// is this an instruction page table walk?
logic is_instr_ptw_q, is_instr_ptw_n;
logic global_mapping_q, global_mapping_n;
// latched tag signal
logic tag_valid_n, tag_valid_q;
// register the ASID
logic [ASID_WIDTH-1:0] tlb_update_asid_q, tlb_update_asid_n;
// register the VPN we need to walk, SV39 defines a 39 bit virtual address
logic [riscv::VLEN-1:0] vaddr_q, vaddr_n;
// 4 byte aligned physical pointer
logic [riscv::PLEN-1:0] ptw_pptr_q, ptw_pptr_n;
WAIT_GRANT: begin
// send a request out
req_port_o.data_req = 1'b1;
// wait for the WAIT_GRANT
if (req_port_i.data_gnt) begin
// send the tag valid signal one cycle later
tag_valid_n = 1'b1;
state_d = PTE_LOOKUP;
end
end
// Assignments
assign update_vaddr_o = vaddr_q;
PTE_LOOKUP: begin
// we wait for the valid signal
if (data_rvalid_q) begin
assign ptw_active_o = (state_q != IDLE);
assign walking_instr_o = is_instr_ptw_q;
// directly output the correct physical address
assign req_port_o.address_index = ptw_pptr_q[DCACHE_INDEX_WIDTH-1:0];
assign req_port_o.address_tag = ptw_pptr_q[DCACHE_INDEX_WIDTH+DCACHE_TAG_WIDTH-1:DCACHE_INDEX_WIDTH];
// we are never going to kill this request
assign req_port_o.kill_req = '0;
// we are never going to write with the HPTW
assign req_port_o.data_wdata = 64'b0;
// we only issue one single request at a time
assign req_port_o.data_id = '0;
// -----------
// TLB Update
// -----------
assign itlb_update_o.vpn = {{39-riscv::SV{1'b0}}, vaddr_q[riscv::SV-1:12]};
assign dtlb_update_o.vpn = {{39-riscv::SV{1'b0}}, vaddr_q[riscv::SV-1:12]};
// update the correct page table level
assign itlb_update_o.is_2M = (ptw_lvl_q == LVL2);
assign itlb_update_o.is_1G = (ptw_lvl_q == LVL1);
assign dtlb_update_o.is_2M = (ptw_lvl_q == LVL2);
assign dtlb_update_o.is_1G = (ptw_lvl_q == LVL1);
// output the correct ASID
assign itlb_update_o.asid = tlb_update_asid_q;
assign dtlb_update_o.asid = tlb_update_asid_q;
// set the global mapping bit
assign itlb_update_o.content = pte | (global_mapping_q << 5);
assign dtlb_update_o.content = pte | (global_mapping_q << 5);
// check if the global mapping bit is set
if (pte.g) global_mapping_n = 1'b1;
assign req_port_o.tag_valid = tag_valid_q;
// -------------
// Invalid PTE
// -------------
// If pte.v = 0, or if pte.r = 0 and pte.w = 1, stop and raise a page-fault exception.
if (!pte.v || (!pte.r && pte.w)) state_d = PROPAGATE_ERROR;
// -----------
// Valid PTE
// -----------
else begin
state_d = IDLE;
// it is a valid PTE
// if pte.r = 1 or pte.x = 1 it is a valid PTE
if (pte.r || pte.x) begin
// Valid translation found (either 1G, 2M or 4K entry)
if (is_instr_ptw_q) begin
// ------------
// Update ITLB
// ------------
// If page is not executable, we can directly raise an error. This
// doesn't put a useless entry into the TLB. The same idea applies
// to the access flag since we let the access flag be managed by SW.
if (!pte.x || !pte.a) state_d = PROPAGATE_ERROR;
else itlb_update_o.valid = 1'b1;
logic allow_access;
assign bad_paddr_o = ptw_access_exception_o ? ptw_pptr_q : 'b0;
pmp #(
.CVA6Cfg ( CVA6Cfg ),
.PLEN ( riscv::PLEN ),
.PMP_LEN ( riscv::PLEN - 2 ),
.NR_ENTRIES ( CVA6Cfg.NrPMPEntries )
) i_pmp_ptw (
.addr_i ( ptw_pptr_q ),
// PTW access are always checked as if in S-Mode...
.priv_lvl_i ( riscv::PRIV_LVL_S ),
// ...and they are always loads
.access_type_i ( riscv::ACCESS_READ ),
// Configuration
.conf_addr_i ( pmpaddr_i ),
.conf_i ( pmpcfg_i ),
.allow_o ( allow_access )
);
//-------------------
// Page table walker
//-------------------
// A virtual address va is translated into a physical address pa as follows:
// 1. Let a be sptbr.ppn × PAGESIZE, and let i = LEVELS-1. (For Sv39,
// PAGESIZE=2^12 and LEVELS=3.)
// 2. Let pte be the value of the PTE at address a+va.vpn[i]×PTESIZE. (For
// Sv32, PTESIZE=4.)
// 3. If pte.v = 0, or if pte.r = 0 and pte.w = 1, stop and raise an access
// exception.
// 4. Otherwise, the PTE is valid. If pte.r = 1 or pte.x = 1, go to step 5.
// Otherwise, this PTE is a pointer to the next level of the page table.
// Let i=i-1. If i < 0, stop and raise an access exception. Otherwise, let
// a = pte.ppn × PAGESIZE and go to step 2.
// 5. A leaf PTE has been found. Determine if the requested memory access
// is allowed by the pte.r, pte.w, and pte.x bits. If not, stop and
// raise an access exception. Otherwise, the translation is successful.
// Set pte.a to 1, and, if the memory access is a store, set pte.d to 1.
// The translated physical address is given as follows:
// - pa.pgoff = va.pgoff.
// - If i > 0, then this is a superpage translation and
// pa.ppn[i-1:0] = va.vpn[i-1:0].
// - pa.ppn[LEVELS-1:i] = pte.ppn[LEVELS-1:i].
always_comb begin : ptw
// default assignments
// PTW memory interface
tag_valid_n = 1'b0;
req_port_o.data_req = 1'b0;
req_port_o.data_be = 8'hFF;
req_port_o.data_size = 2'b11;
req_port_o.data_we = 1'b0;
ptw_error_o = 1'b0;
ptw_access_exception_o = 1'b0;
itlb_update_o.valid = 1'b0;
dtlb_update_o.valid = 1'b0;
is_instr_ptw_n = is_instr_ptw_q;
ptw_lvl_n = ptw_lvl_q;
ptw_pptr_n = ptw_pptr_q;
state_d = state_q;
global_mapping_n = global_mapping_q;
// input registers
tlb_update_asid_n = tlb_update_asid_q;
vaddr_n = vaddr_q;
itlb_miss_o = 1'b0;
dtlb_miss_o = 1'b0;
case (state_q)
IDLE: begin
// by default we start with the top-most page table
ptw_lvl_n = LVL1;
global_mapping_n = 1'b0;
is_instr_ptw_n = 1'b0;
// if we got an ITLB miss
if (enable_translation_i & itlb_access_i & ~itlb_hit_i & ~dtlb_access_i) begin
ptw_pptr_n = {satp_ppn_i, itlb_vaddr_i[riscv::SV-1:30], 3'b0};
is_instr_ptw_n = 1'b1;
tlb_update_asid_n = asid_i;
vaddr_n = itlb_vaddr_i;
state_d = WAIT_GRANT;
itlb_miss_o = 1'b1;
// we got a DTLB miss
end else if (en_ld_st_translation_i & dtlb_access_i & ~dtlb_hit_i) begin
ptw_pptr_n = {satp_ppn_i, dtlb_vaddr_i[riscv::SV-1:30], 3'b0};
tlb_update_asid_n = asid_i;
vaddr_n = dtlb_vaddr_i;
state_d = WAIT_GRANT;
dtlb_miss_o = 1'b1;
end else begin
// ------------
// Update DTLB
// ------------
// Check if the access flag has been set, otherwise throw a page-fault
// and let the software handle those bits.
// If page is not readable (there are no write-only pages)
// we can directly raise an error. This doesn't put a useless
// entry into the TLB.
if (pte.a && (pte.r || (pte.x && mxr_i))) begin
dtlb_update_o.valid = 1'b1;
end else begin
state_d = PROPAGATE_ERROR;
end
end
WAIT_GRANT: begin
// send a request out
req_port_o.data_req = 1'b1;
// wait for the WAIT_GRANT
if (req_port_i.data_gnt) begin
// send the tag valid signal one cycle later
tag_valid_n = 1'b1;
state_d = PTE_LOOKUP;
// Request is a store: perform some additional checks
// If the request was a store and the page is not write-able, raise an error
// the same applies if the dirty flag is not set
if (lsu_is_store_i && (!pte.w || !pte.d)) begin
dtlb_update_o.valid = 1'b0;
state_d = PROPAGATE_ERROR;
end
end
// check if the ppn is correctly aligned:
// 6. If i > 0 and pa.ppn[i-1 : 0] != 0, this is a misaligned superpage; stop and raise a page-fault
// exception.
if (ptw_lvl_q == LVL1 && pte.ppn[17:0] != '0) begin
state_d = PROPAGATE_ERROR;
dtlb_update_o.valid = 1'b0;
itlb_update_o.valid = 1'b0;
end else if (ptw_lvl_q == LVL2 && pte.ppn[8:0] != '0) begin
state_d = PROPAGATE_ERROR;
dtlb_update_o.valid = 1'b0;
itlb_update_o.valid = 1'b0;
end
// this is a pointer to the next TLB level
end else begin
// pointer to next level of page table
if (ptw_lvl_q == LVL1) begin
// we are in the second level now
ptw_lvl_n = LVL2;
ptw_pptr_n = {pte.ppn, vaddr_q[29:21], 3'b0};
end
if (ptw_lvl_q == LVL2) begin
// here we received a pointer to the third level
ptw_lvl_n = LVL3;
ptw_pptr_n = {pte.ppn, vaddr_q[20:12], 3'b0};
end
state_d = WAIT_GRANT;
if (ptw_lvl_q == LVL3) begin
// Should already be the last level page table => Error
ptw_lvl_n = LVL3;
state_d = PROPAGATE_ERROR;
end
end
end
PTE_LOOKUP: begin
// we wait for the valid signal
if (data_rvalid_q) begin
// Check if this access was actually allowed from a PMP perspective
if (!allow_access) begin
itlb_update_o.valid = 1'b0;
dtlb_update_o.valid = 1'b0;
// we have to return the failed address in bad_addr
ptw_pptr_n = ptw_pptr_q;
state_d = PROPAGATE_ACCESS_ERROR;
end
end
// we've got a data WAIT_GRANT so tell the cache that the tag is valid
end
// Propagate error to MMU/LSU
PROPAGATE_ERROR: begin
state_d = IDLE;
ptw_error_o = 1'b1;
end
PROPAGATE_ACCESS_ERROR: begin
state_d = IDLE;
ptw_access_exception_o = 1'b1;
end
// wait for the rvalid before going back to IDLE
WAIT_RVALID: begin
if (data_rvalid_q) state_d = IDLE;
end
default: begin
state_d = IDLE;
end
endcase
// check if the global mapping bit is set
if (pte.g)
global_mapping_n = 1'b1;
// -------------
// Invalid PTE
// -------------
// If pte.v = 0, or if pte.r = 0 and pte.w = 1, stop and raise a page-fault exception.
if (!pte.v || (!pte.r && pte.w))
state_d = PROPAGATE_ERROR;
// -----------
// Valid PTE
// -----------
else begin
state_d = IDLE;
// it is a valid PTE
// if pte.r = 1 or pte.x = 1 it is a valid PTE
if (pte.r || pte.x) begin
// Valid translation found (either 1G, 2M or 4K entry)
if (is_instr_ptw_q) begin
// ------------
// Update ITLB
// ------------
// If page is not executable, we can directly raise an error. This
// doesn't put a useless entry into the TLB. The same idea applies
// to the access flag since we let the access flag be managed by SW.
if (!pte.x || !pte.a)
state_d = PROPAGATE_ERROR;
else
itlb_update_o.valid = 1'b1;
end else begin
// ------------
// Update DTLB
// ------------
// Check if the access flag has been set, otherwise throw a page-fault
// and let the software handle those bits.
// If page is not readable (there are no write-only pages)
// we can directly raise an error. This doesn't put a useless
// entry into the TLB.
if (pte.a && (pte.r || (pte.x && mxr_i))) begin
dtlb_update_o.valid = 1'b1;
end else begin
state_d = PROPAGATE_ERROR;
end
// Request is a store: perform some additional checks
// If the request was a store and the page is not write-able, raise an error
// the same applies if the dirty flag is not set
if (lsu_is_store_i && (!pte.w || !pte.d)) begin
dtlb_update_o.valid = 1'b0;
state_d = PROPAGATE_ERROR;
end
end
// check if the ppn is correctly aligned:
// 6. If i > 0 and pa.ppn[i-1 : 0] != 0, this is a misaligned superpage; stop and raise a page-fault
// exception.
if (ptw_lvl_q == LVL1 && pte.ppn[17:0] != '0) begin
state_d = PROPAGATE_ERROR;
dtlb_update_o.valid = 1'b0;
itlb_update_o.valid = 1'b0;
end else if (ptw_lvl_q == LVL2 && pte.ppn[8:0] != '0) begin
state_d = PROPAGATE_ERROR;
dtlb_update_o.valid = 1'b0;
itlb_update_o.valid = 1'b0;
end
// this is a pointer to the next TLB level
end else begin
// pointer to next level of page table
if (ptw_lvl_q == LVL1) begin
// we are in the second level now
ptw_lvl_n = LVL2;
ptw_pptr_n = {pte.ppn, vaddr_q[29:21], 3'b0};
end
if (ptw_lvl_q == LVL2) begin
// here we received a pointer to the third level
ptw_lvl_n = LVL3;
ptw_pptr_n = {pte.ppn, vaddr_q[20:12], 3'b0};
end
state_d = WAIT_GRANT;
if (ptw_lvl_q == LVL3) begin
// Should already be the last level page table => Error
ptw_lvl_n = LVL3;
state_d = PROPAGATE_ERROR;
end
end
end
// Check if this access was actually allowed from a PMP perspective
if (!allow_access) begin
itlb_update_o.valid = 1'b0;
dtlb_update_o.valid = 1'b0;
// we have to return the failed address in bad_addr
ptw_pptr_n = ptw_pptr_q;
state_d = PROPAGATE_ACCESS_ERROR;
end
end
// we've got a data WAIT_GRANT so tell the cache that the tag is valid
end
// Propagate error to MMU/LSU
PROPAGATE_ERROR: begin
state_d = IDLE;
ptw_error_o = 1'b1;
end
PROPAGATE_ACCESS_ERROR: begin
state_d = IDLE;
ptw_access_exception_o = 1'b1;
end
// wait for the rvalid before going back to IDLE
WAIT_RVALID: begin
if (data_rvalid_q)
state_d = IDLE;
end
default: begin
state_d = IDLE;
end
endcase
// -------
// Flush
// -------
// should we have flushed before we got an rvalid, wait for it until going back to IDLE
if (flush_i) begin
// on a flush check whether we are
// 1. in the PTE Lookup check whether we still need to wait for an rvalid
// 2. waiting for a grant, if so: wait for it
// if not, go back to idle
if (((state_q inside {PTE_LOOKUP, WAIT_RVALID}) && !data_rvalid_q) ||
// -------
// Flush
// -------
// should we have flushed before we got an rvalid, wait for it until going back to IDLE
if (flush_i) begin
// on a flush check whether we are
// 1. in the PTE Lookup check whether we still need to wait for an rvalid
// 2. waiting for a grant, if so: wait for it
// if not, go back to idle
if (((state_q inside {PTE_LOOKUP, WAIT_RVALID}) && !data_rvalid_q) ||
((state_q == WAIT_GRANT) && req_port_i.data_gnt))
state_d = WAIT_RVALID;
else
state_d = IDLE;
end
state_d = WAIT_RVALID;
else state_d = IDLE;
end
end
// sequential process
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
state_q <= IDLE;
is_instr_ptw_q <= 1'b0;
ptw_lvl_q <= LVL1;
tag_valid_q <= 1'b0;
tlb_update_asid_q <= '0;
vaddr_q <= '0;
ptw_pptr_q <= '0;
global_mapping_q <= 1'b0;
data_rdata_q <= '0;
data_rvalid_q <= 1'b0;
end else begin
state_q <= state_d;
ptw_pptr_q <= ptw_pptr_n;
is_instr_ptw_q <= is_instr_ptw_n;
ptw_lvl_q <= ptw_lvl_n;
tag_valid_q <= tag_valid_n;
tlb_update_asid_q <= tlb_update_asid_n;
vaddr_q <= vaddr_n;
global_mapping_q <= global_mapping_n;
data_rdata_q <= req_port_i.data_rdata;
data_rvalid_q <= req_port_i.data_rvalid;
end
// sequential process
always_ff @(posedge clk_i or negedge rst_ni) begin
  if (!rst_ni) begin
    // asynchronous, active-low reset: walker idles at the first level
    state_q           <= IDLE;
    ptw_lvl_q         <= LVL1;
    ptw_pptr_q        <= '0;
    vaddr_q           <= '0;
    tlb_update_asid_q <= '0;
    is_instr_ptw_q    <= 1'b0;
    global_mapping_q  <= 1'b0;
    tag_valid_q       <= 1'b0;
    // registered copy of the memory response
    data_rvalid_q     <= 1'b0;
    data_rdata_q      <= '0;
  end else begin
    // FSM state and walk bookkeeping take their combinational next values
    state_q           <= state_d;
    ptw_lvl_q         <= ptw_lvl_n;
    ptw_pptr_q        <= ptw_pptr_n;
    vaddr_q           <= vaddr_n;
    tlb_update_asid_q <= tlb_update_asid_n;
    is_instr_ptw_q    <= is_instr_ptw_n;
    global_mapping_q  <= global_mapping_n;
    tag_valid_q       <= tag_valid_n;
    // sample the request port response every cycle
    data_rvalid_q     <= req_port_i.data_rvalid;
    data_rdata_q      <= req_port_i.data_rdata;
  end
end
endmodule
/* verilator lint_on WIDTH */

View file

@ -15,149 +15,151 @@
// fully set-associative
module tlb import ariane_pkg::*; #(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int unsigned TLB_ENTRIES = 4,
parameter int unsigned ASID_WIDTH = 1
)(
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic flush_i, // Flush signal
module tlb
import ariane_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int unsigned TLB_ENTRIES = 4,
parameter int unsigned ASID_WIDTH = 1
) (
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic flush_i, // Flush signal
// Update TLB
input tlb_update_t update_i,
input tlb_update_t update_i,
// Lookup signals
input logic lu_access_i,
input logic [ASID_WIDTH-1:0] lu_asid_i,
input logic [riscv::VLEN-1:0] lu_vaddr_i,
output riscv::pte_t lu_content_o,
input logic [ASID_WIDTH-1:0] asid_to_be_flushed_i,
input logic [riscv::VLEN-1:0] vaddr_to_be_flushed_i,
output logic lu_is_2M_o,
output logic lu_is_1G_o,
output logic lu_hit_o
input logic lu_access_i,
input logic [ ASID_WIDTH-1:0] lu_asid_i,
input logic [riscv::VLEN-1:0] lu_vaddr_i,
output riscv::pte_t lu_content_o,
input logic [ ASID_WIDTH-1:0] asid_to_be_flushed_i,
input logic [riscv::VLEN-1:0] vaddr_to_be_flushed_i,
output logic lu_is_2M_o,
output logic lu_is_1G_o,
output logic lu_hit_o
);
// SV39 defines three levels of page tables
struct packed {
logic [ASID_WIDTH-1:0] asid;
logic [riscv::VPN2:0] vpn2;
logic [8:0] vpn1;
logic [8:0] vpn0;
logic is_2M;
logic is_1G;
logic valid;
} [TLB_ENTRIES-1:0] tags_q, tags_n;
// SV39 defines three levels of page tables
struct packed {
logic [ASID_WIDTH-1:0] asid;
logic [riscv::VPN2:0] vpn2;
logic [8:0] vpn1;
logic [8:0] vpn0;
logic is_2M;
logic is_1G;
logic valid;
} [TLB_ENTRIES-1:0]
tags_q, tags_n;
riscv::pte_t [TLB_ENTRIES-1:0] content_q, content_n;
logic [8:0] vpn0, vpn1;
logic [riscv::VPN2:0] vpn2;
logic [TLB_ENTRIES-1:0] lu_hit; // to replacement logic
logic [TLB_ENTRIES-1:0] replace_en; // replace the following entry, set by replacement strategy
//-------------
// Translation
//-------------
always_comb begin : translation
vpn0 = lu_vaddr_i[20:12];
vpn1 = lu_vaddr_i[29:21];
vpn2 = lu_vaddr_i[30+riscv::VPN2:30];
riscv::pte_t [TLB_ENTRIES-1:0] content_q, content_n;
logic [8:0] vpn0, vpn1;
logic [ riscv::VPN2:0] vpn2;
logic [TLB_ENTRIES-1:0] lu_hit; // to replacement logic
logic [TLB_ENTRIES-1:0] replace_en; // replace the following entry, set by replacement strategy
//-------------
// Translation
//-------------
always_comb begin : translation
vpn0 = lu_vaddr_i[20:12];
vpn1 = lu_vaddr_i[29:21];
vpn2 = lu_vaddr_i[30+riscv::VPN2:30];
// default assignment
lu_hit = '{default: 0};
lu_hit_o = 1'b0;
lu_content_o = '{default: 0};
lu_is_1G_o = 1'b0;
lu_is_2M_o = 1'b0;
// default assignment
lu_hit = '{default: 0};
lu_hit_o = 1'b0;
lu_content_o = '{default: 0};
lu_is_1G_o = 1'b0;
lu_is_2M_o = 1'b0;
for (int unsigned i = 0; i < TLB_ENTRIES; i++) begin
// first level match, this may be a giga page, check the ASID flags as well
// if the entry is associated to a global address, don't match the ASID (ASID is don't care)
if (tags_q[i].valid && ((lu_asid_i == tags_q[i].asid) || content_q[i].g) && vpn2 == tags_q[i].vpn2) begin
// second level
if (tags_q[i].is_1G) begin
lu_is_1G_o = 1'b1;
lu_content_o = content_q[i];
lu_hit_o = 1'b1;
lu_hit[i] = 1'b1;
// not a giga page hit so check further
end else if (vpn1 == tags_q[i].vpn1) begin
// this could be a 2 mega page hit or a 4 kB hit
// output accordingly
if (tags_q[i].is_2M || vpn0 == tags_q[i].vpn0) begin
lu_is_2M_o = tags_q[i].is_2M;
lu_content_o = content_q[i];
lu_hit_o = 1'b1;
lu_hit[i] = 1'b1;
end
end
end
for (int unsigned i = 0; i < TLB_ENTRIES; i++) begin
// first level match, this may be a giga page, check the ASID flags as well
// if the entry is associated to a global address, don't match the ASID (ASID is don't care)
if (tags_q[i].valid && ((lu_asid_i == tags_q[i].asid) || content_q[i].g) && vpn2 == tags_q[i].vpn2) begin
// second level
if (tags_q[i].is_1G) begin
lu_is_1G_o = 1'b1;
lu_content_o = content_q[i];
lu_hit_o = 1'b1;
lu_hit[i] = 1'b1;
// not a giga page hit so check further
end else if (vpn1 == tags_q[i].vpn1) begin
// this could be a 2 mega page hit or a 4 kB hit
// output accordingly
if (tags_q[i].is_2M || vpn0 == tags_q[i].vpn0) begin
lu_is_2M_o = tags_q[i].is_2M;
lu_content_o = content_q[i];
lu_hit_o = 1'b1;
lu_hit[i] = 1'b1;
end
end
end
end
end
logic asid_to_be_flushed_is0; // indicates that the ASID provided by SFENCE.VMA (rs2)is 0, active high
logic vaddr_to_be_flushed_is0; // indicates that the VADDR provided by SFENCE.VMA (rs1)is 0, active high
logic [TLB_ENTRIES-1:0] vaddr_vpn0_match;
logic [TLB_ENTRIES-1:0] vaddr_vpn1_match;
logic [TLB_ENTRIES-1:0] vaddr_vpn2_match;
logic asid_to_be_flushed_is0; // indicates that the ASID provided by SFENCE.VMA (rs2)is 0, active high
logic vaddr_to_be_flushed_is0; // indicates that the VADDR provided by SFENCE.VMA (rs1)is 0, active high
logic [TLB_ENTRIES-1:0] vaddr_vpn0_match;
logic [TLB_ENTRIES-1:0] vaddr_vpn1_match;
logic [TLB_ENTRIES-1:0] vaddr_vpn2_match;
assign asid_to_be_flushed_is0 = ~(|asid_to_be_flushed_i);
assign vaddr_to_be_flushed_is0 = ~(|vaddr_to_be_flushed_i);
assign asid_to_be_flushed_is0 = ~(|asid_to_be_flushed_i);
assign vaddr_to_be_flushed_is0 = ~(|vaddr_to_be_flushed_i);
// ------------------
// Update and Flush
// ------------------
always_comb begin : update_flush
tags_n = tags_q;
content_n = content_q;
// ------------------
// Update and Flush
// ------------------
always_comb begin : update_flush
tags_n = tags_q;
content_n = content_q;
for (int unsigned i = 0; i < TLB_ENTRIES; i++) begin
for (int unsigned i = 0; i < TLB_ENTRIES; i++) begin
vaddr_vpn0_match[i] = (vaddr_to_be_flushed_i[20:12] == tags_q[i].vpn0);
vaddr_vpn1_match[i] = (vaddr_to_be_flushed_i[29:21] == tags_q[i].vpn1);
vaddr_vpn2_match[i] = (vaddr_to_be_flushed_i[30+riscv::VPN2:30] == tags_q[i].vpn2);
vaddr_vpn0_match[i] = (vaddr_to_be_flushed_i[20:12] == tags_q[i].vpn0);
vaddr_vpn1_match[i] = (vaddr_to_be_flushed_i[29:21] == tags_q[i].vpn1);
vaddr_vpn2_match[i] = (vaddr_to_be_flushed_i[30+riscv::VPN2:30] == tags_q[i].vpn2);
if (flush_i) begin
// invalidate logic
// flush everything if ASID is 0 and vaddr is 0 ("SFENCE.VMA x0 x0" case)
if (asid_to_be_flushed_is0 && vaddr_to_be_flushed_is0 )
tags_n[i].valid = 1'b0;
// flush vaddr in all addressing space ("SFENCE.VMA vaddr x0" case), it should happen only for leaf pages
else if (asid_to_be_flushed_is0 && ((vaddr_vpn0_match[i] && vaddr_vpn1_match[i] && vaddr_vpn2_match[i]) || (vaddr_vpn2_match[i] && tags_q[i].is_1G) || (vaddr_vpn1_match[i] && vaddr_vpn2_match[i] && tags_q[i].is_2M) ) && (~vaddr_to_be_flushed_is0))
tags_n[i].valid = 1'b0;
// the entry is flushed if it's not global and asid and vaddr both matches with the entry to be flushed ("SFENCE.VMA vaddr asid" case)
else if ((!content_q[i].g) && ((vaddr_vpn0_match[i] && vaddr_vpn1_match[i] && vaddr_vpn2_match[i]) || (vaddr_vpn2_match[i] && tags_q[i].is_1G) || (vaddr_vpn1_match[i] && vaddr_vpn2_match[i] && tags_q[i].is_2M)) && (asid_to_be_flushed_i == tags_q[i].asid) && (!vaddr_to_be_flushed_is0) && (!asid_to_be_flushed_is0))
tags_n[i].valid = 1'b0;
// the entry is flushed if it's not global, and the asid matches and vaddr is 0. ("SFENCE.VMA 0 asid" case)
else if ((!content_q[i].g) && (vaddr_to_be_flushed_is0) && (asid_to_be_flushed_i == tags_q[i].asid) && (!asid_to_be_flushed_is0))
tags_n[i].valid = 1'b0;
// normal replacement
end else if (update_i.valid & replace_en[i]) begin
// update tag array
tags_n[i] = '{
asid: update_i.asid,
vpn2: update_i.vpn [18+riscv::VPN2:18],
vpn1: update_i.vpn [17:9],
vpn0: update_i.vpn [8:0],
is_1G: update_i.is_1G,
is_2M: update_i.is_2M,
valid: 1'b1
};
// and content as well
content_n[i] = update_i.content;
end
end
if (flush_i) begin
// invalidate logic
// flush everything if ASID is 0 and vaddr is 0 ("SFENCE.VMA x0 x0" case)
if (asid_to_be_flushed_is0 && vaddr_to_be_flushed_is0) tags_n[i].valid = 1'b0;
// flush vaddr in all addressing space ("SFENCE.VMA vaddr x0" case), it should happen only for leaf pages
else if (asid_to_be_flushed_is0 && ((vaddr_vpn0_match[i] && vaddr_vpn1_match[i] && vaddr_vpn2_match[i]) || (vaddr_vpn2_match[i] && tags_q[i].is_1G) || (vaddr_vpn1_match[i] && vaddr_vpn2_match[i] && tags_q[i].is_2M) ) && (~vaddr_to_be_flushed_is0))
tags_n[i].valid = 1'b0;
// the entry is flushed if it's not global and asid and vaddr both matches with the entry to be flushed ("SFENCE.VMA vaddr asid" case)
else if ((!content_q[i].g) && ((vaddr_vpn0_match[i] && vaddr_vpn1_match[i] && vaddr_vpn2_match[i]) || (vaddr_vpn2_match[i] && tags_q[i].is_1G) || (vaddr_vpn1_match[i] && vaddr_vpn2_match[i] && tags_q[i].is_2M)) && (asid_to_be_flushed_i == tags_q[i].asid) && (!vaddr_to_be_flushed_is0) && (!asid_to_be_flushed_is0))
tags_n[i].valid = 1'b0;
// the entry is flushed if it's not global, and the asid matches and vaddr is 0. ("SFENCE.VMA 0 asid" case)
else if ((!content_q[i].g) && (vaddr_to_be_flushed_is0) && (asid_to_be_flushed_i == tags_q[i].asid) && (!asid_to_be_flushed_is0))
tags_n[i].valid = 1'b0;
// normal replacement
end else if (update_i.valid & replace_en[i]) begin
// update tag array
tags_n[i] = '{
asid: update_i.asid,
vpn2: update_i.vpn[18+riscv::VPN2:18],
vpn1: update_i.vpn[17:9],
vpn0: update_i.vpn[8:0],
is_1G: update_i.is_1G,
is_2M: update_i.is_2M,
valid: 1'b1
};
// and content as well
content_n[i] = update_i.content;
end
end
end
// -----------------------------------------------
// PLRU - Pseudo Least Recently Used Replacement
// -----------------------------------------------
logic[2*(TLB_ENTRIES-1)-1:0] plru_tree_q, plru_tree_n;
always_comb begin : plru_replacement
plru_tree_n = plru_tree_q;
// The PLRU-tree indexing:
// lvl0 0
// / \
// -----------------------------------------------
// PLRU - Pseudo Least Recently Used Replacement
// -----------------------------------------------
logic [2*(TLB_ENTRIES-1)-1:0] plru_tree_q, plru_tree_n;
always_comb begin : plru_replacement
plru_tree_n = plru_tree_q;
// The PLRU-tree indexing:
// lvl0 0
// / \
// / \
// lvl1 1 2
// / \ / \
@ -178,97 +180,111 @@ module tlb import ariane_pkg::*; #(
// lu_hit[0]: plru_tree_n[0, 1, 3] = {0, 0, 0};
// default: begin /* No hit */ end
// endcase
for (int unsigned i = 0; i < TLB_ENTRIES; i++) begin
automatic int unsigned idx_base, shift, new_index;
// we got a hit so update the pointer as it was least recently used
if (lu_hit[i] & lu_access_i) begin
// Set the nodes to the values we would expect
for (int unsigned lvl = 0; lvl < $clog2(TLB_ENTRIES); lvl++) begin
idx_base = $unsigned((2**lvl)-1);
// lvl0 <=> MSB, lvl1 <=> MSB-1, ...
shift = $clog2(TLB_ENTRIES) - lvl;
// to circumvent the 32 bit integer arithmetic assignment
new_index = ~((i >> (shift-1)) & 32'b1);
plru_tree_n[idx_base + (i >> shift)] = new_index[0];
end
end
for (
int unsigned i = 0; i < TLB_ENTRIES; i++
) begin
automatic int unsigned idx_base, shift, new_index;
// we got a hit so update the pointer as it was least recently used
if (lu_hit[i] & lu_access_i) begin
// Set the nodes to the values we would expect
for (int unsigned lvl = 0; lvl < $clog2(TLB_ENTRIES); lvl++) begin
idx_base = $unsigned((2 ** lvl) - 1);
// lvl0 <=> MSB, lvl1 <=> MSB-1, ...
shift = $clog2(TLB_ENTRIES) - lvl;
// to circumvent the 32 bit integer arithmetic assignment
new_index = ~((i >> (shift - 1)) & 32'b1);
plru_tree_n[idx_base+(i>>shift)] = new_index[0];
end
// Decode tree to write enable signals
// Next for-loop basically creates the following logic for e.g. an 8 entry
// TLB (note: pseudo-code obviously):
// replace_en[7] = &plru_tree_q[ 6, 2, 0]; //plru_tree_q[0,2,6]=={1,1,1}
// replace_en[6] = &plru_tree_q[~6, 2, 0]; //plru_tree_q[0,2,6]=={1,1,0}
// replace_en[5] = &plru_tree_q[ 5,~2, 0]; //plru_tree_q[0,2,5]=={1,0,1}
// replace_en[4] = &plru_tree_q[~5,~2, 0]; //plru_tree_q[0,2,5]=={1,0,0}
// replace_en[3] = &plru_tree_q[ 4, 1,~0]; //plru_tree_q[0,1,4]=={0,1,1}
// replace_en[2] = &plru_tree_q[~4, 1,~0]; //plru_tree_q[0,1,4]=={0,1,0}
// replace_en[1] = &plru_tree_q[ 3,~1,~0]; //plru_tree_q[0,1,3]=={0,0,1}
// replace_en[0] = &plru_tree_q[~3,~1,~0]; //plru_tree_q[0,1,3]=={0,0,0}
// For each entry traverse the tree. If every tree-node matches,
// the corresponding bit of the entry's index, this is
// the next entry to replace.
for (int unsigned i = 0; i < TLB_ENTRIES; i += 1) begin
automatic logic en;
automatic int unsigned idx_base, shift, new_index;
en = 1'b1;
for (int unsigned lvl = 0; lvl < $clog2(TLB_ENTRIES); lvl++) begin
idx_base = $unsigned((2**lvl)-1);
// lvl0 <=> MSB, lvl1 <=> MSB-1, ...
shift = $clog2(TLB_ENTRIES) - lvl;
// en &= plru_tree_q[idx_base + (i>>shift)] == ((i >> (shift-1)) & 1'b1);
new_index = (i >> (shift-1)) & 32'b1;
if (new_index[0]) begin
en &= plru_tree_q[idx_base + (i>>shift)];
end else begin
en &= ~plru_tree_q[idx_base + (i>>shift)];
end
end
replace_en[i] = en;
end
end
// sequential process
always_ff @(posedge clk_i or negedge rst_ni) begin
if(~rst_ni) begin
tags_q <= '{default: 0};
content_q <= '{default: 0};
plru_tree_q <= '{default: 0};
end else begin
tags_q <= tags_n;
content_q <= content_n;
plru_tree_q <= plru_tree_n;
end
end
//--------------
// Sanity checks
//--------------
//pragma translate_off
`ifndef VERILATOR
initial begin : p_assertions
assert ((TLB_ENTRIES % 2 == 0) && (TLB_ENTRIES > 1))
else begin $error("TLB size must be a multiple of 2 and greater than 1"); $stop(); end
assert (ASID_WIDTH >= 1)
else begin $error("ASID width must be at least 1"); $stop(); end
end
// Just for checking
function int countSetBits(logic[TLB_ENTRIES-1:0] vector);
automatic int count = 0;
foreach (vector[idx]) begin
count += vector[idx];
end
return count;
endfunction
end
// Decode tree to write enable signals
// Next for-loop basically creates the following logic for e.g. an 8 entry
// TLB (note: pseudo-code obviously):
// replace_en[7] = &plru_tree_q[ 6, 2, 0]; //plru_tree_q[0,2,6]=={1,1,1}
// replace_en[6] = &plru_tree_q[~6, 2, 0]; //plru_tree_q[0,2,6]=={1,1,0}
// replace_en[5] = &plru_tree_q[ 5,~2, 0]; //plru_tree_q[0,2,5]=={1,0,1}
// replace_en[4] = &plru_tree_q[~5,~2, 0]; //plru_tree_q[0,2,5]=={1,0,0}
// replace_en[3] = &plru_tree_q[ 4, 1,~0]; //plru_tree_q[0,1,4]=={0,1,1}
// replace_en[2] = &plru_tree_q[~4, 1,~0]; //plru_tree_q[0,1,4]=={0,1,0}
// replace_en[1] = &plru_tree_q[ 3,~1,~0]; //plru_tree_q[0,1,3]=={0,0,1}
// replace_en[0] = &plru_tree_q[~3,~1,~0]; //plru_tree_q[0,1,3]=={0,0,0}
// For each entry traverse the tree. If every tree node matches
// the corresponding bit of the entry's index, this is
// the next entry to replace.
for (int unsigned i = 0; i < TLB_ENTRIES; i += 1) begin
automatic logic en;
automatic int unsigned idx_base, shift, new_index;
en = 1'b1;
for (int unsigned lvl = 0; lvl < $clog2(TLB_ENTRIES); lvl++) begin
idx_base = $unsigned((2 ** lvl) - 1);
// lvl0 <=> MSB, lvl1 <=> MSB-1, ...
shift = $clog2(TLB_ENTRIES) - lvl;
assert property (@(posedge clk_i)(countSetBits(lu_hit) <= 1))
else begin $error("More then one hit in TLB!"); $stop(); end
assert property (@(posedge clk_i)(countSetBits(replace_en) <= 1))
else begin $error("More then one TLB entry selected for next replace!"); $stop(); end
// en &= plru_tree_q[idx_base + (i>>shift)] == ((i >> (shift-1)) & 1'b1);
new_index = (i >> (shift - 1)) & 32'b1;
if (new_index[0]) begin
en &= plru_tree_q[idx_base+(i>>shift)];
end else begin
en &= ~plru_tree_q[idx_base+(i>>shift)];
end
end
replace_en[i] = en;
end
end
`endif
//pragma translate_on
// sequential process
always_ff @(posedge clk_i or negedge rst_ni) begin
  if (!rst_ni) begin
    // asynchronous, active-low reset clears replacement state, contents and tags
    plru_tree_q <= '{default: 0};
    content_q   <= '{default: 0};
    tags_q      <= '{default: 0};
  end else begin
    // take over the values computed by the combinational processes
    plru_tree_q <= plru_tree_n;
    content_q   <= content_n;
    tags_q      <= tags_n;
  end
end
//--------------
// Sanity checks
//--------------
//pragma translate_off
`ifndef VERILATOR
// Elaboration-time parameter checks (simulation only).
initial begin : p_assertions
  // The PLRU logic walks a binary tree with $clog2(TLB_ENTRIES) levels and
  // only decodes replace_en for indices below TLB_ENTRIES. With a size that
  // is not a power of two the tree can point at a leaf that has no entry,
  // so no replace_en bit is set. Being merely even is therefore not
  // sufficient: require a power of two.
  assert ((TLB_ENTRIES > 1) && ((2 ** $clog2(TLB_ENTRIES)) == TLB_ENTRIES))
  else begin
    $error("TLB size must be a power of 2 and greater than 1");
    $stop();
  end
  // The ASID port is declared as [ASID_WIDTH-1:0], so a zero width is not allowed
  assert (ASID_WIDTH >= 1)
  else begin
    $error("ASID width must be at least 1");
    $stop();
  end
end
// Just for checking
// Assertion helper: adds up the bits of `vector`, walking from the MSB down to bit 0.
function int countSetBits(logic [TLB_ENTRIES-1:0] vector);
  automatic int ones = 0;
  for (int bit_idx = TLB_ENTRIES - 1; bit_idx >= 0; bit_idx--) begin
    ones = ones + vector[bit_idx];
  end
  return ones;
endfunction
// Checked on every rising clock edge: at most one TLB entry may signal a lookup hit.
// On violation the error is reported and the simulation is stopped.
assert property (@(posedge clk_i) (countSetBits(lu_hit) <= 1))
else begin
$error("More then one hit in TLB!");
$stop();
end
// Checked on every rising clock edge: the PLRU decode may select at most one
// entry for the next replacement.
assert property (@(posedge clk_i) (countSetBits(replace_en) <= 1))
else begin
$error("More then one TLB entry selected for next replace!");
$stop();
end
`endif
//pragma translate_on
endmodule

View file

@ -1,145 +1,149 @@
module mult import ariane_pkg::*; #(
module mult
import ariane_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) (
input logic clk_i,
input logic rst_ni,
input logic flush_i,
input fu_data_t fu_data_i,
input logic mult_valid_i,
output riscv::xlen_t result_o,
output logic mult_valid_o,
output logic mult_ready_o,
output logic [TRANS_ID_BITS-1:0] mult_trans_id_o
input logic clk_i,
input logic rst_ni,
input logic flush_i,
input fu_data_t fu_data_i,
input logic mult_valid_i,
output riscv::xlen_t result_o,
output logic mult_valid_o,
output logic mult_ready_o,
output logic [TRANS_ID_BITS-1:0] mult_trans_id_o
);
logic mul_valid;
logic div_valid;
logic div_ready_i; // receiver of division result is able to accept the result
logic [TRANS_ID_BITS-1:0] mul_trans_id;
logic [TRANS_ID_BITS-1:0] div_trans_id;
riscv::xlen_t mul_result;
riscv::xlen_t div_result;
logic mul_valid;
logic div_valid;
logic div_ready_i; // receiver of division result is able to accept the result
logic [TRANS_ID_BITS-1:0] mul_trans_id;
logic [TRANS_ID_BITS-1:0] div_trans_id;
riscv::xlen_t mul_result;
riscv::xlen_t div_result;
logic div_valid_op;
logic mul_valid_op;
// Input Arbitration
logic div_valid_op;
logic mul_valid_op;
// Input Arbitration
assign mul_valid_op = ~flush_i && mult_valid_i && (fu_data_i.operation inside { MUL, MULH, MULHU, MULHSU, MULW, CLMUL, CLMULH, CLMULR });
assign mul_valid_op = ~flush_i && mult_valid_i && (fu_data_i.operation inside { MUL, MULH, MULHU, MULHSU, MULW, CLMUL, CLMULH, CLMULR });
assign div_valid_op = ~flush_i && mult_valid_i && (fu_data_i.operation inside { DIV, DIVU, DIVW, DIVUW, REM, REMU, REMW, REMUW });
assign div_valid_op = ~flush_i && mult_valid_i && (fu_data_i.operation inside { DIV, DIVU, DIVW, DIVUW, REM, REMU, REMW, REMUW });
// ---------------------
// Output Arbitration
// ---------------------
// we give precedence to multiplication as the divider supports stalling and the multiplier is
// just a dumb pipelined multiplier
assign div_ready_i = (mul_valid) ? 1'b0 : 1'b1;
assign mult_trans_id_o = (mul_valid) ? mul_trans_id : div_trans_id;
assign result_o = (mul_valid) ? mul_result : div_result;
assign mult_valid_o = div_valid | mul_valid;
// mult_ready_o = division as the multiplication will unconditionally be ready to accept new requests
// ---------------------
// Output Arbitration
// ---------------------
// we give precedence to multiplication as the divider supports stalling and the multiplier is
// just a dumb pipelined multiplier
assign div_ready_i = (mul_valid) ? 1'b0 : 1'b1;
assign mult_trans_id_o = (mul_valid) ? mul_trans_id : div_trans_id;
assign result_o = (mul_valid) ? mul_result : div_result;
assign mult_valid_o = div_valid | mul_valid;
// mult_ready_o = division as the multiplication will unconditionally be ready to accept new requests
// ---------------------
// Multiplication
// ---------------------
multiplier #(
.CVA6Cfg ( CVA6Cfg )
) i_multiplier (
.clk_i,
.rst_ni,
.trans_id_i ( fu_data_i.trans_id ),
.operation_i ( fu_data_i.operation ),
.operand_a_i ( fu_data_i.operand_a ),
.operand_b_i ( fu_data_i.operand_b ),
.result_o ( mul_result ),
.mult_valid_i ( mul_valid_op ),
.mult_valid_o ( mul_valid ),
.mult_trans_id_o ( mul_trans_id ),
.mult_ready_o ( ) // this unit is unconditionally ready
);
// ---------------------
// Multiplication
// ---------------------
multiplier #(
    .CVA6Cfg(CVA6Cfg)
) i_multiplier (
    .clk_i          (clk_i),
    .rst_ni         (rst_ni),
    // request side: only multiplication operations are marked valid
    .mult_valid_i   (mul_valid_op),
    .trans_id_i     (fu_data_i.trans_id),
    .operation_i    (fu_data_i.operation),
    .operand_a_i    (fu_data_i.operand_a),
    .operand_b_i    (fu_data_i.operand_b),
    // response side, feeds the output arbitration
    .mult_valid_o   (mul_valid),
    .mult_trans_id_o(mul_trans_id),
    .result_o       (mul_result),
    // left open: the multiplier is always ready to accept a request
    .mult_ready_o   ()
);
// ---------------------
// Division
// ---------------------
riscv::xlen_t operand_b, operand_a; // input operands after input MUX (input silencing, word operations or full inputs)
riscv::xlen_t result; // result before result mux
// ---------------------
// Division
// ---------------------
riscv::xlen_t
operand_b,
operand_a; // input operands after input MUX (input silencing, word operations or full inputs)
riscv::xlen_t result; // result before result mux
logic div_signed; // signed or unsigned division
logic rem; // is it a reminder (or not a reminder e.g.: a division)
logic word_op_d, word_op_q; // save whether the operation was signed or not
logic div_signed; // signed or unsigned division
logic rem; // is it a remainder (or not a remainder, e.g. a division)
logic word_op_d, word_op_q; // save whether the operation was a word operation, so the divider result is sign-extended from 32 bits
// is this a signed op?
assign div_signed = fu_data_i.operation inside {DIV, DIVW, REM, REMW};
// is this a modulo?
assign rem = fu_data_i.operation inside {REM, REMU, REMW, REMUW};
// is this a signed op?
assign div_signed = fu_data_i.operation inside {DIV, DIVW, REM, REMW};
// is this a modulo?
assign rem = fu_data_i.operation inside {REM, REMU, REMW, REMUW};
// prepare the input operands and control divider
always_comb begin
// silence the inputs
operand_a = '0;
operand_b = '0;
// control signals
word_op_d = word_op_q;
// prepare the input operands and control divider
always_comb begin
// silence the inputs
operand_a = '0;
operand_b = '0;
// control signals
word_op_d = word_op_q;
// we've got a new division operation
if (mult_valid_i && fu_data_i.operation inside {DIV, DIVU, DIVW, DIVUW, REM, REMU, REMW, REMUW}) begin
// is this a word operation?
if (fu_data_i.operation inside {DIVW, DIVUW, REMW, REMUW}) begin
// yes so check if we should sign extend this is only done for a signed operation
if (div_signed) begin
operand_a = sext32(fu_data_i.operand_a[31:0]);
operand_b = sext32(fu_data_i.operand_b[31:0]);
end else begin
operand_a = fu_data_i.operand_a[31:0];
operand_b = fu_data_i.operand_b[31:0];
end
// save whether we want sign extend the result or not, this is done for all word operations
word_op_d = 1'b1;
end else begin
// regular op
operand_a = fu_data_i.operand_a;
operand_b = fu_data_i.operand_b;
word_op_d = 1'b0;
end
end
end
// ---------------------
// Serial Divider
// ---------------------
serdiv #(
.CVA6Cfg ( CVA6Cfg ),
.WIDTH ( riscv::XLEN )
) i_div (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.id_i ( fu_data_i.trans_id ),
.op_a_i ( operand_a ),
.op_b_i ( operand_b ),
.opcode_i ( {rem, div_signed} ), // 00: udiv, 10: urem, 01: div, 11: rem
.in_vld_i ( div_valid_op ),
.in_rdy_o ( mult_ready_o ),
.flush_i ( flush_i ),
.out_vld_o ( div_valid ),
.out_rdy_i ( div_ready_i ),
.id_o ( div_trans_id ),
.res_o ( result )
);
// Result multiplexer
// if it was a signed word operation the bit will be set and the result will be sign extended accordingly
assign div_result = (word_op_q) ? sext32(result) : result;
// ---------------------
// Registers
// ---------------------
always_ff @(posedge clk_i or negedge rst_ni) begin
if(~rst_ni) begin
word_op_q <= '0;
// we've got a new division operation
if (mult_valid_i && fu_data_i.operation inside {DIV, DIVU, DIVW, DIVUW, REM, REMU, REMW, REMUW}) begin
// is this a word operation?
if (fu_data_i.operation inside {DIVW, DIVUW, REMW, REMUW}) begin
// yes so check if we should sign extend this is only done for a signed operation
if (div_signed) begin
operand_a = sext32(fu_data_i.operand_a[31:0]);
operand_b = sext32(fu_data_i.operand_b[31:0]);
end else begin
word_op_q <= word_op_d;
operand_a = fu_data_i.operand_a[31:0];
operand_b = fu_data_i.operand_b[31:0];
end
// save whether we want sign extend the result or not, this is done for all word operations
word_op_d = 1'b1;
end else begin
// regular op
operand_a = fu_data_i.operand_a;
operand_b = fu_data_i.operand_b;
word_op_d = 1'b0;
end
end
end
// ---------------------
// Serial Divider
// ---------------------
// Serial divider instance. It is fed with the operands prepared by the input
// MUX above; its raw result goes through the result multiplexer below.
serdiv #(
.CVA6Cfg(CVA6Cfg),
.WIDTH (riscv::XLEN)
) i_div (
.clk_i (clk_i),
.rst_ni (rst_ni),
.id_i (fu_data_i.trans_id),
.op_a_i (operand_a),
.op_b_i (operand_b),
.opcode_i ({rem, div_signed}), // 00: udiv, 10: urem, 01: div, 11: rem
.in_vld_i (div_valid_op),
// the divider's input-ready is exported as the ready of the whole unit
.in_rdy_o (mult_ready_o),
.flush_i (flush_i),
.out_vld_o(div_valid),
// held low while the multiplier presents a valid result (multiplication has precedence)
.out_rdy_i(div_ready_i),
.id_o (div_trans_id),
.res_o (result)
);
// Result multiplexer
// if it was a signed word operation the bit will be set and the result will be sign extended accordingly
assign div_result = (word_op_q) ? sext32(result) : result;
// ---------------------
// Registers
// ---------------------
// Remembers, across the divider latency, whether the operation was a word operation.
always_ff @(posedge clk_i or negedge rst_ni) begin
  if (!rst_ni) word_op_q <= '0;
  else word_op_q <= word_op_d;
end
endmodule

View file

@ -15,136 +15,142 @@
//
module multiplier import ariane_pkg::*; #(
module multiplier
import ariane_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) (
input logic clk_i,
input logic rst_ni,
input logic [TRANS_ID_BITS-1:0] trans_id_i,
input logic mult_valid_i,
input fu_op operation_i,
input riscv::xlen_t operand_a_i,
input riscv::xlen_t operand_b_i,
output riscv::xlen_t result_o,
output logic mult_valid_o,
output logic mult_ready_o,
output logic [TRANS_ID_BITS-1:0] mult_trans_id_o
input logic clk_i,
input logic rst_ni,
input logic [TRANS_ID_BITS-1:0] trans_id_i,
input logic mult_valid_i,
input fu_op operation_i,
input riscv::xlen_t operand_a_i,
input riscv::xlen_t operand_b_i,
output riscv::xlen_t result_o,
output logic mult_valid_o,
output logic mult_ready_o,
output logic [TRANS_ID_BITS-1:0] mult_trans_id_o
);
// Carry-less multiplication
logic [riscv::XLEN-1:0] clmul_q, clmul_d, clmulr_q, clmulr_d, operand_a, operand_b, operand_a_rev, operand_b_rev;
logic clmul_rmode, clmul_hmode;
// Carry-less multiplication
logic [riscv::XLEN-1:0]
clmul_q, clmul_d, clmulr_q, clmulr_d, operand_a, operand_b, operand_a_rev, operand_b_rev;
logic clmul_rmode, clmul_hmode;
if (ariane_pkg::BITMANIP) begin : gen_bitmanip
// checking for clmul_rmode and clmul_hmode
assign clmul_rmode = (operation_i == CLMULR);
assign clmul_hmode = (operation_i == CLMULH);
if (ariane_pkg::BITMANIP) begin : gen_bitmanip
// checking for clmul_rmode and clmul_hmode
assign clmul_rmode = (operation_i == CLMULR);
assign clmul_hmode = (operation_i == CLMULH);
// operand_a and b reverse generator
for (genvar i = 0; i < riscv::XLEN; i++) begin
assign operand_a_rev[i] = operand_a_i[(riscv::XLEN-1) -i];
assign operand_b_rev[i] = operand_b_i[(riscv::XLEN-1) -i];
end
// operand_a and operand_b selection
assign operand_a = (clmul_rmode | clmul_hmode) ? operand_a_rev : operand_a_i;
assign operand_b = (clmul_rmode | clmul_hmode) ? operand_b_rev : operand_b_i;
// implementation
always_comb begin
clmul_d = '0;
for (int i = 0; i <= riscv::XLEN; i++) begin
clmul_d = (|((operand_b >> i) & 1)) ? clmul_d ^ (operand_a << i) : clmul_d;
end
end
// clmulr + clmulh result generator
for (genvar i = 0; i < riscv::XLEN; i++) begin
assign clmulr_d[i] = clmul_d[(riscv::XLEN-1)-i];
end
// operand_a and b reverse generator
for (genvar i = 0; i < riscv::XLEN; i++) begin
assign operand_a_rev[i] = operand_a_i[(riscv::XLEN-1)-i];
assign operand_b_rev[i] = operand_b_i[(riscv::XLEN-1)-i];
end
// Pipeline register
logic [TRANS_ID_BITS-1:0] trans_id_q;
logic mult_valid_q;
fu_op operator_d, operator_q;
logic [riscv::XLEN*2-1:0] mult_result_d, mult_result_q;
// operand_a and operand_b selection
assign operand_a = (clmul_rmode | clmul_hmode) ? operand_a_rev : operand_a_i;
assign operand_b = (clmul_rmode | clmul_hmode) ? operand_b_rev : operand_b_i;
// control registers
logic sign_a, sign_b;
logic mult_valid;
// control signals
assign mult_valid_o = mult_valid_q;
assign mult_trans_id_o = trans_id_q;
assign mult_ready_o = 1'b1;
assign mult_valid = mult_valid_i && (operation_i inside {MUL, MULH, MULHU, MULHSU, MULW, CLMUL, CLMULH, CLMULR});
// Sign Select MUX
// implementation
always_comb begin
sign_a = 1'b0;
sign_b = 1'b0;
// signed multiplication
if (operation_i == MULH) begin
sign_a = 1'b1;
sign_b = 1'b1;
// signed - unsigned multiplication
end else if (operation_i == MULHSU) begin
sign_a = 1'b1;
// unsigned multiplication
end else begin
sign_a = 1'b0;
sign_b = 1'b0;
end
clmul_d = '0;
for (int i = 0; i <= riscv::XLEN; i++) begin
clmul_d = (|((operand_b >> i) & 1)) ? clmul_d ^ (operand_a << i) : clmul_d;
end
end
// single stage version
assign mult_result_d = $signed({operand_a_i[riscv::XLEN-1] & sign_a, operand_a_i}) *
$signed({operand_b_i[riscv::XLEN-1] & sign_b, operand_b_i});
assign operator_d = operation_i;
always_comb begin : p_selmux
unique case (operator_q)
MULH, MULHU, MULHSU: result_o = mult_result_q[riscv::XLEN*2-1:riscv::XLEN];
MULW: result_o = sext32(mult_result_q[31:0]);
CLMUL: result_o = clmul_q;
CLMULH: result_o = clmulr_q >> 1;
CLMULR: result_o = clmulr_q;
// MUL performs an XLEN-bit×XLEN-bit multiplication and places the lower XLEN bits in the destination register
default: result_o = mult_result_q[riscv::XLEN-1:0];// including MUL
endcase
// clmulr + clmulh result generator
for (genvar i = 0; i < riscv::XLEN; i++) begin
assign clmulr_d[i] = clmul_d[(riscv::XLEN-1)-i];
end
if (ariane_pkg::BITMANIP) begin
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
clmul_q <= '0;
clmulr_q <= '0;
end else begin
clmul_q <= clmul_d;
clmulr_q <= clmulr_d;
end
end
end
// Pipeline register
logic [TRANS_ID_BITS-1:0] trans_id_q;
logic mult_valid_q;
fu_op operator_d, operator_q;
logic [riscv::XLEN*2-1:0] mult_result_d, mult_result_q;
// control registers
logic sign_a, sign_b;
logic mult_valid;
// control signals
assign mult_valid_o = mult_valid_q;
assign mult_trans_id_o = trans_id_q;
assign mult_ready_o = 1'b1;
assign mult_valid = mult_valid_i && (operation_i inside {MUL, MULH, MULHU, MULHSU, MULW, CLMUL, CLMULH, CLMULR});
// Sign Select MUX
always_comb begin
sign_a = 1'b0;
sign_b = 1'b0;
// signed multiplication
if (operation_i == MULH) begin
sign_a = 1'b1;
sign_b = 1'b1;
// signed - unsigned multiplication
end else if (operation_i == MULHSU) begin
sign_a = 1'b1;
// unsigned multiplication
end else begin
sign_a = 1'b0;
sign_b = 1'b0;
end
// -----------------------
// Output pipeline register
// -----------------------
end
// single stage version
assign mult_result_d = $signed(
{operand_a_i[riscv::XLEN-1] & sign_a, operand_a_i}
) * $signed(
{operand_b_i[riscv::XLEN-1] & sign_b, operand_b_i}
);
assign operator_d = operation_i;
// Result multiplexer: selects result_o from the registered products according to
// the registered operation (operator_q).
always_comb begin : p_selmux
unique case (operator_q)
// High-half variants return the upper XLEN bits of the 2*XLEN-bit product.
MULH, MULHU, MULHSU: result_o = mult_result_q[riscv::XLEN*2-1:riscv::XLEN];
// MULW returns the low 32 bits of the product passed through sext32().
MULW: result_o = sext32(mult_result_q[31:0]);
// Carry-less multiply variants use the dedicated clmul_q / clmulr_q registers.
CLMUL: result_o = clmul_q;
// CLMULH is taken from clmulr_q shifted right by one bit.
CLMULH: result_o = clmulr_q >> 1;
CLMULR: result_o = clmulr_q;
// MUL performs an XLEN-bit×XLEN-bit multiplication and places the lower XLEN bits in the destination register
default: result_o = mult_result_q[riscv::XLEN-1:0]; // including MUL
endcase
end
if (ariane_pkg::BITMANIP) begin
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
mult_valid_q <= '0;
trans_id_q <= '0;
operator_q <= MUL;
mult_result_q <= '0;
end else begin
// Input silencing
trans_id_q <= trans_id_i;
// Output Register
mult_valid_q <= mult_valid;
operator_q <= operator_d;
mult_result_q <= mult_result_d;
end
if (~rst_ni) begin
clmul_q <= '0;
clmulr_q <= '0;
end else begin
clmul_q <= clmul_d;
clmulr_q <= clmulr_d;
end
end
end
// ---------------------------------------------------------------
// Result stage flops
// Captures the product, the operation, the transaction ID and the
// qualified valid on every rising clock edge; cleared asynchronously
// while rst_ni is low.
// ---------------------------------------------------------------
always_ff @(posedge clk_i or negedge rst_ni) begin
  if (~rst_ni) begin
    // Reset state: no valid result, operation defaults to MUL
    mult_result_q <= '0;
    operator_q    <= MUL;
    trans_id_q    <= '0;
    mult_valid_q  <= '0;
  end else begin
    // Data path: product and the operation that selects the result slice
    mult_result_q <= mult_result_d;
    operator_q    <= operator_d;
    // Bookkeeping: transaction ID and qualified valid travel with the data
    trans_id_q    <= trans_id_i;
    mult_valid_q  <= mult_valid;
  end
end
endmodule

View file

@ -13,50 +13,52 @@
// Description: Performance counters
module perf_counters import ariane_pkg::*; #(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int unsigned NumPorts = 3 // number of miss ports
module perf_counters
import ariane_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int unsigned NumPorts = 3 // number of miss ports
) (
input logic clk_i,
input logic rst_ni,
input logic debug_mode_i, // debug mode
// SRAM like interface
input logic [11:0] addr_i, // read/write address (up to 6 counters possible)
input logic we_i, // write enable
input riscv::xlen_t data_i, // data to write
output riscv::xlen_t data_o, // data to read
// from commit stage
input scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr_i, // the instruction we want to commit
input logic [CVA6Cfg.NrCommitPorts-1:0] commit_ack_i, // acknowledge that we are indeed committing
// from L1 caches
input logic l1_icache_miss_i,
input logic l1_dcache_miss_i,
// from MMU
input logic itlb_miss_i,
input logic dtlb_miss_i,
// from issue stage
input logic sb_full_i,
// from frontend
input logic if_empty_i,
// from PC Gen
input exception_t ex_i,
input logic eret_i,
input bp_resolve_t resolved_branch_i,
// for newly added events
input exception_t branch_exceptions_i, //Branch exceptions->execute unit-> branch_exception_o
input icache_dreq_t l1_icache_access_i,
input dcache_req_i_t[2:0] l1_dcache_access_i,
input logic [NumPorts-1:0][DCACHE_SET_ASSOC-1:0]miss_vld_bits_i, //For Cache eviction (3ports-LOAD,STORE,PTW)
input logic i_tlb_flush_i,
input logic stall_issue_i, //stall-read operands
input logic[31:0] mcountinhibit_i
input logic clk_i,
input logic rst_ni,
input logic debug_mode_i, // debug mode
// SRAM like interface
input logic [11:0] addr_i, // read/write address (up to 6 counters possible)
input logic we_i, // write enable
input riscv::xlen_t data_i, // data to write
output riscv::xlen_t data_o, // data to read
// from commit stage
input scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr_i, // the instruction we want to commit
input logic [CVA6Cfg.NrCommitPorts-1:0] commit_ack_i, // acknowledge that we are indeed committing
// from L1 caches
input logic l1_icache_miss_i,
input logic l1_dcache_miss_i,
// from MMU
input logic itlb_miss_i,
input logic dtlb_miss_i,
// from issue stage
input logic sb_full_i,
// from frontend
input logic if_empty_i,
// from PC Gen
input exception_t ex_i,
input logic eret_i,
input bp_resolve_t resolved_branch_i,
// for newly added events
input exception_t branch_exceptions_i, //Branch exceptions->execute unit-> branch_exception_o
input icache_dreq_t l1_icache_access_i,
input dcache_req_i_t [2:0] l1_dcache_access_i,
input logic [NumPorts-1:0][DCACHE_SET_ASSOC-1:0]miss_vld_bits_i, //For Cache eviction (3ports-LOAD,STORE,PTW)
input logic i_tlb_flush_i,
input logic stall_issue_i, //stall-read operands
input logic [31:0] mcountinhibit_i
);
logic [63:0] generic_counter_d[6:1];
logic [63:0] generic_counter_q[6:1];
//internal signal to keep track of exception
logic read_access_exception,update_access_exception;
logic read_access_exception, update_access_exception;
logic events[6:1];
//internal signal for MUX select line input
@ -64,116 +66,155 @@ module perf_counters import ariane_pkg::*; #(
logic [4:0] mhpmevent_q[6:1];
//Multiplexer
always_comb begin : Mux
events[6:1]='{default:0};
for(int unsigned i = 1; i <= 6; i++) begin
case(mhpmevent_q[i])
5'b00000 : events[i] = 0;
5'b00001 : events[i] = l1_icache_miss_i;//L1 I-Cache misses
5'b00010 : events[i] = l1_dcache_miss_i;//L1 D-Cache misses
5'b00011 : events[i] = itlb_miss_i;//ITLB misses
5'b00100 : events[i] = dtlb_miss_i;//DTLB misses
5'b00101 : for (int unsigned j = 0; j < CVA6Cfg.NrCommitPorts; j++) if (commit_ack_i[j]) events[i] = commit_instr_i[j].fu == LOAD;//Load accesses
5'b00110 : for (int unsigned j = 0; j < CVA6Cfg.NrCommitPorts; j++) if (commit_ack_i[j]) events[i] = commit_instr_i[j].fu == STORE;//Store accesses
5'b00111 : events[i] = ex_i.valid;//Exceptions
5'b01000 : events[i] = eret_i;//Exception handler returns
5'b01001 : for (int unsigned j = 0; j < CVA6Cfg.NrCommitPorts; j++) if (commit_ack_i[j]) events[i] = commit_instr_i[j].fu == CTRL_FLOW;//Branch instructions
5'b01010 : events[i] = resolved_branch_i.valid && resolved_branch_i.is_mispredict;//Branch mispredicts
5'b01011 : events[i] = branch_exceptions_i.valid;//Branch exceptions
// The standard software calling convention uses register x1 to hold the return address on a call
// the unconditional jump is decoded as ADD op
5'b01100 : for (int unsigned j = 0; j < CVA6Cfg.NrCommitPorts; j++) if (commit_ack_i[j]) events[i] = commit_instr_i[j].fu == CTRL_FLOW && (commit_instr_i[j].op == ADD || commit_instr_i[j].op == JALR) && (commit_instr_i[j].rd == 'd1 || commit_instr_i[j].rd == 'd5);//Call
5'b01101 : for (int unsigned j = 0; j < CVA6Cfg.NrCommitPorts; j++) if (commit_ack_i[j]) events[i] = commit_instr_i[j].op == JALR && commit_instr_i[j].rd == 'd0;//Return
5'b01110 : events[i] = sb_full_i;//MSB Full
5'b01111 : events[i] = if_empty_i;//Instruction fetch Empty
5'b10000 : events[i] = l1_icache_access_i.req;//L1 I-Cache accesses
5'b10001 : events[i] = l1_dcache_access_i[0].data_req || l1_dcache_access_i[1].data_req || l1_dcache_access_i[2].data_req;//L1 D-Cache accesses
5'b10010 : events[i] = (l1_dcache_miss_i && miss_vld_bits_i[0] == 8'hFF) || (l1_dcache_miss_i && miss_vld_bits_i[1] == 8'hFF) || (l1_dcache_miss_i && miss_vld_bits_i[2] == 8'hFF);//eviction
5'b10011 : events[i] = i_tlb_flush_i;//I-TLB flush
5'b10100 : for (int unsigned j = 0; j < CVA6Cfg.NrCommitPorts; j++) if (commit_ack_i[j]) events[i] = commit_instr_i[j].fu == ALU || commit_instr_i[j].fu == MULT;//Integer instructions
5'b10101 : for (int unsigned j = 0; j < CVA6Cfg.NrCommitPorts; j++) if (commit_ack_i[j]) events[i] = commit_instr_i[j].fu == FPU || commit_instr_i[j].fu == FPU_VEC;//Floating Point Instructions
5'b10110 : events[i] = stall_issue_i;//Pipeline bubbles
default: events[i] = 0;
endcase
end
// Event multiplexer: for each generic counter i (1..6) drive events[i] with the
// event source selected by the 5-bit code held in mhpmevent_q[i].
// Code 0 and any code without a case item leave the event at 0.
always_comb begin : Mux
events[6:1] = '{default: 0};
for (int unsigned i = 1; i <= 6; i++) begin
case (mhpmevent_q[i])
5'b00000: events[i] = 0;
5'b00001: events[i] = l1_icache_miss_i; //L1 I-Cache misses
5'b00010: events[i] = l1_dcache_miss_i; //L1 D-Cache misses
5'b00011: events[i] = itlb_miss_i; //ITLB misses
5'b00100: events[i] = dtlb_miss_i; //DTLB misses
// Commit-based events (this one and the similar loops below) walk all commit ports.
// NOTE(review): every acknowledged port overwrites events[i], so when several ports
// commit in the same cycle the highest-index acknowledged port decides the value and
// at most one event is produced per cycle - confirm this is intended.
5'b00101:
for (int unsigned j = 0; j < CVA6Cfg.NrCommitPorts; j++)
if (commit_ack_i[j]) events[i] = commit_instr_i[j].fu == LOAD; //Load accesses
5'b00110:
for (int unsigned j = 0; j < CVA6Cfg.NrCommitPorts; j++)
if (commit_ack_i[j]) events[i] = commit_instr_i[j].fu == STORE; //Store accesses
5'b00111: events[i] = ex_i.valid; //Exceptions
5'b01000: events[i] = eret_i; //Exception handler returns
5'b01001:
for (int unsigned j = 0; j < CVA6Cfg.NrCommitPorts; j++)
if (commit_ack_i[j]) events[i] = commit_instr_i[j].fu == CTRL_FLOW; //Branch instructions
5'b01010:
events[i] = resolved_branch_i.valid && resolved_branch_i.is_mispredict;//Branch mispredicts
5'b01011: events[i] = branch_exceptions_i.valid; //Branch exceptions
// The standard software calling convention uses register x1 to hold the return address on a call
// the unconditional jump is decoded as ADD op
// A call is counted for a CTRL_FLOW instruction with op ADD or JALR whose rd is register 1 or 5.
5'b01100:
for (int unsigned j = 0; j < CVA6Cfg.NrCommitPorts; j++)
if (commit_ack_i[j])
events[i] = commit_instr_i[j].fu == CTRL_FLOW && (commit_instr_i[j].op == ADD || commit_instr_i[j].op == JALR) && (commit_instr_i[j].rd == 'd1 || commit_instr_i[j].rd == 'd5);//Call
// A return is counted for a JALR whose rd is register 0.
5'b01101:
for (int unsigned j = 0; j < CVA6Cfg.NrCommitPorts; j++)
if (commit_ack_i[j])
events[i] = commit_instr_i[j].op == JALR && commit_instr_i[j].rd == 'd0; //Return
5'b01110: events[i] = sb_full_i; //MSB Full
5'b01111: events[i] = if_empty_i; //Instruction fetch Empty
5'b10000: events[i] = l1_icache_access_i.req; //L1 I-Cache accesses
// Any of the three D-cache request ports asserting data_req counts as one access.
5'b10001:
events[i] = l1_dcache_access_i[0].data_req || l1_dcache_access_i[1].data_req || l1_dcache_access_i[2].data_req;//L1 D-Cache accesses
// Eviction: a D-cache miss while the valid bits of port 0, 1 or 2 are all ones.
// NOTE(review): the comparison uses the literal 8'hFF while miss_vld_bits_i is declared
// DCACHE_SET_ASSOC bits wide per port - confirm DCACHE_SET_ASSOC is 8 in every configuration.
5'b10010:
events[i] = (l1_dcache_miss_i && miss_vld_bits_i[0] == 8'hFF) || (l1_dcache_miss_i && miss_vld_bits_i[1] == 8'hFF) || (l1_dcache_miss_i && miss_vld_bits_i[2] == 8'hFF);//eviction
5'b10011: events[i] = i_tlb_flush_i; //I-TLB flush
5'b10100:
for (int unsigned j = 0; j < CVA6Cfg.NrCommitPorts; j++)
if (commit_ack_i[j])
events[i] = commit_instr_i[j].fu == ALU || commit_instr_i[j].fu == MULT;//Integer instructions
5'b10101:
for (int unsigned j = 0; j < CVA6Cfg.NrCommitPorts; j++)
if (commit_ack_i[j])
events[i] = commit_instr_i[j].fu == FPU || commit_instr_i[j].fu == FPU_VEC;//Floating Point Instructions
5'b10110: events[i] = stall_issue_i; //Pipeline bubbles
default: events[i] = 0;
endcase
end
always_comb begin : generic_counter
generic_counter_d = generic_counter_q;
data_o = 'b0;
mhpmevent_d = mhpmevent_q;
read_access_exception = 1'b0;
update_access_exception = 1'b0;
end
for(int unsigned i = 1; i <= 6; i++) begin
if ((!debug_mode_i) && (!we_i)) begin
if ((events[i]) == 1 && (!mcountinhibit_i[i+2]))begin
generic_counter_d[i] = generic_counter_q[i] + 1'b1;end
else begin
generic_counter_d[i] = 'b0;end
always_comb begin : generic_counter
generic_counter_d = generic_counter_q;
data_o = 'b0;
mhpmevent_d = mhpmevent_q;
read_access_exception = 1'b0;
update_access_exception = 1'b0;
for (int unsigned i = 1; i <= 6; i++) begin
// Advance counter i by one when its selected event fires, provided that
//  - the hart is not in debug mode,
//  - no CSR write is in flight this cycle (we_i low), and
//  - the counter is not inhibited (bit i+2 of mcountinhibit_i, since generic
//    counter 1 corresponds to mhpmcounter3).
if ((!debug_mode_i) && (!we_i)) begin
  if ((events[i]) == 1 && (!mcountinhibit_i[i+2])) begin
    generic_counter_d[i] = generic_counter_q[i] + 1'b1;
  end
  // Deliberately no else branch: generic_counter_d defaults to generic_counter_q
  // at the top of this always_comb, so an idle or inhibited counter keeps its
  // value. Clearing it here would wipe the accumulated count in every cycle
  // without an event, and would make mcountinhibit reset the counter instead
  // of freezing it.
end
//Read
unique case (addr_i)
riscv::CSR_MHPM_COUNTER_3,
riscv::CSR_MHPM_COUNTER_4,
riscv::CSR_MHPM_COUNTER_5,
riscv::CSR_MHPM_COUNTER_6,
riscv::CSR_MHPM_COUNTER_7,
riscv::CSR_MHPM_COUNTER_8 :begin if (riscv::XLEN == 32) data_o = generic_counter_q[addr_i-riscv::CSR_MHPM_COUNTER_3 + 1][31:0]; else data_o = generic_counter_q[addr_i-riscv::CSR_MHPM_COUNTER_3 + 1];end
riscv::CSR_MHPM_COUNTER_3H,
riscv::CSR_MHPM_COUNTER_4H,
riscv::CSR_MHPM_COUNTER_5H,
riscv::CSR_MHPM_COUNTER_6H,
riscv::CSR_MHPM_COUNTER_7H,
riscv::CSR_MHPM_COUNTER_8H :begin if (riscv::XLEN == 32) data_o = generic_counter_q[addr_i-riscv::CSR_MHPM_COUNTER_3H + 1][63:32]; else read_access_exception = 1'b1;end
riscv::CSR_MHPM_EVENT_3,
riscv::CSR_MHPM_EVENT_4,
riscv::CSR_MHPM_EVENT_5,
riscv::CSR_MHPM_EVENT_6,
riscv::CSR_MHPM_EVENT_7,
riscv::CSR_MHPM_EVENT_8 : data_o = mhpmevent_q[addr_i-riscv::CSR_MHPM_EVENT_3 + 1] ;
default: data_o = 'b0;
endcase
//Write
if(we_i) begin
unique case(addr_i)
riscv::CSR_MHPM_COUNTER_3,
riscv::CSR_MHPM_COUNTER_4,
riscv::CSR_MHPM_COUNTER_5,
riscv::CSR_MHPM_COUNTER_6,
riscv::CSR_MHPM_COUNTER_7,
riscv::CSR_MHPM_COUNTER_8 :begin if (riscv::XLEN == 32) generic_counter_d[addr_i-riscv::CSR_MHPM_COUNTER_3 + 1][31:0] = data_i; else generic_counter_d[addr_i-riscv::CSR_MHPM_COUNTER_3 + 1] = data_i; end
riscv::CSR_MHPM_COUNTER_3H,
riscv::CSR_MHPM_COUNTER_4H,
riscv::CSR_MHPM_COUNTER_5H,
riscv::CSR_MHPM_COUNTER_6H,
riscv::CSR_MHPM_COUNTER_7H,
riscv::CSR_MHPM_COUNTER_8H :begin if (riscv::XLEN == 32) generic_counter_d[addr_i-riscv::CSR_MHPM_COUNTER_3H + 1][63:32] = data_i; else update_access_exception = 1'b1;end
riscv::CSR_MHPM_EVENT_3,
riscv::CSR_MHPM_EVENT_4,
riscv::CSR_MHPM_EVENT_5,
riscv::CSR_MHPM_EVENT_6,
riscv::CSR_MHPM_EVENT_7,
riscv::CSR_MHPM_EVENT_8 : mhpmevent_d[addr_i-riscv::CSR_MHPM_EVENT_3 + 1] = data_i;
default: update_access_exception = 1'b1;
endcase
end
end
//Registers
//Read
// Combinational CSR read decode on addr_i; this case is not qualified by we_i.
// The array index is computed as (addr_i - first CSR of the group + 1).
// Assumes the six CSR addresses of each group are consecutive so the index
// falls in 1..6 - TODO confirm against the riscv package.
unique case (addr_i)
riscv::CSR_MHPM_COUNTER_3,
riscv::CSR_MHPM_COUNTER_4,
riscv::CSR_MHPM_COUNTER_5,
riscv::CSR_MHPM_COUNTER_6,
riscv::CSR_MHPM_COUNTER_7,
riscv::CSR_MHPM_COUNTER_8 :begin
// Counter CSR: low 32 bits when XLEN is 32, otherwise the full 64-bit counter.
if (riscv::XLEN == 32) data_o = generic_counter_q[addr_i-riscv::CSR_MHPM_COUNTER_3+1][31:0];
else data_o = generic_counter_q[addr_i-riscv::CSR_MHPM_COUNTER_3+1];
end
riscv::CSR_MHPM_COUNTER_3H,
riscv::CSR_MHPM_COUNTER_4H,
riscv::CSR_MHPM_COUNTER_5H,
riscv::CSR_MHPM_COUNTER_6H,
riscv::CSR_MHPM_COUNTER_7H,
riscv::CSR_MHPM_COUNTER_8H :begin
// High-half CSR: upper 32 bits when XLEN is 32; for any other XLEN the read
// is flagged through read_access_exception and data_o keeps its default of 0.
if (riscv::XLEN == 32)
data_o = generic_counter_q[addr_i-riscv::CSR_MHPM_COUNTER_3H+1][63:32];
else read_access_exception = 1'b1;
end
riscv::CSR_MHPM_EVENT_3,
riscv::CSR_MHPM_EVENT_4,
riscv::CSR_MHPM_EVENT_5,
riscv::CSR_MHPM_EVENT_6,
riscv::CSR_MHPM_EVENT_7,
riscv::CSR_MHPM_EVENT_8 :
// Event selector CSR: returns the 5-bit event code currently programmed.
data_o = mhpmevent_q[addr_i-riscv::CSR_MHPM_EVENT_3+1];
// Any other address reads as zero without raising the read exception flag.
default: data_o = 'b0;
endcase
//Write
// CSR write decode, active only while we_i is high. It uses the same index
// computation as the read path: (addr_i - first CSR of the group + 1).
// Assumes consecutive CSR addresses within each group - TODO confirm.
if (we_i) begin
unique case (addr_i)
riscv::CSR_MHPM_COUNTER_3,
riscv::CSR_MHPM_COUNTER_4,
riscv::CSR_MHPM_COUNTER_5,
riscv::CSR_MHPM_COUNTER_6,
riscv::CSR_MHPM_COUNTER_7,
riscv::CSR_MHPM_COUNTER_8 :begin
// Counter CSR: when XLEN is 32 only the low half is replaced, otherwise
// data_i is assigned to the whole 64-bit counter.
if (riscv::XLEN == 32)
generic_counter_d[addr_i-riscv::CSR_MHPM_COUNTER_3+1][31:0] = data_i;
else generic_counter_d[addr_i-riscv::CSR_MHPM_COUNTER_3+1] = data_i;
end
riscv::CSR_MHPM_COUNTER_3H,
riscv::CSR_MHPM_COUNTER_4H,
riscv::CSR_MHPM_COUNTER_5H,
riscv::CSR_MHPM_COUNTER_6H,
riscv::CSR_MHPM_COUNTER_7H,
riscv::CSR_MHPM_COUNTER_8H :begin
// High-half CSR: replaces the upper 32 bits when XLEN is 32; for any other
// XLEN the write is flagged through update_access_exception.
if (riscv::XLEN == 32)
generic_counter_d[addr_i-riscv::CSR_MHPM_COUNTER_3H+1][63:32] = data_i;
else update_access_exception = 1'b1;
end
riscv::CSR_MHPM_EVENT_3,
riscv::CSR_MHPM_EVENT_4,
riscv::CSR_MHPM_EVENT_5,
riscv::CSR_MHPM_EVENT_6,
riscv::CSR_MHPM_EVENT_7,
riscv::CSR_MHPM_EVENT_8 :
// Event selector CSR: data_i is assigned to the 5-bit selector, so only its
// low five bits are kept.
mhpmevent_d[addr_i-riscv::CSR_MHPM_EVENT_3+1] = data_i;
// A write to any other address is flagged as an update exception.
default: update_access_exception = 1'b1;
endcase
end
end
//Registers
always_ff @(posedge clk_i or negedge rst_ni) begin
if (!rst_ni) begin
generic_counter_q <= '{default:0};
mhpmevent_q <= '{default:0};
end else begin
generic_counter_q <= generic_counter_d;
mhpmevent_q <= mhpmevent_d;
end
end
if (!rst_ni) begin
generic_counter_q <= '{default: 0};
mhpmevent_q <= '{default: 0};
end else begin
generic_counter_q <= generic_counter_d;
mhpmevent_q <= mhpmevent_d;
end
end
endmodule

View file

@ -13,43 +13,43 @@
// Description: PMP package
package riscv;
// --------------------
// Privilege Spec
// --------------------
typedef enum logic[1:0] {
PRIV_LVL_M = 2'b11,
PRIV_LVL_S = 2'b01,
PRIV_LVL_U = 2'b00
} priv_lvl_t;
// --------------------
// Privilege Spec
// --------------------
// Two-bit privilege level encoding. The value 2'b10 has no enumerator here.
typedef enum logic [1:0] {
PRIV_LVL_M = 2'b11,
PRIV_LVL_S = 2'b01,
PRIV_LVL_U = 2'b00
} priv_lvl_t;
// PMP
typedef enum logic [1:0] {
OFF = 2'b00,
TOR = 2'b01,
NA4 = 2'b10,
NAPOT = 2'b11
} pmp_addr_mode_t;
// PMP
// Address-matching mode of a PMP entry (2-bit field of pmpcfg_t).
// pmp_entry handles these as: OFF never matches, TOR matches a range bounded by
// the previous and the current entry address, NA4 and NAPOT match a masked
// naturally aligned region around the entry address.
typedef enum logic [1:0] {
OFF = 2'b00,
TOR = 2'b01,
NA4 = 2'b10,
NAPOT = 2'b11
} pmp_addr_mode_t;
// PMP Access Type
typedef enum logic [2:0] {
ACCESS_NONE = 3'b000,
ACCESS_READ = 3'b001,
ACCESS_WRITE = 3'b010,
ACCESS_EXEC = 3'b100
} pmp_access_t;
// PMP Access Type
// One bit per access kind so values can be OR-ed together and masked against
// the permission bits of a PMP entry. Bit positions line up with the packed
// {x, w, r} layout of pmpcfg_access_t: bit 2 = execute, bit 1 = write, bit 0 = read.
typedef enum logic [2:0] {
ACCESS_NONE = 3'b000,
ACCESS_READ = 3'b001,
ACCESS_WRITE = 3'b010,
ACCESS_EXEC = 3'b100
} pmp_access_t;
typedef struct packed {
logic x;
logic w;
logic r;
} pmpcfg_access_t;
// Permission bits of a PMP entry (3 bits). As a packed struct the first member
// is the most significant bit, so the layout is {x, w, r} = bits [2:0].
typedef struct packed {
logic x;
logic w;
logic r;
} pmpcfg_access_t;
// packed struct of a PMP configuration register (8bit)
typedef struct packed {
logic locked; // lock this configuration
logic [1:0] reserved;
pmp_addr_mode_t addr_mode; // Off, TOR, NA4, NAPOT
pmpcfg_access_t access_type;
} pmpcfg_t;
// packed struct of a PMP configuration register (8bit)
// Layout from MSB to LSB: locked (1) | reserved (2) | addr_mode (2) | access_type (3).
typedef struct packed {
logic locked; // lock this configuration
logic [1:0] reserved;
pmp_addr_mode_t addr_mode; // Off, TOR, NA4, NAPOT
pmpcfg_access_t access_type;
} pmpcfg_t;
endpackage
endpackage

View file

@ -13,10 +13,10 @@
// Description: purely combinatorial PMP unit (with extraction for more complex configs such as NAPOT)
module pmp #(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int unsigned PLEN = 34, // rv64: 56
parameter int unsigned PMP_LEN = 32, // rv64: 54
parameter int unsigned NR_ENTRIES = 4
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int unsigned PLEN = 34, // rv64: 56
parameter int unsigned PMP_LEN = 32, // rv64: 54
parameter int unsigned NR_ENTRIES = 4
) (
// Input
input logic [PLEN-1:0] addr_i,
@ -28,66 +28,67 @@ module pmp #(
// Output
output logic allow_o
);
// if there are no PMPs we can always grant the access.
if (NR_ENTRIES > 0) begin : gen_pmp
logic [NR_ENTRIES-1:0] match;
// if there are no PMPs we can always grant the access.
if (NR_ENTRIES > 0) begin : gen_pmp
logic [NR_ENTRIES-1:0] match;
for (genvar i = 0; i < NR_ENTRIES; i++) begin
logic [PMP_LEN-1:0] conf_addr_prev;
for (genvar i = 0; i < NR_ENTRIES; i++) begin
logic [PMP_LEN-1:0] conf_addr_prev;
assign conf_addr_prev = (i == 0) ? '0 : conf_addr_i[i-1];
assign conf_addr_prev = (i == 0) ? '0 : conf_addr_i[i-1];
pmp_entry #(
.CVA6Cfg ( CVA6Cfg ),
.PLEN ( PLEN ),
.PMP_LEN ( PMP_LEN )
) i_pmp_entry(
.addr_i ( addr_i ),
.conf_addr_i ( conf_addr_i[i] ),
.conf_addr_prev_i ( conf_addr_prev ),
.conf_addr_mode_i ( conf_i[i].addr_mode ),
.match_o ( match[i] )
);
end
always_comb begin
int i;
allow_o = 1'b0;
for (i = 0; i < NR_ENTRIES; i++) begin
// either we are in S or U mode or the config is locked in which
// case it also applies in M mode
if (priv_lvl_i != riscv::PRIV_LVL_M || conf_i[i].locked) begin
if (match[i]) begin
if ((access_type_i & conf_i[i].access_type) != access_type_i) allow_o = 1'b0;
else allow_o = 1'b1;
break;
end
end
end
if (i == NR_ENTRIES) begin // no PMP entry matched the address
// allow all accesses from M-mode for no pmp match
if (priv_lvl_i == riscv::PRIV_LVL_M) allow_o = 1'b1;
// disallow accesses for all other modes
else allow_o = 1'b0;
end
end
end else assign allow_o = 1'b1;
// synthesis translate_off
always_comb begin
logic no_locked;
no_locked = 1'b0;
if(priv_lvl_i == riscv::PRIV_LVL_M) begin
no_locked = 1'b1;
for (int i = 0; i < NR_ENTRIES; i++) begin
if (conf_i[i].locked && conf_i[i].addr_mode != riscv::OFF) begin
no_locked &= 1'b0;
end else no_locked &= 1'b1;
end
if (no_locked == 1'b1) assert(allow_o == 1'b1);
end
pmp_entry #(
.CVA6Cfg(CVA6Cfg),
.PLEN (PLEN),
.PMP_LEN(PMP_LEN)
) i_pmp_entry (
.addr_i (addr_i),
.conf_addr_i (conf_addr_i[i]),
.conf_addr_prev_i(conf_addr_prev),
.conf_addr_mode_i(conf_i[i].addr_mode),
.match_o (match[i])
);
end
// synthesis translate_on
// Access decision: scan the PMP entries from index 0 upwards and let the first
// applicable matching entry decide; if none applies, fall back to the
// privilege-level default below.
always_comb begin
// Declared outside the for statement so its final value can be inspected after the loop.
int i;
allow_o = 1'b0;
for (i = 0; i < NR_ENTRIES; i++) begin
// either we are in S or U mode or the config is locked in which
// case it also applies in M mode
if (priv_lvl_i != riscv::PRIV_LVL_M || conf_i[i].locked) begin
if (match[i]) begin
// Grant only if every requested access bit is also set in the entry's permissions.
if ((access_type_i & conf_i[i].access_type) != access_type_i) allow_o = 1'b0;
else allow_o = 1'b1;
// First applicable match wins: leave the loop with i < NR_ENTRIES.
break;
end
end
end
// i reaches NR_ENTRIES only when the loop ran to completion without a break.
if (i == NR_ENTRIES) begin // no PMP entry matched the address
// allow all accesses from M-mode for no pmp match
if (priv_lvl_i == riscv::PRIV_LVL_M) allow_o = 1'b1;
// disallow accesses for all other modes
else
allow_o = 1'b0;
end
end
end else assign allow_o = 1'b1;
// synthesis translate_off
// Simulation-only consistency check: while in M-mode, if no entry is both
// locked and enabled (addr_mode other than OFF), the access must be allowed.
always_comb begin
  logic all_unlocked;
  all_unlocked = 1'b0;
  if (priv_lvl_i == riscv::PRIV_LVL_M) begin
    // Start optimistic and clear the flag on the first locked, enabled entry.
    all_unlocked = 1'b1;
    for (int k = 0; k < NR_ENTRIES; k++) begin
      if (conf_i[k].locked && conf_i[k].addr_mode != riscv::OFF) all_unlocked = 1'b0;
    end
    if (all_unlocked) assert (allow_o == 1'b1);
  end
end
// synthesis translate_on
endmodule

View file

@ -28,95 +28,98 @@ module pmp_entry #(
// Output
output logic match_o
);
logic [PLEN-1:0] conf_addr_n;
logic [$clog2(PLEN)-1:0] trail_ones;
logic [PLEN-1:0] base;
logic [PLEN-1:0] mask;
int unsigned size;
assign conf_addr_n = {2'b11, ~conf_addr_i};
lzc #(.WIDTH(PLEN), .MODE(1'b0)) i_lzc(
.in_i ( conf_addr_n ),
.cnt_o ( trail_ones ),
.empty_o ( )
);
// Bitwise inverse of the configured address with two 1 bits prepended.
// Assumes PMP_LEN + 2 == PLEN so the concatenation fills conf_addr_n exactly - TODO confirm.
logic [PLEN-1:0] conf_addr_n;
// Count delivered by the lzc instance below; used as (trail_ones + 3) to size a NAPOT region.
logic [$clog2(PLEN)-1:0] trail_ones;
// base / mask / size describe the region matched in NA4 and NAPOT mode.
logic [PLEN-1:0] base;
logic [PLEN-1:0] mask;
int unsigned size;
assign conf_addr_n = {2'b11, ~conf_addr_i};
// NOTE(review): presumably MODE 0 makes lzc count trailing zeros, so feeding it the
// inverted address yields the number of trailing ones of conf_addr_i - confirm
// against the lzc module.
lzc #(
.WIDTH(PLEN),
.MODE (1'b0)
) i_lzc (
.in_i (conf_addr_n),
.cnt_o (trail_ones),
.empty_o()
);
always_comb begin
case (conf_addr_mode_i)
riscv::TOR: begin
base = '0;
mask = '0;
size = '0;
// check that the requested address is in between the two
// configuration addresses
if (addr_i >= ({2'b0, conf_addr_prev_i} << 2) && addr_i < ({2'b0, conf_addr_i} << 2)) begin
match_o = 1'b1;
end else match_o = 1'b0;
always_comb begin
case (conf_addr_mode_i)
riscv::TOR: begin
base = '0;
mask = '0;
size = '0;
// check that the requested address is in between the two
// configuration addresses
if (addr_i >= ({2'b0, conf_addr_prev_i} << 2) && addr_i < ({2'b0, conf_addr_i} << 2)) begin
match_o = 1'b1;
end else match_o = 1'b0;
// synthesis translate_off
if (match_o == 0) begin
assert(addr_i >= ({2'b0, conf_addr_i} << 2) || addr_i < ({2'b0, conf_addr_prev_i} << 2));
end else begin
assert(addr_i < ({2'b0, conf_addr_i} << 2) && addr_i >= ({2'b0, conf_addr_prev_i} << 2));
end
// synthesis translate_on
// synthesis translate_off
if (match_o == 0) begin
assert (addr_i >= ({2'b0, conf_addr_i} << 2) || addr_i < ({2'b0, conf_addr_prev_i} << 2));
end else begin
assert (addr_i < ({2'b0, conf_addr_i} << 2) && addr_i >= ({2'b0, conf_addr_prev_i} << 2));
end
// synthesis translate_on
end
riscv::NA4, riscv::NAPOT: begin
if (conf_addr_mode_i == riscv::NA4) size = 2;
else begin
// use the extracted trailing ones
size = {{(32 - $clog2(PLEN)) {1'b0}}, trail_ones} + 3;
end
mask = '1 << size;
base = ({2'b0, conf_addr_i} << 2) & mask;
match_o = (addr_i & mask) == base ? 1'b1 : 1'b0;
// synthesis translate_off
// size extract checks
assert (size >= 2);
if (conf_addr_mode_i == riscv::NAPOT) begin
assert (size > 2);
if (size < PMP_LEN) assert (conf_addr_i[size-3] == 0);
for (int i = 0; i < PMP_LEN; i++) begin
if (size > 3 && i <= size - 4) begin
assert (conf_addr_i[i] == 1); // check that all the rest are ones
end
riscv::NA4, riscv::NAPOT: begin
if (conf_addr_mode_i == riscv::NA4) size = 2;
else begin
// use the extracted trailing ones
size = {{(32-$clog2(PLEN)){1'b0}}, trail_ones} + 3;
end
mask = '1 << size;
base = ({2'b0, conf_addr_i} << 2) & mask;
match_o = (addr_i & mask) == base ? 1'b1 : 1'b0;
// synthesis translate_off
// size extract checks
assert(size >= 2);
if (conf_addr_mode_i == riscv::NAPOT) begin
assert(size > 2);
if (size < PMP_LEN) assert(conf_addr_i[size - 3] == 0);
for (int i = 0; i < PMP_LEN; i++) begin
if (size > 3 && i <= size - 4) begin
assert(conf_addr_i[i] == 1); // check that all the rest are ones
end
end
end
if (size < PLEN-1) begin
if (base + 2**size > base) begin // check for overflow
if (match_o == 0) begin
assert(addr_i >= base + 2**size || addr_i < base);
end else begin
assert(addr_i < base + 2**size && addr_i >= base);
end
end else begin
if (match_o == 0) begin
assert(addr_i - 2**size >= base || addr_i < base);
end else begin
assert(addr_i - 2**size < base && addr_i >= base);
end
end
end
// synthesis translate_on
end
end
if (size < PLEN - 1) begin
if (base + 2 ** size > base) begin // check for overflow
if (match_o == 0) begin
assert (addr_i >= base + 2 ** size || addr_i < base);
end else begin
assert (addr_i < base + 2 ** size && addr_i >= base);
end
riscv::OFF: begin
match_o = 1'b0;
base = '0;
mask = '0;
size = '0;
end else begin
if (match_o == 0) begin
assert (addr_i - 2 ** size >= base || addr_i < base);
end else begin
assert (addr_i - 2 ** size < base && addr_i >= base);
end
default: begin
match_o = 0;
base = '0;
mask = '0;
size = '0;
end
endcase
end
end
end
// synthesis translate_on
end
riscv::OFF: begin
match_o = 1'b0;
base = '0;
mask = '0;
size = '0;
end
default: begin
match_o = 0;
base = '0;
mask = '0;
size = '0;
end
endcase
end
endmodule

View file

@ -15,84 +15,98 @@
import tb_pkg::*;
module pmp_tb;
timeunit 1ns;
timeprecision 1ps;
timeunit 1ns; timeprecision 1ps;
localparam int unsigned WIDTH = 16;
localparam int unsigned PMP_LEN = 13;
localparam int unsigned NR_ENTRIES = 4;
localparam int unsigned WIDTH = 16;
localparam int unsigned PMP_LEN = 13;
localparam int unsigned NR_ENTRIES = 4;
logic [WIDTH-1:0] addr;
riscv::pmp_access_t access_type;
// Configuration
logic [NR_ENTRIES-1:0][PMP_LEN-1:0] conf_addr;
riscv::pmpcfg_t [NR_ENTRIES-1:0] conf;
logic [WIDTH-1:0] addr;
riscv::pmp_access_t access_type;
// Output
logic allow;
// Configuration
logic [NR_ENTRIES-1:0][PMP_LEN-1:0] conf_addr;
riscv::pmpcfg_t [NR_ENTRIES-1:0] conf;
// helper signals
logic[WIDTH-1:0] base;
int unsigned size;
// Output
logic allow;
pmp #(
.PLEN(WIDTH),
.PMP_LEN(PMP_LEN),
.NR_ENTRIES(NR_ENTRIES)
) i_pmp(
.addr_i ( addr ),
.access_type_i ( access_type ),
.priv_lvl_i ( riscv::PRIV_LVL_U ),
.conf_addr_i ( conf_addr ),
.conf_i ( conf ),
.allow_o ( allow )
);
// helper signals
logic [WIDTH-1:0] base;
int unsigned size;
initial begin
// set all pmps to disabled initially
for (int i = 0; i < NR_ENTRIES; i++) begin
conf[i].addr_mode = riscv::OFF;
end
pmp #(
.PLEN(WIDTH),
.PMP_LEN(PMP_LEN),
.NR_ENTRIES(NR_ENTRIES)
) i_pmp (
.addr_i (addr),
.access_type_i(access_type),
.priv_lvl_i (riscv::PRIV_LVL_U),
.conf_addr_i (conf_addr),
.conf_i (conf),
.allow_o (allow)
);
// test napot 1
addr = 16'b00011001_10111010;
access_type = riscv::ACCESS_READ;
// pmp 3
base = 16'b00011001_00000000;
size = 8;
conf_addr[2] = P#(.WIDTH(WIDTH), .PMP_LEN(PMP_LEN))::base_to_conf(base, size);
conf[2].addr_mode = riscv::NAPOT;
conf[2].access_type = riscv::ACCESS_READ | riscv::ACCESS_WRITE | riscv::ACCESS_EXEC;
#5ns;
assert(allow == 1);
// add second PMP entry that disallows
// pmp 1
base = 16'b00011001_10110000;
size = 4;
conf_addr[1] = P#(.WIDTH(WIDTH), .PMP_LEN(PMP_LEN))::base_to_conf(base, size);
conf[1].addr_mode = riscv::NAPOT;
conf[1].access_type = '0;
#5ns;
assert(allow == 0);
// add third PMP entry that allows again
// pmp 2
base = 16'b00011001_10111000;
size = 3;
conf_addr[0] = P#(.WIDTH(WIDTH), .PMP_LEN(PMP_LEN))::base_to_conf(base, size);
conf[0].addr_mode = riscv::NAPOT;
conf[0].access_type = riscv::ACCESS_READ;
#5ns;
assert(allow == 1);
initial begin
// set all pmps to disabled initially
for (int i = 0; i < NR_ENTRIES; i++) begin
conf[i].addr_mode = riscv::OFF;
end
endmodule
// test napot 1
// One read address is checked against a growing set of overlapping NAPOT
// regions. The pmp module scans entries from index 0 and stops at the first
// match, so each entry added at a lower index overrides the earlier ones.
addr = 16'b00011001_10111010;
access_type = riscv::ACCESS_READ;
// Step 1 - entry index 2: size 8 region around base with all permissions.
// addr differs from base only in its low 8 bits, so the read must be allowed.
base = 16'b00011001_00000000;
size = 8;
conf_addr[2] = P#(
.WIDTH (WIDTH),
.PMP_LEN(PMP_LEN)
)::base_to_conf(
base, size
);
conf[2].addr_mode = riscv::NAPOT;
conf[2].access_type = riscv::ACCESS_READ | riscv::ACCESS_WRITE | riscv::ACCESS_EXEC;
#5ns;
assert (allow == 1);
// add second PMP entry that disallows
// Step 2 - entry index 1: smaller size 4 region containing addr, no permissions.
// It is found before entry 2, so the read must now be denied.
base = 16'b00011001_10110000;
size = 4;
conf_addr[1] = P#(
.WIDTH (WIDTH),
.PMP_LEN(PMP_LEN)
)::base_to_conf(
base, size
);
conf[1].addr_mode = riscv::NAPOT;
conf[1].access_type = '0;
#5ns;
assert (allow == 0);
// add third PMP entry that allows again
// Step 3 - entry index 0: size 3 region containing addr with read permission.
// It is found first, so the read must be allowed again.
base = 16'b00011001_10111000;
size = 3;
conf_addr[0] = P#(
.WIDTH (WIDTH),
.PMP_LEN(PMP_LEN)
)::base_to_conf(
base, size
);
conf[0].addr_mode = riscv::NAPOT;
conf[0].access_type = riscv::ACCESS_READ;
#5ns;
assert (allow == 1);
end
endmodule

View file

@ -14,23 +14,26 @@
package tb_pkg;
class P #(parameter WIDTH=32, parameter PMP_LEN=32);
static function logic[PMP_LEN-1:0] base_to_conf(logic[WIDTH-1:0] base, int unsigned size_i);
logic[PMP_LEN-1:0] pmp_reg;
class P #(
parameter WIDTH = 32,
parameter PMP_LEN = 32
);
static function logic [PMP_LEN-1:0] base_to_conf(logic [WIDTH-1:0] base, int unsigned size_i);
logic [PMP_LEN-1:0] pmp_reg;
pmp_reg = '0;
for (int i = 0; i < WIDTH-2 && i < PMP_LEN; i++) begin
if (i+3 > size_i) begin
pmp_reg[i] = base[i+2];
end else if (i+3 == size_i) begin
pmp_reg[i] = 1'b0;
end else begin
pmp_reg[i] = 1'b1;
end
end
pmp_reg = '0;
for (int i = 0; i < WIDTH - 2 && i < PMP_LEN; i++) begin
if (i + 3 > size_i) begin
pmp_reg[i] = base[i+2];
end else if (i + 3 == size_i) begin
pmp_reg[i] = 1'b0;
end else begin
pmp_reg[i] = 1'b1;
end
end
return pmp_reg;
endfunction
endclass
return pmp_reg;
endfunction
endclass
endpackage
endpackage

View file

@ -13,78 +13,78 @@
// Description: Scoreboard - keeps track of all decoded, issued and committed instructions
module scoreboard #(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter bit IsRVFI = bit'(0),
parameter type rs3_len_t = logic,
parameter int unsigned NR_ENTRIES = 8 // must be a power of 2
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter bit IsRVFI = bit'(0),
parameter type rs3_len_t = logic,
parameter int unsigned NR_ENTRIES = 8 // must be a power of 2
) (
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
output logic sb_full_o,
input logic flush_unissued_instr_i, // flush only un-issued instructions
input logic flush_i, // flush whole scoreboard
input logic unresolved_branch_i, // we have an unresolved branch
// list of clobbered registers to issue stage
output ariane_pkg::fu_t [2**ariane_pkg::REG_ADDR_SIZE-1:0] rd_clobber_gpr_o,
output ariane_pkg::fu_t [2**ariane_pkg::REG_ADDR_SIZE-1:0] rd_clobber_fpr_o,
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
output logic sb_full_o,
input logic flush_unissued_instr_i, // flush only un-issued instructions
input logic flush_i, // flush whole scoreboard
input logic unresolved_branch_i, // we have an unresolved branch
// list of clobbered registers to issue stage
output ariane_pkg::fu_t [2**ariane_pkg::REG_ADDR_SIZE-1:0] rd_clobber_gpr_o,
output ariane_pkg::fu_t [2**ariane_pkg::REG_ADDR_SIZE-1:0] rd_clobber_fpr_o,
// regfile like interface to operand read stage
input logic [ariane_pkg::REG_ADDR_SIZE-1:0] rs1_i,
output riscv::xlen_t rs1_o,
output logic rs1_valid_o,
// regfile like interface to operand read stage
input logic [ariane_pkg::REG_ADDR_SIZE-1:0] rs1_i,
output riscv::xlen_t rs1_o,
output logic rs1_valid_o,
input logic [ariane_pkg::REG_ADDR_SIZE-1:0] rs2_i,
output riscv::xlen_t rs2_o,
output logic rs2_valid_o,
input logic [ariane_pkg::REG_ADDR_SIZE-1:0] rs2_i,
output riscv::xlen_t rs2_o,
output logic rs2_valid_o,
input logic [ariane_pkg::REG_ADDR_SIZE-1:0] rs3_i,
output rs3_len_t rs3_o,
output logic rs3_valid_o,
input logic [ariane_pkg::REG_ADDR_SIZE-1:0] rs3_i,
output rs3_len_t rs3_o,
output logic rs3_valid_o,
// advertise instruction to commit stage, if commit_ack_i is asserted advance the commit pointer
output ariane_pkg::scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr_o,
input logic [CVA6Cfg.NrCommitPorts-1:0] commit_ack_i,
// advertise instruction to commit stage, if commit_ack_i is asserted advance the commit pointer
output ariane_pkg::scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr_o,
input logic [CVA6Cfg.NrCommitPorts-1:0] commit_ack_i,
// instruction to put on top of scoreboard e.g.: top pointer
// we can always put this instruction to the top unless we signal with asserted full_o
input ariane_pkg::scoreboard_entry_t decoded_instr_i,
input logic decoded_instr_valid_i,
output logic decoded_instr_ack_o,
// instruction to put on top of scoreboard e.g.: top pointer
// we can always put this instruction to the top unless we signal with asserted full_o
input ariane_pkg::scoreboard_entry_t decoded_instr_i,
input logic decoded_instr_valid_i,
output logic decoded_instr_ack_o,
// instruction to issue logic, if issue_instr_valid and issue_ready is asserted, advance the issue pointer
output ariane_pkg::scoreboard_entry_t issue_instr_o,
output logic issue_instr_valid_o,
input logic issue_ack_i,
// instruction to issue logic, if issue_instr_valid and issue_ready is asserted, advance the issue pointer
output ariane_pkg::scoreboard_entry_t issue_instr_o,
output logic issue_instr_valid_o,
input logic issue_ack_i,
// write-back port
input ariane_pkg::bp_resolve_t resolved_branch_i,
input logic [CVA6Cfg.NrWbPorts-1:0][ariane_pkg::TRANS_ID_BITS-1:0] trans_id_i, // transaction ID at which to write the result back
input logic [CVA6Cfg.NrWbPorts-1:0][riscv::XLEN-1:0] wbdata_i, // write data in
input ariane_pkg::exception_t [CVA6Cfg.NrWbPorts-1:0] ex_i, // exception from a functional unit (e.g.: ld/st exception)
input logic [CVA6Cfg.NrWbPorts-1:0] wt_valid_i, // data in is valid
input logic x_we_i, // cvxif we for writeback
// write-back port
input ariane_pkg::bp_resolve_t resolved_branch_i,
input logic [CVA6Cfg.NrWbPorts-1:0][ariane_pkg::TRANS_ID_BITS-1:0] trans_id_i, // transaction ID at which to write the result back
input logic [CVA6Cfg.NrWbPorts-1:0][riscv::XLEN-1:0] wbdata_i, // write data in
input ariane_pkg::exception_t [CVA6Cfg.NrWbPorts-1:0] ex_i, // exception from a functional unit (e.g.: ld/st exception)
input logic [CVA6Cfg.NrWbPorts-1:0] wt_valid_i, // data in is valid
input logic x_we_i, // cvxif we for writeback
// RVFI
input [riscv::VLEN-1:0] lsu_addr_i,
input [(riscv::XLEN/8)-1:0] lsu_rmask_i,
input [(riscv::XLEN/8)-1:0] lsu_wmask_i,
input [ariane_pkg::TRANS_ID_BITS-1:0] lsu_addr_trans_id_i,
input riscv::xlen_t rs1_forwarding_i,
input riscv::xlen_t rs2_forwarding_i
// RVFI
input [ riscv::VLEN-1:0] lsu_addr_i,
input [ (riscv::XLEN/8)-1:0] lsu_rmask_i,
input [ (riscv::XLEN/8)-1:0] lsu_wmask_i,
input [ariane_pkg::TRANS_ID_BITS-1:0] lsu_addr_trans_id_i,
input riscv::xlen_t rs1_forwarding_i,
input riscv::xlen_t rs2_forwarding_i
);
localparam int unsigned BITS_ENTRIES = $clog2(NR_ENTRIES);
// this is the FIFO struct of the issue queue
typedef struct packed {
logic issued; // this bit indicates whether we issued this instruction e.g.: if it is valid
logic is_rd_fpr_flag; // redundant meta info, added for speed
ariane_pkg::scoreboard_entry_t sbe; // this is the score board entry we will send to ex
logic issued; // this bit indicates whether we issued this instruction e.g.: if it is valid
logic is_rd_fpr_flag; // redundant meta info, added for speed
ariane_pkg::scoreboard_entry_t sbe; // this is the score board entry we will send to ex
} sb_mem_t;
sb_mem_t [NR_ENTRIES-1:0] mem_q, mem_n;
logic issue_full, issue_en;
logic [BITS_ENTRIES:0] issue_cnt_n, issue_cnt_q;
logic [BITS_ENTRIES-1:0] issue_pointer_n, issue_pointer_q;
logic issue_full, issue_en;
logic [BITS_ENTRIES:0] issue_cnt_n, issue_cnt_q;
logic [BITS_ENTRIES-1:0] issue_pointer_n, issue_pointer_q;
logic [CVA6Cfg.NrCommitPorts-1:0][BITS_ENTRIES-1:0] commit_pointer_n, commit_pointer_q;
logic [$clog2(CVA6Cfg.NrCommitPorts):0] num_commit;
@ -92,7 +92,7 @@ module scoreboard #(
// works since aligned to power of 2
assign issue_full = (issue_cnt_q[BITS_ENTRIES] == 1'b1);
assign sb_full_o = issue_full;
assign sb_full_o = issue_full;
ariane_pkg::scoreboard_entry_t decoded_instr;
always_comb begin
@ -130,18 +130,21 @@ module scoreboard #(
// keep track of all issued instructions
always_comb begin : issue_fifo
// default assignment
mem_n = mem_q;
issue_en = 1'b0;
mem_n = mem_q;
issue_en = 1'b0;
// if we got an acknowledge from the issue stage, put this scoreboard entry in the queue
if (decoded_instr_valid_i && decoded_instr_ack_o && !flush_unissued_instr_i) begin
// the decoded instruction we put in there is valid (1st bit)
// increase the issue counter and advance issue pointer
issue_en = 1'b1;
mem_n[issue_pointer_q] = {1'b1, // valid bit
(CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(decoded_instr_i.op)), // whether rd goes to the fpr
decoded_instr // decoded instruction record
};
mem_n[issue_pointer_q] = {
1'b1, // valid bit
(CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(
decoded_instr_i.op
)), // whether rd goes to the fpr
decoded_instr // decoded instruction record
};
end
// ------------
@ -149,8 +152,7 @@ module scoreboard #(
// ------------
for (int unsigned i = 0; i < NR_ENTRIES; i++) begin
// The FU is NONE -> this instruction is valid immediately
if (mem_q[i].sbe.fu == ariane_pkg::NONE && mem_q[i].issued)
mem_n[i].sbe.valid = 1'b1;
if (mem_q[i].sbe.fu == ariane_pkg::NONE && mem_q[i].issued) mem_n[i].sbe.valid = 1'b1;
end
// ------------
@ -158,10 +160,10 @@ module scoreboard #(
// ------------
if (IsRVFI) begin
if (lsu_rmask_i != 0) begin
mem_n[lsu_addr_trans_id_i].sbe.lsu_addr = lsu_addr_i;
mem_n[lsu_addr_trans_id_i].sbe.lsu_addr = lsu_addr_i;
mem_n[lsu_addr_trans_id_i].sbe.lsu_rmask = lsu_rmask_i;
end else if (lsu_wmask_i != 0) begin
mem_n[lsu_addr_trans_id_i].sbe.lsu_addr = lsu_addr_i;
mem_n[lsu_addr_trans_id_i].sbe.lsu_addr = lsu_addr_i;
mem_n[lsu_addr_trans_id_i].sbe.lsu_wmask = lsu_wmask_i;
mem_n[lsu_addr_trans_id_i].sbe.lsu_wdata = wbdata_i[1];
end
@ -171,7 +173,7 @@ module scoreboard #(
// check if this instruction was issued (e.g.: it could happen after a flush that there is still
// something in the pipeline e.g. an incomplete memory operation)
if (wt_valid_i[i] && mem_q[trans_id_i[i]].issued) begin
mem_n[trans_id_i[i]].sbe.valid = 1'b1;
mem_n[trans_id_i[i]].sbe.valid = 1'b1;
mem_n[trans_id_i[i]].sbe.result = wbdata_i[i];
// save the target address of a branch (needed for debug in commit stage)
mem_n[trans_id_i[i]].sbe.bp.predict_address = resolved_branch_i.target_address;
@ -179,11 +181,10 @@ module scoreboard #(
mem_n[trans_id_i[i]].sbe.rd = 5'b0;
end
// write the exception back if it is valid
if (ex_i[i].valid)
mem_n[trans_id_i[i]].sbe.ex = ex_i[i];
if (ex_i[i].valid) mem_n[trans_id_i[i]].sbe.ex = ex_i[i];
// write the fflags back from the FPU (exception valid is never set), leave tval intact
else if(CVA6Cfg.FpPresent && mem_q[trans_id_i[i]].sbe.fu inside {ariane_pkg::FPU, ariane_pkg::FPU_VEC}) begin
mem_n[trans_id_i[i]].sbe.ex.cause = ex_i[i].cause;
mem_n[trans_id_i[i]].sbe.ex.cause = ex_i[i].cause;
end
end
end
@ -195,8 +196,8 @@ module scoreboard #(
for (logic [CVA6Cfg.NrCommitPorts-1:0] i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin
if (commit_ack_i[i]) begin
// this instruction is no longer in issue e.g.: it is considered finished
mem_n[commit_pointer_q[i]].issued = 1'b0;
mem_n[commit_pointer_q[i]].sbe.valid = 1'b0;
mem_n[commit_pointer_q[i]].issued = 1'b0;
mem_n[commit_pointer_q[i]].sbe.valid = 1'b0;
end
end
@ -216,12 +217,12 @@ module scoreboard #(
// FIFO counter updates
assign num_commit = (CVA6Cfg.NrCommitPorts == 2) ? commit_ack_i[1] + commit_ack_i[0] : commit_ack_i[0];
assign issue_cnt_n = (flush_i) ? '0 : issue_cnt_q - num_commit + issue_en;
assign issue_cnt_n = (flush_i) ? '0 : issue_cnt_q - num_commit + issue_en;
assign commit_pointer_n[0] = (flush_i) ? '0 : commit_pointer_q[0] + num_commit;
assign issue_pointer_n = (flush_i) ? '0 : issue_pointer_q + issue_en;
assign issue_pointer_n = (flush_i) ? '0 : issue_pointer_q + issue_en;
// precompute offsets for commit slots
for (genvar k=1; k < CVA6Cfg.NrCommitPorts; k++) begin : gen_cnt_incr
for (genvar k = 1; k < CVA6Cfg.NrCommitPorts; k++) begin : gen_cnt_incr
assign commit_pointer_n[k] = (flush_i) ? '0 : commit_pointer_n[0] + unsigned'(k);
end
@ -229,17 +230,17 @@ module scoreboard #(
// RD clobber process
// -------------------
// rd_clobber output: output currently clobbered destination registers
logic [2**ariane_pkg::REG_ADDR_SIZE-1:0][NR_ENTRIES:0] gpr_clobber_vld;
logic [2**ariane_pkg::REG_ADDR_SIZE-1:0][NR_ENTRIES:0] fpr_clobber_vld;
ariane_pkg::fu_t [NR_ENTRIES:0] clobber_fu;
logic [2**ariane_pkg::REG_ADDR_SIZE-1:0][NR_ENTRIES:0] gpr_clobber_vld;
logic [2**ariane_pkg::REG_ADDR_SIZE-1:0][NR_ENTRIES:0] fpr_clobber_vld;
ariane_pkg::fu_t [ NR_ENTRIES:0] clobber_fu;
always_comb begin : clobber_assign
gpr_clobber_vld = '0;
fpr_clobber_vld = '0;
gpr_clobber_vld = '0;
fpr_clobber_vld = '0;
// default (highest entry has lowest prio in arbiter tree below)
clobber_fu[NR_ENTRIES] = ariane_pkg::NONE;
for (int unsigned i = 0; i < 2**ariane_pkg::REG_ADDR_SIZE; i++) begin
for (int unsigned i = 0; i < 2 ** ariane_pkg::REG_ADDR_SIZE; i++) begin
gpr_clobber_vld[i][NR_ENTRIES] = 1'b1;
fpr_clobber_vld[i][NR_ENTRIES] = 1'b1;
end
@ -255,45 +256,45 @@ module scoreboard #(
gpr_clobber_vld[0] = '0;
end
for (genvar k = 0; k < 2**ariane_pkg::REG_ADDR_SIZE; k++) begin : gen_sel_clobbers
for (genvar k = 0; k < 2 ** ariane_pkg::REG_ADDR_SIZE; k++) begin : gen_sel_clobbers
// get fu that is going to clobber this register (there should be only one)
rr_arb_tree #(
.NumIn(NR_ENTRIES+1),
.DataType(ariane_pkg::fu_t),
.ExtPrio(1'b1),
.AxiVldRdy(1'b1)
.NumIn(NR_ENTRIES + 1),
.DataType(ariane_pkg::fu_t),
.ExtPrio(1'b1),
.AxiVldRdy(1'b1)
) i_sel_gpr_clobbers (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.flush_i ( 1'b0 ),
.rr_i ( '0 ),
.req_i ( gpr_clobber_vld[k] ),
.gnt_o ( ),
.data_i ( clobber_fu ),
.gnt_i ( 1'b1 ),
.req_o ( ),
.data_o ( rd_clobber_gpr_o[k] ),
.idx_o ( )
.clk_i (clk_i),
.rst_ni (rst_ni),
.flush_i(1'b0),
.rr_i ('0),
.req_i (gpr_clobber_vld[k]),
.gnt_o (),
.data_i (clobber_fu),
.gnt_i (1'b1),
.req_o (),
.data_o (rd_clobber_gpr_o[k]),
.idx_o ()
);
if(CVA6Cfg.FpPresent) begin
rr_arb_tree #(
.NumIn(NR_ENTRIES+1),
if (CVA6Cfg.FpPresent) begin
rr_arb_tree #(
.NumIn(NR_ENTRIES + 1),
.DataType(ariane_pkg::fu_t),
.ExtPrio(1'b1),
.AxiVldRdy(1'b1)
) i_sel_fpr_clobbers (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.flush_i ( 1'b0 ),
.rr_i ( '0 ),
.req_i ( fpr_clobber_vld[k] ),
.gnt_o ( ),
.data_i ( clobber_fu ),
.gnt_i ( 1'b1 ),
.req_o ( ),
.data_o ( rd_clobber_fpr_o[k] ),
.idx_o ( )
);
) i_sel_fpr_clobbers (
.clk_i (clk_i),
.rst_ni (rst_ni),
.flush_i(1'b0),
.rr_i ('0),
.req_i (fpr_clobber_vld[k]),
.gnt_o (),
.data_i (clobber_fu),
.gnt_i (1'b1),
.req_o (),
.data_o (rd_clobber_fpr_o[k]),
.idx_o ()
);
end
end
@ -307,136 +308,157 @@ module scoreboard #(
// WB ports have higher prio than entries
for (genvar k = 0; unsigned'(k) < CVA6Cfg.NrWbPorts; k++) begin : gen_rs_wb
assign rs1_fwd_req[k] = (mem_q[trans_id_i[k]].sbe.rd == rs1_i) & wt_valid_i[k] & (~ex_i[k].valid) & (mem_q[trans_id_i[k]].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_rs1_fpr(issue_instr_o.op)));
assign rs2_fwd_req[k] = (mem_q[trans_id_i[k]].sbe.rd == rs2_i) & wt_valid_i[k] & (~ex_i[k].valid) & (mem_q[trans_id_i[k]].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_rs2_fpr(issue_instr_o.op)));
assign rs3_fwd_req[k] = (mem_q[trans_id_i[k]].sbe.rd == rs3_i) & wt_valid_i[k] & (~ex_i[k].valid) & (mem_q[trans_id_i[k]].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_imm_fpr(issue_instr_o.op)));
assign rs_data[k] = wbdata_i[k];
assign rs1_fwd_req[k] = (mem_q[trans_id_i[k]].sbe.rd == rs1_i) & wt_valid_i[k] & (~ex_i[k].valid) & (mem_q[trans_id_i[k]].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_rs1_fpr(
issue_instr_o.op
)));
assign rs2_fwd_req[k] = (mem_q[trans_id_i[k]].sbe.rd == rs2_i) & wt_valid_i[k] & (~ex_i[k].valid) & (mem_q[trans_id_i[k]].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_rs2_fpr(
issue_instr_o.op
)));
assign rs3_fwd_req[k] = (mem_q[trans_id_i[k]].sbe.rd == rs3_i) & wt_valid_i[k] & (~ex_i[k].valid) & (mem_q[trans_id_i[k]].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_imm_fpr(
issue_instr_o.op
)));
assign rs_data[k] = wbdata_i[k];
end
for (genvar k = 0; unsigned'(k) < NR_ENTRIES; k++) begin : gen_rs_entries
assign rs1_fwd_req[k+CVA6Cfg.NrWbPorts] = (mem_q[k].sbe.rd == rs1_i) & mem_q[k].issued & mem_q[k].sbe.valid & (mem_q[k].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_rs1_fpr(issue_instr_o.op)));
assign rs2_fwd_req[k+CVA6Cfg.NrWbPorts] = (mem_q[k].sbe.rd == rs2_i) & mem_q[k].issued & mem_q[k].sbe.valid & (mem_q[k].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_rs2_fpr(issue_instr_o.op)));
assign rs3_fwd_req[k+CVA6Cfg.NrWbPorts] = (mem_q[k].sbe.rd == rs3_i) & mem_q[k].issued & mem_q[k].sbe.valid & (mem_q[k].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_imm_fpr(issue_instr_o.op)));
assign rs_data[k+CVA6Cfg.NrWbPorts] = mem_q[k].sbe.result;
assign rs1_fwd_req[k+CVA6Cfg.NrWbPorts] = (mem_q[k].sbe.rd == rs1_i) & mem_q[k].issued & mem_q[k].sbe.valid & (mem_q[k].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_rs1_fpr(
issue_instr_o.op
)));
assign rs2_fwd_req[k+CVA6Cfg.NrWbPorts] = (mem_q[k].sbe.rd == rs2_i) & mem_q[k].issued & mem_q[k].sbe.valid & (mem_q[k].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_rs2_fpr(
issue_instr_o.op
)));
assign rs3_fwd_req[k+CVA6Cfg.NrWbPorts] = (mem_q[k].sbe.rd == rs3_i) & mem_q[k].issued & mem_q[k].sbe.valid & (mem_q[k].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_imm_fpr(
issue_instr_o.op
)));
assign rs_data[k+CVA6Cfg.NrWbPorts] = mem_q[k].sbe.result;
end
// check whether we are accessing GPR[0]
assign rs1_valid_o = rs1_valid & ((|rs1_i) | (CVA6Cfg.FpPresent && ariane_pkg::is_rs1_fpr(issue_instr_o.op)));
assign rs2_valid_o = rs2_valid & ((|rs2_i) | (CVA6Cfg.FpPresent && ariane_pkg::is_rs2_fpr(issue_instr_o.op)));
assign rs3_valid_o = CVA6Cfg.NrRgprPorts == 3 ? rs3_valid & ((|rs3_i) | (CVA6Cfg.FpPresent && ariane_pkg::is_imm_fpr(issue_instr_o.op))) : rs3_valid;
assign rs1_valid_o = rs1_valid & ((|rs1_i) | (CVA6Cfg.FpPresent && ariane_pkg::is_rs1_fpr(
issue_instr_o.op
)));
assign rs2_valid_o = rs2_valid & ((|rs2_i) | (CVA6Cfg.FpPresent && ariane_pkg::is_rs2_fpr(
issue_instr_o.op
)));
assign rs3_valid_o = CVA6Cfg.NrRgprPorts == 3 ? rs3_valid & ((|rs3_i) | (CVA6Cfg.FpPresent && ariane_pkg::is_imm_fpr(
issue_instr_o.op
))) : rs3_valid;
// use fixed prio here
// this implicitly gives higher prio to WB ports
rr_arb_tree #(
.NumIn(NR_ENTRIES+CVA6Cfg.NrWbPorts),
.DataWidth(riscv::XLEN),
.ExtPrio(1'b1),
.AxiVldRdy(1'b1)
.NumIn(NR_ENTRIES + CVA6Cfg.NrWbPorts),
.DataWidth(riscv::XLEN),
.ExtPrio(1'b1),
.AxiVldRdy(1'b1)
) i_sel_rs1 (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.flush_i ( 1'b0 ),
.rr_i ( '0 ),
.req_i ( rs1_fwd_req ),
.gnt_o ( ),
.data_i ( rs_data ),
.gnt_i ( 1'b1 ),
.req_o ( rs1_valid ),
.data_o ( rs1_o ),
.idx_o ( )
.clk_i (clk_i),
.rst_ni (rst_ni),
.flush_i(1'b0),
.rr_i ('0),
.req_i (rs1_fwd_req),
.gnt_o (),
.data_i (rs_data),
.gnt_i (1'b1),
.req_o (rs1_valid),
.data_o (rs1_o),
.idx_o ()
);
rr_arb_tree #(
.NumIn(NR_ENTRIES+CVA6Cfg.NrWbPorts),
.DataWidth(riscv::XLEN),
.ExtPrio(1'b1),
.AxiVldRdy(1'b1)
.NumIn(NR_ENTRIES + CVA6Cfg.NrWbPorts),
.DataWidth(riscv::XLEN),
.ExtPrio(1'b1),
.AxiVldRdy(1'b1)
) i_sel_rs2 (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.flush_i ( 1'b0 ),
.rr_i ( '0 ),
.req_i ( rs2_fwd_req ),
.gnt_o ( ),
.data_i ( rs_data ),
.gnt_i ( 1'b1 ),
.req_o ( rs2_valid ),
.data_o ( rs2_o ),
.idx_o ( )
.clk_i (clk_i),
.rst_ni (rst_ni),
.flush_i(1'b0),
.rr_i ('0),
.req_i (rs2_fwd_req),
.gnt_o (),
.data_i (rs_data),
.gnt_i (1'b1),
.req_o (rs2_valid),
.data_o (rs2_o),
.idx_o ()
);
riscv::xlen_t rs3;
riscv::xlen_t rs3;
rr_arb_tree #(
.NumIn(NR_ENTRIES+CVA6Cfg.NrWbPorts),
.DataWidth(riscv::XLEN),
.ExtPrio(1'b1),
.AxiVldRdy(1'b1)
.NumIn(NR_ENTRIES + CVA6Cfg.NrWbPorts),
.DataWidth(riscv::XLEN),
.ExtPrio(1'b1),
.AxiVldRdy(1'b1)
) i_sel_rs3 (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.flush_i ( 1'b0 ),
.rr_i ( '0 ),
.req_i ( rs3_fwd_req ),
.gnt_o ( ),
.data_i ( rs_data ),
.gnt_i ( 1'b1 ),
.req_o ( rs3_valid ),
.data_o ( rs3 ),
.idx_o ( )
.clk_i (clk_i),
.rst_ni (rst_ni),
.flush_i(1'b0),
.rr_i ('0),
.req_i (rs3_fwd_req),
.gnt_o (),
.data_i (rs_data),
.gnt_i (1'b1),
.req_o (rs3_valid),
.data_o (rs3),
.idx_o ()
);
if (CVA6Cfg.NrRgprPorts == 3) begin : gen_gp_three_port
assign rs3_o = rs3[riscv::XLEN-1:0];
assign rs3_o = rs3[riscv::XLEN-1:0];
end else begin : gen_fp_three_port
assign rs3_o = rs3[CVA6Cfg.FLen-1:0];
assign rs3_o = rs3[CVA6Cfg.FLen-1:0];
end
// sequential process
always_ff @(posedge clk_i or negedge rst_ni) begin : regs
if(!rst_ni) begin
mem_q <= '{default: sb_mem_t'(0)};
issue_cnt_q <= '0;
commit_pointer_q <= '0;
issue_pointer_q <= '0;
if (!rst_ni) begin
mem_q <= '{default: sb_mem_t'(0)};
issue_cnt_q <= '0;
commit_pointer_q <= '0;
issue_pointer_q <= '0;
end else begin
issue_cnt_q <= issue_cnt_n;
issue_pointer_q <= issue_pointer_n;
mem_q <= mem_n;
commit_pointer_q <= commit_pointer_n;
issue_cnt_q <= issue_cnt_n;
issue_pointer_q <= issue_pointer_n;
mem_q <= mem_n;
commit_pointer_q <= commit_pointer_n;
end
end
//pragma translate_off
initial begin
assert (NR_ENTRIES == 2**BITS_ENTRIES) else $fatal(1, "Scoreboard size needs to be a power of two.");
assert (NR_ENTRIES == 2 ** BITS_ENTRIES)
else $fatal(1, "Scoreboard size needs to be a power of two.");
end
// assert that zero is never set
assert property (
@(posedge clk_i) disable iff (!rst_ni) (rd_clobber_gpr_o[0] == ariane_pkg::NONE))
else $fatal (1,"RD 0 should not bet set");
assert property (@(posedge clk_i) disable iff (!rst_ni) (rd_clobber_gpr_o[0] == ariane_pkg::NONE))
else $fatal(1, "RD 0 should not bet set");
// assert that we never acknowledge a commit if the instruction is not valid
assert property (
@(posedge clk_i) disable iff (!rst_ni) commit_ack_i[0] |-> commit_instr_o[0].valid)
else $fatal (1,"Commit acknowledged but instruction is not valid");
else $fatal(1, "Commit acknowledged but instruction is not valid");
assert property (
@(posedge clk_i) disable iff (!rst_ni) commit_ack_i[1] |-> commit_instr_o[1].valid)
else $fatal (1,"Commit acknowledged but instruction is not valid");
else $fatal(1, "Commit acknowledged but instruction is not valid");
// assert that we never give an issue ack signal if the instruction is not valid
assert property (
@(posedge clk_i) disable iff (!rst_ni) issue_ack_i |-> issue_instr_valid_o)
else $fatal (1,"Issue acknowledged but instruction is not valid");
assert property (@(posedge clk_i) disable iff (!rst_ni) issue_ack_i |-> issue_instr_valid_o)
else $fatal(1, "Issue acknowledged but instruction is not valid");
// there should never be more than one instruction writing the same destination register (except x0)
// check that no functional unit is retiring with the same transaction id
for (genvar i = 0; i < CVA6Cfg.NrWbPorts; i++) begin
for (genvar j = 0; j < CVA6Cfg.NrWbPorts; j++) begin
for (genvar j = 0; j < CVA6Cfg.NrWbPorts; j++) begin
assert property (
@(posedge clk_i) disable iff (!rst_ni) wt_valid_i[i] && wt_valid_i[j] && (i != j) |-> (trans_id_i[i] != trans_id_i[j]))
else $fatal (1,"Two or more functional units are retiring instructions with the same transaction id!");
else
$fatal(
1,
"Two or more functional units are retiring instructions with the same transaction id!"
);
end
end
//pragma translate_on

View file

@ -15,41 +15,48 @@
// Description: simple 64bit serial divider
module serdiv import ariane_pkg::*; #(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter WIDTH = 64,
parameter STABLE_HANDSHAKE = 0 // Guarantee a stable in_rdy_o during the input handshake. Keep it at 0 in CVA6
module serdiv
import ariane_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter WIDTH = 64,
parameter STABLE_HANDSHAKE = 0 // Guarantee a stable in_rdy_o during the input handshake. Keep it at 0 in CVA6
) (
input logic clk_i,
input logic rst_ni,
// input IF
input logic [TRANS_ID_BITS-1:0] id_i,
input logic [WIDTH-1:0] op_a_i,
input logic [WIDTH-1:0] op_b_i,
input logic [1:0] opcode_i, // 0: udiv, 2: urem, 1: div, 3: rem
// handshake
input logic in_vld_i, // there is a cycle delay from in_rdy_o->in_vld_i, see issue_read_operands.sv stage
output logic in_rdy_o,
input logic flush_i,
// output IF
output logic out_vld_o,
input logic out_rdy_i,
output logic [TRANS_ID_BITS-1:0] id_o,
output logic [WIDTH-1:0] res_o
input logic clk_i,
input logic rst_ni,
// input IF
input logic [TRANS_ID_BITS-1:0] id_i,
input logic [WIDTH-1:0] op_a_i,
input logic [WIDTH-1:0] op_b_i,
input logic [1:0] opcode_i, // 0: udiv, 2: urem, 1: div, 3: rem
// handshake
input logic in_vld_i, // there is a cycle delay from in_rdy_o->in_vld_i, see issue_read_operands.sv stage
output logic in_rdy_o,
input logic flush_i,
// output IF
output logic out_vld_o,
input logic out_rdy_i,
output logic [TRANS_ID_BITS-1:0] id_o,
output logic [WIDTH-1:0] res_o
);
/////////////////////////////////////
// signal declarations
/////////////////////////////////////
/////////////////////////////////////
// signal declarations
/////////////////////////////////////
enum logic [1:0] {IDLE, DIVIDE, FINISH} state_d, state_q;
enum logic [1:0] {
IDLE,
DIVIDE,
FINISH
}
state_d, state_q;
logic [WIDTH-1:0] res_q, res_d;
logic [WIDTH-1:0] op_a_q, op_a_d;
logic [WIDTH-1:0] op_b_q, op_b_d;
logic op_a_sign, op_b_sign;
logic op_b_zero, op_b_zero_q, op_b_zero_d;
logic op_b_neg_one, op_b_neg_one_q, op_b_neg_one_d;
logic [WIDTH-1:0] res_q, res_d;
logic [WIDTH-1:0] op_a_q, op_a_d;
logic [WIDTH-1:0] op_b_q, op_b_d;
logic op_a_sign, op_b_sign;
logic op_b_zero, op_b_zero_q, op_b_zero_d;
logic op_b_neg_one, op_b_neg_one_q, op_b_neg_one_d;
logic [TRANS_ID_BITS-1:0] id_q, id_d;
@ -69,125 +76,124 @@ module serdiv import ariane_pkg::*; #(
logic [WIDTH-1:0] lzc_a_input, lzc_b_input, op_b;
logic [$clog2(WIDTH)-1:0] lzc_a_result, lzc_b_result;
logic [$clog2(WIDTH+1)-1:0] shift_a;
logic [$clog2(WIDTH+1):0] div_shift;
logic [ $clog2(WIDTH+1):0] div_shift;
logic a_reg_en, b_reg_en, res_reg_en, ab_comp, pm_sel, load_en;
logic lzc_a_no_one, lzc_b_no_one;
logic div_res_zero_d, div_res_zero_q;
/////////////////////////////////////
// align the input operands
// for faster division
/////////////////////////////////////
/////////////////////////////////////
// align the input operands
// for faster division
/////////////////////////////////////
assign op_a_sign = op_a_i[$high(op_a_i)];
assign op_b_sign = op_b_i[$high(op_b_i)];
assign op_a_sign = op_a_i[$high(op_a_i)];
assign op_b_sign = op_b_i[$high(op_b_i)];
assign op_b_zero = lzc_b_no_one & ~op_b_sign;
assign op_b_neg_one = lzc_b_no_one & op_b_sign;
assign op_b_neg_one = lzc_b_no_one & op_b_sign;
assign lzc_a_input = (opcode_i[0] & op_a_sign) ? {~op_a_i[$high(op_a_i)-1:0], 1'b1} : op_a_i;
assign lzc_b_input = (opcode_i[0] & op_b_sign) ? ~op_b_i : op_b_i;
assign lzc_a_input = (opcode_i[0] & op_a_sign) ? {~op_a_i[$high(op_a_i)-1:0], 1'b1} : op_a_i;
assign lzc_b_input = (opcode_i[0] & op_b_sign) ? ~op_b_i : op_b_i;
lzc #(
.MODE ( 1 ), // count leading zeros
.WIDTH ( WIDTH )
.MODE (1), // count leading zeros
.WIDTH(WIDTH)
) i_lzc_a (
.in_i ( lzc_a_input ),
.cnt_o ( lzc_a_result ),
.empty_o ( lzc_a_no_one )
.in_i (lzc_a_input),
.cnt_o (lzc_a_result),
.empty_o(lzc_a_no_one)
);
lzc #(
.MODE ( 1 ), // count leading zeros
.WIDTH ( WIDTH )
.MODE (1), // count leading zeros
.WIDTH(WIDTH)
) i_lzc_b (
.in_i ( lzc_b_input ),
.cnt_o ( lzc_b_result ),
.empty_o ( lzc_b_no_one )
.in_i (lzc_b_input),
.cnt_o (lzc_b_result),
.empty_o(lzc_b_no_one)
);
assign shift_a = (lzc_a_no_one) ? WIDTH : {1'b0, lzc_a_result};
assign div_shift = {1'b0, lzc_b_result} - shift_a;
assign shift_a = (lzc_a_no_one) ? WIDTH : {1'b0, lzc_a_result};
assign div_shift = {1'b0, lzc_b_result} - shift_a;
assign op_b = op_b_i <<< $unsigned(div_shift);
assign op_b = op_b_i <<< $unsigned(div_shift);
// the division is zero if |opB| > |opA| and can be terminated
assign div_res_zero_d = (load_en) ? div_shift[$high(div_shift)] : div_res_zero_q;
/////////////////////////////////////
// Datapath
/////////////////////////////////////
/////////////////////////////////////
// Datapath
/////////////////////////////////////
assign pm_sel = load_en & ~(opcode_i[0] & (op_a_sign ^ op_b_sign));
assign pm_sel = load_en & ~(opcode_i[0] & (op_a_sign ^ op_b_sign));
// muxes
assign add_mux = (load_en) ? op_a_i : op_b_q;
assign add_mux = (load_en) ? op_a_i : op_b_q;
// attention: logical shift by one in case of negative operand B!
assign b_mux = (load_en) ? op_b : {comp_inv_q, (op_b_q[$high(op_b_q):1])};
assign b_mux = (load_en) ? op_b : {comp_inv_q, (op_b_q[$high(op_b_q):1])};
// in case of bad timing, we could output from regs -> needs a cycle more in the FSM
assign out_mux = (rem_sel_q) ? (op_b_neg_one_q ? '0 : op_a_q) : (op_b_zero_q ? '1 : (op_b_neg_one_q ? op_a_q : res_q));
// invert if necessary
assign res_o = (res_inv_q) ? -$signed(out_mux) : out_mux;
assign res_o = (res_inv_q) ? -$signed(out_mux) : out_mux;
// main comparator
assign ab_comp = ((op_a_q == op_b_q) | ((op_a_q > op_b_q) ^ comp_inv_q)) & ((|op_a_q) | op_b_zero_q);
// main adder
assign add_tmp = (load_en) ? 0 : op_a_q;
assign add_out = (pm_sel) ? add_tmp + add_mux : add_tmp - $signed(add_mux);
assign add_tmp = (load_en) ? 0 : op_a_q;
assign add_out = (pm_sel) ? add_tmp + add_mux : add_tmp - $signed(add_mux);
/////////////////////////////////////
// FSM, counter
/////////////////////////////////////
/////////////////////////////////////
// FSM, counter
/////////////////////////////////////
assign cnt_zero = (cnt_q == 0);
assign cnt_d = (load_en) ? div_shift[$clog2(WIDTH)-1:0] :
(~cnt_zero) ? cnt_q - 1 : cnt_q;
assign cnt_d = (load_en) ? div_shift[$clog2(WIDTH)-1:0] : (~cnt_zero) ? cnt_q - 1 : cnt_q;
always_comb begin : p_fsm
// default
state_d = state_q;
in_rdy_o = 1'b0;
out_vld_o = 1'b0;
load_en = 1'b0;
a_reg_en = 1'b0;
b_reg_en = 1'b0;
res_reg_en = 1'b0;
state_d = state_q;
in_rdy_o = 1'b0;
out_vld_o = 1'b0;
load_en = 1'b0;
a_reg_en = 1'b0;
b_reg_en = 1'b0;
res_reg_en = 1'b0;
unique case (state_q)
IDLE: begin
in_rdy_o = 1'b1;
in_rdy_o = 1'b1;
if (in_vld_i) begin
// CVA6: there is a cycle delay until the valid signal is asserted by the id stage
// Ara: we need a stable handshake
in_rdy_o = (STABLE_HANDSHAKE) ? 1'b1 : 1'b0;
a_reg_en = 1'b1;
b_reg_en = 1'b1;
load_en = 1'b1;
state_d = DIVIDE;
in_rdy_o = (STABLE_HANDSHAKE) ? 1'b1 : 1'b0;
a_reg_en = 1'b1;
b_reg_en = 1'b1;
load_en = 1'b1;
state_d = DIVIDE;
end
end
DIVIDE: begin
if (~(div_res_zero_q | op_b_zero_q | op_b_neg_one_q)) begin
a_reg_en = ab_comp;
b_reg_en = 1'b1;
res_reg_en = 1'b1;
a_reg_en = ab_comp;
b_reg_en = 1'b1;
res_reg_en = 1'b1;
end
// can end the division immediately if the result is known
if (div_res_zero_q | op_b_zero_q | op_b_neg_one_q) begin
out_vld_o = 1'b1;
state_d = FINISH;
if(out_rdy_i) begin
if (out_rdy_i) begin
// in_rdy_o = 1'b1;// there is a cycle delay until the valid signal is asserted by the id stage
state_d = IDLE;
state_d = IDLE;
end
end else if (cnt_zero) begin
state_d = FINISH;
state_d = FINISH;
end
end
FINISH: begin
@ -195,41 +201,40 @@ module serdiv import ariane_pkg::*; #(
if (out_rdy_i) begin
// in_rdy_o = 1'b1;// there is a cycle delay until the valid signal is asserted by the id stage
state_d = IDLE;
state_d = IDLE;
end
end
default : state_d = IDLE;
default: state_d = IDLE;
endcase
if (flush_i) begin
in_rdy_o = 1'b0;
out_vld_o = 1'b0;
a_reg_en = 1'b0;
b_reg_en = 1'b0;
load_en = 1'b0;
state_d = IDLE;
in_rdy_o = 1'b0;
out_vld_o = 1'b0;
a_reg_en = 1'b0;
b_reg_en = 1'b0;
load_en = 1'b0;
state_d = IDLE;
end
end
/////////////////////////////////////
// regs, flags
/////////////////////////////////////
/////////////////////////////////////
// regs, flags
/////////////////////////////////////
// get flags
assign rem_sel_d = (load_en) ? opcode_i[1] : rem_sel_q;
assign comp_inv_d = (load_en) ? opcode_i[0] & op_b_sign : comp_inv_q;
assign op_b_zero_d = (load_en) ? op_b_zero : op_b_zero_q;
assign op_b_neg_one_d = (load_en) ? op_b_neg_one : op_b_neg_one_q;
assign rem_sel_d = (load_en) ? opcode_i[1] : rem_sel_q;
assign comp_inv_d = (load_en) ? opcode_i[0] & op_b_sign : comp_inv_q;
assign op_b_zero_d = (load_en) ? op_b_zero : op_b_zero_q;
assign op_b_neg_one_d = (load_en) ? op_b_neg_one : op_b_neg_one_q;
assign res_inv_d = (load_en) ? (~op_b_zero | opcode_i[1]) & opcode_i[0] & (op_a_sign ^ op_b_sign ^ op_b_neg_one) : res_inv_q;
// transaction id
assign id_d = (load_en) ? id_i : id_q;
assign id_o = id_q;
assign op_a_d = (a_reg_en) ? add_out : op_a_q;
assign op_b_d = (b_reg_en) ? b_mux : op_b_q;
assign res_d = (load_en) ? '0 :
(res_reg_en) ? {res_q[$high(res_q)-1:0], ab_comp} : res_q;
assign op_a_d = (a_reg_en) ? add_out : op_a_q;
assign op_b_d = (b_reg_en) ? b_mux : op_b_q;
assign res_d = (load_en) ? '0 : (res_reg_en) ? {res_q[$high(res_q)-1:0], ab_comp} : res_q;
always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
if (~rst_ni) begin

View file

@ -14,272 +14,277 @@
// if they are no longer speculative
module store_buffer import ariane_pkg::*; #(
module store_buffer
import ariane_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) (
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic flush_i, // if we flush we need to pause the transactions on the memory
// otherwise we will run in a deadlock with the memory arbiter
input logic stall_st_pending_i, // Stall issuing non-speculative request
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic flush_i, // if we flush we need to pause the transactions on the memory
// otherwise we will run in a deadlock with the memory arbiter
input logic stall_st_pending_i, // Stall issuing non-speculative request
output logic no_st_pending_o, // non-speculative queue is empty (i.e.: everything is committed to the memory hierarchy)
output logic store_buffer_empty_o, // no store is pending in either the speculative queue or the non-speculative queue
input logic [11:0] page_offset_i, // page offset to check (the last 12 bits of the current load)
output logic page_offset_matches_o, // the above input page offset matches -> let the store buffer drain
input logic commit_i, // commit the instruction which was placed there most recently
output logic commit_ready_o, // commit queue is ready to accept another commit request
output logic ready_o, // the store queue is ready to accept a new request
// it is only ready if it can unconditionally commit the instruction, e.g.:
// the commit buffer needs to be empty
input logic valid_i, // this is a valid store
input logic commit_i, // commit the instruction which was placed there most recently
output logic commit_ready_o, // commit queue is ready to accept another commit request
output logic ready_o, // the store queue is ready to accept a new request
// it is only ready if it can unconditionally commit the instruction, e.g.:
// the commit buffer needs to be empty
input logic valid_i, // this is a valid store
input logic valid_without_flush_i, // just tell if the address which we are currently putting is valid and do not take any further action
input logic [riscv::PLEN-1:0] paddr_i, // physical address of store which needs to be placed in the queue
output [riscv::PLEN-1:0] mem_paddr_o,
input riscv::xlen_t data_i, // data which is placed in the queue
input logic [(riscv::XLEN/8)-1:0] be_i, // byte enable in
input logic [1:0] data_size_i, // type of request we are making (e.g.: bytes to write)
output [riscv::PLEN-1:0] mem_paddr_o,
input riscv::xlen_t data_i, // data which is placed in the queue
input logic [(riscv::XLEN/8)-1:0] be_i, // byte enable in
input logic [1:0] data_size_i, // type of request we are making (e.g.: bytes to write)
// D$ interface
input dcache_req_o_t req_port_i,
output dcache_req_i_t req_port_o
);
// the store queue has two parts:
// 1. Speculative queue
// 2. Commit queue which is non-speculative, e.g.: the store will definitely happen.
struct packed {
logic [riscv::PLEN-1:0] address;
riscv::xlen_t data;
logic [(riscv::XLEN/8)-1:0] be;
logic [1:0] data_size;
logic valid; // this entry is valid, we need this for checking if the address offset matches
} speculative_queue_n [DEPTH_SPEC-1:0], speculative_queue_q [DEPTH_SPEC-1:0],
commit_queue_n [DEPTH_COMMIT-1:0], commit_queue_q [DEPTH_COMMIT-1:0];
// the store queue has two parts:
// 1. Speculative queue
// 2. Commit queue which is non-speculative, e.g.: the store will definitely happen.
struct packed {
logic [riscv::PLEN-1:0] address;
riscv::xlen_t data;
logic [(riscv::XLEN/8)-1:0] be;
logic [1:0] data_size;
logic valid; // this entry is valid, we need this for checking if the address offset matches
}
speculative_queue_n[DEPTH_SPEC-1:0],
speculative_queue_q[DEPTH_SPEC-1:0],
commit_queue_n[DEPTH_COMMIT-1:0],
commit_queue_q[DEPTH_COMMIT-1:0];
// keep a status count for both buffers
logic [$clog2(DEPTH_SPEC):0] speculative_status_cnt_n, speculative_status_cnt_q;
logic [$clog2(DEPTH_COMMIT):0] commit_status_cnt_n, commit_status_cnt_q;
// Speculative queue
logic [$clog2(DEPTH_SPEC)-1:0] speculative_read_pointer_n, speculative_read_pointer_q;
logic [$clog2(DEPTH_SPEC)-1:0] speculative_write_pointer_n, speculative_write_pointer_q;
// Commit Queue
logic [$clog2(DEPTH_COMMIT)-1:0] commit_read_pointer_n, commit_read_pointer_q;
logic [$clog2(DEPTH_COMMIT)-1:0] commit_write_pointer_n, commit_write_pointer_q;
// keep a status count for both buffers
logic [$clog2(DEPTH_SPEC):0] speculative_status_cnt_n, speculative_status_cnt_q;
logic [$clog2(DEPTH_COMMIT):0] commit_status_cnt_n, commit_status_cnt_q;
// Speculative queue
logic [$clog2(DEPTH_SPEC)-1:0] speculative_read_pointer_n, speculative_read_pointer_q;
logic [$clog2(DEPTH_SPEC)-1:0] speculative_write_pointer_n, speculative_write_pointer_q;
// Commit Queue
logic [$clog2(DEPTH_COMMIT)-1:0] commit_read_pointer_n, commit_read_pointer_q;
logic [$clog2(DEPTH_COMMIT)-1:0] commit_write_pointer_n, commit_write_pointer_q;
assign store_buffer_empty_o = (speculative_status_cnt_q == 0) & no_st_pending_o;
// ----------------------------------------
// Speculative Queue - Core Interface
// ----------------------------------------
always_comb begin : core_if
automatic logic [$clog2(DEPTH_SPEC):0] speculative_status_cnt;
speculative_status_cnt = speculative_status_cnt_q;
assign store_buffer_empty_o = (speculative_status_cnt_q == 0) & no_st_pending_o;
// ----------------------------------------
// Speculative Queue - Core Interface
// ----------------------------------------
always_comb begin : core_if
automatic logic [$clog2(DEPTH_SPEC):0] speculative_status_cnt;
speculative_status_cnt = speculative_status_cnt_q;
// default assignments
speculative_status_cnt_n = speculative_status_cnt_q;
speculative_read_pointer_n = speculative_read_pointer_q;
speculative_write_pointer_n = speculative_write_pointer_q;
speculative_queue_n = speculative_queue_q;
// LSU interface
// we are ready to accept a new entry and the input data is valid
if (valid_i) begin
speculative_queue_n[speculative_write_pointer_q].address = paddr_i;
speculative_queue_n[speculative_write_pointer_q].data = data_i;
speculative_queue_n[speculative_write_pointer_q].be = be_i;
speculative_queue_n[speculative_write_pointer_q].data_size = data_size_i;
speculative_queue_n[speculative_write_pointer_q].valid = 1'b1;
// advance the write pointer
speculative_write_pointer_n = speculative_write_pointer_q + 1'b1;
speculative_status_cnt++;
end
// evict the current entry out of this queue, the commit queue will thankfully take it and commit it
// to the memory hierarchy
if (commit_i) begin
// invalidate
speculative_queue_n[speculative_read_pointer_q].valid = 1'b0;
// advance the read pointer
speculative_read_pointer_n = speculative_read_pointer_q + 1'b1;
speculative_status_cnt--;
end
speculative_status_cnt_n = speculative_status_cnt;
// when we flush evict the speculative stores
if (flush_i) begin
// reset all valid flags
for (int unsigned i = 0; i < DEPTH_SPEC; i++)
speculative_queue_n[i].valid = 1'b0;
speculative_write_pointer_n = speculative_read_pointer_q;
// also reset the status count
speculative_status_cnt_n = 'b0;
end
// we are ready if the speculative queue still has space left after this cycle's updates, or if an entry is being committed in this cycle
ready_o = (speculative_status_cnt_n < (DEPTH_SPEC)) || commit_i;
// default assignments
speculative_status_cnt_n = speculative_status_cnt_q;
speculative_read_pointer_n = speculative_read_pointer_q;
speculative_write_pointer_n = speculative_write_pointer_q;
speculative_queue_n = speculative_queue_q;
// LSU interface
// we are ready to accept a new entry and the input data is valid
if (valid_i) begin
speculative_queue_n[speculative_write_pointer_q].address = paddr_i;
speculative_queue_n[speculative_write_pointer_q].data = data_i;
speculative_queue_n[speculative_write_pointer_q].be = be_i;
speculative_queue_n[speculative_write_pointer_q].data_size = data_size_i;
speculative_queue_n[speculative_write_pointer_q].valid = 1'b1;
// advance the write pointer
speculative_write_pointer_n = speculative_write_pointer_q + 1'b1;
speculative_status_cnt++;
end
// ----------------------------------------
// Commit Queue - Memory Interface
// ----------------------------------------
// evict the current entry out of this queue, the commit queue will thankfully take it and commit it
// to the memory hierarchy
if (commit_i) begin
// invalidate
speculative_queue_n[speculative_read_pointer_q].valid = 1'b0;
// advance the read pointer
speculative_read_pointer_n = speculative_read_pointer_q + 1'b1;
speculative_status_cnt--;
end
// we will never kill a request in the store buffer since we already know that the translation is valid
// e.g.: a kill request will only be necessary if we are not sure if the requested memory address will result in a TLB fault
assign req_port_o.kill_req = 1'b0;
assign req_port_o.data_we = 1'b1; // we will always write in the store queue
assign req_port_o.tag_valid = 1'b0;
speculative_status_cnt_n = speculative_status_cnt;
// we do not require an acknowledgement for writes, thus we do not need to uniquely identify the responses
assign req_port_o.data_id = '0;
// those signals can directly be output to the memory
assign req_port_o.address_index = commit_queue_q[commit_read_pointer_q].address[ariane_pkg::DCACHE_INDEX_WIDTH-1:0];
// if we got a new request we already saved the tag from the previous cycle
assign req_port_o.address_tag = commit_queue_q[commit_read_pointer_q].address[ariane_pkg::DCACHE_TAG_WIDTH +
// when we flush evict the speculative stores
if (flush_i) begin
// reset all valid flags
for (int unsigned i = 0; i < DEPTH_SPEC; i++) speculative_queue_n[i].valid = 1'b0;
speculative_write_pointer_n = speculative_read_pointer_q;
// also reset the status count
speculative_status_cnt_n = 'b0;
end
// we are ready if the speculative queue still has space left after this cycle's updates, or if an entry is being committed in this cycle
ready_o = (speculative_status_cnt_n < (DEPTH_SPEC)) || commit_i;
end
// ----------------------------------------
// Commit Queue - Memory Interface
// ----------------------------------------
// we will never kill a request in the store buffer since we already know that the translation is valid
// e.g.: a kill request will only be necessary if we are not sure if the requested memory address will result in a TLB fault
assign req_port_o.kill_req = 1'b0;
assign req_port_o.data_we = 1'b1; // we will always write in the store queue
assign req_port_o.tag_valid = 1'b0;
// we do not require an acknowledgement for writes, thus we do not need to uniquely identify the responses
assign req_port_o.data_id = '0;
// those signals can directly be output to the memory
assign req_port_o.address_index = commit_queue_q[commit_read_pointer_q].address[ariane_pkg::DCACHE_INDEX_WIDTH-1:0];
// if we got a new request we already saved the tag from the previous cycle
assign req_port_o.address_tag = commit_queue_q[commit_read_pointer_q].address[ariane_pkg::DCACHE_TAG_WIDTH +
ariane_pkg::DCACHE_INDEX_WIDTH-1 :
ariane_pkg::DCACHE_INDEX_WIDTH];
assign req_port_o.data_wdata = commit_queue_q[commit_read_pointer_q].data;
assign req_port_o.data_be = commit_queue_q[commit_read_pointer_q].be;
assign req_port_o.data_size = commit_queue_q[commit_read_pointer_q].data_size;
assign req_port_o.data_wdata = commit_queue_q[commit_read_pointer_q].data;
assign req_port_o.data_be = commit_queue_q[commit_read_pointer_q].be;
assign req_port_o.data_size = commit_queue_q[commit_read_pointer_q].data_size;
assign mem_paddr_o = commit_queue_n[commit_read_pointer_n].address;
assign mem_paddr_o = commit_queue_n[commit_read_pointer_n].address;
always_comb begin : store_if
automatic logic [$clog2(DEPTH_COMMIT):0] commit_status_cnt;
commit_status_cnt = commit_status_cnt_q;
always_comb begin : store_if
automatic logic [$clog2(DEPTH_COMMIT):0] commit_status_cnt;
commit_status_cnt = commit_status_cnt_q;
commit_ready_o = (commit_status_cnt_q < DEPTH_COMMIT);
// no store is pending if we don't have any element in the commit queue e.g.: it is empty
no_st_pending_o = (commit_status_cnt_q == 0);
// default assignments
commit_read_pointer_n = commit_read_pointer_q;
commit_write_pointer_n = commit_write_pointer_q;
commit_ready_o = (commit_status_cnt_q < DEPTH_COMMIT);
// no store is pending if we don't have any element in the commit queue e.g.: it is empty
no_st_pending_o = (commit_status_cnt_q == 0);
// default assignments
commit_read_pointer_n = commit_read_pointer_q;
commit_write_pointer_n = commit_write_pointer_q;
commit_queue_n = commit_queue_q;
commit_queue_n = commit_queue_q;
req_port_o.data_req = 1'b0;
req_port_o.data_req = 1'b0;
// there should be no commit when we are flushing
// if the entry in the commit queue is valid and not speculative anymore we can issue this instruction
if (commit_queue_q[commit_read_pointer_q].valid && !stall_st_pending_i) begin
req_port_o.data_req = 1'b1;
if (req_port_i.data_gnt) begin
// we can evict it from the commit buffer
commit_queue_n[commit_read_pointer_q].valid = 1'b0;
// advance the read_pointer
commit_read_pointer_n = commit_read_pointer_q + 1'b1;
commit_status_cnt--;
end
end
// we ignore the rvalid signal for now as we assume that the store
// happened if we got a grant
// there should be no commit when we are flushing
// if the entry in the commit queue is valid and not speculative anymore we can issue this instruction
if (commit_queue_q[commit_read_pointer_q].valid && !stall_st_pending_i) begin
req_port_o.data_req = 1'b1;
if (req_port_i.data_gnt) begin
// we can evict it from the commit buffer
commit_queue_n[commit_read_pointer_q].valid = 1'b0;
// advance the read_pointer
commit_read_pointer_n = commit_read_pointer_q + 1'b1;
commit_status_cnt--;
end
end
// we ignore the rvalid signal for now as we assume that the store
// happened if we got a grant
// shift the store request from the speculative buffer to the non-speculative
if (commit_i) begin
commit_queue_n[commit_write_pointer_q] = speculative_queue_q[speculative_read_pointer_q];
commit_write_pointer_n = commit_write_pointer_n + 1'b1;
commit_status_cnt++;
end
commit_status_cnt_n = commit_status_cnt;
// shift the store request from the speculative buffer to the non-speculative
if (commit_i) begin
commit_queue_n[commit_write_pointer_q] = speculative_queue_q[speculative_read_pointer_q];
commit_write_pointer_n = commit_write_pointer_n + 1'b1;
commit_status_cnt++;
end
// ------------------
// Address Checker
// ------------------
// The load should return the data stored by the most recent store to the
// same physical address. The most direct way to implement this is to
// maintain physical addresses in the store buffer.
commit_status_cnt_n = commit_status_cnt;
end
// Of course, there are other micro-architectural techniques to accomplish
// the same thing: you can interlock and wait for the store buffer to
// drain if the load VA matches any store VA modulo the page size (i.e.
// bits 11:0). As a special case, it is correct to bypass if the full VA
// matches, and no younger stores' VAs match in bits 11:0.
//
// checks if the requested load is in the store buffer
// page offsets are virtually and physically the same
always_comb begin : address_checker
page_offset_matches_o = 1'b0;
// ------------------
// Address Checker
// ------------------
// The load should return the data stored by the most recent store to the
// same physical address. The most direct way to implement this is to
// maintain physical addresses in the store buffer.
// check if the LSBs are identical and the entry is valid
for (int unsigned i = 0; i < DEPTH_COMMIT; i++) begin
// Check if the page offset matches and whether the entry is valid, for the commit queue
if ((page_offset_i[11:3] == commit_queue_q[i].address[11:3]) && commit_queue_q[i].valid) begin
page_offset_matches_o = 1'b1;
break;
end
end
// Of course, there are other micro-architectural techniques to accomplish
// the same thing: you can interlock and wait for the store buffer to
// drain if the load VA matches any store VA modulo the page size (i.e.
// bits 11:0). As a special case, it is correct to bypass if the full VA
// matches, and no younger stores' VAs match in bits 11:0.
//
// checks if the requested load is in the store buffer
// page offsets are virtually and physically the same
always_comb begin : address_checker
page_offset_matches_o = 1'b0;
for (int unsigned i = 0; i < DEPTH_SPEC; i++) begin
// do the same for the speculative queue
if ((page_offset_i[11:3] == speculative_queue_q[i].address[11:3]) && speculative_queue_q[i].valid) begin
page_offset_matches_o = 1'b1;
break;
end
end
// or it matches with the entry we are currently putting into the queue
if ((page_offset_i[11:3] == paddr_i[11:3]) && valid_without_flush_i) begin
page_offset_matches_o = 1'b1;
end
// check if the LSBs are identical and the entry is valid
for (int unsigned i = 0; i < DEPTH_COMMIT; i++) begin
// Check if the page offset matches and whether the entry is valid, for the commit queue
if ((page_offset_i[11:3] == commit_queue_q[i].address[11:3]) && commit_queue_q[i].valid) begin
page_offset_matches_o = 1'b1;
break;
end
end
for (int unsigned i = 0; i < DEPTH_SPEC; i++) begin
// do the same for the speculative queue
if ((page_offset_i[11:3] == speculative_queue_q[i].address[11:3]) && speculative_queue_q[i].valid) begin
page_offset_matches_o = 1'b1;
break;
end
end
// or it matches with the entry we are currently putting into the queue
if ((page_offset_i[11:3] == paddr_i[11:3]) && valid_without_flush_i) begin
page_offset_matches_o = 1'b1;
end
end
// registers
// State registers of the speculative queue. They are cleared by the asynchronous,
// active-low reset and otherwise take over the next-state (_n) values every clock edge.
always_ff @(posedge clk_i or negedge rst_ni) begin : p_spec
  if (!rst_ni) begin
    // reset state: zero occupancy, both pointers at entry 0, all entries zeroed
    speculative_status_cnt_q    <= '0;
    speculative_write_pointer_q <= '0;
    speculative_read_pointer_q  <= '0;
    speculative_queue_q         <= '{default: 0};
  end else begin
    speculative_status_cnt_q    <= speculative_status_cnt_n;
    speculative_write_pointer_q <= speculative_write_pointer_n;
    speculative_read_pointer_q  <= speculative_read_pointer_n;
    speculative_queue_q         <= speculative_queue_n;
  end
end
// registers
// State registers of the commit queue. They are cleared by the asynchronous,
// active-low reset and otherwise take over the next-state (_n) values every clock edge.
always_ff @(posedge clk_i or negedge rst_ni) begin : p_commit
  if (!rst_ni) begin
    // reset state: zero occupancy, both pointers at entry 0, all entries zeroed
    commit_status_cnt_q    <= '0;
    commit_write_pointer_q <= '0;
    commit_read_pointer_q  <= '0;
    commit_queue_q         <= '{default: 0};
  end else begin
    commit_status_cnt_q    <= commit_status_cnt_n;
    commit_write_pointer_q <= commit_write_pointer_n;
    commit_read_pointer_q  <= commit_read_pointer_n;
    commit_queue_q         <= commit_queue_n;
  end
end
// registers
// State registers of the speculative queue. They are cleared by the asynchronous,
// active-low reset and otherwise take over the next-state (_n) values every clock edge.
always_ff @(posedge clk_i or negedge rst_ni) begin : p_spec
  if (!rst_ni) begin
    // reset state: zero occupancy, both pointers at entry 0, all entries zeroed
    speculative_status_cnt_q    <= '0;
    speculative_write_pointer_q <= '0;
    speculative_read_pointer_q  <= '0;
    speculative_queue_q         <= '{default: 0};
  end else begin
    speculative_status_cnt_q    <= speculative_status_cnt_n;
    speculative_write_pointer_q <= speculative_write_pointer_n;
    speculative_read_pointer_q  <= speculative_read_pointer_n;
    speculative_queue_q         <= speculative_queue_n;
  end
end
///////////////////////////////////////////////////////
// assertions
///////////////////////////////////////////////////////
// registers
// State registers of the commit queue. They are cleared by the asynchronous,
// active-low reset and otherwise take over the next-state (_n) values every clock edge.
always_ff @(posedge clk_i or negedge rst_ni) begin : p_commit
  if (!rst_ni) begin
    // reset state: zero occupancy, both pointers at entry 0, all entries zeroed
    commit_status_cnt_q    <= '0;
    commit_write_pointer_q <= '0;
    commit_read_pointer_q  <= '0;
    commit_queue_q         <= '{default: 0};
  end else begin
    commit_status_cnt_q    <= commit_status_cnt_n;
    commit_write_pointer_q <= commit_write_pointer_n;
    commit_read_pointer_q  <= commit_read_pointer_n;
    commit_queue_q         <= commit_queue_n;
  end
end
//pragma translate_off
// assert that commit is never set when we are flushing; this would be counterintuitive
// as flush and commit are decided in the same stage
commit_and_flush: assert property (
@(posedge clk_i) rst_ni && flush_i |-> !commit_i)
else $error ("[Commit Queue] You are trying to commit and flush in the same cycle");
///////////////////////////////////////////////////////
// assertions
///////////////////////////////////////////////////////
speculative_buffer_overflow: assert property (
@(posedge clk_i) rst_ni && (speculative_status_cnt_q == DEPTH_SPEC) |-> !valid_i)
else $error ("[Speculative Queue] You are trying to push new data although the buffer is not ready");
//pragma translate_off
// assert that commit is never set when we are flushing; this would be counterintuitive
// as flush and commit are decided in the same stage
commit_and_flush :
assert property (@(posedge clk_i) rst_ni && flush_i |-> !commit_i)
else $error("[Commit Queue] You are trying to commit and flush in the same cycle");
speculative_buffer_underflow: assert property (
@(posedge clk_i) rst_ni && (speculative_status_cnt_q == 0) |-> !commit_i)
else $error ("[Speculative Queue] You are committing although there are no stores to commit");
speculative_buffer_overflow :
assert property (@(posedge clk_i) rst_ni && (speculative_status_cnt_q == DEPTH_SPEC) |-> !valid_i)
else
$error("[Speculative Queue] You are trying to push new data although the buffer is not ready");
commit_buffer_overflow: assert property (
@(posedge clk_i) rst_ni && (commit_status_cnt_q == DEPTH_COMMIT) |-> !commit_i)
else $error("[Commit Queue] You are trying to commit a store although the buffer is full");
//pragma translate_on
speculative_buffer_underflow :
assert property (@(posedge clk_i) rst_ni && (speculative_status_cnt_q == 0) |-> !commit_i)
else $error("[Speculative Queue] You are committing although there are no stores to commit");
commit_buffer_overflow :
assert property (@(posedge clk_i) rst_ni && (commit_status_cnt_q == DEPTH_COMMIT) |-> !commit_i)
else $error("[Commit Queue] You are trying to commit a store although the buffer is full");
//pragma translate_on
endmodule

View file

@ -13,278 +13,279 @@
// Description: Store Unit, takes care of all store requests and atomic memory operations (AMOs)
module store_unit import ariane_pkg::*; #(
module store_unit
import ariane_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) (
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic flush_i,
input logic stall_st_pending_i,
output logic no_st_pending_o,
output logic store_buffer_empty_o,
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic flush_i,
input logic stall_st_pending_i,
output logic no_st_pending_o,
output logic store_buffer_empty_o,
// store unit input port
input logic valid_i,
input lsu_ctrl_t lsu_ctrl_i,
output logic pop_st_o,
input logic commit_i,
output logic commit_ready_o,
input logic amo_valid_commit_i,
input logic valid_i,
input lsu_ctrl_t lsu_ctrl_i,
output logic pop_st_o,
input logic commit_i,
output logic commit_ready_o,
input logic amo_valid_commit_i,
// store unit output port
output logic valid_o,
output logic valid_o,
output logic [TRANS_ID_BITS-1:0] trans_id_o,
output riscv::xlen_t result_o,
output exception_t ex_o,
output riscv::xlen_t result_o,
output exception_t ex_o,
// MMU -> Address Translation
output logic translation_req_o, // request address translation
output logic [riscv::VLEN-1:0] vaddr_o, // virtual address out
output [riscv::PLEN-1:0] mem_paddr_o,
input logic [riscv::PLEN-1:0] paddr_i, // physical address in
input exception_t ex_i,
output logic translation_req_o, // request address translation
output logic [riscv::VLEN-1:0] vaddr_o, // virtual address out
output [riscv::PLEN-1:0] mem_paddr_o,
input logic [riscv::PLEN-1:0] paddr_i, // physical address in
input exception_t ex_i,
input logic dtlb_hit_i, // will be one in the same cycle translation_req was asserted if it hits
// address checker
input logic [11:0] page_offset_i,
output logic page_offset_matches_o,
input logic [11:0] page_offset_i,
output logic page_offset_matches_o,
// D$ interface
output amo_req_t amo_req_o,
input amo_resp_t amo_resp_i,
input dcache_req_o_t req_port_i,
output dcache_req_i_t req_port_o
output amo_req_t amo_req_o,
input amo_resp_t amo_resp_i,
input dcache_req_o_t req_port_i,
output dcache_req_i_t req_port_o
);
// it doesn't matter what we are writing back as stores don't return anything
assign result_o = lsu_ctrl_i.data;
// it doesn't matter what we are writing back as stores don't return anything
assign result_o = lsu_ctrl_i.data;
enum logic [1:0] {
IDLE,
VALID_STORE,
WAIT_TRANSLATION,
WAIT_STORE_READY
} state_d, state_q;
enum logic [1:0] {
IDLE,
VALID_STORE,
WAIT_TRANSLATION,
WAIT_STORE_READY
}
state_d, state_q;
// store buffer control signals
logic st_ready;
logic st_valid;
logic st_valid_without_flush;
logic instr_is_amo;
assign instr_is_amo = is_amo(lsu_ctrl_i.operation);
// keep the data and the byte enable for the second cycle (after address translation)
riscv::xlen_t st_data_n, st_data_q;
logic [(riscv::XLEN/8)-1:0] st_be_n, st_be_q;
logic [1:0] st_data_size_n, st_data_size_q;
amo_t amo_op_d, amo_op_q;
// store buffer control signals
logic st_ready;
logic st_valid;
logic st_valid_without_flush;
logic instr_is_amo;
assign instr_is_amo = is_amo(lsu_ctrl_i.operation);
// keep the data and the byte enable for the second cycle (after address translation)
riscv::xlen_t st_data_n, st_data_q;
logic [(riscv::XLEN/8)-1:0] st_be_n, st_be_q;
logic [1:0] st_data_size_n, st_data_size_q;
amo_t amo_op_d, amo_op_q;
logic [TRANS_ID_BITS-1:0] trans_id_n, trans_id_q;
logic [TRANS_ID_BITS-1:0] trans_id_n, trans_id_q;
// output assignments
assign vaddr_o = lsu_ctrl_i.vaddr; // virtual address
assign trans_id_o = trans_id_q; // transaction id from previous cycle
// output assignments
assign vaddr_o = lsu_ctrl_i.vaddr; // virtual address
assign trans_id_o = trans_id_q; // transaction id from previous cycle
always_comb begin : store_control
translation_req_o = 1'b0;
valid_o = 1'b0;
st_valid = 1'b0;
st_valid_without_flush = 1'b0;
pop_st_o = 1'b0;
ex_o = ex_i;
trans_id_n = lsu_ctrl_i.trans_id;
state_d = state_q;
always_comb begin : store_control
translation_req_o = 1'b0;
valid_o = 1'b0;
st_valid = 1'b0;
st_valid_without_flush = 1'b0;
pop_st_o = 1'b0;
ex_o = ex_i;
trans_id_n = lsu_ctrl_i.trans_id;
state_d = state_q;
case (state_q)
// we got a valid store
IDLE: begin
if (valid_i) begin
state_d = VALID_STORE;
translation_req_o = 1'b1;
pop_st_o = 1'b1;
// check if translation was valid and we have space in the store buffer
// otherwise simply stall
if (ariane_pkg::MMU_PRESENT && !dtlb_hit_i) begin
state_d = WAIT_TRANSLATION;
pop_st_o = 1'b0;
end
case (state_q)
// we got a valid store
IDLE: begin
if (valid_i) begin
state_d = VALID_STORE;
translation_req_o = 1'b1;
pop_st_o = 1'b1;
// check if translation was valid and we have space in the store buffer
// otherwise simply stall
if (ariane_pkg::MMU_PRESENT && !dtlb_hit_i) begin
state_d = WAIT_TRANSLATION;
pop_st_o = 1'b0;
end
if (!st_ready) begin
state_d = WAIT_STORE_READY;
pop_st_o = 1'b0;
end
end
end
VALID_STORE: begin
valid_o = 1'b1;
// post this store to the store buffer if we are not flushing
if (!flush_i)
st_valid = 1'b1;
st_valid_without_flush = 1'b1;
// we have another request and it's not an AMO (the AMO buffer only has depth 1)
if (valid_i && !instr_is_amo) begin
translation_req_o = 1'b1;
state_d = VALID_STORE;
pop_st_o = 1'b1;
if (ariane_pkg::MMU_PRESENT && !dtlb_hit_i) begin
state_d = WAIT_TRANSLATION;
pop_st_o = 1'b0;
end
if (!st_ready) begin
state_d = WAIT_STORE_READY;
pop_st_o = 1'b0;
end
// if we do not have another request go back to idle
end else begin
state_d = IDLE;
end
end
// the store queue is currently full
WAIT_STORE_READY: begin
// keep the translation request high
translation_req_o = 1'b1;
if (st_ready && dtlb_hit_i) begin
state_d = IDLE;
end
end
// we didn't receive a valid translation, wait for one
// but we know that the store queue is not full as we could only have landed here if
// it wasn't full
WAIT_TRANSLATION: begin
if(ariane_pkg::MMU_PRESENT) begin
translation_req_o = 1'b1;
if (dtlb_hit_i) begin
state_d = IDLE;
end
end
end
endcase
// -----------------
// Access Exception
// -----------------
// we got an address translation exception (access rights, misaligned or page fault)
if (ex_i.valid && (state_q != IDLE)) begin
// the only difference is that we do not want to store this request
pop_st_o = 1'b1;
st_valid = 1'b0;
state_d = IDLE;
valid_o = 1'b1;
if (!st_ready) begin
state_d = WAIT_STORE_READY;
pop_st_o = 1'b0;
end
end
end
if (flush_i)
state_d = IDLE;
end
VALID_STORE: begin
valid_o = 1'b1;
// post this store to the store buffer if we are not flushing
if (!flush_i) st_valid = 1'b1;
// -----------
// Re-aligner
// -----------
// re-align the write data to comply with the address offset
// Computes the next-state values of the store payload registers (byte enables, write
// data, transfer size) and of the decoded AMO operation from the current LSU request.
always_comb begin
  // translate the LSU operation into the AMO opcode that is saved for the next cycle;
  // any operation not listed here yields AMO_NONE
  case (lsu_ctrl_i.operation)
    AMO_ADDW, AMO_ADDD:   amo_op_d = AMO_ADD;
    AMO_ANDW, AMO_ANDD:   amo_op_d = AMO_AND;
    AMO_LRW, AMO_LRD:     amo_op_d = AMO_LR;
    AMO_MAXW, AMO_MAXD:   amo_op_d = AMO_MAX;
    AMO_MAXWU, AMO_MAXDU: amo_op_d = AMO_MAXU;
    AMO_MINW, AMO_MIND:   amo_op_d = AMO_MIN;
    AMO_MINWU, AMO_MINDU: amo_op_d = AMO_MINU;
    AMO_ORW, AMO_ORD:     amo_op_d = AMO_OR;
    AMO_SCW, AMO_SCD:     amo_op_d = AMO_SC;
    AMO_SWAPW, AMO_SWAPD: amo_op_d = AMO_SWAP;
    AMO_XORW, AMO_XORD:   amo_op_d = AMO_XOR;
    default:              amo_op_d = AMO_NONE;
  endcase
  // the byte enables are taken over unchanged
  st_be_n = lsu_ctrl_i.be;
  // regular stores pass their data through data_align() together with the three address
  // LSBs; AMO operands are left unshifted because they still have to be operated on
  st_data_n = !instr_is_amo ? data_align(lsu_ctrl_i.vaddr[2:0], lsu_ctrl_i.data) :
                              lsu_ctrl_i.data[riscv::XLEN-1:0];
  st_data_size_n = extract_transfer_size(lsu_ctrl_i.operation);
end
st_valid_without_flush = 1'b1;
logic store_buffer_valid, amo_buffer_valid;
logic store_buffer_ready, amo_buffer_ready;
// we have another request and it's not an AMO (the AMO buffer only has depth 1)
if (valid_i && !instr_is_amo) begin
// multiplex between store unit and amo buffer
assign store_buffer_valid = st_valid & (amo_op_q == AMO_NONE);
assign amo_buffer_valid = st_valid & (amo_op_q != AMO_NONE);
translation_req_o = 1'b1;
state_d = VALID_STORE;
pop_st_o = 1'b1;
assign st_ready = store_buffer_ready & amo_buffer_ready;
if (ariane_pkg::MMU_PRESENT && !dtlb_hit_i) begin
state_d = WAIT_TRANSLATION;
pop_st_o = 1'b0;
end
// ---------------
// Store Queue
// ---------------
// Store queue instance. It is offered a store whenever store_buffer_valid is set and
// signals on store_buffer_ready whether it can take one.
store_buffer #(
  .CVA6Cfg(CVA6Cfg)
) store_buffer_i (
  // clock, reset and pipeline control
  .clk_i                (clk_i),
  .rst_ni               (rst_ni),
  .flush_i              (flush_i),
  .stall_st_pending_i   (stall_st_pending_i),
  // buffer status
  .no_st_pending_o      (no_st_pending_o),
  .store_buffer_empty_o (store_buffer_empty_o),
  // address checker
  .page_offset_i        (page_offset_i),
  .page_offset_matches_o(page_offset_matches_o),
  // commit handshake
  .commit_i             (commit_i),
  .commit_ready_o       (commit_ready_o),
  // store request handshake
  .valid_i              (store_buffer_valid),
  .ready_o              (store_buffer_ready),
  // The flush signal can be timing critical, and this valid signal is needed to check
  // whether the page offset matches. Functionally it makes no difference whether the
  // flush-gated valid or this one is used, as the whole pipeline is flushed anyway.
  .valid_without_flush_i(st_valid_without_flush),
  // store payload
  .paddr_i              (paddr_i),
  .data_i               (st_data_q),
  .be_i                 (st_be_q),
  .data_size_i          (st_data_size_q),
  .mem_paddr_o          (mem_paddr_o),
  // D$ interface
  .req_port_i           (req_port_i),
  .req_port_o           (req_port_o)
);
// AMO buffer instance. It is offered a request whenever amo_buffer_valid is set and
// signals on amo_buffer_ready whether it can take one.
amo_buffer #(
  .CVA6Cfg(CVA6Cfg)
) i_amo_buffer (
  // clock, reset and flush
  .clk_i             (clk_i),
  .rst_ni            (rst_ni),
  .flush_i           (flush_i),
  // request handshake
  .valid_i           (amo_buffer_valid),
  .ready_o           (amo_buffer_ready),
  // request payload: registered AMO opcode, data and size plus the physical address
  .amo_op_i          (amo_op_q),
  .paddr_i           (paddr_i),
  .data_i            (st_data_q),
  .data_size_i       (st_data_size_q),
  // commit / store-pending status
  .amo_valid_commit_i(amo_valid_commit_i),
  .no_st_pending_i   (no_st_pending_o),
  // AMO request/response channel
  .amo_req_o         (amo_req_o),
  .amo_resp_i        (amo_resp_i)
);
// ---------------
// Registers
// ---------------
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
state_q <= IDLE;
st_be_q <= '0;
st_data_q <= '0;
st_data_size_q <= '0;
trans_id_q <= '0;
amo_op_q <= AMO_NONE;
if (!st_ready) begin
state_d = WAIT_STORE_READY;
pop_st_o = 1'b0;
end
// if we do not have another request go back to idle
end else begin
state_q <= state_d;
st_be_q <= st_be_n;
st_data_q <= st_data_n;
trans_id_q <= trans_id_n;
st_data_size_q <= st_data_size_n;
amo_op_q <= amo_op_d;
state_d = IDLE;
end
end
// the store queue is currently full
WAIT_STORE_READY: begin
// keep the translation request high
translation_req_o = 1'b1;
if (st_ready && dtlb_hit_i) begin
state_d = IDLE;
end
end
// we didn't receive a valid translation, wait for one
// but we know that the store queue is not full as we could only have landed here if
// it wasn't full
WAIT_TRANSLATION: begin
if (ariane_pkg::MMU_PRESENT) begin
translation_req_o = 1'b1;
if (dtlb_hit_i) begin
state_d = IDLE;
end
end
end
endcase
// -----------------
// Access Exception
// -----------------
// we got an address translation exception (access rights, misaligned or page fault)
if (ex_i.valid && (state_q != IDLE)) begin
// the only difference is that we do not want to store this request
pop_st_o = 1'b1;
st_valid = 1'b0;
state_d = IDLE;
valid_o = 1'b1;
end
if (flush_i) state_d = IDLE;
end
// -----------
// Re-aligner
// -----------
// re-align the write data to comply with the address offset
// Computes the next-state values of the store payload registers (byte enables, write
// data, transfer size) and of the decoded AMO operation from the current LSU request.
always_comb begin
  // translate the LSU operation into the AMO opcode that is saved for the next cycle;
  // any operation not listed here yields AMO_NONE
  case (lsu_ctrl_i.operation)
    AMO_ADDW, AMO_ADDD:   amo_op_d = AMO_ADD;
    AMO_ANDW, AMO_ANDD:   amo_op_d = AMO_AND;
    AMO_LRW, AMO_LRD:     amo_op_d = AMO_LR;
    AMO_MAXW, AMO_MAXD:   amo_op_d = AMO_MAX;
    AMO_MAXWU, AMO_MAXDU: amo_op_d = AMO_MAXU;
    AMO_MINW, AMO_MIND:   amo_op_d = AMO_MIN;
    AMO_MINWU, AMO_MINDU: amo_op_d = AMO_MINU;
    AMO_ORW, AMO_ORD:     amo_op_d = AMO_OR;
    AMO_SCW, AMO_SCD:     amo_op_d = AMO_SC;
    AMO_SWAPW, AMO_SWAPD: amo_op_d = AMO_SWAP;
    AMO_XORW, AMO_XORD:   amo_op_d = AMO_XOR;
    default:              amo_op_d = AMO_NONE;
  endcase
  // the byte enables are taken over unchanged
  st_be_n = lsu_ctrl_i.be;
  // regular stores pass their data through data_align() together with the three address
  // LSBs; AMO operands are left unshifted because they still have to be operated on
  st_data_n = !instr_is_amo ? data_align(lsu_ctrl_i.vaddr[2:0], lsu_ctrl_i.data) :
                              lsu_ctrl_i.data[riscv::XLEN-1:0];
  st_data_size_n = extract_transfer_size(lsu_ctrl_i.operation);
end
// valid/ready wires towards the store buffer and the AMO buffer
logic store_buffer_valid;
logic store_buffer_ready;
logic amo_buffer_valid;
logic amo_buffer_ready;
// multiplex between store buffer and AMO buffer: the registered AMO opcode decides
// which of the two sees the valid store
assign amo_buffer_valid   = (amo_op_q != AMO_NONE) & st_valid;
assign store_buffer_valid = (amo_op_q == AMO_NONE) & st_valid;
// ready is only reported when both buffers are ready
assign st_ready = amo_buffer_ready & store_buffer_ready;
// ---------------
// Store Queue
// ---------------
// Store queue instance. It is offered a store whenever store_buffer_valid is set and
// signals on store_buffer_ready whether it can take one.
store_buffer #(
  .CVA6Cfg(CVA6Cfg)
) store_buffer_i (
  // clock, reset and pipeline control
  .clk_i                (clk_i),
  .rst_ni               (rst_ni),
  .flush_i              (flush_i),
  .stall_st_pending_i   (stall_st_pending_i),
  // buffer status
  .no_st_pending_o      (no_st_pending_o),
  .store_buffer_empty_o (store_buffer_empty_o),
  // address checker
  .page_offset_i        (page_offset_i),
  .page_offset_matches_o(page_offset_matches_o),
  // commit handshake
  .commit_i             (commit_i),
  .commit_ready_o       (commit_ready_o),
  // store request handshake
  .valid_i              (store_buffer_valid),
  .ready_o              (store_buffer_ready),
  // The flush signal can be timing critical, and this valid signal is needed to check
  // whether the page offset matches. Functionally it makes no difference whether the
  // flush-gated valid or this one is used, as the whole pipeline is flushed anyway.
  .valid_without_flush_i(st_valid_without_flush),
  // store payload
  .paddr_i              (paddr_i),
  .data_i               (st_data_q),
  .be_i                 (st_be_q),
  .data_size_i          (st_data_size_q),
  .mem_paddr_o          (mem_paddr_o),
  // D$ interface
  .req_port_i           (req_port_i),
  .req_port_o           (req_port_o)
);
// AMO buffer instance. It is offered a request whenever amo_buffer_valid is set and
// signals on amo_buffer_ready whether it can take one.
amo_buffer #(
  .CVA6Cfg(CVA6Cfg)
) i_amo_buffer (
  // clock, reset and flush
  .clk_i             (clk_i),
  .rst_ni            (rst_ni),
  .flush_i           (flush_i),
  // request handshake
  .valid_i           (amo_buffer_valid),
  .ready_o           (amo_buffer_ready),
  // request payload: registered AMO opcode, data and size plus the physical address
  .amo_op_i          (amo_op_q),
  .paddr_i           (paddr_i),
  .data_i            (st_data_q),
  .data_size_i       (st_data_size_q),
  // commit / store-pending status
  .amo_valid_commit_i(amo_valid_commit_i),
  .no_st_pending_i   (no_st_pending_o),
  // AMO request/response channel
  .amo_req_o         (amo_req_o),
  .amo_resp_i        (amo_resp_i)
);
// ---------------
// Registers
// ---------------
// Store unit state: FSM state, the store payload held for the following cycle
// (data, byte enables, size), the transaction id and the decoded AMO operation.
// Cleared by the asynchronous, active-low reset; otherwise updated every clock edge.
always_ff @(posedge clk_i or negedge rst_ni) begin
  if (!rst_ni) begin
    // reset state: FSM in IDLE, no AMO selected, payload registers zeroed
    state_q        <= IDLE;
    amo_op_q       <= AMO_NONE;
    trans_id_q     <= '0;
    st_data_q      <= '0;
    st_data_size_q <= '0;
    st_be_q        <= '0;
  end else begin
    state_q        <= state_d;
    amo_op_q       <= amo_op_d;
    trans_id_q     <= trans_id_n;
    st_data_q      <= st_data_n;
    st_data_size_q <= st_data_size_n;
    st_be_q        <= st_be_n;
  end
end
endmodule